Advanced Computing Platform for Theoretical Physics

Commit 8652b0c3 authored by Pengfei Zhou

correction

parent f318b503
@@ -75,6 +75,9 @@ group.add_argument('--d_model',type=int,default=256,help='model dimension for transformer')
group.add_argument('--d_ff',type=int,default=1024,help='dimension of feed forward of transformer')
group.add_argument('--h',type=int,default=1,help='number of attention heads')
group.add_argument('--model_type',type=str,default='gru')
+group.add_argument('--seed1',type=int,default=1)
+group.add_argument('--seed2',type=int,default=1)
+group.add_argument('--data_type',type=str,default='random_pattern')
args = parser.parse_args()
'''
if args.ingpu >= 0:
......
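The three flags added above feed the training script below: `--seed1` seeds the PyTorch RNGs (parameter init, dropout), `--seed2` the NumPy/stdlib RNGs (data sampling), and `--data_type` switches between synthetic random patterns and the Tang-poetry corpus. A minimal sketch of the seeding contract this sets up (the `set_seeds` helper is ours, not the repo's):

```python
import random
import numpy as np
import torch

def set_seeds(seed1: int, seed2: int) -> None:
    # seed1 -> model-side randomness, seed2 -> data-side randomness,
    # mirroring the --seed1/--seed2 split introduced in this commit
    torch.manual_seed(seed1)           # CPU RNG (init, dropout)
    torch.cuda.manual_seed_all(seed1)  # all GPU RNGs; no-op without CUDA
    np.random.seed(seed2)              # data_gen_all draws via np.random
    random.seed(seed2)

set_seeds(1, 1)
a = np.random.randint(1, 11, size=(2, 5))
set_seeds(1, 1)
b = np.random.randint(1, 11, size=(2, 5))
assert (a == b).all()  # same seed2 => identical synthetic batches
```

Splitting the seeds lets one resample the data while keeping the initialization fixed, or vice versa.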
import torch
+from config import args
+import random
+import numpy as np
+np.random.seed(args.seed2)
+random.seed(args.seed2)
+torch.manual_seed(args.seed1)
+torch.cuda.manual_seed_all(args.seed1)
from torch.utils.data import DataLoader
from model_T import *
from config import args
#V=11
criterion=LabelSmoothing(size=args.V,padding_idx=0,smoothing=0.0)
#model_type='trsf'
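`LabelSmoothing` is pulled in by the star import from `model_T` and instantiated with `smoothing=0.0`, i.e. plain negative log-likelihood with the padding class zeroed out. For reference, a sketch in the Annotated-Transformer formulation (our reconstruction, not the repo's file):

```python
import torch
import torch.nn as nn

class LabelSmoothingSketch(nn.Module):
    """KL(smoothed target dist || model log-probs); smoothing=0.0 -> plain NLL."""
    def __init__(self, size, padding_idx=0, smoothing=0.0):
        super().__init__()
        self.criterion = nn.KLDivLoss(reduction='sum')
        self.size = size                    # vocabulary size, args.V here
        self.padding_idx = padding_idx
        self.confidence = 1.0 - smoothing

    def forward(self, x, target):
        # x: (n_tokens, V) log-probabilities; target: (n_tokens,) gold indices
        true_dist = torch.full_like(x, (1.0 - self.confidence) / (self.size - 2))
        true_dist.scatter_(1, target.unsqueeze(1), self.confidence)
        true_dist[:, self.padding_idx] = 0   # never put mass on padding
        pad_rows = torch.nonzero(target == self.padding_idx, as_tuple=False)
        if pad_rows.numel() > 0:             # padded positions contribute no loss
            true_dist.index_fill_(0, pad_rows.squeeze(1), 0.0)
        return self.criterion(x, true_dist)
```

With `smoothing=0.0` the target distribution collapses to one-hot, so the KL term equals the negative log-probability of the gold token.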
@@ -9,35 +18,56 @@ if args.model_type=='gru':
    from model import *
    model= PoetryModel(args)
else:
-    model= make_model(args.V,N=args.N,d_model=args.d_model,d_ff=args.d_ff,h=args.h,dropout=0.1)
+    model= make_model(args.V,N=args.N,d_model=args.d_model,d_ff=args.d_ff,h=args.h,dropout=0.5)
optimizer =torch.optim.Adam(model.parameters(),lr=0.001)
def data_gen_all(V,num_samples):
-    data=torch.LongTensor(np.random.randint(1,V,size=(num_samples,args.len_seq)))
+    data=torch.from_numpy(np.random.randint(1,V,size=(num_samples,args.len_seq)))
    data[:,0]=0
    print(data)
-    return data
+    return DataLoader(data,batch_size=args.batch_size,
+                      shuffle=True,
+                      num_workers=2)
def make_mask(tgt,pad=0):
    tgt_mask=(tgt!=pad).unsqueeze(-2)
    tgt_mask=tgt_mask & subsequent_mask(tgt.size(-1)).type_as(tgt_mask.data)
-    return tgt_mask
-dataloader=data_gen_all(args.V,args.num_samples)
-for epoch in range(100):
+    return tgt_mask
+if args.data_type=='random_pattern':
+    dataloader=data_gen_all(args.V,args.num_samples)  # already returns a DataLoader
+else:
+    datas = np.load("tang.npz",allow_pickle=True)
+    data = datas['data']
+    print(data.shape[0])
+    print(np.max(data))
+    ix2word = datas['ix2word'].item()   # index -> character lookup
+    word2ix = datas['word2ix'].item()   # character -> index lookup
+    print(ix2word[data[1][0]])
+    data = torch.from_numpy(data)[:args.num_train]
+    dataloader=DataLoader(data,batch_size=args.batch_size,
+                          shuffle=True,
+                          num_workers=2)
+model.to(args.device)
+print(args.device)
+for epoch in range(args.epoch):
    model.train()
    start=time.time()
    total_loss=0
    num_samples=0
-    for i, batch in enumerate(dataloader):
-        optimizer.zero_grad()
+    for i, batch in (enumerate(dataloader)):
+        batch=batch.long().contiguous().to(args.device)
        src,trg_y=batch[:,:-1],batch[:,1:]
-        if model_type=='gru':
+        optimizer.zero_grad()
+        if args.model_type=='gru':
            out,_=model(src.transpose(1,0))   # GRU expects (seq_len, batch)
-            loss=criterion(out,trg_y.view(-1))
+            loss=criterion(out,trg_y.reshape(-1))
        else:
-            src_mask=make_mask(src,0)
+            src_mask=make_mask(src,0).to(args.device)
            out=model(src,src_mask)
            loss=criterion(out.view(-1,args.V),trg_y.view(-1))
        total_loss+=loss.item()   # .item() keeps the graph from accumulating across batches
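The loop trains next-token prediction with teacher forcing: `src` drops the last token, `trg_y` drops the first, so the output at position t is scored against token t+1. A toy illustration (values made up):

```python
import torch

batch = torch.tensor([[0, 11, 7, 42, 3]])  # 0 is the start symbol, as in data_gen_all
src, trg_y = batch[:, :-1], batch[:, 1:]
# src   = [[ 0, 11,  7, 42]]   model input
# trg_y = [[11,  7, 42,  3]]   targets: token t+1 predicted from the prefix up to t
assert src.shape == trg_y.shape
```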
@@ -46,7 +76,7 @@ for epoch in range(100):
            print('step: %d Loss: %f Sec 50 batches: %f' %(i,loss,elapsed))
            start=time.time()
        loss.backward()
-        if model_type!='gru':
+        if args.model_type!='gru':
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10, norm_type=2)
        optimizer.step()
......
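`subsequent_mask` also lives in `model_T`; a self-contained sketch of the usual `triu` construction and of how `make_mask` above combines it with the padding mask (a reconstruction under Annotated-Transformer conventions, not the repo's code):

```python
import torch

def subsequent_mask(size):
    # (1, size, size) boolean; True where attention is allowed (key j <= query i)
    return torch.triu(torch.ones(1, size, size, dtype=torch.uint8), diagonal=1) == 0

def make_mask(tgt, pad=0):
    tgt_mask = (tgt != pad).unsqueeze(-2)  # (B, 1, L) padding mask over key positions
    return tgt_mask & subsequent_mask(tgt.size(-1)).type_as(tgt_mask.data)

tokens = torch.tensor([[4, 9, 2, 0]])      # last position is padding
print(make_mask(tokens, pad=0))
# row i allows columns j <= i, and the padded key (last column) is masked in every row
```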
@@ -19,9 +19,11 @@ def make_model(src_vocab,N=6,d_model=256,d_ff=1024,h=1,dropout=0.1):
        Encoder(EncoderLayer(d_model,c(attn),c(ff),dropout),N),
        nn.Sequential(Embeddings(d_model,src_vocab)),
        Generator(d_model,src_vocab))
    for p in model.parameters():
        if p.dim()>1:
-            nn.init.uniform_(p,-0.0001,0.0001)
+            nn.init.xavier_uniform_(p)
    return model
class Batch:
    def __init__(self,src,pad=8292):
......
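The init change above is the substantive fix in this commit: `U(-1e-4, 1e-4)` leaves every weight matrix nearly zero, while Xavier/Glorot scales the range per layer. A small illustration of the bound `xavier_uniform_` uses (the shapes are ours, chosen to match the default `d_model`/`d_ff`):

```python
import math
import torch
import torch.nn as nn

w = torch.empty(256, 1024)            # e.g. a d_model x d_ff projection
nn.init.xavier_uniform_(w)            # U(-b, b) with b = sqrt(6 / (fan_in + fan_out))
bound = math.sqrt(6.0 / (1024 + 256))
assert w.abs().max().item() <= bound  # b ~ 0.068 here, vs the old 1e-4 cap

# Same pattern as the patched make_model: matrices only, biases keep their defaults.
model = nn.Sequential(nn.Linear(256, 1024), nn.ReLU(), nn.Linear(1024, 256))
for p in model.parameters():
    if p.dim() > 1:
        nn.init.xavier_uniform_(p)
```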