Advanced Computing Platform for Theoretical Physics

Commit 38b39860 authored by Pengfei Zhou

correction

parent 41474319
......@@ -51,7 +51,7 @@ group.add_argument( '--plot_every', type=int, default=200, help='plot every x')
group.add_argument( '--env', type=str, default='poetry', help='visdom env')
group.add_argument( '--max_gen_len', type=int, default=200, help='maximum generation length of poet')
group.add_argument( '--debug_file', type=str, default='/tmp/debugp', help='debug_file')
group.add_argument( '--model_path', type=str, default="./checkpoints/tang_True_36.pth", help='model_path')
group.add_argument( '--model_path', type=str, default="./checkpoints/tang_False_1.pth", help='model_path')
group.add_argument( '--prefix_words', type=str, default='仙路尽头谁为峰?一见无始道成空。', help='prefix_words')
group.add_argument( '--start_words', type=str, default='闲云潭影日悠悠', help='start_words')
group.add_argument( '--acrostic', action='store_true', help='acrostic')
......
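The new default model_path points at an epoch-1 checkpoint from a non-MPO run. The name follows the '%s_%s_%s.pth' % (model_prefix, mpo, epoch) pattern used by the t.save(...) call later in this commit; a minimal sketch, assuming args.model_prefix is './checkpoints/tang':

model_prefix = './checkpoints/tang'   # assumption inferred from the default path
mpo, epoch = False, 1                 # non-MPO run, first epoch
path = '%s_%s_%s.pth' % (model_prefix, mpo, epoch)
assert path == './checkpoints/tang_False_1.pth'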
......@@ -4,8 +4,8 @@ from torch.utils.data import DataLoader
from torch import optim
from torch import nn
from model import *
from torchnet import meter
import tqdm
#from torchnet import meter
#import tqdm
from config import args
from test import *
import sys
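With torchnet and tqdm commented out, the script no longer needs either external package: the training loop below falls back to a plain enumerate and hand-rolled loss bookkeeping. For reference, a minimal stand-in covering the three members train() used from the meter (reset(), add(), .mean) — a sketch, not torchnet's actual implementation:

class AverageValueMeter:
    """Minimal stand-in for torchnet.meter.AverageValueMeter (sketch)."""
    def __init__(self):
        self.reset()
    def reset(self):
        self.sum = 0.0    # running sum of added values
        self.n = 0        # count of added values
        self.mean = 0.0   # running mean, the attribute train() printed
    def add(self, value, n=1):
        self.sum += value * n
        self.n += n
        self.mean = self.sum / self.n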
......@@ -40,20 +40,24 @@ def train():
model = PoetryModel(args)
Configimizer = optim.Adam(model.parameters(),lr=args.lr)
criterion = nn.CrossEntropyLoss()
'''
if args.model_path:
model.load_state_dict(t.load(args.model_path,map_location='cpu'))
'''
model.to(device)
loss_meter = meter.AverageValueMeter()
f = open('result.txt','w')
#loss_meter = meter.AverageValueMeter()
#f = open('result.txt','w')
#sys.exit(0)
for epoch in range(args.epoch):
time0=time.time()
loss_meter.reset()
#loss_meter.reset()
total_loss=0.0
all_samples=0
#for li,data_ in tqdm.tqdm(enumerate(dataloader)):
for li, data_ in (enumerate(dataloader)):
#print(data_.shape)
now_samples=data_.size(0)
all_samples+=now_samples
data_ = data_.long().transpose(1,0).contiguous()
data_ = data_.to(device)
......@@ -70,7 +74,8 @@ def train():
loss = criterion(output,target.view(-1))
loss.backward()
Configimizer.step()
loss_meter.add(loss.item())
total_loss+=loss.item()*now_samples
#loss_meter.add(loss.item())
# visualization
'''
if (1+li)%args.plot_every == 0:
......@@ -83,7 +88,9 @@ def train():
f.write("\n\n\n")
f.flush()
'''
print("训练损失为%s"%(str(loss_meter.mean)))
#print("训练损失为%s"%(str(loss_meter.mean)))
print(all_samples)
print(' average loss: ', total_loss/all_samples)
t.save(model.state_dict(),'%s_%s_%s.pth'%(args.model_prefix,args.mpo,epoch))
print('used time: ', time.time()-time0)
......
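The replacement bookkeeping weights each batch's loss by its sample count (loss.item()*now_samples), so total_loss/all_samples is a true per-sample mean even when the final batch is partial; meter.add(loss.item()) had weighted every batch equally. A small numeric sketch with hypothetical batch losses and sizes:

losses = [2.0, 2.0, 4.0]    # per-batch mean losses (hypothetical)
sizes  = [128, 128, 32]     # now_samples per batch; the last batch is partial
unweighted = sum(losses) / len(losses)                               # ~2.67
weighted   = sum(l * n for l, n in zip(losses, sizes)) / sum(sizes)  # ~2.22
# 'weighted' is what the new total_loss/all_samples computes.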
......@@ -110,6 +110,8 @@ else:
if args.model_type=='gru':
from model import *
model= PoetryModel(args)
if args.model_path:
model.load_state_dict(t.load(args.model_path,map_location='cpu'))
elif args.model_type=='mytransf':
model=make_model(ntokens,N=args.N,d_model=args.d_model,d_ff=args.d_ff,h=args.h,dropout=0.2)
elif args.model_type=='pytransf':
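This hunk moves checkpoint loading out of train() (where it is now quoted out) and into the gru branch. Deserializing with map_location='cpu' lets a GPU-trained checkpoint load on a CPU-only machine before the later model.to(device) call moves the weights to the target device; a minimal sketch of the pattern, reusing names from this file:

import torch as t
model = PoetryModel(args)             # gru branch, as above
if args.model_path:                   # e.g. './checkpoints/tang_False_1.pth'
    state = t.load(args.model_path, map_location='cpu')  # tensors land on CPU
    model.load_state_dict(state)
model.to(device)                      # then move to the target device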
......@@ -117,19 +119,22 @@ elif args.model_type=='pytransf':
model.to(device)
#model = TransformerModel(args.V, args.d_model, args.h, args.d_ff, args.N, dropout).to(device)
criterion = nn.CrossEntropyLoss()
lr = 5.0 # learning rate
lr = args.lr # learning rate
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.95)
bptt=args.len_seq
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=1)
import time
def train():
model.train() # Turn on the train mode
total_loss = 0.
start_time = time.time()
all_samples=0
#for batch, i in enumerate(range(0, bptt-1, bptt)):
for i, batch in (enumerate(train_dataloader)):
#data, targets = get_batch(train_data, i)
bptt=batch.size(1)
now_samples=batch.size(0)
all_samples+=batch.size(0)
optimizer.zero_grad()
if args.model_type=='gru':
batch=batch.transpose(1,0).to(args.device)
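Two hyper-parameter changes land here: the hard-coded lr = 5.0 becomes args.lr, and the StepLR gamma goes from 0.95 to 1, which turns the decay schedule into a no-op. A sketch verifying that gamma=1 leaves the learning rate constant (the lr value below is a stand-in for args.lr):

import torch
params = [torch.nn.Parameter(torch.zeros(1))]
opt = torch.optim.SGD(params, lr=0.5)  # stand-in for args.lr
sched = torch.optim.lr_scheduler.StepLR(opt, step_size=1, gamma=1.0)
for _ in range(3):
    opt.step()
    sched.step()
    print(opt.param_groups[0]['lr'])   # 0.5 every epoch: no decay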
......@@ -147,29 +152,29 @@ def train():
else:
batch=batch.transpose(1,0).to(args.device)
data,targets=batch[:-1],batch[1:]
print(data.size(0))
targets=targets.reshape(-1)
src_mask = model.generate_square_subsequent_mask(data.size(0)).to(args.device)
output = model(data, src_mask)
loss = criterion(output.view(-1, ntokens), targets)
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 0.2)
#torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
optimizer.step()
total_loss += loss.item()
total_loss += loss.item()*now_samples
log_interval = 200
if True :
cur_loss = total_loss / log_interval
if i%log_interval==0:
cur_loss = total_loss/all_samples
elapsed = time.time() - start_time
print('| epoch {:3d} | {:5d}/{:5d} batches | '
print('| epoch {:3d} | '
'lr {:02.2f} | ms/batch {:5.2f} | '
'loss {:.8g} | ppl {:.8g}'.format(
epoch, i, len(train_data) // bptt, scheduler.get_lr()[0],
epoch, scheduler.get_lr()[0],
elapsed * 1000 / log_interval,
cur_loss*bptt, math.exp(cur_loss)))
total_loss = 0
cur_loss, math.exp(cur_loss)))
#total_loss = 0
start_time = time.time()
print(all_samples)
print(total_loss/all_samples)
def evaluate(eval_model, dataloader):
eval_model.eval() # Turn on the evaluation mode
total_loss = 0.
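The reworked logging divides the sample-weighted total by all_samples before exponentiating, since perplexity is the exponential of the average cross-entropy; the removed cur_loss*bptt scaling had reported a sum-like quantity instead. A worked sketch with hypothetical running totals:

import math
total_loss, all_samples = 640.0, 288  # hypothetical running totals
cur_loss = total_loss / all_samples   # average cross-entropy ~= 2.22
ppl = math.exp(cur_loss)              # perplexity ~= 9.2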
......@@ -182,20 +187,20 @@ def evaluate(eval_model, dataloader):
if args.model_type=='gru':
batch=batch.transpose(1,0).to(args.device)
data,targets=batch[:-1],batch[1:]
targets=targets.view(-1)
targets=targets.reshape(-1)
output,_ = eval_model(data)
loss = criterion(output, targets)
elif args.model_type=='mytransf':
batch=batch.to(args.device)
data,targets=batch[:,:-1],batch[:,1:]
targets=targets.view(-1)
targets=targets.reshape(-1)
src_mask=make_mask(data,-1).to(args.device)
output = eval_model(data, src_mask)
loss = criterion(output.view(-1, ntokens), targets)
else:
batch=batch.transpose(1,0).to(args.device)
data,targets=batch[:-1],batch[1:]
targets=targets.view(-1)
targets=targets.reshape(-1)
src_mask = model.generate_square_subsequent_mask(data.size(0)).to(args.device)
output = eval_model(data, src_mask)
loss = criterion(output.view(-1, ntokens), targets)
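All three branches of evaluate() switch targets.view(-1) to targets.reshape(-1). Unlike the first file's train(), which calls .contiguous() after its transpose, these slices stay non-contiguous, and view() rejects non-contiguous inputs while reshape() falls back to a copy. A minimal sketch of the failure mode:

import torch
batch = torch.arange(6).reshape(2, 3).transpose(1, 0)  # non-contiguous
targets = batch[1:]                   # slicing keeps the odd strides
try:
    targets.view(-1)                  # RuntimeError: needs contiguous memory
except RuntimeError:
    flat = targets.reshape(-1)        # works: copies when necessary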
......@@ -210,6 +215,7 @@ best_model = None
for epoch in range(1, epochs + 1):
epoch_start_time = time.time()
train()
'''
val_loss = evaluate(model, val_dataloader)
#print('-' * 89)
#print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
......@@ -220,7 +226,7 @@ for epoch in range(1, epochs + 1):
if val_loss < best_val_loss:
best_val_loss = val_loss
best_model = model
'''
scheduler.step()
test_loss = evaluate(best_model, test_dataloader)
print('=' * 89)
......
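One caveat in this commit: with the validation block quoted out, best_model keeps the initial None shown in the hunk context above, so the final evaluate(best_model, test_dataloader) would fail at the test step. A defensive fallback, sketched here and not part of the commit:

test_loss = evaluate(best_model if best_model is not None else model,
                     test_dataloader)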