Advanced Computing Platform for Theoretical Physics
Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhangjingtong
rnn_mpo
Commits
38b39860
Commit
38b39860
authored
Nov 26, 2020
by
Pengfei Zhou
Browse files
correction
parent
41474319
Changes
3
Hide whitespace changes
Inline
Side-by-side
config.py
View file @
38b39860
...
...
@@ -51,7 +51,7 @@ group.add_argument( '--plot_every', type=int, default=200, help='plot every x')
group
.
add_argument
(
'--env'
,
type
=
str
,
default
=
'poetry'
,
help
=
'visdom env'
)
group
.
add_argument
(
'--max_gen_len'
,
type
=
int
,
default
=
200
,
help
=
'maximum generation length of poet'
)
group
.
add_argument
(
'--debug_file'
,
type
=
str
,
default
=
'/tmp/debugp'
,
help
=
'debug_file'
)
group
.
add_argument
(
'--model_path'
,
type
=
str
,
default
=
"./checkpoints/tang_
True_36
.pth"
,
help
=
'model_path'
)
group
.
add_argument
(
'--model_path'
,
type
=
str
,
default
=
"./checkpoints/tang_
False_1
.pth"
,
help
=
'model_path'
)
group
.
add_argument
(
'--prefix_words'
,
type
=
str
,
default
=
'仙路尽头谁为峰?一见无始道成空。'
,
help
=
'prefix_words'
)
group
.
add_argument
(
'--start_words'
,
type
=
str
,
default
=
'闲云潭影日悠悠'
,
help
=
'start_words'
)
group
.
add_argument
(
'--acrostic'
,
action
=
'store_true'
,
help
=
'acrostic'
)
...
...
main.py
View file @
38b39860
...
...
@@ -4,8 +4,8 @@ from torch.utils.data import DataLoader
from
torch
import
optim
from
torch
import
nn
from
model
import
*
from
torchnet
import
meter
import
tqdm
#
from torchnet import meter
#
import tqdm
from
config
import
args
from
test
import
*
import
sys
...
...
@@ -40,20 +40,24 @@ def train():
model
=
PoetryModel
(
args
)
Configimizer
=
optim
.
Adam
(
model
.
parameters
(),
lr
=
args
.
lr
)
criterion
=
nn
.
CrossEntropyLoss
()
'''
if
args
.
model_path
:
model
.
load_state_dict
(
t
.
load
(
args
.
model_path
,
map_location
=
'cpu'
))
'''
model
.
to
(
device
)
loss_meter
=
meter
.
AverageValueMeter
()
f
=
open
(
'result.txt'
,
'w'
)
#
loss_meter = meter.AverageValueMeter()
#
f = open('result.txt','w')
#sys.exit(0)
for
epoch
in
range
(
args
.
epoch
):
time0
=
time
.
time
()
loss_meter
.
reset
()
#loss_meter.reset()
total_loss
=
0.0
all_samples
=
0
#for li,data_ in tqdm.tqdm(enumerate(dataloader)):
for
li
,
data_
in
(
enumerate
(
dataloader
)):
#print(data_.shape)
now_samples
=
data_
.
size
(
0
)
all_samples
+=
now_samples
data_
=
data_
.
long
().
transpose
(
1
,
0
).
contiguous
()
data_
=
data_
.
to
(
device
)
...
...
@@ -70,7 +74,8 @@ def train():
loss
=
criterion
(
output
,
target
.
view
(
-
1
))
loss
.
backward
()
Configimizer
.
step
()
loss_meter
.
add
(
loss
.
item
())
total_loss
+=
loss
.
item
()
*
now_samples
#loss_meter.add(loss.item())
# 进行可视化
'''
if (1+li)%args.plot_every == 0:
...
...
@@ -83,7 +88,9 @@ def train():
f.write("
\n\n\n
")
f.flush()
'''
print
(
"训练损失为%s"
%
(
str
(
loss_meter
.
mean
)))
#print("训练损失为%s"%(str(loss_meter.mean)))
print
(
all_samples
)
print
(
' average loss: '
,
total_loss
/
all_samples
)
t
.
save
(
model
.
state_dict
(),
'%s_%s_%s.pth'
%
(
args
.
model_prefix
,
args
.
mpo
,
epoch
))
print
(
'used time: '
,
time
.
time
()
-
time0
)
...
...
main_all.py
View file @
38b39860
...
...
@@ -110,6 +110,8 @@ else:
if
args
.
model_type
==
'gru'
:
from
model
import
*
model
=
PoetryModel
(
args
)
if
args
.
model_path
:
model
.
load_state_dict
(
t
.
load
(
args
.
model_path
,
map_location
=
'cpu'
))
elif
args
.
model_type
==
'mytransf'
:
model
=
make_model
(
ntokens
,
N
=
args
.
N
,
d_model
=
args
.
d_model
,
d_ff
=
args
.
d_ff
,
h
=
args
.
h
,
dropout
=
0.2
)
elif
args
.
model_type
==
'pytransf'
:
...
...
@@ -117,19 +119,22 @@ elif args.model_type=='pytransf':
model
.
to
(
device
)
#model = TransformerModel(args.V, args.d_model, args.h, args.d_ff, args.N, dropout).to(device)
criterion
=
nn
.
CrossEntropyLoss
()
lr
=
5.0
# learning rate
lr
=
args
.
lr
# learning rate
optimizer
=
torch
.
optim
.
SGD
(
model
.
parameters
(),
lr
=
lr
)
scheduler
=
torch
.
optim
.
lr_scheduler
.
StepLR
(
optimizer
,
1
,
gamma
=
0.95
)
bptt
=
args
.
len_seq
scheduler
=
torch
.
optim
.
lr_scheduler
.
StepLR
(
optimizer
,
1
,
gamma
=
1
)
import
time
def
train
():
model
.
train
()
# Turn on the train mode
total_loss
=
0.
start_time
=
time
.
time
()
all_samples
=
0
#for batch, i in enumerate(range(0, bptt-1, bptt)):
for
i
,
batch
in
(
enumerate
(
train_dataloader
)):
#data, targets = get_batch(train_data, i)
bptt
=
batch
.
size
(
1
)
now_samples
=
batch
.
size
(
0
)
all_samples
+=
batch
.
size
(
0
)
optimizer
.
zero_grad
()
if
args
.
model_type
==
'gru'
:
batch
=
batch
.
transpose
(
1
,
0
).
to
(
args
.
device
)
...
...
@@ -147,29 +152,29 @@ def train():
else
:
batch
=
batch
.
transpose
(
1
,
0
).
to
(
args
.
device
)
data
,
targets
=
batch
[:
-
1
],
batch
[
1
:]
print
(
data
.
size
(
0
))
targets
=
targets
.
reshape
(
-
1
)
src_mask
=
model
.
generate_square_subsequent_mask
(
data
.
size
(
0
)).
to
(
args
.
device
)
output
=
model
(
data
,
src_mask
)
loss
=
criterion
(
output
.
view
(
-
1
,
ntokens
),
targets
)
loss
.
backward
()
torch
.
nn
.
utils
.
clip_grad_norm_
(
model
.
parameters
(),
0.
2
)
#
torch.nn.utils.clip_grad_norm_(model.parameters(), 0.
5
)
optimizer
.
step
()
total_loss
+=
loss
.
item
()
total_loss
+=
loss
.
item
()
*
now_samples
log_interval
=
200
if
True
:
cur_loss
=
total_loss
/
log_interval
if
i
%
log_interval
==
0
:
cur_loss
=
total_loss
/
all_samples
elapsed
=
time
.
time
()
-
start_time
print
(
'| epoch {:3d} |
{:5d}/{:5d} batches |
'
print
(
'| epoch {:3d} | '
'lr {:02.2f} | ms/batch {:5.2f} | '
'loss {:.8g} | ppl {:.8g}'
.
format
(
epoch
,
i
,
len
(
train_data
)
//
bptt
,
scheduler
.
get_lr
()[
0
],
epoch
,
scheduler
.
get_lr
()[
0
],
elapsed
*
1000
/
log_interval
,
cur_loss
*
bptt
,
math
.
exp
(
cur_loss
)))
total_loss
=
0
cur_loss
,
math
.
exp
(
cur_loss
)))
#
total_loss = 0
start_time
=
time
.
time
()
print
(
all_samples
)
print
(
total_loss
/
all_samples
)
def
evaluate
(
eval_model
,
dataloader
):
eval_model
.
eval
()
# Turn on the evaluation mode
total_loss
=
0.
...
...
@@ -182,20 +187,20 @@ def evaluate(eval_model, dataloader):
if
args
.
model_type
==
'gru'
:
batch
=
batch
.
transpose
(
1
,
0
).
to
(
args
.
device
)
data
,
targets
=
batch
[:
-
1
],
batch
[
1
:]
targets
=
targets
.
view
(
-
1
)
targets
=
targets
.
reshape
(
-
1
)
output
,
_
=
eval_model
(
data
)
loss
=
criterion
(
output
,
targets
)
elif
args
.
model_type
==
'mytransf'
:
batch
=
batch
.
to
(
args
.
device
)
data
,
targets
=
batch
[:,:
-
1
],
batch
[:,
1
:]
targets
=
targets
.
view
(
-
1
)
targets
=
targets
.
reshape
(
-
1
)
src_mask
=
make_mask
(
data
,
-
1
).
to
(
args
.
device
)
output
=
eval_model
(
data
,
src_mask
)
loss
=
criterion
(
output
.
view
(
-
1
,
ntokens
),
targets
)
else
:
batch
=
batch
.
transpose
(
1
,
0
).
to
(
args
.
device
)
data
,
targets
=
batch
[:
-
1
],
batch
[
1
:]
targets
=
targets
.
view
(
-
1
)
targets
=
targets
.
reshape
(
-
1
)
src_mask
=
model
.
generate_square_subsequent_mask
(
data
.
size
(
0
)).
to
(
args
.
device
)
output
=
eval_model
(
data
,
src_mask
)
loss
=
criterion
(
output
.
view
(
-
1
,
ntokens
),
targets
)
...
...
@@ -210,6 +215,7 @@ best_model = None
for
epoch
in
range
(
1
,
epochs
+
1
):
epoch_start_time
=
time
.
time
()
train
()
'''
val_loss = evaluate(model, val_dataloader)
#print('-' * 89)
#print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
...
...
@@ -220,7 +226,7 @@ for epoch in range(1, epochs + 1):
if val_loss < best_val_loss:
best_val_loss = val_loss
best_model = model
'''
scheduler
.
step
()
test_loss
=
evaluate
(
best_model
,
test_dataloader
)
print
(
'='
*
89
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment