Commit 39e225bc authored by telestellar

# RNN compression

Using a matrix product operator (MPO) to compress the fully connected output layer of an RNN-based Tang poetry generator.
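
For a rough sense of the compression, here is a parameter count for the layer this repo factorizes (a minimal sketch using the default `--dims "8,4,4,8;12,10,7,10"` and `--chi 4`; the core shapes follow the `MPO` class in the model code below):

```python
chi = 4
d_in, d_out = [8, 4, 4, 8], [12, 10, 7, 10]    # 1024 -> 8400

dense = 1024 * 8400                            # 8,601,600 weights in a plain nn.Linear
mpo = (d_in[0] * d_out[0] * chi                # first core: (in, out, chi)
       + sum(d_in[i] * chi * d_out[i] * chi    # middle cores: (in, chi, out, chi)
             for i in range(1, len(d_in) - 1))
       + d_in[-1] * chi * d_out[-1])           # last core: (in, chi, out)

print(dense, mpo)  # 8601600 vs 1792 core weights (bias excluded)
```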
#
# Legacy configuration, kept for reference (superseded by the argparse flags below):
#
# class Config(object):
#     num_layers = 2                            # number of LSTM layers
#     data_path = 'data/'                       # path to the poem text files
#     pickle_path = 'tang.npz'                  # preprocessed binary file
#     author = None                             # learn only poems by one author
#     constrain = None                          # length constraint
#     category = 'poet.tang'                    # category: Tang poems, or Song poems (poet.song)
#     lr = 5e-4
#     weight_decay = 5e-4
#     use_gpu = True
#     epoch = 50
#     batch_size = 64
#     maxlen = 125                              # longer samples are truncated; shorter ones are left-padded with spaces
#     plot_every = 200                          # visualize once every 200 batches
#     # use_env = True                          # whether to use visdom
#     env = 'poetry'                            # visdom env
#     max_gen_len = 200                         # maximum length of a generated poem
#     debug_file = '/tmp/debugp'
#     model_path = "./checkpoints/tang_36.pth"  # path to a pretrained model
#     prefix_words = '仙路尽头谁为峰?一见无始道成空。'  # not part of the poem; steers the mood of the generated text
#     start_words = '闲云潭影日悠悠'             # opening words of the poem
#     acrostic = False                          # whether to generate an acrostic poem
#     model_prefix = 'checkpoints/tang'         # prefix for saved model checkpoints
#     embedding_dim = 512
#     hidden_dim = 1024
#     mpo = False
import argparse
import torch
import os
parser = argparse.ArgumentParser()
group = parser.add_argument_group('parameters')
group.add_argument( '--net', type=str, default='mpo', choices=['mpo','mpo2'], help='network type')
group.add_argument( '--num_layers', type=int, default=2, help='num_layers of rnn')
group.add_argument( '--data_path', type=str, default='data/', help='data_path of poet')
group.add_argument( '--pickle_path', type=str, default='tang.npz', help='pickle_path')
group.add_argument( '--category', type=str, default='poet.tang', help='category of poet')
group.add_argument( '--lr', type=float, default=5e-4, help='learning rate')
group.add_argument( '--weight_decay', type=float, default=1e-4, help='weight_decay')
group.add_argument( '--ingpu', action='store_false', help='keep everything on the GPU (enabled by default; pass this flag to disable)')
group.add_argument( '--cuda', type=int, default=0, help='ID of GPU to use, -1 for disabled')
group.add_argument( '--epoch', type=int, default=50, help='number of training epochs')
group.add_argument( '--batch_size', type=int, default=64, help='samples per batch')
group.add_argument( '--vocab_size', type=int, default=8400, help='vocab_size')
group.add_argument( '--maxlen', type=int, default=125, help='maximum number of words in one sample')
group.add_argument( '--plot_every', type=int, default=200, help='log/visualize every N batches')
group.add_argument( '--env', type=str, default='poetry', help='visdom env')
group.add_argument( '--max_gen_len', type=int, default=200, help='maximum generation length of poet')
group.add_argument( '--debug_file', type=str, default='/tmp/debugp', help='debug_file')
group.add_argument( '--model_path', type=str, default="./checkpoints/tang_49.pth", help='model_path')
group.add_argument( '--prefix_words', type=str, default='仙路尽头谁为峰?一见无始道成空。', help='prefix_words')
group.add_argument( '--start_words', type=str, default='闲云潭影日悠悠', help='start_words')
group.add_argument( '--acrostic', action='store_true', help='acrostic')
group.add_argument( '--model_prefix', type=str, default='checkpoints/tang', help='model_prefix')
group.add_argument( '--embedding_dim', type=int, default=512, help='embedding_dim')
group.add_argument( '--hidden_dim', type=int, default=1024, help='hidden_dim')
group.add_argument( '--mpo', action='store_true', help='use an MPO-factorized output layer')
group.add_argument( '--chi', type=int, default=4, help='bond dimension (rank) of the low-rank/MPO representation')
group.add_argument( '--dims', type=str, default="8,4,4,8;12,10,7,10", help='semicolon-separated factorizations of hidden_dim and vocab_size')
group.add_argument( '--bias', action='store_true', help='use bias')
group.add_argument( '--dtype', type=str, default='float32', choices=['float32', 'float64'], help='dtype')
group.add_argument( '--cpu', action='store_true', help='force running on CPU')
args = parser.parse_args()
if args.cuda >= 0 and torch.cuda.is_available():
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.cuda)
    args.use_cuda = True
    args.device = torch.device('cuda:0')
else:
args.use_cuda=False
args.device = torch.device('cpu')
if args.dtype == 'float32':
args.dtype = torch.float32
elif args.dtype == 'float64':
args.dtype = torch.float64
else:
raise ValueError('Unknown dtype: {}'.format(args.dtype))
if args.cpu:
args.use_cuda=False
args.device = torch.device('cpu')
#print("args.device=",args.device)
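
# Example training invocation (a sketch; flags as defined above, assuming the
# training entry point below is main.py):
#   python main.py --mpo --chi 4 --dims "8,4,4,8;12,10,7,10"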
# Quick sanity check of the preprocessed tang.npz dataset
import numpy as np

datas = np.load("tang.npz", allow_pickle=True)
print(type(datas))
print(datas.files)
data = datas['data']
print(type(data))
print(type(datas['ix2word']))
#print(datas['ix2word'].shape[0])
#print(datas['ix2word'].item())
ix2word = datas['ix2word'].item()
word2ix = datas['word2ix'].item()
print(data[1])
# Decode sample 4 back into characters as a readability check
for i in range(len(data[4])):
    print(ix2word[data[4][i]])
print(len(ix2word))
print(len(word2ix))
print(type(word2ix))
import torch as t
import numpy as np
from torch.utils.data import DataLoader
from torch import optim
from torch import nn
from model import *
from torchnet import meter
import tqdm
from config import args
# Generate a poem from its given opening words
def generate(model, start_words, ix2word, word2ix, prefix_words=None):
    results = list(start_words)
    start_words_len = len(start_words)
    # The first token fed to the model is <START>
    input = t.Tensor([word2ix['<START>']]).view(1, 1).long()
    if args.ingpu:
        input = input.cuda()
    hidden = None
    # If a style prefix is given, run it through the model first to build up the hidden state
    if prefix_words:
        # The first input is <START>; later inputs are the prefix characters.
        # The first hidden state is None; later ones are the states produced so far.
        for word in prefix_words:
            output, hidden = model(input, hidden)
            input = input.data.new([word2ix[word]]).view(1, 1)
    # Start generating the poem proper. Without a style prefix, hidden = None and
    # input = <START>; otherwise input is the last prefix character and hidden is
    # the state accumulated above.
    for i in range(args.max_gen_len):
        output, hidden = model(input, hidden)
        # print(output.shape)
        # While still inside the given opening words, feed the given character;
        # the model is run only to advance the hidden state.
        if i < start_words_len:
            w = results[i]
            input = input.data.new([word2ix[w]]).view(1, 1)
        # Otherwise feed the model's own prediction back in as the next input
        else:
            # print(output.data[0].topk(1))
            top_index = output.data[0].topk(1)[1][0].item()
            w = ix2word[top_index]
            results.append(w)
            input = input.data.new([top_index]).view(1, 1)
        if w == '<EOP>':
            del results[-1]
            break
    return results
# Generate an acrostic poem
def gen_acrostic(model, start_words, ix2word, word2ix, prefix_words=None):
    result = []
    start_words_len = len(start_words)
    input = (t.Tensor([word2ix['<START>']]).view(1, 1).long())
    if args.ingpu:
        input = input.cuda()
    # Index of the next acrostic head character to place
    index = 0
    pre_word = '<START>'
    hidden = None
    # If a style prefix exists, use it to build up the hidden state
    if prefix_words:
        for word in prefix_words:
            output, hidden = model(input, hidden)
            input = (input.data.new([word2ix[word]])).view(1, 1)
    # Start generating the poem
    for i in range(args.max_gen_len):
        output, hidden = model(input, hidden)
        top_index = output.data[0].topk(1)[1][0].item()
        w = ix2word[top_index]
        # The previous character ended a line, so this position starts a new line
        if pre_word in {'。', ',', '?', '!', '<START>'}:
            if index == start_words_len:
                break
            else:
                # Force the next head character at the start of the new line
                w = start_words[index]
                index += 1
                # print(w, word2ix[w])
                input = (input.data.new([word2ix[w]])).view(1, 1)
        else:
            input = (input.data.new([top_index])).view(1, 1)
        result.append(w)
        pre_word = w
    return result
import torch as t
import numpy as np
from torch.utils.data import DataLoader
from torch import optim
from torch import nn
from model import *
from torchnet import meter
import tqdm
from config import args
from generate import *
import sys
import time
def train():
if args.ingpu:
args.device = t.device("cuda")
else:
args.device = t.device("cpu")
device = args.device
datas = np.load("tang.npz",allow_pickle=True)
print(datas)
data = datas['data']
print(data)
#print(ix2word[data[1]])
print(data.shape[0])
print(np.max(data))
ix2word = datas['ix2word'].item()
#print(ix2word)
word2ix = datas['word2ix'].item()
#print(word2ix)
print(ix2word[data[1][0]])
data = t.from_numpy(data)
dataloader = DataLoader(data,
batch_size=args.batch_size,
shuffle=True,
num_workers=2)
# model = PoetryModel(args, vocab_size=8400,
# embedding_dim=args.embedding_dim,
# hidden_dim = args.hidden_dim,mpo=args.mpo)
model = PoetryModel(args)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
criterion = nn.CrossEntropyLoss()
#if Config.model_path:
#model.load_state_dict(t.load(Config.model_path,map_location='cpu'))
model.to(device)
loss_meter = meter.AverageValueMeter()
f = open('result.txt','w')
#sys.exit(0)
for epoch in range(args.epoch):
time0=time.time()
loss_meter.reset()
#for li,data_ in tqdm.tqdm(enumerate(dataloader)):
for li, data_ in (enumerate(dataloader)):
#print(data_.shape)
data_ = data_.long().transpose(1,0).contiguous()
data_ = data_.to(device)
#print(data_.shape)
            optimizer.zero_grad()
input_,target = data_[:-1,:],data_[1:,:]
output,_ = model(input_)
#print(output)
#print(target)
#print("Here",output.shape)
#print(target.shape)
            # target must be flattened to 1-D to match CrossEntropyLoss's expected shape
            loss = criterion(output, target.view(-1))
loss.backward()
            optimizer.step()
loss_meter.add(loss.item())
            # Periodic logging and sample generation
            if (1 + li) % args.plot_every == 0:
                print("training loss = %s" % str(loss_meter.mean))
                f.write("training loss = %s" % str(loss_meter.mean))
for word in list(u"春江花月夜"):
gen_poetry = ''.join(generate(model,word,ix2word,word2ix))
print(gen_poetry)
f.write(gen_poetry)
f.write("\n\n\n")
f.flush()
t.save(model.state_dict(),'%s_%s_%s.pth'%(args.model_prefix,args.mpo,epoch))
print('used time: ', time.time()-time0)
if __name__ == '__main__':
train()
import numpy as np
from config import args
import torch.nn as nn
from torch.nn.parameter import Parameter
import torch
import torch.nn.functional as F
import math
import mpo
class Linear(nn.Module):
def __init__(self, in_feat, out_feat,bias=True):
super(Linear,self).__init__()
self.in_feat=in_feat
self.out_feat=out_feat
self.weight=Parameter(torch.Tensor(out_feat,in_feat))
        if bias:
            self.bias = Parameter(torch.Tensor(out_feat))
        else:
            # register a None bias so reset_parameters() and forward() can test it
            self.register_parameter('bias', None)
        self.reset_parameters()
def reset_parameters(self):
torch.nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))
if self.bias is not None:
fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weight)
bound = 1 / math.sqrt(fan_in)
torch.nn.init.uniform_(self.bias, -bound, bound)
def forward(self,input):
return F.linear(input, self.weight, self.bias)
class PoetryModel(nn.Module):
# def __init__(self, args, vocab_size, embedding_dim, hidden_dim=1024, mpo=True):
def __init__(self, args):
super(PoetryModel, self).__init__()
self.args = args
self.hidden_dim = args.hidden_dim
self.embeddings = nn.Embedding(args.vocab_size, args.embedding_dim)
self.gru = nn.GRU(args.embedding_dim, self.hidden_dim, num_layers=args.num_layers)
self.bond_dim = args.chi
self.dims = []
for dim in args.dims.split(';'):
self.dims.append([int(i) for i in dim.split(",")])
assert (np.prod(self.dims[0]) == self.hidden_dim)
assert (np.prod(self.dims[-1]) == args.vocab_size)
        if args.mpo:
            # the MPO factorization currently assumes the default vocab_size of 8400
            assert args.vocab_size == 8400
            if args.net == 'mpo':
                self.linear = mpo.MPO(self.hidden_dim, args.vocab_size, self.dims[0], self.dims[1], self.bond_dim)
            elif args.net == 'mpo2':
                self.linear = mpo.MPO2(self.dims[0], self.dims[1], chi=args.chi)
        else:
            self.linear = Linear(self.hidden_dim, args.vocab_size)
def forward(self, input, hidden=None):
seq_len, batch_size = input.size()
#print(input.shape)
if hidden is None:
            h_0 = input.data.new(self.args.num_layers, batch_size, self.hidden_dim).fill_(0).float()
#c_0 = input.data.new(Config.num_layers, batch_size, self.hidden_dim).fill_(0).float()
else:
#h_0, c_0 = hidden
h_0 = hidden
embeds = self.embeddings(input)
#output, hidden = self.lstm(embeds, (h_0, c_0))
output, hidden = self.gru(embeds, h_0)
output = self.linear(output.view(seq_len * batch_size, -1))
#output = self.linear(output)
return output, hidden
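
# Minimal forward-pass sanity check (a sketch; relies on the argparse defaults
# from config.py, i.e. vocab_size=8400, embedding_dim=512, hidden_dim=1024).
if __name__ == '__main__':
    model = PoetryModel(args)
    dummy = torch.zeros(7, 2, dtype=torch.long)   # (seq_len, batch) of token ids
    out, h = model(dummy)
    print(out.shape, h.shape)  # (7*2, 8400) and (num_layers, 2, 1024)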
import torch.nn as nn
from torch.nn.parameter import Parameter
import torch
import numpy as np
import torch.nn.functional as F
import math
from config import args
from utils import AttrProxy
class MPO(nn.Module):
    # Factorizes the 8400 x 1024 output weight: input 1024 = 8*4*4*8, output 8400 = 12*10*7*10 (cf. the --dims default)
def __init__(self,in_feat,out_feat,array_in,array_out,bond_dim,bias=True) :
super(MPO,self).__init__()
self.array_in=array_in
self.array_out=array_out
self.bond_dim=bond_dim
self.in_feat=in_feat
self.out_feat=out_feat
self.define_parameters()
        if bias:
            self.bias = Parameter(torch.Tensor(out_feat))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()
def define_parameters(self):
self.weight=torch.nn.ParameterList([])
for i in range(len(self.array_in)):
if i==0:
self.weight.append(Parameter(torch.Tensor(self.array_in[0],self.array_out[0],self.bond_dim)))
elif i==len(self.array_in)-1:
self.weight.append(Parameter(torch.Tensor(self.array_in[i],self.bond_dim,self.array_out[i])))
else:
self.weight.append(Parameter(torch.Tensor(self.array_in[i],self.bond_dim,self.array_out[i],self.bond_dim)))
def reset_parameters(self):
if self.bias is not None:
fan_out=self.out_feat
bound = 1 / math.sqrt(fan_out)
torch.nn.init.uniform_(self.bias, -bound, bound)
gain=1.0
std = gain * math.sqrt(2.0 / float(self.in_feat + self.out_feat))
a = math.sqrt(3.0) * std
for i in self.weight:
# print(i.shape)
a=math.sqrt(a*math.sqrt(3.0/self.bond_dim))
torch.nn.init.uniform_(i,-a,a)
def forward(self,input):
shape=self.array_in.copy()
shape.insert(0,input.shape[0])
output=input.reshape(shape)
'''
output=torch.einsum('abcde,bmf->acdefm',output,self.weight[0])
output=torch.einsum('acdefm,cfng->adegmn',output,self.weight[1])
output=torch.einsum('adegmn,dgph->aehmnp',output,self.weight[2])
output=torch.einsum('aehmnp,ehq->amnpq',output,self.weight[3]).reshape(-1,self.out_feat)
'''
for i in range(len(self.weight)):
if i==0:
output = torch.einsum('abcde,bmf->acfdem',output,self.weight[i])
elif i==len(self.weight)-1:
output = torch.einsum('abcdef,bcg->adefg',output,self.weight[i]).reshape(-1,self.out_feat)
else:
output = torch.einsum('abcdef,bcgh->adhefg',output,self.weight[i])
if self.bias is not None:
output+=self.bias
        # TODO: replace the hard-coded 4-core einsum strings above with an automatic contraction
return output
class MPO2(nn.Module):
def __init__(self, Din, Dout, bias=False, chi=2, seed=-1):
"""
Din (and Dout) should be a tuple containing all input (output) dimensions
"""
super(MPO2, self).__init__()
self.Din = Din
self.Dout = Dout
self.bondim = [chi for i in Din]
self.bondim[-1] = 1
print("Din=", Din, "Dout=", Dout)
assert (len(self.Din) == len(self.Dout))
self.tensors = []
self.npin = np.prod(self.Din)
self.npout = np.prod(self.Dout)
if seed > 0:
torch.manual_seed(seed)
for i, din in enumerate(self.Din):
dout = self.Dout[i]
a = torch.rand(self.bondim[i - 1], self.bondim[i], din, dout) / math.sqrt(self.npout)
exec("self.tensors_" + str(i) + "=Parameter(a.clone())")
if bias:
self.bias = Parameter(torch.zeros([self.npout, 1]))
else:
self.register_parameter('bias', None)
self.tensors = AttrProxy(self, 'tensors_')
print(self)
print("Parameters in the class")
params = list(self.parameters())
params = list(filter(lambda p: p.requires_grad, params))
nparams = int(sum([np.prod(p.shape) for p in params]))
print('Total number of trainable parameters: {}'.format(nparams))
for param in self.parameters():
print(type(param.data), param.size())
self.reset_parameters()
def reset_parameters(self):
self.in_feat = args.hidden_dim
self.out_feat = args.vocab_size
self.bond_dim = args.chi
if self.bias is not None:
fan_out = self.out_feat
bound = 1 / math.sqrt(fan_out)
torch.nn.init.uniform_(self.bias, -bound, bound)
gain = 1.0
std = gain * math.sqrt(2.0 / float(self.in_feat + self.out_feat))
a = math.sqrt(3.0) * std
tensors = []
for j in range(len(self.Din)):
tensors.append(self.tensors[j])
for i in tensors:
# print('i', i.shape)
a = math.sqrt(a * math.sqrt(3.0 / self.bond_dim))
torch.nn.init.uniform_(i, -a, a)
def forward(self, input):
input = input.reshape(input.shape[0],1,1,self.Din[0],-1)
for i in range(len(self.Din)):
input = torch.einsum("bijkl,jakm->bimal",input,self.tensors[i])
Dnext = self.Din[i+1] if i<len(self.Din)-1 else 1
newshape=[input.shape[0],input.shape[1]*input.shape[2],input.shape[3],Dnext,-1]
input = input.contiguous().view(newshape)
return input.contiguous().view(input.shape[0],-1)
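
# A minimal shape and parameter-count smoke test (a sketch; assumes the default
# factorization 1024 = 8*4*4*8, 8400 = 12*10*7*10 and bond dimension 4).
if __name__ == '__main__':
    layer = MPO(1024, 8400, [8, 4, 4, 8], [12, 10, 7, 10], bond_dim=4)
    x = torch.randn(3, 1024)
    y = layer(x)
    print(y.shape)  # expected: torch.Size([3, 8400])
    # 1,792 core weights plus 8,400 bias entries, vs 8,601,600 for a dense Linear
    print(sum(p.numel() for p in layer.parameters()))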
from main import *
from model import *
from config import args
import torch as t
from generate import *
import time
def userTest():
    print("Initializing......")
    datas = np.load("tang.npz", allow_pickle=True)
    data = datas['data']
    ix2word = datas['ix2word'].item()
    word2ix = datas['word2ix'].item()
    # model = PoetryModel(args, 8400, args.embedding_dim, args.hidden_dim, mpo=args.mpo)
    model = PoetryModel(args)
    model.load_state_dict(t.load(args.model_path, map_location='cpu'))
    if args.ingpu:
        model.to(t.device('cuda'))
    print("Initialization complete!\n")
    while True:
        print("Welcome to the Tang poetry generator.\n"
              "Enter 1 for opening-line mode\n"
              "Enter 2 for acrostic mode\n")
        mode = int(input())
        if mode == 1:
            print("Enter the opening line of the poem (five- or seven-character lines work best)")
            start_words = str(input())
            time0 = time.time()
            gen_poetry = ''.join(generate(model, start_words, ix2word, word2ix))
            print("Generated poem: %s\n" % (gen_poetry))
            print('used_time:', time.time() - time0)
        elif mode == 2:
            print("Enter the acrostic head characters (at most 16; an even count works best)")
            start_words = str(input())
            time0 = time.time()
            gen_poetry = ''.join(gen_acrostic(model, start_words, ix2word, word2ix))
            # print("Generated poem: %s\n" % ("浩歌夜坐生光塘,然余坏竹入袁墙。最爱林泉多往事,喜逢日月共流光。欢讴未暇听雷响,芷壑已惊蛛雁忙。若无一年离世曰,宝莲山中有仙郎。"))
            print("Generated poem: %s\n" % (gen_poetry))
            print('used_time:', time.time() - time0)
if __name__ == '__main__':
userTest()
import glob
import io
import os
import numpy as np
import torch
from torch import nn
from config import args
def mylog(s):
    # args may not define --log / --no_stdout; fall back to stdout-only logging
    if getattr(args, 'log', None):
        with open(args.log, 'a', newline='\n') as f:
            f.write(s + u'\n')
        if not getattr(args, 'no_stdout', False):
            print(s)
    else:
        print(s)
class AttrProxy(object):
"""Translates index lookups into attribute lookups."""
def __init__(self, module, prefix):
self.module = module
self.prefix = prefix
def __getitem__(self, i):
return getattr(self.module, self.prefix + str(i))
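
# Usage (a minimal sketch): given a module that defines numbered attributes
# such as self.tensors_0, self.tensors_1, ... (as MPO2 does via setattr),
#
#     proxy = AttrProxy(module, 'tensors_')
#     proxy[1]   # equivalent to module.tensors_1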