Advanced Computing Platform for Theoretical Physics

Commit fc796aba authored by Lei Wang

better control of gradient flow

parent d9aea05f
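The point of the change is to control where gradients flow: during the variational optimization the input tensor is detached, so only the MPS tensor A receives gradients, and when the physical quantity is evaluated the optimized A is detached, so the gradient of lnZ reaches only the coupling K. Below is a minimal sketch of that pattern; the scalar T and the function f here are toy stand-ins, not code from this repository.

import torch

K = torch.tensor(0.44, dtype=torch.float64, requires_grad=True)  # physical coupling (toy value)
T = torch.cosh(K) + torch.sinh(K)                                 # toy stand-in for the tensor built from K
A = torch.nn.Parameter(torch.randn(3, dtype=torch.float64))       # toy stand-in for the MPS tensor

def f(A, T):
    # toy scalar playing the role of the mpsrg loss, coupling A and T
    return -torch.log(A.norm()*T)

# 1) optimization: detach T, so the gradient reaches only the variational parameter A
loss = f(A, T.detach())
loss.backward()
print(A.grad is not None, K.grad)      # True None  -> K gets no gradient here

# 2) physical quantity: detach A, so the gradient of lnZ reaches only K
lnZ = -f(A.detach(), T)
dlnZ = torch.autograd.grad(lnZ, K)[0]  # energy-like quantity, cf. -d lnZ / d beta in the script
print(dlnZ.item())

In the script itself this corresponds to the calls vmps(T.detach(), ...) and lnZ = -mpsrg(A.detach(), T).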
import torch
torch.set_num_threads(4)
def mpsrg(A, T):
    # contract the network with the symmetrized MPS A and return -lnZ per site
    Asymm = (A + A.permute(2, 1, 0))*0.5
    D, d = Asymm.shape[0], Asymm.shape[1]
    #t0 = time.time()
    B = torch.einsum('ldr,adcb,icj->lairbj', (Asymm, T, Asymm)).contiguous().view(D**2*d, D**2*d)
    C = torch.einsum('ldr,idj->lirj', (Asymm, Asymm)).contiguous().view(D**2, D**2)
    # eigenvectors=True is required for a differentiable backward through symeig
    w, _ = torch.symeig(B, eigenvectors=True)
    lnZ1 = torch.log(w.abs().max())
    w, _ = torch.symeig(C, eigenvectors=True)
    lnZ2 = torch.log(w.abs().max())

    #lnZ1 = 0.0
    #lnZ2 = 0.0
    #for i in range(no_iter):
    #    s = B.norm(1)
    #    lnZ1 = lnZ1 + torch.log(s)/2**i
    #    B = B/s
    #    B = torch.mm(B, B)
    #    s = C.norm(1)
    #    lnZ2 = lnZ2 + torch.log(s)/2**i
    #    C = C/s
    #    C = torch.mm(C, C)

    # lnZ per site = log lambda_max(B) - log lambda_max(C)
    return -lnZ1 + lnZ2


def vmps(T, d, D, no_iter, Nepochs=5):

    A = torch.nn.Parameter(0.01*torch.randn(D, d, D, dtype=T.dtype, device=T.device))
    optimizer = torch.optim.LBFGS([A], max_iter=10)
    def closure():
        optimizer.zero_grad()
        #print ('einsum', time.time()- t0)
        #print ((B-B.t()).abs().sum(), (C-C.t()).abs().sum())
        #t0 = time.time()
        loss = mpsrg(A, T) # -lnZ
        #print ('mpsrg', time.time()- t0)
        print (' loss', loss.item())
        #t0 = time.time()
        loss.backward(retain_graph=False)
        #print ('backward', time.time()- t0)

@@ -51,7 +50,7 @@ def vmps(T, d, D, no_iter, Nepochs=5):
        loss = optimizer.step(closure)
        print (' epoch, free energy', epoch, loss.item())
    return loss, A
if __name__=='__main__':
    import time

@@ -67,27 +66,18 @@ if __name__=='__main__':
    args = parser.parse_args()
    device = torch.device("cpu" if args.cuda<0 else "cuda:"+str(args.cuda))
    dtype = torch.float32 if args.float32 else torch.float64
    #build tensor; K carries requires_grad so physical quantities follow from d lnZ / d K
    K = torch.tensor([args.beta], dtype=torch.float64, device=device, requires_grad=True)
    c = torch.sqrt(torch.cosh(K))
    s = torch.sqrt(torch.sinh(K))
    M = torch.stack([torch.cat([c, s]), torch.cat([c, -s])])   # factorizes the bond weight exp(K*s*s')
    T = torch.einsum('ai,aj,ak,al->ijkl', (M, M, M, M))        # 2D Ising partition-function tensor

    #optimization; T.detach() keeps the optimization loss out of K's computation graph
    _, A = vmps(T.detach(), 2, args.Dcut, args.Niter, args.Nepochs)

    #recompute lnZ using optimized A; A.detach() so that lnZ depends on K only through T
    lnZ = -mpsrg(A.detach(), T)

    dlnZ = torch.autograd.grad(lnZ, K, create_graph=True)[0] # En = -d lnZ / d beta
    print (-dlnZ.item())

    #second order derivative evaluated in this way seems not correct, no effect of environment
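The last comment presumably refers to differentiating dlnZ once more through the graph kept alive by create_graph=True; a guess at that step, left commented out and using hypothetical names (d2lnZ, Cv), is sketched below. Since A was optimized against T.detach() and then detached again, the dependence of the optimized MPS (the environment) on K never enters the graph; an envelope-type argument saves the first derivative at a variational optimum, but not the second, which would explain the remark.

    #a guess at what "this way" means, not part of the original file:
    #d2lnZ = torch.autograd.grad(dlnZ, K)[0]    # second autograd pass: ~ d^2 lnZ / d beta^2
    #Cv = args.beta**2 * d2lnZ.item()           # specific heat per site would follow from this
    #print (Cv)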