Advanced Computing Platform for Theoretical Physics

Commit 174d156e authored by Pengfei Zhou's avatar Pengfei Zhou
Browse files

graph classification

parent cd68302b
......@@ -4,9 +4,8 @@ import argparse
# Command-line configuration, part 1: graph/model hyper-parameters.
parser = argparse.ArgumentParser(description='')
parser.add_argument("-n", type=int, default=30, help="number of nodes")
parser.add_argument("-k", type=int, default=3, help="degree, integer")
parser.add_argument("-c", type=float, default=3.0, help="average degree in gnp")
parser.add_argument("-beta", type=float, default=0.8, help="beta")
parser.add_argument("-gamma", type=float, default=1.0, help="external fields strength")
parser.add_argument('-channel', type=int, default=32, help='graph convolution channel')
parser.add_argument("-maxdim", type=int, default=30, help="maximum dimension of intermediate tensor")
parser.add_argument("-bond", type=int, default=2, help="initial bond dimension of every dimension of tensor")
parser.add_argument("-choice", type=int, default=1, help="choice to initialize the tensor, 1 denotes on node, 2 denotes on edge")
......@@ -14,22 +13,25 @@ parser.add_argument("-seed", type=int, default=1, help="seed")
# Command-line configuration, part 2: seeds, graph source, contraction limits,
# and training hyper-parameters for graph classification.
parser.add_argument("-seed1", type=int, default=2, help='seed for initialize tensors')
parser.add_argument("-seed2", type=int, default=-1, help="seed2")
parser.add_argument("-graph", type=str, default='rrg', help="graph")
parser.add_argument("-Jij", type=str, default='ferro',
                    choices=['ferro', 'rand', 'randn', 'sk', 'binary'])
parser.add_argument("-which", type=int, default=0, help='choose which graph to calculate free energy')
parser.add_argument("-field", type=str, default='zero', choices=['one', 'rand', 'randn'])
parser.add_argument("-node", type=str, default='mps', choices=['raw', 'mps'],
                    help="node representation, raw or mps")
parser.add_argument("-cuda", type=int, default=-1, help="GPU #")
parser.add_argument("-verbose", type=int, default=-1, help="verbose")
parser.add_argument("-Dmax", type=int, default=32,
                    help="Maximum physical bond dimension of the tensors. With Dmax<0, contraction will be exact")
parser.add_argument("-chi", type=int, default=32, help="Maximum virtual bond dimension of the mps.")
parser.add_argument("-fvsenum", action='store_true',
                    help="compute exact solution by enumerating configurations of feedback set")
parser.add_argument("-permutation", action='store_true', help='an isomorphism bijection')
parser.add_argument('-output', action='store_true')
parser.add_argument('-nx_iso', action='store_true')
parser.add_argument('-wl', action='store_true')
parser.add_argument('-dataset', type=str, default='MUTAG', help='name of dataset')
parser.add_argument('-batch_size', type=int, default=32)
parser.add_argument('-fold_idx', type=int, default=0)
parser.add_argument('-filename', type=str, default='out.txt')
parser.add_argument('-disable-cuda', action='store_true', help='Disable CUDA')
parser.add_argument('-device', type=int, default=0, help="which gpu device to use")
parser.add_argument('-num_layer', type=int, default=10, help='number of nmf neural network layers')
parser.add_argument('-lr', type=float, default=0.01)
parser.add_argument('-epoch', type=int, default=200)
args = parser.parse_args()
import math
import numpy as np
import torch
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.model_selection import StratifiedKFold
import dgl
# default collate function
def collate(samples):
    """Collate a list of (graph, label) pairs into (batched_graph, label_tensor)."""
    graphs = [pair[0] for pair in samples]
    labels = [pair[1] for pair in samples]
    # Cast every node feature to float32 so downstream layers see one dtype.
    for graph in graphs:
        for feat_name in graph.node_attr_schemes():
            graph.ndata[feat_name] = graph.ndata[feat_name].float()
    # Edge features are not touched here.
    return dgl.batch(graphs), torch.tensor(labels)
class GraphDataLoader():
    """Wrap a graph-classification dataset into train/validation DataLoaders.

    split_name='fold10' selects one fold of a stratified 10-fold split
    (fold_idx picks the fold); split_name='rand' uses a seeded random split
    with the given split_ratio.
    """

    def __init__(self,
                 dataset,
                 batch_size,
                 device,
                 collate_fn=None,
                 seed=0,
                 shuffle=True,
                 split_name='fold10',
                 fold_idx=0,
                 split_ratio=0.7):
        self.shuffle = shuffle
        self.seed = seed
        # Lazy default so the module-level `collate` need not exist yet when
        # this class is defined; passing collate_fn explicitly still wins.
        if collate_fn is None:
            collate_fn = collate
        # Pin host memory only when batches are destined for a CUDA device.
        self.kwargs = {'pin_memory': True} if 'cuda' in device.type else {}

        labels = [l for _, l in dataset]

        if split_name == 'fold10':
            train_idx, valid_idx = self._split_fold10(
                labels, fold_idx, seed, shuffle)
        elif split_name == 'rand':
            train_idx, valid_idx = self._split_rand(
                labels, split_ratio, seed, shuffle)
        else:
            raise NotImplementedError("unknown split_name: %r" % (split_name,))

        train_sampler = SubsetRandomSampler(train_idx)
        valid_sampler = SubsetRandomSampler(valid_idx)
        self.train_loader = DataLoader(
            dataset, sampler=train_sampler,
            batch_size=batch_size, collate_fn=collate_fn, **self.kwargs)
        self.valid_loader = DataLoader(
            dataset, sampler=valid_sampler,
            batch_size=batch_size, collate_fn=collate_fn, **self.kwargs)

    def train_valid_loader(self):
        """Return the (train_loader, valid_loader) pair."""
        return self.train_loader, self.valid_loader

    def _split_fold10(self, labels, fold_idx=0, seed=0, shuffle=True):
        """Stratified 10-fold split; returns (train_idx, valid_idx) of fold_idx."""
        if not 0 <= fold_idx < 10:
            raise ValueError("fold_idx must be from 0 to 9, got %d" % fold_idx)
        # sklearn only accepts random_state when shuffle=True.
        skf = StratifiedKFold(n_splits=10, shuffle=shuffle,
                              random_state=seed if shuffle else None)
        idx_list = list(skf.split(np.zeros(len(labels)), labels))  # split(x, y)
        train_idx, valid_idx = idx_list[fold_idx]
        print("train_set : test_set = %d : %d"
              % (len(train_idx), len(valid_idx)))
        return train_idx, valid_idx

    def _split_rand(self, labels, split_ratio=0.7, seed=0, shuffle=True):
        """Seeded random (or ordered, if shuffle=False) split by split_ratio."""
        num_entries = len(labels)
        indices = list(range(num_entries))
        if shuffle:  # the original ignored `shuffle` and always shuffled
            np.random.seed(seed)
            np.random.shuffle(indices)
        split = int(math.floor(split_ratio * num_entries))
        train_idx, valid_idx = indices[:split], indices[split:]
        print("train_set : test_set = %d : %d"
              % (len(train_idx), len(valid_idx)))
        return train_idx, valid_idx
\ No newline at end of file
from mfnet import NMFnet
import torch
import dgl
import sys
import numpy as np
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from dgl.data.gindt import GINDataset
from dataloader import GraphDataLoader, collate
from args import args
import torch.nn.functional as F
def init_mf(n, bond, dim3):
    """Uniform initial marginals of shape (n, bond, dim3), normalised over dim 1.

    NOTE(review): softmax over a tensor of ones is constant, so the result does
    not actually depend on the RNG; the manual_seed call is kept only for its
    side effect on torch's global RNG state.
    """
    torch.manual_seed(args.seed1)
    uniform_logits = torch.ones(n, bond, dim3)
    return F.softmax(uniform_logits, dim=1)
def train(args, net, trainloader, optimizer, criterion, epoch):
    """Run one optimisation epoch; return the mean training loss per batch."""
    net.train()
    loss_sum = 0.0
    n_batches = len(trainloader)
    for graphs, labels in trainloader:
        # Batched graphs are shipped to the device in the model's forward.
        labels = labels.to(args.device)
        # Node features are freshly initialised mean-field marginals, not the
        # dataset's node attributes. len(graphs) presumably gives the total
        # node count of the batched graph -- TODO confirm against dgl version.
        feat = init_mf(len(graphs), args.bond, args.channel).to(args.device)
        graphs = graphs.to(args.device)
        outputs = net(graphs, feat, args.num_layer)
        loss = criterion(outputs, labels)
        loss_sum += loss.item()
        optimizer.zero_grad()
        # retain_graph: NMFnet builds self.W once in __init__, so its autograd
        # graph is reused across batches -- confirm this is intended.
        loss.backward(retain_graph=True)
        optimizer.step()
    # The final (possibly smaller) batch is averaged like the others.
    return loss_sum / n_batches
def eval_net(args, net, dataloader, criterion):
    """Evaluate `net` on `dataloader`.

    Returns (mean loss per sample, accuracy). Leaves the network back in
    train mode, matching the original behaviour.
    """
    net.eval()
    total = 0
    total_loss = 0.0
    total_correct = 0
    # no_grad: evaluation needs no autograd graph; the original tracked
    # gradients here and wasted memory for nothing.
    with torch.no_grad():
        for graphs, labels in dataloader:
            # Features are re-initialised marginals, not dataset attributes.
            feat = init_mf(len(graphs), args.bond, args.channel).to(args.device)
            graphs = graphs.to(args.device)
            labels = labels.to(args.device)
            total += len(labels)
            outputs = net(graphs, feat, args.num_layer)
            _, predicted = torch.max(outputs.data, 1)
            total_correct += (predicted == labels.data).sum().item()
            loss = criterion(outputs, labels)
            # CrossEntropyLoss averages over the batch by default, so scale
            # back up by the batch size to accumulate a per-sample total.
            total_loss += loss.item() * len(labels)
    loss, acc = 1.0 * total_loss / total, 1.0 * total_correct / total
    net.train()
    return loss, acc
def main(args):
    """Train NMFnet on a GIN graph-classification dataset and log metrics."""
    # Seed every RNG that args.seed controls.
    torch.manual_seed(seed=args.seed)
    np.random.seed(seed=args.seed)

    use_cuda = not args.disable_cuda and torch.cuda.is_available()
    if use_cuda:
        args.device = torch.device("cuda:" + str(args.device))
        torch.cuda.manual_seed_all(seed=args.seed)
    else:
        args.device = torch.device("cpu")

    dataset = GINDataset(args.dataset, False)
    loaders = GraphDataLoader(
        dataset, batch_size=args.batch_size, device=args.device,
        collate_fn=collate, seed=args.seed, shuffle=True,
        split_name='fold10', fold_idx=args.fold_idx)
    # Alternative: split_name='rand' with split_ratio=0.7.
    trainloader, validloader = loaders.train_valid_loader()

    # Random positive couplings A; the model derives W from them internally.
    weight = args.beta * torch.exp(
        torch.randn(args.bond, args.bond, args.channel)).to(args.device)
    model = NMFnet(weight, dataset.gclasses).to(args.device)

    criterion = nn.CrossEntropyLoss()  # default reduction averages the batch
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

    for epoch in range(args.epoch):
        train(args, model, trainloader, optimizer, criterion, epoch)
        scheduler.step()
        train_loss, train_acc = eval_net(args, model, trainloader, criterion)
        valid_loss, valid_acc = eval_net(args, model, validloader, criterion)
        # Append per-epoch metrics unless logging is disabled via -filename "".
        if args.filename != "":
            with open(args.filename, 'a') as f:
                f.write('%s' % (args.dataset))
                f.write("\n")
                f.write("%f %f %f %f" % (train_loss, train_acc,
                                         valid_loss, valid_acc))
                f.write("\n")


if __name__ == "__main__":
    main(args)
\ No newline at end of file
import torch
import dgl
import dgl.function as fn
import torch.nn as nn
from dgl import DGLGraph
import torch.nn.functional as F
import networkx as nx
from args import args
import random
import time
import numpy as np
def convert(filename, x):
    """Decode line ``x`` of a graph6-format file.

    Returns ``(n, edges)`` where ``n`` is the vertex count read from the first
    line (all graphs in the file are assumed to share it -- TODO confirm) and
    ``edges`` is an array of ``[u, v]`` pairs for the x-th graph.
    """
    with open(filename, 'r') as f:
        lines = f.readlines()
    n = ord(lines[0][0]) - 63  # graph6 header byte encodes n as chr(n + 63)
    # Upper-triangle edge order used by graph6: column-major (j, i) with j < i.
    uptri_edges = []
    for i in range(1, n, 1):
        for j in range(0, i):
            uptri_edges.append([j, i])
    line = lines[x].strip()
    # Each byte after the header packs 6 adjacency bits; zero-pad each chunk
    # to exactly 6 characters. (The original ':6b' space-padding only worked
    # by accident because '1' bits keep their position under left-padding.)
    bitstring = ''.join(f'{ord(ch) - 63:06b}' for ch in line[1:])
    edges_index = [i for i in range(len(bitstring)) if bitstring[i] == '1']
    return n, np.array(uptri_edges)[edges_index]
def init_mf(n, bond, dim3):
    """Random initial marginals of shape (n, bond, dim3), normalised over dim 1."""
    torch.manual_seed(args.seed1)  # reproducible initialisation
    logits = torch.rand(n, bond, dim3)
    return F.softmax(logits, dim=1)
# B = A @ A.t() is a symmetric matrix (per channel)
class NMFnet(nn.Module):
    """Graph classifier: k rounds of mean-field message passing with a
    per-channel coupling W = log(A A^T), pooled through a free-energy-style
    readout and a linear classifier.
    """

    def __init__(self, weight, n_classes):
        super(NMFnet, self).__init__()
        self.weight = nn.Parameter(weight)  # weight is A; W is derived from it
        # Keep a W attribute for shape inspection / external access, but
        # forward() recomputes it from the current parameter. The original
        # computed W only here, so optimiser steps updated `weight` while
        # inference kept using the stale initial coupling (and required
        # retain_graph=True to backprop through the cached graph).
        self.W = self._coupling()
        self.classify = nn.Linear(self.W.shape[2], n_classes)

    def _coupling(self):
        """Per-channel log(A @ A^T) built from the current weight parameter."""
        return torch.log(torch.einsum('abc,dbc->adc', self.weight, self.weight))

    def forward(self, g, feature, k):  # feature is a 3-order tensor: (nodes, bond, channel)
        W = self._coupling()  # recompute so updated parameters take effect
        g.ndata['h'] = feature
        for i in range(k):
            g.update_all(fn.copy_src(src='h', out='m'),
                         fn.sum(msg='m', out='h'))
            g.ndata['h'] = F.softmax(
                torch.einsum('abc,bdc->adc', g.ndata['h'], W), dim=1)
        # Free-energy readout: energy U from edges, entropy S from nodes.
        edges = g.all_edges()
        g.edata['y'] = torch.einsum(
            'abc,bdc->adc', g.nodes[edges[0]].data['h'], W)
        g.edata['y'] = torch.einsum(
            'abc,abc->ac', g.edata['y'], g.nodes[edges[1]].data['h'])
        # /2.0 presumably compensates for edges stored in both directions --
        # TODO confirm against how the graphs are constructed.
        U = -dgl.sum_edges(g, 'y') / 2.0
        g.ndata['h'] = torch.einsum(
            'abc,abc->ac', g.ndata['h'], torch.log(g.ndata['h']))
        S = -dgl.sum_nodes(g, 'h')
        with torch.no_grad():
            # Recover per-graph node counts as sum/mean of the entropy terms
            # (avoids needing g.batch_num_nodes()).
            S_ = -dgl.mean_nodes(g, 'h')
            batch_n = S / S_
        return self.classify((U - S) / batch_n)
if __name__ == '__main__':
    torch.set_default_dtype(torch.float32)
    n = 20
    beta = 1
    # Load the args.which-th graph from the graph6 collection.
    n, edges = convert('Generated_graphs.20.03.g6', args.which)
    if args.permutation:
        # Relabel vertices by a random bijection (isomorphism-test helper).
        randomindex = [i for i in range(n)]
        random.shuffle(randomindex)
        randomindex = np.array(randomindex)
        edges[:, 0] = randomindex[edges[:, 0]]
        edges[:, 1] = randomindex[edges[:, 1]]
    G = nx.Graph()
    G.add_nodes_from(np.arange(n))
    G.add_edges_from(edges)
    g = dgl.DGLGraph(G)
    # A second graph is loaded but currently unused beyond construction.
    n2, edges2 = convert('Generated_graphs.20.03.g6', args.which + 1)
    G1 = nx.Graph()
    G1.add_nodes_from(np.arange(n2))
    G1.add_edges_from(edges2)
    torch.manual_seed(args.seed1)
    tensor2 = torch.randn(args.bond, args.bond, 4,
                          dtype=torch.float32, device='cpu')
    # NMFnet requires the number of output classes; the original call omitted
    # it and raised a TypeError. Two classes matches the binary demo setup --
    # TODO confirm the intended class count.
    mf = NMFnet(torch.exp(beta * tensor2), 2)
    feature = init_mf(n, tensor2.shape[0], 4)
    fe_nmf = mf(g, feature, 10)
    print(fe_nmf)
\ No newline at end of file
......@@ -44,7 +44,7 @@ def init_bp(n,E1,bond):
return marg_i,cav_ij
class MeanField():
def __init__(self, graph,W,beta,mydevice):
self.W=W
self.W=W@W.t()
self.conv_crit = 1e-6
self.max_iter = 2*10**3
......@@ -140,11 +140,11 @@ if __name__ =='__main__' :
G1.add_edges_from(edges2)
torch.manual_seed(args.seed1)
tensor2=torch.randn(args.bond,args.bond,dtype=torch.float64,device='cpu')
tensor2=torch.triu(tensor2)+torch.triu(tensor2,diagonal=1).t()
#tensor2=torch.triu(tensor2)+torch.triu(tensor2,diagonal=1).t()
mf=MeanField(G,torch.exp(beta*tensor2),beta,'cpu')
fe_bp=mf.BP()
print("free_energy={:.15g}".format(fe_bp.item()))
fe_nmf=mf.NMF()
fe_nmf=mf.NMF()/n
print("free_energy={:.15g}".format(fe_nmf.item()))
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment