import os
import pickle

import numpy as np
from scipy.spatial.distance import cdist
from tqdm.auto import tqdm

from .. import backend as F
from ..convert import graph as dgl_graph
from .dgl_dataset import DGLDataset
from .utils import download, extract_archive, load_graphs, save_graphs, Subset


def sigma(dists, kth=8):
    """Compute a per-node scale from a pairwise distance matrix.

    Parameters
    ----------
    dists : numpy.ndarray
        2-D distance matrix. Callers in this file pass the square output of
        ``cdist(x, x)``, so the row length equals ``dists.shape[0]``.
    kth : int, optional
        Partition index handed to ``np.partition``; the ``kth + 1`` smallest
        entries of each row are summed and divided by ``kth``. Default: 8.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num_nodes, 1)``, offset by ``1e-8`` so that it is
        safe to divide by.
    """
    num_nodes = dists.shape[0]

    if kth >= num_nodes:
        # Not enough nodes to partition at index ``kth``: np.partition needs
        # ``kth < num_nodes``, so the equality case must also land here
        # (the previous ``kth > num_nodes`` test let it raise ValueError).
        scale = np.ones((num_nodes, 1))
    else:
        # Smallest ``kth + 1`` distances of every row (unordered).
        knns = np.partition(dists, kth, axis=-1)[:, : kth + 1]
        scale = knns.sum(axis=1).reshape((knns.shape[0], 1)) / kth

    return scale + 1e-8


def compute_adjacency_matrix_images(coord, feat, use_feat=True):
    """Build a symmetric, zero-diagonal affinity matrix for one image.

    Parameters
    ----------
    coord : numpy.ndarray
        Super-pixel coordinates; reshaped to ``(-1, 2)`` before use.
    feat : numpy.ndarray
        Super-pixel features, passed to ``cdist`` when ``use_feat`` is True
        (so it is expected to be 2-D with one row per super-pixel).
    use_feat : bool, optional
        If True, the affinity combines coordinate and feature distances;
        otherwise only coordinate distances are used. Default: True.

    Returns
    -------
    numpy.ndarray
        Square matrix ``A`` with ``A == A.T`` and zeros on the diagonal.
    """
    coord = coord.reshape(-1, 2)
    # Compute coordinate distance.
    c_dist = cdist(coord, coord)

    if use_feat:
        # Compute feature distance.
        f_dist = cdist(feat, feat)
        # Compute adjacency.
        A = np.exp(
            -((c_dist / sigma(c_dist)) ** 2) - (f_dist / sigma(f_dist)) ** 2
        )
    else:
        A = np.exp(-((c_dist / sigma(c_dist)) ** 2))

    # Convert to symmetric matrix (the per-row scaling above is not
    # symmetric) and drop self-affinity.
    A = 0.5 * (A + A.T)
    A[np.diag_indices_from(A)] = 0
    return A


def compute_edges_list(A, kth=9):
    """Select neighbor indices and their affinities for every node.

    Parameters
    ----------
    A : numpy.ndarray
        Square affinity matrix.
    kth : int, optional
        Size of the upper partition taken from each row. Default: 9.

    Returns
    -------
    knns : numpy.ndarray
        Neighbor indices, one row per node.
    knn_values : numpy.ndarray
        Affinity values taken from ``A``, one row per node.

    Notes
    -----
    When ``num_nodes > kth`` the slice ``[new_kth:-1]`` keeps ``kth - 1``
    columns of each partitioned row (the last column is dropped). Otherwise
    every node is connected to every other node.
    """
    # Get k-similar neighbor indices for each node.
    num_nodes = A.shape[0]
    new_kth = num_nodes - kth

    if num_nodes > kth:
        # NOTE(review): indices and values come from two independent
        # partition calls; presumably they line up column by column -
        # confirm against numpy's partition/argpartition guarantees.
        knns = np.argpartition(A, new_kth - 1, axis=-1)[:, new_kth:-1]
        knn_values = np.partition(A, new_kth - 1, axis=-1)[:, new_kth:-1]
    else:
        # Handling for graphs with less than kth nodes.
        # In such cases, the resulting graph will be fully connected.
        knns = np.tile(np.arange(num_nodes), num_nodes).reshape(
            num_nodes, num_nodes
        )
        knn_values = A

        # Removing self loop.
        if num_nodes != 1:
            off_diagonal = knns != np.arange(num_nodes)[:, None]
            knn_values = A[off_diagonal].reshape(num_nodes, -1)
            knns = knns[off_diagonal].reshape(num_nodes, -1)
    return knns, knn_values


class SuperPixelDataset(DGLDataset):
    """Base class for the MNIST / CIFAR10 superpixel graph datasets.

    Parameters
    ----------
    raw_dir : str, optional
        Forwarded to :class:`DGLDataset` as ``raw_dir``.
    name : str
        Either ``"MNIST"`` or ``"CIFAR10"``. Default: "MNIST".
    split : str
        Either ``"train"`` or ``"test"``. Default: "train".
    use_feature : bool
        If True the adjacency is built from super-pixel locations and
        features, otherwise from locations only. Default: False.
    force_reload : bool
        Forwarded to :class:`DGLDataset`. Default: False.
    verbose : bool
        Forwarded to :class:`DGLDataset`. Default: False.
    transform : callable, optional
        Forwarded to :class:`DGLDataset`; applied to a graph in
        ``__getitem__`` when set.
    """

    def __init__(
        self,
        raw_dir=None,
        name="MNIST",
        split="train",
        use_feature=False,
        force_reload=False,
        verbose=False,
        transform=None,
    ):
        assert split in ["train", "test"], "split not valid."
        assert name in ["MNIST", "CIFAR10"], "name not valid."

        # These are assigned before calling the base constructor;
        # presumably it drives download/process/load, which read them -
        # confirm against DGLDataset.
        self.use_feature = use_feature
        self.split = split
        self._dataset_name = name
        self.graphs = []
        self.labels = []

        super().__init__(
            name="Superpixel",
            raw_dir=raw_dir,
            url=""" https://www.dropbox.com/s/y2qwa77a0fxem47/superpixels.zip?dl=1 """,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    @property
    def img_size(self):
        r"""Size of dataset image."""
        if self._dataset_name == "MNIST":
            return 28
        return 32

    @property
    def save_path(self):
        r"""Directory to save the processed dataset."""
        return os.path.join(self.raw_path, "processed")

    @property
    def raw_data_path(self):
        r"""Path to save the raw dataset file."""
        return os.path.join(self.raw_path, "superpixels.zip")

    @property
    def graph_path(self):
        r"""Path to save the processed dataset file."""
        if self.use_feature:
            return os.path.join(
                self.save_path,
                f"use_feat_{self._dataset_name}_{self.split}.pkl",
            )
        return os.path.join(
            self.save_path, f"{self._dataset_name}_{self.split}.pkl"
        )

    def download(self):
        """Download the raw archive and extract it into ``raw_path``."""
        path = download(self.url, path=self.raw_data_path)
        extract_archive(path, target_dir=self.raw_path, overwrite=True)

    def process(self):
        """Convert the raw superpixel pickle into a list of DGL graphs.

        Populates ``self.labels``, ``self.sp_data``, the intermediate
        per-sample lists (``Adj_matrices``, ``node_features``,
        ``edges_lists``, ``edge_features``) and appends one graph per
        sample to ``self.graphs``.
        """
        if self._dataset_name == "MNIST":
            plk_file = "mnist_75sp"
        elif self._dataset_name == "CIFAR10":
            plk_file = "cifar10_150sp"

        # SECURITY NOTE: pickle.load executes arbitrary code embedded in the
        # file. The file comes from the downloaded archive, so this is only
        # safe as long as that archive is trusted.
        with open(
            os.path.join(
                self.raw_path, "superpixels", f"{plk_file}_{self.split}.pkl"
            ),
            "rb",
        ) as f:
            self.labels, self.sp_data = pickle.load(f)
        self.labels = F.tensor(self.labels)

        self.Adj_matrices = []
        self.node_features = []
        self.edges_lists = []
        self.edge_features = []

        for sample in tqdm(
            self.sp_data, desc=f"Processing {self.split} dataset"
        ):
            mean_px, coord = sample[:2]
            # Scale coordinates by the image side length.
            coord = coord / self.img_size

            if self.use_feature:
                # using super-pixel locations + features
                A = compute_adjacency_matrix_images(coord, mean_px)
            else:
                # using only super-pixel locations
                A = compute_adjacency_matrix_images(coord, mean_px, False)
            edges_list, edge_values_list = compute_edges_list(A)

            N_nodes = A.shape[0]

            # Node feature = [mean pixel value(s), x, y].
            mean_px = mean_px.reshape(N_nodes, -1)
            coord = coord.reshape(N_nodes, 2)
            x = np.concatenate((mean_px, coord), axis=1)

            edge_values_list = edge_values_list.reshape(-1)

            self.node_features.append(x)
            self.edge_features.append(edge_values_list)
            self.Adj_matrices.append(A)
            self.edges_lists.append(edges_list)

        per_sample = zip(
            self.node_features, self.edge_features, self.edges_lists
        )
        for node_feat, edge_feat, edges in tqdm(
            per_sample,
            total=len(self.sp_data),
            desc=f"Dump {self.split} dataset",
        ):
            N = node_feat.shape[0]

            src_nodes = []
            dst_nodes = []
            for src, dsts in enumerate(edges):
                # handling for 1 node where the self loop would be the only
                # edge
                if N == 1:
                    src_nodes.append(src)
                    dst_nodes.append(dsts)
                else:
                    dsts = dsts[dsts != src]
                    src_nodes.extend([src] * len(dsts))
                    dst_nodes.extend(dsts)

            src_nodes = F.tensor(src_nodes)
            dst_nodes = F.tensor(dst_nodes)

            g = dgl_graph((src_nodes, dst_nodes), num_nodes=N)
            g.ndata["feat"] = F.zerocopy_from_numpy(node_feat).to(F.float32)
            g.edata["feat"] = (
                F.zerocopy_from_numpy(edge_feat).to(F.float32).unsqueeze(1)
            )

            self.graphs.append(g)

    def load(self):
        """Load the cached graphs and labels from ``graph_path``."""
        self.graphs, label_dict = load_graphs(self.graph_path)
        self.labels = label_dict["labels"]

    def save(self):
        """Save the graphs and labels to ``graph_path``."""
        save_graphs(
            self.graph_path, self.graphs, labels={"labels": self.labels}
        )

    def has_cache(self):
        """Return True if the processed file at ``graph_path`` exists."""
        return os.path.exists(self.graph_path)

    def __len__(self):
        """Return the number of graphs in the dataset."""
        return len(self.graphs)

    def __getitem__(self, idx):
        """Get the idx-th sample.

        Parameters
        ---------
        idx : int or tensor
            The sample index.
            1-D tensor as `idx` is allowed when transform is None.

        Returns
        -------
        (:class:`dgl.DGLGraph`, Tensor)
            Graph with node feature stored in ``feat`` field and its label.
        or
        :class:`dgl.data.utils.Subset`
            Subset of the dataset at specified indices

        Raises
        ------
        ValueError
            If ``idx`` is a 1-D tensor and a transform is set.
        """
        if F.is_tensor(idx) and idx.dim() == 1:
            if self._transform is None:
                return Subset(self, idx.cpu())

            raise ValueError(
                "Tensor idx not supported when transform is not None."
            )

        if self._transform is None:
            return self.graphs[idx], self.labels[idx]

        return self._transform(self.graphs[idx]), self.labels[idx]
class MNISTSuperPixelDataset(SuperPixelDataset):
    r"""MNIST superpixel dataset for the graph classification task.

    DGL dataset of MNIST in the benchmark-gnn which contains graphs
    converted from the original MNIST images.

    Reference `<http://arxiv.org/abs/2003.00982>`_

    Statistics:

    - Train examples: 60,000
    - Test examples: 10,000
    - Size of dataset images: 28

    Parameters
    ----------
    raw_dir : str
        Directory to store all the downloaded raw datasets.
        Default: "~/.dgl/".
    split : str
        Should be chosen from ["train", "test"]
        Default: "train".
    use_feature: bool

        - True: Adj matrix defined from super-pixel locations + features
        - False: Adj matrix defined from super-pixel locations (only)

        Default: False.
    force_reload : bool
        Whether to reload the dataset.
        Default: False.
    verbose : bool
        Whether to print out progress information.
        Default: False.
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.

    Examples
    ---------
    >>> from dgl.data import MNISTSuperPixelDataset

    >>> # MNIST dataset
    >>> train_dataset = MNISTSuperPixelDataset(split="train")
    >>> len(train_dataset)
    60000
    >>> graph, label = train_dataset[0]
    >>> graph
    Graph(num_nodes=71, num_edges=568,
        ndata_schemes={'feat': Scheme(shape=(3,), dtype=torch.float32)}
        edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)})

    >>> # support tensor to be index when transform is None
    >>> # see details in __getitem__ function
    >>> import torch
    >>> idx = torch.tensor([0, 1, 2])
    >>> train_dataset_subset = train_dataset[idx]
    >>> train_dataset_subset[0]
    Graph(num_nodes=71, num_edges=568,
        ndata_schemes={'feat': Scheme(shape=(3,), dtype=torch.float32)}
        edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)})
    """

    def __init__(
        self,
        raw_dir=None,
        split="train",
        use_feature=False,
        force_reload=False,
        verbose=False,
        transform=None,
    ):
        # Thin wrapper: pins ``name`` to "MNIST" and forwards everything else.
        super().__init__(
            raw_dir=raw_dir,
            name="MNIST",
            split=split,
            use_feature=use_feature,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )
class CIFAR10SuperPixelDataset(SuperPixelDataset):
    r"""CIFAR10 superpixel dataset for the graph classification task.

    DGL dataset of CIFAR10 in the benchmark-gnn which contains graphs
    converted from the original CIFAR10 images.

    Reference `<http://arxiv.org/abs/2003.00982>`_

    Statistics:

    - Train examples: 50,000
    - Test examples: 10,000
    - Size of dataset images: 32

    Parameters
    ----------
    raw_dir : str
        Directory to store all the downloaded raw datasets.
        Default: "~/.dgl/".
    split : str
        Should be chosen from ["train", "test"]
        Default: "train".
    use_feature: bool

        - True: Adj matrix defined from super-pixel locations + features
        - False: Adj matrix defined from super-pixel locations (only)

        Default: False.
    force_reload : bool
        Whether to reload the dataset.
        Default: False.
    verbose : bool
        Whether to print out progress information.
        Default: False.
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.

    Examples
    ---------
    >>> from dgl.data import CIFAR10SuperPixelDataset

    >>> # CIFAR10 dataset
    >>> train_dataset = CIFAR10SuperPixelDataset(split="train")
    >>> len(train_dataset)
    50000
    >>> graph, label = train_dataset[0]
    >>> graph
    Graph(num_nodes=123, num_edges=984,
        ndata_schemes={'feat': Scheme(shape=(5,), dtype=torch.float32)}
        edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)})

    >>> # support tensor to be index when transform is None
    >>> # see details in __getitem__ function
    >>> import torch
    >>> idx = torch.tensor([0, 1, 2])
    >>> train_dataset_subset = train_dataset[idx]
    >>> train_dataset_subset[0]
    Graph(num_nodes=123, num_edges=984,
        ndata_schemes={'feat': Scheme(shape=(5,), dtype=torch.float32)}
        edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)})
    """

    def __init__(
        self,
        raw_dir=None,
        split="train",
        use_feature=False,
        force_reload=False,
        verbose=False,
        transform=None,
    ):
        # Thin wrapper: pins ``name`` to "CIFAR10" and forwards everything
        # else.
        super().__init__(
            raw_dir=raw_dir,
            name="CIFAR10",
            split=split,
            use_feature=use_feature,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )