class CSVDataset(DGLDataset):
    """Dataset class that loads and parses graph data from CSV files.

    This class requires the following additional packages:

        - pyyaml >= 5.4.1
        - pandas >= 1.1.5
        - pydantic >= 1.9.0

    The parsed graph and feature data will be cached for faster reloading. If
    the source CSV files are modified, please specify ``force_reload=True``
    to re-parse from them.

    Parameters
    ----------
    data_path : str
        Directory which contains 'meta.yaml' and the CSV files.
    force_reload : bool, optional
        Whether to reload the dataset. Default: False.
    verbose : bool, optional
        Whether to print out progress information. Default: True.
    ndata_parser : dict[str, callable] or callable, optional
        Callable object which takes in the ``pandas.DataFrame`` object created
        from a CSV file, parses node data and returns a dictionary of parsed
        data. If given a dictionary, the key is the node type and the value is
        a callable object which is used to parse data of the corresponding
        node type. If given a single callable object, that object is used to
        parse data of all node types. Default: None. If None, a default data
        parser is applied which loads data directly and tries to convert
        lists into arrays.
    edata_parser : dict[(str, str, str), callable] or callable, optional
        Callable object which takes in the ``pandas.DataFrame`` object created
        from a CSV file, parses edge data and returns a dictionary of parsed
        data. If given a dictionary, the key is the edge type and the value is
        a callable object which is used to parse data of the corresponding
        edge type. If given a single callable object, that object is used to
        parse data of all edge types. Default: None. If None, a default data
        parser is applied which loads data directly and tries to convert
        lists into arrays.
    gdata_parser : callable, optional
        Callable object which takes in the ``pandas.DataFrame`` object created
        from a CSV file, parses graph data and returns a dictionary of parsed
        data. Default: None. If None, a default data parser is applied which
        loads data directly and tries to convert lists into arrays.
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.

    Attributes
    ----------
    graphs : :class:`dgl.DGLGraph`
        Graphs of the dataset.
    data : dict
        Any available graph-level data such as graph-level features and
        labels.
    labels
        Only set when ``data`` holds exactly one entry, in which case it is
        that entry's value. The attribute is not created otherwise.

    Raises
    ------
    DGLError
        If 'meta.yaml' cannot be found under ``data_path``.

    Examples
    --------
    Please refer to :ref:`guide-data-pipeline-loadcsv`.
    """

    META_YAML_NAME = "meta.yaml"

    def __init__(
        self,
        data_path,
        force_reload=False,
        verbose=True,
        ndata_parser=None,
        edata_parser=None,
        gdata_parser=None,
        transform=None,
    ):
        # Imported lazily. NOTE(review): presumably so that the optional
        # packages listed in the class docstring are only required when a
        # CSVDataset is actually created -- confirm.
        from .csv_dataset_base import (
            DefaultDataParser,
            load_yaml_with_sanity_check,
        )

        self.graphs = None
        self.data = None
        # An empty dict means "no per-type override": every lookup then falls
        # back to the default parser.
        self.ndata_parser = {} if ndata_parser is None else ndata_parser
        self.edata_parser = {} if edata_parser is None else edata_parser
        self.gdata_parser = gdata_parser
        self.default_data_parser = DefaultDataParser()

        meta_yaml_path = os.path.join(data_path, CSVDataset.META_YAML_NAME)
        if not os.path.exists(meta_yaml_path):
            raise DGLError(
                "'{}' cannot be found under {}.".format(
                    CSVDataset.META_YAML_NAME, data_path
                )
            )
        self.meta_yaml = load_yaml_with_sanity_check(meta_yaml_path)
        ds_name = self.meta_yaml.dataset_name

        # All parser state above is assigned before delegating to the base
        # class. NOTE(review): the base constructor presumably triggers
        # process()/load(), which read that state -- confirm.
        super().__init__(
            ds_name,
            raw_dir=os.path.dirname(meta_yaml_path),
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def process(self):
        """Parse node, edge and graph data from CSV files and build the graphs.

        Populates ``self.graphs`` and ``self.data``; ``self.labels`` is set
        as well when ``self.data`` ends up with exactly one entry.
        """
        from .csv_dataset_base import (
            DGLGraphConstructor,
            EdgeData,
            GraphData,
            NodeData,
        )

        meta_yaml = self.meta_yaml
        base_dir = self.raw_dir
        separator = meta_yaml.separator

        # ``None`` entries in the metadata lists are skipped.
        node_data = [
            NodeData.load_from_csv(
                meta_node,
                base_dir=base_dir,
                separator=separator,
                data_parser=self._pick_parser(
                    self.ndata_parser, meta_node.ntype
                ),
            )
            for meta_node in meta_yaml.node_data
            if meta_node is not None
        ]

        # Edge types are converted to tuples before being used as the lookup
        # key into a dict-style ``edata_parser``.
        edge_data = [
            EdgeData.load_from_csv(
                meta_edge,
                base_dir=base_dir,
                separator=separator,
                data_parser=self._pick_parser(
                    self.edata_parser, tuple(meta_edge.etype)
                ),
            )
            for meta_edge in meta_yaml.edge_data
            if meta_edge is not None
        ]

        graph_data = None
        if meta_yaml.graph_data is not None:
            graph_parser = (
                self.default_data_parser
                if self.gdata_parser is None
                else self.gdata_parser
            )
            graph_data = GraphData.load_from_csv(
                meta_yaml.graph_data,
                base_dir=base_dir,
                separator=separator,
                data_parser=graph_parser,
            )

        # construct graphs
        self.graphs, self.data = DGLGraphConstructor.construct_graphs(
            node_data, edge_data, graph_data
        )
        self._refresh_labels()

    def has_cache(self):
        """Return True if the cached graph file exists under ``save_path``."""
        return os.path.exists(self._csv_cache_path())

    def save(self):
        """Write the graphs and graph-level data to the cache file.

        Raises
        ------
        DGLError
            If no graphs have been constructed or loaded yet.
        """
        if self.graphs is None:
            raise DGLError("No graphs available in dataset")
        save_graphs(self._csv_cache_path(), self.graphs, labels=self.data)

    def load(self):
        """Read the graphs and graph-level data back from the cache file.

        Populates ``self.graphs`` and ``self.data``; ``self.labels`` is set
        as well when ``self.data`` ends up with exactly one entry.
        """
        self.graphs, self.data = load_graphs(self._csv_cache_path())
        self._refresh_labels()

    def _csv_cache_path(self):
        """Return the path of the cache file: ``<save_path>/<name>.bin``."""
        return os.path.join(self.save_path, self.name + ".bin")

    def _pick_parser(self, parsers, key):
        """Return the parser to use for ``key``.

        ``parsers`` is either a single callable, used for every key, or a
        mapping from key to callable, where missing keys fall back to
        ``self.default_data_parser``.
        """
        if callable(parsers):
            return parsers
        return parsers.get(key, self.default_data_parser)

    def _refresh_labels(self):
        """Expose the sole graph-level entry as ``self.labels``, if unique."""
        if len(self.data) == 1:
            self.labels = next(iter(self.data.values()))