forked from DSE-MSU/eigenpooling
-
Notifications
You must be signed in to change notification settings - Fork 0
/
load_data.py
110 lines (99 loc) · 3.48 KB
/
load_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import networkx as nx
import numpy as np
import scipy as sc
import os
import re
def read_graphfile(datadir, dataname, max_nodes=None):
    """Load a TU Dortmund graph-kernel benchmark dataset as networkx graphs.

    Data format: https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets
    Node ids and graph ids start at 1 in the files.

    Files read, all named ``<datadir>/<dataname>/<dataname><suffix>``:

    * ``_graph_indicator.txt``  -- line i holds the id of the graph owning node i
    * ``_graph_labels.txt``     -- line g holds the integer label of graph g
    * ``_A.txt``                -- one ``src, dst`` node-id pair per line
    * ``_node_labels.txt``      -- optional; line i holds the integer label of node i
    * ``_node_attributes.txt``  -- optional; line i holds comma/whitespace
      separated floats for node i

    Args:
        datadir: Directory containing one sub-directory per dataset.
        dataname: Dataset name (both the sub-directory and the file prefix).
        max_nodes: If not None, graphs with more than this many nodes are
            dropped from the result.

    Returns:
        List of networkx graphs whose nodes are relabeled to 0..n-1.
        Each graph carries ``G.graph['label']`` (the file label minus one,
        unless some label in the file is 0, in which case labels are kept
        as-is) and, when node attributes exist, ``G.graph['feat_dim']``.
        Each node carries ``'label'`` (one-hot list) when node labels exist
        and ``'feat'`` (numpy array) when node attributes exist.
        Graphs are built from the edge list only, so a node that appears in
        no edge is not part of its graph.

    Raises:
        IOError: If one of the three required files cannot be opened.
        ValueError: If a line that must be numeric cannot be parsed.
    """
    prefix = os.path.join(datadir, dataname, dataname)

    # graph_indic[i] is the id of the graph that (1-based) node i belongs to.
    with open(prefix + '_graph_indicator.txt') as f:
        graph_indic = {i: int(line) for i, line in enumerate(f, start=1)}

    # Optional node labels, shifted by -1 so they can index a one-hot vector.
    node_labels = []
    try:
        with open(prefix + '_node_labels.txt') as f:
            node_labels = [int(line) - 1 for line in f]
    except IOError:
        print('No node labels')
    # Guarded so that an existing-but-empty label file does not crash max().
    num_unique_node_labels = max(node_labels) + 1 if node_labels else 0

    # Optional node attributes: one float vector per node.
    attr_sep = re.compile(r'[,\s]+')
    node_attrs = []
    try:
        with open(prefix + '_node_attributes.txt') as f:
            for line in f:
                fields = attr_sep.split(line.strip())
                # Leading/trailing separators produce empty fields; skip them.
                node_attrs.append(
                    np.array([float(x) for x in fields if x != '']))
    except IOError:
        print('No node attributes')

    # Graph labels: shift to start one lower, unless a 0 label shows that the
    # file is already zero-based.
    with open(prefix + '_graph_labels.txt') as f:
        raw_graph_labels = [int(line) for line in f]
    graph_labels = np.array(raw_graph_labels)
    if 0 not in raw_graph_labels:
        graph_labels = graph_labels - 1

    # Bucket every edge under the graph that owns its source node.
    adj_list = {g: [] for g in range(1, len(graph_labels) + 1)}
    with open(prefix + '_A.txt') as f:
        for line in f:
            parts = line.split(',')
            e0, e1 = int(parts[0]), int(parts[1])
            adj_list[graph_indic[e0]].append((e0, e1))

    graphs = []
    for gid in range(1, len(adj_list) + 1):
        # Node ids are still the global 1-based ids from the files here.
        G = nx.from_edgelist(adj_list[gid])
        if max_nodes is not None and G.number_of_nodes() > max_nodes:
            continue

        G.graph['label'] = graph_labels[gid - 1]

        # networkx >= 2.0 exposes node data through the subscriptable
        # ``G.nodes`` view (``G.node`` was removed in 2.4); on 1.x ``G.nodes``
        # is a plain method and the data dict lives in ``G.node``.
        node_data = G.nodes if hasattr(G.nodes, '__getitem__') else G.node
        for u in G.nodes():
            if node_labels:
                one_hot = [0] * num_unique_node_labels
                one_hot[node_labels[u - 1]] = 1
                node_data[u]['label'] = one_hot
            if node_attrs:
                node_data[u]['feat'] = node_attrs[u - 1]
        if node_attrs:
            G.graph['feat_dim'] = node_attrs[0].shape[0]

        # Relabel nodes to 0..n-1 in iteration order. ``G.nodes()`` works on
        # every networkx version, so no version parsing is needed.
        mapping = {n: idx for idx, n in enumerate(G.nodes())}
        graphs.append(nx.relabel_nodes(G, mapping))
    return graphs