forked from GuanyuCui/BiSPER
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_lambda.py
142 lines (109 loc) · 3.23 KB
/
get_lambda.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import struct
import sys
import time
from functools import wraps
from os import path as osp

import numpy as np
from scipy.sparse import csr_matrix, diags
from scipy.sparse.linalg import eigs, eigsh
def timer(func):
    """Decorator that prints when *func* starts and how long it took.

    The wrapped function's return value is passed through unchanged.
    """
    # functools.wraps preserves func.__name__ / __doc__ on the wrapper;
    # without it, every decorated function introspects as 'wrapper'.
    @wraps(func)
    def wrapper(*args, **kwargs):
        print(f'Function {func.__name__} starts.')
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        elapsed_time = end_time - start_time
        print(f'Done. It takes {elapsed_time : .3f} seconds.\n')
        return result
    return wrapper
@timer
def from_txt(file_name : str):
    """Read an undirected edge list from a text file into a sparse adjacency matrix.

    Each non-comment line has the format ``<from_node> <to_node>``; lines
    starting with '#' and blank lines are skipped.  Both (u, v) and (v, u)
    entries are added since the graph is undirected.

    Returns a scipy.sparse.csr_matrix whose shape is inferred from the
    largest node id seen (duplicate edges are summed by csr_matrix).
    """
    row_ind = []
    col_ind = []
    # Context manager guarantees the file is closed even if parsing raises.
    with open(file = file_name, mode = 'r') as f:
        # Iterate the file lazily instead of materializing it with readlines().
        for line_buffer in f:
            # Skip comments and blank lines (indexing line_buffer[0] on an
            # empty line would raise IndexError).
            stripped = line_buffer.strip()
            if not stripped or stripped[0] == '#':
                continue
            # Line format: <from_node> <to_node>
            nodes = stripped.split()
            from_node = int(nodes[0])
            to_node = int(nodes[1])
            # Record the edge in both directions: the graph is undirected.
            row_ind.append(from_node)
            col_ind.append(to_node)
            row_ind.append(to_node)
            col_ind.append(from_node)
    row_ind = np.array(row_ind)
    col_ind = np.array(col_ind)
    data = np.ones_like(row_ind)
    adj_sparse = csr_matrix((data, (row_ind, col_ind)))
    return adj_sparse
@timer
def from_bin(file_name : str):
    """Read a graph in compressed binary adjacency-list format.

    Binary layout (all unsigned 32-bit ints): one word giving the total
    word count, then for each node i: its neighbor count followed by that
    many neighbor ids.  raw_data[0] is the number of nodes.

    Returns a scipy.sparse.csr_matrix adjacency matrix.
    """
    # Context manager guarantees the file handle is closed on any exit path.
    with open(file = file_name, mode = 'rb') as f:
        # Read whole size.
        all_size = struct.unpack('I', f.read(struct.calcsize('I')))[0]
        # Read all data into raw_data.
        raw_data = struct.unpack(f'{all_size}I', f.read(struct.calcsize(f'{all_size}I')))
    pos = 1
    row_ind = []
    col_ind = []
    # Read adjacency vectors one by one.
    for i in range(raw_data[0]):
        # Read vector size.
        num_neighbors = raw_data[pos]
        pos += 1
        for j in range(num_neighbors):
            row_ind.append(i)
            col_ind.append(raw_data[pos + j])
        pos += num_neighbors
    row_ind = np.array(row_ind)
    col_ind = np.array(col_ind)
    data = np.ones_like(row_ind)
    adj_sparse = csr_matrix((data, (row_ind, col_ind)))
    return adj_sparse
@timer
def get_P_lambda(A : csr_matrix):
    """Return |lambda_2|, the second-largest eigenvalue magnitude of the
    random-walk transition matrix.

    P = A @ D^{-1}; for a symmetric (undirected) A this has the same
    spectrum as D^{-1} A.  Assumes every node has degree > 0, otherwise
    the 1/degree division produces inf — TODO confirm inputs are
    isolated-vertex-free.
    """
    degree_vector = np.asarray(A.sum(axis = 1)).flatten()
    D_inv = diags(1.0 / degree_vector, format = 'csr')
    P = A @ D_inv
    w, v = eigs(P, k = 2, which = 'LM')
    # ARPACK returns the k largest-magnitude eigenvalues in no guaranteed
    # order; sort by descending magnitude so index 1 is truly the second.
    w = w[np.argsort(-np.abs(w))]
    return np.abs(w[1])
@timer
def get_L_lambda_2(A : csr_matrix):
    """Return |lambda_2| of the combinatorial Laplacian L = D - A
    (the algebraic connectivity for a connected graph)."""
    degrees = np.asarray(A.sum(axis = 1)).flatten()
    laplacian = diags(degrees, format = 'csr') - A
    # Two smallest-algebraic eigenvalues; index 0 is the trivial ~0 mode.
    eigenvalues, _ = eigsh(laplacian, k = 2, which = 'SA')
    return np.abs(eigenvalues[1])
@timer
def get_L_lambda_n(A : csr_matrix):
    """Return |lambda_n|, the largest eigenvalue magnitude of the
    combinatorial Laplacian L = D - A."""
    degrees = np.asarray(A.sum(axis = 1)).flatten()
    laplacian = diags(degrees, format = 'csr') - A
    # Single largest-algebraic eigenvalue of the (PSD) Laplacian.
    eigenvalues, _ = eigsh(laplacian, k = 1, which = 'LA')
    return np.abs(eigenvalues[0])
if __name__ == '__main__':
    # Usage: python get_lambda.py <dataset_name>
    # Reads ./datasets/<dataset_name>-compressed_sorted.bin and writes the
    # second-largest |eigenvalue| of the random-walk matrix to
    # ./datasets/<dataset_name>.lambda.
    arguments = sys.argv
    if len(arguments) < 2:
        print('Lack arguments!')
        # A bare `raise` here has no active exception and would itself fail
        # with RuntimeError; exit with a nonzero status instead.
        sys.exit(1)
    dataset_name = arguments[1]
    compressed_sorted_bin_path = './datasets/' + dataset_name + '-compressed_sorted.bin'
    lambda_path = './datasets/' + dataset_name + '.lambda'
    if not osp.exists(compressed_sorted_bin_path):
        print('No graph file, please run main.cpp first to get graph in compressed binary format.')
        sys.exit(1)
    adj_sparse = from_bin(compressed_sorted_bin_path)
    # A = get_largest_connected_component(adj_sparse = adj_sparse)
    A = adj_sparse
    lam = get_P_lambda(A)
    print('lambda = ', lam)
    with open(lambda_path, 'w') as f:
        f.write(str(lam))