-
Notifications
You must be signed in to change notification settings - Fork 78
/
magcl_eval.py
52 lines (38 loc) · 1.5 KB
/
magcl_eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import numpy as np
import functools
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import normalize, OneHotEncoder
import tensorlayerx as tlx
def prob_to_one_hot(y_pred):
    """Convert per-row scores into a boolean one-hot matrix.

    For every row of ``y_pred`` the column holding the maximum value is
    set to ``True`` and all other columns to ``False``. On ties,
    ``np.argmax`` picks the first maximal column.

    Args:
        y_pred: 2-D array-like of scores, one row per sample.

    Returns:
        Boolean ``numpy.ndarray`` with the same shape as ``y_pred`` and
        exactly one ``True`` per row.
    """
    y_pred = np.asarray(y_pred)
    # Use the builtin ``bool`` dtype: the ``np.bool8`` alias was removed in
    # NumPy 2.0 and would raise AttributeError there.
    ret = np.zeros(y_pred.shape, dtype=bool)
    indices = np.argmax(y_pred, axis=1)
    # One fancy-index assignment instead of a per-row Python loop.
    ret[np.arange(y_pred.shape[0]), indices] = True
    return ret
def evaluate(embeddings, y, train_mask, test_mask, split='random', ratio=0.1):
    """Score embeddings with a one-vs-rest logistic-regression classifier.

    The labels are one-hot encoded, the embeddings are L2-normalised
    row-wise, and a ``OneVsRestClassifier(LogisticRegression)`` is tuned
    over ``C`` in ``2**-10 .. 2**9`` with 5-fold grid search on the
    training portion. Accuracy is then computed on the test portion.

    Args:
        embeddings: Feature matrix passed to ``sklearn.preprocessing.normalize``.
            NOTE(review): presumably already a NumPy array of shape
            (n_samples, dim); it is not converted here -- confirm with callers.
        y: Labels; converted with ``tlx.convert_to_numpy`` and flattened to
            one label per sample.
        train_mask: Index/mask selecting training rows; only used when
            ``split == 'public'``.
        test_mask: Index/mask selecting test rows; only used when
            ``split == 'public'``.
        split: ``'random'`` for a random train/test split, or ``'public'``
            to use ``train_mask`` / ``test_mask``.
        ratio: Fraction of samples used for training when
            ``split == 'random'`` (the test size is ``1 - ratio``).

    Returns:
        Accuracy of the tuned classifier on the test portion, as returned
        by ``sklearn.metrics.accuracy_score``.

    Raises:
        ValueError: If ``split`` is neither ``'random'`` nor ``'public'``.
    """
    # Fail fast with a clear message; previously an unknown split value fell
    # through both branches and crashed later with UnboundLocalError.
    if split not in ('random', 'public'):
        raise ValueError(
            f"split must be 'random' or 'public', got {split!r}"
        )

    X = embeddings
    Y = tlx.convert_to_numpy(y)
    Y = Y.reshape(-1, 1)

    onehot_encoder = OneHotEncoder(categories='auto').fit(Y)
    # Builtin ``bool`` replaces ``np.bool8``, which was removed in NumPy 2.0.
    Y = onehot_encoder.transform(Y).toarray().astype(bool)

    X = normalize(X, norm='l2')

    if split == 'random':
        X_train, X_test, y_train, y_test = train_test_split(
            X, Y, test_size=1 - ratio)
    else:  # split == 'public'
        X_train = X[train_mask]
        X_test = X[test_mask]
        y_train = Y[train_mask]
        y_test = Y[test_mask]

    logreg = LogisticRegression(solver='liblinear')
    c = 2.0 ** np.arange(-10, 10)

    clf = GridSearchCV(estimator=OneVsRestClassifier(logreg),
                       param_grid=dict(estimator__C=c), n_jobs=8, cv=5,
                       verbose=0)
    clf.fit(X_train, y_train)

    # Turn per-class probabilities into a single predicted class per row so
    # the prediction has the same one-hot layout as y_test.
    y_pred = clf.predict_proba(X_test)
    y_pred = prob_to_one_hot(y_pred)

    acc = accuracy_score(y_test, y_pred)
    return acc