-
Notifications
You must be signed in to change notification settings - Fork 50
/
Copy pathutils.py
79 lines (57 loc) · 2.05 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from sklearn.utils import shuffle
import pickle
def read_TREC():
    """Load the TREC train/test files and carve a dev split out of train.

    Reads ``data/TREC/TREC_train.txt`` and ``data/TREC/TREC_test.txt``.
    On every non-blank line the first whitespace-separated token is the
    label field; only the part before its first ``":"`` is kept as the
    label. The remaining tokens form the sentence.

    Returns:
        dict with keys ``train_x``, ``train_y``, ``dev_x``, ``dev_y``,
        ``test_x`` and ``test_y``. ``*_x`` hold token lists, ``*_y`` hold
        label strings. The dev split is the first ``len(train) // 10``
        examples of the shuffled training file.
    """
    data = {}

    def read(mode):
        # Parse one split file ("train" or "test") and store it in `data`.
        x, y = [], []
        with open("data/TREC/TREC_" + mode + ".txt", "r", encoding="utf-8") as f:
            for line in f:
                # split() already discards the trailing newline, so no
                # manual stripping is needed; tokenize only once per line.
                tokens = line.split()
                if not tokens:
                    # Blank line (e.g. a trailing empty line): there is no
                    # label token to read, so skip it instead of raising
                    # IndexError.
                    continue
                y.append(tokens[0].split(":")[0])
                x.append(tokens[1:])

        # Shuffle sentences and labels together so pairs stay aligned.
        x, y = shuffle(x, y)

        if mode == "train":
            # Hold out the first tenth of the shuffled training data as dev.
            dev_idx = len(x) // 10
            data["dev_x"], data["dev_y"] = x[:dev_idx], y[:dev_idx]
            data["train_x"], data["train_y"] = x[dev_idx:], y[dev_idx:]
        else:
            data["test_x"], data["test_y"] = x, y

    read("train")
    read("test")
    return data
def read_MR():
    """Load the MR polarity files and split them into train/dev/test.

    Sentences from ``data/MR/rt-polarity.pos`` get label ``1`` and those
    from ``data/MR/rt-polarity.neg`` get label ``0``. Each sentence is
    tokenized on whitespace. The combined examples are shuffled and then
    cut at ``len // 10 * 8`` and ``len // 10 * 9``.

    Returns:
        dict with keys ``train_x``, ``train_y``, ``dev_x``, ``dev_y``,
        ``test_x`` and ``test_y``.
    """
    labelled_files = (
        ("data/MR/rt-polarity.pos", 1),
        ("data/MR/rt-polarity.neg", 0),
    )

    sentences, labels = [], []
    for filename, polarity in labelled_files:
        with open(filename, "r", encoding="utf-8") as handle:
            for raw_line in handle:
                # split() ignores the trailing newline on its own.
                sentences.append(raw_line.split())
                labels.append(polarity)

    # Shuffle both lists together so each sentence keeps its label.
    sentences, labels = shuffle(sentences, labels)

    tenth = len(sentences) // 10
    train_end = tenth * 8
    dev_end = tenth * 9

    return {
        "train_x": sentences[:train_end],
        "train_y": labels[:train_end],
        "dev_x": sentences[train_end:dev_end],
        "dev_y": labels[train_end:dev_end],
        "test_x": sentences[dev_end:],
        "test_y": labels[dev_end:],
    }
def save_model(model, params):
    """Pickle ``model`` to ``saved_models/<DATASET>_<MODEL>_<EPOCH>.pkl``.

    Args:
        model: the object to serialize with ``pickle``.
        params: mapping providing the ``"DATASET"``, ``"MODEL"`` and
            ``"EPOCH"`` entries used to build the file name.

    Raises:
        KeyError: if ``params`` lacks one of the three entries.
        OSError: if the target file cannot be created or written.
    """
    import os  # function-scope import: the module top only imports pickle

    path = f"saved_models/{params['DATASET']}_{params['MODEL']}_{params['EPOCH']}.pkl"

    # Create the target directory on first use instead of failing with
    # FileNotFoundError when it does not exist yet.
    os.makedirs(os.path.dirname(path), exist_ok=True)

    # The context manager guarantees the file is flushed and closed even
    # if pickling raises part-way through.
    with open(path, "wb") as f:
        pickle.dump(model, f)

    print(f"A model is saved successfully as {path}!")
def load_model(params):
    """Unpickle the model stored at ``saved_models/<DATASET>_<MODEL>_<EPOCH>.pkl``.

    Args:
        params: mapping providing the ``"DATASET"``, ``"MODEL"`` and
            ``"EPOCH"`` entries used to build the file name.

    Returns:
        The unpickled object.

    Raises:
        SystemExit: after printing a message, when the file cannot be
            opened (missing, unreadable, ...).
        KeyError: if ``params`` lacks one of the three entries.

    Errors raised while unpickling (e.g. a truncated or corrupt file) are
    not reported as "no available model"; they propagate unchanged so the
    real cause stays visible.
    """
    path = f"saved_models/{params['DATASET']}_{params['MODEL']}_{params['EPOCH']}.pkl"

    try:
        f = open(path, "rb")
    except OSError:
        # Only a file that cannot be opened means "no such model". A bare
        # except here would also swallow KeyboardInterrupt and real bugs.
        print(f"No available model such as {path}.")
        # Same effect as the site-provided exit(), without depending on
        # the site module being loaded.
        raise SystemExit

    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only load model files from a trusted source.
    with f:
        model = pickle.load(f)

    print(f"Model in {path} loaded successfully!")
    return model