-
Notifications
You must be signed in to change notification settings - Fork 2
/
train.py
89 lines (65 loc) · 2.22 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
import numpy as np
import nltk
import matplotlib.pyplot as plt
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.corpus import wordnet
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,r2_score
from sklearn import metrics
import joblib
import string
import pickle
def preprocesses(data_path="./data/700.npy"):
    """Load the raw corpus and turn it into bag-of-words training data.

    The ``.npy`` file is read as two parallel sequences: ``data[0]`` holds the
    raw documents and ``data[1]`` the matching targets. Every document is
    split into sentences; each sentence is stripped of ASCII punctuation and
    English stop words and then paired with the target of the document it
    came from.

    Args:
        data_path: Location of the NumPy file to load. Defaults to the path
            this script has always used.

    Returns:
        A ``(new_data, features, target)`` tuple:

        * ``new_data`` -- object array of shape ``(n_sentences, 2)`` whose
          rows are ``[cleaned_sentence, target]``.
        * ``features`` -- the ``CountVectorizer`` count matrix fitted on the
          cleaned sentences.
        * ``target`` -- array built from the per-sentence targets.
    """
    # NOTE: allow_pickle=True executes pickle on load; only point this at
    # trusted, locally produced files.
    data = np.load(data_path, allow_pickle=True)
    documents = data[0]
    doc_targets = data[1]

    stop_words = set(stopwords.words('english'))
    # One C-level pass that drops every ASCII punctuation character.
    strip_punctuation = str.maketrans('', '', string.punctuation)

    sentences = []
    targets = []
    for doc_index, document in enumerate(documents):
        # NOTE(review): assumes one output row per sentence (the append sits
        # inside the sentence loop) -- confirm against the original layout.
        for sentence in nltk.sent_tokenize(document):
            words = nltk.word_tokenize(sentence.translate(strip_punctuation))
            # Stop-word match is case-sensitive, exactly as before.
            kept = [word for word in words if word not in stop_words]
            sentences.append(' '.join(kept))
            targets.append(doc_targets[doc_index])

    # Build the (n, 2) object array explicitly. Calling np.array() on mixed
    # [str, target] rows depends on implicit ragged-array creation, which
    # newer NumPy versions reject and which stringifies scalar targets.
    new_data = np.empty((len(sentences), 2), dtype=object)
    for row, (sentence, sentence_target) in enumerate(zip(sentences, targets)):
        new_data[row, 0] = sentence
        new_data[row, 1] = sentence_target

    # The fitted vectorizer is local to this function and is not returned.
    cv = CountVectorizer()
    features = cv.fit_transform(sentences)
    target = np.array(targets)
    return new_data, features, target
def train():
    """Fit an MLP regressor on the preprocessed corpus and report its quality.

    The features and targets from ``preprocesses()`` are split with
    ``train_test_split`` (no fixed seed is passed), an ``MLPRegressor`` is
    trained on the training part, and three things are reported on the
    held-out part / training run:

    1. the overall score from ``network.score``,
    2. the per-output R^2 values (``multioutput="raw_values"``),
    3. a plot of the training loss curve.

    This body resolves merge-conflict markers that had been committed into
    the function. The two sides only added independent reporting (loss plot
    vs. score print), so both are kept.
    """
    _, x, y = preprocesses()
    x_train, x_test, y_train, y_test = train_test_split(x, y)

    network = MLPRegressor(
        verbose=True,
        max_iter=500,
        learning_rate='constant',
        learning_rate_init=0.01,
    )
    network.fit(x_train, y_train)

    # Saving the model is currently disabled. To persist the fitted network,
    # pickle.dump() it to a file such as "neural_model_v2.pkl".

    predict = network.predict(x_test)

    # Print the metrics first: plt.show() below blocks until the plot window
    # is closed, which would otherwise delay the numbers.
    print(network.score(x_test, y_test))
    print(r2_score(y_test, predict, multioutput="raw_values"))

    # Use a dedicated name so the dataset variables x / y are not clobbered.
    loss_curve = network.loss_curve_
    plt.plot(range(len(loss_curve)), loss_curve, color="red")
    plt.show()
def main():
    """Script entry point: run the training pipeline once."""
    train()
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()