# train.py: train a NER model on stacked flair embeddings
import os

import numpy as np

import Corpus
import Dataset_load
import Evaluation
import NER
from flair.data import Sentence
from flair.embeddings import CharLMEmbeddings, StackedEmbeddings, WordEmbeddings

# Make runs reproducible and pin training to the first GPU.
SEED = 86
np.random.seed(SEED)
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Load custom word embeddings (replace the placeholder path with your own
# word2vec-format .vec file).
custom_embedding = WordEmbeddings('path/to/Embeddings.vec')

# Now create the StackedEmbeddings object that combines all embeddings.
stacked_embeddings = StackedEmbeddings(
    embeddings=[custom_embedding]  # , charlm_embedding_forward, charlm_embedding_backward
)
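# Optional: contextual string embeddings can be stacked alongside the word
# embeddings, as the commented-out list entries above suggest. A sketch,
# assuming the standard pretrained flair model names 'news-forward' and
# 'news-backward':
# charlm_embedding_forward = CharLMEmbeddings('news-forward')
# charlm_embedding_backward = CharLMEmbeddings('news-backward')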
# Load the dataset and wrap it in a Corpus together with the embeddings.
dataset_dict = Dataset_load.load()
corp = Corpus.Corpus(dataset_dict, embeddings_file_path=None,
                     stacked_embeddings=stacked_embeddings)
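# Optional sanity check before a long training run: embed a single sentence
# to confirm the stacked embeddings load correctly (the sentence text here
# is an arbitrary example).
# test_sentence = Sentence('George Washington went to Washington .')
# stacked_embeddings.embed(test_sentence)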
model_params = {"filter_width": 3,
"embeddings_dropout": True,
"n_filters": [
256
],
"dense_dropout" : True,
"token_embeddings_dim": 300,
"char_embeddings_dim": 50,
"cell_type":'lstm',
"use_batch_norm": True,
"concat_embeddings":True,
"use_crf": True,
"use_char_embeddins":True,
"net_type": 'rnn',
"use_capitalization":False ,
}
net = NER.NER(corp,stacked_embeddings, **model_params)
# Training hyperparameters.
learning_params = {
    'dropout_rate': 0.5,
    'epochs': 200,
    'learning_rate': 0.001,  # 0.0003
    'batch_size': 20,
    'learning_rate_decay': 0.94,
}
results = net.fit(**learning_params)
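# A natural follow-up is to score the trained model with the Evaluation
# module imported above. The call below is hypothetical; the actual function
# name depends on the local Evaluation module's API:
# Evaluation.evaluate(net, corp)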