# config.py
class Config:
    # shared global constants, also imported by the model
    UNK = "$UNK$"
    NUM = "$NUM$"
    NONE = "O"

    # dataset
    filename_dev = "data/coNLL/eng.validate.iob"
    filename_test = filename_train = "data/coNLL/eng.train.iob"  # NOTE: test and train share the same file here
    max_iter = None  # if not None, max number of examples in Dataset

    # vocab (created from the dataset with build_data.py)
    filename_words = "data/words.txt"
    filename_tags = "data/tags.txt"
    filename_chars = "data/chars.txt"

    # embeddings
    dim_word = 300
    dim_char = 100
    # filename_glove = "data/glove.6B/glove.6B.300d.txt"
    filename_glove = "data/glove.840B/glove.840B.300d.txt"
    # trimmed embeddings (created from filename_glove with build_data.py)
    # filename_trimmed = "data/glove.6B.300d.trimmed.npz"
    filename_trimmed = "data/glove.840B.300d.trimmed.npz"
    use_pretrained = True
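
    # Note (an assumption based on the comments above, not verifiable from
    # this file alone): "trimmed" most likely means build_data.py keeps only
    # the GloVe rows for words present in words.txt and stores them in a
    # compressed .npz, so training loads a small matrix instead of re-parsing
    # the multi-gigabyte GloVe text file each run.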

    # general config
    dir_output = "results/test/"
    dir_model = dir_output + "model.weights/"
    path_log = dir_output + "log.txt"

    # vocab sizes (placeholder values; presumably overwritten from the vocab files above)
    nwords = 10
    nchars = 100  # 10
    ntags = 10

    # training
    use_crf = True
    train_embeddings = False
    nepochs = 40
    dropout = 0.5
    # batch_size = 20
    batch_size = 64
    lr_method = "adam"
    lr = 0.001
    lr_decay = 0.9
    clip = -1  # if negative, no gradient clipping
    nepoch_no_imprv = 5
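
    # Assumed training schedule (common for taggers of this kind; the loop
    # itself lives elsewhere in the repo): lr is multiplied by lr_decay after
    # each epoch, and training stops early once the dev score has not improved
    # for nepoch_no_imprv consecutive epochs.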

    # model hyperparameters
    hidden_size_char = 100  # LSTM over characters
    hidden_size_lstm = 300  # LSTM over word embeddings
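

# --- Usage sketch (added illustration, not part of the original repo) -------
# A minimal example of how Config might be consumed. It assumes the vocab
# files written by build_data.py hold one token per line and that the trimmed
# .npz stores its matrix under the key "embeddings"; both are assumptions,
# not facts confirmed by config.py itself.
if __name__ == "__main__":
    import numpy as np

    config = Config()

    def count_lines(path):
        # Vocabulary size = number of lines, assuming one token per line.
        with open(path, encoding="utf-8") as f:
            return sum(1 for _ in f)

    # Replace the placeholder sizes with the real vocabulary sizes.
    config.nwords = count_lines(config.filename_words)
    config.nchars = count_lines(config.filename_chars)
    config.ntags = count_lines(config.filename_tags)

    if config.use_pretrained:
        # Trimmed GloVe matrix, expected shape (nwords, dim_word).
        embeddings = np.load(config.filename_trimmed)["embeddings"]
        print("embeddings:", embeddings.shape)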