model.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Copyright (C) 2017 Pierpaolo Basile, Pierluigi Cassotti
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
from keras.models import Model as KerasModel
from keras.layers import Input
from keras.layers import Embedding
from keras.layers import concatenate
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers.convolutional import Convolution1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers.wrappers import TimeDistributed
from keras.layers import Dense
from keras.layers import Flatten
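# Note: ChainCRF is not part of stock Keras. The import below assumes a
# Keras fork or a local module that provides a linear-chain CRF layer
# exposing a sparse_loss attribute.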
from keras.layers import ChainCRF
from keras.layers.wrappers import Bidirectional
from keras.optimizers import Adadelta
from keras.optimizers import Adagrad
from keras.optimizers import SGD
from keras.initializers import RandomUniform
from keras.metrics import sparse_categorical_accuracy
from math import sqrt
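

# Architectural note (added comment, not in the original file): the network
# below closely resembles the BiLSTM-CNNs-CRF sequence tagger of Ma and Hovy
# (2016): pretrained word embeddings, optionally concatenated with CNN-derived
# character features and extra per-token features, feed a bidirectional LSTM
# whose per-token tag scores are decoded by a linear-chain CRF.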
class Model(object):
    # Default hyper-parameters; __init__ overwrites them for each run.
    EMBEDDING_WORD_DIM = 300
    EMBEDDING_CHAR_DIM = 30
    EMBEDDING_FEATURE_DIM = 40
    N_FILTERS = 30
    window = 3
    embedding_char = True
    features = False
    lstm_size = 200

    @staticmethod
    def themodel(embedding_weights, dictonary_size, MAX_SEQUENCE_LENGTH,
                 MAX_CHARACTER_LENGTH, alfabeth_size, feature_size, tags):
        # Word-level input: a sequence of word indices mapped to pretrained
        # embeddings.
        word_input = Input((MAX_SEQUENCE_LENGTH,))
        embed_out = Embedding(dictonary_size + 1,
                              Model.EMBEDDING_WORD_DIM,
                              weights=[embedding_weights],
                              input_length=MAX_SEQUENCE_LENGTH,
                              name='word_embedding')(word_input)
        word = TimeDistributed(Flatten())(embed_out)
        conc_list = [word]
        if Model.embedding_char:
            # Character-level branch: per-word character embeddings run
            # through a 1D convolution and max-over-time pooling.
            character_input = Input((MAX_SEQUENCE_LENGTH, MAX_CHARACTER_LENGTH,))
            embed_char_out = TimeDistributed(
                Embedding(alfabeth_size + 1, Model.EMBEDDING_CHAR_DIM,
                          embeddings_initializer=RandomUniform(
                              -sqrt(3 / Model.EMBEDDING_CHAR_DIM),
                              sqrt(3 / Model.EMBEDDING_CHAR_DIM))),
                name='char_embedding')(character_input)
            dropout = Dropout(0.5)(embed_char_out)
            conv1d_out = TimeDistributed(
                Convolution1D(kernel_size=Model.window,
                              filters=Model.N_FILTERS, padding='same',
                              activation='tanh', strides=1))(dropout)
            maxpool_out = TimeDistributed(
                MaxPooling1D(MAX_CHARACTER_LENGTH))(conv1d_out)
            char = TimeDistributed(Flatten())(maxpool_out)
            conc_list.append(char)
        if Model.features:
            # Optional per-token feature input with its own embedding table.
            feature_input = Input((MAX_SEQUENCE_LENGTH,))
            featu = Embedding(feature_size, Model.EMBEDDING_FEATURE_DIM,
                              input_length=MAX_SEQUENCE_LENGTH,
                              name='feature_embedding')(feature_input)
            conc_list.append(featu)
        if Model.embedding_char or Model.features:
            themodel = concatenate(conc_list)
        else:
            themodel = embed_out
        # Bidirectional LSTM over the token representations, then a per-token
        # dense projection whose scores are decoded by a linear-chain CRF.
        themodel = Dropout(0.5)(themodel)
        themodel = Bidirectional(LSTM(Model.lstm_size,
                                      return_sequences=True))(themodel)
        themodel = Dropout(0.5)(themodel)
        themodel = TimeDistributed(Dense(tags))(themodel)
        crf = ChainCRF()
        output = crf(themodel)
        input_list = [word_input]
        if Model.embedding_char:
            input_list.append(character_input)
        if Model.features:
            input_list.append(feature_input)
        model = KerasModel(inputs=input_list, outputs=output)
        return crf, model

    def __init__(self, features, feature_dim, embed_char, grad_clipping,
                 char_dim, filters, lstm_size, window, learning_alghoritm,
                 learning_rate, decay, embedding_weights, dictonary_size,
                 MAX_SEQUENCE_LENGTH, MAX_CHARACTER_LENGTH, alfabeth_size,
                 feature_size, tags):
        # Store the hyper-parameters on the class so themodel() can read them.
        Model.EMBEDDING_WORD_DIM = embedding_weights.shape[1]
        Model.EMBEDDING_CHAR_DIM = char_dim
        Model.EMBEDDING_FEATURE_DIM = feature_dim
        Model.N_FILTERS = filters
        Model.lstm_size = lstm_size
        Model.window = window
        Model.embedding_char = embed_char
        Model.features = features
        crf, self.model = Model.themodel(embedding_weights, dictonary_size,
                                         MAX_SEQUENCE_LENGTH,
                                         MAX_CHARACTER_LENGTH, alfabeth_size,
                                         feature_size, tags)
        # Every optimizer uses gradient clipping; SGD additionally gets
        # momentum and learning-rate decay.
        if learning_alghoritm == 'Adadelta':
            optimizer = Adadelta(clipvalue=grad_clipping)
        elif learning_alghoritm == 'Adagrad':
            optimizer = Adagrad(clipvalue=grad_clipping)
        else:
            optimizer = SGD(lr=learning_rate, decay=decay, momentum=0.9,
                            clipvalue=grad_clipping)
        self.model.compile(loss=crf.sparse_loss, optimizer=optimizer,
                           metrics=[sparse_categorical_accuracy])
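

# ---------------------------------------------------------------------------
# Usage sketch (not part of the original file). It builds the tagger with
# random word embeddings; every size and setting below is an illustrative
# placeholder, not a value from the original project. Running it requires a
# Keras installation that provides the ChainCRF layer (see the import note
# above).
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    import numpy as np

    vocab_size = 100  # hypothetical vocabulary size
    word_dim = 300    # must match the width of the embedding matrix
    embedding_weights = np.random.uniform(-0.1, 0.1,
                                          (vocab_size + 1, word_dim))

    tagger = Model(features=False, feature_dim=40, embed_char=True,
                   grad_clipping=5.0, char_dim=30, filters=30, lstm_size=200,
                   window=3, learning_alghoritm='Adadelta', learning_rate=0.01,
                   decay=0.0, embedding_weights=embedding_weights,
                   dictonary_size=vocab_size, MAX_SEQUENCE_LENGTH=50,
                   MAX_CHARACTER_LENGTH=20, alfabeth_size=60, feature_size=10,
                   tags=9)
    tagger.model.summary()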