-
Notifications
You must be signed in to change notification settings - Fork 204
/
Copy pathBiLSTM_CRF.py
62 lines (54 loc) · 4.12 KB
/
BiLSTM_CRF.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# coding: utf-8
import tensorflow as tf
from tensorflow.contrib import crf
import random
# Clear TensorFlow's global default graph as soon as this module is loaded,
# presumably so that BiLSTM_CRF is built into an empty graph.
# NOTE(review): this is an import-time side effect; any ops the importing
# program added to the default graph beforehand are discarded — confirm that
# callers expect this.
tf.reset_default_graph()
class BiLSTM_CRF(object):
    """Bidirectional-LSTM sequence tagger with a CRF output layer (TF 1.x).

    Instantiating the class adds the complete model to the current default
    graph: embedding lookup -> bidirectional LSTM -> per-direction linear map
    -> concatenation + dropout -> linear projection with ReLU to per-tag
    scores -> CRF log-likelihood loss and CRF decoding.

    Attributes:
        input_x: int32 placeholder ``[None, sentence_len]`` holding word ids.
        input_y: int32 placeholder ``[None, sentence_len]`` holding tag ids.
        sequence_lengths: int32 placeholder ``[None]`` with the length of
            each sentence in the batch.
        dropout_keep_prob: float32 placeholder passed to ``tf.nn.dropout`` as
            the keep probability.
        logit: float tensor ``[-1, sentence_len, tag_nums]`` of per-token tag
            scores fed to the CRF.
        cost: scalar tensor, the negative mean CRF log-likelihood.
        crf_labels: decoded tag ids returned by ``crf.crf_decode``
            (``[batch_size, max_seq_len]``).
    """

    def __init__(self, batch_size, tag_nums, hidden_nums, sentence_len,
                 word_embeddings, device='/gpu:1'):
        """Build the tagging graph.

        Args:
            batch_size: Stored on the instance for callers. It no longer
                constrains the graph: the batch dimension is inferred at run
                time, so batches of any size can be fed.
            tag_nums: Number of distinct tags (size of the CRF label set).
            hidden_nums: Number of units in each LSTM direction.
            sentence_len: Fixed (padded) number of tokens per sentence.
            word_embeddings: Initial value of the trainable embedding
                variable (presumably a ``[vocab_size, embedding_dim]``
                float matrix — confirm against the caller).
            device: Device string passed to ``tf.device`` for the model.
                NOTE(review): the default ``'/gpu:1'`` names a second GPU;
                confirm it exists or that soft placement is enabled.
        """
        self.batch_size = batch_size
        self.tag_nums = tag_nums
        self.hidden_nums = hidden_nums
        self.sentence_len = sentence_len
        self.word_embeddings = word_embeddings
        self.device = device

        with tf.device(device):
            # Network variables: the embedding table starts from the supplied
            # matrix and is updated during training.
            embedding_table = tf.Variable(initial_value=word_embeddings,
                                          trainable=True)

            # Input placeholders; the batch dimension is left dynamic.
            self.input_x = tf.placeholder(dtype=tf.int32,
                                          shape=[None, self.sentence_len],
                                          name='input_word_id')  # word ids
            self.input_y = tf.placeholder(dtype=tf.int32,
                                          shape=[None, self.sentence_len],
                                          name='input_labels')
            self.sequence_lengths = tf.placeholder(
                dtype=tf.int32, shape=[None], name='sequence_lengths_vector')
            self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                    name="dropout_keep_prob")

            with tf.name_scope('projection'):
                # Projection layer: map each input word id to its word vector.
                word_vectors = tf.nn.embedding_lookup(embedding_table,
                                                      ids=self.input_x,
                                                      name='word_vectors')

            with tf.name_scope('bi-lstm'):
                # Use -1 for the batch dimension instead of self.batch_size:
                # every placeholder has a dynamic batch size, so hard-coding
                # it here made any differently sized feed (last partial
                # batch, single-sentence inference) fail at run time.
                labels = tf.reshape(self.input_y,
                                    shape=[-1, self.sentence_len],
                                    name='labels')

                fw_lstm_cell = tf.nn.rnn_cell.LSTMCell(self.hidden_nums)
                bw_lstm_cell = tf.nn.rnn_cell.LSTMCell(self.hidden_nums)

                # Bidirectional pass over the embedded sentence.
                output, _state = tf.nn.bidirectional_dynamic_rnn(
                    fw_lstm_cell, bw_lstm_cell,
                    inputs=word_vectors,
                    sequence_length=self.sequence_lengths,
                    dtype=tf.float32)
                fw_output = output[0]  # [batch, sentence_len, hidden_nums]
                bw_output = output[1]  # [batch, sentence_len, hidden_nums]

                # Separate square linear maps for the forward and backward
                # outputs, applied on the flattened [batch * time, hidden]
                # view and then restored to the original 3-D shape.
                V1 = tf.get_variable(
                    'V1', dtype=tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer(),
                    shape=[self.hidden_nums, self.hidden_nums])
                V2 = tf.get_variable(
                    'V2', dtype=tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer(),
                    shape=[self.hidden_nums, self.hidden_nums])
                fw_flat = tf.reshape(fw_output, [-1, self.hidden_nums],
                                     name='Lai')
                fw_output = tf.reshape(tf.matmul(fw_flat, V1),
                                       shape=tf.shape(output[0]))
                bw_flat = tf.reshape(bw_output, [-1, self.hidden_nums],
                                     name='Rai')
                bw_output = tf.reshape(tf.matmul(bw_flat, V2),
                                       shape=tf.shape(output[1]))

                # [batch, sentence_len, 2 * hidden_nums]
                contact = tf.concat([fw_output, bw_output], -1,
                                    name='bi_lstm_concat')
                contact = tf.nn.dropout(contact, self.dropout_keep_prob)
                contact_reshape = tf.reshape(contact,
                                             shape=[-1, 2 * self.hidden_nums],
                                             name='contact')

                # Project the concatenated features to one score per tag.
                W_lstm = tf.get_variable(
                    'W_lstm', dtype=tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer(),
                    shape=[2 * self.hidden_nums, self.tag_nums],
                    trainable=True)
                b_lstm = tf.get_variable(
                    'b_lstm', initializer=tf.zeros(shape=[self.tag_nums]))
                # NOTE(review): ReLU makes every score fed to the CRF
                # non-negative — confirm this is intended.
                p = tf.nn.relu(tf.matmul(contact_reshape, W_lstm) + b_lstm)
                self.logit = tf.reshape(
                    p, shape=[-1, self.sentence_len, self.tag_nums],
                    name='omit_matrix')

            with tf.name_scope("crf"):
                log_likelihood, transition_matrix = crf.crf_log_likelihood(
                    self.logit, labels,
                    sequence_lengths=self.sequence_lengths)
                # Training loss: negative mean log-likelihood over the batch.
                self.cost = -tf.reduce_mean(log_likelihood)
                # First return value is decode_tags: [batch_size, max_seq_len].
                self.crf_labels, _ = crf.crf_decode(
                    self.logit, transition_matrix,
                    sequence_length=self.sequence_lengths)