# -*- coding: utf-8 -*-
"""rnnAttention.py

LSTM encoders with an attention layer over the per-step outputs.
"""
import torch
import torch.nn as nn
import torch.nn.functional as F

dtype = torch.float


class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, device=torch.device("cpu")):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        dropout = 0
        self.device = device
        # encoder
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True).to(self.device)
        # attention vector, shape (hidden_size, 1); created directly on the target device so it
        # stays a registered nn.Parameter (Parameter(...).to(device) returns a plain tensor).
        # NOTE: this parameter is unused in forward(), which attends with the last hidden state instead.
        self.attn = torch.nn.Parameter(torch.randn(hidden_size, 1, device=self.device))
        self.dropout = nn.Dropout(p=dropout)

    # NOTE: pasted this forward function from guided epideep
    def forward(self, x, h0=None):
        # Set the initial hidden state to zeros unless one is supplied; the cell state
        # is always zero-initialized so the LSTM call below never sees an undefined c0.
        if h0 is None:
            h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=self.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=self.device)
        # Forward propagate the LSTM; output shape: out, (h_n, c_n)
        out, (hidden, cell) = self.lstm(x, (h0, c0))
        # Attention layer: score each time step against the last layer's final hidden state.
        # (The original indexed hidden[1], which assumes num_layers == 2; hidden[-1] is the
        # last layer for any depth.)
        # hidden: (num_layers, batch, hidden_size) -> hidden_state: (batch, hidden_size, 1)
        hidden_state = hidden[-1].unsqueeze(2)
        # (batch, seq_len, hidden_size) x (batch, hidden_size, 1) = (batch, seq_len, 1)
        weights = torch.bmm(out, hidden_state)
        weights = F.softmax(weights.squeeze(2), dim=1).unsqueeze(2)
        # (batch, hidden_size, seq_len) x (batch, seq_len, 1) = (batch, hidden_size, 1) -> (batch, hidden_size)
        out = torch.bmm(torch.transpose(out, 1, 2), weights).squeeze(2)
        # Shape notes from the original debug prints (batch=13, seq_len=20, hidden_size=20, num_layers=2):
        #   x.size()          : torch.Size([13, 20, 1])
        #   emd.size()        : torch.Size([13, 20])
        #   out (pooled)      : torch.Size([13, 20])
        #   hidden[0].size()  : torch.Size([2, 13, 20])
        #   hidden[1].size()  : torch.Size([2, 13, 20])
        # Alternative attention kept from the original, using the learned self.attn vector:
        #   attnExpand = self.attn.expand(x.size(0), self.hidden_size, 1)
        #   out_attn = torch.bmm(out, attnExpand)
        # Dropout (elementwise, so the original unsqueeze(2)/squeeze(2) round-trip is unnecessary)
        out = self.dropout(out)
        return out
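

# Usage sketch for RNN (illustrative only; the sizes mirror the shape notes above
# and are assumptions, not requirements):
#   enc = RNN(input_size=1, hidden_size=20, num_layers=2)
#   feats = enc(torch.randn(13, 20, 1))   # -> torch.Size([13, 20]) attention-pooled features
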
class RNNTime(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_internal, emd_size, out_size):
        super(RNNTime, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # encoder
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        # attention vector, shape (hidden_size, 1)
        self.attn = torch.nn.Parameter(torch.randn(hidden_size, 1))
        # decoder
        self.dropout = nn.Dropout(p=0)
        self.fc = nn.Linear(hidden_size + emd_size, num_internal)
        self.fc2 = nn.Linear(num_internal, num_internal)
        self.fc3 = nn.Linear(num_internal, num_internal)
        self.fc4 = nn.Linear(num_internal, out_size)
        self.stmax = nn.Softmax(dim=1)
        self.activation = nn.LeakyReLU()
        self.relu = nn.ReLU()

    def forward(self, x, emd):
        # Set initial hidden and cell states to zeros
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        # Forward propagate the LSTM; output shape: out, (h_n, c_n)
        out, hidden = self.lstm(x, (h0, c0))
        # Shape notes from the original debug prints (batch=13, seq_len=20, hidden_size=20, num_layers=2):
        #   x.size()            : torch.Size([13, 20, 1])
        #   emd.size()          : torch.Size([13, 20])
        #   out.size()          : torch.Size([13, 20, 20])
        #   hidden[0].size()    : torch.Size([2, 13, 20])
        #   hidden[1].size()    : torch.Size([2, 13, 20])
        #   out[:, -1, :].size(): torch.Size([13, 20])
        # Attention model: expand attn from (hidden_size, 1) to (batch, hidden_size, 1) and score
        # every time step of out, instead of using only the last step out[:, -1, :].
        attnExpand = self.attn.expand(x.size(0), self.hidden_size, 1)
        # (batch, seq_len, hidden_size) x (batch, hidden_size, 1) = (batch, seq_len, 1)
        out_attn = torch.bmm(out, attnExpand)
        # Dropout
        out = self.dropout(out_attn)
        out_new = torch.squeeze(out, 2)  # drop the redundant trailing dimension -> (batch, seq_len)
        # Merge out_new and emd along dimension 1: (batch, seq_len + emd_size). This matches
        # self.fc, which expects hidden_size + emd_size inputs, because seq_len == hidden_size here.
        out1 = torch.cat((out_new, emd), 1)
        out2 = self.activation(self.fc(out1))
        out3 = self.activation(self.fc2(out2))
        out4 = self.activation(self.fc3(out3))
        out = self.stmax(self.fc4(out4))
        # Alternative output heads kept from the original:
        #   out = self.activation(self.fc4(out4))
        #   out = self.fc4(out4)
        return out
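

# Minimal smoke test (a sketch, not part of the original module). The tensor sizes below
# are assumptions chosen to match the shape notes in the comments above; num_internal and
# out_size are arbitrary illustrative values.
if __name__ == "__main__":
    batch, seq_len, input_size = 13, 20, 1
    hidden_size, num_layers = 20, 2

    enc = RNN(input_size, hidden_size, num_layers)
    x = torch.randn(batch, seq_len, input_size)
    print("RNN output:", enc(x).shape)             # expected: torch.Size([13, 20])

    # RNNTime concatenates seq_len attention scores with emd, so seq_len must equal
    # hidden_size for self.fc (hidden_size + emd_size inputs) to line up.
    emd_size, num_internal, out_size = 20, 32, 4
    model = RNNTime(input_size, hidden_size, num_layers, num_internal, emd_size, out_size)
    emd = torch.randn(batch, emd_size)
    print("RNNTime output:", model(x, emd).shape)  # expected: torch.Size([13, 4])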