CompactBilinearPooling.py
import numpy as np
import torch
from torch import nn
from torch.autograd import Variable
import pytorch_fft.fft.autograd as afft
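
# Note: `pytorch_fft` (https://github.com/locuslab/pytorch_fft) provides
# CUDA-only FFT ops with autograd support for versions of PyTorch that
# predate the built-in `torch.fft` module, so a GPU is required to run
# this file as written.
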
class CompactBilinearPooling(nn.Module):
    """
    Compute compact bilinear pooling over two bottom inputs.

    Args:
        input_dim1: input dimension (number of channels) of the first input.
        input_dim2: input dimension (number of channels) of the second input.
        output_dim: output dimension for compact bilinear pooling.
        sum_pool: (Optional) If True, sum the output along the height and
            width dimensions and return output of shape
            [batch_size, output_dim]. Otherwise return
            [batch_size, height, width, output_dim]. Default: True.
        cuda: (Optional) If True, move the sketch matrices to the GPU.
            Default: True.
        rand_h_1: (Optional) a 1D numpy array containing indices in the
            interval `[0, output_dim)`. Generated with a fixed numpy seed
            if None.
        rand_s_1: (Optional) a 1D numpy array of 1 and -1, with the same
            shape as `rand_h_1`. Generated with a fixed numpy seed if None.
        rand_h_2: (Optional) a 1D numpy array containing indices in the
            interval `[0, output_dim)`. Generated with a fixed numpy seed
            if None.
        rand_s_2: (Optional) a 1D numpy array of 1 and -1, with the same
            shape as `rand_h_2`. Generated with a fixed numpy seed if None.
    """
    def __init__(self, input_dim1, input_dim2, output_dim,
                 sum_pool=True, cuda=True,
                 rand_h_1=None, rand_s_1=None, rand_h_2=None, rand_s_2=None):
        super(CompactBilinearPooling, self).__init__()
        self.input_dim1 = input_dim1
        self.input_dim2 = input_dim2
        self.output_dim = output_dim
        self.sum_pool = sum_pool

        # Generate the fixed (non-learnable) Count Sketch parameters with
        # hard-coded numpy seeds, so results are reproducible across runs.
        if rand_h_1 is None:
            np.random.seed(1)
            rand_h_1 = np.random.randint(output_dim, size=self.input_dim1)
        if rand_s_1 is None:
            np.random.seed(3)
            rand_s_1 = 2 * np.random.randint(2, size=self.input_dim1) - 1
        self.sparse_sketch_matrix1 = Variable(self.generate_sketch_matrix(
            rand_h_1, rand_s_1, self.output_dim))

        if rand_h_2 is None:
            np.random.seed(5)
            rand_h_2 = np.random.randint(output_dim, size=self.input_dim2)
        if rand_s_2 is None:
            np.random.seed(7)
            rand_s_2 = 2 * np.random.randint(2, size=self.input_dim2) - 1
        self.sparse_sketch_matrix2 = Variable(self.generate_sketch_matrix(
            rand_h_2, rand_s_2, self.output_dim))

        # The sketch matrices are plain Variables (not registered buffers),
        # so Module.cuda() does not move them; they are moved here instead.
        if cuda:
            self.sparse_sketch_matrix1 = self.sparse_sketch_matrix1.cuda()
            self.sparse_sketch_matrix2 = self.sparse_sketch_matrix2.cuda()
    def forward(self, bottom1, bottom2):
        """
        bottom1: 1st input, 4D Tensor of shape [batch_size, input_dim1, height, width].
        bottom2: 2nd input, 4D Tensor of shape [batch_size, input_dim2, height, width].
        """
        assert bottom1.size(1) == self.input_dim1 and \
            bottom2.size(1) == self.input_dim2

        batch_size, _, height, width = bottom1.size()

        # Flatten the spatial dimensions so each spatial location becomes
        # a row vector of channel activations.
        bottom1_flat = bottom1.permute(0, 2, 3, 1).contiguous().view(-1, self.input_dim1)
        bottom2_flat = bottom2.permute(0, 2, 3, 1).contiguous().view(-1, self.input_dim2)

        # Count Sketch projection of each input.
        sketch_1 = bottom1_flat.mm(self.sparse_sketch_matrix1)
        sketch_2 = bottom2_flat.mm(self.sparse_sketch_matrix2)

        # Circular convolution of the two sketches, computed in the
        # frequency domain. The imaginary parts of the inputs are zero and
        # are allocated on the same device as the sketches (rather than
        # hard-coded to CUDA), keeping `forward` consistent with the
        # `cuda` flag in `__init__`.
        zeros_1 = Variable(sketch_1.data.new(sketch_1.size()).zero_())
        zeros_2 = Variable(sketch_2.data.new(sketch_2.size()).zero_())
        fft1_real, fft1_imag = afft.Fft()(sketch_1, zeros_1)
        fft2_real, fft2_imag = afft.Fft()(sketch_2, zeros_2)

        # Element-wise complex multiplication:
        # (a + bi)(c + di) = (ac - bd) + (ad + bc)i.
        fft_product_real = fft1_real.mul(fft2_real) - fft1_imag.mul(fft2_imag)
        fft_product_imag = fft1_real.mul(fft2_imag) + fft1_imag.mul(fft2_real)

        # Inverse FFT; keep only the real part.
        cbp_flat = afft.Ifft()(fft_product_real, fft_product_imag)[0]

        cbp = cbp_flat.view(batch_size, height, width, self.output_dim)
        if self.sum_pool:
            cbp = cbp.sum(dim=1).sum(dim=1)
        return cbp
    @staticmethod
    def generate_sketch_matrix(rand_h, rand_s, output_dim):
        """
        Return a dense matrix used for the tensor sketch operation in
        compact bilinear pooling.

        Args:
            rand_h: a 1D numpy array containing indices in the interval
                `[0, output_dim)`.
            rand_s: a 1D numpy array of 1 and -1, with the same shape as
                `rand_h`.
            output_dim: the output dimension of compact bilinear pooling.

        Returns:
            a matrix of shape [input_dim, output_dim] for tensor sketch,
            built as a sparse tensor and then densified so it can be used
            with `mm` in `forward`.
        """
        # Generate a sparse matrix for tensor count sketch.
        rand_h = rand_h.astype(np.int64)
        rand_s = rand_s.astype(np.float32)
        assert rand_h.ndim == 1 and rand_s.ndim == 1 and len(rand_h) == len(rand_s)
        assert np.all(rand_h >= 0) and np.all(rand_h < output_dim)

        input_dim = len(rand_h)

        # Row i has a single nonzero entry rand_s[i] at column rand_h[i].
        indices = np.concatenate((np.arange(input_dim)[..., np.newaxis],
                                  rand_h[..., np.newaxis]), axis=1)
        indices = torch.from_numpy(indices)
        rand_s = torch.from_numpy(rand_s)
        sparse_sketch_matrix = torch.sparse.FloatTensor(
            indices.t(), rand_s, torch.Size([input_dim, output_dim]))
        return sparse_sketch_matrix.to_dense()
if __name__ == '__main__':
    bottom1 = Variable(torch.randn(128, 512, 14, 14)).cuda()
    bottom2 = Variable(torch.randn(128, 512, 14, 14)).cuda()

    layer = CompactBilinearPooling(512, 512, 8000)
    layer.cuda()
    layer.train()

    out = layer(bottom1, bottom2)
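
    # A minimal sanity check (a sketch, assuming a CUDA device is
    # available, since pytorch_fft only ships CUDA kernels): with the
    # default sum_pool=True, the spatial dimensions collapse and the
    # output should be [batch_size, output_dim].
    assert out.size() == (128, 8000)
    print('output size:', out.size())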