import json
import os
from functools import reduce
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
from neptune.new.types import File

import configs
from data.datamgr import SetDataManager
from io_utils import model_dict, parse_args, setup_neptune
from methods.hypernets.hypernet_kernel import HyperShot
# NOTE: This uncertainty experiment was created on the master branch,
# but it also has to run on other branches with different implementations of the
# model architectures (and different parameter sets).
# If you need it on a branch whose code differences do not allow merging master,
# check out these files from master:
# * `hypershot_uncertainty.py`
# * `hypershot_uncertainty.sh`
# * `parse_args.py`
# Then, in `io_utils.py`, create a function `create_parser` that simply builds the
# parser and returns it (see how it works on the master branch).
# The parsers of the tested architectures may differ, so merging `io_utils.py`
# wholesale can be error-prone and time-consuming.
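# A minimal sketch of such a `create_parser` (hypothetical; the real argument set
# depends on the branch):
#
#     import argparse
#
#     def create_parser(script_type):
#         parser = argparse.ArgumentParser(description=f'few-shot {script_type}')
#         parser.add_argument('--dataset', default='CUB')
#         parser.add_argument('--model', default='Conv4')
#         ...  # branch-specific arguments go here
#         return parser
#
# `parse_args` can then stay a thin wrapper around `create_parser(...).parse_args()`.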
# NOTE: To run an experiment, copy `experiment_template.sh` and customize it.

def train_fs_params(params):
    # Scale the query-set size by the train/test n_way ratio.
    n_query = max(1, int(16 * params.test_n_way / params.train_n_way))
    return dict(n_way=params.train_n_way, n_support=params.n_shot, n_query=n_query)
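# Example: with train_n_way == test_n_way == 5, this gives
# n_query = max(1, int(16 * 5 / 5)) = 16 query images per class.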

def create_model_instance(params):
    return HyperShot(model_dict[params.model], params=params, **train_fs_params(params)).cuda()

def get_image_size(params):
    # Character datasets use small 28x28 inputs; everything else uses 84x84.
    # (The initial `image_size = 224` in the original was dead code.)
    if params.dataset in ['omniglot', 'cross_char']:
        image_size = 28
    else:
        image_size = 84
    return image_size

def load_dataset(params):
    if params.dataset == 'cross':
        file = configs.data_dir['miniImagenet'] + 'all.json'
    elif params.dataset == 'cross_char':
        file = configs.data_dir['omniglot'] + 'noLatin.json'
    else:
        file = configs.data_dir[params.dataset] + 'base.json'
    image_size = get_image_size(params)
    data_mgr = SetDataManager(image_size, **train_fs_params(params))
    return iter(data_mgr.get_data_loader(file, aug=False))

def find_targets_with_non_empty_difference(QY1, QY2):
    # Return a class label present in QY2 but absent from QY1, or None if there
    # is no such class.
    QY1 = set(QY1.flatten().tolist())
    QY2 = set(QY2.flatten().tolist())
    diff = QY2.difference(QY1)
    if len(diff) == 0:
        return None
    return next(iter(diff))
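# Example (hypothetical labels): if QY1 covers classes {0, 1, 2, 3, 4} and QY2
# covers {3, 4, 5}, the call returns 5, a class unseen in the first set.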

def experiment(N):
    params = parse_args('train')  # We need to parse the same parameters as during training
    print(f"Setting checkpoint_dir to {os.environ.get('BASEPATH')}")
    params.checkpoint_dir = os.environ.get('BASEPATH')
    neptune_run = setup_neptune(params)
    print(f"Loading model from {os.environ.get('MODELPATH')}")
    model_path = os.environ.get('MODELPATH')

    # Load the model checkpoint.
    model = create_model_instance(params)
    tmp = torch.load(model_path)
    model.load_state_dict(tmp['state'])

    dataset = load_dataset(params)

    def take_next():
        return next(dataset, (None, None))

    def is_any_none(x, y):
        return (x is None) or (y is None)

    # Drain the loader, concatenating all episodes into X (images) and Y (labels).
    X = torch.Tensor()
    Y = torch.Tensor()
    x, y = take_next()
    while not is_any_none(x, y):
        Y = torch.cat((Y, y), 0)
        X = torch.cat((X, x), 0)
        x, y = take_next()

    ims = get_image_size(params)
    bb = model.n_way * (model.n_support + model.n_query)  # images per episode
    bs = bb * ims * ims                                   # pixels per episode, per channel
    bn = int(torch.numel(X) / (bs * X.size()[2]))         # number of episodes
    B = torch.reshape(X, (bn, model.n_way, model.n_support + model.n_query, *X.size()[2:]))
    Y = torch.reshape(Y, (bn, model.n_way, model.n_support + model.n_query))
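
    # At this point (assuming every episode in the loader has the same shape):
    #   B: [bn, n_way, n_support + n_query, C, H, W]
    #   Y: [bn, n_way, n_support + n_query]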
    # Here is our main (support, query) pair with targets (the classifier will be
    # generated from s1).
    zipped_dataset = list(zip(B, Y))

    # We take only the first task (s1, q1) and will then focus on the class
    # probabilities of a single image.
    b, y = zipped_dataset[0]
    s1, q1 = model.parse_feature(b, is_feature=False)
    sy1 = y[:, :model.n_support].cuda()
    qy1 = y[:, model.n_support:].cuda()

    # Now find another task whose support set contains a class that cannot be
    # found in s1.
    desired_class = None
    sy2 = torch.Tensor()
    qy2 = torch.Tensor()
    for b, y in zipped_dataset:
        s2, q2 = model.parse_feature(b, is_feature=False)
        sy2 = y[:, :model.n_support].cuda()
        qy2 = y[:, model.n_support:].cuda()
        desired_class = find_targets_with_non_empty_difference(sy1, sy2)
        if desired_class is not None:  # a plain truthiness check would skip class 0
            break
print(f"desired_class {desired_class}")
print(sy1.shape)
print(sy2.shape)
print("======")
print(qy1.shape)
print(qy2.shape)
print("======")
    # NOTE: We reshape qy{1,2} to [n_way * n_query] and sy{1,2} to [n_way * n_support]
    # (e.g. [80] and [5]) to match the flattened classifier output, and we need to
    # track the index of desired_class in that flattened form.
    sy1 = sy1.flatten()
    sy2 = sy2.flatten()
    qy1 = qy1.flatten()
    qy2 = qy2.flatten()

    # Get the exact index of desired_class (after the reshape). There may be more
    # than one element of this class; we take the first.
    qy2_index = (qy2 == desired_class).nonzero(as_tuple=False)[0]
    print(f"QY2 index: {qy2_index}")

    # For the in-distribution images we simply pick the first element.
    qy1_index = torch.tensor([0], device='cuda:0')
    sy1_index = torch.tensor([0], device='cuda:0')

    model.n_query = X[0].size(1) - model.n_support  # found that n_query gets changed along the way
    model.eval()

    # Prepare q1 and the classifier generated from s1.
    q1p = torch.clone(q1)

    # S1, Q1: an element from the query set.
    R1 = [[] for _ in range(model.n_way)]
    q1 = q1.reshape(-1, q1.shape[-1])
    classifier, _ = model.generate_target_net(s1)
    rel = model.build_relations_features(support_feature=s1, feature_to_classify=q1)
    for _ in range(N):
        o = classifier(rel)[0].flatten()
        sample = torch.nn.functional.softmax(o, dim=0).clone().data.cpu().numpy()
        for i in range(model.n_way):
            R1[i].append(sample[i])
    # Each forward pass above yields a [80, 5] tensor: 80 images, each with 5 class
    # probabilities. Since the forward pass was for q1, we focus on the element at
    # qy1_index (i.e. sample[0, :]), gathering its probabilities over the N sampling
    # stages so that their expected value and standard deviation can be estimated.
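    # A minimal sketch of those summary statistics (illustrative only; they are not
    # used by the plot below):
    #
    #     R1_np = np.asarray(R1)                        # shape [n_way, N]
    #     r1_mean, r1_std = R1_np.mean(axis=1), R1_np.std(axis=1)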
    # S1, S1: an element from the support set. Passing s1 itself as
    # feature_to_classify can produce wrong dimensions, so as a workaround we swap
    # the first image of the query tensor with the first image of s1; most of the
    # tensor is still the query set, but we only read the probabilities of that one
    # image (sample[0, :]), exactly as we did for q1.
    R2 = [[] for _ in range(model.n_way)]
    q1p[0] = s1[0]
    q1p = q1p.reshape(-1, q1p.shape[-1])
    classifier, _ = model.generate_target_net(s1)
    rel = model.build_relations_features(support_feature=s1, feature_to_classify=q1p)
    for _ in range(N):
        o = classifier(rel)[0].flatten()
        sample = torch.nn.functional.softmax(o, dim=0).clone().data.cpu().numpy()
        for i in range(model.n_way):
            R2[i].append(sample[i])
    # S1, Q2: an out-of-distribution element. We pass q2 as feature_to_classify and
    # focus on the probabilities at qy2_index, i.e. sample[qy2_index, :] for a class
    # that does not exist in the support set s1.
    R3 = [[] for _ in range(model.n_way)]
    q2 = q2.reshape(-1, q2.shape[-1])
    classifier, _ = model.generate_target_net(s1)
    rel = model.build_relations_features(support_feature=s1, feature_to_classify=q2)
    for _ in range(N):
        o = classifier(rel)[qy2_index].flatten()
        print(o.shape)
        sample = torch.nn.functional.softmax(o, dim=0).clone().data.cpu().numpy()
        print(sample.shape)
        for i in range(model.n_way):
            R3[i].append(sample[i])
    # Collect the three activation series into a long-format frame for seaborn.
    frames = []
    for i in range(model.n_way):
        df1 = pd.DataFrame(R1[i], columns=['Activation'])
        df1['Class'] = i + 1
        df1['Type'] = "Element from query set"
        df2 = pd.DataFrame(R2[i], columns=['Activation'])
        df2['Class'] = i + 1
        df2['Type'] = "Element from support set"
        df3 = pd.DataFrame(R3[i], columns=['Activation'])
        df3['Class'] = i + 1
        df3['Type'] = "Element out of distribution"
        frames.extend([df1, df2, df3])
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    df = pd.concat(frames, ignore_index=True)
    print(df.head())
    fig = plt.figure()
    sns.boxplot(data=df, x='Class', y='Activation', hue='Type', showfliers=False)
    neptune_run["Plot"].upload(File.as_image(fig))
    plt.close(fig)


if __name__ == '__main__':
    experiment(1000)
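
# Typical invocation (a sketch; the exact arguments live in
# `hypershot_uncertainty.sh` / `experiment_template.sh`):
#
#     BASEPATH=<checkpoint_dir> MODELPATH=<path/to/model.tar> \
#         python hypershot_uncertainty.py --dataset cross_char --model Conv4 ...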