-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
113 lines (79 loc) · 3.43 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import os
from collections import namedtuple
from itertools import product
import h5py
import numpy as np
import sklearn.metrics as metrics
import torch
def get_device():
    """Return the torch device to run on: CUDA when available, otherwise CPU."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)
def get_correct(y_hat, y, num_classes):
    """Count how many predictions in `y_hat` match the targets `y`.

    :param y_hat: model outputs — per-sample probabilities/scores when
        ``num_classes == 1``, otherwise a (batch, num_classes) tensor of
        per-class scores
    :param y: ground-truth labels (0/1 in the binary case, class indices otherwise)
    :param num_classes: 1 selects the binary threshold path; any other value
        selects the multi-class argmax path
    :return: number of correct predictions as a plain int
        (fix: the binary branch previously returned ``np.int64`` via ``np.sum``
        while the multi-class branch returned int)
    """
    if num_classes == 1:
        # Binary case: threshold scores at 0.5, then compare element-wise.
        preds = [1 if score >= 0.5 else 0 for score in y_hat]
        return int(sum(1 for pred, target in zip(preds, y) if pred == target))
    # Multi-class case: predicted class is the argmax over the class dimension.
    return y_hat.argmax(dim=1).eq(y).sum().item()
def flatten_cnn_activations_using_max_pooled(activations, kernel_size, stride=1):
    """Max-pool CNN activations, then flatten to (num_samples, features).

    :param activations: numpy array of shape (N, C, H, W)
    :param kernel_size: pooling window size passed to ``torch.nn.MaxPool2d``
    :param stride: pooling stride (fix: this argument was previously ignored —
        the pool was hard-coded to ``stride=1``; the default of 1 preserves the
        old behavior for existing callers)
    :return: numpy array of shape (N, -1) with pooled activations flattened
    """
    max_pool = torch.nn.MaxPool2d(kernel_size, stride=stride)
    torch_activation = torch.from_numpy(activations)
    max_pool_activation = max_pool(torch_activation)
    flatten_activations = max_pool_activation.view(
        max_pool_activation.size()[0], -1
    ).numpy()
    return flatten_activations
def flatten_cnn_activations_using_avg_pooled(activations, kernel_size, stride=1):
    """Average-pool CNN activations, then flatten to (num_samples, features).

    :param activations: numpy array of shape (N, C, H, W)
    :param kernel_size: pooling window size passed to ``torch.nn.AvgPool2d``
    :param stride: pooling stride (fix: this argument was previously ignored —
        the pool was hard-coded to ``stride=1``; the default of 1 preserves the
        old behavior for existing callers)
    :return: numpy array of shape (N, -1) with pooled activations flattened
    """
    avg_pool = torch.nn.AvgPool2d(kernel_size, stride=stride)
    torch_activation = torch.from_numpy(activations)
    avg_pool_activation = avg_pool(torch_activation)
    flatten_activations = avg_pool_activation.view(
        avg_pool_activation.size()[0], -1
    ).numpy()
    return flatten_activations
def flatten_cnn_activations_using_adaptive_avg_pooled(activations, kernel_size, stride=1):
    """Global-average-pool CNN activations to 1x1 per channel, then flatten.

    :param activations: numpy array of shape (N, C, H, W)
    :param kernel_size: accepted for signature parity with the other flatten
        helpers; adaptive pooling does not use it
    :param stride: accepted for signature parity; not used
    :return: numpy array of shape (N, C) — one averaged value per channel
    """
    pooled = torch.nn.AdaptiveAvgPool2d(output_size=(1, 1))(
        torch.from_numpy(activations)
    )
    return pooled.view(pooled.size(0), -1).numpy()
def flatten_cnn_activations_using_activations(activations):
    """Flatten raw activations to 2-D (num_samples, features) with no pooling."""
    num_samples = activations.shape[0]
    return activations.reshape(num_samples, -1)
def cal_accuracy(label, out):
    """Fraction of predictions in `out` that match the true labels `label`."""
    return metrics.accuracy_score(y_true=label, y_pred=out)
def cal_precision(label, out):
    """Binary precision of predictions `out` against true labels `label`."""
    return metrics.precision_score(y_true=label, y_pred=out)
def cal_recall(label, out):
    """Binary recall of predictions `out` against true labels `label`."""
    return metrics.recall_score(y_true=label, y_pred=out)
def cal_roc_auc(label, out_prob):
    """ROC-AUC of predicted scores `out_prob` against true labels `label`."""
    return metrics.roc_auc_score(y_true=label, y_score=out_prob)
def cal_f1_score(label, out, avg="macro"):
    """F1 score of predictions `out` against labels `label` (default macro-averaged)."""
    return metrics.f1_score(y_true=label, y_pred=out, average=avg)
def cal_completeness_score(num_labels, acc_g, acc_bb):
    """Completeness score: the fraction of the black-box's above-chance accuracy
    that the surrogate recovers. Chance accuracy is 1/num_labels.

    :param num_labels: number of classes (defines the random-guess baseline)
    :param acc_g: accuracy of the surrogate/explainer model
    :param acc_bb: accuracy of the black-box model
    :return: (acc_g - chance) / (acc_bb - chance)
    """
    chance_level = 1 / num_labels
    return (acc_g - chance_level) / (acc_bb - chance_level)
def create_dir(path_dict):
    """Create a directory (including parents) described by `path_dict`.

    Best-effort: a failure is reported on stdout rather than raised, matching
    the original behavior.

    :param path_dict: dict with keys "path_name" (directory to create) and
        "path_type" (human-readable label used in the printed messages)
    """
    try:
        os.makedirs(path_dict["path_name"], exist_ok=True)
        print(f"{path_dict['path_type']} directory is created successfully at:")
        print(path_dict["path_name"])
    except OSError as error:
        # Fix: include the caught OSError in the message instead of discarding it,
        # so the failure reason (permissions, bad path, ...) is visible.
        print(f"{path_dict['path_type']} directory at {path_dict['path_name']} can not be created: {error}")
def get_runs(params):
    """
    Gets the run parameters using cartesian products of the different parameters.

    :param params: dict mapping hyper-parameter name -> list of candidate values
        (e.g. {"lr": [0.1, 0.01], "batch_size": [16, 32]})
    :return: list of `Run` named tuples, one per combination, with one field
        per key of `params`
    """
    Run = namedtuple("Run", params.keys())
    # One Run per element of the cartesian product of all value lists
    # (comprehension replaces the original manual append loop).
    return [Run(*values) for values in product(*params.values())]
def get_num_correct(y_hat, y):
    """Count samples whose argmax-predicted class equals the target label."""
    predicted_classes = y_hat.argmax(dim=1)
    return predicted_classes.eq(y).sum().item()
def save_activations(activations_path, activation_file, bb_layer, train_activations):
    """Write one layer's activations to an HDF5 file (mode 'w' overwrites).

    :param activations_path: directory to write into
    :param activation_file: HDF5 file name
    :param bb_layer: layer name, used as both the dataset key written and the
        key read from `train_activations`
    :param train_activations: mapping of layer name -> activation array
    """
    target_file = os.path.join(activations_path, activation_file)
    with h5py.File(target_file, 'w') as h5_handle:
        h5_handle.create_dataset(bb_layer, data=train_activations[bb_layer])