-
Notifications
You must be signed in to change notification settings - Fork 0
/
analysis_utils.py
227 lines (202 loc) · 7.02 KB
/
analysis_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
import math
import numpy as np
import os
import json
import itertools
from argparse import Namespace
import main
# Markdown table headers for the result tables written by the analysis scripts.
# Each header's column order must match the corresponding *_KEYS list below.
MAIN_RESULTS_HEADER = """
|Dataset|Method|Micro-avg AUC|Macro-avg AUC|Min AUC|Minority AUC|Accuracy|
|---|---|---|---|---|---|---|
"""
# Header for deviation-from-baseline tables (same metrics, no accuracy column).
DEVIATION_HEADER = """
|Dataset|Method|Micro-avg AUC|Macro-avg AUC|Min AUC|Minority AUC|
|---|---|---|---|---|---|
"""
# Header for identifiability results on the tabular datasets.
IDENTIFIABILITY_HEADER = """
|Target|Group|Adult|LSAC|COMPAS|
|---|---|---|---|---|
"""
# Header for identifiability results on the image (EMNIST) datasets.
IDENTIFIABILITY_IMAGE_HEADER = """
|Adversary strength|Group|EMNIST 35|EMNIST 10|
|---|---|---|---|
"""
# Header for the target-fairness (TF) comparison table.
TF_HEADER = """
|Dataset|Method|Micro-avg AUC|Minority AUC|Accuracy|
|---|---|---|---|---|
"""
# Metric keys, in column order, for each table type above.
MAIN_RESULTS_KEYS = ['micro_avg_auc', 'macro_avg_auc', 'min_auc', 'minority_auc', 'accuracy']
DEVIATION_KEYS = ['micro_avg_auc', 'macro_avg_auc', 'min_auc', 'minority_auc']
TF_KEYS = ['micro_avg_auc', 'minority_auc', 'accuracy']
def get_our_path(base_path, model, dataset, seed_run_version=0):
    """Return the path to our mean/std results file for one dataset/model run."""
    run_dir = f'seed_run_version_{seed_run_version}'
    return os.path.join(base_path, dataset, model, run_dir, 'mean_std.json')
def get_their_path(base_path, model, dataset):
    """Return the path to the baseline results file '<model>_<dataset>.json'."""
    filename = f"{model}_{dataset}.json"
    return os.path.join(base_path, filename)
def get_tf_path(base_path, model, dataset):
    """Return the target-fairness results path; 'IPW(S)' maps to the 'IPW' directory."""
    dir_model = "IPW" if model == "IPW(S)" else model
    return os.path.join(base_path, f'{dir_model}_{dataset}', 'mean_std.json')
def subtract(dict1, dict2):
    """Recursively subtract dict2 from dict1, element-wise over matching nested keys."""
    if not isinstance(dict1, dict):
        return dict1 - dict2
    assert dict1.keys() == dict2.keys()
    return {key: subtract(value, dict2[key]) for key, value in dict1.items()}
def add(dict1, dict2):
    """Recursively add dict2 to dict1, element-wise over matching nested keys."""
    if not isinstance(dict1, dict):
        return dict1 + dict2
    assert dict1.keys() == dict2.keys()
    return {key: add(value, dict2[key]) for key, value in dict1.items()}
def valmap(dictionary, f):
    """Apply f to every leaf value of a (possibly nested) dict; non-dicts map directly."""
    if not isinstance(dictionary, dict):
        return f(dictionary)
    return {key: valmap(value, f) for key, value in dictionary.items()}
def square(dictionary):
    """Square every leaf value of a (possibly nested) dict."""
    return valmap(dictionary, lambda value: value * value)
def div(dict1, dict2):
    """Recursively divide dict1 by dict2, element-wise over matching nested keys."""
    if not isinstance(dict1, dict):
        return dict1 / dict2
    assert dict1.keys() == dict2.keys()
    return {key: div(value, dict2[key]) for key, value in dict1.items()}
def load_result_dict(base_path, datasets, models, path_func):
    """Load JSON results for every dataset/model combination.

    path_func(base_path, model, dataset) gives the file location; combinations
    whose file is missing are reported and skipped rather than raising.
    Returns a dict keyed by (dataset, model).
    """
    results = {}
    for dataset in datasets:
        for model in models:
            result_path = path_func(base_path, model, dataset)
            try:
                with open(result_path) as handle:
                    results[(dataset, model)] = json.load(handle)
            except FileNotFoundError:
                print(f"Didn't find results for {dataset}, {model}, skipping")
    return results
def is_max(result_dict, metrics):
    """Flag, per (dataset, model) entry and per metric, whether its mean is the
    dataset-wide maximum over all models evaluated on that dataset.

    Returns a dict with the same keys as result_dict mapping each metric to a bool.
    """
    # Single pass to find the best mean per dataset per metric.
    best = {}
    for (dataset, _), entry in result_dict.items():
        per_dataset = best.setdefault(dataset, {})
        for metric in metrics:
            mean = entry[metric]["mean"]
            if metric not in per_dataset or mean > per_dataset[metric]:
                per_dataset[metric] = mean
    # Second pass: mark entries whose mean equals the dataset maximum.
    return {
        (dataset, model): {
            metric: entry[metric]["mean"] == best[dataset][metric]
            for metric in metrics
        }
        for (dataset, model), entry in result_dict.items()
    }
def create_latex_line_scalar(row_key, result_entry, keys, bold_mask):
    """Render one LaTeX table row of scalar values (3 decimals), bolding the
    cells whose metric is flagged in bold_mask. row_key may be a tuple
    (one label cell per item) or a single label.
    """
    labels = list(row_key) if isinstance(row_key, tuple) else [row_key]
    cells = [f'{label}' for label in labels]
    for key in keys:
        formatted = f'{result_entry[key]:1.3f}'
        if bold_mask[key]:
            formatted = r'\textbf{' + formatted + r'}'
        cells.append(formatted)
    # LaTeX row: cells separated by '&', terminated by '\\'.
    return ' & '.join(cells) + '\\\\\n'
def create_latex_line_with_std(row_key, result_entry, keys, bold_mask):
    """Render one LaTeX table row of 'mean ± std' values (4 decimals), bolding
    the cells whose metric is flagged in bold_mask. row_key may be a tuple
    (one label cell per item) or a single label.
    """
    labels = list(row_key) if isinstance(row_key, tuple) else [row_key]
    cells = [f'{label}' for label in labels]
    for key in keys:
        mean_str = f"{result_entry[key]['mean']:1.4f}"
        std_str = f"{result_entry[key]['std']:1.4f}"
        if bold_mask[key]:
            cells.append(r'\textbf{' + mean_str + r'} $\pm$ \textbf{' + std_str + r'}')
        else:
            cells.append(mean_str + r' $\pm$ ' + std_str)
    # LaTeX row: cells separated by '&', terminated by '\\'.
    return ' & '.join(cells) + '\\\\\n'
def create_markdown_line_scalar(row_key, result_entry, keys, bold_mask):
    """Render one markdown table row of scalar values (2 decimals), bolding the
    cells whose metric is flagged in bold_mask. row_key may be a tuple
    (one label cell per item) or a single label.
    """
    labels = row_key if isinstance(row_key, tuple) else (row_key,)
    line = ''.join(f'| {label}' for label in labels)
    for key in keys:
        cell = f'{result_entry[key]:1.2f}'
        if bold_mask[key]:
            cell = f'**{cell}**'
        line += f' | {cell}'
    return line + ' |\n'
def create_markdown_line_with_std(row_key, result_entry, keys, bold_mask):
    """Render one markdown table row of 'mean +- std' values (4 decimals),
    bolding the cells whose metric is flagged in bold_mask.

    row_key may be a tuple (one label cell per item) or a single label.
    result_entry maps each key to a dict with 'mean' and 'std' entries.
    """
    if isinstance(row_key, tuple):
        string = ''
        for item in row_key:
            string += f'| {item}'
    else:
        string = f'| {row_key}'
    for key in keys:
        mean = result_entry[key]['mean']
        std = result_entry[key]['std']
        if bold_mask[key]:
            string += f' | **{mean:1.4f} +- {std:1.4f}**'
        else:
            string += f' | {mean:1.4f} +- {std:1.4f}'
    # Close the row with a trailing pipe, consistent with the table headers
    # and with create_markdown_line_scalar (original ended with '\n' only).
    string += ' |\n'
    return string
def create_table(result_dict, keys, bold_dict, line_func):
    """Concatenate one rendered line per result_dict entry, in insertion order.

    line_func(row_key, result_entry, keys, bold_mask) produces each line;
    bold_dict supplies the per-row bold mask.
    """
    rendered = [
        line_func(row_key, entry, keys, bold_dict[row_key])
        for row_key, entry in result_dict.items()
    ]
    return ''.join(rendered)
def run_models(seed, args, optimal_hparams, dataset_model_list):
    """Train and evaluate every (dataset, model) pair via main.main.

    The IPW variants 'IPW(S)' / 'IPW(S+Y)' both run the IPW model, without /
    with the sensitive label respectively. optimal_hparams, if given, is a
    nested dict optimal_hparams[dataset][model] of attribute overrides.
    Returns a dict keyed by (dataset, model) with each run's result.
    """
    results = {}
    image_datasets = ('EMNIST_35', 'EMNIST_10')
    for dataset, model in dataset_model_list:
        # Copy the namespace so the caller's defaults stay untouched.
        run_args = Namespace(**vars(args))
        if model == 'IPW(S)':
            run_args.model = 'IPW'
            run_args.sensitive_label = False
        elif model == 'IPW(S+Y)':
            run_args.model = 'IPW'
            run_args.sensitive_label = True
        else:
            run_args.model = model
        run_args.dataset = dataset
        run_args.seed = seed
        run_args.dataset_type = 'image' if dataset in image_datasets else 'tabular'
        if optimal_hparams is not None:
            # Install the tuned hyperparameters for this dataset/model pair.
            for attr_name, attr_value in optimal_hparams[dataset][model].items():
                setattr(run_args, attr_name, attr_value)
        results[(dataset, model)] = main.main(run_args)
    return results
def result_list_to_dict(results, dataset_model_list, metrics):
    """Aggregate a list of per-seed result dicts into mean/std statistics.

    results: list of dicts, each keyed by (dataset, model) with metric values.
    Returns {key: {metric: {'mean': ..., 'std': ...}}} over the seed runs.
    """
    aggregated = {}
    for key in dataset_model_list:
        per_metric = {}
        for metric in metrics:
            values = [run[key][metric] for run in results]
            # np.std is the population std (ddof=0), matching the original.
            per_metric[metric] = {'mean': np.mean(values), 'std': np.std(values)}
        aggregated[key] = per_metric
    return aggregated