# ---
# jupyter:
#   jupytext:
#     formats: ipynb,py:percent
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#       jupytext_version: 1.12.0
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
# ---
# %%
FIGWIDTH = 8
import os
# Number of train/test reshuffles; overridable via the NUM_SHUFFLES environment
# variable (unset or empty falls back to 200).
NUM_SHUFFLES = int(os.environ.get("NUM_SHUFFLES") or 200)
# %%
# %load_ext autoreload
# %autoreload 2
import itertools
import pandas as pd
import numpy as np
import seaborn as sns
import diffprivlib.models as dp
from tqdm import autonotebook as tqdm
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from scipy import stats
from model_zoo import model_zoo, lr_setup, renaming_dict
from mia import run_threshold_estimator, run_shadow_model_attack
from utils import infer_from
import plot_params
# %%
# Control model: a fixed, data-independent classifier. Because fit() ignores
# the data, a membership inference attack against it can have no true
# advantage; any measured "vulnerability" is estimation bias.
class ThreshClassifier:
    def __init__(self, threshold=0.):
        self.threshold = threshold

    # The model is data-independent: fitting is a no-op.
    def fit(self, *args, **kwargs):
        pass

    def predict_proba(self, xs, *args, **kwargs):
        if isinstance(xs, pd.DataFrame):
            xs = xs.values
        # Predict the positive class iff the first feature exceeds the threshold.
        p = (xs[:, 0] > self.threshold).astype(float)
        p = np.expand_dims(p, 1)
        return np.hstack([1 - p, p])

# Smoke test: one point on either side of the default threshold.
ThreshClassifier().predict_proba(np.array([[0, 1], [1, 0]]))
model_zoo["threshold"] = lambda: ThreshClassifier(1)
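# %% [markdown]
# Sanity check (an illustrative sketch, not part of the reproduced analysis):
# because the control model ignores its training data, member and non-member
# losses are identically distributed, so a threshold attack on it should land
# near 50% accuracy (zero advantage). Any deviation is the small-sample
# estimation bias that the control model is meant to expose.
# %%
gen_check = np.random.RandomState(seed=0)
losses_in = gen_check.normal(size=100)   # "member" losses
losses_out = gen_check.normal(size=100)  # "non-member" losses, same distribution
# Members are assumed to have lower loss; threshold at the pooled median.
thr = np.median(np.concatenate([losses_in, losses_out]))
acc = 0.5 * ((losses_in <= thr).mean() + (losses_out > thr).mean())
print(f"attack accuracy on a data-independent model: {acc:.3f} "
      f"(advantage {2 * acc - 1:+.3f})")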
# %%
total_size = 2500
size0 = 0.2
size1 = 0.8
gen = np.random.RandomState(seed=1)
# Synthetic data: subgroup z=0 is small (20% of the data) and class-imbalanced
# (10%/90%); subgroup z=1 is large (80%) and class-balanced.
minus0 = gen.multivariate_normal([0, -1], [[7, 1], [1, 7]],
                                 size=int(0.1 * size0 * total_size))
plus0 = gen.multivariate_normal([1, 2], [[5, 2], [2, 5]],
                                size=int(0.9 * size0 * total_size))
minus1 = gen.multivariate_normal([-5, 0], [[5, 1], [1, 5]],
                                 size=int(0.5 * size1 * total_size))
plus1 = gen.multivariate_normal([2, 3], [[10, 1], [1, 4]],
                                size=int(0.5 * size1 * total_size))
len(minus0), len(plus0), len(minus1), len(plus1)
data = pd.concat([
    pd.DataFrame(minus0).assign(z=0, y=0),
    pd.DataFrame(plus0).assign(z=0, y=1),
    pd.DataFrame(minus1).assign(z=1, y=0),
    pd.DataFrame(plus1).assign(z=1, y=1),
], axis=0, ignore_index=True)
data.head()
# %%
data.groupby(["z", "y"]).count()
# %%
sns.displot(data, x=0, hue="z", col="y")
# %%
def get_subgroup_vulns(clf, data_train, data_test,
                       sensitive_features=False, ys=None, zs=None,
                       ignore_y=False, visualize=False,
                       method="average_loss_threshold"):
    if ys is None: ys = [0, 1]
    if zs is None: zs = [0, 1]
    result = pd.DataFrame()
    for y, z in itertools.product(ys, zs):
        group_train = data_train.query(f"y == {y} and z == {z}")
        group_test = data_test.query(f"y == {y} and z == {z}")
        preds_train = infer_from(clf, group_train[[0, 1]])
        preds_test = infer_from(clf, group_test[[0, 1]])
        assert "threshold" in method
        vulns = run_threshold_estimator(
            group_train.y, preds_train, group_test.y, preds_test,
            microdata=False,
            method=method,
            # With False, all threshold estimators would have additional bias due
            # to the unequal representation of in/out challenge examples in the
            # subgroups. That bias would have to be accounted for by changing the
            # baseline in the advantage computation from 2 * (attack_acc - 0.5) to
            # 2 * (attack_acc - skewed_group_baseline); alternatively, the sampling
            # of the train/test datasets would have to be stratified by subgroup.
            # True is therefore the right setting when no such corrections are
            # performed, and we use True for these experiments. The arXiv preprint
            # does not include enough implementation detail to tell how exactly
            # the original paper handled this. (A small numeric sketch of the
            # baseline shift follows this function.)
            enforce_uniform_prior=True,
        )
        result = pd.concat(
            [result, pd.DataFrame(dict(vuln=[vulns], y=y, z=z))],
            ignore_index=True,
        )
    return result
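# %% [markdown]
# A small numeric sketch (illustrative numbers, not from the experiments) of the
# baseline shift mentioned in the comment above: when a subgroup contains unequal
# numbers of in/out challenge examples, a trivial attacker that always answers the
# majority label already beats 50% accuracy, so the advantage baseline moves from
# 0.5 to the skewed-group baseline.
# %%
n_in, n_out = 80, 20  # hypothetical member/non-member counts in a subgroup
skewed_group_baseline = max(n_in, n_out) / (n_in + n_out)
attack_acc = 0.85  # hypothetical raw attack accuracy on this subgroup
print(f"naive advantage:     {2 * (attack_acc - 0.5):.2f}")
print(f"corrected advantage: {2 * (attack_acc - skewed_group_baseline):.2f}")
# %%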
methods = ["best_loss_threshold", "average_loss_threshold"]
sim_results = pd.DataFrame()
for rep in tqdm.trange(NUM_SHUFFLES):
    data_train, data_test = train_test_split(
        data, test_size=0.5, random_state=rep)
    X_train = data_train[[0, 1]].values
    y_train = data_train.y.values
    control_model = ThreshClassifier(0)
    control_model.fit(X_train, y_train)
    # control_model1 = ThreshClassifier(2)
    # control_model1.fit(X_train, y_train)
    # control_model2 = ThreshClassifier(10)
    # control_model2.fit(X_train, y_train)
    normal_model = MLPClassifier(hidden_layer_sizes=[8, 8, 8]).fit(X_train, y_train)
    fair_model = model_zoo["lr_eo_expgrad"]()
    fair_model.fit(X_train, y_train, sensitive_features=data_train.z)
    for method in methods:
        vulns_data = pd.concat([
            get_subgroup_vulns(control_model, data_train, data_test,
                               method=method)
                .assign(model="control", method=method),
            # get_subgroup_vulns(control_model1, data_train, data_test,
            #                    method=method)
            #     .assign(model="control1", method=method),
            # get_subgroup_vulns(control_model2, data_train, data_test,
            #                    method=method)
            #     .assign(model="control2", method=method),
            get_subgroup_vulns(normal_model, data_train, data_test,
                               method=method)
                .assign(model="nn", method=method),
            get_subgroup_vulns(fair_model, data_train, data_test,
                               sensitive_features=True,
                               method=method)
                .assign(model="fair", method=method),
        ], axis=0, ignore_index=True).assign(rep=rep)
        sim_results = pd.concat([sim_results, vulns_data], ignore_index=True)
sim_results.head()
# %%
sim_results.groupby(["model", "method", "z", "y", "rep"]).vuln.mean().reset_index() \
    .groupby(["model", "method", "z", "y"]).agg(dict(vuln="mean"))
# %%
sim_results.groupby(["model", "method", "z", "y"]).vuln.mean()
# %%
sim_results["subgroup"] = list(f"{z}-{y}" for z, y in zip(sim_results["z"], sim_results["y"]))
# %% [markdown]
# Compute ANOVA F-test p-values to check whether there is a significant disparity between subgroup vulnerabilities.
# %%
from statsmodels.stats.anova import AnovaRM
from statsmodels.stats.multicomp import pairwise_tukeyhsd
for model, method in itertools.product(sim_results.model.unique(), methods):
    df = sim_results.query(f"model == '{model}' and method == '{method}'")
    anova = AnovaRM(
        data=df,
        depvar="vuln",
        subject="rep",
        within=["subgroup"],
        aggregate_func=np.mean,
    )
    res = anova.fit()
    f, p = (
        res.anova_table.loc["subgroup", "F Value"],
        res.anova_table.loc["subgroup", "Pr > F"],
    )
    print(f"{model=} {method=}")
    print(f"{p=} {f=}\n")
    sim_results.loc[df.index, "p"] = p
    sim_results.loc[df.index, "F"] = f
# %%
plot_df = sim_results.copy()
plot_df = plot_df.replace({
    "average_loss_threshold": "Avg. loss threshold",
    "best_loss_threshold": "Opt. loss threshold",
}).rename(columns={
    "subgroup": "Subgroup",
    "vuln": "Estimate of subgroup vuln.",
    "method": "Method",
})
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(
    data=plot_df.query("model == 'control'"),
    # Convert mean attack accuracy into membership advantage, in percent.
    estimator=lambda vulns: (2 * vulns.mean() - 1) * 100,
    x="Subgroup", y="Estimate of subgroup vuln.", hue="Method",
    order=["0-0", "0-1", "1-0", "1-1"],
    ax=ax,
)
fig.set_tight_layout(tight=True)
# plt.savefig("images/plot_estimation_bias_chang.pdf")
# %% [markdown]
# The following is evidence that the advantage from the optimal-threshold attack is not only due to small-sample bias but also contains legitimate advantage. It is not possible, however, to tell which part of the estimate is bias (the part that would be there even if the target model were independent of the data) and which part is not: we cannot simply, e.g., subtract the vulnerability of the control model.
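# %% [markdown]
# As a rough reference (for eyeballing only), the control model's per-subgroup
# estimates give a bias floor: the model is data-independent, so everything it
# appears to "leak" is estimator bias. As argued above, subtracting this floor
# from the other models' estimates would *not* be a valid correction, since the
# bias depends on each model's loss distribution.
# %%
sim_results.groupby(["model", "method", "subgroup"]).vuln \
    .apply(lambda v: (2 * v.mean() - 1) * 100) \
    .rename("advantage_pct").reset_index()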
# %%
plot_df = sim_results.copy()
plot_df = plot_df.replace({
    "average_loss_threshold": "Avg. loss threshold",
    "best_loss_threshold": "Opt. loss threshold",
}).rename(columns={
    "model": "Model",
    "subgroup": "Subgroup",
    "vuln": "Est. of vuln.",
    "method": "Method",
})
sns.catplot(data=plot_df, x="Subgroup", y="Est. of vuln.",
            hue="Model", col="Method",
            # Convert mean attack accuracy into membership advantage, in percent.
            estimator=lambda vulns: (2 * vulns.mean() - 1) * 100,
            kind="bar")