forked from MadryLab/cifar10_challenge
-
Notifications
You must be signed in to change notification settings - Fork 0
/
eval.py
162 lines (137 loc) · 5.37 KB
/
eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
"""
Infinite evaluation loop going through the checkpoints in the model directory
as they appear and evaluating them. Accuracy and average loss are printed and
added as tensorboard summaries.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from datetime import datetime
import json
import math
import os
import sys
import time
import tensorflow as tf
import cifar10_input
from model import Model
from pgd_attack import LinfPGDAttack
# Global constants, read once from the JSON configuration file
with open('config.json') as config_file:
    config = json.load(config_file)

# Pull the evaluation settings out of the config in one step; the tuple of
# keys is ordered to line up with the names being assigned.
(num_eval_examples,
 eval_batch_size,
 eval_on_cpu,
 data_path,
 model_dir) = (config[key] for key in ('num_eval_examples',
                                        'eval_batch_size',
                                        'eval_on_cpu',
                                        'data_path',
                                        'model_dir'))
# Set up the data, hyperparameters, and the model
cifar = cifar10_input.CIFAR10Data(data_path)


def _build_model_and_attack():
    """Create the eval-mode model and the L-inf PGD attack that targets it.

    The attack hyperparameters are taken from the module-level `config`.

    Returns:
      A `(model, attack)` pair.
    """
    eval_model = Model(mode='eval')
    pgd_attack = LinfPGDAttack(eval_model,
                               config['epsilon'],
                               config['num_steps'],
                               config['step_size'],
                               config['random_start'],
                               config['loss_func'])
    return eval_model, pgd_attack


if eval_on_cpu:
    # Build both the model and the attack under the CPU device scope.
    with tf.device("/cpu:0"):
        model, attack = _build_model_and_attack()
else:
    model, attack = _build_model_and_attack()

# Created after the model so that it is part of the same default graph.
global_step = tf.contrib.framework.get_or_create_global_step()
# Setting up the Tensorboard and checkpoint outputs
eval_dir = os.path.join(model_dir, 'eval')
try:
    # makedirs creates every missing parent, so this also creates model_dir
    # when it does not exist yet.
    os.makedirs(eval_dir)
except OSError:
    # The directory may already exist, or may have been created by another
    # process sharing model_dir between an existence check and the creation.
    # Only a directory that is genuinely unavailable is an error.
    if not os.path.isdir(eval_dir):
        raise

# State of the polling loop: the checkpoint evaluated most recently, and
# whether the current "waiting" message has already been printed.
last_checkpoint_filename = ''
already_seen_state = False

# Saver used to restore checkpoints, and the writer for the eval summaries
# (stored in the 'eval' subdirectory of model_dir).
saver = tf.train.Saver()
summary_writer = tf.summary.FileWriter(eval_dir)
# A function for evaluating a single checkpoint
def evaluate_checkpoint(filename):
    """Evaluate one checkpoint on natural and adversarial evaluation data.

    Restores the checkpoint into a fresh session and walks over the first
    `num_eval_examples` evaluation examples in batches of at most
    `eval_batch_size`. Every batch is evaluated twice: unmodified ("nat")
    and after being perturbed by `attack.perturb` ("adv"). Per-batch counts
    and the overall accuracy / average loss are printed, and the overall
    values are written to `summary_writer` at the checkpoint's global step.

    Args:
      filename: path of the checkpoint to load, as passed to `saver.restore`.
    """
    with tf.Session() as sess:
        # Restore the checkpoint
        saver.restore(sess, filename)

        # Iterate over the samples batch-by-batch. The division is a true
        # division thanks to the module's `from __future__ import division`,
        # so ceil() rounds a partial final batch up to one more batch.
        num_batches = int(math.ceil(num_eval_examples / eval_batch_size))
        total_xent_nat = 0.
        total_xent_adv = 0.
        total_corr_nat = 0
        total_corr_adv = 0

        for ibatch in range(num_batches):
            bstart = ibatch * eval_batch_size
            # Clamp the end of the last batch so that no more than
            # num_eval_examples examples are requested in total.
            bend = min(bstart + eval_batch_size, num_eval_examples)

            x_batch = cifar.eval_data.xs[bstart:bend, :]
            y_batch = cifar.eval_data.ys[bstart:bend]

            dict_nat = {model.x_input: x_batch,
                        model.y_input: y_batch}

            # Adversarial counterpart of this batch, computed in the same
            # session (i.e. against the weights that were just restored).
            x_batch_adv = attack.perturb(x_batch, y_batch, sess)

            dict_adv = {model.x_input: x_batch_adv,
                        model.y_input: y_batch}

            cur_corr_nat, cur_xent_nat = sess.run(
                [model.num_correct, model.xent],
                feed_dict=dict_nat)
            cur_corr_adv, cur_xent_adv = sess.run(
                [model.num_correct, model.xent],
                feed_dict=dict_adv)

            # NOTE(review): this prints the configured batch size, not the
            # size of the current batch -- looks like leftover debug output;
            # kept so the console output is unchanged.
            print(eval_batch_size)
            print("Correctly classified natural examples: {}".format(cur_corr_nat))
            print("Correctly classified adversarial examples: {}".format(cur_corr_adv))

            total_xent_nat += cur_xent_nat
            total_xent_adv += cur_xent_adv
            total_corr_nat += cur_corr_nat
            total_corr_adv += cur_corr_adv

        # Averages are taken over the configured number of examples.
        avg_xent_nat = total_xent_nat / num_eval_examples
        avg_xent_adv = total_xent_adv / num_eval_examples
        acc_nat = total_corr_nat / num_eval_examples
        acc_adv = total_corr_adv / num_eval_examples

        # The adversarial values are deliberately logged under two tags each
        # ('... adv eval' and '... adv').
        summary = tf.Summary(value=[
            tf.Summary.Value(tag='xent adv eval', simple_value=avg_xent_adv),
            tf.Summary.Value(tag='xent adv', simple_value=avg_xent_adv),
            tf.Summary.Value(tag='xent nat', simple_value=avg_xent_nat),
            tf.Summary.Value(tag='accuracy adv eval', simple_value=acc_adv),
            tf.Summary.Value(tag='accuracy adv', simple_value=acc_adv),
            tf.Summary.Value(tag='accuracy nat', simple_value=acc_nat)])
        summary_writer.add_summary(summary, global_step.eval(sess))
        # The writer buffers events; flush so that the results of this
        # checkpoint reach disk even if the (endless) script is killed before
        # the writer's next periodic flush.
        summary_writer.flush()

        print('natural: {:.2f}%'.format(100 * acc_nat))
        print('adversarial: {:.2f}%'.format(100 * acc_adv))
        print('avg nat loss: {:.4f}'.format(avg_xent_nat))
        print('avg adv loss: {:.4f}'.format(avg_xent_adv))
# Infinite eval loop: poll the model directory and evaluate every checkpoint
# that has not been evaluated yet.
while True:
    cur_checkpoint = tf.train.latest_checkpoint(model_dir)

    # A checkpoint we have not evaluated before: evaluate it, then poll again
    # right away (no sleep).
    if (cur_checkpoint is not None
            and cur_checkpoint != last_checkpoint_filename):
        print('\nCheckpoint {}, evaluating ... ({})'.format(cur_checkpoint,
                                                              datetime.now()))
        sys.stdout.flush()
        last_checkpoint_filename = cur_checkpoint
        already_seen_state = False
        evaluate_checkpoint(cur_checkpoint)
        continue

    # Nothing new to evaluate. Print the waiting message once per waiting
    # period and a progress dot on every later poll.
    if already_seen_state:
        print('.', end='')
    elif cur_checkpoint is None:
        # No checkpoint has been written yet.
        print('No checkpoint yet, waiting ...', end='')
        already_seen_state = True
    else:
        # The latest checkpoint is the one that was already evaluated.
        print('Waiting for the next checkpoint ... ({}) '.format(
                  datetime.now()),
              end='')
        already_seen_state = True
    sys.stdout.flush()
    time.sleep(10)