# -*- coding: utf-8 -*-
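"""Evaluate a trained SemEval-2010 Task 8 relation classifier.

Restores the latest checkpoint from a training run, scores the official
test set, prints accuracy and macro-averaged F1, and writes prediction
and answer files for the official scorer, e.g.:

    python eval.py --checkpoint_dir runs/1526734166/checkpoints/
"""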
import tensorflow as tf
import numpy as np
import os
import data_helpers
from sklearn.metrics import f1_score
import warnings
import sklearn.exceptions
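
# Macro-averaged F1 emits UndefinedMetricWarning for labels that never appear
# in the predictions; silence it so the metrics print cleanly.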
warnings.filterwarnings("ignore", category=sklearn.exceptions.UndefinedMetricWarning)
# Parameters
# ==================================================
# Data loading params
tf.flags.DEFINE_string("eval_dir", "SemEval2010_task8_all_data/SemEval2010_task8_testing_keys/TEST_FILE_FULL.TXT", "Path of evaluation data")
tf.flags.DEFINE_string("output_dir", "SemEval2010_task8_all_data/SemEval2010_task8_scorer-v1.2/prediction.txt", "Path of prediction for evaluation data")
tf.flags.DEFINE_string("target_dir", "SemEval2010_task8_all_data/SemEval2010_task8_scorer-v1.2//answer.txt", "Path of target(answer) file for evaluation data")
# Eval Parameters
tf.flags.DEFINE_integer("batch_size", 64, "Batch Size (Default: 64)")
tf.flags.DEFINE_string("checkpoint_dir", "runs/1526734166/checkpoints/", "Checkpoint directory from training run")
# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")
FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
print("{} = {}".format(attr.upper(), value))
print("")


def eval():
    with tf.device('/cpu:0'):
        x_text, y = data_helpers.load_data_and_labels(FLAGS.eval_dir)

    # Map data into vocabulary
    text_path = os.path.join(FLAGS.checkpoint_dir, "..", "text_vocab")
    text_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor.restore(text_path)
    text_vec = np.array(list(text_vocab_processor.transform(x_text)))

    # Map data into position
    # position_path = os.path.join(FLAGS.checkpoint_dir, "..", "position_vocab")
    # position_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor.restore(position_path)
    # pos1_vec = np.array(list(position_vocab_processor.transform(pos1)))
    # pos2_vec = np.array(list(position_vocab_processor.transform(pos2)))
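
    # load_data_and_labels is assumed to return one-hot label rows, so argmax
    # recovers the integer class ids used by labelsMapping and the scorer files.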
    x_eval = np.array(text_vec)
    y_eval = np.argmax(y, axis=1)

    checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    graph = tf.Graph()
    with graph.as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
        session_conf = tf.ConfigProto(gpu_options=gpu_options,
                                      allow_soft_placement=FLAGS.allow_soft_placement,
                                      log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)
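
            # The op names "input_text", "dropout_keep_prob", and "predictions"
            # must match the names given to those ops in the training script.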
            # Get the placeholders from the graph by name
            input_text = graph.get_operation_by_name("input_text").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("predictions").outputs[0]

            # Generate batches for one epoch
            batches = data_helpers.batch_iter(list(x_eval), FLAGS.batch_size, 1, shuffle=False)

            # Collect the predictions here
            all_predictions = []
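            # Feeding keep-prob 1.0 disables dropout at evaluation time.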
            for x_eval_batch in batches:
                # x_batch = np.array(x_eval_batch).transpose((1, 0, 2))
                batch_predictions = sess.run(predictions, {input_text: x_eval_batch,
                                                           dropout_keep_prob: 1.0})
                all_predictions = np.concatenate([all_predictions, batch_predictions])
            labelsMapping = {0: 'Other',
                             1: 'Message-Topic(e1,e2)', 2: 'Message-Topic(e2,e1)',
                             3: 'Product-Producer(e1,e2)', 4: 'Product-Producer(e2,e1)',
                             5: 'Instrument-Agency(e1,e2)', 6: 'Instrument-Agency(e2,e1)',
                             7: 'Entity-Destination(e1,e2)', 8: 'Entity-Destination(e2,e1)',
                             9: 'Cause-Effect(e1,e2)', 10: 'Cause-Effect(e2,e1)',
                             11: 'Component-Whole(e1,e2)', 12: 'Component-Whole(e2,e1)',
                             13: 'Entity-Origin(e1,e2)', 14: 'Entity-Origin(e2,e1)',
                             15: 'Member-Collection(e1,e2)', 16: 'Member-Collection(e2,e1)',
                             17: 'Content-Container(e1,e2)', 18: 'Content-Container(e2,e1)'}
            # Write predictions and gold answers in the scorer's
            # "<index>\t<label>" format.
            with open(FLAGS.output_dir, 'w') as output_file, \
                    open(FLAGS.target_dir, 'w') as target_file:
                for i in range(len(all_predictions)):
                    output_file.write("{}\t{}\n".format(i, labelsMapping[int(all_predictions[i])]))
                    target_file.write("{}\t{}\n".format(i, labelsMapping[y_eval[i]]))
            # Report accuracy and macro-averaged F1, both over all 19 classes
            # and in the official (2*9+1)-way setting that excludes "Other".
            correct_predictions = float(sum(all_predictions == y_eval))
            print("\nTotal number of test examples: {}".format(len(y_eval)))
            print("Accuracy: {:g}".format(correct_predictions / float(len(y_eval))))
            print("Macro-Average F1 Score: {:g}".format(f1_score(y_eval, all_predictions, average="macro")))
            print("(2*9+1)-Way Macro-Average F1 Score (excluding Other): {:g}".format(
                f1_score(y_eval, all_predictions, labels=np.array(range(1, 19)), average="macro")))


def main(_):
    eval()


if __name__ == "__main__":
    tf.app.run()