diff --git a/code/eval_tacred.py b/code/eval_tacred.py
index ee389d3..f303370 100644
--- a/code/eval_tacred.py
+++ b/code/eval_tacred.py
@@ -37,9 +37,9 @@ from knowledge_bert.optimization import BertAdam
 from knowledge_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE
 
-logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
-                    datefmt = '%m/%d/%Y %H:%M:%S',
-                    level = logging.INFO)
+logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
+                    datefmt='%m/%d/%Y %H:%M:%S',
+                    level=logging.INFO)
 logger = logging.getLogger(__name__)
 
@@ -90,12 +90,13 @@ def get_dev_examples(self, data_dir):
     def get_labels(self):
         """Gets the list of labels for this data set."""
         raise NotImplementedError()
-    
+
     @classmethod
     def _read_json(cls, input_file):
         with open(input_file, "r", encoding='utf-8') as f:
             return json.loads(f.read())
 
+
 class TacredProcessor(DataProcessor):
     """Processor for the CoLA data set (GLUE version)."""
 
@@ -116,7 +117,6 @@ def get_test_examples(self, data_dir):
         return self._create_examples(
             self._read_json(os.path.join(data_dir, "test.json")), "dev")
 
-
     def get_labels(self):
         """Useless"""
         return ["0", "1"]
@@ -129,7 +129,7 @@ def _create_examples(self, lines, set_type):
             for x in line['ents']:
                 if x[1] == 1:
                     x[1] = 0
-                    #print(line['text'][x[1]:x[2]].encode("utf-8"))
+                    # print(line['text'][x[1]:x[2]].encode("utf-8"))
             text_a = (line['text'], line['ents'])
             label = line['label']
             examples.append(
@@ -139,9 +139,9 @@ def _create_examples(self, lines, set_type):
 
 def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer, threshold):
     """Loads a data file into a list of `InputBatch`s."""
-    
+
     label_list = sorted(label_list)
-    label_map = {label : i for i, label in enumerate(label_list)}
+    label_map = {label: i for i, label in enumerate(label_list)}
 
     entity2id = {}
     with open("kg_embed/entity2id.txt") as fin:
@@ -157,11 +157,13 @@ def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer
         h_name = ex_text_a[h[1]:h[2]]
         t_name = ex_text_a[t[1]:t[2]]
         if h[1] < t[1]:
-            ex_text_a = ex_text_a[:h[1]] + "# "+h_name+" #" + ex_text_a[h[2]:t[1]] + "$ "+t_name+" $" + ex_text_a[t[2]:]
+            ex_text_a = ex_text_a[:h[1]] + "# "+h_name+" #" + \
+                ex_text_a[h[2]:t[1]] + "$ "+t_name+" $" + ex_text_a[t[2]:]
         else:
-            ex_text_a = ex_text_a[:t[1]] + "$ "+t_name+" $" + ex_text_a[t[2]:h[1]] + "# "+h_name+" #" + ex_text_a[h[2]:]
+            ex_text_a = ex_text_a[:t[1]] + "$ "+t_name+" $" + \
+                ex_text_a[t[2]:h[1]] + "# "+h_name+" #" + ex_text_a[h[2]:]
 
-        ent_pos = [x for x in example.text_b if x[-1]>threshold]
+        ent_pos = [x for x in example.text_b if x[-1] > threshold]
         for x in ent_pos:
             cnt = 0
             if x[1] > h[2]:
@@ -178,11 +180,13 @@ def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer
 
         tokens_b = None
         if False:
-            tokens_b, entities_b = tokenizer.tokenize(example.text_b[0], [x for x in example.text_b[1] if x[-1]>threshold])
+            tokens_b, entities_b = tokenizer.tokenize(
+                example.text_b[0], [x for x in example.text_b[1] if x[-1] > threshold])
             # Modifies `tokens_a` and `tokens_b` in place so that the total
             # length is less than the specified length.
             # Account for [CLS], [SEP], [SEP] with "- 3"
-            _truncate_seq_pair(tokens_a, tokens_b, entities_a, entities_b, max_seq_length - 3)
+            _truncate_seq_pair(tokens_a, tokens_b, entities_a,
+                               entities_b, max_seq_length - 3)
         else:
             # Account for [CLS] and [SEP] with "- 2"
             if len(tokens_a) > max_seq_length - 2:
@@ -252,22 +256,24 @@ def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer
             logger.info("*** Example ***")
             logger.info("guid: %s" % (example.guid))
             logger.info("tokens: %s" % " ".join(
-                    [str(x) for x in tokens]))
+                [str(x) for x in tokens]))
             logger.info("ents: %s" % " ".join(
-                    [str(x) for x in ents]))
-            logger.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
-            logger.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
+                [str(x) for x in ents]))
+            logger.info("input_ids: %s" %
+                        " ".join([str(x) for x in input_ids]))
+            logger.info("input_mask: %s" %
+                        " ".join([str(x) for x in input_mask]))
             logger.info(
-                    "segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
+                "segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
             logger.info("label: %s (id = %d)" % (example.label, label_id))
 
         features.append(
-                InputFeatures(input_ids=input_ids,
-                              input_mask=input_mask,
-                              segment_ids=segment_ids,
-                              input_ent=input_ent,
-                              ent_mask=ent_mask,
-                              label_id=label_id))
+            InputFeatures(input_ids=input_ids,
+                          input_mask=input_mask,
+                          segment_ids=segment_ids,
+                          input_ent=input_ent,
+                          ent_mask=ent_mask,
+                          label_id=label_id))
     return features
 
 
@@ -289,19 +295,51 @@ def _truncate_seq_pair(tokens_a, tokens_b, ents_a, ents_b, max_length):
             tokens_b.pop()
             ents_b.pop()
 
-def accuracy(out, labels):
-    outputs = np.argmax(out, axis=1)
-    return np.sum(outputs == labels), outputs
 
 def warmup_linear(x, warmup=0.002):
     if x < warmup:
         return x/warmup
     return 1.0
 
+
+def eval_result(pred_result, labels, na_id):
+    correct = 0
+    total = len(labels)
+    correct_positive = 0
+    pred_positive = 0
+    gold_positive = 0
+
+    for i in range(total):
+        if labels[i] == pred_result[i]:
+            correct += 1
+            if labels[i] != na_id:
+                correct_positive += 1
+        if labels[i] != na_id:
+            gold_positive += 1
+        if pred_result[i] != na_id:
+            pred_positive += 1
+    acc = float(correct) / float(total)
+    try:
+        micro_p = float(correct_positive) / float(pred_positive)
+    except ZeroDivisionError:
+        micro_p = 0
+    try:
+        micro_r = float(correct_positive) / float(gold_positive)
+    except ZeroDivisionError:
+        micro_r = 0
+    try:
+        micro_f1 = 2 * micro_p * micro_r / (micro_p + micro_r)
+    except ZeroDivisionError:
+        micro_f1 = 0
+    result = {'acc': acc, 'micro_p': micro_p,
+              'micro_r': micro_r, 'micro_f1': micro_f1}
+    return result
+
+
 def main():
     parser = argparse.ArgumentParser()
 
-    ## Required parameters
+    # Required parameters
    parser.add_argument("--data_dir",
                        default=None,
                        type=str,
@@ -315,7 +353,7 @@ def main():
                        required=True,
                        help="The output directory where the model predictions and checkpoints will be written.")
 
-    ## Other parameters
+    # Other parameters
    parser.add_argument("--max_seq_length",
                        default=128,
                        type=int,
@@ -389,7 +427,8 @@ def main():
     num_labels_task = 42
 
     if args.local_rank == -1 or args.no_cuda:
-        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
+        device = torch.device(
+            "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
         n_gpu = torch.cuda.device_count()
     else:
         torch.cuda.set_device(args.local_rank)
@@ -402,7 +441,7 @@ def main():
 
     if args.gradient_accumulation_steps < 1:
         raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
-                            args.gradient_accumulation_steps))
+            args.gradient_accumulation_steps))
 
     random.seed(args.seed)
     np.random.seed(args.seed)
@@ -411,13 +450,15 @@ def main():
         torch.cuda.manual_seed_all(args.seed)
 
     if not args.do_train and not args.do_eval:
-        raise ValueError("At least one of `do_train` or `do_eval` must be True.")
+        raise ValueError(
+            "At least one of `do_train` or `do_eval` must be True.")
 
     processor = processors()
     num_labels = num_labels_task
     label_list = None
 
-    tokenizer = BertTokenizer.from_pretrained(args.ernie_model, do_lower_case=args.do_lower_case)
+    tokenizer = BertTokenizer.from_pretrained(
+        args.ernie_model, do_lower_case=args.do_lower_case)
 
     train_examples = None
     num_train_steps = None
@@ -429,8 +470,11 @@ def main():
         for line in fin:
             vec = line.strip().split('\t')
             vec = [float(x) for x in vec]
+            if len(vec) != 100:  # zero-pad short rows to the expected 100 dims
+                diff = 100 - len(vec)
+                vec = vec + [0 for _ in range(diff)]
             vecs.append(vec)
-    embed = torch.FloatTensor(vecs)
+    embed = torch.tensor(vecs, dtype=torch.float)
     embed = torch.nn.Embedding.from_pretrained(embed)
     logger.info("Shape of entity embedding: "+str(embed.weight.size()))
 
@@ -451,22 +495,26 @@ def main():
         test = convert_examples_to_features(
             eval_examples, label_list, args.max_seq_length, tokenizer, args.threshold)
 
-
         for x, mark in file_mark:
             print(x, mark)
             output_model_file = os.path.join(args.output_dir, x)
             model_state_dict = torch.load(output_model_file)
-            model, _ = BertForSequenceClassification.from_pretrained(args.ernie_model, state_dict=model_state_dict, num_labels=len(label_list))
+            model, _ = BertForSequenceClassification.from_pretrained(
+                args.ernie_model, state_dict=model_state_dict, num_labels=len(label_list))
             model.to(device)
 
             if mark:
                 eval_features = dev
-                output_file = os.path.join(args.output_dir, "eval_pred_{}.txt".format(x.split("_")[-1]))
-                output_file_ = os.path.join(args.output_dir, "eval_gold_{}.txt".format(x.split("_")[-1]))
+                output_file = os.path.join(
+                    args.output_dir, "eval_pred_{}.txt".format(x.split("_")[-1]))
+                output_file_ = os.path.join(
+                    args.output_dir, "eval_gold_{}.txt".format(x.split("_")[-1]))
             else:
                 eval_features = test
-                output_file = os.path.join(args.output_dir, "test_pred_{}.txt".format(x.split("_")[-1]))
-                output_file_ = os.path.join(args.output_dir, "test_gold_{}.txt".format(x.split("_")[-1]))
+                output_file = os.path.join(
+                    args.output_dir, "test_pred_{}.txt".format(x.split("_")[-1]))
+                output_file_ = os.path.join(
+                    args.output_dir, "test_gold_{}.txt".format(x.split("_")[-1]))
 
             fpred = open(output_file, "w")
             fgold = open(output_file_, "w")
@@ -476,22 +524,31 @@ def main():
             # zeros = [0 for _ in range(args.max_seq_length)]
             # zeros_ent = [0 for _ in range(100)]
            # zeros_ent = [zeros_ent for _ in range(args.max_seq_length)]
-            all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
-            all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
-            all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
-            all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)
-            all_ent = torch.tensor([f.input_ent for f in eval_features], dtype=torch.long)
-            all_ent_masks = torch.tensor([f.ent_mask for f in eval_features], dtype=torch.long)
-            eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_ent, all_ent_masks, all_label_ids)
+            all_input_ids = torch.tensor(
+                [f.input_ids for f in eval_features], dtype=torch.long)
+            all_input_mask = torch.tensor(
+                [f.input_mask for f in eval_features], dtype=torch.long)
+            all_segment_ids = torch.tensor(
+                [f.segment_ids for f in eval_features], dtype=torch.long)
+            all_label_ids = torch.tensor(
+                [f.label_id for f in eval_features], dtype=torch.long)
+            all_ent = torch.tensor(
+                [f.input_ent for f in eval_features], dtype=torch.long)
+            all_ent_masks = torch.tensor(
+                [f.ent_mask for f in eval_features], dtype=torch.long)
+            eval_data = TensorDataset(all_input_ids, all_input_mask,
+                                      all_segment_ids, all_ent, all_ent_masks, all_label_ids)
             # Run prediction for full data
             eval_sampler = SequentialSampler(eval_data)
-            eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.eval_batch_size)
+            eval_dataloader = DataLoader(
+                eval_data, sampler=eval_sampler, batch_size=args.eval_batch_size)
 
             model.eval()
-            eval_loss, eval_accuracy = 0, 0
+            eval_loss = 0
             nb_eval_steps, nb_eval_examples = 0, 0
+            pred_all, label_all = [], []
             for input_ids, input_mask, segment_ids, input_ent, ent_mask, label_ids in eval_dataloader:
-                input_ent = embed(input_ent+1) # -1 -> 0
+                input_ent = embed(input_ent+1)  # -1 -> 0
                 input_ids = input_ids.to(device)
                 input_mask = input_mask.to(device)
                 segment_ids = segment_ids.to(device)
@@ -500,32 +557,35 @@ def main():
                 label_ids = label_ids.to(device)
 
                 with torch.no_grad():
-                    tmp_eval_loss = model(input_ids, segment_ids, input_mask, input_ent, ent_mask, label_ids)
-                    logits = model(input_ids, segment_ids, input_mask, input_ent, ent_mask)
+                    tmp_eval_loss = model(
+                        input_ids, segment_ids, input_mask, input_ent, ent_mask, label_ids)
+                    logits = model(input_ids, segment_ids,
+                                   input_mask, input_ent, ent_mask)
 
                 logits = logits.detach().cpu().numpy()
                 label_ids = label_ids.to('cpu').numpy()
-                tmp_eval_accuracy, pred = accuracy(logits, label_ids)
+                pred = np.argmax(logits, axis=1)
                 for a, b in zip(pred, label_ids):
+                    pred_all.append(a)
+                    label_all.append(b)
                     fgold.write("{}\n".format(label_list[b]))
                     fpred.write("{}\n".format(label_list[a]))
 
                 eval_loss += tmp_eval_loss.mean().item()
-                eval_accuracy += tmp_eval_accuracy
                 nb_eval_examples += input_ids.size(0)
                 nb_eval_steps += 1
 
             eval_loss = eval_loss / nb_eval_steps
-            eval_accuracy = eval_accuracy / nb_eval_examples
-            result = {'eval_loss': eval_loss,
-                      'eval_accuracy': eval_accuracy
-                      }
+            result = eval_result(pred_all, label_all, label_list.index("NA"))
+
 
             if mark:
-                output_eval_file = os.path.join(args.output_dir, "eval_results_{}.txt".format(x.split("_")[-1]))
+                output_eval_file = os.path.join(
+                    args.output_dir, "eval_results_{}.txt".format(x.split("_")[-1]))
             else:
-                output_eval_file = os.path.join(args.output_dir, "test_results_{}.txt".format(x.split("_")[-1]))
+                output_eval_file = os.path.join(
+                    args.output_dir, "test_results_{}.txt".format(x.split("_")[-1]))
 
             with open(output_eval_file, "w") as writer:
                 logger.info("***** Eval results *****")
                 for key in sorted(result.keys()):
@@ -533,5 +593,6 @@ def main():
                     logger.info("  %s = %s", key, str(result[key]))
                     writer.write("%s = %s\n" % (key, str(result[key])))
 
+
 if __name__ == "__main__":
     main()
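
Note on the new eval_result(): it replaces the removed accuracy() helper with TACRED-style
micro-averaged precision/recall/F1, where the NA (no-relation) class is excluded from the
positive counts. A minimal sanity check of that arithmetic (toy label ids, purely
illustrative; not part of the patch):

    # toy inputs: 5 examples, label id 0 plays the role of "NA"
    pred = [0, 1, 2, 1, 0]   # predicted label ids
    gold = [0, 1, 1, 1, 0]   # gold label ids
    r = eval_result(pred, gold, na_id=0)
    # correct = 4 of 5                               -> acc = 0.8
    # correct_positive = 2, pred_positive = 3, gold_positive = 3
    # micro_p = micro_r = 2/3, so micro_f1 = 2/3
    assert abs(r['micro_f1'] - 2 / 3) < 1e-9

Scoring only the non-NA predictions this way matches the usual TACRED evaluation convention,
which is why the caller passes label_list.index("NA") as na_id.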
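Note on the entity-embedding changes: the loader now zero-pads any row shorter than 100 dims,
and the lookup embed(input_ent+1) relies on row 0 of the table being a zero "no entity" row,
so positions tagged -1 in input_ent land on it. A minimal sketch of that indexing convention
(toy sizes; the zero row at index 0 is an assumption about the rest of the file, which this
hunk does not show):

    import torch

    # toy table: row 0 reserved as the zero "no entity" row, then 3 real entities, 4 dims each
    vecs = [[0.0] * 4] + [[0.1 * (i + 1)] * 4 for i in range(3)]
    embed = torch.nn.Embedding.from_pretrained(torch.tensor(vecs))

    # -1 marks token positions with no aligned entity; +1 shifts it onto row 0
    input_ent = torch.tensor([[-1, 0, 2, -1]])
    out = embed(input_ent + 1)
    print(out[0, 0])  # tensor([0., 0., 0., 0.]) -> the zero padding row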