From edafee0576825220088036805baea4bb607a49e4 Mon Sep 17 00:00:00 2001 From: hammad001 Date: Mon, 4 Mar 2019 17:47:47 +0000 Subject: [PATCH 1/3] Included bert embeddings and json in gitignore --- .gitignore | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.gitignore b/.gitignore index 894a44c..6643ebb 100644 --- a/.gitignore +++ b/.gitignore @@ -102,3 +102,12 @@ venv.bak/ # mypy .mypy_cache/ + +# bert-embeddings +data/bert_da* + +# Data Annotations +data/*.jsonl + +# Data symbolic link +data/vcr1images From cc187abb723d05e6dcc8115628ea1da58844059d Mon Sep 17 00:00:00 2001 From: hammad001 Date: Tue, 5 Mar 2019 00:45:19 +0000 Subject: [PATCH 2/3] 1. Changed saved model paths in eval file. \n 2. Included condition not to convert metadata to gpu. --- .gitignore | 3 +++ models/eval_for_leaderboard.py | 7 ++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 6643ebb..d79b997 100644 --- a/.gitignore +++ b/.gitignore @@ -111,3 +111,6 @@ data/*.jsonl # Data symbolic link data/vcr1images + +# Submission File +models/submission.csv diff --git a/models/eval_for_leaderboard.py b/models/eval_for_leaderboard.py index 402afea..ac3fa78 100644 --- a/models/eval_for_leaderboard.py +++ b/models/eval_for_leaderboard.py @@ -39,14 +39,14 @@ parser.add_argument( '-answer_ckpt', dest='answer_ckpt', - default='saves/flagship_answer/best.th', + default='/data/vcr/saves/flagship_answer/best.th', help='Answer checkpoint', type=str, ) parser.add_argument( '-rationale_ckpt', dest='rationale_ckpt', - default='saves/flagship_rationale/best.th', + default='/data/vcr/saves/flagship_rationale/best.th', help='Rationale checkpoint', type=str, ) @@ -71,7 +71,8 @@ def _to_gpu(td): if NUM_GPUS > 1: return td for k in td: - td[k] = {k2: v.cuda(async=True) for k2, v in td[k].items()} if isinstance(td[k], dict) else td[k].cuda( + if k != 'metadata': + td[k] = {k2: v.cuda(async=True) for k2, v in td[k].items()} if isinstance(td[k], dict) else td[k].cuda( 
async=True) return td From d9480fa5b002e72dd4f640a92ad9abc17c45206d Mon Sep 17 00:00:00 2001 From: hammad001 Date: Wed, 13 Mar 2019 23:47:50 +0000 Subject: [PATCH 3/3] Changed tf constants to tf placeholders. Added session hooks. --- data/get_bert_embeddings/extract_features.py | 6 +- data/get_bert_embeddings/vcr_loader.py | 60 +++++++++++++------- 2 files changed, 44 insertions(+), 22 deletions(-) diff --git a/data/get_bert_embeddings/extract_features.py b/data/get_bert_embeddings/extract_features.py index 28d0bba..eb912e6 100644 --- a/data/get_bert_embeddings/extract_features.py +++ b/data/get_bert_embeddings/extract_features.py @@ -206,7 +206,7 @@ def _truncate_seq_pair(tokens_a, tokens_b, max_length): config=run_config, predict_batch_size=FLAGS.batch_size) -input_fn = input_fn_builder( +input_fn, input_init_hook = input_fn_builder( features=features, seq_length=FLAGS.max_seq_length) output_h5_qa = h5py.File(f'../{FLAGS.name}_answer_{FLAGS.split}.h5', 'w') @@ -254,7 +254,9 @@ def alignment_gather(alignment, layer): return output_embs -for result in tqdm(estimator.predict(input_fn, yield_single_examples=True)): +for result in tqdm(estimator.predict(input_fn, + hooks=[input_init_hook], + yield_single_examples=True)): ind = unique_id_to_ind[int(result["unique_id"])] text, ctx_alignment, choice_alignment = examples[ind] diff --git a/data/get_bert_embeddings/vcr_loader.py b/data/get_bert_embeddings/vcr_loader.py index d740a28..93012fa 100644 --- a/data/get_bert_embeddings/vcr_loader.py +++ b/data/get_bert_embeddings/vcr_loader.py @@ -25,9 +25,25 @@ def __init__(self, unique_id, tokens, input_ids, input_mask, input_type_ids, is_ self.is_correct = is_correct +class IteratorInitializerHook(tf.train.SessionRunHook): + """Hook to initialise data iterator after Session is created.""" + + def __init__(self): + super(IteratorInitializerHook, self).__init__() + self.iterator_initializer_func = None + + def after_create_session(self, session, coord): + """Initialise 
the iterator after the session has been created.""" + self.iterator_initializer_func(session) + + def input_fn_builder(features, seq_length): """Creates an `input_fn` closure to be passed to TPUEstimator.""" + iterator_initializer_hook = IteratorInitializerHook() + + num_examples = len(features) + all_unique_ids = [] all_input_ids = [] all_input_mask = [] @@ -39,38 +55,42 @@ def input_fn_builder(features, seq_length): all_input_mask.append(feature.input_mask) all_input_type_ids.append(feature.input_type_ids) + def input_fn(params): """The actual input function.""" batch_size = params["batch_size"] - num_examples = len(features) - # This is for demo purposes and does NOT scale to large data sets. We do # not use Dataset.from_generator() because that uses tf.py_func which is # not TPU compatible. The right way to load data is with TFRecordReader. + + unique_ids_placeholder = tf.placeholder(tf.int32, [num_examples]) + input_ids_placeholder = tf.placeholder(tf.int32, [num_examples, seq_length]) + input_mask_placeholder = tf.placeholder(tf.int32, [num_examples, seq_length]) + input_type_ids_placeholder = tf.placeholder(tf.int32, [num_examples, seq_length]) + + d = tf.data.Dataset.from_tensor_slices({ - "unique_ids": - tf.constant(all_unique_ids, shape=[num_examples], dtype=tf.int32), - "input_ids": - tf.constant( - all_input_ids, shape=[num_examples, seq_length], - dtype=tf.int32), - "input_mask": - tf.constant( - all_input_mask, - shape=[num_examples, seq_length], - dtype=tf.int32), - "input_type_ids": - tf.constant( - all_input_type_ids, - shape=[num_examples, seq_length], - dtype=tf.int32), + "unique_ids": unique_ids_placeholder, + "input_ids": input_ids_placeholder, + "input_mask": input_mask_placeholder, + "input_type_ids": input_type_ids_placeholder }) d = d.batch(batch_size=batch_size, drop_remainder=False) - return d + + feed_dict_d = {unique_ids_placeholder:all_unique_ids, input_ids_placeholder: all_input_ids, + input_mask_placeholder: all_input_mask, 
input_type_ids_placeholder: all_input_type_ids} + + iterator = d.make_initializable_iterator() + feats = iterator.get_next() + + iterator_initializer_hook.iterator_initializer_func = lambda sess: sess.run(iterator.initializer, + feed_dict=feed_dict_d) + + return feats - return input_fn + return input_fn, iterator_initializer_hook GENDER_NEUTRAL_NAMES = ['Casey', 'Riley', 'Jessie', 'Jackie', 'Avery', 'Jaime', 'Peyton', 'Kerry', 'Jody', 'Kendall',