Skip to content

Commit

Permalink
Add dump_sentences.py
Browse files Browse the repository at this point in the history
  • Loading branch information
cifkao committed Aug 12, 2018
1 parent e0ee87d commit 2e01172
Showing 1 changed file with 71 additions and 0 deletions.
71 changes: 71 additions & 0 deletions examples/dump_sentences.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#

"""
A script to dump all sentences (tokenized) to standard output.
"""

from __future__ import absolute_import, division, unicode_literals

import argparse
import logging
import sys

# Set PATHs
# Both paths are relative, so they are resolved against the current working
# directory (presumably the script is meant to be launched from inside the
# examples/ directory -- confirm against the repository layout).
# Directory that is put on sys.path so that `senteval` can be imported.
PATH_TO_SENTEVAL = '../'
# Directory handed to SentEval as its 'task_path' parameter in main().
PATH_TO_DATA = '../data'

# The import below must come after the sys.path tweak: inserting at index 0
# gives PATH_TO_SENTEVAL priority over any other location on the path.
sys.path.insert(0, PATH_TO_SENTEVAL)
import senteval


def main():
    """Dump the tokenized sentences of the selected tasks to standard output.

    Every sentence received by the ``prepare`` callback is printed as one
    line of space-separated tokens.  The ``batcher`` callback raises ``Done``
    so that the evaluation itself is abandoned and the loop moves on to the
    next task.

    Command-line options:
        -t, --tasks  comma-separated list of task names.  Whitespace around
                     names and empty entries (e.g. a trailing comma) are
                     ignored.  When the option is omitted, every task in
                     ``se.list_tasks`` is processed.

    Raises:
        RuntimeError: if ``se.eval`` returns normally for a task, i.e. the
            batcher never raised ``Done``.
        SystemExit: via ``argparse`` on invalid arguments, including a
            ``--tasks`` value that contains no task name at all.
    """
    logging.basicConfig(format='%(asctime)s : %(message)s',
                        level=logging.DEBUG)

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("-t", "--tasks",
                        help="a comma-separated list of tasks")
    args = parser.parse_args()

    requested_tasks = None
    if args.tasks is not None:
        # Tolerate "MR, CR" and "MR,CR," instead of passing names such as
        # " CR" or "" on to SentEval.
        requested_tasks = [name.strip() for name in args.tasks.split(',')]
        requested_tasks = [name for name in requested_tasks if name]
        if not requested_tasks:
            parser.error("--tasks does not contain any task name")

    # Computed once instead of once per sentence.
    is_python2 = sys.version_info < (3, 0)

    def prepare(params, samples):
        """Print each sample as a line of space-separated tokens."""
        for sent in samples:
            # `bytes` is an alias of `str` on Python 2, so this one check
            # covers byte-string tokens on both major versions.
            words = [w.decode('utf-8') if isinstance(w, bytes) else w
                     for w in sent]
            line = ' '.join(words)
            if is_python2:
                # Python 2 needs explicit UTF-8 bytes to print non-ASCII
                # text regardless of the terminal encoding.
                line = line.encode('utf-8')
            print(line)

    def batcher(params, batch):
        """Abort the current task as soon as SentEval asks for embeddings."""
        # Block evaluation and continue with the next task.
        raise Done

    params_senteval = {
        'task_path': PATH_TO_DATA
    }

    se = senteval.engine.SE(params_senteval, batcher, prepare)
    if requested_tasks is not None:
        transfer_tasks = requested_tasks
    else:
        transfer_tasks = se.list_tasks

    for task in transfer_tasks:
        try:
            se.eval([task])
        except Done:
            # Expected outcome: the batcher stopped the evaluation.
            continue
        # Reaching this point means the batcher was never invoked, so the
        # early exit this script relies on did not happen.
        raise RuntimeError(task + " not completed")


class Done(Exception):
    """Signal raised by the batcher to abandon evaluation of the current task."""


# Run the dump only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

0 comments on commit 2e01172

Please sign in to comment.