From d33b7f0d437e199ec45c0730bee171fd6708faa0 Mon Sep 17 00:00:00 2001 From: Hiroaki Hayashi Date: Fri, 7 Feb 2020 12:27:19 -0500 Subject: [PATCH] Initial commit Add unk_prob mention and download links Fix typo --- .gitignore | 14 + README.md | 209 + arguments.py | 184 + cleanup.sh | 36 + data/canonical_forms/test_canonical_forms.txt | 60 + .../canonical_forms/train_canonical_forms.txt | 27685 +++++++++++++++ .../canonical_forms/valid_canonical_forms.txt | 60 + data/found_ids/test.ids.txt | 60 + data/found_ids/train.ids.txt | 27704 ++++++++++++++++ data/found_ids/valid.ids.txt | 60 + dataset/__init__.py | 6 + dataset/aliaslm.py | 55 + dataset/base.py | 457 + dataset/lrlm.py | 120 + dataset/nklm.py | 150 + dataset/utils.py | 268 + dataset/vocab.py | 172 + docs/data.md | 131 + docs/lrlm-fig.png | Bin 0 -> 261988 bytes models/__init__.py | 4 + models/aliaslm.py | 87 + models/base.py | 112 + models/lrlm.py | 542 + models/nklm.py | 709 + models/rnns/__init__.py | 3 + models/rnns/base.py | 22 + models/rnns/lstm.py | 37 + models/rnns/transformer.py | 344 + models/sample_utils.py | 60 + models/utils.py | 332 + models/vanillalm.py | 116 + mypy.ini | 16 + nnlib/arguments/__init__.py | 1 + nnlib/arguments/arguments.py | 378 + nnlib/arguments/custom_types.py | 46 + nnlib/arguments/validator.py | 66 + nnlib/utils/__init__.py | 8 + nnlib/utils/io.py | 160 + nnlib/utils/logging.py | 104 + nnlib/utils/math.py | 12 + nnlib/utils/misc.py | 128 + nnlib/utils/timing.py | 59 + nnlib/utils/values.py | 91 + preprocess/analyze_probs.py | 291 + preprocess/process_wikifacts.py | 462 + preprocess/process_wikitext.py | 424 + preprocess/scripts/preprocess_wikitext.sh | 47 + preprocess/train_fasttext.py | 67 + preprocess/unkprob/README.md | 13 + preprocess/unkprob/prepare_charlm_data.py | 62 + preprocess/wikifacts_preprocess.md | 103 + preprocess/wikitext/extract_wikidata.py | 243 + preprocess/wikitext/match_wikidata.py | 576 + preprocess/wikitext/split_wikitext.py | 237 + repl.py | 358 + 
requirements.txt | 6 + run.py | 439 + run_charlm.py | 111 + scripts/run_charlm_wikitext.sh | 34 + .../train_aliaslm_transformer_wikifacts.sh | 46 + ...train_aliaslm_transformer_wikitext_full.sh | 48 + ...rain_aliaslm_transformer_wikitext_short.sh | 49 + scripts/train_aliaslm_wikifacts.sh | 36 + scripts/train_aliaslm_wikitext_full.sh | 37 + scripts/train_aliaslm_wikitext_short.sh | 38 + scripts/train_lrlm_transformer_wikifacts.sh | 53 + .../train_lrlm_transformer_wikitext_full.sh | 54 + .../train_lrlm_transformer_wikitext_short.sh | 55 + scripts/train_lrlm_wikifacts.sh | 42 + scripts/train_lrlm_wikitext_full.sh | 45 + scripts/train_lrlm_wikitext_short.sh | 46 + scripts/train_nklm_orig.sh | 42 + scripts/train_nklm_transformer_wikifacts.sh | 52 + .../train_nklm_transformer_wikitext_full.sh | 54 + .../train_nklm_transformer_wikitext_short.sh | 55 + scripts/train_nklm_wikifacts.sh | 37 + scripts/train_nklm_wikitext_full.sh | 42 + scripts/train_nklm_wikitext_short.sh | 42 + .../train_vanillalm_transformer_wikifacts.sh | 44 + ...ain_vanillalm_transformer_wikitext_full.sh | 47 + ...in_vanillalm_transformer_wikitext_short.sh | 47 + scripts/train_vanillalm_wikifacts.sh | 31 + scripts/train_vanillalm_wikitext_full.sh | 36 + scripts/train_vanillalm_wikitext_short.sh | 37 + stubs/torch/__init__.pyi | 2861 ++ stubs/torch/nn/__init__.pyi | 6 + stubs/torch/nn/modules/__init__.pyi | 46 + stubs/torch/nn/modules/container.pyi | 146 + stubs/torch/nn/modules/module.pyi | 100 + stubs/torch/nn/parameter.pyi | 7 + stubs/torch/optim/__init__.pyi | 4 + stubs/torch/optim/lr_scheduler.pyi | 32 + stubs/torch/optim/optimizer.pyi | 14 + utils.py | 292 + 94 files changed, 68964 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 arguments.py create mode 100755 cleanup.sh create mode 100644 data/canonical_forms/test_canonical_forms.txt create mode 100644 data/canonical_forms/train_canonical_forms.txt create mode 100644 
data/canonical_forms/valid_canonical_forms.txt create mode 100644 data/found_ids/test.ids.txt create mode 100644 data/found_ids/train.ids.txt create mode 100644 data/found_ids/valid.ids.txt create mode 100644 dataset/__init__.py create mode 100644 dataset/aliaslm.py create mode 100644 dataset/base.py create mode 100644 dataset/lrlm.py create mode 100644 dataset/nklm.py create mode 100644 dataset/utils.py create mode 100644 dataset/vocab.py create mode 100644 docs/data.md create mode 100644 docs/lrlm-fig.png create mode 100644 models/__init__.py create mode 100644 models/aliaslm.py create mode 100644 models/base.py create mode 100644 models/lrlm.py create mode 100644 models/nklm.py create mode 100644 models/rnns/__init__.py create mode 100644 models/rnns/base.py create mode 100644 models/rnns/lstm.py create mode 100644 models/rnns/transformer.py create mode 100644 models/sample_utils.py create mode 100644 models/utils.py create mode 100644 models/vanillalm.py create mode 100644 mypy.ini create mode 100644 nnlib/arguments/__init__.py create mode 100644 nnlib/arguments/arguments.py create mode 100644 nnlib/arguments/custom_types.py create mode 100644 nnlib/arguments/validator.py create mode 100644 nnlib/utils/__init__.py create mode 100644 nnlib/utils/io.py create mode 100644 nnlib/utils/logging.py create mode 100644 nnlib/utils/math.py create mode 100644 nnlib/utils/misc.py create mode 100644 nnlib/utils/timing.py create mode 100644 nnlib/utils/values.py create mode 100644 preprocess/analyze_probs.py create mode 100644 preprocess/process_wikifacts.py create mode 100644 preprocess/process_wikitext.py create mode 100644 preprocess/scripts/preprocess_wikitext.sh create mode 100644 preprocess/train_fasttext.py create mode 100644 preprocess/unkprob/README.md create mode 100644 preprocess/unkprob/prepare_charlm_data.py create mode 100644 preprocess/wikifacts_preprocess.md create mode 100644 preprocess/wikitext/extract_wikidata.py create mode 100644 
preprocess/wikitext/match_wikidata.py create mode 100644 preprocess/wikitext/split_wikitext.py create mode 100644 repl.py create mode 100644 requirements.txt create mode 100644 run.py create mode 100644 run_charlm.py create mode 100755 scripts/run_charlm_wikitext.sh create mode 100755 scripts/train_aliaslm_transformer_wikifacts.sh create mode 100755 scripts/train_aliaslm_transformer_wikitext_full.sh create mode 100755 scripts/train_aliaslm_transformer_wikitext_short.sh create mode 100755 scripts/train_aliaslm_wikifacts.sh create mode 100755 scripts/train_aliaslm_wikitext_full.sh create mode 100755 scripts/train_aliaslm_wikitext_short.sh create mode 100755 scripts/train_lrlm_transformer_wikifacts.sh create mode 100755 scripts/train_lrlm_transformer_wikitext_full.sh create mode 100755 scripts/train_lrlm_transformer_wikitext_short.sh create mode 100755 scripts/train_lrlm_wikifacts.sh create mode 100755 scripts/train_lrlm_wikitext_full.sh create mode 100755 scripts/train_lrlm_wikitext_short.sh create mode 100755 scripts/train_nklm_orig.sh create mode 100755 scripts/train_nklm_transformer_wikifacts.sh create mode 100755 scripts/train_nklm_transformer_wikitext_full.sh create mode 100755 scripts/train_nklm_transformer_wikitext_short.sh create mode 100755 scripts/train_nklm_wikifacts.sh create mode 100755 scripts/train_nklm_wikitext_full.sh create mode 100755 scripts/train_nklm_wikitext_short.sh create mode 100755 scripts/train_vanillalm_transformer_wikifacts.sh create mode 100755 scripts/train_vanillalm_transformer_wikitext_full.sh create mode 100755 scripts/train_vanillalm_transformer_wikitext_short.sh create mode 100755 scripts/train_vanillalm_wikifacts.sh create mode 100755 scripts/train_vanillalm_wikitext_full.sh create mode 100755 scripts/train_vanillalm_wikitext_short.sh create mode 100644 stubs/torch/__init__.pyi create mode 100644 stubs/torch/nn/__init__.pyi create mode 100644 stubs/torch/nn/modules/__init__.pyi create mode 100644 
stubs/torch/nn/modules/container.pyi create mode 100644 stubs/torch/nn/modules/module.pyi create mode 100644 stubs/torch/nn/parameter.pyi create mode 100644 stubs/torch/optim/__init__.pyi create mode 100644 stubs/torch/optim/lr_scheduler.pyi create mode 100644 stubs/torch/optim/optimizer.pyi create mode 100644 utils.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..abc8e5a --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +.mypy_cache +__pycache__ +*.pyc +.idea/ + +results/ +kb/ +kb_embed*/ + +slurm* +*/model*.pt +*/*.log +*/*.sh +!scripts/*.sh diff --git a/README.md b/README.md new file mode 100644 index 0000000..c17a41f --- /dev/null +++ b/README.md @@ -0,0 +1,209 @@ +# Latent Relation Language Models + +This repository contains the official PyTorch implementation of Latent Relation Language Models +([arXiv](https://arxiv.org/abs/1908.07690)): + +> Hiroaki Hayashi\*, Zecong Hu\*, Chenyan Xiong, Graham Neubig
+> _Latent Relation Language Models_
+> The 34th AAAI Conference on Artificial Intelligence (AAAI 2020) + +![lrlm](docs/lrlm-fig.png) + + +## Requirements + +- Python 3.6+ +- PyTorch 0.4+ +- Other packages: + - IPython + - tqdm + - tensorboardX + - [fastText@bc12859](https://github.com/facebookresearch/fastText/tree/bc1285939f1c216bd358425c3685a049dd8f56c0) + + +## Usage + +1. Clone this repository and install dependencies: + ```bash + git clone https://github.com/neulab/lrlm.git + cd lrlm + pip install -r requirements.txt + ``` + To prevent incorrect or conflicting package versions, it is recommended to install dependencies in a virtual + environment. + +2. Download required files, including the dataset, unknown word probabilities, and (for inference only) pre-trained + model weights: + ```bash + # Wikitext-S,F dataset + wget https://github.com/neulab/lrlm/releases/download/v1.0/wikitext.tar.bz2 + + # Wikifacts dataset + wget https://github.com/neulab/lrlm/releases/download/v1.0/wikfacts.tar.bz2 + + # Transformer-XL on WikiText-F model weights + wget https://github.com/neulab/lrlm/releases/download/v1.0/t-xl_wt-f_model17.pt + + # Transformer-XL on WikiText-S model weights + wget https://github.com/neulab/lrlm/releases/download/v1.0/t-xl_wt-s_model17.pt + ``` + Note that the resources listed above are available under [releases](https://github.com/neulab/lrlm/releases). + + Please contact us for pretrained models for different configurations. + + FastText model weights can be downloaded via [Google Drive](https://drive.google.com/file/d/1zBBMnhYEMWXAS0QK3Wg2q_fENcKLAXTE/view?usp=sharing). + +3. To train a new model, use one of the scripts in the `scripts/` directory, e.g. + `scripts/train_lrlm_transformer_wikitext_short.sh`. This will create a directory for the experiment named + `lrlm-transformer-wikitext-short` under the working directory, containing the following files: + + - `model<k>.pt`: The model checkpoint at the k-th epoch. 
Checkpoints are only saved when the validation results for + that epoch improve over previous epochs. + - `.txt`: The training log file. + - `