From 0f4cf44a4edd09bac3d6d83b5a5df869c9ab1ab0 Mon Sep 17 00:00:00 2001 From: kys10 Date: Wed, 11 Jul 2018 15:36:26 +0800 Subject: [PATCH] Fix concat bug --- data.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/data.py b/data.py index 3882f02..e3d672e 100644 --- a/data.py +++ b/data.py @@ -116,6 +116,11 @@ def __init__(self, path, vocab_path=None, batch_size=1, shuffle=False, self.concat = concat self.vocab = get_vocab(path, ['train.txt'], min_freq=min_freq, vocab_file=vocab_path) + if self.concat: + # set the frequencies for the special tokens; these values were found by trial and error + self.vocab.idx2count[1] = self.vocab.freqs[BOS] # + self.vocab.idx2count[2] = 0 # + self.train = self.get_dataloader('train.txt', self.batch_size) self.valid = self.get_dataloader('valid.txt', 1) self.test = self.get_dataloader('test.txt', 1)