Skip to content

Commit

Permalink
reduced memory footprint
Browse files (browse the repository at this point in the history)
  • Loading branch information
vansky committed Mar 22, 2021
1 parent ee4201c commit a91691e
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions data.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -144,7 +144,7 @@ def tokenize(self, path):

# Tokenize file content
with gzip.open(path, 'rb') as file_handle:
- ids = torch.LongTensor(tokens)
+ ids = torch.IntTensor(tokens)
token = 0
first_flag = True
for fchunk in file_handle.readlines():
Expand Down Expand Up @@ -201,7 +201,7 @@ def tokenize(self, path):

# Tokenize file content
with open(path, 'r') as file_handle:
- ids = torch.LongTensor(tokens)
+ ids = torch.IntTensor(tokens)
token = 0
first_flag = True
for fchunk in file_handle:
Expand Down Expand Up @@ -252,7 +252,7 @@ def tokenize_with_unks(self, path):

# Tokenize file content
with gzip.open(path, 'rb') as file_handle:
- ids = torch.LongTensor(tokens)
+ ids = torch.IntTensor(tokens)
token = 0
first_flag = True
for fchunk in file_handle.readlines():
Expand Down Expand Up @@ -304,7 +304,7 @@ def tokenize_with_unks(self, path):

# Tokenize file content
with open(path, 'r') as file_handle:
- ids = torch.LongTensor(tokens)
+ ids = torch.IntTensor(tokens)
token = 0
first_flag = True
for fchunk in file_handle:
Expand Down Expand Up @@ -384,7 +384,7 @@ def convert_to_ids(self, words, tokens=None):
tokens = len(words)

# Tokenize file content
- ids = torch.LongTensor(tokens)
+ ids = torch.IntTensor(tokens)
token = 0
if self.lower:
for word in words:
Expand Down

0 comments on commit a91691e

Please sign in to comment.