Reduce memory footprint: allocate token-id tensors as torch.IntTensor (32-bit) instead of torch.LongTensor (64-bit)

vansky · Mar 22, 2021 · a91691e
1 parent ee4201c
commit a91691e
Showing 1 changed file with 5 additions and 5 deletions.
diff --git a/data.py b/data.py
@@ -144,7 +144,7 @@ def tokenize(self, path):
 
             # Tokenize file content
             with gzip.open(path, 'rb') as file_handle:
-                ids = torch.LongTensor(tokens)
+                ids = torch.IntTensor(tokens)
                 token = 0
                 first_flag = True
                 for fchunk in file_handle.readlines():
@@ -201,7 +201,7 @@ def tokenize(self, path):
 
             # Tokenize file content
             with open(path, 'r') as file_handle:
-                ids = torch.LongTensor(tokens)
+                ids = torch.IntTensor(tokens)
                 token = 0
                 first_flag = True
                 for fchunk in file_handle:
@@ -252,7 +252,7 @@ def tokenize_with_unks(self, path):
 
             # Tokenize file content
             with gzip.open(path, 'rb') as file_handle:
-                ids = torch.LongTensor(tokens)
+                ids = torch.IntTensor(tokens)
                 token = 0
                 first_flag = True
                 for fchunk in file_handle.readlines():
@@ -304,7 +304,7 @@ def tokenize_with_unks(self, path):
 
             # Tokenize file content
             with open(path, 'r') as file_handle:
-                ids = torch.LongTensor(tokens)
+                ids = torch.IntTensor(tokens)
                 token = 0
                 first_flag = True
                 for fchunk in file_handle:
@@ -384,7 +384,7 @@ def convert_to_ids(self, words, tokens=None):
             tokens = len(words)
 
         # Tokenize file content
-        ids = torch.LongTensor(tokens)
+        ids = torch.IntTensor(tokens)
         token = 0
         if self.lower:
             for word in words: