-- large.lua
-- Forked from macournoyer/neuralconvo.
-- Declare the loader class through torch.class under the name "neuralconvo.Large".
local Large = torch.class("neuralconvo.Large")
-- NOTE(review): stringx is not referenced anywhere in the visible code.
local stringx = require "pl.stringx"
local xlua = require "xlua"
-- Value passed to xlua.progress as the progress-bar total.
-- Presumably the expected number of lines in the data file -- TODO confirm.
local TOTAL_LINES = 100000000
-- Name of the input file; it is looked up inside the directory given to Large.
local FILE_NAME = "100000000.txt"
--- Build an iterator over the lines of a text file.
-- The file is opened immediately; a failure to open raises an error through
-- `assert`. Each call of the returned function reads one line and wraps it in
-- a table of the form `{ text = <line> }`. When the end of the file is reached
-- the handle is closed and the iterator returns nothing, which terminates a
-- generic `for` loop.
-- @param file path of the file to read
-- @return iterator function suitable for `for item in parsedLines(path) do`
local function parsedLines(file)
  local handle = assert(io.open(file, 'r'))

  return function()
    local text = handle:read("*line")
    if text ~= nil then
      return { text = text }
    end
    -- End of file: release the handle before ending the iteration.
    handle:close()
    return
  end
end
--- Constructor: remember the directory that holds the data file.
-- @param dir directory path; `Large:load` joins it with the data file name
function Large:__init(dir) self.dir = dir end
--- Refresh the progress bar once every 100000 processed lines.
-- Calls in between are ignored so the bar is not redrawn for every line.
-- @param c number of lines processed so far
local function progress(c)
  if c % 100000 ~= 0 then
    return
  end
  xlua.progress(c, TOTAL_LINES)
end
-- Number of consecutive lines grouped into one conversation.
local LINES_PER_CONVERSATION = 100

--- Read the data file and group its lines into conversations.
-- The file `<self.dir>/<FILE_NAME>` is read line by line. Every
-- LINES_PER_CONVERSATION consecutive lines form one conversation; a final
-- group with fewer lines is kept as well, so no input line is dropped.
-- The progress bar is updated while reading and forced to 100% at the end.
-- @return array of conversations, each an array of `{ text = <line> }` tables
function Large:load()
  local conversations = {}
  local conversation = {}
  local count = 0

  for line in parsedLines(self.dir .. "/" .. FILE_NAME) do
    conversation[#conversation + 1] = line
    -- Count the line before testing the boundary, so that the first group
    -- holds a full LINES_PER_CONVERSATION lines instead of a single one.
    count = count + 1
    if count % LINES_PER_CONVERSATION == 0 then
      conversations[#conversations + 1] = conversation
      conversation = {}
    end
    progress(count)
  end

  -- Keep the trailing partial group instead of silently discarding it.
  if #conversation > 0 then
    conversations[#conversations + 1] = conversation
  end

  xlua.progress(TOTAL_LINES, TOTAL_LINES)
  print("-- Finished Parsing Open Subtitle data set ...")
  return conversations
end