-
Notifications
You must be signed in to change notification settings - Fork 1
/
gdt-parse-csv.lua
164 lines (144 loc) · 3.39 KB
/
gdt-parse-csv.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
local gdt = require 'gdt'
local csv = require 'csv'
local max = math.max
local match, gsub = string.match, string.gsub
--- Tell whether none of the fields in a row can be read as a number.
-- @param ls sequence of field values (walked with ipairs)
-- @return true when tonumber() fails on every field, false otherwise
local function is_string_only(ls)
  local only_strings = true
  for _, field in ipairs(ls) do
    if tonumber(field) ~= nil then
      -- one numeric-looking field is enough to settle the answer
      only_strings = false
      break
    end
  end
  return only_strings
end
--- Scan a row source once to size the table and guess whether the first
-- row is a header.
-- The first row is the header candidate. It is accepted as a header when
-- none of its fields converts to a number and, in addition, either it
-- "stands out" (fewer than half of the columns have a data row repeating
-- the first row's value) or at least one data row holds a numeric field.
-- @param source iterator function returning one row (a table of fields)
--   per call and nil when exhausted
-- @return nrows number of rows scanned, not counting the header when one
--   is detected
-- @return ncols largest field count found on any scanned row
-- @return has_header true when the first row is taken as column names
local function pre_parse_csv(source)
  local head_vs = source()
  if not head_vs then
    -- Without this guard the length operator below fails with an opaque
    -- "attempt to get length of a nil value" message.
    error("cannot parse an empty data source: no rows found")
  end
  local nrows, ncols = 1, #head_vs
  local all_strings = true   -- true while no data row holds a numeric field
  local header_dup = {}      -- header_dup[k]: some data row repeats head_vs[k]
  for vs in source do
    if #vs == 0 then break end   -- a row without fields ends the scan
    if all_strings then all_strings = is_string_only(vs) end
    for k = 1, #vs do
      if head_vs[k] == vs[k] then header_dup[k] = true end
    end
    ncols = max(ncols, #vs)
    nrows = nrows + 1
  end

  local header_dup_count = 0
  for k = 1, ncols do
    if header_dup[k] then header_dup_count = header_dup_count + 1 end
  end

  -- The first row "stands out" when under half of the columns repeat it.
  local header_stand = (header_dup_count < ncols/2)
  local head_all_string = is_string_only(head_vs)
  local has_header = head_all_string and (header_stand or (not all_strings))
  -- nrows started at 1 to count the first row; drop it if it is a header.
  if has_header then nrows = nrows - 1 end
  return nrows, ncols, has_header
end
--- Tell whether a field contains anything besides whitespace.
-- @param s field text
-- @return false when s is empty or made only of whitespace, true otherwise
local function is_not_empty(s)
  -- match yields the whole (possibly empty) string when s is blank
  local blank = match(s, '^%s*$')
  if blank then
    return false
  end
  return true
end
--- Build a gdt table from a restartable row source.
-- The source is read twice: once by pre_parse_csv to obtain the table
-- dimensions and header guess, then again to copy the values.
-- @param source_init function that (re)starts the source and returns an
--   iterator function yielding one row (table of fields) per call
-- @return the table obtained from gdt.alloc, filled with the source rows
local function gdt_parse(source_init)
  -- First pass: dimensions and header detection.
  local nrows, ncols, has_header = pre_parse_csv(source_init())
  local t = gdt.alloc(nrows, ncols)

  -- Second pass: restart the source and copy the content.
  local source = source_init()
  if has_header then
    local names = source()
    for col, name in ipairs(names) do
      t:set_header(col, name)
    end
  end

  local row = 1
  for fields in source do
    if #fields == 0 then break end   -- a row without fields ends the data
    for col = 1, ncols do
      -- missing or blank fields are stored as nil
      local cell = fields[col]
      local value = nil
      if cell and is_not_empty(cell) then
        value = cell
      end
      gdt.set(t, row, col, value)
    end
    row = row + 1
  end
  return t
end
--- Strip leading and trailing whitespace from every string field of a row.
-- The row is modified in place; non-string entries are left untouched.
-- @param line sequence of field values
local function trim_spaces(line)
  for idx = 1, #line do
    local field = line[idx]
    if type(field) == 'string' then
      -- the lazy capture keeps everything between the outer whitespace runs
      line[idx] = match(field, "^%s*(.-)%s*$")
    end
  end
end
--- Create a restartable row source that reads records from a CSV file.
-- The returned function opens the file (closing a handle left open by a
-- previous, unfinished pass) and hands back an iterator. Each call to the
-- iterator reads one text line, splits it with csv.line and, unless
-- disabled, trims the whitespace around every string field.
-- @param filename path of the file to read
-- @param options optional table; options.strip_spaces (default true)
--   controls whether fields are trimmed
-- @return function that (re)opens the file and returns the row iterator;
--   it raises an error when the file cannot be opened
local function source_csv(filename, options)
  local strip_spaces = true
  if options and (options.strip_spaces ~= nil) then
    strip_spaces = options.strip_spaces
  end

  local f          -- handle currently open, nil when none is
  local it, s, i   -- values returned by f:lines()

  -- Close the current handle, if any, and forget it so that it is never
  -- closed twice nor read after being closed.
  local function release()
    if f then
      f:close()
      f = nil
    end
  end

  local source = function()
    -- Not started or already exhausted: report "no more rows" instead of
    -- touching a missing or closed file.
    if not f then return nil end
    local line = it(s, i)
    if line then
      local ls = csv.line(line)
      if strip_spaces then trim_spaces(ls) end
      return ls
    end
    release()
  end

  return function()
    -- A previous pass may have stopped before the end of the file, leaving
    -- its handle open; close it before opening a new one.
    release()
    f = assert(io.open(filename, 'r'), 'cannot open file: ' .. filename)
    it, s, i = f:lines()
    return source
  end
end
--- Create a restartable row source over an in-memory list of rows.
-- The number of rows is read once, when the source is created.
-- @param def sequence of rows
-- @return function that rewinds to the first row and returns the iterator;
--   the iterator returns the next row of def, or nothing once all rows
--   have been delivered
local function source_def(def)
  local count = #def
  local pos = 0

  local function next_row()
    if pos < count then
      pos = pos + 1
      return def[pos]
    end
  end

  local function restart()
    pos = 0
    return next_row
  end

  return restart
end
--- Convert a cell value into its CSV field representation.
-- Numbers are returned unchanged. A string made of a letter followed by
-- one or more letters, digits or underscores is returned as is; any other
-- string is wrapped in double quotes with embedded quotes doubled. Values
-- of any other type become the empty string.
-- @param x cell value
-- @return the number itself or the field text
local function csv_format(x)
  local kind = type(x)
  if kind == 'number' then
    return x
  end
  if kind ~= 'string' then
    return ""
  end
  if match(x, "^%a[%w_]+$") then
    return x
  end
  -- the extra parentheses drop the substitution count returned by gsub
  local escaped = (gsub(x, '"', '""'))
  return string.format("\"%s\"", escaped)
end
--- Write one record to a file as a comma-separated line.
-- @param f file handle (or any object with a write method)
-- @param row table holding the values at indices 1..nc
-- @param nc number of fields to write
local function write_csv_row(f, row, nc)
  local fields = {}
  for col = 1, nc do
    fields[col] = csv_format(row[col])
  end
  local record = table.concat(fields, ",")
  f:write(string.format("%s\n", record))
end
--- Save a table to a CSV file: one line of headers, then one line per row.
-- @param t table object providing the headers() and rows() methods; each
--   row delivered by rows() is indexed by header name
-- @param filename path of the file to create or overwrite; an error is
--   raised when it cannot be opened for writing
function gdt.write_csv(t, filename)
  local out = assert(io.open(filename, "w"))
  local names = t:headers()
  local ncols = #names
  write_csv_row(out, names, ncols)
  for _, record in t:rows() do
    -- reorder the row values to follow the header order
    local cells = {}
    for pos, key in ipairs(names) do
      cells[pos] = record[key]
    end
    write_csv_row(out, cells, ncols)
  end
  out:close()
end
--- Read a CSV file into a new table.
-- @param filename path of the CSV file
-- @param options optional table forwarded to source_csv
-- @return the table built by gdt_parse
function gdt.read_csv(filename, options)
  return gdt_parse(source_csv(filename, options))
end
--- Build a new table from an in-memory list of rows.
-- @param def sequence of rows, each a table of field values
-- @return the table built by gdt_parse
function gdt.def(def)
  return gdt_parse(source_def(def))
end