-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcsv.go
157 lines (126 loc) · 2.51 KB
/
csv.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
package difftable
import (
"bytes"
"encoding/csv"
"fmt"
"io"
)
// bom is the UTF-8 encoding of the Unicode byte order mark (U+FEFF).
var bom = []byte{0xef, 0xbb, 0xbf}

// uniReader wraps an io.Reader, removing a UTF-8 byte order mark and
// replacing carriage returns with newlines.
// This is used with the csv.Reader so it can properly delimit lines.
//
// NOTE: the reader keeps no state between calls, so the BOM check is applied
// to the start of every chunk returned by the underlying reader, and a BOM
// that is split across two reads is not detected.
type uniReader struct {
	r io.Reader
}

// Read reads from the underlying reader into buf, drops a BOM found at the
// start of the bytes just read, and rewrites every '\r' to '\n'. It returns
// the number of bytes left in buf after the BOM removal together with the
// underlying reader's error.
func (r *uniReader) Read(buf []byte) (int, error) {
	n, err := r.r.Read(buf)

	// Only the first n bytes were produced by this call. Anything past that
	// is stale and must not take part in BOM detection, otherwise n could be
	// decremented below zero on a short read.
	data := buf[:n]

	// Detect and remove BOM.
	if bytes.HasPrefix(data, bom) {
		n = copy(buf, data[len(bom):])
		data = buf[:n]
	}

	// Replace carriage returns with newlines.
	for i, b := range data {
		if b == '\r' {
			data[i] = '\n'
		}
	}

	return n, err
}

// Close closes the underlying reader when it implements io.Closer and
// returns its error. It is a no-op returning nil otherwise.
func (r *uniReader) Close() error {
	rc, ok := r.r.(io.Closer)
	if !ok {
		return nil
	}
	return rc.Close()
}
// NewCSVReader returns a csv.Reader that reads from r through a uniReader and
// splits fields on the delimiter d.
//
// The reader is configured with LazyQuotes and TrimLeadingSpace enabled. It
// also enables ReuseRecord, so a record returned by Read may share its backing
// array with the record returned by the previous call.
func NewCSVReader(r io.Reader, d rune) *csv.Reader {
	reader := csv.NewReader(&uniReader{r: r})

	reader.ReuseRecord = true
	reader.TrimLeadingSpace = true
	reader.LazyQuotes = true
	reader.Comma = d

	return reader
}
// CSVTable builds a Table on top of cr. The first record read from cr is
// taken as the header; any error from that read is returned unchanged.
//
// renames maps an original column name to the name it should be known by.
// It is applied both to the header columns and to the entries of key. The
// entries of key are overwritten in place, so the caller's slice is modified.
// Every column is registered with the type "string". When several header
// columns end up with the same name, the index of the last one is kept.
func CSVTable(cr *csv.Reader, key []string, renames map[string]string) (Table, error) {
	header, err := cr.Read()
	if err != nil {
		return nil, err
	}

	// Rename the key columns directly in the slice supplied by the caller.
	for i := range key {
		if renamed, ok := renames[key[i]]; ok {
			key[i] = renamed
		}
	}

	// Index the (possibly renamed) header names by their position in a record.
	width := len(header)
	idxByName := make(map[string]int, width)
	typeByName := make(map[string]string, width)
	for pos, name := range header {
		if renamed, ok := renames[name]; ok {
			name = renamed
		}
		idxByName[name] = pos
		typeByName[name] = "string"
	}

	tbl := &csvTable{
		rows:     cr,
		key:      key,
		colLen:   width,
		colIdxs:  idxByName,
		colTypes: typeByName,
	}
	return tbl, nil
}
// csvTable exposes the records of a csv.Reader one row at a time.
type csvTable struct {
	// rows is the source of records.
	rows *csv.Reader
	// key holds the names of the key columns.
	key []string
	// colLen is the number of fields every record is required to have.
	colLen int
	// colIdxs maps a column name to its position within a record.
	colIdxs map[string]int
	// colTypes maps a column name to its type name.
	colTypes map[string]string
	// row is the record most recently accepted by Next, or nil.
	row []string
}

// Key returns the names of the key columns.
func (t *csvTable) Key() []string {
	return t.key
}

// Cols returns the mapping of column name to type name.
func (t *csvTable) Cols() map[string]string {
	return t.colTypes
}

// Row returns a view of the current record. The view refers to the table's
// record slice and column index rather than to copies of them.
func (t *csvTable) Row() Row {
	current := csvRow{
		colIdxs: t.colIdxs,
		row:     t.row,
	}
	return &current
}

// Next advances to the next record. It returns true when a record is
// available and false with a nil error once the input is exhausted. Any other
// read error, or a record whose field count differs from the header's, is
// reported as an error. The current record is cleared unless Next succeeds.
func (t *csvTable) Next() (bool, error) {
	t.row = nil

	rec, err := t.rows.Read()
	switch {
	case err == io.EOF:
		// Done.
		return false, nil
	case err != nil:
		return false, err
	case len(rec) != t.colLen:
		return false, fmt.Errorf("expected %d columns, got %d", t.colLen, len(rec))
	}

	t.row = rec
	return true, nil
}
// csvRow gives access to the fields of a single CSV record by column name.
type csvRow struct {
	// Unused by unsorted CSV table.
	key string
	// colIdxs maps a column name to its position within row.
	colIdxs map[string]int
	// row holds the field values of the record; it may be nil.
	row []string
}

// Bytes returns the value of col as a byte slice. It returns nil when col is
// not a known column or when the record has no field at the column's index
// (for example when the row is empty).
func (r *csvRow) Bytes(col string) []byte {
	i, ok := r.colIdxs[col]
	// Guard the index so a nil or short record yields nil instead of a panic.
	if !ok || i < 0 || i >= len(r.row) {
		return nil
	}
	return []byte(r.row[i])
}

// Value returns the value of col as a string wrapped in an interface. It
// returns nil when col is not a known column or when the record has no field
// at the column's index (for example when the row is empty).
func (r *csvRow) Value(col string) interface{} {
	i, ok := r.colIdxs[col]
	// Guard the index so a nil or short record yields nil instead of a panic.
	if !ok || i < 0 || i >= len(r.row) {
		return nil
	}
	return r.row[i]
}