Skip to content

Commit

Permalink
Introducing CSV parser configurations that could be done using out-of…
Browse files Browse the repository at this point in the history
…-the-box configurations
  • Loading branch information
danielfireman committed Nov 24, 2017
1 parent df55ce6 commit de3a51f
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 8 deletions.
8 changes: 4 additions & 4 deletions csv/iterator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ const (
func TestNewIterator(t *testing.T) {
t.Run("EmptyString", func(t *testing.T) {
is := is.New(t)
iter := newIterator(stringReadCloser(""), dontSkipHeaders)
iter := newIterator(stringReadCloser(""), defaultDialect, dontSkipHeaders)
is.True(!iter.Next()) // more iterations than it should
is.NoErr(iter.Err())
})
Expand All @@ -27,15 +27,15 @@ func TestNewIterator(t *testing.T) {
func TestIterator_Next(t *testing.T) {
t.Run("TwoRows", func(t *testing.T) {
is := is.New(t)
iter := newIterator(stringReadCloser("foo\nbar"), dontSkipHeaders)
iter := newIterator(stringReadCloser("foo\nbar"), defaultDialect, dontSkipHeaders)
is.True(iter.Next()) // want two more iterations
is.True(iter.Next()) // want one more interation
is.True(!iter.Next()) // more iterations than it should
is.NoErr(iter.Err())
})
t.Run("TwoRowsSkipHeaders", func(t *testing.T) {
is := is.New(t)
iter := newIterator(stringReadCloser("name\nbar"), skipHeaders)
iter := newIterator(stringReadCloser("name\nbar"), defaultDialect, skipHeaders)
is.True(iter.Next()) // want one interation
is.True(!iter.Next()) // more iterations than it should
is.NoErr(iter.Err())
Expand All @@ -45,7 +45,7 @@ func TestIterator_Next(t *testing.T) {
func TestIterator_Row(t *testing.T) {
t.Run("OneRow", func(t *testing.T) {
is := is.New(t)
iter := newIterator(stringReadCloser("name"), dontSkipHeaders)
iter := newIterator(stringReadCloser("name"), defaultDialect, dontSkipHeaders)
is.True(iter.Next()) // want one iteration

got := iter.Row()
Expand Down
45 changes: 41 additions & 4 deletions csv/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,29 @@ type Table struct {
headers []string
source Source
skipHeaders bool
dialect dialect
}

// dialect groups the CSV dialect configuration options applied to the
// underlying CSV reader when a table is iterated.
// http://frictionlessdata.io/specs/csv-dialect/
type dialect struct {
// delimiter is the single character (rune) which separates fields (aka columns).
delimiter rune
// skipInitialSpace specifies how to interpret whitespace which immediately follows a delimiter:
// if true, that whitespace is trimmed from the following field (it is copied to the
// reader's TrimLeadingSpace setting by newIterator);
// if false, whitespace immediately after a delimiter is treated as part of the following field.
skipInitialSpace bool
}

// defaultDialect is the dialect NewTable starts from before any creation
// option runs: fields are separated by commas and whitespace immediately
// after a delimiter is skipped.
var defaultDialect = dialect{
delimiter: ',',
skipInitialSpace: true,
}

// NewTable creates a table.Table from the CSV table physical representation.
// CreationOpts are executed in the order they are declared.
// If no dialect option (e.g. Delimiter or ConsiderInitialSpace) is passed, defaultDialect is used.
func NewTable(source Source, opts ...CreationOpts) (*Table, error) {
t := Table{source: source}
t := Table{source: source, dialect: defaultDialect}
for _, opt := range opts {
if err := opt(&t); err != nil {
return nil, err
Expand All @@ -43,7 +60,7 @@ func (table *Table) Iter() (table.Iterator, error) {
if err != nil {
return nil, err
}
return newIterator(src, table.skipHeaders), nil
return newIterator(src, table.dialect, table.skipHeaders), nil
}

// ReadAll reads all rows from the table and return it as strings.
Expand Down Expand Up @@ -77,10 +94,13 @@ func (table *Table) String() string {
return buf.String()
}

func newIterator(source io.ReadCloser, skipHeaders bool) *csvIterator {
func newIterator(source io.ReadCloser, dialect dialect, skipHeaders bool) *csvIterator {
r := csv.NewReader(source)
r.Comma = dialect.delimiter
r.TrimLeadingSpace = dialect.skipInitialSpace
return &csvIterator{
source: source,
reader: csv.NewReader(source),
reader: r,
skipHeaders: skipHeaders,
}
}
Expand Down Expand Up @@ -185,6 +205,7 @@ func errorSource() Source {
// The header line will be skipped during iteration
func LoadHeaders() CreationOpts {
return func(reader *Table) error {
reader.skipHeaders = false
iter, err := reader.Iter()
if err != nil {
return err
Expand All @@ -205,6 +226,22 @@ func SetHeaders(headers ...string) CreationOpts {
}
}

// Delimiter specifies the character which should separate fields (aka columns).
//
// The returned option stores d in the table's dialect. The rune is validated
// when the option is applied, so a delimiter the CSV reader cannot use is
// reported by NewTable instead of surfacing later while reading rows.
// The option returns an error when d is NUL, a double quote, a carriage
// return, a line feed, the Unicode replacement character (U+FFFD) or not a
// valid Unicode code point; in that case the table's dialect is left untouched.
func Delimiter(d rune) CreationOpts {
	return func(t *Table) error {
		// Same restrictions encoding/csv places on Reader.Comma.
		reserved := d == 0 || d == '"' || d == '\r' || d == '\n' || d == '\uFFFD'
		// Out of the Unicode range or inside the surrogate block.
		notUnicode := d < 0 || d > '\U0010FFFF' || (d >= 0xD800 && d <= 0xDFFF)
		if reserved || notUnicode {
			return fmt.Errorf("invalid CSV delimiter %U", d)
		}
		t.dialect.delimiter = d
		return nil
	}
}

// ConsiderInitialSpace returns a creation option which makes the CSV parser
// keep the whitespace immediately after a delimiter as part of the following
// field, by turning off the dialect's skipInitialSpace flag.
// The option itself never fails.
func ConsiderInitialSpace() CreationOpts {
	keepLeadingSpace := func(tbl *Table) error {
		tbl.dialect.skipInitialSpace = false
		return nil
	}
	return keepLeadingSpace
}

func errorOpts(headers ...string) CreationOpts {
return func(_ *Table) error {
return fmt.Errorf("error opts")
Expand Down
20 changes: 20 additions & 0 deletions csv/table_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,23 @@ func TestSetHeaders(t *testing.T) {
is.Equal(want, iter.Row())
is.True(!iter.Next())
}

// TestDelimiter checks that a table created with the Delimiter(';') option
// splits a semicolon-separated line into separate fields.
func TestDelimiter(t *testing.T) {
	assert := is.New(t)

	tab, err := NewTable(FromString("Foo;Bar"), Delimiter(';'))
	assert.NoErr(err)

	rows, err := tab.ReadAll()
	assert.NoErr(err)

	want := [][]string{{"Foo", "Bar"}}
	assert.Equal(rows, want)
}

// TestConsiderInitialSpace checks that a table created with the
// ConsiderInitialSpace option keeps the leading space of a field.
func TestConsiderInitialSpace(t *testing.T) {
	assert := is.New(t)

	tab, err := NewTable(FromString(" Foo"), ConsiderInitialSpace())
	assert.NoErr(err)

	rows, err := tab.ReadAll()
	assert.NoErr(err)

	want := [][]string{{" Foo"}}
	assert.Equal(rows, want)
}

0 comments on commit de3a51f

Please sign in to comment.