Skip to content

Commit

Permalink
cleaned up range parsing, added random row selection support to csvrows
Browse files Browse the repository at this point in the history
  • Loading branch information
rsdoiel committed Dec 12, 2017
1 parent 1f9bf58 commit c316b43
Show file tree
Hide file tree
Showing 7 changed files with 408 additions and 123 deletions.
7 changes: 1 addition & 6 deletions cmds/csvcols/csvcols.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,6 @@ import (
"github.com/google/uuid"
)

const (
//FIXME: maxColumns needs to be calculated from the data rather than being a constant
maxColumns = 2048
)

var (
description = `
%s converts a set of command line args into columns output in CSV format.
Expand Down Expand Up @@ -207,7 +202,7 @@ func main() {
}

if outputColumns != "" {
columnNos, err := datatools.ParseRange(outputColumns, maxColumns)
columnNos, err := datatools.ParseRange(outputColumns)
cli.ExitOnError(app.Eout, err, quiet)

// NOTE: We need to adjust from humans counting from 1 to counting from zero
Expand Down
34 changes: 19 additions & 15 deletions cmds/csvrows/csvrows.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ func main() {

// Application specific options
app.StringVar(&delimiter, "d,delimiter", "", "set delimiter character")
app.StringVar(&outputRows, "row,rows", "", "output specified rows in order (e.g. -row 1,5,2:4))")
app.BoolVar(&skipHeaderRow, "skip-header-row", false, "skip the header row (alias for -row 2:")
app.StringVar(&outputRows, "row,rows", "", "output specified rows in order (e.g. -row 1,5,2-4))")
app.BoolVar(&skipHeaderRow, "skip-header-row", false, "skip the header row (alias for -row 2-")
app.BoolVar(&showHeader, "header", false, "display the header row (alias for '-rows 1')")
app.IntVar(&randomRows, "random", 0, "return N randomly selected rows")

Expand Down Expand Up @@ -161,22 +161,15 @@ func main() {
}

if randomRows > 0 {
datatools.CSVRandomRows(app.In, app.Out, app.Eout, showHeader, randomRows, delimiter)
os.Exit(0)
}

if showHeader == true {
outputRows = "1"
}
if len(args) == 0 && outputRows == "" {
outputRows = "1:"
if skipHeaderRow == true {
outputRows = "2:"
if err := datatools.CSVRandomRows(app.In, app.Out, showHeader, randomRows, delimiter); err != nil {
fmt.Fprintf(app.Eout, "%s, %s\n", inputFName, err)
os.Exit(1)
}
os.Exit(0)
}

if outputRows != "" {
rowNos, err := datatools.ParseRange(outputRows, maxRows)
rowNos, err := datatools.ParseRange(outputRows)
cli.ExitOnError(app.Eout, err, quiet)

// NOTE: We need to adjust from humans counting from 1 to counting from zero
Expand All @@ -186,10 +179,21 @@ func main() {
rowNos[i] = 0
}
}
datatools.CSVRows(app.In, app.Out, app.Eout, rowNos, delimiter)
if err := datatools.CSVRows(app.In, app.Out, showHeader, rowNos, delimiter); err != nil {
fmt.Fprintf(app.Eout, "%s, %s\n", inputFName, err)
os.Exit(1)
}
os.Exit(0)
}
if inputFName != "" {
if err := datatools.CSVRowsAll(app.In, app.Out, showHeader, delimiter); err != nil {
fmt.Fprintf(app.Eout, "%s, %s\n", inputFName, err)
os.Exit(1)
}
os.Exit(0)
}

// NOTE: If we're not processing an existing CSV source for input we're turning parameters into CSV rows!
if len(delimiter) > 0 && len(args) == 1 {
args = strings.Split(args[0], datatools.NormalizeDelimiter(delimiter))
}
Expand Down
186 changes: 186 additions & 0 deletions csv.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
//
// datatools.go is a package for working with various types of data (e.g. CSV, XLSX, JSON) in support
// of the utilities included in the datatools.go package.
//
// Copyright (c) 2017, Caltech
// All rights not granted herein are expressly reserved by Caltech.
//
// Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
package datatools

import (
"encoding/csv"
"fmt"
"io"
"math/rand"
"time"
)

// selectedRow decides whether the record found at position rowNo should be
// emitted. It hands back record itself when rowNos is empty (no filter is in
// effect) or when rowNo is one of the values in rowNos, and nil otherwise.
func selectedRow(rowNo int, record []string, rowNos []int) []string {
	// An empty filter selects everything.
	wanted := len(rowNos) == 0
	for idx := 0; !wanted && idx < len(rowNos); idx++ {
		wanted = rowNos[idx] == rowNo
	}
	if !wanted {
		return nil
	}
	return record
}

// shuffleRows permutes rows in place, taking its random numbers from src.
// It walks from the last element down to the second one and exchanges each
// element with one chosen at an index between 0 and the current position
// (inclusive).
func shuffleRows(rows [][]string, src rand.Source) {
	rng := rand.New(src)
	last := len(rows) - 1
	for last > 0 {
		// The candidate range includes last itself, so an element may stay put.
		pick := rng.Intn(last + 1)
		held := rows[last]
		rows[last] = rows[pick]
		rows[pick] = held
		last--
	}
}

// CSVRandomRows reads CSV records from in and writes rowCount randomly
// selected records to out in CSV format.
//
// When delimiter is not empty it is converted with NormalizeDelimiterRune and
// used as the field separator for both reading and writing.
//
// When showHeader is true the first record is treated as a header: it is kept
// out of the random selection and written to out before the selected rows.
// When showHeader is false the first record takes part in the selection like
// any other record.
//
// rowCount is the number of rows to return independent of the header row. If
// it is larger than the number of rows available every row is written (in
// shuffled order); if it is zero or negative no rows are written.
//
// The whole input is held in memory before anything is written. The shuffle is
// seeded from the current time, so the selection differs between calls.
//
// An error is returned when a record cannot be read or written. An error
// reported by the CSV writer when flushing is returned unchanged.
func CSVRandomRows(in io.Reader, out io.Writer, showHeader bool, rowCount int, delimiter string) error {
	r := csv.NewReader(in)
	w := csv.NewWriter(out)
	if delimiter != "" {
		comma := NormalizeDelimiterRune(delimiter)
		r.Comma = comma
		w.Comma = comma
	}

	// Read in our rows, setting the header aside when requested.
	var (
		headerRow []string
		rows      [][]string
	)
	for i := 0; ; i++ {
		rec, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return fmt.Errorf("%s (%T %+v)", err, rec, rec)
		}
		if i == 0 && showHeader {
			headerRow = rec
			continue
		}
		rows = append(rows, rec)
	}

	// headerRow is only ever populated when showHeader is true.
	if len(headerRow) > 0 {
		if err := w.Write(headerRow); err != nil {
			return fmt.Errorf("Error writing record to csv: %s (Row %T %+v)", err, headerRow, headerRow)
		}
	}

	// Shuffle the rows, then write out the desired number of rows.
	shuffleRows(rows, rand.NewSource(time.Now().UnixNano()))
	if rowCount > len(rows) {
		rowCount = len(rows)
	}
	for i := 0; i < rowCount; i++ {
		if err := w.Write(rows[i]); err != nil {
			return fmt.Errorf("Error writing record to csv: %s (Row %T %+v)", err, rows[i], rows[i])
		}
	}

	// Writes are buffered, so a failing destination usually only shows up
	// here. Return the writer's error as is: callers add their own newline.
	w.Flush()
	return w.Error()
}

// CSVRows copies the records selected by rowNos from in to out in CSV format.
//
// Records are numbered from zero in the order they are read, and rowNos is
// matched against those numbers with selectedRow; an empty rowNos selects
// every record. Selected records are written in input order, whatever the
// order of the values in rowNos.
//
// When showHeader is true the first record is always written, whether or not
// rowNos contains 0. When delimiter is not empty it is converted with
// NormalizeDelimiterRune and used as the field separator for both reading and
// writing.
//
// An error is returned when a record cannot be read or written. An error
// reported by the CSV writer when flushing is returned unchanged.
func CSVRows(in io.Reader, out io.Writer, showHeader bool, rowNos []int, delimiter string) error {
	r := csv.NewReader(in)
	w := csv.NewWriter(out)
	if delimiter != "" {
		comma := NormalizeDelimiterRune(delimiter)
		r.Comma = comma
		w.Comma = comma
	}
	for i := 0; ; i++ {
		rec, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return fmt.Errorf("%s (%T %+v)", err, rec, rec)
		}
		// A requested header bypasses the row filter; everything else has to
		// be selected by rowNos.
		isHeader := i == 0 && showHeader
		if !isHeader && selectedRow(i, rec, rowNos) == nil {
			continue
		}
		if err := w.Write(rec); err != nil {
			return fmt.Errorf("Error writing record to csv: %s (Row %T %+v)", err, rec, rec)
		}
	}

	// Writes are buffered, so a failing destination usually only shows up
	// here. Return the writer's error as is: callers add their own newline.
	w.Flush()
	return w.Error()
}

// CSVRowsAll copies the records read from in to out in CSV format.
//
// The first record is written only when showHeader is true; when showHeader
// is false it is dropped. Every later record is always written.
// NOTE(review): dropping the first record when showHeader is false looks like
// deliberate header suppression, but it also means "all rows" is one short by
// default - confirm against the callers.
//
// When delimiter is not empty it is converted with NormalizeDelimiterRune and
// used as the field separator for both reading and writing.
//
// An error is returned when a record cannot be read or written. An error
// reported by the CSV writer when flushing is returned unchanged.
func CSVRowsAll(in io.Reader, out io.Writer, showHeader bool, delimiter string) error {
	r := csv.NewReader(in)
	w := csv.NewWriter(out)
	if delimiter != "" {
		comma := NormalizeDelimiterRune(delimiter)
		r.Comma = comma
		w.Comma = comma
	}
	for i := 0; ; i++ {
		row, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return fmt.Errorf("%s (%T %+v)", err, row, row)
		}
		// The first record is only passed through on request.
		if i == 0 && !showHeader {
			continue
		}
		if err := w.Write(row); err != nil {
			return fmt.Errorf("Error writing record to csv: %s (Row %T %+v)", err, row, row)
		}
	}

	// Writes are buffered, so a failing destination usually only shows up
	// here. Return the writer's error as is: callers add their own newline.
	w.Flush()
	return w.Error()
}
42 changes: 0 additions & 42 deletions datatools.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import (
"encoding/json"
"fmt"
"io"
"strconv"
"strings"
"unicode"

Expand Down Expand Up @@ -209,47 +208,6 @@ func Levenshtein(src string, target string, insertCost int, deleteCost int, subs
})
}

// ParseRange takes a range notation string and converts it into a list of
// integers.
//
// s is a comma separated list of terms. A term is either a single integer
// ("5") or an inclusive range written "start:end" ("2:4" yields 2, 3, 4). A
// missing start defaults to 0 and a missing end defaults to max, so "3:" runs
// from 3 through max. White space around numbers is ignored. Values are
// returned in the order the terms appear; duplicates are kept.
//
// On failure the returned slice is nil and the error describes the offending
// term: a value that is not an integer, a range with more than one ":", or a
// range whose start is greater than its end.
func ParseRange(s string, max int) ([]int, error) {
	nums := []int{}
	for _, arg := range strings.Split(s, ",") {
		if !strings.Contains(arg, ":") {
			i, err := strconv.Atoi(strings.TrimSpace(arg))
			if err != nil {
				return nil, err
			}
			nums = append(nums, i)
			continue
		}

		parts := strings.Split(arg, ":")
		if len(parts) != 2 {
			// Reject "1:2:3" rather than silently ignoring the tail.
			return nil, fmt.Errorf("Expected start:end for range, %q", arg)
		}
		a, b := 0, max
		if start := strings.TrimSpace(parts[0]); start != "" {
			n, err := strconv.Atoi(start)
			if err != nil {
				return nil, fmt.Errorf("Expected a number for start of range, %q, %s", arg, err)
			}
			a = n
		}
		if end := strings.TrimSpace(parts[1]); end != "" {
			n, err := strconv.Atoi(end)
			if err != nil {
				return nil, fmt.Errorf("Expected a number for end of range, %q, %s", arg, err)
			}
			b = n
		}
		if a > b {
			return nil, fmt.Errorf("%d less than %d, invalid range", b, a)
		}
		for i := a; i <= b; i++ {
			nums = append(nums, i)
		}
	}
	return nums, nil
}

// EnglishTitle - uses an improve capitalization rules for English titles.
// This is based on the approach suggested in the Go language Cookbook:
// http://golangcookbook.com/chapters/strings/title/
Expand Down
60 changes: 0 additions & 60 deletions datatools_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,63 +107,3 @@ func TestFilter(t *testing.T) {
t.Errorf("expected %t, got %t", expected, result)
}
}

// TestParseRange checks ParseRange against a single value, a simple range and
// a comma separated mix of both. Each case stops the test if ParseRange
// reports an error, and otherwise compares both the length and every element
// of the result with the expected list.
func TestParseRange(t *testing.T) {
	cases := []struct {
		src      string
		expected []int
	}{
		{`1`, []int{1}},
		{`1:3`, []int{1, 2, 3}},
		{`1,4:6,10`, []int{1, 4, 5, 6, 10}},
	}
	for _, tc := range cases {
		result, err := ParseRange(tc.src, 10000)
		if err != nil {
			t.Fatalf("ParseRange failed, %s", err)
		}
		// Comparing lengths catches surplus elements as well as missing ones.
		if len(result) != len(tc.expected) {
			t.Errorf("%q: expected %d items, got %d", tc.src, len(tc.expected), len(result))
		}
		for i, val := range tc.expected {
			if i >= len(result) {
				t.Errorf("item %d: expected %d, missing element in result", i, val)
				continue
			}
			if result[i] != val {
				t.Errorf("item %d: expected %d, got %d", i, val, result[i])
			}
		}
	}
}
Loading

0 comments on commit c316b43

Please sign in to comment.