diff --git a/go.mod b/go.mod index c69418a3dbd..468b2e4aef5 100644 --- a/go.mod +++ b/go.mod @@ -99,7 +99,7 @@ require ( github.com/open-telemetry/opentelemetry-collector-contrib/receiver/opencensusreceiver v0.102.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/zipkinreceiver v0.102.0 github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe - github.com/stoewer/parquet-cli v0.0.7 + github.com/stoewer/parquet-cli v0.0.9 go.opentelemetry.io/collector/config/configgrpc v0.102.1 go.opentelemetry.io/collector/config/confighttp v0.102.1 go.opentelemetry.io/collector/config/configtls v1.18.0 diff --git a/go.sum b/go.sum index b6602d7172c..cd3a17ef782 100644 --- a/go.sum +++ b/go.sum @@ -857,8 +857,8 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/viper v1.18.2 h1:LUXCnvUvSM6FXAsj6nnfc8Q2tp1dIgUfY9Kc8GsSOiQ= github.com/spf13/viper v1.18.2/go.mod h1:EKmWIqdnk5lOcmR72yw6hS+8OPYcwD0jteitLMVB+yk= -github.com/stoewer/parquet-cli v0.0.7 h1:rhdZODIbyMS3twr4OM3am8BPPT5pbfMcHLH93whDM5o= -github.com/stoewer/parquet-cli v0.0.7/go.mod h1:bskxHdj8q3H1EmfuCqjViFoeO3NEvs5lzZAQvI8Nfjk= +github.com/stoewer/parquet-cli v0.0.9 h1:qFjncPnEnzwPJxnADcwvdiUzWwMch7PRWloaBNeBDE0= +github.com/stoewer/parquet-cli v0.0.9/go.mod h1:bskxHdj8q3H1EmfuCqjViFoeO3NEvs5lzZAQvI8Nfjk= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= diff --git a/vendor/github.com/stoewer/parquet-cli/pkg/inspect/aggregate.go b/vendor/github.com/stoewer/parquet-cli/pkg/inspect/aggregate.go index 60fbb767252..591ed36ff1a 100644 --- a/vendor/github.com/stoewer/parquet-cli/pkg/inspect/aggregate.go +++ b/vendor/github.com/stoewer/parquet-cli/pkg/inspect/aggregate.go @@ -26,7 +26,7 @@ type Aggregate struct { Stats []AggregateCellStats `json:"stats"` } -func (rs *Aggregate) Data() any { +func (rs *Aggregate) SerializableData() any { return rs } diff --git a/vendor/github.com/stoewer/parquet-cli/pkg/inspect/col_stats.go b/vendor/github.com/stoewer/parquet-cli/pkg/inspect/col_stats.go index 1b464a78b7b..ea8fe2cb41c 100644 --- a/vendor/github.com/stoewer/parquet-cli/pkg/inspect/col_stats.go +++ b/vendor/github.com/stoewer/parquet-cli/pkg/inspect/col_stats.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "io" + "strings" "github.com/parquet-go/parquet-go" "github.com/stoewer/parquet-cli/pkg/output" @@ -11,6 +12,19 @@ import ( var ( columnStatHeader = [...]any{ + "Index", + "Name", + "Max Def", + "Max Rep", + "Size", + "Compressed size", + "Pages", + "Rows", + "Values", + "Nulls", + "Path", + } + columnStatHeaderFull = [...]any{ "Index", "Name", "Max Def", @@ -27,59 +41,73 @@ var ( "Nulls", "Page min nulls", "Page max nulls", + "Path", } ) type ColumnStats struct { Index int `json:"index"` Name string `json:"name"` - MaxDef int `json:"maxDef"` - MaxRep int `json:"maxRep"` + MaxDef int `json:"max_def"` + MaxRep int `json:"max_rep"` Size int64 `json:"size"` - CompressedSize int64 `json:"compressedSize"` + CompressedSize int64 `json:"compressed_size"` Pages int `json:"pages"` Rows int64 `json:"rows"` - PageMinRows int64 `json:"pageMinRows"` - PageMaxRows int64 `json:"pageMaxRows"` Values int64 `json:"values"` - PageMinValues int64 `json:"pageMinValues"` - PageMaxValues int64 `json:"pageMaxValues"` Nulls int64 `json:"nulls"` - PageMinNulls int64 `json:"pageMinNulls"` - PageMaxNulls int64 `json:"pageMaxNulls"` + Path string `json:"path"` +} - cells []any +func (rs *ColumnStats) Cells() []any { + return []any{ + rs.Index, + rs.Name, + rs.MaxDef, + rs.MaxRep, + rs.Size, + rs.CompressedSize, + rs.Pages, + rs.Rows, + rs.Values, + rs.Nulls, + rs.Path, + } } -func (rs *ColumnStats) Data() any { - return rs +type ColumnStatsFull struct { + ColumnStats + PageMinRows int64 `json:"page_min_rows"` + PageMaxRows int64 `json:"page_max_rows"` + PageMinValues int64 `json:"page_min_values"` + PageMaxValues int64 `json:"page_max_values"` + PageMinNulls int64 `json:"page_min_nulls"` + PageMaxNulls int64 `json:"page_max_nulls"` } -func (rs *ColumnStats) Cells() []any { - if rs.cells == nil { - rs.cells = []any{ - rs.Index, - rs.Name, - rs.MaxDef, - rs.MaxRep, - rs.Size, - rs.CompressedSize, - rs.Pages, - rs.Rows, - rs.PageMinRows, - rs.PageMaxRows, - rs.Values, - rs.PageMinValues, - rs.PageMaxValues, - rs.Nulls, - rs.PageMinNulls, - rs.PageMaxNulls, - } +func (rs *ColumnStatsFull) Cells() []any { + return []any{ + rs.Index, + rs.Name, + rs.MaxDef, + rs.MaxRep, + rs.Size, + rs.CompressedSize, + rs.Pages, + rs.Rows, + rs.PageMinRows, + rs.PageMaxRows, + rs.Values, + rs.PageMinValues, + rs.PageMaxValues, + rs.Nulls, + rs.PageMinNulls, + rs.PageMaxNulls, + rs.Path, } - return rs.cells } -func NewColStatCalculator(file *parquet.File, selectedCols []int) (*ColStatCalculator, error) { +func NewColStatCalculator(file *parquet.File, selectedCols []int, verbose bool) (*ColStatCalculator, error) { all := LeafColumns(file) var columns []*parquet.Column @@ -95,16 +123,20 @@ func NewColStatCalculator(file *parquet.File, selectedCols []int) (*ColStatCalcu } } - return &ColStatCalculator{file: file, columns: columns}, nil + return &ColStatCalculator{file: file, columns: columns, verbose: verbose}, nil } type ColStatCalculator struct { file *parquet.File + verbose bool columns []*parquet.Column current int } func (cc *ColStatCalculator) Header() []any { + if cc.verbose { + return columnStatHeaderFull[:] + } return columnStatHeader[:] } @@ -115,11 +147,13 @@ func (cc *ColStatCalculator) NextRow() (output.TableRow, error) { col := cc.columns[cc.current] cc.current++ - stats := ColumnStats{ - Index: col.Index(), - Name: col.Name(), - MaxDef: col.MaxDefinitionLevel(), - MaxRep: col.MaxRepetitionLevel(), + stats := ColumnStatsFull{ + ColumnStats: ColumnStats{ + Index: col.Index(), + Name: PathToDisplayName(col.Path()), + MaxDef: col.MaxDefinitionLevel(), + MaxRep: col.MaxRepetitionLevel(), + }, } for _, rg := range cc.file.RowGroups() { @@ -135,26 +169,40 @@ func (cc *ColStatCalculator) NextRow() (output.TableRow, error) { } } + path := strings.Join(col.Path(), ".") + pages := chunk.Pages() page, err := pages.ReadPage() for err == nil { stats.Pages++ stats.Size += page.Size() stats.Rows += page.NumRows() - stats.PageMinRows = min(stats.PageMinRows, page.NumRows()) - stats.PageMaxRows = max(stats.PageMaxRows, page.NumRows()) stats.Values += page.NumValues() - stats.PageMinValues = min(stats.PageMinValues, page.NumRows()) - stats.PageMaxValues = max(stats.PageMaxValues, page.NumRows()) stats.Nulls += page.NumNulls() + stats.PageMinNulls = min(stats.PageMinNulls, page.NumNulls()) stats.PageMaxNulls = max(stats.PageMaxNulls, page.NumNulls()) + stats.PageMinValues = min(stats.PageMinValues, page.NumRows()) + stats.PageMaxValues = max(stats.PageMaxValues, page.NumRows()) + stats.PageMinRows = min(stats.PageMinRows, page.NumRows()) + stats.PageMaxRows = max(stats.PageMaxRows, page.NumRows()) + + stats.Path = path + page, err = pages.ReadPage() } + if !errors.Is(err, io.EOF) { return nil, fmt.Errorf("unable to read page rom column '%s': %w", col.Name(), err) } } - return &stats, nil + if cc.verbose { + return &stats, nil + } + return &stats.ColumnStats, nil +} + +func (cc *ColStatCalculator) NextSerializable() (any, error) { + return cc.NextRow() } diff --git a/vendor/github.com/stoewer/parquet-cli/pkg/inspect/file_info.go b/vendor/github.com/stoewer/parquet-cli/pkg/inspect/file_info.go index ab884aa9bf0..a2b268a239f 100644 --- a/vendor/github.com/stoewer/parquet-cli/pkg/inspect/file_info.go +++ b/vendor/github.com/stoewer/parquet-cli/pkg/inspect/file_info.go @@ -68,7 +68,7 @@ func (i *FileInfo) Add(k string, v any) { i.keys = append(i.keys, k) } -func (i *FileInfo) Data() any { +func (i *FileInfo) SerializableData() any { return i.elem } diff --git a/vendor/github.com/stoewer/parquet-cli/pkg/inspect/inspect.go b/vendor/github.com/stoewer/parquet-cli/pkg/inspect/inspect.go index c9d2f2a4d8a..af7efe7ea49 100644 --- a/vendor/github.com/stoewer/parquet-cli/pkg/inspect/inspect.go +++ b/vendor/github.com/stoewer/parquet-cli/pkg/inspect/inspect.go @@ -29,3 +29,15 @@ func LeafColumns(file *parquet.File) []*parquet.Column { sort.SliceStable(leafs, func(i, j int) bool { return leafs[i].Index() < leafs[j].Index() }) return leafs } + +func PathToDisplayName(path []string) string { + l := len(path) + if l > 3 { + if path[l-2] == "list" && path[l-1] == "element" { + return path[l-3] + } else if path[l-2] == "key_value" && (path[l-1] == "key" || path[l-1] == "value") { + return path[l-3] + "." + path[l-1] + } + } + return path[l-1] +} diff --git a/vendor/github.com/stoewer/parquet-cli/pkg/inspect/row_dump.go b/vendor/github.com/stoewer/parquet-cli/pkg/inspect/row_dump.go index 895f34646e4..cb1e4df6a4a 100644 --- a/vendor/github.com/stoewer/parquet-cli/pkg/inspect/row_dump.go +++ b/vendor/github.com/stoewer/parquet-cli/pkg/inspect/row_dump.go @@ -12,10 +12,6 @@ type DumpLine struct { Line []*parquet.Value } -func (d *DumpLine) Data() any { - return d.Line -} - func (d *DumpLine) Cells() []any { cells := make([]any, 0, len(d.Line)+1) if d.RowNumber == nil { @@ -27,12 +23,10 @@ func (d *DumpLine) Cells() []any { for _, v := range d.Line { if v == nil { cells = append(cells, "") + } else if v.IsNull() { + cells = append(cells, "null") } else { - if v.IsNull() { - cells = append(cells, fmt.Sprintf("%v %d:%d", v, v.DefinitionLevel(), v.RepetitionLevel())) - } else { - cells = append(cells, fmt.Sprintf("'%v' %d:%d", v, v.DefinitionLevel(), v.RepetitionLevel())) - } + cells = append(cells, v.String()) } } return cells @@ -74,7 +68,7 @@ func NewRowDump(file *parquet.File, options RowDumpOptions) (*RowDump, error) { return nil, fmt.Errorf("unable to create row stats calculator: %w", err) } c.columnIter = append(c.columnIter, it) - c.header = append(c.header, col.Name()+" d:r") + c.header = append(c.header, col.Name()) } return &c, nil diff --git a/vendor/github.com/stoewer/parquet-cli/pkg/inspect/row_stats.go b/vendor/github.com/stoewer/parquet-cli/pkg/inspect/row_stats.go index 81b25f525f0..22534cc28bf 100644 --- a/vendor/github.com/stoewer/parquet-cli/pkg/inspect/row_stats.go +++ b/vendor/github.com/stoewer/parquet-cli/pkg/inspect/row_stats.go @@ -23,7 +23,7 @@ type RowStats struct { Stats []RowCellStats } -func (rs *RowStats) Data() any { +func (rs *RowStats) SerializableData() any { return rs.Stats } diff --git a/vendor/github.com/stoewer/parquet-cli/pkg/inspect/schema.go b/vendor/github.com/stoewer/parquet-cli/pkg/inspect/schema.go new file mode 100644 index 00000000000..bfed052ac4d --- /dev/null +++ b/vendor/github.com/stoewer/parquet-cli/pkg/inspect/schema.go @@ -0,0 +1,168 @@ +package inspect + +import ( + "fmt" + "io" + "strings" + + "github.com/stoewer/parquet-cli/pkg/output" + + "github.com/parquet-go/parquet-go" +) + +var schemaHeader = [...]any{ + "Index", + "Name", + "Optional", + "Repeated", + "Required", + "Is Leaf", + "Type", + "Go Type", + "Encoding", + "Compression", + "Path", +} + +type Schema struct { + pf *parquet.File + + fields []fieldWithPath + next int +} + +func NewSchema(pf *parquet.File) *Schema { + return &Schema{pf: pf} +} + +func (s *Schema) Text() (string, error) { + textRaw := s.pf.Schema().String() + + var text strings.Builder + for _, r := range textRaw { + if r == '\t' { + text.WriteString(" ") + } else { + text.WriteRune(r) + } + } + + return text.String(), nil +} + +func (s *Schema) Header() []any { + return schemaHeader[:] +} + +func (s *Schema) NextRow() (output.TableRow, error) { + if s.fields == nil { + s.fields = fieldsFromSchema(s.pf.Schema()) + } + if s.next >= len(s.fields) { + return nil, fmt.Errorf("no more fields: %w", io.EOF) + } + + nextField := s.fields[s.next] + s.next++ + return toSchemaNode(&nextField), nil +} + +func (s *Schema) NextSerializable() (any, error) { + return s.NextRow() +} + +func toSchemaNode(n *fieldWithPath) *schemaNode { + sn := &schemaNode{ + Index: n.Index, + Name: n.Name(), + Optional: n.Optional(), + Repeated: n.Repeated(), + Required: n.Required(), + IsLeaf: n.Leaf(), + } + + if n.Leaf() { + sn.Type = n.Type().String() + sn.GoType = n.GoType().String() + if n.Encoding() != nil { + sn.Encoding = n.Encoding().String() + } + if n.Compression() != nil { + sn.Compression = n.Compression().String() + } + } + + if len(n.Path) > 0 { + sn.Path = strings.Join(n.Path, ".") + sn.Name = PathToDisplayName(n.Path) + } + + return sn +} + +type schemaNode struct { + Index int `json:"index,omitempty"` + Name string `json:"name"` + Optional bool `json:"optional"` + Repeated bool `json:"repeated"` + Required bool `json:"required"` + IsLeaf bool `json:"is_leaf"` + Type string `json:"type,omitempty"` + GoType string `json:"go_type,omitempty"` + Encoding string `json:"encoding,omitempty"` + Compression string `json:"compression,omitempty"` + Path string `json:"path,omitempty"` +} + +func (sn *schemaNode) Cells() []any { + return []any{ + sn.Index, + sn.Name, + sn.Optional, + sn.Repeated, + sn.Required, + sn.IsLeaf, + sn.Type, + sn.GoType, + sn.Encoding, + sn.Compression, + sn.Path, + } +} + +type fieldWithPath struct { + parquet.Field + Path []string + Index int +} + +func fieldsFromSchema(schema *parquet.Schema) []fieldWithPath { + result := make([]fieldWithPath, 0) + + for _, field := range schema.Fields() { + result = fieldsFromPathRecursive(field, []string{}, result) + } + + var idx int + for i := range result { + if result[i].Leaf() { + result[i].Index = idx + idx++ + } + } + + return result +} + +func fieldsFromPathRecursive(field parquet.Field, path []string, result []fieldWithPath) []fieldWithPath { + cpy := path[:len(path):len(path)] + path = append(cpy, field.Name()) + + result = append(result, fieldWithPath{Field: field, Path: path}) + + for _, child := range field.Fields() { + result = fieldsFromPathRecursive(child, path, result) + } + + return result +} diff --git a/vendor/github.com/stoewer/parquet-cli/pkg/output/format.go b/vendor/github.com/stoewer/parquet-cli/pkg/output/format.go new file mode 100644 index 00000000000..feef17ebac1 --- /dev/null +++ b/vendor/github.com/stoewer/parquet-cli/pkg/output/format.go @@ -0,0 +1,56 @@ +package output + +import ( + "errors" + "fmt" +) + +// Format describes a printable data representation. +type Format string + +const ( + FormatJSON = "json" + FormatCSV = "csv" + FormatTab = "tab" + FormatText = "text" +) + +func (f *Format) Validate() error { + switch *f { + case FormatJSON, FormatTab, FormatCSV, FormatText: + return nil + default: + return errors.New("output format is expected to be 'json', 'tab', 'text' or 'csv'") + } +} + +func supportedFormats(data any) []Format { + var formats []Format + switch data.(type) { + case Serializable, SerializableIterator: + formats = append(formats, FormatJSON) + case Table, TableIterator: + formats = append(formats, FormatTab, FormatCSV) + case Text: + formats = append(formats, FormatText) + } + return formats +} + +func errUnsupportedFormat(data any, f Format) error { + supported := supportedFormats(data) + + var supportedPretty string + for i, format := range supportedFormats(data) { + if i > 0 { + if i == len(supported)-1 { + supportedPretty += " or " + } else { + supportedPretty += ", " + } + } + supportedPretty += "'" + string(format) + "'" + } + + return fmt.Errorf("format '%s' is not supported must be %s", f, supportedPretty) +} diff --git a/vendor/github.com/stoewer/parquet-cli/pkg/output/interfaces.go b/vendor/github.com/stoewer/parquet-cli/pkg/output/interfaces.go new file mode 100644 index 00000000000..fd6b7867378 --- /dev/null +++ b/vendor/github.com/stoewer/parquet-cli/pkg/output/interfaces.go @@ -0,0 +1,42 @@ +package output + +// A Table represents a tabular data that can also be printed as CSV. +// Suitable for small tables that fit into memory. +type Table interface { + Header() []string + Rows() []TableRow +} + +// A TableIterator that can efficiently be printed as large table or CSV. +// Suitable for larger tables that do not fit into memory. +type TableIterator interface { + // Header returns the header of the table + Header() []any + // NextRow returns a new TableRow until the error is io.EOF + NextRow() (TableRow, error) +} + +// A TableRow represents all data that belongs to a table row. +type TableRow interface { + // Cells returns all table cells for this row. This is used to + // print tabular formats such csv. The returned slice has the same + // length as the header slice returned by the parent TableIterator. + Cells() []any +} + +// Serializable represents data that can be converted to JSON or YAML. +type Serializable interface { + // SerializableData returns arbitrary data that can be converted to formats like JSON or YAML. + SerializableData() any +} + +// SerializableIterator represents a stream of data that can be converted to JSON or YAML. +type SerializableIterator interface { + NextSerializable() (any, error) +} + +// Text represents a multi line text that can be printed but is not a table or another +// structured format such as JSON or YAML. +type Text interface { + Text() (string, error) +} diff --git a/vendor/github.com/stoewer/parquet-cli/pkg/output/output.go b/vendor/github.com/stoewer/parquet-cli/pkg/output/output.go index 44b490bb523..d668c3ddee2 100644 --- a/vendor/github.com/stoewer/parquet-cli/pkg/output/output.go +++ b/vendor/github.com/stoewer/parquet-cli/pkg/output/output.go @@ -2,61 +2,17 @@ package output import ( "bytes" - "encoding/csv" "encoding/json" "errors" "fmt" "io" - "strings" - "text/tabwriter" ) -// Format describes a printable data representation. -type Format string - -const ( - FormatJSON = "json" - FormatCSV = "csv" - FormatTab = "tab" -) - -func (f *Format) Validate() error { - switch *f { - case FormatJSON, FormatTab, FormatCSV: - return nil - default: - return errors.New("output format is expected to be 'json', 'tab', or 'csv'") - } -} - -// A Table that can be printed / encoded in different output formats. -type Table interface { - // Header returns the header of the table - Header() []any - // NextRow returns a new TableRow until the error is io.EOF - NextRow() (TableRow, error) -} - -// SerializableData represents table data that can be converted to JSON. -type SerializableData interface { - // Data returns the table data suitable for structured data formats - // such as json. - Data() any -} - -// A TableRow represents all data that belongs to a table row. -type TableRow interface { - // Cells returns all table cells for this row. This is used to - // print tabular formats such csv. The returned slice has the same - // length as the header slice returned by the parent Table. - Cells() []any -} - -// PrintTable writes the Table data to w using the provided format. -func PrintTable(w io.Writer, f Format, data Table) error { +// PrintTable writes the TableIterator data to w using the provided format. +func PrintTable(w io.Writer, f Format, data TableIterator) error { switch f { case FormatJSON: - return printJSON(w, data) + return printTableToJSON(w, data) case FormatTab: return printTab(w, data) case FormatCSV: @@ -66,73 +22,11 @@ func PrintTable(w io.Writer, f Format, data Table) error { } } -func printTab(w io.Writer, data Table) error { - tw := tabwriter.NewWriter(w, 0, 0, 2, ' ', 0) - - formatBuilder := strings.Builder{} - for range data.Header() { - formatBuilder.WriteString("%v\t") - } - formatBuilder.WriteRune('\n') - format := formatBuilder.String() - - _, err := fmt.Fprintf(tw, format, data.Header()...) - if err != nil { - return err - } - - row, err := data.NextRow() - for err == nil { - _, err = fmt.Fprintf(tw, format, row.Cells()...) - if err != nil { - return err - } - - row, err = data.NextRow() - } - if err != nil && !errors.Is(err, io.EOF) { - return err - } - - return tw.Flush() -} - -func printCSV(w io.Writer, data Table) error { - cw := csv.NewWriter(w) - cw.Comma = ';' - - header := data.Header() - lineBuffer := make([]string, len(header)) - - line := toStringSlice(header, lineBuffer) - err := cw.Write(line) - if err != nil { - return err - } - - row, err := data.NextRow() - for err == nil { - line = toStringSlice(row.Cells(), lineBuffer) - err = cw.Write(line) - if err != nil { - return err - } - - row, err = data.NextRow() - } - if err != nil && !errors.Is(err, io.EOF) { - return err - } - - cw.Flush() - return cw.Error() -} - -func printJSON(w io.Writer, data Table) error { - if serializable, ok := data.(SerializableData); ok { +func printTableToJSON(w io.Writer, data TableIterator) error { + if serializable, ok := data.(Serializable); ok { enc := json.NewEncoder(w) enc.SetIndent("", " ") - return enc.Encode(serializable.Data()) + return enc.Encode(serializable.SerializableData()) } _, err := fmt.Fprintln(w, "[") @@ -153,13 +47,13 @@ func printJSON(w io.Writer, data Table) error { if err != nil { return err } - serializableRow, ok := row.(SerializableData) + serializableRow, ok := row.(Serializable) if !ok { return errors.New("JSON not supported for sub command") } buf.Reset() - err = json.NewEncoder(buf).Encode(serializableRow.Data()) + err = json.NewEncoder(buf).Encode(serializableRow.SerializableData()) if err != nil { return err } @@ -180,24 +74,3 @@ func printJSON(w io.Writer, data Table) error { _, err = fmt.Println("\n]") return err } - -func toStringSlice(in []any, buf []string) []string { - for i, v := range in { - var s string - switch v := v.(type) { - case string: - s = v - case fmt.Stringer: - s = v.String() - default: - s = fmt.Sprint(v) - } - - if i < len(buf) { - buf[i] = s - } else { - buf = append(buf, s) - } - } - return buf[0:len(in)] -} diff --git a/vendor/github.com/stoewer/parquet-cli/pkg/output/print.go b/vendor/github.com/stoewer/parquet-cli/pkg/output/print.go new file mode 100644 index 00000000000..f902475bdc2 --- /dev/null +++ b/vendor/github.com/stoewer/parquet-cli/pkg/output/print.go @@ -0,0 +1,178 @@ +package output + +import ( + "bytes" + "encoding/csv" + "encoding/json" + "errors" + "fmt" + "io" + "strings" + "text/tabwriter" + "unsafe" +) + +type PrintOptions struct { + Format Format + Color bool +} + +func Print(out io.Writer, data any, opts *PrintOptions) error { + switch opts.Format { + case FormatText: + if text, ok := data.(Text); ok { + return printText(out, text) + } + case FormatTab: + if table, ok := data.(TableIterator); ok { + return printTab(out, table) + } + case FormatCSV: + if table, ok := data.(TableIterator); ok { + return printCSV(out, table) + } + case FormatJSON: + if ser, ok := data.(SerializableIterator); ok { + return printJSON(out, ser) + } + } + return errUnsupportedFormat(data, opts.Format) +} + +func printJSON(w io.Writer, data SerializableIterator) error { + _, err := fmt.Fprintln(w, "[") + if err != nil { + return err + } + + var count int + buf := bytes.NewBuffer(make([]byte, 10240)) + next, err := data.NextSerializable() + + for err == nil { + if count > 0 { + _, err = fmt.Fprint(w, ",\n ") + } else { + _, err = fmt.Fprint(w, " ") + } + if err != nil { + return err + } + + buf.Reset() + err = json.NewEncoder(buf).Encode(next) + if err != nil { + return err + } + buf.Truncate(buf.Len() - 1) // remove the newline + + _, err = fmt.Fprint(w, buf) + if err != nil { + return err + } + + count++ + next, err = data.NextSerializable() + } + if !errors.Is(err, io.EOF) { + return err + } + + _, err = fmt.Println("\n]") + return err +} + +func printTab(w io.Writer, data TableIterator) error { + tw := tabwriter.NewWriter(w, 0, 0, 2, ' ', 0) + + formatBuilder := strings.Builder{} + for range data.Header() { + formatBuilder.WriteString("%v\t") + } + formatBuilder.WriteRune('\n') + format := formatBuilder.String() + + _, err := fmt.Fprintf(tw, format, data.Header()...) + if err != nil { + return err + } + + row, err := data.NextRow() + for err == nil { + _, err = fmt.Fprintf(tw, format, row.Cells()...) + if err != nil { + return err + } + + row, err = data.NextRow() + } + if !errors.Is(err, io.EOF) { + return err + } + + return tw.Flush() +} + +func printCSV(w io.Writer, data TableIterator) error { + cw := csv.NewWriter(w) + cw.Comma = ';' + + header := data.Header() + lineBuffer := make([]string, len(header)) + + line := toStringSlice(header, lineBuffer) + err := cw.Write(line) + if err != nil { + return err + } + + row, err := data.NextRow() + for err == nil { + line = toStringSlice(row.Cells(), lineBuffer) + err = cw.Write(line) + if err != nil { + return err + } + + row, err = data.NextRow() + } + if !errors.Is(err, io.EOF) { + return err + } + + cw.Flush() + return cw.Error() +} + +func toStringSlice(in []any, buf []string) []string { + for i, v := range in { + var s string + switch v := v.(type) { + case string: + s = v + case fmt.Stringer: + s = v.String() + default: + s = fmt.Sprint(v) + } + + if i < len(buf) { + buf[i] = s + } else { + buf = append(buf, s) + } + } + return buf[0:len(in)] +} + +func printText(out io.Writer, data Text) error { + s, err := data.Text() + if err != nil { + return fmt.Errorf("unable to print text: %w", err) + } + + b := unsafe.Slice(unsafe.StringData(s), len(s)) + + _, err = out.Write(b) + return err +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 2d25b64a34b..d482321adf0 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1216,7 +1216,7 @@ github.com/spf13/viper/internal/encoding/json github.com/spf13/viper/internal/encoding/toml github.com/spf13/viper/internal/encoding/yaml github.com/spf13/viper/internal/features -# github.com/stoewer/parquet-cli v0.0.7 +# github.com/stoewer/parquet-cli v0.0.9 ## explicit; go 1.21 github.com/stoewer/parquet-cli/pkg/inspect github.com/stoewer/parquet-cli/pkg/output