From f09dfbcda238b880e8ae0b1a924a59c868a505e2 Mon Sep 17 00:00:00 2001 From: Aleksandr Razumov Date: Sun, 19 Jun 2022 17:39:28 +0300 Subject: [PATCH] feat(proto): improve DateTime64 precision handling --- block_fuzz_test.go | 5 ++- otel_test.go | 4 +-- proto/col_datetime64.go | 74 ++++++++++++++++++++++++++++++++++++----- proto/datetime64.go | 45 +++++++------------------ 4 files changed, 81 insertions(+), 47 deletions(-) diff --git a/block_fuzz_test.go b/block_fuzz_test.go index f7accf90..b8c0ba92 100644 --- a/block_fuzz_test.go +++ b/block_fuzz_test.go @@ -188,12 +188,11 @@ func FuzzDecodeBlockAuto(f *testing.F) { {}, {100}, }), - proto.ColDateTime64{ - Precision: 9, + (&proto.ColDateTime64{ Data: []proto.DateTime64{ 1, 2, 3, }, - }, + }).WithPrecision(9), makeArr[string](new(proto.ColStr).LowCardinality(), [][]string{ {"foo", "bar", "baz"}, {"1000", "20000", "3000", "40000", "5000", "6000", "abc"}, diff --git a/otel_test.go b/otel_test.go index 01794907..7cfc8323 100644 --- a/otel_test.go +++ b/otel_test.go @@ -59,7 +59,7 @@ type OTEL struct { func (t *OTEL) Input() proto.Input { return proto.Input{ {Name: "body", Data: t.Body}, - {Name: "timestamp", Data: t.Timestamp.Wrap(proto.PrecisionNano)}, + {Name: "timestamp", Data: t.Timestamp}, {Name: "trace_id", Data: t.TraceID}, {Name: "span_id", Data: t.SpanID}, {Name: "severity_text", Data: &t.SevText}, @@ -91,7 +91,7 @@ type OTELRow struct { func (t *OTEL) Append(row OTELRow) { t.Body.AppendBytes(row.Body) - t.Timestamp.Append(proto.DateTime64(row.Timestamp).Time(proto.PrecisionNano)) + t.Timestamp.AppendRaw(proto.DateTime64(row.Timestamp)) t.SevNumber.Append(row.SeverityNumber) t.SevText.Append(row.SeverityText) diff --git a/proto/col_datetime64.go b/proto/col_datetime64.go index a676ecad..d929da95 100644 --- a/proto/col_datetime64.go +++ b/proto/col_datetime64.go @@ -1,7 +1,9 @@ package proto import ( + "fmt" "strconv" + "strings" "time" "github.com/go-faster/errors" @@ -14,14 +16,19 @@ var ( ) // ColDateTime64 
implements ColumnOf[time.Time]. +// +// If Precision is not set, Append and Row() panic. +// Use ColDateTime64Raw to work with raw DateTime64 values. type ColDateTime64 struct { - Data []DateTime64 - Precision Precision - Location *time.Location + Data []DateTime64 + Location *time.Location + Precision Precision + PrecisionSet bool } func (c *ColDateTime64) WithPrecision(p Precision) *ColDateTime64 { c.Precision = p + c.PrecisionSet = true return c } @@ -39,14 +46,26 @@ func (c *ColDateTime64) Reset() { } func (c ColDateTime64) Type() ColumnType { - sub := ColumnType(strconv.Itoa(int(c.Precision))) - return ColumnTypeDateTime64.Sub(sub) + var elems []string + if p := c.Precision; c.PrecisionSet { + elems = append(elems, strconv.Itoa(int(p))) + } + if loc := c.Location; loc != nil { + elems = append(elems, fmt.Sprintf(`'%s'`, loc)) + } + return ColumnTypeDateTime64.With(elems...) } func (c *ColDateTime64) Infer(t ColumnType) error { - // TODO(ernado): handle (ignore) timezone - pRaw := t.Elem() - n, err := strconv.ParseUint(string(pRaw), 10, 8) + elem := string(t.Elem()) + if elem == "" { + return errors.Errorf("invalid DateTime64: no elements in %q", t) + } + elems := strings.SplitN(elem, ",", 2) + for i := range elems { + elems[i] = strings.Trim(elems[i], `' `) + } + n, err := strconv.ParseUint(elems[0], 10, 8) if err != nil { return errors.Wrap(err, "parse precision") } @@ -55,10 +74,21 @@ func (c *ColDateTime64) Infer(t ColumnType) error { return errors.Errorf("precision %d is invalid", n) } c.Precision = p + c.PrecisionSet = true + if len(elems) > 1 { + loc, err := time.LoadLocation(elems[1]) + if err != nil { + return errors.Wrap(err, "invalid location") + } + c.Location = loc + } return nil } func (c ColDateTime64) Row(i int) time.Time { + if !c.PrecisionSet { + panic("DateTime64: no precision set") + } return c.Data[i].Time(c.Precision).In(c.loc()) } @@ -70,6 +100,32 @@ func (c ColDateTime64) loc() *time.Location { return c.Location } +func (c
*ColDateTime64) AppendRaw(v DateTime64) { + c.Data = append(c.Data, v) +} + func (c *ColDateTime64) Append(v time.Time) { - c.Data = append(c.Data, ToDateTime64(v, c.Precision)) + if !c.PrecisionSet { + panic("DateTime64: no precision set") + } + c.AppendRaw(ToDateTime64(v, c.Precision)) } + +// Raw version of ColDateTime64 for ColumnOf[DateTime64]. +func (c ColDateTime64) Raw() *ColDateTime64Raw { + return &ColDateTime64Raw{ColDateTime64: c} +} + +var ( + _ ColumnOf[DateTime64] = (*ColDateTime64Raw)(nil) + _ Inferable = (*ColDateTime64Raw)(nil) + _ Column = (*ColDateTime64Raw)(nil) +) + +// ColDateTime64Raw is DateTime64 wrapper to implement ColumnOf[DateTime64]. +type ColDateTime64Raw struct { + ColDateTime64 +} + +func (c *ColDateTime64Raw) Append(v DateTime64) { c.AppendRaw(v) } +func (c ColDateTime64Raw) Row(i int) DateTime64 { return c.Data[i] } diff --git a/proto/datetime64.go b/proto/datetime64.go index 8cfb4c93..ea87ebf5 100644 --- a/proto/datetime64.go +++ b/proto/datetime64.go @@ -13,11 +13,7 @@ type Precision byte // Duration returns duration of single tick for precision. func (p Precision) Duration() time.Duration { - d := time.Nanosecond - for i := PrecisionNano; i > p; i-- { - d *= 10 - } - return d + return time.Nanosecond * time.Duration(p.Scale()) } // Valid reports whether precision is valid. @@ -25,6 +21,14 @@ func (p Precision) Valid() bool { return p <= PrecisionMax } +func (p Precision) Scale() int64 { + d := int64(1) + for i := PrecisionNano; i > p; i-- { + d *= 10 + } + return d +} + const ( // PrecisionSecond is one second precision. PrecisionSecond Precision = 0 @@ -46,38 +50,13 @@ type DateTime64 int64 // ToDateTime64 converts time.Time to DateTime64. 
func ToDateTime64(t time.Time, p Precision) DateTime64 { - switch p { - case PrecisionMicro: - return DateTime64(t.UnixMicro()) - case PrecisionMilli: - return DateTime64(t.UnixMilli()) - case PrecisionNano: - return DateTime64(t.UnixNano()) - case PrecisionSecond: - return DateTime64(t.Unix()) - default: - // TODO(ernado): support all precisions - panic("precision not supported") - } + return DateTime64(t.UnixNano() / p.Scale()) } // Time returns DateTime64 as time.Time. func (d DateTime64) Time(p Precision) time.Time { - switch p { - case PrecisionMicro: - return time.UnixMicro(int64(d)) - case PrecisionMilli: - return time.UnixMilli(int64(d)) - case PrecisionNano: - nsec := int64(d) - return time.Unix(nsec/1e9, nsec%1e9) - case PrecisionSecond: - sec := int64(d) - return time.Unix(sec, 0) - default: - // TODO(ernado): support all precisions - panic("precision not supported") - } + nsec := int64(d) * p.Scale() + return time.Unix(nsec/1e9, nsec%1e9) } // Wrap column with explicit precision.