Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Hydrolix dates 1 #835

Merged
merged 13 commits into from
Oct 14, 2024
1 change: 0 additions & 1 deletion quesma/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ require (
github.com/knadh/koanf/providers/env v1.0.0
github.com/knadh/koanf/providers/file v1.1.2
github.com/knadh/koanf/v2 v2.1.1
github.com/relvacode/iso8601 v1.4.0
github.com/rs/zerolog v1.33.0
github.com/shirou/gopsutil/v3 v3.24.5
github.com/stretchr/testify v1.9.0
Expand Down
2 changes: 0 additions & 2 deletions quesma/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,6 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw=
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
github.com/relvacode/iso8601 v1.4.0 h1:GsInVSEJfkYuirYFxa80nMLbH2aydgZpIf52gYZXUJs=
github.com/relvacode/iso8601 v1.4.0/go.mod h1:FlNp+jz+TXpyRqgmM7tnzHHzBnz776kmAH2h3sZCn0I=
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
Expand Down
33 changes: 25 additions & 8 deletions quesma/kibana/dates.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
package kibana

import (
"quesma/model"
"quesma/util"
"strconv"
"time"
Expand All @@ -17,19 +18,18 @@ func NewDateManager() DateManager {
var acceptableDateTimeFormats = []string{"2006", "2006-01", "2006-01-02", "2006-01-02", "2006-01-02T15",
"2006-01-02T15:04", "2006-01-02T15:04:05", "2006-01-02T15:04:05Z07", "2006-01-02T15:04:05Z07:00"}

// MissingInDateHistogramToUnixTimestamp parses date_histogram's missing field.
// If missing is present, it's in [strict_date_optional_time || epoch_millis] format
// parseStrictDateOptionalTimeOrEpochMillis parses date, which is in [strict_date_optional_time || epoch_millis] format
// (https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-date-format.html)
func (dm DateManager) MissingInDateHistogramToUnixTimestamp(missing any) (unixTimestamp int64, parsingSucceeded bool) {
if asInt, success := util.ExtractInt64Maybe(missing); success {
func (dm DateManager) parseStrictDateOptionalTimeOrEpochMillis(date any) (unixTimestamp int64, parsingSucceeded bool) {
if asInt, success := util.ExtractInt64Maybe(date); success {
return asInt, true
}

if asFloat, success := util.ExtractFloat64Maybe(missing); success {
if asFloat, success := util.ExtractFloat64Maybe(date); success {
return int64(asFloat), true
}

asString, success := missing.(string)
asString, success := date.(string)
if !success {
return -1, false
}
Expand All @@ -41,9 +41,9 @@ func (dm DateManager) MissingInDateHistogramToUnixTimestamp(missing any) (unixTi
const yearOrTsDelimiter = 10000

if asInt, err := strconv.ParseInt(asString, 10, 64); err == nil && asInt >= yearOrTsDelimiter {
return dm.MissingInDateHistogramToUnixTimestamp(asInt)
return dm.parseStrictDateOptionalTimeOrEpochMillis(asInt)
} else if asFloat, err := strconv.ParseFloat(asString, 64); err == nil && asFloat >= yearOrTsDelimiter {
return dm.MissingInDateHistogramToUnixTimestamp(asFloat)
return dm.parseStrictDateOptionalTimeOrEpochMillis(asFloat)
}

// It could be replaced with iso8601.ParseString() after the fixes to 1.4.0:
Expand All @@ -56,3 +56,20 @@ func (dm DateManager) MissingInDateHistogramToUnixTimestamp(missing any) (unixTi

return -1, false
}

// ParseMissingInDateHistogram parses date_histogram's missing field.
// If missing is present, it's in [strict_date_optional_time || epoch_millis] format
// (https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-date-format.html)
func (dm DateManager) ParseMissingInDateHistogram(missing any) (unixTimestamp int64, parsingSucceeded bool) {
return dm.parseStrictDateOptionalTimeOrEpochMillis(missing)
}

// ParseRange parses range filter.
// We assume it's in [strict_date_optional_time || epoch_millis] format (TODO: other formats)
// (https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-date-format.html)
func (dm DateManager) ParseRange(Range any) (timestampExpr model.Expr, parsingSucceeded bool) {
if timestamp, success := dm.parseStrictDateOptionalTimeOrEpochMillis(Range); success {
return model.NewFunction("fromUnixTimestamp64Milli", model.NewLiteral(timestamp)), true
}
return nil, false
}
4 changes: 2 additions & 2 deletions quesma/kibana/dates_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
"testing"
)

func TestDateManager_MissingInDateHistogramToUnixTimestamp(t *testing.T) {
func TestDateManager_parseStrictDateOptionalTimeOrEpochMillis(t *testing.T) {
tests := []struct {
missing any
wantUnixTimestamp int64
Expand Down Expand Up @@ -38,7 +38,7 @@ func TestDateManager_MissingInDateHistogramToUnixTimestamp(t *testing.T) {
for _, tt := range tests {
t.Run(fmt.Sprintf("%v", tt.missing), func(t *testing.T) {
dm := NewDateManager()
gotUnixTs, gotParsingSucceeded := dm.MissingInDateHistogramToUnixTimestamp(tt.missing)
gotUnixTs, gotParsingSucceeded := dm.parseStrictDateOptionalTimeOrEpochMillis(tt.missing)
assert.Equalf(t, tt.wantUnixTimestamp, gotUnixTs, "MissingInDateHistogramToUnixTimestamp(%v)", tt.missing)
assert.Equalf(t, tt.wantParsingSucceeded, gotParsingSucceeded, "MissingInDateHistogramToUnixTimestamp(%v)", tt.missing)
})
Expand Down
12 changes: 6 additions & 6 deletions quesma/queryparser/pancake_aggregation_parser_buckets.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,15 +78,14 @@ func (cw *ClickhouseQueryTranslator) pancakeTryBucketAggregation(aggregation *pa
}
field := cw.parseFieldField(dateHistogram, "date_histogram")

didWeAddMissing := false
weAddedMissing := false
if missingRaw, exists := dateHistogram["missing"]; exists {
if missing, ok := missingRaw.(string); ok {
dateManager := kibana.NewDateManager()
timestamp, parsingTimestampOk := dateManager.MissingInDateHistogramToUnixTimestamp(missing)
if parsingTimestampOk {
if unixTimestamp, parsingOk := dateManager.ParseMissingInDateHistogram(missing); parsingOk {
field = model.NewFunction("COALESCE", field,
model.NewFunction("toDateTime", model.NewLiteral(timestamp)))
didWeAddMissing = true
model.NewFunction("fromUnixTimestamp64Milli", model.NewLiteral(unixTimestamp)))
weAddedMissing = true
} else {
logger.ErrorWithCtx(cw.Ctx).Msgf("unknown format of missing in date_histogram: %v. Skipping it.", missing)
}
Expand All @@ -95,7 +94,8 @@ func (cw *ClickhouseQueryTranslator) pancakeTryBucketAggregation(aggregation *pa
}
}

if !didWeAddMissing {
if !weAddedMissing {
// if we don't add missing, we need to filter out nulls later
aggregation.filterOutEmptyKeyBucket = true
}

Expand Down
131 changes: 55 additions & 76 deletions quesma/queryparser/query_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ import (
"encoding/hex"
"encoding/json"
"fmt"
"github.com/k0kubun/pp"
"quesma/clickhouse"
"quesma/kibana"
"quesma/logger"
"quesma/model"
"quesma/model/bucket_aggregations"
Expand All @@ -19,9 +21,6 @@ import (
"strconv"
"strings"
"unicode"

"github.com/k0kubun/pp"
"github.com/relvacode/iso8601"
)

type QueryMap = map[string]interface{}
Expand Down Expand Up @@ -764,7 +763,6 @@ func (cw *ClickhouseQueryTranslator) parseDateMathExpression(expr string) (strin

exp, err := ParseDateMathExpression(expr)
if err != nil {
logger.Warn().Msgf("error parsing date math expression: %s", expr)
return "", err
}

Expand All @@ -775,7 +773,6 @@ func (cw *ClickhouseQueryTranslator) parseDateMathExpression(expr string) (strin

sql, err := builder.RenderSQL(exp)
if err != nil {
logger.Warn().Msgf("error rendering date math expression: %s", expr)
return "", err
}

Expand All @@ -792,84 +789,81 @@ func (cw *ClickhouseQueryTranslator) parseRange(queryMap QueryMap) model.SimpleQ
return model.NewSimpleQuery(nil, false)
}

for field, v := range queryMap {
field = cw.ResolveField(cw.Ctx, field)
for fieldName, v := range queryMap {
fieldName = cw.ResolveField(cw.Ctx, fieldName)
fieldType := cw.Table.GetDateTimeType(cw.Ctx, cw.ResolveField(cw.Ctx, fieldName))
stmts := make([]model.Expr, 0)
if _, ok := v.(QueryMap); !ok {
logger.WarnWithCtx(cw.Ctx).Msgf("invalid range type: %T, value: %v", v, v)
continue
}
isDatetimeInDefaultFormat := true // in 99% requests, format is "strict_date_optional_time", which we can parse with time.Parse(time.RFC3339Nano, ..)
if format, ok := v.(QueryMap)["format"]; ok && format == "epoch_millis" {
isDatetimeInDefaultFormat = false
}

keysSorted := util.MapKeysSorted(v.(QueryMap))
for _, op := range keysSorted {
v := v.(QueryMap)[op]
var timeFormatFuncName string
var finalLHS, valueToCompare model.Expr
fieldType := cw.Table.GetDateTimeType(cw.Ctx, cw.ResolveField(cw.Ctx, field))
vToPrint := sprint(v)
valueToCompare = model.NewLiteral(vToPrint)
finalLHS = model.NewColumnRef(field)
if !isDatetimeInDefaultFormat {
timeFormatFuncName = "toUnixTimestamp64Milli"
finalLHS = model.NewFunction(timeFormatFuncName, model.NewColumnRef(field))
} else {
switch fieldType {
case clickhouse.DateTime64, clickhouse.DateTime:
if dateTime, ok := v.(string); ok {
// if it's a date, we need to parse it to Clickhouse's DateTime format
// how to check if it does not contain date math expression?
if _, err := iso8601.ParseString(dateTime); err == nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we are not using iso8601, we should drop it from dependencies.

_, timeFormatFuncName = cw.parseDateTimeString(cw.Table, field, dateTime)
// TODO Investigate the quotation below
valueToCompare = model.NewFunction(timeFormatFuncName, model.NewLiteral(fmt.Sprintf("'%s'", dateTime)))
} else if op == "gte" || op == "lte" || op == "gt" || op == "lt" {
vToPrint, err = cw.parseDateMathExpression(vToPrint)
valueToCompare = model.NewLiteral(vToPrint)
if err != nil {
logger.WarnWithCtx(cw.Ctx).Msgf("error parsing date math expression: %s", vToPrint)
return model.NewSimpleQuery(nil, false)
}
}
} else if v == nil {
vToPrint = "NULL"
valueToCompare = model.NewLiteral("NULL")
valueRaw := v.(QueryMap)[op]
value := sprint(valueRaw)
defaultValue := model.NewLiteral(value)
dateManager := kibana.NewDateManager()

// Three stages:
// 1. dateManager.ParseRange
// 2. cw.parseDateMathExpression
// 3. just a number
// Dates use 1-3 and finish as soon as any succeeds
// Numbers use just 3rd

var finalValue model.Expr
doneParsing, isQuoted := false, len(value) > 2 && value[0] == '\'' && value[len(value)-1] == '\''
switch fieldType {
case clickhouse.DateTime, clickhouse.DateTime64:
// TODO add support for "time_zone" parameter in ParseRange
finalValue, doneParsing = dateManager.ParseRange(value) // stage 1

if !doneParsing && (op == "gte" || op == "lte" || op == "gt" || op == "lt") { // stage 2
parsed, err := cw.parseDateMathExpression(value)
if err == nil {
doneParsing = true
finalValue = model.NewLiteral(parsed)
}
case clickhouse.Invalid: // assumes it is number that does not need formatting
if len(vToPrint) > 2 && vToPrint[0] == '\'' && vToPrint[len(vToPrint)-1] == '\'' {
isNumber := true
for _, c := range vToPrint[1 : len(vToPrint)-1] {
if !unicode.IsDigit(c) && c != '.' {
isNumber = false
}
}
if isNumber {
vToPrint = vToPrint[1 : len(vToPrint)-1]
} else {
logger.WarnWithCtx(cw.Ctx).Msgf("we use range with unknown literal %s, field %s", vToPrint, field)
}

if !doneParsing && isQuoted { // stage 3
finalValue, doneParsing = dateManager.ParseRange(value[1 : len(value)-1])
}
case clickhouse.Invalid:
if isQuoted {
isNumber, unquoted := true, value[1:len(value)-1]
for _, c := range unquoted {
if !unicode.IsDigit(c) && c != '.' {
isNumber = false
}
valueToCompare = model.NewLiteral(vToPrint)
}
default:
logger.WarnWithCtx(cw.Ctx).Msgf("invalid DateTime type for field: %s, parsed dateTime value: %s", field, vToPrint)
if isNumber {
finalValue = model.NewLiteral(unquoted)
doneParsing = true
}
}
default:
logger.ErrorWithCtx(cw.Ctx).Msgf("invalid DateTime type for field: %s, parsed dateTime value: %s", fieldName, value)
}

if !doneParsing {
finalValue = defaultValue
}

field := model.NewColumnRef(fieldName)
switch op {
case "gte":
stmt := model.NewInfixExpr(finalLHS, ">=", valueToCompare)
stmt := model.NewInfixExpr(field, ">=", finalValue)
stmts = append(stmts, stmt)
case "lte":
stmt := model.NewInfixExpr(finalLHS, "<=", valueToCompare)
stmt := model.NewInfixExpr(field, "<=", finalValue)
stmts = append(stmts, stmt)
case "gt":
stmt := model.NewInfixExpr(finalLHS, ">", valueToCompare)
stmt := model.NewInfixExpr(field, ">", finalValue)
stmts = append(stmts, stmt)
case "lt":
stmt := model.NewInfixExpr(finalLHS, "<", valueToCompare)
stmt := model.NewInfixExpr(field, "<", finalValue)
stmts = append(stmts, stmt)
case "format":
// ignored
Expand All @@ -885,21 +879,6 @@ func (cw *ClickhouseQueryTranslator) parseRange(queryMap QueryMap) model.SimpleQ
return model.NewSimpleQuery(nil, false)
}

// parseDateTimeString returns string used to parse DateTime in Clickhouse (depends on column type)

func (cw *ClickhouseQueryTranslator) parseDateTimeString(table *clickhouse.Table, field, dateTime string) (string, string) {
typ := table.GetDateTimeType(cw.Ctx, cw.ResolveField(cw.Ctx, field))
switch typ {
case clickhouse.DateTime64:
return "parseDateTime64BestEffort('" + dateTime + "')", "parseDateTime64BestEffort"
case clickhouse.DateTime:
return "parseDateTimeBestEffort('" + dateTime + "')", "parseDateTimeBestEffort"
default:
logger.Error().Msgf("invalid DateTime type: %T for field: %s, parsed dateTime value: %s", typ, field, dateTime)
return "", ""
}
}

// TODO: not supported:
// - The field has "index" : false and "doc_values" : false set in the mapping
// - The length of the field value exceeded an ignore_above setting in the mapping
Expand Down
6 changes: 3 additions & 3 deletions quesma/queryparser/query_parser_range_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ var parseRangeTests = []parseRangeTest{
`CREATE TABLE ` + tableName + `
( "message" String, "timestamp" DateTime64(3, 'UTC') )
ENGINE = Memory`,
`("timestamp">=parseDateTime64BestEffort('2024-02-02T13:47:16.029Z') AND "timestamp"<=parseDateTime64BestEffort('2024-02-09T13:47:16.029Z'))`,
`("timestamp">=fromUnixTimestamp64Milli(1706881636029) AND "timestamp"<=fromUnixTimestamp64Milli(1707486436029))`,
},
{
"parseDateTimeBestEffort",
Expand All @@ -46,7 +46,7 @@ var parseRangeTests = []parseRangeTest{
`CREATE TABLE ` + tableName + `
( "message" String, "timestamp" DateTime )
ENGINE = Memory`,
`("timestamp">=parseDateTimeBestEffort('2024-02-02T13:47:16.029Z') AND "timestamp"<=parseDateTimeBestEffort('2024-02-09T13:47:16.029Z'))`,
`("timestamp">=fromUnixTimestamp64Milli(1706881636029) AND "timestamp"<=fromUnixTimestamp64Milli(1707486436029))`,
},
{
"numeric range",
Expand All @@ -72,7 +72,7 @@ var parseRangeTests = []parseRangeTest{
`CREATE TABLE ` + tableName + `
( "message" String, "timestamp" DateTime64(3, 'UTC') )
ENGINE = Memory`,
`("timestamp">=parseDateTime64BestEffort('2024-02-02T13:47:16') AND "timestamp"<=parseDateTime64BestEffort('2024-02-09T13:47:16'))`,
`("timestamp">=fromUnixTimestamp64Milli(1706881636000) AND "timestamp"<=fromUnixTimestamp64Milli(1707486436000))`,
},
}

Expand Down
4 changes: 2 additions & 2 deletions quesma/quesma/functionality/terms_enum/terms_enum_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ func testHandleTermsEnumRequest(t *testing.T, requestBody []byte) {
}
qt := &queryparser.ClickhouseQueryTranslator{ClickhouseLM: lm, Table: table, Ctx: context.Background(), Schema: s.Tables[schema.TableName(testTableName)]}
// Here we additionally verify that terms for `_tier` are **NOT** included in the SQL query
expectedQuery1 := `SELECT DISTINCT "client_name" FROM ` + testTableName + ` WHERE ("epoch_time">=parseDateTimeBestEffort('2024-02-27T12:25:00.000Z') AND "epoch_time"<=parseDateTimeBestEffort('2024-02-27T12:40:59.999Z')) LIMIT 13`
expectedQuery2 := `SELECT DISTINCT "client_name" FROM ` + testTableName + ` WHERE ("epoch_time"<=parseDateTimeBestEffort('2024-02-27T12:40:59.999Z') AND "epoch_time">=parseDateTimeBestEffort('2024-02-27T12:25:00.000Z')) LIMIT 13`
expectedQuery1 := `SELECT DISTINCT "client_name" FROM ` + testTableName + ` WHERE ("epoch_time">=fromUnixTimestamp64Milli(1709036700000) AND "epoch_time"<=fromUnixTimestamp64Milli(1709037659999)) LIMIT 13`
expectedQuery2 := `SELECT DISTINCT "client_name" FROM ` + testTableName + ` WHERE ("epoch_time">=fromUnixTimestamp64Milli(1709036700000) AND "epoch_time"<=fromUnixTimestamp64Milli(1709037659999)) LIMIT 13`

// Once in a while `AND` conditions could be swapped, so we match both cases
mock.ExpectQuery(fmt.Sprintf("%s|%s", regexp.QuoteMeta(expectedQuery1), regexp.QuoteMeta(expectedQuery2))).
Expand Down
Loading
Loading