Skip to content

Commit

Permalink
done need update tests
Browse files Browse the repository at this point in the history
  • Loading branch information
trzysiek committed Oct 3, 2024
1 parent 96e56c0 commit 832f6f9
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 103 deletions.
39 changes: 31 additions & 8 deletions quesma/kibana/dates.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
package kibana

import (
"quesma/model"
"quesma/util"
"strconv"
"time"
Expand All @@ -17,19 +18,19 @@ func NewDateManager() DateManager {
var acceptableDateTimeFormats = []string{"2006", "2006-01", "2006-01-02", "2006-01-02", "2006-01-02T15",
"2006-01-02T15:04", "2006-01-02T15:04:05", "2006-01-02T15:04:05Z07", "2006-01-02T15:04:05Z07:00"}

// MissingInDateHistogramToUnixTimestamp parses date_histogram's missing field.
// If missing is present, it's in [strict_date_optional_time || epoch_millis] format
// parseStrictDateOptionalTimeOrEpochMillis parses a date, which is in [strict_date_optional_time || epoch_millis] format
// (https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-date-format.html)
func (dm DateManager) MissingInDateHistogramToUnixTimestamp(missing any) (unixTimestamp int64, parsingSucceeded bool) {
if asInt, success := util.ExtractInt64Maybe(missing); success {

func (dm DateManager) parseStrictDateOptionalTimeOrEpochMillis(date any) (unixTimestamp int64, parsingSucceeded bool) {
if asInt, success := util.ExtractInt64Maybe(date); success {
return asInt, true
}

if asFloat, success := util.ExtractFloat64Maybe(missing); success {
if asFloat, success := util.ExtractFloat64Maybe(date); success {
return int64(asFloat), true
}

asString, success := missing.(string)
asString, success := date.(string)
if !success {
return -1, false
}
Expand All @@ -41,9 +42,9 @@ func (dm DateManager) MissingInDateHistogramToUnixTimestamp(missing any) (unixTi
const yearOrTsDelimiter = 10000

if asInt, err := strconv.ParseInt(asString, 10, 64); err == nil && asInt >= yearOrTsDelimiter {
return dm.MissingInDateHistogramToUnixTimestamp(asInt)
return dm.parseStrictDateOptionalTimeOrEpochMillis(asInt)
} else if asFloat, err := strconv.ParseFloat(asString, 64); err == nil && asFloat >= yearOrTsDelimiter {
return dm.MissingInDateHistogramToUnixTimestamp(asFloat)
return dm.parseStrictDateOptionalTimeOrEpochMillis(asFloat)
}

// It could be replaced with iso8601.ParseString() after the fixes to 1.4.0:
Expand All @@ -56,3 +57,25 @@ func (dm DateManager) MissingInDateHistogramToUnixTimestamp(missing any) (unixTi

return -1, false
}

// ParseMissingInDateHistogram parses date_histogram's missing field.
// If missing is present, it's in [strict_date_optional_time || epoch_millis] format
// (https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-date-format.html)
// On success it returns a toDateTime(<unix timestamp>) expression; otherwise (nil, false).
// NOTE(review): the field parameter is currently unused here — confirm whether it is still needed by callers.
func (dm DateManager) ParseMissingInDateHistogram(field model.Expr, missing any) (timestampExpr model.Expr, parsingSucceeded bool) {
	unixTs, ok := dm.parseStrictDateOptionalTimeOrEpochMillis(missing)
	if !ok {
		return nil, false
	}
	return model.NewFunction("toDateTime", model.NewLiteral(unixTs)), true
}

// ParseRange parses a range filter's boundary value.
// We assume it's in [strict_date_optional_time || epoch_millis] format (TODO: other formats)
// (https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-date-format.html)
// On success it returns a toDateTime(<unix timestamp>) expression; otherwise (nil, false).
func (dm DateManager) ParseRange(rangeValue any) (timestampExpr model.Expr, parsingSucceeded bool) {
	timestamp, success := dm.parseStrictDateOptionalTimeOrEpochMillis(rangeValue)
	if success {
		return model.NewFunction("toDateTime", model.NewLiteral(timestamp)), true
	}
	return nil, false
}
23 changes: 5 additions & 18 deletions quesma/queryparser/pancake_aggregation_parser_buckets.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,24 +79,11 @@ func (cw *ClickhouseQueryTranslator) pancakeTryBucketAggregation(aggregation *pa
}
field := cw.parseFieldField(dateHistogram, "date_histogram")

didWeAddMissing := false
if missingRaw, exists := dateHistogram["missing"]; exists {
if missing, ok := missingRaw.(string); ok {
dateManager := kibana.NewDateManager()
timestamp, parsingTimestampOk := dateManager.MissingInDateHistogramToUnixTimestamp(missing)
if parsingTimestampOk {
field = model.NewFunction("COALESCE", field,
model.NewFunction("toDateTime", model.NewLiteral(timestamp)))
didWeAddMissing = true
} else {
logger.ErrorWithCtx(cw.Ctx).Msgf("unknown format of missing in date_histogram: %v. Skipping it.", missing)
}
} else {
logger.ErrorWithCtx(cw.Ctx).Msgf("missing %v is not a string, but: %T. Skipping it.", missingRaw, missingRaw)
}
}

if !didWeAddMissing {
dateManager := kibana.NewDateManager()
if timestampExpr, parsingMissingSuccess := dateManager.ParseMissingInDateHistogram(field, dateHistogram["missing"]); parsingMissingSuccess {
field = model.NewFunction("COALESCE", field, timestampExpr)
} else {
// if we don't add missing, we need to filter out nulls later
aggregation.filterOutEmptyKeyBucket = true
}

Expand Down
131 changes: 56 additions & 75 deletions quesma/queryparser/query_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ import (
"encoding/hex"
"encoding/json"
"fmt"
"github.com/k0kubun/pp"
"quesma/clickhouse"
"quesma/kibana"
"quesma/logger"
"quesma/model"
"quesma/model/bucket_aggregations"
Expand All @@ -19,9 +21,6 @@ import (
"strconv"
"strings"
"unicode"

"github.com/k0kubun/pp"
"github.com/relvacode/iso8601"
)

type QueryMap = map[string]interface{}
Expand Down Expand Up @@ -789,114 +788,96 @@ func (cw *ClickhouseQueryTranslator) parseRange(queryMap QueryMap) model.SimpleQ
return model.NewSimpleQuery(nil, false)
}

for field, v := range queryMap {
field = cw.ResolveField(cw.Ctx, field)
for fieldName, v := range queryMap {
fieldName = cw.ResolveField(cw.Ctx, fieldName)
fieldType := cw.Table.GetDateTimeType(cw.Ctx, cw.ResolveField(cw.Ctx, fieldName))
stmts := make([]model.Expr, 0)
if _, ok := v.(QueryMap); !ok {
logger.WarnWithCtx(cw.Ctx).Msgf("invalid range type: %T, value: %v", v, v)
continue
}
isDatetimeInDefaultFormat := true // in 99% requests, format is "strict_date_optional_time", which we can parse with time.Parse(time.RFC3339Nano, ..)
if format, ok := v.(QueryMap)["format"]; ok && format == "epoch_millis" {
isDatetimeInDefaultFormat = false
}

keysSorted := util.MapKeysSorted(v.(QueryMap))
for _, op := range keysSorted {
v := v.(QueryMap)[op]
var timeFormatFuncName string
var finalLHS, valueToCompare model.Expr
fieldType := cw.Table.GetDateTimeType(cw.Ctx, cw.ResolveField(cw.Ctx, field))
vToPrint := sprint(v)
valueToCompare = model.NewLiteral(vToPrint)
finalLHS = model.NewColumnRef(field)
if !isDatetimeInDefaultFormat {
timeFormatFuncName = "toUnixTimestamp64Milli"
finalLHS = model.NewFunction(timeFormatFuncName, model.NewColumnRef(field))
} else {
switch fieldType {
case clickhouse.DateTime64, clickhouse.DateTime:
if dateTime, ok := v.(string); ok {
// if it's a date, we need to parse it to Clickhouse's DateTime format
// how to check if it does not contain date math expression?
if _, err := iso8601.ParseString(dateTime); err == nil {
_, timeFormatFuncName = cw.parseDateTimeString(cw.Table, field, dateTime)
// TODO Investigate the quotation below
valueToCompare = model.NewFunction(timeFormatFuncName, model.NewLiteral(fmt.Sprintf("'%s'", dateTime)))
} else if op == "gte" || op == "lte" || op == "gt" || op == "lt" {
vToPrint, err = cw.parseDateMathExpression(vToPrint)
valueToCompare = model.NewLiteral(vToPrint)
if err != nil {
logger.WarnWithCtx(cw.Ctx).Msgf("error parsing date math expression: %s", vToPrint)
return model.NewSimpleQuery(nil, false)
}
}
} else if v == nil {
vToPrint = "NULL"
valueToCompare = model.NewLiteral("NULL")
valueRaw := v.(QueryMap)[op]
value := sprint(valueRaw)
defaultValue := model.NewLiteral(value)
dateManager := kibana.NewDateManager()

// Three stages:
// 1. dateManager.ParseRange
// 2. cw.parseDateMathExpression
// 3. just a number
// Dates use 1-3 and finish as soon as any succeeds
// Numbers use just 3rd

var finalValue model.Expr
doneParsing, isQuoted := false, len(value) > 2 && value[0] == '\'' && value[len(value)-1] == '\''
switch fieldType {
case clickhouse.DateTime, clickhouse.DateTime64:
// TODO add support for "time_zone" parameter in ParseRange
finalValue, doneParsing = dateManager.ParseRange(value) // stage 1

if !doneParsing && (op == "gte" || op == "lte" || op == "gt" || op == "lt") { // stage 2
parsed, err := cw.parseDateMathExpression(value)
if err == nil {
doneParsing = true
finalValue = model.NewLiteral(parsed)
}
case clickhouse.Invalid: // assumes it is number that does not need formatting
if len(vToPrint) > 2 && vToPrint[0] == '\'' && vToPrint[len(vToPrint)-1] == '\'' {
isNumber := true
for _, c := range vToPrint[1 : len(vToPrint)-1] {
if !unicode.IsDigit(c) && c != '.' {
isNumber = false
}
}
if isNumber {
vToPrint = vToPrint[1 : len(vToPrint)-1]
} else {
logger.WarnWithCtx(cw.Ctx).Msgf("we use range with unknown literal %s, field %s", vToPrint, field)
}

if !doneParsing && isQuoted { // stage 3
finalValue, doneParsing = dateManager.ParseRange(value[1 : len(value)-1])
}
case clickhouse.Invalid:
if isQuoted {
isNumber, unquoted := true, value[1:len(value)-1]
for _, c := range unquoted {
if !unicode.IsDigit(c) && c != '.' {
isNumber = false
}
valueToCompare = model.NewLiteral(vToPrint)
}
default:
logger.WarnWithCtx(cw.Ctx).Msgf("invalid DateTime type for field: %s, parsed dateTime value: %s", field, vToPrint)
if isNumber {
finalValue = model.NewLiteral(unquoted)
doneParsing = true
}
}
default:
logger.ErrorWithCtx(cw.Ctx).Msgf("invalid DateTime type for field: %s, parsed dateTime value: %s", fieldName, value)
}

if !doneParsing {
finalValue = defaultValue
}

field := model.NewColumnRef(fieldName)
switch op {
case "gte":
stmt := model.NewInfixExpr(finalLHS, ">=", valueToCompare)
stmt := model.NewInfixExpr(field, ">=", finalValue)
stmts = append(stmts, stmt)
case "lte":
stmt := model.NewInfixExpr(finalLHS, "<=", valueToCompare)
stmt := model.NewInfixExpr(field, "<=", finalValue)
stmts = append(stmts, stmt)
case "gt":
stmt := model.NewInfixExpr(finalLHS, ">", valueToCompare)
stmt := model.NewInfixExpr(field, ">", finalValue)
stmts = append(stmts, stmt)
case "lt":
stmt := model.NewInfixExpr(finalLHS, "<", valueToCompare)
stmt := model.NewInfixExpr(field, "<", finalValue)
stmts = append(stmts, stmt)
case "format":
// ignored
default:
logger.WarnWithCtx(cw.Ctx).Msgf("invalid range operator: %s", op)
}
}
return model.NewSimpleQueryWithFieldName(model.And(stmts), true, field)
return model.NewSimpleQueryWithFieldName(model.And(stmts), true, fieldName)
}

// unreachable unless something really weird happens
logger.ErrorWithCtx(cw.Ctx).Msg("theoretically unreachable code")
return model.NewSimpleQuery(nil, false)
}

// parseDateTimeString returns the string used to parse a DateTime in Clickhouse
// (depends on field's column type). It returns two values: the full parse
// expression already applied to dateTime, and the bare parse-function name.
// On an unrecognized column type it logs an error and returns two empty strings.
func (cw *ClickhouseQueryTranslator) parseDateTimeString(table *clickhouse.Table, field, dateTime string) (string, string) {
	typ := table.GetDateTimeType(cw.Ctx, cw.ResolveField(cw.Ctx, field))
	switch typ {
	case clickhouse.DateTime64:
		return "parseDateTime64BestEffort('" + dateTime + "')", "parseDateTime64BestEffort"
	case clickhouse.DateTime:
		return "parseDateTimeBestEffort('" + dateTime + "')", "parseDateTimeBestEffort"
	default:
		logger.Error().Msgf("invalid DateTime type: %T for field: %s, parsed dateTime value: %s", typ, field, dateTime)
		return "", ""
	}
}

// TODO: not supported:
// - The field has "index" : false and "doc_values" : false set in the mapping
// - The length of the field value exceeded an ignore_above setting in the mapping
Expand Down
5 changes: 3 additions & 2 deletions quesma/testdata/requests.go
Original file line number Diff line number Diff line change
Expand Up @@ -2659,11 +2659,12 @@ var TestSearchFilter = []SearchTestCase{
//},
[]string{},
[]string{
`SELECT qqqsum(count(*)) OVER () AS "metric____quesma_total_count_col_0",
`SELECT sum(count(*)) OVER () AS "metric____quesma_total_count_col_0",
toInt64(toUnixTimestamp64Milli("@timestamp") / 30000) AS "aggr__0__key_0",
count(*) AS "aggr__0__count"
FROM __quesma_table_name
WHERE "@timestamp">subDate(now(), INTERVAL 15 minute)
WHERE ("@timestamp">=toDateTime(1727858503270) AND "@timestamp"<=toDateTime(
1727859403270))
GROUP BY toInt64(toUnixTimestamp64Milli("@timestamp") / 30000) AS
"aggr__0__key_0"
ORDER BY "aggr__0__key_0" ASC`,
Expand Down

0 comments on commit 832f6f9

Please sign in to comment.