Skip to content

Commit

Permalink
Quickfix to remove nondeterminism for pipeline aggr with multiple par…
Browse files Browse the repository at this point in the history
…ents (#943)

All dashboards work after this

Again, hacky, but when we implement fully correct `bucket_script`, hacks
will disappear.
  • Loading branch information
trzysiek authored Nov 1, 2024
1 parent 7f5dd2b commit f527396
Show file tree
Hide file tree
Showing 4 changed files with 186 additions and 11 deletions.
2 changes: 1 addition & 1 deletion quesma/model/bucket_aggregations/auto_date_histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func (query *AutoDateHistogram) TranslateSqlResponseToJson(rows []model.QueryRes
return model.JsonMap{
"buckets": []model.JsonMap{{
"key": query.key,
"key_as_string": time.UnixMilli(query.key).Format("2006-01-02T15:04:05.000-07:00"),
"key_as_string": time.UnixMilli(query.key).UTC().Format("2006-01-02T15:04:05.000"),
"doc_count": rows[0].LastColValue(),
}},
"interval": "100y", // seems working for bucketsNr=1 case. Will have to be changed for other cases.
Expand Down
10 changes: 6 additions & 4 deletions quesma/model/pipeline_aggregations/bucket_script.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,13 @@ func (query BucketScript) findFilterValue(rows []model.QueryResultRow, filterNam
for _, row := range rows {
for _, col := range row.Cols {
colName := col.ColName
if !strings.HasSuffix(colName, "_col_0") {
continue
switch { // remove possible suffix
case strings.HasSuffix(colName, "_col_0"):
colName = strings.TrimSuffix(colName, "_col_0")
case strings.HasSuffix(colName, "__count"):
colName = strings.TrimSuffix(colName, "__count")
}
colName = strings.TrimSuffix(colName, "_col_0")
if strings.HasSuffix(colName, "-"+filterName) {
if strings.HasSuffix(colName, filterName) {
return util.ExtractNumeric64(col.Value)
}
}
Expand Down
15 changes: 9 additions & 6 deletions quesma/queryparser/pipeline_aggregations.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"quesma/logger"
"quesma/model"
"quesma/model/pipeline_aggregations"
"quesma/util"
"strings"
)

Expand Down Expand Up @@ -225,14 +226,16 @@ func (cw *ClickhouseQueryTranslator) parseBucketsPath(shouldBeQueryMap any, aggr
case QueryMap:
// TODO: handle arbitrary nr of keys (and arbitrary scripts, because we also handle only one special case)
if len(bucketsPath) == 1 || len(bucketsPath) == 2 {
for _, bucketPath := range bucketsPath {
if _, ok = bucketPath.(string); !ok {
logger.WarnWithCtx(cw.Ctx).Msgf("buckets_path is not a map with string values, but %T. Skipping this aggregation", bucketPath)
// We return just 1 value here (for smallest key) (determinism here important, returning any of them is incorrect)
// Seems iffy, but works for all cases so far.
// After fixing the TODO above, it should also get fixed.
for _, key := range util.MapKeysSorted(bucketsPath) {
if path, ok := bucketsPath[key].(string); ok {
return path, true
} else {
logger.WarnWithCtx(cw.Ctx).Msgf("buckets_path is not a map with string values, but %T. Skipping this aggregation", path)
return
}
// Kinda weird to return just the first value, but seems working on all cases so far.
// After fixing the TODO above, it should also get fixed.
return bucketPath.(string), true
}
} else {
logger.WarnWithCtx(cw.Ctx).Msgf("buckets_path is not a map with one or two keys, but %d. Skipping this aggregation", len(bucketsPath))
Expand Down
170 changes: 170 additions & 0 deletions quesma/testdata/clients/clover.go
Original file line number Diff line number Diff line change
Expand Up @@ -473,4 +473,174 @@ var CloverTests = []testdata.AggregationTestCase{
fromUnixTimestamp64Milli(1728635627125))`,
AdditionalAcceptableDifference: []string{"key_as_string"}, // timezone differences between local and github runs... There's always 2h difference between those, need to investigate. Maybe come back to .UTC() so there's no "+timezone" (e.g. +02:00)?
},
{ // [6]
TestName: "bucket_script with multiple buckets_path",
QueryRequestJson: `
{
"aggs": {
"timeseries": {
"aggs": {
"f2": {
"bucket_script": {
"buckets_path": {
"denominator": "f2-denominator>_count",
"numerator": "f2-numerator>_count"
},
"script": "params.numerator != null && params.denominator != null && params.denominator != 0 ? params.numerator / params.denominator : 0"
}
},
"f2-denominator": {
"filter": {
"bool": {
"filter": [],
"must": [],
"must_not": [],
"should": []
}
}
},
"f2-numerator": {
"filter": {
"bool": {
"filter": [],
"must": [
{
"query_string": {
"analyze_wildcard": true,
"query": "!_exists_:a.b_str"
}
}
],
"must_not": [],
"should": []
}
}
}
},
"auto_date_histogram": {
"buckets": 1,
"field": "@timestamp"
},
"meta": {
"indexPatternString": "ab*",
"intervalString": "9075600000ms",
"normalized": true,
"panelId": "f0",
"seriesId": "f1",
"timeField": "@timestamp"
}
}
},
"query": {
"bool": {
"filter": [],
"must": [
{
"range": {
"@timestamp": {
"format": "strict_date_optional_time",
"gte": "2024-07-19T14:38:24.783Z",
"lte": "2024-11-01T15:38:24.783Z"
}
}
},
{
"bool": {
"filter": [],
"must": [],
"must_not": [],
"should": []
}
},
{
"bool": {
"filter": [],
"must": [],
"must_not": [],
"should": []
}
}
],
"must_not": [],
"should": []
}
},
"runtime_mappings": {},
"size": 0,
"timeout": "30000ms",
"track_total_hits": true
}`,
ExpectedResponse: `
{
"completion_status": 200,
"completion_time_in_millis": 1730475504882,
"expiration_time_in_millis": 1730898929116,
"id": "quesma_async_0192e860-b4cb-7be4-a193-43d38686c80d",
"is_partial": false,
"is_running": false,
"response": {
"_shards": {
"failed": 0,
"skipped": 0,
"successful": 1,
"total": 1
},
"aggregations": {
"timeseries": {
"buckets": [
{
"f2": {
"value": 0.178
},
"f2-denominator": {
"doc_count": 1000
},
"f2-numerator": {
"doc_count": 178
},
"doc_count": 1000,
"key": 1721399904783,
"key_as_string": "2024-07-19T14:38:24.783"
}
],
"interval": "100y",
"meta": {
"indexPatternString": "ab*",
"intervalString": "9075600000ms",
"normalized": true,
"panelId": "f0",
"seriesId": "f1",
"timeField": "@timestamp"
}
}
},
"hits": {
"hits": [],
"max_score": null,
"total": {
"relation": "eq",
"value": 1000
}
},
"timed_out": false,
"took": 0
},
"start_time_in_millis": 0
}`,
ExpectedPancakeResults: []model.QueryResultRow{
{Cols: []model.QueryResultCol{
model.NewQueryResultCol("aggr__timeseries__count", int64(1000)),
model.NewQueryResultCol("metric__timeseries__f2-numerator_col_0", int64(178)),
model.NewQueryResultCol("aggr__timeseries__f2-denominator__count", int64(1000)),
}},
},
ExpectedPancakeSQL: `
SELECT count(*) AS "aggr__timeseries__count",
countIf(NOT ("a.b_str" IS NOT NULL)) AS
"metric__timeseries__f2-numerator_col_0",
countIf(true) AS "aggr__timeseries__f2-denominator__count"
FROM __quesma_table_name
WHERE ("@timestamp">=fromUnixTimestamp64Milli(1721399904783) AND "@timestamp"<=
fromUnixTimestamp64Milli(1730475504783))`,
},
}

0 comments on commit f527396

Please sign in to comment.