Skip to content

Commit

Permalink
Missing bucket results for date_histogram aggregation (#1113)
Browse files Browse the repository at this point in the history
  • Loading branch information
trzysiek authored Dec 17, 2024
1 parent 526e5c1 commit db1feea
Show file tree
Hide file tree
Showing 8 changed files with 208 additions and 39 deletions.
1 change: 0 additions & 1 deletion quesma/model/bucket_aggregations/date_histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ import (
type DateHistogramIntervalType bool

const (
DefaultMinDocCount = -1
DateHistogramFixedInterval DateHistogramIntervalType = true
DateHistogramCalendarInterval DateHistogramIntervalType = false
defaultDateTimeType = clickhouse.DateTime64
Expand Down
9 changes: 7 additions & 2 deletions quesma/model/bucket_aggregations/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,14 @@ func (query *HistogramRowsTransformer) Transform(ctx context.Context, rowsFromDB
return postprocessedRows
}

// we're sure key is float64
// we're sure key is either float64, or in rare cases nil
func (query *HistogramRowsTransformer) getKeyFloat64(row model.QueryResultRow) (float64, bool) {
return row.Cols[len(row.Cols)-2].Value.(float64), true
switch val := row.Cols[len(row.Cols)-2].Value.(type) {
case float64:
return val, true
default:
return -1, false
}
}

// we don't know the type
Expand Down
17 changes: 0 additions & 17 deletions quesma/queryparser/aggregation_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (
"quesma/clickhouse"
"quesma/logger"
"quesma/model"
"quesma/model/bucket_aggregations"
"regexp"
"slices"
"strconv"
Expand Down Expand Up @@ -366,22 +365,6 @@ func (cw *ClickhouseQueryTranslator) parseFieldFromScriptField(queryMap QueryMap
return
}

func (cw *ClickhouseQueryTranslator) parseMinDocCount(queryMap QueryMap) int {
if minDocCountRaw, exists := queryMap["min_doc_count"]; exists {
if minDocCount, ok := minDocCountRaw.(float64); ok {
asInt := int(minDocCount)
if asInt != 0 && asInt != 1 {
logger.WarnWithCtx(cw.Ctx).Msgf("min_doc_count is not 0 or 1, but %d. Not really supported", asInt)
}
return asInt
} else {
logger.WarnWithCtx(cw.Ctx).Msgf("min_doc_count is not a number, but %T, value: %v. Using default value: %d",
minDocCountRaw, minDocCountRaw, bucket_aggregations.DefaultMinDocCount)
}
}
return bucket_aggregations.DefaultMinDocCount
}

// quoteArray returns a new array with the same elements, but quoted
func quoteArray(array []string) []string {
quotedArray := make([]string, 0, len(array))
Expand Down
17 changes: 17 additions & 0 deletions quesma/queryparser/pancake_aggregation_parser_buckets.go
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,23 @@ func (cw *ClickhouseQueryTranslator) parseOrder(params QueryMap, fieldExpression
return fullOrderBy, nil
}

func (cw *ClickhouseQueryTranslator) parseMinDocCount(queryMap QueryMap) int {
const defaultMinDocCount = 0
if minDocCountRaw, exists := queryMap["min_doc_count"]; exists {
if minDocCount, ok := minDocCountRaw.(float64); ok {
asInt := int(minDocCount)
if asInt != 0 && asInt != 1 {
logger.WarnWithCtx(cw.Ctx).Msgf("min_doc_count is not 0 or 1, but %d. Not really supported", asInt)
}
return asInt
} else {
logger.WarnWithCtx(cw.Ctx).Msgf("min_doc_count is not a number, but %T, value: %v. Using default value: %d",
minDocCountRaw, minDocCountRaw, defaultMinDocCount)
}
}
return defaultMinDocCount
}

// addMissingParameterIfPresent parses 'missing' parameter from 'params'.
func (cw *ClickhouseQueryTranslator) addMissingParameterIfPresent(field model.Expr, params QueryMap) (updatedField model.Expr, didWeAddMissing bool) {
if params["missing"] == nil {
Expand Down
33 changes: 23 additions & 10 deletions quesma/testdata/aggregation_requests.go
Original file line number Diff line number Diff line change
Expand Up @@ -329,10 +329,6 @@ var AggregationTests = []AggregationTestCase{
"aggs": {
"1": {
"date_histogram": {
"extended_bounds": {
"max": 1707486436029,
"min": 1706881636029
},
"field": "timestamp",
"fixed_interval": "3h",
"time_zone": "Europe/Warsaw"
Expand Down Expand Up @@ -2631,7 +2627,8 @@ var AggregationTests = []AggregationTestCase{
"series": {
"date_histogram": {
"field": "@timestamp",
"fixed_interval": "60s"
"fixed_interval": "60s",
"min_doc_count": 12
}
}
}
Expand Down Expand Up @@ -2869,10 +2866,6 @@ var AggregationTests = []AggregationTestCase{
},
"date_histogram": {
"calendar_interval": "1d",
"extended_bounds": {
"max": 1708969256351,
"min": 1708364456351
},
"field": "order_date"
}
}
Expand Down Expand Up @@ -3106,7 +3099,7 @@ var AggregationTests = []AggregationTestCase{
"fixed_interval": "12h",
"extended_bounds": {
"min": 1708627654149,
"max": 1709232454149
"max": 1708782454149
}
},
"aggs": {
Expand Down Expand Up @@ -3215,6 +3208,26 @@ var AggregationTests = []AggregationTestCase{
"doc_count": 83,
"key": 1708689600000,
"key_as_string": "2024-02-23T12:00:00.000"
},
{
"1-bucket": {
"1-metric": {
"value": null
}
},
"doc_count": 0,
"key": 1708732800000,
"key_as_string": "2024-02-24T00:00:00.000"
},
{
"1-bucket": {
"1-metric": {
"value": null
}
},
"doc_count": 0,
"key": 1708776000000,
"key_as_string": "2024-02-24T12:00:00.000"
}
]
}
Expand Down
15 changes: 15 additions & 0 deletions quesma/testdata/aggregation_requests_2.go
Original file line number Diff line number Diff line change
Expand Up @@ -3377,6 +3377,11 @@ var AggregationTests2 = []AggregationTestCase{
"key": 1706021640000,
"key_as_string": "2024-01-23T14:54:00.000"
},
{
"doc_count": 0,
"key": 1706021670000,
"key_as_string": "2024-01-23T14:54:30.000"
},
{
"doc_count": 17,
"key": 1706021700000,
Expand Down Expand Up @@ -3632,11 +3637,21 @@ var AggregationTests2 = []AggregationTestCase{
"sum_other_doc_count": 1917
}
},
{
"doc_count": 0,
"key": 1706021670000,
"key_as_string": "2024-01-23T14:54:30.000"
},
{
"doc_count": 17,
"key": 1706021700000,
"key_as_string": "2024-01-23T14:55:00.000"
},
{
"doc_count": 0,
"key": 1706021730000,
"key_as_string": "2024-01-23T14:55:30.000"
},
{
"doc_count": 15,
"key": 1706021760000,
Expand Down
132 changes: 131 additions & 1 deletion quesma/testdata/dates.go
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ var AggregationTestsWithDates = []AggregationTestCase{
("@timestamp", 'Europe/Warsaw'))*1000) / 10000) AS "aggr__timeseries__key_0"
ORDER BY "aggr__timeseries__key_0" ASC`,
},
{ // [1]
{ // [2]
TestName: "extended_bounds post keys (timezone calculations most tricky to get right)",
QueryRequestJson: `
{
Expand Down Expand Up @@ -504,4 +504,134 @@ var AggregationTestsWithDates = []AggregationTestCase{
("@timestamp", 'Europe/Warsaw'))*1000) / 10000) AS "aggr__timeseries__key_0"
ORDER BY "aggr__timeseries__key_0" ASC`,
},
{ // [3]
TestName: "empty results, we still should add empty buckets, because of the extended_bounds and min_doc_count defaulting to 0",
QueryRequestJson: `
{
"_source": {
"excludes": []
},
"aggs": {
"0": {
"aggs": {
"1": {
"sum": {
"field": "body_bytes_sent"
}
}
},
"date_histogram": {
"calendar_interval": "1d",
"extended_bounds": {
"min": 1732327903466,
"max": 1732713503466
},
"field": "@timestamp",
"time_zone": "Europe/Warsaw"
}
}
},
"query": {
"bool": {
"filter": [
{
"range": {
"@timestamp": {
"format": "strict_date_optional_time",
"gte": "2009-11-27T13:18:23.466Z",
"lte": "2024-11-27T13:18:23.466Z"
}
}
}
],
"must": [],
"must_not": [],
"should": []
}
},
"runtime_mappings": {},
"script_fields": {},
"size": 0,
"stored_fields": [
"*"
],
"track_total_hits": true
}`,
ExpectedResponse: `
{
"completion_time_in_millis": 1707486436398,
"expiration_time_in_millis": 1707486496397,
"is_partial": false,
"is_running": false,
"response": {
"_shards": {
"failed": 0,
"skipped": 0,
"successful": 1,
"total": 1
},
"aggregations": {
"0": {
"buckets": [
{
"doc_count": 0,
"key": 1732402800000,
"key_as_string": "2024-11-23T23:00:00.000",
"1": {
"value": null
}
},
{
"doc_count": 0,
"key": 1732489200000,
"key_as_string": "2024-11-24T23:00:00.000",
"1": {
"value": null
}
},
{
"doc_count": 0,
"key": 1732575600000,
"key_as_string": "2024-11-25T23:00:00.000",
"1": {
"value": null
}
},
{
"doc_count": 0,
"key": 1732662000000,
"key_as_string": "2024-11-26T23:00:00.000",
"1": {
"value": null
}
}
]
}
},
"hits": {
"hits": [],
"max_score": null,
"total": {
"relation": "eq",
"value": 2200
}
},
"timed_out": false,
"took": 1
},
"start_time_in_millis": 1707486436397
}`,
ExpectedPancakeResults: []model.QueryResultRow{},
ExpectedPancakeSQL: `
SELECT toInt64((toUnixTimestamp64Milli("@timestamp")+timeZoneOffset(toTimezone(
"@timestamp", 'Europe/Warsaw'))*1000) / 86400000) AS "aggr__0__key_0",
count(*) AS "aggr__0__count",
sumOrNull("body_bytes_sent") AS "metric__0__1_col_0"
FROM __quesma_table_name
WHERE ("@timestamp">=fromUnixTimestamp64Milli(1259327903466) AND "@timestamp"<=
fromUnixTimestamp64Milli(1732713503466))
GROUP BY toInt64((toUnixTimestamp64Milli("@timestamp")+timeZoneOffset(toTimezone
("@timestamp", 'Europe/Warsaw'))*1000) / 86400000) AS "aggr__0__key_0"
ORDER BY "aggr__0__key_0" ASC`,
},
}
23 changes: 15 additions & 8 deletions quesma/testdata/kibana-visualize/aggregation_requests.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,6 @@ var AggregationTests = []testdata.AggregationTestCase{
}
},
"date_histogram": {
"extended_bounds": {
"max": 1716812096627,
"min": 1716811196627
},
"field": "@timestamp",
"fixed_interval": "30s",
"time_zone": "Europe/Warsaw"
Expand Down Expand Up @@ -141,6 +137,15 @@ var AggregationTests = []testdata.AggregationTestCase{
"key": 1716827010000,
"key_as_string": "2024-05-27T16:23:30.000"
},
{
"doc_count": 0,
"key": 1716827040000,
"key_as_string": "2024-05-27T16:24:00.000",
"1": {
"buckets": [],
"sum_other_doc_count": 0
}
},
{
"1": {
"buckets": [
Expand Down Expand Up @@ -267,10 +272,6 @@ var AggregationTests = []testdata.AggregationTestCase{
"aggs": {
"1": {
"date_histogram": {
"extended_bounds": {
"max": 1716812073493,
"min": 1716811173493
},
"field": "@timestamp",
"fixed_interval": "30s"
}
Expand Down Expand Up @@ -345,6 +346,11 @@ var AggregationTests = []testdata.AggregationTestCase{
"key": 1716834450000,
"key_as_string": "2024-05-27T18:27:30.000"
},
{
"doc_count": 0,
"key": 1716834480000,
"key_as_string": "2024-05-27T18:28:00.000"
},
{
"doc_count": 2,
"key": 1716834510000,
Expand Down Expand Up @@ -454,6 +460,7 @@ var AggregationTests = []testdata.AggregationTestCase{
"max": 1716834478178,
"min": 1716833578178
},
"min_doc_count": 1,
"field": "@timestamp",
"fixed_interval": "30s"
}
Expand Down

0 comments on commit db1feea

Please sign in to comment.