From d4745d38019b1b768022302670495276fb769c02 Mon Sep 17 00:00:00 2001 From: Krzysztof Kiewicz Date: Sat, 28 Dec 2024 17:18:36 +0100 Subject: [PATCH] Quickfix: `bool` column with `terms` aggr (#1145) Response for a `bool` bucket needs to be slightly different. Before: Screenshot 2024-12-28 at 15 42 21 After: Screenshot 2024-12-28 at 15 17 41 --- quesma/model/bucket_aggregations/terms.go | 14 +++- quesma/model/pipeline_aggregations/common.go | 7 +- .../model/pipeline_aggregations/max_bucket.go | 8 +-- .../model/pipeline_aggregations/sum_bucket.go | 8 +-- quesma/model/query_result.go | 9 +++ quesma/testdata/aggregation_requests_2.go | 64 +++++++++++++++++++ .../kibana-visualize/aggregation_requests.go | 6 +- .../pipeline_aggregation_requests.go | 16 +++-- quesma/util/utils.go | 14 ++++ 9 files changed, 118 insertions(+), 28 deletions(-) diff --git a/quesma/model/bucket_aggregations/terms.go b/quesma/model/bucket_aggregations/terms.go index 2c62ec6b2..f816fa3e9 100644 --- a/quesma/model/bucket_aggregations/terms.go +++ b/quesma/model/bucket_aggregations/terms.go @@ -49,13 +49,25 @@ func (query Terms) TranslateSqlResponseToJson(rows []model.QueryResultRow) model for _, row := range rows { docCount := query.docCount(row) bucket := model.JsonMap{ - "key": query.key(row), "doc_count": docCount, } if query.significant { bucket["score"] = docCount bucket["bg_count"] = docCount } + + // response for bool keys is different + key := query.key(row) + if boolPtr, isBoolPtr := key.(*bool); isBoolPtr { + key = *boolPtr + } + if keyAsBool, ok := key.(bool); ok { + bucket["key"] = util.BoolToInt(keyAsBool) + bucket["key_as_string"] = util.BoolToString(keyAsBool) + } else { + bucket["key"] = key + } + response = append(response, bucket) } diff --git a/quesma/model/pipeline_aggregations/common.go b/quesma/model/pipeline_aggregations/common.go index 564d9ffb8..1697ffaf3 100644 --- a/quesma/model/pipeline_aggregations/common.go +++ b/quesma/model/pipeline_aggregations/common.go @@ -43,11 +43,10 @@ func calculateResultWhenMissingCommonForDiffAggregations(ctx context.Context, pa if row.LastColValue() != nil { firstNonNilIndex = i + rowsWithNilValueCnt break - } else { - resultRow := row.Copy() - resultRow.Cols[len(resultRow.Cols)-1].Value = nil - resultRows = append(resultRows, resultRow) } + resultRow := row.Copy() + resultRow.Cols[len(resultRow.Cols)-1].Value = nil + resultRows = append(resultRows, resultRow) } if firstNonNilIndex == -1 { return resultRows diff --git a/quesma/model/pipeline_aggregations/max_bucket.go b/quesma/model/pipeline_aggregations/max_bucket.go index c8a2d587c..da6836f07 100644 --- a/quesma/model/pipeline_aggregations/max_bucket.go +++ b/quesma/model/pipeline_aggregations/max_bucket.go @@ -61,13 +61,7 @@ func (query MaxBucket) calculateSingleMaxBucket(parentRows []model.QueryResultRo var resultValue any var resultKeys []any - firstNonNilIndex := -1 - for i, row := range parentRows { - if row.LastColValue() != nil { - firstNonNilIndex = i - break - } - } + firstNonNilIndex := model.FirstNonNilIndex(parentRows) if firstNonNilIndex == -1 { resultRow := parentRows[0].Copy() resultRow.Cols[len(resultRow.Cols)-1].Value = model.JsonMap{ diff --git a/quesma/model/pipeline_aggregations/sum_bucket.go b/quesma/model/pipeline_aggregations/sum_bucket.go index 2bfafda05..1e38bd449 100644 --- a/quesma/model/pipeline_aggregations/sum_bucket.go +++ b/quesma/model/pipeline_aggregations/sum_bucket.go @@ -60,13 +60,7 @@ func (query SumBucket) CalculateResultWhenMissing(parentRows []model.QueryResult func (query SumBucket) calculateSingleSumBucket(parentRows []model.QueryResultRow) model.QueryResultRow { var resultValue any - firstNonNilIndex := -1 - for i, row := range parentRows { - if row.LastColValue() != nil { - firstNonNilIndex = i - break - } - } + firstNonNilIndex := model.FirstNonNilIndex(parentRows) if firstNonNilIndex == -1 { resultRow := parentRows[0].Copy() resultRow.Cols[len(resultRow.Cols)-1].Value = model.JsonMap{ diff --git a/quesma/model/query_result.go b/quesma/model/query_result.go index 5648e7a1c..348428bff 100644 --- a/quesma/model/query_result.go +++ b/quesma/model/query_result.go @@ -111,3 +111,12 @@ func (r *QueryResultRow) Copy() QueryResultRow { func (r *QueryResultRow) LastColValue() any { return r.Cols[len(r.Cols)-1].Value } + +func FirstNonNilIndex(rows []QueryResultRow) int { + for i, row := range rows { + if row.LastColValue() != nil { + return i + } + } + return -1 +} diff --git a/quesma/testdata/aggregation_requests_2.go b/quesma/testdata/aggregation_requests_2.go index 6ea38f072..aa70d53be 100644 --- a/quesma/testdata/aggregation_requests_2.go +++ b/quesma/testdata/aggregation_requests_2.go @@ -8,6 +8,8 @@ import ( "time" ) +var randomTrueVariableUsedBelow = true + // Goland lags a lot when you edit aggregation_requests.go file, so let's add new tests to this one. var AggregationTests2 = []AggregationTestCase{ @@ -5290,6 +5292,68 @@ var AggregationTests2 = []AggregationTestCase{ LIMIT 3`}, }, { // [77] + TestName: "terms with bool field", + QueryRequestJson: ` + { + "aggs": { + "terms": { + "terms": { + "field": "Cancelled", + "size": 2 + } + } + }, + "size": 0, + "track_total_hits": true + }`, + // I omit "took", "timed_out", "_shards", and "hits" from the response for brevity (they can also be easily unit-tested) + ExpectedResponse: ` + { + "aggregations": { + "terms": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 38654, + "buckets": [ + { + "key": 0, + "key_as_string": "false", + "doc_count": 11344 + }, + { + "key": 1, + "key_as_string": "true", + "doc_count": 2 + } + ] + } + } + }`, + ExpectedPancakeResults: []model.QueryResultRow{ + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__terms__parent_count", int64(50000)), + model.NewQueryResultCol("aggr__terms__key_0", nil), + model.NewQueryResultCol("aggr__terms__count", int64(12000)), + }}, + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__terms__parent_count", int64(50000)), + model.NewQueryResultCol("aggr__terms__key_0", false), + model.NewQueryResultCol("aggr__terms__count", int64(11344)), + }}, + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__terms__parent_count", int64(50000)), + model.NewQueryResultCol("aggr__terms__key_0", &randomTrueVariableUsedBelow), // used here + model.NewQueryResultCol("aggr__terms__count", int64(2)), + }}, + }, + ExpectedPancakeSQL: ` + SELECT sum(count(*)) OVER () AS "aggr__terms__parent_count", + "Cancelled" AS "aggr__terms__key_0", count(*) AS "aggr__terms__count" + FROM __quesma_table_name + GROUP BY "Cancelled" AS "aggr__terms__key_0" + ORDER BY "aggr__terms__count" DESC, "aggr__terms__key_0" ASC + LIMIT 3`, + }, + { // [78] TestName: `Escaping of ', \, \n, and \t in some example aggregations. No tests for other escape characters, e.g. \r or 'b. Add if needed.`, QueryRequestJson: ` { diff --git a/quesma/testdata/kibana-visualize/aggregation_requests.go b/quesma/testdata/kibana-visualize/aggregation_requests.go index 7a0f3fd2d..23c393893 100644 --- a/quesma/testdata/kibana-visualize/aggregation_requests.go +++ b/quesma/testdata/kibana-visualize/aggregation_requests.go @@ -960,7 +960,8 @@ var AggregationTests = []testdata.AggregationTestCase{ "95.0": 15480.335426897316 } }, - "key": false, + "key": 0, + "key_as_string": "false", "doc_count": 2974 }, { @@ -991,7 +992,8 @@ var AggregationTests = []testdata.AggregationTestCase{ "95.0": 14463.254101562497 } }, - "key": true, + "key": 1, + "key_as_string": "true", "doc_count": 419 } ] diff --git a/quesma/testdata/opensearch-visualize/pipeline_aggregation_requests.go b/quesma/testdata/opensearch-visualize/pipeline_aggregation_requests.go index bb683c855..83a492084 100644 --- a/quesma/testdata/opensearch-visualize/pipeline_aggregation_requests.go +++ b/quesma/testdata/opensearch-visualize/pipeline_aggregation_requests.go @@ -988,7 +988,7 @@ var PipelineAggregationTests = []testdata.AggregationTestCase{ "key": 1714871400000, "key_as_string": "2024-05-05T01:10:00.000" }, -{ + { "1": { "value": 0.0 }, @@ -999,7 +999,7 @@ var PipelineAggregationTests = []testdata.AggregationTestCase{ "key": 1714872000000, "key_as_string": "2024-05-05T01:20:00.000" }, -{ + { "1": { "value": 0.0 }, @@ -1010,7 +1010,7 @@ var PipelineAggregationTests = []testdata.AggregationTestCase{ "key": 1714872600000, "key_as_string": "2024-05-05T01:30:00.000" }, -{ + { "1": { "value": 0.0 }, @@ -1021,7 +1021,7 @@ var PipelineAggregationTests = []testdata.AggregationTestCase{ "key": 1714873200000, "key_as_string": "2024-05-05T01:40:00.000" }, -{ + { "1": { "value": 0.0 }, @@ -1032,7 +1032,7 @@ var PipelineAggregationTests = []testdata.AggregationTestCase{ "key": 1714873800000, "key_as_string": "2024-05-05T01:50:00.000" }, -{ + { "1": { "value": 0.0 }, @@ -3345,11 +3345,13 @@ var PipelineAggregationTests = []testdata.AggregationTestCase{ "buckets": [ { "doc_count": 260, - "key": true + "key": 1, + "key_as_string": "true" }, { "doc_count": 1923, - "key": false + "key": 0, + "key_as_string": "false" } ], "doc_count_error_upper_bound": 0, diff --git a/quesma/util/utils.go b/quesma/util/utils.go index 2f58ba916..6dc96759d 100644 --- a/quesma/util/utils.go +++ b/quesma/util/utils.go @@ -737,6 +737,20 @@ func ExtractNumeric64(value any) float64 { return asFloat64 } +func BoolToInt(b bool) int { + if b { + return 1 + } + return 0 +} + +func BoolToString(b bool) string { + if b { + return "true" + } + return "false" +} + // SingleQuote is a simple helper function: str -> 'str' func SingleQuote(value string) string { return "'" + value + "'"