From 046303e978e623e8cf1c9160dcfc12c7fdb0f8ad Mon Sep 17 00:00:00 2001 From: Krzysztof Kiewicz Date: Sat, 28 Dec 2024 14:59:39 +0100 Subject: [PATCH 1/4] Done --- quesma/model/bucket_aggregations/terms.go | 64 ++++++++++++++++--- quesma/model/pipeline_aggregations/common.go | 7 +- .../model/pipeline_aggregations/max_bucket.go | 8 +-- .../model/pipeline_aggregations/sum_bucket.go | 8 +-- quesma/model/query_result.go | 9 +++ .../pancake_sql_query_generation_test.go | 4 ++ quesma/testdata/aggregation_requests_2.go | 63 ++++++++++++++++++ .../kibana-visualize/aggregation_requests.go | 6 +- .../pipeline_aggregation_requests.go | 16 +++-- quesma/util/utils.go | 13 ++++ 10 files changed, 162 insertions(+), 36 deletions(-) diff --git a/quesma/model/bucket_aggregations/terms.go b/quesma/model/bucket_aggregations/terms.go index e9c7dbfd4..bd12db889 100644 --- a/quesma/model/bucket_aggregations/terms.go +++ b/quesma/model/bucket_aggregations/terms.go @@ -4,6 +4,7 @@ package bucket_aggregations import ( "context" + "fmt" "quesma/logger" "quesma/model" "quesma/util" @@ -32,18 +33,63 @@ func (query Terms) TranslateSqlResponseToJson(rows []model.QueryResultRow) model return model.JsonMap{} } - var response []model.JsonMap + fmt.Println(rows) + + var keyIsBool bool for _, row := range rows { - docCount := query.docCount(row) - bucket := model.JsonMap{ - "key": query.key(row), - "doc_count": docCount, + switch query.key(row).(type) { + case bool, *bool: + keyIsBool = true + break + case nil: + continue + default: + keyIsBool = false + break + } + } + + var response []model.JsonMap + if !keyIsBool { + for _, row := range rows { + docCount := query.docCount(row) + bucket := model.JsonMap{ + "key": query.key(row), + "doc_count": docCount, + } + if query.significant { + bucket["score"] = docCount + bucket["bg_count"] = docCount + } + response = append(response, bucket) } - if query.significant { - bucket["score"] = docCount - bucket["bg_count"] = docCount + } else { // key is bool + for _, row := range rows { + var ( + key any + keyAsString string + ) + if val, err := util.ExtractBool(query.key(row)); err == nil { + if val { + key = 1 + keyAsString = "true" + } else { + key = 0 + keyAsString = "false" + } + } + docCount := query.docCount(row) + bucket := model.JsonMap{ + "key": key, + "key_as_string": keyAsString, + "doc_count": docCount, + } + if query.significant { + bucket["score"] = docCount + bucket["bg_count"] = docCount + } + response = append(response, bucket) } - response = append(response, bucket) } if !query.significant { diff --git a/quesma/model/pipeline_aggregations/common.go b/quesma/model/pipeline_aggregations/common.go index 564d9ffb8..1697ffaf3 100644 --- a/quesma/model/pipeline_aggregations/common.go +++ b/quesma/model/pipeline_aggregations/common.go @@ -43,11 +43,10 @@ func calculateResultWhenMissingCommonForDiffAggregations(ctx context.Context, pa if row.LastColValue() != nil { firstNonNilIndex = i + rowsWithNilValueCnt break - } else { - resultRow := row.Copy() - resultRow.Cols[len(resultRow.Cols)-1].Value = nil - resultRows = append(resultRows, resultRow) } + resultRow := row.Copy() + resultRow.Cols[len(resultRow.Cols)-1].Value = nil + resultRows = append(resultRows, resultRow) } if firstNonNilIndex == -1 { return resultRows diff --git a/quesma/model/pipeline_aggregations/max_bucket.go b/quesma/model/pipeline_aggregations/max_bucket.go index c8a2d587c..da6836f07 100644 --- a/quesma/model/pipeline_aggregations/max_bucket.go +++ b/quesma/model/pipeline_aggregations/max_bucket.go @@ -61,13 +61,7 @@ func (query MaxBucket) calculateSingleMaxBucket(parentRows []model.QueryResultRo var resultValue any var resultKeys []any - firstNonNilIndex := -1 - for i, row := range parentRows { - if row.LastColValue() != nil { - firstNonNilIndex = i - break - } - } + firstNonNilIndex := model.FirstNonNilIndex(parentRows) if firstNonNilIndex == -1 { resultRow := parentRows[0].Copy() resultRow.Cols[len(resultRow.Cols)-1].Value = model.JsonMap{ diff --git a/quesma/model/pipeline_aggregations/sum_bucket.go b/quesma/model/pipeline_aggregations/sum_bucket.go index 2bfafda05..1e38bd449 100644 --- a/quesma/model/pipeline_aggregations/sum_bucket.go +++ b/quesma/model/pipeline_aggregations/sum_bucket.go @@ -60,13 +60,7 @@ func (query SumBucket) CalculateResultWhenMissing(parentRows []model.QueryResult func (query SumBucket) calculateSingleSumBucket(parentRows []model.QueryResultRow) model.QueryResultRow { var resultValue any - firstNonNilIndex := -1 - for i, row := range parentRows { - if row.LastColValue() != nil { - firstNonNilIndex = i - break - } - } + firstNonNilIndex := model.FirstNonNilIndex(parentRows) if firstNonNilIndex == -1 { resultRow := parentRows[0].Copy() resultRow.Cols[len(resultRow.Cols)-1].Value = model.JsonMap{ diff --git a/quesma/model/query_result.go b/quesma/model/query_result.go index 5648e7a1c..348428bff 100644 --- a/quesma/model/query_result.go +++ b/quesma/model/query_result.go @@ -111,3 +111,12 @@ func (r *QueryResultRow) Copy() QueryResultRow { func (r *QueryResultRow) LastColValue() any { return r.Cols[len(r.Cols)-1].Value } + +func FirstNonNilIndex(rows []QueryResultRow) int { + for i, row := range rows { + if row.LastColValue() != nil { + return i + } + } + return -1 +} diff --git a/quesma/queryparser/pancake_sql_query_generation_test.go b/quesma/queryparser/pancake_sql_query_generation_test.go index 3f99e3527..0870ac4e3 100644 --- a/quesma/queryparser/pancake_sql_query_generation_test.go +++ b/quesma/queryparser/pancake_sql_query_generation_test.go @@ -52,6 +52,10 @@ func TestPancakeQueryGeneration(t *testing.T) { t.Skip("Fix filters") } + if i != 104 { + //t.Skip() + } + if test.TestName == "Line, Y-axis: Min, Buckets: Date Range, X-Axis: Terms, Split Chart: Date Histogram(file:kibana-visualize/agg_req,nr:9)" { t.Skip("Date range is broken, fix in progress (PR #971)") } diff --git a/quesma/testdata/aggregation_requests_2.go b/quesma/testdata/aggregation_requests_2.go index 6530c5618..64a16e650 100644 --- a/quesma/testdata/aggregation_requests_2.go +++ b/quesma/testdata/aggregation_requests_2.go @@ -10,6 +10,8 @@ import ( // Goland lags a lot when you edit aggregation_requests.go file, so let's add new tests to this one. +var someTrue = true + var AggregationTests2 = []AggregationTestCase{ { // [42] TestName: "histogram with all possible calendar_intervals", @@ -4689,4 +4691,65 @@ var AggregationTests2 = []AggregationTestCase{ "aggr__my_buckets__key_1" ASC LIMIT 4`, }, + { // [70] + TestName: "terms with bool field", + QueryRequestJson: ` + { + "aggs": { + "terms": { + "terms": { + "field": "Cancelled", + "size": 2 + } + } + }, + "size": 0, + "track_total_hits": true + }`, + ExpectedResponse: ` + { + "aggregations": { + "terms": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 38654, + "buckets": [ + { + "key": 0, + "key_as_string": "false", + "doc_count": 11344 + }, + { + "key": 1, + "key_as_string": "true", + "doc_count": 2 + } + ] + } + } + }`, + ExpectedPancakeResults: []model.QueryResultRow{ + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__terms__parent_count", int64(50000)), + model.NewQueryResultCol("aggr__terms__key_0", nil), + model.NewQueryResultCol("aggr__terms__count", int64(12000)), + }}, + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__terms__parent_count", int64(50000)), + model.NewQueryResultCol("aggr__terms__key_0", false), + model.NewQueryResultCol("aggr__terms__count", int64(11344)), + }}, + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__terms__parent_count", int64(50000)), + model.NewQueryResultCol("aggr__terms__key_0", &someTrue), + model.NewQueryResultCol("aggr__terms__count", int64(2)), + }}, + }, + ExpectedPancakeSQL: ` + SELECT sum(count(*)) OVER () AS "aggr__terms__parent_count", + "Cancelled" AS "aggr__terms__key_0", count(*) AS "aggr__terms__count" + FROM __quesma_table_name + GROUP BY "Cancelled" AS "aggr__terms__key_0" + ORDER BY "aggr__terms__count" DESC, "aggr__terms__key_0" ASC + LIMIT 3`, + }, } diff --git a/quesma/testdata/kibana-visualize/aggregation_requests.go b/quesma/testdata/kibana-visualize/aggregation_requests.go index 7a0f3fd2d..23c393893 100644 --- a/quesma/testdata/kibana-visualize/aggregation_requests.go +++ b/quesma/testdata/kibana-visualize/aggregation_requests.go @@ -960,7 +960,8 @@ var AggregationTests = []testdata.AggregationTestCase{ "95.0": 15480.335426897316 } }, - "key": false, + "key": 0, + "key_as_string": "false", "doc_count": 2974 }, { @@ -991,7 +992,8 @@ var AggregationTests = []testdata.AggregationTestCase{ "95.0": 14463.254101562497 } }, - "key": true, + "key": 1, + "key_as_string": "true", "doc_count": 419 } ] diff --git a/quesma/testdata/opensearch-visualize/pipeline_aggregation_requests.go b/quesma/testdata/opensearch-visualize/pipeline_aggregation_requests.go index bb683c855..83a492084 100644 --- a/quesma/testdata/opensearch-visualize/pipeline_aggregation_requests.go +++ b/quesma/testdata/opensearch-visualize/pipeline_aggregation_requests.go @@ -988,7 +988,7 @@ var PipelineAggregationTests = []testdata.AggregationTestCase{ "key": 1714871400000, "key_as_string": "2024-05-05T01:10:00.000" }, -{ + { "1": { "value": 0.0 }, @@ -999,7 +999,7 @@ var PipelineAggregationTests = []testdata.AggregationTestCase{ "key": 1714872000000, "key_as_string": "2024-05-05T01:20:00.000" }, -{ + { "1": { "value": 0.0 }, @@ -1010,7 +1010,7 @@ var PipelineAggregationTests = []testdata.AggregationTestCase{ "key": 1714872600000, "key_as_string": "2024-05-05T01:30:00.000" }, -{ + { "1": { "value": 0.0 }, @@ -1021,7 +1021,7 @@ var PipelineAggregationTests = []testdata.AggregationTestCase{ "key": 1714873200000, "key_as_string": "2024-05-05T01:40:00.000" }, -{ + { "1": { "value": 0.0 }, @@ -1032,7 +1032,7 @@ var PipelineAggregationTests = []testdata.AggregationTestCase{ "key": 1714873800000, "key_as_string": "2024-05-05T01:50:00.000" }, -{ + { "1": { "value": 0.0 }, @@ -3345,11 +3345,13 @@ var PipelineAggregationTests = []testdata.AggregationTestCase{ "buckets": [ { "doc_count": 260, - "key": true + "key": 1, + "key_as_string": "true" }, { "doc_count": 1923, - "key": false + "key": 0, + "key_as_string": "false" } ], "doc_count_error_upper_bound": 0, diff --git a/quesma/util/utils.go b/quesma/util/utils.go index fdb377cfa..2ae21c4db 100644 --- a/quesma/util/utils.go +++ b/quesma/util/utils.go @@ -737,6 +737,19 @@ func ExtractNumeric64(value any) float64 { return asFloat64 } +// ExtractBool returns: +// * (value as bool, nil), if value is either bool or *bool +// * (false, err), otherwise +func ExtractBool(value any) (bool, error) { + switch valueTyped := value.(type) { + case bool: + return valueTyped, nil + case *bool: + return *valueTyped, nil + } + return false, fmt.Errorf("ExtractBool, value of incorrect type. Expected (*)bool, received: %v; type: %T", value, value) +} + type sqlMockMismatchSql struct { expected string actual string From d27823caf7383324118b4a2e6d5ff3e79091e363 Mon Sep 17 00:00:00 2001 From: Krzysztof Kiewicz Date: Sat, 28 Dec 2024 15:23:29 +0100 Subject: [PATCH 2/4] Fix linter --- quesma/model/bucket_aggregations/terms.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/quesma/model/bucket_aggregations/terms.go b/quesma/model/bucket_aggregations/terms.go index bd12db889..36ed72b8e 100644 --- a/quesma/model/bucket_aggregations/terms.go +++ b/quesma/model/bucket_aggregations/terms.go @@ -37,16 +37,18 @@ func (query Terms) TranslateSqlResponseToJson(rows []model.QueryResultRow) model var keyIsBool bool for _, row := range rows { + key := query.key(row) + if key == nil { + continue + } + switch query.key(row).(type) { case bool, *bool: keyIsBool = true - break - case nil: - continue default: keyIsBool = false - break } + break } var response []model.JsonMap From 5ce52f0900237764a2d9b2ee5aa5e9a1a211388d Mon Sep 17 00:00:00 2001 From: Krzysztof Kiewicz Date: Sat, 28 Dec 2024 15:24:37 +0100 Subject: [PATCH 3/4] Cleanup --- quesma/model/bucket_aggregations/terms.go | 72 +++++-------------- .../pancake_sql_query_generation_test.go | 4 -- quesma/util/utils.go | 21 +++--- 3 files changed, 29 insertions(+), 68 deletions(-) diff --git a/quesma/model/bucket_aggregations/terms.go b/quesma/model/bucket_aggregations/terms.go index 36ed72b8e..8a4096f3c 100644 --- a/quesma/model/bucket_aggregations/terms.go +++ b/quesma/model/bucket_aggregations/terms.go @@ -4,7 +4,6 @@ package bucket_aggregations import ( "context" - "fmt" "quesma/logger" "quesma/model" "quesma/util" @@ -33,65 +32,30 @@ func (query Terms) TranslateSqlResponseToJson(rows []model.QueryResultRow) model return model.JsonMap{} } - fmt.Println(rows) - - var keyIsBool bool + var response []model.JsonMap for _, row := range rows { - key := query.key(row) - if key == nil { - continue + docCount := query.docCount(row) + bucket := model.JsonMap{ + "doc_count": docCount, } - - switch query.key(row).(type) { - case bool, *bool: - keyIsBool = true - default: - keyIsBool = false + if query.significant { + bucket["score"] = docCount + bucket["bg_count"] = docCount } - break - } - var response []model.JsonMap - if !keyIsBool { - for _, row := range rows { - docCount := query.docCount(row) - bucket := model.JsonMap{ - "key": query.key(row), - "doc_count": docCount, - } - if query.significant { - bucket["score"] = docCount - bucket["bg_count"] = docCount - } - response = append(response, bucket) + // response for bool keys is different + key := query.key(row) + if boolPtr, isBoolPtr := key.(*bool); isBoolPtr { + key = *boolPtr } - } else { // key is bool - for _, row := range rows { - var ( - key any - keyAsString string - ) - if val, err := util.ExtractBool(query.key(row)); err == nil { - if val { - key = 1 - keyAsString = "true" - } else { - key = 0 - keyAsString = "false" - } - } - docCount := query.docCount(row) - bucket := model.JsonMap{ - "key": key, - "key_as_string": keyAsString, - "doc_count": docCount, - } - if query.significant { - bucket["score"] = docCount - bucket["bg_count"] = docCount - } - response = append(response, bucket) + if keyAsBool, ok := key.(bool); ok { + bucket["key"] = util.BoolToInt(keyAsBool) + bucket["key_as_string"] = util.BoolToString(keyAsBool) + } else { + bucket["key"] = key } + + response = append(response, bucket) } if !query.significant { diff --git a/quesma/queryparser/pancake_sql_query_generation_test.go b/quesma/queryparser/pancake_sql_query_generation_test.go index 0870ac4e3..3f99e3527 100644 --- a/quesma/queryparser/pancake_sql_query_generation_test.go +++ b/quesma/queryparser/pancake_sql_query_generation_test.go @@ -52,10 +52,6 @@ func TestPancakeQueryGeneration(t *testing.T) { t.Skip("Fix filters") } - if i != 104 { - //t.Skip() - } - if test.TestName == "Line, Y-axis: Min, Buckets: Date Range, X-Axis: Terms, Split Chart: Date Histogram(file:kibana-visualize/agg_req,nr:9)" { t.Skip("Date range is broken, fix in progress (PR #971)") } diff --git a/quesma/util/utils.go b/quesma/util/utils.go index 2ae21c4db..34f3c36c2 100644 --- a/quesma/util/utils.go +++ b/quesma/util/utils.go @@ -737,17 +737,18 @@ func ExtractNumeric64(value any) float64 { return asFloat64 } -// ExtractBool returns: -// * (value as bool, nil), if value is either bool or *bool -// * (false, err), otherwise -func ExtractBool(value any) (bool, error) { - switch valueTyped := value.(type) { - case bool: - return valueTyped, nil - case *bool: - return *valueTyped, nil +func BoolToInt(b bool) int { + if b { + return 1 + } + return 0 +} + +func BoolToString(b bool) string { + if b { + return "true" } - return false, fmt.Errorf("ExtractBool, value of incorrect type. Expected (*)bool, received: %v; type: %T", value, value) + return "false" } type sqlMockMismatchSql struct { From a2c3f74771c212cae33d126c3fd8588f8e1a8719 Mon Sep 17 00:00:00 2001 From: Krzysztof Kiewicz Date: Sat, 28 Dec 2024 15:46:44 +0100 Subject: [PATCH 4/4] Cleanup --- quesma/testdata/aggregation_requests_2.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/quesma/testdata/aggregation_requests_2.go b/quesma/testdata/aggregation_requests_2.go index 253899bcb..3fb4b4132 100644 --- a/quesma/testdata/aggregation_requests_2.go +++ b/quesma/testdata/aggregation_requests_2.go @@ -8,6 +8,8 @@ import ( "time" ) +var randomTrueVariableUsedBelow = true + // Goland lags a lot when you edit aggregation_requests.go file, so let's add new tests to this one. var AggregationTests2 = []AggregationTestCase{ @@ -5340,7 +5342,7 @@ var AggregationTests2 = []AggregationTestCase{ }}, {Cols: []model.QueryResultCol{ model.NewQueryResultCol("aggr__terms__parent_count", int64(50000)), - model.NewQueryResultCol("aggr__terms__key_0", &someTrue), + model.NewQueryResultCol("aggr__terms__key_0", &randomTrueVariableUsedBelow), // used here model.NewQueryResultCol("aggr__terms__count", int64(2)), }}, },