diff --git a/quesma/model/bucket_aggregations/terms.go b/quesma/model/bucket_aggregations/terms.go
index 61d92b7ad..e25cb0c0a 100644
--- a/quesma/model/bucket_aggregations/terms.go
+++ b/quesma/model/bucket_aggregations/terms.go
@@ -15,12 +15,12 @@ import (
 type Terms struct {
     ctx         context.Context
     significant bool // true <=> significant_terms, false <=> terms
-    // Either:
+    // include is either:
     // - single value: then for strings, it can be a regex.
     // - array: then field must match exactly one of the values (never a regex)
     // Nil if missing in request.
     include any
-    // Either:
+    // exclude is either:
     // - single value: then for strings, it can be a regex.
     // - array: then field must match exactly one of the values (never a regex)
     // Nil if missing in request.
@@ -119,12 +119,13 @@ func (query Terms) parentCount(row model.QueryResultRow) any {
     return row.Cols[len(row.Cols)-3].Value
 }
 
-func (query Terms) UpdateFieldForIncludeAndExclude(field model.Expr) (updatedField model.Expr, didWeUpdate bool) {
+func (query Terms) UpdateFieldForIncludeAndExclude(field model.Expr) (updatedField model.Expr, didWeUpdateField bool) {
     // We'll use here everywhere Clickhouse 'if' function: if(condition, then, else)
-    // In our case: if(condition that field is not excluded, field, NULL)
+    // In our case field becomes: if(condition that field is not excluded, field, NULL)
     ifOrNull := func(condition model.Expr) model.FunctionExpr {
         return model.NewFunction("if", condition, field, model.NullExpr)
     }
 
+    hasExclude := query.exclude != nil
     excludeArr, excludeIsArray := query.exclude.([]any)
     switch {
@@ -149,7 +150,7 @@ func (query Terms) UpdateFieldForIncludeAndExclude(field model.Expr) (updatedFie
         }
 
     default:
-        return field, false // TODO implement support for include this in next PR
+        return field, false // TODO implement similar support for 'include' in next PR
     }
 }
 
@@ -159,14 +160,14 @@ func (query Terms) UpdateFieldForIncludeAndExclude(field model.Expr) (updatedFie
 func CheckParamsTerms(ctx context.Context, paramsRaw any) error {
     requiredParams := map[string]string{"field": "string"}
     optionalParams := map[string]string{
-        "size":                      "float64", // TODO should be int, will be fixed
-        "shard_size":                "float64", // TODO should be int, will be fixed
-        "order":                     "order",   // TODO add order type
-        "min_doc_count":             "float64", // TODO should be int, will be fixed
-        "shard_min_doc_count":       "float64", // TODO should be int, will be fixed
+        "size":                      "float64|string", // TODO should be int|string, will be fixed
+        "shard_size":                "float64",        // TODO should be int, will be fixed
+        "order":                     "order",          // TODO add order type
+        "min_doc_count":             "float64",        // TODO should be int, will be fixed
+        "shard_min_doc_count":       "float64",        // TODO should be int, will be fixed
         "show_term_doc_count_error": "bool",
-        "exclude":                   "not-checking-type-rn-complicated",
-        "include":                   "not-checking-type-rn-complicated",
+        "exclude":                   "not-checking-type-now-complicated",
+        "include":                   "not-checking-type-now-complicated",
         "collect_mode":              "string",
         "execution_hint":            "string",
         "missing":                   "string",
@@ -200,7 +201,7 @@ func CheckParamsTerms(ctx context.Context, paramsRaw any) error {
         if !isOptional {
             return fmt.Errorf("unexpected parameter %s found in Terms params %v", paramName, params)
         }
-        if wantedType == "not-checking-type-rn-complicated" || wantedType == "order" {
+        if wantedType == "not-checking-type-now-complicated" || wantedType == "order" || wantedType == "float64|string" {
             continue // TODO: add that later
         }
         if reflect.TypeOf(params[paramName]).Name() != wantedType { // TODO I'll make a small rewrite to not use reflect here
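
A note on the terms.go change above: the core transformation is that the terms key column becomes if(<field is not excluded>, field, NULL). The standalone Go sketch below mirrors that mapping with plain SQL strings instead of quesma's model.Expr tree; the excludeGuard helper and the exact SQL rendering (for example, the parentheses around the NOT IN list) are illustrative assumptions, not code from this PR.

package main

import (
	"fmt"
	"strings"
)

// excludeGuard sketches the shape of the expression UpdateFieldForIncludeAndExclude
// builds: if(<field is not excluded>, <field>, NULL). It emits plain SQL text,
// whereas the real code builds a model.Expr tree via model.NewFunction("if", ...).
func excludeGuard(field string, exclude any) string {
	switch ex := exclude.(type) {
	case []any:
		// array form: exact values only, never a regex -> NOT IN
		quoted := make([]string, 0, len(ex))
		for _, v := range ex {
			quoted = append(quoted, fmt.Sprintf("'%v'", v))
		}
		return fmt.Sprintf(`if("%s" NOT IN (%s), "%s", NULL)`, field, strings.Join(quoted, ", "), field)
	case string:
		// single string form: the PR routes it through util/regex.ToClickhouseExpr,
		// which rewrites simple patterns like "K.*" into LIKE 'K%'; this sketch
		// assumes the caller already passes a LIKE pattern.
		return fmt.Sprintf(`if("%s" NOT LIKE '%s', "%s", NULL)`, field, ex, field)
	default:
		// no exclude in the request: the field is left untouched
		return fmt.Sprintf(`"%s"`, field)
	}
}

func main() {
	fmt.Println(excludeGuard("chess_goat", []any{"abc"})) // if("chess_goat" NOT IN ('abc'), "chess_goat", NULL)
	fmt.Println(excludeGuard("chess_goat", "K%"))         // if("chess_goat" NOT LIKE 'K%', "chess_goat", NULL)
	fmt.Println(excludeGuard("chess_goat", nil))          // "chess_goat"
}
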
diff --git a/quesma/model/expr.go b/quesma/model/expr.go
index 69030e070..3ad5b330e 100644
--- a/quesma/model/expr.go
+++ b/quesma/model/expr.go
@@ -130,6 +130,7 @@ func NewLiteral(value any) LiteralExpr {
     return LiteralExpr{Value: value}
 }
 
+// NewLiteralSingleQuoteString simply does: string -> 'string', anything_else -> anything_else
 func NewLiteralSingleQuoteString(value any) LiteralExpr {
     switch v := value.(type) {
     case string:
diff --git a/quesma/queryparser/pancake_aggregation_parser_buckets.go b/quesma/queryparser/pancake_aggregation_parser_buckets.go
index 55f329acf..9bf196515 100644
--- a/quesma/queryparser/pancake_aggregation_parser_buckets.go
+++ b/quesma/queryparser/pancake_aggregation_parser_buckets.go
@@ -147,17 +147,21 @@ func (cw *ClickhouseQueryTranslator) parseDateHistogram(aggregation *pancakeAggr
 
 // aggrName - "terms" or "significant_terms"
 func (cw *ClickhouseQueryTranslator) parseTermsAggregation(aggregation *pancakeAggregationTreeNode, params QueryMap, aggrName string) error {
+    if err := bucket_aggregations.CheckParamsTerms(cw.Ctx, params); err != nil {
+        return err
+    }
+
     terms := bucket_aggregations.NewTerms(
         cw.Ctx, aggrName == "significant_terms", params["include"], params["exclude"],
     )
 
-    var didWeAddMissing, didWeUpdate bool
+    var didWeAddMissing, didWeUpdateFieldHere bool
     field := cw.parseFieldField(params, aggrName)
     field, didWeAddMissing = cw.addMissingParameterIfPresent(field, params)
-    field, didWeUpdate = terms.UpdateFieldForIncludeAndExclude(field)
+    field, didWeUpdateFieldHere = terms.UpdateFieldForIncludeAndExclude(field)
 
     // If we updated above, we change our select to if(condition, field, NULL), so we also need to filter out those NULLs later
-    if !didWeAddMissing || didWeUpdate {
+    if !didWeAddMissing || didWeUpdateFieldHere {
         aggregation.filterOutEmptyKeyBucket = true
     }
 
diff --git a/quesma/queryparser/pancake_sql_query_generation_test.go b/quesma/queryparser/pancake_sql_query_generation_test.go
index 9038230be..3f99e3527 100644
--- a/quesma/queryparser/pancake_sql_query_generation_test.go
+++ b/quesma/queryparser/pancake_sql_query_generation_test.go
@@ -52,10 +52,6 @@ func TestPancakeQueryGeneration(t *testing.T) {
             t.Skip("Fix filters")
         }
 
-        if i != 73 {
-            t.Skip()
-        }
-
         if test.TestName == "Line, Y-axis: Min, Buckets: Date Range, X-Axis: Terms, Split Chart: Date Histogram(file:kibana-visualize/agg_req,nr:9)" {
             t.Skip("Date range is broken, fix in progress (PR #971)")
         }
diff --git a/quesma/testdata/aggregation_requests_2.go b/quesma/testdata/aggregation_requests_2.go
index 78b77ab97..21acfba83 100644
--- a/quesma/testdata/aggregation_requests_2.go
+++ b/quesma/testdata/aggregation_requests_2.go
@@ -4859,7 +4859,7 @@ var AggregationTests2 = []AggregationTestCase{
             "aggs": {
                 "1": {
                     "terms": {
-                        "field": "agi_birth_year",
+                        "field": "chess_goat",
                         "size": 1,
                         "exclude": ["abc"]
                     }
@@ -4898,9 +4898,10 @@ var AggregationTests2 = []AggregationTestCase{
         },
         ExpectedPancakeSQL: `
             SELECT sum(count(*)) OVER () AS "aggr__1__parent_count",
-              "agi_birth_year" AS "aggr__1__key_0", count(*) AS "aggr__1__count"
+              if("chess_goat" NOT IN 'abc', "chess_goat", NULL) AS "aggr__1__key_0",
+              count(*) AS "aggr__1__count"
             FROM __quesma_table_name
-            GROUP BY "agi_birth_year" AS "aggr__1__key_0"
+            GROUP BY if("chess_goat" NOT IN 'abc', "chess_goat", NULL) AS "aggr__1__key_0"
             ORDER BY "aggr__1__count" DESC, "aggr__1__key_0" ASC
             LIMIT 2`,
     },
@@ -4951,10 +4952,10 @@ var AggregationTests2 = []AggregationTestCase{
         },
         ExpectedPancakeSQL: `
             SELECT sum(count(*)) OVER () AS "aggr__1__parent_count",
-              if("chess_goat" LIKE 'K%', "chess_goat", NULL) AS "aggr__1__key_0",
+              if("chess_goat" NOT LIKE 'K%', "chess_goat", NULL) AS "aggr__1__key_0",
               count(*) AS "aggr__1__count"
             FROM __quesma_table_name
-            GROUP BY if("chess_goat" LIKE 'K%', "chess_goat", NULL) AS "aggr__1__key_0"
+            GROUP BY if("chess_goat" NOT LIKE 'K%', "chess_goat", NULL) AS "aggr__1__key_0"
             ORDER BY "aggr__1__count" DESC, "aggr__1__key_0" ASC
             LIMIT 2`,
     },
@@ -5060,7 +5061,7 @@ var AggregationTests2 = []AggregationTestCase{
                 model.NewQueryResultCol("aggr__terms1__terms2__count", int64(173)),
                 model.NewQueryResultCol("metric__terms1__terms2__metric2_col_0", 102370.42402648926),
             }},
-            {Cols: []model.QueryResultCol{ // should be discarded by us because of size=1ยง
+            {Cols: []model.QueryResultCol{ // should be discarded by us because of terms2's size=1
                 model.NewQueryResultCol("aggr__terms1__parent_count", int64(13014)),
                 model.NewQueryResultCol("aggr__terms1__key_0", "Logstash Airways"),
                 model.NewQueryResultCol("aggr__terms1__count", int64(3323)),
@@ -5080,7 +5081,7 @@ var AggregationTests2 = []AggregationTestCase{
                 model.NewQueryResultCol("aggr__terms1__terms2__count", int64(167)),
                 model.NewQueryResultCol("metric__terms1__terms2__metric2_col_0", 92215.763779),
             }},
-            {Cols: []model.QueryResultCol{ // should be discarded by us because of size=1
+            {Cols: []model.QueryResultCol{ // should be discarded by us because of terms2's size=1
                 model.NewQueryResultCol("aggr__terms1__parent_count", int64(13014)),
                 model.NewQueryResultCol("aggr__terms1__key_0", "JetBeats"),
                 model.NewQueryResultCol("aggr__terms1__count", int64(3261)),
@@ -5090,7 +5091,7 @@ var AggregationTests2 = []AggregationTestCase{
                 model.NewQueryResultCol("aggr__terms1__terms2__count", int64(147)),
                 model.NewQueryResultCol("metric__terms1__terms2__metric2_col_0", 90242.31663285477),
             }},
-            {Cols: []model.QueryResultCol{ // should be discarded by us because of size=2
+            {Cols: []model.QueryResultCol{ // should be discarded by us because of terms1's size=2
                 model.NewQueryResultCol("aggr__terms1__parent_count", int64(13014)),
                 model.NewQueryResultCol("aggr__terms1__key_0", "Kibana Airlines"),
                 model.NewQueryResultCol("aggr__terms1__count", int64(3219)),
@@ -5138,7 +5139,6 @@ var AggregationTests2 = []AggregationTestCase{
     },
     { // [76]
         TestName: "terms with exclude, but with branched off aggregation tree",
-        // One simple test, for more regex tests see util/regex unit tests
         QueryRequestJson: `
         {
             "aggs": {
diff --git a/quesma/util/regex/regex.go b/quesma/util/regex/regex.go
index 4164cda05..e5a42aa89 100644
--- a/quesma/util/regex/regex.go
+++ b/quesma/util/regex/regex.go
@@ -18,7 +18,8 @@ func ToClickhouseExpr(pattern string) (clickhouseFuncName string, patternExpr mo
         }
         // .* allowed, but [any other char]* - not
         for i, char := range pattern[1:] {
-            if char == '*' && pattern[i] != '.' {
+            prevChar := pattern[i]
+            if char == '*' && prevChar != '.' {
                 return false
             }
         }
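
A note on the util/regex change: because the loop ranges over pattern[1:], the character at index i of the slice sits at byte i+1 of the full pattern, so pattern[i] really is the preceding character; naming it prevChar just makes that explicit. The standalone sketch below shows the same '*' guard in isolation; the onlyDotStar helper and its surrounding main are illustrative assumptions, not code from util/regex.ToClickhouseExpr.

package main

import "fmt"

// onlyDotStar reports whether every '*' in pattern is directly preceded by '.',
// so ".*" is allowed but e.g. "K*" is not. It mirrors the guard touched in the
// diff above: ranging over pattern[1:] means pattern[i] is the byte right before
// the current character (hence the prevChar name).
func onlyDotStar(pattern string) bool {
	if len(pattern) > 0 && pattern[0] == '*' {
		return false // a leading '*' has nothing before it, so it cannot be ".*"
	}
	for i, char := range pattern[1:] {
		prevChar := pattern[i]
		if char == '*' && prevChar != '.' {
			return false
		}
	}
	return true
}

func main() {
	fmt.Println(onlyDotStar("K.*")) // true  -> simple enough to become LIKE 'K%'
	fmt.Println(onlyDotStar("K*"))  // false -> needs a real regex match instead
}
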