Cleanup

QuesmaOrg · Dec 27, 2024 · 2f94547 · 2f94547
1 parent 360220f
commit 2f94547
Show file tree

Hide file tree

Showing 6 changed files with 33 additions and 30 deletions.
diff --git a/quesma/model/bucket_aggregations/terms.go b/quesma/model/bucket_aggregations/terms.go
@@ -15,12 +15,12 @@ import (
 type Terms struct {
 	ctx         context.Context
 	significant bool // true <=> significant_terms, false <=> terms
-	// Either:
+	// include is either:
 	//   - single value: then for strings, it can be a regex.
 	//   - array: then field must match exactly one of the values (never a regex)
 	// Nil if missing in request.
 	include any
-	// Either:
+	// exclude is either:
 	//   - single value: then for strings, it can be a regex.
 	//   - array: then field must match exactly one of the values (never a regex)
 	// Nil if missing in request.
@@ -119,12 +119,13 @@ func (query Terms) parentCount(row model.QueryResultRow) any {
 	return row.Cols[len(row.Cols)-3].Value
 }
 
-func (query Terms) UpdateFieldForIncludeAndExclude(field model.Expr) (updatedField model.Expr, didWeUpdate bool) {
+func (query Terms) UpdateFieldForIncludeAndExclude(field model.Expr) (updatedField model.Expr, didWeUpdateField bool) {
 	// We'll use here everywhere Clickhouse 'if' function: if(condition, then, else)
-	// In our case: if(condition that field is not excluded, field, NULL)
+	// In our case field becomes: if(condition that field is not excluded, field, NULL)
 	ifOrNull := func(condition model.Expr) model.FunctionExpr {
 		return model.NewFunction("if", condition, field, model.NullExpr)
 	}
+
 	hasExclude := query.exclude != nil
 	excludeArr, excludeIsArray := query.exclude.([]any)
 	switch {
@@ -149,7 +150,7 @@ func (query Terms) UpdateFieldForIncludeAndExclude(field model.Expr) (updatedFie
 		}
 
 	default:
-		return field, false // TODO implement support for include this in next PR
+		return field, false // TODO implement similar support for 'include' in next PR
 	}
 }
 
@@ -159,14 +160,14 @@ func (query Terms) UpdateFieldForIncludeAndExclude(field model.Expr) (updatedFie
 func CheckParamsTerms(ctx context.Context, paramsRaw any) error {
 	requiredParams := map[string]string{"field": "string"}
 	optionalParams := map[string]string{
-		"size":                      "float64", // TODO should be int, will be fixed
-		"shard_size":                "float64", // TODO should be int, will be fixed
-		"order":                     "order",   // TODO add order type
-		"min_doc_count":             "float64", // TODO should be int, will be fixed
-		"shard_min_doc_count":       "float64", // TODO should be int, will be fixed
+		"size":                      "float64|string", // TODO should be int|string, will be fixed
+		"shard_size":                "float64",        // TODO should be int, will be fixed
+		"order":                     "order",          // TODO add order type
+		"min_doc_count":             "float64",        // TODO should be int, will be fixed
+		"shard_min_doc_count":       "float64",        // TODO should be int, will be fixed
 		"show_term_doc_count_error": "bool",
-		"exclude":                   "not-checking-type-rn-complicated",
-		"include":                   "not-checking-type-rn-complicated",
+		"exclude":                   "not-checking-type-now-complicated",
+		"include":                   "not-checking-type-now-complicated",
 		"collect_mode":              "string",
 		"execution_hint":            "string",
 		"missing":                   "string",
@@ -200,7 +201,7 @@ func CheckParamsTerms(ctx context.Context, paramsRaw any) error {
 			if !isOptional {
 				return fmt.Errorf("unexpected parameter %s found in Terms params %v", paramName, params)
 			}
-			if wantedType == "not-checking-type-rn-complicated" || wantedType == "order" {
+			if wantedType == "not-checking-type-now-complicated" || wantedType == "order" || wantedType == "float64|string" {
 				continue // TODO: add that later
 			}
 			if reflect.TypeOf(params[paramName]).Name() != wantedType { // TODO I'll make a small rewrite to not use reflect here

diff --git a/quesma/model/expr.go b/quesma/model/expr.go
@@ -130,6 +130,7 @@ func NewLiteral(value any) LiteralExpr {
 	return LiteralExpr{Value: value}
 }
 
+// NewLiteralSingleQuoteString simply does: string -> 'string', anything_else -> anything_else
 func NewLiteralSingleQuoteString(value any) LiteralExpr {
 	switch v := value.(type) {
 	case string:

diff --git a/quesma/queryparser/pancake_aggregation_parser_buckets.go b/quesma/queryparser/pancake_aggregation_parser_buckets.go
@@ -147,17 +147,21 @@ func (cw *ClickhouseQueryTranslator) parseDateHistogram(aggregation *pancakeAggr
 
 // aggrName - "terms" or "significant_terms"
 func (cw *ClickhouseQueryTranslator) parseTermsAggregation(aggregation *pancakeAggregationTreeNode, params QueryMap, aggrName string) error {
+	if err := bucket_aggregations.CheckParamsTerms(cw.Ctx, params); err != nil {
+		return err
+	}
+
 	terms := bucket_aggregations.NewTerms(
 		cw.Ctx, aggrName == "significant_terms", params["include"], params["exclude"],
 	)
 
-	var didWeAddMissing, didWeUpdate bool
+	var didWeAddMissing, didWeUpdateFieldHere bool
 	field := cw.parseFieldField(params, aggrName)
 	field, didWeAddMissing = cw.addMissingParameterIfPresent(field, params)
-	field, didWeUpdate = terms.UpdateFieldForIncludeAndExclude(field)
+	field, didWeUpdateFieldHere = terms.UpdateFieldForIncludeAndExclude(field)
 
 	// If we updated above, we change our select to if(condition, field, NULL), so we also need to filter out those NULLs later
-	if !didWeAddMissing || didWeUpdate {
+	if !didWeAddMissing || didWeUpdateFieldHere {
 		aggregation.filterOutEmptyKeyBucket = true
 	}
 

diff --git a/quesma/queryparser/pancake_sql_query_generation_test.go b/quesma/queryparser/pancake_sql_query_generation_test.go
@@ -52,10 +52,6 @@ func TestPancakeQueryGeneration(t *testing.T) {
 				t.Skip("Fix filters")
 			}
 
-			if i != 73 {
-				t.Skip()
-			}
-
 			if test.TestName == "Line, Y-axis: Min, Buckets: Date Range, X-Axis: Terms, Split Chart: Date Histogram(file:kibana-visualize/agg_req,nr:9)" {
 				t.Skip("Date range is broken, fix in progress (PR #971)")
 			}

diff --git a/quesma/testdata/aggregation_requests_2.go b/quesma/testdata/aggregation_requests_2.go
@@ -4859,7 +4859,7 @@ var AggregationTests2 = []AggregationTestCase{
 			"aggs": {
 				"1": {
 					"terms": {
-						"field": "agi_birth_year", 
+						"field": "chess_goat", 
 						"size": 1,
 						"exclude": ["abc"]
 					}
@@ -4898,9 +4898,10 @@ var AggregationTests2 = []AggregationTestCase{
 		},
 		ExpectedPancakeSQL: `
 			SELECT sum(count(*)) OVER () AS "aggr__1__parent_count",
-			  "agi_birth_year" AS "aggr__1__key_0", count(*) AS "aggr__1__count"
+			  if("chess_goat" NOT IN 'abc', "chess_goat", NULL) AS "aggr__1__key_0",
+			  count(*) AS "aggr__1__count"
 			FROM __quesma_table_name
-			GROUP BY "agi_birth_year" AS "aggr__1__key_0"
+			GROUP BY if("chess_goat" NOT IN 'abc', "chess_goat", NULL) AS "aggr__1__key_0"
 			ORDER BY "aggr__1__count" DESC, "aggr__1__key_0" ASC
 			LIMIT 2`,
 	},
@@ -4951,10 +4952,10 @@ var AggregationTests2 = []AggregationTestCase{
 		},
 		ExpectedPancakeSQL: `
 			SELECT sum(count(*)) OVER () AS "aggr__1__parent_count",
-			  if("chess_goat" LIKE 'K%', "chess_goat", NULL) AS "aggr__1__key_0",
+			  if("chess_goat" NOT LIKE 'K%', "chess_goat", NULL) AS "aggr__1__key_0",
 			  count(*) AS "aggr__1__count"
 			FROM __quesma_table_name
-			GROUP BY if("chess_goat" LIKE 'K%', "chess_goat", NULL) AS "aggr__1__key_0"
+			GROUP BY if("chess_goat" NOT LIKE 'K%', "chess_goat", NULL) AS "aggr__1__key_0"
 			ORDER BY "aggr__1__count" DESC, "aggr__1__key_0" ASC
 			LIMIT 2`,
 	},
@@ -5060,7 +5061,7 @@ var AggregationTests2 = []AggregationTestCase{
 				model.NewQueryResultCol("aggr__terms1__terms2__count", int64(173)),
 				model.NewQueryResultCol("metric__terms1__terms2__metric2_col_0", 102370.42402648926),
 			}},
-			{Cols: []model.QueryResultCol{ // should be discarded by us because of size=1§
+			{Cols: []model.QueryResultCol{ // should be discarded by us because of terms2's size=1
 				model.NewQueryResultCol("aggr__terms1__parent_count", int64(13014)),
 				model.NewQueryResultCol("aggr__terms1__key_0", "Logstash Airways"),
 				model.NewQueryResultCol("aggr__terms1__count", int64(3323)),
@@ -5080,7 +5081,7 @@ var AggregationTests2 = []AggregationTestCase{
 				model.NewQueryResultCol("aggr__terms1__terms2__count", int64(167)),
 				model.NewQueryResultCol("metric__terms1__terms2__metric2_col_0", 92215.763779),
 			}},
-			{Cols: []model.QueryResultCol{ // should be discarded by us because of size=1
+			{Cols: []model.QueryResultCol{ // should be discarded by us because of terms2's size=1
 				model.NewQueryResultCol("aggr__terms1__parent_count", int64(13014)),
 				model.NewQueryResultCol("aggr__terms1__key_0", "JetBeats"),
 				model.NewQueryResultCol("aggr__terms1__count", int64(3261)),
@@ -5090,7 +5091,7 @@ var AggregationTests2 = []AggregationTestCase{
 				model.NewQueryResultCol("aggr__terms1__terms2__count", int64(147)),
 				model.NewQueryResultCol("metric__terms1__terms2__metric2_col_0", 90242.31663285477),
 			}},
-			{Cols: []model.QueryResultCol{ // should be discarded by us because of size=2
+			{Cols: []model.QueryResultCol{ // should be discarded by us because of terms1's size=2
 				model.NewQueryResultCol("aggr__terms1__parent_count", int64(13014)),
 				model.NewQueryResultCol("aggr__terms1__key_0", "Kibana Airlines"),
 				model.NewQueryResultCol("aggr__terms1__count", int64(3219)),
@@ -5138,7 +5139,6 @@ var AggregationTests2 = []AggregationTestCase{
 	},
 	{ // [76]
 		TestName: "terms with exclude, but with branched off aggregation tree",
-		// One simple test, for more regex tests see util/regex unit tests
 		QueryRequestJson: `
 		{
 			"aggs": {

diff --git a/quesma/util/regex/regex.go b/quesma/util/regex/regex.go
@@ -18,7 +18,8 @@ func ToClickhouseExpr(pattern string) (clickhouseFuncName string, patternExpr mo
 		}
 		// .* allowed, but [any other char]* - not
 		for i, char := range pattern[1:] {
-			if char == '*' && pattern[i] != '.' {
+			prevChar := pattern[i]
+			if char == '*' && prevChar != '.' {
 				return false
 			}
 		}