From f9521c9e01afa9741a497ee001d644a5368fbbef Mon Sep 17 00:00:00 2001 From: Jacek Migdal Date: Tue, 14 May 2024 21:04:08 +0200 Subject: [PATCH] Jacek respect size (#110) --- quesma/model/query.go | 38 +++++++++++++++--------- quesma/queryparser/aggregation_parser.go | 17 +++++++++++ quesma/quesma/search_test.go | 9 +++++- quesma/testdata/aggregation_requests.go | 8 ++--- quesma/testdata/requests.go | 2 +- 5 files changed, 54 insertions(+), 20 deletions(-) diff --git a/quesma/model/query.go b/quesma/model/query.go index ebc3e2fd3..e13fe2959 100644 --- a/quesma/model/query.go +++ b/quesma/model/query.go @@ -76,7 +76,7 @@ func (q *Query) String() string { if len(q.WhereClause) == 0 { where = "" } - sb.WriteString(" FROM " + q.FromClause + where + q.WhereClause + " " + strings.Join(q.SuffixClauses, " ")) + sb.WriteString(" FROM " + q.FromClause + where + q.WhereClause + " ") if len(q.GroupByFields) > 0 { sb.WriteString(" GROUP BY (") for i, field := range q.GroupByFields { @@ -87,14 +87,19 @@ func (q *Query) String() string { } sb.WriteString(")") - sb.WriteString(" ORDER BY (") - for i, field := range q.GroupByFields { - sb.WriteString(field) - if i < len(q.GroupByFields)-1 { - sb.WriteString(", ") + if len(q.SuffixClauses) == 0 { + sb.WriteString(" ORDER BY (") + for i, field := range q.GroupByFields { + sb.WriteString(field) + if i < len(q.GroupByFields)-1 { + sb.WriteString(", ") + } } + sb.WriteString(")") } - sb.WriteString(")") + } + if len(q.SuffixClauses) > 0 { + sb.WriteString(" " + strings.Join(q.SuffixClauses, " ")) } return sb.String() } @@ -127,7 +132,7 @@ func (q *Query) StringFromColumns(colNames []string) string { if len(q.WhereClause) == 0 { where = "" } - sb.WriteString(" FROM " + q.FromClause + where + q.WhereClause + " " + strings.Join(q.SuffixClauses, " ")) + sb.WriteString(" FROM " + q.FromClause + where + q.WhereClause + " ") if len(q.GroupByFields) > 0 { sb.WriteString(" GROUP BY (") for i, field := range q.GroupByFields { @@ -138,14 +143,19 
@@ func (q *Query) StringFromColumns(colNames []string) string { } sb.WriteString(")") - sb.WriteString(" ORDER BY (") - for i, field := range q.GroupByFields { - sb.WriteString(field) - if i < len(q.GroupByFields)-1 { - sb.WriteString(", ") + if len(q.SuffixClauses) == 0 { + sb.WriteString(" ORDER BY (") + for i, field := range q.GroupByFields { + sb.WriteString(field) + if i < len(q.GroupByFields)-1 { + sb.WriteString(", ") + } } + sb.WriteString(")") } - sb.WriteString(")") + } + if len(q.SuffixClauses) > 0 { + sb.WriteString(" " + strings.Join(q.SuffixClauses, " ")) } return sb.String() } diff --git a/quesma/queryparser/aggregation_parser.go b/quesma/queryparser/aggregation_parser.go index 7c4b31990..d4bea00a5 100644 --- a/quesma/queryparser/aggregation_parser.go +++ b/quesma/queryparser/aggregation_parser.go @@ -300,6 +300,7 @@ func (cw *ClickhouseQueryTranslator) parseAggregation(currentAggr *aggrQueryBuil filterOnThisLevel := false whereBeforeNesting := currentAggr.whereBuilder // to restore it after processing this level queryTypeBeforeNesting := currentAggr.Type + suffixBeforeNesting := currentAggr.SuffixClauses // check if metadata's present var metadata model.JsonMap @@ -424,6 +425,7 @@ func (cw *ClickhouseQueryTranslator) parseAggregation(currentAggr *aggrQueryBuil } } currentAggr.Type = queryTypeBeforeNesting + currentAggr.SuffixClauses = suffixBeforeNesting } // Tries to parse metrics aggregation from queryMap. If it's not a metrics aggregation, returns false. 
@@ -596,8 +598,23 @@ func (cw *ClickhouseQueryTranslator) tryBucketAggregation(currentAggr *aggrQuery if terms, ok := queryMap[termsType]; ok { currentAggr.Type = bucket_aggregations.NewTerms(cw.Ctx, termsType == "significant_terms") fieldName := strconv.Quote(cw.parseFieldField(terms, termsType)) + isEmptyGroupBy := len(currentAggr.GroupByFields) == 0 currentAggr.GroupByFields = append(currentAggr.GroupByFields, fieldName) currentAggr.NonSchemaFields = append(currentAggr.NonSchemaFields, fieldName) + size := 10 + if _, ok := queryMap["aggs"]; isEmptyGroupBy && !ok { // we can do limit only if terms are not nested + if jsonMap, ok := terms.(QueryMap); ok { + if sizeRaw, ok := jsonMap["size"]; ok { + if sizeParsed, ok := sizeRaw.(float64); ok { + size = int(sizeParsed) + } else { + logger.WarnWithCtx(cw.Ctx).Msgf("size is not an float64, but %T, value: %v. Using default", sizeRaw, sizeRaw) + } + } + } + currentAggr.SuffixClauses = append(currentAggr.SuffixClauses, "ORDER BY count() DESC") + currentAggr.SuffixClauses = append(currentAggr.SuffixClauses, fmt.Sprintf("LIMIT %d", size)) + } delete(queryMap, termsType) return success, 1, 1 } diff --git a/quesma/quesma/search_test.go b/quesma/quesma/search_test.go index 8fe625eab..6330a4363 100644 --- a/quesma/quesma/search_test.go +++ b/quesma/quesma/search_test.go @@ -41,7 +41,7 @@ func TestNoAsciiTableName(t *testing.T) { const Limit = 1000 query := queryTranslator.BuildSimpleSelectQuery(simpleQuery.Sql.Stmt, Limit) assert.True(t, query.CanParse) - assert.Equal(t, fmt.Sprintf(`SELECT * FROM "%s" LIMIT %d`, tableName, Limit), query.String()) + assert.Equal(t, fmt.Sprintf(`SELECT * FROM "%s" LIMIT %d`, tableName, Limit), query.String()) } var ctx = context.WithValue(context.TODO(), tracing.RequestIdCtxKey, tracing.GetRequestId()) @@ -77,6 +77,13 @@ func TestAsyncSearchHandler(t *testing.T) { for i, tt := range testdata.TestsAsyncSearch { t.Run(strconv.Itoa(i)+tt.Name, func(t *testing.T) { db, mock, err := sqlmock.New() + 
if tt.Name == "Histogram: possible query nr 2" { + queryMatcher := sqlmock.QueryMatcherFunc(func(expectedSQL, actualSQL string) error { + fmt.Printf("JM SQL: %s\n", actualSQL) + return sqlmock.QueryMatcherRegexp.Match(expectedSQL, actualSQL) + }) + db, mock, err = sqlmock.New(sqlmock.QueryMatcherOption(queryMatcher)) + } if err != nil { t.Fatal(err) } diff --git a/quesma/testdata/aggregation_requests.go b/quesma/testdata/aggregation_requests.go index 12b3b81c0..1db0e14b8 100644 --- a/quesma/testdata/aggregation_requests.go +++ b/quesma/testdata/aggregation_requests.go @@ -716,7 +716,7 @@ var AggregationTests = []AggregationTestCase{ }, []string{ `SELECT count() FROM "` + TableName + `" WHERE "timestamp">=parseDateTime64BestEffort('2024-02-02T13:47:16.029Z') AND "timestamp"<=parseDateTime64BestEffort('2024-02-09T13:47:16.029Z') `, - `SELECT "OriginCityName", count() FROM "` + TableName + `" WHERE "timestamp">=parseDateTime64BestEffort('2024-02-02T13:47:16.029Z') AND "timestamp"<=parseDateTime64BestEffort('2024-02-09T13:47:16.029Z') GROUP BY ("OriginCityName") ORDER BY ("OriginCityName")`, + `SELECT "OriginCityName", count() FROM "` + TableName + `" WHERE "timestamp">=parseDateTime64BestEffort('2024-02-02T13:47:16.029Z') AND "timestamp"<=parseDateTime64BestEffort('2024-02-09T13:47:16.029Z') GROUP BY ("OriginCityName") ORDER BY count() DESC LIMIT 10`, `SELECT COUNT(DISTINCT "OriginCityName") FROM "` + TableName + `" WHERE "timestamp">=parseDateTime64BestEffort('2024-02-02T13:47:16.029Z') AND "timestamp"<=parseDateTime64BestEffort('2024-02-09T13:47:16.029Z') `, }, }, @@ -2072,7 +2072,7 @@ var AggregationTests = []AggregationTestCase{ []string{ `SELECT count() FROM "` + TableName + `" WHERE ("@timestamp">=parseDateTime64BestEffort('2024-01-23T11:27:16.820Z') AND "@timestamp"<=parseDateTime64BestEffort('2024-01-23T11:42:16.820Z')) AND "message" iLIKE '%user%' `, `SELECT count() FROM "` + TableName + `" WHERE 
("@timestamp">=parseDateTime64BestEffort('2024-01-23T11:27:16.820Z') AND "@timestamp"<=parseDateTime64BestEffort('2024-01-23T11:42:16.820Z')) AND "message" iLIKE '%user%' `, - `SELECT "host.name", count() FROM "` + TableName + `" WHERE ("@timestamp">=parseDateTime64BestEffort('2024-01-23T11:27:16.820Z') AND "@timestamp"<=parseDateTime64BestEffort('2024-01-23T11:42:16.820Z')) AND "message" iLIKE '%user%' GROUP BY ("host.name") ORDER BY ("host.name")`, + `SELECT "host.name", count() FROM "` + TableName + `" WHERE ("@timestamp">=parseDateTime64BestEffort('2024-01-23T11:27:16.820Z') AND "@timestamp"<=parseDateTime64BestEffort('2024-01-23T11:42:16.820Z')) AND "message" iLIKE '%user%' GROUP BY ("host.name") ORDER BY count() DESC LIMIT 10`, `SELECT count() FROM "` + TableName + `" WHERE ("@timestamp">=parseDateTime64BestEffort('2024-01-23T11:27:16.820Z') AND "@timestamp"<=parseDateTime64BestEffort('2024-01-23T11:42:16.820Z')) AND "message" iLIKE '%user%' `, }, }, @@ -2653,7 +2653,7 @@ var AggregationTests = []AggregationTestCase{ `SELECT "message", count() FROM ` + QuotedTableName + ` ` + `WHERE "timestamp"<=parseDateTime64BestEffort('2024-02-21T04:01:14.920Z') ` + `AND "timestamp">=parseDateTime64BestEffort('2024-02-20T19:13:33.795Z') ` + - `GROUP BY ("message") ORDER BY ("message")`, + `GROUP BY ("message") ORDER BY count() DESC LIMIT 3`, }, }, { // [17] @@ -3888,7 +3888,7 @@ var AggregationTests = []AggregationTestCase{ }, ExpectedSQLs: []string{ `SELECT count() FROM ` + QuotedTableName + ` `, - `SELECT "message", count() FROM ` + QuotedTableName + ` GROUP BY ("message") ORDER BY ("message")`, + `SELECT "message", count() FROM ` + QuotedTableName + ` GROUP BY ("message") ORDER BY count() DESC LIMIT 4`, }, }, { // [24] diff --git a/quesma/testdata/requests.go b/quesma/testdata/requests.go index 331a6c36f..7f2cc72de 100644 --- a/quesma/testdata/requests.go +++ b/quesma/testdata/requests.go @@ -703,7 +703,7 @@ var TestsAsyncSearch = []AsyncSearchTestCase{ 
model.SearchQueryInfo{Typ: model.Normal}, []string{ `SELECT count() FROM "logs-generic-default" WHERE "@timestamp".*parseDateTime64BestEffort('2024-01-25T..:..:59.033Z') AND "@timestamp".*parseDateTime64BestEffort('2024-01-25T..:..:59.033Z') `, - `SELECT "event.dataset", ` + clickhouse.TimestampGroupBy("@timestamp", clickhouse.DateTime64, time.Minute) + `, count() FROM "logs-generic-default" WHERE "@timestamp".*parseDateTime64BestEffort('2024-01-25T1.:..:59.033Z') AND "@timestamp".*parseDateTime64BestEffort('2024-01-25T1.:..:59.033Z') GROUP BY ("event.dataset", ` + clickhouse.TimestampGroupBy("@timestamp", clickhouse.DateTime64, time.Minute) + `) ORDER BY ("event.dataset", ` + clickhouse.TimestampGroupBy("@timestamp", clickhouse.DateTime64, time.Minute) + ")", + `SELECT "event.dataset", ` + clickhouse.TimestampGroupBy("@timestamp", clickhouse.DateTime64, time.Minute) + `, count() FROM "logs-generic-default" WHERE "@timestamp".*parseDateTime64BestEffort('2024-01-25T1.:..:59.033Z') AND "@timestamp".*parseDateTime64BestEffort('2024-01-25T1.:..:59.033Z') GROUP BY ("event.dataset", ` + clickhouse.TimestampGroupBy("@timestamp", clickhouse.DateTime64, time.Minute) + `) ORDER BY ("event.dataset", ` + clickhouse.TimestampGroupBy("@timestamp", clickhouse.DateTime64, time.Minute) + `)`, `SELECT "event.dataset", count() FROM "logs-generic-default" WHERE "@timestamp".*parseDateTime64BestEffort('2024-01-25T1.:..:59.033Z') AND "@timestamp".*parseDateTime64BestEffort('2024-01-25T1.:..:59.033Z') GROUP BY ("event.dataset") ORDER BY ("event.dataset")`, }, true,