From e19f6f41c0a3922f74a88a88ee0c7c3604b9944f Mon Sep 17 00:00:00 2001 From: Jacek Migdal Date: Mon, 20 May 2024 16:12:44 +0200 Subject: [PATCH] Create SELECT in aggregations (#166) Fixed bugs: - We always should parse `size` and create hits query, both in async and as default - We should set hits total based on count if available, not based on returned hits Another step is to unify aggregates and non-aggregates. --- quesma/queryparser/aggregation_parser.go | 11 +++++ quesma/queryparser/aggregation_parser_test.go | 42 ++++++++++++------- quesma/queryparser/query_parser.go | 20 +++++++-- quesma/queryparser/query_translator.go | 11 ++++- quesma/quesma/search.go | 20 ++------- 5 files changed, 68 insertions(+), 36 deletions(-) diff --git a/quesma/queryparser/aggregation_parser.go b/quesma/queryparser/aggregation_parser.go index a9973d278..c47e12a5f 100644 --- a/quesma/queryparser/aggregation_parser.go +++ b/quesma/queryparser/aggregation_parser.go @@ -241,6 +241,17 @@ func (cw *ClickhouseQueryTranslator) ParseAggregationJson(queryAsJson string) ([ return nil, fmt.Errorf("no aggs -> request is not an aggregation query") } + const defaultSearchSize = 10 + size := cw.parseSize(queryAsMap, defaultSearchSize) + if size > 0 { + simpleQuery := currentAggr.whereBuilder + if sort, ok := queryAsMap["sort"]; ok { + simpleQuery.SortFields = cw.parseSortFields(sort) + } + hitQuery := cw.BuildNRowsQuery("*", simpleQuery, size) + aggregations = append(aggregations, *hitQuery) + } + return aggregations, nil } diff --git a/quesma/queryparser/aggregation_parser_test.go b/quesma/queryparser/aggregation_parser_test.go index 64a1ea105..c7fdaafd1 100644 --- a/quesma/queryparser/aggregation_parser_test.go +++ b/quesma/queryparser/aggregation_parser_test.go @@ -41,7 +41,8 @@ var aggregationTests = []struct { "field": "AvgTicketPrice" } } - } + }, + "size": 0 }`, []string{ `SELECT count() FROM ` + tableNameQuoted, @@ -110,7 +111,8 @@ var aggregationTests = []struct { "size": 1000 } } - } + }, + "size": 0 }`, []string{ `SELECT count() FROM ` + tableNameQuoted, @@ -146,7 +148,8 @@ var aggregationTests = []struct { "size": 10 } } - } + }, + "size": 0 }`, []string{ `SELECT count() FROM ` + tableNameQuoted, @@ -181,7 +184,8 @@ var aggregationTests = []struct { } } } - } + }, + "size": 0 }`, []string{ `SELECT count() FROM ` + tableNameQuoted, @@ -217,7 +221,8 @@ var aggregationTests = []struct { } } } - } + }, + "size": 0 }`, []string{ `SELECT count() FROM ` + tableNameQuoted, @@ -236,7 +241,8 @@ var aggregationTests = []struct { "min_doc_count": 1 } } - } + }, + "size": 0 }`, []string{ `SELECT count() FROM ` + tableNameQuoted, @@ -284,7 +290,8 @@ var aggregationTests = []struct { "size": 10000 } } - } + }, + "size": 0 }`, []string{ `SELECT count() FROM ` + tableNameQuoted, @@ -321,7 +328,8 @@ var aggregationTests = []struct { "size": 10 } } - } + }, + "size": 0 }`, []string{ `SELECT count() FROM ` + tableNameQuoted, @@ -338,7 +346,8 @@ var aggregationTests = []struct { "field": "taxful_total_price" } } - } + }, + "size": 0 }`, []string{ `SELECT count() FROM ` + tableNameQuoted, @@ -357,7 +366,8 @@ var aggregationTests = []struct { ] } } - } + }, + "size": 0 }`, []string{ `SELECT count() FROM ` + tableNameQuoted, @@ -373,7 +383,8 @@ var aggregationTests = []struct { "field": "total_quantity" } } - } + }, + "size": 0 }`, []string{ `SELECT count() FROM ` + tableNameQuoted, @@ -438,7 +449,8 @@ var aggregationTests = []struct { } } } - } + }, + "size": 0 }`, []string{ `SELECT count() FROM ` + tableNameQuoted, @@ -465,7 +477,8 @@ var aggregationTests = []struct { "field": "OriginCityName" } } - } + }, + "size": 0 }`, []string{ `SELECT count() FROM ` + tableNameQuoted, @@ -489,7 +502,8 @@ var aggregationTests = []struct { "shard_size": 5000 } } - } + }, + "size": 0 }`, []string{ `SELECT count() FROM ` + tableNameQuoted, diff --git a/quesma/queryparser/query_parser.go b/quesma/queryparser/query_parser.go index daeb3ffa6..8d15e52eb 100644 --- a/quesma/queryparser/query_parser.go +++ b/quesma/queryparser/query_parser.go @@ -949,7 +949,7 @@ func (cw *ClickhouseQueryTranslator) tryProcessSearchMetadata(queryMap QueryMap) // case 3: maybe it's a normal request var queryMapNested QueryMap var ok bool - size, _ := cw.parseSize(metadata) + size := cw.parseSize(metadata, model.DefaultSizeListQuery) if queryMapNested, ok = queryMap["aggs"].(QueryMap); !ok { return model.SearchQueryInfo{Typ: model.Normal, I2: size} } @@ -1002,7 +1002,7 @@ func (cw *ClickhouseQueryTranslator) isItFacetsRequest(queryMap QueryMap) (model return model.NewSearchQueryInfoNone(), false } - size, ok := cw.parseSize(firstNestingMap) + size, ok := cw.parseSizeExists(firstNestingMap) if !ok { return model.NewSearchQueryInfoNone(), false } @@ -1046,7 +1046,7 @@ func (cw *ClickhouseQueryTranslator) isItFacetsRequest(queryMap QueryMap) (model // returns (model.NewSearchQueryInfoNone, false) if it's not ListAllFields/ListByField request func (cw *ClickhouseQueryTranslator) isItListRequest(queryMap QueryMap) (model.SearchQueryInfo, bool) { // 1) case: very simple SELECT * kind of request - size, ok := cw.parseSize(queryMap) + size, ok := cw.parseSizeExists(queryMap) if !ok { return model.NewSearchQueryInfoNone(), false } @@ -1213,7 +1213,7 @@ func (cw *ClickhouseQueryTranslator) parseSortFields(sortMaps any) []string { } } -func (cw *ClickhouseQueryTranslator) parseSize(queryMap QueryMap) (size int, ok bool) { +func (cw *ClickhouseQueryTranslator) parseSizeExists(queryMap QueryMap) (size int, ok bool) { sizeRaw, exists := queryMap["size"] if !exists { return model.DefaultSizeListQuery, false @@ -1224,3 +1224,15 @@ func (cw *ClickhouseQueryTranslator) parseSize(queryMap QueryMap) (size int, ok return model.DefaultSizeListQuery, false } } + +func (cw *ClickhouseQueryTranslator) parseSize(queryMap QueryMap, defaultSize int) int { + sizeRaw, exists := queryMap["size"] + if !exists { + return defaultSize + } else if sizeAsFloat, ok := sizeRaw.(float64); ok { + return int(sizeAsFloat) + } else { + logger.WarnWithCtx(cw.Ctx).Msgf("invalid size type: %T, value: %v. Expected float64", sizeRaw, sizeRaw) + return defaultSize + } +} diff --git a/quesma/queryparser/query_translator.go b/quesma/queryparser/query_translator.go index ab3567777..f9105d1f0 100644 --- a/quesma/queryparser/query_translator.go +++ b/quesma/queryparser/query_translator.go @@ -471,6 +471,15 @@ func (cw *ClickhouseQueryTranslator) MakeAggregationPartOfResponse(queries []mod } func (cw *ClickhouseQueryTranslator) MakeResponseAggregation(queries []model.Query, ResultSets [][]model.QueryResultRow) *model.SearchResp { + hits := []model.SearchHit{} + // Process hits as last aggregation + if len(queries) > 0 && len(ResultSets) > 0 && queries[len(queries)-1].IsWildcard() { + response := cw.makeSearchResponseNormal(ResultSets[len(ResultSets)-1], queries[len(queries)-1].Highlighter) + hits = response.Hits.Hits + queries = queries[:len(queries)-1] + ResultSets = ResultSets[:len(ResultSets)-1] + } + var totalCount uint64 if len(ResultSets) > 0 && len(ResultSets[0]) > 0 && len(ResultSets[0][0].Cols) > 0 { // This if: doesn't hurt much, but mostly for tests, never seen need for this on "production". @@ -486,7 +495,7 @@ func (cw *ClickhouseQueryTranslator) MakeResponseAggregation(queries []model.Que return &model.SearchResp{ Aggregations: cw.MakeAggregationPartOfResponse(queries, ResultSets), Hits: model.SearchHits{ - Hits: []model.SearchHit{}, // seems redundant, but can't remove this, created JSON won't match + Hits: hits, Total: &model.Total{ Value: int(totalCount), // TODO just change this to uint64? It works now. Relation: "eq", diff --git a/quesma/quesma/search.go b/quesma/quesma/search.go index a27b533b7..d24289229 100644 --- a/quesma/quesma/search.go +++ b/quesma/quesma/search.go @@ -196,7 +196,6 @@ func (q *QueryRunner) handleSearchCommon(ctx context.Context, indexPattern strin var hits []model.QueryResultRow var aggregationResults [][]model.QueryResultRow oldHandlingUsed := false - newAggregationHandlingUsed := false tables := q.logManager.GetTableDefinitions() @@ -259,7 +258,6 @@ func (q *QueryRunner) handleSearchCommon(ctx context.Context, indexPattern strin } } } else if aggregations, err = queryTranslator.ParseAggregationJson(string(body)); err == nil { - newAggregationHandlingUsed = true columns := make([][]string, len(aggregations)) if optAsync != nil { go func() { @@ -272,16 +270,6 @@ func (q *QueryRunner) handleSearchCommon(ctx context.Context, indexPattern strin }() } else { translatedQueryBody, aggregationResults = q.searchWorker(ctx, aggregations, columns, table, true, nil) - if queryInfo.Size > 0 { - listQuery := queryTranslator.BuildNRowsQuery("*", simpleQuery, queryInfo.Size) - hits, err = q.logManager.ProcessQuery(ctx, table, listQuery, nil) - translatedQueryBody = append(translatedQueryBody, []byte("\n"+listQuery.String()+"\n")...) - if err != nil { - logger.ErrorWithCtx(ctx).Msgf("error processing fallback query. Err: %v, query: %+v", err, listQuery) - pushSecondaryInfo(q.quesmaManagementConsole, id, path, body, translatedQueryBody, responseBody, startTime) - return responseBody, err - } - } } } } else { @@ -292,14 +280,12 @@ func (q *QueryRunner) handleSearchCommon(ctx context.Context, indexPattern strin } if optAsync == nil { - var response, responseHits *model.SearchResp = nil, nil + var response *model.SearchResp = nil err = nil if oldHandlingUsed { response, err = queryTranslator.MakeSearchResponse(hits, model.Query{QueryInfo: queryInfo, Highlighter: highlighter}) - } else if newAggregationHandlingUsed { + } else { response = queryTranslator.MakeResponseAggregation(aggregations, aggregationResults) - responseHits, err = queryTranslator.MakeSearchResponse(hits, model.Query{QueryInfo: queryInfo, Highlighter: highlighter}) - response.Hits = responseHits.Hits } if err != nil { logger.ErrorWithCtx(ctx).Msgf("error making response: %v, queryInfo: %+v, rows: %v", err, queryInfo, hits) @@ -508,7 +494,7 @@ func (q *QueryRunner) searchWorkerCommon( logger.ErrorWithCtx(ctx).Msg(err.Error()) continue } - if doPostProcessing { + if doPostProcessing && query.Type != nil { rows = query.Type.PostprocessResults(rows) } hits = append(hits, rows)