diff --git a/quesma/eql/query_translator.go b/quesma/eql/query_translator.go index 583ae4a64..46f9a4362 100644 --- a/quesma/eql/query_translator.go +++ b/quesma/eql/query_translator.go @@ -8,6 +8,7 @@ import ( "mitmproxy/quesma/logger" "mitmproxy/quesma/model" "mitmproxy/quesma/queryparser" + "mitmproxy/quesma/queryparser/query_util" "strconv" "strings" ) @@ -20,34 +21,6 @@ type ClickhouseEQLQueryTranslator struct { Ctx context.Context } -func (cw *ClickhouseEQLQueryTranslator) applySizeLimit(size int) int { - // FIXME hard limit here to prevent OOM - const quesmaMaxSize = 10000 - if size > quesmaMaxSize { - logger.WarnWithCtx(cw.Ctx).Msgf("setting hits size to=%d, got=%d", quesmaMaxSize, size) - size = quesmaMaxSize - } - return size -} - -func (cw *ClickhouseEQLQueryTranslator) BuildNRowsQuery(fieldName string, simpleQuery model.SimpleQuery, limit int) *model.Query { - suffixClauses := make([]string, 0) - if len(simpleQuery.SortFields) > 0 { - suffixClauses = append(suffixClauses, "ORDER BY "+queryparser.AsQueryString(simpleQuery.SortFields)) - } - if limit > 0 { - suffixClauses = append(suffixClauses, "LIMIT "+strconv.Itoa(cw.applySizeLimit(limit))) - } - return &model.Query{ - Fields: []string{fieldName}, - NonSchemaFields: []string{}, - WhereClause: simpleQuery.Sql.Stmt, - SuffixClauses: suffixClauses, - FromClause: cw.Table.FullTableName(), - CanParse: true, - } -} - func (cw *ClickhouseEQLQueryTranslator) MakeSearchResponse(ResultSet []model.QueryResultRow, query model.Query) (*model.SearchResp, error) { // This shares a lot of code with the ClickhouseQueryTranslator @@ -88,7 +61,33 @@ func (cw *ClickhouseEQLQueryTranslator) MakeSearchResponse(ResultSet []model.Que }, nil } -func (cw *ClickhouseEQLQueryTranslator) ParseQuery(queryAsJson string) (query model.SimpleQuery, searchQueryInfo model.SearchQueryInfo, highlighter model.Highlighter, err error) { +func (cw *ClickhouseEQLQueryTranslator) ParseQuery(body []byte) ([]model.Query, []string, bool, bool, error) { + simpleQuery, queryInfo, highlighter, err := cw.parseQuery(string(body)) + if err != nil { + logger.ErrorWithCtx(cw.Ctx).Msgf("error parsing query: %v", err) + return nil, nil, false, false, err + } + var columns []string + var query *model.Query + var queries []model.Query + var isAggregation bool + canParse := false + + if simpleQuery.CanParse { + canParse = true + query = query_util.BuildNRowsQuery(cw.Ctx, cw.Table.Name, "*", simpleQuery, queryInfo.I2) + query.QueryInfo = queryInfo + query.Highlighter = highlighter + query.SortFields = simpleQuery.SortFields + queries = append(queries, *query) + isAggregation = false + return queries, columns, isAggregation, canParse, nil + } + + return nil, nil, false, false, err +} + +func (cw *ClickhouseEQLQueryTranslator) parseQuery(queryAsJson string) (query model.SimpleQuery, searchQueryInfo model.SearchQueryInfo, highlighter model.Highlighter, err error) { // no highlighting here highlighter = queryparser.NewEmptyHighlighter() @@ -150,18 +149,6 @@ func (cw *ClickhouseEQLQueryTranslator) ParseQuery(queryAsJson string) (query mo // These methods are not supported by EQL. They are here to satisfy the interface. -func (cw *ClickhouseEQLQueryTranslator) BuildSimpleCountQuery(whereClause string) *model.Query { - panic("EQL does not support count") -} - func (cw *ClickhouseEQLQueryTranslator) MakeResponseAggregation(aggregations []model.Query, aggregationResults [][]model.QueryResultRow) *model.SearchResp { panic("EQL does not support aggregations") } - -func (cw *ClickhouseEQLQueryTranslator) BuildFacetsQuery(fieldName string, simpleQuery model.SimpleQuery, limit int) *model.Query { - panic("EQL does not support facets") -} - -func (cw *ClickhouseEQLQueryTranslator) ParseAggregationJson(aggregationJson string) ([]model.Query, error) { - panic("EQL does not support aggregations") -} diff --git a/quesma/queryparser/query_parser.go b/quesma/queryparser/query_parser.go index 3d0a1606f..22d5569c5 100644 --- a/quesma/queryparser/query_parser.go +++ b/quesma/queryparser/query_parser.go @@ -8,6 +8,7 @@ import ( "mitmproxy/quesma/logger" "mitmproxy/quesma/model" "mitmproxy/quesma/queryparser/lucene" + "mitmproxy/quesma/queryparser/query_util" "strconv" "strings" "time" @@ -23,7 +24,68 @@ func NewEmptyHighlighter() model.Highlighter { } } -func (cw *ClickhouseQueryTranslator) ParseQuery(queryAsJson string) (model.SimpleQuery, model.SearchQueryInfo, model.Highlighter, error) { +func (cw *ClickhouseQueryTranslator) ParseQuery(body []byte) ([]model.Query, []string, bool, bool, error) { + simpleQuery, queryInfo, highlighter, err := cw.ParseQueryInternal(string(body)) + if err != nil { + logger.ErrorWithCtx(cw.Ctx).Msgf("error parsing query: %v", err) + return nil, nil, false, false, err + } + var columns []string + var query *model.Query + var queries []model.Query + var isAggregation bool + canParse := false + + if simpleQuery.CanParse { + canParse = true + if query_util.IsNonAggregationQuery(queryInfo, body) { + query, columns = cw.makeBasicQuery(simpleQuery, queryInfo, highlighter) + query.SortFields = simpleQuery.SortFields + queries = append(queries, *query) + isAggregation = false + return queries, columns, isAggregation, canParse, nil + } else { + queries, err = cw.ParseAggregationJson(string(body)) + if err != nil { + logger.ErrorWithCtx(cw.Ctx).Msgf("error parsing aggregation: %v", err) + return nil, nil, false, false, err + } + isAggregation = true + return queries, columns, isAggregation, canParse, nil + } + } + + return nil, nil, false, false, err +} + +func (cw *ClickhouseQueryTranslator) makeBasicQuery( + simpleQuery model.SimpleQuery, queryInfo model.SearchQueryInfo, highlighter model.Highlighter) (*model.Query, []string) { + var fullQuery *model.Query + var columns []string + switch queryInfo.Typ { + case model.CountAsync: + fullQuery = cw.BuildSimpleCountQuery(simpleQuery.Sql.Stmt) + columns = []string{"doc_count"} + case model.Facets, model.FacetsNumeric: + // queryInfo = (Facets, fieldName, Limit results, Limit last rows to look into) + fullQuery = cw.BuildFacetsQuery(queryInfo.FieldName, simpleQuery, queryInfo.I2) + columns = []string{"key", "doc_count"} + case model.ListByField: + // queryInfo = (ListByField, fieldName, 0, LIMIT) + fullQuery = cw.BuildNRowsQuery(queryInfo.FieldName, simpleQuery, queryInfo.I2) + columns = []string{queryInfo.FieldName} + case model.ListAllFields: + // queryInfo = (ListAllFields, "*", 0, LIMIT) + fullQuery = cw.BuildNRowsQuery("*", simpleQuery, queryInfo.I2) + case model.Normal: + fullQuery = cw.BuildNRowsQuery("*", simpleQuery, queryInfo.I2) + } + fullQuery.QueryInfo = queryInfo + fullQuery.Highlighter = highlighter + return fullQuery, columns +} + +func (cw *ClickhouseQueryTranslator) ParseQueryInternal(queryAsJson string) (model.SimpleQuery, model.SearchQueryInfo, model.Highlighter, error) { cw.ClearTokensToHighlight() queryAsMap := make(QueryMap) if queryAsJson != "" { diff --git a/quesma/queryparser/query_parser_test.go b/quesma/queryparser/query_parser_test.go index e37755569..992359a94 100644 --- a/quesma/queryparser/query_parser_test.go +++ b/quesma/queryparser/query_parser_test.go @@ -48,7 +48,7 @@ func TestQueryParserStringAttrConfig(t *testing.T) { for _, tt := range testdata.TestsSearch { t.Run(tt.Name, func(t *testing.T) { - simpleQuery, queryInfo, _, _ := cw.ParseQuery(tt.QueryJson) + simpleQuery, queryInfo, _, _ := cw.ParseQueryInternal(tt.QueryJson) assert.True(t, simpleQuery.CanParse, "can parse") assert.Contains(t, tt.WantedSql, simpleQuery.Sql.Stmt, "contains wanted sql") assert.Equal(t, tt.WantedQueryType, queryInfo.Typ, "equals to wanted query type") @@ -75,7 +75,7 @@ func TestQueryParserNoFullTextFields(t *testing.T) { for i, tt := range testdata.TestsSearchNoFullTextFields { t.Run(strconv.Itoa(i), func(t *testing.T) { - simpleQuery, queryInfo, _, _ := cw.ParseQuery(tt.QueryJson) + simpleQuery, queryInfo, _, _ := cw.ParseQueryInternal(tt.QueryJson) assert.True(t, simpleQuery.CanParse, "can parse") assert.Contains(t, tt.WantedSql, simpleQuery.Sql.Stmt, "contains wanted sql") assert.Equal(t, tt.WantedQueryType, queryInfo.Typ, "equals to wanted query type") @@ -100,7 +100,7 @@ func TestQueryParserNoAttrsConfig(t *testing.T) { cw := ClickhouseQueryTranslator{ClickhouseLM: lm, Table: table, Ctx: context.Background()} for _, tt := range testdata.TestsSearchNoAttrs { t.Run(tt.Name, func(t *testing.T) { - simpleQuery, queryInfo, _, _ := cw.ParseQuery(tt.QueryJson) + simpleQuery, queryInfo, _, _ := cw.ParseQueryInternal(tt.QueryJson) assert.True(t, simpleQuery.CanParse) assert.Contains(t, tt.WantedSql, simpleQuery.Sql.Stmt) assert.Equal(t, tt.WantedQueryType, queryInfo.Typ) diff --git a/quesma/queryparser/query_translator.go b/quesma/queryparser/query_translator.go index cf07681e6..952f11a73 100644 --- a/quesma/queryparser/query_translator.go +++ b/quesma/queryparser/query_translator.go @@ -8,10 +8,10 @@ import ( "mitmproxy/quesma/logger" "mitmproxy/quesma/model" "mitmproxy/quesma/model/bucket_aggregations" + "mitmproxy/quesma/queryparser/query_util" "mitmproxy/quesma/queryprocessor" "mitmproxy/quesma/util" "strconv" - "strings" "time" ) @@ -564,33 +564,8 @@ func (cw *ClickhouseQueryTranslator) BuildSimpleCountQuery(whereClause string) * } } -func (cw *ClickhouseQueryTranslator) applySizeLimit(size int) int { - // FIXME hard limit here to prevent OOM - const quesmaMaxSize = 10000 - if size > quesmaMaxSize { - logger.WarnWithCtx(cw.Ctx).Msgf("setting hits size to=%d, got=%d", quesmaMaxSize, size) - size = quesmaMaxSize - } - return size -} - -// GetNMostRecentRows fieldName == "*" ==> we query all -// otherwise ==> only this 1 field func (cw *ClickhouseQueryTranslator) BuildNRowsQuery(fieldName string, query model.SimpleQuery, limit int) *model.Query { - suffixClauses := make([]string, 0) - if len(query.SortFields) > 0 { - suffixClauses = append(suffixClauses, "ORDER BY "+AsQueryString(query.SortFields)) - } - if limit > 0 { - suffixClauses = append(suffixClauses, "LIMIT "+strconv.Itoa(cw.applySizeLimit(limit))) - } - return &model.Query{ - Fields: []string{fieldName}, - WhereClause: query.Sql.Stmt, - SuffixClauses: suffixClauses, - FromClause: cw.Table.FullTableName(), - CanParse: true, - } + return query_util.BuildNRowsQuery(cw.Ctx, cw.Table.FullTableName(), fieldName, query, limit) } func (cw *ClickhouseQueryTranslator) BuildAutocompleteQuery(fieldName, whereClause string, limit int) *model.Query { @@ -728,19 +703,3 @@ func (cw *ClickhouseQueryTranslator) sortInTopologicalOrder(queries []model.Quer } return indexesSorted } - -func AsQueryString(sortFields []model.SortField) string { - if len(sortFields) == 0 { - return "" - } - sortStrings := make([]string, 0, len(sortFields)) - for _, sortField := range sortFields { - query := strings.Builder{} - query.WriteString(strconv.Quote(sortField.Field)) - if sortField.Desc { - query.WriteString(" desc") - } - sortStrings = append(sortStrings, query.String()) - } - return strings.Join(sortStrings, ", ") -} diff --git a/quesma/queryparser/query_util/query_util.go b/quesma/queryparser/query_util/query_util.go new file mode 100644 index 000000000..e715cb59d --- /dev/null +++ b/quesma/queryparser/query_util/query_util.go @@ -0,0 +1,63 @@ +package query_util + +import ( + "bytes" + "context" + "mitmproxy/quesma/logger" + "mitmproxy/quesma/model" + "strconv" + "strings" +) + +func IsNonAggregationQuery(queryInfo model.SearchQueryInfo, body []byte) bool { + return ((queryInfo.Typ == model.ListByField || + queryInfo.Typ == model.ListAllFields || + queryInfo.Typ == model.Normal) && + !bytes.Contains(body, []byte("aggs"))) || + queryInfo.Typ == model.Facets || + queryInfo.Typ == model.FacetsNumeric || + queryInfo.Typ == model.CountAsync +} + +func BuildNRowsQuery(ctx context.Context, tableName string, fieldName string, query model.SimpleQuery, limit int) *model.Query { + suffixClauses := make([]string, 0) + if len(query.SortFields) > 0 { + suffixClauses = append(suffixClauses, "ORDER BY "+AsQueryString(query.SortFields)) + } + if limit > 0 { + suffixClauses = append(suffixClauses, "LIMIT "+strconv.Itoa(applySizeLimit(ctx, limit))) + } + return &model.Query{ + Fields: []string{fieldName}, + WhereClause: query.Sql.Stmt, + SuffixClauses: suffixClauses, + FromClause: tableName, + CanParse: true, + } +} + +func AsQueryString(sortFields []model.SortField) string { + if len(sortFields) == 0 { + return "" + } + sortStrings := make([]string, 0, len(sortFields)) + for _, sortField := range sortFields { + query := strings.Builder{} + query.WriteString(strconv.Quote(sortField.Field)) + if sortField.Desc { + query.WriteString(" desc") + } + sortStrings = append(sortStrings, query.String()) + } + return strings.Join(sortStrings, ", ") +} + +func applySizeLimit(ctx context.Context, size int) int { + // FIXME hard limit here to prevent OOM + const quesmaMaxSize = 10000 + if size > quesmaMaxSize { + logger.WarnWithCtx(ctx).Msgf("setting hits size to=%d, got=%d", quesmaMaxSize, size) + size = quesmaMaxSize + } + return size +} diff --git a/quesma/quesma/query_translator.go b/quesma/quesma/query_translator.go index c512cc992..9b5a48ad3 100644 --- a/quesma/quesma/query_translator.go +++ b/quesma/quesma/query_translator.go @@ -17,12 +17,7 @@ import ( // 2. ClickhouseEQLQueryTranslator (implements only a subset of methods) type IQueryTranslator interface { - ParseQuery(queryAsJson string) (model.SimpleQuery, model.SearchQueryInfo, model.Highlighter, error) - ParseAggregationJson(aggregationJson string) ([]model.Query, error) - - BuildSimpleCountQuery(whereClause string) *model.Query - BuildNRowsQuery(fieldName string, simpleQuery model.SimpleQuery, limit int) *model.Query - BuildFacetsQuery(fieldName string, simpleQuery model.SimpleQuery, limit int) *model.Query + ParseQuery(body []byte) ([]model.Query, []string, bool, bool, error) MakeSearchResponse(ResultSet []model.QueryResultRow, query model.Query) (*model.SearchResp, error) MakeResponseAggregation(aggregations []model.Query, aggregationResults [][]model.QueryResultRow) *model.SearchResp @@ -36,7 +31,6 @@ const ( ) func NewQueryTranslator(ctx context.Context, language QueryLanguage, table *clickhouse.Table, logManager *clickhouse.LogManager, dateMathRenderer string) (queryTranslator IQueryTranslator) { - switch language { case QueryLanguageEQL: queryTranslator = &eql.ClickhouseEQLQueryTranslator{ClickhouseLM: logManager, Table: table, Ctx: ctx} @@ -45,5 +39,4 @@ func NewQueryTranslator(ctx context.Context, language QueryLanguage, table *clic } return queryTranslator - } diff --git a/quesma/quesma/search.go b/quesma/quesma/search.go index 0601c684f..00edb2ee3 100644 --- a/quesma/quesma/search.go +++ b/quesma/quesma/search.go @@ -1,7 +1,6 @@ package quesma import ( - "bytes" "context" "errors" "fmt" @@ -11,6 +10,7 @@ import ( "mitmproxy/quesma/logger" "mitmproxy/quesma/model" "mitmproxy/quesma/queryparser" + "mitmproxy/quesma/queryparser/query_util" "mitmproxy/quesma/quesma/config" "mitmproxy/quesma/quesma/recovery" "mitmproxy/quesma/quesma/ui" @@ -123,53 +123,6 @@ type AsyncQuery struct { startTime time.Time } -func isNonAggregationQuery(queryInfo model.SearchQueryInfo, body []byte) bool { - return ((queryInfo.Typ == model.ListByField || - queryInfo.Typ == model.ListAllFields || - queryInfo.Typ == model.Normal) && - !bytes.Contains(body, []byte("aggs"))) || - queryInfo.Typ == model.Facets || - queryInfo.Typ == model.FacetsNumeric || - queryInfo.Typ == model.CountAsync -} - -func (q *QueryRunner) ParseQuery(ctx context.Context, - queryTranslator IQueryTranslator, - body []byte, - table *clickhouse.Table) ([]model.Query, []string, bool, bool, error) { - simpleQuery, queryInfo, highlighter, err := queryTranslator.ParseQuery(string(body)) - if err != nil { - logger.ErrorWithCtx(ctx).Msgf("error parsing query: %v", err) - return nil, nil, false, false, err - } - var columns []string - var query *model.Query - var queries []model.Query - var isAggregation bool - canParse := false - - if simpleQuery.CanParse { - canParse = true - if isNonAggregationQuery(queryInfo, body) { - query, columns = q.makeBasicQuery(ctx, queryTranslator, table, simpleQuery, queryInfo, highlighter) - query.SortFields = simpleQuery.SortFields - queries = append(queries, *query) - isAggregation = false - return queries, columns, isAggregation, canParse, nil - } else { - queries, err = queryTranslator.ParseAggregationJson(string(body)) - if err != nil { - logger.ErrorWithCtx(ctx).Msgf("error parsing aggregation: %v", err) - return nil, nil, false, false, err - } - isAggregation = true - return queries, columns, isAggregation, canParse, nil - } - } - - return nil, nil, false, false, err -} - func (q *QueryRunner) handleSearchCommon(ctx context.Context, indexPattern string, body []byte, optAsync *AsyncQuery, queryLanguage QueryLanguage) ([]byte, error) { sources, sourcesElastic, sourcesClickhouse := ResolveSources(indexPattern, q.cfg, q.im, q.logManager) @@ -241,10 +194,10 @@ func (q *QueryRunner) handleSearchCommon(ctx context.Context, indexPattern strin queryTranslator := NewQueryTranslator(ctx, queryLanguage, table, q.logManager, q.DateMathRenderer) - queries, columns, isAggregation, canParse, err := q.ParseQuery(ctx, queryTranslator, body, table) + queries, columns, isAggregation, canParse, err := queryTranslator.ParseQuery(body) if canParse { - if isNonAggregationQuery(queries[0].QueryInfo, body) { + if query_util.IsNonAggregationQuery(queries[0].QueryInfo, body) { if properties := q.findNonexistingProperties(queries[0].QueryInfo, queries[0].SortFields, table); len(properties) > 0 { logger.DebugWithCtx(ctx).Msgf("properties %s not found in table %s", properties, table.Name) if elasticsearch.IsIndexPattern(indexPattern) { @@ -437,34 +390,6 @@ func (q *QueryRunner) addAsyncQueryContext(ctx context.Context, cancel context.C q.AsyncQueriesContexts.Store(asyncRequestIdStr, NewAsyncQueryContext(ctx, cancel, asyncRequestIdStr)) } -func (q *QueryRunner) makeBasicQuery(ctx context.Context, - queryTranslator IQueryTranslator, table *clickhouse.Table, - simpleQuery model.SimpleQuery, queryInfo model.SearchQueryInfo, highlighter model.Highlighter) (*model.Query, []string) { - var fullQuery *model.Query - var columns []string - switch queryInfo.Typ { - case model.CountAsync: - fullQuery = queryTranslator.BuildSimpleCountQuery(simpleQuery.Sql.Stmt) - columns = []string{"doc_count"} - case model.Facets, model.FacetsNumeric: - // queryInfo = (Facets, fieldName, Limit results, Limit last rows to look into) - fullQuery = queryTranslator.BuildFacetsQuery(queryInfo.FieldName, simpleQuery, queryInfo.I2) - columns = []string{"key", "doc_count"} - case model.ListByField: - // queryInfo = (ListByField, fieldName, 0, LIMIT) - fullQuery = queryTranslator.BuildNRowsQuery(queryInfo.FieldName, simpleQuery, queryInfo.I2) - columns = []string{queryInfo.FieldName} - case model.ListAllFields: - // queryInfo = (ListAllFields, "*", 0, LIMIT) - fullQuery = queryTranslator.BuildNRowsQuery("*", simpleQuery, queryInfo.I2) - case model.Normal: - fullQuery = queryTranslator.BuildNRowsQuery("*", simpleQuery, queryInfo.I2) - } - fullQuery.QueryInfo = queryInfo - fullQuery.Highlighter = highlighter - return fullQuery, columns -} - func (q *QueryRunner) searchWorkerCommon( ctx context.Context, queries []model.Query, diff --git a/quesma/quesma/search_opensearch_test.go b/quesma/quesma/search_opensearch_test.go index f121cb7da..c8f942b38 100644 --- a/quesma/quesma/search_opensearch_test.go +++ b/quesma/quesma/search_opensearch_test.go @@ -40,7 +40,7 @@ func TestSearchOpensearch(t *testing.T) { managementConsole := ui.NewQuesmaManagementConsole(cfg, nil, nil, make(<-chan tracing.LogWithLevel, 50000), telemetry.NewPhoneHomeEmptyAgent()) cw := queryparser.ClickhouseQueryTranslator{ClickhouseLM: lm, Table: &table, Ctx: context.Background()} - simpleQuery, queryInfo, _, _ := cw.ParseQuery(tt.QueryJson) + simpleQuery, queryInfo, _, _ := cw.ParseQueryInternal(tt.QueryJson) assert.True(t, simpleQuery.CanParse, "can parse") assert.Contains(t, tt.WantedSql, simpleQuery.Sql.Stmt, "contains wanted sql") assert.Equal(t, tt.WantedQueryType, queryInfo.Typ, "equals to wanted query type")