Skip to content

Commit

Permalink
Refactor query translator (#193)
Browse files Browse the repository at this point in the history
No change of logic, just shuffling code around.

This is a stepping stone towards introducing a schema.

Motivation:
- `IQueryTranslator` is a bad abstraction, as many of its methods are
not implemented.
- obvious code repetitions `applySizeLimit` and `BuildNRowsQuery`
- `makeBasicQuery` with EQL is very confusing, we just create one type
of EQL query `ListByField`
  • Loading branch information
jakozaur authored May 22, 2024
1 parent d6376c4 commit 83e2c0a
Show file tree
Hide file tree
Showing 8 changed files with 164 additions and 175 deletions.
69 changes: 28 additions & 41 deletions quesma/eql/query_translator.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"mitmproxy/quesma/logger"
"mitmproxy/quesma/model"
"mitmproxy/quesma/queryparser"
"mitmproxy/quesma/queryparser/query_util"
"strconv"
"strings"
)
Expand All @@ -20,34 +21,6 @@ type ClickhouseEQLQueryTranslator struct {
Ctx context.Context
}

// applySizeLimit caps the requested number of hits at quesmaMaxSize.
// A warning is logged whenever the requested size had to be reduced.
func (cw *ClickhouseEQLQueryTranslator) applySizeLimit(size int) int {
	// FIXME hard limit here to prevent OOM
	const quesmaMaxSize = 10000
	if size <= quesmaMaxSize {
		return size
	}
	logger.WarnWithCtx(cw.Ctx).Msgf("setting hits size to=%d, got=%d", quesmaMaxSize, size)
	return quesmaMaxSize
}

// BuildNRowsQuery builds a query that selects fieldName from the translator's
// table, filtered by simpleQuery's WHERE statement.
//
// An ORDER BY clause is added when simpleQuery has sort fields, and a LIMIT
// clause (capped by applySizeLimit) is added when limit is positive.
func (cw *ClickhouseEQLQueryTranslator) BuildNRowsQuery(fieldName string, simpleQuery model.SimpleQuery, limit int) *model.Query {
	suffixes := []string{}
	if len(simpleQuery.SortFields) != 0 {
		orderBy := "ORDER BY " + queryparser.AsQueryString(simpleQuery.SortFields)
		suffixes = append(suffixes, orderBy)
	}
	if limit > 0 {
		capped := cw.applySizeLimit(limit)
		suffixes = append(suffixes, "LIMIT "+strconv.Itoa(capped))
	}

	result := &model.Query{
		Fields:          []string{fieldName},
		NonSchemaFields: []string{},
		WhereClause:     simpleQuery.Sql.Stmt,
		SuffixClauses:   suffixes,
		FromClause:      cw.Table.FullTableName(),
		CanParse:        true,
	}
	return result
}

func (cw *ClickhouseEQLQueryTranslator) MakeSearchResponse(ResultSet []model.QueryResultRow, query model.Query) (*model.SearchResp, error) {

// This shares a lot of code with the ClickhouseQueryTranslator
Expand Down Expand Up @@ -88,7 +61,33 @@ func (cw *ClickhouseEQLQueryTranslator) MakeSearchResponse(ResultSet []model.Que
}, nil
}

func (cw *ClickhouseEQLQueryTranslator) ParseQuery(queryAsJson string) (query model.SimpleQuery, searchQueryInfo model.SearchQueryInfo, highlighter model.Highlighter, err error) {
// ParseQuery parses the raw request body as an EQL query and converts it into
// the list of queries to execute.
//
// The results are, in order: the queries to run, the result column names
// (always nil for EQL), whether the queries are aggregations (always false for
// EQL), whether the body could be parsed, and an error. When the body parses
// without error but is flagged as not parseable, all results are zero values
// and the error is nil.
func (cw *ClickhouseEQLQueryTranslator) ParseQuery(body []byte) ([]model.Query, []string, bool, bool, error) {
	simpleQuery, queryInfo, highlighter, err := cw.parseQuery(string(body))
	if err != nil {
		logger.ErrorWithCtx(cw.Ctx).Msgf("error parsing query: %v", err)
		return nil, nil, false, false, err
	}
	if !simpleQuery.CanParse {
		return nil, nil, false, false, nil
	}

	// Pass the table's FullTableName() rather than its bare Name so that
	// FromClause matches what ClickhouseQueryTranslator.BuildNRowsQuery
	// hands to the same shared helper.
	query := query_util.BuildNRowsQuery(cw.Ctx, cw.Table.FullTableName(), "*", simpleQuery, queryInfo.I2)
	query.QueryInfo = queryInfo
	query.Highlighter = highlighter
	query.SortFields = simpleQuery.SortFields

	return []model.Query{*query}, nil, false, true, nil
}

func (cw *ClickhouseEQLQueryTranslator) parseQuery(queryAsJson string) (query model.SimpleQuery, searchQueryInfo model.SearchQueryInfo, highlighter model.Highlighter, err error) {

// no highlighting here
highlighter = queryparser.NewEmptyHighlighter()
Expand Down Expand Up @@ -150,18 +149,6 @@ func (cw *ClickhouseEQLQueryTranslator) ParseQuery(queryAsJson string) (query mo

// These methods are not supported by EQL. They are here to satisfy the interface.

// BuildSimpleCountQuery always panics: EQL does not support count queries.
func (cw *ClickhouseEQLQueryTranslator) BuildSimpleCountQuery(whereClause string) *model.Query {
panic("EQL does not support count")
}

// MakeResponseAggregation always panics: EQL does not support aggregations.
func (cw *ClickhouseEQLQueryTranslator) MakeResponseAggregation(aggregations []model.Query, aggregationResults [][]model.QueryResultRow) *model.SearchResp {
panic("EQL does not support aggregations")
}

// BuildFacetsQuery always panics: EQL does not support facets.
func (cw *ClickhouseEQLQueryTranslator) BuildFacetsQuery(fieldName string, simpleQuery model.SimpleQuery, limit int) *model.Query {
panic("EQL does not support facets")
}

// ParseAggregationJson always panics: EQL does not support aggregations.
func (cw *ClickhouseEQLQueryTranslator) ParseAggregationJson(aggregationJson string) ([]model.Query, error) {
panic("EQL does not support aggregations")
}
64 changes: 63 additions & 1 deletion quesma/queryparser/query_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"mitmproxy/quesma/logger"
"mitmproxy/quesma/model"
"mitmproxy/quesma/queryparser/lucene"
"mitmproxy/quesma/queryparser/query_util"
"strconv"
"strings"
"time"
Expand All @@ -23,7 +24,68 @@ func NewEmptyHighlighter() model.Highlighter {
}
}

func (cw *ClickhouseQueryTranslator) ParseQuery(queryAsJson string) (model.SimpleQuery, model.SearchQueryInfo, model.Highlighter, error) {
// ParseQuery parses the raw request body and converts it into the list of
// queries to execute.
//
// The results are, in order: the queries to run, the result column names, a
// flag telling whether the queries are aggregations, a flag telling whether
// the body could be parsed, and an error. When the body parses without error
// but is flagged as not parseable, all results are zero values and the error
// is nil.
func (cw *ClickhouseQueryTranslator) ParseQuery(body []byte) ([]model.Query, []string, bool, bool, error) {
	simpleQuery, queryInfo, highlighter, err := cw.ParseQueryInternal(string(body))
	if err != nil {
		logger.ErrorWithCtx(cw.Ctx).Msgf("error parsing query: %v", err)
		return nil, nil, false, false, err
	}
	if !simpleQuery.CanParse {
		return nil, nil, false, false, nil
	}

	// Aggregation requests are delegated to the aggregation parser and carry
	// no column list.
	if !query_util.IsNonAggregationQuery(queryInfo, body) {
		aggregations, aggErr := cw.ParseAggregationJson(string(body))
		if aggErr != nil {
			logger.ErrorWithCtx(cw.Ctx).Msgf("error parsing aggregation: %v", aggErr)
			return nil, nil, false, false, aggErr
		}
		return aggregations, nil, true, true, nil
	}

	// Everything else is served by a single basic query.
	basicQuery, columns := cw.makeBasicQuery(simpleQuery, queryInfo, highlighter)
	basicQuery.SortFields = simpleQuery.SortFields
	return []model.Query{*basicQuery}, columns, false, true, nil
}

// makeBasicQuery builds the single non-aggregation query that matches
// queryInfo.Typ and returns it together with the names of its result columns
// (nil for the row-listing variants that select "*").
//
// The returned query has its QueryInfo and Highlighter fields populated. A
// queryInfo.Typ that is not handled by the switch leaves the query nil, so the
// field assignments at the end would panic; callers are expected to pass only
// the types listed here.
func (cw *ClickhouseQueryTranslator) makeBasicQuery(
	simpleQuery model.SimpleQuery, queryInfo model.SearchQueryInfo, highlighter model.Highlighter) (*model.Query, []string) {
	var (
		built *model.Query
		cols  []string
	)
	switch queryInfo.Typ {
	case model.CountAsync:
		built = cw.BuildSimpleCountQuery(simpleQuery.Sql.Stmt)
		cols = []string{"doc_count"}
	case model.Facets, model.FacetsNumeric:
		// queryInfo = (Facets, fieldName, Limit results, Limit last rows to look into)
		built = cw.BuildFacetsQuery(queryInfo.FieldName, simpleQuery, queryInfo.I2)
		cols = []string{"key", "doc_count"}
	case model.ListByField:
		// queryInfo = (ListByField, fieldName, 0, LIMIT)
		built = cw.BuildNRowsQuery(queryInfo.FieldName, simpleQuery, queryInfo.I2)
		cols = []string{queryInfo.FieldName}
	case model.ListAllFields, model.Normal:
		// queryInfo = (ListAllFields, "*", 0, LIMIT); Normal is built the same way.
		built = cw.BuildNRowsQuery("*", simpleQuery, queryInfo.I2)
	}
	built.QueryInfo = queryInfo
	built.Highlighter = highlighter
	return built, cols
}

func (cw *ClickhouseQueryTranslator) ParseQueryInternal(queryAsJson string) (model.SimpleQuery, model.SearchQueryInfo, model.Highlighter, error) {
cw.ClearTokensToHighlight()
queryAsMap := make(QueryMap)
if queryAsJson != "" {
Expand Down
6 changes: 3 additions & 3 deletions quesma/queryparser/query_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func TestQueryParserStringAttrConfig(t *testing.T) {

for _, tt := range testdata.TestsSearch {
t.Run(tt.Name, func(t *testing.T) {
simpleQuery, queryInfo, _, _ := cw.ParseQuery(tt.QueryJson)
simpleQuery, queryInfo, _, _ := cw.ParseQueryInternal(tt.QueryJson)
assert.True(t, simpleQuery.CanParse, "can parse")
assert.Contains(t, tt.WantedSql, simpleQuery.Sql.Stmt, "contains wanted sql")
assert.Equal(t, tt.WantedQueryType, queryInfo.Typ, "equals to wanted query type")
Expand All @@ -75,7 +75,7 @@ func TestQueryParserNoFullTextFields(t *testing.T) {

for i, tt := range testdata.TestsSearchNoFullTextFields {
t.Run(strconv.Itoa(i), func(t *testing.T) {
simpleQuery, queryInfo, _, _ := cw.ParseQuery(tt.QueryJson)
simpleQuery, queryInfo, _, _ := cw.ParseQueryInternal(tt.QueryJson)
assert.True(t, simpleQuery.CanParse, "can parse")
assert.Contains(t, tt.WantedSql, simpleQuery.Sql.Stmt, "contains wanted sql")
assert.Equal(t, tt.WantedQueryType, queryInfo.Typ, "equals to wanted query type")
Expand All @@ -100,7 +100,7 @@ func TestQueryParserNoAttrsConfig(t *testing.T) {
cw := ClickhouseQueryTranslator{ClickhouseLM: lm, Table: table, Ctx: context.Background()}
for _, tt := range testdata.TestsSearchNoAttrs {
t.Run(tt.Name, func(t *testing.T) {
simpleQuery, queryInfo, _, _ := cw.ParseQuery(tt.QueryJson)
simpleQuery, queryInfo, _, _ := cw.ParseQueryInternal(tt.QueryJson)
assert.True(t, simpleQuery.CanParse)
assert.Contains(t, tt.WantedSql, simpleQuery.Sql.Stmt)
assert.Equal(t, tt.WantedQueryType, queryInfo.Typ)
Expand Down
45 changes: 2 additions & 43 deletions quesma/queryparser/query_translator.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ import (
"mitmproxy/quesma/logger"
"mitmproxy/quesma/model"
"mitmproxy/quesma/model/bucket_aggregations"
"mitmproxy/quesma/queryparser/query_util"
"mitmproxy/quesma/queryprocessor"
"mitmproxy/quesma/util"
"strconv"
"strings"
"time"
)

Expand Down Expand Up @@ -564,33 +564,8 @@ func (cw *ClickhouseQueryTranslator) BuildSimpleCountQuery(whereClause string) *
}
}

// applySizeLimit caps the requested number of hits at quesmaMaxSize.
// A warning is logged whenever the requested size had to be reduced.
func (cw *ClickhouseQueryTranslator) applySizeLimit(size int) int {
	// FIXME hard limit here to prevent OOM
	const quesmaMaxSize = 10000
	if size <= quesmaMaxSize {
		return size
	}
	logger.WarnWithCtx(cw.Ctx).Msgf("setting hits size to=%d, got=%d", quesmaMaxSize, size)
	return quesmaMaxSize
}

// BuildNRowsQuery builds a query over the translator's table (cw.Table.FullTableName()).
// fieldName == "*" ==> we query all
// otherwise ==> only this 1 field
// Sort fields, if any, become an ORDER BY clause; a positive limit becomes a LIMIT clause.
func (cw *ClickhouseQueryTranslator) BuildNRowsQuery(fieldName string, query model.SimpleQuery, limit int) *model.Query {
suffixClauses := make([]string, 0)
if len(query.SortFields) > 0 {
suffixClauses = append(suffixClauses, "ORDER BY "+AsQueryString(query.SortFields))
}
if limit > 0 {
suffixClauses = append(suffixClauses, "LIMIT "+strconv.Itoa(cw.applySizeLimit(limit)))
}
return &model.Query{
Fields: []string{fieldName},
WhereClause: query.Sql.Stmt,
SuffixClauses: suffixClauses,
FromClause: cw.Table.FullTableName(),
CanParse: true,
}
return query_util.BuildNRowsQuery(cw.Ctx, cw.Table.FullTableName(), fieldName, query, limit)
}

func (cw *ClickhouseQueryTranslator) BuildAutocompleteQuery(fieldName, whereClause string, limit int) *model.Query {
Expand Down Expand Up @@ -728,19 +703,3 @@ func (cw *ClickhouseQueryTranslator) sortInTopologicalOrder(queries []model.Quer
}
return indexesSorted
}

// AsQueryString renders sortFields as a comma-separated list suitable for an
// ORDER BY clause. Each field name is quoted with strconv.Quote and followed
// by " desc" when the field sorts in descending order. An empty input yields
// an empty string.
func AsQueryString(sortFields []model.SortField) string {
	if len(sortFields) == 0 {
		return ""
	}
	parts := make([]string, len(sortFields))
	for i, sf := range sortFields {
		part := strconv.Quote(sf.Field)
		if sf.Desc {
			part += " desc"
		}
		parts[i] = part
	}
	return strings.Join(parts, ", ")
}
63 changes: 63 additions & 0 deletions quesma/queryparser/query_util/query_util.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package query_util

import (
"bytes"
"context"
"mitmproxy/quesma/logger"
"mitmproxy/quesma/model"
"strconv"
"strings"
)

// IsNonAggregationQuery reports whether the request should be handled as a
// plain (non-aggregation) query.
//
// Facets, FacetsNumeric and CountAsync queries always qualify. ListByField,
// ListAllFields and Normal queries qualify only when the raw body does not
// contain the byte sequence "aggs". Any other query type does not qualify.
func IsNonAggregationQuery(queryInfo model.SearchQueryInfo, body []byte) bool {
	switch queryInfo.Typ {
	case model.Facets, model.FacetsNumeric, model.CountAsync:
		return true
	case model.ListByField, model.ListAllFields, model.Normal:
		return !bytes.Contains(body, []byte("aggs"))
	default:
		return false
	}
}

// BuildNRowsQuery builds a query that selects fieldName from tableName,
// filtered by query's WHERE statement.
//
// An ORDER BY clause is added when query has sort fields, and a LIMIT clause
// (capped by applySizeLimit) is added when limit is positive. ctx is only used
// for logging when the limit is capped.
func BuildNRowsQuery(ctx context.Context, tableName string, fieldName string, query model.SimpleQuery, limit int) *model.Query {
	suffixes := []string{}
	if len(query.SortFields) != 0 {
		orderBy := "ORDER BY " + AsQueryString(query.SortFields)
		suffixes = append(suffixes, orderBy)
	}
	if limit > 0 {
		capped := applySizeLimit(ctx, limit)
		suffixes = append(suffixes, "LIMIT "+strconv.Itoa(capped))
	}

	result := &model.Query{
		Fields:        []string{fieldName},
		WhereClause:   query.Sql.Stmt,
		SuffixClauses: suffixes,
		FromClause:    tableName,
		CanParse:      true,
	}
	return result
}

// AsQueryString renders sortFields as a comma-separated list suitable for an
// ORDER BY clause. Each field name is quoted with strconv.Quote and followed
// by " desc" when the field sorts in descending order. An empty input yields
// an empty string.
func AsQueryString(sortFields []model.SortField) string {
	if len(sortFields) == 0 {
		return ""
	}
	rendered := make([]string, len(sortFields))
	for idx, field := range sortFields {
		entry := strconv.Quote(field.Field)
		if field.Desc {
			entry += " desc"
		}
		rendered[idx] = entry
	}
	return strings.Join(rendered, ", ")
}

// applySizeLimit caps the requested number of hits at quesmaMaxSize.
// A warning is logged through ctx whenever the requested size had to be
// reduced.
func applySizeLimit(ctx context.Context, size int) int {
	// FIXME hard limit here to prevent OOM
	const quesmaMaxSize = 10000
	if size <= quesmaMaxSize {
		return size
	}
	logger.WarnWithCtx(ctx).Msgf("setting hits size to=%d, got=%d", quesmaMaxSize, size)
	return quesmaMaxSize
}
9 changes: 1 addition & 8 deletions quesma/quesma/query_translator.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,7 @@ import (
// 2. ClickhouseEQLQueryTranslator (implements only a subset of methods)

type IQueryTranslator interface {
ParseQuery(queryAsJson string) (model.SimpleQuery, model.SearchQueryInfo, model.Highlighter, error)
ParseAggregationJson(aggregationJson string) ([]model.Query, error)

BuildSimpleCountQuery(whereClause string) *model.Query
BuildNRowsQuery(fieldName string, simpleQuery model.SimpleQuery, limit int) *model.Query
BuildFacetsQuery(fieldName string, simpleQuery model.SimpleQuery, limit int) *model.Query
ParseQuery(body []byte) ([]model.Query, []string, bool, bool, error)

MakeSearchResponse(ResultSet []model.QueryResultRow, query model.Query) (*model.SearchResp, error)
MakeResponseAggregation(aggregations []model.Query, aggregationResults [][]model.QueryResultRow) *model.SearchResp
Expand All @@ -36,7 +31,6 @@ const (
)

func NewQueryTranslator(ctx context.Context, language QueryLanguage, table *clickhouse.Table, logManager *clickhouse.LogManager, dateMathRenderer string) (queryTranslator IQueryTranslator) {

switch language {
case QueryLanguageEQL:
queryTranslator = &eql.ClickhouseEQLQueryTranslator{ClickhouseLM: logManager, Table: table, Ctx: ctx}
Expand All @@ -45,5 +39,4 @@ func NewQueryTranslator(ctx context.Context, language QueryLanguage, table *clic
}

return queryTranslator

}
Loading

0 comments on commit 83e2c0a

Please sign in to comment.