Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Second part of reporting unsupported query types #44

Merged
merged 5 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 76 additions & 2 deletions quesma/model/query_types_list.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package model

// list of all aggregation types in Elasticsearch.
// AggregationQueryTypes is a list of all aggregation types in Elasticsearch.
// More details: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations.html
var AggregationQueryTypes = []string{
// metrics:
"avg",
Expand Down Expand Up @@ -89,4 +90,77 @@ var AggregationQueryTypes = []string{
"sum_bucket",
}

// QueryDSLTypes is a list of all Query DSL types in Elasticsearch, grouped below
// by the category headings used in the Elasticsearch reference.
// "geo_distance" is intentionally left out for now (see the note in the Geo section).
// More details: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html
var QueryDSLTypes = []string{
// Compound queries:
"bool",
"boosting",
"constant_score",
"dis_max",
"function_score",
// Full text queries:
"intervals",
"match",
"match_bool_prefix",
"match_phrase",
"match_phrase_prefix",
"combined_fields",
"multi_match",
"query_string",
"simple_query_string",
// Geo queries:
"geo_bounding_box",
// "geo_distance", // same name as aggregation. Easier to have it commented out for now, and catch both cases in one way.
"geo_grid",
"geo_polygon",
"geo_shape",
// Shape queries:
"shape",
// Joining queries:
"nested",
"has_child",
"has_parent",
"parent_id",
// Match all:
"match_all",
// Span queries:
"span_containing",
"span_field_masking",
"span_first",
"span_multi",
"span_near",
"span_not",
"span_or",
"span_term",
"span_within",
// Specialized queries:
"distance_feature",
"more_like_this",
"percolate",
"knn",
"rank_feature",
"script",
"script_score",
"wrapper",
"pinned",
"rule_query",
"weighted_tokens",
// Term-level queries:
"exists",
"fuzzy",
"ids",
"prefix",
"range",
"regexp",
"term",
"terms",
"terms_set",
"wildcard",
// Text expansion:
"text_expansion",
}

// AllQueryTypes is a list of all query types in Elasticsearch: every entry of
// AggregationQueryTypes followed by every entry of QueryDSLTypes.
// So far used for listing types of queries we received, but don't support.
//
// The result is built in a freshly allocated slice instead of appending directly
// to AggregationQueryTypes, so it never shares a backing array with that slice
// (a direct append would alias it whenever AggregationQueryTypes had spare capacity).
var AllQueryTypes = append(
	append(make([]string, 0, len(AggregationQueryTypes)+len(QueryDSLTypes)), AggregationQueryTypes...),
	QueryDSLTypes...,
)
44 changes: 30 additions & 14 deletions quesma/queryparser/query_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -364,41 +364,54 @@ func (cw *ClickhouseQueryTranslator) parseQueryMap(queryMap QueryMap) SimpleQuer
logger.WarnWithCtx(cw.Ctx).Msgf("query is not a dict. key: %s, value: %v", k, v)
}
} else {
logger.WarnWithCtx(cw.Ctx).Msgf("unsupported query type: %s, value: %v", k, v)
logger.WarnWithCtxAndReason(cw.Ctx, logger.ReasonUnsupportedQuery(k)).Msgf("unsupported query type: %s, value: %v", k, v)
}
}
return newSimpleQuery(NewSimpleStatement("can't parse query: "+pp.Sprint(queryMap)), false)
}

// Parses each SimpleQuery separately, returns list of translated SQLs
func (cw *ClickhouseQueryTranslator) parseQueryMapArray(queryMaps []interface{}) []Statement {
results := make([]Statement, len(queryMaps))
func (cw *ClickhouseQueryTranslator) parseQueryMapArray(queryMaps []interface{}) (stmts []Statement, canParse bool) {
stmts = make([]Statement, len(queryMaps))
canParse = true
for i, v := range queryMaps {
qmap := cw.parseQueryMap(v.(QueryMap))
results[i] = qmap.Sql
results[i].FieldName = qmap.FieldName
if vAsMap, ok := v.(QueryMap); ok {
query := cw.parseQueryMap(vAsMap)
stmts[i] = query.Sql
stmts[i].FieldName = query.FieldName
if !query.CanParse {
canParse = false
}
} else {
logger.WarnWithCtx(cw.Ctx).Msgf("invalid query type: %T, value: %v", v, v)
canParse = false
}
}
return results
return stmts, canParse
}

func (cw *ClickhouseQueryTranslator) iterateListOrDictAndParse(queryMaps interface{}) []Statement {
func (cw *ClickhouseQueryTranslator) iterateListOrDictAndParse(queryMaps interface{}) (stmts []Statement, canParse bool) {
switch queryMapsTyped := queryMaps.(type) {
case []interface{}:
return cw.parseQueryMapArray(queryMapsTyped)
case QueryMap:
return []Statement{cw.parseQueryMap(queryMapsTyped).Sql}
simpleQuery := cw.parseQueryMap(queryMapsTyped)
return []Statement{simpleQuery.Sql}, simpleQuery.CanParse
default:
logger.WarnWithCtx(cw.Ctx).Msgf("Invalid query type: %T, value: %v", queryMapsTyped, queryMapsTyped)
return []Statement{NewSimpleStatement("invalid iteration")}
return []Statement{NewSimpleStatement("invalid iteration")}, false
}
}

// TODO: fully support the minimum_should_match parameter. Currently only ints are supported, and values >1 are changed into 1.
func (cw *ClickhouseQueryTranslator) parseBool(queryMap QueryMap) SimpleQuery {
var andStmts []Statement
canParse := true // will stay true only if all subqueries can be parsed
for _, andPhrase := range []string{"must", "filter"} {
if queries, ok := queryMap[andPhrase]; ok {
andStmts = append(andStmts, cw.iterateListOrDictAndParse(queries)...)
newAndStmts, canParseThis := cw.iterateListOrDictAndParse(queries)
andStmts = append(andStmts, newAndStmts...)
canParse = canParse && canParseThis
}
}
sql := and(andStmts)
Expand All @@ -419,7 +432,9 @@ func (cw *ClickhouseQueryTranslator) parseBool(queryMap QueryMap) SimpleQuery {
minimumShouldMatch = 1
}
if queries, ok := queryMap["should"]; ok && minimumShouldMatch == 1 {
orSql := or(cw.iterateListOrDictAndParse(queries))
orSqls, canParseThis := cw.iterateListOrDictAndParse(queries)
orSql := or(orSqls)
canParse = canParse && canParseThis
if len(andStmts) == 0 {
sql = orSql
} else if len(orSql.Stmt) > 0 {
Expand All @@ -428,8 +443,9 @@ func (cw *ClickhouseQueryTranslator) parseBool(queryMap QueryMap) SimpleQuery {
}

if queries, ok := queryMap["must_not"]; ok {
sqlNots := cw.iterateListOrDictAndParse(queries)
sqlNots, canParseThis := cw.iterateListOrDictAndParse(queries)
sqlNots = filterNonEmpty(sqlNots)
canParse = canParse && canParseThis
if len(sqlNots) > 0 {
orSql := or(sqlNots)
if orSql.isCompound {
Expand All @@ -441,7 +457,7 @@ func (cw *ClickhouseQueryTranslator) parseBool(queryMap QueryMap) SimpleQuery {
sql = and([]Statement{sql, orSql})
}
}
return newSimpleQueryWithFieldName(sql, true, sql.FieldName)
return newSimpleQueryWithFieldName(sql, canParse, sql.FieldName)
}

func (cw *ClickhouseQueryTranslator) parseTerm(queryMap QueryMap) SimpleQuery {
Expand Down
30 changes: 18 additions & 12 deletions quesma/quesma/search_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -405,8 +405,11 @@ func TestNumericFacetsQueries(t *testing.T) {
// It runs |testdata.UnsupportedQueriesTests| tests, each of them sends one query of unsupported type.
// It ensures that this query type is recorded in the management console, and that all other query types are not.
func TestAllUnsupportedQueryTypesAreProperlyRecorded(t *testing.T) {
for _, tt := range testdata.UnsupportedAggregationsTests {
for _, tt := range testdata.UnsupportedQueriesTests {
t.Run(tt.TestName, func(t *testing.T) {
if tt.QueryType == "script" {
t.Skip("Only 1 test. We can't deal with scripts inside queries yet. It fails very early, during JSON unmarshalling, so we can't even know the type of aggregation.")
}
db, _, err := sqlmock.New()
if err != nil {
t.Fatal(err)
Expand All @@ -424,17 +427,17 @@ func TestAllUnsupportedQueryTypesAreProperlyRecorded(t *testing.T) {
newCtx := context.WithValue(ctx, tracing.RequestIdCtxKey, tracing.GetRequestId())
_, _ = queryRunner.handleSearch(newCtx, tableName, []byte(tt.QueryRequestJson))

for _, queryType := range model.AggregationQueryTypes {
if queryType != tt.AggregationName {
for _, queryType := range model.AllQueryTypes {
if queryType != tt.QueryType {
assert.Len(t, managementConsole.QueriesWithUnsupportedType(queryType), 0)
}
}

// Update of the count below is done asynchronously in another goroutine
// (go managementConsole.RunOnlyChannelProcessor() above), so we might need to wait a bit
assert.Eventually(t, func() bool {
return len(managementConsole.QueriesWithUnsupportedType(tt.AggregationName)) == 1
}, 50*time.Millisecond, 1*time.Millisecond)
return len(managementConsole.QueriesWithUnsupportedType(tt.QueryType)) == 1
}, 150*time.Millisecond, 1*time.Millisecond)
assert.Equal(t, 1, managementConsole.GetTotalUnsupportedQueries())
assert.Equal(t, 1, managementConsole.GetSavedUnsupportedQueries())
assert.Equal(t, 1, len(managementConsole.GetUnsupportedTypesWithCount()))
Expand All @@ -451,9 +454,13 @@ func TestDifferentUnsupportedQueries(t *testing.T) {

// generate random |requestsNr| queries to send
testNrs := make([]int, 0, requestsNr)
testCounts := make([]int, len(testdata.UnsupportedAggregationsTests))
testCounts := make([]int, len(testdata.UnsupportedQueriesTests))
for range requestsNr {
randInt := rand.Intn(len(testdata.UnsupportedAggregationsTests))
randInt := rand.Intn(len(testdata.UnsupportedQueriesTests))
if testdata.UnsupportedQueriesTests[randInt].QueryType == "script" {
// We can't deal with scripts inside queries yet. It fails very early, during JSON unmarshalling, so we can't even determine the query type.
continue
}
testNrs = append(testNrs, randInt)
testCounts[randInt]++
}
Expand All @@ -474,18 +481,17 @@ func TestDifferentUnsupportedQueries(t *testing.T) {
queryRunner := NewQueryRunner(lm, cfg, nil, managementConsole)
for _, testNr := range testNrs {
newCtx := context.WithValue(ctx, tracing.RequestIdCtxKey, tracing.GetRequestId())
_, _ = queryRunner.handleSearch(newCtx, tableName, []byte(testdata.UnsupportedAggregationsTests[testNr].QueryRequestJson))

_, _ = queryRunner.handleSearch(newCtx, tableName, []byte(testdata.UnsupportedQueriesTests[testNr].QueryRequestJson))
}

for i, tt := range testdata.UnsupportedAggregationsTests {
for i, tt := range testdata.UnsupportedQueriesTests {
// Update of the count below is done asynchronously in another goroutine
// (go managementConsole.RunOnlyChannelProcessor() above), so we might need to wait a bit
assert.Eventually(t, func() bool {
return len(managementConsole.QueriesWithUnsupportedType(tt.AggregationName)) == min(testCounts[i], maxSavedQueriesPerQueryType)
return len(managementConsole.QueriesWithUnsupportedType(tt.QueryType)) == min(testCounts[i], maxSavedQueriesPerQueryType)
}, 500*time.Millisecond, 1*time.Millisecond,
tt.TestName+": wanted: %d, got: %d", min(testCounts[i], maxSavedQueriesPerQueryType),
len(managementConsole.QueriesWithUnsupportedType(tt.AggregationName)),
len(managementConsole.QueriesWithUnsupportedType(tt.QueryType)),
)
}
}
2 changes: 1 addition & 1 deletion quesma/quesma/ui/unsupported_queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func processUnsupportedLogMessage(log tracing.LogWithLevel) *string {
searchQueryType := match[1]

knownType := false
for _, queryType := range model.AggregationQueryTypes {
for _, queryType := range model.AllQueryTypes {
if queryType == searchQueryType {
knownType = true
break
Expand Down
4 changes: 2 additions & 2 deletions quesma/testdata/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ type AggregationTestCase struct {
ExpectedSQLs []string // [0] = translated SQLs for first aggregation, [1] = translated SQL for second aggregation, etc.
}

// UnsupportedQueryTestCase describes one test request containing a query of a
// type that is not supported yet.
type UnsupportedQueryTestCase struct {
	// TestName is the name of the test case (used as the subtest name).
	TestName string
	// QueryType is the unsupported query type the request is expected to be
	// recorded under, e.g. an entry of model.AllQueryTypes.
	QueryType string
	// QueryRequestJson is the raw JSON body of the search request to send.
	QueryRequestJson string
}
Loading
Loading