Skip to content

Commit

Permalink
Second part of reporting unsupported query types (#44)
Browse files Browse the repository at this point in the history
Did basically the same thing as for aggregations. Tested receiving
every possible `Query DSL` query type to check that we catch it, and we
seem to. E.g. in the screenshots below, `shape` is an unsupported `Query DSL`
query, while `ip_range` is an unsupported aggregation - you can see both.

In the code, I only needed to start returning whether parsing succeeded
for bool expressions, as they can contain subqueries of any other
`Query DSL` query type.
![Screenshot 2024-05-07 at 17 07
50](https://github.com/QuesmaOrg/quesma/assets/5407146/f2552446-f7fe-4e9d-a851-2c911bad95c5)
![Screenshot 2024-05-07 at 17 08
02](https://github.com/QuesmaOrg/quesma/assets/5407146/5aca5ea2-477f-4f9b-ab46-7cf5e3cf800b)
  • Loading branch information
trzysiek authored May 7, 2024
1 parent cda7452 commit 7d86155
Show file tree
Hide file tree
Showing 6 changed files with 986 additions and 164 deletions.
78 changes: 76 additions & 2 deletions quesma/model/query_types_list.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package model

// list of all aggregation types in Elasticsearch.
// AggregationQueryTypes is a list of all aggregation types in Elasticsearch.
// More details: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations.html
var AggregationQueryTypes = []string{
// metrics:
"avg",
Expand Down Expand Up @@ -89,4 +90,77 @@ var AggregationQueryTypes = []string{
"sum_bucket",
}

// TODO list of all Query DSL types in Elasticsearch.
// QueryDSLTypes is a list of Query DSL query types in Elasticsearch, grouped
// below by category.
// "geo_distance" is deliberately left out (see the commented-out entry in the
// geo section), because an aggregation of the same name exists.
// More details: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html
var QueryDSLTypes = []string{
// Compound queries:
"bool",
"boosting",
"constant_score",
"dis_max",
"function_score",
// Full text queries:
"intervals",
"match",
"match_bool_prefix",
"match_phrase",
"match_phrase_prefix",
"combined_fields",
"multi_match",
"query_string",
"simple_query_string",
// Geo queries:
"geo_bounding_box",
// "geo_distance", // Same name as an aggregation. Easier to keep it commented out for now, and catch both cases in one way.
"geo_grid",
"geo_polygon",
"geo_shape",
// Shape queries:
"shape",
// Joining queries:
"nested",
"has_child",
"has_parent",
"parent_id",
// Match all:
"match_all",
// Span queries:
"span_containing",
"span_field_masking",
"span_first",
"span_multi",
"span_near",
"span_not",
"span_or",
"span_term",
"span_within",
// Specialized queries:
"distance_feature",
"more_like_this",
"percolate",
"knn",
"rank_feature",
"script",
"script_score",
"wrapper",
"pinned",
"rule_query",
"weighted_tokens",
// Term-level queries:
"exists",
"fuzzy",
"ids",
"prefix",
"range",
"regexp",
"term",
"terms",
"terms_set",
"wildcard",
// Text expansion:
"text_expansion",
}

// AllQueryTypes is a list of all query types in Elasticsearch: every entry of
// AggregationQueryTypes followed by every entry of QueryDSLTypes.
// So far used for listing types of queries we received, but don't support.
//
// The full slice expression caps the capacity of the first operand at its
// length, so append can never write into spare capacity of
// AggregationQueryTypes' backing array.
var AllQueryTypes = append(AggregationQueryTypes[:len(AggregationQueryTypes):len(AggregationQueryTypes)], QueryDSLTypes...)
44 changes: 30 additions & 14 deletions quesma/queryparser/query_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -364,41 +364,54 @@ func (cw *ClickhouseQueryTranslator) parseQueryMap(queryMap QueryMap) SimpleQuer
logger.WarnWithCtx(cw.Ctx).Msgf("query is not a dict. key: %s, value: %v", k, v)
}
} else {
logger.WarnWithCtx(cw.Ctx).Msgf("unsupported query type: %s, value: %v", k, v)
logger.WarnWithCtxAndReason(cw.Ctx, logger.ReasonUnsupportedQuery(k)).Msgf("unsupported query type: %s, value: %v", k, v)
}
}
return newSimpleQuery(NewSimpleStatement("can't parse query: "+pp.Sprint(queryMap)), false)
}

// Parses each SimpleQuery separately, returns list of translated SQLs
func (cw *ClickhouseQueryTranslator) parseQueryMapArray(queryMaps []interface{}) []Statement {
results := make([]Statement, len(queryMaps))
func (cw *ClickhouseQueryTranslator) parseQueryMapArray(queryMaps []interface{}) (stmts []Statement, canParse bool) {
stmts = make([]Statement, len(queryMaps))
canParse = true
for i, v := range queryMaps {
qmap := cw.parseQueryMap(v.(QueryMap))
results[i] = qmap.Sql
results[i].FieldName = qmap.FieldName
if vAsMap, ok := v.(QueryMap); ok {
query := cw.parseQueryMap(vAsMap)
stmts[i] = query.Sql
stmts[i].FieldName = query.FieldName
if !query.CanParse {
canParse = false
}
} else {
logger.WarnWithCtx(cw.Ctx).Msgf("invalid query type: %T, value: %v", v, v)
canParse = false
}
}
return results
return stmts, canParse
}

func (cw *ClickhouseQueryTranslator) iterateListOrDictAndParse(queryMaps interface{}) []Statement {
func (cw *ClickhouseQueryTranslator) iterateListOrDictAndParse(queryMaps interface{}) (stmts []Statement, canParse bool) {
switch queryMapsTyped := queryMaps.(type) {
case []interface{}:
return cw.parseQueryMapArray(queryMapsTyped)
case QueryMap:
return []Statement{cw.parseQueryMap(queryMapsTyped).Sql}
simpleQuery := cw.parseQueryMap(queryMapsTyped)
return []Statement{simpleQuery.Sql}, simpleQuery.CanParse
default:
logger.WarnWithCtx(cw.Ctx).Msgf("Invalid query type: %T, value: %v", queryMapsTyped, queryMapsTyped)
return []Statement{NewSimpleStatement("invalid iteration")}
return []Statement{NewSimpleStatement("invalid iteration")}, false
}
}

// TODO: minimum_should_match parameter. Now only ints supported and >1 changed into 1
func (cw *ClickhouseQueryTranslator) parseBool(queryMap QueryMap) SimpleQuery {
var andStmts []Statement
canParse := true // will stay true only if all subqueries can be parsed
for _, andPhrase := range []string{"must", "filter"} {
if queries, ok := queryMap[andPhrase]; ok {
andStmts = append(andStmts, cw.iterateListOrDictAndParse(queries)...)
newAndStmts, canParseThis := cw.iterateListOrDictAndParse(queries)
andStmts = append(andStmts, newAndStmts...)
canParse = canParse && canParseThis
}
}
sql := and(andStmts)
Expand All @@ -419,7 +432,9 @@ func (cw *ClickhouseQueryTranslator) parseBool(queryMap QueryMap) SimpleQuery {
minimumShouldMatch = 1
}
if queries, ok := queryMap["should"]; ok && minimumShouldMatch == 1 {
orSql := or(cw.iterateListOrDictAndParse(queries))
orSqls, canParseThis := cw.iterateListOrDictAndParse(queries)
orSql := or(orSqls)
canParse = canParse && canParseThis
if len(andStmts) == 0 {
sql = orSql
} else if len(orSql.Stmt) > 0 {
Expand All @@ -428,8 +443,9 @@ func (cw *ClickhouseQueryTranslator) parseBool(queryMap QueryMap) SimpleQuery {
}

if queries, ok := queryMap["must_not"]; ok {
sqlNots := cw.iterateListOrDictAndParse(queries)
sqlNots, canParseThis := cw.iterateListOrDictAndParse(queries)
sqlNots = filterNonEmpty(sqlNots)
canParse = canParse && canParseThis
if len(sqlNots) > 0 {
orSql := or(sqlNots)
if orSql.isCompound {
Expand All @@ -441,7 +457,7 @@ func (cw *ClickhouseQueryTranslator) parseBool(queryMap QueryMap) SimpleQuery {
sql = and([]Statement{sql, orSql})
}
}
return newSimpleQueryWithFieldName(sql, true, sql.FieldName)
return newSimpleQueryWithFieldName(sql, canParse, sql.FieldName)
}

func (cw *ClickhouseQueryTranslator) parseTerm(queryMap QueryMap) SimpleQuery {
Expand Down
30 changes: 18 additions & 12 deletions quesma/quesma/search_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -405,8 +405,11 @@ func TestNumericFacetsQueries(t *testing.T) {
// It runs |testdata.UnsupportedQueriesTests| tests, each of which sends one query of an unsupported type.
// It ensures that this query type is recorded in the management console, and that all other query types are not.
func TestAllUnsupportedQueryTypesAreProperlyRecorded(t *testing.T) {
for _, tt := range testdata.UnsupportedAggregationsTests {
for _, tt := range testdata.UnsupportedQueriesTests {
t.Run(tt.TestName, func(t *testing.T) {
if tt.QueryType == "script" {
t.Skip("Only 1 test. We can't deal with scripts inside queries yet. It fails very early, during JSON unmarshalling, so we can't even know the type of aggregation.")
}
db, _, err := sqlmock.New()
if err != nil {
t.Fatal(err)
Expand All @@ -424,17 +427,17 @@ func TestAllUnsupportedQueryTypesAreProperlyRecorded(t *testing.T) {
newCtx := context.WithValue(ctx, tracing.RequestIdCtxKey, tracing.GetRequestId())
_, _ = queryRunner.handleSearch(newCtx, tableName, []byte(tt.QueryRequestJson))

for _, queryType := range model.AggregationQueryTypes {
if queryType != tt.AggregationName {
for _, queryType := range model.AllQueryTypes {
if queryType != tt.QueryType {
assert.Len(t, managementConsole.QueriesWithUnsupportedType(queryType), 0)
}
}

// Update of the count below is done asynchronously in another goroutine
// (go managementConsole.RunOnlyChannelProcessor() above), so we might need to wait a bit
assert.Eventually(t, func() bool {
return len(managementConsole.QueriesWithUnsupportedType(tt.AggregationName)) == 1
}, 50*time.Millisecond, 1*time.Millisecond)
return len(managementConsole.QueriesWithUnsupportedType(tt.QueryType)) == 1
}, 150*time.Millisecond, 1*time.Millisecond)
assert.Equal(t, 1, managementConsole.GetTotalUnsupportedQueries())
assert.Equal(t, 1, managementConsole.GetSavedUnsupportedQueries())
assert.Equal(t, 1, len(managementConsole.GetUnsupportedTypesWithCount()))
Expand All @@ -451,9 +454,13 @@ func TestDifferentUnsupportedQueries(t *testing.T) {

// generate random |requestsNr| queries to send
testNrs := make([]int, 0, requestsNr)
testCounts := make([]int, len(testdata.UnsupportedAggregationsTests))
testCounts := make([]int, len(testdata.UnsupportedQueriesTests))
for range requestsNr {
randInt := rand.Intn(len(testdata.UnsupportedAggregationsTests))
randInt := rand.Intn(len(testdata.UnsupportedQueriesTests))
if testdata.UnsupportedQueriesTests[randInt].QueryType == "script" {
// We can't deal with scripts inside queries yet. It fails very early, during JSON unmarshalling, so we can't even know the type of the query.
continue
}
testNrs = append(testNrs, randInt)
testCounts[randInt]++
}
Expand All @@ -474,18 +481,17 @@ func TestDifferentUnsupportedQueries(t *testing.T) {
queryRunner := NewQueryRunner(lm, cfg, nil, managementConsole)
for _, testNr := range testNrs {
newCtx := context.WithValue(ctx, tracing.RequestIdCtxKey, tracing.GetRequestId())
_, _ = queryRunner.handleSearch(newCtx, tableName, []byte(testdata.UnsupportedAggregationsTests[testNr].QueryRequestJson))

_, _ = queryRunner.handleSearch(newCtx, tableName, []byte(testdata.UnsupportedQueriesTests[testNr].QueryRequestJson))
}

for i, tt := range testdata.UnsupportedAggregationsTests {
for i, tt := range testdata.UnsupportedQueriesTests {
// Update of the count below is done asynchronously in another goroutine
// (go managementConsole.RunOnlyChannelProcessor() above), so we might need to wait a bit
assert.Eventually(t, func() bool {
return len(managementConsole.QueriesWithUnsupportedType(tt.AggregationName)) == min(testCounts[i], maxSavedQueriesPerQueryType)
return len(managementConsole.QueriesWithUnsupportedType(tt.QueryType)) == min(testCounts[i], maxSavedQueriesPerQueryType)
}, 500*time.Millisecond, 1*time.Millisecond,
tt.TestName+": wanted: %d, got: %d", min(testCounts[i], maxSavedQueriesPerQueryType),
len(managementConsole.QueriesWithUnsupportedType(tt.AggregationName)),
len(managementConsole.QueriesWithUnsupportedType(tt.QueryType)),
)
}
}
2 changes: 1 addition & 1 deletion quesma/quesma/ui/unsupported_queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func processUnsupportedLogMessage(log tracing.LogWithLevel) *string {
searchQueryType := match[1]

knownType := false
for _, queryType := range model.AggregationQueryTypes {
for _, queryType := range model.AllQueryTypes {
if queryType == searchQueryType {
knownType = true
break
Expand Down
4 changes: 2 additions & 2 deletions quesma/testdata/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ type AggregationTestCase struct {
ExpectedSQLs []string // [0] = translated SQLs for first aggregation, [1] = translated SQL for second aggregation, etc.
}

type UnsupportedAggregationTestCase struct {
type UnsupportedQueryTestCase struct {
TestName string
AggregationName string
QueryType string
QueryRequestJson string
}
Loading

0 comments on commit 7d86155

Please sign in to comment.