From 4b666266cdbc688dc3defc32d43f9d82044c07d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20Hejman?= Date: Thu, 16 May 2024 15:26:35 +0200 Subject: [PATCH] Parser fixes for _id query (#125) Fixes to our parser. **Note:** Semantically for now this doesn't make much sense (we might show different documents as IDs are changing) but at least we're not blowing up during ID query execution. ![image (8)](https://github.com/QuesmaOrg/quesma/assets/2097938/d8e69ee2-a21d-40ba-b3be-bcc73fc4eeab) image --- docker/local-debug.yml | 4 +++ quesma/queryparser/query_parser.go | 35 +++++++++++++++++++++ quesma/testdata/requests.go | 17 ++++++++++ quesma/testdata/unsupported_requests.go | 41 +++++++++---------------- 4 files changed, 70 insertions(+), 27 deletions(-) diff --git a/docker/local-debug.yml b/docker/local-debug.yml index 85ba35c34..55f920fc2 100644 --- a/docker/local-debug.yml +++ b/docker/local-debug.yml @@ -37,6 +37,8 @@ services: image: docker.elastic.co/kibana/kibana:8.11.1 container_name: kibana network_mode: "bridge" + links: + - "mitmproxy" environment: ELASTICSEARCH_HOSTS: '["http://mitmproxy:6666"]' XPACK_ENCRYPTEDSAVEDOBJECTS_ENCRYPTIONKEY: 'QUESMAQUESMAQUESMAQUESMAQUESMAQUESMAQUESMAQUESMA' # Just to get rid of annoying ERROR in logs @@ -54,6 +56,8 @@ services: kibana-sidecar: image: docker.elastic.co/kibana/kibana:8.11.1 container_name: kiabana-sidecar + links: + - "kibana" network_mode: "bridge" restart: "no" depends_on: diff --git a/quesma/queryparser/query_parser.go b/quesma/queryparser/query_parser.go index 5183921a9..814eb414e 100644 --- a/quesma/queryparser/query_parser.go +++ b/quesma/queryparser/query_parser.go @@ -246,6 +246,8 @@ func (cw *ClickhouseQueryTranslator) parseQueryMap(queryMap QueryMap) SimpleQuer "match_phrase": func(qm QueryMap) SimpleQuery { return cw.parseMatch(qm, true) }, "range": cw.parseRange, "exists": cw.parseExists, + "ids": cw.parseIds, + "constant_score": cw.parseConstantScore, "wildcard": cw.parseWildcard, "query_string": cw.parseQueryString, "simple_query_string": cw.parseQueryString, @@ -273,6 +275,33 @@ func (cw *ClickhouseQueryTranslator) parseQueryMap(queryMap QueryMap) SimpleQuer return newSimpleQuery(NewSimpleStatement("can't parse query: "+unparsedQuery), false) } +// `constant_score` query is just a wrapper for filter query which returns constant relevance score, which we ignore anyway +func (cw *ClickhouseQueryTranslator) parseConstantScore(queryMap QueryMap) SimpleQuery { + if _, ok := queryMap["filter"]; ok { + return cw.parseBool(queryMap) + } else { + return newSimpleQuery(NewSimpleStatement("parsing error: `constant_score` needs to wrap `filter` query"), false) + } +} + +func (cw *ClickhouseQueryTranslator) parseIds(queryMap QueryMap) SimpleQuery { + var ids []string + if val, ok := queryMap["values"]; ok { + if values, ok := val.([]interface{}); ok { + for _, id := range values { + ids = append(ids, id.(string)) + } + } + } else { + return newSimpleQuery(NewSimpleStatement("parsing error: missing mandatory `values` field"), false) + } + logger.Warn().Msgf("unsupported id query executed, requested ids of [%s]", strings.Join(ids, "','")) + // We'll make this something along the lines of: + // fmt.Sprintf("COMPUTED_ID(document) IN ('%s') */ ", strings.Join(ids, "','")) + // but for now leaving empty + return newSimpleQuery(NewSimpleStatement(""), true) +} + // Parses each SimpleQuery separately, returns list of translated SQLs func (cw *ClickhouseQueryTranslator) parseQueryMapArray(queryMaps []interface{}) (stmts []Statement, canParse bool) { stmts = make([]Statement, len(queryMaps)) @@ -367,6 +396,10 @@ func (cw *ClickhouseQueryTranslator) parseTerm(queryMap QueryMap) SimpleQuery { if len(queryMap) == 1 { for k, v := range queryMap { cw.AddTokenToHighlight(v) + if k == "_index" { // index is a table name, already taken from URI and moved to FROM clause + logger.Warn().Msgf("term %s=%v in query body, ignoring in result SQL", k, v) + return newSimpleQuery(NewSimpleStatement(" 0=0 /* "+strconv.Quote(k)+"="+sprint(v)+" */ "), true) + } return newSimpleQuery(NewSimpleStatement(strconv.Quote(k)+"="+sprint(v)), true) } } @@ -769,6 +802,8 @@ func (cw *ClickhouseQueryTranslator) parseRange(queryMap QueryMap) SimpleQuery { } else if op == "gte" || op == "lte" || op == "gt" || op == "lt" { vToPrint = parseDateMathExpression(vToPrint) } + } else if v == nil { + vToPrint = "NULL" } case clickhouse.Invalid: // assumes it is number that does not need formatting if len(vToPrint) > 2 && vToPrint[0] == '\'' && vToPrint[len(vToPrint)-1] == '\'' { diff --git a/quesma/testdata/requests.go b/quesma/testdata/requests.go index d15c423d2..367ae3004 100644 --- a/quesma/testdata/requests.go +++ b/quesma/testdata/requests.go @@ -1883,6 +1883,23 @@ var TestsSearch = []SearchTestCase{ []model.Query{newSimplestQuery()}, []string{qToStr(newSimplestQuery())}, }, + { // [32] + "Constant score query", + `{ + "query": { + "constant_score": { + "filter": { + "term": { "user.id": "kimchy" } + }, + "boost": 1.2 + } + } + }`, + []string{`"user.id"='kimchy'`}, + model.Normal, + []model.Query{justSimplestWhere(`"user.id"='kimchy'`)}, + []string{qToStr(justSimplestWhere(`"user.id"='kimchy'`))}, + }, } var TestsSearchNoAttrs = []SearchTestCase{ diff --git a/quesma/testdata/unsupported_requests.go b/quesma/testdata/unsupported_requests.go index 599f8fe60..4c38d6e7b 100644 --- a/quesma/testdata/unsupported_requests.go +++ b/quesma/testdata/unsupported_requests.go @@ -1347,21 +1347,6 @@ var UnsupportedQueriesTests = []UnsupportedQueryTestCase{ } }`, }, - { // [59] - TestName: "Compound query: constant_score", - QueryType: "constant_score", - QueryRequestJson: ` - { - "query": { - "constant_score": { - "filter": { - "term": { "user.id": "kimchy" } - }, - "boost": 1.2 - } - } - }`, - }, { // [60] TestName: "Compound query: disjunction_max", QueryType: "dis_max", @@ -2020,18 +2005,20 @@ var UnsupportedQueriesTests = []UnsupportedQueryTestCase{ } }`, }, - { // [95] - TestName: "Term-level queries: IDs", - QueryType: "ids", - QueryRequestJson: ` - { - "query": { - "ids" : { - "values" : ["1", "4", "100"] - } - } - }`, - }, + //{ // [95] + // The query is partially supported, doesn't blow up, + // but the response is not as expected due to the nature of the backend (ClickHouse). + // TestName: "Term-level queries: IDs", + // QueryType: "ids", + // QueryRequestJson: ` + // { + // "query": { + // "ids" : { + // "values" : ["1", "4", "100"] + // } + // } + // }`, + //}, { // [96] TestName: "Term-level queries: Regexp", QueryType: "regexp",