Skip to content

Commit

Permalink
Fix match query for integer fields (#1037)
Browse files Browse the repository at this point in the history
The `match` query is in most cases a full-text search, e.g.:

```json
{
  "query": {
    "match": {
      "message": "this is a test"
    }
  }
}
```

However, it can also be used to find an exact value against a integer
field:

```json
{
  "query": {
    "match": {
      "products_count": "5"
    }
  }
}
```

In such case Quesma would generate an invalid SQL, trying to do an
`ILIKE` against an `Int64` column:

```
Illegal type Int64 of argument of function ilike
```

Fix the issue by introducing an internal __quesma_match operator and a
transformation which transforms it either to `ILIKE` or `=`.

Fixes #1018

---------

Co-authored-by: Krzysztof Kiewicz <[email protected]>
  • Loading branch information
avelanarius and trzysiek authored Dec 4, 2024
1 parent c2f696f commit d085fa5
Show file tree
Hide file tree
Showing 9 changed files with 173 additions and 23 deletions.
1 change: 1 addition & 0 deletions quesma/model/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ const (
TimestampFieldName = "@timestamp"

DateHourFunction = "__quesma_date_hour"
MatchOperator = "__quesma_match"
)
2 changes: 1 addition & 1 deletion quesma/model/expr_string_renderer.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ func (v *renderer) VisitInfix(e InfixExpr) interface{} {
}
// This might look like a strange heuristics to but is aligned with the way we are currently generating the statement
// I think in the future every infix op should be in braces.
if e.Op == "AND" || e.Op == "OR" {
if strings.HasPrefix(e.Op, "_") || e.Op == "AND" || e.Op == "OR" {
return fmt.Sprintf("(%v %v %v)", lhs, e.Op, rhs)
} else if strings.Contains(e.Op, "LIKE") || e.Op == "IS" || e.Op == "IN" || e.Op == "REGEXP" {
return fmt.Sprintf("%v %v %v", lhs, e.Op, rhs)
Expand Down
4 changes: 2 additions & 2 deletions quesma/model/highlighter.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,13 @@ func (h *Highlighter) SetTokensToHighlight(selectCmd SelectCommand) {

visitor.OverrideVisitInfix = func(b *BaseExprVisitor, e InfixExpr) interface{} {
switch e.Op {
case "iLIKE", "LIKE", "IN", "=":
case "iLIKE", "LIKE", "IN", "=", MatchOperator:
lhs, isColumnRef := e.Left.(ColumnRef)
rhs, isLiteral := e.Right.(LiteralExpr)
if isLiteral && isColumnRef { // we only highlight in this case
switch literalAsString := rhs.Value.(type) {
case string:
literalAsString = strings.TrimPrefix(literalAsString, "'%")
literalAsString = strings.TrimPrefix(literalAsString, "'")
literalAsString = strings.TrimPrefix(literalAsString, "%")
literalAsString = strings.TrimSuffix(literalAsString, "'")
literalAsString = strings.TrimSuffix(literalAsString, "%")
Expand Down
2 changes: 1 addition & 1 deletion quesma/queryparser/query_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,7 @@ func (cw *ClickhouseQueryTranslator) parseMatch(queryMap QueryMap, matchPhrase b
computedIdMatchingQuery := cw.parseIds(QueryMap{"values": []interface{}{subQuery}})
statements = append(statements, computedIdMatchingQuery.WhereClause)
} else {
simpleStat := model.NewInfixExpr(model.NewColumnRef(fieldName), "iLIKE", model.NewLiteral("'%"+subQuery+"%'"))
simpleStat := model.NewInfixExpr(model.NewColumnRef(fieldName), model.MatchOperator, model.NewLiteral("'"+subQuery+"'"))
statements = append(statements, simpleStat)
}
}
Expand Down
45 changes: 45 additions & 0 deletions quesma/quesma/schema_transformer.go
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,7 @@ func (s *SchemaCheckPass) Transform(queries []*model.Query) ([]*model.Query, err
{TransformationName: "GeoTransformation", Transformation: s.applyGeoTransformations},
{TransformationName: "ArrayTransformation", Transformation: s.applyArrayTransformations},
{TransformationName: "MapTransformation", Transformation: s.applyMapTransformations},
{TransformationName: "MatchOperatorTransformation", Transformation: s.applyMatchOperator},

// Section 4: compensations and checks
{TransformationName: "BooleanLiteralTransformation", Transformation: s.applyBooleanLiteralLowering},
Expand Down Expand Up @@ -789,3 +790,47 @@ func (s *SchemaCheckPass) Transform(queries []*model.Query) ([]*model.Query, err
}
return queries, nil
}

func (s *SchemaCheckPass) applyMatchOperator(indexSchema schema.Schema, query *model.Query) (*model.Query, error) {

visitor := model.NewBaseVisitor()

var err error

visitor.OverrideVisitInfix = func(b *model.BaseExprVisitor, e model.InfixExpr) interface{} {
lhs, ok := e.Left.(model.ColumnRef)
rhs, ok2 := e.Right.(model.LiteralExpr)

if ok && ok2 && e.Op == model.MatchOperator {
field, found := indexSchema.ResolveFieldByInternalName(lhs.ColumnName)
if !found {
logger.Error().Msgf("Field %s not found in schema for table %s, should never happen here", lhs.ColumnName, query.TableName)
}

rhsValue := rhs.Value.(string)
rhsValue = strings.TrimPrefix(rhsValue, "'")
rhsValue = strings.TrimSuffix(rhsValue, "'")

switch field.Type.String() {
case schema.QuesmaTypeInteger.Name, schema.QuesmaTypeLong.Name, schema.QuesmaTypeUnsignedLong.Name, schema.QuesmaTypeBoolean.Name:
return model.NewInfixExpr(lhs, "=", model.NewLiteral(rhsValue))
default:
return model.NewInfixExpr(lhs, "iLIKE", model.NewLiteral("'%"+rhsValue+"%'"))
}
}

return model.NewInfixExpr(e.Left.Accept(b).(model.Expr), e.Op, e.Right.Accept(b).(model.Expr))
}

expr := query.SelectCommand.Accept(visitor)

if err != nil {
return nil, err
}

if _, ok := expr.(*model.SelectCommand); ok {
query.SelectCommand = *expr.(*model.SelectCommand)
}
return query, nil

}
104 changes: 104 additions & 0 deletions quesma/quesma/schema_transformer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -979,3 +979,107 @@ func TestFullTextFields(t *testing.T) {
})
}
}

func Test_applyMatchOperator(t *testing.T) {
schemaTable := schema.Table{
Columns: map[string]schema.Column{
"message": {Name: "message", Type: "String"},
"count": {Name: "count", Type: "Int64"},
},
}

tests := []struct {
name string
query *model.Query
expected *model.Query
}{
{
name: "match operator transformation for String (ILIKE)",
query: &model.Query{
TableName: "test",
SelectCommand: model.SelectCommand{
FromClause: model.NewTableRef("test"),
Columns: []model.Expr{model.NewColumnRef("message")},
WhereClause: model.NewInfixExpr(
model.NewColumnRef("message"),
model.MatchOperator,
model.NewLiteral("'needle'"),
),
},
},
expected: &model.Query{
TableName: "test",
SelectCommand: model.SelectCommand{
FromClause: model.NewTableRef("test"),
Columns: []model.Expr{model.NewColumnRef("message")},
WhereClause: model.NewInfixExpr(
model.NewColumnRef("message"),
"iLIKE",
model.NewLiteral("'%needle%'"),
),
},
},
},
{
name: "match operator transformation for Int64 (=)",
query: &model.Query{
TableName: "test",
SelectCommand: model.SelectCommand{
FromClause: model.NewTableRef("test"),
Columns: []model.Expr{model.NewColumnRef("message")},
WhereClause: model.NewInfixExpr(
model.NewColumnRef("count"),
model.MatchOperator,
model.NewLiteral("'123'"),
),
},
},
expected: &model.Query{
TableName: "test",
SelectCommand: model.SelectCommand{
FromClause: model.NewTableRef("test"),
Columns: []model.Expr{model.NewColumnRef("message")},
WhereClause: model.NewInfixExpr(
model.NewColumnRef("count"),
"=",
model.NewLiteral("123"),
),
},
},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
tableDiscovery :=
fixedTableProvider{tables: map[string]schema.Table{
"test": schemaTable,
}}

indexConfig := map[string]config.IndexConfiguration{
"test": {
Name: "test",
},
}

cfg := config.QuesmaConfiguration{
IndexConfig: indexConfig,
}

s := schema.NewSchemaRegistry(tableDiscovery, &cfg, clickhouse.SchemaTypeAdapter{})
transform := &SchemaCheckPass{cfg: &cfg}

indexSchema, ok := s.FindSchema("test")
if !ok {
t.Fatal("schema not found")
}

actual, err := transform.applyMatchOperator(indexSchema, tt.query)
if err != nil {
t.Fatal(err)
}

assert.Equal(t, model.AsString(tt.expected.SelectCommand), model.AsString(actual.SelectCommand))
})
}
}
18 changes: 9 additions & 9 deletions quesma/testdata/aggregation_requests.go
Original file line number Diff line number Diff line change
Expand Up @@ -1844,7 +1844,7 @@ var AggregationTests = []AggregationTestCase{
"@timestamp", 'Europe/Warsaw'))*1000) / 10800000) AS "aggr__0__1__key_0",
count(*) AS "aggr__0__1__count"
FROM __quesma_table_name
WHERE ("host.name" iLIKE '%prometheus%' AND ("@timestamp">=
WHERE (("host.name" __quesma_match 'prometheus') AND ("@timestamp">=
fromUnixTimestamp64Milli(1706891809940) AND "@timestamp"<=
fromUnixTimestamp64Milli(1707496609940)))
GROUP BY "severity" AS "aggr__0__key_0",
Expand Down Expand Up @@ -2848,8 +2848,8 @@ var AggregationTests = []AggregationTestCase{
"@timestamp") AS "metric__earliest_timestamp_col_0", maxOrNull("@timestamp")
AS "metric__latest_timestamp_col_0"
FROM ` + TableName + `
WHERE ((` + fullTextFieldName + ` iLIKE '%posei%' AND "message" iLIKE '%User logged out%') AND
"host.name" iLIKE '%poseidon%')`,
WHERE ((` + fullTextFieldName + ` iLIKE '%posei%' AND ("message" __quesma_match 'User logged out')) AND
("host.name" __quesma_match 'poseidon'))`,
},
{ // [15]
TestName: "date_histogram: regression test",
Expand Down Expand Up @@ -6195,7 +6195,7 @@ var AggregationTests = []AggregationTestCase{
"aggr__0__1__parent_count", "message" AS "aggr__0__1__key_0",
count(*) AS "aggr__0__1__count"
FROM __quesma_table_name
WHERE ("message" IS NOT NULL AND NOT ("message" iLIKE '%US%'))
WHERE ("message" IS NOT NULL AND NOT (("message" __quesma_match 'US')))
GROUP BY "host.name" AS "aggr__0__key_0", "message" AS "aggr__0__1__key_0"))
WHERE ("aggr__0__order_1_rank"<=11 AND "aggr__0__1__order_1_rank"<=4)
ORDER BY "aggr__0__order_1_rank" ASC, "aggr__0__1__order_1_rank" ASC`,
Expand Down Expand Up @@ -6311,7 +6311,7 @@ var AggregationTests = []AggregationTestCase{
"aggr__0__1__2__parent_count", "message" AS "aggr__0__1__2__key_0",
count(*) AS "aggr__0__1__2__count"
FROM __quesma_table_name
WHERE ("message" IS NOT NULL AND NOT ("message" iLIKE '%US%'))
WHERE ("message" IS NOT NULL AND NOT (("message" __quesma_match 'US')))
GROUP BY "host.name" AS "aggr__0__key_0", "message" AS "aggr__0__1__key_0",
"message" AS "aggr__0__1__2__key_0"))
WHERE (("aggr__0__order_1_rank"<=11 AND "aggr__0__1__order_1_rank"<=4) AND
Expand Down Expand Up @@ -6403,7 +6403,7 @@ var AggregationTests = []AggregationTestCase{
sum(count(*)) OVER (PARTITION BY "aggr__0__key_0") AS "aggr__0__count",
"FlightDelayMin" AS "aggr__0__1__key_0", count(*) AS "aggr__0__1__count"
FROM ` + TableName + `
WHERE ("message" IS NOT NULL AND NOT ("message" iLIKE '%US%'))
WHERE ("message" IS NOT NULL AND NOT (("message" __quesma_match 'US')))
GROUP BY "host.name" AS "aggr__0__key_0",
"FlightDelayMin" AS "aggr__0__1__key_0"))
WHERE "aggr__0__order_1_rank"<=9
Expand Down Expand Up @@ -6513,7 +6513,7 @@ var AggregationTests = []AggregationTestCase{
sum(count(*)) OVER (PARTITION BY "aggr__0__key_0") AS "aggr__0__count",
"FlightDelayMin" AS "aggr__0__1__key_0", count(*) AS "aggr__0__1__count"
FROM ` + TableName + `
WHERE ("message" IS NOT NULL AND NOT ("message" iLIKE '%US%'))
WHERE ("message" IS NOT NULL AND NOT (("message" __quesma_match 'US')))
GROUP BY "host.name" AS "aggr__0__key_0",
"FlightDelayMin" AS "aggr__0__1__key_0"))
WHERE "aggr__0__order_1_rank"<=11
Expand Down Expand Up @@ -6610,7 +6610,7 @@ var AggregationTests = []AggregationTestCase{
sum(count(*)) OVER (PARTITION BY "aggr__0__key_0") AS "aggr__0__count",
"FlightDelayMin" AS "aggr__0__1__key_0", count(*) AS "aggr__0__1__count"
FROM __quesma_table_name
WHERE ("message" IS NOT NULL AND NOT ("message" iLIKE '%US%'))
WHERE ("message" IS NOT NULL AND NOT (("message" __quesma_match 'US')))
GROUP BY "host.name" AS "aggr__0__key_0",
"FlightDelayMin" AS "aggr__0__1__key_0"))
WHERE "aggr__0__order_1_rank"<=11
Expand Down Expand Up @@ -6826,7 +6826,7 @@ var AggregationTests = []AggregationTestCase{
count(*) AS "aggr__2__count",
sumOrNull("total") AS "metric__2__1_col_0"
FROM ` + TableName + `
WHERE NOT ((("abc">=0 AND "abc"<600) OR "type" iLIKE '%def%'))
WHERE NOT ((("abc">=0 AND "abc"<600) OR ("type" __quesma_match 'def')))
GROUP BY "name" AS "aggr__2__key_0"
ORDER BY "metric__2__1_col_0" DESC, "aggr__2__key_0" ASC
LIMIT 11`,
Expand Down
2 changes: 1 addition & 1 deletion quesma/testdata/clients/clover.go
Original file line number Diff line number Diff line change
Expand Up @@ -765,7 +765,7 @@ var CloverTests = []testdata.AggregationTestCase{
"field" AS "aggr__other-filter__3__key_0",
count(*) AS "aggr__other-filter__3__count"
FROM __quesma_table_name
WHERE ("a" iLIKE '%b%' AND "c" iLIKE '%d%')
WHERE (("a" __quesma_match 'b') AND ("c" __quesma_match 'd'))
GROUP BY "field" AS "aggr__other-filter__3__key_0"
ORDER BY "aggr__other-filter__3__count" DESC,
"aggr__other-filter__3__key_0" ASC
Expand Down
18 changes: 9 additions & 9 deletions quesma/testdata/requests.go
Original file line number Diff line number Diff line change
Expand Up @@ -1131,7 +1131,7 @@ var TestsSearch = []SearchTestCase{
},
"track_total_hits": false
}`,
[]string{`"host_name" iLIKE '%prometheus%'`},
[]string{`("host_name" __quesma_match 'prometheus')`},
model.ListAllFields,
[]string{`SELECT "message" FROM ` + TableName + ` WHERE "host_name" iLIKE '%prometheus%' LIMIT 10`},
[]string{},
Expand All @@ -1148,7 +1148,7 @@ var TestsSearch = []SearchTestCase{
"size": 100,
"track_total_hits": false
}`,
[]string{`((("message" iLIKE '%this%' OR "message" iLIKE '%is%') OR "message" iLIKE '%a%') OR "message" iLIKE '%test%')`},
[]string{`(((("message" __quesma_match 'this') OR ("message" __quesma_match 'is')) OR ("message" __quesma_match 'a')) OR ("message" __quesma_match 'test'))`},
model.ListAllFields,
[]string{
`SELECT "message" FROM ` + TableName + ` WHERE ((("message" iLIKE '%this%' OR "message" iLIKE '%is%') ` +
Expand Down Expand Up @@ -1405,7 +1405,7 @@ var TestsSearch = []SearchTestCase{
},
"track_total_hits": false
}`,
[]string{`"message" iLIKE '%this is a test%'`},
[]string{`("message" __quesma_match 'this is a test')`},
model.ListAllFields,
[]string{`SELECT "message" FROM ` + TableName + ` WHERE "message" iLIKE '%this is a test%'`},
[]string{},
Expand All @@ -1423,7 +1423,7 @@ var TestsSearch = []SearchTestCase{
},
"track_total_hits": false
}`,
[]string{`"message" iLIKE '%this is a test%'`},
[]string{`("message" __quesma_match 'this is a test')`},
model.ListAllFields,
[]string{`SELECT "message" FROM ` + TableName + ` WHERE "message" iLIKE '%this is a test%'`},
[]string{},
Expand Down Expand Up @@ -1687,9 +1687,9 @@ var TestsSearch = []SearchTestCase{
"track_total_hits": true
}`,
[]string{
`(("message" iLIKE '%User logged out%' AND "host.name" iLIKE '%poseidon%') ` +
`((("message" __quesma_match 'User logged out') AND ("host.name" __quesma_match 'poseidon')) ` +
`AND ("@timestamp">=fromUnixTimestamp64Milli(1706542596491) AND "@timestamp"<=fromUnixTimestamp64Milli(1706551896491)))`,
`((("message" iLIKE '%User logged out%' AND "host.name" iLIKE '%poseidon%') ` +
`(((("message" __quesma_match 'User logged out') AND ("host.name" __quesma_match 'poseidon')) ` +
`AND ("@timestamp">=fromUnixTimestamp64Milli(1706542596491) AND "@timestamp"<=fromUnixTimestamp64Milli(1706551896491))) ` +
`AND "stream.namespace" IS NOT NULL)`,
},
Expand Down Expand Up @@ -1847,10 +1847,10 @@ var TestsSearch = []SearchTestCase{
"timeout": "1000ms"
}`,
[]string{
`((("message" iLIKE '%User logged out%' AND "host.name" iLIKE '%poseidon%') ` +
`(((("message" __quesma_match 'User logged out') AND ("host.name" __quesma_match 'poseidon')) ` +
`AND ("@timestamp">=fromUnixTimestamp64Milli(1706542596491) AND "@timestamp"<=fromUnixTimestamp64Milli(1706551896491))) ` +
`AND "namespace" IS NOT NULL)`,
`(("message" iLIKE '%User logged out%' AND "host.name" iLIKE '%poseidon%') ` +
`((("message" __quesma_match 'User logged out') AND ("host.name" __quesma_match 'poseidon')) ` +
`AND ("@timestamp">=fromUnixTimestamp64Milli(1706542596491) AND "@timestamp"<=fromUnixTimestamp64Milli(1706551896491)))`,
},
model.Normal,
Expand Down Expand Up @@ -2085,7 +2085,7 @@ var TestsSearch = []SearchTestCase{
"track_total_hits": false,
"size": 12
}`,
[]string{`("message" iLIKE '%User logged out%' AND "message" iLIKE '%User logged out%')`},
[]string{`(("message" __quesma_match 'User logged out') AND ("message" __quesma_match 'User logged out'))`},
model.ListAllFields,
[]string{
`SELECT "message" ` +
Expand Down

0 comments on commit d085fa5

Please sign in to comment.