Skip to content

Commit

Permalink
Not escaping ' in iLIKE query properly (#1114)
Browse files Browse the repository at this point in the history
#1038

As new tests show for a little bit, the issue seems fixed. Instead of
doing nothing, I escape all `\` and `'` in all string literals. There
might be some edge case where it's not the correct behaviour, but I
thought for a bit and haven't found any.
Even in `regexp`, where we escape `_` (Clickhouse's equivalent of `.` in
regex), escaping it twice because of this change (sending `\\_` to
Clickhouse instead of `\_` before), seems to work just as fine.

Because of array joins issue, sample panel still doesn't work, but there
are no more any escaping error messages, like in the issue description.
So, when running on `main`, we get a lot of:
```
quesma-1                 | Dec 22 15:36:13.876 ERR quesma/quesma/dual_write_proxy.go:252 > quesma request failed: Q3006: Database query has failed. You may get more details in the Quesma console.  clickhouse: query failed. err: code: 62, message: Syntax error: failed at position 488 ('(') (line 1, col 488): (((arrayJoin("category") __quesma_match 'Men's Clothing')
```
But when running on this branch, we only have errors like this (notice
`'` properly escaped):
```
quesma-1                 | Dec 22 15:32:01.714 ERR quesma/quesma/dual_write_proxy.go:252 > quesma request failed: Q3006: Database query has failed. You may get more details in the Quesma console.  clickhouse: query failed. err: code: 62, message: Syntax error: failed at position 278 ('__quesma_match') (line 1, col 278): __quesma_match 'Men\'s Clothing')
```

**Additional info:**
We discussed with Rafał some other way to make it work: stop sending any
strings in the query itself, but pass all of them as parameters, where
the driver will take care of any escaping issues, etc.
It's probably a good idea for the future, but would require some
non-trivial amount of work, unlike this straightforward solution, which
I already had done.
  • Loading branch information
trzysiek authored Dec 28, 2024
1 parent c5d1baa commit 9173255
Show file tree
Hide file tree
Showing 8 changed files with 146 additions and 25 deletions.
8 changes: 5 additions & 3 deletions quesma/logger/log_with_throttling.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (
"time"
)

// throttleMap: (reason name -> last logged time)
// We log only once per throttleDuration for each reason name, so that we don't spam the logs.
var throttleMap = util.NewSyncMap[string, time.Time]()

const throttleDuration = 30 * time.Minute
Expand All @@ -28,11 +30,11 @@ func WarnWithCtxAndThrottling(ctx context.Context, aggrName, paramName, format s

// WarnWithThrottling - logs a warning message with throttling.
// We only log once per throttleDuration for each warnName, so that we don't spam the logs.
func WarnWithThrottling(warnName, format string, v ...any) {
timestamp, ok := throttleMap.Load(warnName)
func WarnWithThrottling(reasonName, format string, v ...any) {
timestamp, ok := throttleMap.Load(reasonName)
weThrottle := ok && time.Since(timestamp) < throttleDuration
if !weThrottle {
Warn().Msgf(format, v...)
throttleMap.Store(warnName, time.Now())
throttleMap.Store(reasonName, time.Now())
}
}
6 changes: 3 additions & 3 deletions quesma/model/base_visitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@ func (v *BaseExprVisitor) VisitLiteral(e LiteralExpr) interface{} {
return NewLiteral(e.Value)
}

func (v *BaseExprVisitor) VisitTuple(e TupleExpr) interface{} {
func (v *BaseExprVisitor) VisitTuple(t TupleExpr) interface{} {
if v.OverrideVisitTuple != nil {
return v.OverrideVisitTuple(v, e)
return v.OverrideVisitTuple(v, t)
}
return NewTupleExpr(v.VisitChildren(e.Exprs)...)
return NewTupleExpr(v.VisitChildren(t.Exprs)...)
}

func (v *BaseExprVisitor) VisitInfix(e InfixExpr) interface{} {
Expand Down
1 change: 1 addition & 0 deletions quesma/model/bucket_aggregations/terms.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"reflect"
)

// TODO when adding include/exclude, check escaping of ' and \ in those fields
type Terms struct {
ctx context.Context
significant bool // true <=> significant_terms, false <=> terms
Expand Down
19 changes: 18 additions & 1 deletion quesma/model/expr_string_renderer.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"quesma/logger"
"quesma/quesma/types"
"quesma/util"
"regexp"
"sort"
"strconv"
Expand Down Expand Up @@ -66,7 +67,12 @@ func (v *renderer) VisitFunction(e FunctionExpr) interface{} {
}

func (v *renderer) VisitLiteral(l LiteralExpr) interface{} {
return fmt.Sprintf("%v", l.Value)
switch val := l.Value.(type) {
case string:
return escapeString(val)
default:
return fmt.Sprintf("%v", val)
}
}

func (v *renderer) VisitTuple(t TupleExpr) interface{} {
Expand Down Expand Up @@ -349,3 +355,14 @@ func (v *renderer) VisitJoinExpr(j JoinExpr) interface{} {
func (v *renderer) VisitCTE(c CTE) interface{} {
return fmt.Sprintf("%s AS (%s) ", c.Name, AsString(c.SelectCommand))
}

// escapeString escapes the given string so that it can be used in a SQL Clickhouse query.
// It escapes ' and \ characters: ' -> \', \ -> \\.
func escapeString(s string) string {
s = strings.ReplaceAll(s, `\`, `\\`) // \ should be escaped with no exceptions
if len(s) >= 2 && s[0] == '\'' && s[len(s)-1] == '\'' {
// don't escape the first and last '
return util.SingleQuote(strings.ReplaceAll(s[1:len(s)-1], `'`, `\'`))
}
return strings.ReplaceAll(s, `'`, `\'`)
}
8 changes: 4 additions & 4 deletions quesma/queryparser/query_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -513,12 +513,12 @@ func (cw *ClickhouseQueryTranslator) parseTerms(queryMap QueryMap) model.SimpleQ
simpleStatement := model.NewInfixExpr(model.NewColumnRef(k), "=", model.NewLiteral(sprint(vAsArray[0])))
return model.NewSimpleQuery(simpleStatement, true)
}
values := make([]string, len(vAsArray))
values := make([]model.Expr, len(vAsArray))
for i, v := range vAsArray {
values[i] = sprint(v)
values[i] = model.NewLiteral(sprint(v))
}
combinedValues := "(" + strings.Join(values, ",") + ")"
compoundStatement := model.NewInfixExpr(model.NewColumnRef(k), "IN", model.NewLiteral(combinedValues))
tuple := model.NewTupleExpr(values...)
compoundStatement := model.NewInfixExpr(model.NewColumnRef(k), "IN", tuple)
return model.NewSimpleQuery(compoundStatement, true)
}

Expand Down
78 changes: 74 additions & 4 deletions quesma/testdata/aggregation_requests_2.go
Original file line number Diff line number Diff line change
Expand Up @@ -4691,15 +4691,14 @@ var AggregationTests2 = []AggregationTestCase{
},
{ // [70]
TestName: "simplest terms with exclude (array of values)",
// TODO add ' somewhere in exclude after the merge!
QueryRequestJson: `
{
"aggs": {
"1": {
"terms": {
"field": "chess_goat",
"size": 2,
"exclude": ["Carlsen", "Kasparov", "Fis._er*"]
"exclude": ["Carlsen", "Kasparov", "Fis._er'*"]
}
}
},
Expand Down Expand Up @@ -4740,10 +4739,10 @@ var AggregationTests2 = []AggregationTestCase{
},
ExpectedPancakeSQL: `
SELECT sum(count(*)) OVER () AS "aggr__1__parent_count",
if("chess_goat" NOT IN tuple('Carlsen', 'Kasparov', 'Fis._er*'), "chess_goat", NULL)
if("chess_goat" NOT IN tuple('Carlsen', 'Kasparov', 'Fis._er\'*'), "chess_goat", NULL)
AS "aggr__1__key_0", count(*) AS "aggr__1__count"
FROM __quesma_table_name
GROUP BY if("chess_goat" NOT IN tuple('Carlsen', 'Kasparov', 'Fis._er*'), "chess_goat", NULL) AS "aggr__1__key_0"
GROUP BY if("chess_goat" NOT IN tuple('Carlsen', 'Kasparov', 'Fis._er\'*'), "chess_goat", NULL) AS "aggr__1__key_0"
ORDER BY "aggr__1__count" DESC, "aggr__1__key_0" ASC
LIMIT 3`,
},
Expand Down Expand Up @@ -5290,4 +5289,75 @@ var AggregationTests2 = []AggregationTestCase{
ORDER BY "aggr__terms2__count" DESC, "aggr__terms2__key_0" ASC
LIMIT 3`},
},
{ // [77]
TestName: `Escaping of ', \, \n, and \t in some example aggregations. No tests for other escape characters, e.g. \r or 'b. Add if needed.`,
QueryRequestJson: `
{
"aggs": {
"avg": {
"avg": {
"field": "@timestamp's\\"
}
},
"terms": {
"terms": {
"field": "agent.keyword",
"size": 1,
"missing": "quote ' and slash \\ Also \t \n"
}
}
},
"size": 0
}`,
ExpectedResponse: `
{
"_shards": {
"failed": 0,
"skipped": 0,
"successful": 1,
"total": 1
},
"aggregations": {
"avg": {
"value": null
},
"terms": {
"buckets": [
{
"doc_count": 5362,
"key": "Mozilla/5.0 (X11; Linux x86_64; rv:6.0a1) Gecko/20110421 Firefox/6.0a1"
}
],
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 8712
}
},
"hits": {
"hits": [],
"max_score": null
},
"timed_out": false,
"took": 5
}`,
ExpectedPancakeResults: []model.QueryResultRow{
{Cols: []model.QueryResultCol{
model.NewQueryResultCol("metric__avg_col_0", nil),
model.NewQueryResultCol("aggr__terms__parent_count", int64(14074)),
model.NewQueryResultCol("aggr__terms__key_0", "Mozilla/5.0 (X11; Linux x86_64; rv:6.0a1) Gecko/20110421 Firefox/6.0a1"),
model.NewQueryResultCol("aggr__terms__count", int64(5362)),
}},
},
ExpectedPancakeSQL: `
SELECT avgOrNullMerge(avgOrNullState("@timestamp's\\")) OVER () AS
"metric__avg_col_0", sum(count(*)) OVER () AS "aggr__terms__parent_count",
COALESCE("agent", 'quote \' and slash \\ Also
') AS "aggr__terms__key_0",
count(*) AS "aggr__terms__count"
FROM __quesma_table_name
GROUP BY COALESCE("agent", 'quote \' and slash \\ Also
') AS
"aggr__terms__key_0"
ORDER BY "aggr__terms__count" DESC, "aggr__terms__key_0" ASC
LIMIT 1`,
},
}
44 changes: 35 additions & 9 deletions quesma/testdata/requests.go
Original file line number Diff line number Diff line change
Expand Up @@ -1006,18 +1006,18 @@ var TestsSearch = []SearchTestCase{
},
{
"terms": {
"task.enabled": [true, 54]
"task.enabled": [true, 54, "abc", "abc's"]
}
}
]
}
},
"track_total_hits": true
}`,
[]string{`("type"='task' AND "task.enabled" IN (true,54))`},
[]string{`("type"='task' AND "task.enabled" IN tuple(true, 54, 'abc', 'abc\'s'))`},
model.ListAllFields,
[]string{
`SELECT "message" FROM ` + TableName + ` WHERE ("type"='task' AND "task.enabled" IN (true,54)) LIMIT 10`,
`SELECT "message" FROM ` + TableName + ` WHERE ("type"='task' AND "task.enabled" IN tuple(true, 54, 'abc', 'abc\\'s')) LIMIT 10`,
`SELECT count(*) AS "column_0" FROM ` + TableName,
},
[]string{},
Expand Down Expand Up @@ -2196,13 +2196,13 @@ var TestsSearch = []SearchTestCase{
},
"track_total_hits": false
}`,
[]string{`("cliIP" IN ('2601:204:c503:c240:9c41:5531:ad94:4d90','50.116.43.98','75.246.0.64') AND ("@timestamp">=fromUnixTimestamp64Milli(1715817600000) AND "@timestamp"<=fromUnixTimestamp64Milli(1715990399000)))`},
[]string{`("cliIP" IN tuple('2601:204:c503:c240:9c41:5531:ad94:4d90', '50.116.43.98', '75.246.0.64') AND ("@timestamp">=fromUnixTimestamp64Milli(1715817600000) AND "@timestamp"<=fromUnixTimestamp64Milli(1715990399000)))`},
model.ListAllFields,
//[]model.Query{withLimit(justSimplestWhere(`("cliIP" IN ('2601:204:c503:c240:9c41:5531:ad94:4d90','50.116.43.98','75.246.0.64') AND ("@timestamp">=parseDateTime64BestEffort('2024-05-16T00:00:00') AND "@timestamp"<=parseDateTime64BestEffort('2024-05-17T23:59:59')))`), 1)},
[]string{
`SELECT "message" ` +
`FROM ` + TableName + ` ` +
`WHERE ("cliIP" IN ('2601:204:c503:c240:9c41:5531:ad94:4d90','50.116.43.98','75.246.0.64') ` +
`WHERE ("cliIP" IN tuple('2601:204:c503:c240:9c41:5531:ad94:4d90', '50.116.43.98', '75.246.0.64') ` +
`AND ("@timestamp">=fromUnixTimestamp64Milli(1715817600000) AND "@timestamp"<=fromUnixTimestamp64Milli(1715990399000))) ` +
`LIMIT 1`,
},
Expand Down Expand Up @@ -2254,12 +2254,14 @@ var TestsSearch = []SearchTestCase{
},
"track_total_hits": false
}`,
[]string{`"field" LIKE '%\___'`},
// Escaping _ twice ("\\_") seemed wrong, but it actually works in Clickhouse!
// \\\\ means 2 escaped backslashes, actual returned string is "\\"
[]string{`"field" LIKE '%\\___'`},
model.ListAllFields,
[]string{
`SELECT "message" ` +
`FROM ` + TableName + ` ` +
`WHERE "field" LIKE '%\\___' ` +
`WHERE "field" LIKE '%\\\\___' ` +
`LIMIT 10`,
},
[]string{},
Expand Down Expand Up @@ -2321,6 +2323,30 @@ var TestsSearch = []SearchTestCase{
[]string{},
},
{ // [40]
`Escaping of ', \, \t and \n`,
`
{
"query": {
"bool": {
"filter": [
{
"match_phrase": {
"message": "\nMen's Clothing \\ \t"
}
}
]
}
},
"track_total_hits": false
}`,
[]string{`("message" __quesma_match '
Men\'s Clothing \\ ')`},
model.ListAllFields,
[]string{`SELECT "message" FROM ` + TableName + ` WHERE "message" iLIKE '%
Men\\'s Clothing \\\\ %' LIMIT 10`},
[]string{},
},
{ // [41]
"ids, 0 values",
`{
"query": {
Expand All @@ -2340,7 +2366,7 @@ var TestsSearch = []SearchTestCase{
},
[]string{},
},
{ // [41]
{ // [42]
"ids, 1 value",
`{
"query": {
Expand All @@ -2360,7 +2386,7 @@ var TestsSearch = []SearchTestCase{
},
[]string{},
},
{ // [42]
{ // [43]
"ids, 2+ values",
`{
"query": {
Expand Down
7 changes: 6 additions & 1 deletion quesma/util/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,11 @@ func ExtractNumeric64(value any) float64 {
return asFloat64
}

// SingleQuote is a simple helper function: str -> 'str'
func SingleQuote(value string) string {
return "'" + value + "'"
}

type sqlMockMismatchSql struct {
expected string
actual string
Expand Down Expand Up @@ -847,7 +852,7 @@ func stringifyHelper(v interface{}, isInsideArray bool) string {

// This functions returns a string from an interface{}.
func Stringify(v interface{}) string {
isInsideArray := false
const isInsideArray = false
return stringifyHelper(v, isInsideArray)
}

Expand Down

0 comments on commit 9173255

Please sign in to comment.