diff --git a/quesma/go.mod b/quesma/go.mod index 932de804b..211a28210 100644 --- a/quesma/go.mod +++ b/quesma/go.mod @@ -32,6 +32,7 @@ require ( github.com/mitchellh/copystructure v1.2.0 // indirect github.com/mitchellh/reflectwalk v1.0.2 // indirect github.com/pkg/errors v0.9.1 // indirect + github.com/relvacode/iso8601 v1.4.0 // indirect github.com/tailscale/hujson v0.0.0-20221223112325-20486734a56a // indirect golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect ) diff --git a/quesma/go.sum b/quesma/go.sum index f04b6442e..db48339a3 100644 --- a/quesma/go.sum +++ b/quesma/go.sum @@ -103,6 +103,8 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw= github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= +github.com/relvacode/iso8601 v1.4.0 h1:GsInVSEJfkYuirYFxa80nMLbH2aydgZpIf52gYZXUJs= +github.com/relvacode/iso8601 v1.4.0/go.mod h1:FlNp+jz+TXpyRqgmM7tnzHHzBnz776kmAH2h3sZCn0I= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= diff --git a/quesma/queryparser/query_parser.go b/quesma/queryparser/query_parser.go index f3951d380..96c9800f5 100644 --- a/quesma/queryparser/query_parser.go +++ b/quesma/queryparser/query_parser.go @@ -2,19 +2,19 @@ package queryparser import ( "encoding/json" - wc "mitmproxy/quesma/queryparser/where_clause" - "mitmproxy/quesma/quesma/types" - "fmt" "github.com/k0kubun/pp" + "github.com/relvacode/iso8601" "mitmproxy/quesma/clickhouse" "mitmproxy/quesma/logger" "mitmproxy/quesma/model" "mitmproxy/quesma/queryparser/lucene" "mitmproxy/quesma/queryparser/query_util" + wc "mitmproxy/quesma/queryparser/where_clause" + "mitmproxy/quesma/quesma/types" + "mitmproxy/quesma/util" "strconv" "strings" - "time" "unicode" ) @@ -777,7 +777,9 @@ func (cw *ClickhouseQueryTranslator) parseRange(queryMap QueryMap) model.SimpleQ isDatetimeInDefaultFormat = false } - for op, v := range v.(QueryMap) { + keysSorted := util.MapKeysSorted(v.(QueryMap)) + for _, op := range keysSorted { + v := v.(QueryMap)[op] var fieldToPrint, timeFormatFuncName string var valueToCompare wc.Statement fieldType := cw.Table.GetDateTimeType(cw.Ctx, field) @@ -793,7 +795,7 @@ func (cw *ClickhouseQueryTranslator) parseRange(queryMap QueryMap) model.SimpleQ if dateTime, ok := v.(string); ok { // if it's a date, we need to parse it to Clickhouse's DateTime format // how to check if it does not contain date math expression? - if _, err := time.Parse(time.RFC3339Nano, dateTime); err == nil { + if _, err := iso8601.ParseString(dateTime); err == nil { vToPrint, timeFormatFuncName = cw.parseDateTimeString(cw.Table, field, dateTime) // TODO Investigate the quotation below valueToCompare = wc.NewFunction(timeFormatFuncName, wc.NewLiteral(fmt.Sprintf("'%s'", dateTime))) diff --git a/quesma/queryparser/query_parser_range_test.go b/quesma/queryparser/query_parser_range_test.go new file mode 100644 index 000000000..954038ee7 --- /dev/null +++ b/quesma/queryparser/query_parser_range_test.go @@ -0,0 +1,91 @@ +package queryparser + +import ( + "context" + "github.com/stretchr/testify/assert" + "mitmproxy/quesma/clickhouse" + "mitmproxy/quesma/concurrent" + "mitmproxy/quesma/quesma/config" + "testing" +) + +type parseRangeTest struct { + name string + rangePartOfQuery QueryMap + createTableQuery string + expectedWhere string +} + +var parseRangeTests = []parseRangeTest{ + { + "DateTime64", + QueryMap{ + "timestamp": QueryMap{ + "format": "strict_date_optional_time", + "gte": "2024-02-02T13:47:16.029Z", + "lte": "2024-02-09T13:47:16.029Z", + }, + }, + `CREATE TABLE ` + tableName + ` + ( "message" String, "timestamp" DateTime64(3, 'UTC') ) + ENGINE = Memory`, + `"timestamp">=parseDateTime64BestEffort('2024-02-02T13:47:16.029Z') AND "timestamp"<=parseDateTime64BestEffort('2024-02-09T13:47:16.029Z')`, + }, + { + "parseDateTimeBestEffort", + QueryMap{ + "timestamp": QueryMap{ + "format": "strict_date_optional_time", + "gte": "2024-02-02T13:47:16.029Z", + "lte": "2024-02-09T13:47:16.029Z", + }, + }, + `CREATE TABLE ` + tableName + ` + ( "message" String, "timestamp" DateTime ) + ENGINE = Memory`, + `"timestamp">=parseDateTimeBestEffort('2024-02-02T13:47:16.029Z') AND "timestamp"<=parseDateTimeBestEffort('2024-02-09T13:47:16.029Z')`, + }, + { + "numeric range", + QueryMap{ + "time_taken": QueryMap{ + "gt": "100", + }, + }, + `CREATE TABLE ` + tableName + ` + ( "message" String, "timestamp" DateTime, "time_taken" UInt32 ) + ENGINE = Memory`, + `"time_taken">100`, + }, + { + "DateTime64", + QueryMap{ + "timestamp": QueryMap{ + "format": "strict_date_optional_time", + "gte": "2024-02-02T13:47:16", + "lte": "2024-02-09T13:47:16", + }, + }, + `CREATE TABLE ` + tableName + ` + ( "message" String, "timestamp" DateTime64(3, 'UTC') ) + ENGINE = Memory`, + `"timestamp">=parseDateTime64BestEffort('2024-02-02T13:47:16') AND "timestamp"<=parseDateTime64BestEffort('2024-02-09T13:47:16')`, + }, +} + +func Test_parseRange(t *testing.T) { + for _, test := range parseRangeTests { + t.Run(test.name, func(t *testing.T) { + table, err := clickhouse.NewTable(test.createTableQuery, clickhouse.NewNoTimestampOnlyStringAttrCHConfig()) + if err != nil { + t.Fatal(err) + } + assert.NoError(t, err) + lm := clickhouse.NewLogManager(concurrent.NewMapWith(tableName, table), config.QuesmaConfiguration{}) + cw := ClickhouseQueryTranslator{ClickhouseLM: lm, Table: table, Ctx: context.Background()} + + whereClause := cw.parseRange(test.rangePartOfQuery).Sql.Stmt + assert.Equal(t, test.expectedWhere, whereClause) + }) + } +} diff --git a/quesma/queryparser/query_parser_test.go b/quesma/queryparser/query_parser_test.go index 4d2b165ef..dca930500 100644 --- a/quesma/queryparser/query_parser_test.go +++ b/quesma/queryparser/query_parser_test.go @@ -54,9 +54,14 @@ func TestQueryParserStringAttrConfig(t *testing.T) { t.Run(tt.Name, func(t *testing.T) { simpleQuery, queryInfo, _, _ := cw.ParseQueryInternal(tt.QueryJson) assert.True(t, simpleQuery.CanParse, "can parse") + simpleQuery, queryInfo, _, _ = cw.ParseQueryInternal(tt.QueryJson) assert.Contains(t, tt.WantedSql, simpleQuery.Sql.Stmt, "contains wanted sql") assert.Equal(t, tt.WantedQueryType, queryInfo.Typ, "equals to wanted query type") - query := cw.BuildNRowsQuery("*", simpleQuery, model.DefaultSizeListQuery) + size := model.DefaultSizeListQuery + if queryInfo.Size != 0 { + size = queryInfo.Size + } + query := cw.BuildNRowsQuery("*", simpleQuery, size) assert.Contains(t, tt.WantedQuery, *query) // Test the new WhereStatement if simpleQuery.Sql.WhereStatement != nil { @@ -336,77 +341,6 @@ func TestNew(t *testing.T) { } } -// Test_parseRange tests if DateTime64 field properly uses Clickhouse's 'parseDateTime64BestEffort' function -func Test_parseRange_DateTime64(t *testing.T) { - rangePartOfQuery := QueryMap{ - "timestamp": QueryMap{ - "format": "strict_date_optional_time", - "gte": "2024-02-02T13:47:16.029Z", - "lte": "2024-02-09T13:47:16.029Z", - }, - } - table, err := clickhouse.NewTable(`CREATE TABLE `+tableName+` - ( "message" String, "timestamp" DateTime64(3, 'UTC') ) - ENGINE = Memory`, - clickhouse.NewNoTimestampOnlyStringAttrCHConfig(), - ) - if err != nil { - t.Fatal(err) - } - lm := clickhouse.NewLogManager(concurrent.NewMapWith(tableName, table), config.QuesmaConfiguration{}) - cw := ClickhouseQueryTranslator{ClickhouseLM: lm, Table: table, Ctx: context.Background()} - - whereClause := cw.parseRange(rangePartOfQuery).Sql.Stmt - split := strings.Split(whereClause, "parseDateTime64BestEffort") - assert.Len(t, split, 3) -} - -// Test_parseRange tests if DateTime field properly uses Clickhouse's 'parseDateTimeBestEffort' function -func Test_parseRange_DateTime(t *testing.T) { - rangePartOfQuery := QueryMap{ - "timestamp": QueryMap{ - "format": "strict_date_optional_time", - "gte": "2024-02-02T13:47:16.029Z", - "lte": "2024-02-09T13:47:16.029Z", - }, - } - table, err := clickhouse.NewTable(`CREATE TABLE `+tableName+` - ( "message" String, "timestamp" DateTime ) - ENGINE = Memory`, - clickhouse.NewNoTimestampOnlyStringAttrCHConfig(), - ) - if err != nil { - t.Fatal(err) - } - lm := clickhouse.NewLogManager(concurrent.NewMapWith(tableName, table), config.QuesmaConfiguration{}) - cw := ClickhouseQueryTranslator{ClickhouseLM: lm, Table: table, Ctx: context.Background()} - - whereClause := cw.parseRange(rangePartOfQuery).Sql.Stmt - split := strings.Split(whereClause, "parseDateTimeBestEffort") - assert.Len(t, split, 3) -} - -func Test_parseRange_numeric(t *testing.T) { - rangePartOfQuery := QueryMap{ - "time_taken": QueryMap{ - "gt": "100", - }, - } - table, err := clickhouse.NewTable(`CREATE TABLE `+tableName+` - ( "message" String, "timestamp" DateTime, "time_taken" UInt32 ) - ENGINE = Memory`, - clickhouse.NewNoTimestampOnlyStringAttrCHConfig(), - ) - if err != nil { - t.Fatal(err) - } - lm := clickhouse.NewLogManager(concurrent.NewMapWith(tableName, table), config.QuesmaConfiguration{}) - cw := ClickhouseQueryTranslator{ClickhouseLM: lm, Table: table, Ctx: context.Background()} - - whereClause := cw.parseRange(rangePartOfQuery).Sql.Stmt - assert.Equal(t, "\"time_taken\">100", whereClause) -} - func Test_parseSortFields(t *testing.T) { tests := []struct { name string diff --git a/quesma/quesma/search_test.go b/quesma/quesma/search_test.go index 661393bb8..107e3f986 100644 --- a/quesma/quesma/search_test.go +++ b/quesma/quesma/search_test.go @@ -74,13 +74,6 @@ func TestAsyncSearchHandler(t *testing.T) { for i, tt := range testdata.TestsAsyncSearch { t.Run(strconv.Itoa(i)+tt.Name, func(t *testing.T) { db, mock, err := sqlmock.New() - if tt.Name == "Histogram: possible query nr 2" { - queryMatcher := sqlmock.QueryMatcherFunc(func(expectedSQL, actualSQL string) error { - fmt.Printf("JM SQL: %s\n", actualSQL) - return sqlmock.QueryMatcherRegexp.Match(expectedSQL, actualSQL) - }) - db, mock, err = sqlmock.New(sqlmock.QueryMatcherOption(queryMatcher)) - } if err != nil { t.Fatal(err) } @@ -172,6 +165,7 @@ func TestSearchHandler(t *testing.T) { cfg := config.QuesmaConfiguration{IndexConfig: map[string]config.IndexConfiguration{tableName: {Enabled: true}}} for _, tt := range testdata.TestsSearch { t.Run(tt.Name, func(t *testing.T) { + db, mock, err := sqlmock.New() if err != nil { t.Fatal(err) diff --git a/quesma/testdata/requests.go b/quesma/testdata/requests.go index 2c6b9140f..da2ede08a 100644 --- a/quesma/testdata/requests.go +++ b/quesma/testdata/requests.go @@ -1781,7 +1781,7 @@ var TestsSearch = []SearchTestCase{ }`, []string{""}, model.ListByField, - []model.Query{newSimplestQuery()}, + []model.Query{withLimit(newSimplestQuery(), 500)}, []string{`SELECT "message" FROM "logs-generic-default" LIMIT 500`}, }, { // [26] @@ -1952,6 +1952,39 @@ var TestsSearch = []SearchTestCase{ []model.Query{justSimplestWhere(`"user.id"='kimchy'`)}, []string{qToStr(justSimplestWhere(`"user.id"='kimchy'`))}, }, + { // [35] terms with range + "Terms with range", + `{ + "size": 1, + "query": { + "bool": { + "filter": [ + { + "terms": { + "cliIP": [ + "2601:204:c503:c240:9c41:5531:ad94:4d90", + "50.116.43.98", + "75.246.0.64" + ] + } + }, + { + "range": { + "@timestamp": { + "gte": "2024-05-16T00:00:00", + "lte": "2024-05-17T23:59:59" + } + } + } + ] + } + } + }`, + []string{`("cliIP"='2601:204:c503:c240:9c41:5531:ad94:4d90' OR "cliIP"='50.116.43.98' OR "cliIP"='75.246.0.64') AND ("@timestamp">=parseDateTime64BestEffort('2024-05-16T00:00:00') AND "@timestamp"<=parseDateTime64BestEffort('2024-05-17T23:59:59'))`}, + model.Normal, + []model.Query{withLimit(justSimplestWhere(`("cliIP"='2601:204:c503:c240:9c41:5531:ad94:4d90' OR "cliIP"='50.116.43.98' OR "cliIP"='75.246.0.64') AND ("@timestamp">=parseDateTime64BestEffort('2024-05-16T00:00:00') AND "@timestamp"<=parseDateTime64BestEffort('2024-05-17T23:59:59'))`), 1)}, + []string{qToStr(withLimit(justSimplestWhere(`("cliIP"='2601:204:c503:c240:9c41:5531:ad94:4d90' OR "cliIP"='50.116.43.98' OR "cliIP"='75.246.0.64') AND ("@timestamp">=parseDateTime64BestEffort('2024-05-16T00:00:00') AND "@timestamp"<=parseDateTime64BestEffort('2024-05-17T23:59:59'))`), 1))}, + }, } var TestsSearchNoAttrs = []SearchTestCase{ diff --git a/quesma/testdata/util.go b/quesma/testdata/util.go index fe651c793..f66f3e5ed 100644 --- a/quesma/testdata/util.go +++ b/quesma/testdata/util.go @@ -64,6 +64,11 @@ func justSimplestWhere(whereClause string) model.Query { return query } +func withLimit(query model.Query, limit int) model.Query { + query.SuffixClauses = []string{"LIMIT " + strconv.Itoa(limit)} + return query +} + // EscapeBrackets is a simple helper function used in sqlmock's tests. // Example usage: sqlmock.ExpectQuery(EscapeBrackets(`SELECT count() FROM "logs-generic-default" WHERE `)) func EscapeBrackets(s string) string {