Skip to content

Commit

Permalink
Support timestamp in iso 8601 format (#210)
Browse files Browse the repository at this point in the history
Why:
- a prospect of ours had many queries fail because of this limitation
- Elastic and Opensearch support [ISO
8601](https://en.wikipedia.org/wiki/ISO_8601)
- ClickHouse also supports it as part of
https://clickhouse.com/docs/en/sql-reference/functions/type-conversion-functions#parsedatetimebesteffort
- Go's native support for ISO 8601 is poor, but there is a good library for
that
  • Loading branch information
jakozaur authored May 24, 2024
1 parent e0fc128 commit dc8c289
Show file tree
Hide file tree
Showing 8 changed files with 148 additions and 86 deletions.
1 change: 1 addition & 0 deletions quesma/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ require (
github.com/mitchellh/copystructure v1.2.0 // indirect
github.com/mitchellh/reflectwalk v1.0.2 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/relvacode/iso8601 v1.4.0 // indirect
github.com/tailscale/hujson v0.0.0-20221223112325-20486734a56a // indirect
golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect
)
Expand Down
2 changes: 2 additions & 0 deletions quesma/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw=
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
github.com/relvacode/iso8601 v1.4.0 h1:GsInVSEJfkYuirYFxa80nMLbH2aydgZpIf52gYZXUJs=
github.com/relvacode/iso8601 v1.4.0/go.mod h1:FlNp+jz+TXpyRqgmM7tnzHHzBnz776kmAH2h3sZCn0I=
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
Expand Down
14 changes: 8 additions & 6 deletions quesma/queryparser/query_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,19 @@ package queryparser

import (
"encoding/json"
wc "mitmproxy/quesma/queryparser/where_clause"
"mitmproxy/quesma/quesma/types"

"fmt"
"github.com/k0kubun/pp"
"github.com/relvacode/iso8601"
"mitmproxy/quesma/clickhouse"
"mitmproxy/quesma/logger"
"mitmproxy/quesma/model"
"mitmproxy/quesma/queryparser/lucene"
"mitmproxy/quesma/queryparser/query_util"
wc "mitmproxy/quesma/queryparser/where_clause"
"mitmproxy/quesma/quesma/types"
"mitmproxy/quesma/util"
"strconv"
"strings"
"time"
"unicode"
)

Expand Down Expand Up @@ -777,7 +777,9 @@ func (cw *ClickhouseQueryTranslator) parseRange(queryMap QueryMap) model.SimpleQ
isDatetimeInDefaultFormat = false
}

for op, v := range v.(QueryMap) {
keysSorted := util.MapKeysSorted(v.(QueryMap))
for _, op := range keysSorted {
v := v.(QueryMap)[op]
var fieldToPrint, timeFormatFuncName string
var valueToCompare wc.Statement
fieldType := cw.Table.GetDateTimeType(cw.Ctx, field)
Expand All @@ -793,7 +795,7 @@ func (cw *ClickhouseQueryTranslator) parseRange(queryMap QueryMap) model.SimpleQ
if dateTime, ok := v.(string); ok {
// if it's a date, we need to parse it to Clickhouse's DateTime format
// how to check if it does not contain date math expression?
if _, err := time.Parse(time.RFC3339Nano, dateTime); err == nil {
if _, err := iso8601.ParseString(dateTime); err == nil {
vToPrint, timeFormatFuncName = cw.parseDateTimeString(cw.Table, field, dateTime)
// TODO Investigate the quotation below
valueToCompare = wc.NewFunction(timeFormatFuncName, wc.NewLiteral(fmt.Sprintf("'%s'", dateTime)))
Expand Down
91 changes: 91 additions & 0 deletions quesma/queryparser/query_parser_range_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package queryparser

import (
"context"
"github.com/stretchr/testify/assert"
"mitmproxy/quesma/clickhouse"
"mitmproxy/quesma/concurrent"
"mitmproxy/quesma/quesma/config"
"testing"
)

// parseRangeTest describes one table-driven case for Test_parseRange.
// NOTE: parseRangeTests fills this struct with positional literals, so the
// field order here must not change.
type parseRangeTest struct {
// name is the subtest name passed to t.Run.
name string
// rangePartOfQuery is the range query fragment handed to parseRange.
rangePartOfQuery QueryMap
// createTableQuery is the CREATE TABLE statement used to build the table under test.
createTableQuery string
// expectedWhere is the exact SQL statement parseRange is expected to produce.
expectedWhere string
}

// parseRangeTests lists the cases executed by Test_parseRange. Each entry is a
// positional parseRangeTest literal: subtest name, range query, CREATE TABLE
// statement, and the expected WHERE statement.
//
// Every case has a unique name so that each subtest can be selected and
// reported unambiguously.
var parseRangeTests = []parseRangeTest{
	{
		"DateTime64",
		QueryMap{
			"timestamp": QueryMap{
				"format": "strict_date_optional_time",
				"gte":    "2024-02-02T13:47:16.029Z",
				"lte":    "2024-02-09T13:47:16.029Z",
			},
		},
		`CREATE TABLE ` + tableName + `
( "message" String, "timestamp" DateTime64(3, 'UTC') )
ENGINE = Memory`,
		`"timestamp">=parseDateTime64BestEffort('2024-02-02T13:47:16.029Z') AND "timestamp"<=parseDateTime64BestEffort('2024-02-09T13:47:16.029Z')`,
	},
	{
		"parseDateTimeBestEffort",
		QueryMap{
			"timestamp": QueryMap{
				"format": "strict_date_optional_time",
				"gte":    "2024-02-02T13:47:16.029Z",
				"lte":    "2024-02-09T13:47:16.029Z",
			},
		},
		`CREATE TABLE ` + tableName + `
( "message" String, "timestamp" DateTime )
ENGINE = Memory`,
		`"timestamp">=parseDateTimeBestEffort('2024-02-02T13:47:16.029Z') AND "timestamp"<=parseDateTimeBestEffort('2024-02-09T13:47:16.029Z')`,
	},
	{
		"numeric range",
		QueryMap{
			"time_taken": QueryMap{
				"gt": "100",
			},
		},
		`CREATE TABLE ` + tableName + `
( "message" String, "timestamp" DateTime, "time_taken" UInt32 )
ENGINE = Memory`,
		`"time_taken">100`,
	},
	{
		// ISO 8601 timestamps without fractional seconds or a timezone suffix.
		"DateTime64 without fractional seconds and timezone",
		QueryMap{
			"timestamp": QueryMap{
				"format": "strict_date_optional_time",
				"gte":    "2024-02-02T13:47:16",
				"lte":    "2024-02-09T13:47:16",
			},
		},
		`CREATE TABLE ` + tableName + `
( "message" String, "timestamp" DateTime64(3, 'UTC') )
ENGINE = Memory`,
		`"timestamp">=parseDateTime64BestEffort('2024-02-02T13:47:16') AND "timestamp"<=parseDateTime64BestEffort('2024-02-09T13:47:16')`,
	},
}

// Test_parseRange runs every case in parseRangeTests as its own subtest.
// For each case it builds a table from the case's CREATE TABLE statement,
// translates the range query with parseRange, and compares the resulting
// SQL statement with the expected one.
func Test_parseRange(t *testing.T) {
	for _, test := range parseRangeTests {
		t.Run(test.name, func(t *testing.T) {
			table, err := clickhouse.NewTable(test.createTableQuery, clickhouse.NewNoTimestampOnlyStringAttrCHConfig())
			if err != nil {
				// Without a table there is nothing to translate against; stop this subtest.
				t.Fatal(err)
			}
			lm := clickhouse.NewLogManager(concurrent.NewMapWith(tableName, table), config.QuesmaConfiguration{})
			cw := ClickhouseQueryTranslator{ClickhouseLM: lm, Table: table, Ctx: context.Background()}

			whereClause := cw.parseRange(test.rangePartOfQuery).Sql.Stmt
			assert.Equal(t, test.expectedWhere, whereClause)
		})
	}
}
78 changes: 6 additions & 72 deletions quesma/queryparser/query_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,14 @@ func TestQueryParserStringAttrConfig(t *testing.T) {
t.Run(tt.Name, func(t *testing.T) {
simpleQuery, queryInfo, _, _ := cw.ParseQueryInternal(tt.QueryJson)
assert.True(t, simpleQuery.CanParse, "can parse")
simpleQuery, queryInfo, _, _ = cw.ParseQueryInternal(tt.QueryJson)
assert.Contains(t, tt.WantedSql, simpleQuery.Sql.Stmt, "contains wanted sql")
assert.Equal(t, tt.WantedQueryType, queryInfo.Typ, "equals to wanted query type")
query := cw.BuildNRowsQuery("*", simpleQuery, model.DefaultSizeListQuery)
size := model.DefaultSizeListQuery
if queryInfo.Size != 0 {
size = queryInfo.Size
}
query := cw.BuildNRowsQuery("*", simpleQuery, size)
assert.Contains(t, tt.WantedQuery, *query)
// Test the new WhereStatement
if simpleQuery.Sql.WhereStatement != nil {
Expand Down Expand Up @@ -336,77 +341,6 @@ func TestNew(t *testing.T) {
}
}

// Test_parseRange tests if DateTime64 field properly uses Clickhouse's 'parseDateTime64BestEffort' function
func Test_parseRange_DateTime64(t *testing.T) {
rangePartOfQuery := QueryMap{
"timestamp": QueryMap{
"format": "strict_date_optional_time",
"gte": "2024-02-02T13:47:16.029Z",
"lte": "2024-02-09T13:47:16.029Z",
},
}
table, err := clickhouse.NewTable(`CREATE TABLE `+tableName+`
( "message" String, "timestamp" DateTime64(3, 'UTC') )
ENGINE = Memory`,
clickhouse.NewNoTimestampOnlyStringAttrCHConfig(),
)
if err != nil {
t.Fatal(err)
}
lm := clickhouse.NewLogManager(concurrent.NewMapWith(tableName, table), config.QuesmaConfiguration{})
cw := ClickhouseQueryTranslator{ClickhouseLM: lm, Table: table, Ctx: context.Background()}

whereClause := cw.parseRange(rangePartOfQuery).Sql.Stmt
split := strings.Split(whereClause, "parseDateTime64BestEffort")
assert.Len(t, split, 3)
}

// Test_parseRange tests if DateTime field properly uses Clickhouse's 'parseDateTimeBestEffort' function
func Test_parseRange_DateTime(t *testing.T) {
rangePartOfQuery := QueryMap{
"timestamp": QueryMap{
"format": "strict_date_optional_time",
"gte": "2024-02-02T13:47:16.029Z",
"lte": "2024-02-09T13:47:16.029Z",
},
}
table, err := clickhouse.NewTable(`CREATE TABLE `+tableName+`
( "message" String, "timestamp" DateTime )
ENGINE = Memory`,
clickhouse.NewNoTimestampOnlyStringAttrCHConfig(),
)
if err != nil {
t.Fatal(err)
}
lm := clickhouse.NewLogManager(concurrent.NewMapWith(tableName, table), config.QuesmaConfiguration{})
cw := ClickhouseQueryTranslator{ClickhouseLM: lm, Table: table, Ctx: context.Background()}

whereClause := cw.parseRange(rangePartOfQuery).Sql.Stmt
split := strings.Split(whereClause, "parseDateTimeBestEffort")
assert.Len(t, split, 3)
}

func Test_parseRange_numeric(t *testing.T) {
rangePartOfQuery := QueryMap{
"time_taken": QueryMap{
"gt": "100",
},
}
table, err := clickhouse.NewTable(`CREATE TABLE `+tableName+`
( "message" String, "timestamp" DateTime, "time_taken" UInt32 )
ENGINE = Memory`,
clickhouse.NewNoTimestampOnlyStringAttrCHConfig(),
)
if err != nil {
t.Fatal(err)
}
lm := clickhouse.NewLogManager(concurrent.NewMapWith(tableName, table), config.QuesmaConfiguration{})
cw := ClickhouseQueryTranslator{ClickhouseLM: lm, Table: table, Ctx: context.Background()}

whereClause := cw.parseRange(rangePartOfQuery).Sql.Stmt
assert.Equal(t, "\"time_taken\">100", whereClause)
}

func Test_parseSortFields(t *testing.T) {
tests := []struct {
name string
Expand Down
8 changes: 1 addition & 7 deletions quesma/quesma/search_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,6 @@ func TestAsyncSearchHandler(t *testing.T) {
for i, tt := range testdata.TestsAsyncSearch {
t.Run(strconv.Itoa(i)+tt.Name, func(t *testing.T) {
db, mock, err := sqlmock.New()
if tt.Name == "Histogram: possible query nr 2" {
queryMatcher := sqlmock.QueryMatcherFunc(func(expectedSQL, actualSQL string) error {
fmt.Printf("JM SQL: %s\n", actualSQL)
return sqlmock.QueryMatcherRegexp.Match(expectedSQL, actualSQL)
})
db, mock, err = sqlmock.New(sqlmock.QueryMatcherOption(queryMatcher))
}
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -172,6 +165,7 @@ func TestSearchHandler(t *testing.T) {
cfg := config.QuesmaConfiguration{IndexConfig: map[string]config.IndexConfiguration{tableName: {Enabled: true}}}
for _, tt := range testdata.TestsSearch {
t.Run(tt.Name, func(t *testing.T) {

db, mock, err := sqlmock.New()
if err != nil {
t.Fatal(err)
Expand Down
35 changes: 34 additions & 1 deletion quesma/testdata/requests.go
Original file line number Diff line number Diff line change
Expand Up @@ -1781,7 +1781,7 @@ var TestsSearch = []SearchTestCase{
}`,
[]string{""},
model.ListByField,
[]model.Query{newSimplestQuery()},
[]model.Query{withLimit(newSimplestQuery(), 500)},
[]string{`SELECT "message" FROM "logs-generic-default" LIMIT 500`},
},
{ // [26]
Expand Down Expand Up @@ -1952,6 +1952,39 @@ var TestsSearch = []SearchTestCase{
[]model.Query{justSimplestWhere(`"user.id"='kimchy'`)},
[]string{qToStr(justSimplestWhere(`"user.id"='kimchy'`))},
},
{ // [35] terms with range
"Terms with range",
`{
"size": 1,
"query": {
"bool": {
"filter": [
{
"terms": {
"cliIP": [
"2601:204:c503:c240:9c41:5531:ad94:4d90",
"50.116.43.98",
"75.246.0.64"
]
}
},
{
"range": {
"@timestamp": {
"gte": "2024-05-16T00:00:00",
"lte": "2024-05-17T23:59:59"
}
}
}
]
}
}
}`,
[]string{`("cliIP"='2601:204:c503:c240:9c41:5531:ad94:4d90' OR "cliIP"='50.116.43.98' OR "cliIP"='75.246.0.64') AND ("@timestamp">=parseDateTime64BestEffort('2024-05-16T00:00:00') AND "@timestamp"<=parseDateTime64BestEffort('2024-05-17T23:59:59'))`},
model.Normal,
[]model.Query{withLimit(justSimplestWhere(`("cliIP"='2601:204:c503:c240:9c41:5531:ad94:4d90' OR "cliIP"='50.116.43.98' OR "cliIP"='75.246.0.64') AND ("@timestamp">=parseDateTime64BestEffort('2024-05-16T00:00:00') AND "@timestamp"<=parseDateTime64BestEffort('2024-05-17T23:59:59'))`), 1)},
[]string{qToStr(withLimit(justSimplestWhere(`("cliIP"='2601:204:c503:c240:9c41:5531:ad94:4d90' OR "cliIP"='50.116.43.98' OR "cliIP"='75.246.0.64') AND ("@timestamp">=parseDateTime64BestEffort('2024-05-16T00:00:00') AND "@timestamp"<=parseDateTime64BestEffort('2024-05-17T23:59:59'))`), 1))},
},
}

var TestsSearchNoAttrs = []SearchTestCase{
Expand Down
5 changes: 5 additions & 0 deletions quesma/testdata/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ func justSimplestWhere(whereClause string) model.Query {
return query
}

// withLimit returns the given query with its SuffixClauses replaced by a
// single "LIMIT <limit>" clause. The query is received by value, so the
// caller's SuffixClauses field is not reassigned.
func withLimit(query model.Query, limit int) model.Query {
	limitClause := "LIMIT " + strconv.Itoa(limit)
	query.SuffixClauses = []string{limitClause}
	return query
}

// EscapeBrackets is a simple helper function used in sqlmock's tests.
// Example usage: sqlmock.ExpectQuery(EscapeBrackets(`SELECT count() FROM "logs-generic-default" WHERE `))
func EscapeBrackets(s string) string {
Expand Down

0 comments on commit dc8c289

Please sign in to comment.