From b7ba1e69f788b4fa3a4470ee57648ce61e628695 Mon Sep 17 00:00:00 2001 From: Jacek Migdal Date: Thu, 23 May 2024 15:38:19 +0200 Subject: [PATCH] Support json with comments (#204) Apparently this is also a valid query: ``` { "size": 0, // Set size to 0 to disable retrieving documents, only perform aggregations "aggs": { "unique_paths_per_ip": { "terms": { "field": "ip", "size": 10 }, // Group by "ip" with size 10 (top 10) "aggs": { "unique_paths": { "cardinality": { "field": "path" } // Count unique values in the "path" field } } } } } ``` --- quesma/clickhouse/clickhouse.go | 2 +- quesma/clickhouse/clickhouse_test.go | 2 +- quesma/clickhouse/parser.go | 15 ++------------- quesma/eql/query_translator.go | 5 ++--- quesma/go.mod | 1 + quesma/go.sum | 2 ++ quesma/queryparser/aggregation_parser.go | 5 ++--- quesma/queryparser/query_parser.go | 7 ++++--- quesma/quesma/termsenum/request.go | 13 ------------- quesma/quesma/types/json.go | 6 ++++++ quesma/quesma/types/json_test.go | 12 ++++++++++++ quesma/testdata/requests.go | 16 ++++++++++++++++ 12 files changed, 49 insertions(+), 37 deletions(-) diff --git a/quesma/clickhouse/clickhouse.go b/quesma/clickhouse/clickhouse.go index c5342fbbc..5579ea3e7 100644 --- a/quesma/clickhouse/clickhouse.go +++ b/quesma/clickhouse/clickhouse.go @@ -322,7 +322,7 @@ func (lm *LogManager) BuildInsertJson(tableName string, data types.JSON, config return js, nil } // we find all non-schema fields - m, err := JsonToFieldsMap(js) + m, err := types.ParseJSON(js) if err != nil { return "", err } diff --git a/quesma/clickhouse/clickhouse_test.go b/quesma/clickhouse/clickhouse_test.go index c3f59c7aa..3aa214bf8 100644 --- a/quesma/clickhouse/clickhouse_test.go +++ b/quesma/clickhouse/clickhouse_test.go @@ -133,7 +133,7 @@ func TestJsonToFieldsMap(t *testing.T) { "timestamp": "2024-01-08T18:56:08.454Z", } j := `{"host.name":"hermes","message":"User password reset 
requested","service.name":"queue","severity":"info","source":"azure","timestamp":"2024-01-08T18:56:08.454Z"}` - m, err := JsonToFieldsMap(j) + m, err := types.ParseJSON(j) assert.NoError(t, err) assert.Equal(t, len(mExpected), len(m)) for k, vExpected := range mExpected { diff --git a/quesma/clickhouse/parser.go b/quesma/clickhouse/parser.go index 551c01339..0f8f65989 100644 --- a/quesma/clickhouse/parser.go +++ b/quesma/clickhouse/parser.go @@ -1,8 +1,8 @@ package clickhouse import ( - "encoding/json" "fmt" + "mitmproxy/quesma/quesma/types" "mitmproxy/quesma/util" "slices" "strings" @@ -10,19 +10,8 @@ import ( const NestedSeparator = "::" -// TODO remove schemamap type? -// TODO change all return types to * when worth it like here -func JsonToFieldsMap(jsonn string) (SchemaMap, error) { - m := make(SchemaMap) - err := json.Unmarshal([]byte(jsonn), &m) - if err != nil { - return nil, err - } - return m, nil -} - func JsonToTableSchema(jsonn, tableName string, config *ChTableConfig) (*Table, error) { - m, err := JsonToFieldsMap(jsonn) + m, err := types.ParseJSON(jsonn) if err != nil { return nil, err } diff --git a/quesma/eql/query_translator.go b/quesma/eql/query_translator.go index 46f9a4362..de62f1fe2 100644 --- a/quesma/eql/query_translator.go +++ b/quesma/eql/query_translator.go @@ -2,13 +2,13 @@ package eql import ( "context" - "encoding/json" "mitmproxy/quesma/clickhouse" "mitmproxy/quesma/eql/transform" "mitmproxy/quesma/logger" "mitmproxy/quesma/model" "mitmproxy/quesma/queryparser" "mitmproxy/quesma/queryparser/query_util" + "mitmproxy/quesma/quesma/types" "strconv" "strings" ) @@ -95,8 +95,7 @@ func (cw *ClickhouseEQLQueryTranslator) parseQuery(queryAsJson string) (query mo searchQueryInfo.Typ = model.ListAllFields query.Sql = model.Statement{} - queryAsMap := make(map[string]interface{}) - err = json.Unmarshal([]byte(queryAsJson), &queryAsMap) + queryAsMap, err := types.ParseJSON(queryAsJson) if err != nil { 
logger.ErrorWithCtx(cw.Ctx).Err(err).Msg("error parsing query request's JSON") diff --git a/quesma/go.mod b/quesma/go.mod index 77322b16c..932de804b 100644 --- a/quesma/go.mod +++ b/quesma/go.mod @@ -32,6 +32,7 @@ require ( github.com/mitchellh/copystructure v1.2.0 // indirect github.com/mitchellh/reflectwalk v1.0.2 // indirect github.com/pkg/errors v0.9.1 // indirect + github.com/tailscale/hujson v0.0.0-20221223112325-20486734a56a // indirect golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect ) diff --git a/quesma/go.sum b/quesma/go.sum index b4d293bb5..f04b6442e 100644 --- a/quesma/go.sum +++ b/quesma/go.sum @@ -128,6 +128,8 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/tailscale/hujson v0.0.0-20221223112325-20486734a56a h1:SJy1Pu0eH1C29XwJucQo73FrleVK6t4kYz4NVhp34Yw= +github.com/tailscale/hujson v0.0.0-20221223112325-20486734a56a/go.mod h1:DFSS3NAGHthKo1gTlmEcSBiZrRJXi28rLNd/1udP1c8= github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU= github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI= diff --git a/quesma/queryparser/aggregation_parser.go b/quesma/queryparser/aggregation_parser.go index b17ad4b06..7cd99b9db 100644 --- a/quesma/queryparser/aggregation_parser.go +++ b/quesma/queryparser/aggregation_parser.go @@ -2,13 +2,13 @@ package queryparser import ( "context" - "encoding/json" "fmt" "mitmproxy/quesma/clickhouse" "mitmproxy/quesma/logger" "mitmproxy/quesma/model" "mitmproxy/quesma/model/bucket_aggregations" "mitmproxy/quesma/model/metrics_aggregations" + 
"mitmproxy/quesma/quesma/types" "mitmproxy/quesma/util" "regexp" "slices" @@ -196,8 +196,7 @@ func (b *aggrQueryBuilder) buildMetricsAggregation(metricsAggr metricsAggregatio // ParseAggregationJson parses JSON with aggregation query and returns array of queries with aggregations. // If there are no aggregations, returns nil. func (cw *ClickhouseQueryTranslator) ParseAggregationJson(queryAsJson string) ([]model.Query, error) { - queryAsMap := make(QueryMap) - err := json.Unmarshal([]byte(queryAsJson), &queryAsMap) + queryAsMap, err := types.ParseJSON(queryAsJson) if err != nil { return nil, fmt.Errorf("unmarshal error: %v", err) } diff --git a/quesma/queryparser/query_parser.go b/quesma/queryparser/query_parser.go index 3186fc15b..f3951d380 100644 --- a/quesma/queryparser/query_parser.go +++ b/quesma/queryparser/query_parser.go @@ -3,6 +3,7 @@ package queryparser import ( "encoding/json" wc "mitmproxy/quesma/queryparser/where_clause" + "mitmproxy/quesma/quesma/types" "fmt" "github.com/k0kubun/pp" @@ -91,7 +92,8 @@ func (cw *ClickhouseQueryTranslator) ParseQueryInternal(queryAsJson string) (mod cw.ClearTokensToHighlight() queryAsMap := make(QueryMap) if queryAsJson != "" { - err := json.Unmarshal([]byte(queryAsJson), &queryAsMap) + var err error + queryAsMap, err = types.ParseJSON(queryAsJson) if err != nil { logger.ErrorWithCtx(cw.Ctx).Err(err).Msg("error parsing query request's JSON") return model.SimpleQuery{}, model.SearchQueryInfo{}, NewEmptyHighlighter(), err @@ -177,8 +179,7 @@ func (cw *ClickhouseQueryTranslator) ParseHighlighter(queryMap QueryMap) model.H func (cw *ClickhouseQueryTranslator) ParseQueryAsyncSearch(queryAsJson string) (model.SimpleQuery, model.SearchQueryInfo, model.Highlighter) { cw.ClearTokensToHighlight() - queryAsMap := make(QueryMap) - err := json.Unmarshal([]byte(queryAsJson), &queryAsMap) + queryAsMap, err := types.ParseJSON(queryAsJson) if err != nil { logger.ErrorWithCtx(cw.Ctx).Err(err).Msg("error parsing query request's JSON") 
return model.NewSimpleQuery(model.NewSimpleStatement("invalid JSON (ParseQueryAsyncSearch)"), false), model.NewSearchQueryInfoNone(), NewEmptyHighlighter() diff --git a/quesma/quesma/termsenum/request.go b/quesma/quesma/termsenum/request.go index 602a5c669..1e6747082 100644 --- a/quesma/quesma/termsenum/request.go +++ b/quesma/quesma/termsenum/request.go @@ -4,7 +4,6 @@ import ( "bytes" "encoding/json" "errors" - "fmt" "github.com/gogo/protobuf/types" "io" "strconv" @@ -74,18 +73,6 @@ func NewRequest() *Request { return r } -// FromJSON allows to load an arbitrary json into the request structure -func (r *Request) FromJSON(data string) (*Request, error) { - var req Request - err := json.Unmarshal([]byte(data), &req) - - if err != nil { - return nil, fmt.Errorf("could not deserialise json into Termsenum request: %w", err) - } - - return &req, nil -} - func (s *Request) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) diff --git a/quesma/quesma/types/json.go b/quesma/quesma/types/json.go index 6d4573b5c..b169828a5 100644 --- a/quesma/quesma/types/json.go +++ b/quesma/quesma/types/json.go @@ -3,6 +3,7 @@ package types import ( "encoding/json" "fmt" + "github.com/tailscale/hujson" ) type JSON map[string]interface{} @@ -11,6 +12,11 @@ func ParseJSON(body string) (JSON, error) { var res JSON err := json.Unmarshal([]byte(body), &res) + if err != nil { + if newBytes, errStd := hujson.Standardize([]byte(body)); errStd == nil { + err = json.Unmarshal(newBytes, &res) + } + } return res, err } diff --git a/quesma/quesma/types/json_test.go b/quesma/quesma/types/json_test.go index eecbd1762..fb5321989 100644 --- a/quesma/quesma/types/json_test.go +++ b/quesma/quesma/types/json_test.go @@ -6,6 +6,18 @@ import ( "testing" ) +func TestCommentedJson(t *testing.T) { + jsonStr := `{"key1":"value1","key2":"value2"}` + commentedJsonStr := `// comment +{"key1":"value1","key2":"value2" /* another comment */ }` + + jsonStruct, err := 
ParseJSON(commentedJsonStr) + assert.NoError(t, err) + withoutComment := jsonStruct.ShortString() + + assert.Equal(t, jsonStr, withoutComment) +} + func TestReMarshalJSON(t *testing.T) { type dest struct { diff --git a/quesma/testdata/requests.go b/quesma/testdata/requests.go index 9ac320cd7..2c6b9140f 100644 --- a/quesma/testdata/requests.go +++ b/quesma/testdata/requests.go @@ -1936,6 +1936,22 @@ var TestsSearch = []SearchTestCase{ // We will probably refactor it as we move forwards with schema which will get even more side-effecting []string{qToStr(justSimplestWhere(`"@timestamp".=parseDateTime64BestEffort('2024-01-22T09:..:10.299Z')`))}, }, + { // [34] Comments in queries + "Comments in filter", + `{ + "query": { /*one comment */ + "bool": { + "must": { + "term": { "user.id": "kimchy" } // One comment + } + } + } + }`, + []string{`"user.id"='kimchy'`}, + model.Normal, + []model.Query{justSimplestWhere(`"user.id"='kimchy'`)}, + []string{qToStr(justSimplestWhere(`"user.id"='kimchy'`))}, + }, } var TestsSearchNoAttrs = []SearchTestCase{