Skip to content

Commit

Permalink
Rework fetching documents by _id (#213)
Browse files Browse the repository at this point in the history
  • Loading branch information
mieciu authored May 24, 2024
1 parent dd5d126 commit 58bdcac
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 17 deletions.
30 changes: 19 additions & 11 deletions quesma/queryparser/query_parser.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package queryparser

import (
"encoding/hex"
"encoding/json"
"fmt"
"mitmproxy/quesma/clickhouse"
Expand Down Expand Up @@ -308,7 +309,7 @@ func (cw *ClickhouseQueryTranslator) parseConstantScore(queryMap QueryMap) model
}

func (cw *ClickhouseQueryTranslator) parseIds(queryMap QueryMap) model.SimpleQuery {
var ids []string
var ids, finalIds []string
if val, ok := queryMap["values"]; ok {
if values, ok := val.([]interface{}); ok {
for _, id := range values {
Expand All @@ -325,32 +326,39 @@ func (cw *ClickhouseQueryTranslator) parseIds(queryMap QueryMap) model.SimpleQue
logger.Warn().Msgf("id query executed, but not timestamp field configured")
return model.NewSimpleQuery(model.NewSimpleStatement(""), true)
}
if len(ids) == 0 {
return model.NewSimpleQuery(model.NewSimpleStatement("parsing error: empty _id array"), false)
}

// when our generated ID appears in query looks like this: `18f7b8800b8q1`
// when our generated ID appears in a query it looks like this: `323032342d30352d32342031333a33323a34372e333037202b3030303020555443q1` (hex-encoded timestamp + `q` suffix; example reconstructed from the test data below — original was truncated)
// therefore we need to strip the hex part (before `q`) and convert it to decimal
// then we can query at DB level
for i, id := range ids {
idInHex := strings.Split(id, "q")[0]
if decimalValue, err := strconv.ParseUint(idInHex, 16, 64); err != nil {
if idAsStr, err := hex.DecodeString(idInHex); err != nil {
logger.Error().Msgf("error parsing document id %s: %v", id, err)
return model.NewSimpleQuery(model.NewSimpleStatement(""), true)
} else {
ids[i] = fmt.Sprintf("%d", decimalValue)
tsWithoutTZ := strings.TrimSuffix(string(idAsStr), " +0000 UTC")
ids[i] = fmt.Sprintf("'%s'", tsWithoutTZ)
}
}

var statement model.Statement
if v, ok := cw.Table.Cols[timestampColumnName]; ok {
switch v.Type.String() {
case clickhouse.DateTime64.String():
statement = model.NewSimpleStatement(fmt.Sprintf("toUnixTimestamp64Milli(%s) IN (%s)", strconv.Quote(timestampColumnName), ids))
statement.WhereStatement = wc.NewInfixOp(wc.NewFunction("toUnixTimestamp64Milli", []wc.Statement{wc.NewColumnRef(timestampColumnName)}...), "IN", wc.NewLiteral("(["+strings.Join(ids, ",")+"])"))
for _, id := range ids {
finalIds = append(finalIds, fmt.Sprintf("toDateTime64(%s,3)", id))
}
statement = model.NewSimpleStatement(fmt.Sprintf("%s IN (%s)", strconv.Quote(timestampColumnName), strings.Join(finalIds, ",")))
statement.WhereStatement = wc.NewInfixOp(wc.NewColumnRef(timestampColumnName), " IN ", wc.NewFunction("toDateTime64", wc.NewLiteral(strings.Join(ids, ",")), wc.NewLiteral("3")))
case clickhouse.DateTime.String():
statement = model.NewSimpleStatement(fmt.Sprintf("toUnixTimestamp(%s) * 1000 IN (%s)", strconv.Quote(timestampColumnName), ids))
statement.WhereStatement = wc.NewInfixOp(wc.NewInfixOp(
wc.NewFunction("toUnixTimestamp", []wc.Statement{wc.NewColumnRef(timestampColumnName)}...),
"*",
wc.NewLiteral("1000")), "IN", wc.NewLiteral("("+strings.Join(ids, ",")+")"))
for _, id := range ids {
finalIds = append(finalIds, fmt.Sprintf("toDateTime(%s)", id))
}
statement = model.NewSimpleStatement(fmt.Sprintf("%s IN (%s)", strconv.Quote(timestampColumnName), strings.Join(finalIds, ",")))
statement.WhereStatement = wc.NewInfixOp(wc.NewColumnRef(timestampColumnName), " IN ", wc.NewFunction("toDateTime", wc.NewLiteral(strings.Join(ids, ","))))
default:
logger.Warn().Msgf("timestamp field of unsupported type %s", v.Type.String())
return model.NewSimpleQuery(model.NewSimpleStatement(""), true)
Expand Down
2 changes: 1 addition & 1 deletion quesma/queryparser/query_translator.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ func (cw *ClickhouseQueryTranslator) computeIdForDocument(doc model.SearchHit, d
// At database level we only compare timestamps with millisecond precision
// However in search results we append `q` plus generated digits (we use q because it's not in hex)
// so that kibana can iterate over documents in UI
pseudoUniqueId = fmt.Sprintf("%xq%s", int(vv.UnixMilli()), defaultID)
pseudoUniqueId = fmt.Sprintf("%xq%s", vv, defaultID)
} else {
logger.WarnWithCtx(cw.Ctx).Msgf("failed to convert timestamp field [%v] to time.Time", v[0])
return defaultID
Expand Down
10 changes: 5 additions & 5 deletions quesma/testdata/requests.go
Original file line number Diff line number Diff line change
Expand Up @@ -1916,21 +1916,21 @@ var TestsSearch = []SearchTestCase{
},
{
"match_phrase": {
"_id": "18f86fcd014q6"
"_id": "323032342d30352d32342031333a33323a34372e333037202b3030303020555443q1"
}
}
]
}
}
}`,
[]string{
`"@timestamp">=parseDateTime64BestEffort('2024-01-22T09:26:10.299Z') AND toUnixTimestamp64Milli("@timestamp") IN ([1715956666388])`,
`toUnixTimestamp64Milli("@timestamp") IN ([1715956666388]) AND "@timestamp">=parseDateTime64BestEffort('2024-01-22T09:26:10.299Z')`,
`"@timestamp">=parseDateTime64BestEffort('2024-01-22T09:26:10.299Z') AND "@timestamp" IN (toDateTime64('2024-05-24 13:32:47.307',3))`,
`"@timestamp" IN (toDateTime64('2024-05-24 13:32:47.307',3)) AND "@timestamp">=parseDateTime64BestEffort('2024-01-22T09:26:10.299Z')`,
},
model.Normal,
[]model.Query{
justSimplestWhere(`"@timestamp">=parseDateTime64BestEffort('2024-01-22T09:26:10.299Z') AND toUnixTimestamp64Milli("@timestamp") IN ([1715956666388])`),
justSimplestWhere(`toUnixTimestamp64Milli("@timestamp") IN ([1715956666388]) AND "@timestamp">=parseDateTime64BestEffort('2024-01-22T09:26:10.299Z')`),
justSimplestWhere(`"@timestamp">=parseDateTime64BestEffort('2024-01-22T09:26:10.299Z') AND "@timestamp" IN (toDateTime64('2024-05-24 13:32:47.307',3))`),
justSimplestWhere(`"@timestamp" IN (toDateTime64('2024-05-24 13:32:47.307',3)) AND "@timestamp">=parseDateTime64BestEffort('2024-01-22T09:26:10.299Z')`),
},
// TestSearchHandler is pretty blunt with config loading so the test below can't be used.
// We will probably refactor it as we move forwards with schema which will get even more side-effecting
Expand Down

0 comments on commit 58bdcac

Please sign in to comment.