Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Computing document _id on the fly #133

Merged
merged 9 commits into from
May 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions quesma/clickhouse/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ type Table struct {
aliases map[string]string
Comment string // this human-readable comment
CreateTableQuery string
TimestampColumn *string
}

func (t *Table) GetFields() []string {
Expand Down Expand Up @@ -166,6 +167,10 @@ func (t *Table) applyIndexConfig(configuration config.QuesmaConfiguration) {
t.aliases[alias.SourceFieldName] = alias.TargetFieldName
}
}
if v, ok := configuration.IndexConfig[t.Name]; ok {
t.TimestampColumn = v.TimestampField
}

}

func (t *Table) ResolveField(ctx context.Context, fieldName string) (field string) {
Expand Down
7 changes: 7 additions & 0 deletions quesma/config.yaml.template
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,14 @@ logging:
remoteUrl: "https://api.quesma.com/phone-home"
disableFileLogging: false
indexes:
logs:
timestampField: "reqTimeSec"
enabled: true
siem:
timestampField: "timestamp"
enabled: true
kibana_sample_data_ecommerce:
timestampField: "@timestamp"
enabled: true
kibana_sample_data_flights:
enabled: true
Expand Down
37 changes: 33 additions & 4 deletions quesma/queryparser/query_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -296,10 +296,39 @@ func (cw *ClickhouseQueryTranslator) parseIds(queryMap QueryMap) SimpleQuery {
return newSimpleQuery(NewSimpleStatement("parsing error: missing mandatory `values` field"), false)
}
logger.Warn().Msgf("unsupported id query executed, requested ids of [%s]", strings.Join(ids, "','"))
// We'll make this something along the lines of:
// fmt.Sprintf("COMPUTED_ID(document) IN ('%s') */ ", strings.Join(ids, "','"))
// but for now leaving empty
return newSimpleQuery(NewSimpleStatement(""), true)

timestampColumnName, err := cw.GetTimestampFieldName()
if err != nil {
logger.Warn().Msgf("id query executed, but not timestamp field configured")
return newSimpleQuery(NewSimpleStatement(""), true)
}

// A generated document ID that appears in a query looks like this: `18f7b8800b8q1`.
// The part before `q` is hexadecimal, so we strip everything from `q` onward
// and convert the hex part to decimal; then we can query at the DB level.
for i, id := range ids {
idInHex := strings.Split(id, "q")[0]
if decimalValue, err := strconv.ParseUint(idInHex, 16, 64); err != nil {
logger.Error().Msgf("error parsing document id %s: %v", id, err)
return newSimpleQuery(NewSimpleStatement(""), true)
} else {
ids[i] = fmt.Sprintf("%d", decimalValue)
}
}

var statement string
if v, ok := cw.Table.Cols[timestampColumnName]; ok {
switch v.Type.String() {
case clickhouse.DateTime64.String():
statement = fmt.Sprintf("toUnixTimestamp64Milli(%s) IN (%s) ", strconv.Quote(timestampColumnName), ids)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Needed to double-check and they indeed called the function *Milli and not *Millis.

case clickhouse.DateTime.String():
statement = fmt.Sprintf("toUnixTimestamp(%s) *1000 IN (%s) ", strconv.Quote(timestampColumnName), ids)
default:
logger.Warn().Msgf("timestamp field of unsupported type %s", v.Type.String())
return newSimpleQuery(NewSimpleStatement(""), true)
}
}
return newSimpleQuery(NewSimpleStatement(statement), true)
}

// Parses each SimpleQuery separately, returns list of translated SQLs
Expand Down
33 changes: 33 additions & 0 deletions quesma/queryparser/query_translator.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"mitmproxy/quesma/util"
"strconv"
"strings"
"time"
)

const facetsSampleSize = "20000"
Expand Down Expand Up @@ -48,6 +49,14 @@ func (cw *ClickhouseQueryTranslator) AddTokenToHighlight(token any) {

}

// GetTimestampFieldName returns the name of the timestamp column set on the
// translator's table. When the table has no timestamp column (the pointer is
// nil) it returns an empty name together with an error naming the table.
func (cw *ClickhouseQueryTranslator) GetTimestampFieldName() (string, error) {
	column := cw.Table.TimestampColumn
	if column == nil {
		return "", fmt.Errorf("no pseudo unique field configured for table %s", cw.Table.Name)
	}
	return *column, nil
}

// ClearTokensToHighlight discards all tokens collected for highlighting by
// replacing the list with an empty, non-nil slice.
func (cw *ClickhouseQueryTranslator) ClearTokensToHighlight() {
	cw.tokensToHighlight = make([]string, 0)
}
Expand Down Expand Up @@ -88,6 +97,7 @@ func (cw *ClickhouseQueryTranslator) makeSearchResponseNormal(ResultSet []model.
Highlight: make(map[string][]string),
}
cw.highlightHit(&hits[i], highlighter, ResultSet[i])
hits[i].ID = cw.computeIdForDocument(hits[i], strconv.Itoa(i+1))
}

return &model.SearchResp{
Expand Down Expand Up @@ -277,6 +287,28 @@ func (cw *ClickhouseQueryTranslator) makeSearchResponseFacets(ResultSet []model.
}
}

// computeIdForDocument builds the pseudo-unique ID reported for a search hit.
//
// The ID has the form "<hex>q<defaultID>", where <hex> is the hit's timestamp
// in Unix milliseconds written in hexadecimal. At database level only the
// millisecond timestamp is compared; the `q` separator (chosen because it is
// not a hex digit) plus defaultID is appended so that Kibana can iterate over
// documents in the UI.
//
// defaultID is returned unchanged whenever a timestamp-based ID cannot be
// built: no timestamp field is configured, the hit has no (or an empty) value
// list for that field, or the first value is not a time.Time.
func (cw *ClickhouseQueryTranslator) computeIdForDocument(doc model.SearchHit, defaultID string) string {
	tsFieldName, err := cw.GetTimestampFieldName()
	if err != nil {
		return defaultID
	}

	values, found := doc.Fields[tsFieldName]
	if !found || len(values) == 0 {
		// Fall back instead of returning an empty ID (field missing) or
		// panicking on values[0] (field present but without values).
		return defaultID
	}

	ts, isTime := values[0].(time.Time)
	if !isTime {
		logger.WarnWithCtx(cw.Ctx).Msgf("failed to convert timestamp field [%v] to time.Time", values[0])
		return defaultID
	}

	// Format the int64 directly: converting to int first would truncate the
	// millisecond timestamp on platforms where int is 32 bits wide.
	return fmt.Sprintf("%xq%s", ts.UnixMilli(), defaultID)
}

func (cw *ClickhouseQueryTranslator) makeSearchResponseList(ResultSet []model.QueryResultRow, typ model.SearchQueryType, highlighter model.Highlighter) *model.SearchResp {
hits := make([]model.SearchHit, len(ResultSet))
for i := range ResultSet {
Expand All @@ -293,6 +325,7 @@ func (cw *ClickhouseQueryTranslator) makeSearchResponseList(ResultSet []model.Qu
}
}
cw.highlightHit(&hits[i], highlighter, ResultSet[i])
hits[i].ID = cw.computeIdForDocument(hits[i], strconv.Itoa(i+1))
}

return &model.SearchResp{
Expand Down
1 change: 1 addition & 0 deletions quesma/quesma/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ type IndexConfiguration struct {
FullTextFields []string `koanf:"fullTextFields"`
Aliases map[string]FieldAlias `koanf:"aliases"`
IgnoredFields map[string]bool `koanf:"ignoredFields"`
TimestampField *string `koanf:"timestampField"`
}

func (c IndexConfiguration) Matches(indexName string) bool {
Expand Down
Loading