diff --git a/quesma/clickhouse/table.go b/quesma/clickhouse/table.go
index 87f9ff22e..61eb42f6a 100644
--- a/quesma/clickhouse/table.go
+++ b/quesma/clickhouse/table.go
@@ -22,6 +22,7 @@ type Table struct {
 	aliases          map[string]string
 	Comment          string // this human-readable comment
 	CreateTableQuery string
+	TimestampColumn  *string // timestamp column name taken from the index configuration; nil when none is set
 }
 
 func (t *Table) GetFields() []string {
@@ -166,6 +167,10 @@ func (t *Table) applyIndexConfig(configuration config.QuesmaConfiguration) {
 			t.aliases[alias.SourceFieldName] = alias.TargetFieldName
 		}
 	}
+	// Copy the timestamp field configured for this table (it may be nil when the option is not set).
+	if v, ok := configuration.IndexConfig[t.Name]; ok {
+		t.TimestampColumn = v.TimestampField
+	}
 }
 
 func (t *Table) ResolveField(ctx context.Context, fieldName string) (field string) {
diff --git a/quesma/config.yaml.template b/quesma/config.yaml.template
index f03d63e6a..71d9d05e2 100644
--- a/quesma/config.yaml.template
+++ b/quesma/config.yaml.template
@@ -19,7 +19,14 @@ logging:
   remoteUrl: "https://api.quesma.com/phone-home"
   disableFileLogging: false
 indexes:
+  logs:
+    timestampField: "reqTimeSec"
+    enabled: true
+  siem:
+    timestampField: "timestamp"
+    enabled: true
   kibana_sample_data_ecommerce:
+    timestampField: "@timestamp"
     enabled: true
   kibana_sample_data_flights:
     enabled: true
diff --git a/quesma/queryparser/query_parser.go b/quesma/queryparser/query_parser.go
index af9c92c24..60c8b70af 100644
--- a/quesma/queryparser/query_parser.go
+++ b/quesma/queryparser/query_parser.go
@@ -296,10 +296,46 @@ func (cw *ClickhouseQueryTranslator) parseIds(queryMap QueryMap) SimpleQuery {
 		return newSimpleQuery(NewSimpleStatement("parsing error: missing mandatory `values` field"), false)
 	}
 	logger.Warn().Msgf("unsupported id query executed, requested ids of [%s]", strings.Join(ids, "','"))
-	// We'll make this something along the lines of:
-	// fmt.Sprintf("COMPUTED_ID(document) IN ('%s') */ ", strings.Join(ids, "','"))
-	// but for now leaving empty
-	return newSimpleQuery(NewSimpleStatement(""), true)
+
+	timestampColumnName, err := cw.GetTimestampFieldName()
+	if err != nil {
+		logger.Warn().Msgf("id query executed, but no timestamp field configured")
+		return newSimpleQuery(NewSimpleStatement(""), true)
+	}
+
+	// A generated document ID looks like `18f7b8800b8q1`: the part before `q` is the
+	// timestamp in milliseconds written in hex. Drop everything from `q` onwards and
+	// convert that prefix to decimal so the timestamps can be compared at DB level.
+	timestamps := make([]string, 0, len(ids))
+	for _, id := range ids {
+		idInHex, _, _ := strings.Cut(id, "q")
+		decimalValue, parseErr := strconv.ParseUint(idInHex, 16, 64)
+		if parseErr != nil {
+			logger.Error().Msgf("error parsing document id %s: %v", id, parseErr)
+			return newSimpleQuery(NewSimpleStatement(""), true)
+		}
+		timestamps = append(timestamps, strconv.FormatUint(decimalValue, 10))
+	}
+	// Join explicitly: formatting a []string with %s renders `[1 2 3]`, which is not a valid SQL list.
+	inList := strings.Join(timestamps, ",")
+
+	column, ok := cw.Table.Cols[timestampColumnName]
+	if !ok {
+		logger.Warn().Msgf("timestamp field %s not found in table %s", timestampColumnName, cw.Table.Name)
+		return newSimpleQuery(NewSimpleStatement(""), true)
+	}
+
+	var statement string
+	switch column.Type.String() {
+	case clickhouse.DateTime64.String():
+		statement = fmt.Sprintf("toUnixTimestamp64Milli(%s) IN (%s) ", strconv.Quote(timestampColumnName), inList)
+	case clickhouse.DateTime.String():
+		statement = fmt.Sprintf("toUnixTimestamp(%s) *1000 IN (%s) ", strconv.Quote(timestampColumnName), inList)
+	default:
+		logger.Warn().Msgf("timestamp field of unsupported type %s", column.Type.String())
+		return newSimpleQuery(NewSimpleStatement(""), true)
+	}
+	return newSimpleQuery(NewSimpleStatement(statement), true)
 }
 
 // Parses each SimpleQuery separately, returns list of translated SQLs
diff --git a/quesma/queryparser/query_translator.go b/quesma/queryparser/query_translator.go
index a123d3f9a..3c119e522 100644
--- a/quesma/queryparser/query_translator.go
+++ b/quesma/queryparser/query_translator.go
@@ -12,6 +12,7 @@ import (
 	"mitmproxy/quesma/util"
 	"strconv"
 	"strings"
+	"time"
 )
 
 const facetsSampleSize = "20000"
@@ -48,6 +49,15 @@ func (cw *ClickhouseQueryTranslator) AddTokenToHighlight(token any) {
 
 }
 
+// GetTimestampFieldName returns the name of the timestamp column configured for the
+// translator's table, or an error when the table has none configured.
+func (cw *ClickhouseQueryTranslator) GetTimestampFieldName() (string, error) {
+	if cw.Table.TimestampColumn == nil {
+		return "", fmt.Errorf("no pseudo unique field configured for table %s", cw.Table.Name)
+	}
+	return *cw.Table.TimestampColumn, nil
+}
+
 func (cw *ClickhouseQueryTranslator) ClearTokensToHighlight() {
 	cw.tokensToHighlight = []string{}
 }
@@ -88,6 +98,7 @@ func (cw *ClickhouseQueryTranslator) makeSearchResponseNormal(ResultSet []model.
 			Highlight: make(map[string][]string),
 		}
 		cw.highlightHit(&hits[i], highlighter, ResultSet[i])
+		hits[i].ID = cw.computeIdForDocument(hits[i], strconv.Itoa(i+1))
 	}
 
 	return &model.SearchResp{
@@ -277,6 +288,35 @@ func (cw *ClickhouseQueryTranslator) makeSearchResponseFacets(ResultSet []model.
 	}
 }
 
+// computeIdForDocument builds a pseudo-unique ID for a search hit from its timestamp field.
+// The ID is the timestamp in milliseconds, hex-encoded, followed by `q` and defaultID.
+// defaultID alone is returned when no timestamp field is configured, when the hit has no
+// value for that field, or when the value is not a time.Time.
+func (cw *ClickhouseQueryTranslator) computeIdForDocument(doc model.SearchHit, defaultID string) string {
+	tsFieldName, err := cw.GetTimestampFieldName()
+	if err != nil {
+		return defaultID
+	}
+
+	values, ok := doc.Fields[tsFieldName]
+	if !ok || len(values) == 0 {
+		// Without a timestamp value there is nothing to derive the ID from.
+		return defaultID
+	}
+
+	ts, ok := values[0].(time.Time)
+	if !ok {
+		logger.WarnWithCtx(cw.Ctx).Msgf("failed to convert timestamp field [%v] to time.Time", values[0])
+		return defaultID
+	}
+
+	// At database level we only compare timestamps with millisecond precision.
+	// However in search results we append `q` plus generated digits (we use q because it's not in hex)
+	// so that kibana can iterate over documents in UI.
+	// The int64 is formatted directly: narrowing it to int would truncate on 32-bit platforms.
+	return fmt.Sprintf("%xq%s", ts.UnixMilli(), defaultID)
+}
+
 func (cw *ClickhouseQueryTranslator) makeSearchResponseList(ResultSet []model.QueryResultRow, typ model.SearchQueryType, highlighter model.Highlighter) *model.SearchResp {
 	hits := make([]model.SearchHit, len(ResultSet))
 	for i := range ResultSet {
@@ -293,6 +333,7 @@ func (cw *ClickhouseQueryTranslator) makeSearchResponseList(ResultSet []model.Qu
 			}
 		}
 		cw.highlightHit(&hits[i], highlighter, ResultSet[i])
+		hits[i].ID = cw.computeIdForDocument(hits[i], strconv.Itoa(i+1))
 	}
 
 	return &model.SearchResp{
diff --git a/quesma/quesma/config/config.go b/quesma/quesma/config/config.go
index 0b5536f7a..4d4b8d7ad 100644
--- a/quesma/quesma/config/config.go
+++ b/quesma/quesma/config/config.go
@@ -85,6 +85,7 @@ type IndexConfiguration struct {
 	FullTextFields []string              `koanf:"fullTextFields"`
 	Aliases        map[string]FieldAlias `koanf:"aliases"`
 	IgnoredFields  map[string]bool       `koanf:"ignoredFields"`
+	TimestampField *string               `koanf:"timestampField"`
 }
 
 func (c IndexConfiguration) Matches(indexName string) bool {