From b1ddb474b9fd41a4bdb4ad1c4db7e0800eeee1c5 Mon Sep 17 00:00:00 2001 From: przemyslaw Date: Thu, 16 May 2024 17:00:43 +0200 Subject: [PATCH 1/8] Very rough initial impl --- quesma/queryparser/query_parser.go | 9 +++-- quesma/queryparser/query_translator.go | 52 ++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/quesma/queryparser/query_parser.go b/quesma/queryparser/query_parser.go index 814eb414e..555e40fef 100644 --- a/quesma/queryparser/query_parser.go +++ b/quesma/queryparser/query_parser.go @@ -296,10 +296,11 @@ func (cw *ClickhouseQueryTranslator) parseIds(queryMap QueryMap) SimpleQuery { return newSimpleQuery(NewSimpleStatement("parsing error: missing mandatory `values` field"), false) } logger.Warn().Msgf("unsupported id query executed, requested ids of [%s]", strings.Join(ids, "','")) - // We'll make this something along the lines of: - // fmt.Sprintf("COMPUTED_ID(document) IN ('%s') */ ", strings.Join(ids, "','")) - // but for now leaving empty - return newSimpleQuery(NewSimpleStatement(""), true) + + // ugh, we need to know fields for hash computation ... + statement := fmt.Sprintf("lower(hex(SHA1(CONCAT(email, customer_full_name)))) IN ('%s') ", strings.Join(ids, "','")) + + return newSimpleQuery(NewSimpleStatement(statement), true) } // Parses each SimpleQuery separately, returns list of translated SQLs diff --git a/quesma/queryparser/query_translator.go b/quesma/queryparser/query_translator.go index 8b69657b8..f07e7b6d4 100644 --- a/quesma/queryparser/query_translator.go +++ b/quesma/queryparser/query_translator.go @@ -2,6 +2,8 @@ package queryparser import ( "context" + "crypto/sha1" + "encoding/hex" "encoding/json" "fmt" "mitmproxy/quesma/clickhouse" @@ -90,6 +92,15 @@ func (cw *ClickhouseQueryTranslator) makeSearchResponseNormal(ResultSet []model. cw.highlightHit(&hits[i], highlighter, ResultSet[i]) } + // Set the IDs + for i, hit := range hits { + if id, err := computeIdFromDocument(hit); err != nil { + hits[i].ID = strconv.Itoa(i + 1) + } else { + hits[i].ID = id + } + } + return &model.SearchResp{ Hits: model.SearchHits{ Hits: hits, @@ -315,6 +326,38 @@ func (cw *ClickhouseQueryTranslator) makeSearchResponseFacets(ResultSet []model. } } +func computeIdFromDocument(doc model.SearchHit) (string, error) { + // TBD which fields, eventually configurable + /* + This is ugly as hell but surprisingly gets the job done. + However works only on strings... if we are concatenating different types, we might end up with something different at ClickHouse/Hydrolix end... + */ + var email, fullName string + if v, ok := doc.Fields["email"]; ok { + if e, okk := v[0].(*string); okk { + email = *e + } + } else { + logger.Error().Msgf("PRZEMYSLAW FAIL email: [%v]", v) + return "", fmt.Errorf("missing email field") + } + if v, ok := doc.Fields["customer_full_name"]; ok { + if e, okk := v[0].(*string); okk { + fullName = *e + } + } else { + logger.Error().Msgf("PRZEMYSLAW FAIL customer_full_name: [%v]", v) + return "", fmt.Errorf("missing customer_full_name field") + } + + concat := email + fullName + logger.Info().Msgf("concat: [%v]", concat) + hash := sha1.Sum([]byte(concat)) + hashEncodedToString := hex.EncodeToString(hash[:]) + logger.Info().Msgf("hash: [%s]", hashEncodedToString) + return hashEncodedToString, nil +} + func (cw *ClickhouseQueryTranslator) makeSearchResponseList(ResultSet []model.QueryResultRow, typ model.SearchQueryType, highlighter model.Highlighter) *model.SearchResp { hits := make([]model.SearchHit, len(ResultSet)) for i := range ResultSet { @@ -333,6 +376,15 @@ func (cw *ClickhouseQueryTranslator) makeSearchResponseList(ResultSet []model.Qu cw.highlightHit(&hits[i], highlighter, ResultSet[i]) } + // Set the IDs + for i, hit := range hits { + if id, err := computeIdFromDocument(hit); err != nil { + hits[i].ID = strconv.Itoa(i + 1) + } else { + hits[i].ID = id + } + } + return &model.SearchResp{ Hits: model.SearchHits{ Total: &model.Total{ From ccfb0c26bad33c456e25670a63600d2223dd588c Mon Sep 17 00:00:00 2001 From: przemyslaw Date: Fri, 17 May 2024 11:06:08 +0200 Subject: [PATCH 2/8] timestamp --- quesma/queryparser/query_parser.go | 15 +++++++-- quesma/queryparser/query_translator.go | 42 +++++++++++--------------- 2 files changed, 30 insertions(+), 27 deletions(-) diff --git a/quesma/queryparser/query_parser.go b/quesma/queryparser/query_parser.go index 555e40fef..e874f152f 100644 --- a/quesma/queryparser/query_parser.go +++ b/quesma/queryparser/query_parser.go @@ -297,8 +297,19 @@ func (cw *ClickhouseQueryTranslator) parseIds(queryMap QueryMap) SimpleQuery { } logger.Warn().Msgf("unsupported id query executed, requested ids of [%s]", strings.Join(ids, "','")) - // ugh, we need to know fields for hash computation ... - statement := fmt.Sprintf("lower(hex(SHA1(CONCAT(email, customer_full_name)))) IN ('%s') ", strings.Join(ids, "','")) + //statement := fmt.Sprintf("lower(hex(SHA1(CONCAT(email, customer_full_name)))) IN ('%s') ", strings.Join(ids, "','")) + statement := fmt.Sprintf("toUnixTimestamp64Milli(%s) IN (%s) ", strconv.Quote("@timestamp"), ids) + //var colNames []string + //for _, col := range cw.Table.Cols { + // colNames = append(colNames, fmt.Sprintf("ifNull(%s, '')", strconv.Quote(col.Name))) + //} + //sort.Strings(colNames) + //qq := fmt.Sprintf("lower(hex(SHA1(CONCAT(%s)))) IN ('44') ", strings.Join(colNames, ",")) + //print(qq) + ///* + // + // */ + // clickhouse toUnix return newSimpleQuery(NewSimpleStatement(statement), true) } diff --git a/quesma/queryparser/query_translator.go b/quesma/queryparser/query_translator.go index f07e7b6d4..9488240b4 100644 --- a/quesma/queryparser/query_translator.go +++ b/quesma/queryparser/query_translator.go @@ -2,8 +2,6 @@ package queryparser import ( "context" - "crypto/sha1" - "encoding/hex" "encoding/json" "fmt" "mitmproxy/quesma/clickhouse" @@ -14,6 +12,7 @@ import ( "mitmproxy/quesma/util" "strconv" "strings" + "time" ) const facetsSampleSize = "20000" @@ -330,32 +329,25 @@ func computeIdFromDocument(doc model.SearchHit) (string, error) { // TBD which fields, eventually configurable /* This is ugly as hell but surprisingly gets the job done. - However works only on strings... if we are concatenating different types, we might end up with something different at ClickHouse/Hydrolix end... + However, works only on strings... if we are concatenating different types, we might end up with something different at ClickHouse/Hydrolix end... */ - var email, fullName string - if v, ok := doc.Fields["email"]; ok { - if e, okk := v[0].(*string); okk { - email = *e - } - } else { - logger.Error().Msgf("PRZEMYSLAW FAIL email: [%v]", v) - return "", fmt.Errorf("missing email field") - } - if v, ok := doc.Fields["customer_full_name"]; ok { - if e, okk := v[0].(*string); okk { - fullName = *e + var timestamp string + if v, ok := doc.Fields["@timestamp"]; ok { + if vv, okk := v[0].(time.Time); okk { + timestamp = strconv.Itoa(int(vv.UnixMilli())) + } else { + fmt.Sprintf("????? FAILed timestamp type assert : [%v]", v) } } else { - logger.Error().Msgf("PRZEMYSLAW FAIL customer_full_name: [%v]", v) - return "", fmt.Errorf("missing customer_full_name field") - } - - concat := email + fullName - logger.Info().Msgf("concat: [%v]", concat) - hash := sha1.Sum([]byte(concat)) - hashEncodedToString := hex.EncodeToString(hash[:]) - logger.Info().Msgf("hash: [%s]", hashEncodedToString) - return hashEncodedToString, nil + logger.Error().Msgf("NO @timestamp FIELD [%v]", v) + return "", fmt.Errorf("missing @timestamp field") + } + //concat := email + fullName + //logger.Info().Msgf("concat: [%v]", concat) + //hash := sha1.Sum([]byte(concat)) + //hashEncodedToString := hex.EncodeToString(hash[:]) + //logger.Info().Msgf("hash: [%s]", hashEncodedToString) + return timestamp, nil } func (cw *ClickhouseQueryTranslator) makeSearchResponseList(ResultSet []model.QueryResultRow, typ model.SearchQueryType, highlighter model.Highlighter) *model.SearchResp { From c0cfe52d672529670633376fdaeb9bc128aab73c Mon Sep 17 00:00:00 2001 From: przemyslaw Date: Fri, 17 May 2024 11:28:47 +0200 Subject: [PATCH 3/8] hydrolix --- quesma/queryparser/query_parser.go | 16 +++------------- quesma/queryparser/query_translator.go | 2 +- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/quesma/queryparser/query_parser.go b/quesma/queryparser/query_parser.go index e874f152f..f25df5078 100644 --- a/quesma/queryparser/query_parser.go +++ b/quesma/queryparser/query_parser.go @@ -297,19 +297,9 @@ func (cw *ClickhouseQueryTranslator) parseIds(queryMap QueryMap) SimpleQuery { } logger.Warn().Msgf("unsupported id query executed, requested ids of [%s]", strings.Join(ids, "','")) - //statement := fmt.Sprintf("lower(hex(SHA1(CONCAT(email, customer_full_name)))) IN ('%s') ", strings.Join(ids, "','")) - statement := fmt.Sprintf("toUnixTimestamp64Milli(%s) IN (%s) ", strconv.Quote("@timestamp"), ids) - //var colNames []string - //for _, col := range cw.Table.Cols { - // colNames = append(colNames, fmt.Sprintf("ifNull(%s, '')", strconv.Quote(col.Name))) - //} - //sort.Strings(colNames) - //qq := fmt.Sprintf("lower(hex(SHA1(CONCAT(%s)))) IN ('44') ", strings.Join(colNames, ",")) - //print(qq) - ///* - // - // */ - // clickhouse toUnix + // variant when field is a datetime64 + //statement := fmt.Sprintf("toUnixTimestamp64Milli(%s) IN (%s) ", strconv.Quote("timestamp"), ids) + statement := fmt.Sprintf("toUnixTimestamp(%s) *1000 IN (%s) ", strconv.Quote("timestamp"), ids) return newSimpleQuery(NewSimpleStatement(statement), true) } diff --git a/quesma/queryparser/query_translator.go b/quesma/queryparser/query_translator.go index 9488240b4..72a49abe3 100644 --- a/quesma/queryparser/query_translator.go +++ b/quesma/queryparser/query_translator.go @@ -332,7 +332,7 @@ func computeIdFromDocument(doc model.SearchHit) (string, error) { However, works only on strings... if we are concatenating different types, we might end up with something different at ClickHouse/Hydrolix end... */ var timestamp string - if v, ok := doc.Fields["@timestamp"]; ok { + if v, ok := doc.Fields["timestamp"]; ok { if vv, okk := v[0].(time.Time); okk { timestamp = strconv.Itoa(int(vv.UnixMilli())) } else { From fc47445ed08cdb6e3e41b16a9dedb721c092685d Mon Sep 17 00:00:00 2001 From: przemyslaw Date: Fri, 17 May 2024 11:59:55 +0200 Subject: [PATCH 4/8] hydrolix test --- quesma/clickhouse/clickhouse.go | 13 +++++++++++++ quesma/queryparser/query_parser.go | 19 ++++++++++++++++++- quesma/queryparser/query_translator.go | 25 +++++++++++++------------ quesma/quesma/config/config.go | 11 ++++++----- 4 files changed, 50 insertions(+), 18 deletions(-) diff --git a/quesma/clickhouse/clickhouse.go b/quesma/clickhouse/clickhouse.go index 29fedf9a8..edb1802b5 100644 --- a/quesma/clickhouse/clickhouse.go +++ b/quesma/clickhouse/clickhouse.go @@ -114,6 +114,19 @@ func (lm *LogManager) Close() { _ = lm.chDb.Close() } +func (lm *LogManager) GetPseudoUniqueField(tableName string) (string, error) { + + if v, ok := lm.cfg.IndexConfig[tableName]; ok { + if v.PseudoUniqueField != "" { + return v.PseudoUniqueField, nil + } else { + return "", fmt.Errorf("no pseudo unique field configured for table %s", tableName) + } + } else { + return "", fmt.Errorf("no index configuration for table %s", tableName) + } +} + func (lm *LogManager) matchIndex(ctx context.Context, indexNamePattern, indexName string) bool { r, err := regexp.Compile("^" + strings.Replace(indexNamePattern, "*", ".*", -1) + "$") if err != nil { diff --git a/quesma/queryparser/query_parser.go b/quesma/queryparser/query_parser.go index f25df5078..65208eac2 100644 --- a/quesma/queryparser/query_parser.go +++ b/quesma/queryparser/query_parser.go @@ -299,7 +299,24 @@ func (cw *ClickhouseQueryTranslator) parseIds(queryMap QueryMap) SimpleQuery { // variant when field is a datetime64 //statement := fmt.Sprintf("toUnixTimestamp64Milli(%s) IN (%s) ", strconv.Quote("timestamp"), ids) - statement := fmt.Sprintf("toUnixTimestamp(%s) *1000 IN (%s) ", strconv.Quote("timestamp"), ids) + + pseudoUniqueFieldName, err := cw.ClickhouseLM.GetPseudoUniqueField(cw.Table.Name) + if err != nil && pseudoUniqueFieldName != "" { + return newSimpleQuery(NewSimpleStatement(""), true) + } + var statement string + if v, ok := cw.Table.Cols[pseudoUniqueFieldName]; !ok { + switch v.Type.String() { + case clickhouse.DateTime64.String(): + statement = fmt.Sprintf("toUnixTimestamp64Milli(%s) IN (%s) ", strconv.Quote("timestamp"), ids) + case clickhouse.DateTime.String(): + statement = fmt.Sprintf("toUnixTimestamp(%s) *1000 IN (%s) ", strconv.Quote("timestamp"), ids) + default: + return newSimpleQuery(NewSimpleStatement(""), true) + } + } + + //statement := fmt.Sprintf("toUnixTimestamp(%s) *1000 IN (%s) ", strconv.Quote("timestamp"), ids) return newSimpleQuery(NewSimpleStatement(statement), true) } diff --git a/quesma/queryparser/query_translator.go b/quesma/queryparser/query_translator.go index 72a49abe3..0175bd463 100644 --- a/quesma/queryparser/query_translator.go +++ b/quesma/queryparser/query_translator.go @@ -93,7 +93,7 @@ func (cw *ClickhouseQueryTranslator) makeSearchResponseNormal(ResultSet []model. // Set the IDs for i, hit := range hits { - if id, err := computeIdFromDocument(hit); err != nil { + if id, err := cw.computeIdFromDocument(hit); err != nil { hits[i].ID = strconv.Itoa(i + 1) } else { hits[i].ID = id @@ -325,16 +325,17 @@ func (cw *ClickhouseQueryTranslator) makeSearchResponseFacets(ResultSet []model. } } -func computeIdFromDocument(doc model.SearchHit) (string, error) { - // TBD which fields, eventually configurable - /* - This is ugly as hell but surprisingly gets the job done. - However, works only on strings... if we are concatenating different types, we might end up with something different at ClickHouse/Hydrolix end... - */ - var timestamp string - if v, ok := doc.Fields["timestamp"]; ok { +func (cw *ClickhouseQueryTranslator) computeIdFromDocument(doc model.SearchHit) (string, error) { + var pseudoUniqueFieldName, pseudoUniqueId string + + if v, err := cw.ClickhouseLM.GetPseudoUniqueField(doc.Index); err != nil { + return "", fmt.Errorf("missing pseudo unique field for index %s", doc.Index) + } else { + pseudoUniqueFieldName = v + } + if v, ok := doc.Fields[pseudoUniqueFieldName]; ok { if vv, okk := v[0].(time.Time); okk { - timestamp = strconv.Itoa(int(vv.UnixMilli())) + pseudoUniqueId = strconv.Itoa(int(vv.UnixMilli())) } else { fmt.Sprintf("????? FAILed timestamp type assert : [%v]", v) } @@ -347,7 +348,7 @@ func computeIdFromDocument(doc model.SearchHit) (string, error) { //hash := sha1.Sum([]byte(concat)) //hashEncodedToString := hex.EncodeToString(hash[:]) //logger.Info().Msgf("hash: [%s]", hashEncodedToString) - return timestamp, nil + return pseudoUniqueId, nil } func (cw *ClickhouseQueryTranslator) makeSearchResponseList(ResultSet []model.QueryResultRow, typ model.SearchQueryType, highlighter model.Highlighter) *model.SearchResp { @@ -370,7 +371,7 @@ func (cw *ClickhouseQueryTranslator) makeSearchResponseList(ResultSet []model.Qu // Set the IDs for i, hit := range hits { - if id, err := computeIdFromDocument(hit); err != nil { + if id, err := cw.computeIdFromDocument(hit); err != nil { hits[i].ID = strconv.Itoa(i + 1) } else { hits[i].ID = id diff --git a/quesma/quesma/config/config.go b/quesma/quesma/config/config.go index 0b5536f7a..983b0ceeb 100644 --- a/quesma/quesma/config/config.go +++ b/quesma/quesma/config/config.go @@ -80,11 +80,12 @@ type FieldAlias struct { } type IndexConfiguration struct { - Name string `koanf:"name"` - Enabled bool `koanf:"enabled"` - FullTextFields []string `koanf:"fullTextFields"` - Aliases map[string]FieldAlias `koanf:"aliases"` - IgnoredFields map[string]bool `koanf:"ignoredFields"` + Name string `koanf:"name"` + Enabled bool `koanf:"enabled"` + FullTextFields []string `koanf:"fullTextFields"` + Aliases map[string]FieldAlias `koanf:"aliases"` + IgnoredFields map[string]bool `koanf:"ignoredFields"` + PseudoUniqueField string `koanf:"pseudoUniqueField"` } func (c IndexConfiguration) Matches(indexName string) bool { From 5333ef1f4f001247b00d91237a474ad441b92cea Mon Sep 17 00:00:00 2001 From: przemyslaw Date: Fri, 17 May 2024 13:08:02 +0200 Subject: [PATCH 5/8] rename to timestamp field --- quesma/clickhouse/clickhouse.go | 13 ----------- quesma/clickhouse/table.go | 5 ++++ quesma/queryparser/query_parser.go | 19 +++++++-------- quesma/queryparser/query_translator.go | 32 +++++++++++++------------- quesma/quesma/config/config.go | 12 +++++----- 5 files changed, 35 insertions(+), 46 deletions(-) diff --git a/quesma/clickhouse/clickhouse.go b/quesma/clickhouse/clickhouse.go index edb1802b5..29fedf9a8 100644 --- a/quesma/clickhouse/clickhouse.go +++ b/quesma/clickhouse/clickhouse.go @@ -114,19 +114,6 @@ func (lm *LogManager) Close() { _ = lm.chDb.Close() } -func (lm *LogManager) GetPseudoUniqueField(tableName string) (string, error) { - - if v, ok := lm.cfg.IndexConfig[tableName]; ok { - if v.PseudoUniqueField != "" { - return v.PseudoUniqueField, nil - } else { - return "", fmt.Errorf("no pseudo unique field configured for table %s", tableName) - } - } else { - return "", fmt.Errorf("no index configuration for table %s", tableName) - } -} - func (lm *LogManager) matchIndex(ctx context.Context, indexNamePattern, indexName string) bool { r, err := regexp.Compile("^" + strings.Replace(indexNamePattern, "*", ".*", -1) + "$") if err != nil { diff --git a/quesma/clickhouse/table.go b/quesma/clickhouse/table.go index 87f9ff22e..61eb42f6a 100644 --- a/quesma/clickhouse/table.go +++ b/quesma/clickhouse/table.go @@ -22,6 +22,7 @@ type Table struct { aliases map[string]string Comment string // this human-readable comment CreateTableQuery string + TimestampColumn *string } func (t *Table) GetFields() []string { @@ -166,6 +167,10 @@ func (t *Table) applyIndexConfig(configuration config.QuesmaConfiguration) { t.aliases[alias.SourceFieldName] = alias.TargetFieldName } } + if v, ok := configuration.IndexConfig[t.Name]; ok { + t.TimestampColumn = v.TimestampField + } + } func (t *Table) ResolveField(ctx context.Context, fieldName string) (field string) { diff --git a/quesma/queryparser/query_parser.go b/quesma/queryparser/query_parser.go index 65208eac2..9c135b473 100644 --- a/quesma/queryparser/query_parser.go +++ b/quesma/queryparser/query_parser.go @@ -297,27 +297,24 @@ func (cw *ClickhouseQueryTranslator) parseIds(queryMap QueryMap) SimpleQuery { } logger.Warn().Msgf("unsupported id query executed, requested ids of [%s]", strings.Join(ids, "','")) - // variant when field is a datetime64 - //statement := fmt.Sprintf("toUnixTimestamp64Milli(%s) IN (%s) ", strconv.Quote("timestamp"), ids) - - pseudoUniqueFieldName, err := cw.ClickhouseLM.GetPseudoUniqueField(cw.Table.Name) - if err != nil && pseudoUniqueFieldName != "" { + timestampColumnName, err := cw.GetTimestampFieldName() + if err != nil { + logger.Warn().Msgf("id query executed, but not timestamp field configured") return newSimpleQuery(NewSimpleStatement(""), true) } + var statement string - if v, ok := cw.Table.Cols[pseudoUniqueFieldName]; !ok { + if v, ok := cw.Table.Cols[timestampColumnName]; !ok { switch v.Type.String() { case clickhouse.DateTime64.String(): - statement = fmt.Sprintf("toUnixTimestamp64Milli(%s) IN (%s) ", strconv.Quote("timestamp"), ids) + statement = fmt.Sprintf("toUnixTimestamp64Milli(%s) IN (%s) ", strconv.Quote(timestampColumnName), ids) case clickhouse.DateTime.String(): - statement = fmt.Sprintf("toUnixTimestamp(%s) *1000 IN (%s) ", strconv.Quote("timestamp"), ids) + statement = fmt.Sprintf("toUnixTimestamp(%s) *1000 IN (%s) ", strconv.Quote(timestampColumnName), ids) default: + logger.Warn().Msgf("timestamp field of unsupported type %s", v.Type.String()) return newSimpleQuery(NewSimpleStatement(""), true) } } - - //statement := fmt.Sprintf("toUnixTimestamp(%s) *1000 IN (%s) ", strconv.Quote("timestamp"), ids) - return newSimpleQuery(NewSimpleStatement(statement), true) } diff --git a/quesma/queryparser/query_translator.go b/quesma/queryparser/query_translator.go index 0175bd463..84d7abffd 100644 --- a/quesma/queryparser/query_translator.go +++ b/quesma/queryparser/query_translator.go @@ -49,6 +49,14 @@ func (cw *ClickhouseQueryTranslator) AddTokenToHighlight(token any) { } +func (cw *ClickhouseQueryTranslator) GetTimestampFieldName() (string, error) { + if cw.Table.TimestampColumn != nil { + return *cw.Table.TimestampColumn, nil + } else { + return "", fmt.Errorf("no pseudo unique field configured for table %s", cw.Table.Name) + } +} + func (cw *ClickhouseQueryTranslator) ClearTokensToHighlight() { cw.tokensToHighlight = []string{} } @@ -326,28 +334,19 @@ func (cw *ClickhouseQueryTranslator) makeSearchResponseFacets(ResultSet []model. } func (cw *ClickhouseQueryTranslator) computeIdFromDocument(doc model.SearchHit) (string, error) { - var pseudoUniqueFieldName, pseudoUniqueId string + var pseudoUniqueId string - if v, err := cw.ClickhouseLM.GetPseudoUniqueField(doc.Index); err != nil { - return "", fmt.Errorf("missing pseudo unique field for index %s", doc.Index) - } else { - pseudoUniqueFieldName = v + tsFieldName, err := cw.GetTimestampFieldName() + if err != nil { + return "", err } - if v, ok := doc.Fields[pseudoUniqueFieldName]; ok { + if v, ok := doc.Fields[tsFieldName]; ok { if vv, okk := v[0].(time.Time); okk { pseudoUniqueId = strconv.Itoa(int(vv.UnixMilli())) } else { - fmt.Sprintf("????? FAILed timestamp type assert : [%v]", v) + return "", fmt.Errorf("timestamp field is not a time.Time") } - } else { - logger.Error().Msgf("NO @timestamp FIELD [%v]", v) - return "", fmt.Errorf("missing @timestamp field") - } - //concat := email + fullName - //logger.Info().Msgf("concat: [%v]", concat) - //hash := sha1.Sum([]byte(concat)) - //hashEncodedToString := hex.EncodeToString(hash[:]) - //logger.Info().Msgf("hash: [%s]", hashEncodedToString) + } return pseudoUniqueId, nil } @@ -372,6 +371,7 @@ func (cw *ClickhouseQueryTranslator) makeSearchResponseList(ResultSet []model.Qu // Set the IDs for i, hit := range hits { if id, err := cw.computeIdFromDocument(hit); err != nil { + logger.Warn().Msgf("failed to compute ID for document: %v", err) hits[i].ID = strconv.Itoa(i + 1) } else { hits[i].ID = id diff --git a/quesma/quesma/config/config.go b/quesma/quesma/config/config.go index 983b0ceeb..4d4b8d7ad 100644 --- a/quesma/quesma/config/config.go +++ b/quesma/quesma/config/config.go @@ -80,12 +80,12 @@ type FieldAlias struct { } type IndexConfiguration struct { - Name string `koanf:"name"` - Enabled bool `koanf:"enabled"` - FullTextFields []string `koanf:"fullTextFields"` - Aliases map[string]FieldAlias `koanf:"aliases"` - IgnoredFields map[string]bool `koanf:"ignoredFields"` - PseudoUniqueField string `koanf:"pseudoUniqueField"` + Name string `koanf:"name"` + Enabled bool `koanf:"enabled"` + FullTextFields []string `koanf:"fullTextFields"` + Aliases map[string]FieldAlias `koanf:"aliases"` + IgnoredFields map[string]bool `koanf:"ignoredFields"` + TimestampField *string `koanf:"timestampField"` } func (c IndexConfiguration) Matches(indexName string) bool { From 1347fa9a3bf0f394dd4f30807e5d24495c4d4fd2 Mon Sep 17 00:00:00 2001 From: przemyslaw Date: Fri, 17 May 2024 13:54:13 +0200 Subject: [PATCH 6/8] cleanup --- quesma/queryparser/query_translator.go | 35 ++++++++------------------ 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/quesma/queryparser/query_translator.go b/quesma/queryparser/query_translator.go index 84d7abffd..8d88e0d5a 100644 --- a/quesma/queryparser/query_translator.go +++ b/quesma/queryparser/query_translator.go @@ -97,15 +97,7 @@ func (cw *ClickhouseQueryTranslator) makeSearchResponseNormal(ResultSet []model. Highlight: make(map[string][]string), } cw.highlightHit(&hits[i], highlighter, ResultSet[i]) - } - - // Set the IDs - for i, hit := range hits { - if id, err := cw.computeIdFromDocument(hit); err != nil { - hits[i].ID = strconv.Itoa(i + 1) - } else { - hits[i].ID = id - } + hits[i].ID = cw.computeIdForDocument(hits[i], strconv.Itoa(i+1)) } return &model.SearchResp{ @@ -333,21 +325,23 @@ func (cw *ClickhouseQueryTranslator) makeSearchResponseFacets(ResultSet []model. } } -func (cw *ClickhouseQueryTranslator) computeIdFromDocument(doc model.SearchHit) (string, error) { - var pseudoUniqueId string - +func (cw *ClickhouseQueryTranslator) computeIdForDocument(doc model.SearchHit, defaultID string) string { tsFieldName, err := cw.GetTimestampFieldName() if err != nil { - return "", err + return defaultID } + + var pseudoUniqueId string + if v, ok := doc.Fields[tsFieldName]; ok { if vv, okk := v[0].(time.Time); okk { pseudoUniqueId = strconv.Itoa(int(vv.UnixMilli())) } else { - return "", fmt.Errorf("timestamp field is not a time.Time") + logger.WarnWithCtx(cw.Ctx).Msgf("failed to convert timestamp field [%v] to time.Time", v[0]) + return defaultID } } - return pseudoUniqueId, nil + return pseudoUniqueId } func (cw *ClickhouseQueryTranslator) makeSearchResponseList(ResultSet []model.QueryResultRow, typ model.SearchQueryType, highlighter model.Highlighter) *model.SearchResp { @@ -366,16 +360,7 @@ func (cw *ClickhouseQueryTranslator) makeSearchResponseList(ResultSet []model.Qu } } cw.highlightHit(&hits[i], highlighter, ResultSet[i]) - } - - // Set the IDs - for i, hit := range hits { - if id, err := cw.computeIdFromDocument(hit); err != nil { - logger.Warn().Msgf("failed to compute ID for document: %v", err) - hits[i].ID = strconv.Itoa(i + 1) - } else { - hits[i].ID = id - } + hits[i].ID = cw.computeIdForDocument(hits[i], strconv.Itoa(i+1)) } return &model.SearchResp{ From e1a5aee1884067a7b97849c597fe45a803d607ba Mon Sep 17 00:00:00 2001 From: przemyslaw Date: Fri, 17 May 2024 13:57:20 +0200 Subject: [PATCH 7/8] Add example config --- quesma/config.yaml.template | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/quesma/config.yaml.template b/quesma/config.yaml.template index f03d63e6a..71d9d05e2 100644 --- a/quesma/config.yaml.template +++ b/quesma/config.yaml.template @@ -19,7 +19,14 @@ logging: remoteUrl: "https://api.quesma.com/phone-home" disableFileLogging: false indexes: + logs: + timestampField: "reqTimeSec" + enabled: true + siem: + timestampField: "timestamp" + enabled: true kibana_sample_data_ecommerce: + timestampField: "@timestamp" enabled: true kibana_sample_data_flights: enabled: true From 0483bfcbb3b6ea5fed58543f61fab68dc1b0a008 Mon Sep 17 00:00:00 2001 From: przemyslaw Date: Fri, 17 May 2024 15:06:00 +0200 Subject: [PATCH 8/8] sprinkle some magic! --- quesma/queryparser/query_parser.go | 15 ++++++++++++++- quesma/queryparser/query_translator.go | 5 ++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/quesma/queryparser/query_parser.go b/quesma/queryparser/query_parser.go index 9c135b473..cab7fa2d4 100644 --- a/quesma/queryparser/query_parser.go +++ b/quesma/queryparser/query_parser.go @@ -303,8 +303,21 @@ func (cw *ClickhouseQueryTranslator) parseIds(queryMap QueryMap) SimpleQuery { return newSimpleQuery(NewSimpleStatement(""), true) } + // when our generated ID appears in query looks like this: `18f7b8800b8q1` + // therefore we need to strip the hex part (before `q`) and convert it to decimal + // then we can query at DB level + for i, id := range ids { + idInHex := strings.Split(id, "q")[0] + if decimalValue, err := strconv.ParseUint(idInHex, 16, 64); err != nil { + logger.Error().Msgf("error parsing document id %s: %v", id, err) + return newSimpleQuery(NewSimpleStatement(""), true) + } else { + ids[i] = fmt.Sprintf("%d", decimalValue) + } + } + var statement string - if v, ok := cw.Table.Cols[timestampColumnName]; !ok { + if v, ok := cw.Table.Cols[timestampColumnName]; ok { switch v.Type.String() { case clickhouse.DateTime64.String(): statement = fmt.Sprintf("toUnixTimestamp64Milli(%s) IN (%s) ", strconv.Quote(timestampColumnName), ids) diff --git a/quesma/queryparser/query_translator.go b/quesma/queryparser/query_translator.go index 8d88e0d5a..35be308af 100644 --- a/quesma/queryparser/query_translator.go +++ b/quesma/queryparser/query_translator.go @@ -335,7 +335,10 @@ func (cw *ClickhouseQueryTranslator) computeIdForDocument(doc model.SearchHit, d if v, ok := doc.Fields[tsFieldName]; ok { if vv, okk := v[0].(time.Time); okk { - pseudoUniqueId = strconv.Itoa(int(vv.UnixMilli())) + // At database level we only compare timestamps with millisecond precision + // However in search results we append `q` plus generated digits (we use q because it's not in hex) + // so that kibana can iterate over documents in UI + pseudoUniqueId = fmt.Sprintf("%xq%s", int(vv.UnixMilli()), defaultID) } else { logger.WarnWithCtx(cw.Ctx).Msgf("failed to convert timestamp field [%v] to time.Time", v[0]) return defaultID