Small fixes top_metrics (#188)
3 small fixes:
* `GROUP BY field1, field2` instead of `GROUP BY field1field2` 😄 
* `MAX` -> `maxOrNull`, like in all other places, just to be sure
* instead of `fieldname: value`, we returned `"windowed_fieldname": value`, with the literal quotes and the `windowed_` prefix kept in the key. Fixed by unquoting the column name if it is quoted (it should always be, but again, just to be sure), so the `windowed_` prefix can then be stripped; see the sketch below the list.
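
A minimal standalone sketch of the last two fixes (the `cleanColName` helper is hypothetical, named here only for illustration; the real changes are in `top_metrics.go` and `aggregation_parser.go` below):

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// cleanColName sketches the top_metrics fix: unquote the column name
// if it carries literal quotes, then strip the "windowed_" prefix.
func cleanColName(colName string) string {
	withoutQuotes := colName
	if unquoted, err := strconv.Unquote(colName); err == nil {
		withoutQuotes = unquoted
	}
	cleaned, _ := strings.CutPrefix(withoutQuotes, "windowed_")
	return cleaned
}

func main() {
	fmt.Println(cleanColName(`"windowed_order_date"`)) // order_date, not "windowed_order_date"
	fmt.Println(cleanColName(`windowed_order_date`))   // order_date (unquoted input still works)

	// The GROUP BY / PARTITION BY fix: join fields with ", ", not "".
	fields := []string{"field1", "field2"}
	fmt.Println(strings.Join(fields, ", ")) // "field1, field2" instead of "field1field2"
}
```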

 
Also added a type check in `date_histogram`. It should be unnecessary if the code is correct, but we used to panic there when we had bugs, before these fixes.
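
For illustration, a runnable sketch of the comma-ok assertion that replaces the panicking direct assertion (toy values, not the real column data):

```go
package main

import "fmt"

func main() {
	// A direct assertion like v.(int64) panics if v holds another type.
	// The comma-ok form below logs a warning and carries on instead.
	values := []interface{}{int64(14069), "oops, not an int64"}
	for _, v := range values {
		var key int64
		if keyValue, ok := v.(int64); ok {
			key = keyValue * 86400000 // interval in milliseconds, as in date_histogram
		} else {
			fmt.Printf("unexpected type of key value: %T, %+v, should be int64\n", v, v)
		}
		fmt.Println("key:", key)
	}
}
```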


Fixes e.g. the `Last value` view.
Seems to be working; checked with ClickHouse, see `5` below.

![ClickHouse_Query__SELECT_order_date__customer_id_FROM_kibana_sample_data_ecommerce_WHERE_order_date____2024-05-10_00_01_02_000__AND_order_date____2024-05-09_00_01_02_000__ORDER_BY_order_date_desc](https://github.com/QuesmaOrg/quesma/assets/5407146/40456bf7-8274-4986-83ee-bca920fa0ac3)
![Screenshot 2024-05-22 at 12 44 56](https://github.com/QuesmaOrg/quesma/assets/5407146/878236e2-4ad9-41f9-9445-ee7be6233fa5)
trzysiek authored May 22, 2024
1 parent 880e84f commit 779f1a3
Showing 6 changed files with 244 additions and 13 deletions.
9 changes: 7 additions & 2 deletions quesma/model/bucket_aggregations/date_histogram.go
@@ -35,11 +35,16 @@ func (query DateHistogram) TranslateSqlResponseToJson(rows []model.QueryResultRow,
 	var response []model.JsonMap
 	for _, row := range rows {
 		intervalInMilliseconds := query.IntervalAsDuration().Milliseconds()
-		key := row.Cols[level-1].Value.(int64) * intervalInMilliseconds
+		var key int64
+		if keyValue, ok := row.Cols[len(row.Cols)-2].Value.(int64); ok { // used to be [level-1], but because some columns are duplicated, it doesn't work in 100% cases now
+			key = keyValue * intervalInMilliseconds
+		} else {
+			logger.WarnWithCtx(query.ctx).Msgf("unexpected type of key value: %T, %+v, Should be int64", row.Cols[len(row.Cols)-2].Value, row.Cols[len(row.Cols)-2].Value)
+		}
 		intervalStart := time.UnixMilli(key).UTC().Format("2006-01-02T15:04:05.000")
 		response = append(response, model.JsonMap{
 			"key":           key,
-			"doc_count":     row.Cols[level].Value,
+			"doc_count":     row.LastColValue(), // used to be [level], but because some columns are duplicated, it doesn't work in 100% cases now
 			"key_as_string": intervalStart,
 		})
 	}
11 changes: 9 additions & 2 deletions quesma/model/metrics_aggregations/top_metrics.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"mitmproxy/quesma/logger"
 	"mitmproxy/quesma/model"
+	"strconv"
 	"strings"
 )

@@ -34,8 +35,14 @@ func (query TopMetrics) TranslateSqlResponseToJson(rows []model.QueryResultRow,
 		valuesForMetrics := row.Cols[:lastIndex]
 		sortVal := row.Cols[lastIndex].Value
 		for _, col := range valuesForMetrics[level:] {
-			colName, _ := strings.CutPrefix(col.ColName, "windowed_")
-			metrics[colName] = col.ExtractValue(query.ctx) // CHANGE IT AFTER PART 2 MERGE!! ENTER REAL CONTEXT FROM THE query
+			var withoutQuotes string
+			if unquoted, err := strconv.Unquote(col.ColName); err == nil {
+				withoutQuotes = unquoted
+			} else {
+				withoutQuotes = col.ColName
+			}
+			colName, _ := strings.CutPrefix(withoutQuotes, `windowed_`)
+			metrics[colName] = col.ExtractValue(query.ctx)
 		}
 		elem := model.JsonMap{
 			"sort": []interface{}{sortVal},
6 changes: 3 additions & 3 deletions quesma/queryparser/aggregation_parser.go
@@ -132,17 +132,17 @@ func (b *aggrQueryBuilder) buildMetricsAggregation(metricsAggr metricsAggregation
 	var ordFunc string
 	switch metricsAggr.Order {
 	case "asc":
-		ordFunc = `MAX`
+		ordFunc = `maxOrNull`
 	case "desc":
-		ordFunc = `MIN`
+		ordFunc = `minOrNull`
 	}
 	var topSelectFields []string
 	innerFields := append(metricsAggr.FieldNames, metricsAggr.SortBy)
 	for _, field := range innerFields {
 		topSelectFields = append(topSelectFields, fmt.Sprintf(`%s("%s") AS "windowed_%s"`, ordFunc, field, field))
 	}
 	query.NonSchemaFields = append(query.NonSchemaFields, topSelectFields...)
-	partitionBy := strings.Join(b.Query.GroupByFields, "")
+	partitionBy := strings.Join(b.Query.GroupByFields, ", ")
 	fieldsAsString := strings.Join(quoteArray(innerFields), ", ") // need those fields in the inner clause
 	query.FromClause = fmt.Sprintf(
 		"(SELECT %s, ROW_NUMBER() OVER (PARTITION BY %s ORDER BY %s %s) AS %s FROM %s WHERE %s)",
2 changes: 1 addition & 1 deletion quesma/queryparser/aggregation_parser_test.go
@@ -589,7 +589,7 @@ func Test2AggregationParserExternalTestcases(t *testing.T) {
 		if strings.HasPrefix(test.TestName, "dashboard-1") {
 			t.Skip("Those 2 tests have nested histograms with min_doc_count=0. I'll add support for that in next PR, already most of work done")
 		}
-		if i == 32 {
+		if test.TestName == "Range with subaggregations. Reproduce: Visualize -> Pie chart -> Aggregation: Top Hit, Buckets: Aggregation: Range" {
 			t.Skip("Need a (most likely) small fix to top_hits.")
 		}
 		if i == 20 {
227 changes: 223 additions & 4 deletions quesma/testdata/aggregation_requests.go
@@ -1914,8 +1914,8 @@ var AggregationTests = []AggregationTestCase{
 		[]string{
 			`SELECT count() FROM ` + QuotedTableName + ` WHERE "order_date">=parseDateTime64BestEffort('2024-02-06T09:59:57.034Z') ` +
 				`AND "order_date"<=parseDateTime64BestEffort('2024-02-13T09:59:57.034Z')`,
-			"SELECT toInt64(toUnixTimestamp64Milli(`order_date`)/43200000), " + `MAX("order_date") AS "windowed_order_date", ` +
-				`MAX("order_date") AS "windowed_order_date" FROM ` +
+			"SELECT toInt64(toUnixTimestamp64Milli(`order_date`)/43200000), " + `maxOrNull("order_date") AS "windowed_order_date", ` +
+				`maxOrNull("order_date") AS "windowed_order_date" FROM ` +
 				`(SELECT "order_date", "order_date", ROW_NUMBER() OVER ` +
 				"(PARTITION BY toInt64(toUnixTimestamp64Milli(`order_date`)/43200000) " +
 				`ORDER BY "order_date" asc) AS row_number FROM ` + QuotedTableName + " " +
@@ -1924,8 +1924,8 @@
 				`WHERE ("order_date">=parseDateTime64BestEffort('2024-02-06T09:59:57.034Z') AND ` +
 				`"order_date"<=parseDateTime64BestEffort('2024-02-13T09:59:57.034Z')) AND "taxful_total_price" > '250' AND row_number <= 10 ` +
 				"GROUP BY (toInt64(toUnixTimestamp64Milli(`order_date`)/43200000)) ORDER BY (toInt64(toUnixTimestamp64Milli(`order_date`)/43200000))",
-			"SELECT toInt64(toUnixTimestamp64Milli(`order_date`)/43200000), " + `MAX("taxful_total_price") AS "windowed_taxful_total_price", ` +
-				`MAX("order_date") AS "windowed_order_date" FROM ` +
+			"SELECT toInt64(toUnixTimestamp64Milli(`order_date`)/43200000), " + `maxOrNull("taxful_total_price") AS "windowed_taxful_total_price", ` +
+				`maxOrNull("order_date") AS "windowed_order_date" FROM ` +
 				`(SELECT "taxful_total_price", "order_date", ROW_NUMBER() OVER ` +
 				"(PARTITION BY toInt64(toUnixTimestamp64Milli(`order_date`)/43200000) " +
 				`ORDER BY "order_date" asc) AS row_number FROM ` + QuotedTableName + " " +
@@ -4957,4 +4957,223 @@ var AggregationTests = []AggregationTestCase{
// terms + histogram
// histogram + terms
// everything with some avg, cardinality, etc
{ // [31]
TestName: "Kibana Visualize -> Last Value. Used to panic",
QueryRequestJson: `
{
"_source": {
"excludes": []
},
"aggs": {
"0": {
"aggs": {
"1-bucket": {
"aggs": {
"1-metric": {
"top_metrics": {
"metrics": {
"field": "message"
},
"size": 1,
"sort": {
"order_date": "desc"
}
}
}
},
"filter": {
"bool": {
"filter": [
{
"bool": {
"minimum_should_match": 1,
"should": [
{
"exists": {
"field": "message"
}
}
]
}
}
],
"must": [],
"must_not": [],
"should": []
}
}
}
},
"date_histogram": {
"calendar_interval": "1d",
"field": "@timestamp",
"min_doc_count": 1,
"time_zone": "Europe/Warsaw"
}
}
},
"fields": [
{
"field": "@timestamp",
"format": "date_time"
},
{
"field": "order_date",
"format": "date_time"
}
],
"query": {
"bool": {
"filter": [],
"must": [],
"must_not": [],
"should": []
}
},
"runtime_mappings": {},
"script_fields": {},
"size": 0,
"stored_fields": [
"*"
],
"track_total_hits": true
}`,
ExpectedResponse: `
{
"completion_status": 200,
"completion_time_in_millis": 0,
"expiration_time_in_millis": 0,
"id": "quesma_async_search_id_17",
"is_partial": false,
"is_running": false,
"response": {
"_shards": {
"failed": 0,
"skipped": 0,
"successful": 1,
"total": 1
},
"aggregations": {
"0": {
"buckets": [
{
"1-bucket": {
"1-metric": {
"top": [
{
"metrics": {
"message": 5
},
"sort": [
"2024-05-09T23:52:48Z"
]
}
]
},
"doc_count": 146
},
"doc_count": 146,
"key": 1715212800000,
"key_as_string": "2024-05-09T00:00:00.000"
},
{
"1-bucket": {
"1-metric": {
"top": [
{
"metrics": {
"message": 30
},
"sort": [
"2024-05-22T10:20:38Z"
]
}
]
},
"doc_count": 58
},
"doc_count": 58,
"key": 1716336000000,
"key_as_string": "2024-05-22T00:00:00.000"
}
]
}
},
"hits": {
"hits": [],
"max_score": null,
"total": {
"relation": "eq",
"value": 1974
}
},
"timed_out": false,
"took": 0
},
"start_time_in_millis": 0
}`,
ExpectedResults: [][]model.QueryResultRow{
{{Cols: []model.QueryResultCol{model.NewQueryResultCol("hits", uint64(2167))}}},
{
{Cols: []model.QueryResultCol{
model.NewQueryResultCol("toInt64(toUnixTimestamp64Milli(`@timestamp`)/86400000)", int64(1715212800000/86400000)),
model.NewQueryResultCol(`"windowed_message"`, 5),
model.NewQueryResultCol(`minOrNull("order_date")`, "2024-05-09T23:52:48Z"),
}},
{Cols: []model.QueryResultCol{
model.NewQueryResultCol("toInt64(toUnixTimestamp64Milli(`@timestamp`)/86400000)", int64(1716336000000/86400000)),
model.NewQueryResultCol(`windowed_message`, 30),
model.NewQueryResultCol(`minOrNull("order_date")`, "2024-05-22T10:20:38Z"),
}},
},
{
{Cols: []model.QueryResultCol{
model.NewQueryResultCol("toInt64(toUnixTimestamp64Milli(`@timestamp`)/86400000)", int64(1715212800000/86400000)),
model.NewQueryResultCol(`count()`, 146),
}},
{Cols: []model.QueryResultCol{
model.NewQueryResultCol("toInt64(toUnixTimestamp64Milli(`@timestamp`)/86400000)", int64(1716336000000/86400000)),
model.NewQueryResultCol(`count()`, 58),
}},
},
{
{Cols: []model.QueryResultCol{
model.NewQueryResultCol("toInt64(toUnixTimestamp64Milli(`@timestamp`)/86400000)", int64(1715212800000/86400000)),
model.NewQueryResultCol(`count()`, 146),
}},
{Cols: []model.QueryResultCol{
model.NewQueryResultCol("toInt64(toUnixTimestamp64Milli(`@timestamp`)/86400000)", int64(1716336000000/86400000)),
model.NewQueryResultCol(`count()`, 58),
}},
},
},
ExpectedSQLs: []string{
`SELECT count() ` +
`FROM ` + QuotedTableName,
"SELECT toInt64(toUnixTimestamp64Milli(`@timestamp`)/86400000), " +
`minOrNull("message") AS "windowed_message", ` +
`minOrNull("order_date") AS "windowed_order_date" ` +
`FROM (SELECT "message", "order_date", ROW_NUMBER() OVER ` +
"(PARTITION BY toInt64(toUnixTimestamp64Milli(`@timestamp`)/86400000) " +
`ORDER BY "order_date" desc) ` +
`AS row_number ` +
`FROM ` + QuotedTableName + ` ` +
`WHERE "message" IS NOT NULL) ` +
`WHERE "message" IS NOT NULL ` +
`AND row_number <= 1 ` +
"GROUP BY (toInt64(toUnixTimestamp64Milli(`@timestamp`)/86400000)) " +
"ORDER BY (toInt64(toUnixTimestamp64Milli(`@timestamp`)/86400000))",
"SELECT toInt64(toUnixTimestamp64Milli(`@timestamp`)/86400000), " +
"count() " +
`FROM ` + QuotedTableName + ` ` +
`WHERE "message" IS NOT NULL ` +
"GROUP BY (toInt64(toUnixTimestamp64Milli(`@timestamp`)/86400000)) " +
"ORDER BY (toInt64(toUnixTimestamp64Milli(`@timestamp`)/86400000))",
"SELECT toInt64(toUnixTimestamp64Milli(`@timestamp`)/86400000), " +
"count() " +
`FROM ` + QuotedTableName + ` ` +
"GROUP BY (toInt64(toUnixTimestamp64Milli(`@timestamp`)/86400000)) " +
"ORDER BY (toInt64(toUnixTimestamp64Milli(`@timestamp`)/86400000))",
},
},
}
2 changes: 1 addition & 1 deletion quesma/testdata/requests_with_special_characters.go
@@ -145,7 +145,7 @@ var AggregationTestsWithSpecialCharactersInFieldNames = []AggregationTestCase{
 		ExpectedResults: [][]model.QueryResultRow{}, // checking only the SQLs is enough for now
 		ExpectedSQLs: []string{
 			`SELECT count() FROM ` + QuotedTableName + ` WHERE "message\$\*\%\:\;" IS NOT NULL`,
-			`SELECT toInt64(toUnixTimestamp64Milli(` + "`-@timestamp`" + `)/43200000), MIN("-@bytes") AS "windowed_-@bytes", MIN("-@timestamp") AS "windowed_-@timestamp" FROM (SELECT "-@bytes", "-@timestamp", ROW_NUMBER() OVER (PARTITION BY toInt64(toUnixTimestamp64Milli(` + "`-@timestamp`)/43200000) ORDER BY " + `"-@timestamp" desc) AS row_number FROM ` + QuotedTableName + ` WHERE "message\$\*\%\:\;" IS NOT NULL) WHERE "message\$\*\%\:\;" IS NOT NULL AND row_number <= 1 GROUP BY (toInt64(toUnixTimestamp64Milli(` + "`-@timestamp`)/43200000)) ORDER BY (toInt64(toUnixTimestamp64Milli(`-@timestamp`)/43200000))",
+			`SELECT toInt64(toUnixTimestamp64Milli(` + "`-@timestamp`" + `)/43200000), minOrNull("-@bytes") AS "windowed_-@bytes", minOrNull("-@timestamp") AS "windowed_-@timestamp" FROM (SELECT "-@bytes", "-@timestamp", ROW_NUMBER() OVER (PARTITION BY toInt64(toUnixTimestamp64Milli(` + "`-@timestamp`)/43200000) ORDER BY " + `"-@timestamp" desc) AS row_number FROM ` + QuotedTableName + ` WHERE "message\$\*\%\:\;" IS NOT NULL) WHERE "message\$\*\%\:\;" IS NOT NULL AND row_number <= 1 GROUP BY (toInt64(toUnixTimestamp64Milli(` + "`-@timestamp`)/43200000)) ORDER BY (toInt64(toUnixTimestamp64Milli(`-@timestamp`)/43200000))",
 			"SELECT toInt64(toUnixTimestamp64Milli(`-@timestamp`)/43200000), count() FROM " + QuotedTableName + ` WHERE "message\$\*\%\:\;\" IS NOT NULL GROUP BY (toInt64(toUnixTimestamp64Milli(` + "`-@timestamp`)/43200000)) ORDER BY (toInt64(toUnixTimestamp64Milli(`-@timestamp`)/43200000))",
 		},
 	},
},
Expand Down
