Small fix range aggregation (#26)
Fixed a panicking range aggregation by distinguishing, in one place, between `keyed` and non-`keyed` ranges. Even though we already have very similar tests in our suite, they surprisingly didn't catch this distinction when I added support for subaggregations for `range` a couple of days ago. Those tests still pass, so I'm fairly confident this fix doesn't introduce new issues while resolving this one.

Before:
Screenshot 2024-05-03 at 08 08 35: https://github.com/QuesmaOrg/quesma/assets/5407146/4da19aa6-0a7f-4fd1-9679-706552a6cbcf
Screenshot 2024-05-03 at 08 09 20: https://github.com/QuesmaOrg/quesma/assets/5407146/45a167c1-36a8-49c3-a20e-3778c37d6fb0

After:
Screenshot 2024-05-03 at 08 07 57: https://github.com/QuesmaOrg/quesma/assets/5407146/b5fd9838-3264-4c91-902d-ad667072556c
trzysiek authored May 3, 2024
1 parent b63e2ab commit 015bf53
Showing 4 changed files with 166 additions and 5 deletions.
10 changes: 9 additions & 1 deletion quesma/model/metrics_aggregations/quantile.go
@@ -2,6 +2,7 @@ package metrics_aggregations

import (
"context"
"math"
"mitmproxy/quesma/logger"
"mitmproxy/quesma/model"
"strconv"
@@ -47,7 +48,14 @@ func (query Quantile) TranslateSqlResponseToJson(rows []model.QueryResultRow, le
percentileName += ".0"
}

-valueMap[percentileName] = percentile[0]
+if len(percentile) == 0 {
+	logger.WarnWithCtx(query.ctx).Msgf("empty percentile values for %s", percentileName)
+}
+if len(percentile) == 0 || math.IsNaN(percentile[0]) {
+	valueMap[percentileName] = nil
+} else {
+	valueMap[percentileName] = percentile[0]
+}
}
}

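As a side note on the quantile change above: here is a minimal standalone sketch (not Quesma code, and assuming the value map is eventually serialized with Go's encoding/json) of why a NaN percentile has to be replaced with nil. NaN cannot be marshaled at all, while nil marshals to the null that the expected response in the new test case contains.

package main

import (
	"encoding/json"
	"fmt"
	"math"
)

func main() {
	// encoding/json refuses to marshal NaN, so keeping the raw percentile value would fail.
	_, err := json.Marshal(map[string]any{"50.0": math.NaN()})
	fmt.Println(err) // json: unsupported value: NaN

	// nil marshals to null, matching the expected "50.0": null for the empty bucket.
	out, _ := json.Marshal(map[string]any{"50.0": nil})
	fmt.Println(string(out)) // {"50.0":null}
}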
2 changes: 2 additions & 0 deletions quesma/queryparser/aggregation_parser_test.go
@@ -580,7 +580,9 @@ func Test2AggregationParserExternalTestcases(t *testing.T) {
// Let's leave those commented debugs for now, they'll be useful in next PRs
for j, aggregation := range aggregations {
// fmt.Println("--- Aggregation "+strconv.Itoa(j)+":", aggregation)
// fmt.Println()
// fmt.Println("--- SQL string ", aggregation.String())
// fmt.Println()
// fmt.Println("--- Group by: ", aggregation.GroupByFields)
if test.ExpectedSQLs[j] != "TODO" {
util.AssertSqlEqual(t, test.ExpectedSQLs[j], aggregation.String())
12 changes: 8 additions & 4 deletions quesma/queryparser/range_aggregation.go
@@ -63,10 +63,14 @@ func (cw *ClickhouseQueryTranslator) processRangeAggregation(currentAggr *aggrQu
interval.ToSQLSelectQuery(Range.QuotedFieldName),
)
}
-if len(currentAggr.Aggregators) > 0 {
-	currentAggr.Aggregators[len(currentAggr.Aggregators)-1].Empty = false
-} else {
-	logger.ErrorWithCtx(cw.Ctx).Msg("no aggregators in currentAggr")
+if !Range.Keyed {
+	// there's a difference in output structure whether the range is keyed or not
+	// it can be easily modeled in our code via setting last aggregator's .Empty to true/false
+	if len(currentAggr.Aggregators) > 0 {
+		currentAggr.Aggregators[len(currentAggr.Aggregators)-1].Empty = false
+	} else {
+		logger.ErrorWithCtx(cw.Ctx).Msg("no aggregators in currentAggr")
+	}
}
*aggregationsAccumulator = append(*aggregationsAccumulator, currentAggr.buildBucketAggregation(metadata))
currentAggr.NonSchemaFields = currentAggr.NonSchemaFields[:len(currentAggr.NonSchemaFields)-len(Range.Intervals)]
147 changes: 147 additions & 0 deletions quesma/testdata/opensearch-visualize/aggregation_requests.go
@@ -1,6 +1,7 @@
package opensearch_visualize

import (
"math"
"mitmproxy/quesma/model"
"mitmproxy/quesma/testdata"
)
@@ -473,4 +474,150 @@ var AggregationTests = []testdata.AggregationTestCase{
`WHERE "epoch_time">='2024-04-28T14:34:22.674Z' AND "epoch_time"<='2024-04-28T14:49:22.674Z' `,
},
},
{ // [3]
TestName: `Range with subaggregations. Reproduce: Visualize -> Heat Map -> Metrics: Median, Buckets: X-Axis Range`,
QueryRequestJson: `
{
"_source": {
"excludes": []
},
"aggs": {
"2": {
"aggs": {
"1": {
"percentiles": {
"field": "properties::entry_time",
"percents": [
50
]
}
}
},
"range": {
"field": "properties::exoestimation_connection_speedinkbps",
"keyed": true,
"ranges": [
{
"from": 0,
"to": 1000
},
{
"from": 1000,
"to": 2000
}
]
}
}
},
"docvalue_fields": [
{
"field": "@timestamp",
"format": "date_time"
},
{
"field": "ts_time_druid",
"format": "date_time"
}
],
"query": {
"bool": {
"filter": [
{
"range": {
"epoch_time": {
"format": "strict_date_optional_time",
"gte": "2024-04-18T04:40:12.252Z",
"lte": "2024-05-03T04:40:12.252Z"
}
}
}
],
"must": [
{
"match_all": {}
}
],
"must_not": [],
"should": []
}
},
"script_fields": {},
"size": 0,
"stored_fields": [
"*"
]
}`,
ExpectedResponse: `
{
"_shards": {
"failed": 0,
"skipped": 0,
"successful": 1,
"total": 1
},
"aggregations": {
"2": {
"buckets": {
"0.0-1000.0": {
"1": {
"values": {
"50.0": 46.9921875
}
},
"doc_count": 1,
"from": 0.0,
"to": 1000.0
},
"1000.0-2000.0": {
"1": {
"values": {
"50.0": null
}
},
"doc_count": 2,
"from": 1000.0,
"to": 2000.0
}
}
}
},
"hits": {
"hits": [],
"max_score": null,
"total": {
"relation": "eq",
"value": 4
}
},
"timed_out": false,
"took": 95
}`,
ExpectedResults: [][]model.QueryResultRow{
{{Cols: []model.QueryResultCol{model.NewQueryResultCol("hits", uint64(4))}}},
{{Cols: []model.QueryResultCol{model.NewQueryResultCol("quantile_50", []float64{46.9921875})}}},
{{Cols: []model.QueryResultCol{model.NewQueryResultCol("quantile_50", []float64{math.NaN()})}}},
{{Cols: []model.QueryResultCol{
model.NewQueryResultCol("doc_count", 1),
model.NewQueryResultCol("doc_count", 2),
model.NewQueryResultCol("doc_count", 4),
}}},
},
ExpectedSQLs: []string{
`SELECT count() FROM ` + testdata.QuotedTableName + ` ` +
`WHERE "epoch_time">='2024-04-18T04:40:12.252Z' AND "epoch_time"<='2024-05-03T04:40:12.252Z' `,
"SELECT quantiles(0.500000)(`properties::entry_time`) AS `quantile_50` " +
`FROM ` + testdata.QuotedTableName + ` ` +
`WHERE ("epoch_time">='2024-04-18T04:40:12.252Z' AND "epoch_time"<='2024-05-03T04:40:12.252Z') ` +
`AND "properties::exoestimation_connection_speedinkbps">=0 AND "properties::exoestimation_connection_speedinkbps"<1000 `,
"SELECT quantiles(0.500000)(`properties::entry_time`) AS `quantile_50` " +
`FROM ` + testdata.QuotedTableName + ` ` +
`WHERE ("epoch_time">='2024-04-18T04:40:12.252Z' AND "epoch_time"<='2024-05-03T04:40:12.252Z') ` +
`AND "properties::exoestimation_connection_speedinkbps">=1000 AND "properties::exoestimation_connection_speedinkbps"<2000 `,
`SELECT count(if("properties::exoestimation_connection_speedinkbps">=0 AND "properties::exoestimation_connection_speedinkbps"<1000, 1, NULL)), ` +
`count(if("properties::exoestimation_connection_speedinkbps">=1000 AND "properties::exoestimation_connection_speedinkbps"<2000, 1, NULL)), ` +
`count() ` +
`FROM ` + testdata.QuotedTableName + ` ` +
`WHERE "epoch_time">='2024-04-18T04:40:12.252Z' AND "epoch_time"<='2024-05-03T04:40:12.252Z' `,
},
},
}
