Skip to content

Commit

Permalink
bucket_script (pipeline aggr) enhancements (#923)
Browse files Browse the repository at this point in the history
Before `bucket_script` was only handled in one simplest case (which
happened surprisingly often) - when it meant just a simple count.
Now we accept the script to be based on more than one parent aggregation
+ it works for 1 particular example of a script.

TODO: After implementing some basic arithmetic parsing, it should work
in all cases.
  • Loading branch information
trzysiek authored Oct 29, 2024
1 parent ca8ec4a commit 1f434d4
Show file tree
Hide file tree
Showing 9 changed files with 460 additions and 29 deletions.
75 changes: 58 additions & 17 deletions quesma/model/pipeline_aggregations/bucket_script.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,44 +4,85 @@ package pipeline_aggregations

import (
"context"
"fmt"
"quesma/logger"
"quesma/model"
"quesma/util"
"strings"
)

type BucketScript struct {
ctx context.Context
*PipelineAggregation
script string
}

func NewBucketScript(ctx context.Context) BucketScript {
return BucketScript{ctx: ctx}
func NewBucketScript(ctx context.Context, script string) BucketScript {
return BucketScript{script: script, PipelineAggregation: newPipelineAggregation(ctx, "_count")}
}

func (query BucketScript) AggregationType() model.AggregationType {
return model.PipelineMetricsAggregation // not sure
}

func (query BucketScript) TranslateSqlResponseToJson(rows []model.QueryResultRow) model.JsonMap {
if len(rows) == 0 {
logger.WarnWithCtx(query.ctx).Msg("no rows returned for bucket script aggregation")
return model.JsonMap{"value": 0}
}
var response []model.JsonMap
for _, row := range rows {
response = append(response, model.JsonMap{"value": row.Cols[0].Value})
}
return model.JsonMap{
"buckets": response,
const defaultValue = 0.
switch {
case query.script == "params.numerator != null && params.denominator != null && params.denominator != 0 ? params.numerator / params.denominator : 0":
numerator := query.findFilterValue(rows, "numerator")
denominator := query.findFilterValue(rows, "denominator")
if denominator == 0 {
return model.JsonMap{"value": defaultValue}
}
return model.JsonMap{"value": numerator / denominator}
case len(rows) == 1:
for _, row := range rows {
return model.JsonMap{"value": util.ExtractNumeric64(row.LastColValue())}
}
}

logger.WarnWithCtx(query.ctx).Msgf("unexpected result in bucket_script: %s, len(rows): %d. Returning default.", query.String(), len(rows))
return model.JsonMap{"value": defaultValue}
}

func (query BucketScript) CalculateResultWhenMissing(*model.Query, []model.QueryResultRow) []model.QueryResultRow {
return []model.QueryResultRow{}
func (query BucketScript) CalculateResultWhenMissing(parentRows []model.QueryResultRow) []model.QueryResultRow {
resultRows := make([]model.QueryResultRow, 0, len(parentRows))
for _, parentRow := range parentRows {
resultRow := parentRow.Copy()
if len(resultRow.Cols) != 0 {
resultRow.Cols[len(resultRow.Cols)-1].Value = util.ExtractNumeric64(parentRow.LastColValue())
} else {
logger.ErrorWithCtx(query.ctx).Msgf("unexpected empty parent row in bucket_script: %s", query.String())
}
resultRows = append(resultRows, resultRow)
}
return resultRows
}

func (query BucketScript) String() string {
return "bucket script"
return fmt.Sprintf("bucket_script(isCount: %v, parent: %s, pathToParent: %v, parentBucketAggregation: %v, script: %v)",
query.isCount, query.Parent, query.PathToParent, query.parentBucketAggregation, query.script)
}

func (query BucketScript) PipelineAggregationType() model.PipelineAggregationType {
return model.PipelineParentAggregation // not sure, maybe it's sibling.
return model.PipelineParentAggregation // not sure, maybe it's sibling. change hasn't changed the result when running some tests.
}

func (query BucketScript) findFilterValue(rows []model.QueryResultRow, filterName string) float64 {
const defaultValue = 0.0
for _, row := range rows {
for _, col := range row.Cols {
colName := col.ColName
if !strings.HasSuffix(colName, "_col_0") {
continue
}
colName = strings.TrimSuffix(colName, "_col_0")
if strings.HasSuffix(colName, "-"+filterName) {
return float64(util.ExtractInt64(col.Value))
}
}
}

logger.WarnWithCtx(query.ctx).Msgf("could not find filter value for filter: %s, bucket_script: %s, len(rows): %d."+
"Returning default", filterName, query.String(), len(rows))
return defaultValue
}
6 changes: 5 additions & 1 deletion quesma/model/pipeline_aggregations/pipeline_aggregation.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func newPipelineAggregation(ctx context.Context, bucketsPath string) *PipelineAg
const delimiter = ">"
if len(bucketsPath) == 0 {
logger.WarnWithCtx(ctx).Msgf("invalid bucketsPath: %s. Using empty string as parent.", bucketsPath)
return &PipelineAggregation{}
return &PipelineAggregation{isCount: true} // count, as it's the simplest case
}

parent := ""
Expand All @@ -54,6 +54,10 @@ func (p *PipelineAggregation) IsCount() bool {
return p.isCount
}

func (p *PipelineAggregation) GetParentBucketAggregation() model.QueryType {
return p.parentBucketAggregation
}

func (p *PipelineAggregation) SetParentBucketAggregation(parentBucketAggregation model.QueryType) {
p.parentBucketAggregation = parentBucketAggregation
}
Expand Down
1 change: 1 addition & 0 deletions quesma/model/pipeline_query_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,6 @@ type PipelineQueryType interface {
GetPathToParent() []string
IsCount() bool

GetParentBucketAggregation() QueryType
SetParentBucketAggregation(parentBucketAggregation QueryType)
}
1 change: 1 addition & 0 deletions quesma/queryparser/aggregation_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,7 @@ func allAggregationTests() []testdata.AggregationTestCase {
add(kibana_visualize.PipelineAggregationTests, "kibana-visualize/pipeline_agg_req")
add(clients.KunkkaTests, "clients/kunkka")
add(clients.OpheliaTests, "clients/ophelia")
add(clients.CloverTests, "clients/clover")

return allTests
}
Expand Down
4 changes: 4 additions & 0 deletions quesma/queryparser/pancake_pipelines.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ func (p pancakePipelinesProcessor) selectPipelineRows(pipeline model.PipelineQue
bucketAggregation *pancakeModelBucketAggregation) (
result []model.QueryResultRow) {

if bucketAggregation == nil {
return rows
}

isCount := pipeline.IsCount()
for _, row := range rows {
newRow := model.QueryResultRow{Index: row.Index}
Expand Down
4 changes: 4 additions & 0 deletions quesma/queryparser/pancake_sql_query_generation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ func TestPancakeQueryGeneration(t *testing.T) {
t.Skip("Need to implement order by top metrics (talk with Jacek, he has an idea)")
}

if test.TestName == "multiple buckets_path(file:clients/clover,nr:1)" {
t.Skip("Unskip after merge of auto_date_histogram")
}

fmt.Println("i:", i, "test:", test.TestName)

jsonp, err := types.ParseJSON(test.QueryRequestJson)
Expand Down
43 changes: 32 additions & 11 deletions quesma/queryparser/pipeline_aggregations.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"quesma/logger"
"quesma/model"
"quesma/model/pipeline_aggregations"
"strings"
)

// CAUTION: maybe "return" everywhere isn't corrent, as maybe there can be multiple pipeline aggregations at one level.
Expand Down Expand Up @@ -168,26 +169,29 @@ func (cw *ClickhouseQueryTranslator) parseBucketScriptBasic(queryMap QueryMap) (
if !ok {
return
}
if bucketsPath != pipeline_aggregations.BucketsPathCount {
if !strings.HasSuffix(bucketsPath, pipeline_aggregations.BucketsPathCount) {
logger.WarnWithCtx(cw.Ctx).Msgf("buckets_path is not '_count', but %s. Skipping this aggregation", bucketsPath)
return
}

// if ["script"]["source"] != "_value", skip the aggregation
scriptRaw, exists := bucketScript["script"]
if !exists {
logger.WarnWithCtx(cw.Ctx).Msg("no script in bucket_script. Skipping this aggregation")
return
}
if script, ok := scriptRaw.(string); ok {
return pipeline_aggregations.NewBucketScript(cw.Ctx, script), true
}

script, ok := scriptRaw.(QueryMap)
if !ok {
logger.WarnWithCtx(cw.Ctx).Msgf("script is not a map, but %T, value: %v. Skipping this aggregation", scriptRaw, scriptRaw)
return
}
if sourceRaw, exists := script["source"]; exists {
if source, ok := sourceRaw.(string); ok {
if source != "_value" {
logger.WarnWithCtx(cw.Ctx).Msgf("source is not '_value', but %s. Skipping this aggregation", source)
if source != "_value" && source != "count * 1" {
logger.WarnWithCtx(cw.Ctx).Msgf("source is not '_value'/'count * 1', but %s. Skipping this aggregation", source)
return
}
} else {
Expand All @@ -200,10 +204,10 @@ func (cw *ClickhouseQueryTranslator) parseBucketScriptBasic(queryMap QueryMap) (
}

// okay, we've checked everything, it's indeed a simple count
return pipeline_aggregations.NewBucketScript(cw.Ctx), true
return pipeline_aggregations.NewBucketScript(cw.Ctx, ""), true
}

func (cw *ClickhouseQueryTranslator) parseBucketsPath(shouldBeQueryMap any, aggregationName string) (bucketsPath string, success bool) {
func (cw *ClickhouseQueryTranslator) parseBucketsPath(shouldBeQueryMap any, aggregationName string) (bucketsPathStr string, success bool) {
queryMap, ok := shouldBeQueryMap.(QueryMap)
if !ok {
logger.WarnWithCtx(cw.Ctx).Msgf("%s is not a map, but %T, value: %v", aggregationName, shouldBeQueryMap, shouldBeQueryMap)
Expand All @@ -214,10 +218,27 @@ func (cw *ClickhouseQueryTranslator) parseBucketsPath(shouldBeQueryMap any, aggr
logger.WarnWithCtx(cw.Ctx).Msg("no buckets_path in avg_bucket")
return
}
bucketsPath, ok = bucketsPathRaw.(string)
if !ok {
logger.WarnWithCtx(cw.Ctx).Msgf("buckets_path is not a string, but %T, value: %v", bucketsPathRaw, bucketsPathRaw)
return

switch bucketsPath := bucketsPathRaw.(type) {
case string:
return bucketsPath, true
case QueryMap:
// TODO: handle arbitrary nr of keys (and arbitrary scripts, because we also handle only one special case)
if len(bucketsPath) == 1 || len(bucketsPath) == 2 {
for _, bucketPath := range bucketsPath {
if _, ok = bucketPath.(string); !ok {
logger.WarnWithCtx(cw.Ctx).Msgf("buckets_path is not a map with string values, but %T. Skipping this aggregation", bucketPath)
return
}
// Kinda weird to return just the first value, but seems working on all cases so far.
// After fixing the TODO above, it should also get fixed.
return bucketPath.(string), true
}
} else {
logger.WarnWithCtx(cw.Ctx).Msgf("buckets_path is not a map with one or two keys, but %d. Skipping this aggregation", len(bucketsPath))
}
}
return bucketsPath, true

logger.WarnWithCtx(cw.Ctx).Msgf("buckets_path in wrong format, type: %T, value: %v", bucketsPathRaw, bucketsPathRaw)
return
}
Loading

0 comments on commit 1f434d4

Please sign in to comment.