Unify counts (#758)
Two simple changes:
- Unify `count()` to `count(*)`
- Don't generate a separate order count; reuse the count column instead

This optimizes and simplifies queries. I need it so that I can migrate facets
to pancakes.

The change is mostly mundane updates to tests.
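For illustration, a simplified before/after sketch of the generated SQL (based on the updated halfpancake test in this commit; the table name `logs` is a placeholder and the `sum(count(*)) OVER ()` parent-count column is omitted):

```sql
-- before: bare count(), plus an extra column generated only for ordering
SELECT "host.name" AS "aggr__0__key_0", count(*) AS "aggr__0__count",
  count() AS "aggr__0__order_1"
FROM logs
GROUP BY "host.name" AS "aggr__0__key_0"
ORDER BY "aggr__0__order_1" DESC, "aggr__0__key_0" ASC
LIMIT 4

-- after: count(*) everywhere, and ORDER BY reuses the existing count column
SELECT "host.name" AS "aggr__0__key_0", count(*) AS "aggr__0__count"
FROM logs
GROUP BY "host.name" AS "aggr__0__key_0"
ORDER BY "aggr__0__count" DESC, "aggr__0__key_0" ASC
LIMIT 4
```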
jakozaur authored Sep 12, 2024
1 parent be27360 commit 51a740b
Showing 20 changed files with 306 additions and 607 deletions.
21 changes: 0 additions & 21 deletions quesma/model/bucket_aggregations/dateRange.go
@@ -27,27 +27,6 @@ func NewDateTimeInterval(begin, end string) DateTimeInterval {
}
}

// ToSQLSelectQuery returns count(...) where ... is a condition for the interval, just like we want it in SQL's SELECT
// from elastic docs: Note that this aggregation includes the from value and excludes the to value for each range.
func (interval DateTimeInterval) ToSQLSelectQuery(fieldName string) model.Expr {
if interval.Begin != UnboundedInterval && interval.End != UnboundedInterval {
return model.NewCountFunc(model.NewFunction("if",
model.NewInfixExpr(
model.NewInfixExpr(model.NewColumnRef(fieldName), " >= ", model.NewLiteral(interval.Begin)),
"AND",
model.NewInfixExpr(model.NewColumnRef(fieldName), " < ", model.NewLiteral(interval.End)),
),
model.NewLiteral(1), model.NewLiteral("NULL")))
} else if interval.Begin != UnboundedInterval {
return model.NewCountFunc(model.NewFunction("if",
model.NewInfixExpr(model.NewColumnRef(fieldName), " >= ", model.NewLiteral(interval.Begin)), model.NewLiteral(1), model.NewLiteral("NULL")))
} else if interval.End != UnboundedInterval {
return model.NewCountFunc(model.NewFunction("if",
model.NewInfixExpr(model.NewColumnRef(fieldName), " < ", model.NewLiteral(interval.End)), model.NewLiteral(1), model.NewLiteral("NULL")))
}
return model.NewCountFunc()
}

// BeginTimestampToSQL returns SQL select for the begin timestamp, and a boolean indicating if the select is needed
// We query Clickhouse for this timestamp, as it's defined in Clickhouse's format, e.g. now()-1d.
// It's only 1 more field to our SELECT query, so it shouldn't be a performance issue.
23 changes: 0 additions & 23 deletions quesma/model/bucket_aggregations/range.go
@@ -30,29 +30,6 @@ func (interval Interval) String() string {
return interval.floatToString(interval.Begin) + "-" + interval.floatToString(interval.End)
}

// ToSQLSelectQuery returns count(...) where ... is a condition for the interval, just like we want it in SQL's SELECT
func (interval Interval) ToSQLSelectQuery(columnExpr model.Expr) model.Expr {
var sqlLeft, sqlRight, sql model.Expr
if !interval.IsOpeningBoundInfinite() {
sqlLeft = model.NewInfixExpr(columnExpr, ">=", model.NewLiteral(interval.Begin))
}
if !interval.IsClosingBoundInfinite() {
sqlRight = model.NewInfixExpr(columnExpr, "<", model.NewLiteral(interval.End))
}
switch {
case sqlLeft != nil && sqlRight != nil:
sql = model.NewInfixExpr(sqlLeft, "AND", sqlRight)
case sqlLeft != nil:
sql = sqlLeft
case sqlRight != nil:
sql = sqlRight
default:
return model.NewFunction("count")
}
// count(if(sql, 1, NULL))
return model.NewFunction("count", model.NewFunction("if", sql, model.NewLiteral(1), model.NewLiteral("NULL")))
}

func (interval Interval) ToWhereClause(field model.Expr) model.Expr { // returns a condition for the interval, just like we want it in SQL's WHERE
var sqlLeft, sqlRight model.Expr
if !interval.IsOpeningBoundInfinite() {
3 changes: 3 additions & 0 deletions quesma/model/expr.go
@@ -98,6 +98,9 @@ func NewFunction(name string, args ...Expr) FunctionExpr {
}

func NewCountFunc(args ...Expr) FunctionExpr {
if len(args) == 0 {
args = []Expr{NewWildcardExpr}
}
return NewFunction("count", args...)
}

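For reference, a minimal usage sketch of the updated helper. The import path is assumed from the repository layout and the "age" column is purely illustrative; conditional counts keep working by passing the condition explicitly, the same count(if(..., 1, NULL)) shape the removed ToSQLSelectQuery helpers used to build.

```go
package example

import "quesma/model" // assumed import path

func exampleCounts() []model.Expr {
	// With no arguments, NewCountFunc now falls back to the wildcard, i.e. count(*).
	plain := model.NewCountFunc()

	// count(if("age" >= 18, 1, NULL)) — "age" is an illustrative column name.
	conditional := model.NewCountFunc(model.NewFunction("if",
		model.NewInfixExpr(model.NewColumnRef("age"), ">=", model.NewLiteral(18)),
		model.NewLiteral(1), model.NewLiteral("NULL")))

	return []model.Expr{plain, conditional}
}
```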
6 changes: 3 additions & 3 deletions quesma/optimize/pipeline_test.go
@@ -33,7 +33,7 @@ func Test_cacheQueries(t *testing.T) {
true,
"foo",
model.SelectCommand{
Columns: []model.Expr{model.NewColumnRef("a"), model.NewFunction("count", model.NewColumnRef("*"))},
Columns: []model.Expr{model.NewColumnRef("a"), model.NewCountFunc()},
FromClause: model.NewTableRef("foo"),
GroupBy: []model.Expr{model.NewLiteral(1)},
},
@@ -167,12 +167,12 @@ func Test_dateTrunc(t *testing.T) {
"select a, count() from foo group by 1",
"foo",
model.SelectCommand{
Columns: []model.Expr{model.NewColumnRef("a"), model.NewFunction("count", model.NewColumnRef("*"))},
Columns: []model.Expr{model.NewColumnRef("a"), model.NewCountFunc()},
FromClause: model.NewTableRef("foo"),
GroupBy: []model.Expr{model.NewLiteral(1)},
},
model.SelectCommand{
Columns: []model.Expr{model.NewColumnRef("a"), model.NewFunction("count", model.NewColumnRef("*"))},
Columns: []model.Expr{model.NewColumnRef("a"), model.NewCountFunc()},
FromClause: model.NewTableRef("foo"),
GroupBy: []model.Expr{model.NewLiteral(1)},
},
2 changes: 1 addition & 1 deletion quesma/queryparser/pancake_aggregation_parser.go
@@ -70,7 +70,7 @@ func (cw *ClickhouseQueryTranslator) PancakeParseAggregationJson(body types.JSON
name: PancakeTotalCountMetricName,
internalName: "metric__" + PancakeTotalCountMetricName,
queryType: typical_queries.Count{},
selectedColumns: []model.Expr{model.NewFunction("count", model.NewLiteral("*"))},
selectedColumns: []model.Expr{model.NewCountFunc()},
}

pancakeQueries[0].layers[0].currentMetricAggregations = append(pancakeQueries[0].layers[0].currentMetricAggregations, augmentedCountAggregation)
2 changes: 1 addition & 1 deletion quesma/queryparser/pancake_aggregation_parser_metrics.go
@@ -114,7 +114,7 @@ func generateMetricSelectedColumns(ctx context.Context, metricsAggr metricsAggre
castLon := model.NewFunction("CAST", lonColumn, model.NewLiteral(fmt.Sprintf("'%s'", "Float")))
result = append(result, model.NewFunction("avgOrNull", castLat))
result = append(result, model.NewFunction("avgOrNull", castLon))
result = append(result, model.NewFunction("count"))
result = append(result, model.NewCountFunc())
}
default:
logger.WarnWithCtx(ctx).Msgf("unknown metrics aggregation: %s", metricsAggr.AggrType)
20 changes: 11 additions & 9 deletions quesma/queryparser/pancake_sql_query_generation.go
@@ -80,10 +80,10 @@ func (p *pancakeSqlQueryGenerator) generateMetricSelects(metric *pancakeModelMet
return
}

func (p *pancakeSqlQueryGenerator) isPartOfGroupBy(column model.Expr, groupByColumns []model.AliasedExpr) *model.AliasedExpr {
for _, groupByColumn := range groupByColumns {
if model.PartlyImplementedIsEqual(column, groupByColumn) {
return &groupByColumn
func (p *pancakeSqlQueryGenerator) isPartOf(column model.Expr, aliasedColumns []model.AliasedExpr) *model.AliasedExpr {
for _, aliasedColumn := range aliasedColumns {
if model.PartlyImplementedIsEqual(column, aliasedColumn) {
return &aliasedColumn
}
}
return nil
@@ -103,7 +103,7 @@ func (p *pancakeSqlQueryGenerator) isPartOfOrderBy(alias model.AliasedExpr, orde
func (p *pancakeSqlQueryGenerator) addPotentialParentCount(bucketAggregation *pancakeModelBucketAggregation, groupByColumns []model.AliasedExpr) []model.AliasedExpr {
if query_util.IsAnyKindOfTerms(bucketAggregation.queryType) {
parentCountColumn := model.NewWindowFunction("sum",
[]model.Expr{model.NewFunction("count", model.NewLiteral("*"))},
[]model.Expr{model.NewCountFunc()},
p.generatePartitionBy(groupByColumns), []model.OrderByExpr{})
parentCountAliasedColumn := model.NewAliasedExpr(parentCountColumn, bucketAggregation.InternalNameForParentCount())
return []model.AliasedExpr{parentCountAliasedColumn}
@@ -126,12 +126,12 @@ func (p *pancakeSqlQueryGenerator) generateBucketSqlParts(bucketAggregation *pan
// build count for aggr
var countColumn model.Expr
if hasMoreBucketAggregations {
partCountColumn := model.NewFunction("count", model.NewLiteral("*"))
partCountColumn := model.NewCountFunc()

countColumn = model.NewWindowFunction("sum", []model.Expr{partCountColumn},
p.generatePartitionBy(append(groupByColumns, addGroupBys...)), []model.OrderByExpr{})
} else {
countColumn = model.NewFunction("count", model.NewLiteral("*"))
countColumn = model.NewCountFunc()
}
countAliasedColumn := model.NewAliasedExpr(countColumn, bucketAggregation.InternalNameForCount())
addSelectColumns = append(addSelectColumns, countAliasedColumn)
Expand All @@ -143,7 +143,9 @@ func (p *pancakeSqlQueryGenerator) generateBucketSqlParts(bucketAggregation *pan
columnId := len(bucketAggregation.selectedColumns) + i
direction := orderBy.Direction

rankColumn := p.isPartOfGroupBy(orderBy.Expr, append(groupByColumns, addGroupBys...))
rankColumn := p.isPartOf(orderBy.Expr, append(append(groupByColumns, addGroupBys...),
// We need count before window functions
model.NewAliasedExpr(model.NewCountFunc(), bucketAggregation.InternalNameForCount())))
if rankColumn != nil { // rank is part of group by
if direction == model.DefaultOrder {
direction = model.AscOrder // primarily needed for tests
Expand Down Expand Up @@ -323,7 +325,7 @@ func (p *pancakeSqlQueryGenerator) generateSelectCommand(aggregation *pancakeMod
combinatorWhere = append(combinatorWhere, subGroup.WhereClause)
for _, selectAfter := range selectsAfter {
var withCombinator model.Expr
if p.isPartOfGroupBy(selectAfter.Expr, groupBys) != nil {
if p.isPartOf(selectAfter.Expr, groupBys) != nil {
withCombinator = selectAfter.Expr
} else {
withIfCombinator, err := p.addIfCombinator(selectAfter.Expr, subGroup.WhereClause)
8 changes: 3 additions & 5 deletions quesma/queryparser/pancake_sql_query_generation_test.go
@@ -237,11 +237,10 @@ func TestPancakeQueryGeneration_halfpancake(t *testing.T) {
`,
sql: `
SELECT sum(count(*)) OVER () AS "aggr__0__parent_count",
"host.name" AS "aggr__0__key_0", count(*) AS "aggr__0__count",
count() AS "aggr__0__order_1"
"host.name" AS "aggr__0__key_0", count(*) AS "aggr__0__count"
FROM ` + TableName + `
GROUP BY "host.name" AS "aggr__0__key_0"
ORDER BY "aggr__0__order_1" DESC, "aggr__0__key_0" ASC
ORDER BY "aggr__0__count" DESC, "aggr__0__key_0" ASC
LIMIT 4`, // -- we added one more as filtering nulls happens during rendering
},

@@ -268,11 +267,10 @@
`
SELECT sum(count(*)) OVER () AS "aggr__0__parent_count",
"host.name" AS "aggr__0__key_0", count(*) AS "aggr__0__count",
count() AS "aggr__0__order_1",
avgOrNull("bytes_gauge") AS "metric__0__2_col_0"
FROM ` + TableName + `
GROUP BY "host.name" AS "aggr__0__key_0"
ORDER BY "aggr__0__order_1" DESC, "aggr__0__key_0" ASC
ORDER BY "aggr__0__count" DESC, "aggr__0__key_0" ASC
LIMIT 4`, // we increased limit by 1 to allow filtering of nulls during json rendering
},
}
32 changes: 16 additions & 16 deletions quesma/quesma/schema_transformer_test.go
@@ -473,7 +473,7 @@ func Test_arrayType(t *testing.T) {
FromClause: model.NewTableRef("kibana_sample_data_ecommerce"),
Columns: []model.Expr{
model.NewColumnRef("order_date"),
model.NewFunction("count"),
model.NewCountFunc(),
},
WhereClause: model.NewInfixExpr(
model.NewColumnRef("products::name"),
@@ -489,7 +489,7 @@ func Test_arrayType(t *testing.T) {
FromClause: model.NewTableRef("kibana_sample_data_ecommerce"),
Columns: []model.Expr{
model.NewColumnRef("order_date"),
model.NewFunction("count"),
model.NewCountFunc(),
},
WhereClause: model.NewFunction(
"arrayExists",
@@ -511,7 +511,7 @@ func Test_arrayType(t *testing.T) {
FromClause: model.NewTableRef("kibana_sample_data_ecommerce"),
Columns: []model.Expr{
model.NewColumnRef("order_date"),
model.NewFunction("count"),
model.NewCountFunc(),
},
WhereClause: model.NewInfixExpr(
model.NewColumnRef("products::sku"),
@@ -527,7 +527,7 @@ func Test_arrayType(t *testing.T) {
FromClause: model.NewTableRef("kibana_sample_data_ecommerce"),
Columns: []model.Expr{
model.NewColumnRef("order_date"),
model.NewFunction("count"),
model.NewCountFunc(),
},
WhereClause: model.NewFunction(
"has",
@@ -687,14 +687,14 @@ func TestApplyPhysicalFromExpression(t *testing.T) {
FromClause: model.NewTableRef(model.SingleTableNamePlaceHolder),
Columns: []model.Expr{
model.NewColumnRef("a"),
model.NewFunction("count"),
model.NewCountFunc(),
},
},
model.SelectCommand{
FromClause: model.NewTableRef("test"),
Columns: []model.Expr{
model.NewColumnRef("a"),
model.NewFunction("count"),
model.NewCountFunc(),
},
},
},
@@ -705,7 +705,7 @@ func TestApplyPhysicalFromExpression(t *testing.T) {
FromClause: model.NewTableRef(model.SingleTableNamePlaceHolder),
Columns: []model.Expr{
model.NewColumnRef("a"),
model.NewFunction("count"),
model.NewCountFunc(),
},
NamedCTEs: []*model.CTE{
{
@@ -723,7 +723,7 @@ func TestApplyPhysicalFromExpression(t *testing.T) {
FromClause: model.NewTableRef("test"),
Columns: []model.Expr{
model.NewColumnRef("a"),
model.NewFunction("count"),
model.NewCountFunc(),
},
NamedCTEs: []*model.CTE{
{
@@ -745,7 +745,7 @@ func TestApplyPhysicalFromExpression(t *testing.T) {
FromClause: model.NewTableRef(model.SingleTableNamePlaceHolder),
Columns: []model.Expr{
model.NewColumnRef("order_date"),
model.NewFunction("count"),
model.NewCountFunc(),
},
NamedCTEs: []*model.CTE{
{
@@ -763,7 +763,7 @@ func TestApplyPhysicalFromExpression(t *testing.T) {
FromClause: model.NewTableRef("test"),
Columns: []model.Expr{
model.NewColumnRef("order_date"),
model.NewFunction("count"),
model.NewCountFunc(),
},
NamedCTEs: []*model.CTE{
{
@@ -822,15 +822,15 @@ func TestFullTextFields(t *testing.T) {
FromClause: model.NewTableRef("test"),
Columns: []model.Expr{
model.NewColumnRef("a"),
model.NewFunction("count"),
model.NewCountFunc(),
},
WhereClause: model.NewInfixExpr(model.NewColumnRef(model.FullTextFieldNamePlaceHolder), "=", model.NewLiteral("foo")),
},
model.SelectCommand{
FromClause: model.NewTableRef("test"),
Columns: []model.Expr{
model.NewColumnRef("a"),
model.NewFunction("count"),
model.NewCountFunc(),
},
WhereClause: model.NewLiteral(false),
},
@@ -843,15 +843,15 @@ func TestFullTextFields(t *testing.T) {
FromClause: model.NewTableRef("test"),
Columns: []model.Expr{
model.NewColumnRef("a"),
model.NewFunction("count"),
model.NewCountFunc(),
},
WhereClause: model.NewInfixExpr(model.NewColumnRef(model.FullTextFieldNamePlaceHolder), "=", model.NewLiteral("foo")),
},
model.SelectCommand{
FromClause: model.NewTableRef("test"),
Columns: []model.Expr{
model.NewColumnRef("a"),
model.NewFunction("count"),
model.NewCountFunc(),
},
WhereClause: model.NewInfixExpr(model.NewColumnRef("b"), "=", model.NewLiteral("foo")),
},
@@ -864,15 +864,15 @@ func TestFullTextFields(t *testing.T) {
FromClause: model.NewTableRef("test"),
Columns: []model.Expr{
model.NewColumnRef("a"),
model.NewFunction("count"),
model.NewCountFunc(),
},
WhereClause: model.NewInfixExpr(model.NewColumnRef(model.FullTextFieldNamePlaceHolder), "=", model.NewLiteral("foo")),
},
model.SelectCommand{
FromClause: model.NewTableRef("test"),
Columns: []model.Expr{
model.NewColumnRef("a"),
model.NewFunction("count"),
model.NewCountFunc(),
},
WhereClause: model.Or([]model.Expr{
model.NewInfixExpr(model.NewColumnRef("a"), "=", model.NewLiteral("foo")),
6 changes: 3 additions & 3 deletions quesma/quesma/search_test.go
@@ -464,7 +464,7 @@ func TestNumericFacetsQueries(t *testing.T) {
}

// count, present in all tests
mock.ExpectQuery(`SELECT count\(\) FROM ` + tableName).WillReturnRows(sqlmock.NewRows([]string{"count"}))
mock.ExpectQuery(`SELECT count\(\*\) FROM ` + tableName).WillReturnRows(sqlmock.NewRows([]string{"count"}))
// Don't care about the query's SQL in this test, it's thoroughly tested in different tests, thus ""
mock.ExpectQuery("").WillReturnRows(returnedBuckets)

@@ -530,7 +530,7 @@ func TestSearchTrackTotalCount(t *testing.T) {
})

test := func(t *testing.T, handlerName string, testcase testdata.FullSearchTestCase) {
db, mock := util.InitSqlMockWithPrettyPrint(t, false)
db, mock := util.InitSqlMockWithPrettySqlAndPrint(t, false)
defer db.Close()
lm := clickhouse.NewLogManagerWithConnection(db, table)
managementConsole := ui.NewQuesmaManagementConsole(&DefaultConfig, nil, nil, make(<-chan logger.LogWithLevel, 50000), telemetry.NewPhoneHomeEmptyAgent(), nil)
@@ -540,7 +540,7 @@ func TestSearchTrackTotalCount(t *testing.T) {
for _, row := range testcase.ExpectedSQLResults[i] {
rows.AddRow(row.Cols[0].Value)
}
mock.ExpectQuery(testdata.EscapeBrackets(sql)).WillReturnRows(rows)
mock.ExpectQuery(sql).WillReturnRows(rows)
}

queryRunner := NewQueryRunner(lm, &DefaultConfig, nil, managementConsole, s, ab_testing.NewEmptySender())