Skip to content

Commit

Permalink
Add split_time_range optimizer (#1091)
Browse files Browse the repository at this point in the history
Context: We noticed that for some schemas (that don't ORDER BY time),
the "Discover" view in Kibana over long time ranges can be very slow,
even though it only shows 500 results. Changing the time range to a
shorter one can make the query faster. (See this issue in ClickHouse for
a similar example:
ClickHouse/ClickHouse#69315)

This optimization therefore splits the time range into parts: a short
time range, on which we bet that the query will be fast (and still
return LIMIT many results) and a long time range, which will be used to
get the rest of the results (in case the short time range didn't return
enough results). The ranges are customizable (there can even be more
than two of them).

During the development I also fixed two related problems:
- added `AliasColumnsTransformation` which makes sure that the queries
we generate have columns with clear names (so that I can reference them
when they are in a subquery)
- `read` from ClickHouse now only reads `LIMIT` many rows and closes
`Rows` asynchronously (it doesn't wait for query completion once it has
read `LIMIT` many rows)

---------

Signed-off-by: Piotr Grabowski <[email protected]>
Co-authored-by: Jacek Migdal <[email protected]>
  • Loading branch information
avelanarius and jakozaur authored Dec 17, 2024
1 parent 33bae75 commit 047ed16
Show file tree
Hide file tree
Showing 10 changed files with 376 additions and 79 deletions.
30 changes: 20 additions & 10 deletions quesma/clickhouse/quesma_communicator.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"quesma/end_user_errors"
"quesma/logger"
"quesma/model"
"quesma/quesma/recovery"
tracing "quesma_v2/core/tracing"
"strconv"
"strings"
Expand Down Expand Up @@ -59,10 +60,11 @@ func (lm *LogManager) ProcessQuery(ctx context.Context, table *Table, query *mod
colName = col.Alias
case model.LiteralExpr:

// This should be moved to the SchemaCheck pipeline. It'll require to change a lot of tests.
// There's now a AliasColumnsTransformation transformation that handles this,
// but it's not fully complete as it'll require to change a lot more tests.
//
// It can be removed just after the pancake will be the only way to generate SQL.
// Pancake SQL are aliased properly.
// The only remaining issue is that Pancake SQLs sometimes generate LiteralExpr in SELECT
// instead of ColumnRef (nested SQLs case).

if str, isStr := col.Value.(string); isStr {
if unquoted, err := strconv.Unquote(str); err == nil {
Expand All @@ -71,11 +73,15 @@ func (lm *LogManager) ProcessQuery(ctx context.Context, table *Table, query *mod
colName = str
}
} else {
// AliasColumnsTransformation should have handled this
logger.Warn().Msgf("Unexpected unaliased literal: %v", col.Value)
if colName == "" {
colName = fmt.Sprintf("column_%d", count)
}
}
default:
// AliasColumnsTransformation should have handled this
logger.Warn().Msgf("Unexpected unaliased literal: %v", col)
if colName == "" {
colName = fmt.Sprintf("column_%d", count)
}
Expand Down Expand Up @@ -188,7 +194,7 @@ func executeQuery(ctx context.Context, lm *LogManager, query *model.Query, field
performanceResult.Error = err
return nil, performanceResult, end_user_errors.GuessClickhouseErrorType(err).InternalDetails("clickhouse: query failed. err: %v, query: %v", err, queryAsString)
}
res, err = read(rows, fields, rowToScan)
res, err = read(ctx, rows, fields, rowToScan, query.SelectCommand.Limit)

elapsed := span.End(nil)
performanceResult.Duration = elapsed
Expand All @@ -204,7 +210,7 @@ func executeQuery(ctx context.Context, lm *LogManager, query *model.Query, field

// 'selectFields' are all values that we return from the query, both columns and non-schema fields,
// like e.g. count(), or toInt8(boolField)
func read(rows *sql.Rows, selectFields []string, rowToScan []interface{}) ([]model.QueryResultRow, error) {
func read(ctx context.Context, rows *sql.Rows, selectFields []string, rowToScan []interface{}, limit int) ([]model.QueryResultRow, error) {

// read selected fields from the metadata

Expand All @@ -213,7 +219,8 @@ func read(rows *sql.Rows, selectFields []string, rowToScan []interface{}) ([]mod
rowDb = append(rowDb, &rowToScan[i])
}
resultRows := make([]model.QueryResultRow, 0)
for rows.Next() {
// If a limit is set (limit != 0) then collect only the first 'limit' rows
for (len(resultRows) < limit || limit == 0) && rows.Next() {
err := rows.Scan(rowDb...)
if err != nil {
return nil, fmt.Errorf("clickhouse: scan failed: %v", err)
Expand All @@ -227,9 +234,12 @@ func read(rows *sql.Rows, selectFields []string, rowToScan []interface{}) ([]mod
if rows.Err() != nil {
return nil, fmt.Errorf("clickhouse: iterating over rows failed: %v", rows.Err())
}
err := rows.Close()
if err != nil {
return nil, fmt.Errorf("clickhouse: closing rows failed: %v", err)
}
go func() {
recovery.LogPanicWithCtx(ctx)
err := rows.Close()
if err != nil {
logger.ErrorWithCtx(ctx).Msgf("clickhouse: closing rows failed: %v", err)
}
}()
return resultRows, nil
}
18 changes: 11 additions & 7 deletions quesma/model/expr_string_renderer.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ func (v *renderer) VisitInfix(e InfixExpr) interface{} {
// I think in the future every infix op should be in braces.
if strings.HasPrefix(e.Op, "_") || e.Op == "AND" || e.Op == "OR" {
return fmt.Sprintf("(%v %v %v)", lhs, e.Op, rhs)
} else if strings.Contains(e.Op, "LIKE") || e.Op == "IS" || e.Op == "IN" || e.Op == "REGEXP" {
} else if strings.Contains(e.Op, "LIKE") || e.Op == "IS" || e.Op == "IN" || e.Op == "REGEXP" || strings.Contains(e.Op, "UNION") {
return fmt.Sprintf("%v %v %v", lhs, e.Op, rhs)
} else {
return fmt.Sprintf("%v%v%v", lhs, e.Op, rhs)
Expand Down Expand Up @@ -191,13 +191,17 @@ func (v *renderer) VisitSelectCommand(c SelectCommand) interface{} {
sb.WriteString(" FROM ")
}
/* HACK ALERT END */
if c.FromClause != nil { // here we have to handle nested
if nestedCmd, isNested := c.FromClause.(SelectCommand); isNested {
sb.WriteString(fmt.Sprintf("(%s)", AsString(nestedCmd)))
} else if nestedCmdPtr, isNested := c.FromClause.(*SelectCommand); isNested {
sb.WriteString(fmt.Sprintf("(%s)", AsString(nestedCmdPtr)))
} else {
if c.FromClause != nil {
// Non-nested FROM clauses don't have to be wrapped in parentheses
if _, isTableRef := c.FromClause.(TableRef); isTableRef {
sb.WriteString(AsString(c.FromClause))
} else if _, isLiteral := c.FromClause.(LiteralExpr); isLiteral {
sb.WriteString(AsString(c.FromClause))
} else if _, isJoinExpr := c.FromClause.(JoinExpr); isJoinExpr {
sb.WriteString(AsString(c.FromClause))
} else {
// Nested sub-query
sb.WriteString(fmt.Sprintf("(%s)", AsString(c.FromClause)))
}
}
if c.WhereClause != nil {
Expand Down
1 change: 1 addition & 0 deletions quesma/optimize/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ func NewOptimizePipeline(config *config.QuesmaConfiguration) model.QueryTransfor
&truncateDate{truncateTo: 5 * time.Minute},
&cacheQueries{},
&materializedViewReplace{},
&splitTimeRange{},
},
}
}
Expand Down
Loading

0 comments on commit 047ed16

Please sign in to comment.