Skip to content

Commit

Permalink
Some refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
trzysiek committed May 20, 2024
1 parent 090b4a0 commit 5b73ca3
Show file tree
Hide file tree
Showing 13 changed files with 391 additions and 574 deletions.
1 change: 1 addition & 0 deletions quesma/logger/logger.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ func InitSimpleLoggerForTests() {
Out: os.Stderr,
TimeFormat: time.StampMilli,
}).
Level(zerolog.DebugLevel).
With().
Timestamp().
Logger()
Expand Down
20 changes: 17 additions & 3 deletions quesma/model/bucket_aggregations/filters.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,25 @@ import (
)

type Filters struct {
ctx context.Context
ctx context.Context
Filters []Filter
}

func NewFilters(ctx context.Context) Filters {
return Filters{ctx}
func NewFiltersEmpty(ctx context.Context) Filters {
return Filters{ctx: ctx}
}

// NewFilters returns a Filters aggregation that carries ctx and the given
// list of filters. The slice is stored as passed in, not copied.
func NewFilters(ctx context.Context, filters []Filter) Filters {
	aggregation := Filters{ctx: ctx}
	aggregation.Filters = filters
	return aggregation
}

// Filter is one named entry of a Filters aggregation: a name paired with the
// query that belongs to it.
type Filter struct {
	// Name identifies this filter.
	Name string
	// Sql is the query attached to this filter.
	// NOTE(review): presumably the WHERE condition selecting the filter's bucket — confirm against the parser.
	Sql model.SimpleQuery
}

// NewFilter returns a Filter with the given name and query.
func NewFilter(name string, sql model.SimpleQuery) Filter {
	var filter Filter
	filter.Name = name
	filter.Sql = sql
	return filter
}

func (query Filters) IsBucketAggregation() bool {
Expand Down
15 changes: 5 additions & 10 deletions quesma/model/bucket_aggregations/terms.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,17 @@ package bucket_aggregations

import (
"context"
"fmt"
"mitmproxy/quesma/logger"
"mitmproxy/quesma/model"
)

const DefaultSize = 10

type Terms struct {
ctx context.Context
size int
significant bool // true <=> significant_terms, false <=> terms
}

func NewTerms(ctx context.Context, size int, significant bool) Terms {
return Terms{ctx: ctx, size: size, significant: significant}
func NewTerms(ctx context.Context, significant bool) Terms {
return Terms{ctx: ctx, significant: significant}
}

func (query Terms) IsBucketAggregation() bool {
Expand Down Expand Up @@ -45,11 +41,10 @@ func (query Terms) TranslateSqlResponseToJson(rows []model.QueryResultRow, level
}

func (query Terms) String() string {
var namePrefix string
if query.significant {
namePrefix = "significant_"
if !query.significant {
return "terms"
}
return fmt.Sprintf("%sterms(size=%d)", namePrefix, query.size)
return "significant_terms"
}

func (query Terms) PostprocessResults(rowsFromDB []model.QueryResultRow) []model.QueryResultRow {
Expand Down
140 changes: 33 additions & 107 deletions quesma/model/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package model

import (
"context"
"fmt"
"mitmproxy/quesma/logger"
"sort"
"strconv"
Expand All @@ -11,7 +10,6 @@ import (

const RowNumberColumnName = "row_number"
const EmptyFieldSelection = "''" // we can query SELECT '', that's why such quotes
const CountShortName = "cnt"

type Highlighter struct {
Tokens []string
Expand All @@ -28,98 +26,77 @@ type Query struct {
NonSchemaFields []string // Fields that are not in schema, but are in 'SELECT ...', e.g. count()
WhereClause string // "WHERE ..." until next clause like GROUP BY/ORDER BY, etc.
GroupByFields []string // if not empty, we do GROUP BY GroupByFields... They are quoted if they are column names, unquoted if non-schema. So no quotes need to be added.
OrderBy []string // ORDER BY fields
SuffixClauses []string // LIMIT, etc.
SuffixClauses []string // ORDER BY, etc.
FromClause string // usually just "tableName", or databaseName."tableName". Sometimes a subquery e.g. (SELECT ...)
TableName string
SubQueries []subQuery
OrderByCount bool
CanParse bool // true <=> query is valid
CanParse bool // true <=> query is valid
QueryInfo SearchQueryInfo
Highlighter Highlighter
NoDBQuery bool // true <=> we don't need query to DB here, true in some pipeline aggregations
Parent string // parent aggregation name, used in some pipeline aggregations
Aggregators []Aggregator // keeps names of aggregators, e.g. "0", "1", "2", "suggestions". Needed for JSON response.
Type QueryType
SubSelect string
}

type subQuery struct {
sql string
innerJoin string
name string
}

func newSubQuery(sql, innerJoin, name string) subQuery {
return subQuery{sql: sql, innerJoin: innerJoin, name: name}
// dictionary to add as 'meta' field in the response.
// WARNING: it's probably not passed everywhere where it's needed, just in one place.
// But it works for the test + our dashboards, so let's fix it later if necessary.
// NoMetadataField (nil) is a valid option and means no meta field in the response.
Metadata JsonMap
}

var NoMetadataField JsonMap = nil

// returns string with * in SELECT
// returns string with SQL query
func (q *Query) String() string {
return q.stringCommon(q.allFields())
return q.StringFromColumns(q.Fields)
}

// returns string with SQL query
// colNames - list of columns (schema fields) for SELECT
func (q *Query) StringFromColumns(colNames []string) string {
return q.stringCommon(colNames)
}

func (q *Query) stringCommon(selectSchemaFields []string) string {
var sb strings.Builder
if len(q.SubQueries) > 0 {
sb.WriteString("WITH ")
for i, sq := range q.SubQueries {
sb.WriteString(sq.name + " AS (" + sq.sql + ")")
if i < len(q.SubQueries)-1 {
sb.WriteString(", ")
}
}
sb.WriteString(" ")
}
sb.WriteString("SELECT ")
if q.IsDistinct {
sb.WriteString("DISTINCT ")
}
sb.WriteString(strings.Join(selectSchemaFields, ", "))
sb.WriteString(" FROM " + q.FromClause + " ") //where + q.WhereClause + " ")
for i, sq := range q.SubQueries {
sb.WriteString("INNER JOIN " + sq.name + " ON " + sq.innerJoin + " ")
if i < len(q.SubQueries)-1 {
sb.WriteString("AND ")
for i, field := range colNames {
if field == "*" || field == EmptyFieldSelection {
sb.WriteString(field)
} else {
sb.WriteString(strconv.Quote(field))
}
if i < len(colNames)-1 || len(q.NonSchemaFields) > 0 {
sb.WriteString(", ")
}
}
if len(q.WhereClause) > 0 {
sb.WriteString("WHERE " + q.WhereClause + " ")
for i, field := range q.NonSchemaFields {
sb.WriteString(field)
if i < len(q.NonSchemaFields)-1 {
sb.WriteString(", ")
}
}
where := " WHERE "
if len(q.WhereClause) == 0 {
where = ""
}
sb.WriteString(" FROM " + q.FromClause + where + q.WhereClause)
lastLetterIsSpace := true
if len(q.GroupByFields) > 0 {
sb.WriteString("GROUP BY ")
sb.WriteString(" GROUP BY (")
for i, field := range q.GroupByFields {
sb.WriteString(field)
if i < len(q.GroupByFields)-1 {
sb.WriteString(", ")
}
}
lastLetterIsSpace = false
}
if len(q.OrderBy) > 0 {
if !lastLetterIsSpace {
sb.WriteString(" ")
}
sb.WriteString("ORDER BY ")
for i, field := range q.OrderBy {
sb.WriteString(field)
if i < len(q.OrderBy)-1 {
sb.WriteString(", ")
sb.WriteString(")")

if len(q.SuffixClauses) == 0 {
sb.WriteString(" ORDER BY (")
for i, field := range q.GroupByFields {
sb.WriteString(field)
if i < len(q.GroupByFields)-1 {
sb.WriteString(", ")
}
}
sb.WriteString(")")
}
}
if len(q.SuffixClauses) > 0 {
Expand All @@ -132,54 +109,6 @@ func (q *Query) IsWildcard() bool {
return len(q.Fields) == 1 && q.Fields[0] == "*"
}

func (q *Query) allFields() []string {
fields := make([]string, 0, len(q.Fields)+len(q.NonSchemaFields))
for _, field := range q.Fields {
if field == "*" {
fields = append(fields, "*")
} else {
fields = append(fields, strconv.Quote(field))
}
}
for _, field := range q.NonSchemaFields {
fields = append(fields, field)
}
return fields
}

func (q *Query) AddSubQueryFromCurrentState(ctx context.Context, subqueryNr int) {
queryName := q.subQueryName(subqueryNr)

selectFields := make([]string, 0, len(q.Fields)+len(q.NonSchemaFields)+1)
for _, schemaField := range q.Fields {
if schemaField == "*" {
logger.WarnWithCtx(ctx).Msgf("Query with * shouldn't happen here. Skipping (query: %+v)", q)
continue
}
selectFields = append(selectFields, fmt.Sprintf(`"%s" AS "%s_%s"`, schemaField, queryName, schemaField))
}
for i, nonSchemaField := range q.NonSchemaFields {
selectFields = append(selectFields, fmt.Sprintf(`%s AS "%s_ns_%d"`, nonSchemaField, queryName, i))
}
selectFields = append(selectFields, fmt.Sprintf("count() AS %s", strconv.Quote(q.subQueryCountFieldName(subqueryNr))))
sql := q.StringFromColumns(selectFields)
innerJoinParts := make([]string, 0, len(q.GroupByFields))
for _, field := range q.Fields {
innerJoinParts = append(innerJoinParts, fmt.Sprintf(`"%s" = "%s_%s"`, field, queryName, field))
// FIXME add support for non-schema fields
}
innerJoin := strings.Join(innerJoinParts, " AND ")
q.SubQueries = append(q.SubQueries, newSubQuery(sql, innerJoin, queryName))
}

func (q *Query) subQueryName(nr int) string {
return "subQuery" + strconv.Itoa(nr)
}

func (q *Query) subQueryCountFieldName(nr int) string {
return q.subQueryName(nr) + "_" + CountShortName
}

// CopyAggregationFields copies all aggregation fields from qwa to q
func (q *Query) CopyAggregationFields(qwa Query) {
q.GroupByFields = make([]string, len(qwa.GroupByFields))
Expand All @@ -191,9 +120,6 @@ func (q *Query) CopyAggregationFields(qwa Query) {
q.NonSchemaFields = make([]string, len(qwa.NonSchemaFields))
copy(q.NonSchemaFields, qwa.NonSchemaFields)

q.SuffixClauses = make([]string, len(qwa.SuffixClauses))
copy(q.SuffixClauses, qwa.SuffixClauses)

q.Aggregators = make([]Aggregator, len(qwa.Aggregators))
copy(q.Aggregators, qwa.Aggregators)
}
Expand Down
104 changes: 104 additions & 0 deletions quesma/model/simple_query.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package model

import "mitmproxy/quesma/logger"

// SimpleQuery bundles a SQL condition (a Statement) with parsing status and
// some bookkeeping about the field it refers to.
type SimpleQuery struct {
	// Sql is the condition itself; CombineWheresWith treats it as a WHERE clause.
	Sql Statement
	// CanParse is false as soon as any part combined into this query could not be parsed.
	CanParse bool
	// FieldName is the field this query refers to, if any (empty when unset).
	FieldName string
	// SortFields is not touched by any code in this file.
	// NOTE(review): presumably ORDER BY fields collected while parsing — confirm against callers.
	SortFields []string
}

// NewSimpleQuery returns a SimpleQuery holding the given statement and parse
// status; FieldName and SortFields are left at their zero values.
func NewSimpleQuery(sql Statement, canParse bool) SimpleQuery {
	var query SimpleQuery
	query.Sql = sql
	query.CanParse = canParse
	return query
}

// NewSimpleQueryWithFieldName returns a SimpleQuery holding the given
// statement, parse status and field name; SortFields is left nil.
func NewSimpleQueryWithFieldName(sql Statement, canParse bool, fieldName string) SimpleQuery {
	var query SimpleQuery
	query.Sql = sql
	query.CanParse = canParse
	query.FieldName = fieldName
	return query
}

// CombineWheresWith merges sq2 into sq in place:
//   - the two statements are joined with AND,
//   - CanParse stays true only if both queries could be parsed,
//   - an empty FieldName on sq is filled in from sq2; if both are set and
//     differ, sq keeps its own and a warning is logged.
func (sq *SimpleQuery) CombineWheresWith(sq2 SimpleQuery) {
	sq.Sql = And([]Statement{sq.Sql, sq2.Sql})
	if !sq2.CanParse {
		sq.CanParse = false
	}

	switch {
	case len(sq2.FieldName) == 0:
		// sq2 carries no field name: nothing to adopt and nothing to conflict with.
	case len(sq.FieldName) == 0:
		sq.FieldName = sq2.FieldName
	case sq.FieldName != sq2.FieldName:
		logger.Warn().Msgf("combining 2 where clauses with different field names: %s, %s, where queries: %v %v", sq.FieldName, sq2.FieldName, sq, sq2)
	}
}

// Statement is a fragment of SQL condition text together with the information
// needed to combine it safely with other fragments.
type Statement struct {
	// Stmt is the SQL text. An empty Stmt means "no condition" and is dropped by And/Or.
	Stmt       string
	IsCompound bool // "a" -> not compound, "a AND b" -> compound. Used to not make unnecessary brackets (not always, but usually)
	// FieldName is the field name carried along with the statement, if any.
	// When statements are combined, the last non-empty FieldName wins.
	FieldName string
}

// NewSimpleStatement returns a non-compound Statement with no field name.
func NewSimpleStatement(stmt string) Statement {
	return Statement{Stmt: stmt, IsCompound: false}
}

// NewCompoundStatement returns a compound Statement (one that gets bracketed
// when combined further) carrying the given field name.
func NewCompoundStatement(stmt, fieldName string) Statement {
	return Statement{Stmt: stmt, IsCompound: true, FieldName: fieldName}
}

// NewCompoundStatementNoFieldName returns a compound Statement with an empty
// field name.
func NewCompoundStatementNoFieldName(stmt string) Statement {
	return Statement{Stmt: stmt, IsCompound: true}
}

// Added to the generated SQL where the query is fine, but we're sure no rows will match it
var AlwaysFalseStatement = NewSimpleStatement("false")

// And joins the non-empty statements with " AND ". The input slice is not
// modified. See combineStatements for the exact rules.
func And(andStmts []Statement) Statement {
	return combineStatements(andStmts, "AND")
}

// Or joins the non-empty statements with " OR ". The input slice is not
// modified. See combineStatements for the exact rules.
func Or(orStmts []Statement) Statement {
	return combineStatements(orStmts, "OR")
}

// FilterNonEmpty returns the statements whose Stmt is not empty, in their
// original order.
//
// It filters IN PLACE: the result shares slice's backing array and the leading
// elements of slice are overwritten. Pass a copy if the original contents must
// be preserved.
func FilterNonEmpty(slice []Statement) []Statement {
	i := 0
	for _, el := range slice {
		if len(el.Stmt) > 0 {
			slice[i] = el
			i++
		}
	}
	return slice[:i]
}

// combineStatements joins stmts with the given separator.
//
// sep = "AND" or "OR"
//
// Rules:
//   - statements with an empty Stmt are ignored,
//   - no statements left  -> empty simple statement,
//   - one statement left  -> that statement, returned unchanged,
//   - two or more         -> a compound statement "s1 sep s2 sep ...", where each
//     compound operand is wrapped in brackets; its FieldName is the last
//     non-empty FieldName among the operands.
//
// The caller's slice is never modified.
func combineStatements(stmts []Statement, sep string) Statement {
	// FilterNonEmpty and quoteWithBracketsIfCompound both rewrite their argument
	// in place. Running them on the caller's slice would reorder the caller's
	// statements and permanently add brackets to them, so a second And/Or over
	// the same slice would yield duplicated or double-bracketed SQL. Work on a
	// private copy instead (Statement holds only strings and a bool, so a
	// shallow copy is enough).
	own := make([]Statement, len(stmts))
	copy(own, stmts)
	own = FilterNonEmpty(own)

	switch len(own) {
	case 0:
		return NewSimpleStatement("")
	case 1:
		return own[0]
	}

	own = quoteWithBracketsIfCompound(own)
	var fieldName string
	sql := ""
	for i, stmt := range own {
		sql += stmt.Stmt
		if i < len(own)-1 {
			sql += " " + sep + " "
		}
		if stmt.FieldName != "" {
			fieldName = stmt.FieldName
		}
	}
	return NewCompoundStatement(sql, fieldName)
}

// used to combine statements with AND/OR
// [a, b, a AND b] ==> ["a", "b", "(a AND b)"]
//
// The brackets are added IN PLACE: the elements of slice itself are modified
// and the same slice is returned.
func quoteWithBracketsIfCompound(slice []Statement) []Statement {
	for i := range slice {
		if slice[i].IsCompound {
			slice[i].Stmt = "(" + slice[i].Stmt + ")"
		}
	}
	return slice
}
Loading

0 comments on commit 5b73ca3

Please sign in to comment.