diff --git a/quesma/model/bucket_aggregations/ip_range.go b/quesma/model/bucket_aggregations/ip_range.go index 3d07bb84b..e34bdbd1f 100644 --- a/quesma/model/bucket_aggregations/ip_range.go +++ b/quesma/model/bucket_aggregations/ip_range.go @@ -98,8 +98,8 @@ func (interval IpInterval) ToWhereClause(field model.Expr) model.Expr { isBegin := interval.begin != UnboundedInterval isEnd := interval.end != UnboundedInterval && interval.end != BiggestIpv4 - begin := model.NewInfixExpr(field, ">=", model.NewLiteralSingleQuoted(interval.begin)) - end := model.NewInfixExpr(field, "<", model.NewLiteralSingleQuoted(interval.end)) + begin := model.NewInfixExpr(field, ">=", model.NewLiteralSingleQuoteString(interval.begin)) + end := model.NewInfixExpr(field, "<", model.NewLiteralSingleQuoteString(interval.end)) if isBegin && isEnd { return model.NewInfixExpr(begin, "AND", end) diff --git a/quesma/model/bucket_aggregations/terms.go b/quesma/model/bucket_aggregations/terms.go index e9c7dbfd4..e25cb0c0a 100644 --- a/quesma/model/bucket_aggregations/terms.go +++ b/quesma/model/bucket_aggregations/terms.go @@ -4,19 +4,31 @@ package bucket_aggregations import ( "context" + "fmt" "quesma/logger" "quesma/model" "quesma/util" + "quesma/util/regex" + "reflect" ) type Terms struct { ctx context.Context significant bool // true <=> significant_terms, false <=> terms - OrderByExpr model.Expr + // include is either: + // - a single value: for strings, it may be a regex. + // - an array: the field must exactly match one of the values (never a regex). + // Nil if missing in the request. + include any + // exclude is either: + // - a single value: for strings, it may be a regex. + // - an array: the field must exactly match one of the values (never a regex). + // Nil if missing in the request.
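+ // Exclude is applied in UpdateFieldForIncludeAndExclude below, which wraps the field in if(<not excluded>, field, NULL). Examples from the tests added in this PR: exclude: "K.*" (single string, treated as a regex), exclude: 2025 (single non-string value), exclude: ["Carlsen", "Kasparov"] (array, matched literally).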
+ exclude any } -func NewTerms(ctx context.Context, significant bool, orderByExpr model.Expr) Terms { - return Terms{ctx: ctx, significant: significant, OrderByExpr: orderByExpr} +func NewTerms(ctx context.Context, significant bool, include, exclude any) Terms { + return Terms{ctx: ctx, significant: significant, include: include, exclude: exclude} } func (query Terms) AggregationType() model.AggregationType { @@ -106,3 +118,104 @@ func (query Terms) key(row model.QueryResultRow) any { func (query Terms) parentCount(row model.QueryResultRow) any { return row.Cols[len(row.Cols)-3].Value } + +func (query Terms) UpdateFieldForIncludeAndExclude(field model.Expr) (updatedField model.Expr, didWeUpdateField bool) { + // Everywhere below we use the Clickhouse 'if' function: if(condition, then, else). + // In our case the field becomes: if(<field is not excluded>, field, NULL) + ifOrNull := func(condition model.Expr) model.FunctionExpr { + return model.NewFunction("if", condition, field, model.NullExpr) + } + + hasExclude := query.exclude != nil + excludeArr, excludeIsArray := query.exclude.([]any) + switch { + case hasExclude && excludeIsArray: + if len(excludeArr) == 0 { + return field, false + } + + // The select expr will be: if(field NOT IN tuple(excludeArr[0], excludeArr[1], ...), field, NULL) + exprs := make([]model.Expr, 0, len(excludeArr)) + for _, excludeVal := range excludeArr { + exprs = append(exprs, model.NewLiteralSingleQuoteString(excludeVal)) + } + return ifOrNull(model.NewInfixExpr(field, "NOT IN", model.NewTupleExpr(exprs...))), true + case hasExclude: + switch exclude := query.exclude.(type) { + case string: // hard case, might be a regex + funcName, patternExpr := regex.ToClickhouseExpr(exclude) + return ifOrNull(model.NewInfixExpr(field, "NOT "+funcName, patternExpr)), true + default: // easy case, never a regex + return ifOrNull(model.NewInfixExpr(field, "!=", model.NewLiteral(query.exclude))), true + } + + default: + return field, false // TODO implement similar support for 'include' in next PR + } +} + +// TODO make part of QueryType interface and implement for all aggregations +// TODO add bad requests to tests +// Doing so will ensure our logs capture 100% of what we're interested in (currently we see ~95%) +func CheckParamsTerms(ctx context.Context, paramsRaw any) error { + requiredParams := map[string]string{"field": "string"} + optionalParams := map[string]string{ + "size": "float64|string", // TODO should be int|string, will be fixed + "shard_size": "float64", // TODO should be int, will be fixed + "order": "order", // TODO add order type + "min_doc_count": "float64", // TODO should be int, will be fixed + "shard_min_doc_count": "float64", // TODO should be int, will be fixed + "show_term_doc_count_error": "bool", + "exclude": "not-checking-type-now-complicated", + "include": "not-checking-type-now-complicated", + "collect_mode": "string", + "execution_hint": "string", + "missing": "string", + "value_type": "string", + } + logIfYouSeeThemParams := []string{ + "shard_size", "min_doc_count", "shard_min_doc_count", + "show_term_doc_count_error", "collect_mode", "execution_hint", "value_type", + } + + params, ok := paramsRaw.(model.JsonMap) + if !ok { + return fmt.Errorf("params is not a map, but %+v", paramsRaw) + } + + // check that all required params are present + for paramName, paramType := range requiredParams { + paramVal, exists := params[paramName] + if !exists { + return fmt.Errorf("required parameter %s not found in Terms params", paramName) + } + if reflect.TypeOf(paramVal).Name() !=
paramType { // TODO I'll make a small rewrite to not use reflect here + return fmt.Errorf("required parameter %s is not of type %s, but %T", paramName, paramType, paramVal) + } + } + + // check that only required/optional params are present + for paramName := range params { + if _, isRequired := requiredParams[paramName]; !isRequired { + wantedType, isOptional := optionalParams[paramName] + if !isOptional { + return fmt.Errorf("unexpected parameter %s found in Terms params %v", paramName, params) + } + if wantedType == "not-checking-type-now-complicated" || wantedType == "order" || wantedType == "float64|string" { + continue // TODO: add that later + } + if reflect.TypeOf(params[paramName]).Name() != wantedType { // TODO I'll make a small rewrite to not use reflect here + return fmt.Errorf("optional parameter %s is not of type %s, but %T", paramName, wantedType, params[paramName]) + } + } + } + + // log a warning if any of these params are present + for _, warnParam := range logIfYouSeeThemParams { + if _, exists := params[warnParam]; exists { + logger.WarnWithCtxAndThrottling(ctx, "terms", warnParam, "we didn't expect %s in Terms params %v", warnParam, params) + } + } + + return nil +} diff --git a/quesma/model/expr.go b/quesma/model/expr.go index 3cf0a20d0..3ad5b330e 100644 --- a/quesma/model/expr.go +++ b/quesma/model/expr.go @@ -16,6 +16,7 @@ var ( InvalidExpr = Expr(nil) TrueExpr = NewLiteral(true) FalseExpr = NewLiteral(false) + NullExpr = NewLiteral("NULL") ) // ColumnRef is a reference to a column in a table, we can enrich it with more information (e.g. type used) as we go @@ -129,8 +130,14 @@ func NewLiteral(value any) LiteralExpr { return LiteralExpr{Value: value} } -func NewLiteralSingleQuoted(value string) LiteralExpr { - return LiteralExpr{Value: fmt.Sprintf("'%s'", value)} +// NewLiteralSingleQuoteString simply does: string -> 'string', anything else is left unchanged +func NewLiteralSingleQuoteString(value any) LiteralExpr { + switch v := value.(type) { + case string: + return LiteralExpr{Value: fmt.Sprintf("'%s'", v)} + default: + return LiteralExpr{Value: v} + } } // DistinctExpr is a representation of DISTINCT keyword in SQL, e.g. `SELECT DISTINCT` ... or `SELECT COUNT(DISTINCT ...)` diff --git a/quesma/model/expr_string_renderer.go b/quesma/model/expr_string_renderer.go index 7ab4adc28..f4c73fa6c 100644 --- a/quesma/model/expr_string_renderer.go +++ b/quesma/model/expr_string_renderer.go @@ -101,7 +101,7 @@ func (v *renderer) VisitInfix(e InfixExpr) interface{} { // I think in the future every infix op should be in braces.
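+ // Three rendering modes: ops starting with "_" and AND/OR are parenthesized; word-like ops (LIKE, IS, IN, NOT IN, REGEXP, UNION) are separated from their operands by spaces; everything else is concatenated without spaces.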
if strings.HasPrefix(e.Op, "_") || e.Op == "AND" || e.Op == "OR" { return fmt.Sprintf("(%v %v %v)", lhs, e.Op, rhs) - } else if strings.Contains(e.Op, "LIKE") || e.Op == "IS" || e.Op == "IN" || e.Op == "REGEXP" || strings.Contains(e.Op, "UNION") { + } else if strings.Contains(e.Op, "LIKE") || e.Op == "IS" || e.Op == "IN" || e.Op == "NOT IN" || e.Op == "REGEXP" || strings.Contains(e.Op, "UNION") { return fmt.Sprintf("%v %v %v", lhs, e.Op, rhs) } else { return fmt.Sprintf("%v%v%v", lhs, e.Op, rhs) diff --git a/quesma/queryparser/pancake_aggregation_parser_buckets.go b/quesma/queryparser/pancake_aggregation_parser_buckets.go index 387e2662e..4b65d0319 100644 --- a/quesma/queryparser/pancake_aggregation_parser_buckets.go +++ b/quesma/queryparser/pancake_aggregation_parser_buckets.go @@ -152,20 +152,33 @@ func (cw *ClickhouseQueryTranslator) parseDateHistogram(aggregation *pancakeAggr // aggrName - "terms" or "significant_terms" func (cw *ClickhouseQueryTranslator) parseTermsAggregation(aggregation *pancakeAggregationTreeNode, params QueryMap, aggrName string) error { + if err := bucket_aggregations.CheckParamsTerms(cw.Ctx, params); err != nil { + return err + } + + terms := bucket_aggregations.NewTerms( + cw.Ctx, aggrName == "significant_terms", params["include"], params["exclude"], + ) + + var didWeAddMissing, didWeUpdateFieldHere bool field := cw.parseFieldField(params, aggrName) - field, didWeAddMissing := cw.addMissingParameterIfPresent(field, params) - if !didWeAddMissing { + field, didWeAddMissing = cw.addMissingParameterIfPresent(field, params) + field, didWeUpdateFieldHere = terms.UpdateFieldForIncludeAndExclude(field) + + // If we updated the field above, the select becomes if(condition, field, NULL), so we also need to filter out those NULL buckets later + if !didWeAddMissing || didWeUpdateFieldHere { aggregation.filterOutEmptyKeyBucket = true } const defaultSize = 10 size := cw.parseSize(params, defaultSize) + orderBy, err := cw.parseOrder(params, []model.Expr{field}) if err != nil { return err } - aggregation.queryType = bucket_aggregations.NewTerms(cw.Ctx, aggrName == "significant_terms", orderBy[0]) // TODO probably full, not [0] + aggregation.queryType = terms aggregation.selectedColumns = append(aggregation.selectedColumns, field) aggregation.limit = size aggregation.orderBy = orderBy diff --git a/quesma/queryparser/query_parser.go b/quesma/queryparser/query_parser.go index 0722c088b..e38f47de8 100644 --- a/quesma/queryparser/query_parser.go +++ b/quesma/queryparser/query_parser.go @@ -18,6 +18,7 @@ import ( "quesma/quesma/types" "quesma/schema" "quesma/util" + "quesma/util/regex" "strconv" "strings" "unicode" @@ -886,28 +887,13 @@ func (cw *ClickhouseQueryTranslator) parseRegexp(queryMap QueryMap) (result mode return model.NewSimpleQueryInvalid() } - // really simple == (out of all special characters, only . and .* may be present) - isPatternReallySimple := func(pattern string) bool { - // any special characters excluding . and * not allowed. Also (not the most important check) * can't be first character. - if strings.ContainsAny(pattern, `?+|{}[]()"\`) || (len(pattern) > 0 && pattern[0] == '*') { - return false - } - // .* allowed, but [any other char]* - not - for i, char := range pattern[1:] { - if char == '*' && pattern[i] != '.'
{ - return false - } - } - return true - } - - for fieldName, parametersRaw := range queryMap { - parameters, ok := parametersRaw.(QueryMap) + for fieldName, paramsRaw := range queryMap { + params, ok := paramsRaw.(QueryMap) if !ok { - logger.WarnWithCtx(cw.Ctx).Msgf("invalid regexp parameters type: %T, value: %v", parametersRaw, parametersRaw) + logger.WarnWithCtx(cw.Ctx).Msgf("invalid regexp parameters type: %T, value: %v", paramsRaw, paramsRaw) return model.NewSimpleQueryInvalid() } - patternRaw, exists := parameters["value"] + patternRaw, exists := params["value"] if !exists { logger.WarnWithCtx(cw.Ctx).Msgf("no value in regexp query: %v", queryMap) return model.NewSimpleQueryInvalid() @@ -918,21 +904,13 @@ func (cw *ClickhouseQueryTranslator) parseRegexp(queryMap QueryMap) (result mode return model.NewSimpleQueryInvalid() } - if len(parameters) > 1 { - logger.WarnWithCtx(cw.Ctx).Msgf("unsupported regexp parameters: %v", parameters) + if len(params) > 1 { + logger.WarnWithCtx(cw.Ctx).Msgf("unsupported regexp parameters: %v", params) } - var funcName string - if isPatternReallySimple(pattern) { - pattern = strings.ReplaceAll(pattern, "_", `\_`) - pattern = strings.ReplaceAll(pattern, ".*", "%") - pattern = strings.ReplaceAll(pattern, ".", "_") - funcName = "LIKE" - } else { // this Clickhouse function is much slower, so we use it only for complex regexps - funcName = "REGEXP" - } - return model.NewSimpleQuery( - model.NewInfixExpr(model.NewColumnRef(fieldName), funcName, model.NewLiteral("'"+pattern+"'")), true) + clickhouseFuncName, patternExpr := regex.ToClickhouseExpr(pattern) + clickhouseExpr := model.NewInfixExpr(model.NewColumnRef(fieldName), clickhouseFuncName, patternExpr) + return model.NewSimpleQuery(clickhouseExpr, true) } logger.ErrorWithCtx(cw.Ctx).Msg("parseRegexp: theoretically unreachable code") diff --git a/quesma/testdata/aggregation_requests_2.go b/quesma/testdata/aggregation_requests_2.go index 6530c5618..21acfba83 100644 --- a/quesma/testdata/aggregation_requests_2.go +++ b/quesma/testdata/aggregation_requests_2.go @@ -4689,4 +4689,605 @@ var AggregationTests2 = []AggregationTestCase{ "aggr__my_buckets__key_1" ASC LIMIT 4`, }, + { // [70] + TestName: "simplest terms with exclude (array of values)", + // TODO add ' somewhere in exclude after the merge! 
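+ // Note: "Fis._er*" below sits inside an array, so it is matched literally, never as a regex (hence the plain NOT IN tuple(...) in the expected SQL).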
+ QueryRequestJson: ` + { + "aggs": { + "1": { + "terms": { + "field": "chess_goat", + "size": 2, + "exclude": ["Carlsen", "Kasparov", "Fis._er*"] + } + } + }, + "size": 0, + "track_total_hits": true + }`, + // I omit "took", "timed_out", "_shards", and "hits" from the response for brevity (they can also be easily unit-tested) + ExpectedResponse: ` + { + "aggregations": { + "1": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 7416, + "buckets": [ + { + "key": "My dad", + "doc_count": 3323 + }, + { + "key": "Barack Obama", + "doc_count": 3261 + } + ] + } + } + }`, + ExpectedPancakeResults: []model.QueryResultRow{ + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__1__parent_count", int64(14000)), + model.NewQueryResultCol("aggr__1__key_0", "My dad"), + model.NewQueryResultCol("aggr__1__count", int64(3323)), + }}, + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__1__parent_count", int64(14000)), + model.NewQueryResultCol("aggr__1__key_0", "Barack Obama"), + model.NewQueryResultCol("aggr__1__count", int64(3261)), + }}, + }, + ExpectedPancakeSQL: ` + SELECT sum(count(*)) OVER () AS "aggr__1__parent_count", + if("chess_goat" NOT IN tuple('Carlsen', 'Kasparov', 'Fis._er*'), "chess_goat", NULL) + AS "aggr__1__key_0", count(*) AS "aggr__1__count" + FROM __quesma_table_name + GROUP BY if("chess_goat" NOT IN tuple('Carlsen', 'Kasparov', 'Fis._er*'), "chess_goat", NULL) AS "aggr__1__key_0" + ORDER BY "aggr__1__count" DESC, "aggr__1__key_0" ASC + LIMIT 3`, + }, + { // [71] + TestName: "simplest terms with exclude (single value, no regex)", + QueryRequestJson: ` + { + "aggs": { + "1": { + "terms": { + "field": "agi_birth_year", + "size": 1, + "exclude": 2025 + } + } + }, + "size": 0, + "track_total_hits": true + }`, + // I omit "took", "timed_out", "_shards", and "hits" from the response for brevity (they can also be easily unit-tested) + ExpectedResponse: ` + { + "aggregations": { + "1": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 10700, + "buckets": [ + { + "key": 2024, + "doc_count": 3300 + } + ] + } + } + }`, + ExpectedPancakeResults: []model.QueryResultRow{ + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__1__parent_count", int64(14000)), + model.NewQueryResultCol("aggr__1__key_0", nil), + model.NewQueryResultCol("aggr__1__count", int64(10000)), + }}, + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__1__parent_count", int64(14000)), + model.NewQueryResultCol("aggr__1__key_0", 2024), + model.NewQueryResultCol("aggr__1__count", int64(3300)), + }}, + }, + ExpectedPancakeSQL: ` + SELECT sum(count(*)) OVER () AS "aggr__1__parent_count", + if("agi_birth_year"!=2025, "agi_birth_year", NULL) AS "aggr__1__key_0", + count(*) AS "aggr__1__count" + FROM __quesma_table_name + GROUP BY if("agi_birth_year"!=2025, "agi_birth_year", NULL) AS "aggr__1__key_0" + ORDER BY "aggr__1__count" DESC, "aggr__1__key_0" ASC + LIMIT 2`, + }, + { // [72] + TestName: "simplest terms with exclude (empty array)", + QueryRequestJson: ` + { + "aggs": { + "1": { + "terms": { + "field": "agi_birth_year", + "size": 1, + "exclude": [] + } + } + }, + "size": 0, + "track_total_hits": true + }`, + // I omit "took", "timed_out", "_shards", and "hits" from the response for brevity (they can also be easily unit-tested) + ExpectedResponse: ` + { + "aggregations": { + "1": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 700, + "buckets": [ + { + "key": 2024, + "doc_count": 300 + } + ] + } + } + }`, + ExpectedPancakeResults: 
[]model.QueryResultRow{ + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__1__parent_count", int64(1000)), + model.NewQueryResultCol("aggr__1__key_0", nil), + model.NewQueryResultCol("aggr__1__count", int64(600)), + }}, + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__1__parent_count", int64(1000)), + model.NewQueryResultCol("aggr__1__key_0", 2024), + model.NewQueryResultCol("aggr__1__count", int64(300)), + }}, + }, + ExpectedPancakeSQL: ` + SELECT sum(count(*)) OVER () AS "aggr__1__parent_count", + "agi_birth_year" AS "aggr__1__key_0", count(*) AS "aggr__1__count" + FROM __quesma_table_name + GROUP BY "agi_birth_year" AS "aggr__1__key_0" + ORDER BY "aggr__1__count" DESC, "aggr__1__key_0" ASC + LIMIT 2`, + }, + { // [73] + TestName: "simplest terms with exclude (of strings), regression test", + QueryRequestJson: ` + { + "aggs": { + "1": { + "terms": { + "field": "chess_goat", + "size": 1, + "exclude": ["abc"] + } + } + }, + "size": 0, + "track_total_hits": true + }`, + // I omit "took", "timed_out", "_shards", and "hits" from the response for brevity (they can also be easily unit-tested) + ExpectedResponse: ` + { + "aggregations": { + "1": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 700, + "buckets": [ + { + "key": 2024, + "doc_count": 300 + } + ] + } + } + }`, + ExpectedPancakeResults: []model.QueryResultRow{ + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__1__parent_count", int64(1000)), + model.NewQueryResultCol("aggr__1__key_0", nil), + model.NewQueryResultCol("aggr__1__count", int64(600)), + }}, + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__1__parent_count", int64(1000)), + model.NewQueryResultCol("aggr__1__key_0", 2024), + model.NewQueryResultCol("aggr__1__count", int64(300)), + }}, + }, + ExpectedPancakeSQL: ` + SELECT sum(count(*)) OVER () AS "aggr__1__parent_count", + if("chess_goat" NOT IN 'abc', "chess_goat", NULL) AS "aggr__1__key_0", + count(*) AS "aggr__1__count" + FROM __quesma_table_name + GROUP BY if("chess_goat" NOT IN 'abc', "chess_goat", NULL) AS "aggr__1__key_0" + ORDER BY "aggr__1__count" DESC, "aggr__1__key_0" ASC + LIMIT 2`, + }, + { // [74] + TestName: "terms with exclude (more complex, string field with exclude regex)", + // One simple test, for more regex tests see util/regex unit tests + QueryRequestJson: ` + { + "aggs": { + "1": { + "terms": { + "field": "chess_goat", + "size": 1, + "exclude": "K.*" + } + } + }, + "size": 0, + "track_total_hits": true + }`, + // I omit "took", "timed_out", "_shards", and "hits" from the response for brevity (they can also be easily unit-tested) + ExpectedResponse: ` + { + "aggregations": { + "1": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 1, + "buckets": [ + { + "key": "Paul Morphy", + "doc_count": 13999 + } + ] + } + } + }`, + ExpectedPancakeResults: []model.QueryResultRow{ + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__1__parent_count", int64(14000)), + model.NewQueryResultCol("aggr__1__key_0", "Paul Morphy"), + model.NewQueryResultCol("aggr__1__count", int64(13999)), + }}, + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__1__parent_count", int64(14000)), + model.NewQueryResultCol("aggr__1__key_0", nil), + model.NewQueryResultCol("aggr__1__count", int64(1)), + }}, + }, + ExpectedPancakeSQL: ` + SELECT sum(count(*)) OVER () AS "aggr__1__parent_count", + if("chess_goat" NOT LIKE 'K%', "chess_goat", NULL) AS "aggr__1__key_0", + count(*) AS "aggr__1__count" + FROM __quesma_table_name + 
GROUP BY if("chess_goat" NOT LIKE 'K%', "chess_goat", NULL) AS "aggr__1__key_0" + ORDER BY "aggr__1__count" DESC, "aggr__1__key_0" ASC + LIMIT 2`, + }, + { // [75] + TestName: "complex terms with exclude: nested terms + 2 metrics", + QueryRequestJson: ` + { + "aggs": { + "terms1": { + "aggs": { + "metric1": { + "avg": { + "field": "DistanceMiles" + } + }, + "terms2": { + "aggs": { + "metric2": { + "sum": { + "field": "AvgTicketPrice" + } + } + }, + "terms": { + "field": "DestCityName", + "size": 1 + } + } + }, + "terms": { + "exclude": [ + "a", + "b" + ], + "field": "Carrier", + "size": 2 + } + } + }, + "size": 0, + "track_total_hits": true + }`, + // I omit "took", "timed_out", "_shards", and "hits" from the response for brevity (they can also be easily unit-tested) + ExpectedResponse: ` + { + "aggregations": { + "terms1": { + "buckets": [ + { + "doc_count": 3323, + "key": "Logstash Airways", + "metric1": { + "value": 4451.946294580208 + }, + "terms2": { + "buckets": [ + { + "doc_count": 173, + "key": "Zurich", + "metric2": { + "value": 102370.42402648926 + } + } + ], + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 3150 + } + }, + { + "doc_count": 3261, + "key": "JetBeats", + "metric1": { + "value": 4434.670874554115 + }, + "terms2": { + "buckets": [ + { + "doc_count": 167, + "key": "Zurich", + "metric2": { + "value": 92215.76377868652 + } + } + ], + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 3094 + } + } + ], + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 6430 + } + } + }`, + ExpectedPancakeResults: []model.QueryResultRow{ + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__terms1__parent_count", int64(13014)), + model.NewQueryResultCol("aggr__terms1__key_0", "Logstash Airways"), + model.NewQueryResultCol("aggr__terms1__count", int64(3323)), + model.NewQueryResultCol("metric__terms1__metric1_col_0", 4451.946294580208), + model.NewQueryResultCol("aggr__terms1__terms2__parent_count", int64(3323)), + model.NewQueryResultCol("aggr__terms1__terms2__key_0", "Zurich"), + model.NewQueryResultCol("aggr__terms1__terms2__count", int64(173)), + model.NewQueryResultCol("metric__terms1__terms2__metric2_col_0", 102370.42402648926), + }}, + {Cols: []model.QueryResultCol{ // should be discarded by us because of terms2's size=1 + model.NewQueryResultCol("aggr__terms1__parent_count", int64(13014)), + model.NewQueryResultCol("aggr__terms1__key_0", "Logstash Airways"), + model.NewQueryResultCol("aggr__terms1__count", int64(3323)), + model.NewQueryResultCol("metric__terms1__metric1_col_0", 4451.946294580208), + model.NewQueryResultCol("aggr__terms1__terms2__parent_count", int64(3323)), + model.NewQueryResultCol("aggr__terms1__terms2__key_0", "Wąchock"), + model.NewQueryResultCol("aggr__terms1__terms2__count", int64(150)), + model.NewQueryResultCol("metric__terms1__terms2__metric2_col_0", nil), + }}, + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__terms1__parent_count", int64(13014)), + model.NewQueryResultCol("aggr__terms1__key_0", "JetBeats"), + model.NewQueryResultCol("aggr__terms1__count", int64(3261)), + model.NewQueryResultCol("metric__terms1__metric1_col_0", 4434.670878262596), + model.NewQueryResultCol("aggr__terms1__terms2__parent_count", int64(3261)), + model.NewQueryResultCol("aggr__terms1__terms2__key_0", "Zurich"), + model.NewQueryResultCol("aggr__terms1__terms2__count", int64(167)), + model.NewQueryResultCol("metric__terms1__terms2__metric2_col_0", 92215.763779), + }}, + {Cols: []model.QueryResultCol{ // should be 
discarded by us because of terms2's size=1 + model.NewQueryResultCol("aggr__terms1__parent_count", int64(13014)), + model.NewQueryResultCol("aggr__terms1__key_0", "JetBeats"), + model.NewQueryResultCol("aggr__terms1__count", int64(3261)), + model.NewQueryResultCol("metric__terms1__metric1_col_0", 4434.670878262596), + model.NewQueryResultCol("aggr__terms1__terms2__parent_count", int64(3261)), + model.NewQueryResultCol("aggr__terms1__terms2__key_0", "Wąchock"), + model.NewQueryResultCol("aggr__terms1__terms2__count", int64(147)), + model.NewQueryResultCol("metric__terms1__terms2__metric2_col_0", 90242.31663285477), + }}, + {Cols: []model.QueryResultCol{ // should be discarded by us because of terms1's size=2 + model.NewQueryResultCol("aggr__terms1__parent_count", int64(13014)), + model.NewQueryResultCol("aggr__terms1__key_0", "Kibana Airlines"), + model.NewQueryResultCol("aggr__terms1__count", int64(3219)), + model.NewQueryResultCol("metric__terms1__metric1_col_0", 4335.019248495363), + model.NewQueryResultCol("aggr__terms1__terms2__parent_count", int64(3219)), + model.NewQueryResultCol("aggr__terms1__terms2__key_0", "Zurich"), + model.NewQueryResultCol("aggr__terms1__terms2__count", int64(173)), + model.NewQueryResultCol("metric__terms1__terms2__metric2_col_0", 99314.3501429406), + }}, + }, + ExpectedPancakeSQL: ` + SELECT "aggr__terms1__parent_count", "aggr__terms1__key_0", + "aggr__terms1__count", "metric__terms1__metric1_col_0", + "aggr__terms1__terms2__parent_count", "aggr__terms1__terms2__key_0", + "aggr__terms1__terms2__count", "metric__terms1__terms2__metric2_col_0" + FROM ( + SELECT "aggr__terms1__parent_count", "aggr__terms1__key_0", + "aggr__terms1__count", "metric__terms1__metric1_col_0", + "aggr__terms1__terms2__parent_count", "aggr__terms1__terms2__key_0", + "aggr__terms1__terms2__count", "metric__terms1__terms2__metric2_col_0", + dense_rank() OVER (ORDER BY "aggr__terms1__count" DESC, + "aggr__terms1__key_0" ASC) AS "aggr__terms1__order_1_rank", + dense_rank() OVER (PARTITION BY "aggr__terms1__key_0" ORDER BY + "aggr__terms1__terms2__count" DESC, "aggr__terms1__terms2__key_0" ASC) AS + "aggr__terms1__terms2__order_1_rank" + FROM ( + SELECT sum(count(*)) OVER () AS "aggr__terms1__parent_count", + if("Carrier" NOT IN tuple('a', 'b'), "Carrier", NULL) AS "aggr__terms1__key_0", + sum(count(*)) OVER (PARTITION BY "aggr__terms1__key_0") AS + "aggr__terms1__count", + avgOrNullMerge(avgOrNullState("DistanceMiles")) OVER (PARTITION BY + "aggr__terms1__key_0") AS "metric__terms1__metric1_col_0", + sum(count(*)) OVER (PARTITION BY "aggr__terms1__key_0") AS + "aggr__terms1__terms2__parent_count", + "DestCityName" AS "aggr__terms1__terms2__key_0", + count(*) AS "aggr__terms1__terms2__count", + sumOrNull("AvgTicketPrice") AS "metric__terms1__terms2__metric2_col_0" + FROM __quesma_table_name + GROUP BY if("Carrier" NOT IN tuple('a', 'b'), "Carrier", NULL) AS + "aggr__terms1__key_0", "DestCityName" AS "aggr__terms1__terms2__key_0")) + WHERE ("aggr__terms1__order_1_rank"<=3 AND "aggr__terms1__terms2__order_1_rank" + <=2) + ORDER BY "aggr__terms1__order_1_rank" ASC, + "aggr__terms1__terms2__order_1_rank" ASC`, + }, + { // [76] + TestName: "terms with exclude, but with branched off aggregation tree", + QueryRequestJson: ` + { + "aggs": { + "terms1": { + "aggs": { + "metric1": { + "avg": { + "field": "DistanceMiles" + } + } + }, + "terms": { + "exclude": [ + "a", + "b" + ], + "field": "Carrier", + "size": 1 + } + }, + "terms2": { + "aggs": { + "metric1": { + "avg": { + "field": "DistanceMiles" + 
} + } + }, + "terms": { + "exclude": [ + "Logstash Airways", + ".*" + ], + "field": "Carrier", + "size": 2 + } + } + }, + "size": 0, + "track_total_hits": true + }`, + // Unlike in the tests above, the full response (including "took", "timed_out", "_shards", and "hits") is kept here + ExpectedResponse: ` + { + "_shards": { + "failed": 0, + "skipped": 0, + "successful": 1, + "total": 1 + }, + "aggregations": { + "terms1": { + "buckets": [ + { + "doc_count": 3323, + "key": "Logstash Airways", + "metric1": { + "value": 4451.946294580208 + } + } + ], + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 9691 + }, + "terms2": { + "buckets": [ + { + "doc_count": 3261, + "key": "JetBeats", + "metric1": { + "value": 4434.670874554115 + } + }, + { + "doc_count": 3219, + "key": "Kibana Airlines", + "metric1": { + "value": 4335.019245198367 + } + } + ], + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 6534 + } + }, + "hits": { + "hits": [], + "max_score": null, + "total": { + "relation": "eq", + "value": 13014 + } + }, + "timed_out": false, + "took": 18 + }`, + ExpectedPancakeResults: []model.QueryResultRow{ + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__terms1__parent_count", int64(13014)), + model.NewQueryResultCol("aggr__terms1__key_0", "Logstash Airways"), + model.NewQueryResultCol("aggr__terms1__count", int64(3323)), + model.NewQueryResultCol("metric__terms1__metric1_col_0", 4451.946294580208), + }}, + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__terms1__parent_count", int64(13014)), + model.NewQueryResultCol("aggr__terms1__key_0", "Discard"), + model.NewQueryResultCol("aggr__terms1__count", int64(5)), + model.NewQueryResultCol("metric__terms1__metric1_col_0", 6.20), + }}, + }, + ExpectedPancakeSQL: ` + SELECT sum(count(*)) OVER () AS "aggr__terms1__parent_count", + if("Carrier" NOT IN tuple('a', 'b'), "Carrier", NULL) AS "aggr__terms1__key_0" + , count(*) AS "aggr__terms1__count", + avgOrNull("DistanceMiles") AS "metric__terms1__metric1_col_0" + FROM __quesma_table_name + GROUP BY if("Carrier" NOT IN tuple('a', 'b'), "Carrier", NULL) AS + "aggr__terms1__key_0" + ORDER BY "aggr__terms1__count" DESC, "aggr__terms1__key_0" ASC + LIMIT 2`, + ExpectedAdditionalPancakeResults: [][]model.QueryResultRow{{ + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__terms2__parent_count", int64(13014)), + model.NewQueryResultCol("aggr__terms2__key_0", "JetBeats"), + model.NewQueryResultCol("aggr__terms2__count", int64(3261)), + model.NewQueryResultCol("metric__terms2__metric1_col_0", 4434.670874554115), + }}, + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__terms2__parent_count", int64(13014)), + model.NewQueryResultCol("aggr__terms2__key_0", "Kibana Airlines"), + model.NewQueryResultCol("aggr__terms2__count", int64(3219)), + model.NewQueryResultCol("metric__terms2__metric1_col_0", 4335.019245198367), + }}, + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("aggr__terms2__parent_count", int64(13014)), + model.NewQueryResultCol("aggr__terms2__key_0", "Discard"), + model.NewQueryResultCol("aggr__terms2__count", int64(11)), + model.NewQueryResultCol("metric__terms2__metric1_col_0", 42), + }}, + }}, + ExpectedAdditionalPancakeSQLs: []string{` + SELECT sum(count(*)) OVER () AS "aggr__terms2__parent_count", + if("Carrier" NOT IN tuple('Logstash Airways', '.*'), "Carrier", NULL) AS + "aggr__terms2__key_0", count(*) AS "aggr__terms2__count", + avgOrNull("DistanceMiles") AS "metric__terms2__metric1_col_0" + FROM
__quesma_table_name + GROUP BY if("Carrier" NOT IN tuple('Logstash Airways', '.*'), "Carrier", NULL) + AS "aggr__terms2__key_0" + ORDER BY "aggr__terms2__count" DESC, "aggr__terms2__key_0" ASC + LIMIT 3`}, + }, } diff --git a/quesma/util/regex/regex.go b/quesma/util/regex/regex.go new file mode 100644 index 000000000..e5a42aa89 --- /dev/null +++ b/quesma/util/regex/regex.go @@ -0,0 +1,40 @@ +// Copyright Quesma, licensed under the Elastic License 2.0. +// SPDX-License-Identifier: Elastic-2.0 +package regex + +import ( + "quesma/model" + "strings" +) + +// ToClickhouseExpr converts a regex pattern into a Clickhouse operator ("LIKE" or "REGEXP") and the corresponding pattern literal. +// It's our old heuristic; it may need to be improved. +func ToClickhouseExpr(pattern string) (clickhouseFuncName string, patternExpr model.Expr) { + // "really simple" == out of all special characters, only . and .* may be present + isPatternReallySimple := func(pattern string) bool { + // no special characters other than . and * are allowed; also (a less important check) * can't be the first character + if strings.ContainsAny(pattern, `?+|{}[]()"\`) || (len(pattern) > 0 && pattern[0] == '*') { + return false + } + // .* is allowed, but [any other char]* is not + for i, char := range pattern[1:] { + prevChar := pattern[i] + if char == '*' && prevChar != '.' { + return false + } + } + return true + } + + var funcName string + if isPatternReallySimple(pattern) { + pattern = strings.ReplaceAll(pattern, "_", `\_`) + pattern = strings.ReplaceAll(pattern, ".*", "%") + pattern = strings.ReplaceAll(pattern, ".", "_") + funcName = "LIKE" + } else { // Clickhouse's REGEXP is much slower, so we use it only for complex regexps + funcName = "REGEXP" + } + + return funcName, model.NewLiteral("'" + pattern + "'") +}
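For illustration, a minimal sketch of how ToClickhouseExpr above classifies a few patterns (it assumes the quesma/util/regex package added in this diff; the expected literals in the comments follow from the ReplaceAll rules and test [74], and are not printed by the program):

package main

import (
	"fmt"

	"quesma/util/regex"
)

func main() {
	// "Really simple" patterns (only . and .* as special characters) become LIKE:
	//   .* -> %, . -> _, and a literal _ is escaped as \_ .
	// Everything else falls back to the slower REGEXP with the pattern unchanged.
	for _, pattern := range []string{"K.*", "Fis._er", "K+.*"} {
		funcName, _ := regex.ToClickhouseExpr(pattern)
		fmt.Printf("%-8s => %s\n", pattern, funcName)
	}
	// Per the rules above, the returned pattern literals would be:
	//   K.*     => LIKE 'K%'        (.* becomes %)
	//   Fis._er => LIKE 'Fis_\_er'  (. becomes _, the literal _ is escaped)
	//   K+.*    => REGEXP 'K+.*'    (+ makes it complex, pattern unchanged)
}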