Skip to content

Commit

Permalink
Resolve internal property names for column references in Lucene quer…
Browse files Browse the repository at this point in the history
…ies (#526)

The Lucene query parser needs to be able to substitute field names with
their internal representation.
  • Loading branch information
pivovarit authored Jul 13, 2024
1 parent d50c9b6 commit 399fd23
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 15 deletions.
6 changes: 5 additions & 1 deletion quesma/queryparser/lucene/expression.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,11 @@ func (p *luceneParser) buildWhereStatement(addDefaultOperator bool) model.Expr {
return invalidStatement
}
p.tokens = p.tokens[1:]
currentStatement = newLeafStatement([]string{currentToken.fieldName}, p.buildValue([]value{}, 0))
if name, resolved := p.fieldNameResolver.ResolveFieldName(currentToken.fieldName); resolved {
currentStatement = newLeafStatement([]string{name}, p.buildValue([]value{}, 0))
} else {
currentStatement = newLeafStatement([]string{currentToken.fieldName}, p.buildValue([]value{}, 0))
}
case separatorToken:
currentStatement = newLeafStatement(
p.defaultFieldNames,
Expand Down
30 changes: 18 additions & 12 deletions quesma/queryparser/lucene/lucene_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,23 @@ import (
// and tokens keep the rest (unparsed yet) part of the query.
// After parsing, the result statement is kept in p.WhereStatement (we should change it in the future)
// If you have multiple queries to parse, create a new luceneParser for each query.
type luceneParser struct {
ctx context.Context
tokens []token
defaultFieldNames []string
// This is a little awkward, at some point we should remove `WhereStatement` and just return the statement from `BuildWhereStatement`
// However, given parsing implementation, it's easier to keep it for now.
WhereStatement model.Expr
}
type (
// luceneParser carries the state for translating one Lucene query; a new
// parser is created for each query.
luceneParser struct {
ctx context.Context
// tokens holds the part of the query that has not been parsed yet.
tokens []token
defaultFieldNames []string
// fieldNameResolver is consulted when a leaf statement is built for a field
// named explicitly in the query; if it resolves the name, the resolved name
// is used in the statement instead of the one written in the query.
fieldNameResolver fieldNameResolver
// WhereStatement is a little awkward: at some point we should remove it and just
// return the statement from `BuildWhereStatement`.
// However, given the parsing implementation, it's easier to keep it for now.
WhereStatement model.Expr
}
// fieldNameResolver translates a field name as written in a query into the
// name that should appear in the generated statement.
fieldNameResolver interface {
// ResolveFieldName returns the resolved name for fieldName and true when a
// resolution exists. When the second result is false, the parser ignores the
// first result and keeps the field name exactly as written in the query.
ResolveFieldName(fieldName string) (string, bool)
}
)

func newLuceneParser(ctx context.Context, defaultFieldNames []string) luceneParser {
return luceneParser{ctx: ctx, defaultFieldNames: defaultFieldNames, tokens: make([]token, 0)}
func newLuceneParser(ctx context.Context, defaultFieldNames []string, resolver fieldNameResolver) luceneParser {
return luceneParser{ctx: ctx, defaultFieldNames: defaultFieldNames, tokens: make([]token, 0), fieldNameResolver: resolver}
}

const fuzzyOperator = '~'
Expand All @@ -67,8 +73,8 @@ var specialOperators = map[string]token{
string(rightParenthesis): rightParenthesisToken{},
}

func TranslateToSQL(ctx context.Context, query string, fields []string) model.Expr {
parser := newLuceneParser(ctx, fields)
func TranslateToSQL(ctx context.Context, query string, fields []string, resolver fieldNameResolver) model.Expr {
parser := newLuceneParser(ctx, fields, resolver)
return parser.translateToSQL(query)
}

Expand Down
32 changes: 31 additions & 1 deletion quesma/queryparser/lucene/lucene_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,41 @@ func TestTranslatingLuceneQueriesToSQL(t *testing.T) {
}
for i, tt := range append(properQueries, randomQueriesWithPossiblyIncorrectInput...) {
t.Run(strconv.Itoa(i), func(t *testing.T) {
parser := newLuceneParser(context.Background(), defaultFieldNames)
parser := newLuceneParser(context.Background(), defaultFieldNames, fixedFieldNameResolver{})
got := model.AsString(parser.translateToSQL(tt.query))
if got != tt.want {
t.Errorf("\ngot [%q]\nwant [%q]", got, tt.want)
}
})
}
}

// TestResolvePropertyNamesWhenTranslatingToSQL verifies that field names used
// in a Lucene query are replaced with whatever the parser's fieldNameResolver
// reports, and are kept as written when the resolver has no mapping for them.
func TestResolvePropertyNamesWhenTranslatingToSQL(t *testing.T) {
	type testCase struct {
		query        string
		nameResolver fieldNameResolver
		want         string
	}

	fallbackFields := []string{"title", "text"}
	cases := []testCase{
		{
			// No mapping configured: names pass through unchanged.
			query:        `title:"The Right Way" AND text:go!!`,
			nameResolver: fixedFieldNameResolver{},
			want:         `("title" = 'The Right Way' AND "text" = 'go!!')`,
		},
		{
			// "age" is mapped, so the resolved name must show up in the SQL.
			query:        `age:>10`,
			nameResolver: fixedFieldNameResolver{namesMap: map[string]string{"age": "foo"}},
			want:         `"foo" > '10'`,
		},
	}

	for idx, tc := range cases {
		t.Run(strconv.Itoa(idx), func(t *testing.T) {
			p := newLuceneParser(context.Background(), fallbackFields, tc.nameResolver)
			stmt := p.translateToSQL(tc.query)
			if got := model.AsString(stmt); got != tc.want {
				t.Errorf("\ngot [%q]\nwant [%q]", got, tc.want)
			}
		})
	}
}

// fixedFieldNameResolver is a test double for the parser's field-name resolver,
// backed by a fixed in-memory mapping.
type fixedFieldNameResolver struct {
	// namesMap maps a field name as written in the query to the name the
	// resolver should report. A nil map is valid and resolves nothing.
	namesMap map[string]string
}

// ResolveFieldName returns the mapped name and true when namesMap contains
// fieldName. On a miss it returns fieldName unchanged and false, the same
// shape the schema-registry-backed resolver uses, so code under test sees
// identical results from the fake and the production implementation.
func (f fixedFieldNameResolver) ResolveFieldName(fieldName string) (string, bool) {
	// Reading from a nil map is safe in Go and simply reports a miss.
	if mapped, ok := f.namesMap[fieldName]; ok {
		return mapped, true
	}
	return fieldName, false
}
17 changes: 16 additions & 1 deletion quesma/queryparser/query_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -731,10 +731,25 @@ func (cw *ClickhouseQueryTranslator) parseQueryString(queryMap QueryMap) model.S
query := queryMap["query"].(string) // query: (Required, string)

// we always call `TranslateToSQL` - Lucene parser returns "false" in case of invalid query
whereStmtFromLucene := lucene.TranslateToSQL(cw.Ctx, query, fields)
whereStmtFromLucene := lucene.TranslateToSQL(cw.Ctx, query, fields, schemaRegistryAdapter{tableName: cw.Table.Name, Registry: cw.SchemaRegistry})
return model.NewSimpleQuery(whereStmtFromLucene, true)
}

// schemaRegistryAdapter exposes a schema.Registry as the field-name resolver
// handed to lucene.TranslateToSQL, scoped to a single table.
type schemaRegistryAdapter struct {
// tableName is the name of the table whose schema is looked up in the registry.
tableName string
// Embedded registry that is queried for the schema of tableName.
schema.Registry
}

// ResolveFieldName looks up fieldName in the schema registered for the
// adapter's table. It returns the field's internal property name and true when
// both the schema and the field are found; otherwise it returns fieldName
// unchanged and false.
func (s schemaRegistryAdapter) ResolveFieldName(fieldName string) (string, bool) {
	tableSchema, schemaFound := s.Registry.FindSchema(schema.TableName(s.tableName))
	if !schemaFound {
		return fieldName, false
	}

	column, columnFound := tableSchema.ResolveField(fieldName)
	if !columnFound {
		return fieldName, false
	}

	return column.InternalPropertyName.AsString(), true
}

func (cw *ClickhouseQueryTranslator) parseNested(queryMap QueryMap) model.SimpleQuery {
if query, ok := queryMap["query"]; ok {
if queryAsMap, ok := query.(QueryMap); ok {
Expand Down

0 comments on commit 399fd23

Please sign in to comment.