SQL Parser Performance (#731)
* eliminate channels from lexer
* refactor parser for no-channels lexer
* refactor misc tests for no-channels lexer

Signed-off-by: James Ranson <[email protected]>
jranson authored Aug 13, 2024
1 parent 6ef21df commit 65513a4
Showing 15 changed files with 172 additions and 137 deletions.
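The heart of the change: the lexer previously streamed `*token.Token` values to the parser over a channel, which required a consumer goroutine and per-token synchronization; it now appends to a `token.Tokens` slice and returns it. A minimal, self-contained sketch of the two shapes (simplified stand-in types, not the actual Trickster code):

```go
package main

import "fmt"

// Token is a simplified stand-in for the repo's token.Token.
type Token struct{ Val string }

// Before: the lexer sends each token on a channel, so a consumer
// goroutine must drain it concurrently or the send blocks.
func lexToChannel(input string, ch chan *Token) {
	for _, w := range []string{"select", "1"} { // stand-in for real scanning
		ch <- &Token{Val: w}
	}
	close(ch)
}

// After: the lexer appends to a slice and returns it; no goroutine,
// no per-token synchronization, and the parser can index and peek freely.
func lexToSlice(input string) []*Token {
	tokens := make([]*Token, 0, len(input)/4) // estimated token count
	for _, w := range []string{"select", "1"} {
		tokens = append(tokens, &Token{Val: w})
	}
	return tokens
}

func main() {
	ch := make(chan *Token, 8)
	go lexToChannel("select 1", ch)
	for t := range ch {
		fmt.Println("channel:", t.Val)
	}
	for _, t := range lexToSlice("select 1") {
		fmt.Println("slice:", t.Val)
	}
}
```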
61 changes: 33 additions & 28 deletions pkg/backends/clickhouse/parsing_test.go
@@ -162,49 +162,51 @@ func TestParseErrors(t *testing.T) {
 
 func TestAtWith(t *testing.T) {
 
-	rs := parsing.NewRunState(context.Background())
-	ch := rs.Tokens()
-	ch <- &token.Token{Typ: token.Space, Val: " "}
+	tk := token.Tokens{&token.Token{Typ: token.Space, Val: " "}}
+	rs := parsing.NewRunState(context.Background(), tk)
 	rs.Next()
 	f := atWith(nil, nil, rs)
 	if f != nil {
 		t.Error("expected nil StateFn")
 	}
 
-	rs = parsing.NewRunState(context.Background())
-	ch = rs.Tokens()
-	ch <- &token.Token{Typ: lsql.TokenWith, Val: "with"}
+	tk = token.Tokens{&token.Token{Typ: lsql.TokenWith, Val: "with"}}
+	rs = parsing.NewRunState(context.Background(), tk)
 	rs.Next()
 	atWith(nil, nil, rs)
 	if rs.Error() != parsing.ErrUnsupportedParser {
 		t.Error("expected ErrUnsupportedParser")
 	}
 
-	rs = parsing.NewRunState(context.Background())
-	ch = rs.Tokens()
-	ch <- &token.Token{Typ: lsql.TokenWith, Val: "with"}
-	ch <- &token.Token{Typ: lsql.TokenSelect, Val: "select"}
+	tk = token.Tokens{
+		&token.Token{Typ: lsql.TokenWith, Val: "with"},
+		&token.Token{Typ: lsql.TokenSelect, Val: "select"},
+	}
+
+	rs = parsing.NewRunState(context.Background(), tk)
 	rs.Next()
 	f = atWith(parser, parser, rs)
 	if f == nil {
 		t.Error("expected non-nil StateFn")
 	}
 
-	rs = parsing.NewRunState(context.Background())
-	ch = rs.Tokens()
-	ch <- &token.Token{Typ: lsql.TokenWith, Val: "with"}
-	ch <- &token.Token{Typ: token.EOF}
+	tk = token.Tokens{
+		&token.Token{Typ: lsql.TokenWith, Val: "with"},
+		&token.Token{Typ: token.EOF},
+	}
+	rs = parsing.NewRunState(context.Background(), tk)
 	rs.Next()
 	f = atWith(parser, parser, rs)
 	if f != nil {
 		t.Error("expected nil StateFn")
 	}
 
-	rs = parsing.NewRunState(context.Background())
-	ch = rs.Tokens()
-	ch <- &token.Token{Typ: lsql.TokenWith, Val: "with"}
-	ch <- &token.Token{Typ: token.Identifier, Val: "x"}
-	ch <- &token.Token{Typ: lsql.TokenSelect, Val: "select"}
+	tk = token.Tokens{
+		&token.Token{Typ: lsql.TokenWith, Val: "with"},
+		&token.Token{Typ: token.Identifier, Val: "x"},
+		&token.Token{Typ: lsql.TokenSelect, Val: "select"},
+	}
+	rs = parsing.NewRunState(context.Background(), tk)
 	rs.Next()
 	f = atWith(parser, parser, rs)
 	if f != nil {
@@ -216,7 +218,7 @@ func TestAtWith(t *testing.T) {
 }
 
 func TestAtPreWhere(t *testing.T) {
-	rs := parsing.NewRunState(context.Background())
+	rs := parsing.NewRunState(context.Background(), nil)
 	f := atPreWhere(nil, nil, rs)
 	if f != nil {
 		t.Error("expected nil StateFn")
@@ -227,19 +229,22 @@ func TestAtPreWhere(t *testing.T) {
 }
 
 func TestAtFormat(t *testing.T) {
-	rs := parsing.NewRunState(context.Background())
-	ch := rs.Tokens()
-	ch <- &token.Token{Typ: lsql.TokenComment}
-	ch <- &token.Token{Typ: token.EOF}
+
+	tk := token.Tokens{
+		&token.Token{Typ: lsql.TokenComment},
+		&token.Token{Typ: token.EOF},
+	}
+	rs := parsing.NewRunState(context.Background(), tk)
 	f := atFormat(nil, nil, rs)
 	if f != nil {
 		t.Error("expected nil StateFn")
 	}
 
-	rs = parsing.NewRunState(context.Background())
-	ch = rs.Tokens()
-	ch <- &token.Token{Typ: token.Identifier, Val: "UnsupportedFormat"}
-	ch <- &token.Token{Typ: token.EOF}
+	tk = token.Tokens{
+		&token.Token{Typ: token.Identifier, Val: "UnsupportedFormat"},
+		&token.Token{Typ: token.EOF},
+	}
+	rs = parsing.NewRunState(context.Background(), tk)
 	f = atFormat(nil, nil, rs)
 	if f != nil {
 		t.Error("expected nil StateFn")
2 changes: 1 addition & 1 deletion pkg/parsing/lex/lex.go
@@ -27,7 +27,7 @@ const EOF = -1
 
 // Lexer is the Lexer interface
 type Lexer interface {
-	Run(string, chan *token.Token)
+	Run(string) token.Tokens
 }
 
 // Options provides members that alter the behavior of the underlying Lexer
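With the interface now returning `token.Tokens`, callers make one synchronous call and range over the result. A toy implementation of the same interface shape (simplified stand-in types rather than the repo's `token` package):

```go
package main

import (
	"fmt"
	"strings"
)

// Simplified stand-ins for token.Token / token.Tokens.
type Token struct{ Val string }
type Tokens []*Token

// Lexer mirrors the updated interface: one string in, one token slice out.
type Lexer interface {
	Run(string) Tokens
}

// wordLexer is a toy Lexer that tokenizes on whitespace, used only
// to show the call shape of the channel-free interface.
type wordLexer struct{}

func (wordLexer) Run(input string) Tokens {
	out := make(Tokens, 0, len(input)/4)
	for _, f := range strings.Fields(input) {
		out = append(out, &Token{Val: strings.ToLower(f)})
	}
	return out
}

func main() {
	var l Lexer = wordLexer{}
	for _, t := range l.Run("SELECT 1") {
		fmt.Println(t.Val) // select, 1
	}
}
```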
23 changes: 12 additions & 11 deletions pkg/parsing/lex/run_state.go
@@ -24,16 +24,16 @@ import (
 	"github.com/trickstercache/trickster/v2/pkg/parsing/token"
 )
 
-// RunState contains all the information about a particular run
+// RunState contains all the information about a particular lexer run
 type RunState struct {
-	Input        string            // the string being scanned
-	InputLowered string            // the lowercase version of the string being scanned
-	InputWidth   int               // width of the input (so we don't have to keep calling len)
-	Pos          int               // current position in the input
-	Start        int               // start position of this Token
-	Width        int               // width of last rune read from input
-	Tokens       chan *token.Token // channel of scanned items
-	ParenDepth   int               // nesting depth of ( ) exprs
+	Input        string // the string being scanned
+	InputLowered string // the lowercase version of the string being scanned
+	InputWidth   int    // width of the input (to avoid multiple calls to len)
+	Pos          int    // current position in the input
+	Start        int    // start position of this Token
+	Width        int    // width of last rune read from input
+	ParenDepth   int    // nesting depth of ( ) exprs
+	Tokens       token.Tokens
 }
 
 // Next returns the next rune in the input.
@@ -62,15 +62,16 @@ func (rs *RunState) Backup() {
 
 // Emit passes a Token back to the client.
 func (rs *RunState) Emit(t token.Typ) {
-	rs.Tokens <- &token.Token{Typ: t, Pos: rs.Start, Val: rs.InputLowered[rs.Start:rs.Pos]}
+	rs.Tokens = append(rs.Tokens, &token.Token{Typ: t, Pos: rs.Start,
+		Val: rs.InputLowered[rs.Start:rs.Pos]})
 	rs.Start = rs.Pos
 }
 
 // EmitToken passes a pre-built Token back to the client. Returning nil allows the
 // function output to be used as a value when needed to consolidate EmitToken and the
 // likely subsequent return nil
 func (rs *RunState) EmitToken(i *token.Token) StateFn {
-	rs.Tokens <- i
+	rs.Tokens = append(rs.Tokens, i)
 	rs.Start = rs.Pos
 	return nil
 }
8 changes: 4 additions & 4 deletions pkg/parsing/lex/run_state_test.go
@@ -26,7 +26,7 @@ var testRunState = &RunState{
 	Input:        "Test",
 	InputLowered: "test",
 	InputWidth:   4,
-	Tokens:       make(chan *token.Token, 8),
+	Tokens:       make(token.Tokens, 0, 1),
 }
 
 func TestRunState(t *testing.T) {
@@ -88,7 +88,7 @@ func TestScanNumber(t *testing.T) {
 		Input:        "1234",
 		InputLowered: "1234",
 		InputWidth:   4,
-		Tokens:       make(chan *token.Token, 8),
+		Tokens:       make(token.Tokens, 0, 1),
 	}
 	b := tr.ScanNumber()
 	if !b {
@@ -167,7 +167,7 @@ func TestAtTeminator(t *testing.T) {
 		Input:        " ",
 		InputLowered: " ",
 		InputWidth:   1,
-		Tokens:       make(chan *token.Token, 8),
+		Tokens:       make(token.Tokens, 0, 1),
 	}
 	if !tr.AtTerminator() {
 		t.Error("expected true")
@@ -179,7 +179,7 @@ func TestErrorf(t *testing.T) {
 		Input:        " ",
 		InputLowered: " ",
 		InputWidth:   1,
-		Tokens:       make(chan *token.Token, 8),
+		Tokens:       make(token.Tokens, 0, 1),
 	}
 	tk := tr.Errorf("%s", "fail")
 	if tk.Val != "fail" {
13 changes: 7 additions & 6 deletions pkg/parsing/lex/sql/sql.go
@@ -77,24 +77,25 @@ func BaseKey() map[string]token.Typ {
 
 // Run runs the lexer against the provided string and returns tokens on the
 // provided channel. Run will end when the state is EOF or Error.
-func (l *sqllexer) Run(input string, ch chan *token.Token) {
+func (l *sqllexer) Run(input string) token.Tokens {
+	li := len(input)
 	rc := &lex.RunState{
 		Input:        input,
 		InputLowered: strings.ToLower(input),
-		InputWidth:   len(input),
-		Tokens:       ch,
+		InputWidth:   li,
+		Tokens:       make(token.Tokens, 0, li/4), // estimated # of tokens
 	}
 	for state := lexText; state != nil; {
 		state = state(l, rc)
 	}
-	close(ch)
+	return rc.Tokens
 }
 
 // state functions
 
 // lexEOLComment scans a // comment that terminates at the end of the line
 // it assumes you have already identified '//' and are positioned on the first slash
-func lexEOLComment(li lex.Lexer, rs *lex.RunState) lex.StateFn {
+func lexEOLComment(_ lex.Lexer, rs *lex.RunState) lex.StateFn {
 	rs.Pos += 2
 	i := strings.Index(rs.InputLowered[rs.Pos:], "\n")
 	if i == -1 {
@@ -112,7 +113,7 @@ func lexEOLComment(li lex.Lexer, rs *lex.RunState) lex.StateFn {
 }
 
 // lexComment scans a comment. The left comment marker is known to be present.
-func lexComment(li lex.Lexer, rs *lex.RunState) lex.StateFn {
+func lexComment(_ lex.Lexer, rs *lex.RunState) lex.StateFn {
 	rs.Pos += len(leftComment)
 	i := strings.Index(rs.InputLowered[rs.Pos:], rightComment)
 	if i < 0 {
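The `li/4` capacity hint above is a cheap way to avoid most slice growth: `append` grows the backing array geometrically, so even a rough estimate leaves only a few reallocations, and underestimating is always safe. A standalone illustration of the effect (plain Go, not the repo's types; the four-bytes-per-token figure is the commit's heuristic, not a measured constant):

```go
package main

import "fmt"

func main() {
	input := "SELECT t1.x AS x FROM test_db.test_table t1 LIMIT 100"

	// Hinted, in the spirit of make(token.Tokens, 0, len(input)/4).
	hinted := make([]string, 0, len(input)/4)
	// Unhinted: starts at zero capacity and must grow repeatedly.
	var bare []string

	hintedGrows, bareGrows := 0, 0
	hc, bc := cap(hinted), cap(bare)
	for i := 0; i < 64; i++ { // pretend the lexer emits 64 tokens
		hinted = append(hinted, "tok")
		bare = append(bare, "tok")
		if cap(hinted) != hc { // capacity changed: a reallocation happened
			hc, hintedGrows = cap(hinted), hintedGrows+1
		}
		if cap(bare) != bc {
			bc, bareGrows = cap(bare), bareGrows+1
		}
	}
	fmt.Printf("hinted: %d reallocations; unhinted: %d\n", hintedGrows, bareGrows)
}
```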
28 changes: 9 additions & 19 deletions pkg/parsing/lex/sql/sql_test.go
@@ -18,7 +18,6 @@ package sql
 
 import (
 	"fmt"
-	"sync"
 	"testing"
 
 	"github.com/trickstercache/trickster/v2/pkg/parsing/lex"
@@ -30,14 +29,12 @@ type mockParser struct {
 	l   lex.Lexer
 	lo  *lex.Options
 	err string
-	wg  sync.WaitGroup
 }
 
 // this mock parser simply drains the channel to prevent the
 // lexer from blocking on a full channel
-func (p *mockParser) run() {
-	var t *token.Token
-	for ; ; t = <-p.ch {
+func (p *mockParser) run(tokens token.Tokens) {
+	for _, t := range tokens {
 		if t != nil && t.Typ == token.Error {
 			p.err = t.Val
 		}
@@ -46,7 +43,6 @@ func (p *mockParser) run() {
 			break
 		}
 	}
-	p.wg.Done()
 }
 
 func newLexTestHarness(lo *lex.Options) *mockParser {
@@ -140,10 +136,8 @@ func TestLexer(t *testing.T) {
 	for i, test := range tests {
 		t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
 			p := newLexTestHarness(test.lo)
-			p.wg.Add(1)
-			go p.run()
-			p.l.Run(test.in, p.ch)
-			p.wg.Wait()
+			tokens := p.l.Run(test.in)
+			p.run(tokens)
 			if p.err != test.expected {
 				t.Errorf(`expected "%v" got "%v"`, test.expected, p.err)
 			}
@@ -179,7 +173,7 @@ func TestStateFuncs(t *testing.T) {
 	for i, test := range tests {
 		t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
 			p := newLexTestHarness(nil)
-			rs := &lex.RunState{Tokens: p.ch}
+			rs := &lex.RunState{}
 			f := test.f(p.l, rs)
 			if (test.expected != nil && f == nil) ||
 				(test.expected == nil && f != nil) {
@@ -193,23 +187,19 @@ func TestLexIdentifierError(t *testing.T) {
 func TestLexIdentifierError(t *testing.T) {
 	expected := `bad character U+0040 '@'`
 	p := newLexTestHarness(nil)
-	p.wg.Add(1)
-	go p.run()
-	rs := &lex.RunState{Tokens: p.ch, Pos: 2, InputLowered: "@@@@@@", InputWidth: 6}
+	rs := &lex.RunState{Pos: 2, InputLowered: "@@@@@@", InputWidth: 6}
 	f := lexIdentifier
 	for f != nil {
 		f = f(p.l, rs)
 	}
-	p.wg.Wait()
+	p.run(rs.Tokens)
 	if p.err != expected {
 		t.Errorf("expected `%s` got `%s`", expected, p.err)
 	}
 }
 
 func TestFullSQLLex(t *testing.T) {
 	p := newLexTestHarness(nil)
-	p.wg.Add(1)
-	go p.run()
 	stmt := `SELECT t1.x as x, t2.count(*) as cnt FROM test_db.test_table t1
 INNER JOIN test_db.test_table2 t2 ON
 t1.id = t2.secondary_id
@@ -224,8 +214,8 @@ func TestFullSQLLex(t *testing.T) {
 HAVING cnt > 15 // EOL COMMENT
 ORDER BY x
 LIMIT 100 // EOL COMMENT`
-	p.l.Run(stmt, p.ch)
-	p.wg.Wait()
+	tokens := p.l.Run(stmt)
+	p.run(tokens)
 	if p.err != "" {
 		t.Error(p.err)
 	}
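With the channel gone, the test harness no longer needs a goroutine and WaitGroup to drain tokens while the lexer runs: lexing completes synchronously, and assertions run over the returned slice afterward. The general shape of that change (illustrative sketch, simplified from the diff above):

```go
package main

import "fmt"

type token struct{ val string }

// lex is a stand-in for the channel-free Lexer.Run.
func lex(input string) []*token {
	return []*token{{val: "select"}, {val: "1"}}
}

// check mirrors the mock parser's role: scan the finished token
// slice for errors after lexing has fully completed.
func check(tokens []*token) error {
	for _, t := range tokens {
		if t.val == "error" {
			return fmt.Errorf("lex error")
		}
	}
	return nil
}

func main() {
	// No go/WaitGroup choreography: run, then inspect.
	tokens := lex("select 1")
	if err := check(tokens); err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println("ok:", len(tokens), "tokens")
}
```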
26 changes: 13 additions & 13 deletions pkg/parsing/run_state.go
@@ -24,21 +24,25 @@ import (
 
 // RunState maintains the state of a unique parsing run
 type RunState struct {
-	tokens                   chan *token.Token
+	tokens                   token.Tokens
 	prev, curr, next, lastkw *token.Token
 	err                      error
 	ctx                      context.Context
 	nextOverride             StateFn
 	isPeeked                 bool
 	results                  map[string]interface{}
+	pos                      int
+	cnt                      int
 }
 
 // NewRunState returns a new RunState object for the parser
-func NewRunState(ctx context.Context) *RunState {
+func NewRunState(ctx context.Context, tokens token.Tokens) *RunState {
+	t := tokens.Compress()
 	rs := &RunState{
-		tokens:  make(chan *token.Token, 8),
 		ctx:     ctx,
 		results: make(map[string]interface{}),
+		tokens:  t,
+		cnt:     len(t),
+		pos:     -1,
 	}
 	return rs
 }
@@ -129,9 +133,10 @@ func (rs *RunState) Peek() *token.Token {
 	if rs.curr != nil && rs.curr.Typ == token.EOF {
 		return rs.curr
 	}
-	// this filters nil tokens so the parser is guaranteed to never encounter them
-	for ; rs.next == nil; rs.next = <-rs.tokens {
-	}
+	if rs.pos+1 >= rs.cnt {
+		return &token.Token{Typ: token.EOF, Pos: rs.pos}
+	}
+	rs.next = rs.tokens[rs.pos+1]
 	return rs.next
}

@@ -140,17 +145,12 @@ func (rs *RunState) IsPeeked() bool {
 	return rs.next != nil
 }
 
-// Next retrieves the next location by peeking and then advancing
-// the state
+// Next retrieves the next location by peeking and then advancing the state
 func (rs *RunState) Next() *token.Token {
 	rs.Peek()
 	rs.prev = rs.curr
 	rs.curr = rs.next
+	rs.pos += 1
 	rs.next = nil
 	return rs.curr
 }
-
-// Tokens returns the Tokens Channel for the Run
-func (rs *RunState) Tokens() chan *token.Token {
-	return rs.tokens
-}
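With tokens held in a slice, the parser's lookahead reduces to cursor arithmetic: `Peek` reads `pos+1` and synthesizes an EOF token past the end, and `Next` advances the position. A condensed sketch of that pattern (simplified types; it mirrors the shape of the reworked `RunState` above, not its full behavior):

```go
package main

import "fmt"

type typ int

const (
	tEOF typ = iota
	tWord
)

type tok struct {
	t typ
	v string
}

// cursor is a minimal slice-backed token reader in the style of the
// reworked RunState: pos starts at -1, Peek looks at pos+1, Next advances.
type cursor struct {
	tokens []tok
	pos    int
}

func (c *cursor) Peek() tok {
	if c.pos+1 >= len(c.tokens) {
		return tok{t: tEOF} // synthesize EOF past the end
	}
	return c.tokens[c.pos+1]
}

func (c *cursor) Next() tok {
	t := c.Peek()
	if t.t != tEOF {
		c.pos++
	}
	return t
}

func main() {
	c := &cursor{tokens: []tok{{tWord, "select"}, {tWord, "1"}}, pos: -1}
	for t := c.Next(); t.t != tEOF; t = c.Next() {
		fmt.Println(t.v)
	}
}
```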
