Skip to content

Commit

Permalink
compiler refactors
Browse files Browse the repository at this point in the history
  • Loading branch information
chirst committed Jun 25, 2024
1 parent b420bb4 commit 8955e19
Show file tree
Hide file tree
Showing 4 changed files with 246 additions and 224 deletions.
89 changes: 52 additions & 37 deletions compiler/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,38 +15,53 @@ type token struct {
value string
}

// Token types assigned to token.tokenType by the lexer. The tk prefix is
// short for "token".
//
// NOTE(review): this listing comes from a rendered diff, so the upper-case
// names (the pre-rename constants) and the tk-prefixed names (their
// replacements) both appear in the same block. Only one set is expected to
// exist in the real file at a time — confirm against the repository.
const (
// KEYWORD is a reserved word, for example SELECT, FROM, or WHERE.
KEYWORD = iota + 1
// IDENTIFIER is a word that is not a keyword, such as a table or column name.
IDENTIFIER
// WHITESPACE is a space, tab, or newline.
WHITESPACE
// EOF (end of file) marks the end of input.
EOF
// SEPARATOR is punctuation such as "(", ",", or ";".
SEPARATOR
// OPERATOR is a symbol that operates on arguments.
OPERATOR
// PUNCTUATOR is punctuation that is neither a separator nor an operator.
PUNCTUATOR
// LITERAL is a quoted text value like 'foo'.
LITERAL
// NUMERIC is a numeric value like 1, 1.2, or -3.
NUMERIC
// tkKeyword is a reserved word, for example SELECT, FROM, or WHERE.
// scanWord upper-cases the value of keyword tokens.
tkKeyword = iota + 1
// tkIdentifier is a word that is not a keyword, such as a table or column
// name. Its value is kept exactly as written in the source.
tkIdentifier
// tkWhitespace is a space, tab, or newline. scanWhiteSpace consumes a whole
// run of whitespace and always reports the value as a single space.
tkWhitespace
// tkEOF (end of file) marks the end of input. getToken returns it when no
// scanner case matches, and Lex stops collecting tokens when it sees it.
tkEOF
// tkSeparator is punctuation such as "(", ",", or ";".
tkSeparator
// tkOperator is a symbol that operates on arguments.
tkOperator
// tkPunctuator is punctuation that is neither a separator nor an operator.
// scanAsterisk emits it for "*".
tkPunctuator
// tkLiteral is a quoted text value like 'foo'; the quotes are part of the
// token value.
tkLiteral
// tkNumeric is a numeric value like 1, 1.2, or -3.
// NOTE(review): scanDigit only extends the token while isDigit holds —
// confirm that 1.2 and -3 really lex as a single numeric token.
tkNumeric
)

// Reserved SQL words recognised by the lexer. The kw prefix is short for
// "keyword". Each constant holds the upper-case spelling, which is also the
// form scanWord stores in a keyword token's value.
const (
kwExplain = "EXPLAIN"
kwSelect = "SELECT"
kwFrom = "FROM"
kwCreate = "CREATE"
kwInsert = "INSERT"
kwInto = "INTO"
kwTable = "TABLE"
kwValues = "VALUES"
// kwInteger and kwText are the column type names used in CREATE TABLE.
kwInteger = "INTEGER"
kwText = "TEXT"
)

// keywords lists every reserved word, in upper case.
//
// NOTE(review): this listing comes from a rendered diff, so each word appears
// twice — once as the old string literal and once as the kw constant that
// replaces it. Presumably isKeyword consults this slice; its body is not
// visible here, so confirm how the lookup (and its case handling) works.
var keywords = []string{
"EXPLAIN",
"SELECT",
"FROM",
"CREATE",
"INSERT",
"INTO",
"TABLE",
"VALUES",
"INTEGER",
"TEXT",
kwExplain,
kwSelect,
kwFrom,
kwCreate,
kwInsert,
kwInto,
kwTable,
kwValues,
kwInteger,
kwText,
}

func (*lexer) isKeyword(w string) bool {
Expand All @@ -69,7 +84,7 @@ func (l *lexer) Lex() []token {
ret := []token{}
for {
t := l.getToken()
if t.tokenType == EOF {
if t.tokenType == tkEOF {
return ret
}
ret = append(ret, t)
Expand All @@ -93,7 +108,7 @@ func (l *lexer) getToken() token {
case l.isSingleQuote(r):
return l.scanLiteral()
}
return token{EOF, ""}
return token{tkEOF, ""}
}

func (l *lexer) peek(pos int) rune {
Expand All @@ -115,7 +130,7 @@ func (l *lexer) scanWhiteSpace() token {
for l.isWhiteSpace(l.peek(l.end)) {
l.next()
}
return token{tokenType: WHITESPACE, value: " "}
return token{tokenType: tkWhitespace, value: " "}
}

func (l *lexer) scanWord() token {
Expand All @@ -125,27 +140,27 @@ func (l *lexer) scanWord() token {
}
value := l.src[l.start:l.end]
if l.isKeyword(value) {
return token{tokenType: KEYWORD, value: strings.ToUpper(value)}
return token{tokenType: tkKeyword, value: strings.ToUpper(value)}
}
return token{tokenType: IDENTIFIER, value: value}
return token{tokenType: tkIdentifier, value: value}
}

// scanDigit lexes a run of digits into a numeric token. It advances once
// unconditionally (the caller has already decided the current rune starts a
// number), keeps advancing while the rune at l.end satisfies isDigit, and
// returns the consumed text l.src[l.start:l.end] as the token value.
//
// NOTE(review): the two return lines are the old (NUMERIC) and new
// (tkNumeric) sides of a rendered diff; only one belongs in the real file.
func (l *lexer) scanDigit() token {
l.next()
for l.isDigit(l.peek(l.end)) {
l.next()
}
return token{tokenType: NUMERIC, value: l.src[l.start:l.end]}
return token{tokenType: tkNumeric, value: l.src[l.start:l.end]}
}

// scanAsterisk advances once and returns the consumed text
// l.src[l.start:l.end] as a punctuator token.
//
// NOTE(review): the two return lines are the old (PUNCTUATOR) and new
// (tkPunctuator) sides of a rendered diff; only one belongs in the real file.
func (l *lexer) scanAsterisk() token {
l.next()
return token{tokenType: PUNCTUATOR, value: l.src[l.start:l.end]}
return token{tokenType: tkPunctuator, value: l.src[l.start:l.end]}
}

// scanSeparator advances once and returns the consumed text
// l.src[l.start:l.end] as a separator token (for example "(", ",", ")" or
// ";", as exercised by the lexer tests).
//
// NOTE(review): the two return lines are the old (SEPARATOR) and new
// (tkSeparator) sides of a rendered diff; only one belongs in the real file.
func (l *lexer) scanSeparator() token {
l.next()
return token{tokenType: SEPARATOR, value: l.src[l.start:l.end]}
return token{tokenType: tkSeparator, value: l.src[l.start:l.end]}
}

func (l *lexer) scanLiteral() token {
Expand All @@ -154,7 +169,7 @@ func (l *lexer) scanLiteral() token {
l.next()
}
l.next()
return token{tokenType: LITERAL, value: l.src[l.start:l.end]}
return token{tokenType: tkLiteral, value: l.src[l.start:l.end]}
}

func (*lexer) isWhiteSpace(r rune) bool {
Expand Down
162 changes: 81 additions & 81 deletions compiler/lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,25 @@ func TestLexSelect(t *testing.T) {
{
sql: "SELECT * FROM foo",
expected: []token{
{KEYWORD, "SELECT"},
{WHITESPACE, " "},
{PUNCTUATOR, "*"},
{WHITESPACE, " "},
{KEYWORD, "FROM"},
{WHITESPACE, " "},
{IDENTIFIER, "foo"},
{tkKeyword, "SELECT"},
{tkWhitespace, " "},
{tkPunctuator, "*"},
{tkWhitespace, " "},
{tkKeyword, "FROM"},
{tkWhitespace, " "},
{tkIdentifier, "foo"},
},
},
{
sql: "select * from foo",
expected: []token{
{KEYWORD, "SELECT"},
{WHITESPACE, " "},
{PUNCTUATOR, "*"},
{WHITESPACE, " "},
{KEYWORD, "FROM"},
{WHITESPACE, " "},
{IDENTIFIER, "foo"},
{tkKeyword, "SELECT"},
{tkWhitespace, " "},
{tkPunctuator, "*"},
{tkWhitespace, " "},
{tkKeyword, "FROM"},
{tkWhitespace, " "},
{tkIdentifier, "foo"},
},
},
{
Expand All @@ -42,40 +42,40 @@ func TestLexSelect(t *testing.T) {
from foo
`,
expected: []token{
{KEYWORD, "SELECT"},
{WHITESPACE, " "},
{PUNCTUATOR, "*"},
{WHITESPACE, " "},
{KEYWORD, "FROM"},
{WHITESPACE, " "},
{IDENTIFIER, "foo"},
{tkKeyword, "SELECT"},
{tkWhitespace, " "},
{tkPunctuator, "*"},
{tkWhitespace, " "},
{tkKeyword, "FROM"},
{tkWhitespace, " "},
{tkIdentifier, "foo"},
},
},
{
sql: "EXPLAIN SELECT 1",
expected: []token{
{KEYWORD, "EXPLAIN"},
{WHITESPACE, " "},
{KEYWORD, "SELECT"},
{WHITESPACE, " "},
{NUMERIC, "1"},
{tkKeyword, "EXPLAIN"},
{tkWhitespace, " "},
{tkKeyword, "SELECT"},
{tkWhitespace, " "},
{tkNumeric, "1"},
},
},
{
sql: "SELECT 12",
expected: []token{
{KEYWORD, "SELECT"},
{WHITESPACE, " "},
{NUMERIC, "12"},
{tkKeyword, "SELECT"},
{tkWhitespace, " "},
{tkNumeric, "12"},
},
},
{
sql: "SELECT 1;",
expected: []token{
{KEYWORD, "SELECT"},
{WHITESPACE, " "},
{NUMERIC, "1"},
{SEPARATOR, ";"},
{tkKeyword, "SELECT"},
{tkWhitespace, " "},
{tkNumeric, "1"},
{tkSeparator, ";"},
},
},
}
Expand All @@ -92,27 +92,27 @@ func TestLexCreate(t *testing.T) {
{
sql: "CREATE TABLE foo (id INTEGER, first_name TEXT, last_name TEXT)",
expected: []token{
{KEYWORD, "CREATE"},
{WHITESPACE, " "},
{KEYWORD, "TABLE"},
{WHITESPACE, " "},
{IDENTIFIER, "foo"},
{WHITESPACE, " "},
{SEPARATOR, "("},
{IDENTIFIER, "id"},
{WHITESPACE, " "},
{KEYWORD, "INTEGER"},
{SEPARATOR, ","},
{WHITESPACE, " "},
{IDENTIFIER, "first_name"},
{WHITESPACE, " "},
{KEYWORD, "TEXT"},
{SEPARATOR, ","},
{WHITESPACE, " "},
{IDENTIFIER, "last_name"},
{WHITESPACE, " "},
{KEYWORD, "TEXT"},
{SEPARATOR, ")"},
{tkKeyword, "CREATE"},
{tkWhitespace, " "},
{tkKeyword, "TABLE"},
{tkWhitespace, " "},
{tkIdentifier, "foo"},
{tkWhitespace, " "},
{tkSeparator, "("},
{tkIdentifier, "id"},
{tkWhitespace, " "},
{tkKeyword, "INTEGER"},
{tkSeparator, ","},
{tkWhitespace, " "},
{tkIdentifier, "first_name"},
{tkWhitespace, " "},
{tkKeyword, "TEXT"},
{tkSeparator, ","},
{tkWhitespace, " "},
{tkIdentifier, "last_name"},
{tkWhitespace, " "},
{tkKeyword, "TEXT"},
{tkSeparator, ")"},
},
},
}
Expand All @@ -129,33 +129,33 @@ func TestLexInsert(t *testing.T) {
{
sql: "INSERT INTO foo (id, first_name, last_name) VALUES (1, 'gud', 'dude')",
expected: []token{
{KEYWORD, "INSERT"},
{WHITESPACE, " "},
{KEYWORD, "INTO"},
{WHITESPACE, " "},
{IDENTIFIER, "foo"},
{WHITESPACE, " "},
{SEPARATOR, "("},
{IDENTIFIER, "id"},
{SEPARATOR, ","},
{WHITESPACE, " "},
{IDENTIFIER, "first_name"},
{SEPARATOR, ","},
{WHITESPACE, " "},
{IDENTIFIER, "last_name"},
{SEPARATOR, ")"},
{WHITESPACE, " "},
{KEYWORD, "VALUES"},
{WHITESPACE, " "},
{SEPARATOR, "("},
{NUMERIC, "1"},
{SEPARATOR, ","},
{WHITESPACE, " "},
{LITERAL, "'gud'"},
{SEPARATOR, ","},
{WHITESPACE, " "},
{LITERAL, "'dude'"},
{SEPARATOR, ")"},
{tkKeyword, "INSERT"},
{tkWhitespace, " "},
{tkKeyword, "INTO"},
{tkWhitespace, " "},
{tkIdentifier, "foo"},
{tkWhitespace, " "},
{tkSeparator, "("},
{tkIdentifier, "id"},
{tkSeparator, ","},
{tkWhitespace, " "},
{tkIdentifier, "first_name"},
{tkSeparator, ","},
{tkWhitespace, " "},
{tkIdentifier, "last_name"},
{tkSeparator, ")"},
{tkWhitespace, " "},
{tkKeyword, "VALUES"},
{tkWhitespace, " "},
{tkSeparator, "("},
{tkNumeric, "1"},
{tkSeparator, ","},
{tkWhitespace, " "},
{tkLiteral, "'gud'"},
{tkSeparator, ","},
{tkWhitespace, " "},
{tkLiteral, "'dude'"},
{tkSeparator, ")"},
},
},
}
Expand Down
Loading

0 comments on commit 8955e19

Please sign in to comment.