This repository has been archived by the owner on Aug 28, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
lexer.go
118 lines (102 loc) · 2.14 KB
/
lexer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
package parser
import (
"errors"
)
// lexer represents a basic lexer tokenizing source code
// into 3 basic categories: spaces (whitespaces, tabs, line-breaks),
// signs (any ASCII special character) and
// words (any other character)
type lexer struct {
	// cr is the current read position within the source file
	cr Cursor
}
// newLexer creates a new basic-latin lexer instance.
// It panics if src is nil, since a lexer without a source
// file is unusable.
func newLexer(src *SourceFile) *lexer {
	if src == nil {
		panic("missing source file during lexer initialization")
	}
	lx := new(lexer)
	lx.cr = NewCursor(src)
	return lx
}
// finalizedToken stamps end onto tk and returns it.
// A zero-length span (end equals the token's begin index)
// yields nil instead of an empty token.
func finalizedToken(tk *Token, end Cursor) *Token {
	if tk.VBegin.Index == end.Index {
		return nil
	}
	tk.VEnd = end
	return tk
}
// reachedEOF reports whether the cursor is at or past
// the end of the source file.
func (lx *lexer) reachedEOF() bool {
	src := lx.cr.File.Src
	return lx.cr.Index >= uint(len(src))
}
// ReadExact tries to read an exact string and returns false if
// str couldn't have been matched
func (lx *lexer) ReadExact(
	expectation []rune,
	kind FragmentKind,
) (token *Token, matched bool, err error) {
	if len(expectation) < 1 {
		panic(errors.New("empty string expected"))
	}
	if lx.reachedEOF() {
		return nil, false, errEOF{}
	}
	token = &Token{VKind: kind, VBegin: lx.cr}
	for _, expected := range expectation {
		if lx.reachedEOF() {
			// Source exhausted before the full expectation matched
			return finalizedToken(token, lx.cr), false, nil
		}
		actual := lx.cr.File.Src[lx.cr.Index]
		// Advance the cursor unconditionally — even a mismatching
		// rune is consumed and included in the returned token
		lx.cr.Index++
		if actual == '\n' {
			lx.cr.Column = 1
			lx.cr.Line++
		} else {
			lx.cr.Column++
		}
		if actual != expected {
			// No match
			return finalizedToken(token, lx.cr), false, nil
		}
	}
	return finalizedToken(token, lx.cr), true, nil
}
// ReadUntil reads runes for as long as fn returns true, stopping at
// the end of the source file or at the first rune fn rejects. fn is
// called with the number of runes consumed so far and the current
// cursor position. The consumed span is returned as a token of the
// given kind (nil if no runes were consumed); errEOF is returned when
// the lexer is already at the end of the file.
//
// NOTE: the previous doc comment claimed fn returns a skip count;
// fn actually returns a bool continuation flag.
func (lx *lexer) ReadUntil(
	fn func(uint, Cursor) bool,
	kind FragmentKind,
) (*Token, error) {
	if lx.reachedEOF() {
		return nil, errEOF{}
	}
	token := &Token{
		VKind:  kind,
		VBegin: lx.cr,
	}
	// consumed counts the runes accepted by fn so far; the EOF check
	// short-circuits so fn is never called past the end of the source.
	for consumed := uint(0); !lx.reachedEOF() && fn(consumed, lx.cr); consumed++ {
		// Keep the line/column position in sync across line-breaks
		if lx.cr.File.Src[lx.cr.Index] == '\n' {
			lx.cr.Column = 1
			lx.cr.Line++
		} else {
			lx.cr.Column++
		}
		lx.cr.Index++
	}
	return finalizedToken(token, lx.cr), nil
}