-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.h
145 lines (122 loc) · 3.73 KB
/
lexer.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
// Lexer for Monkey language in C++
#ifndef LEXER_H
#define LEXER_H
#include <cstddef>
#include <cstdint>
#include <string>
#include <unordered_map>
// Every kind of token the lexer can produce.
// The underlying type is a single byte. Enumerator order is significant: the
// implicit numeric values start at 0 (IDENT), so new kinds should be appended
// at the end rather than inserted in the middle.
enum class TokenType : std::uint8_t {
    IDENT,      // identifier for vars and function names
    ILLEGAL,    // named ILLEGAL; presumably input the lexer cannot classify
    ENDOFFILE,  // named ENDOFFILE; presumably marks the end of the input
    INT,        // INT token kind (presumably integer literals)

    // Operators and delimiters (symbols as listed in Lexer::TokenTypeToString)
    ASSIGN,     // =
    PLUS,       // +
    COMMA,      // ,
    SEMICOLON,  // ;
    LPAREN,     // (
    RPAREN,     // )
    LBRACE,     // {
    RBRACE,     // }

    // Keywords (spellings as listed in the Lexer keyword table)
    FUNCTION,   // fn
    LET,        // let

    MINUS,      // -
    BANG,       // !
    ASTERISK,   // *
    SLASH,      // /
    LT,         // <
    GT,         // >
    NEQ,        // !=
    EQ,         // ==

    FALSE,      // false
    TRUE,       // true
    RETURN,     // return
    IF,         // if
    ELSE,       // else

    STRING,     // STRING token kind (presumably string literals)
    LBRACKET,   // [
    RBRACKET,   // ]
    COLON,      // :
};
// A single lexical token: a token kind paired with a piece of text.
// Plain aggregate; member order (type, then literal) matters for brace
// initialization such as Token{kind, text}.
struct Token {
// Kind of the token (one of the TokenType enumerators).
TokenType type;
// Text carried by the token; presumably the source lexeme it was read from
// (confirm against Lexer::nextToken).
std::string literal;
};
// Lexer for the Monkey language: hands out the tokens of an input string one
// at a time through nextToken().
class Lexer {
public:
    // Display text for each TokenType. Operators and delimiters map to their
    // symbol; the remaining kinds map to an upper-case name.
    // Every TokenType enumerator has an entry (STRING included), so a lookup
    // with at() never throws and operator[] never inserts an empty name for a
    // valid token type.
    // NOTE(review): this is a non-static member, so each Lexer instance builds
    // and carries its own copy of the table.
    std::unordered_map<TokenType, std::string> TokenTypeToString = {
        {TokenType::IDENT, "IDENT"},
        {TokenType::ILLEGAL, "ILLEGAL"},
        {TokenType::ENDOFFILE, "ENDOFFILE"},
        {TokenType::INT, "INT"},
        {TokenType::ASSIGN, "="},
        {TokenType::PLUS, "+"},
        {TokenType::COMMA, ","},
        {TokenType::SEMICOLON, ";"},
        {TokenType::LPAREN, "("},
        {TokenType::RPAREN, ")"},
        {TokenType::LBRACE, "{"},
        {TokenType::RBRACE, "}"},
        {TokenType::FUNCTION, "FUNCTION"},
        {TokenType::LET, "LET"},
        {TokenType::MINUS, "-"},
        {TokenType::BANG, "!"},
        {TokenType::ASTERISK, "*"},
        {TokenType::SLASH, "/"},
        {TokenType::LT, "<"},
        {TokenType::GT, ">"},
        {TokenType::NEQ, "!="},
        {TokenType::EQ, "=="},
        {TokenType::FALSE, "FALSE"},
        {TokenType::TRUE, "TRUE"},
        {TokenType::RETURN, "RETURN"},
        {TokenType::IF, "IF"},
        {TokenType::ELSE, "ELSE"},
        {TokenType::STRING, "STRING"},
        {TokenType::LBRACKET, "["},
        {TokenType::RBRACKET, "]"},
        {TokenType::COLON, ":"},
    };

    // Lexer constructor
    // REQUIRES: input is the source text to tokenize
    // EFFECTS: creates a Lexer object
    // NOTE(review): not marked explicit, so a std::string converts to a Lexer
    // implicitly; left unchanged because callers may rely on it.
    Lexer(std::string input);

    // Default lexer constructor
    // EFFECTS: creates a Lexer object without being given any input
    Lexer();

    // Get the next token
    // EFFECTS: returns the next token in the input
    Token nextToken();

private:
    std::string input;             // text being tokenized
    std::size_t position{0};       // current position in input
    std::size_t read_position{0};  // current reading position in input (after current char)
    char ch{'\0'};                 // current char under examination

    // Map of the keywords in the language to their token types.
    std::unordered_map<std::string, TokenType> keywords{
        {"fn", TokenType::FUNCTION},
        {"let", TokenType::LET},
        {"true", TokenType::TRUE},
        {"false", TokenType::FALSE},
        {"return", TokenType::RETURN},
        {"if", TokenType::IF},
        {"else", TokenType::ELSE}
    };

    // Read the next character
    // MODIFIES: ch, position, read_position
    // EFFECTS: reads the next character in the input
    void readChar();

    // Peek at the next char
    // EFFECTS: returns the next character in the input
    char peekChar();

    // Read an identifier
    // EFFECTS: reads an identifier string from the input
    std::string readIdentifier();

    // Check type of string
    // EFFECTS: returns the type of the string if it is a keyword and identifier otherwise
    TokenType checkKeyword(std::string identifier);

    // Skips whitespace
    // EFFECTS: skips the whitespace in input until ch is not whitespace
    void skipWhitespace();

    // Read digits
    // EFFECTS: reads in digits from the input
    std::string readDigit();

    // Read a string
    // EFFECTS: reads a string from the input and processes it for use as a literal
    std::string readString();
};
#endif // LEXER_H