-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkumo.py
366 lines (302 loc) · 13.4 KB
/
kumo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
#Imports
from strings_with_arrows import *
#This is all our digits, and helps us determine if our char is a digit or not.
DIGITS = '0123456789'
class Error:
def __init__(self, pos_start, pos_end, error_name, details):
#When we make an error we take in the position start and end of the error. And we take the name and some descriptive details
self.pos_start = pos_start
self.pos_end = pos_end
self.error_name = error_name
self.details = details
def as_string(self):
#This creates a string that shows the error name and details
result = f'{self.error_name}: {self.details}'
#This shows the file name and line number of where it occurred
result += f' in File {self.pos_start.fname} at line {self.pos_start.ln + 1}'
#Formats the error text with arrows and a lot more info
result += '\n\n' + string_with_arrows(self.pos_start.ftext, self.pos_start, self.pos_end)
return result
class IllegalCharError(Error):
def __init__(self, pos_start, pos_end, details):
#This is an error that occurs when an illegal character shows up.
super().__init__(pos_start, pos_end, 'Illegal Character', details)
class InvalidSyntaxError(Error):
def __init__(self, pos_start, pos_end, details):
#This is an error that occurs when there is invalid syntax
super().__init__(pos_start, pos_end, 'Invalid Syntax', details)
class Position:
#Keeps track of index, line number, column number, filename and file text. This is for error reporting
def __init__(self, index, ln, col, fname, ftext):
self.index = index
self.ln = ln
self.col = col
self.fname = fname
self.ftext = ftext
def advance(self, current_char=None):
#When we advance we increase the index and column number
self.index += 1
self.col += 1
#Everytime we see a new line we will update the line number and reset the column number
if current_char == '\n':
self.ln += 1
self.col = 0
return self
def copy(self):
#This creates a copy of the position
return Position(self.index, self.ln, self.col, self.fname, self.ftext)
#These are all our tokens
TT_INT = 'INT'
TT_FLOAT = 'FLOAT'
TT_PLUS = 'PLUS'
TT_MINUS = 'MINUS'
TT_MUL = 'MUL'
TT_DIV = 'DIV'
TT_LPAREN = 'LPAREN'
TT_RPAREN = 'RPAREN'
TT_EOF = 'EOF'
class Token:
def __init__(self, type_, value=None, pos_start=None, pos_end=None):
#This makes a token with a type for example INT and a value which can be something like 5
self.type = type_
self.value = value
#Checking if there is a position start, and if there is creating the end from it
if pos_start:
self.pos_start = pos_start.copy()
self.pos_end = pos_start.copy()
self.pos_end.advance()
def __repr__(self):
#This is just a way to represent that information. If it has a value it will go INT:5 if not it will just go PLUS for example.
if self.value: return f'{self.type}:{self.value}'
return f'{self.type}'
class Lexer:
def __init__(self, fname, text):
#text we will be processing
self.fname = fname
self.text = text
#current position
self.pos = Position(-1, 0, -1, fname, text)
#current char
self.current_char = None
#We call the advance method on intialization of our lexer. This will make it start out at 0.
self.advance()
def advance(self):
#This advances to the next character in the text
self.pos.advance(self.current_char)
#This grabs the current character only if the index is not the length of the text. If it is we set the current char to none.
self.current_char = self.text[self.pos.index] if self.pos.index < len(self.text) else None
def make_tokens(self):
# This will hold our tokens
tokens = []
#This a loop that will got through every char what token the current char will hold as long as the current char is not None.
while self.current_char != None:
if self.current_char in ' \t':
#This ignores all spaces and tabs, and calls advance when it sees one
self.advance()
#This looks to see if our char is in the digits constant, and if so it makes into a number.
elif self.current_char in DIGITS:
tokens.append(self.make_number())
elif self.current_char == '+':
tokens.append(Token(TT_PLUS, pos_start=self.pos))
self.advance()
elif self.current_char == '-':
tokens.append(Token(TT_MINUS, pos_start=self.pos))
self.advance()
elif self.current_char == '*':
tokens.append(Token(TT_MUL, pos_start=self.pos))
self.advance()
elif self.current_char == '/':
tokens.append(Token(TT_DIV, pos_start=self.pos))
self.advance()
elif self.current_char == '(':
tokens.append(Token(TT_LPAREN, pos_start=self.pos))
self.advance()
elif self.current_char == ')':
tokens.append(Token(TT_RPAREN, pos_start=self.pos))
self.advance()
else:
#Here we store the pos and the current illegal character
pos_start = self.pos.copy()
char = self.current_char
self.advance()
#Then we return both a empty list of tokens and an illegal character error with the starting pos. The position after advance is called and the illegal character
return [], IllegalCharError(pos_start, self.pos, "'" + char + "'")
#Adds end of file token at the end
tokens.append(Token(TT_EOF, pos_start=self.pos))
#Here we return the tokens and none for the error
return tokens, None
def make_number(self):
#Here we are making the string representation of our number, and keeping track of how many decimals there are
num_str = ''
dot_count = 0
pos_start = self.pos.copy()
#Here we are making sure the current char exists, and that is either a digit or a .
while self.current_char != None and self.current_char in DIGITS + '.':
if self.current_char == '.':
#If we find a dot we increment our dot count, and we add the "." to our string repr of our number. If there is more than one we break, because a number can't have more than one decimal.
if dot_count == 1: break
dot_count += 1
num_str += '.'
else:
#if it is not a decimal/dot then we just append the current char to our num string
num_str += self.current_char
#Goes to next character after drtermining what it is
self.advance()
if dot_count == 0:
#if there are no decimals our char is an integer
return Token(TT_INT, int(num_str), pos_start, self.pos)
else:
#if there are decimals then our char is a float
return Token(TT_FLOAT, float(num_str), pos_start, self.pos)
class NumberNode:
#The Number Node is the node in our parser that will take the corresponding number token either a float or int
def __init__(self, tok):
self.tok = tok
def __repr__(self):
#string representation of number node
return f'{self.tok}'
class BinOpNode:
#This is a node for our operator. Ex: PLUS
def __init__(self, left_node, op_tok, right_node):
#We take in a left and right node and add our operator token
self.left_node = left_node
self.op_tok = op_tok
self.right_node = right_node
def __repr__(self):
return f'({self.left_node}, {self.op_tok}, {self.right_node})'
#Unary operations
class UnaryOpNode:
def __init__(self, op_tok, node):
self.op_tok = op_tok
self.node = node
def __repr__(self):
return f'({self.op_tok}, {self.node})'
#This ParseResult class will check if there are any errors
class ParseResult:
def __init__(self):
self.error = None
self.node = None
def register(self, res):
if isinstance(res, ParseResult):
if res.error: self.error = res.error
return res.node
return res
def success(self, node):
self.node = node
return self
def failure(self, error):
self.error = error
return self
class Parser:
def __init__(self, tokens):
#Parser goes through the tokens
self.tokens = tokens
self.tok_idx = -1
self.advance()
def advance(self):
#We increment the token index
self.tok_idx += 1
#As long as there are still tokens we will go though and find the current token and return it after advancing.
if self.tok_idx < len(self.tokens):
self.current_tok = self.tokens[self.tok_idx]
return self.current_tok
def parse(self):
res = self.expression()
#Checks to see if there is code that still hasn't been parsed. This means there is a syntax error
if not res.error and self.current_tok.type != TT_EOF:
return res.failure(InvalidSyntaxError(self.current_tok.pos_start, self.current_tok.pos_end, "Expected '+', '-', '*' or '/'"))
return res
def factor(self):
#Making a parse result instance every time
res = ParseResult()
#Here we grab the current token
tok = self.current_tok
#For unary operators
if tok.type in (TT_PLUS, TT_MINUS):
res.register(self.advance())
factor = res.register(self.factor())
if res.error: return res
return res.success(UnaryOpNode(tok, factor))
#Then we see if its type is of an INT or a FLOAT. If so we advance to the next token and create a number node with the token.
elif tok.type in (TT_INT, TT_FLOAT):
#does nothing right now
res.register(self.advance())
return res.success(NumberNode(tok))
elif tok.type == TT_LPAREN:
res.register(self.advance())
expr = res.register(self.expression())
if res.error: return res
if self.current_tok.type == TT_RPAREN:
res.register(self.advance())
return res.success(expr)
else:
return res.failure(InvalidSyntaxError(self.current_tok.pos_start, self.current_tok.pos_end, "Expected ')'"))
#Returns invalid syntax error if type is not int or float
return res.failure(InvalidSyntaxError(tok.pos_start, tok.pos_end, "Expected int or float"))
def term(self):
#Calling binary operation on a factor
return self.bin_op(self.factor, (TT_MUL, TT_DIV))
def expression(self):
#Calling a binary operation on the term
return self.bin_op(self.term, (TT_PLUS, TT_MINUS))
def bin_op(self, func, ops):
# Making an instance of ParseResult for every operation
res = ParseResult()
#Takes in either a factor or term
left = res.register(func())
#checking if there is an error
if res.error: return res
#While the token type is one of the operators given we keep making nodes
while self.current_tok.type in ops:
op_tok = self.current_tok
res.register(self.advance())
right = res.register(func())
#check if error
if res.error: return res
left = BinOpNode(left, op_tok, right)
return res.success(left)
### INTERPRETER ###
class Interpreter:
#Visit method will process the node and then process all the child nodes
def visit(self, node):
#Creates these strings
#visit_BinOpNode
#visit_NumberNode
method_name = f'visit_{type(node).__name__}'
#Will call the appropriate method
method = getattr(self, method_name, self.no_visit_method)
return method(node)
#If wrong visit method or no visit method
def no_visit_method(self, node):
raise Exception(f'No visit_{type(node).__name__} method defined')
def visit_NumberNode(self, node):
print("Found number node!")
def visit_BinOpNode(self, node):
print("Found binary operator node!")
self.visit(node.left_node)
self.visit(node.right_node)
def visit_UnaryOpNode(self, node):
print("Found unary operator node!")
self.visit(node.left_node)
self.visit(node.right_node)
def run(fname, text):
#Now we finally run our lexer with a file name and the text we want to run
lexer = Lexer(fname, text)
#We make tokens out of it
tokens, error = lexer.make_tokens()
if error: return None, error
#Generating the Abstract Syntax Tree
parser = Parser(tokens)
ast = parser.parse()
#error check
if ast.error: return None, ast.error
#Run program with interpreter
interpreter = Interpreter()
interpreter.visit(ast.node)
#Now we return the ast node and the error
return None, None
# return ast.node, ast.error
#Parser Notes:
# When we add two numbers in our language we will have an expression, two terms, and two factors.
# 10 + 5 * 2
#This is an expression with two terms(10 and 5*2), and three factors (10, 5, and 2)