-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathminiml_lex.mll
95 lines (88 loc) · 2.83 KB
/
miniml_lex.mll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
(*
CS 51 Final Project
MiniML -- Lexical Analyzer
*)
{
open Printf ;;
open Miniml_parse ;; (* need access to parser's token definitions *)
(* [create_hashtable size init] builds a fresh hash table with initial
   capacity hint [size] and adds every (key, data) pair of [init] to it,
   in list order. Pairs are added with [Hashtbl.add], so when a key occurs
   more than once the later binding shadows the earlier one. *)
let create_hashtable size init =
  let table = Hashtbl.create size in
  let insert (key, data) = Hashtbl.add table key data in
  List.iter insert init;
  table
(* Reserved words of the language, keyed by their spelling and mapped to
   the parser token each one denotes. Both [fun] and [function] map to
   FUNCTION. The [token] rule consults this table for every identifier-shaped
   lexeme; anything not found here becomes an ID. *)
let keyword_table =
  let reserved_words =
    [ "if", IF;
      "in", IN;
      "then", THEN;
      "else", ELSE;
      "let", LET;
      "raise", RAISE;
      "rec", REC;
      "true", TRUE;
      "false", FALSE;
      "fun", FUNCTION;
      "function", FUNCTION ]
  in
  create_hashtable 8 reserved_words
(* Operator and punctuation lexemes, keyed by their spelling and mapped to
   the parser token each one denotes. The [token] rule looks up every maximal
   run of symbol characters here; runs that are not listed are reported and
   skipped.

   Several spellings share a token: the dot and the arrow both map to DOT,
   and the double semicolon maps to EOF.

   NOTE(review): the greater-than sign is mapped to LESSTHAN, exactly like
   the less-than sign. That looks like a copy-paste slip; confirm whether the
   parser declares a dedicated greater-than token and, if so, use it here. *)
let sym_table =
  let symbols =
    [ "^", CONCAT;
      "=", EQUALS;
      "<", LESSTHAN;
      ">", LESSTHAN;
      ".", DOT;
      "->", DOT;
      ";;", EOF;
      "~-", NEG;
      "+", PLUS;
      "+.", FPLUS;
      "-", MINUS;
      "-.", FMINUS;
      "*", TIMES;
      "*.", FTIMES;
      "(", OPEN;
      ")", CLOSE ]
  in
  create_hashtable 8 symbols
}
(* Named regular expressions used by the [token] rule. *)

(* A single decimal digit. *)
let digit = ['0'-'9']

(* A float literal: a decimal point with at least one digit before or after
   it (for example 1. or 1.5 or .5). A lone dot must NOT match here: it would
   tie with [sym] on length, win because the float case is listed first in
   the rule, and then make float_of_string raise Failure instead of the
   lexer producing the DOT token. *)
let float = (digit+ '.' digit*) | ('.' digit+)

(* An identifier or keyword: a lowercase letter followed by lowercase
   letters and digits. *)
let id = ['a'-'z'] ['a'-'z' '0'-'9']*

(* A double-quoted string literal. The body may be empty and may not contain
   a double quote or a backslash, so there are no escape sequences. *)
let string = ['"'] [^ '"' '\\']* ['"']

(* Either a single parenthesis, or a maximal run of operator characters.
   Parentheses are kept separate so that adjacent ones lex one at a time. *)
let sym = ['(' ')'] | (['$' '&' '*' '+' '-' '/' '=' '<' '>' '^'
                        '.' '~' ';' '!' '?' '%' ':' '#']+)
(* [token lexbuf] returns the next parser token read from [lexbuf].

   - String, float and integer literals become STRING, FLOAT and INT.
     float_of_string and int_of_string raise Failure on a lexeme they cannot
     convert (for instance an integer literal that does not fit in an int).
   - Identifier-shaped lexemes are looked up in [keyword_table]; anything not
     found there becomes an ID.
   - Runs of symbol characters are looked up in [sym_table]; unknown runs are
     reported on standard output and skipped.
   - Brace-delimited one-line comments and whitespace are skipped.
   - Any other character is reported on standard output and skipped.

   Raises End_of_file when the input is exhausted. *)
rule token = parse
  | string as s
    { (* The lexeme still carries its two delimiting quotes; drop one
         character from each end to obtain the payload. *)
      STRING (String.sub s 1 (String.length s - 2))
    }
  | float as fnum
    { FLOAT (float_of_string fnum) }
  | digit+ as inum
    { INT (int_of_string inum) }
  | id as word
    { match Hashtbl.find keyword_table word with
      | keyword -> keyword
      | exception Not_found -> ID word
    }
  | sym as symbol
    { match Hashtbl.find sym_table symbol with
      | tok -> tok
      | exception Not_found ->
        printf "Ignoring unrecognized token: %s\n" symbol;
        token lexbuf
    }
  (* Skip one-line comments. The comment body excludes the closing brace so
     that the comment ends at the FIRST closing brace; otherwise the longest
     match would run to the last closing brace on the line and swallow any
     code sitting between two comments. *)
  | '{' [^ '\n' '}']* '}'     { token lexbuf }
  (* Skip whitespace, including the carriage return of CRLF line endings. *)
  | [' ' '\t' '\r' '\n']      { token lexbuf }
  | _ as c                    (* warn and skip unrecognized characters *)
    { printf "Ignoring unrecognized character: %c\n" c;
      token lexbuf
    }
  | eof
    { raise End_of_file }