From c7eeda11daeac2d98c996567ea22c20fe6d0e30d Mon Sep 17 00:00:00 2001
From: jmeaster30
Date: Sun, 12 Nov 2023 03:01:36 -0500
Subject: [PATCH] Started working on the new ocean lexer/parser.

---
 README.md                                     |  14 +-
 examples/hydro/fibonacci.h2o                  |   2 +-
 examples/ocean/arrays.sea                     |   6 +-
 examples/ocean/collatz.sea                    |   6 +-
 examples/ocean/errors/test.sea                |   2 +-
 examples/ocean/fibonacci.sea                  |  19 +-
 examples/ocean/functions.sea                  |   6 +-
 examples/ocean/hello_world.sea                |   2 +-
 examples/ocean/op_overloading.sea             |  13 +-
 examples/ocean/rational.sea                   |  18 +-
 src/hydro/frontend.rs                         |   2 +-
 src/hydro/frontend/parser.rs                  |  24 +-
 src/hydro/frontend/{token.rs => tokentype.rs} |  27 --
 src/hydro/intrinsic/intrinsicmanager.rs       |   1 -
 src/main.rs                                   |  10 +
 src/ocean.rs                                  |   3 +
 src/ocean/frontend.rs                         |   5 +
 src/ocean/frontend/ast.rs                     | 100 +++++
 src/ocean/frontend/compiler.rs                |  24 ++
 src/ocean/frontend/lexer.rs                   | 383 ++++++++++++++++++
 src/ocean/frontend/parserphase1.rs            |   7 +
 src/ocean/frontend/tokentype.rs               |  44 ++
 src/tests/hydro/frontend/token_tests.rs       |   2 +-
 src/util.rs                                   |   3 +-
 src/util/token.rs                             |  50 +++
 src/util/tokentrait.rs                        |   4 -
 standard_libraries/ocean/std/array.sea        |  46 +--
 27 files changed, 715 insertions(+), 108 deletions(-)
 rename src/hydro/frontend/{token.rs => tokentype.rs} (51%)
 create mode 100644 src/ocean.rs
 create mode 100644 src/ocean/frontend.rs
 create mode 100644 src/ocean/frontend/ast.rs
 create mode 100644 src/ocean/frontend/compiler.rs
 create mode 100644 src/ocean/frontend/lexer.rs
 create mode 100644 src/ocean/frontend/parserphase1.rs
 create mode 100644 src/ocean/frontend/tokentype.rs
 create mode 100644 src/util/token.rs
 delete mode 100644 src/util/tokentrait.rs

diff --git a/README.md b/README.md
index 7560d06..05f3cfe 100755
--- a/README.md
+++ b/README.md
@@ -40,9 +40,15 @@ It would be cool if we can have this language compile to several targets such as
 - Java bytecode
 ### Static Analysis
-I would like to be able to have the compiler understand what values certain variables can be at any given point in the program so we can check for things like unhandled cases, dead code, and out-of-bounds accesses at compile time. There are many more things I would want to check for but I don't know what I don't know.
+I would like the compiler to understand what values a variable can hold at any given point in the program, so it can check for things like unhandled cases, dead code, and out-of-bounds accesses at compile time.
+There are many more things I would want to check for, but I don't know what I don't know.
 ### Neat Generic System
-I want to have generics determined by the operators and functions that are being used on the generic variables. This would provide some pretty neat functionality I feel but I haven't gotten to that point yet to really play with it.
+I want generics to be determined by the operators and functions that are used on the generic variables.
+I feel this would provide some pretty neat functionality, but I haven't gotten far enough to really play with it.
 ### Generic Operator Overloading
-I don't think I would be able to implement this but I would like to add in the ability to override any symbol and provide the user with the ability to add their own operators and define the precedence and associativity of those operators. I tried writing out the parse for this and it proved to be really difficult so I am not sure if I will be able to get to this. Some things I noted was (especially with the type system I wanted (see Neat Generics System)) it would require a parser pass without parsing the expressions and then a type checking pass and then another parser pass to parse the expressions. Also, dealing with parsing conflicts when the user could potentially add an operator using the same symbol but the operator works on different types and have a different precedence order. Maybe this will be a different project.
-
+My idea for this involves parsing in multiple steps, with some type checking in between to figure out what functions exist and what annotations are on those functions.
+In that type checking pass, we can figure out which operators we are adding and at what precedence level.
+At this point, we can also catch any issues with the declared operators.
+The operator's function can have generic types because the operator precedence table is generated solely from the number of arguments and the info in the annotation. The type info comes in during the last type checking pass, after those operator expression nodes have been converted into function call nodes.
+We can probably work out macros here as well, but I think that would be better done after the expression parsing pass, in case any code replacements are needed.
+The next parsing pass then parses all the expression statements in the AST from the first pass. Since we will be walking the AST, we get both recursive operators and scoped overloaded operators.
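The multi-pass plan in the README hunk above is easier to see in code. The following Rust fragment is an illustration for this patch description, not code from the repository: `OperatorEntry` and its `precedence` field are assumptions about how the annotation info could be recorded, and the parse is a minimal Pratt-style phase-two sketch that lowers operator uses to plain function calls as described above.

```rust
// Illustrative sketch only (not code from this patch): build an operator
// precedence table from `@Operator` annotations, then consult it while
// parsing the raw expression token runs left over from phase one.
use std::collections::HashMap;

struct OperatorEntry {
  function_name: String, // the annotated function the operator lowers to
  precedence: u8,        // assumed to be carried by the annotation
}

#[derive(Debug)]
enum Expr {
  Number(i64),
  Call(String, Vec<Expr>), // operator expressions become function calls
}

// `tokens` stands in for a raw expression token run collected in phase one.
fn parse_expr(tokens: &[&str], pos: &mut usize, min_prec: u8, table: &HashMap<String, OperatorEntry>) -> Expr {
  let mut lhs = Expr::Number(tokens[*pos].parse().unwrap());
  *pos += 1;
  while *pos < tokens.len() {
    let Some(op) = table.get(tokens[*pos]) else { break };
    if op.precedence < min_prec {
      break;
    }
    *pos += 1;
    // left-associative: parse the right-hand side one level tighter
    let rhs = parse_expr(tokens, pos, op.precedence + 1, table);
    lhs = Expr::Call(op.function_name.clone(), vec![lhs, rhs]);
  }
  lhs
}

fn main() {
  let mut table = HashMap::new();
  // imagine these entries came from `@Operator {a} + {b}`-style annotations
  table.insert("+".to_string(), OperatorEntry { function_name: "add".to_string(), precedence: 10 });
  table.insert("*".to_string(), OperatorEntry { function_name: "multiply".to_string(), precedence: 20 });
  let expr = parse_expr(&["1", "+", "2", "*", "3"], &mut 0, 0, &table);
  // prints: Call("add", [Number(1), Call("multiply", [Number(2), Number(3)])])
  println!("{:?}", expr);
}
```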
diff --git a/examples/hydro/fibonacci.h2o b/examples/hydro/fibonacci.h2o
index 8d1a3db..6664f1c 100644
--- a/examples/hydro/fibonacci.h2o
+++ b/examples/hydro/fibonacci.h2o
@@ -1,6 +1,6 @@
 module main
 
-function fibonacci any body
+function fibonacci u128 body
   duplicate
   duplicate
   push u128 1
diff --git a/examples/ocean/arrays.sea b/examples/ocean/arrays.sea
index f25deb3..a15badf 100644
--- a/examples/ocean/arrays.sea
+++ b/examples/ocean/arrays.sea
@@ -3,14 +3,14 @@
 let array: i32[] = 1..7
 
 println(array)
 
-let double = func (a: auto T) -> (result: T = 2 * a)
+function double (a: auto T) -> (result: T = 2 * a)
 
 # create array [2, 4, 6, 8, 10, 12, 14]
 let doubled = array >. double
 
 println(doubled)
 
-let sum = func (input: (auto T)[]) -> (result: T):{
+function sum (input: (auto T)[]) -> (result: T) {
   result = 0
   for val in input {
     result += val
@@ -19,7 +19,7 @@
 
 let total = array.sum()
 
-let mod2 = func (a: auto T) -> (result: T):{
+function mod2 (a: auto T) -> (result: T) {
   result = a % 2
 }
diff --git a/examples/ocean/collatz.sea b/examples/ocean/collatz.sea
index 7fa2c02..c22c48d 100644
--- a/examples/ocean/collatz.sea
+++ b/examples/ocean/collatz.sea
@@ -1,8 +1,8 @@
-let isEven = func (x: i64) -> (result: bool = x % 2 == 0)
+function isEven (x: i64) -> (result: bool = x % 2 == 0)
 
-let isEmpty = func (x: string) -> (result: bool = x == "")
+function isEmpty (x: string) -> (result: bool = x.length == 0)
 
-let collatz = func (input: i64) -> (path: i64[]):{
+function collatz (input: i64) -> (path: i64[]) {
   while input != 1 {
     path.append(input)
     if !input.isEven() {
diff --git a/examples/ocean/errors/test.sea b/examples/ocean/errors/test.sea
index cd1cc96..99d793e 100644
--- a/examples/ocean/errors/test.sea
+++ b/examples/ocean/errors/test.sea
@@ -1,4 +1,4 @@
-let hello = func() -> ():{
+function hello() -> () {
   while true {
     continue
   }
diff --git a/examples/ocean/fibonacci.sea b/examples/ocean/fibonacci.sea
index 1cc66dd..1a34971 100644
--- a/examples/ocean/fibonacci.sea
+++ b/examples/ocean/fibonacci.sea
@@ -1,12 +1,15 @@
-let fib = func (input: i64) -> (result: i64):{
-  if input == 0 {
-    result = 0
+using std.annotation.memoize
+using std.annotation.trace
+
+@Trace
+@Memoize
+function fib (input: i64) -> (result: i64) {
+  if input <= 1 {
+    result = input
   } else {
-    result = input + self(input - 1)
+    result = fib(input - 1) + fib(input - 2)
   }
 }
 
-#println(9.fib)
-#println(fib(10))
-
-let result = fib(10)
\ No newline at end of file
+println(9.fib)
+println(fib(10))
diff --git a/examples/ocean/functions.sea b/examples/ocean/functions.sea
index 9feba53..9f6377f 100644
--- a/examples/ocean/functions.sea
+++ b/examples/ocean/functions.sea
@@ -1,13 +1,13 @@
 use std.io as io
 
 let morph =
-  func (value: auto T, callback: func(T:T))
+  function (value: auto T, callback: function (T) -> T)
   -> (result: T = callback(value))
 
-let swap = func (a: auto T, b: T) -> (c: T = b, d: T = a)
+function swap (a: auto T, b: T) -> (c: T = b, d: T = a)
 
-let sum = func (a: i32, b: i32) -> (result: i32):
+function sum (a: i32, b: i32) -> (result: i32)
 {
   result = a + b
 }
\ No newline at end of file
diff --git a/examples/ocean/hello_world.sea b/examples/ocean/hello_world.sea
index eadeed5..873dcca 100644
--- a/examples/ocean/hello_world.sea
+++ b/examples/ocean/hello_world.sea
@@ -1,3 +1,3 @@
 use std.io
 
-println()
\ No newline at end of file
+println("hello world")
\ No newline at end of file
diff --git a/examples/ocean/op_overloading.sea b/examples/ocean/op_overloading.sea
index 5fd166b..6e1abc3 100644
--- a/examples/ocean/op_overloading.sea
+++ b/examples/ocean/op_overloading.sea
@@ -2,9 +2,18 @@
 # Really not sure about the creating new arrays or the appending to arrays
 # will need to think about that more
 
-op _>._ func (arr: auto T[], f: func(T, T : T)) -> (result: T):{
-  result = []
+@Operator {arr} >. {f}
+function map(arr: auto T[], f: function(T) -> T) -> (result: T[]) {
   for val in arr {
     result.push(val.f())
   }
+}
+
+@Operator {cond} ? {left} : {right}
+function ternary(cond: bool, left: lazy auto T, right: lazy T) -> (result: T) {
+  if cond {
+    result = left
+  } else {
+    result = right
+  }
 }
\ No newline at end of file
diff --git a/examples/ocean/rational.sea b/examples/ocean/rational.sea
index 1643997..cc2ed18 100644
--- a/examples/ocean/rational.sea
+++ b/examples/ocean/rational.sea
@@ -1,4 +1,4 @@
-let gcd = func (a: i64, b: i64) -> (result: i64):{
+function gcd (a: i64, b: i64) -> (result: i64) {
   # If A=0 then GCD(A, B)=B since the Greatest Common Divisor of 0 and B is B.
   # If B=0 then GCD(A, B)=A since the Greatest Common Divisor of 0 and A is A.
   # Let R be the remainder of dividing A by B assuming A > B. (R = A % B)
@@ -18,7 +18,7 @@
 }
 
-let abs = func (a: i64) -> (result: i64):{
+function abs (a: i64) -> (result: i64) {
   if a < 0 {
     result = -a
   } else {
@@ -26,25 +26,21 @@
   }
 }
 
-let lcm = func (a: i64, b: i64) -> (result: i64 = abs(a) * abs(b) / gcd(a, b))
-
-#/
-op + = (x: auto T, y: T) -> (result: T = add(x, y))
-
-op - = (x: auto T, y: T) -> (result: T = subtract(x, y))
-/#
+function lcm (a: i64, b: i64) -> (result: i64 = abs(a) * abs(b) / gcd(a, b))
 
 pack rational {
   numerator: i64
   denominator: i64
 }
 
-let add = func (a: rational, b: rational) -> (res: rational):{
+@Operator {a} + {b}
+function add (a: rational, b: rational) -> (res: rational) {
   let mul: i64 = lcm(a.denominator, b.denominator)
   res = ( a.numerator * mul + b.numerator * mul, mul )
 }
 
-let subtract = func (a: rational, b: rational) -> (res: rational):{
+@Operator {a} - {b}
+function subtract (a: rational, b: rational) -> (res: rational) {
   let mul: i64 = lcm(a.denominator, b.denominator)
   res = ( a.numerator * mul - b.numerator * mul, mul )
 }
\ No newline at end of file
diff --git a/src/hydro/frontend.rs b/src/hydro/frontend.rs
index 80fc65c..7186dc4 100644
--- a/src/hydro/frontend.rs
+++ b/src/hydro/frontend.rs
@@ -1,4 +1,4 @@
 //TODO mod binaryable;
 pub mod compiler;
 pub mod parser;
-pub mod token;
+pub mod tokentype;
diff --git a/src/hydro/frontend/parser.rs b/src/hydro/frontend/parser.rs
index 826c512..1f5d61c 100644
--- a/src/hydro/frontend/parser.rs
+++ b/src/hydro/frontend/parser.rs
@@ -1,19 +1,19 @@
-use crate::hydro::frontend::token::{Token, TokenType};
+use crate::hydro::frontend::tokentype::TokenType;
 use crate::hydro::function::{Function, Target};
 use crate::hydro::instruction::*;
 use crate::hydro::intrinsic::Intrinsic;
 use crate::hydro::layouttemplate::LayoutTemplate;
 use crate::hydro::module::Module;
 use crate::hydro::value::{Array, FunctionPointer, LayoutIndexRef, Reference, Type, Value, VariableRef};
-use crate::util::tokentrait::TokenTrait;
 use std::collections::HashMap;
 use std::fs::File;
 use std::io;
 use std::io::Read;
+use crate::util::token::{Token, TokenTrait};
 
 pub struct Parser {
   file_contents: Vec<char>,
-  current_token: Option<Token>,
+  current_token: Option<Token<TokenType>>,
   current_index: usize,
   current_line: usize,
   current_column: usize,
@@ -215,7 +215,7 @@
         function.add_label(target_name_token.lexeme, function.body.len());
       }
       TokenType::Module | TokenType::Function | TokenType::Layout | TokenType::Using | TokenType::Main | TokenType::Intrinsic => break,
-      _ => panic!("Expected to have an instruction here but read {:?} :(", inst_token),
+      _ => panic!("Expected to have an instruction here but read {} :(", inst_token),
     }
   }
@@ -347,7 +347,7 @@
           function: function_token.lexeme,
         })
      }
-      _ => panic!("Expected to have a value token here :( {:?}", value_token),
+      _ => panic!("Expected to have a value token here :( {}", value_token),
     },
   })
 }
@@ -589,7 +589,7 @@
     !self.is_not_done()
   }
 
-  fn token(&mut self) -> Option<Token> {
+  fn token(&mut self) -> Option<Token<TokenType>> {
     match &self.current_token {
       Some(current_token) => Some(current_token.clone()),
       None => {
@@ -816,27 +816,27 @@
     lexeme.chars().all(|c| c.is_alphanumeric() || c == '.' || c == '-' || c == '_' || c == '\\' || c == '/')
   }
 
-  fn expect_token(&mut self) -> Token {
+  fn expect_token(&mut self) -> Token<TokenType> {
     match self.token() {
       Some(token) => token,
       None => panic!("Expected a token here!"),
     }
   }
 
-  fn expect_token_type(&mut self, token_type: TokenType) -> Token {
+  fn expect_token_type(&mut self, token_type: TokenType) -> Token<TokenType> {
     match self.token() {
       Some(token) => {
         if token.is_token_type(token_type) {
           token
         } else {
-          panic!("Expected token type {:?} but got {:?}", token_type, token);
+          panic!("Expected token type {:?} but got {}", token_type, token);
         }
       }
       None => panic!("Expected some token here :("),
     }
   }
 
-  fn optional_token_type(&mut self, token_type: TokenType) -> Option<Token> {
+  fn optional_token_type(&mut self, token_type: TokenType) -> Option<Token<TokenType>> {
     match self.token() {
       Some(token) => {
         if token.is_token_type(token_type) {
@@ -849,13 +849,13 @@
     }
   }
 
-  fn expect_one_of(&mut self, token_types: Vec<TokenType>) -> Token {
+  fn expect_one_of(&mut self, token_types: Vec<TokenType>) -> Token<TokenType> {
     match self.token() {
       Some(token) => {
         if token_types.contains(&token.token_type) {
           token
         } else {
-          panic!("Expected one of {:?} but got {:?}", token_types, token);
+          panic!("Expected one of {:?} but got {}", token_types, token);
        }
      }
      None => panic!("Expected some token here :("),
diff --git a/src/hydro/frontend/token.rs b/src/hydro/frontend/tokentype.rs
similarity index 51%
rename from src/hydro/frontend/token.rs
rename to src/hydro/frontend/tokentype.rs
index 5cd7de8..206e666 100644
--- a/src/hydro/frontend/token.rs
+++ b/src/hydro/frontend/tokentype.rs
@@ -1,5 +1,3 @@
-use crate::util::tokentrait::TokenTrait;
-
 #[derive(Copy, Clone, Debug, PartialEq)]
 pub enum TokenType {
   Error,
@@ -64,28 +62,3 @@
   True,
   False,
 }
-
-#[derive(Clone, Debug)]
-pub struct Token {
-  pub lexeme: String,
-  pub token_type: TokenType,
-  pub offset: (usize, usize),
-  pub line: (usize, usize),
-  pub column: (usize, usize),
-}
-
-impl TokenTrait<TokenType> for Token {
-  fn is_token_type(&self, value: TokenType) -> bool {
-    self.token_type == value
-  }
-
-  fn is_lexeme(&self, value: &str) -> bool {
-    self.lexeme == value
-  }
-}
-
-impl Token {
-  pub fn new(lexeme: String, token_type: TokenType, offset: (usize, usize), line: (usize, usize), column: (usize, usize)) -> Self {
-    Self { lexeme, token_type, offset, line, column }
-  }
-}
diff --git a/src/hydro/intrinsic/intrinsicmanager.rs b/src/hydro/intrinsic/intrinsicmanager.rs
index b405230..0fb2fad 100644
--- a/src/hydro/intrinsic/intrinsicmanager.rs
+++ b/src/hydro/intrinsic/intrinsicmanager.rs
@@ -3,7 +3,6 @@
 use crate::hydro::executioncontext::ExecutionContext;
 use crate::hydro::value::Value;
 use lazy_static::lazy_static;
 use std::collections::HashMap;
-use std::error::Error;
 use std::io;
 use std::io::Write;
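Below, main.rs gains a `run` subcommand wired to the new ocean frontend. As far as this patch goes, the intended flow would presumably be invoked as something like `ocean run examples/ocean/hello_world.sea` (binary name assumed here), with the source file defaulting to `main.sea`; `Ocean::compile` in the new compiler.rs then just lexes the file and dumps the token stream.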
diff --git a/src/main.rs b/src/main.rs
index d070f26..fa19a3f 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -2,6 +2,7 @@
 pub mod hydro;
 #[cfg(test)]
 mod tests;
 pub mod util;
+mod ocean;
 
 use crate::hydro::debugcontext::DebugContext;
 use crate::hydro::frontend::compiler::HydroTranslateType;
@@ -10,6 +11,7 @@
 use crate::hydro::Hydro;
 use crate::util::argsparser::Command;
 use std::env;
 use util::argsparser::{ArgsParser, Argument};
+use crate::ocean::Ocean;
 
 fn main() -> std::io::Result<()> {
   let args: Vec<String> = env::args().collect();
@@ -23,6 +25,11 @@
       .description("Print this help message"))
     .command(Command::new("version")
       .description("Print version information"))
+    .command(Command::new("run")
+      .arg(Argument::new("Source File")
+        .last()
+        .default("main.sea")
+        .help("The main source file to compile")))
     .command(Command::new("hydro-build")
       .arg(Argument::new("Output File")
         .default("main.h2o.bin")
@@ -101,6 +108,9 @@
           Err(e) => e.print_stacktrace(),
         }
       }
+      "run" => {
+        Ocean::compile(arguments.get("Source File").unwrap().as_str()).unwrap();
+      }
       _ => todo!("Unimplemented command :("),
     },
     None => {
diff --git a/src/ocean.rs b/src/ocean.rs
new file mode 100644
index 0000000..9af97ea
--- /dev/null
+++ b/src/ocean.rs
@@ -0,0 +1,3 @@
+mod frontend;
+
+pub struct Ocean {}
\ No newline at end of file
diff --git a/src/ocean/frontend.rs b/src/ocean/frontend.rs
new file mode 100644
index 0000000..3736772
--- /dev/null
+++ b/src/ocean/frontend.rs
@@ -0,0 +1,5 @@
+mod lexer;
+pub mod tokentype;
+pub mod compiler;
+mod parserphase1;
+mod ast;
\ No newline at end of file
diff --git a/src/ocean/frontend/ast.rs b/src/ocean/frontend/ast.rs
new file mode 100644
index 0000000..5b8d466
--- /dev/null
+++ b/src/ocean/frontend/ast.rs
@@ -0,0 +1,100 @@
+use crate::ocean::frontend::tokentype::TokenType;
+use crate::util::token::Token;
+
+pub struct Program {
+  pub statements: Vec<StatementNode>,
+}
+
+pub struct StatementNode {
+  pub annotations: Vec<Annotation>,
+  pub statement: Statement,
+}
+
+pub struct CompoundStatement {
+  pub left_curly: Token<TokenType>,
+  pub body: Vec<StatementNode>,
+  pub right_curly: Token<TokenType>,
+}
+
+pub struct Annotation {
+  pub token: Token<TokenType>,
+}
+
+pub enum Statement {
+  WhileLoop(WhileLoop),
+  ForLoop(ForLoop),
+  Loop(Loop),
+  Branch(Branch),
+  Match(Match),
+  Assignment(Assignment),
+  Function(Function),
+  Pack(Pack),
+  Union(Union),
+  Return(Return),
+  Break(Break),
+  Continue(Continue),
+  Use(Use),
+}
+
+pub struct WhileLoop {
+  pub while_token: Token<TokenType>,
+  pub body: CompoundStatement,
+}
+
+pub struct ForLoop {
+  pub for_token: Token<TokenType>,
+  pub iterator: ExpressionNode,
+  pub in_token: Token<TokenType>,
+  pub iterable: ExpressionNode,
+  pub body: CompoundStatement,
+}
+
+pub struct Loop {
+  pub loop_token: Token<TokenType>,
+  pub body: CompoundStatement,
+}
+
+pub struct Branch {
+  pub if_token: Token<TokenType>,
+  pub condition: ExpressionNode,
+  pub body: CompoundStatement,
+  pub else_branch: Option<ElseBranch>
+}
+
+pub struct ElseBranch {
+  pub else_token: Token<TokenType>,
+  pub body: Option<CompoundStatement>,
+  pub branch: Option<Box<Branch>>,
+}
+
+pub struct Match {}
+
+pub struct Assignment {}
+
+pub struct Function {}
+
+pub struct Pack {}
+
+pub struct Union {}
+
+pub struct Return {
+  pub return_token: Token<TokenType>,
+}
+
+pub struct Break {
+  pub break_token: Token<TokenType>,
+}
+
+pub struct Continue {
+  pub continue_token: Token<TokenType>,
+}
+
+pub struct Use {
+  pub use_token: Token<TokenType>,
+  pub root_token: Token<TokenType>,
+  pub path: Vec<(Token<TokenType>, Token<TokenType>)>, // will be (., id)
+}
+
+pub struct ExpressionNode {
+  pub tokens: Vec<Token<TokenType>>,
+}
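One note on the `Branch`/`ElseBranch` pair above: an `else if` chain is encoded by nesting, where an `ElseBranch` holds either a body (a plain `else` block) or a boxed `Branch` (an `else if`). A stand-alone illustration with simplified stand-in types — the real nodes carry tokens and `CompoundStatement` bodies, which are elided here:

```rust
// Illustration only (not in the patch): how `if a { } else if b { } else { }`
// would nest in the Branch/ElseBranch shape from ast.rs.
#[derive(Debug)]
struct Branch {
  condition: &'static str,        // stands in for ExpressionNode
  else_branch: Option<ElseBranch>,
}

#[derive(Debug)]
struct ElseBranch {
  body: Option<&'static str>,     // Some(..) for a plain `else` block
  branch: Option<Box<Branch>>,    // Some(..) for an `else if`
}

fn main() {
  let chain = Branch {
    condition: "a",
    else_branch: Some(ElseBranch {
      body: None,
      branch: Some(Box::new(Branch {
        condition: "b",
        else_branch: Some(ElseBranch { body: Some("else body"), branch: None }),
      })),
    }),
  };
  println!("{:#?}", chain);
}
```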
diff --git a/src/ocean/frontend/compiler.rs b/src/ocean/frontend/compiler.rs
new file mode 100644
index 0000000..973832b
--- /dev/null
+++ b/src/ocean/frontend/compiler.rs
@@ -0,0 +1,24 @@
+use std::{env, fs, io};
+use std::fs::File;
+use std::io::Read;
+use std::path::Path;
+use crate::ocean::frontend::lexer::lex;
+use crate::ocean::Ocean;
+
+impl Ocean {
+  pub fn compile(file_path: &str) -> Result<(), io::Error> {
+    let path = Path::new(file_path);
+    println!("Compiling '{}' (absolute '{:?}' from '{:?}')", path.display(), fs::canonicalize(path), env::current_dir());
+
+    let mut file = File::open(path)?;
+    let mut file_contents = String::new();
+    file.read_to_string(&mut file_contents)?;
+
+    let tokens = lex(file_contents);
+    for token in tokens {
+      println!("{}", token)
+    }
+
+    Ok(())
+  }
+}
\ No newline at end of file
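To make that token dump concrete: feeding the single line `use std.io` (terminated by a newline) through the lexer below and printing with the `Display` impl from src/util/token.rs should yield roughly the following, where the two numbers are each token's inclusive start/end character offsets (line and column tracking is still stubbed out at this stage, so it never shows up in the output):

```
<[Use] 'use' 0 2>
<[Identifier] 'std' 4 6>
<[Dot] '.' 7 7>
<[Identifier] 'io' 8 9>
<[Newline] '
' 10 10>
<[EndOfInput] '' 11 11>
```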
diff --git a/src/ocean/frontend/lexer.rs b/src/ocean/frontend/lexer.rs
new file mode 100644
index 0000000..ee2abd7
--- /dev/null
+++ b/src/ocean/frontend/lexer.rs
@@ -0,0 +1,383 @@
+use crate::ocean::frontend::tokentype::TokenType;
+use crate::util::token::Token;
+
+pub fn lex(input: String) -> Vec<Token<TokenType>> {
+  let input_chars = input.chars().collect::<Vec<char>>();
+  let input_length = input_chars.len();
+  let mut lexeme = String::new();
+  let mut index = 0;
+  let mut tokens = Vec::new();
+  let line_start = 1;
+  let line_end = 1;
+  let column_start = 1;
+  let column_end = 1;
+  while index < input_length {
+    let start_index = index;
+    let c = input_chars[index];
+    match c {
+      'A'..='Z' | 'a'..='z' | '_' => {
+        index += 1;
+        lexeme.push_str(&c.to_string());
+        while index < input_length {
+          let n = input_chars[index];
+          match n {
+            'A'..='Z' | 'a'..='z' | '0'..='9' | '_' => lexeme.push_str(&n.to_string()),
+            _ => {
+              index -= 1;
+              break;
+            }
+          }
+          index += 1;
+        }
+
+        //check against every other thing it could be
+        let token_type = match lexeme.as_str() {
+          "function" => TokenType::Function,
+          "i8" | "i16" | "i32" | "i64" | "i128" | "f32" | "f64" | "u8" | "u16" | "u32" | "u64" | "u128" | "string" | "auto" | "bool" | "ref" | "mut" | "lazy" | "char" => TokenType::Type,
+          "if" => TokenType::If,
+          "else" => TokenType::Else,
+          "return" => TokenType::Return,
+          "continue" => TokenType::Continue,
+          "while" => TokenType::While,
+          "break" => TokenType::Break,
+          "loop" => TokenType::Loop,
+          "union" => TokenType::Union,
+          "pack" => TokenType::Pack,
+          "for" => TokenType::For,
+          "in" => TokenType::In,
+          "as" => TokenType::As,
+          "use" => TokenType::Use,
+          "match" => TokenType::Match,
+          "true" => TokenType::True,
+          "false" => TokenType::False,
+          "let" => TokenType::Let,
+          _ => TokenType::Identifier
+        };
+
+        tokens.push(Token::new(
+          lexeme.clone(),
+          token_type,
+          (start_index, index),
+          (line_start, line_end),
+          (column_start, column_end),
+        ));
+
+        lexeme.clear();
+      }
+      '0'..='9' => {
+        lexeme.push_str(&c.to_string());
+        index += 1;
+        let mut decimal = false;
+        while index < input_length {
+          let n = input_chars[index];
+          match n {
+            '0'..='9' => lexeme.push_str(&n.to_string()),
+            '.' => {
+              if decimal {
+                index -= 1;
+                break;
+              }
+              lexeme.push_str(&n.to_string());
+              decimal = true;
+            }
+            _ => {
+              index -= 1;
+              break;
+            }
+          }
+          index += 1
+        }
+
+        if lexeme.ends_with('.') {
+          lexeme.pop();
+          index -= 1;
+        }
+
+        tokens.push(Token::new(
+          lexeme.clone(),
+          TokenType::Number,
+          (start_index, index),
+          (line_start, line_end),
+          (column_start, column_end)
+        ));
+        lexeme.clear();
+      }
+      '@' => {
+        lexeme.push_str(&c.to_string());
+        index += 1;
+        if index < input_length && input_chars[index] == '@' {
+          lexeme.push_str(&input_chars[index].to_string());
+          index += 1;
+          let mut found_end = false;
+          while index < input_length {
+            let n = input_chars[index];
+            match n {
+              '@' => {
+                index += 1;
+                if index < input_length && input_chars[index] == '@' {
+                  lexeme.push_str(&input_chars[index - 1].to_string());
+                  lexeme.push_str(&input_chars[index].to_string());
+                  found_end = true;
+                  break;
+                }
+                index -= 1;
+                lexeme.push_str(&n.to_string());
+              }
+              _ => lexeme.push_str(&n.to_string()),
+            }
+            index += 1;
+          }
+          if !found_end {
+            panic!("Unending block macro")
+          } else {
+            tokens.push(Token::new(
+              lexeme.clone(),
+              TokenType::Annotation,
+              (start_index, index),
+              (line_start, line_end),
+              (column_start, column_end),
+            ));
+          }
+        } else {
+          while index < input_length {
+            let n = input_chars[index];
+            match n {
+              '\n' => break,
+              _ => lexeme.push_str(&n.to_string()),
+            }
+            index += 1;
+          }
+          tokens.push(Token::new(
+            lexeme.clone(),
+            TokenType::Annotation,
+            (start_index, index),
+            (line_start, line_end),
+            (column_start, column_end),
+          ));
+        }
+        lexeme.clear();
+      }
+      '\"' | '\'' | '`' => {
+        let delim = c;
+        lexeme.push_str(&c.to_string());
+        index += 1;
+        let mut found_end = false;
+        while index < input_length {
+          let n = input_chars[index];
+          match n {
+            '\'' => {
+              if delim == '\'' {
+                found_end = true;
+                lexeme.push_str(&n.to_string());
+                break;
+              } else {
+                lexeme.push_str(&n.to_string())
+              }
+            }
+            '\"' => {
+              if delim == '\"' {
+                found_end = true;
+                lexeme.push_str(&n.to_string());
+                break;
+              } else {
+                lexeme.push_str(&n.to_string())
+              }
+            }
+            '`' => {
+              if delim == '`' {
+                found_end = true;
+                lexeme.push_str(&n.to_string());
+                break;
+              } else {
+                lexeme.push_str(&n.to_string())
+              }
+            }
+            '\\' => {
+              if index == input_length - 1 {
+                lexeme.push_str(&n.to_string());
+              } else {
+                index += 1;
+                let x = input_chars[index];
+                match x {
+                  'n' => lexeme.push_str(&"\n".to_string()),
+                  'r' => lexeme.push_str(&"\r".to_string()),
+                  't' => lexeme.push_str(&"\t".to_string()),
+                  //need to add escape characters for octal, hex, and unicode
+                  _ => lexeme.push_str(&x.to_string()),
+                }
+              }
+            }
+            _ => lexeme.push_str(&n.to_string()),
+          }
+          index += 1;
+        }
+
+        if !found_end {
+          panic!("Unending string")
+        } else if delim == '`' {
+          tokens.push(Token::new(
+            lexeme.clone(),
+            TokenType::InterpolatedString,
+            (start_index, index),
+            (line_start, line_end),
+            (column_start, column_end)
+          ))
+        } else {
+          tokens.push(Token::new(
+            lexeme.clone(),
+            TokenType::String,
+            (start_index, index),
+            (line_start, line_end),
+            (column_start, column_end)
+          ));
+        }
+        lexeme.clear();
+      }
+      '#' => {
+        if index < input_length - 1 && input_chars[index + 1] == '/' {
+          index += 1;
+          lexeme.push_str(&input_chars[index].to_string());
+          let mut found_end = false;
+          while index < input_length - 1 {
+            index += 1;
+            let n = input_chars[index];
+            match n {
+              '/' => {
+                lexeme.push_str(&n.to_string());
+                if index < input_length - 1 && input_chars[index + 1] == '#' {
+                  index += 1;
+                  lexeme.push_str(&input_chars[index].to_string());
+                  found_end = true;
+                  break;
+                }
+              }
+              _ => lexeme.push_str(&n.to_string()),
+            }
+          }
+          if !found_end {
+            panic!("Unending block comment")
+          }
+        } else {
+          while index < input_length - 1 {
+            index += 1;
+            let n = input_chars[index];
+            match n {
+              '\n' => {
+                index -= 1;
+                break;
+              }
+              _ => lexeme.push_str(&n.to_string()),
+            }
+          }
+        }
+        lexeme.clear();
+      }
+      '.' | ':' | '~' | '+' | '-' | '>' | '<' | '?' | '/' | '=' | '^' | '&' | '|' | '*' | '!' | '%' => {
+        lexeme.push_str(&input_chars[index].to_string());
+        index += 1;
+        while index < input_length - 1 && lexeme.len() < 3 {
+          let n = input_chars[index];
+          match n {
+            '.' | ':' | '~' | '+' | '-' | '>' | '<' | '?' | '/' | '=' | '^' | '&' | '|' | '*' | '!' | '%' => {
+              lexeme.push_str(&n.to_string());
+            }
+            _ => {
+              index -= 1;
+              break
+            },
+          }
+          index += 1;
+        }
+
+        let token_type = match lexeme.as_str() {
+          "." => TokenType::Dot,
+          ":" => TokenType::Colon,
+          "->" => TokenType::Arrow,
+          _ => TokenType::Symbol,
+        };
+        tokens.push(Token::new(
+          lexeme.clone(),
+          token_type,
+          (start_index, index),
+          (line_start, line_end),
+          (column_start, column_end)
+        ));
+        lexeme.clear();
+      }
+      ',' => tokens.push(Token::new(
+        ",".to_string(),
+        TokenType::Comma,
+        (start_index, index),
+        (line_start, line_end),
+        (column_start, column_end)
+      )),
+      '\n' => tokens.push(Token::new(
+        "\n".to_string(),
+        TokenType::Newline,
+        (start_index, index),
+        (line_start, line_end),
+        (column_start, column_end)
+      )),
+      '(' => tokens.push(Token::new(
+        "(".to_string(),
+        TokenType::LeftParen,
+        (start_index, index),
+        (line_start, line_end),
+        (column_start, column_end)
+      )),
+      ')' => tokens.push(Token::new(
+        ")".to_string(),
+        TokenType::RightParen,
+        (start_index, index),
+        (line_start, line_end),
+        (column_start, column_end)
+      )),
+      '[' => tokens.push(Token::new(
+        "[".to_string(),
+        TokenType::LeftSquare,
+        (start_index, index),
+        (line_start, line_end),
+        (column_start, column_end)
+      )),
+      ']' => tokens.push(Token::new(
+        "]".to_string(),
+        TokenType::RightSquare,
+        (start_index, index),
+        (line_start, line_end),
+        (column_start, column_end)
+      )),
+      '{' => tokens.push(Token::new(
+        "{".to_string(),
+        TokenType::LeftCurly,
+        (start_index, index),
+        (line_start, line_end),
+        (column_start, column_end)
+      )),
+      '}' => tokens.push(Token::new(
+        "}".to_string(),
+        TokenType::RightCurly,
+        (start_index, index),
+        (line_start, line_end),
+        (column_start, column_end)
+      )),
+      ' ' | '\t' | '\r' => {}
+      _ => tokens.push(Token::new(
+        input_chars[index].to_string(),
+        TokenType::Error,
+        (start_index, index),
+        (line_start, line_end),
+        (column_start, column_end)
+      ))
+    }
+    index += 1;
+  }
+
+  tokens.push(Token::new(
+    "".to_string(),
+    TokenType::EndOfInput,
+    (index, index),
+    (line_start, line_end),
+    (column_start, column_end),
+  ));
+  tokens
+}
\ No newline at end of file
diff --git a/src/ocean/frontend/parserphase1.rs b/src/ocean/frontend/parserphase1.rs
new file mode 100644
index 0000000..fd0afdf
--- /dev/null
+++ b/src/ocean/frontend/parserphase1.rs
@@ -0,0 +1,7 @@
+use crate::ocean::frontend::ast::Program;
+use crate::ocean::frontend::tokentype::TokenType;
+use crate::util::token::Token;
+
+pub fn parse_phase_one(tokens: Vec<Token<TokenType>>) -> Program {
+  todo!("parse statements; collect raw expression token runs for phase two")
+}
\ No newline at end of file
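`parse_phase_one` is still a stub, but the AST above already hints at the phase split: `ExpressionNode` holds a raw `Vec<Token<TokenType>>`, so phase one can collect expression tokens without committing to an operator grammar. A hypothetical sketch of that collection idea, using plain strings in place of tokens (none of these names are from the patch):

```rust
// Hypothetical sketch of the phase-one idea implied by ast.rs: statement
// keywords would be dispatched to dedicated parsers, while expression
// statements are kept as flat token runs to be re-parsed in phase two,
// once the operator precedence table is known.
fn split_expression_runs(tokens: &[&str]) -> Vec<Vec<String>> {
  let mut runs = Vec::new();
  let mut current = Vec::new();
  for &tok in tokens {
    match tok {
      // a newline ends the current expression statement run
      "\n" => {
        if !current.is_empty() {
          runs.push(std::mem::take(&mut current));
        }
      }
      _ => current.push(tok.to_string()),
    }
  }
  if !current.is_empty() {
    runs.push(current);
  }
  runs
}

fn main() {
  let tokens = ["x", "=", "1", "+", "2", "\n", "y", "=", "x", "*", "3", "\n"];
  for run in split_expression_runs(&tokens) {
    println!("{:?}", run); // each run is deferred to the phase-two parser
  }
}
```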
diff --git a/src/ocean/frontend/tokentype.rs b/src/ocean/frontend/tokentype.rs
new file mode 100644
index 0000000..fb090fe
--- /dev/null
+++ b/src/ocean/frontend/tokentype.rs
@@ -0,0 +1,44 @@
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub enum TokenType {
+  EndOfInput,
+  Error,
+  Newline,
+  //Comment,
+  Annotation,
+  String,
+  InterpolatedString,
+  Number,
+  Identifier,
+  True,
+  False,
+  Type,
+
+  Pack,
+  Union,
+  Function,
+  While,
+  Loop,
+  For,
+  In,
+  Match,
+  If,
+  Else,
+  Continue,
+  Break,
+  Return,
+  Use,
+  As,
+  Let,
+
+  LeftParen,
+  RightParen,
+  LeftSquare,
+  RightSquare,
+  LeftCurly,
+  RightCurly,
+  Dot,
+  Comma,
+  Colon,
+  Arrow,
+  Symbol,
+}
diff --git a/src/tests/hydro/frontend/token_tests.rs b/src/tests/hydro/frontend/token_tests.rs
index 3dd1d90..978c864 100644
--- a/src/tests/hydro/frontend/token_tests.rs
+++ b/src/tests/hydro/frontend/token_tests.rs
@@ -1,4 +1,4 @@
-use crate::hydro::frontend::token::{Token, TokenType};
-use crate::util::tokentrait::TokenTrait;
+use crate::hydro::frontend::tokentype::TokenType;
+use crate::util::token::{Token, TokenTrait};
 
 #[test]
diff --git a/src/util.rs b/src/util.rs
index 0e3c12d..6e9556f 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -1,5 +1,6 @@
 pub mod argsparser;
 pub mod dependencygraph;
 pub mod metrictracker;
-pub mod tokentrait;
+pub mod span;
+pub mod token;
 //pub mod errors;
diff --git a/src/util/token.rs b/src/util/token.rs
new file mode 100644
index 0000000..ba19953
--- /dev/null
+++ b/src/util/token.rs
@@ -0,0 +1,50 @@
+use std::fmt;
+use std::fmt::Debug;
+use crate::util::span::Spanned;
+
+pub trait TokenTrait<TokenType> {
+  fn is_token_type(&self, _: TokenType) -> bool;
+  fn is_lexeme(&self, _: &str) -> bool;
+}
+
+#[derive(Clone)]
+pub struct Token<TokenType: Copy + Clone + Debug + PartialEq> {
+  pub lexeme: String,
+  pub token_type: TokenType,
+  pub offset: (usize, usize),
+  pub line: (usize, usize),
+  pub column: (usize, usize),
+}
+
+impl<TokenType: Copy + Clone + Debug + PartialEq> TokenTrait<TokenType> for Token<TokenType> {
+  fn is_token_type(&self, value: TokenType) -> bool {
+    self.token_type == value
+  }
+
+  fn is_lexeme(&self, value: &str) -> bool {
+    self.lexeme == value
+  }
+}
+
+impl<TokenType: Copy + Clone + Debug + PartialEq> Spanned for Token<TokenType> {
+  fn get_span(&self) -> (usize, usize) {
+    self.offset
+  }
+}
+
+impl<TokenType: Copy + Clone + Debug + PartialEq> Token<TokenType> {
+  pub fn new(lexeme: String, token_type: TokenType, offset: (usize, usize), line: (usize, usize), column: (usize, usize)) -> Self {
+    Self { lexeme, token_type, offset, line, column }
+  }
+}
+
+impl<TokenType: Copy + Clone + Debug + PartialEq> fmt::Display for Token<TokenType> {
+  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+    write!(
+      f,
+      "<[{:?}] '{}' {} {}>",
+      self.token_type, self.lexeme, self.offset.0, self.offset.1
+    )
+  }
+}
diff --git a/src/util/tokentrait.rs b/src/util/tokentrait.rs
deleted file mode 100644
index a7cc47d..0000000
--- a/src/util/tokentrait.rs
+++ /dev/null
@@ -1,4 +0,0 @@
-pub trait TokenTrait<TokenType> {
-  fn is_token_type(&self, _: TokenType) -> bool;
-  fn is_lexeme(&self, _: &str) -> bool;
-}
diff --git a/standard_libraries/ocean/std/array.sea b/standard_libraries/ocean/std/array.sea
index 7580f45..912bdd6 100644
--- a/standard_libraries/ocean/std/array.sea
+++ b/standard_libraries/ocean/std/array.sea
@@ -1,31 +1,29 @@
-let window = func (value: auto T [], size: i64) -> (result: T[]):{
-  for i in 0..=(value.length - size) {
-    result ++= [value[i..size]]
-  }
-}
-
-let filter = func (value: auto T [], condition: func(T:bool)) -> (result: T[]):{
-  for v in value {
-    if condition(v) {
-      result ++= [v]
+function window(value: auto T[], size: u64) -> (result: T[][]) {
+    for i in 0..=(value.length - size) {
+        result ++= value[i..size]
     }
-  }
 }
-
-let transform = func (value: auto T [], transformer: func(T:auto U)) -> (result: U[]):{
-  for v in value {
-    result ++= [transformer(v)]
-  }
+function filter(value: auto T[], condition: function(T)->bool) -> (result: T[]) {
+    for v in value {
+        if condition(v) {
+            result ++= v
+        }
+    }
 }
-
-let reduce = func (value: auto T [], reducer_func: func(T,T:T)) -> (result: T):{
-  if value.length == 0 {
-    return
-  } else if value.length == 1 {
-    result = value[0]
-  } else {
-    result = reducer_func(reduce(value[0..(value.length // 2)], reducer_func), reduce(value[(value.length // 2)..value.length], reducer_func))
-  }
+function transform(value: auto T[], transformer: function(T) -> auto U) -> (result: U[]) {
+    for v in value {
+        result ++= v.transformer()
+    }
 }
-
+function reduce(value: auto T[], reducer: function(T, T) -> T) -> (result: T) {
+    if value.length == 0 {
+        return
+    } else if value.length == 1 {
+        result = value[0]
+    } else {
+        result = reducer(reduce(value[0..(value.length // 2)], reducer), reduce(value[(value.length // 2)..value.length], reducer))
+    }
+}
\ No newline at end of file
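Finally, a note on the `Token` move into src/util/token.rs: making the token generic over its token-type enum is what lets the hydro and ocean frontends share a single token definition. A stand-alone toy version to show the shape (toy enum here; the real code is in the diff above):

```rust
use std::fmt::Debug;

// Toy stand-in for src/util/token.rs: the same generic Token can wrap the
// hydro TokenType, the ocean TokenType, or any other token-kind enum.
#[derive(Copy, Clone, Debug, PartialEq)]
enum ToyTokenType { Identifier, Number }

#[derive(Clone, Debug)]
struct Token<TokenType: Copy + Clone + Debug + PartialEq> {
  lexeme: String,
  token_type: TokenType,
}

fn main() {
  let tok = Token { lexeme: "count".to_string(), token_type: ToyTokenType::Identifier };
  // the TokenTrait-style check from the patch boils down to an enum compare
  assert!(tok.token_type == ToyTokenType::Identifier);
  assert!(tok.token_type != ToyTokenType::Number);
  println!("{:?}", tok);
}
```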