From d822b4b3664bd97ee7f8d0f4030fce2e0eaa6e53 Mon Sep 17 00:00:00 2001 From: "Nikhil R." Date: Mon, 15 Jul 2024 16:21:17 +0530 Subject: [PATCH] Got `helloworld.fys` to compile with stub. Working on `walker`. Changed debug line number to `u16` from `u32`. --- src/args.rs | 8 ++- src/core/exec/frame.rs | 15 ++--- src/core/exec/mod.rs | 5 +- src/emit/walker.rs | 135 ++++++++++++++++++++++++++++++++----- src/main.rs | 10 ++- src/utils.rs | 1 + test/fys/helloworld.fys | 3 +- test/out/fys/helloworld.fr | Bin 0 -> 72 bytes 8 files changed, 142 insertions(+), 35 deletions(-) create mode 100644 test/out/fys/helloworld.fr diff --git a/src/args.rs b/src/args.rs index 3aabdd7..ad9821d 100644 --- a/src/args.rs +++ b/src/args.rs @@ -45,7 +45,7 @@ pub enum Commands { #[clap(alias = "vh")] VerifyHeader { path: String }, /// Load and execute the given file. - #[clap(alias = "x")] + #[clap(visible_alias = "x")] Exec { /// Path to the file which needs to be executed. filepath: String, @@ -59,7 +59,7 @@ pub enum Commands { /// Command line args passed to main. cmdargs: Vec }, - #[clap(alias = "asm")] + #[clap(visible_alias = "asm")] /// Assemble a source file to bytecode. If debug flag is set, line numbers are emitted. Assemble { /// Path to the assembly file, or a glob matching assembly files (requires --all flag) @@ -74,6 +74,8 @@ pub enum Commands { #[clap(alias = "mc")] MonoCompile { /// Path to the source file. - path: String + path: String, + /// Path to the output file. + output: Option } } \ No newline at end of file diff --git a/src/core/exec/frame.rs b/src/core/exec/frame.rs index 0117483..2eb0ab9 100644 --- a/src/core/exec/frame.rs +++ b/src/core/exec/frame.rs @@ -12,7 +12,7 @@ pub struct CallFrame { /// Id of the function for debugging purposes. function_id: usize, /// Line number updated whenever LINENUMBER instruction is encountered. - debug_lnum: u32, + debug_lnum: u16, /// Vector of base types; indices simulate registers. 
pub(super) regs: Vec>, /// The register to write the return value into. @@ -109,12 +109,9 @@ impl CallFrame { }) } - #[deprecated(since = "0.2.6", note = "Formerly common to write and drop. Prefer checked variant whenever needed.")] - pub fn _write_unchecked(&mut self, rid: usize, val: Option, rc: &mut RefCounter) -> FResult<()>{ - if let Some(btype) = self.regs[rid].take() { - Self::cleanup_value(btype, rc)?; - } - self.regs[rid] = val; + pub fn writeopt_register(&mut self, rid: u8, val: Option, rc: &mut RefCounter) -> FResult<()>{ + self.drop_register(rid, rc)?; + self.regs[rid as usize] = val; Ok(()) } @@ -305,12 +302,12 @@ impl CallFrame { /// Set the line number for the frame; useful for debugging purposes. /// All instructions betweeen two successive calls of this function will be regarded as having line `debug_lnum`, - pub fn set_lineno(&mut self, debug_lnum: u32) { + pub fn set_lineno(&mut self, debug_lnum: u16) { self.debug_lnum = debug_lnum; } /// Get line number debug-info for this frame, i.e, value passed to the last call of `set_lineno`. 
- pub fn lineno(&self) -> u32 { + pub fn lineno(&self) -> u16 { self.debug_lnum } diff --git a/src/core/exec/mod.rs b/src/core/exec/mod.rs index 7d95d64..0686796 100644 --- a/src/core/exec/mod.rs +++ b/src/core/exec/mod.rs @@ -239,8 +239,7 @@ impl WorkerThread { let cur_frame = &mut self.stack[stack_top-1]; if let Some(ret_reg) = cur_frame.rslot { let val = frame.take_register(r1, &mut self.refc)?; - let ret_reg = ret_reg as usize; - cur_frame._write_unchecked(ret_reg, val, &mut self.refc)?; + cur_frame.writeopt_register(ret_reg, val, &mut self.refc)?; cur_frame.rslot = None; } } @@ -432,7 +431,7 @@ impl WorkerThread { }, op::NOP => {}, op::DBGLN => { - let line_number = slvw.get_u32(); + let line_number = slvw.get_u16(); cur_frame.set_lineno(line_number); }, op::ASSERT => { diff --git a/src/emit/walker.rs b/src/emit/walker.rs index 0ee7baa..cd1216a 100644 --- a/src/emit/walker.rs +++ b/src/emit/walker.rs @@ -1,9 +1,10 @@ //! Module to traverse parsed S-expressions (AST) and generate VM modules. //! Walkers in this module emit equivalent bytecode. 
-use log::{debug}; -use std::{collections::HashMap, fs::File, io::Seek}; -use crate::{core::{module::{ExternDecl, FuncDecl}, op::{self, DoubleRegst, Id16Reg, QuadrupleRegst, VariadicRegst}}, utils::{AsBytes, OutBuf}}; +use std::path::Path; +use log::debug; +use std::{collections::HashMap, fs::{File, OpenOptions}, io::{Seek, Write}}; +use crate::{core::{module::{ExternDecl, FuncDecl, MAGIC}, op::{self, DoubleRegst, Id16Reg, QuadrupleRegst, VariadicRegst}}, utils::{AsBytes, OutBuf}}; use super::{Sexpr, SexprKind, TextualLocation, TlError, TlErrorType, Token}; #[derive(Debug, Clone)] @@ -61,7 +62,7 @@ impl Scope<'_> { #[derive(Debug)] pub struct CompileUnit { - functions: Vec, + functions: Vec<(String, FuncDecl)>, externfns: Vec, constants: HashMap, xusenames: HashMap, @@ -94,6 +95,11 @@ impl CompileUnit { content.write(&mut self.tmpfile).map_err(CompileUnit::_write_error) } + fn emit_lineno(&mut self, sx: &Sexpr) -> Result<(), TlError> { + self.tmpfile.write_u8(op::DBGLN).map_err(CompileUnit::_write_error)?; + self.tmpfile.write_u16(sx.loc.line_no as u16).map_err(CompileUnit::_write_error) + } + /// Get the stream position of the underlying file. fn offset(&mut self) -> u32 { self.tmpfile.stream_position().expect("CompileUnit temporary file should have stream_position") as u32 @@ -110,8 +116,77 @@ impl CompileUnit { } } - fn finish(self) -> Result<(), std::io::Error> { - todo!("Write header and copy out contents from temporary file. An important step here is to fix all incorrect offsets generated by walking.") + /// Finish code-generation by creating the header, copying contents of instruction dump, and fixing all code-offsets. + pub fn finish(mut self, path: F) -> Result<(), std::io::Error> + where F: AsRef { + let mut file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(true) + .open(path)?; + + // TODO: Think of a better way of handling constants that doesn't need this, but at the same time doesn't make any copies of Token. 
+ // Token can potentially be incredibly large, since it can store file-size strings. This is important. + let nconsts = self.constants.len(); + let mut v = vec![Token::Boolean(false); nconsts]; + for (token, idx) in self.constants { + v[idx] = token; + } + + file.write_all(&MAGIC)?; + file.write_u16(nconsts as u16)?; + + for elm in v { + match elm { + Token::Integer(v) => { + file.write_u8(1)?; + file.write_u64(v as u64)?; + }, + Token::Float(v) => { + file.write_u8(2)?; + file.write_u64(v.to_bits())?; + }, + Token::Char(v) => { + file.write_u8(3)?; + file.write_u32(v as u32)?; + }, + Token::Str(v) => { + file.write_u8(4)?; + file.write_u16(v.len() as u16)?; + file.write_str(&v)?; + }, + Token::Symbol(v) => panic!("Somehow `{v}` made it into the constant pool. A symbol can't be a constant; it was checked for. Only literal atoms are constants."), + Token::Boolean(_)=> panic!("Boolean constants aren't even inserted into the pool in the first place. Somehow one made it in there.") + } + } + + file.write_u16(self.externfns.len() as u16)?; + for elm in self.externfns { + file.write_u8(elm.native as u8)?; + let s = format!("{}:{}", elm.module_path, elm.func_name); + file.write_u16(s.len() as u16)?; + file.write_str(&s)?; + } + + file.write_u16(self.functions.len() as u16)?; + let header_end = (file.stream_position()? as u32) + (self.functions.iter().fold(0, |acc, x| {acc + 7 + x.0.len()}) as u32); + for (fname, fdc) in self.functions { + file.write_u8(fname.len() as u8)?; + file.write_str(fname.as_str())?; + file.write_u8(fdc.nparam)?; + file.write_u8(fdc.nregs)?; + file.write_u32(header_end + fdc.offset)?; + debug!("Wrote function {fname} {fdc:?}") + } + + let p = self.tmpfile.stream_position()?; + self.tmpfile.rewind()?; + if std::io::copy(&mut self.tmpfile, &mut file)? 
!= p { + eprintln!("Not all bytes written!"); + } + + Ok(()) } } @@ -188,11 +263,17 @@ fn visit_define_form<'a>(mut it: impl Iterator, cu: &mut Compi SexprKind::Atom(Token::Symbol(s)) => { let tok = visit_constant(it, loc) .map_err(|e| e.aug("Looking for literal atom as body for constant-type `define` form."))?; - let id = { - let new_id = cu.constants.len(); - *cu.constants.entry(tok).or_insert(new_id) + let sym = match tok { + Token::Integer(v) if (-0x80..=0x7f).contains(&v) => SymbolType::NamedInteger { value: v as i8 }, + Token::Boolean(v) => SymbolType::NamedInteger { value: v as i8 }, + Token::Symbol(_) => panic!("Somehow the token which was already checked for is now a symbol."), + tok => { + let new_id = cu.constants.len(); + let new_id = *cu.constants.entry(tok).or_insert(new_id); + SymbolType::NamedConstant { pool_index: new_id as u16 } + } }; - sc.bind(s, SymbolType::NamedConstant { pool_index: id as u16 }) + sc.bind(s, sym) .map_err(|_| TlError { etype: TlErrorType::ReboundName, msg: format!("Constant-type define form symbol `{s}` was already bound."), @@ -214,7 +295,7 @@ fn visit_define_form<'a>(mut it: impl Iterator, cu: &mut Compi offset: 0, }; let index = cu.functions.len() as u16; - cu.functions.push(fdc); + cu.functions.push((name.to_string(), fdc)); sc.bind(name, SymbolType::InternalFunction { index }) .map_err(|_| TlError { etype: TlErrorType::ReboundName, @@ -308,10 +389,14 @@ fn visit_root_list(ast: &Sexpr, cu: &mut CompileUnit, sc: &mut Scope<'_>) -> Res /// /// Then emit `LDC` to store value into register `r`. 
fn eat_literal_atom(t: Token, cu: &mut CompileUnit, r1: u8) -> Result<(),TlError> { - if let Token::Integer(i) = t { - if i > -0x80 && i < 0x7f { + match t { + Token::Integer(i) if i > -0x80 && i < 0x7f => { + return cu.emit(op::LDI, DoubleRegst {r1: (i as i8) as u8, r2: r1}); + }, + Token::Boolean(i) => { return cu.emit(op::LDI, DoubleRegst {r1: (i as i8) as u8, r2: r1}); } + _ => {} } let id = cu.constants.len(); let id = *cu.constants.entry(t).or_insert(id) as u16; @@ -375,7 +460,7 @@ fn walk_define_form(v: Vec, cu: &mut CompileUnit, sc: &mut Scope<'_>) -> .map_err(|e| e.aug("Looking for (func-name param..) symbol list as function-type `define` head."))?; let fidx = if let SymbolType::InternalFunction { index } = sc.resolve(&name).expect("Function name should already be bound.") { - cu.functions[index as usize].offset = cu.offset(); + cu.functions[index as usize].1.offset = cu.offset(); index as usize } else { panic!("Somehow function name {name} was bound as something that's not a function"); @@ -387,7 +472,7 @@ fn walk_define_form(v: Vec, cu: &mut CompileUnit, sc: &mut Scope<'_>) -> if s == "None" { cu.tmpfile.write_u8(op::VRET).map_err(CompileUnit::_write_error) } else if let Some(SymbolType::NamedConstant { pool_index }) = sc.resolve(&s) { - let fdc = &mut cu.functions[fidx]; + let fdc = &mut cu.functions[fidx].1; fdc.nregs = fdc.nregs.max(1); cu.emit(op::LDC, Id16Reg { id: pool_index, r1: 0 })?; @@ -410,7 +495,7 @@ fn walk_define_form(v: Vec, cu: &mut CompileUnit, sc: &mut Scope<'_>) -> }, // Literal atom. 
SexprKind::Atom(t) => { - let fdc = &mut cu.functions[fidx]; + let fdc = &mut cu.functions[fidx].1; fdc.nregs = fdc.nregs.max(1); eat_literal_atom(t, cu, 0)?; cu.emit_simple(op::RET, 0) @@ -429,12 +514,13 @@ fn walk_define_form(v: Vec, cu: &mut CompileUnit, sc: &mut Scope<'_>) -> })?; } let r = walk_regular_list(v, &mut ralloc, cu, sc, c.loc, false)?; + cu.functions[fidx].1.nregs = ralloc.max; cu.emit_simple(op::RET, r) }, } } -fn check_regs_adjacent(v: &Vec) -> bool { +fn check_regs_adjacent(v: &[u8]) -> bool { for (a, b) in v.iter().zip(v.iter().skip(1)) { if (b-a) != 1 { return false; @@ -521,6 +607,8 @@ fn walk_regular_list( loc, })?; + cu.emit_lineno(&s0).map_err(|e| e.aug("Trying to emit line number debug information"))?; + let (callable_r, is_temp) = match s0.kind { SexprKind::Atom(Token::Symbol(s)) => { match s.as_str() { @@ -584,7 +672,7 @@ fn walk_extern_symbol( cu: &mut CompileUnit ) -> Result, TlError> { let a = cu.externmap.get(s).copied().or_else(|| { - let (a, b) = s.split_once(':')?; + let (a, b) = s.rsplit_once(':')?; let (mpath, native) = cu.xusenames.get(a)?; let id = cu.externfns.len(); cu.externfns.push(ExternDecl::new_extern(mpath, b, *native)); @@ -600,6 +688,14 @@ fn walk_extern_symbol( Ok(Some(r1)) } +/// A 'regular' symbol is one which is either an external symbol, `None`, or a name in current scope. +/// Importantly, special forms are _not regular_, and are contextually matched. Effectively, all 'reserved symbols' used in special forms are _soft keywords_. +/// To illustrate this, note that the following is perfectly valid: +/// ```scheme +/// (define print "Yo!") +/// ; The first print is matched as the reserved symbol for print form, while the second is a regular symbol. +/// (define (main) (print print)) +/// ``` /// Returns true if register was allocated, false if symbol refers to a named value previously allocated. fn walk_regular_symbol( s: String, @@ -642,6 +738,9 @@ fn walk_regular_symbol( } } +/// Walk a regular S-expr `ast`. 
+/// Returns the register containing the eval value of `ast` and a boolean flag indicating whether the register is 'temporary'. +/// All registers are temporary, unless they contain bound values (or, in other words, are bound to names). fn walk_regular_sexpr( ast: Sexpr, ralloc: &mut StackAllocator, diff --git a/src/main.rs b/src/main.rs index 006344e..3f95834 100644 --- a/src/main.rs +++ b/src/main.rs @@ -181,7 +181,9 @@ fn main() -> ExitCode { }); } }, - args::Commands::MonoCompile { path } => { + args::Commands::MonoCompile { path, output } => { + let output = output.map(PathBuf::from).unwrap_or(Path::new(&path).with_extension("fr")); + trace!("Input: {path}, Output: {}", output.display()); let file = File::open(path).expect("Failed to open file"); let mut reader = BufReader::new(file); let ast = match emit::parse(&mut reader) { @@ -202,6 +204,12 @@ fn main() -> ExitCode { }, Ok(cu) => { debug!("CU:\n{cu:#?}"); + if let Err(e) = cu.finish(output) { + error!("Failed to finish code generation."); + eprintln!("{e}"); + return ExitCode::FAILURE; + } + info!("Sucessfully completed codegen."); } } } diff --git a/src/utils.rs b/src/utils.rs index b9ce7b9..f9221b9 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -112,6 +112,7 @@ impl OutBuf for T { self.write_all(&buf) } + /// This method **does not** write string length; only the content as bytes. fn write_str(&mut self, v: &str) -> IoResult<()> { self.write_all(v.as_bytes()) } diff --git a/test/fys/helloworld.fys b/test/fys/helloworld.fys index ddea16d..7693fa6 100644 --- a/test/fys/helloworld.fys +++ b/test/fys/helloworld.fys @@ -1,6 +1,7 @@ ; A simple program to print "Hello World!" ; `main` is the entry point. +(define (my-print x) None) (define (main) ; Print the string literal to stdout. 
- (print "Hello World!") + (my-print "Hello World!") ) \ No newline at end of file diff --git a/test/out/fys/helloworld.fr b/test/out/fys/helloworld.fr new file mode 100644 index 0000000000000000000000000000000000000000..dad1cc168d766fef37c74a9a31b324028f6e468d GIT binary patch literal 72 zcmcE9ERxK?$il$mk(!f}uMnPJl#`;!z`(%7kz1)-P?VWh!pI2Z8nfgkX67+40huQ9 Wfk2W?7)U5EFfa--FfuX