From 42ebea54e35062a1a998f25e8fe019a5e0205fd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miquel=20Sabat=C3=A9=20Sol=C3=A0?= Date: Thu, 19 Dec 2024 09:59:34 +0100 Subject: [PATCH] Implement the .incbin control statement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This also forced us to add the current working directory to the `Assembler::assemble` public function, as otherwise this control statement and others wouldn't know how to resolve relative paths. Signed-off-by: Miquel Sabaté Solà --- crates/nasm/src/main.rs | 27 +++- .../fuzz_targets/fuzz_target_assembler.rs | 2 +- lib/xixanta/src/assembler.rs | 141 +++++++++++++++++- lib/xixanta/src/node.rs | 1 + lib/xixanta/src/opcodes.rs | 1 + 5 files changed, 162 insertions(+), 10 deletions(-) diff --git a/crates/nasm/src/main.rs b/crates/nasm/src/main.rs index e6d7c45..fd97ca1 100644 --- a/crates/nasm/src/main.rs +++ b/crates/nasm/src/main.rs @@ -1,7 +1,8 @@ -use anyhow::Result; +use anyhow::{bail, Context, Result}; use clap::Parser as ClapParser; use std::fs::File; use std::io::{self, Read, Write}; +use std::path::Path; use xixanta::assembler::Assembler; use xixanta::mapping::{Mapping, EMPTY, NROM, NROM65}; @@ -31,10 +32,24 @@ struct Args { fn main() -> Result<()> { let args = Args::parse(); - // Select the input stream. - let input: Box = match args.file { - Some(file) => Box::new(File::open(file)?), - None => Box::new(std::io::stdin()), + // Select the input stream and the current working directory. + let input: Box; + let working_directory = match &args.file { + Some(file) => { + let path = Path::new(file); + if !path.is_file() { + bail!("Input file must be a valid file"); + } + input = Box::new(File::open(file)?); + + path.parent() + .with_context(|| String::from("Failed to find directory for given file"))? + } + None => { + input = Box::new(std::io::stdin()); + &std::env::current_dir() + .with_context(|| String::from("Could not fetch current directory"))? + } }; // Select the output stream. @@ -60,7 +75,7 @@ fn main() -> Result<()> { // And assemble. let mut assembler = Assembler::new(mapping); - match assembler.assemble(input) { + match assembler.assemble(working_directory.to_path_buf(), input) { Ok(bundles) => { for b in bundles { for i in 0..b.size { diff --git a/lib/xixanta/fuzz/fuzz_targets/fuzz_target_assembler.rs b/lib/xixanta/fuzz/fuzz_targets/fuzz_target_assembler.rs index 111bae9..db33241 100644 --- a/lib/xixanta/fuzz/fuzz_targets/fuzz_target_assembler.rs +++ b/lib/xixanta/fuzz/fuzz_targets/fuzz_target_assembler.rs @@ -6,5 +6,5 @@ use xixanta::mapping::EMPTY; fuzz_target!(|data: &[u8]| { let mut asm = Assembler::new(EMPTY.to_vec()); - let _ = asm.assemble(data); + let _ = asm.assemble(std::env::current_dir().unwrap().to_path_buf(), data); }); diff --git a/lib/xixanta/src/assembler.rs b/lib/xixanta/src/assembler.rs index 986f811..be10404 100644 --- a/lib/xixanta/src/assembler.rs +++ b/lib/xixanta/src/assembler.rs @@ -6,8 +6,10 @@ use crate::opcodes::{AddressingMode, INSTRUCTIONS}; use crate::parser::Parser; use std::cmp::Ordering; use std::collections::HashMap; +use std::fs::File; use std::io::Read; use std::ops::Range; +use std::path::PathBuf; /// The mode in which a literal is expressed. #[derive(Clone, PartialEq)] @@ -68,6 +70,11 @@ pub struct Assembler { current_segment: usize, pending: Vec, labels_seen: usize, + + // Stack of directories. The last directory is the current one, whereas the + // other elements come from previous contexts. This way we can implement a + // file that imports another file which in turn imports another file, etc. + directories: Vec, } impl Assembler { @@ -85,10 +92,23 @@ impl Assembler { current_segment: 0, pending: vec![], labels_seen: 0, + directories: vec![], } } - pub fn assemble(&mut self, reader: impl Read) -> Result, Vec> { + /// Read the contents from the `reader` as a source file and produce a list + /// of bundles that can be formatted as binary data. You also need to pass + /// the initial working directory `init_directory`, as otherwise control + /// statements like ".import" or ".incbin" wouldn't know how to resolve + /// relative paths. + pub fn assemble( + &mut self, + init_directory: PathBuf, + reader: impl Read, + ) -> Result, Vec> { + // Push the initial directory into our stack of directories. + self.directories.push(init_directory); + // First of all, parse the input so we get a list of nodes we can work // with. let mut parser = Parser::default(); @@ -870,6 +890,9 @@ impl Assembler { self.push_evaluated_arguments(node, 2) } NodeType::Control(ControlType::Segment) => self.switch_to_segment(node), + NodeType::Control(ControlType::IncBin) => { + self.incbin(node.args.as_ref().unwrap().first().unwrap()) + } _ => Err(EvalError { line: node.value.line, message: format!( @@ -881,6 +904,90 @@ impl Assembler { } } + // Push as many bundles as bytes are in the given file path. If there is any + // issue with reading the given file, or the parameter is given in a weird + // format, it will error out. + fn incbin(&mut self, node: &PNode) -> Result<(), EvalError> { + let value = &node.value.value; + + // Validate the path literal. + if value.len() < 3 || !value.starts_with('"') || !value.ends_with('"') { + return Err(EvalError { + line: node.value.line, + message: format!( + "path has to be written inside of double quotes ('{}' given instead)", + value, + ), + global: false, + }); + } + + // The '.incbin' control assumes that paths are relative to the + // directory of the current file. Hence, in order to make subsequent + // `File` operations work in this way, set the current directory now. + if let Err(e) = std::env::set_current_dir(self.directories.last().unwrap()) { + return Err(EvalError { + line: node.value.line, + message: format!("could not move to the directory of '{}': {}", value, e), + global: false, + }); + } + + // Fetch the actual path. + let path = &value[1..value.len() - 1].trim(); + let file = match File::open(path) { + Ok(f) => f, + Err(e) => { + return Err(EvalError { + global: false, + line: node.value.line, + message: format!("could not include binary data: {}", e), + }) + } + }; + + // Ensure that the included binary data is within reason. + match file.metadata() { + Ok(metadata) => { + // Note that we cannot assume that it's always going to be + // included in some specific mapping type (e.g. CHR-ROM vs + // CHR-RAM). Hence, let's force that nothing above 512KB can be + // included at face value. If this really surpasses the actual + // limit on where it's included, then it's going to show up at a + // later check. + if metadata.len() > 512 * 1024 { + return Err(EvalError { + global: false, + line: node.value.line, + message: format!("file '{}' is too big", path), + }); + } else if metadata.len() == 0 { + return Err(EvalError { + global: false, + line: node.value.line, + message: format!("trying to include an empty file ('{}')", path), + }); + } + } + Err(e) => { + return Err(EvalError { + global: false, + line: node.value.line, + message: format!("could not include binary data: {}", e), + }) + } + } + + // And finally just push each byte from the given file as a fill bundle. + for byte in file.bytes() { + match byte { + Ok(b) => self.push_bundle(Bundle::fill(b), node)?, + Err(_) => break, + } + } + Ok(()) + } + fn evaluate_control_expression(&mut self, node: &PNode) -> Result { match node.node_type { NodeType::Control(ControlType::Hibyte) => self.evaluate_byte(node, true), @@ -1362,7 +1469,12 @@ mod tests { asm.current_mapping = 1; // Grab the result passed the initial header. - let res = &asm.assemble(line.as_bytes()).unwrap()[0x10..]; + let res = &asm + .assemble( + std::env::current_dir().unwrap().to_path_buf(), + line.as_bytes(), + ) + .unwrap()[0x10..]; assert_eq!(res.len(), 1); @@ -1388,7 +1500,10 @@ mod tests { global: bool, message: &str, ) { - let res = asm.assemble(line.as_bytes()); + let res = asm.assemble( + std::env::current_dir().unwrap().to_path_buf(), + line.as_bytes(), + ); let msg = if global { format!("{} error: {}.", id, message) } else { @@ -1490,6 +1605,7 @@ adc $Four asm.current_mapping = 1; let res = &asm .assemble( + std::env::current_dir().unwrap().to_path_buf(), r#" .scope One ; This is a comment adc #Variable @@ -1529,6 +1645,7 @@ adc #Another::Variable asm.current_mapping = 1; let res = &asm .assemble( + std::env::current_dir().unwrap().to_path_buf(), r#" Variable = 4 adc Variable @@ -1890,6 +2007,7 @@ lda #Scope::Variable asm.current_mapping = 1; let res = &asm .assemble( + std::env::current_dir().unwrap().to_path_buf(), r#" nop @hello: @@ -1925,6 +2043,7 @@ nop asm.current_mapping = 1; let res = &asm .assemble( + std::env::current_dir().unwrap().to_path_buf(), r#" nop : @@ -1993,6 +2112,7 @@ nop asm.current_mapping = 1; let res = &asm .assemble( + std::env::current_dir().unwrap().to_path_buf(), r#" nop : @@ -2057,6 +2177,7 @@ nop asm.current_mapping = 1; let res = &asm .assemble( + std::env::current_dir().unwrap().to_path_buf(), r#" nop @hello: @@ -2090,6 +2211,7 @@ nop asm.current_mapping = 1; let res = &asm .assemble( + std::env::current_dir().unwrap().to_path_buf(), r#"nop .proc hello nop @@ -2130,6 +2252,7 @@ nop asm.current_mapping = 1; let res = &asm .assemble( + std::env::current_dir().unwrap().to_path_buf(), r#" .scope Vars Variable = 4 @@ -2168,6 +2291,7 @@ nop asm.current_mapping = 1; let res = &asm .assemble( + std::env::current_dir().unwrap().to_path_buf(), r#" Var = $2002 lda #.lobyte(Var) @@ -2197,6 +2321,7 @@ lda #.hibyte(Var) asm.current_mapping = 1; let res = &asm .assemble( + std::env::current_dir().unwrap().to_path_buf(), r#" lda #42 @@ -2229,6 +2354,7 @@ MACRO asm.current_mapping = 1; let res = asm .assemble( + std::env::current_dir().unwrap().to_path_buf(), r#" lda #42 @@ -2257,6 +2383,7 @@ MACRO asm.current_mapping = 1; let res = asm .assemble( + std::env::current_dir().unwrap().to_path_buf(), r#" lda #42 @@ -2285,6 +2412,7 @@ MACRO(1, 2) asm.current_mapping = 1; let res = &asm .assemble( + std::env::current_dir().unwrap().to_path_buf(), r#" lda #42 @@ -2317,6 +2445,7 @@ MACRO(2) asm.current_mapping = 1; let res = asm .assemble( + std::env::current_dir().unwrap().to_path_buf(), r#" lda #42 @@ -2346,6 +2475,7 @@ MACRO(1) asm.current_mapping = 1; let res = asm .assemble( + std::env::current_dir().unwrap().to_path_buf(), r#" Var = 3 lda #42 @@ -2376,6 +2506,7 @@ MACRO(1) asm.current_mapping = 1; let res = &asm .assemble( + std::env::current_dir().unwrap().to_path_buf(), r#" .macro WRITE_PPU_DATA address, value bit $2002 ; PPUSTATUS @@ -2487,6 +2618,7 @@ nop asm.mappings[0].offset = 6; let bundles = &asm .assemble( + std::env::current_dir().unwrap().to_path_buf(), r#" .segment "ONE" nop @@ -2565,6 +2697,7 @@ nop asm.mappings[0].offset = 6; let bundles = &asm .assemble( + std::env::current_dir().unwrap().to_path_buf(), r#" .segment "ONE" lala: @@ -2610,6 +2743,7 @@ code: asm.mappings[0].offset = 6; let bundles = &asm .assemble( + std::env::current_dir().unwrap().to_path_buf(), r#" .segment "ONE" .addr code @@ -2656,6 +2790,7 @@ code: asm.current_mapping = 1; let res = &asm .assemble( + std::env::current_dir().unwrap().to_path_buf(), r#" .scope Vars .segment "CODE" diff --git a/lib/xixanta/src/node.rs b/lib/xixanta/src/node.rs index 993d9c1..bab9420 100644 --- a/lib/xixanta/src/node.rs +++ b/lib/xixanta/src/node.rs @@ -137,6 +137,7 @@ pub enum ControlType { Byte, Word, Addr, + IncBin, } /// The PNode type. diff --git a/lib/xixanta/src/opcodes.rs b/lib/xixanta/src/opcodes.rs index 1a2b318..24cbade 100644 --- a/lib/xixanta/src/opcodes.rs +++ b/lib/xixanta/src/opcodes.rs @@ -730,6 +730,7 @@ lazy_static! { functions.insert(String::from(".word"), Control { control_type: ControlType::Word, has_identifier: false, required_args: None, touches_context: false }); functions.insert(String::from(".dw"), Control { control_type: ControlType::Word, has_identifier: false, required_args: None, touches_context: false }); functions.insert(String::from(".addr"), Control { control_type: ControlType::Addr, has_identifier: false, required_args: None, touches_context: false }); + functions.insert(String::from(".incbin"), Control { control_type: ControlType::IncBin, has_identifier: false, required_args: Some(1), touches_context: false }); functions };