From 9165d3f2016550f1e93f183fa5d40807388270a2 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 5 Sep 2024 00:57:40 -0400 Subject: [PATCH] Convert exit to syscall instead of VM instruction --- api/syscalls.json | 16 ++++++ doc/syscalls.md | 8 +++ ncc/include/assert.h | 2 +- ncc/include/stdlib.h | 3 +- ncc/include/uvm/syscalls.h | 4 ++ ncc/src/codegen.rs | 4 +- vm/examples/empty.asm | 2 +- vm/examples/factorial.asm | 2 +- vm/examples/fib.asm | 2 +- vm/examples/fizzbuzz.asm | 2 +- vm/examples/guess.asm | 2 +- vm/examples/loop.asm | 2 +- vm/examples/memcpy.asm | 2 +- vm/src/asm.rs | 3 +- vm/src/constants.rs | 3 +- vm/src/host.rs | 7 +++ vm/src/vm.rs | 99 ++++++++++++++++---------------------- 17 files changed, 91 insertions(+), 72 deletions(-) diff --git a/api/syscalls.json b/api/syscalls.json index 548572f1..c776b16a 100644 --- a/api/syscalls.json +++ b/api/syscalls.json @@ -126,6 +126,22 @@ "const_idx": 17, "description": "Grow the heap to a new size given in bytes. This is similar to the `brk()` system call on POSIX systems. Note that the heap may be resized to a size larger than requested. The heap size is guaranteed to be a multiple of 8 bytes. If the requested size is smaller than the current heap size, this is a no-op. Returns the new heap size in bytes." }, + { + "name": "exit", + "args": [ + [ + "i8", + "status" + ] + ], + "returns": [ + "void", + "" + ], + "permission": "default_allowed", + "const_idx": 11, + "description": "End program execution with the specified exit status." + }, { "name": "thread_spawn", "args": [ diff --git a/doc/syscalls.md b/doc/syscalls.md index b49059fe..dd7d32c2 100644 --- a/doc/syscalls.md +++ b/doc/syscalls.md @@ -68,6 +68,14 @@ u64 vm_grow_heap(u64 num_bytes) Grow the heap to a new size given in bytes. This is similar to the `brk()` system call on POSIX systems. Note that the heap may be resized to a size larger than requested. The heap size is guaranteed to be a multiple of 8 bytes. If the requested size is smaller than the current heap size, this is a no-op. Returns the new heap size in bytes. +## exit + +``` +void exit(i8 status) +``` + +End program execution with the specified exit status. + ## thread_spawn ``` diff --git a/ncc/include/assert.h b/ncc/include/assert.h index d63b3be4..3d5c8244 100644 --- a/ncc/include/assert.h +++ b/ncc/include/assert.h @@ -11,7 +11,7 @@ asm () -> void { syscall print_endl; };\ asm ("assert(" #test_expr ")") -> void { syscall print_str; };\ asm () -> void { syscall print_endl; };\ - asm () -> void { push -1; exit; };\ + asm () -> void { push -1; syscall exit; };\ } #else #define assert(test_val) {} diff --git a/ncc/include/stdlib.h b/ncc/include/stdlib.h index d0509dad..1fcbcd4d 100644 --- a/ncc/include/stdlib.h +++ b/ncc/include/stdlib.h @@ -11,9 +11,10 @@ int abs(int n) return n; } +#undef exit void exit(int status) { - asm (status) -> void { exit; }; + asm (status) -> void { syscall exit; }; } // Convert long int to string diff --git a/ncc/include/uvm/syscalls.h b/ncc/include/uvm/syscalls.h index 9d925688..568152d7 100644 --- a/ncc/include/uvm/syscalls.h +++ b/ncc/include/uvm/syscalls.h @@ -29,6 +29,10 @@ // Grow the heap to a new size given in bytes. This is similar to the `brk()` system call on POSIX systems. Note that the heap may be resized to a size larger than requested. The heap size is guaranteed to be a multiple of 8 bytes. If the requested size is smaller than the current heap size, this is a no-op. Returns the new heap size in bytes. #define vm_grow_heap(__num_bytes) asm (__num_bytes) -> u64 { syscall vm_grow_heap; } +// void exit(i8 status) +// End program execution with the specified exit status. +#define exit(__status) asm (__status) -> void { syscall exit; } + // u64 thread_spawn(void* fptr, void* arg) // Spawn a new thread running the given function with the argument value `arg`. #define thread_spawn(__fptr, __arg) asm (__fptr, __arg) -> u64 { syscall thread_spawn; } diff --git a/ncc/src/codegen.rs b/ncc/src/codegen.rs index e1614149..b9d4eb28 100644 --- a/ncc/src/codegen.rs +++ b/ncc/src/codegen.rs @@ -165,14 +165,14 @@ impl Unit out.push_str("# call the main function and then exit\n"); out.push_str("call main, 0;\n"); - out.push_str("exit;\n"); + out.push_str("ret;\n"); out.push_str("\n"); } else { // If there is no main function, the unit should just exit (do nothing) out.push_str("push 0;\n"); - out.push_str("exit;\n"); + out.push_str("ret;\n"); out.push_str("\n"); } diff --git a/vm/examples/empty.asm b/vm/examples/empty.asm index 4a005997..4342eefc 100644 --- a/vm/examples/empty.asm +++ b/vm/examples/empty.asm @@ -1,2 +1,2 @@ push 0; -exit; \ No newline at end of file +ret; diff --git a/vm/examples/factorial.asm b/vm/examples/factorial.asm index 52327989..90e76b58 100644 --- a/vm/examples/factorial.asm +++ b/vm/examples/factorial.asm @@ -22,7 +22,7 @@ syscall print_i64; syscall print_endl; push 0; -exit; +ret; #### fact(n) #### FACT: diff --git a/vm/examples/fib.asm b/vm/examples/fib.asm index c3409869..636221e7 100644 --- a/vm/examples/fib.asm +++ b/vm/examples/fib.asm @@ -22,7 +22,7 @@ syscall print_i64; syscall print_endl; push 0; -exit; +ret; # # u64 fib(u64 n) diff --git a/vm/examples/fizzbuzz.asm b/vm/examples/fizzbuzz.asm index b6128c39..d22a393b 100644 --- a/vm/examples/fizzbuzz.asm +++ b/vm/examples/fizzbuzz.asm @@ -58,4 +58,4 @@ push 101; lt_i64; # l0 < COUNT jnz LOOP; -exit; +ret; diff --git a/vm/examples/guess.asm b/vm/examples/guess.asm index fb7307c3..0528f109 100644 --- a/vm/examples/guess.asm +++ b/vm/examples/guess.asm @@ -84,7 +84,7 @@ syscall print_i64; syscall print_endl; push 0; -exit; +ret; # # Read a positive integer from stdlin diff --git a/vm/examples/loop.asm b/vm/examples/loop.asm index eebe4d2c..67787848 100644 --- a/vm/examples/loop.asm +++ b/vm/examples/loop.asm @@ -61,4 +61,4 @@ push MIPS_STR; syscall print_str; syscall print_endl; -exit; +ret; diff --git a/vm/examples/memcpy.asm b/vm/examples/memcpy.asm index 3d99a11f..79d96072 100644 --- a/vm/examples/memcpy.asm +++ b/vm/examples/memcpy.asm @@ -73,4 +73,4 @@ push FPS_STR; syscall print_str; syscall print_endl; -exit; +ret; diff --git a/vm/src/asm.rs b/vm/src/asm.rs index 566b9f84..321c3829 100644 --- a/vm/src/asm.rs +++ b/vm/src/asm.rs @@ -1169,7 +1169,6 @@ impl Assembler } "ret" => self.code.push_op(Op::ret), - "exit" => self.code.push_op(Op::exit), _ => { return input.parse_error(&format!("unknown instruction opcode \"{}\"", op_name)) @@ -1323,7 +1322,7 @@ mod tests parse_ok(" FOO_BAR: jmp FOO_BAR; "); // Callback label - parse_ok("CB: ret; push_p32 CB; exit;"); + parse_ok("CB: ret; push_p32 CB; ret;"); } #[test] diff --git a/vm/src/constants.rs b/vm/src/constants.rs index b4c2a0eb..fb87a416 100644 --- a/vm/src/constants.rs +++ b/vm/src/constants.rs @@ -17,6 +17,7 @@ pub const PRINT_ENDL: u16 = 7; pub const GETCHAR: u16 = 8; pub const WINDOW_POLL_EVENT: u16 = 9; pub const WINDOW_DRAW_FRAME: u16 = 10; +pub const EXIT: u16 = 11; pub const VM_HEAP_SIZE: u16 = 14; pub const MEMSET32: u16 = 16; pub const VM_GROW_HEAP: u16 = 17; @@ -54,7 +55,7 @@ pub const SYSCALL_DESCS: [Option; SYSCALL_TBL_LEN] = [ Some(SysCallDesc { name: "getchar", const_idx: 8, argc: 0, has_ret: true }), Some(SysCallDesc { name: "window_poll_event", const_idx: 9, argc: 1, has_ret: true }), Some(SysCallDesc { name: "window_draw_frame", const_idx: 10, argc: 2, has_ret: false }), - None, + Some(SysCallDesc { name: "exit", const_idx: 11, argc: 1, has_ret: false }), None, None, Some(SysCallDesc { name: "vm_heap_size", const_idx: 14, argc: 0, has_ret: true }), diff --git a/vm/src/host.rs b/vm/src/host.rs index 827a8844..fb923146 100644 --- a/vm/src/host.rs +++ b/vm/src/host.rs @@ -97,6 +97,7 @@ pub fn get_syscall(const_idx: u16) -> HostFn MEMSET32 => HostFn::Fn3_0(memset32), MEMCPY => HostFn::Fn3_0(memcpy), MEMCMP => HostFn::Fn3_1(memcmp), + EXIT => HostFn::Fn1_0(exit), THREAD_SPAWN => HostFn::Fn2_1(thread_spawn), THREAD_JOIN => HostFn::Fn1_1(thread_join), @@ -224,6 +225,12 @@ fn memcmp(thread: &mut Thread, ptr_a: Value, ptr_b: Value, num_bytes: Value) -> } } +// End program execution +fn exit(thread: &mut Thread, val: Value) +{ + unsafe { libc::exit(val.as_i32() & 0xFF) }; +} + fn print_i64(thread: &mut Thread, v: Value) { let v = v.as_i64(); diff --git a/vm/src/vm.rs b/vm/src/vm.rs index c9a93345..cba3619d 100644 --- a/vm/src/vm.rs +++ b/vm/src/vm.rs @@ -263,16 +263,10 @@ pub enum Op // syscall (arg0, arg1, ..., argN) syscall, - // Return to caller function, or - // Return to the UVM event loop without terminating execution + // Return to caller function or end thread // ret (value) ret, - // Terminate program execution - // This stops the UVM event loop - // exit (value) - exit, - // NOTE: last opcode must have value < 255 // Currently, every opcode is just one byte long, // and we hope to keep it that way, but the value @@ -1605,17 +1599,6 @@ impl Thread } } - Op::exit => { - if self.stack.len() <= bp { - panic!("exit with no return value on stack"); - } - - let val = self.pop(); - self.stack.clear(); - self.frames.clear(); - return val; - } - Op::ret => { if self.stack.len() <= bp { panic!("ret with no return value on stack"); @@ -1813,143 +1796,143 @@ mod tests fn test_opcodes() { // We can have at most 254 short single-byte opcodes - assert!(Op::exit as usize <= 254); + assert!(Op::ret as usize <= 254); // Keep track of how many short opcodes we have so far - dbg!(Op::exit as usize); - assert!(Op::exit as usize <= 115); + dbg!(Op::ret as usize); + assert!(Op::ret as usize <= 114); } #[test] fn test_basics() { // Integer literals - eval_i64("push_i8 1; exit;", 1); - eval_i64("push_i8 -3; exit;", -3); - eval_i64("push_u64 1_333_444; exit;", 1_333_444); - eval_i64("push_u64 0xFF; exit;", 0xFF); - eval_i64("push_u64 0b1101; exit;", 0b1101); + eval_i64("push_i8 1; ret;", 1); + eval_i64("push_i8 -3; ret;", -3); + eval_i64("push_u64 1_333_444; ret;", 1_333_444); + eval_i64("push_u64 0xFF; ret;", 0xFF); + eval_i64("push_u64 0b1101; ret;", 0b1101); // Push mnemonic - eval_i64("push 0; exit;", 0); - eval_i64("push 1; exit;", 1); - eval_i64("push -1; exit;", -1); - eval_i64("push 0xFFFF; exit;", 0xFFFF); - eval_i64(".data; LABEL: .u64 0; .code; push LABEL; exit;", 0); + eval_i64("push 0; ret;", 0); + eval_i64("push 1; ret;", 1); + eval_i64("push -1; ret;", -1); + eval_i64("push 0xFFFF; ret;", 0xFFFF); + eval_i64(".data; LABEL: .u64 0; .code; push LABEL; ret;", 0); // Stack manipulation - eval_i64("push_i8 7; push_i8 3; swap; exit;", 7); - eval_i64("push_i8 7; push_i8 3; swap; swap; pop; exit;", 7); + eval_i64("push_i8 7; push_i8 3; swap; ret;", 7); + eval_i64("push_i8 7; push_i8 3; swap; swap; pop; ret;", 7); // Integer arithmetic - eval_i64("push_i8 1; push_i8 10; add_u64; exit;", 11); - eval_i64("push_i8 5; push_i8 10; sub_u64; exit;", -5); - eval_i64("push_i8 10; push_i8 2; sub_u64; exit;", 8); - eval_i64("push 5; push_i8 -6; mul_u64; exit;", -30); - eval_i64("push 1; push 2; lshift_u64; exit;", 4); + eval_i64("push_i8 1; push_i8 10; add_u64; ret;", 11); + eval_i64("push_i8 5; push_i8 10; sub_u64; ret;", -5); + eval_i64("push_i8 10; push_i8 2; sub_u64; ret;", 8); + eval_i64("push 5; push_i8 -6; mul_u64; ret;", -30); + eval_i64("push 1; push 2; lshift_u64; ret;", 4); // Comparisons - eval_i64("push_i8 1; push_i8 10; lt_i64; exit;", 1); - eval_i64("push_i8 11; push_i8 1; lt_i64; exit;", 0); + eval_i64("push_i8 1; push_i8 10; lt_i64; ret;", 1); + eval_i64("push_i8 11; push_i8 1; lt_i64; ret;", 0); } #[test] fn test_setlocal() { - eval_i64(".code; push 0; push 77; set_local 0; get_local 0; exit;", 77); + eval_i64(".code; push 0; push 77; set_local 0; get_local 0; ret;", 77); } #[test] fn test_floats() { - eval_i64("push_f32 1.5; push_f32 2.5; add_f32; push_f32 4.0; eq_u64; exit;", 1); + eval_i64("push_f32 1.5; push_f32 2.5; add_f32; push_f32 4.0; eq_u64; ret;", 1); } #[test] fn test_loop() { // Simple loop - eval_i64("push_i8 0; LOOP: push_i8 1; add_u64; dup; push_i8 10; eq_u64; jz LOOP; exit;", 10); + eval_i64("push_i8 0; LOOP: push_i8 1; add_u64; dup; push_i8 10; eq_u64; jz LOOP; ret;", 10); } #[test] fn test_load_store() { // Store instruction - eval_i64(".data; .zero 255; .code; push_i8 0; push_i8 77; store_u8; push_i8 11; exit;", 11); + eval_i64(".data; .zero 255; .code; push_i8 0; push_i8 77; store_u8; push_i8 11; ret;", 11); } #[test] fn test_setn() { // Store instruction - eval_i64(".code; push 3; push 0; push 7; setn 1; pop; exit;", 7); + eval_i64(".code; push 3; push 0; push 7; setn 1; pop; ret;", 7); } #[test] fn test_call_ret() { - eval_i64("call FN, 0; exit; FN: push_i8 33; ret;", 33); - eval_i64("push_i8 3; call FN, 1; exit; FN: get_arg 0; push_i8 1; add_u64; ret;", 4); + eval_i64("call FN, 0; ret; FN: push_i8 33; ret;", 33); + eval_i64("push_i8 3; call FN, 1; ret; FN: get_arg 0; push_i8 1; add_u64; ret;", 4); // set_arg - eval_i64("push_i8 3; call FN, 1; exit; FN: push 7; set_arg 0; get_arg 0; ret;", 7); + eval_i64("push_i8 3; call FN, 1; ret; FN: push 7; set_arg 0; get_arg 0; ret;", 7); // Two arguments and subtract (order of arguments matters) - eval_i64("push_i8 7; push 5; call FN, 2; exit; FN: get_arg 0; get_arg 1; sub_u64; ret;", 2); + eval_i64("push_i8 7; push 5; call FN, 2; ret; FN: get_arg 0; get_arg 1; sub_u64; ret;", 2); // Recursive decrement function - eval_i64("push 10; call DEC, 1; exit; DEC: get_arg 0; dup; jz ZERO; push 1; sub_u64; call DEC, 1; ret; ZERO: ret;", 0); + eval_i64("push 10; call DEC, 1; ret; DEC: get_arg 0; dup; jz ZERO; push 1; sub_u64; call DEC, 1; ret; ZERO: ret;", 0); // Regression: stack corruption - eval_i64("push 5; call foo, 0; pop; exit; foo: push 2; push 0; ret;", 5); + eval_i64("push 5; call foo, 0; pop; ret; foo: push 2; push 0; ret;", 5); } #[test] fn test_call_fp() { - eval_i64(" push FN; call_fp 0; exit; FN: push_i8 33; ret;", 33); + eval_i64(" push FN; call_fp 0; ret; FN: push_i8 33; ret;", 33); } #[test] fn test_syscalls() { - eval_src(".data; LABEL: .zero 256; .code; push LABEL; push 255; push 0; syscall memset; push 0; exit;"); + eval_src(".data; LABEL: .zero 256; .code; push LABEL; push 255; push 0; syscall memset; push 0; ret;"); } #[test] #[should_panic] fn test_div_zero() { - eval_src("push 8; push 0; div_u64; exit;"); + eval_src("push 8; push 0; div_u64; ret;"); } #[test] #[should_panic] fn test_ret_none() { - eval_src("call FN, 0; exit; FN: ret;"); + eval_src("call FN, 0; ret; FN: ret;"); } #[test] #[should_panic] fn test_get_arg_none() { - eval_src("call FN, 0; exit; FN: get_arg 0; push 0; ret;"); + eval_src("call FN, 0; ret; FN: get_arg 0; push 0; ret;"); } #[test] #[should_panic] fn test_load_oob() { - eval_src(".data; .fill 1000, 0; .code; push 100_000_000; load_u64; exit;"); + eval_src(".data; .fill 1000, 0; .code; push 100_000_000; load_u64; ret;"); } #[test] #[should_panic] fn test_memset_oob() { - eval_src(".data; LABEL: .zero 1; .code; push LABEL; push 255; push 100_000_000; syscall memset; push 0; exit;"); + eval_src(".data; LABEL: .zero 1; .code; push LABEL; push 255; push 100_000_000; syscall memset; push 0; ret;"); } // Regression: this used to segfault