From 92b5c9318dcdb476b68199fc5a3d34835b27f132 Mon Sep 17 00:00:00 2001 From: MeiK Date: Mon, 18 Mar 2024 19:48:35 +0800 Subject: [PATCH 01/11] =?UTF-8?q?linux=20=E5=B9=B3=E5=8F=B0=E5=BC=80?= =?UTF-8?q?=E5=8F=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 + Cargo.toml | 4 +- src/main.rs | 25 - src/sys/linux/mod.rs | 34 +- src/sys/linux/seccomp.rs | 1170 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 1203 insertions(+), 31 deletions(-) create mode 100644 src/sys/linux/seccomp.rs diff --git a/.gitignore b/.gitignore index 38a694b..474741a 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ Cargo.lock .idea .vscode *.exe +.cargo diff --git a/Cargo.toml b/Cargo.toml index 762181e..e081d77 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,6 @@ version = "0.1.0" edition = "2021" [dependencies] -libc = "0.2.153" clap = { version = "4.5.1", features = ["derive"] } clap-verbosity-flag = "2.2.0" log = "0.4.20" @@ -12,6 +11,7 @@ env_logger = "0.10.2" tempfile = "3.10.0" serde = { version = "1.0.197", features = ["derive"] } serde_json = "1.0.114" +libc = "0.2.153" [target.'cfg(windows)'.dependencies.windows] version = "0.53.0" @@ -29,4 +29,4 @@ features = [ "Win32_System_Diagnostics_Debug", "Win32_System_ProcessStatus", "Win32_System_WindowsProgramming" -] \ No newline at end of file +] diff --git a/src/main.rs b/src/main.rs index 1fc7a91..7175441 100644 --- a/src/main.rs +++ b/src/main.rs @@ -32,11 +32,6 @@ pub struct Opts { #[clap(short, long)] error: Option, - #[cfg(any(target_os = "linux", target_os = "macos"))] - /// Working directory. The default value is the current directory. - #[clap(short, long, default_value = "./")] - workdir: String, - /// Output location of the running result. The default value is STDOUT(1) #[clap(short, long)] result: Option, @@ -53,21 +48,6 @@ pub struct Opts { #[clap(short, long)] memory_limit: Option, - #[cfg(any(target_os = "linux", target_os = "macos"))] - /// Maximum number of files that can be written. The unit is bit. The default value is unlimited. - #[clap(short, long, default_value = "0")] - file_size_limit: i32, - - #[cfg(any(target_os = "linux", target_os = "macos"))] - /// Cgroup version, 1 or 2 - #[clap(short, long, default_value = "1")] - cgroup: i32, - - #[cfg(any(target_os = "linux", target_os = "macos"))] - /// Number of processes that can be created. The default value is unlimited. - #[clap(short, long, default_value = "0")] - pids: i32, - /// Program to run and command line arguments #[clap(last(true), required = true)] command: Vec, @@ -75,11 +55,6 @@ pub struct Opts { /// A level of verbosity, and can be used multiple times #[command(flatten)] verbose: Verbosity, - - #[cfg(any(target_os = "linux", target_os = "macos"))] - /// Network enable - #[clap(long, default_value = "false")] - network: bool, } impl Default for Opts { diff --git a/src/sys/linux/mod.rs b/src/sys/linux/mod.rs index d76cf71..987512e 100644 --- a/src/sys/linux/mod.rs +++ b/src/sys/linux/mod.rs @@ -1,9 +1,35 @@ +mod seccomp; + +use crate::status::Status; use crate::sys::SandboxImpl; +use crate::Opts; -pub struct Sandbox {} +#[derive(Debug)] +pub struct Sandbox { + inner_args: Vec, + time_limit: Option, + cpu_time_limit: Option, + memory_limit: Option, + input: Option, + output: Option, + error: Option, +} impl SandboxImpl for Sandbox { - fn run() -> () { - println!("Linux") + fn with_opts(opts: Opts) -> Self { + Sandbox { + inner_args: opts.command, + time_limit: opts.time_limit, + cpu_time_limit: opts.cpu_time_limit, + memory_limit: opts.memory_limit, + input: opts.input, + output: opts.output, + error: opts.error, + } + } + + unsafe fn run(&mut self) -> crate::error::Result { + let status: Status = Default::default(); + Ok(status) } -} \ No newline at end of file +} diff --git a/src/sys/linux/seccomp.rs b/src/sys/linux/seccomp.rs new file mode 100644 index 0000000..fe3d3be --- /dev/null +++ b/src/sys/linux/seccomp.rs @@ -0,0 +1,1170 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#![allow(dead_code)] +#![deny(missing_docs)] +//! This crate implements a high level wrapper over BPF instructions for seccomp filtering. +//! +//! # Seccomp Filtering Levels +//! +//! [Seccomp filtering](https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt) is used +//! to limit the set of syscalls that a process can make. This crate exposes 2 levels of filtering: +//! 1. Simple filtering: all syscalls are denied, except for a subset that are explicitly let +//! through. The latter are identified solely through the syscall number. +//! 1. Advanced filtering: all syscalls are denied, except for a subset that are explicitly let +//! through. The latter are identified via the syscall number and the allowed values for the +//! syscall's arguments. Arguments whose values do not match the filtering rule will cause the +//! syscall to be denied. +//! +//! ## Example with Filtering Disabled +//! +//! ``` +//! let buf = "Hello, world!"; +//! assert_eq!( +//! unsafe { +//! libc::syscall( +//! libc::SYS_write, +//! libc::STDOUT_FILENO, +//! buf.as_bytes(), +//! buf.len(), +//! ); +//! }, +//! () +//! ); +//! ``` +//! +//! The code snippet above will print "Hello, world!" to stdout. +//! The exit code will be 0. +//! +//! ## Example with Simple Filtering +//! +//! In this example, the process will allow a subset of syscalls. All the others will fall under +//! the `Trap` action: cause the kernel to send `SIGSYS` (signal number 31) to the process. +//! Without a signal handler in place, the process will die with exit code 159 (128 + `SIGSYS`). +//! +//! ```should_panic +//! use std::convert::TryInto; +//! use seccomp::*; +//! +//! let buf = "Hello, world!"; +//! let filter = SeccompFilter::new( +//! vec![ +//! allow_syscall(libc::SYS_close), +//! allow_syscall(libc::SYS_execve), +//! allow_syscall(libc::SYS_exit_group), +//! #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +//! allow_syscall(libc::SYS_open), +//! #[cfg(target_arch = "aarch64")] +//! allow_syscall(libc::SYS_openat), +//! allow_syscall(libc::SYS_read), +//! ] +//! .into_iter() +//! .collect(), +//! SeccompAction::Trap, +//! ) +//! .unwrap().try_into().unwrap(); +//! SeccompFilter::apply(filter).unwrap(); +//! unsafe { +//! libc::syscall( +//! libc::SYS_write, +//! libc::STDOUT_FILENO, +//! buf.as_bytes(), +//! buf.len(), +//! ); +//! }; +//! ``` +//! +//! The code snippet above will print "Hello, world!" to stdout and "Bad system call" to stderr. +//! The exit code will be 159. +//! +//! ## Advanced Filtering: Conditions, Rules and Filters +//! +//! A system call is matched if it verifies a set of [`SeccompCondition`]s. Namely, the syscall +//! number must match the one in the [`SeccompCondition`], and each of its arguments (in case of +//! advanced filtering) must match a set of [`SeccompCondition`]s that identify the argument by its +//! index and its respective value either by exact value match, or by bounds to be compared to. +//! +//! A [`SeccompRule`] is composed of a set of [`SeccompCondition`]s the syscall must match and the +//! [`SeccompAction`] to be taken in case of a match. +//! +//! A [`SeccompFilter`] applies only to advanced filtering and is composed of a set of +//! [`SeccompRule`]s and a default [`SeccompAction`]. The default action will be taken for the +//! syscalls that do not match any of the rules. +//! +//! The seccomp rules are compiled into a [`BpfProgram`] which is loaded in the kernel. +//! +//! ### Denying Syscalls +//! +//! The [`SeccompRule`] struct specifies which action to be taken when a syscall is attempted +//! through its [`action`]. To deny a syscall, [`action`] must take one of the following values: +//! 1. `Errno(num)`: the syscall will not be executed. `errno` will be set to `num`. +//! 1. `Kill`: the kernel will kill the process. +//! 1. `Trap`: the kernel will send `SIGSYS` to the process. Handling is up to the process. If no +//! signal handler is set for `SIGSYS`, the process will die. +//! +//! ### Example with Advanced Filtering +//! +//! In this example, the process will allow a subset of syscalls with any arguments and the syscall +//! `SYS_write` with the first argument `0` and the third argument `13`. The default action is to +//! cause the kernel to send `SIGSYS` (signal number 31) to the process. +//! A signal handler will catch `SIGSYS` and exit with code 159 on any other syscall. +//! +//! ```should_panic +//! use seccomp::*; +//! use std::convert::TryInto; +//! use std::mem; +//! use std::process::exit; +//! +//! const SI_OFF_SYSCALL: isize = 6; +//! static mut SIGNAL_HANDLER_CALLED: i32 = 0; +//! +//! fn fail() { +//! exit(159); +//! } +//! +//! extern "C" fn sigsys_handler( +//! _num: libc::c_int, +//! info: *mut libc::siginfo_t, +//! _unused: *mut libc::c_void, +//! ) { +//! let syscall = unsafe { *(info as *const i32).offset(SI_OFF_SYSCALL) }; +//! if syscall as i64 != libc::SYS_write { +//! fail(); +//! } +//! unsafe { +//! SIGNAL_HANDLER_CALLED = SIGNAL_HANDLER_CALLED + 1; +//! } +//! } +//! +//! fn gen_rules() -> Vec { +//! vec![ +//! allow_syscall(libc::SYS_close), +//! allow_syscall(libc::SYS_execve), +//! allow_syscall(libc::SYS_exit_group), +//! allow_syscall(libc::SYS_munmap), +//! #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +//! allow_syscall(libc::SYS_open), +//! #[cfg(target_arch = "aarch64")] +//! allow_syscall(libc::SYS_openat), +//! allow_syscall(libc::SYS_rt_sigreturn), +//! allow_syscall(libc::SYS_sigaltstack), +//! ] +//! } +//! +//! fn main() { +//! let buf = "Hello, world!"; +//! +//! let mut act: libc::sigaction = unsafe { mem::zeroed() }; +//! act.sa_flags = libc::SA_SIGINFO; +//! act.sa_sigaction = sigsys_handler as *const () as usize; +//! +//! unsafe { libc::sigaction(libc::SIGSYS, &act, ::std::ptr::null_mut()) }; +//! +//! let mut filter = +//! SeccompFilter::new(vec![].into_iter().collect(), SeccompAction::Trap).unwrap(); +//! +//! gen_rules() +//! .into_iter() +//! .try_for_each(|(syscall_number, rules)| filter.add_rules(syscall_number, rules)) +//! .unwrap(); +//! +//! filter +//! .add_rules( +//! libc::SYS_write, +//! vec![SeccompRule::new( +//! vec![ +//! SeccompCondition::new( +//! 0, +//! SeccompCmpArgLen::DWORD, +//! SeccompCmpOp::Eq, +//! libc::STDOUT_FILENO as u64, +//! ) +//! .unwrap(), +//! SeccompCondition::new(2, SeccompCmpArgLen::QWORD, SeccompCmpOp::Eq, 13) +//! .unwrap(), +//! ], +//! SeccompAction::Allow, +//! )], +//! ) +//! .unwrap(); +//! +//! SeccompFilter::apply(filter.try_into().unwrap()).unwrap(); +//! +//! unsafe { +//! libc::syscall( +//! libc::SYS_write, +//! libc::STDOUT_FILENO, +//! buf.as_bytes(), +//! buf.len(), +//! ); +//! }; +//! +//! if unsafe { SIGNAL_HANDLER_CALLED } != 0 { +//! fail(); +//! } +//! +//! let buf = "Goodbye!"; +//! unsafe { +//! libc::syscall( +//! libc::SYS_write, +//! libc::STDOUT_FILENO, +//! buf.as_bytes(), +//! buf.len(), +//! ); +//! }; +//! if unsafe { SIGNAL_HANDLER_CALLED } != 1 { +//! fail(); +//! } +//! +//! unsafe { +//! libc::syscall(libc::SYS_getpid); +//! }; +//! } +//! ``` +//! The code snippet above will print "Hello, world!" to stdout. +//! The exit code will be 159. +//! +//! [`apply`]: struct.SeccompFilter.html#apply +//! [`BpfProgram`]: type.BpfProgram.html +//! [`SeccompCondition`]: struct.SeccompCondition.html +//! [`SeccompRule`]: struct.SeccompRule.html +//! [`SeccompAction`]: enum.SeccompAction.html +//! [`SeccompFilter`]: struct.SeccompFilter.html +//! [`action`]: struct.SeccompRule.html#action +use std::collections::BTreeMap; +use std::convert::TryInto; +use std::fmt::{Display, Formatter}; + +/// Maximum number of instructions that a BPF program can have. +const BPF_MAX_LEN: usize = 4096; + +// BPF Instruction classes. +// See /usr/include/linux/bpf_common.h . +const BPF_LD: u16 = 0x00; +const BPF_ALU: u16 = 0x04; +const BPF_JMP: u16 = 0x05; +const BPF_RET: u16 = 0x06; + +// BPF ld/ldx fields. +// See /usr/include/linux/bpf_common.h . +const BPF_W: u16 = 0x00; +const BPF_ABS: u16 = 0x20; + +// BPF alu fields. +// See /usr/include/linux/bpf_common.h . +const BPF_AND: u16 = 0x50; + +// BPF jmp fields. +// See /usr/include/linux/bpf_common.h . +const BPF_JA: u16 = 0x00; +const BPF_JEQ: u16 = 0x10; +const BPF_JGT: u16 = 0x20; +const BPF_JGE: u16 = 0x30; +const BPF_K: u16 = 0x00; + +// Return codes for BPF programs. +// See /usr/include/linux/seccomp.h . +const SECCOMP_RET_ALLOW: u32 = 0x7fff_0000; +const SECCOMP_RET_ERRNO: u32 = 0x0005_0000; +const SECCOMP_RET_KILL: u32 = 0x0000_0000; +const SECCOMP_RET_LOG: u32 = 0x7ffc_0000; +const SECCOMP_RET_TRACE: u32 = 0x7ff0_0000; +const SECCOMP_RET_TRAP: u32 = 0x0003_0000; +const SECCOMP_RET_MASK: u32 = 0x0000_ffff; + +// Architecture identifier. +// See /usr/include/linux/audit.h . + +#[cfg(target_arch = "x86_64")] +// Defined as: +// `#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)` +const AUDIT_ARCH_X86_64: u32 = 62 | 0x8000_0000 | 0x4000_0000; + +#[cfg(target_arch = "aarch64")] +// Defined as: +// `#define AUDIT_ARCH_AARCH64 (EM_AARCH64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)` +const AUDIT_ARCH_AARCH64: u32 = 183 | 0x8000_0000 | 0x4000_0000; + +// The maximum number of a syscall argument. +// A syscall can have at most 6 arguments. +// Arguments are numbered from 0 to 5. +const ARG_NUMBER_MAX: u8 = 5; + +// The maximum number of BPF statements that a condition will be translated into. +const CONDITION_MAX_LEN: u16 = 6; + +// `struct seccomp_data` offsets and sizes of fields in bytes: +// +// ```c +// struct seccomp_data { +// int nr; +// __u32 arch; +// __u64 instruction_pointer; +// __u64 args[6]; +// }; +// ``` +const SECCOMP_DATA_NR_OFFSET: u8 = 0; +const SECCOMP_DATA_ARGS_OFFSET: u8 = 16; +const SECCOMP_DATA_ARG_SIZE: u8 = 8; + +/// Seccomp errors. +#[derive(Debug)] +pub enum Error { + /// Attempting to add an empty vector of rules to the rule chain of a syscall. + EmptyRulesVector, + /// Filter exceeds the maximum number of instructions that a BPF program can have. + FilterTooLarge, + /// Failed to translate rules into BPF. + IntoBpf, + /// Argument number that exceeds the maximum value. + InvalidArgumentNumber, + /// Failed to load seccomp rules into the kernel. + Load(i32), +} + +impl Display for Error { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + use self::Error::*; + + match *self { + EmptyRulesVector => write!(f, "The seccomp rules vector is empty."), + FilterTooLarge => write!(f, "The seccomp filter contains too many BPF instructions."), + IntoBpf => write!(f, "Failed to translate the seccomp rules into BPF."), + InvalidArgumentNumber => { + write!(f, "The seccomp rule contains an invalid argument number.") + } + Load(err) => write!( + f, + "Failed to load seccomp rules into the kernel with error {}.", + err + ), + } + } +} + +type Result = std::result::Result; + +/// Comparison to perform when matching a condition. +#[derive(Clone, Debug)] +pub enum SeccompCmpOp { + /// Argument value is equal to the specified value. + Eq, + /// Argument value is greater than or equal to the specified value. + Ge, + /// Argument value is greater than specified value. + Gt, + /// Argument value is less than or equal to the specified value. + Le, + /// Argument value is less than specified value. + Lt, + /// Masked bits of argument value are equal to masked bits of specified value. + MaskedEq(u64), + /// Argument value is not equal to specified value. + Ne, +} + +/// Seccomp argument value length. +#[derive(Clone, Debug)] +pub enum SeccompCmpArgLen { + /// Argument value length is 4 bytes. + DWORD, + /// Argument value length is 8 bytes. + QWORD, +} + +/// Condition that syscall must match in order to satisfy a rule. +#[derive(Clone, Debug)] +pub struct SeccompCondition { + /// Index of the argument that is to be compared. + arg_number: u8, + /// Length of the argument value that is to be compared. + arg_len: SeccompCmpArgLen, + /// Comparison to perform. + operator: SeccompCmpOp, + /// The value that will be compared with the argument value. + value: u64, +} + +/// Actions that `seccomp` can apply to process calling a syscall. +#[derive(Clone, Debug, PartialEq)] +pub enum SeccompAction { + /// Allows syscall. + Allow, + /// Returns from syscall with specified error number. + Errno(u32), + /// Kills calling process. + Kill, + /// Same as allow but logs call. + Log, + /// Notifies tracing process of the caller with respective number. + Trace(u32), + /// Sends `SIGSYS` to the calling process. + Trap, +} + +/// Rule that `seccomp` attempts to match for a syscall. +/// +/// If all conditions match then rule gets matched. +/// The action of the first rule that matches will be applied to the calling process. +/// If no rule matches the default action is applied. +#[derive(Clone, Debug)] +pub struct SeccompRule { + /// Conditions of rule that need to match in order for the rule to get matched. + conditions: Vec, + /// Action applied to calling process if rule gets matched. + action: SeccompAction, +} + +/// Type that encapsulates a tuple (syscall number, rule set). +pub type SyscallRuleSet = (i64, Vec); + +/// Builds the (syscall, rules) tuple for allowing a syscall regardless of arguments. +#[inline(always)] +pub fn allow_syscall(syscall_number: i64) -> SyscallRuleSet { + ( + syscall_number, + vec![SeccompRule::new(vec![], SeccompAction::Allow)], + ) +} + +/// Builds the (syscall, rules) tuple for allowing a syscall with certain arguments. +#[inline(always)] +pub fn allow_syscall_if(syscall_number: i64, rules: Vec) -> SyscallRuleSet { + (syscall_number, rules) +} + +/// Filter containing rules assigned to syscall numbers. +#[derive(Clone, Debug)] +pub struct SeccompFilter { + /// Map of syscall numbers and corresponding rule chains. + rules: BTreeMap>, + /// Default action to apply to syscall numbers that do not exist in the hash map. + default_action: SeccompAction, +} + +// BPF instruction structure definition. +// See /usr/include/linux/filter.h . +#[repr(C)] +#[derive(Clone, Debug, PartialEq)] +#[doc(hidden)] +pub struct sock_filter { + pub code: ::std::os::raw::c_ushort, + pub jt: ::std::os::raw::c_uchar, + pub jf: ::std::os::raw::c_uchar, + pub k: ::std::os::raw::c_uint, +} + +// BPF structure definition for filter array. +// See /usr/include/linux/filter.h . +#[repr(C)] +struct sock_fprog { + pub len: ::std::os::raw::c_ushort, + pub filter: *const sock_filter, +} + +/// Program made up of a sequence of BPF instructions. +pub type BpfProgram = Vec; +/// Reference to program made up of a sequence of BPF instructions. +pub type BpfProgramRef<'a> = &'a [sock_filter]; +/// Slice of BPF instructions. +pub type BpfInstructionSlice = [sock_filter]; + +impl SeccompCondition { + /// Creates a new [`SeccompCondition`]. + /// + /// # Arguments + /// + /// * `arg_number` - The index of the argument in the system call. + /// * `arg_len` - The length of the argument value. See `SeccompCmpArgLen`. + /// * `operator` - The comparison operator. See `SeccompCmpOp`. + /// * `value` - The value against which the argument will be compared with `operator`. + /// + /// [`SeccompCondition`]: struct.SeccompCondition.html + pub fn new( + arg_number: u8, + arg_len: SeccompCmpArgLen, + operator: SeccompCmpOp, + value: u64, + ) -> Result { + // Checks that the given argument number is valid. + if arg_number > ARG_NUMBER_MAX { + return Err(Error::InvalidArgumentNumber); + } + + Ok(Self { + arg_number, + arg_len, + operator, + value, + }) + } + + /// Splits the [`SeccompCondition`] into 32 bit chunks and offsets. + /// + /// Returns most significant half, least significant half of the `value` field of + /// [`SeccompCondition`], as well as the offsets of the most significant and least significant + /// half of the argument specified by `arg_number` relative to `struct seccomp_data` passed to + /// the BPF program by the kernel. + /// + /// [`SeccompCondition`]: struct.SeccompCondition.html + fn value_segments(&self) -> (u32, u32, u8, u8) { + // Splits the specified value into its most significant and least significant halves. + let (msb, lsb) = ((self.value >> 32) as u32, self.value as u32); + + // Offset to the argument specified by `arg_number`. + let arg_offset = SECCOMP_DATA_ARGS_OFFSET + self.arg_number * SECCOMP_DATA_ARG_SIZE; + + // Extracts offsets of most significant and least significant halves of argument. + let (msb_offset, lsb_offset) = { + #[cfg(target_endian = "big")] + { + (arg_offset, arg_offset + SECCOMP_DATA_ARG_SIZE / 2) + } + #[cfg(target_endian = "little")] + { + (arg_offset + SECCOMP_DATA_ARG_SIZE / 2, arg_offset) + } + }; + + (msb, lsb, msb_offset, lsb_offset) + } + + /// Translates the `eq` (equal) condition into BPF statements. + /// + /// # Arguments + /// + /// * `offset` - The given jump offset to the start of the next rule. + /// + /// The jump is performed if the condition fails and thus the current rule does not match so + /// `seccomp` tries to match the next rule by jumping out of the current rule. + /// + /// In case the condition is part of the last rule, the jump offset is to the default action of + /// respective filter. + /// + /// The most significant and least significant halves of the argument value are compared + /// separately since the BPF operand and accumulator are 4 bytes whereas an argument value is 8. + fn into_eq_bpf(self, offset: u8) -> Vec { + let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); + + let mut bpf = match self.arg_len { + SeccompCmpArgLen::DWORD => vec![], + SeccompCmpArgLen::QWORD => vec![ + BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, offset + 2), + ], + }; + + bpf.append(&mut vec![ + BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, lsb, 0, offset), + ]); + bpf + } + + /// Translates the `ge` (greater than or equal) condition into BPF statements. + /// + /// # Arguments + /// + /// * `offset` - The given jump offset to the start of the next rule. + fn into_ge_bpf(self, offset: u8) -> Vec { + let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); + + let mut bpf = match self.arg_len { + SeccompCmpArgLen::DWORD => vec![], + SeccompCmpArgLen::QWORD => vec![ + BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), + BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, msb, 3, 0), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, offset + 2), + ], + }; + + bpf.append(&mut vec![ + BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), + BPF_JUMP(BPF_JMP + BPF_JGE + BPF_K, lsb, 0, offset), + ]); + bpf + } + + /// Translates the `gt` (greater than) condition into BPF statements. + /// + /// # Arguments + /// + /// * `offset` - The given jump offset to the start of the next rule. + fn into_gt_bpf(self, offset: u8) -> Vec { + let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); + + let mut bpf = match self.arg_len { + SeccompCmpArgLen::DWORD => vec![], + SeccompCmpArgLen::QWORD => vec![ + BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), + BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, msb, 3, 0), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, offset + 2), + ], + }; + + bpf.append(&mut vec![ + BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), + BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, lsb, 0, offset), + ]); + bpf + } + + /// Translates the `le` (less than or equal) condition into BPF statements. + /// + /// # Arguments + /// + /// * `offset` - The given jump offset to the start of the next rule. + fn into_le_bpf(self, offset: u8) -> Vec { + let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); + + let mut bpf = match self.arg_len { + SeccompCmpArgLen::DWORD => vec![], + SeccompCmpArgLen::QWORD => vec![ + BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), + BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, msb, offset + 3, 0), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, 2), + ], + }; + + bpf.append(&mut vec![ + BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), + BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, lsb, offset, 0), + ]); + bpf + } + + /// Translates the `lt` (less than) condition into BPF statements. + /// + /// # Arguments + /// + /// * `offset` - The given jump offset to the start of the next rule. + fn into_lt_bpf(self, offset: u8) -> Vec { + let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); + + let mut bpf = match self.arg_len { + SeccompCmpArgLen::DWORD => vec![], + SeccompCmpArgLen::QWORD => vec![ + BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), + BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, msb, offset + 3, 0), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, 2), + ], + }; + + bpf.append(&mut vec![ + BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), + BPF_JUMP(BPF_JMP + BPF_JGE + BPF_K, lsb, offset, 0), + ]); + bpf + } + + /// Translates the `masked_eq` (masked equal) condition into BPF statements. + /// + /// The `masked_eq` condition is `true` if the result of logical `AND` between the given value + /// and the mask is the value being compared against. + /// + /// # Arguments + /// + /// * `offset` - The given jump offset to the start of the next rule. + fn into_masked_eq_bpf(self, offset: u8, mask: u64) -> Vec { + let (_, _, msb_offset, lsb_offset) = self.value_segments(); + let masked_value = self.value & mask; + let (msb, lsb) = ((masked_value >> 32) as u32, masked_value as u32); + let (mask_msb, mask_lsb) = ((mask >> 32) as u32, mask as u32); + + let mut bpf = match self.arg_len { + SeccompCmpArgLen::DWORD => vec![], + SeccompCmpArgLen::QWORD => vec![ + BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), + BPF_STMT(BPF_ALU + BPF_AND + BPF_K, mask_msb), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, offset + 3), + ], + }; + + bpf.append(&mut vec![ + BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), + BPF_STMT(BPF_ALU + BPF_AND + BPF_K, mask_lsb), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, lsb, 0, offset), + ]); + bpf + } + + /// Translates the `ne` (not equal) condition into BPF statements. + /// + /// # Arguments + /// + /// * `offset` - The given jump offset to the start of the next rule. + fn into_ne_bpf(self, offset: u8) -> Vec { + let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); + + let mut bpf = match self.arg_len { + SeccompCmpArgLen::DWORD => vec![], + SeccompCmpArgLen::QWORD => vec![ + BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, 2), + ], + }; + + bpf.append(&mut vec![ + BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, lsb, offset, 0), + ]); + bpf + } + + /// Translates the [`SeccompCondition`] into BPF statements. + /// + /// # Arguments + /// + /// * `offset` - The given jump offset to the start of the next rule. + /// + /// [`SeccompCondition`]: struct.SeccompCondition.html + fn into_bpf(self, offset: u8) -> Vec { + let result = match self.operator { + SeccompCmpOp::Eq => self.into_eq_bpf(offset), + SeccompCmpOp::Ge => self.into_ge_bpf(offset), + SeccompCmpOp::Gt => self.into_gt_bpf(offset), + SeccompCmpOp::Le => self.into_le_bpf(offset), + SeccompCmpOp::Lt => self.into_lt_bpf(offset), + SeccompCmpOp::MaskedEq(mask) => self.into_masked_eq_bpf(offset, mask), + SeccompCmpOp::Ne => self.into_ne_bpf(offset), + }; + + // Verifies that the `CONDITION_MAX_LEN` constant was properly updated. + assert!(result.len() <= CONDITION_MAX_LEN as usize); + + result + } +} + +impl From for u32 { + /// Return codes of the BPF program for each action. + /// + /// # Arguments + /// + /// * `action` - The [`SeccompAction`] that the kernel will take. + /// + /// [`SeccompAction`]: struct.SeccompAction.html + fn from(action: SeccompAction) -> Self { + match action { + SeccompAction::Allow => SECCOMP_RET_ALLOW, + SeccompAction::Errno(x) => SECCOMP_RET_ERRNO | (x & SECCOMP_RET_MASK), + SeccompAction::Kill => SECCOMP_RET_KILL, + SeccompAction::Log => SECCOMP_RET_LOG, + SeccompAction::Trace(x) => SECCOMP_RET_TRACE | (x & SECCOMP_RET_MASK), + SeccompAction::Trap => SECCOMP_RET_TRAP, + } + } +} + +impl SeccompRule { + /// Creates a new rule. Rules with 0 conditions always match. + /// + /// # Arguments + /// + /// * `conditions` - Vector of [`SeccompCondition`] that the syscall must match. + /// * `action` - Action taken if the syscall matches the conditions. See [`SeccompAction`]. + /// + /// [`SeccompCondition`]: struct.SeccompCondition.html + /// [`SeccompAction`]: struct.SeccompAction.html + pub fn new(conditions: Vec, action: SeccompAction) -> Self { + Self { conditions, action } + } + + /// Appends a condition of the rule to an accumulator. + /// + /// The length of the rule and offset to the next rule are updated. + /// + /// # Arguments + /// + /// * `condition` - The condition added to the rule. + /// * `accumulator` - Accumulator of BPF statements that compose the BPF program. + /// * `rule_len` - Number of conditions in the rule. + /// * `offset` - Offset (in number of BPF statements) to the next rule. + fn append_condition( + condition: SeccompCondition, + accumulator: &mut Vec>, + rule_len: &mut usize, + offset: &mut u8, + ) { + // Tries to detect whether prepending the current condition will produce an unjumpable + // offset (since BPF jumps are a maximum of 255 instructions). + if u16::from(*offset) + CONDITION_MAX_LEN + 1 > u16::from(::std::u8::MAX) { + // If that is the case, three additional helper jumps are prepended and the offset + // is reset to 1. + // + // - The first jump continues the evaluation of the condition chain by jumping to + // the next condition or the action of the rule if the last condition was matched. + // - The second, jumps out of the rule, to the next rule or the default action of + // the filter in case of the last rule in the rule chain of a syscall. + // - The third jumps out of the rule chain of the syscall, to the rule chain of the + // next syscall number to be checked or the default action of the filter in the + // case of the last rule chain. + let helper_jumps = vec![ + BPF_STMT(BPF_JMP + BPF_JA, 2), + BPF_STMT(BPF_JMP + BPF_JA, u32::from(*offset) + 1), + BPF_STMT(BPF_JMP + BPF_JA, u32::from(*offset) + 1), + ]; + *rule_len += helper_jumps.len(); + accumulator.push(helper_jumps); + *offset = 1; + } + + let condition = condition.into_bpf(*offset); + *rule_len += condition.len(); + *offset += condition.len() as u8; + accumulator.push(condition); + } +} + +impl Into for SeccompRule { + /// Translates a rule into BPF statements. + /// + /// Each rule starts with 2 jump statements: + /// * The first jump enters the rule, attempting a match. + /// * The second jump points to the end of the rule chain for one syscall, into the rule chain + /// for the next syscall or the default action if the current syscall is the last one. It + /// essentially jumps out of the current rule chain. + fn into(self) -> BpfProgram { + // Rule is built backwards, last statement is the action of the rule. + // The offset to the next rule is 1. + let mut accumulator = Vec::with_capacity( + self.conditions.len() + + ((self.conditions.len() * CONDITION_MAX_LEN as usize) / ::std::u8::MAX as usize) + + 1, + ); + let mut rule_len = 1; + let mut offset = 1; + accumulator.push(vec![BPF_STMT(BPF_RET + BPF_K, u32::from(self.action))]); + + // Conditions are translated into BPF statements and prepended to the rule. + self.conditions.into_iter().for_each(|condition| { + SeccompRule::append_condition(condition, &mut accumulator, &mut rule_len, &mut offset) + }); + + // The two initial jump statements are prepended to the rule. + let rule_jumps = vec![ + BPF_STMT(BPF_JMP + BPF_JA, 1), + BPF_STMT(BPF_JMP + BPF_JA, u32::from(offset) + 1), + ]; + rule_len += rule_jumps.len(); + accumulator.push(rule_jumps); + + // Finally, builds the translated rule by consuming the accumulator. + let mut result = Vec::with_capacity(rule_len); + accumulator + .into_iter() + .rev() + .for_each(|mut instructions| result.append(&mut instructions)); + + result + } +} + +impl SeccompFilter { + /// Creates a new filter with a set of rules and a default action. + /// + /// # Arguments + /// + /// * `rules` - Map of syscall numbers and the rules that will be applied to each of them. + /// * `default_action` - Action taken for all syscalls that do not match any rule. + pub fn new( + rules: BTreeMap>, + default_action: SeccompAction, + ) -> Result { + // All inserted syscalls must have at least one rule, otherwise BPF code will break. + for (_, value) in rules.iter() { + if value.is_empty() { + return Err(Error::EmptyRulesVector); + } + } + + Ok(Self { + rules, + default_action, + }) + } + + /// Adds rules for the specified syscall in the filter. + /// + /// # Arguments + /// + /// * `syscall_number` - Syscall identifier. + /// * `rules` - Rules to be applied to the syscall. + pub fn add_rules(&mut self, syscall_number: i64, mut rules: Vec) -> Result<()> { + // All inserted syscalls must have at least one rule, otherwise BPF code will break. + if rules.is_empty() { + return Err(Error::EmptyRulesVector); + } + + self.rules + .entry(syscall_number) + .or_insert_with(std::vec::Vec::new) + .append(&mut rules); + + Ok(()) + } + + /// Builds the array of filter instructions and sends them to the kernel. + /// + /// # Arguments + /// + /// * `filters` - BPF program containing the seccomp rules. + pub fn apply(filters: BpfProgram) -> Result<()> { + // If the program is empty, skip this step. + if filters.is_empty() { + return Ok(()); + } + + let mut bpf_filter = Vec::new(); + bpf_filter.extend(VALIDATE_ARCHITECTURE()); + bpf_filter.extend(filters); + + unsafe { + { + let rc = libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + if rc != 0 { + return Err(Error::Load(*libc::__errno_location())); + } + } + + let bpf_prog = sock_fprog { + len: bpf_filter.len() as u16, + filter: bpf_filter.as_ptr(), + }; + let bpf_prog_ptr = &bpf_prog as *const sock_fprog; + { + let rc = libc::prctl( + libc::PR_SET_SECCOMP, + libc::SECCOMP_MODE_FILTER, + bpf_prog_ptr, + ); + if rc != 0 { + return Err(Error::Load(*libc::__errno_location())); + } + } + } + + Ok(()) + } + + /// Appends a chain of rules to an accumulator, updating the length of the filter. + /// + /// # Arguments + /// + /// * `syscall_number` - The syscall to which the rules apply. + /// * `chain` - The chain of rules for the specified syscall. + /// * `default_action` - The action to be taken in none of the rules apply. + /// * `accumulator` - The expanding BPF program. + /// * `filter_len` - The size (in number of BPF statements) of the BPF program. This is + /// limited to 4096. If the limit is exceeded, the filter is invalidated. + fn append_syscall_chain( + syscall_number: i64, + chain: Vec, + default_action: u32, + accumulator: &mut Vec>, + filter_len: &mut usize, + ) -> Result<()> { + // The rules of the chain are translated into BPF statements. + let chain: Vec<_> = chain.into_iter().map(SeccompRule::into).collect(); + let chain_len: usize = chain.iter().map(std::vec::Vec::len).sum(); + + // The chain starts with a comparison checking the loaded syscall number against the + // syscall number of the chain. + let mut built_syscall = Vec::with_capacity(1 + chain_len + 1); + built_syscall.push(BPF_JUMP( + BPF_JMP + BPF_JEQ + BPF_K, + syscall_number as u32, + 0, + 1, + )); + + // The rules of the chain are appended. + chain + .into_iter() + .for_each(|mut rule| built_syscall.append(&mut rule)); + + // The default action is appended, if the syscall number comparison matched and then all + // rules fail to match, the default action is reached. + built_syscall.push(BPF_STMT(BPF_RET + BPF_K, default_action)); + + // The chain is appended to the result. + *filter_len += built_syscall.len(); + accumulator.push(built_syscall); + + // BPF programs are limited to 4096 statements. + if *filter_len >= BPF_MAX_LEN { + return Err(Error::FilterTooLarge); + } + + Ok(()) + } + + /// Replaces the seccomp rules so as to allow every syscall contained in the rule set. + pub fn allow_all(mut self) -> SeccompFilter { + // Pre-collect the keys to avoid the double borrow. + let syscalls: Vec = self.rules.keys().cloned().collect(); + for syscall in syscalls { + let ruleset: SyscallRuleSet = allow_syscall(syscall); + self.rules.insert(ruleset.0, ruleset.1); + } + self + } + + /// Creates an empty `SeccompFilter` which allows everything. + pub fn empty() -> SeccompFilter { + Self { + rules: BTreeMap::new(), + default_action: SeccompAction::Allow, + } + } +} + +impl TryInto for SeccompFilter { + type Error = Error; + fn try_into(self) -> Result { + // If no rules are set up, return an empty vector. + if self.rules.is_empty() { + return Ok(vec![]); + } + + // The called syscall number is loaded. + let mut accumulator = Vec::with_capacity(1); + let mut filter_len = 1; + accumulator.push(EXAMINE_SYSCALL()); + + // Orders syscalls by priority, the highest number represents the highest priority. + let mut iter = self.rules.into_iter(); + + // For each syscall adds its rule chain to the filter. + let default_action = u32::from(self.default_action); + iter.try_for_each(|(syscall_number, chain)| { + SeccompFilter::append_syscall_chain( + syscall_number, + chain, + default_action, + &mut accumulator, + &mut filter_len, + ) + })?; + + // The default action is once again appended, it is reached if all syscall number + // comparisons fail. + filter_len += 1; + accumulator.push(vec![BPF_STMT(BPF_RET + BPF_K, default_action)]); + + // Finally, builds the translated filter by consuming the accumulator. + let mut result = Vec::with_capacity(filter_len); + accumulator + .into_iter() + .for_each(|mut instructions| result.append(&mut instructions)); + + Ok(result) + } +} + +/// Builds a `jump` BPF instruction. +/// +/// # Arguments +/// +/// * `code` - The operation code. +/// * `jt` - The jump offset in case the operation returns `true`. +/// * `jf` - The jump offset in case the operation returns `false`. +/// * `k` - The operand. +#[allow(non_snake_case)] +#[inline(always)] +fn BPF_JUMP(code: u16, k: u32, jt: u8, jf: u8) -> sock_filter { + sock_filter { code, jt, jf, k } +} + +/// Builds a "statement" BPF instruction. +/// +/// # Arguments +/// +/// * `code` - The operation code. +/// * `k` - The operand. +#[allow(non_snake_case)] +#[inline(always)] +fn BPF_STMT(code: u16, k: u32) -> sock_filter { + sock_filter { + code, + jt: 0, + jf: 0, + k, + } +} + +/// Builds a sequence of BPF instructions that validate the underlying architecture. +#[allow(non_snake_case)] +#[inline(always)] +fn VALIDATE_ARCHITECTURE() -> Vec { + vec![ + BPF_STMT(BPF_LD + BPF_W + BPF_ABS, 4), + #[cfg(target_arch = "x86_64")] + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, AUDIT_ARCH_X86_64, 1, 0), + #[cfg(target_arch = "aarch64")] + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, AUDIT_ARCH_AARCH64, 1, 0), + BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_KILL), + ] +} + +/// Builds a sequence of BPF instructions that are followed by syscall examination. +#[allow(non_snake_case)] +#[inline(always)] +fn EXAMINE_SYSCALL() -> Vec { + vec![BPF_STMT( + BPF_LD + BPF_W + BPF_ABS, + u32::from(SECCOMP_DATA_NR_OFFSET), + )] +} + +/// Possible errors that could be encountered while processing a seccomp level value or generating +/// a BPF program based on it. +#[derive(Debug)] +pub enum SeccompError { + /// Error while trying to generate a BPF program. + SeccompFilter(Error), + /// Failed to parse to `u8`. + Parse(std::num::ParseIntError), + /// Seccomp level is an `u8` value, other than 0, 1 or 2. + Level(u8), +} + +impl Display for SeccompError { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + match *self { + SeccompError::SeccompFilter(ref err) => write!(f, "Seccomp error: {}", err), + SeccompError::Parse(ref err) => write!(f, "Could not parse to 'u8': {}", err), + SeccompError::Level(arg) => write!( + f, + "'{}' isn't a valid value for 'seccomp-level'. Must be 0, 1 or 2.", + arg + ), + } + } +} + +/// Possible values for seccomp level. +#[repr(u8)] +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum SeccompLevel { + /// Seccomp filtering disabled. + None = 0, + /// Level of filtering that causes only syscall numbers to be examined. + Basic = 1, + /// Level of filtering that causes syscall numbers and parameters to be examined. + Advanced = 2, +} + +impl SeccompLevel { + /// Converts from a seccomp level value of type String to the corresponding SeccompLevel variant + /// or returns an error if the parsing failed. + pub fn from_string(seccomp_value: &str) -> std::result::Result { + match seccomp_value.parse::() { + Ok(0) => Ok(SeccompLevel::None), + Ok(1) => Ok(SeccompLevel::Basic), + Ok(2) => Ok(SeccompLevel::Advanced), + Ok(level) => Err(SeccompError::Level(level)), + Err(err) => Err(SeccompError::Parse(err)), + } + } +} From ec8b59c2556e57a91c387c83545350e61b7bcb28 Mon Sep 17 00:00:00 2001 From: MeiK Date: Thu, 28 Mar 2024 15:10:48 +0800 Subject: [PATCH 02/11] update --- src/error.rs | 25 +++++++++++++++++++++++++ src/sys/linux/mod.rs | 42 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/src/error.rs b/src/error.rs index 0803aaf..c98965a 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,4 +1,6 @@ +use libc::strerror; use serde_json::Error as SerdeJsonError; +use std::ffi::{CStr, NulError}; use std::fmt::Formatter; use std::io::Error as IOError; use std::{fmt, result}; @@ -14,6 +16,10 @@ pub enum Error { /// Windows 平台下的 LastError #[cfg(target_os = "windows")] WinError(String, u32, WIN_ERROR), + #[cfg(target_os = "linux")] + LinuxError(String, u32, Option), + #[cfg(target_os = "linux")] + StringToCStringError(NulError), } pub type Result = result::Result; @@ -40,6 +46,25 @@ impl fmt::Display for Error { e.message() ) } + #[cfg(target_os = "linux")] + Error::LinuxError(ref filename, ref line, errno) => { + write!(f, "{}:{}: Error: {}", filename, line, errno_str(errno)) + } + _ => { + write!(f, "{}", self) + } + } + } +} + +#[cfg(target_os = "linux")] +fn errno_str(errno: Option) -> String { + match errno { + Some(no) => { + let stre = unsafe { strerror(no) }; + let c_str: &CStr = unsafe { CStr::from_ptr(stre) }; + c_str.to_str().unwrap().to_string() } + _ => String::from("Unknown Error!"), } } diff --git a/src/sys/linux/mod.rs b/src/sys/linux/mod.rs index 987512e..b66109b 100644 --- a/src/sys/linux/mod.rs +++ b/src/sys/linux/mod.rs @@ -1,9 +1,14 @@ -mod seccomp; +use crate::error::Error::LinuxError; +use std::ptr; use crate::status::Status; use crate::sys::SandboxImpl; use crate::Opts; +mod seccomp; + +const STACK_SIZE: usize = 1024 * 1024; + #[derive(Debug)] pub struct Sandbox { inner_args: Vec, @@ -30,6 +35,41 @@ impl SandboxImpl for Sandbox { unsafe fn run(&mut self) -> crate::error::Result { let status: Status = Default::default(); + let stack = libc::mmap( + ptr::null_mut(), + STACK_SIZE, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | libc::MAP_STACK, + -1, + 0, + ); + if stack == libc::MAP_FAILED { + let err = std::io::Error::last_os_error().raw_os_error(); + return Err(LinuxError(String::from(file!()), line!(), err)); + } + + let pid = libc::clone( + runit, + (stack as usize + STACK_SIZE) as *mut libc::c_void, + libc::SIGCHLD + | libc::CLONE_NEWUTS // 设置新的 UTS 名称空间(主机名、网络名等) + | libc::CLONE_NEWNET // 设置新的网络空间,如果没有配置网络,则该沙盒内部将无法联网 + | libc::CLONE_NEWNS // 为沙盒内部设置新的 namespaces 空间 + | libc::CLONE_NEWIPC // IPC 隔离 + | libc::CLONE_NEWCGROUP // 在新的 CGROUP 中创建沙盒 + | libc::CLONE_NEWPID, // 外部进程对沙盒不可见 + self as *mut _ as *mut libc::c_void, + ); Ok(status) } } + +extern "C" fn runit(sandbox: *mut libc::c_void) -> i32 { + let sandbox = unsafe { &mut *(sandbox as *mut Sandbox) }; + println!("{:?}", sandbox); + 0 +} + +fn wait_it(pid: i32) -> Status { + Status::default() +} From 87fd6ff4d9682fc399d840eff7414a96de2abd78 Mon Sep 17 00:00:00 2001 From: MeiK Date: Thu, 28 Mar 2024 17:03:12 +0800 Subject: [PATCH 03/11] update --- src/error.rs | 4 +++ src/sys/linux/mod.rs | 74 ++++++++++++++++++++++++++++++++++++------ src/sys/linux/utils.rs | 28 ++++++++++++++++ 3 files changed, 96 insertions(+), 10 deletions(-) create mode 100644 src/sys/linux/utils.rs diff --git a/src/error.rs b/src/error.rs index c98965a..eeb772a 100644 --- a/src/error.rs +++ b/src/error.rs @@ -10,6 +10,7 @@ use windows::core::Error as WIN_ERROR; #[derive(Debug)] pub enum Error { + S(String), E(String, u32, String), IOError(IOError), SerdeJsonError(SerdeJsonError), @@ -30,6 +31,9 @@ impl fmt::Display for Error { Error::E(ref filename, ref line, ref e) => { write!(f, "{}:{}: Error: {}", filename, line, e) } + Error::S(ref e) => { + write!(f, "{}", e) + } Error::IOError(ref e) => { write!(f, "{}", e) } diff --git a/src/sys/linux/mod.rs b/src/sys/linux/mod.rs index b66109b..73861a2 100644 --- a/src/sys/linux/mod.rs +++ b/src/sys/linux/mod.rs @@ -1,14 +1,28 @@ -use crate::error::Error::LinuxError; +use crate::error::Error::{LinuxError, S}; use std::ptr; +use std::time::Instant; use crate::status::Status; use crate::sys::SandboxImpl; use crate::Opts; mod seccomp; +mod utils; const STACK_SIZE: usize = 1024 * 1024; +#[macro_export] +macro_rules! linux_syscall { + ($expression:expr) => {{ + let ret = $expression; + if ret < 0 { + let err = std::io::Error::last_os_error().raw_os_error(); + return Err(LinuxError(String::from(file!()), line!(), err)); + }; + ret + }}; +} + #[derive(Debug)] pub struct Sandbox { inner_args: Vec, @@ -34,7 +48,6 @@ impl SandboxImpl for Sandbox { } unsafe fn run(&mut self) -> crate::error::Result { - let status: Status = Default::default(); let stack = libc::mmap( ptr::null_mut(), STACK_SIZE, @@ -52,15 +65,23 @@ impl SandboxImpl for Sandbox { runit, (stack as usize + STACK_SIZE) as *mut libc::c_void, libc::SIGCHLD - | libc::CLONE_NEWUTS // 设置新的 UTS 名称空间(主机名、网络名等) - | libc::CLONE_NEWNET // 设置新的网络空间,如果没有配置网络,则该沙盒内部将无法联网 - | libc::CLONE_NEWNS // 为沙盒内部设置新的 namespaces 空间 - | libc::CLONE_NEWIPC // IPC 隔离 - | libc::CLONE_NEWCGROUP // 在新的 CGROUP 中创建沙盒 + | libc::CLONE_NEWUTS // 设置新的 UTS 名称空间(主机名、网络名等) + | libc::CLONE_NEWNET // 设置新的网络空间,如果没有配置网络,则该沙盒内部将无法联网 + | libc::CLONE_NEWNS // 为沙盒内部设置新的 namespaces 空间 + | libc::CLONE_NEWIPC // IPC 隔离 + | libc::CLONE_NEWCGROUP // 在新的 CGROUP 中创建沙盒 | libc::CLONE_NEWPID, // 外部进程对沙盒不可见 self as *mut _ as *mut libc::c_void, ); - Ok(status) + if pid < 0 { + return Err(S(format!("clone failure: {}", pid))); + } + + let status = wait_it(pid); + + linux_syscall!(libc::munmap(stack, STACK_SIZE)); + + status } } @@ -70,6 +91,39 @@ extern "C" fn runit(sandbox: *mut libc::c_void) -> i32 { 0 } -fn wait_it(pid: i32) -> Status { - Status::default() +unsafe fn wait_it(pid: i32) -> crate::error::Result { + let start_time = Instant::now(); + + let mut status: i32 = 0; + let mut rusage: libc::rusage = utils::new_rusage(); + + linux_syscall!(libc::wait4(pid, &mut status, 0, &mut rusage)); + + let cpu_time_used = rusage.ru_utime.tv_sec * 1000 + + i64::from(rusage.ru_utime.tv_usec) / 1000 + + rusage.ru_stime.tv_sec * 1000 + + i64::from(rusage.ru_stime.tv_usec) / 1000; + let memory_used = rusage.ru_maxrss; + let mut exit_code = 0; + let exited = libc::WIFEXITED(status); + if exited { + exit_code = libc::WEXITSTATUS(status); + } + let signal = if libc::WIFSIGNALED(status) { + libc::WTERMSIG(status) + } else if libc::WIFSTOPPED(status) { + libc::WSTOPSIG(status) + } else { + 0 + }; + + let time_used = start_time.elapsed().as_millis(); + Ok(Status { + time_used: time_used as u64, + cpu_time_used: cpu_time_used as u64, + memory_used: memory_used as u64, + exit_code, + status, + signal, + }) } diff --git a/src/sys/linux/utils.rs b/src/sys/linux/utils.rs new file mode 100644 index 0000000..485f6b9 --- /dev/null +++ b/src/sys/linux/utils.rs @@ -0,0 +1,28 @@ +/// 一个全为 `0` 的 `rusage` +#[inline(always)] +pub fn new_rusage() -> libc::rusage { + libc::rusage { + ru_utime: libc::timeval { + tv_sec: 0 as libc::time_t, + tv_usec: 0 as libc::suseconds_t, + }, + ru_stime: libc::timeval { + tv_sec: 0 as libc::time_t, + tv_usec: 0 as libc::suseconds_t, + }, + ru_maxrss: 0 as libc::c_long, + ru_ixrss: 0 as libc::c_long, + ru_idrss: 0 as libc::c_long, + ru_isrss: 0 as libc::c_long, + ru_minflt: 0 as libc::c_long, + ru_majflt: 0 as libc::c_long, + ru_nswap: 0 as libc::c_long, + ru_inblock: 0 as libc::c_long, + ru_oublock: 0 as libc::c_long, + ru_msgsnd: 0 as libc::c_long, + ru_msgrcv: 0 as libc::c_long, + ru_nsignals: 0 as libc::c_long, + ru_nvcsw: 0 as libc::c_long, + ru_nivcsw: 0 as libc::c_long, + } +} From 0195f56f5f2cf94212187b5c785e135c0a805cb7 Mon Sep 17 00:00:00 2001 From: MeiK Date: Thu, 28 Mar 2024 18:45:24 +0800 Subject: [PATCH 04/11] =?UTF-8?q?=E4=BC=98=E5=8C=96=E5=BC=82=E5=B8=B8?= =?UTF-8?q?=E5=B1=95=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/sys/linux/mod.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/sys/linux/mod.rs b/src/sys/linux/mod.rs index 73861a2..661bff8 100644 --- a/src/sys/linux/mod.rs +++ b/src/sys/linux/mod.rs @@ -61,7 +61,7 @@ impl SandboxImpl for Sandbox { return Err(LinuxError(String::from(file!()), line!(), err)); } - let pid = libc::clone( + let pid = linux_syscall!(libc::clone( runit, (stack as usize + STACK_SIZE) as *mut libc::c_void, libc::SIGCHLD @@ -72,10 +72,7 @@ impl SandboxImpl for Sandbox { | libc::CLONE_NEWCGROUP // 在新的 CGROUP 中创建沙盒 | libc::CLONE_NEWPID, // 外部进程对沙盒不可见 self as *mut _ as *mut libc::c_void, - ); - if pid < 0 { - return Err(S(format!("clone failure: {}", pid))); - } + )); let status = wait_it(pid); From 90bf6a804e090afda876970be7cd503574255da4 Mon Sep 17 00:00:00 2001 From: MeiK Date: Fri, 29 Mar 2024 16:23:26 +0800 Subject: [PATCH 05/11] update --- src/sys/linux/mod.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/sys/linux/mod.rs b/src/sys/linux/mod.rs index 661bff8..8d2a101 100644 --- a/src/sys/linux/mod.rs +++ b/src/sys/linux/mod.rs @@ -1,6 +1,7 @@ use crate::error::Error::{LinuxError, S}; use std::ptr; use std::time::Instant; +use libc::pid_t; use crate::status::Status; use crate::sys::SandboxImpl; @@ -85,6 +86,23 @@ impl SandboxImpl for Sandbox { extern "C" fn runit(sandbox: *mut libc::c_void) -> i32 { let sandbox = unsafe { &mut *(sandbox as *mut Sandbox) }; println!("{:?}", sandbox); + + let pid = unsafe { linux_syscall!(libc::fork()) }; + + if pid > 0 { + // 父进程 + runit_parent(pid) + } else { + // 子进程 + runit_child() + } +} + +fn runit_parent(pid: pid_t) -> i32 { + 0 +} + +fn runit_child() -> i32 { 0 } From c9a83e85b59fd592fc8384c2e7a557b57e169c97 Mon Sep 17 00:00:00 2001 From: MeiK Date: Fri, 29 Mar 2024 16:34:25 +0800 Subject: [PATCH 06/11] update --- src/sys/linux/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sys/linux/mod.rs b/src/sys/linux/mod.rs index 8d2a101..e6363a3 100644 --- a/src/sys/linux/mod.rs +++ b/src/sys/linux/mod.rs @@ -1,7 +1,7 @@ use crate::error::Error::{LinuxError, S}; +use libc::pid_t; use std::ptr; use std::time::Instant; -use libc::pid_t; use crate::status::Status; use crate::sys::SandboxImpl; @@ -87,7 +87,7 @@ extern "C" fn runit(sandbox: *mut libc::c_void) -> i32 { let sandbox = unsafe { &mut *(sandbox as *mut Sandbox) }; println!("{:?}", sandbox); - let pid = unsafe { linux_syscall!(libc::fork()) }; + let pid = unsafe { libc::fork() }; if pid > 0 { // 父进程 From 1ec4af77485da4f70443b3f38ac70adde67d8605 Mon Sep 17 00:00:00 2001 From: MeiK Date: Thu, 11 Apr 2024 14:28:50 +0800 Subject: [PATCH 07/11] update --- src/sys/linux/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/sys/linux/mod.rs b/src/sys/linux/mod.rs index e6363a3..208ee2e 100644 --- a/src/sys/linux/mod.rs +++ b/src/sys/linux/mod.rs @@ -66,6 +66,7 @@ impl SandboxImpl for Sandbox { runit, (stack as usize + STACK_SIZE) as *mut libc::c_void, libc::SIGCHLD + | libc::CLONE_NEWUSER // 在 namespaces 空间内使用新的用户 | libc::CLONE_NEWUTS // 设置新的 UTS 名称空间(主机名、网络名等) | libc::CLONE_NEWNET // 设置新的网络空间,如果没有配置网络,则该沙盒内部将无法联网 | libc::CLONE_NEWNS // 为沙盒内部设置新的 namespaces 空间 @@ -83,6 +84,9 @@ impl SandboxImpl for Sandbox { } } +/** + * 从这里开始主流程将无法获取函数返回值等信息,因此有异常就直接 panic 退出 + */ extern "C" fn runit(sandbox: *mut libc::c_void) -> i32 { let sandbox = unsafe { &mut *(sandbox as *mut Sandbox) }; println!("{:?}", sandbox); From 0e1bb498907708142da0bdbcac5eb8e776810fdc Mon Sep 17 00:00:00 2001 From: MeiK Date: Thu, 11 Apr 2024 16:11:01 +0800 Subject: [PATCH 08/11] =?UTF-8?q?=E6=94=AF=E6=8C=81=E8=B5=84=E6=BA=90?= =?UTF-8?q?=E9=99=90=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 6 +++ resources/runit.s | 27 ++++++++++ src/error.rs | 2 +- src/sys/linux/mod.rs | 117 +++++++++++++++++++++++++++++++++++++--- src/sys/linux/utils.rs | 118 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 261 insertions(+), 9 deletions(-) create mode 100644 resources/runit.s diff --git a/README.md b/README.md index d57aa3d..fac214d 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,12 @@ Options: Print version ``` +**在 linux 环境下,需要额外安装 `runit`:** + +```shell +$ gcc resources/runit.s -o /usr/bin/runit +``` + ## 结果 结果的格式为 JSON diff --git a/resources/runit.s b/resources/runit.s new file mode 100644 index 0000000..d345e71 --- /dev/null +++ b/resources/runit.s @@ -0,0 +1,27 @@ + .text + .globl main +main: + endbr64 + pushq %rbp + movq %rsp, %rbp + subq $48, %rsp + movl %edi, -20(%rbp) + movq %rsi, -32(%rbp) + movq %rdx, -40(%rbp) + call fork@PLT + movl %eax, -4(%rbp) + cmpl $0, -4(%rbp) + jne .L2 + movq -32(%rbp), %rax + leaq 8(%rax), %rcx + movq -32(%rbp), %rax + addq $8, %rax + movq (%rax), %rax + movq -40(%rbp), %rdx + movq %rcx, %rsi + movq %rax, %rdi + call execve@PLT +.L2: + movl $0, %eax + leave + ret diff --git a/src/error.rs b/src/error.rs index eeb772a..0710233 100644 --- a/src/error.rs +++ b/src/error.rs @@ -62,7 +62,7 @@ impl fmt::Display for Error { } #[cfg(target_os = "linux")] -fn errno_str(errno: Option) -> String { +pub fn errno_str(errno: Option) -> String { match errno { Some(no) => { let stre = unsafe { strerror(no) }; diff --git a/src/sys/linux/mod.rs b/src/sys/linux/mod.rs index 208ee2e..b56bb33 100644 --- a/src/sys/linux/mod.rs +++ b/src/sys/linux/mod.rs @@ -1,11 +1,14 @@ use crate::error::Error::{LinuxError, S}; use libc::pid_t; +use std::ffi::CString; +use std::path::Path; use std::ptr; use std::time::Instant; use crate::status::Status; +use crate::sys::linux::utils::{last_err, ExecArgs}; use crate::sys::SandboxImpl; -use crate::Opts; +use crate::{syscall_or_panic, Opts}; mod seccomp; mod utils; @@ -33,6 +36,7 @@ pub struct Sandbox { input: Option, output: Option, error: Option, + file_size_limit: Option, } impl SandboxImpl for Sandbox { @@ -45,6 +49,7 @@ impl SandboxImpl for Sandbox { input: opts.input, output: opts.output, error: opts.error, + file_size_limit: Some(0), } } @@ -66,7 +71,7 @@ impl SandboxImpl for Sandbox { runit, (stack as usize + STACK_SIZE) as *mut libc::c_void, libc::SIGCHLD - | libc::CLONE_NEWUSER // 在 namespaces 空间内使用新的用户 + | libc::CLONE_NEWUSER // 在 namespaces 空间内使用新的用户,这允许我们在不使用 root 用户的情况下创建新的 namespaces 空间 | libc::CLONE_NEWUTS // 设置新的 UTS 名称空间(主机名、网络名等) | libc::CLONE_NEWNET // 设置新的网络空间,如果没有配置网络,则该沙盒内部将无法联网 | libc::CLONE_NEWNS // 为沙盒内部设置新的 namespaces 空间 @@ -89,25 +94,121 @@ impl SandboxImpl for Sandbox { */ extern "C" fn runit(sandbox: *mut libc::c_void) -> i32 { let sandbox = unsafe { &mut *(sandbox as *mut Sandbox) }; - println!("{:?}", sandbox); + + // 判断 runit 是否存在 + let runit_exists = Path::new("/usr/bin/runit").exists(); let pid = unsafe { libc::fork() }; if pid > 0 { // 父进程 - runit_parent(pid) + runit_parent(&sandbox, pid, runit_exists) } else { // 子进程 - runit_child() + runit_child(&sandbox, runit_exists) } } -fn runit_parent(pid: pid_t) -> i32 { +fn runit_parent(sandbox: &Sandbox, pid: pid_t, runit_exists: bool) -> i32 { 0 } -fn runit_child() -> i32 { - 0 +fn runit_child(sandbox: &Sandbox, runit_exists: bool) -> i32 { + // 进行资源与安全限制等 + let mut rlimit = libc::rlimit { + rlim_cur: 0, + rlim_max: 0, + }; + // CPU 时间限制,单位为 S + if let Some(time_limit) = sandbox.time_limit { + rlimit.rlim_cur = (time_limit / 1000 + 1) as u64; + if time_limit % 1000 > 800 { + rlimit.rlim_cur += 1; + } + rlimit.rlim_max = rlimit.rlim_cur; + unsafe { + syscall_or_panic!( + libc::setrlimit(libc::RLIMIT_CPU, &rlimit), + "setrlimit RLIMIT_CPU" + ) + }; + } + // 内存限制,单位为 kib + if let Some(memory_limit) = sandbox.memory_limit { + rlimit.rlim_cur = memory_limit as u64 * 1024 * 2; + rlimit.rlim_max = memory_limit as u64 * 1024 * 2; + unsafe { + syscall_or_panic!( + libc::setrlimit(libc::RLIMIT_AS, &rlimit), + "setrlimit RLIMIT_AS" + ) + }; + + rlimit.rlim_cur = memory_limit as u64 * 1024 * 2; + rlimit.rlim_max = memory_limit as u64 * 1024 * 2; + unsafe { + syscall_or_panic!( + libc::setrlimit(libc::RLIMIT_STACK, &rlimit), + "setrlimit RLIMIT_STACK" + ) + }; + } + // 文件大小限制,单位为 bit + if let Some(file_size_limit) = sandbox.file_size_limit { + rlimit.rlim_cur = file_size_limit as u64; + rlimit.rlim_max = file_size_limit as u64; + unsafe { + syscall_or_panic!( + libc::setrlimit(libc::RLIMIT_FSIZE, &rlimit), + "setrlimit RLIMIT_FSIZE" + ) + }; + } + // 重定向输入输出流 + if let Some(file) = &sandbox.input { + let f = CString::new(file).unwrap(); + let fd = unsafe { + syscall_or_panic!( + libc::open(f.as_ptr(), libc::O_RDONLY, 0o644), + format!("open input file `{}`", file) + ) + }; + unsafe { syscall_or_panic!(libc::dup2(fd, libc::STDIN_FILENO), "dup2 stdin") }; + } + if let Some(file) = &sandbox.output { + let f = CString::new(file).unwrap(); + let fd = unsafe { + syscall_or_panic!( + libc::open(f.as_ptr(), libc::O_CREAT | libc::O_RDWR, 0o644), + format!("open output file `{}`", file) + ) + }; + unsafe { syscall_or_panic!(libc::dup2(fd, libc::STDOUT_FILENO), "dup2 stdout") }; + } + if let Some(file) = &sandbox.error { + let f = CString::new(file).unwrap(); + let fd = unsafe { + syscall_or_panic!( + libc::open(f.as_ptr(), libc::O_CREAT | libc::O_RDWR, 0o644), + format!("open error file `{}`", file) + ) + }; + unsafe { syscall_or_panic!(libc::dup2(fd, libc::STDERR_FILENO), "dup2 stderr") }; + } + + let exec_args = if !runit_exists { + ExecArgs::build(&sandbox.inner_args) + } else { + ExecArgs::build(&sandbox.inner_args) + } + .unwrap(); + + unsafe { + syscall_or_panic!( + libc::execve(exec_args.pathname, exec_args.argv, exec_args.envp), + "execve" + ) + } } unsafe fn wait_it(pid: i32) -> crate::error::Result { diff --git a/src/sys/linux/utils.rs b/src/sys/linux/utils.rs index 485f6b9..4f41071 100644 --- a/src/sys/linux/utils.rs +++ b/src/sys/linux/utils.rs @@ -1,3 +1,12 @@ +use std::collections::HashMap; +use std::ffi::{CStr, CString}; +use std::mem; +use std::ptr; + +use libc; + +use crate::error::{errno_str, Result}; + /// 一个全为 `0` 的 `rusage` #[inline(always)] pub fn new_rusage() -> libc::rusage { @@ -26,3 +35,112 @@ pub fn new_rusage() -> libc::rusage { ru_nivcsw: 0 as libc::c_long, } } + +pub fn last_err() -> String { + errno_str(std::io::Error::last_os_error().raw_os_error()) +} + +#[macro_export] +macro_rules! string_to_cstring { + ($expression:expr) => { + match CString::new($expression) { + Ok(value) => value, + Err(err) => return Err(crate::error::Error::StringToCStringError(err)), + } + }; +} + +/// 执行指定的系统调用,如果返回值小于 0,则抛出异常并结束进程 +#[macro_export] +macro_rules! syscall_or_panic { + ($expression:expr, $syscall:expr) => {{ + let ret = $expression; + if ret < 0 { + let last_err = last_err(); + panic!( + "{file}:{line}: {message}\n ret = {ret}, err = {last_err}", + file = file!(), + line = line!(), + message = $syscall + ); + }; + ret + }}; +} + +pub struct ExecArgs { + pub pathname: *const libc::c_char, + pub argv: *const *const libc::c_char, + pub envp: *const *const libc::c_char, + args: usize, + envs: usize, +} + +impl ExecArgs { + pub fn build(args: &Vec) -> Result { + let pathname = args[0].clone(); + let pathname_str = string_to_cstring!(pathname); + let pathname = pathname_str.as_ptr(); + + let mut argv_vec: Vec<*const libc::c_char> = vec![]; + for item in args.iter() { + let cstr = string_to_cstring!(item.clone()); + let cptr = cstr.as_ptr(); + // 需要使用 mem::forget 来标记 + // 否则在此次循环结束后,cstr 就会被回收,后续 exec 函数无法通过指针获取到字符串内容 + mem::forget(cstr); + argv_vec.push(cptr); + } + // argv 与 envp 的参数需要使用 NULL 来标记结束 + argv_vec.push(ptr::null()); + let argv: *const *const libc::c_char = argv_vec.as_ptr() as *const *const libc::c_char; + + // env 传递环境变量 + let mut envp_vec: Vec<*const libc::c_char> = vec![]; + envp_vec.push(ptr::null()); + let envp = envp_vec.as_ptr() as *const *const libc::c_char; + + mem::forget(pathname_str); + mem::forget(argv_vec); + mem::forget(envp_vec); + Ok(ExecArgs { + pathname, + argv, + args: args.len(), + envp, + envs: envp_vec.len(), + }) + } +} + +impl Drop for ExecArgs { + fn drop(&mut self) { + // 将 forget 的内存重新获取,并释放 + let c_string = unsafe { CString::from_raw(self.pathname as *mut i8) }; + drop(c_string); + let argv = unsafe { + Vec::from_raw_parts( + self.argv as *mut *const libc::c_void, + self.args - 1, + self.args - 1, + ) + }; + for arg in &argv { + let c_string = unsafe { CString::from_raw(*arg as *mut i8) }; + drop(c_string); + } + drop(argv); + let envp = unsafe { + Vec::from_raw_parts( + self.envp as *mut *const libc::c_void, + self.envs - 1, + self.envs - 1, + ) + }; + for env in &envp { + let c_string = unsafe { CString::from_raw(*env as *mut i8) }; + drop(c_string); + } + drop(envp); + } +} From 05381f242a8a4f4dafaba9a3f1f8584488bcabbf Mon Sep 17 00:00:00 2001 From: MeiK Date: Thu, 11 Apr 2024 16:15:50 +0800 Subject: [PATCH 09/11] fix --- src/sys/linux/mod.rs | 6 +++--- src/sys/linux/utils.rs | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/sys/linux/mod.rs b/src/sys/linux/mod.rs index b56bb33..173c68b 100644 --- a/src/sys/linux/mod.rs +++ b/src/sys/linux/mod.rs @@ -166,7 +166,7 @@ fn runit_child(sandbox: &Sandbox, runit_exists: bool) -> i32 { } // 重定向输入输出流 if let Some(file) = &sandbox.input { - let f = CString::new(file).unwrap(); + let f = CString::new(file.clone()).unwrap(); let fd = unsafe { syscall_or_panic!( libc::open(f.as_ptr(), libc::O_RDONLY, 0o644), @@ -176,7 +176,7 @@ fn runit_child(sandbox: &Sandbox, runit_exists: bool) -> i32 { unsafe { syscall_or_panic!(libc::dup2(fd, libc::STDIN_FILENO), "dup2 stdin") }; } if let Some(file) = &sandbox.output { - let f = CString::new(file).unwrap(); + let f = CString::new(file.clone()).unwrap(); let fd = unsafe { syscall_or_panic!( libc::open(f.as_ptr(), libc::O_CREAT | libc::O_RDWR, 0o644), @@ -186,7 +186,7 @@ fn runit_child(sandbox: &Sandbox, runit_exists: bool) -> i32 { unsafe { syscall_or_panic!(libc::dup2(fd, libc::STDOUT_FILENO), "dup2 stdout") }; } if let Some(file) = &sandbox.error { - let f = CString::new(file).unwrap(); + let f = CString::new(file.clone()).unwrap(); let fd = unsafe { syscall_or_panic!( libc::open(f.as_ptr(), libc::O_CREAT | libc::O_RDWR, 0o644), diff --git a/src/sys/linux/utils.rs b/src/sys/linux/utils.rs index 4f41071..2ab15d8 100644 --- a/src/sys/linux/utils.rs +++ b/src/sys/linux/utils.rs @@ -98,6 +98,7 @@ impl ExecArgs { // env 传递环境变量 let mut envp_vec: Vec<*const libc::c_char> = vec![]; envp_vec.push(ptr::null()); + let envs = envp_vec.len(); let envp = envp_vec.as_ptr() as *const *const libc::c_char; mem::forget(pathname_str); @@ -108,7 +109,7 @@ impl ExecArgs { argv, args: args.len(), envp, - envs: envp_vec.len(), + envs, }) } } From 91b7a81b9b84bdae804d7fd73a9ce8076aaf2314 Mon Sep 17 00:00:00 2001 From: MeiK Date: Tue, 30 Jul 2024 16:44:58 +0800 Subject: [PATCH 10/11] update --- .github/workflows/CI.yml | 169 +++++ .gitignore | 1 + Cargo.toml | 33 +- pyproject.toml | 15 + src/error.rs | 74 -- src/lib.rs | 29 + src/main.rs | 98 --- src/status.rs | 49 -- src/sys/linux/mod.rs | 249 ------- src/sys/linux/seccomp.rs | 1170 -------------------------------- src/sys/linux/utils.rs | 147 ---- src/sys/macos/mod.rs | 9 - src/sys/mod.rs | 20 - src/sys/windows/mod.rs | 341 ---------- src/sys/windows/utils.rs | 55 -- tests/test.py | 7 + tests/windows/output/output.rs | 3 - tests/windows/sleep/sleep.rs | 7 - tests/windows/stderr/stderr.rs | 3 - tests/windows/stdin/stdin.rs | 7 - 20 files changed, 228 insertions(+), 2258 deletions(-) create mode 100644 .github/workflows/CI.yml create mode 100644 pyproject.toml delete mode 100644 src/error.rs create mode 100644 src/lib.rs delete mode 100644 src/main.rs delete mode 100644 src/status.rs delete mode 100644 src/sys/linux/mod.rs delete mode 100644 src/sys/linux/seccomp.rs delete mode 100644 src/sys/linux/utils.rs delete mode 100644 src/sys/macos/mod.rs delete mode 100644 src/sys/mod.rs delete mode 100644 src/sys/windows/mod.rs delete mode 100644 src/sys/windows/utils.rs create mode 100644 tests/test.py delete mode 100644 tests/windows/output/output.rs delete mode 100644 tests/windows/sleep/sleep.rs delete mode 100644 tests/windows/stderr/stderr.rs delete mode 100644 tests/windows/stdin/stdin.rs diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml new file mode 100644 index 0000000..0e66a34 --- /dev/null +++ b/.github/workflows/CI.yml @@ -0,0 +1,169 @@ +# This file is autogenerated by maturin v1.7.0 +# To update, run +# +# maturin generate-ci github +# +name: CI + +on: + push: + branches: + - main + - master + tags: + - '*' + pull_request: + workflow_dispatch: + +permissions: + contents: read + +jobs: + linux: + runs-on: ${{ matrix.platform.runner }} + strategy: + matrix: + platform: + - runner: ubuntu-latest + target: x86_64 + - runner: ubuntu-latest + target: x86 + - runner: ubuntu-latest + target: aarch64 + - runner: ubuntu-latest + target: armv7 + - runner: ubuntu-latest + target: s390x + - runner: ubuntu-latest + target: ppc64le + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: 3.x + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.platform.target }} + args: --release --out dist --find-interpreter + sccache: 'true' + manylinux: auto + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: wheels-linux-${{ matrix.platform.target }} + path: dist + + musllinux: + runs-on: ${{ matrix.platform.runner }} + strategy: + matrix: + platform: + - runner: ubuntu-latest + target: x86_64 + - runner: ubuntu-latest + target: x86 + - runner: ubuntu-latest + target: aarch64 + - runner: ubuntu-latest + target: armv7 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: 3.x + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.platform.target }} + args: --release --out dist --find-interpreter + sccache: 'true' + manylinux: musllinux_1_2 + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: wheels-musllinux-${{ matrix.platform.target }} + path: dist + + windows: + runs-on: ${{ matrix.platform.runner }} + strategy: + matrix: + platform: + - runner: windows-latest + target: x64 + - runner: windows-latest + target: x86 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: 3.x + architecture: ${{ matrix.platform.target }} + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.platform.target }} + args: --release --out dist --find-interpreter + sccache: 'true' + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: wheels-windows-${{ matrix.platform.target }} + path: dist + + macos: + runs-on: ${{ matrix.platform.runner }} + strategy: + matrix: + platform: + - runner: macos-12 + target: x86_64 + - runner: macos-14 + target: aarch64 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: 3.x + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.platform.target }} + args: --release --out dist --find-interpreter + sccache: 'true' + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: wheels-macos-${{ matrix.platform.target }} + path: dist + + sdist: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Build sdist + uses: PyO3/maturin-action@v1 + with: + command: sdist + args: --out dist + - name: Upload sdist + uses: actions/upload-artifact@v4 + with: + name: wheels-sdist + path: dist + + release: + name: Release + runs-on: ubuntu-latest + if: "startsWith(github.ref, 'refs/tags/')" + needs: [linux, musllinux, windows, macos, sdist] + steps: + - uses: actions/download-artifact@v4 + - name: Publish to PyPI + uses: PyO3/maturin-action@v1 + env: + MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} + with: + command: upload + args: --non-interactive --skip-existing wheels-*/* diff --git a/.gitignore b/.gitignore index 474741a..e839429 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ Cargo.lock .vscode *.exe .cargo +venv diff --git a/Cargo.toml b/Cargo.toml index e081d77..860a29f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,30 +3,11 @@ name = "river" version = "0.1.0" edition = "2021" -[dependencies] -clap = { version = "4.5.1", features = ["derive"] } -clap-verbosity-flag = "2.2.0" -log = "0.4.20" -env_logger = "0.10.2" -tempfile = "3.10.0" -serde = { version = "1.0.197", features = ["derive"] } -serde_json = "1.0.114" -libc = "0.2.153" +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +name = "river" +crate-type = ["cdylib"] -[target.'cfg(windows)'.dependencies.windows] -version = "0.53.0" -features = [ - "Win32_Storage", - "Win32_Storage_FileSystem", - "Win32_System_Ioctl", - "Win32_System_IO", - "Win32_Foundation", - "Win32_Security", - "Win32_System_JobObjects", - "Win32_System_Threading", - "Win32_UI_WindowsAndMessaging", - "Win32_System_Diagnostics", - "Win32_System_Diagnostics_Debug", - "Win32_System_ProcessStatus", - "Win32_System_WindowsProgramming" -] +[dependencies] +pyo3 = "0.22.0" +libc = "0.2.155" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..0fdf696 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,15 @@ +[build-system] +requires = ["maturin>=1.7,<2.0"] +build-backend = "maturin" + +[project] +name = "river" +requires-python = ">=3.8" +classifiers = [ + "Programming Language :: Rust", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dynamic = ["version"] +[tool.maturin] +features = ["pyo3/extension-module"] diff --git a/src/error.rs b/src/error.rs deleted file mode 100644 index 0710233..0000000 --- a/src/error.rs +++ /dev/null @@ -1,74 +0,0 @@ -use libc::strerror; -use serde_json::Error as SerdeJsonError; -use std::ffi::{CStr, NulError}; -use std::fmt::Formatter; -use std::io::Error as IOError; -use std::{fmt, result}; - -#[cfg(target_os = "windows")] -use windows::core::Error as WIN_ERROR; - -#[derive(Debug)] -pub enum Error { - S(String), - E(String, u32, String), - IOError(IOError), - SerdeJsonError(SerdeJsonError), - /// Windows 平台下的 LastError - #[cfg(target_os = "windows")] - WinError(String, u32, WIN_ERROR), - #[cfg(target_os = "linux")] - LinuxError(String, u32, Option), - #[cfg(target_os = "linux")] - StringToCStringError(NulError), -} - -pub type Result = result::Result; - -impl fmt::Display for Error { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - match *self { - Error::E(ref filename, ref line, ref e) => { - write!(f, "{}:{}: Error: {}", filename, line, e) - } - Error::S(ref e) => { - write!(f, "{}", e) - } - Error::IOError(ref e) => { - write!(f, "{}", e) - } - Error::SerdeJsonError(ref e) => { - write!(f, "{}", e) - } - #[cfg(target_os = "windows")] - Error::WinError(ref filename, ref line, ref e) => { - write!( - f, - "{}:{}: Windows API Error: {}", - filename, - line, - e.message() - ) - } - #[cfg(target_os = "linux")] - Error::LinuxError(ref filename, ref line, errno) => { - write!(f, "{}:{}: Error: {}", filename, line, errno_str(errno)) - } - _ => { - write!(f, "{}", self) - } - } - } -} - -#[cfg(target_os = "linux")] -pub fn errno_str(errno: Option) -> String { - match errno { - Some(no) => { - let stre = unsafe { strerror(no) }; - let c_str: &CStr = unsafe { CStr::from_ptr(stre) }; - c_str.to_str().unwrap().to_string() - } - _ => String::from("Unknown Error!"), - } -} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..d1042b4 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,29 @@ +use pyo3::prelude::*; + +#[pyclass] +struct River { + file: String, +} + +#[pymethods] +impl River { + #[new] + fn new(file: String) -> Self { + Self { file } + } + + #[getter] + fn val(&self) -> String { + self.file.to_string() + } + + fn __str__(&self) -> String { + self.file.to_string() + } +} + +#[pymodule] +fn river(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + Ok(()) +} diff --git a/src/main.rs b/src/main.rs deleted file mode 100644 index 7175441..0000000 --- a/src/main.rs +++ /dev/null @@ -1,98 +0,0 @@ -use clap::Parser; -use clap_verbosity_flag::Verbosity; -use env_logger::Builder; -use log::{error, trace}; - -#[cfg(target_os = "linux")] -use sys::linux::Sandbox; -#[cfg(target_os = "macos")] -use sys::macos::Sandbox; -#[cfg(target_os = "windows")] -use sys::windows::Sandbox; - -use crate::sys::SandboxImpl; - -mod error; -mod status; -mod sys; - -/// example: `river -vvv -- /usr/bin/echo hello world` -#[derive(Parser, Debug)] -#[clap(version = "1.0.0", author = "MeiK ")] -pub struct Opts { - /// Input stream. The default value is STDIN(0) - #[clap(short, long)] - input: Option, - - /// Output stream. The default value is STDOUT(1) - #[clap(short, long)] - output: Option, - - /// Error stream. The default value is STDERR(2) - #[clap(short, long)] - error: Option, - - /// Output location of the running result. The default value is STDOUT(1) - #[clap(short, long)] - result: Option, - - /// Time limit, in ms. The default value is unlimited. - #[clap(short, long)] - time_limit: Option, - - /// CPU Time limit, in ms. The default value is unlimited. - #[clap(short, long)] - cpu_time_limit: Option, - - /// Memory limit, in kib. The default value is unlimited. - #[clap(short, long)] - memory_limit: Option, - - /// Program to run and command line arguments - #[clap(last(true), required = true)] - command: Vec, - - /// A level of verbosity, and can be used multiple times - #[command(flatten)] - verbose: Verbosity, -} - -impl Default for Opts { - fn default() -> Self { - Opts { - input: None, - output: None, - error: None, - result: None, - time_limit: None, - cpu_time_limit: None, - memory_limit: None, - command: vec![], - verbose: Default::default(), - } - } -} - -fn main() { - let opts: Opts = Opts::parse(); - - Builder::new() - .filter_level(opts.verbose.log_level_filter()) - .init(); - - trace!("{:?}", opts); - let result = opts.result.clone(); - let status = unsafe { Sandbox::with_opts(opts).run() }; - match status { - Ok(val) => { - if let Err(e) = val.write(result) { - error!("{}", e); - std::process::exit(1); - } - } - Err(e) => { - error!("{}", e); - std::process::exit(1); - } - } -} diff --git a/src/status.rs b/src/status.rs deleted file mode 100644 index 8b24b17..0000000 --- a/src/status.rs +++ /dev/null @@ -1,49 +0,0 @@ -use std::fmt; -use std::fmt::Formatter; -use std::fs; - -use serde::{Deserialize, Serialize}; - -use crate::error::Error::{IOError, SerdeJsonError}; -use crate::error::Result; - -#[derive(Debug, Serialize, Deserialize)] -pub struct Status { - pub time_used: u64, - pub cpu_time_used: u64, - pub memory_used: u64, - pub exit_code: i32, - pub status: i32, - pub signal: i32, -} - -impl Status { - pub fn write(&self, file: Option) -> Result<()> { - let json_str = serde_json::to_string_pretty(&self).map_err(|e| SerdeJsonError(e))?; - if let Some(f) = file { - fs::write(f, format!("{}", json_str)).map_err(|e| IOError(e))?; - } else { - println!("{}", json_str); - }; - Ok(()) - } -} - -impl fmt::Display for Status { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!(f, "{:?}", self) - } -} - -impl Default for Status { - fn default() -> Self { - Status { - time_used: 0, - cpu_time_used: 0, - memory_used: 0, - exit_code: 0, - status: 0, - signal: 0, - } - } -} diff --git a/src/sys/linux/mod.rs b/src/sys/linux/mod.rs deleted file mode 100644 index 173c68b..0000000 --- a/src/sys/linux/mod.rs +++ /dev/null @@ -1,249 +0,0 @@ -use crate::error::Error::{LinuxError, S}; -use libc::pid_t; -use std::ffi::CString; -use std::path::Path; -use std::ptr; -use std::time::Instant; - -use crate::status::Status; -use crate::sys::linux::utils::{last_err, ExecArgs}; -use crate::sys::SandboxImpl; -use crate::{syscall_or_panic, Opts}; - -mod seccomp; -mod utils; - -const STACK_SIZE: usize = 1024 * 1024; - -#[macro_export] -macro_rules! linux_syscall { - ($expression:expr) => {{ - let ret = $expression; - if ret < 0 { - let err = std::io::Error::last_os_error().raw_os_error(); - return Err(LinuxError(String::from(file!()), line!(), err)); - }; - ret - }}; -} - -#[derive(Debug)] -pub struct Sandbox { - inner_args: Vec, - time_limit: Option, - cpu_time_limit: Option, - memory_limit: Option, - input: Option, - output: Option, - error: Option, - file_size_limit: Option, -} - -impl SandboxImpl for Sandbox { - fn with_opts(opts: Opts) -> Self { - Sandbox { - inner_args: opts.command, - time_limit: opts.time_limit, - cpu_time_limit: opts.cpu_time_limit, - memory_limit: opts.memory_limit, - input: opts.input, - output: opts.output, - error: opts.error, - file_size_limit: Some(0), - } - } - - unsafe fn run(&mut self) -> crate::error::Result { - let stack = libc::mmap( - ptr::null_mut(), - STACK_SIZE, - libc::PROT_READ | libc::PROT_WRITE, - libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | libc::MAP_STACK, - -1, - 0, - ); - if stack == libc::MAP_FAILED { - let err = std::io::Error::last_os_error().raw_os_error(); - return Err(LinuxError(String::from(file!()), line!(), err)); - } - - let pid = linux_syscall!(libc::clone( - runit, - (stack as usize + STACK_SIZE) as *mut libc::c_void, - libc::SIGCHLD - | libc::CLONE_NEWUSER // 在 namespaces 空间内使用新的用户,这允许我们在不使用 root 用户的情况下创建新的 namespaces 空间 - | libc::CLONE_NEWUTS // 设置新的 UTS 名称空间(主机名、网络名等) - | libc::CLONE_NEWNET // 设置新的网络空间,如果没有配置网络,则该沙盒内部将无法联网 - | libc::CLONE_NEWNS // 为沙盒内部设置新的 namespaces 空间 - | libc::CLONE_NEWIPC // IPC 隔离 - | libc::CLONE_NEWCGROUP // 在新的 CGROUP 中创建沙盒 - | libc::CLONE_NEWPID, // 外部进程对沙盒不可见 - self as *mut _ as *mut libc::c_void, - )); - - let status = wait_it(pid); - - linux_syscall!(libc::munmap(stack, STACK_SIZE)); - - status - } -} - -/** - * 从这里开始主流程将无法获取函数返回值等信息,因此有异常就直接 panic 退出 - */ -extern "C" fn runit(sandbox: *mut libc::c_void) -> i32 { - let sandbox = unsafe { &mut *(sandbox as *mut Sandbox) }; - - // 判断 runit 是否存在 - let runit_exists = Path::new("/usr/bin/runit").exists(); - - let pid = unsafe { libc::fork() }; - - if pid > 0 { - // 父进程 - runit_parent(&sandbox, pid, runit_exists) - } else { - // 子进程 - runit_child(&sandbox, runit_exists) - } -} - -fn runit_parent(sandbox: &Sandbox, pid: pid_t, runit_exists: bool) -> i32 { - 0 -} - -fn runit_child(sandbox: &Sandbox, runit_exists: bool) -> i32 { - // 进行资源与安全限制等 - let mut rlimit = libc::rlimit { - rlim_cur: 0, - rlim_max: 0, - }; - // CPU 时间限制,单位为 S - if let Some(time_limit) = sandbox.time_limit { - rlimit.rlim_cur = (time_limit / 1000 + 1) as u64; - if time_limit % 1000 > 800 { - rlimit.rlim_cur += 1; - } - rlimit.rlim_max = rlimit.rlim_cur; - unsafe { - syscall_or_panic!( - libc::setrlimit(libc::RLIMIT_CPU, &rlimit), - "setrlimit RLIMIT_CPU" - ) - }; - } - // 内存限制,单位为 kib - if let Some(memory_limit) = sandbox.memory_limit { - rlimit.rlim_cur = memory_limit as u64 * 1024 * 2; - rlimit.rlim_max = memory_limit as u64 * 1024 * 2; - unsafe { - syscall_or_panic!( - libc::setrlimit(libc::RLIMIT_AS, &rlimit), - "setrlimit RLIMIT_AS" - ) - }; - - rlimit.rlim_cur = memory_limit as u64 * 1024 * 2; - rlimit.rlim_max = memory_limit as u64 * 1024 * 2; - unsafe { - syscall_or_panic!( - libc::setrlimit(libc::RLIMIT_STACK, &rlimit), - "setrlimit RLIMIT_STACK" - ) - }; - } - // 文件大小限制,单位为 bit - if let Some(file_size_limit) = sandbox.file_size_limit { - rlimit.rlim_cur = file_size_limit as u64; - rlimit.rlim_max = file_size_limit as u64; - unsafe { - syscall_or_panic!( - libc::setrlimit(libc::RLIMIT_FSIZE, &rlimit), - "setrlimit RLIMIT_FSIZE" - ) - }; - } - // 重定向输入输出流 - if let Some(file) = &sandbox.input { - let f = CString::new(file.clone()).unwrap(); - let fd = unsafe { - syscall_or_panic!( - libc::open(f.as_ptr(), libc::O_RDONLY, 0o644), - format!("open input file `{}`", file) - ) - }; - unsafe { syscall_or_panic!(libc::dup2(fd, libc::STDIN_FILENO), "dup2 stdin") }; - } - if let Some(file) = &sandbox.output { - let f = CString::new(file.clone()).unwrap(); - let fd = unsafe { - syscall_or_panic!( - libc::open(f.as_ptr(), libc::O_CREAT | libc::O_RDWR, 0o644), - format!("open output file `{}`", file) - ) - }; - unsafe { syscall_or_panic!(libc::dup2(fd, libc::STDOUT_FILENO), "dup2 stdout") }; - } - if let Some(file) = &sandbox.error { - let f = CString::new(file.clone()).unwrap(); - let fd = unsafe { - syscall_or_panic!( - libc::open(f.as_ptr(), libc::O_CREAT | libc::O_RDWR, 0o644), - format!("open error file `{}`", file) - ) - }; - unsafe { syscall_or_panic!(libc::dup2(fd, libc::STDERR_FILENO), "dup2 stderr") }; - } - - let exec_args = if !runit_exists { - ExecArgs::build(&sandbox.inner_args) - } else { - ExecArgs::build(&sandbox.inner_args) - } - .unwrap(); - - unsafe { - syscall_or_panic!( - libc::execve(exec_args.pathname, exec_args.argv, exec_args.envp), - "execve" - ) - } -} - -unsafe fn wait_it(pid: i32) -> crate::error::Result { - let start_time = Instant::now(); - - let mut status: i32 = 0; - let mut rusage: libc::rusage = utils::new_rusage(); - - linux_syscall!(libc::wait4(pid, &mut status, 0, &mut rusage)); - - let cpu_time_used = rusage.ru_utime.tv_sec * 1000 - + i64::from(rusage.ru_utime.tv_usec) / 1000 - + rusage.ru_stime.tv_sec * 1000 - + i64::from(rusage.ru_stime.tv_usec) / 1000; - let memory_used = rusage.ru_maxrss; - let mut exit_code = 0; - let exited = libc::WIFEXITED(status); - if exited { - exit_code = libc::WEXITSTATUS(status); - } - let signal = if libc::WIFSIGNALED(status) { - libc::WTERMSIG(status) - } else if libc::WIFSTOPPED(status) { - libc::WSTOPSIG(status) - } else { - 0 - }; - - let time_used = start_time.elapsed().as_millis(); - Ok(Status { - time_used: time_used as u64, - cpu_time_used: cpu_time_used as u64, - memory_used: memory_used as u64, - exit_code, - status, - signal, - }) -} diff --git a/src/sys/linux/seccomp.rs b/src/sys/linux/seccomp.rs deleted file mode 100644 index fe3d3be..0000000 --- a/src/sys/linux/seccomp.rs +++ /dev/null @@ -1,1170 +0,0 @@ -// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#![allow(dead_code)] -#![deny(missing_docs)] -//! This crate implements a high level wrapper over BPF instructions for seccomp filtering. -//! -//! # Seccomp Filtering Levels -//! -//! [Seccomp filtering](https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt) is used -//! to limit the set of syscalls that a process can make. This crate exposes 2 levels of filtering: -//! 1. Simple filtering: all syscalls are denied, except for a subset that are explicitly let -//! through. The latter are identified solely through the syscall number. -//! 1. Advanced filtering: all syscalls are denied, except for a subset that are explicitly let -//! through. The latter are identified via the syscall number and the allowed values for the -//! syscall's arguments. Arguments whose values do not match the filtering rule will cause the -//! syscall to be denied. -//! -//! ## Example with Filtering Disabled -//! -//! ``` -//! let buf = "Hello, world!"; -//! assert_eq!( -//! unsafe { -//! libc::syscall( -//! libc::SYS_write, -//! libc::STDOUT_FILENO, -//! buf.as_bytes(), -//! buf.len(), -//! ); -//! }, -//! () -//! ); -//! ``` -//! -//! The code snippet above will print "Hello, world!" to stdout. -//! The exit code will be 0. -//! -//! ## Example with Simple Filtering -//! -//! In this example, the process will allow a subset of syscalls. All the others will fall under -//! the `Trap` action: cause the kernel to send `SIGSYS` (signal number 31) to the process. -//! Without a signal handler in place, the process will die with exit code 159 (128 + `SIGSYS`). -//! -//! ```should_panic -//! use std::convert::TryInto; -//! use seccomp::*; -//! -//! let buf = "Hello, world!"; -//! let filter = SeccompFilter::new( -//! vec![ -//! allow_syscall(libc::SYS_close), -//! allow_syscall(libc::SYS_execve), -//! allow_syscall(libc::SYS_exit_group), -//! #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -//! allow_syscall(libc::SYS_open), -//! #[cfg(target_arch = "aarch64")] -//! allow_syscall(libc::SYS_openat), -//! allow_syscall(libc::SYS_read), -//! ] -//! .into_iter() -//! .collect(), -//! SeccompAction::Trap, -//! ) -//! .unwrap().try_into().unwrap(); -//! SeccompFilter::apply(filter).unwrap(); -//! unsafe { -//! libc::syscall( -//! libc::SYS_write, -//! libc::STDOUT_FILENO, -//! buf.as_bytes(), -//! buf.len(), -//! ); -//! }; -//! ``` -//! -//! The code snippet above will print "Hello, world!" to stdout and "Bad system call" to stderr. -//! The exit code will be 159. -//! -//! ## Advanced Filtering: Conditions, Rules and Filters -//! -//! A system call is matched if it verifies a set of [`SeccompCondition`]s. Namely, the syscall -//! number must match the one in the [`SeccompCondition`], and each of its arguments (in case of -//! advanced filtering) must match a set of [`SeccompCondition`]s that identify the argument by its -//! index and its respective value either by exact value match, or by bounds to be compared to. -//! -//! A [`SeccompRule`] is composed of a set of [`SeccompCondition`]s the syscall must match and the -//! [`SeccompAction`] to be taken in case of a match. -//! -//! A [`SeccompFilter`] applies only to advanced filtering and is composed of a set of -//! [`SeccompRule`]s and a default [`SeccompAction`]. The default action will be taken for the -//! syscalls that do not match any of the rules. -//! -//! The seccomp rules are compiled into a [`BpfProgram`] which is loaded in the kernel. -//! -//! ### Denying Syscalls -//! -//! The [`SeccompRule`] struct specifies which action to be taken when a syscall is attempted -//! through its [`action`]. To deny a syscall, [`action`] must take one of the following values: -//! 1. `Errno(num)`: the syscall will not be executed. `errno` will be set to `num`. -//! 1. `Kill`: the kernel will kill the process. -//! 1. `Trap`: the kernel will send `SIGSYS` to the process. Handling is up to the process. If no -//! signal handler is set for `SIGSYS`, the process will die. -//! -//! ### Example with Advanced Filtering -//! -//! In this example, the process will allow a subset of syscalls with any arguments and the syscall -//! `SYS_write` with the first argument `0` and the third argument `13`. The default action is to -//! cause the kernel to send `SIGSYS` (signal number 31) to the process. -//! A signal handler will catch `SIGSYS` and exit with code 159 on any other syscall. -//! -//! ```should_panic -//! use seccomp::*; -//! use std::convert::TryInto; -//! use std::mem; -//! use std::process::exit; -//! -//! const SI_OFF_SYSCALL: isize = 6; -//! static mut SIGNAL_HANDLER_CALLED: i32 = 0; -//! -//! fn fail() { -//! exit(159); -//! } -//! -//! extern "C" fn sigsys_handler( -//! _num: libc::c_int, -//! info: *mut libc::siginfo_t, -//! _unused: *mut libc::c_void, -//! ) { -//! let syscall = unsafe { *(info as *const i32).offset(SI_OFF_SYSCALL) }; -//! if syscall as i64 != libc::SYS_write { -//! fail(); -//! } -//! unsafe { -//! SIGNAL_HANDLER_CALLED = SIGNAL_HANDLER_CALLED + 1; -//! } -//! } -//! -//! fn gen_rules() -> Vec { -//! vec![ -//! allow_syscall(libc::SYS_close), -//! allow_syscall(libc::SYS_execve), -//! allow_syscall(libc::SYS_exit_group), -//! allow_syscall(libc::SYS_munmap), -//! #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -//! allow_syscall(libc::SYS_open), -//! #[cfg(target_arch = "aarch64")] -//! allow_syscall(libc::SYS_openat), -//! allow_syscall(libc::SYS_rt_sigreturn), -//! allow_syscall(libc::SYS_sigaltstack), -//! ] -//! } -//! -//! fn main() { -//! let buf = "Hello, world!"; -//! -//! let mut act: libc::sigaction = unsafe { mem::zeroed() }; -//! act.sa_flags = libc::SA_SIGINFO; -//! act.sa_sigaction = sigsys_handler as *const () as usize; -//! -//! unsafe { libc::sigaction(libc::SIGSYS, &act, ::std::ptr::null_mut()) }; -//! -//! let mut filter = -//! SeccompFilter::new(vec![].into_iter().collect(), SeccompAction::Trap).unwrap(); -//! -//! gen_rules() -//! .into_iter() -//! .try_for_each(|(syscall_number, rules)| filter.add_rules(syscall_number, rules)) -//! .unwrap(); -//! -//! filter -//! .add_rules( -//! libc::SYS_write, -//! vec![SeccompRule::new( -//! vec![ -//! SeccompCondition::new( -//! 0, -//! SeccompCmpArgLen::DWORD, -//! SeccompCmpOp::Eq, -//! libc::STDOUT_FILENO as u64, -//! ) -//! .unwrap(), -//! SeccompCondition::new(2, SeccompCmpArgLen::QWORD, SeccompCmpOp::Eq, 13) -//! .unwrap(), -//! ], -//! SeccompAction::Allow, -//! )], -//! ) -//! .unwrap(); -//! -//! SeccompFilter::apply(filter.try_into().unwrap()).unwrap(); -//! -//! unsafe { -//! libc::syscall( -//! libc::SYS_write, -//! libc::STDOUT_FILENO, -//! buf.as_bytes(), -//! buf.len(), -//! ); -//! }; -//! -//! if unsafe { SIGNAL_HANDLER_CALLED } != 0 { -//! fail(); -//! } -//! -//! let buf = "Goodbye!"; -//! unsafe { -//! libc::syscall( -//! libc::SYS_write, -//! libc::STDOUT_FILENO, -//! buf.as_bytes(), -//! buf.len(), -//! ); -//! }; -//! if unsafe { SIGNAL_HANDLER_CALLED } != 1 { -//! fail(); -//! } -//! -//! unsafe { -//! libc::syscall(libc::SYS_getpid); -//! }; -//! } -//! ``` -//! The code snippet above will print "Hello, world!" to stdout. -//! The exit code will be 159. -//! -//! [`apply`]: struct.SeccompFilter.html#apply -//! [`BpfProgram`]: type.BpfProgram.html -//! [`SeccompCondition`]: struct.SeccompCondition.html -//! [`SeccompRule`]: struct.SeccompRule.html -//! [`SeccompAction`]: enum.SeccompAction.html -//! [`SeccompFilter`]: struct.SeccompFilter.html -//! [`action`]: struct.SeccompRule.html#action -use std::collections::BTreeMap; -use std::convert::TryInto; -use std::fmt::{Display, Formatter}; - -/// Maximum number of instructions that a BPF program can have. -const BPF_MAX_LEN: usize = 4096; - -// BPF Instruction classes. -// See /usr/include/linux/bpf_common.h . -const BPF_LD: u16 = 0x00; -const BPF_ALU: u16 = 0x04; -const BPF_JMP: u16 = 0x05; -const BPF_RET: u16 = 0x06; - -// BPF ld/ldx fields. -// See /usr/include/linux/bpf_common.h . -const BPF_W: u16 = 0x00; -const BPF_ABS: u16 = 0x20; - -// BPF alu fields. -// See /usr/include/linux/bpf_common.h . -const BPF_AND: u16 = 0x50; - -// BPF jmp fields. -// See /usr/include/linux/bpf_common.h . -const BPF_JA: u16 = 0x00; -const BPF_JEQ: u16 = 0x10; -const BPF_JGT: u16 = 0x20; -const BPF_JGE: u16 = 0x30; -const BPF_K: u16 = 0x00; - -// Return codes for BPF programs. -// See /usr/include/linux/seccomp.h . -const SECCOMP_RET_ALLOW: u32 = 0x7fff_0000; -const SECCOMP_RET_ERRNO: u32 = 0x0005_0000; -const SECCOMP_RET_KILL: u32 = 0x0000_0000; -const SECCOMP_RET_LOG: u32 = 0x7ffc_0000; -const SECCOMP_RET_TRACE: u32 = 0x7ff0_0000; -const SECCOMP_RET_TRAP: u32 = 0x0003_0000; -const SECCOMP_RET_MASK: u32 = 0x0000_ffff; - -// Architecture identifier. -// See /usr/include/linux/audit.h . - -#[cfg(target_arch = "x86_64")] -// Defined as: -// `#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)` -const AUDIT_ARCH_X86_64: u32 = 62 | 0x8000_0000 | 0x4000_0000; - -#[cfg(target_arch = "aarch64")] -// Defined as: -// `#define AUDIT_ARCH_AARCH64 (EM_AARCH64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)` -const AUDIT_ARCH_AARCH64: u32 = 183 | 0x8000_0000 | 0x4000_0000; - -// The maximum number of a syscall argument. -// A syscall can have at most 6 arguments. -// Arguments are numbered from 0 to 5. -const ARG_NUMBER_MAX: u8 = 5; - -// The maximum number of BPF statements that a condition will be translated into. -const CONDITION_MAX_LEN: u16 = 6; - -// `struct seccomp_data` offsets and sizes of fields in bytes: -// -// ```c -// struct seccomp_data { -// int nr; -// __u32 arch; -// __u64 instruction_pointer; -// __u64 args[6]; -// }; -// ``` -const SECCOMP_DATA_NR_OFFSET: u8 = 0; -const SECCOMP_DATA_ARGS_OFFSET: u8 = 16; -const SECCOMP_DATA_ARG_SIZE: u8 = 8; - -/// Seccomp errors. -#[derive(Debug)] -pub enum Error { - /// Attempting to add an empty vector of rules to the rule chain of a syscall. - EmptyRulesVector, - /// Filter exceeds the maximum number of instructions that a BPF program can have. - FilterTooLarge, - /// Failed to translate rules into BPF. - IntoBpf, - /// Argument number that exceeds the maximum value. - InvalidArgumentNumber, - /// Failed to load seccomp rules into the kernel. - Load(i32), -} - -impl Display for Error { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - use self::Error::*; - - match *self { - EmptyRulesVector => write!(f, "The seccomp rules vector is empty."), - FilterTooLarge => write!(f, "The seccomp filter contains too many BPF instructions."), - IntoBpf => write!(f, "Failed to translate the seccomp rules into BPF."), - InvalidArgumentNumber => { - write!(f, "The seccomp rule contains an invalid argument number.") - } - Load(err) => write!( - f, - "Failed to load seccomp rules into the kernel with error {}.", - err - ), - } - } -} - -type Result = std::result::Result; - -/// Comparison to perform when matching a condition. -#[derive(Clone, Debug)] -pub enum SeccompCmpOp { - /// Argument value is equal to the specified value. - Eq, - /// Argument value is greater than or equal to the specified value. - Ge, - /// Argument value is greater than specified value. - Gt, - /// Argument value is less than or equal to the specified value. - Le, - /// Argument value is less than specified value. - Lt, - /// Masked bits of argument value are equal to masked bits of specified value. - MaskedEq(u64), - /// Argument value is not equal to specified value. - Ne, -} - -/// Seccomp argument value length. -#[derive(Clone, Debug)] -pub enum SeccompCmpArgLen { - /// Argument value length is 4 bytes. - DWORD, - /// Argument value length is 8 bytes. - QWORD, -} - -/// Condition that syscall must match in order to satisfy a rule. -#[derive(Clone, Debug)] -pub struct SeccompCondition { - /// Index of the argument that is to be compared. - arg_number: u8, - /// Length of the argument value that is to be compared. - arg_len: SeccompCmpArgLen, - /// Comparison to perform. - operator: SeccompCmpOp, - /// The value that will be compared with the argument value. - value: u64, -} - -/// Actions that `seccomp` can apply to process calling a syscall. -#[derive(Clone, Debug, PartialEq)] -pub enum SeccompAction { - /// Allows syscall. - Allow, - /// Returns from syscall with specified error number. - Errno(u32), - /// Kills calling process. - Kill, - /// Same as allow but logs call. - Log, - /// Notifies tracing process of the caller with respective number. - Trace(u32), - /// Sends `SIGSYS` to the calling process. - Trap, -} - -/// Rule that `seccomp` attempts to match for a syscall. -/// -/// If all conditions match then rule gets matched. -/// The action of the first rule that matches will be applied to the calling process. -/// If no rule matches the default action is applied. -#[derive(Clone, Debug)] -pub struct SeccompRule { - /// Conditions of rule that need to match in order for the rule to get matched. - conditions: Vec, - /// Action applied to calling process if rule gets matched. - action: SeccompAction, -} - -/// Type that encapsulates a tuple (syscall number, rule set). -pub type SyscallRuleSet = (i64, Vec); - -/// Builds the (syscall, rules) tuple for allowing a syscall regardless of arguments. -#[inline(always)] -pub fn allow_syscall(syscall_number: i64) -> SyscallRuleSet { - ( - syscall_number, - vec![SeccompRule::new(vec![], SeccompAction::Allow)], - ) -} - -/// Builds the (syscall, rules) tuple for allowing a syscall with certain arguments. -#[inline(always)] -pub fn allow_syscall_if(syscall_number: i64, rules: Vec) -> SyscallRuleSet { - (syscall_number, rules) -} - -/// Filter containing rules assigned to syscall numbers. -#[derive(Clone, Debug)] -pub struct SeccompFilter { - /// Map of syscall numbers and corresponding rule chains. - rules: BTreeMap>, - /// Default action to apply to syscall numbers that do not exist in the hash map. - default_action: SeccompAction, -} - -// BPF instruction structure definition. -// See /usr/include/linux/filter.h . -#[repr(C)] -#[derive(Clone, Debug, PartialEq)] -#[doc(hidden)] -pub struct sock_filter { - pub code: ::std::os::raw::c_ushort, - pub jt: ::std::os::raw::c_uchar, - pub jf: ::std::os::raw::c_uchar, - pub k: ::std::os::raw::c_uint, -} - -// BPF structure definition for filter array. -// See /usr/include/linux/filter.h . -#[repr(C)] -struct sock_fprog { - pub len: ::std::os::raw::c_ushort, - pub filter: *const sock_filter, -} - -/// Program made up of a sequence of BPF instructions. -pub type BpfProgram = Vec; -/// Reference to program made up of a sequence of BPF instructions. -pub type BpfProgramRef<'a> = &'a [sock_filter]; -/// Slice of BPF instructions. -pub type BpfInstructionSlice = [sock_filter]; - -impl SeccompCondition { - /// Creates a new [`SeccompCondition`]. - /// - /// # Arguments - /// - /// * `arg_number` - The index of the argument in the system call. - /// * `arg_len` - The length of the argument value. See `SeccompCmpArgLen`. - /// * `operator` - The comparison operator. See `SeccompCmpOp`. - /// * `value` - The value against which the argument will be compared with `operator`. - /// - /// [`SeccompCondition`]: struct.SeccompCondition.html - pub fn new( - arg_number: u8, - arg_len: SeccompCmpArgLen, - operator: SeccompCmpOp, - value: u64, - ) -> Result { - // Checks that the given argument number is valid. - if arg_number > ARG_NUMBER_MAX { - return Err(Error::InvalidArgumentNumber); - } - - Ok(Self { - arg_number, - arg_len, - operator, - value, - }) - } - - /// Splits the [`SeccompCondition`] into 32 bit chunks and offsets. - /// - /// Returns most significant half, least significant half of the `value` field of - /// [`SeccompCondition`], as well as the offsets of the most significant and least significant - /// half of the argument specified by `arg_number` relative to `struct seccomp_data` passed to - /// the BPF program by the kernel. - /// - /// [`SeccompCondition`]: struct.SeccompCondition.html - fn value_segments(&self) -> (u32, u32, u8, u8) { - // Splits the specified value into its most significant and least significant halves. - let (msb, lsb) = ((self.value >> 32) as u32, self.value as u32); - - // Offset to the argument specified by `arg_number`. - let arg_offset = SECCOMP_DATA_ARGS_OFFSET + self.arg_number * SECCOMP_DATA_ARG_SIZE; - - // Extracts offsets of most significant and least significant halves of argument. - let (msb_offset, lsb_offset) = { - #[cfg(target_endian = "big")] - { - (arg_offset, arg_offset + SECCOMP_DATA_ARG_SIZE / 2) - } - #[cfg(target_endian = "little")] - { - (arg_offset + SECCOMP_DATA_ARG_SIZE / 2, arg_offset) - } - }; - - (msb, lsb, msb_offset, lsb_offset) - } - - /// Translates the `eq` (equal) condition into BPF statements. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. - /// - /// The jump is performed if the condition fails and thus the current rule does not match so - /// `seccomp` tries to match the next rule by jumping out of the current rule. - /// - /// In case the condition is part of the last rule, the jump offset is to the default action of - /// respective filter. - /// - /// The most significant and least significant halves of the argument value are compared - /// separately since the BPF operand and accumulator are 4 bytes whereas an argument value is 8. - fn into_eq_bpf(self, offset: u8) -> Vec { - let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); - - let mut bpf = match self.arg_len { - SeccompCmpArgLen::DWORD => vec![], - SeccompCmpArgLen::QWORD => vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, offset + 2), - ], - }; - - bpf.append(&mut vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, lsb, 0, offset), - ]); - bpf - } - - /// Translates the `ge` (greater than or equal) condition into BPF statements. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. - fn into_ge_bpf(self, offset: u8) -> Vec { - let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); - - let mut bpf = match self.arg_len { - SeccompCmpArgLen::DWORD => vec![], - SeccompCmpArgLen::QWORD => vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, msb, 3, 0), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, offset + 2), - ], - }; - - bpf.append(&mut vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGE + BPF_K, lsb, 0, offset), - ]); - bpf - } - - /// Translates the `gt` (greater than) condition into BPF statements. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. - fn into_gt_bpf(self, offset: u8) -> Vec { - let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); - - let mut bpf = match self.arg_len { - SeccompCmpArgLen::DWORD => vec![], - SeccompCmpArgLen::QWORD => vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, msb, 3, 0), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, offset + 2), - ], - }; - - bpf.append(&mut vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, lsb, 0, offset), - ]); - bpf - } - - /// Translates the `le` (less than or equal) condition into BPF statements. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. - fn into_le_bpf(self, offset: u8) -> Vec { - let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); - - let mut bpf = match self.arg_len { - SeccompCmpArgLen::DWORD => vec![], - SeccompCmpArgLen::QWORD => vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, msb, offset + 3, 0), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, 2), - ], - }; - - bpf.append(&mut vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, lsb, offset, 0), - ]); - bpf - } - - /// Translates the `lt` (less than) condition into BPF statements. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. - fn into_lt_bpf(self, offset: u8) -> Vec { - let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); - - let mut bpf = match self.arg_len { - SeccompCmpArgLen::DWORD => vec![], - SeccompCmpArgLen::QWORD => vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, msb, offset + 3, 0), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, 2), - ], - }; - - bpf.append(&mut vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGE + BPF_K, lsb, offset, 0), - ]); - bpf - } - - /// Translates the `masked_eq` (masked equal) condition into BPF statements. - /// - /// The `masked_eq` condition is `true` if the result of logical `AND` between the given value - /// and the mask is the value being compared against. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. - fn into_masked_eq_bpf(self, offset: u8, mask: u64) -> Vec { - let (_, _, msb_offset, lsb_offset) = self.value_segments(); - let masked_value = self.value & mask; - let (msb, lsb) = ((masked_value >> 32) as u32, masked_value as u32); - let (mask_msb, mask_lsb) = ((mask >> 32) as u32, mask as u32); - - let mut bpf = match self.arg_len { - SeccompCmpArgLen::DWORD => vec![], - SeccompCmpArgLen::QWORD => vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), - BPF_STMT(BPF_ALU + BPF_AND + BPF_K, mask_msb), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, offset + 3), - ], - }; - - bpf.append(&mut vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), - BPF_STMT(BPF_ALU + BPF_AND + BPF_K, mask_lsb), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, lsb, 0, offset), - ]); - bpf - } - - /// Translates the `ne` (not equal) condition into BPF statements. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. - fn into_ne_bpf(self, offset: u8) -> Vec { - let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); - - let mut bpf = match self.arg_len { - SeccompCmpArgLen::DWORD => vec![], - SeccompCmpArgLen::QWORD => vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, 2), - ], - }; - - bpf.append(&mut vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, lsb, offset, 0), - ]); - bpf - } - - /// Translates the [`SeccompCondition`] into BPF statements. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. - /// - /// [`SeccompCondition`]: struct.SeccompCondition.html - fn into_bpf(self, offset: u8) -> Vec { - let result = match self.operator { - SeccompCmpOp::Eq => self.into_eq_bpf(offset), - SeccompCmpOp::Ge => self.into_ge_bpf(offset), - SeccompCmpOp::Gt => self.into_gt_bpf(offset), - SeccompCmpOp::Le => self.into_le_bpf(offset), - SeccompCmpOp::Lt => self.into_lt_bpf(offset), - SeccompCmpOp::MaskedEq(mask) => self.into_masked_eq_bpf(offset, mask), - SeccompCmpOp::Ne => self.into_ne_bpf(offset), - }; - - // Verifies that the `CONDITION_MAX_LEN` constant was properly updated. - assert!(result.len() <= CONDITION_MAX_LEN as usize); - - result - } -} - -impl From for u32 { - /// Return codes of the BPF program for each action. - /// - /// # Arguments - /// - /// * `action` - The [`SeccompAction`] that the kernel will take. - /// - /// [`SeccompAction`]: struct.SeccompAction.html - fn from(action: SeccompAction) -> Self { - match action { - SeccompAction::Allow => SECCOMP_RET_ALLOW, - SeccompAction::Errno(x) => SECCOMP_RET_ERRNO | (x & SECCOMP_RET_MASK), - SeccompAction::Kill => SECCOMP_RET_KILL, - SeccompAction::Log => SECCOMP_RET_LOG, - SeccompAction::Trace(x) => SECCOMP_RET_TRACE | (x & SECCOMP_RET_MASK), - SeccompAction::Trap => SECCOMP_RET_TRAP, - } - } -} - -impl SeccompRule { - /// Creates a new rule. Rules with 0 conditions always match. - /// - /// # Arguments - /// - /// * `conditions` - Vector of [`SeccompCondition`] that the syscall must match. - /// * `action` - Action taken if the syscall matches the conditions. See [`SeccompAction`]. - /// - /// [`SeccompCondition`]: struct.SeccompCondition.html - /// [`SeccompAction`]: struct.SeccompAction.html - pub fn new(conditions: Vec, action: SeccompAction) -> Self { - Self { conditions, action } - } - - /// Appends a condition of the rule to an accumulator. - /// - /// The length of the rule and offset to the next rule are updated. - /// - /// # Arguments - /// - /// * `condition` - The condition added to the rule. - /// * `accumulator` - Accumulator of BPF statements that compose the BPF program. - /// * `rule_len` - Number of conditions in the rule. - /// * `offset` - Offset (in number of BPF statements) to the next rule. - fn append_condition( - condition: SeccompCondition, - accumulator: &mut Vec>, - rule_len: &mut usize, - offset: &mut u8, - ) { - // Tries to detect whether prepending the current condition will produce an unjumpable - // offset (since BPF jumps are a maximum of 255 instructions). - if u16::from(*offset) + CONDITION_MAX_LEN + 1 > u16::from(::std::u8::MAX) { - // If that is the case, three additional helper jumps are prepended and the offset - // is reset to 1. - // - // - The first jump continues the evaluation of the condition chain by jumping to - // the next condition or the action of the rule if the last condition was matched. - // - The second, jumps out of the rule, to the next rule or the default action of - // the filter in case of the last rule in the rule chain of a syscall. - // - The third jumps out of the rule chain of the syscall, to the rule chain of the - // next syscall number to be checked or the default action of the filter in the - // case of the last rule chain. - let helper_jumps = vec![ - BPF_STMT(BPF_JMP + BPF_JA, 2), - BPF_STMT(BPF_JMP + BPF_JA, u32::from(*offset) + 1), - BPF_STMT(BPF_JMP + BPF_JA, u32::from(*offset) + 1), - ]; - *rule_len += helper_jumps.len(); - accumulator.push(helper_jumps); - *offset = 1; - } - - let condition = condition.into_bpf(*offset); - *rule_len += condition.len(); - *offset += condition.len() as u8; - accumulator.push(condition); - } -} - -impl Into for SeccompRule { - /// Translates a rule into BPF statements. - /// - /// Each rule starts with 2 jump statements: - /// * The first jump enters the rule, attempting a match. - /// * The second jump points to the end of the rule chain for one syscall, into the rule chain - /// for the next syscall or the default action if the current syscall is the last one. It - /// essentially jumps out of the current rule chain. - fn into(self) -> BpfProgram { - // Rule is built backwards, last statement is the action of the rule. - // The offset to the next rule is 1. - let mut accumulator = Vec::with_capacity( - self.conditions.len() - + ((self.conditions.len() * CONDITION_MAX_LEN as usize) / ::std::u8::MAX as usize) - + 1, - ); - let mut rule_len = 1; - let mut offset = 1; - accumulator.push(vec![BPF_STMT(BPF_RET + BPF_K, u32::from(self.action))]); - - // Conditions are translated into BPF statements and prepended to the rule. - self.conditions.into_iter().for_each(|condition| { - SeccompRule::append_condition(condition, &mut accumulator, &mut rule_len, &mut offset) - }); - - // The two initial jump statements are prepended to the rule. - let rule_jumps = vec![ - BPF_STMT(BPF_JMP + BPF_JA, 1), - BPF_STMT(BPF_JMP + BPF_JA, u32::from(offset) + 1), - ]; - rule_len += rule_jumps.len(); - accumulator.push(rule_jumps); - - // Finally, builds the translated rule by consuming the accumulator. - let mut result = Vec::with_capacity(rule_len); - accumulator - .into_iter() - .rev() - .for_each(|mut instructions| result.append(&mut instructions)); - - result - } -} - -impl SeccompFilter { - /// Creates a new filter with a set of rules and a default action. - /// - /// # Arguments - /// - /// * `rules` - Map of syscall numbers and the rules that will be applied to each of them. - /// * `default_action` - Action taken for all syscalls that do not match any rule. - pub fn new( - rules: BTreeMap>, - default_action: SeccompAction, - ) -> Result { - // All inserted syscalls must have at least one rule, otherwise BPF code will break. - for (_, value) in rules.iter() { - if value.is_empty() { - return Err(Error::EmptyRulesVector); - } - } - - Ok(Self { - rules, - default_action, - }) - } - - /// Adds rules for the specified syscall in the filter. - /// - /// # Arguments - /// - /// * `syscall_number` - Syscall identifier. - /// * `rules` - Rules to be applied to the syscall. - pub fn add_rules(&mut self, syscall_number: i64, mut rules: Vec) -> Result<()> { - // All inserted syscalls must have at least one rule, otherwise BPF code will break. - if rules.is_empty() { - return Err(Error::EmptyRulesVector); - } - - self.rules - .entry(syscall_number) - .or_insert_with(std::vec::Vec::new) - .append(&mut rules); - - Ok(()) - } - - /// Builds the array of filter instructions and sends them to the kernel. - /// - /// # Arguments - /// - /// * `filters` - BPF program containing the seccomp rules. - pub fn apply(filters: BpfProgram) -> Result<()> { - // If the program is empty, skip this step. - if filters.is_empty() { - return Ok(()); - } - - let mut bpf_filter = Vec::new(); - bpf_filter.extend(VALIDATE_ARCHITECTURE()); - bpf_filter.extend(filters); - - unsafe { - { - let rc = libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - if rc != 0 { - return Err(Error::Load(*libc::__errno_location())); - } - } - - let bpf_prog = sock_fprog { - len: bpf_filter.len() as u16, - filter: bpf_filter.as_ptr(), - }; - let bpf_prog_ptr = &bpf_prog as *const sock_fprog; - { - let rc = libc::prctl( - libc::PR_SET_SECCOMP, - libc::SECCOMP_MODE_FILTER, - bpf_prog_ptr, - ); - if rc != 0 { - return Err(Error::Load(*libc::__errno_location())); - } - } - } - - Ok(()) - } - - /// Appends a chain of rules to an accumulator, updating the length of the filter. - /// - /// # Arguments - /// - /// * `syscall_number` - The syscall to which the rules apply. - /// * `chain` - The chain of rules for the specified syscall. - /// * `default_action` - The action to be taken in none of the rules apply. - /// * `accumulator` - The expanding BPF program. - /// * `filter_len` - The size (in number of BPF statements) of the BPF program. This is - /// limited to 4096. If the limit is exceeded, the filter is invalidated. - fn append_syscall_chain( - syscall_number: i64, - chain: Vec, - default_action: u32, - accumulator: &mut Vec>, - filter_len: &mut usize, - ) -> Result<()> { - // The rules of the chain are translated into BPF statements. - let chain: Vec<_> = chain.into_iter().map(SeccompRule::into).collect(); - let chain_len: usize = chain.iter().map(std::vec::Vec::len).sum(); - - // The chain starts with a comparison checking the loaded syscall number against the - // syscall number of the chain. - let mut built_syscall = Vec::with_capacity(1 + chain_len + 1); - built_syscall.push(BPF_JUMP( - BPF_JMP + BPF_JEQ + BPF_K, - syscall_number as u32, - 0, - 1, - )); - - // The rules of the chain are appended. - chain - .into_iter() - .for_each(|mut rule| built_syscall.append(&mut rule)); - - // The default action is appended, if the syscall number comparison matched and then all - // rules fail to match, the default action is reached. - built_syscall.push(BPF_STMT(BPF_RET + BPF_K, default_action)); - - // The chain is appended to the result. - *filter_len += built_syscall.len(); - accumulator.push(built_syscall); - - // BPF programs are limited to 4096 statements. - if *filter_len >= BPF_MAX_LEN { - return Err(Error::FilterTooLarge); - } - - Ok(()) - } - - /// Replaces the seccomp rules so as to allow every syscall contained in the rule set. - pub fn allow_all(mut self) -> SeccompFilter { - // Pre-collect the keys to avoid the double borrow. - let syscalls: Vec = self.rules.keys().cloned().collect(); - for syscall in syscalls { - let ruleset: SyscallRuleSet = allow_syscall(syscall); - self.rules.insert(ruleset.0, ruleset.1); - } - self - } - - /// Creates an empty `SeccompFilter` which allows everything. - pub fn empty() -> SeccompFilter { - Self { - rules: BTreeMap::new(), - default_action: SeccompAction::Allow, - } - } -} - -impl TryInto for SeccompFilter { - type Error = Error; - fn try_into(self) -> Result { - // If no rules are set up, return an empty vector. - if self.rules.is_empty() { - return Ok(vec![]); - } - - // The called syscall number is loaded. - let mut accumulator = Vec::with_capacity(1); - let mut filter_len = 1; - accumulator.push(EXAMINE_SYSCALL()); - - // Orders syscalls by priority, the highest number represents the highest priority. - let mut iter = self.rules.into_iter(); - - // For each syscall adds its rule chain to the filter. - let default_action = u32::from(self.default_action); - iter.try_for_each(|(syscall_number, chain)| { - SeccompFilter::append_syscall_chain( - syscall_number, - chain, - default_action, - &mut accumulator, - &mut filter_len, - ) - })?; - - // The default action is once again appended, it is reached if all syscall number - // comparisons fail. - filter_len += 1; - accumulator.push(vec![BPF_STMT(BPF_RET + BPF_K, default_action)]); - - // Finally, builds the translated filter by consuming the accumulator. - let mut result = Vec::with_capacity(filter_len); - accumulator - .into_iter() - .for_each(|mut instructions| result.append(&mut instructions)); - - Ok(result) - } -} - -/// Builds a `jump` BPF instruction. -/// -/// # Arguments -/// -/// * `code` - The operation code. -/// * `jt` - The jump offset in case the operation returns `true`. -/// * `jf` - The jump offset in case the operation returns `false`. -/// * `k` - The operand. -#[allow(non_snake_case)] -#[inline(always)] -fn BPF_JUMP(code: u16, k: u32, jt: u8, jf: u8) -> sock_filter { - sock_filter { code, jt, jf, k } -} - -/// Builds a "statement" BPF instruction. -/// -/// # Arguments -/// -/// * `code` - The operation code. -/// * `k` - The operand. -#[allow(non_snake_case)] -#[inline(always)] -fn BPF_STMT(code: u16, k: u32) -> sock_filter { - sock_filter { - code, - jt: 0, - jf: 0, - k, - } -} - -/// Builds a sequence of BPF instructions that validate the underlying architecture. -#[allow(non_snake_case)] -#[inline(always)] -fn VALIDATE_ARCHITECTURE() -> Vec { - vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, 4), - #[cfg(target_arch = "x86_64")] - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, AUDIT_ARCH_X86_64, 1, 0), - #[cfg(target_arch = "aarch64")] - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, AUDIT_ARCH_AARCH64, 1, 0), - BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_KILL), - ] -} - -/// Builds a sequence of BPF instructions that are followed by syscall examination. -#[allow(non_snake_case)] -#[inline(always)] -fn EXAMINE_SYSCALL() -> Vec { - vec![BPF_STMT( - BPF_LD + BPF_W + BPF_ABS, - u32::from(SECCOMP_DATA_NR_OFFSET), - )] -} - -/// Possible errors that could be encountered while processing a seccomp level value or generating -/// a BPF program based on it. -#[derive(Debug)] -pub enum SeccompError { - /// Error while trying to generate a BPF program. - SeccompFilter(Error), - /// Failed to parse to `u8`. - Parse(std::num::ParseIntError), - /// Seccomp level is an `u8` value, other than 0, 1 or 2. - Level(u8), -} - -impl Display for SeccompError { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - match *self { - SeccompError::SeccompFilter(ref err) => write!(f, "Seccomp error: {}", err), - SeccompError::Parse(ref err) => write!(f, "Could not parse to 'u8': {}", err), - SeccompError::Level(arg) => write!( - f, - "'{}' isn't a valid value for 'seccomp-level'. Must be 0, 1 or 2.", - arg - ), - } - } -} - -/// Possible values for seccomp level. -#[repr(u8)] -#[derive(Clone, Copy, Debug, PartialEq)] -pub enum SeccompLevel { - /// Seccomp filtering disabled. - None = 0, - /// Level of filtering that causes only syscall numbers to be examined. - Basic = 1, - /// Level of filtering that causes syscall numbers and parameters to be examined. - Advanced = 2, -} - -impl SeccompLevel { - /// Converts from a seccomp level value of type String to the corresponding SeccompLevel variant - /// or returns an error if the parsing failed. - pub fn from_string(seccomp_value: &str) -> std::result::Result { - match seccomp_value.parse::() { - Ok(0) => Ok(SeccompLevel::None), - Ok(1) => Ok(SeccompLevel::Basic), - Ok(2) => Ok(SeccompLevel::Advanced), - Ok(level) => Err(SeccompError::Level(level)), - Err(err) => Err(SeccompError::Parse(err)), - } - } -} diff --git a/src/sys/linux/utils.rs b/src/sys/linux/utils.rs deleted file mode 100644 index 2ab15d8..0000000 --- a/src/sys/linux/utils.rs +++ /dev/null @@ -1,147 +0,0 @@ -use std::collections::HashMap; -use std::ffi::{CStr, CString}; -use std::mem; -use std::ptr; - -use libc; - -use crate::error::{errno_str, Result}; - -/// 一个全为 `0` 的 `rusage` -#[inline(always)] -pub fn new_rusage() -> libc::rusage { - libc::rusage { - ru_utime: libc::timeval { - tv_sec: 0 as libc::time_t, - tv_usec: 0 as libc::suseconds_t, - }, - ru_stime: libc::timeval { - tv_sec: 0 as libc::time_t, - tv_usec: 0 as libc::suseconds_t, - }, - ru_maxrss: 0 as libc::c_long, - ru_ixrss: 0 as libc::c_long, - ru_idrss: 0 as libc::c_long, - ru_isrss: 0 as libc::c_long, - ru_minflt: 0 as libc::c_long, - ru_majflt: 0 as libc::c_long, - ru_nswap: 0 as libc::c_long, - ru_inblock: 0 as libc::c_long, - ru_oublock: 0 as libc::c_long, - ru_msgsnd: 0 as libc::c_long, - ru_msgrcv: 0 as libc::c_long, - ru_nsignals: 0 as libc::c_long, - ru_nvcsw: 0 as libc::c_long, - ru_nivcsw: 0 as libc::c_long, - } -} - -pub fn last_err() -> String { - errno_str(std::io::Error::last_os_error().raw_os_error()) -} - -#[macro_export] -macro_rules! string_to_cstring { - ($expression:expr) => { - match CString::new($expression) { - Ok(value) => value, - Err(err) => return Err(crate::error::Error::StringToCStringError(err)), - } - }; -} - -/// 执行指定的系统调用,如果返回值小于 0,则抛出异常并结束进程 -#[macro_export] -macro_rules! syscall_or_panic { - ($expression:expr, $syscall:expr) => {{ - let ret = $expression; - if ret < 0 { - let last_err = last_err(); - panic!( - "{file}:{line}: {message}\n ret = {ret}, err = {last_err}", - file = file!(), - line = line!(), - message = $syscall - ); - }; - ret - }}; -} - -pub struct ExecArgs { - pub pathname: *const libc::c_char, - pub argv: *const *const libc::c_char, - pub envp: *const *const libc::c_char, - args: usize, - envs: usize, -} - -impl ExecArgs { - pub fn build(args: &Vec) -> Result { - let pathname = args[0].clone(); - let pathname_str = string_to_cstring!(pathname); - let pathname = pathname_str.as_ptr(); - - let mut argv_vec: Vec<*const libc::c_char> = vec![]; - for item in args.iter() { - let cstr = string_to_cstring!(item.clone()); - let cptr = cstr.as_ptr(); - // 需要使用 mem::forget 来标记 - // 否则在此次循环结束后,cstr 就会被回收,后续 exec 函数无法通过指针获取到字符串内容 - mem::forget(cstr); - argv_vec.push(cptr); - } - // argv 与 envp 的参数需要使用 NULL 来标记结束 - argv_vec.push(ptr::null()); - let argv: *const *const libc::c_char = argv_vec.as_ptr() as *const *const libc::c_char; - - // env 传递环境变量 - let mut envp_vec: Vec<*const libc::c_char> = vec![]; - envp_vec.push(ptr::null()); - let envs = envp_vec.len(); - let envp = envp_vec.as_ptr() as *const *const libc::c_char; - - mem::forget(pathname_str); - mem::forget(argv_vec); - mem::forget(envp_vec); - Ok(ExecArgs { - pathname, - argv, - args: args.len(), - envp, - envs, - }) - } -} - -impl Drop for ExecArgs { - fn drop(&mut self) { - // 将 forget 的内存重新获取,并释放 - let c_string = unsafe { CString::from_raw(self.pathname as *mut i8) }; - drop(c_string); - let argv = unsafe { - Vec::from_raw_parts( - self.argv as *mut *const libc::c_void, - self.args - 1, - self.args - 1, - ) - }; - for arg in &argv { - let c_string = unsafe { CString::from_raw(*arg as *mut i8) }; - drop(c_string); - } - drop(argv); - let envp = unsafe { - Vec::from_raw_parts( - self.envp as *mut *const libc::c_void, - self.envs - 1, - self.envs - 1, - ) - }; - for env in &envp { - let c_string = unsafe { CString::from_raw(*env as *mut i8) }; - drop(c_string); - } - drop(envp); - } -} diff --git a/src/sys/macos/mod.rs b/src/sys/macos/mod.rs deleted file mode 100644 index 500fe18..0000000 --- a/src/sys/macos/mod.rs +++ /dev/null @@ -1,9 +0,0 @@ -use crate::sys::SandboxImpl; - -pub struct Sandbox {} - -impl SandboxImpl for Sandbox { - fn run() -> () { - println!("macOS") - } -} \ No newline at end of file diff --git a/src/sys/mod.rs b/src/sys/mod.rs deleted file mode 100644 index e90ac5f..0000000 --- a/src/sys/mod.rs +++ /dev/null @@ -1,20 +0,0 @@ -use crate::Opts; -use crate::status::Status; - -use super::error::Result; - -#[cfg(target_os = "linux")] -pub(crate) mod linux; -#[cfg(target_os = "macos")] -pub(crate) mod macos; -#[cfg(target_os = "windows")] -pub(crate) mod windows; - -pub trait SandboxImpl { - fn with_opts(opts: Opts) -> Self; - - /** - * run - */ - unsafe fn run(&mut self) -> Result; -} diff --git a/src/sys/windows/mod.rs b/src/sys/windows/mod.rs deleted file mode 100644 index a2ef9fe..0000000 --- a/src/sys/windows/mod.rs +++ /dev/null @@ -1,341 +0,0 @@ -use std::ffi::c_void; -use std::mem::size_of; - -use windows::core::PSTR; -use windows::Win32::Foundation::{CloseHandle, FILETIME, WAIT_FAILED, WAIT_TIMEOUT}; -use windows::Win32::System::JobObjects::{ - AssignProcessToJobObject, CreateJobObjectA, JobObjectBasicLimitInformation, - SetInformationJobObject, JOBOBJECT_BASIC_LIMIT_INFORMATION, JOB_OBJECT_LIMIT_PRIORITY_CLASS, - JOB_OBJECT_LIMIT_PROCESS_TIME, -}; -use windows::Win32::System::ProcessStatus::{GetProcessMemoryInfo, PROCESS_MEMORY_COUNTERS}; -use windows::Win32::System::Threading::{ - CreateProcessA, GetProcessTimes, ResumeThread, SetProcessWorkingSetSize, TerminateProcess, - WaitForSingleObject, CREATE_SUSPENDED, IDLE_PRIORITY_CLASS, PROCESS_INFORMATION, - STARTF_USESTDHANDLES, STARTUPINFOA, -}; - -use utils::utils::string_to_pcstr; - -use crate::error::Error::{WinError, E}; -use crate::error::Result; -use crate::status::Status; -use crate::sys::windows::utils::utils::{handle_from_file, string_to_pstr}; -use crate::sys::SandboxImpl; -use crate::Opts; - -mod utils; - -#[macro_export] -macro_rules! winapi { - ($expression:expr) => { - if let Err(e) = $expression { - return Err(WinError(String::from(file!()), line!(), e)); - } - }; -} - -#[derive(Debug)] -pub struct Sandbox { - inner_args: Vec, - time_limit: Option, - cpu_time_limit: Option, - memory_limit: Option, - input: Option, - output: Option, - error: Option, -} - -impl Sandbox { - unsafe fn set_limit(&mut self, information: &PROCESS_INFORMATION) -> Result<()> { - // 创建 JOB - let job = match CreateJobObjectA(None, None) { - Ok(j) => j, - Err(e) => return Err(WinError(file!().to_string(), line!(), e)), - }; - - let mut limit: JOBOBJECT_BASIC_LIMIT_INFORMATION = Default::default(); - limit.LimitFlags = JOB_OBJECT_LIMIT_PRIORITY_CLASS; - limit.PriorityClass = IDLE_PRIORITY_CLASS.0; - - // 内存限制 - if let Some(l) = self.memory_limit { - // 与 cpu 时间限制类似,此限制并不能保证可用性 - winapi!(SetProcessWorkingSetSize( - information.hProcess, - 1, - l as usize * 1024 - )); - } - - // 系统定期检查以确定与作业关联的每个进程是否累积了比设置限制更多的用户模式时间。 如果已终止,则终止进程。 - // cpu 时间限制,此限制不会实时结束进程(需要等到下次检查?) - if let Some(l) = self.cpu_time_limit { - limit.LimitFlags |= JOB_OBJECT_LIMIT_PROCESS_TIME; - limit.PerProcessUserTimeLimit = l as i64 * 10000; - limit.PerJobUserTimeLimit = l as i64 * 10000; - } - - // 设置 job 限制 - winapi!(SetInformationJobObject( - job, - JobObjectBasicLimitInformation, - &limit as *const _ as *const c_void, - size_of::() as u32, - )); - // 将 job 附加到进程 - winapi!(AssignProcessToJobObject(job, information.hProcess)); - Ok(()) - } - - unsafe fn redirect_fd(&mut self, info: &mut STARTUPINFOA) -> Result<()> { - // 重定向 stdin - if let Some(file) = &self.input { - info.hStdInput = handle_from_file(file, 'r')?; - } - // 重定向 stdout - if let Some(file) = &self.output { - info.hStdOutput = handle_from_file(file, 'w')?; - } - // 重定向 stderr - if let Some(file) = &self.error { - info.hStdError = handle_from_file(file, 'w')?; - } - - Ok(()) - } - - unsafe fn wait_it( - &mut self, - info: &STARTUPINFOA, - information: &PROCESS_INFORMATION, - ) -> Result { - let mut status: Status = Default::default(); - let timeout = if let Some(t) = self.time_limit { - t - } else { - // 如果 dwMilliseconds 为 INFINITE,则仅当发出对象信号时,该函数才会返回 - 0xFFFFFFFF - }; - let wait_ret = WaitForSingleObject(information.hProcess, timeout); - if wait_ret == WAIT_TIMEOUT { - // 超时中断进程 - winapi!(TerminateProcess(information.hProcess, 0)); - // 此处不检查返回值 - WaitForSingleObject(information.hProcess, 0xFFFFFFFF); - } else if wait_ret == WAIT_FAILED { - return Err(E(file!().to_string(), line!(), "WAIT_FAILED".to_string())); - } - - // 关闭文件流 - if !info.hStdInput.is_invalid() { - winapi!(CloseHandle(info.hStdInput)); - } - if !info.hStdOutput.is_invalid() { - winapi!(CloseHandle(info.hStdOutput)); - } - if !info.hStdError.is_invalid() { - winapi!(CloseHandle(info.hStdError)); - } - - let mut pmc: PROCESS_MEMORY_COUNTERS = Default::default(); - - // 获取内存使用情况 - winapi!(GetProcessMemoryInfo( - information.hProcess, - &mut pmc, - size_of::() as u32, - )); - - status.memory_used = (pmc.PeakWorkingSetSize / 1024) as u64; - - // 获取时间使用情况 - let mut lp_creation_time: FILETIME = Default::default(); - let mut lp_exit_time: FILETIME = Default::default(); - let mut lp_kernel_time: FILETIME = Default::default(); - let mut lp_user_time: FILETIME = Default::default(); - winapi!(GetProcessTimes( - information.hProcess, - &mut lp_creation_time, - &mut lp_exit_time, - &mut lp_kernel_time, - &mut lp_user_time, - )); - - status.time_used = - (lp_exit_time.dwLowDateTime - lp_creation_time.dwLowDateTime) as u64 / 10000; - status.cpu_time_used = - (lp_kernel_time.dwLowDateTime + lp_user_time.dwLowDateTime) as u64 / 10000; - - Ok(status) - } -} - -impl SandboxImpl for Sandbox { - fn with_opts(opts: Opts) -> Self { - Sandbox { - inner_args: opts.command, - time_limit: opts.time_limit, - cpu_time_limit: opts.cpu_time_limit, - memory_limit: opts.memory_limit, - input: opts.input, - output: opts.output, - error: opts.error, - } - } - - unsafe fn run(&mut self) -> Result { - // 执行的目标 app,前置的命令行解析保证 inner_args 至少有一项 - let app = string_to_pcstr(&mut self.inner_args[0]); - // 执行的文件参数 - let command_line_pstr = if self.inner_args.len() > 1 { - let mut command_line = &mut self.inner_args[1..].join(" "); - string_to_pstr(&mut command_line) - } else { - PSTR::null() - }; - - let mut info: STARTUPINFOA = Default::default(); - let mut information: PROCESS_INFORMATION = Default::default(); - - let mut binherithandles = false; - // 设置 stdin/stdout/stderr 的重定向 - if self.input != None || self.output != None || self.error != None { - binherithandles = true; - info.dwFlags |= STARTF_USESTDHANDLES; - self.redirect_fd(&mut info)?; - } - - // 创建进程 - winapi!(CreateProcessA( - app, - command_line_pstr, - None, - None, - binherithandles, - // CREATE_SUSPENDED: 创建一个暂停的进程,需要 ResumeThread 之后才可以正常运行 - CREATE_SUSPENDED, - None, - None, - &mut info, - &mut information, - )); - - self.set_limit(&information)?; - - let resume = ResumeThread(information.hThread); - - // 唤醒被暂停的进程 - if resume != 1 { - return Err(E( - String::from(file!()), - line!(), - format!("唤醒进程失败,resume = {}", resume), - )); - } - - self.wait_it(&info, &information) - } -} - -#[cfg(test)] -mod tests { - use std::fs; - - use crate::sys::windows::Sandbox; - use crate::sys::SandboxImpl; - use crate::Opts; - - /** - * 执行不存在的可执行文件 - */ - #[test] - #[should_panic] - fn not_found() { - let mut opts: Opts = Opts::default(); - opts.command - .push("./tests/windows/not-found.exe".to_string()); - unsafe { - Sandbox::with_opts(opts).run().unwrap(); - } - } - - /** - * 测试时间限制 - */ - #[test] - fn time_limit() { - let mut opts: Opts = Opts::default(); - opts.command - .push("./tests/windows/sleep/sleep.exe".to_string()); - opts.time_limit = Some(1000); - let status = unsafe { Sandbox::with_opts(opts).run().unwrap() }; - assert!(status.time_used >= 1000); - assert!(status.time_used < 2000); - } - - /** - * 测试 stdout - */ - #[test] - fn output() { - let filename = "./output.txt"; - let mut opts: Opts = Opts::default(); - opts.command - .push("./tests/windows/output/output.exe".to_string()); - opts.output = Option::from(filename.to_string()); - unsafe { Sandbox::with_opts(opts).run().unwrap() }; - - if let Ok(content) = fs::read_to_string(filename) { - assert_eq!(content.trim(), "Hello World!"); - fs::remove_file(filename).unwrap() - } else { - assert!(false) - } - } - - /** - * 测试 stderr - */ - #[test] - fn stderr() { - let filename = "./stderr.txt"; - let mut opts: Opts = Opts::default(); - opts.command - .push("./tests/windows/stderr/stderr.exe".to_string()); - opts.error = Option::from(filename.to_string()); - unsafe { Sandbox::with_opts(opts).run().unwrap() }; - - if let Ok(content) = fs::read_to_string(filename) { - assert_eq!("Hello World!", content.trim()); - fs::remove_file(filename).unwrap() - } else { - assert!(false) - } - } - - /** - * 测试 stdin - */ - #[test] - fn stdin() { - let filename = "./stdin.txt"; - let out_filename = "./stdin-stdout.txt"; - let content = "Hello Stdin!"; - fs::write(filename, content).unwrap(); - let mut opts: Opts = Opts::default(); - opts.command - .push("./tests/windows/stdin/stdin.exe".to_string()); - opts.input = Option::from(filename.to_string()); - opts.output = Option::from(out_filename.to_string()); - unsafe { Sandbox::with_opts(opts).run().unwrap() }; - - if let Ok(c) = fs::read_to_string(out_filename) { - assert_eq!(c.trim(), content.trim()); - fs::remove_file(filename).unwrap(); - fs::remove_file(out_filename).unwrap() - } else { - assert!(false) - } - } -} diff --git a/src/sys/windows/utils.rs b/src/sys/windows/utils.rs deleted file mode 100644 index ce59759..0000000 --- a/src/sys/windows/utils.rs +++ /dev/null @@ -1,55 +0,0 @@ -pub(crate) mod utils { - use std::mem::size_of; - use std::ptr; - - use windows::core::{PCSTR, PSTR}; - use windows::Win32::Foundation::{GENERIC_READ, GENERIC_WRITE, HANDLE, TRUE}; - use windows::Win32::Security::SECURITY_ATTRIBUTES; - use windows::Win32::Storage::FileSystem::{ - CreateFileA, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, OPEN_EXISTING, - }; - - use crate::error::Error::WinError; - use crate::error::Result; - - pub unsafe fn string_to_pcstr(string: &mut String) -> PCSTR { - string.push('\0'); - PCSTR(string.as_ptr()) - } - - pub unsafe fn string_to_pstr(string: &mut String) -> PSTR { - string.push('\0'); - PSTR(string.as_mut_ptr()) - } - - pub unsafe fn handle_from_file(string: &String, wr: char) -> Result { - let mut string = string.clone(); - let sa = SECURITY_ATTRIBUTES { - nLength: size_of::() as u32, - lpSecurityDescriptor: ptr::null_mut(), - bInheritHandle: TRUE, // 指明这个 handle 需要被子进程继承 - }; - let mode = if wr == 'w' { - GENERIC_WRITE - } else { - GENERIC_READ - }; - let exist = if wr == 'w' { - CREATE_ALWAYS - } else { - OPEN_EXISTING - }; - return match CreateFileA( - string_to_pcstr(&mut string), - mode.0, - Default::default(), - Some(&sa), - exist, - FILE_ATTRIBUTE_NORMAL, - HANDLE::default(), - ) { - Ok(h_file) => Ok(h_file), - Err(e) => Err(WinError(String::from(string), line!(), e)), - }; - } -} diff --git a/tests/test.py b/tests/test.py new file mode 100644 index 0000000..d1f1cf2 --- /dev/null +++ b/tests/test.py @@ -0,0 +1,7 @@ +from river import River + +def main(): + r = River("echo") + print(r) + +main() diff --git a/tests/windows/output/output.rs b/tests/windows/output/output.rs deleted file mode 100644 index 47ad8c6..0000000 --- a/tests/windows/output/output.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() { - println!("Hello World!"); -} diff --git a/tests/windows/sleep/sleep.rs b/tests/windows/sleep/sleep.rs deleted file mode 100644 index e6da1e8..0000000 --- a/tests/windows/sleep/sleep.rs +++ /dev/null @@ -1,7 +0,0 @@ -use std::thread; -use std::time::Duration; - -fn main() { - let sleep_duration = Duration::from_secs(30); - thread::sleep(sleep_duration); -} diff --git a/tests/windows/stderr/stderr.rs b/tests/windows/stderr/stderr.rs deleted file mode 100644 index 724666e..0000000 --- a/tests/windows/stderr/stderr.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() { - eprintln!("Hello World!"); -} diff --git a/tests/windows/stdin/stdin.rs b/tests/windows/stdin/stdin.rs deleted file mode 100644 index d3c4bb6..0000000 --- a/tests/windows/stdin/stdin.rs +++ /dev/null @@ -1,7 +0,0 @@ -use std::io; - -fn main() { - let mut input = String::new(); - io::stdin().read_line(&mut input).unwrap(); - println!("{}", input); -} From 3e6822d7d9073d0dba7bb6794d092512d9c3fe47 Mon Sep 17 00:00:00 2001 From: MeiK Date: Tue, 30 Jul 2024 17:07:56 +0800 Subject: [PATCH 11/11] update --- README.md | 89 ------------------------------------------------------- 1 file changed, 89 deletions(-) diff --git a/README.md b/README.md index fac214d..170def1 100644 --- a/README.md +++ b/README.md @@ -1,90 +1 @@ # river - -## 用法 - -```bash -$ river -h -example: `river -vvv -- /usr/bin/echo hello world` - -Usage: river.exe [OPTIONS] -- ... - -Arguments: - ... Program to run and command line arguments - -Options: - -i, --input - Input stream. The default value is STDIN(0) - -o, --output - Output stream. The default value is STDOUT(1) - -e, --error - Error stream. The default value is STDERR(2) - -r, --result - Output location of the running result. The default value is STDOUT(1) - -t, --time-limit - Time limit, in ms. The default value is unlimited - -c, --cpu-time-limit - CPU Time limit, in ms. The default value is unlimited - -m, --memory-limit - Memory limit, in kib. The default value is unlimited - -v, --verbose... - Increase logging verbosity - -q, --quiet... - Decrease logging verbosity - -h, --help - Print help - -V, --version - Print version -``` - -**在 linux 环境下,需要额外安装 `runit`:** - -```shell -$ gcc resources/runit.s -o /usr/bin/runit -``` - -## 结果 - -结果的格式为 JSON - -| 字段 | 含义 | -|-----------------|--------------------| -| `time_used` | 程序运行用时 | -| `cpu_time_used` | 程序运行使用 CPU 时间 | -| `memory_used` | 程序运行使用内存 | -| `exit_code` | 程序退出 code,正常情况下为 0 | -| `status` | 正常情况下为 0 | -| `signal` | 正常情况下为 0 | - -## 系统支持 - -`~` 代表开发中的功能 - -| 特性 | Linux | Windows | macOS | -|-------------|-------|---------|-------| -| 执行指定命令 | ~ | √ | ~ | -| 流重定向 | ~ | √ | ~ | -| 运行时间统计 | ~ | √ | ~ | -| 运行 CPU 时间统计 | ~ | √ | ~ | -| 运行内存统计 | ~ | √ | ~ | -| 运行时间限制 | ~ | √ | ~ | -| 运行 CPU 时间限制 | ~ | ~ | ~ | -| 运行内存限制 | ~ | ~ | ~ | -| 获取进程退出状态 | ~ | ~ | ~ | -| 切换工作空间 | ~ | ~ | ~ | -| 传递环境变量 | ~ | ~ | ~ | -| 网络限制 | ~ | ~ | ~ | -| 写入文件大小限制 | ~ | ~ | ~ | -| 进程/线程数量限制 | ~ | ~ | ~ | -| 危险系统调用限制 | ~ | ~ | ~ | -| 执行用户权限限制 | ~ | ~ | ~ | -| 平滑退出 | ~ | ~ | ~ | - -**注意:** Windows 平台下运行 CPU 时间限制与运行内存限制不能保证精确,请不要以此为基准进行判断。 - -## 测试 - -```bash -cargo test -- --test-threads=1 -``` - -测试涉及文件操作,建议顺序执行测试用例(并发限制为 1)