From 112c6754edd3bfb1c467ecf711e896757bb4ca8b Mon Sep 17 00:00:00 2001 From: "Cliff L. Biffle" Date: Fri, 22 Nov 2024 10:19:47 -0800 Subject: [PATCH] kern: suppress phantom SVCs on ARMv6-M You might recognize this as #1134. Well, it's back. My fix for the problem on ARMv6-M was apparently never quite correct on M0+, because the core register we were relying on to detect and cancel a pending SVC -- SHCSR -- is _implemented_ but _only visible from the debugger._ So when I tried it by hand in openocd, it worked... but from the kernel it silently fails. ARM, you are not making friends with stuff like this. I've worked out a different way to _detect_ the "fault during SVC" case using the always-implemented ICSR register. That register doesn't provide any way to _cancel_ the SVC, so I've added conditional code to the syscall entry path that can do it. Fixes #1928. --- app/donglet/app-g031.toml | 2 +- sys/kern/build.rs | 6 ++++ sys/kern/src/arch/arm_m.rs | 49 ++++++++++++++++++++++++-------- sys/kern/src/syscalls.rs | 23 +++++++++++++++ test/tests-stm32g0/app-g070.toml | 2 +- 5 files changed, 68 insertions(+), 14 deletions(-) diff --git a/app/donglet/app-g031.toml b/app/donglet/app-g031.toml index 57c8dab432..94f077e58b 100644 --- a/app/donglet/app-g031.toml +++ b/app/donglet/app-g031.toml @@ -6,7 +6,7 @@ board = "donglet-g031" [kernel] name = "app-donglet" -requires = {flash = 19168, ram = 1820} +requires = {flash = 19168, ram = 1824} features = ["g031"] stacksize = 936 diff --git a/sys/kern/build.rs b/sys/kern/build.rs index f5c6f0d185..230b60a506 100644 --- a/sys/kern/build.rs +++ b/sys/kern/build.rs @@ -17,6 +17,12 @@ use proc_macro2::TokenStream; fn main() -> Result<()> { build_util::expose_m_profile()?; + println!("cargo::rustc-check-cfg=cfg(hubris_phantom_svc_mitigation)"); + if build_util::target().starts_with("thumbv6m") { + // Force SVC checks on for v6-M. + println!("cargo:rustc-cfg=hubris_phantom_svc_mitigation"); + } + let g = process_config()?; generate_statics(&g)?; diff --git a/sys/kern/src/arch/arm_m.rs b/sys/kern/src/arch/arm_m.rs index 37b9dbca5f..eef3c9063a 100644 --- a/sys/kern/src/arch/arm_m.rs +++ b/sys/kern/src/arch/arm_m.rs @@ -1409,20 +1409,45 @@ unsafe extern "C" fn handle_fault(task: *mut task::Task) { // generate a spurious SVC. Even in the best of cases, this breaks the // supervisor. // - // SVCALLPENDED is in the System Handler Control and State Register, bit 15, - // and we need to clear its bit. We do this unconditionally because it - // doesn't hurt, and it's slightly faster/smaller. + // It would be super great if there were, say, a register in the System + // Control Block that would tell us that SVC is pended, wouldn't it? Perhaps + // it could be called the System Handler Control and State Register. In + // fact, if you read the ARMv6-M ARM, you will find a register with such a + // name, and might be tempted to use it! // - // (If you're comparing this to the ARMv7/v8-M equivalent, note that v6-M - // lacks the usage/mem/bus faults present in v7/8.) + // BEWARE. // - // Safety: the cortex-m crate makes all these registers blanket-unsafe - // without documenting the required preconditions. From the ARMv7-M spec, we - // can infer that the main risk here is if SVC were higher priority than - // this handler, which it is not. - unsafe { - let scb = &*cortex_m::peripheral::SCB::PTR; - scb.shcsr.modify(|bits| bits & !(1 << 15)); + // In a _different section_ of that manual, there is a throwaway footnote + // that reads: + // + // > The DWT, BPU, ROM table, DCB, and the SHCSR and DFSR registers are + // > accessible through the DAP interface. Access from the processor is + // > IMPLEMENTATION DEFINED. + // + // On the Cortex-M0+, this very attractive register works great from the + // debugger but _reads as zero from the kernel._ Ugh. + // + // Instead, we are using the always-active ICSR register, which lets us + // _detect_ the pending SVC _but not clear it._ To clear it, we use the + // mitigation mechanism defined over in syscalls.rs. + // + // The case where an SVC is pending in ICSR uniquely identifies a task + // having faulted during SVC, because a hardfault _in the kernel_ during + // processing of an SVC would not have made it here (see above). + { + let scb = unsafe { &*cortex_m::peripheral::SCB::PTR }; + let icsr = scb.icsr.read(); + // VECTPENDING is 9 bits, so, why are we casting it to a u8? Because + // this code is ARMv6-M specific, and ARMv6-M is architecturally + // specified as having no more than 32 interrupts (plus 16 exceptions). + let vectpending = (icsr >> 12) as u8; + + // If we're in a hardfault (which we know, because it's the only fault + // on ARMv6M and we are in a fault handler) and an SVC is pending... + if vectpending == 11 { + crate::syscalls::EXPECT_PHANTOM_SYSCALL + .store(true, core::sync::atomic::Ordering::Relaxed); + } } // ARMv6-M, to reduce complexity, does not distinguish fault causes. diff --git a/sys/kern/src/syscalls.rs b/sys/kern/src/syscalls.rs index 5137e5619e..52517ef637 100644 --- a/sys/kern/src/syscalls.rs +++ b/sys/kern/src/syscalls.rs @@ -27,6 +27,9 @@ //! struct* type to make this easy and safe, e.g. `task.save().as_send_args()`. //! See the `task::ArchState` trait for details. +#[cfg(hubris_phantom_svc_mitigation)] +use core::sync::atomic::{AtomicBool, Ordering}; + use abi::{ FaultInfo, IrqStatus, LeaseAttributes, SchedState, Sysnum, TaskId, TaskState, ULease, UsageError, @@ -40,6 +43,9 @@ use crate::task::{self, current_id, ArchState, NextTask, Task}; use crate::time::Timestamp; use crate::umem::{safe_copy, USlice}; +#[cfg(hubris_phantom_svc_mitigation)] +pub(crate) static EXPECT_PHANTOM_SYSCALL: AtomicBool = AtomicBool::new(false); + /// Entry point accessed by arch-specific syscall entry sequence. /// /// Before calling this, task volatile state (e.g. callee-save registers on ARM) @@ -69,6 +75,23 @@ pub unsafe extern "C" fn syscall_entry(nr: u32, task: *mut Task) { }; with_task_table(|tasks| { + // On certain architectures we risk receiving "phantom SVCs" assigned to + // tasks that are not, in fact, making system calls. As of this writing, + // this can occur on ARMv6-M (we believe we have fixed it on later ARM + // profiles). + // + // The only foolproof way of detecting this situation is to notice that + // the task that is _allegedly_ generating a syscall _is already blocked + // in a syscall._ + #[cfg(hubris_phantom_svc_mitigation)] + { + use crate::atomic::AtomicExt; + if EXPECT_PHANTOM_SYSCALL.swap_polyfill(false, Ordering::Relaxed) { + // Ignore it. Make no changes to state. + return; + } + } + match safe_syscall_entry(nr, idx, tasks) { // If we're returning to the same task, we're done! NextTask::Same => (), diff --git a/test/tests-stm32g0/app-g070.toml b/test/tests-stm32g0/app-g070.toml index 660cfc31f6..88d9a8e009 100644 --- a/test/tests-stm32g0/app-g070.toml +++ b/test/tests-stm32g0/app-g070.toml @@ -6,7 +6,7 @@ memory = "memory-g070.toml" [kernel] name = "demo-stm32g0-nucleo" -requires = {flash = 19112, ram = 2828} +requires = {flash = 19112, ram = 2832} features = ["g070"] stacksize = 2048