oxidecomputer · cbiffle · Nov 25, 2024 · Nov 20, 2024 · hawkw · Nov 25, 2024
diff --git a/doc/kipc.adoc b/doc/kipc.adoc
@@ -290,6 +290,69 @@ returned by a call to `get_task_dump_region` for the specified task.
 A copy of the memory referred to by the specified region, starting
 at `base` and running for `size` bytes.
 
+=== `find_faulted_task` (8)
+
+Scans forward from a given task index searching for a faulted task. If a faulted
+task is found, returns its index. Otherwise, returns zero, the index of the
+supervisor task, which is by definition not faulted.
+
+To simplify supervisor implementations, the given task index may equal the
+number of tasks in the system. In this case, there are no tasks to search, and
+you always get zero back.
+
+We do _not_ permit greater indices than that, because that would strongly
+suggest a bug.
+
+==== Request
+
+[source,rust]
+----
+struct FindFaultedTaskRequest {
+    starting_index: u32,
+}
+----
+
+==== Preconditions
+
+The `starting_index` must be a valid index for this system, or one greater.
+
+==== Response
+
+[source,rust]
+----
+struct FindFaultedTaskResponse {
+    fault_index: u32,
+}
+----
+
+==== Notes
+
+This is intended for the common case of a supervisor scanning the task table in
+search of a fault. Compared to doing this manually with `read_task_status`,
+`find_faulted_task` has several advantages:
+
+1. Generates one kipc per failed task, plus one, rather than one kipc per
+   potentially failed task.
+
+2. Avoids serializing/transferring/deserializing the relatively complex
+   `TaskState` type, which supervisors rarely make use of in practice.
+
+3. Can be implemented in a way that doesn't potentially panic the supervisor.
+
+The "task ID or zero" return value is represented as an `Option<NonZeroUsize>`
+in the Rust API, so a typical use of this kipc looks like:
+
+[source,rust]
+----
+let mut next_task = 1; // skip supervisor
+while let Some(fault) = kipc::find_faulted_task(next_task) {
+    let fault = usize::from(fault);
+    // do things with the faulted task
+
+    next_task = fault + 1;
+}
+----
+
 == Receiving from the kernel
 
 The kernel never sends messages to tasks. It's simply not equipped to do so.

diff --git a/sys/abi/src/lib.rs b/sys/abi/src/lib.rs
@@ -490,6 +490,7 @@ pub enum Kipcnum {
     GetTaskDumpRegion = 6,
     ReadTaskDumpRegion = 7,
     SoftwareIrq = 8,
+    FindFaultedTask = 9,
 }
 
 impl core::convert::TryFrom<u16> for Kipcnum {
@@ -505,6 +506,7 @@ impl core::convert::TryFrom<u16> for Kipcnum {
             6 => Ok(Self::GetTaskDumpRegion),
             7 => Ok(Self::ReadTaskDumpRegion),
             8 => Ok(Self::SoftwareIrq),
+            9 => Ok(Self::FindFaultedTask),
             _ => Err(()),
         }
     }

diff --git a/sys/kern/src/kipc.rs b/sys/kern/src/kipc.rs
@@ -39,6 +39,9 @@ pub fn handle_kernel_message(
             read_task_dump_region(tasks, caller, args.message?, args.response?)
         }
         Ok(Kipcnum::SoftwareIrq) => software_irq(tasks, caller, args.message?),
+        Ok(Kipcnum::FindFaultedTask) => {
+            find_faulted_task(tasks, caller, args.message?, args.response?)
+        }
 
         _ => {
             // Task has sent an unknown message to the kernel. That's bad.
@@ -465,3 +468,38 @@ fn software_irq(
     tasks[caller].save_mut().set_send_response_and_length(0, 0);
     Ok(NextTask::Same)
 }
+
+fn find_faulted_task(
+    tasks: &mut [Task],
+    caller: usize,
+    message: USlice<u8>,
+    response: USlice<u8>,
+) -> Result<NextTask, UserError> {
+    if caller != 0 {
+        return Err(UserError::Unrecoverable(FaultInfo::SyscallUsage(
+            UsageError::NotSupervisor,
+        )));
+    }
+
+    let index = deserialize_message::<u32>(&tasks[caller], message)? as usize;
+
+    // Note: we explicitly permit index == tasks.len(), which causes us to wrap
+    // and end the search.
+    if index > tasks.len() {
+        return Err(UserError::Unrecoverable(FaultInfo::SyscallUsage(
+            UsageError::TaskOutOfRange,
+        )));
+    }
+    let i = tasks[index..]
+        .iter()
+        .position(|task| matches!(task.state(), TaskState::Faulted { .. }))
+        .map(|i| i + index)
+        .unwrap_or(0);
+
+    let response_len =
+        serialize_response(&mut tasks[caller], response, &(i as u32))?;
+    tasks[caller]
+        .save_mut()
+        .set_send_response_and_length(0, response_len);
+    Ok(NextTask::Same)
+}
diff --git a/sys/userlib/src/kipc.rs b/sys/userlib/src/kipc.rs
@@ -4,6 +4,8 @@
 
 //! Operations implemented by IPC with the kernel task.
 
+use core::num::NonZeroUsize;
+
 use abi::{Kipcnum, TaskId};
 use zerocopy::AsBytes;
 
@@ -24,6 +26,33 @@ pub fn read_task_status(task: usize) -> abi::TaskState {
     ssmarshal::deserialize(&response[..len]).unwrap_lite().0
 }
 
+/// Scans forward from index `task` looking for a task in faulted state.
+///
+/// If no tasks at `task` or greater indices are faulted, this returns `None`.
+///
+/// If a faulted task at index `i` is found, returns `Some(i)`.
+///
+/// `task` may equal the number of tasks in the system (i.e. a one-past-the-end
+/// index). In that case, this returns `None` every time. Larger values will get
+/// you killed.
+///
+/// The return value is a `NonZeroUsize` because this can't ever return zero,
+/// since that would mean the supervisor (presumably the caller of this
+/// function!) is in faulted state.
+pub fn find_faulted_task(task: usize) -> Option<NonZeroUsize> {
+    // Coerce `task` to a known size (Rust doesn't assume that usize == u32)
+    let task = task as u32;
+    let mut response = 0_u32;
+    let (_, _) = sys_send(
+        TaskId::KERNEL,
+        Kipcnum::FindFaultedTask as u16,
+        task.as_bytes(),
+        response.as_bytes_mut(),
+        &[],
+    );
+    NonZeroUsize::new(response as usize)
+}
+
 pub fn get_task_dump_region(
     task: usize,
     region: usize,

diff --git a/task/jefe/src/main.rs b/task/jefe/src/main.rs
@@ -307,37 +307,36 @@ impl idol_runtime::NotificationHandler for ServerImpl<'_> {
             // one task to have faulted since we last looked, but it's somewhat
             // unlikely since a fault causes us to immediately preempt. In any
             // case, let's assume we might have to handle multiple tasks.
-            //
-            // TODO: it would be fantastic to have a way of finding this out in
-            // one syscall.
-            for (i, status) in self.task_states.iter_mut().enumerate() {
+            let mut next_task = 1;
+            while let Some(fault_index) = kipc::find_faulted_task(next_task) {
+                let fault_index = usize::from(fault_index);
+                next_task = fault_index + 1;
+
+                let status = &mut self.task_states[fault_index];
+
                 // If we're aware that this task is in a fault state, don't
                 // bother making a syscall to enquire.
                 if status.holding_fault {
                     continue;
                 }
 
-                if let abi::TaskState::Faulted { .. } =
-                    kipc::read_task_status(i)
+                #[cfg(feature = "dump")]
                 {
-                    #[cfg(feature = "dump")]
-                    {
-                        // We'll ignore the result of dumping; it could fail
-                        // if we're out of space, but we don't have a way of
-                        // dealing with that right now.
-                        //
-                        // TODO: some kind of circular buffer?
-                        _ = dump::dump_task(self.dump_areas, i);
-                    }
-
-                    if status.disposition == Disposition::Restart {
-                        // Stand it back up
-                        kipc::restart_task(i, true);
-                    } else {
-                        // Mark this one off so we don't revisit it until
-                        // requested.
-                        status.holding_fault = true;
-                    }
+                    // We'll ignore the result of dumping; it could fail
+                    // if we're out of space, but we don't have a way of
+                    // dealing with that right now.
+                    //
+                    // TODO: some kind of circular buffer?
+                    _ = dump::dump_task(self.dump_areas, fault_index);
+                }
+
+                if status.disposition == Disposition::Restart {
+                    // Stand it back up
+                    kipc::restart_task(fault_index, true);
+                } else {
+                    // Mark this one off so we don't revisit it until
+                    // requested.
+                    status.holding_fault = true;
                 }
             }
         }