Skip to content

Commit

Permalink
nvme_driver: add a sequence number to high cid bits (#186)
Browse files Browse the repository at this point in the history
Due to its use of a slab for tracking pending commands, the NVMe driver
aggressively reuses cids. This sometimes makes debugging difficult,
especially when correlating logs across multiple sources. It also
increases the chance that a NVMe device or driver bug will cause us to
incorrectly complete an in-flight IO.

To resolve this, use the high bits of the CID as a sequence number.
There are six unused bits, which is enough to at least get some
variability.
  • Loading branch information
jstarks authored Oct 25, 2024
1 parent 5cba1ca commit 145788f
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 12 deletions.
9 changes: 9 additions & 0 deletions support/inspect/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1617,6 +1617,15 @@ pub struct AsHex<T>(pub T);

hexbincount!(AsHex, into_hex, u8, u16, u32, u64, usize);

impl<T> Inspect for AsHex<Wrapping<T>>
where
for<'a> AsHex<&'a T>: Inspect,
{
fn inspect(&self, req: Request<'_>) {
Inspect::inspect(&AsHex(&self.0 .0), req)
}
}

impl<T: Clone> Inspect for AsHex<&'_ T>
where
AsHex<T>: Inspect,
Expand Down
81 changes: 69 additions & 12 deletions vm/devices/storage/disk_nvme/nvme_driver/src/queue_pair.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use pal_async::task::Task;
use safeatomic::AtomicSliceOps;
use slab::Slab;
use std::future::poll_fn;
use std::num::Wrapping;
use std::sync::Arc;
use std::task::Poll;
use thiserror::Error;
Expand Down Expand Up @@ -58,6 +59,59 @@ impl Inspect for QueuePair {
}
}

impl PendingCommands {
const CID_KEY_BITS: u32 = 10;
const CID_KEY_MASK: u16 = (1 << Self::CID_KEY_BITS) - 1;
const MAX_CIDS: usize = 1 << Self::CID_KEY_BITS;
const CID_SEQ_OFFSET: Wrapping<u16> = Wrapping(1 << Self::CID_KEY_BITS);

fn new() -> Self {
Self {
commands: Slab::new(),
next_cid_high_bits: Wrapping(0),
}
}

fn is_full(&self) -> bool {
self.commands.len() >= Self::MAX_CIDS
}

fn is_empty(&self) -> bool {
self.commands.is_empty()
}

/// Inserts a command into the pending list, updating it with a new CID.
fn insert(
&mut self,
command: &mut spec::Command,
respond: mesh::OneshotSender<spec::Completion>,
) {
let entry = self.commands.vacant_entry();
assert!(entry.key() < Self::MAX_CIDS);
assert_eq!(self.next_cid_high_bits % Self::CID_SEQ_OFFSET, Wrapping(0));
let cid = entry.key() as u16 | self.next_cid_high_bits.0;
self.next_cid_high_bits += Self::CID_SEQ_OFFSET;
command.cdw0.set_cid(cid);
entry.insert(PendingCommand {
command: *command,
respond,
});
}

fn remove(&mut self, cid: u16) -> mesh::OneshotSender<spec::Completion> {
let command = self
.commands
.try_remove((cid & Self::CID_KEY_MASK) as usize)
.expect("completion for unknown cid");
assert_eq!(
command.command.cdw0.cid(),
cid,
"cid sequence number mismatch"
);
command.respond
}
}

impl QueuePair {
pub const MAX_SQSIZE: u16 = (PAGE_SIZE / 64) as u16; // Maximum SQ size in entries.
pub const MAX_CQSIZE: u16 = (PAGE_SIZE / 16) as u16; // Maximum CQ size in entries.
Expand Down Expand Up @@ -87,8 +141,7 @@ impl QueuePair {
let mut queue_handler = QueueHandler {
sq,
cq,
commands: Slab::new(),
max_cids: 1024,
commands: PendingCommands::new(),
stats: Default::default(),
};
let task = spawner.spawn("nvme-queue", {
Expand Down Expand Up @@ -375,6 +428,15 @@ struct Prp<'a> {
_pages: Option<ScopedPages<'a>>,
}

#[derive(Inspect)]
struct PendingCommands {
/// Mapping from the low bits of cid to pending command.
#[inspect(iter_by_key)]
commands: Slab<PendingCommand>,
#[inspect(hex)]
next_cid_high_bits: Wrapping<u16>,
}

#[derive(Inspect)]
struct PendingCommand {
// Keep the command around for diagnostics.
Expand All @@ -392,10 +454,7 @@ enum Req {
struct QueueHandler {
sq: SubmissionQueue,
cq: CompletionQueue,
/// Mapping from cid to pending command.
#[inspect(iter_by_key)]
commands: Slab<PendingCommand>,
max_cids: usize,
commands: PendingCommands,
stats: QueueStats,
}

Expand All @@ -420,7 +479,7 @@ impl QueueHandler {
}

let event = poll_fn(|cx| {
if !self.sq.is_full() && self.commands.len() < self.max_cids {
if !self.sq.is_full() && !self.commands.is_full() {
if let Poll::Ready(Some(req)) = recv.poll_next_unpin(cx) {
return Event::Request(req).into();
}
Expand All @@ -443,19 +502,17 @@ impl QueueHandler {
match event {
Event::Request(req) => match req {
Req::Command(Rpc(mut command, respond)) => {
let entry = self.commands.vacant_entry();
command.cdw0.set_cid(entry.key() as u16);
entry.insert(PendingCommand { command, respond });
self.commands.insert(&mut command, respond);
self.sq.write(command).unwrap();
self.stats.issued.increment();
}
Req::Inspect(deferred) => deferred.inspect(&self),
},
Event::Completion(completion) => {
let command = self.commands.remove(completion.cid.into());
assert_eq!(completion.sqid, self.sq.id());
let respond = self.commands.remove(completion.cid);
self.sq.update_head(completion.sqhd);
command.respond.send(completion);
respond.send(completion);
self.stats.completed.increment();
}
}
Expand Down

0 comments on commit 145788f

Please sign in to comment.