From eda68e0b374265acfc201dd46fc314143ff57c6c Mon Sep 17 00:00:00 2001 From: Andrew Walbran Date: Tue, 26 Nov 2024 17:50:04 +0000 Subject: [PATCH] Add HypPciTransport. --- src/transport/pci.rs | 62 +++---- src/transport/x86_64.rs | 348 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 378 insertions(+), 32 deletions(-) diff --git a/src/transport/pci.rs b/src/transport/pci.rs index 7c6e6e94..130c7f1a 100644 --- a/src/transport/pci.rs +++ b/src/transport/pci.rs @@ -21,7 +21,7 @@ use core::{ use zerocopy::{FromBytes, Immutable, IntoBytes}; /// The PCI vendor ID for VirtIO devices. -const VIRTIO_VENDOR_ID: u16 = 0x1af4; +pub const VIRTIO_VENDOR_ID: u16 = 0x1af4; /// The offset to add to a VirtIO device ID to get the corresponding PCI device ID. const PCI_DEVICE_ID_OFFSET: u16 = 0x1040; @@ -35,24 +35,24 @@ const TRANSITIONAL_ENTROPY_SOURCE: u16 = 0x1005; const TRANSITIONAL_9P_TRANSPORT: u16 = 0x1009; /// The offset of the bar field within `virtio_pci_cap`. -const CAP_BAR_OFFSET: u8 = 4; +pub(crate) const CAP_BAR_OFFSET: u8 = 4; /// The offset of the offset field with `virtio_pci_cap`. -const CAP_BAR_OFFSET_OFFSET: u8 = 8; +pub(crate) const CAP_BAR_OFFSET_OFFSET: u8 = 8; /// The offset of the `length` field within `virtio_pci_cap`. -const CAP_LENGTH_OFFSET: u8 = 12; +pub(crate) const CAP_LENGTH_OFFSET: u8 = 12; /// The offset of the`notify_off_multiplier` field within `virtio_pci_notify_cap`. -const CAP_NOTIFY_OFF_MULTIPLIER_OFFSET: u8 = 16; +pub(crate) const CAP_NOTIFY_OFF_MULTIPLIER_OFFSET: u8 = 16; /// Common configuration. -const VIRTIO_PCI_CAP_COMMON_CFG: u8 = 1; +pub const VIRTIO_PCI_CAP_COMMON_CFG: u8 = 1; /// Notifications. -const VIRTIO_PCI_CAP_NOTIFY_CFG: u8 = 2; +pub const VIRTIO_PCI_CAP_NOTIFY_CFG: u8 = 2; /// ISR Status. -const VIRTIO_PCI_CAP_ISR_CFG: u8 = 3; +pub const VIRTIO_PCI_CAP_ISR_CFG: u8 = 3; /// Device specific configuration. -const VIRTIO_PCI_CAP_DEVICE_CFG: u8 = 4; +pub const VIRTIO_PCI_CAP_DEVICE_CFG: u8 = 4; -fn device_type(pci_device_id: u16) -> DeviceType { +pub(crate) fn device_type(pci_device_id: u16) -> DeviceType { match pci_device_id { TRANSITIONAL_NETWORK => DeviceType::Network, TRANSITIONAL_BLOCK => DeviceType::Block, @@ -389,34 +389,34 @@ impl Drop for PciTransport { /// `virtio_pci_common_cfg`, see 4.1.4.3 "Common configuration structure layout". #[repr(C)] -struct CommonCfg { - device_feature_select: Volatile, - device_feature: ReadOnly, - driver_feature_select: Volatile, - driver_feature: Volatile, - msix_config: Volatile, - num_queues: ReadOnly, - device_status: Volatile, - config_generation: ReadOnly, - queue_select: Volatile, - queue_size: Volatile, - queue_msix_vector: Volatile, - queue_enable: Volatile, - queue_notify_off: Volatile, - queue_desc: Volatile, - queue_driver: Volatile, - queue_device: Volatile, +pub(crate) struct CommonCfg { + pub device_feature_select: Volatile, + pub device_feature: ReadOnly, + pub driver_feature_select: Volatile, + pub driver_feature: Volatile, + pub msix_config: Volatile, + pub num_queues: ReadOnly, + pub device_status: Volatile, + pub config_generation: ReadOnly, + pub queue_select: Volatile, + pub queue_size: Volatile, + pub queue_msix_vector: Volatile, + pub queue_enable: Volatile, + pub queue_notify_off: Volatile, + pub queue_desc: Volatile, + pub queue_driver: Volatile, + pub queue_device: Volatile, } /// Information about a VirtIO structure within some BAR, as provided by a `virtio_pci_cap`. #[derive(Clone, Debug, Eq, PartialEq)] -struct VirtioCapabilityInfo { +pub(crate) struct VirtioCapabilityInfo { /// The bar in which the structure can be found. - bar: u8, + pub bar: u8, /// The offset within the bar. - offset: u32, + pub offset: u32, /// The length in bytes of the structure within the bar. - length: u32, + pub length: u32, } fn get_bar_region( diff --git a/src/transport/x86_64.rs b/src/transport/x86_64.rs index ceda094b..0f834d17 100644 --- a/src/transport/x86_64.rs +++ b/src/transport/x86_64.rs @@ -1,7 +1,21 @@ //! x86-64 specific transports. -use super::pci::bus::{Cam, ConfigurationAccess, DeviceFunction}; +use super::{ + pci::{ + bus::{Cam, ConfigurationAccess, DeviceFunction, PciRoot, PCI_CAP_ID_VNDR}, + device_type, CommonCfg, VirtioCapabilityInfo, VirtioPciError, CAP_BAR_OFFSET, + CAP_BAR_OFFSET_OFFSET, CAP_LENGTH_OFFSET, CAP_NOTIFY_OFF_MULTIPLIER_OFFSET, + VIRTIO_PCI_CAP_COMMON_CFG, VIRTIO_PCI_CAP_DEVICE_CFG, VIRTIO_PCI_CAP_ISR_CFG, + VIRTIO_PCI_CAP_NOTIFY_CFG, VIRTIO_VENDOR_ID, + }, + DeviceStatus, DeviceType, Transport, +}; +use crate::{ + hal::{Hal, PhysAddr}, + Error, +}; use core::arch::asm; +use zerocopy::{FromBytes, Immutable, IntoBytes}; /// This CPUID returns the signature and should be used to determine if VM is running under pKVM, /// KVM or not. See the Linux header `arch/x86/include/uapi/asm/kvm_para.h`. @@ -53,6 +67,296 @@ impl ConfigurationAccess for HypCam { } } +macro_rules! configread { + ($common_cfg:expr, $field:ident) => { + $common_cfg.read(core::mem::offset_of!(CommonCfg, $field)) + }; +} + +macro_rules! configwrite { + ($common_cfg:expr, $field:ident, $value:expr) => { + $common_cfg.write(core::mem::offset_of!(CommonCfg, $field), $value) + }; +} + +/// PCI transport for VirtIO using hypercalls implemented by the x86-64 pKVM hypervisor for IO BARs. +pub struct HypPciTransport { + device_type: DeviceType, + /// The bus, device and function identifier for the VirtIO device. + device_function: DeviceFunction, + /// The common configuration structure within some BAR. + common_cfg: HypIoRegion, + /// The start of the queue notification region within some BAR. + notify_region: HypIoRegion, + notify_off_multiplier: u32, + /// The ISR status register within some BAR. + isr_status: HypIoRegion, + /// The VirtIO device-specific configuration within some BAR. + config_space: Option, +} + +impl HypPciTransport { + /// Constructs a new x86-64 pKVM PCI VirtIO transport for the given device function on the given + /// PCI root controller. + pub fn new( + root: &mut PciRoot, + device_function: DeviceFunction, + ) -> Result { + let device_vendor = root.configuration_access.read_word(device_function, 0); + let device_id = (device_vendor >> 16) as u16; + let vendor_id = device_vendor as u16; + if vendor_id != VIRTIO_VENDOR_ID { + return Err(VirtioPciError::InvalidVendorId(vendor_id)); + } + let device_type = device_type(device_id); + + // Find the PCI capabilities we need. + let mut common_cfg = None; + let mut notify_cfg = None; + let mut notify_off_multiplier = 0; + let mut isr_cfg = None; + let mut device_cfg = None; + for capability in root.capabilities(device_function) { + if capability.id != PCI_CAP_ID_VNDR { + continue; + } + let cap_len = capability.private_header as u8; + let cfg_type = (capability.private_header >> 8) as u8; + if cap_len < 16 { + continue; + } + let struct_info = VirtioCapabilityInfo { + bar: root + .configuration_access + .read_word(device_function, capability.offset + CAP_BAR_OFFSET) + as u8, + offset: root + .configuration_access + .read_word(device_function, capability.offset + CAP_BAR_OFFSET_OFFSET), + length: root + .configuration_access + .read_word(device_function, capability.offset + CAP_LENGTH_OFFSET), + }; + + match cfg_type { + VIRTIO_PCI_CAP_COMMON_CFG if common_cfg.is_none() => { + common_cfg = Some(struct_info); + } + VIRTIO_PCI_CAP_NOTIFY_CFG if cap_len >= 20 && notify_cfg.is_none() => { + notify_cfg = Some(struct_info); + notify_off_multiplier = root.configuration_access.read_word( + device_function, + capability.offset + CAP_NOTIFY_OFF_MULTIPLIER_OFFSET, + ); + } + VIRTIO_PCI_CAP_ISR_CFG if isr_cfg.is_none() => { + isr_cfg = Some(struct_info); + } + VIRTIO_PCI_CAP_DEVICE_CFG if device_cfg.is_none() => { + device_cfg = Some(struct_info); + } + _ => {} + } + } + + let common_cfg = get_bar_region::( + root, + device_function, + &common_cfg.ok_or(VirtioPciError::MissingCommonConfig)?, + )?; + + let notify_cfg = notify_cfg.ok_or(VirtioPciError::MissingNotifyConfig)?; + if notify_off_multiplier % 2 != 0 { + return Err(VirtioPciError::InvalidNotifyOffMultiplier( + notify_off_multiplier, + )); + } + let notify_region = get_bar_region::(root, device_function, ¬ify_cfg)?; + + let isr_status = get_bar_region::( + root, + device_function, + &isr_cfg.ok_or(VirtioPciError::MissingIsrConfig)?, + )?; + + let config_space = if let Some(device_cfg) = device_cfg { + Some(get_bar_region::( + root, + device_function, + &device_cfg, + )?) + } else { + None + }; + + Ok(Self { + device_type, + device_function, + common_cfg, + notify_region, + notify_off_multiplier, + isr_status, + config_space, + }) + } +} + +impl Transport for HypPciTransport { + fn device_type(&self) -> DeviceType { + self.device_type + } + + fn read_device_features(&mut self) -> u64 { + configwrite!(self.common_cfg, device_feature_select, 0); + let device_features_low: u32 = configread!(self.common_cfg, device_feature); + configwrite!(self.common_cfg, device_feature_select, 1); + let device_features_high: u32 = configread!(self.common_cfg, device_feature); + (device_features_high as u64) << 32 | device_features_low as u64 + } + + fn write_driver_features(&mut self, driver_features: u64) { + configwrite!(self.common_cfg, driver_feature_select, 0); + configwrite!(self.common_cfg, driver_feature, driver_features as u32); + configwrite!(self.common_cfg, driver_feature_select, 1); + configwrite!( + self.common_cfg, + driver_feature, + (driver_features >> 32) as u32 + ); + } + + fn max_queue_size(&mut self, queue: u16) -> u32 { + configwrite!(self.common_cfg, queue_select, queue); + let queue_size: u16 = configread!(self.common_cfg, queue_size); + queue_size.into() + } + + fn notify(&mut self, queue: u16) { + configwrite!(self.common_cfg, queue_select, queue); + // TODO: Consider caching this somewhere (per queue). + let queue_notify_off: u16 = configread!(self.common_cfg, queue_notify_off); + + let offset_bytes = usize::from(queue_notify_off) * self.notify_off_multiplier as usize; + self.notify_region.write(offset_bytes, queue); + } + + fn get_status(&self) -> DeviceStatus { + let status: u8 = configread!(self.common_cfg, device_status); + DeviceStatus::from_bits_truncate(status.into()) + } + + fn set_status(&mut self, status: DeviceStatus) { + configwrite!(self.common_cfg, device_status, status.bits() as u8); + } + + fn set_guest_page_size(&mut self, _guest_page_size: u32) { + // No-op, the PCI transport doesn't care. + } + + fn requires_legacy_layout(&self) -> bool { + false + } + + fn queue_set( + &mut self, + queue: u16, + size: u32, + descriptors: PhysAddr, + driver_area: PhysAddr, + device_area: PhysAddr, + ) { + configwrite!(self.common_cfg, queue_select, queue); + configwrite!(self.common_cfg, queue_size, size as u16); + configwrite!(self.common_cfg, queue_desc, descriptors as u64); + configwrite!(self.common_cfg, queue_driver, driver_area as u64); + configwrite!(self.common_cfg, queue_device, device_area as u64); + configwrite!(self.common_cfg, queue_enable, 1); + } + + fn queue_unset(&mut self, _queue: u16) { + // The VirtIO spec doesn't allow queues to be unset once they have been set up for the PCI + // transport, so this is a no-op. + } + + fn queue_used(&mut self, queue: u16) -> bool { + configwrite!(self.common_cfg, queue_select, queue); + let queue_enable: u16 = configread!(self.common_cfg, queue_enable); + queue_enable == 1 + } + + fn ack_interrupt(&mut self) -> bool { + // Safe because the common config pointer is valid and we checked in get_bar_region that it + // was aligned. + // Reading the ISR status resets it to 0 and causes the device to de-assert the interrupt. + let isr_status: u8 = self.isr_status.read(0); + // TODO: Distinguish between queue interrupt and device configuration interrupt. + isr_status & 0x3 != 0 + } + + fn read_config_space(&self, offset: usize) -> Result { + assert!(align_of::() <= 4, + "Driver expected config space alignment of {} bytes, but VirtIO only guarantees 4 byte alignment.", + align_of::()); + assert_eq!(offset % align_of::(), 0); + + let config_space = self.config_space.ok_or(Error::ConfigSpaceMissing)?; + if config_space.size < offset + size_of::() { + Err(Error::ConfigSpaceTooSmall) + } else { + Ok(config_space.read(offset)) + } + } + + fn write_config_space( + &mut self, + offset: usize, + value: T, + ) -> Result<(), Error> { + assert!(align_of::() <= 4, + "Driver expected config space alignment of {} bytes, but VirtIO only guarantees 4 byte alignment.", + align_of::()); + assert_eq!(offset % align_of::(), 0); + + let config_space = self.config_space.ok_or(Error::ConfigSpaceMissing)?; + if config_space.size < offset + size_of::() { + Err(Error::ConfigSpaceTooSmall) + } else { + config_space.write(offset, value); + Ok(()) + } + } +} + +fn get_bar_region( + root: &mut PciRoot, + device_function: DeviceFunction, + struct_info: &VirtioCapabilityInfo, +) -> Result { + let bar_info = root.bar_info(device_function, struct_info.bar)?; + let (bar_address, bar_size) = bar_info + .memory_address_size() + .ok_or(VirtioPciError::UnexpectedIoBar)?; + if bar_address == 0 { + return Err(VirtioPciError::BarNotAllocated(struct_info.bar)); + } + if struct_info.offset + struct_info.length > bar_size + || size_of::() > struct_info.length as usize + { + return Err(VirtioPciError::BarOffsetOutOfRange); + } + let paddr = bar_address as PhysAddr + struct_info.offset as PhysAddr; + if paddr % align_of::() != 0 { + return Err(VirtioPciError::Misaligned { + address: paddr, + alignment: align_of::(), + }); + } + Ok(HypIoRegion { + paddr, + size: struct_info.length as usize, + }) +} + /// Gets the signature CPU ID. fn cpuid_signature() -> [u8; 4] { let signature: u32; @@ -122,3 +426,45 @@ fn hyp_io_write(address: usize, size: usize, data: u64) { ); } } + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +struct HypIoRegion { + pub paddr: usize, + pub size: usize, +} + +impl HypIoRegion { + pub const fn new(paddr: usize, size: usize) -> Self { + Self { paddr, size } + } + + fn read(self, offset: usize) -> T { + assert!(offset + size_of::() <= self.size); + let paddr = self.paddr + offset; + + let mut value = T::new_zeroed(); + for (i, chunk) in value + .as_mut_bytes() + .chunks_mut(HYP_IO_CHUNK_SIZE) + .enumerate() + { + let chunk_data = hyp_io_read(paddr + i * HYP_IO_CHUNK_SIZE, chunk.len()); + chunk.copy_from_slice(&chunk_data.as_bytes()[..chunk.len()]); + } + value + } + + fn write(self, offset: usize, data: T) { + assert!(offset + size_of::() <= self.size); + let paddr = self.paddr + offset; + + for (i, chunk) in data.as_bytes().chunks(HYP_IO_CHUNK_SIZE).enumerate() { + let mut chunk_data = 0; + chunk_data.as_mut_bytes()[..chunk.len()].copy_from_slice(chunk); + hyp_io_write(paddr + i * HYP_IO_CHUNK_SIZE, chunk.len(), chunk_data); + } + } +} + +/// The maximum number of bytes that can be read or written by a single IO hypercall. +const HYP_IO_CHUNK_SIZE: usize = 8;