diff --git a/src/agent/Cargo.lock b/src/agent/Cargo.lock index 4b6069409521..e4b9899cb20c 100644 --- a/src/agent/Cargo.lock +++ b/src/agent/Cargo.lock @@ -8,6 +8,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "ahash" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a824f2aa7e75a0c98c5a504fceb80649e9c35265d44525b5f94de4771a395cd" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" version = "1.0.5" @@ -47,6 +58,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c5d78ce20460b82d3fa150275ed9d55e21064fc7951177baacf86a145c4a4b1f" +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + [[package]] name = "async-broadcast" version = "0.5.1" @@ -246,6 +263,18 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -270,6 +299,51 @@ dependencies = [ "log", ] +[[package]] +name = "borsh" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4114279215a005bc675e386011e594e1d9b800918cea18fcadadcce864a2046b" +dependencies = [ + "borsh-derive", + "hashbrown", +] + +[[package]] +name = "borsh-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0754613691538d51f329cce9af41d7b7ca150bc973056f1156611489475f54f7" +dependencies = [ + "borsh-derive-internal", + 
"borsh-schema-derive-internal", + "proc-macro-crate 0.1.5", + "proc-macro2", + "syn 1.0.109", +] + +[[package]] +name = "borsh-derive-internal" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afb438156919598d2c7bad7e1c0adf3d26ed3840dbc010db1a882a65583ca2fb" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "borsh-schema-derive-internal" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "634205cc43f74a1b9046ef87c4540ebda95696ec0f315024860cad7c5b0f5ccd" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "bumpalo" version = "3.10.0" @@ -278,9 +352,36 @@ checksum = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3" [[package]] name = "byte-unit" -version = "3.1.4" +version = "5.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "415301c9de11005d4b92193c0eb7ac7adc37e5a49e0ac9bed0a42343512744b8" +checksum = "d405b41420a161b4e1dd5a52e3349f41b4dae9a39be02aff1d67fe53256430ac" +dependencies = [ + "rust_decimal", + "serde", + "utf8-width", +] + +[[package]] +name = "bytecheck" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6372023ac861f6e6dc89c8344a8f398fb42aaba2b5dbc649ca0c0e9dbcb627" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7ec4c6f261935ad534c0c22dbef2201b45918860eb1c574b972bd213a76af61" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] [[package]] name = "byteorder" @@ -488,11 +589,33 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fca89a0e215bab21874660c67903c5f143333cab1da83d041c7ded6053774751" +dependencies = [ + "cfg-if 1.0.0", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e3681d554572a651dda4186cd47240627c3d0114d45a95f6ad27f2f22e7548d" +dependencies = [ + "autocfg", + "cfg-if 1.0.0", + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" -version = "0.8.16" +version = "0.8.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" +checksum = "c3a430a770ebd84726f584a90ee7f020d28db52c6d02138900f22341f866d39c" dependencies = [ "cfg-if 1.0.0", ] @@ -735,6 +858,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + [[package]] name = "futures" version = "0.3.21" @@ -890,6 +1019,9 @@ name = "hashbrown" version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db0d4cf898abf0081f964436dc980e96670a0f36863e4b83aaacdb65c9d7ccc3" +dependencies = [ + "ahash", +] [[package]] name = "heck" @@ -1229,6 +1361,7 @@ dependencies = [ "serde_json", "slog", "slog-scope", + "sysinfo", "thiserror", "toml", ] @@ -1241,9 +1374,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.139" +version = "0.2.151" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" +checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" [[package]] name = "libseccomp" @@ -1519,6 +1652,15 @@ dependencies = [ "memoffset 0.7.1", ] +[[package]] +name = "ntapi" +version = "0.4.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" +dependencies = [ + "winapi", +] + [[package]] name = "num-integer" version = "0.1.45" @@ -1816,6 +1958,15 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" +[[package]] +name = "proc-macro-crate" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d6ea3c4595b96363c13943497db34af4460fb474a95c43f4446ad341b8c9785" +dependencies = [ + "toml", +] + [[package]] name = "proc-macro-crate" version = "1.2.1" @@ -2022,6 +2173,26 @@ dependencies = [ "ttrpc-codegen", ] +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "quote" version = "1.0.27" @@ -2031,6 +2202,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + [[package]] name = "rand" version = "0.8.5" @@ -2061,6 +2238,26 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rayon" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.2.13" @@ -2134,6 +2331,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "rend" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2571463863a6bd50c32f94402933f03457a3fbaf697a707c5be741e459f08fd" +dependencies = [ + "bytecheck", +] + [[package]] name = "reqwest" version = "0.11.18" @@ -2171,6 +2377,34 @@ dependencies = [ "winreg", ] +[[package]] +name = "rkyv" +version = "0.7.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0200c8230b013893c0b2d6213d6ec64ed2b9be2e0e016682b7224ff82cff5c58" +dependencies = [ + "bitvec", + "bytecheck", + "hashbrown", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5c462a1328c8e67e4d6dbad1eb0355dd43e8ab432c6e227a43657f16ade5033" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "rlimit" version = "0.5.4" @@ -2195,6 +2429,22 @@ dependencies = [ "tokio", ] +[[package]] +name = "rust_decimal" +version = "1.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4c4216490d5a413bc6d10fa4742bd7d4955941d062c0ef873141d6b0e7b30fd" +dependencies = [ + "arrayvec", + "borsh", + "bytes 1.1.0", + "num-traits", + "rand", + "rkyv", + "serde", + "serde_json", +] + [[package]] name = "rustix" version = "0.37.3" @@ -2291,6 +2541,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "seahash" +version = "4.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + [[package]] name = "security-framework" version = "2.9.2" @@ -2459,6 +2715,12 @@ dependencies = [ "libc", ] +[[package]] +name = "simdutf8" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" + [[package]] name = "slab" version = "0.4.6" @@ -2596,12 +2858,33 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sysinfo" +version = "0.29.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd727fc423c2060f6c92d9534cef765c65a6ed3f428a03d7def74a8c4348e666" +dependencies = [ + "cfg-if 1.0.0", + "core-foundation-sys", + "libc", + "ntapi", + "once_cell", + "rayon", + "winapi", +] + [[package]] name = "take_mut" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60" +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "tempfile" version = "3.3.0" @@ -3024,6 +3307,18 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "utf8-width" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3" + +[[package]] +name = "uuid" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" + [[package]] name = "valuable" version = "0.1.0" @@ -3407,6 +3702,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + [[package]] name = "xattr" version = "0.2.3" @@ -3473,7 +3777,7 @@ version = "3.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41d1794a946878c0e807f55a397187c11fc7a038ba5d868e7db4f3bd7760bc9d" dependencies = [ - "proc-macro-crate", + "proc-macro-crate 1.2.1", "proc-macro2", "quote", "regex", @@ -3512,7 +3816,7 @@ version = "3.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "934d7a7dfc310d6ee06c87ffe88ef4eca7d3e37bb251dece2ef93da8f17d8ecd" dependencies = [ - "proc-macro-crate", + "proc-macro-crate 1.2.1", "proc-macro2", "quote", "syn 1.0.109", diff --git a/src/dragonball/Cargo.lock b/src/dragonball/Cargo.lock index 7915c83cd48f..bb758d1f85fd 100644 --- a/src/dragonball/Cargo.lock +++ b/src/dragonball/Cargo.lock @@ -344,6 +344,26 @@ dependencies = [ "vmm-sys-util", ] +[[package]] +name = "dbs-pci" +version = "0.1.0" +dependencies = [ + "byteorder", + "dbs-allocator", + "dbs-boot", + "dbs-device", + "dbs-interrupt", + "downcast-rs", + "kvm-bindings", + "kvm-ioctls", + "libc", + "log", + "thiserror", + "vfio-bindings", + "vfio-ioctls", + "vm-memory", +] + [[package]] name = "dbs-upcall" version = "0.3.0" @@ -398,6 +418,7 @@ dependencies = [ "serde_json", "thiserror", "threadpool", + "timerfd", "vhost", "virtio-bindings", "virtio-queue", @@ -454,6 +475,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "downcast-rs" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650" + [[package]] name = "dragonball" version = "0.1.0" @@ -469,6 +496,7 @@ dependencies = [ "dbs-device", "dbs-interrupt", "dbs-legacy-devices", + "dbs-pci", "dbs-upcall", "dbs-utils", "dbs-virtio-devices", @@ -494,6 +522,8 @@ dependencies = [ "test-utils", "thiserror", "tracing", + "vfio-bindings", + "vfio-ioctls", 
"virtio-queue", "vm-memory", "vmm-sys-util", @@ -2074,6 +2104,29 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "vfio-bindings" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43449b404c488f70507dca193debd4bea361fe8089869b947adc19720e464bce" + +[[package]] +name = "vfio-ioctls" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "068bac78842164a8ecc1d1a84a8d8a9168ab29fa3c96942689e286a30ae22ac4" +dependencies = [ + "byteorder", + "kvm-bindings", + "kvm-ioctls", + "libc", + "log", + "thiserror", + "vfio-bindings", + "vm-memory", + "vmm-sys-util", +] + [[package]] name = "vhost" version = "0.6.1" diff --git a/src/dragonball/Cargo.toml b/src/dragonball/Cargo.toml index ca41c613fab2..3b45ca4dc1cf 100644 --- a/src/dragonball/Cargo.toml +++ b/src/dragonball/Cargo.toml @@ -25,6 +25,7 @@ dbs-utils = { path = "./src/dbs_utils" } dbs-virtio-devices = { path = "./src/dbs_virtio_devices", optional = true, features = [ "virtio-mmio", ] } +dbs-pci = { path = "./src/dbs_pci", optional = true } derivative = "2.2.0" kvm-bindings = "0.6.0" kvm-ioctls = "0.12.0" @@ -48,6 +49,8 @@ virtio-queue = { version = "0.7.0", optional = true } vm-memory = { version = "0.10.0", features = ["backend-mmap"] } crossbeam-channel = "0.5.6" fuse-backend-rs = "0.10.5" +vfio-bindings = { version = "0.3.0", optional = true } +vfio-ioctls = { version = "0.1.0", optional = true } [dev-dependencies] slog-async = "2.7.0" @@ -77,3 +80,4 @@ vhost-net = ["dbs-virtio-devices/vhost-net"] vhost-user-fs = ["dbs-virtio-devices/vhost-user-fs"] vhost-user-net = ["dbs-virtio-devices/vhost-user-net"] vhost-user-blk = ["dbs-virtio-devices/vhost-user-blk"] +host-device = ["dep:vfio-bindings", "dep:vfio-ioctls", "dep:dbs-pci"] diff --git a/src/dragonball/src/api/v1/vmm_action.rs 
b/src/dragonball/src/api/v1/vmm_action.rs index 0d2f678ed033..586b998cbce2 100644 --- a/src/dragonball/src/api/v1/vmm_action.rs +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -9,13 +9,14 @@ use std::fs::File; use std::sync::{Arc, Mutex}; -use crossbeam_channel::{Receiver, Sender, TryRecvError}; +use crossbeam_channel::{unbounded, Receiver, Sender, TryRecvError}; use log::{debug, error, info, warn}; use tracing::instrument; use crate::error::{Result, StartMicroVmError, StopMicrovmError}; use crate::event_manager::EventManager; use crate::tracer::{DragonballTracer, TraceError, TraceInfo}; +use crate::vcpu::VcpuManagerError; use crate::vm::{CpuTopology, KernelConfigInfo, VmConfigInfo}; use crate::vmm::Vmm; @@ -36,6 +37,8 @@ pub use crate::device_manager::fs_dev_mgr::{ }; #[cfg(feature = "virtio-mem")] pub use crate::device_manager::mem_dev_mgr::{MemDeviceConfigInfo, MemDeviceError}; +#[cfg(feature = "host-device")] +use crate::device_manager::vfio_dev_mgr::{HostDeviceConfig, VfioDeviceError}; #[cfg(feature = "vhost-net")] pub use crate::device_manager::vhost_net_dev_mgr::{ VhostNetDeviceConfigInfo, VhostNetDeviceError, VhostNetDeviceMgr, @@ -148,11 +151,20 @@ pub enum VmmActionError { /// End tracing Failed. #[error("End tracing failed: {0}")] EndTracingFailed(#[source] TraceError), + + #[cfg(feature = "host-device")] + /// The action `InsertHostDevice` failed either because of bad user input or an internal error. + #[error("failed to add VFIO passthrough device: {0:?}")] + HostDeviceConfig(#[source] VfioDeviceError), + #[cfg(feature = "host-device")] + /// The action 'RemoveHostDevice' failed because of vcpu manager internal error. + #[error("remove host device error: {0}")] + RemoveHostDevice(#[source] VcpuManagerError), } /// This enum represents the public interface of the VMM. Each action contains various /// bits of information (ids, paths, etc.). 
-#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq)] pub enum VmmAction { /// Configure the boot source of the microVM using `BootSourceConfig`. /// This action can only be called before the microVM has booted. @@ -245,6 +257,18 @@ pub enum VmmAction { /// Add a new balloon device or update one that already exists using the `BalloonDeviceConfig` /// as input. InsertBalloonDevice(BalloonDeviceConfigInfo), + + #[cfg(feature = "host-device")] + /// Add a VFIO assignment host device or update one that already exists + InsertHostDevice(HostDeviceConfig), + + #[cfg(feature = "host-device")] + /// Prepare to remove a VFIO assignment host device that already exists + PrepareRemoveHostDevice(String), + + #[cfg(feature = "host-device")] + /// Remove a VFIO assignment host device that already exists + RemoveHostDevice(String), } /// The enum represents the response sent by the VMM in case of success. The response is either @@ -257,6 +281,8 @@ pub enum VmmData { MachineConfiguration(Box), /// Prometheus Metrics represented by String. HypervisorMetrics(String), + /// Sync Hotplug + SyncHotplug((Sender>, Receiver>)), } /// Request data type used to communicate between the API and the VMM. @@ -371,6 +397,14 @@ impl VmmService { VmmAction::InsertBalloonDevice(balloon_cfg) => { self.add_balloon_device(vmm, event_mgr, balloon_cfg) } + #[cfg(feature = "host-device")] + VmmAction::InsertHostDevice(hostdev_cfg) => self.add_vfio_device(vmm, hostdev_cfg), + #[cfg(feature = "host-device")] + VmmAction::PrepareRemoveHostDevice(hostdev_id) => { + self.prepare_remove_vfio_device(vmm, &hostdev_id) + } + #[cfg(feature = "host-device")] + VmmAction::RemoveHostDevice(hostdev_cfg) => self.remove_vfio_device(vmm, &hostdev_cfg), }; debug!("send vmm response: {:?}", response); @@ -539,6 +573,8 @@ impl VmmService { // - Some(path), legacy_manager will create_socket_console on that path.
config.serial_path = machine_config.serial_path; + config.pci_hotplug_enabled = machine_config.pci_hotplug_enabled; + vm.set_vm_config(config.clone()); self.machine_config = config; @@ -813,6 +849,101 @@ impl VmmService { .map_err(VmmActionError::FsDevice) } + #[cfg(feature = "host-device")] + fn add_vfio_device(&self, vmm: &mut Vmm, config: HostDeviceConfig) -> VmmRequestResult { + let vm = vmm.get_vm_mut().ok_or(VmmActionError::HostDeviceConfig( + VfioDeviceError::InvalidVMID, + ))?; + info!("add_vfio_device: {:?}", config); + + let mut ctx = vm.create_device_op_context(None).map_err(|e| { + info!("create device op context error: {:?}", e); + if let StartMicroVmError::MicroVMAlreadyRunning = e { + VmmActionError::HostDeviceConfig(VfioDeviceError::UpdateNotAllowedPostBoot) + } else if let StartMicroVmError::UpcallServerNotReady = e { + VmmActionError::UpcallServerNotReady + } else { + VmmActionError::StartMicroVm(e) + } + })?; + + vm.device_manager() + .vfio_manager + .lock() + .unwrap() + .insert_device(&mut ctx, config) + .map_err(VmmActionError::HostDeviceConfig)?; + Ok(VmmData::Empty) + } + + // using upcall to unplug the pci device in the guest + #[cfg(feature = "host-device")] + fn prepare_remove_vfio_device(&mut self, vmm: &mut Vmm, hostdev_id: &str) -> VmmRequestResult { + let vm = vmm.get_vm_mut().ok_or(VmmActionError::HostDeviceConfig( + VfioDeviceError::InvalidVMID, + ))?; + + info!("prepare_remove_vfio_device: {:?}", hostdev_id); + let ctx = vm.create_device_op_context(None).map_err(|e| { + info!("create device op context error: {:?}", e); + if let StartMicroVmError::MicroVMAlreadyRunning = e { + VmmActionError::HostDeviceConfig(VfioDeviceError::UpdateNotAllowedPostBoot) + } else if let StartMicroVmError::UpcallServerNotReady = e { + VmmActionError::UpcallServerNotReady + } else { + VmmActionError::StartMicroVm(e) + } + })?; + + let (sender, receiver) = unbounded(); + + // It is safe because we don't expect poison lock. 
+ let vfio_manager = vm.device_manager.vfio_manager.lock().unwrap(); + + vfio_manager + .prepare_remove_device(&ctx, hostdev_id, sender.clone()) + .map(|_| VmmData::SyncHotplug((sender, receiver))) + .map_err(VmmActionError::HostDeviceConfig) + } + + #[cfg(feature = "host-device")] + fn remove_vfio_device(&self, vmm: &mut Vmm, hostdev_id: &str) -> VmmRequestResult { + let vm = vmm.get_vm_mut().ok_or(VmmActionError::HostDeviceConfig( + VfioDeviceError::InvalidVMID, + ))?; + + info!("remove_vfio_device: {:?}", hostdev_id); + let mut ctx = vm.create_device_op_context(None).map_err(|e| { + info!("create device op context error: {:?}", e); + if let StartMicroVmError::MicroVMAlreadyRunning = e { + VmmActionError::HostDeviceConfig(VfioDeviceError::UpdateNotAllowedPostBoot) + } else if let StartMicroVmError::UpcallServerNotReady = e { + VmmActionError::UpcallServerNotReady + } else { + VmmActionError::StartMicroVm(e) + } + })?; + + // It is safe because we don't expect poison lock. + let mut vfio_manager = vm.device_manager.vfio_manager.lock().unwrap(); + + vfio_manager + .remove_device(&mut ctx, hostdev_id) + .map_err(VmmActionError::HostDeviceConfig)?; + + // we need to revalidate io_manager cache in all vcpus + // in order to drop old io_manager and close device's fd + vm.vcpu_manager() + .map_err(VmmActionError::RemoveHostDevice)? + .revalidate_all_vcpus_cache() + .map_err(VmmActionError::RemoveHostDevice)?; + + // FIXME: we should clear corresponding information because vfio module in + // host kernel will clear iommu table in this scenario. 
+ + Ok(VmmData::Empty) + } + #[cfg(feature = "hotplug")] #[instrument(skip(self))] fn resize_vcpu(&mut self, vmm: &mut Vmm, config: VcpuResizeInfo) -> VmmRequestResult { diff --git a/src/dragonball/src/dbs_device/src/resources.rs b/src/dragonball/src/dbs_device/src/resources.rs index e87b0fe87453..b834c16c3e6a 100644 --- a/src/dragonball/src/dbs_device/src/resources.rs +++ b/src/dragonball/src/dbs_device/src/resources.rs @@ -202,7 +202,7 @@ pub enum Resource { size: u32, }, /// Network Interface Card MAC address. - MacAddresss(String), + MacAddress(String), /// KVM memslot index. KvmMemSlot(u32), } @@ -310,7 +310,7 @@ impl DeviceResources { /// Get the first resource information for NIC MAC address. pub fn get_mac_address(&self) -> Option { for entry in self.0.iter().as_ref() { - if let Resource::MacAddresss(addr) = entry { + if let Resource::MacAddress(addr) = entry { return Some(addr.clone()); } } @@ -403,7 +403,7 @@ pub(crate) mod tests { resource.append(entry.clone()); assert_eq!(entry, resource[6]); - let entry = Resource::MacAddresss(MAC_ADDRESS.to_string()); + let entry = Resource::MacAddress(MAC_ADDRESS.to_string()); resource.append(entry.clone()); assert_eq!(entry, resource[7]); diff --git a/src/dragonball/src/dbs_pci/src/lib.rs b/src/dragonball/src/dbs_pci/src/lib.rs index 92ef8b4eac65..625682c1f19d 100644 --- a/src/dragonball/src/dbs_pci/src/lib.rs +++ b/src/dragonball/src/dbs_pci/src/lib.rs @@ -40,6 +40,8 @@ pub use configuration::{ mod device; pub use device::PciDevice; +#[cfg(target_arch = "aarch64")] +pub use device::{PciBusResources, ECAM_SPACE_LENGTH}; mod root_bus; pub use root_bus::create_pci_root_bus; @@ -54,6 +56,7 @@ mod msix; pub use msix::{MsixCap, MsixState, MSIX_TABLE_ENTRY_SIZE}; mod vfio; +pub use vfio::{VfioPciDevice, VfioPciError, VENDOR_NVIDIA}; /// Error codes related to PCI root/bus/device operations. 
#[derive(Debug, thiserror::Error)] diff --git a/src/dragonball/src/dbs_pci/src/vfio.rs b/src/dragonball/src/dbs_pci/src/vfio.rs index 7ca6fcc142bc..c828c4ccbe0c 100644 --- a/src/dragonball/src/dbs_pci/src/vfio.rs +++ b/src/dragonball/src/dbs_pci/src/vfio.rs @@ -3,6 +3,7 @@ // // SPDX-License-Identifier: Apache-2.0 +use std::any::Any; use std::io; use std::os::unix::io::AsRawFd; use std::ptr::null_mut; @@ -228,7 +229,7 @@ impl Interrupt { fn get_irq_pin(&self) -> u32 { if let Some(legacy_irq) = self.legacy_irq { - (PciInterruptPin::IntA as u32) << 8 | self.legacy_irq.unwrap() + (PciInterruptPin::IntA as u32) << 8 | legacy_irq } else { 0 } @@ -884,7 +885,7 @@ impl Region { } } -struct VfioPciDeviceState { +pub struct VfioPciDeviceState { vfio_path: String, interrupt: Interrupt, vfio_dev: Arc, @@ -947,6 +948,10 @@ impl VfioPciDeviceState { }) } + pub fn vfio_dev(&self) -> &Arc { + &self.vfio_dev + } + fn read_config_byte(&self, offset: u32) -> u8 { let mut data: [u8; 1] = [0]; self.vfio_dev @@ -1314,6 +1319,23 @@ impl VfioPciDeviceState { Ok(()) } + fn free_register_resources(&self) -> Result<()> { + let mut register_resources = DeviceResources::new(); + for region in self.regions.iter() { + let resources = region.to_resources(); + for res in resources.get_all_resources() { + register_resources.append(res.clone()); + } + } + + self.bus + .upgrade() + .ok_or(VfioPciError::BusIsDropped)? 
+ .free_resources(register_resources); + + Ok(()) + } + fn unregister_regions(&mut self, vm: &Arc) -> Result<()> { // This routine handle VfioPciDevice dropped but not unmap memory if self.context.upgrade().is_none() { @@ -1661,7 +1683,7 @@ impl VfioPciDevice { Ok(()) } - fn state(&self) -> MutexGuard> { + pub fn state(&self) -> MutexGuard> { // Don't expect poisoned lock self.state .lock() @@ -1687,6 +1709,14 @@ impl VfioPciDevice { .expect("poisoned lock for VFIO PCI device") .read_config_word(PCI_CONFIG_VENDOR_OFFSET) } + + pub fn clear_device(&self) -> Result<()> { + let mut state = self.state(); + state.free_register_resources()?; + let _ = state.unregister_regions(&self.vm_fd); + + Ok(()) + } } impl DeviceIo for VfioPciDevice { @@ -1784,7 +1814,8 @@ impl DeviceIo for VfioPciDevice { fn get_trapped_io_resources(&self) -> DeviceResources { self.state().trapped_resources.clone() } - fn as_any(&self) -> &dyn std::any::Any { + + fn as_any(&self) -> &dyn Any { self } } diff --git a/src/dragonball/src/dbs_upcall/src/dev_mgr_service.rs b/src/dragonball/src/dbs_upcall/src/dev_mgr_service.rs index f618828108ea..5a4068fd0871 100755 --- a/src/dragonball/src/dbs_upcall/src/dev_mgr_service.rs +++ b/src/dragonball/src/dbs_upcall/src/dev_mgr_service.rs @@ -45,6 +45,16 @@ struct DevMgrMsgHeader { pub msg_flags: u32, } +/// Command struct to add/del a PCI Device. +#[repr(C)] +#[derive(Copy, Clone, Debug, PartialEq)] +pub struct PciDevRequest { + /// PCI bus number + pub busno: u8, + /// Combined device number and function number + pub devfn: u8, +} + /// Command struct to add/del a MMIO Virtio Device. 
#[repr(C)] #[derive(Copy, Clone, Debug, Eq, PartialEq)] @@ -128,6 +138,10 @@ pub enum DevMgrRequest { AddVcpu(CpuDevRequest), /// Del a VCPU DelVcpu(CpuDevRequest), + /// Add a PCI device + AddPciDev(PciDevRequest), + /// Delete a PCI device + DelPciDev(PciDevRequest), } impl DevMgrRequest { @@ -167,6 +181,18 @@ impl DevMgrRequest { let vcpu_dev = unsafe { &mut *(buffer[size_hdr..].as_ptr() as *mut CpuDevRequest) }; *vcpu_dev = s.clone(); } + DevMgrRequest::AddPciDev(s) => { + msg_hdr.msg_type = DevMgrMsgType::AddPci as u32; + msg_hdr.msg_size = mem::size_of::() as u32; + let pci_dev = unsafe { &mut *(buffer[size_hdr..].as_ptr() as *mut PciDevRequest) }; + *pci_dev = *s; + } + DevMgrRequest::DelPciDev(s) => { + msg_hdr.msg_type = DevMgrMsgType::DelPci as u32; + msg_hdr.msg_size = mem::size_of::() as u32; + let pci_dev = unsafe { &mut *(buffer[size_hdr..].as_ptr() as *mut PciDevRequest) }; + *pci_dev = *s; + } } buffer diff --git a/src/dragonball/src/dbs_upcall/src/lib.rs b/src/dragonball/src/dbs_upcall/src/lib.rs index 8e03c4e01236..d6daae681752 100755 --- a/src/dragonball/src/dbs_upcall/src/lib.rs +++ b/src/dragonball/src/dbs_upcall/src/lib.rs @@ -23,7 +23,7 @@ use log::{debug, error, info, trace, warn}; use timerfd::{SetTimeFlags, TimerFd, TimerState}; pub use crate::dev_mgr_service::{ - CpuDevRequest, DevMgrRequest, DevMgrResponse, DevMgrService, MmioDevRequest, + CpuDevRequest, DevMgrRequest, DevMgrResponse, DevMgrService, MmioDevRequest, PciDevRequest, }; const SERVER_PORT: u32 = 0xDB; diff --git a/src/dragonball/src/dbs_virtio_devices/src/mmio/mmio_v2.rs b/src/dragonball/src/dbs_virtio_devices/src/mmio/mmio_v2.rs index fad6bbb89d88..23fa7ee93dc5 100644 --- a/src/dragonball/src/dbs_virtio_devices/src/mmio/mmio_v2.rs +++ b/src/dragonball/src/dbs_virtio_devices/src/mmio/mmio_v2.rs @@ -2,6 +2,7 @@ // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause +use std::any::Any; use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::{Arc, Mutex, MutexGuard}; @@ 
-484,7 +485,7 @@ where resources } - fn as_any(&self) -> &dyn std::any::Any { + fn as_any(&self) -> &dyn Any { self } } diff --git a/src/dragonball/src/device_manager/mod.rs b/src/dragonball/src/device_manager/mod.rs index 090c13524525..d50294471fc3 100644 --- a/src/dragonball/src/device_manager/mod.rs +++ b/src/dragonball/src/device_manager/mod.rs @@ -20,6 +20,8 @@ use dbs_device::resources::Resource; use dbs_device::DeviceIo; use dbs_interrupt::KvmIrqManager; use dbs_legacy_devices::ConsoleHandler; +#[cfg(all(feature = "host-device", target_arch = "aarch64"))] +use dbs_pci::PciBusResources; use dbs_utils::epoll_manager::EpollManager; use kvm_ioctls::VmFd; @@ -36,9 +38,11 @@ use dbs_virtio_devices::{ VirtioDevice, }; +#[cfg(feature = "host-device")] +use dbs_pci::VfioPciDevice; #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] use dbs_upcall::{ - DevMgrRequest, DevMgrService, MmioDevRequest, UpcallClient, UpcallClientError, + DevMgrRequest, DevMgrService, MmioDevRequest, PciDevRequest, UpcallClient, UpcallClientError, UpcallClientRequest, UpcallClientResponse, }; #[cfg(feature = "hotplug")] @@ -46,6 +50,8 @@ use dbs_virtio_devices::vsock::backend::VsockInnerConnector; use crate::address_space_manager::GuestAddressSpaceImpl; use crate::api::v1::InstanceInfo; +#[cfg(feature = "host-device")] +use crate::device_manager::vfio_dev_mgr::PciSystemManager; use crate::error::StartMicroVmError; use crate::resource_manager::ResourceManager; use crate::vm::{KernelConfigInfo, Vm, VmConfigInfo}; @@ -107,6 +113,11 @@ use self::balloon_dev_mgr::BalloonDeviceMgr; pub mod vhost_net_dev_mgr; #[cfg(feature = "vhost-net")] use self::vhost_net_dev_mgr::VhostNetDeviceMgr; +#[cfg(feature = "host-device")] +/// Device manager for PCI/MMIO VFIO devices. +pub mod vfio_dev_mgr; +#[cfg(feature = "host-device")] +use self::vfio_dev_mgr::VfioDeviceMgr; #[cfg(feature = "vhost-user-net")] /// Device manager for vhost-user-net devices. 
@@ -164,6 +175,11 @@ pub enum DeviceMgrError { /// Failed to free device resource. #[error("failed to free device resources: {0}")] ResourceError(#[source] crate::resource_manager::ResourceError), + + #[cfg(feature = "host-device")] + /// Error from Vfio Pci + #[error("failed to do vfio pci operation: {0:?}")] + VfioPci(#[source] dbs_pci::VfioPciError), } /// Specialized version of `std::result::Result` for device manager operations. @@ -268,11 +284,15 @@ pub struct DeviceOpContext { address_space: Option, logger: slog::Logger, is_hotplug: bool, + #[cfg(all(feature = "hotplug", feature = "host-device"))] + pci_hotplug_enabled: bool, #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] upcall_client: Option>>, #[cfg(feature = "dbs-virtio-devices")] virtio_devices: Vec>, + #[cfg(feature = "host-device")] + vfio_manager: Option>>, vm_config: Option, shared_info: Arc>, } @@ -297,6 +317,12 @@ impl DeviceOpContext { }; let logger = device_mgr.logger.new(slog::o!()); + #[cfg(all(feature = "hotplug", feature = "host-device"))] + let pci_hotplug_enabled = vm_config + .clone() + .map(|c| c.pci_hotplug_enabled) + .unwrap_or(false); + DeviceOpContext { epoll_mgr, io_context, @@ -307,12 +333,16 @@ impl DeviceOpContext { address_space, logger, is_hotplug, + #[cfg(all(feature = "hotplug", feature = "host-device"))] + pci_hotplug_enabled, #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] upcall_client: None, #[cfg(feature = "dbs-virtio-devices")] virtio_devices: Vec::new(), vm_config, shared_info, + #[cfg(feature = "host-device")] + vfio_manager: None, } } @@ -435,6 +465,13 @@ impl DeviceOpContext { } } +#[cfg(feature = "host-device")] +impl DeviceOpContext { + pub(crate) fn set_vfio_manager(&mut self, vfio_device_mgr: Arc>) { + self.vfio_manager = Some(vfio_device_mgr); + } +} + #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] impl DeviceOpContext { pub(crate) fn create_hotplug_ctx(vm: &Vm, epoll_mgr: Option) -> Self { @@ -510,6 +547,37 @@ impl DeviceOpContext 
{ self.call_hotplug_device(req, callback) } + + #[cfg(feature = "host-device")] + pub(crate) fn insert_hotplug_pci_device( + &self, + dev: &Arc, + callback: Option>, + ) -> Result<()> { + if !self.is_hotplug || !self.pci_hotplug_enabled { + return Err(DeviceMgrError::InvalidOperation); + } + + let (busno, devfn) = DeviceManager::get_pci_device_info(dev)?; + let req = DevMgrRequest::AddPciDev(PciDevRequest { busno, devfn }); + + self.call_hotplug_device(req, callback) + } + + #[cfg(feature = "host-device")] + pub(crate) fn remove_hotplug_pci_device( + &self, + dev: &Arc, + callback: Option>, + ) -> Result<()> { + if !self.is_hotplug || !self.pci_hotplug_enabled { + return Err(DeviceMgrError::InvalidOperation); + } + let (busno, devfn) = DeviceManager::get_pci_device_info(dev)?; + let req = DevMgrRequest::DelPciDev(PciDevRequest { busno, devfn }); + + self.call_hotplug_device(req, callback) + } } #[cfg(all(feature = "hotplug", feature = "acpi"))] @@ -555,6 +623,8 @@ pub struct DeviceManager { #[cfg(feature = "vhost-user-net")] vhost_user_net_manager: VhostUserNetDeviceMgr, + #[cfg(feature = "host-device")] + pub(crate) vfio_manager: Arc>, } impl DeviceManager { @@ -571,7 +641,7 @@ impl DeviceManager { io_lock: Arc::new(Mutex::new(())), irq_manager: Arc::new(KvmIrqManager::new(vm_fd.clone())), res_manager, - vm_fd, + vm_fd: vm_fd.clone(), logger: logger.new(slog::o!()), shared_info, @@ -595,6 +665,8 @@ impl DeviceManager { vhost_net_manager: VhostNetDeviceMgr::default(), #[cfg(feature = "vhost-user-net")] vhost_user_net_manager: VhostUserNetDeviceMgr::default(), + #[cfg(feature = "host-device")] + vfio_manager: Arc::new(Mutex::new(VfioDeviceMgr::new(vm_fd, logger))), } } @@ -775,6 +847,14 @@ impl DeviceManager { .attach_devices(&mut ctx) .map_err(StartMicroVmError::VhostUserNetDeviceError)?; + #[cfg(feature = "host-device")] + { + // It is safe because we don't expect poison lock.
+ let mut vfio_manager = self.vfio_manager.lock().unwrap(); + vfio_manager.attach_devices(&mut ctx)?; + ctx.set_vfio_manager(self.vfio_manager.clone()) + } + // Ensure that all devices are attached before kernel boot args are // generated. ctx.generate_kernel_boot_args(kernel_config) @@ -792,8 +872,17 @@ impl DeviceManager { } /// Start all registered devices when booting the associated virtual machine. - pub fn start_devices(&mut self) -> std::result::Result<(), StartMicroVmError> { - // TODO: add vfio support here. issue #4589. + pub fn start_devices( + &mut self, + vm_as: &GuestAddressSpaceImpl, + ) -> std::result::Result<(), StartMicroVmError> { + // It is safe because we don't expect poison lock. + #[cfg(feature = "host-device")] + self.vfio_manager + .lock() + .unwrap() + .start_devices(vm_as) + .map_err(StartMicroVmError::RegisterDMAAddress)?; Ok(()) } @@ -943,6 +1032,23 @@ impl DeviceManager { Err(DeviceMgrError::GetDeviceResource) } + + /// Get pci bus resources for creating fdt. 
+ #[cfg(feature = "host-device")] + pub fn get_pci_bus_resources(&self) -> Option { + let mut vfio_dev_mgr = self.vfio_manager.lock().unwrap(); + let vfio_pci_mgr = vfio_dev_mgr.get_pci_manager(); + if vfio_pci_mgr.is_none() { + return None; + } + let pci_manager = vfio_pci_mgr.unwrap(); + let ecam_space = pci_manager.get_ecam_space(); + let bar_space = pci_manager.get_bar_space(); + Some(PciBusResources { + ecam_space, + bar_space, + }) + } } #[cfg(feature = "dbs-virtio-devices")] @@ -1115,6 +1221,30 @@ impl DeviceManager { Ok(()) } } + + #[cfg(feature = "host-device")] + fn get_pci_device_info(device: &Arc) -> Result<(u8, u8)> { + if let Some(pci_dev) = device + .as_any() + .downcast_ref::>() + { + // reference from kernel: include/uapi/linux/pci.h + let busno = pci_dev.bus_id().map_err(DeviceMgrError::VfioPci)?; + let slot = pci_dev.device_id(); + let func = 0; + // The slot/function address of each device is encoded + // in a single byte as follows: + // + // 7:3 = slot + // 2:0 = function + // together those 8 bits combined as devfn value + let devfn = (((slot) & 0x1f) << 3) | ((func) & 0x07); + + return Ok((busno, devfn)); + } + + Err(DeviceMgrError::GetDeviceResource) + } } #[cfg(feature = "hotplug")] @@ -1202,6 +1332,8 @@ mod tests { vhost_net_manager: VhostNetDeviceMgr::default(), #[cfg(feature = "vhost-user-net")] vhost_user_net_manager: VhostUserNetDeviceMgr::default(), + #[cfg(feature = "host-device")] + vfio_manager: Arc::new(Mutex::new(VfioDeviceMgr::new(vm_fd, &logger))), logger, shared_info, @@ -1243,6 +1375,7 @@ mod tests { sockets: 1, }, vpmu_feature: 0, + pci_hotplug_enabled: false, }; vm.set_vm_config(vm_config.clone()); vm.init_guest_memory().unwrap(); diff --git a/src/dragonball/src/device_manager/vfio_dev_mgr/mod.rs b/src/dragonball/src/device_manager/vfio_dev_mgr/mod.rs new file mode 100644 index 000000000000..c3c3b6bacf69 --- /dev/null +++ b/src/dragonball/src/device_manager/vfio_dev_mgr/mod.rs @@ -0,0 +1,818 @@ +// Copyright 2023 Alibaba, 
Inc. or its affiliates. All Rights Reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. +//! Device manager for host passthrough devices. +// we allow missing_doc temporarily, because rust can't use this declaration in macro + +#![allow(missing_docs)] +mod pci_vfio; +pub use pci_vfio::PciSystemManager; + +use std::collections::HashMap; +use std::ops::Deref; +use std::os::fd::RawFd; +use std::path::Path; +use std::sync::{Arc, Weak}; + +use crossbeam_channel::Sender; +use dbs_device::resources::Resource::LegacyIrq; +use dbs_device::resources::{DeviceResources, Resource, ResourceConstraint}; +use dbs_device::DeviceIo; +use dbs_interrupt::KvmIrqManager; +use dbs_pci::{VfioPciDevice, VENDOR_NVIDIA}; +use dbs_upcall::{DevMgrResponse, UpcallClientResponse}; +use kvm_ioctls::{DeviceFd, VmFd}; +use log::{debug, error}; +use serde_derive::{Deserialize, Serialize}; +use vfio_ioctls::{VfioContainer, VfioDevice}; +use vm_memory::{ + Address, GuestAddressSpace, GuestMemory, GuestMemoryRegion, GuestRegionMmap, + MemoryRegionAddress, +}; + +use super::StartMicroVmError; +use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl}; +use crate::config_manager::{ConfigItem, DeviceConfigInfo, DeviceConfigInfos}; +use crate::device_manager::{DeviceManagerContext, DeviceMgrError, DeviceOpContext}; +use crate::resource_manager::{ResourceError, ResourceManager}; + +// The flag of whether to use the shared irq. +const USE_SHARED_IRQ: bool = true; + +/// Errors associated with the operations allowed on a host device +#[derive(Debug, thiserror::Error)] +pub enum VfioDeviceError { + /// Internal error. + #[error("VFIO subsystem internal error")] + InternalError, + + /// The virtual machine instance ID is invalid.
+ #[error("the virtual machine instance ID is invalid")] + InvalidVMID, + + /// Cannot open host device due to invalid bus::slot::function + #[error("can't open host device for VFIO")] + CannotOpenVfioDevice, + + /// The Context Identifier is already in use. + #[error("the device ID {0} already exists")] + DeviceIDAlreadyExist(String), + + /// Host device string (bus::slot::function) is already in use + #[error("device '{0}' is already in use")] + DeviceAlreadyInUse(String), + + /// The configuration of vfio device is invalid. + #[error("The configuration of vfio device is invalid")] + InvalidConfig, + + /// No resource available + #[error("no resource available for VFIO device")] + NoResource, + + /// Cannot perform the requested operation after booting the microVM + #[error("update operation is not allowed after boot")] + UpdateNotAllowedPostBoot, + + /// Failed to create kvm device + #[error("failed to create kvm device: {0:?}")] + CreateKvmDevice(#[source] vmm_sys_util::errno::Error), + + /// Failed to restore vfio mlock count + #[error("failure while restoring vfio mlock count: {0:?}")] + RestoreMlockCount(#[source] std::io::Error), + + /// Failure in device manager while managing VFIO device + #[error("failure in device manager while managing VFIO device, {0:?}")] + VfioDeviceMgr(#[source] DeviceMgrError), + + /// Failure in VFIO IOCTL subsystem. + #[error("failure while configuring VFIO device, {0:?}")] + VfioIoctlError(#[source] vfio_ioctls::VfioError), + + /// Failure in VFIO PCI subsystem. + #[error("failure while managing PCI VFIO device: {0:?}")] + VfioPciError(#[source] dbs_pci::VfioPciError), + + /// Failure in PCI subsystem. 
+ #[error("PCI subsystem failed to manage the device: {0:?}")] + PciError(#[source] dbs_pci::Error), + + /// Failed to get vfio host info + #[error("PCI get host info failed: {0}")] + GetHostInfo(String), + + /// Invalid PCI device ID + #[error("invalid PCI device ID: {0}")] + InvalidDeviceID(u32), + + /// Failed to allocate device resource + #[error("failure while allocate device resource: {0:?}")] + AllocateDeviceResource(#[source] ResourceError), + + /// Failed to free device resource + #[error("failure while freeing device resource: {0:?}")] + FreeDeviceResource(#[source] ResourceError), + + /// Vfio container not found + #[error("vfio container not found")] + VfioContainerNotFound, + + /// Generic IO error. + #[error("Generic IO error, {0}")] + IoError(#[source] std::io::Error), +} + +type Result = std::result::Result; + +/// Host info for vfio device +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] +pub struct VfioDeviceHostInfo { + pub group_id: u32, + pub group_fd: RawFd, + pub device_fd: RawFd, +} + +/// Configuration information for a VFIO PCI device. 
+#[derive(Clone, Debug, Deserialize, PartialEq, Serialize, Default)] +pub struct VfioPciDeviceConfig { + /// PCI device information: "bus:slot:function" + pub bus_slot_func: String, + /// PCI vendor and device id + /// high 16bit : low 16bit = device_id : vendor_id + pub vendor_device_id: u32, + /// Device ID used in guest, guest_dev_id = slot + pub guest_dev_id: Option, + /// Clique ID for Nvidia GPUs and RDMA NICs + pub clique_id: Option, +} + +impl VfioPciDeviceConfig { + /// default pci domain is 0 + pub fn host_pci_domain(&self) -> u32 { + 0 + } + + pub fn valid_vendor_device(&self) -> bool { + if self.vendor_device_id == 0 { + return true; + } + // vendor_device_id high 16bit : low 16bit = device_id : vendor_id + self.vendor_device_id != 0 + && (self.vendor_device_id & 0xffff) != 0 + && ((self.vendor_device_id >> 16) & 0xffff) != 0 + } +} + +/// Configuration for a specific Vfio Device +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub enum VfioDevConfig { + Pci(VfioPciDeviceConfig), +} + +impl Default for VfioDevConfig { + fn default() -> Self { + Self::Pci(Default::default()) + } +} + +/// Configuration information for a VFIO device.
+#[derive(Clone, Debug, PartialEq, Deserialize, Serialize, Default)] +pub struct HostDeviceConfig { + /// Unique identifier of the hostdev + pub hostdev_id: String, + /// Sysfs path for device + pub sysfs_path: String, + /// Device specific config + pub dev_config: VfioPciDeviceConfig, +} + +impl ConfigItem for HostDeviceConfig { + type Err = VfioDeviceError; + + fn id(&self) -> &str { + &self.hostdev_id + } + + fn check_conflicts(&self, other: &Self) -> Result<()> { + if self.hostdev_id == other.hostdev_id { + return Err(VfioDeviceError::DeviceIDAlreadyExist( + self.hostdev_id.clone(), + )); + } + + if !self.sysfs_path.is_empty() && self.sysfs_path == other.sysfs_path { + return Err(VfioDeviceError::DeviceAlreadyInUse(self.sysfs_path.clone())); + } + + if !self.dev_config.bus_slot_func.is_empty() + && self.dev_config.bus_slot_func == other.dev_config.bus_slot_func + { + return Err(VfioDeviceError::DeviceAlreadyInUse( + self.dev_config.bus_slot_func.clone(), + )); + } + + Ok(()) + } +} + +/// Vfio device info +pub type VfioDeviceInfo = DeviceConfigInfo; + +/// A device manager to manage all VFIO devices. +pub struct VfioDeviceMgr { + vm_fd: Arc, + info_list: DeviceConfigInfos, + locked_vm_size: u64, + vfio_container: Option>, + pci_vfio_manager: Option>, + pci_legacy_irqs: Option>, + nvidia_shared_irq: Option, + logger: slog::Logger, +} + +impl VfioDeviceMgr { + /// Create a new VFIO device manager. 
+ pub fn new(vm_fd: Arc, logger: &slog::Logger) -> Self { + VfioDeviceMgr { + vm_fd, + info_list: DeviceConfigInfos::new(), + locked_vm_size: 0, + vfio_container: None, + pci_vfio_manager: None, + pci_legacy_irqs: Some(HashMap::new()), + nvidia_shared_irq: None, + logger: logger.new(slog::o!()), + } + } + + /// Insert or update a VFIO device into the manager. + pub fn insert_device( + &mut self, + ctx: &mut DeviceOpContext, + config: HostDeviceConfig, + ) -> Result<()> { + if !cfg!(feature = "hotplug") && ctx.is_hotplug { + return Err(VfioDeviceError::UpdateNotAllowedPostBoot); + } + slog::info!( + ctx.logger(), + "add VFIO device configuration"; + "subsystem" => "vfio_dev_mgr", + "hostdev_id" => &config.hostdev_id, + "bdf" => &config.dev_config.bus_slot_func, + ); + let device_index = self.info_list.insert_or_update(&config)?; + // Handle device hotplug case + if ctx.is_hotplug { + slog::info!( + ctx.logger(), + "attach VFIO device"; + "subsystem" => "vfio_dev_mgr", + "hostdev_id" => &config.hostdev_id, + "bdf" => &config.dev_config.bus_slot_func, + ); + self.add_device(ctx, &config, device_index)?; + } + + Ok(()) + } + + /// Attach all configured VFIO devices to the virtual machine instance.
+ pub fn attach_devices( + &mut self, + ctx: &mut DeviceOpContext, + ) -> std::result::Result<(), StartMicroVmError> { + // create and attach pci root bus + #[cfg(all(feature = "hotplug", feature = "host-device"))] + if ctx.pci_hotplug_enabled { + let _ = self + .create_pci_manager( + ctx.irq_manager.clone(), + ctx.io_context.clone(), + ctx.res_manager.clone(), + ) + .map_err(StartMicroVmError::CreateVfioDevice)?; + } + for (idx, info) in self.info_list.clone().iter().enumerate() { + self.create_device(&info.config, ctx, idx) + .map_err(StartMicroVmError::CreateVfioDevice)?; + } + Ok(()) + } + + pub fn remove_device(&mut self, ctx: &mut DeviceOpContext, hostdev_id: &str) -> Result<()> { + if !cfg!(feature = "hotplug") { + return Err(VfioDeviceError::UpdateNotAllowedPostBoot); + } + + slog::info!( + ctx.logger(), + "remove VFIO device"; + "subsystem" => "vfio_dev_mgr", + "hostdev_id" => hostdev_id, + ); + let device_index = self + .get_index_of_hostdev_id(hostdev_id) + .ok_or(VfioDeviceError::InvalidConfig)?; + let mut info = self + .info_list + .remove(device_index) + .ok_or(VfioDeviceError::InvalidConfig)?; + + self.remove_vfio_device(ctx, &mut info) + } + + /// prepare to remove device + pub fn prepare_remove_device( + &self, + ctx: &DeviceOpContext, + hostdev_id: &str, + result_sender: Sender>, + ) -> Result<()> { + if !cfg!(feature = "hotplug") { + return Err(VfioDeviceError::UpdateNotAllowedPostBoot); + } + + slog::info!( + ctx.logger(), + "prepare remove VFIO device"; + "subsystem" => "vfio_dev_mgr", + "hostdev_id" => hostdev_id, + ); + + let device_index = self + .get_index_of_hostdev_id(hostdev_id) + .ok_or(VfioDeviceError::InvalidConfig)?; + + let info = &self.info_list[device_index]; + if let Some(dev) = info.device.as_ref() { + let callback: Option> = + Some(Box::new(move |result| match result { + UpcallClientResponse::DevMgr(response) => { + if let DevMgrResponse::Other(resp) = response { + if let Err(e) = result_sender.send(Some(resp.result)) { + 
error!("send upcall result failed, due to {:?}!", e); + } + } + } + UpcallClientResponse::UpcallReset => { + if let Err(e) = result_sender.send(None) { + error!("send upcall result failed, due to {:?}!", e); + } + } + #[allow(unreachable_patterns)] + _ => { + debug!("this arm should only be triggered under test"); + } + })); + ctx.remove_hotplug_pci_device(dev, callback) + .map_err(VfioDeviceError::VfioDeviceMgr)? + } + Ok(()) + } + + fn remove_vfio_device( + &mut self, + ctx: &mut DeviceOpContext, + info: &mut DeviceConfigInfo, + ) -> Result<()> { + let device = info.device.take().ok_or(VfioDeviceError::InvalidConfig)?; + self.remove_pci_vfio_device(&device, ctx)?; + Ok(()) + } + + /// Start all VFIO devices. + pub fn start_devices(&mut self, vm_as: &GuestAddressSpaceImpl) -> Result<()> { + if self.vfio_container.is_some() { + let vm_memory = vm_as.memory(); + self.register_memory(vm_memory.deref())?; + } + Ok(()) + } + + pub(crate) fn get_kvm_dev_fd(&self) -> Result { + let mut kvm_vfio_dev = kvm_bindings::kvm_create_device { + type_: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_VFIO, + fd: 0, + flags: 0, + }; + let kvm_dev_fd = self + .vm_fd + .create_device(&mut kvm_vfio_dev) + .map_err(|e| VfioDeviceError::IoError(std::io::Error::from_raw_os_error(e.errno())))?; + Ok(kvm_dev_fd) + } + + /// Get vfio container object. You should call get_vfio_manager to get vfio_manager Firstly. 
+ pub fn get_vfio_container(&mut self) -> Result> { + if let Some(vfio_container) = self.vfio_container.as_ref() { + Ok(vfio_container.clone()) + } else { + let kvm_dev_fd = Arc::new(self.get_kvm_dev_fd()?); + let vfio_container = + Arc::new(VfioContainer::new(kvm_dev_fd).map_err(VfioDeviceError::VfioIoctlError)?); + self.vfio_container = Some(vfio_container.clone()); + + Ok(vfio_container) + } + } + + fn create_device( + &mut self, + cfg: &HostDeviceConfig, + ctx: &mut DeviceOpContext, + idx: usize, + ) -> Result> { + let sysfs_path = Self::build_sysfs_path(cfg)?; + let device = self.attach_pci_vfio_device(ctx, sysfs_path, &cfg.dev_config)?; + self.info_list[idx].device = Some(device.clone()); + Ok(device) + } + + fn add_device( + &mut self, + ctx: &mut DeviceOpContext, + cfg: &HostDeviceConfig, + idx: usize, + ) -> Result<()> { + let dev = self.create_device(cfg, ctx, idx)?; + if self.locked_vm_size == 0 && self.vfio_container.is_some() { + let vm_as = ctx + .get_vm_as() + .map_err(|_| VfioDeviceError::InternalError)?; + let vm_memory = vm_as.memory(); + + self.register_memory(vm_memory.deref())?; + } + ctx.insert_hotplug_pci_device(&dev, None) + .map_err(VfioDeviceError::VfioDeviceMgr) + } + + /// Gets the index of the device with the specified `hostdev_id` if it exists in the list. + fn get_index_of_hostdev_id(&self, id: &str) -> Option { + self.info_list + .iter() + .position(|info| info.config.id().eq(id)) + } + + /// Register guest memory to the VFIO container. + /// + /// # Arguments + /// * `guest_mem`: guest memory configuration object. 
+ pub(crate) fn register_memory(&mut self, vm_memory: &GuestMemoryImpl) -> Result<()> { + for region in vm_memory.iter() { + self.register_memory_region(region)?; + } + Ok(()) + } + + pub(crate) fn register_memory_region(&mut self, region: &GuestRegionMmap) -> Result<()> { + let gpa = region.start_addr().raw_value(); + let size = region.len(); + let user_addr = region + .get_host_address(MemoryRegionAddress(0)) + .expect("guest memory region should be mapped and has HVA.") + as u64; + let readonly = region.prot() & libc::PROT_WRITE == 0; + self.register_region(gpa, size, user_addr, readonly) + } + + pub(crate) fn register_region( + &mut self, + iova: u64, + size: u64, + user_addr: u64, + readonly: bool, + ) -> Result<()> { + slog::info!( + self.logger, + "map guest physical memory"; + "subsystem" => "vfio_dev_mgr", + "iova" => iova, + "size" => size, + "user_addr" => user_addr, + "readonly" => readonly, + ); + //FIXME: add readonly flag when related commit is pushed to upstream vfio-ioctls + self.get_vfio_container()? + .vfio_dma_map(iova, size, user_addr) + .map_err(VfioDeviceError::VfioIoctlError)?; + self.locked_vm_size += size; + Ok(()) + } + + /// Clear locked size because iommu table is cleared + pub(crate) fn clear_locked_size(&mut self) { + self.locked_vm_size = 0; + } + + pub(crate) fn unregister_region(&mut self, region: &GuestRegionMmap) -> Result<()> { + let gpa = region.start_addr().raw_value(); + let size = region.len(); + + self.get_vfio_container()? 
+ .vfio_dma_unmap(gpa, size) + .map_err(VfioDeviceError::VfioIoctlError)?; + + self.locked_vm_size -= size; + Ok(()) + } + + pub(crate) fn update_memory(&mut self, region: &GuestRegionMmap) -> Result<()> { + if self.locked_vm_size != 0 { + self.register_memory_region(region)?; + } + Ok(()) + } + + pub(crate) fn build_sysfs_path(cfg: &HostDeviceConfig) -> Result { + if cfg.sysfs_path.is_empty() { + let (bdf, domain) = ( + &cfg.dev_config.bus_slot_func, + cfg.dev_config.host_pci_domain(), + ); + let len = bdf.split(':').count(); + if len == 0 { + Err(VfioDeviceError::InvalidConfig) + } else if len == 2 { + Ok(format!("/sys/bus/pci/devices/{:04}:{}", domain, bdf)) + } else { + Ok(format!("/sys/bus/pci/devices/{}", bdf)) + } + } else { + Ok(cfg.sysfs_path.clone()) + } + } + + /// Get all PCI devices' legacy irqs + pub fn get_pci_legacy_irqs(&self) -> Option<&HashMap> { + self.pci_legacy_irqs.as_ref() + } +} + +impl VfioDeviceMgr { + pub(super) fn attach_pci_vfio_device( + &mut self, + ctx: &mut DeviceOpContext, + sysfs_path: String, + cfg: &VfioPciDeviceConfig, + ) -> Result> { + slog::info!( + ctx.logger(), + "attach vfio pci device"; + "subsystem" => "vfio_dev_mgr", + "host_bdf" => &cfg.bus_slot_func, + ); + // safe to get pci_manager + let pci_manager = self.create_pci_manager( + ctx.irq_manager.clone(), + ctx.io_context.clone(), + ctx.res_manager.clone(), + )?; + let pci_bus = pci_manager.pci_root_bus(); + let id = pci_manager + .new_device_id(cfg.guest_dev_id) + .ok_or(VfioDeviceError::NoResource)?; + slog::info!( + ctx.logger(), + "PCI:{} vfio pci device id: {}, vendor_device: 0x{:x}", + &sysfs_path, id, cfg.vendor_device_id; + "subsystem" => "vfio_dev_mgr", + "guest_bdf" => id, + ); + if !cfg.valid_vendor_device() { + return Err(VfioDeviceError::InvalidConfig); + } + let vfio_container = self.get_vfio_container()?; + let vfio_dev = VfioDevice::new(Path::new(&sysfs_path), vfio_container.clone()) + .map_err(VfioDeviceError::VfioIoctlError)?; + // Use Weak::clone 
to break cycle reference: + // + // reference 1: VfioPciDevice reference to PciBus + // reference 2: VfioPciDevice -> PciManager -> PciBus -> VfioPciDevice + let vfio_pci_device = Arc::new( + VfioPciDevice::create( + id, + sysfs_path, + Arc::downgrade(&pci_bus), + vfio_dev, + Arc::downgrade(self.get_pci_manager().unwrap()), + ctx.vm_fd.clone(), + cfg.vendor_device_id, + cfg.clique_id, + vfio_container, + ) + .map_err(VfioDeviceError::VfioPciError)?, + ); + let mut requires = Vec::new(); + vfio_pci_device.get_resource_requirements(&mut requires); + let vendor_id = vfio_pci_device.vendor_id(); + if vendor_id == VENDOR_NVIDIA && self.nvidia_shared_irq.is_some() { + requires.retain(|x| !matches!(x, ResourceConstraint::LegacyIrq { irq: _ })); + } + let mut resource = ctx + .res_manager + .allocate_device_resources(&requires, USE_SHARED_IRQ) + .or(Err(VfioDeviceError::NoResource))?; + if vendor_id == VENDOR_NVIDIA { + if let Some(irq) = self.nvidia_shared_irq { + resource.append(LegacyIrq(irq)); + } else { + self.nvidia_shared_irq = resource.get_legacy_irq(); + } + } + vfio_pci_device + .activate( + Arc::downgrade(&vfio_pci_device) as Weak, + resource, + ) + .map_err(VfioDeviceError::VfioPciError)?; + if let Some(irq) = vfio_pci_device.get_assigned_resources().get_legacy_irq() { + self.pci_legacy_irqs + .as_mut() + .map(|v| v.insert(vfio_pci_device.device_id(), irq as u8)); + } + // PciBus reference to VfioPciDevice + pci_bus + .register_device(vfio_pci_device.clone()) + .map_err(VfioDeviceError::PciError)?; + Ok(vfio_pci_device) + } + + fn remove_pci_vfio_device( + &mut self, + device: &Arc, + ctx: &mut DeviceOpContext, + ) -> Result<()> { + // safe to unwrap because type is decided + let vfio_pci_device = device + .as_any() + .downcast_ref::>() + .unwrap(); + + let device_id = vfio_pci_device.device_id() as u32; + + // safe to unwrap because pci vfio manager is already created + let _ = self + .pci_vfio_manager + .as_mut() + .unwrap() + .free_device_id(device_id) + 
.ok_or(VfioDeviceError::InvalidDeviceID(device_id))?; + + let resources = vfio_pci_device.get_assigned_resources(); + let vendor_id = vfio_pci_device.vendor_id(); + let filtered_resources = if vendor_id == VENDOR_NVIDIA { + let mut filtered_resources = DeviceResources::new(); + for resource in resources.get_all_resources() { + if let Resource::LegacyIrq(_) = resource { + continue; + } else { + filtered_resources.append(resource.clone()) + } + } + filtered_resources + } else { + resources + }; + + ctx.res_manager + .free_device_resources(&filtered_resources) + .map_err(VfioDeviceError::FreeDeviceResource)?; + + vfio_pci_device + .clear_device() + .map_err(VfioDeviceError::VfioPciError)?; + + Ok(()) + } + + pub(crate) fn create_pci_manager( + &mut self, + irq_manager: Arc, + io_context: DeviceManagerContext, + res_manager: Arc, + ) -> Result<&mut Arc> { + if self.pci_vfio_manager.is_none() { + let mut mgr = PciSystemManager::new(irq_manager, io_context, res_manager.clone())?; + let requirements = mgr.resource_requirements(); + let resources = res_manager + .allocate_device_resources(&requirements, USE_SHARED_IRQ) + .or(Err(VfioDeviceError::NoResource))?; + mgr.activate(resources)?; + self.pci_vfio_manager = Some(Arc::new(mgr)); + } + Ok(self.pci_vfio_manager.as_mut().unwrap()) + } + + /// Get the PCI manager to support PCI device passthrough + pub fn get_pci_manager(&mut self) -> Option<&mut Arc> { + self.pci_vfio_manager.as_mut() + } +} + +#[cfg(all(test, feature = "test-mock"))] +mod tests { + use kvm_ioctls::Kvm; + use logger::LOGGER; + use vm_memory::{GuestAddress, GuestMemoryMmap, MmapRegion}; + + use super::*; + use crate::config_manager::DeviceInfoGroup; + use crate::test_utils::tests::create_vm_for_test; + + type VfioDeviceInfo = DeviceInfoGroup; + + fn get_vfio_dev_mgr() -> VfioDeviceMgr { + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let logger = Arc::new(LOGGER.new_logger(slog::o!())); + VfioDeviceMgr::new(vm_fd, 
&logger) + } + + #[test] + fn test_register_memory() { + let mut mgr = get_vfio_dev_mgr(); + // mock for vfio_dma_map. + let mut vfio_container = VfioContainer::default(); + vfio_container.vfio_dma_map = true; + vfio_container.vfio_dma_unmap = true; + mgr.vfio_container = Some(Arc::new(vfio_container)); + let region_size = 0x1000; + let region1 = + GuestRegionMmap::new(MmapRegion::new(region_size).unwrap(), GuestAddress(0x4000)) + .unwrap(); + let region2 = + GuestRegionMmap::new(MmapRegion::new(region_size).unwrap(), GuestAddress(0xc000)) + .unwrap(); + let regions = vec![region1, region2]; + let gmm = Arc::new(GuestMemoryMmap::from_regions(regions).unwrap()); + assert!(mgr.register_memory(&gmm.clone()).is_ok()); + assert_eq!(mgr.locked_vm_size, region_size as u64 * 2); + for region in gmm.iter() { + mgr.unregister_region(region).unwrap(); + } + assert_eq!(mgr.locked_vm_size, 0); + } + + #[test] + fn test_register_region() { + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let logger = Arc::new(LOGGER.new_logger(slog::o!())); + let mut mgr = VfioDeviceMgr::new(vm_fd, &logger); + // mock for vfio_dma_map. 
+ let mut vfio_container = VfioContainer::default(); + vfio_container.vfio_dma_map = true; + vfio_container.vfio_dma_unmap = true; + mgr.vfio_container = Some(Arc::new(vfio_container)); + let region_size = 0x400000; + let region = + GuestRegionMmap::new(MmapRegion::new(region_size).unwrap(), GuestAddress(0x0000)) + .unwrap(); + let gpa = region.start_addr().raw_value(); + let size = region.len() as u64; + let user_addr = region.get_host_address(MemoryRegionAddress(0)).unwrap() as u64; + let readonly = region.prot() & libc::PROT_WRITE == 0; + mgr.register_region(gpa, size, user_addr, readonly).unwrap(); + assert_eq!(mgr.locked_vm_size, region_size as u64); + assert!(mgr.unregister_region(&region).is_ok()); + assert_eq!(mgr.locked_vm_size, 0); + } + + #[test] + fn test_vfio_attach_pci_vfio_devices() { + let vm = create_vm_for_test(); + let mut mgr = vm.device_manager.vfio_manager.lock().unwrap(); + let config = VfioDeviceConfigInfo { + hostdev_id: "hostdev_1".to_string(), + sysfs_path: "uuid1".to_string(), + bus_slot_func: "0:0:1".to_string(), + mode: "pci".to_string(), + vendor_device_id: 0, + guest_dev_id: None, + clique_id: None, + }; + let mut device_op_ctx = DeviceOpContext::new( + Some(vm.epoll_manager.clone()), + &vm.device_manager, + Some(vm.vm_as().unwrap().clone()), + vm.address_space.address_space.clone(), + false, + None, + vm.address_space.get_base_to_slot_map(), + vm.shared_info().clone(), + ); + // Invalid resources. + assert!(matches!( + mgr.attach_pci_vfio_devices(&mut device_op_ctx, &config), + Err(VfioDeviceError::VfioPciError(_)) + )); + } +} diff --git a/src/dragonball/src/device_manager/vfio_dev_mgr/pci_vfio.rs b/src/dragonball/src/device_manager/vfio_dev_mgr/pci_vfio.rs new file mode 100644 index 000000000000..78f03db2f2f6 --- /dev/null +++ b/src/dragonball/src/device_manager/vfio_dev_mgr/pci_vfio.rs @@ -0,0 +1,169 @@ +// Copyright (C) 2023 Alibaba Cloud. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0 + +use std::sync::Arc; + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use dbs_device::resources::Resource; +use dbs_device::resources::{DeviceResources, ResourceConstraint}; +use dbs_interrupt::KvmIrqManager; +#[cfg(target_arch = "aarch64")] +use dbs_pci::ECAM_SPACE_LENGTH; +use dbs_pci::{create_pci_root_bus, PciBus, PciDevice, PciRootDevice, PciSystemContext}; + +use super::{Result, VfioDeviceError}; +#[cfg(target_arch = "aarch64")] +use crate::device_manager::vfio_dev_mgr::USE_SHARED_IRQ; +use crate::device_manager::DeviceManagerContext; +use crate::resource_manager::ResourceManager; + +/// we only support one pci bus +pub const PCI_BUS_DEFAULT: u8 = 0; + +/// PCI pass-through device manager. +#[derive(Clone)] +pub struct PciSystemManager { + pub irq_manager: Arc, + pub io_context: DeviceManagerContext, + pub pci_root: Arc, + pub pci_root_bus: Arc, +} + +impl PciSystemManager { + /// Create a new PCI pass-through device manager. + pub fn new( + irq_manager: Arc, + io_context: DeviceManagerContext, + res_manager: Arc, + ) -> std::result::Result { + let resources = PciSystemManager::allocate_root_device_resources(res_manager)?; + let pci_root = Arc::new( + PciRootDevice::create(PCI_BUS_DEFAULT, resources).map_err(VfioDeviceError::PciError)?, + ); + let pci_root_bus = + create_pci_root_bus(PCI_BUS_DEFAULT).map_err(VfioDeviceError::PciError)?; + + Ok(PciSystemManager { + irq_manager, + io_context, + pci_root, + pci_root_bus, + }) + } + + // The x86 pci root device is a pio device with a fixed pio base address and length. 
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn allocate_root_device_resources( + _res_manager: Arc, + ) -> Result { + let mut resources = DeviceResources::new(); + resources.append(Resource::PioAddressRange { + // PCI CONFIG_ADDRESS port address 0xcf8 and uses 32 bits + // PCI CONFIG_DATA port address 0xcfc and uses 32 bits + // so the resource registered begins at 0xcf8 and takes 8 bytes as size + base: 0xcf8, + size: 0x8, + }); + Ok(resources) + } + + // The pci root device of arm is a mmio device, and its reg range is ECAM space, + // which needs to be dynamically applied from the resource pool. In addition, + // the ECAM space is used to enumerate and identify PCI devices. + #[cfg(target_arch = "aarch64")] + fn allocate_root_device_resources( + res_manager: Arc, + ) -> Result { + let requests = vec![ResourceConstraint::MmioAddress { + range: Some((0x0, 0xffff_ffff)), + align: 4096, + size: ECAM_SPACE_LENGTH, + }]; + let resources = res_manager + .allocate_device_resources(&requests, USE_SHARED_IRQ) + .map_err(VfioDeviceError::AllocateDeviceResource)?; + Ok(resources) + } + + /// Activate the PCI subsystem. + pub fn activate(&mut self, resources: DeviceResources) -> Result<()> { + let bus_id = self.pci_root_bus.bus_id(); + + self.pci_root + .add_bus(self.pci_root_bus.clone(), bus_id) + .map_err(VfioDeviceError::PciError)?; + PciRootDevice::activate(self.pci_root.clone(), &mut self.io_context) + .map_err(VfioDeviceError::PciError)?; + + self.pci_root_bus + .assign_resources(resources) + .map_err(VfioDeviceError::PciError)?; + + Ok(()) + } + + /// Get resource requirements of the PCI subsystem. + #[allow(clippy::vec_init_then_push)] + pub fn resource_requirements(&self) -> Vec { + let mut requests = Vec::new(); + + // allocate 512MB MMIO address below 4G. + requests.push(ResourceConstraint::MmioAddress { + range: Some((0x0, 0xffff_ffff)), + align: 4096, + size: 512u64 << 20, + }); + // allocate 2048GB MMIO address above 4G.
+ requests.push(ResourceConstraint::MmioAddress { + range: Some((0x1_0000_0000, 0xffff_ffff_ffff_ffff)), + align: 4096, + size: 2048u64 << 30, + }); + // allocate 8KB IO port + requests.push(ResourceConstraint::PioAddress { + range: None, + align: 1, + size: 8u16 << 10, + }); + + requests + } + + /// Get the PCI root bus. + pub fn pci_root_bus(&self) -> Arc { + self.pci_root_bus.clone() + } + + /// Allocate a PCI device id. + pub fn new_device_id(&self, device_id: Option) -> Option { + self.pci_root_bus.allocate_device_id(device_id) + } + + pub fn free_device_id(&self, device_id: u32) -> Option> { + self.pci_root_bus.free_device_id(device_id) + } + + /// Obtain ECAM space resources, that is, pci root device resources. + #[cfg(target_arch = "aarch64")] + pub fn get_ecam_space(&self) -> DeviceResources { + self.pci_root.get_device_resources() + } + + /// Obtain BAR space resources, that is, pci root bus resources. + #[cfg(target_arch = "aarch64")] + pub fn get_bar_space(&self) -> DeviceResources { + self.pci_root_bus.get_device_resources() + } +} + +impl PciSystemContext for PciSystemManager { + type D = DeviceManagerContext; + + fn get_device_manager_context(&self) -> Self::D { + self.io_context.clone() + } + + fn get_interrupt_manager(&self) -> Arc { + self.irq_manager.clone() + } +} diff --git a/src/dragonball/src/error.rs b/src/dragonball/src/error.rs index 6dc427a2ab56..c2885de28501 100644 --- a/src/dragonball/src/error.rs +++ b/src/dragonball/src/error.rs @@ -14,6 +14,8 @@ use dbs_arch::pmu::PmuError; #[cfg(feature = "dbs-virtio-devices")] use dbs_virtio_devices::Error as VirtioError; +#[cfg(feature = "host-device")] +use crate::device_manager::vfio_dev_mgr::VfioDeviceError; use crate::{address_space_manager, device_manager, resource_manager, vcpu, vm}; /// Shorthand result type for internal VMM commands. 
@@ -205,6 +207,14 @@ pub enum StartMicroVmError { VhostUserNetDeviceError( #[source] device_manager::vhost_user_net_dev_mgr::VhostUserNetDeviceError, ), + #[cfg(feature = "host-device")] + /// Failed to create VFIO device + #[error("cannot create VFIO device {0:?}")] + CreateVfioDevice(#[source] VfioDeviceError), + #[cfg(feature = "host-device")] + /// Failed to register DMA memory address range. + #[error("failure while registering DMA address range: {0:?}")] + RegisterDMAAddress(#[source] VfioDeviceError), } /// Errors associated with starting the instance. diff --git a/src/dragonball/src/resource_manager.rs b/src/dragonball/src/resource_manager.rs index b0f96e252eba..eb3897b46974 100644 --- a/src/dragonball/src/resource_manager.rs +++ b/src/dragonball/src/resource_manager.rs @@ -565,7 +565,7 @@ impl ResourceManager { Resource::LegacyIrq(base) => self.free_legacy_irq(*base), Resource::MsiIrq { ty: _, base, size } => self.free_msi_irq(*base, *size), Resource::KvmMemSlot(slot) => self.free_kvm_mem_slot(*slot), - Resource::MacAddresss(_) => Ok(()), + Resource::MacAddress(_) => Ok(()), }; result?; } diff --git a/src/dragonball/src/test_utils.rs b/src/dragonball/src/test_utils.rs index dec006f4334d..58612cbaf3c6 100644 --- a/src/dragonball/src/test_utils.rs +++ b/src/dragonball/src/test_utils.rs @@ -39,6 +39,7 @@ pub mod tests { sockets: 1, }, vpmu_feature: 0, + pci_hotplug_enabled: false, }; vm.set_vm_config(vm_config); vm.init_guest_memory().unwrap(); diff --git a/src/dragonball/src/vcpu/vcpu_manager.rs b/src/dragonball/src/vcpu/vcpu_manager.rs index 45d0541f48d1..a7164eb032f0 100644 --- a/src/dragonball/src/vcpu/vcpu_manager.rs +++ b/src/dragonball/src/vcpu/vcpu_manager.rs @@ -1133,6 +1133,7 @@ mod tests { sockets: 1, }, vpmu_feature: 0, + pci_hotplug_enabled: false, }; vm.set_vm_config(vm_config); vm.init_guest_memory().unwrap(); @@ -1181,6 +1182,7 @@ mod tests { sockets: 1, }, vpmu_feature: 0, + pci_hotplug_enabled: false, }; vm.set_vm_config(vm_config.clone()); 
vm.init_guest_memory().unwrap(); diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs index 2964936b7fce..2ac1cc39d2a2 100644 --- a/src/dragonball/src/vm/mod.rs +++ b/src/dragonball/src/vm/mod.rs @@ -138,6 +138,9 @@ pub struct VmConfigInfo { /// sock path pub serial_path: Option, + + /// Enable PCI device hotplug or not + pub pci_hotplug_enabled: bool, } impl Default for VmConfigInfo { @@ -157,6 +160,7 @@ impl Default for VmConfigInfo { mem_file_path: String::from(""), mem_size_mib: 128, serial_path: None, + pci_hotplug_enabled: false, } } } @@ -182,7 +186,8 @@ pub struct Vm { shared_info: Arc>, address_space: AddressSpaceMgr, - device_manager: DeviceManager, + /// device manager for Dragonball + pub device_manager: DeviceManager, dmesg_fifo: Option>, kernel_config: Option, logger: slog::Logger, @@ -494,7 +499,7 @@ impl Vm { )?; info!(self.logger, "VM: start devices"); - self.device_manager.start_devices()?; + self.device_manager.start_devices(vm_as)?; info!(self.logger, "VM: initializing devices done"); Ok(()) @@ -928,6 +933,7 @@ pub mod tests { sockets: 1, }, vpmu_feature: 0, + pci_hotplug_enabled: false, }; let mut vm = create_vm_instance(); @@ -960,6 +966,7 @@ pub mod tests { sockets: 1, }, vpmu_feature: 0, + pci_hotplug_enabled: false, }; vm.set_vm_config(vm_config); assert!(vm.init_guest_memory().is_ok()); @@ -1008,6 +1015,7 @@ pub mod tests { sockets: 1, }, vpmu_feature: 0, + pci_hotplug_enabled: false, }; vm.set_vm_config(vm_config); @@ -1084,6 +1092,7 @@ pub mod tests { sockets: 1, }, vpmu_feature: 0, + pci_hotplug_enabled: false, }; vm.set_vm_config(vm_config); diff --git a/src/dragonball/src/vm/x86_64.rs b/src/dragonball/src/vm/x86_64.rs index 4aedeafd8eef..fca35f829c42 100644 --- a/src/dragonball/src/vm/x86_64.rs +++ b/src/dragonball/src/vm/x86_64.rs @@ -6,6 +6,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. 
+use std::collections::HashMap; use std::convert::TryInto; use std::ops::Deref; @@ -48,6 +49,7 @@ fn configure_system( initrd: &Option, boot_cpus: u8, max_cpus: u8, + pci_legacy_irqs: Option<&HashMap>, ) -> super::Result<()> { const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55; const KERNEL_HDR_MAGIC: u32 = 0x5372_6448; @@ -59,7 +61,8 @@ fn configure_system( let himem_start = GuestAddress(layout::HIMEM_START); // Note that this puts the mptable at the last 1k of Linux's 640k base RAM - mptable::setup_mptable(guest_mem, boot_cpus, max_cpus, None).map_err(Error::MpTableSetup)?; + mptable::setup_mptable(guest_mem, boot_cpus, max_cpus, pci_legacy_irqs) + .map_err(Error::MpTableSetup)?; let mut params: BootParamsWrapper = BootParamsWrapper(bootparam::boot_params::default()); @@ -219,6 +222,24 @@ impl Vm { .as_bytes_with_nul() .len(); + #[cfg(feature = "host-device")] + { + // Don't expect poisoned lock here. + let vfio_manager = self.device_manager.vfio_manager.lock().unwrap(); + configure_system( + vm_memory, + self.address_space.address_space(), + cmdline_addr, + cmdline_size, + &initrd, + self.vm_config.vcpu_count, + self.vm_config.max_vcpu_count, + vfio_manager.get_pci_legacy_irqs(), + ) + .map_err(StartMicroVmError::ConfigureSystem) + } + + #[cfg(not(feature = "host-device"))] configure_system( vm_memory, self.address_space.address_space(), @@ -227,6 +248,7 @@ impl Vm { &initrd, self.vm_config.vcpu_count, self.vm_config.max_vcpu_count, + None, ) .map_err(StartMicroVmError::ConfigureSystem) } diff --git a/src/libs/kata-types/src/annotations/mod.rs b/src/libs/kata-types/src/annotations/mod.rs index fd316c2e2f40..4024ce2cb2de 100644 --- a/src/libs/kata-types/src/annotations/mod.rs +++ b/src/libs/kata-types/src/annotations/mod.rs @@ -705,7 +705,7 @@ impl Annotation { } // Hypervisor Memory related annotations KATA_ANNO_CFG_HYPERVISOR_DEFAULT_MEMORY => { - match byte_unit::Byte::parse_str(value,true) { + match byte_unit::Byte::parse_str(value, true) { Ok(mem_bytes) => { let 
memory_size = mem_bytes .get_adjusted_unit(byte_unit::Unit::MiB) diff --git a/src/runtime-rs/Cargo.lock b/src/runtime-rs/Cargo.lock index 375b7c91e76e..b593214b3091 100644 --- a/src/runtime-rs/Cargo.lock +++ b/src/runtime-rs/Cargo.lock @@ -775,6 +775,26 @@ dependencies = [ "vmm-sys-util 0.11.1", ] +[[package]] +name = "dbs-pci" +version = "0.1.0" +dependencies = [ + "byteorder", + "dbs-allocator", + "dbs-boot", + "dbs-device", + "dbs-interrupt", + "downcast-rs", + "kvm-bindings", + "kvm-ioctls", + "libc", + "log", + "thiserror", + "vfio-bindings", + "vfio-ioctls", + "vm-memory", +] + [[package]] name = "dbs-upcall" version = "0.3.0" @@ -829,6 +849,7 @@ dependencies = [ "serde_json", "thiserror", "threadpool", + "timerfd", "vhost", "virtio-bindings", "virtio-queue", @@ -905,6 +926,12 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0688c2a7f92e427f44895cd63841bff7b29f8d7a1648b9e7e07a4a365b2e1257" +[[package]] +name = "downcast-rs" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650" + [[package]] name = "dragonball" version = "0.1.0" @@ -920,6 +947,7 @@ dependencies = [ "dbs-device", "dbs-interrupt", "dbs-legacy-devices", + "dbs-pci", "dbs-upcall", "dbs-utils", "dbs-virtio-devices", @@ -942,6 +970,8 @@ dependencies = [ "slog-scope", "thiserror", "tracing", + "vfio-bindings", + "vfio-ioctls", "virtio-queue", "vm-memory", "vmm-sys-util 0.11.1", @@ -4146,6 +4176,29 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "vfio-bindings" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43449b404c488f70507dca193debd4bea361fe8089869b947adc19720e464bce" + +[[package]] +name = "vfio-ioctls" +version = "0.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "068bac78842164a8ecc1d1a84a8d8a9168ab29fa3c96942689e286a30ae22ac4" +dependencies = [ + "byteorder", + "kvm-bindings", + "kvm-ioctls", + "libc", + "log", + "thiserror", + "vfio-bindings", + "vm-memory", + "vmm-sys-util 0.11.1", +] + [[package]] name = "vhost" version = "0.6.1" diff --git a/src/runtime-rs/crates/hypervisor/Cargo.toml b/src/runtime-rs/crates/hypervisor/Cargo.toml index a8520b15bf59..ef47fe1910cb 100644 --- a/src/runtime-rs/crates/hypervisor/Cargo.toml +++ b/src/runtime-rs/crates/hypervisor/Cargo.toml @@ -35,7 +35,7 @@ kata-types = { path = "../../../libs/kata-types" } logging = { path = "../../../libs/logging" } shim-interface = { path = "../../../libs/shim-interface" } -dragonball = { path = "../../../dragonball", features = ["atomic-guest-memory", "virtio-vsock", "hotplug", "virtio-blk", "virtio-net", "virtio-fs", "vhost-net", "dbs-upcall","virtio-mem", "virtio-balloon", "vhost-user-net"] } +dragonball = { path = "../../../dragonball", features = ["atomic-guest-memory", "virtio-vsock", "hotplug", "virtio-blk", "virtio-net", "virtio-fs", "vhost-net", "dbs-upcall", "virtio-mem", "virtio-balloon", "vhost-user-net", "host-device"] } ch-config = { path = "ch-config", optional = true } tests_utils = { path = "../../tests/utils" } diff --git a/tools/packaging/kernel/configs/fragments/build-type/dragonball-experimental/upcall.conf b/tools/packaging/kernel/configs/fragments/build-type/dragonball-experimental/upcall.conf index 75596e687a1d..a7e608b14c11 100644 --- a/tools/packaging/kernel/configs/fragments/build-type/dragonball-experimental/upcall.conf +++ b/tools/packaging/kernel/configs/fragments/build-type/dragonball-experimental/upcall.conf @@ -4,3 +4,4 @@ CONFIG_DRAGONBALL_UPCALL_SRV=y CONFIG_DRAGONBALL_DEVICE_MANAGER=y CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO=y CONFIG_DRAGONBALL_HOTPLUG_CPU=y +CONFIG_DRAGONBALL_HOTPLUG_PCI=y diff --git a/tools/packaging/kernel/kata_config_version 
b/tools/packaging/kernel/kata_config_version index 078fa0fe576b..52bd8e43afb0 100644 --- a/tools/packaging/kernel/kata_config_version +++ b/tools/packaging/kernel/kata_config_version @@ -1 +1 @@ -119 +120 diff --git a/tools/packaging/kernel/patches/5.10.x/dragonball-experimental/0008-upcall-add-pci-hotplug-hot-unplug-support.patch b/tools/packaging/kernel/patches/5.10.x/dragonball-experimental/0008-upcall-add-pci-hotplug-hot-unplug-support.patch new file mode 100644 index 000000000000..d4171e64a203 --- /dev/null +++ b/tools/packaging/kernel/patches/5.10.x/dragonball-experimental/0008-upcall-add-pci-hotplug-hot-unplug-support.patch @@ -0,0 +1,173 @@ +From 4ed40d8ce3793129ba9c0b7b663a5e137aceb70c Mon Sep 17 00:00:00 2001 +From: Chao Wu +Date: Wed, 27 Dec 2023 14:43:47 +0800 +Subject: [PATCH] upcall: add pci hotplug / hot-unplug support + +add two new upcall functions add_pci_dev and del_pci_dev, mainly for hotplugging +and hot-unplugging pci device in the guest kernel through the upcall server. + +Users could implement upcall client side with add_pci or del_pci command and trigger +those commands in the hypervisor side. + +As always, Dragonball hypervisor will implement the client side to do pci hotplug and +hot-unplug as an example + +Signed-off-by: Gerry Liu +Signed-off-by: Helin Guo +Signed-off-by: Chao Wu +--- + drivers/misc/dragonball/upcall_srv/Kconfig | 11 +++ + .../upcall_srv/dragonball_device_manager.c | 90 +++++++++++++++++++ + 2 files changed, 101 insertions(+) + +diff --git a/drivers/misc/dragonball/upcall_srv/Kconfig b/drivers/misc/dragonball/upcall_srv/Kconfig +index fc83f03c2edd..19a6ca957ea6 100644 +--- a/drivers/misc/dragonball/upcall_srv/Kconfig ++++ b/drivers/misc/dragonball/upcall_srv/Kconfig +@@ -47,3 +47,14 @@ config DRAGONBALL_HOTPLUG_CPU + structure with command and parameter to hot-pluging an vCPU. + + If unsure, say N. 
++ ++config DRAGONBALL_HOTPLUG_PCI ++ bool "PCI hotplug/hotunplug support" ++ depends on DRAGONBALL_DEVICE_MANAGER ++ default y ++ help ++ This configure implements a PCI hotplug/hotunplug support, vmm ++ should send hotplug request by vsock which follow special data ++ structure with command and parameter to hot-pluging a PCI device. ++ ++ If unsure, say N. +diff --git a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c +index 088d38623b8d..3544afefa2a9 100644 +--- a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c ++++ b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -90,6 +91,12 @@ struct devmgr_req { + uint8_t apic_ids[256]; + #endif + } cpu_dev_info; ++#endif ++#if defined(CONFIG_DRAGONBALL_HOTPLUG_PCI) ++ struct { ++ uint8_t busno; ++ uint8_t devfn; ++ } pci_dev_info; + #endif + } msg_load; + }; +@@ -117,6 +124,9 @@ struct devmgr_reply { + #endif + #if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) + struct cpu_dev_reply_info cpu_dev_info; ++#endif ++#if defined(CONFIG_DRAGONBALL_HOTPLUG_PCI) ++ struct {} pci_dev_info; + #endif + } msg_load; + }; +@@ -286,6 +296,82 @@ static int del_mmio_dev(struct devmgr_req *req, + } + #endif + ++#if defined(CONFIG_DRAGONBALL_HOTPLUG_PCI) ++static int add_pci_dev(struct devmgr_req *req, ++ struct devmgr_reply *rep) ++{ ++ int ret = 0; ++ struct devmgr_msg_header *rep_mh = &rep->msg_header; ++ uint8_t busno = req->msg_load.pci_dev_info.busno; ++ uint8_t devfn = req->msg_load.pci_dev_info.devfn; ++ struct pci_bus *bus; ++ struct pci_dev *dev; ++ ++ pr_info("add pci device of busno: %02x, devfn: %02x\n", busno, devfn); ++ ++ pci_lock_rescan_remove(); ++ ++ /* It is similar to pci_rescan_bus */ ++ ++ bus = pci_find_bus(0, busno); ++ if (!bus) { ++ pr_err("Could not find PCI bus for busno %02x\n", busno); ++ ret = -ENODEV; ++ goto 
out; ++ } ++ ++ pci_scan_slot(bus, devfn); ++ dev = pci_get_slot(bus, devfn); ++ if (!dev) { ++ pr_err("Could not find PCI device for slot %02x\n", devfn); ++ ret = -ENODEV; ++ goto out; ++ } ++ ++ pci_bus_claim_resources(bus); ++ ++ pci_bus_add_devices(bus); ++ ++ pci_dev_put(dev); ++ ++out: ++ pci_unlock_rescan_remove(); ++ if (!ret) ++ _fill_msg_header(rep_mh, 0, ADD_PCI, 0); ++ return ret; ++} ++ ++static int del_pci_dev(struct devmgr_req *req, ++ struct devmgr_reply *rep) ++{ ++ int ret = 0; ++ struct devmgr_msg_header *rep_mh = &rep->msg_header; ++ uint8_t busno = req->msg_load.pci_dev_info.busno; ++ uint8_t devfn = req->msg_load.pci_dev_info.devfn; ++ struct pci_dev *dev; ++ ++ pr_info("remove pci device of busno: %02x, devfn: %02x\n", busno, devfn); ++ ++ pci_lock_rescan_remove(); ++ ++ dev = pci_get_domain_bus_and_slot(0, busno, devfn); ++ ++ if (!dev) { ++ pr_err("Could not find PCI device for slot %02x\n", devfn); ++ ret = -ENODEV; ++ goto out; ++ } ++ ++ pci_stop_and_remove_bus_device(dev); ++ ++ pci_dev_put(dev); ++out: ++ pci_unlock_rescan_remove(); ++ if (!ret) ++ _fill_msg_header(rep_mh, 0, DEL_PCI, 0); ++ return ret; ++} ++#endif + + #if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) + #if defined(CONFIG_X86_64) +@@ -522,6 +608,10 @@ static struct { + {ADD_CPU, add_cpu_dev}, + {DEL_CPU, del_cpu_dev}, + #endif ++#if defined(CONFIG_DRAGONBALL_HOTPLUG_PCI) ++ {ADD_PCI, add_pci_dev}, ++ {DEL_PCI, del_pci_dev}, ++#endif + }; + + static action_route_t get_action(struct devmgr_req *req) +-- +2.31.1 +