Merge pull request #8740 from openanolis/upstream/pci-6-final

Dragonball: add pci vfio passthrough, hot(un)plug support
This commit is contained in:
Chao Wu 2023-12-29 01:58:32 +08:00 committed by GitHub
commit 67b91c1eb3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 1975 additions and 31 deletions

322
src/agent/Cargo.lock generated
View File

@ -8,6 +8,17 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "ahash"
version = "0.7.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a824f2aa7e75a0c98c5a504fceb80649e9c35265d44525b5f94de4771a395cd"
dependencies = [
"getrandom",
"once_cell",
"version_check",
]
[[package]]
name = "aho-corasick"
version = "1.0.5"
@ -47,6 +58,12 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5d78ce20460b82d3fa150275ed9d55e21064fc7951177baacf86a145c4a4b1f"
[[package]]
name = "arrayvec"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
[[package]]
name = "async-broadcast"
version = "0.5.1"
@ -246,6 +263,18 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "bitvec"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c"
dependencies = [
"funty",
"radium",
"tap",
"wyz",
]
[[package]]
name = "block-buffer"
version = "0.10.4"
@ -270,6 +299,51 @@ dependencies = [
"log",
]
[[package]]
name = "borsh"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4114279215a005bc675e386011e594e1d9b800918cea18fcadadcce864a2046b"
dependencies = [
"borsh-derive",
"hashbrown",
]
[[package]]
name = "borsh-derive"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0754613691538d51f329cce9af41d7b7ca150bc973056f1156611489475f54f7"
dependencies = [
"borsh-derive-internal",
"borsh-schema-derive-internal",
"proc-macro-crate 0.1.5",
"proc-macro2",
"syn 1.0.109",
]
[[package]]
name = "borsh-derive-internal"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "afb438156919598d2c7bad7e1c0adf3d26ed3840dbc010db1a882a65583ca2fb"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "borsh-schema-derive-internal"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "634205cc43f74a1b9046ef87c4540ebda95696ec0f315024860cad7c5b0f5ccd"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "bumpalo"
version = "3.10.0"
@ -278,9 +352,36 @@ checksum = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3"
[[package]]
name = "byte-unit"
version = "3.1.4"
version = "5.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "415301c9de11005d4b92193c0eb7ac7adc37e5a49e0ac9bed0a42343512744b8"
checksum = "d405b41420a161b4e1dd5a52e3349f41b4dae9a39be02aff1d67fe53256430ac"
dependencies = [
"rust_decimal",
"serde",
"utf8-width",
]
[[package]]
name = "bytecheck"
version = "0.6.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b6372023ac861f6e6dc89c8344a8f398fb42aaba2b5dbc649ca0c0e9dbcb627"
dependencies = [
"bytecheck_derive",
"ptr_meta",
"simdutf8",
]
[[package]]
name = "bytecheck_derive"
version = "0.6.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7ec4c6f261935ad534c0c22dbef2201b45918860eb1c574b972bd213a76af61"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "byteorder"
@ -489,10 +590,32 @@ dependencies = [
]
[[package]]
name = "crossbeam-utils"
version = "0.8.16"
name = "crossbeam-deque"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294"
checksum = "fca89a0e215bab21874660c67903c5f143333cab1da83d041c7ded6053774751"
dependencies = [
"cfg-if 1.0.0",
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e3681d554572a651dda4186cd47240627c3d0114d45a95f6ad27f2f22e7548d"
dependencies = [
"autocfg",
"cfg-if 1.0.0",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3a430a770ebd84726f584a90ee7f020d28db52c6d02138900f22341f866d39c"
dependencies = [
"cfg-if 1.0.0",
]
@ -735,6 +858,12 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "funty"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
[[package]]
name = "futures"
version = "0.3.21"
@ -890,6 +1019,9 @@ name = "hashbrown"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db0d4cf898abf0081f964436dc980e96670a0f36863e4b83aaacdb65c9d7ccc3"
dependencies = [
"ahash",
]
[[package]]
name = "heck"
@ -1229,6 +1361,7 @@ dependencies = [
"serde_json",
"slog",
"slog-scope",
"sysinfo",
"thiserror",
"toml",
]
@ -1241,9 +1374,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.139"
version = "0.2.151"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79"
checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4"
[[package]]
name = "libseccomp"
@ -1519,6 +1652,15 @@ dependencies = [
"memoffset 0.7.1",
]
[[package]]
name = "ntapi"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4"
dependencies = [
"winapi",
]
[[package]]
name = "num-integer"
version = "0.1.45"
@ -1816,6 +1958,15 @@ version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872"
[[package]]
name = "proc-macro-crate"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d6ea3c4595b96363c13943497db34af4460fb474a95c43f4446ad341b8c9785"
dependencies = [
"toml",
]
[[package]]
name = "proc-macro-crate"
version = "1.2.1"
@ -2022,6 +2173,26 @@ dependencies = [
"ttrpc-codegen",
]
[[package]]
name = "ptr_meta"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1"
dependencies = [
"ptr_meta_derive",
]
[[package]]
name = "ptr_meta_derive"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "quote"
version = "1.0.27"
@ -2031,6 +2202,12 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "radium"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
[[package]]
name = "rand"
version = "0.8.5"
@ -2061,6 +2238,26 @@ dependencies = [
"getrandom",
]
[[package]]
name = "rayon"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]]
name = "redox_syscall"
version = "0.2.13"
@ -2134,6 +2331,15 @@ dependencies = [
"winapi",
]
[[package]]
name = "rend"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2571463863a6bd50c32f94402933f03457a3fbaf697a707c5be741e459f08fd"
dependencies = [
"bytecheck",
]
[[package]]
name = "reqwest"
version = "0.11.18"
@ -2171,6 +2377,34 @@ dependencies = [
"winreg",
]
[[package]]
name = "rkyv"
version = "0.7.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0200c8230b013893c0b2d6213d6ec64ed2b9be2e0e016682b7224ff82cff5c58"
dependencies = [
"bitvec",
"bytecheck",
"hashbrown",
"ptr_meta",
"rend",
"rkyv_derive",
"seahash",
"tinyvec",
"uuid",
]
[[package]]
name = "rkyv_derive"
version = "0.7.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5c462a1328c8e67e4d6dbad1eb0355dd43e8ab432c6e227a43657f16ade5033"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "rlimit"
version = "0.5.4"
@ -2195,6 +2429,22 @@ dependencies = [
"tokio",
]
[[package]]
name = "rust_decimal"
version = "1.32.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4c4216490d5a413bc6d10fa4742bd7d4955941d062c0ef873141d6b0e7b30fd"
dependencies = [
"arrayvec",
"borsh",
"bytes 1.1.0",
"num-traits",
"rand",
"rkyv",
"serde",
"serde_json",
]
[[package]]
name = "rustix"
version = "0.37.3"
@ -2291,6 +2541,12 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "seahash"
version = "4.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b"
[[package]]
name = "security-framework"
version = "2.9.2"
@ -2459,6 +2715,12 @@ dependencies = [
"libc",
]
[[package]]
name = "simdutf8"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a"
[[package]]
name = "slab"
version = "0.4.6"
@ -2596,12 +2858,33 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "sysinfo"
version = "0.29.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd727fc423c2060f6c92d9534cef765c65a6ed3f428a03d7def74a8c4348e666"
dependencies = [
"cfg-if 1.0.0",
"core-foundation-sys",
"libc",
"ntapi",
"once_cell",
"rayon",
"winapi",
]
[[package]]
name = "take_mut"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60"
[[package]]
name = "tap"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
[[package]]
name = "tempfile"
version = "3.3.0"
@ -3024,6 +3307,18 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "utf8-width"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3"
[[package]]
name = "uuid"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560"
[[package]]
name = "valuable"
version = "0.1.0"
@ -3407,6 +3702,15 @@ dependencies = [
"winapi",
]
[[package]]
name = "wyz"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
dependencies = [
"tap",
]
[[package]]
name = "xattr"
version = "0.2.3"
@ -3473,7 +3777,7 @@ version = "3.14.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41d1794a946878c0e807f55a397187c11fc7a038ba5d868e7db4f3bd7760bc9d"
dependencies = [
"proc-macro-crate",
"proc-macro-crate 1.2.1",
"proc-macro2",
"quote",
"regex",
@ -3512,7 +3816,7 @@ version = "3.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "934d7a7dfc310d6ee06c87ffe88ef4eca7d3e37bb251dece2ef93da8f17d8ecd"
dependencies = [
"proc-macro-crate",
"proc-macro-crate 1.2.1",
"proc-macro2",
"quote",
"syn 1.0.109",

View File

@ -344,6 +344,26 @@ dependencies = [
"vmm-sys-util",
]
[[package]]
name = "dbs-pci"
version = "0.1.0"
dependencies = [
"byteorder",
"dbs-allocator",
"dbs-boot",
"dbs-device",
"dbs-interrupt",
"downcast-rs",
"kvm-bindings",
"kvm-ioctls",
"libc",
"log",
"thiserror",
"vfio-bindings",
"vfio-ioctls",
"vm-memory",
]
[[package]]
name = "dbs-upcall"
version = "0.3.0"
@ -398,6 +418,7 @@ dependencies = [
"serde_json",
"thiserror",
"threadpool",
"timerfd",
"vhost",
"virtio-bindings",
"virtio-queue",
@ -454,6 +475,12 @@ dependencies = [
"winapi",
]
[[package]]
name = "downcast-rs"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650"
[[package]]
name = "dragonball"
version = "0.1.0"
@ -469,6 +496,7 @@ dependencies = [
"dbs-device",
"dbs-interrupt",
"dbs-legacy-devices",
"dbs-pci",
"dbs-upcall",
"dbs-utils",
"dbs-virtio-devices",
@ -494,6 +522,8 @@ dependencies = [
"test-utils",
"thiserror",
"tracing",
"vfio-bindings",
"vfio-ioctls",
"virtio-queue",
"vm-memory",
"vmm-sys-util",
@ -2074,6 +2104,29 @@ version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "vfio-bindings"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43449b404c488f70507dca193debd4bea361fe8089869b947adc19720e464bce"
[[package]]
name = "vfio-ioctls"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "068bac78842164a8ecc1d1a84a8d8a9168ab29fa3c96942689e286a30ae22ac4"
dependencies = [
"byteorder",
"kvm-bindings",
"kvm-ioctls",
"libc",
"log",
"thiserror",
"vfio-bindings",
"vm-memory",
"vmm-sys-util",
]
[[package]]
name = "vhost"
version = "0.6.1"

View File

@ -25,6 +25,7 @@ dbs-utils = { path = "./src/dbs_utils" }
dbs-virtio-devices = { path = "./src/dbs_virtio_devices", optional = true, features = [
"virtio-mmio",
] }
dbs-pci = { path = "./src/dbs_pci", optional = true }
derivative = "2.2.0"
kvm-bindings = "0.6.0"
kvm-ioctls = "0.12.0"
@ -48,6 +49,8 @@ virtio-queue = { version = "0.7.0", optional = true }
vm-memory = { version = "0.10.0", features = ["backend-mmap"] }
crossbeam-channel = "0.5.6"
fuse-backend-rs = "0.10.5"
vfio-bindings = { version = "0.3.0", optional = true }
vfio-ioctls = { version = "0.1.0", optional = true }
[dev-dependencies]
slog-async = "2.7.0"
@ -77,3 +80,4 @@ vhost-net = ["dbs-virtio-devices/vhost-net"]
vhost-user-fs = ["dbs-virtio-devices/vhost-user-fs"]
vhost-user-net = ["dbs-virtio-devices/vhost-user-net"]
vhost-user-blk = ["dbs-virtio-devices/vhost-user-blk"]
host-device = ["dep:vfio-bindings", "dep:vfio-ioctls", "dep:dbs-pci"]

View File

@ -9,13 +9,14 @@
use std::fs::File;
use std::sync::{Arc, Mutex};
use crossbeam_channel::{Receiver, Sender, TryRecvError};
use crossbeam_channel::{unbounded, Receiver, Sender, TryRecvError};
use log::{debug, error, info, warn};
use tracing::instrument;
use crate::error::{Result, StartMicroVmError, StopMicrovmError};
use crate::event_manager::EventManager;
use crate::tracer::{DragonballTracer, TraceError, TraceInfo};
use crate::vcpu::VcpuManagerError;
use crate::vm::{CpuTopology, KernelConfigInfo, VmConfigInfo};
use crate::vmm::Vmm;
@ -36,6 +37,8 @@ pub use crate::device_manager::fs_dev_mgr::{
};
#[cfg(feature = "virtio-mem")]
pub use crate::device_manager::mem_dev_mgr::{MemDeviceConfigInfo, MemDeviceError};
#[cfg(feature = "host-device")]
use crate::device_manager::vfio_dev_mgr::{HostDeviceConfig, VfioDeviceError};
#[cfg(feature = "vhost-net")]
pub use crate::device_manager::vhost_net_dev_mgr::{
VhostNetDeviceConfigInfo, VhostNetDeviceError, VhostNetDeviceMgr,
@ -148,11 +151,20 @@ pub enum VmmActionError {
/// End tracing Failed.
#[error("End tracing failed: {0}")]
EndTracingFailed(#[source] TraceError),
#[cfg(feature = "host-device")]
/// The action `InsertHostDevice` failed either because of bad user input or an internal error.
#[error("failed to add VFIO passthrough device: {0:?}")]
HostDeviceConfig(#[source] VfioDeviceError),
#[cfg(feature = "host-device")]
/// The action 'RemoveHostDevice' failed because of vcpu manager internal error.
#[error("remove host device error: {0}")]
RemoveHostDevice(#[source] VcpuManagerError),
}
/// This enum represents the public interface of the VMM. Each action contains various
/// bits of information (ids, paths, etc.).
#[derive(Clone, Debug, PartialEq, Eq)]
#[derive(Clone, Debug, PartialEq)]
pub enum VmmAction {
/// Configure the boot source of the microVM using `BootSourceConfig`.
/// This action can only be called before the microVM has booted.
@ -245,6 +257,18 @@ pub enum VmmAction {
/// Add a new balloon device or update one that already exists using the `BalloonDeviceConfig`
/// as input.
InsertBalloonDevice(BalloonDeviceConfigInfo),
#[cfg(feature = "host-device")]
/// Add a VFIO assignment host device, or update one that already exists
InsertHostDevice(HostDeviceConfig),
#[cfg(feature = "host-device")]
/// Prepare to remove a VFIO assignment host device that already exists
PrepareRemoveHostDevice(String),
#[cfg(feature = "host-device")]
/// Remove a VFIO assignment host device that already exists
RemoveHostDevice(String),
}
/// The enum represents the response sent by the VMM in case of success. The response is either
@ -257,6 +281,8 @@ pub enum VmmData {
MachineConfiguration(Box<VmConfigInfo>),
/// Prometheus Metrics represented by String.
HypervisorMetrics(String),
/// Sync Hotplug: the (sender, receiver) channel pair handed back for a hotplug request.
SyncHotplug((Sender<Option<i32>>, Receiver<Option<i32>>)),
}
/// Request data type used to communicate between the API and the VMM.
@ -371,6 +397,14 @@ impl VmmService {
VmmAction::InsertBalloonDevice(balloon_cfg) => {
self.add_balloon_device(vmm, event_mgr, balloon_cfg)
}
#[cfg(feature = "host-device")]
VmmAction::InsertHostDevice(hostdev_cfg) => self.add_vfio_device(vmm, hostdev_cfg),
#[cfg(feature = "host-device")]
VmmAction::PrepareRemoveHostDevice(hostdev_id) => {
self.prepare_remove_vfio_device(vmm, &hostdev_id)
}
#[cfg(feature = "host-device")]
VmmAction::RemoveHostDevice(hostdev_cfg) => self.remove_vfio_device(vmm, &hostdev_cfg),
};
debug!("send vmm response: {:?}", response);
@ -539,6 +573,8 @@ impl VmmService {
// - Some(path), legacy_manager will create_socket_console on that path.
config.serial_path = machine_config.serial_path;
config.pci_hotplug_enabled = machine_config.pci_hotplug_enabled;
vm.set_vm_config(config.clone());
self.machine_config = config;
@ -813,6 +849,101 @@ impl VmmService {
.map_err(VmmActionError::FsDevice)
}
/// Handles `VmmAction::InsertHostDevice` by inserting the VFIO passthrough device
/// described by `config` through the device manager's VFIO manager.
///
/// # Errors
/// * `VmmActionError::HostDeviceConfig(VfioDeviceError::InvalidVMID)` when
///   `vmm.get_vm_mut()` yields no VM.
/// * `HostDeviceConfig(UpdateNotAllowedPostBoot)`, `UpcallServerNotReady` or
///   `StartMicroVm(..)` when the device operation context cannot be created.
/// * `VmmActionError::HostDeviceConfig(..)` wrapping any error returned by
///   `insert_device`.
#[cfg(feature = "host-device")]
fn add_vfio_device(&self, vmm: &mut Vmm, config: HostDeviceConfig) -> VmmRequestResult {
    let no_vm = VmmActionError::HostDeviceConfig(VfioDeviceError::InvalidVMID);
    let vm = vmm.get_vm_mut().ok_or(no_vm)?;
    info!("add_vfio_device: {:?}", config);

    // Translate context-creation failures into the action-level error space.
    let mut ctx = vm.create_device_op_context(None).map_err(|err| {
        info!("create device op context error: {:?}", err);
        match err {
            StartMicroVmError::MicroVMAlreadyRunning => {
                VmmActionError::HostDeviceConfig(VfioDeviceError::UpdateNotAllowedPostBoot)
            }
            StartMicroVmError::UpcallServerNotReady => VmmActionError::UpcallServerNotReady,
            other => VmmActionError::StartMicroVm(other),
        }
    })?;

    // A poisoned lock is not expected here, hence the plain `unwrap()`.
    let mut vfio_mgr = vm.device_manager().vfio_manager.lock().unwrap();
    vfio_mgr
        .insert_device(&mut ctx, config)
        .map_err(VmmActionError::HostDeviceConfig)?;

    Ok(VmmData::Empty)
}
/// Handles `VmmAction::PrepareRemoveHostDevice`: uses the upcall to unplug the
/// PCI device identified by `hostdev_id` in the guest.
///
/// A fresh unbounded channel is created; a clone of its sender is passed to
/// `prepare_remove_device`, and on success both channel ends are returned to
/// the caller inside `VmmData::SyncHotplug`.
///
/// # Errors
/// * `VmmActionError::HostDeviceConfig(VfioDeviceError::InvalidVMID)` when
///   `vmm.get_vm_mut()` yields no VM.
/// * `HostDeviceConfig(UpdateNotAllowedPostBoot)`, `UpcallServerNotReady` or
///   `StartMicroVm(..)` when the device operation context cannot be created.
/// * `VmmActionError::HostDeviceConfig(..)` wrapping any error returned by
///   `prepare_remove_device`.
#[cfg(feature = "host-device")]
fn prepare_remove_vfio_device(&mut self, vmm: &mut Vmm, hostdev_id: &str) -> VmmRequestResult {
    let no_vm = VmmActionError::HostDeviceConfig(VfioDeviceError::InvalidVMID);
    let vm = vmm.get_vm_mut().ok_or(no_vm)?;
    info!("prepare_remove_vfio_device: {:?}", hostdev_id);

    // Translate context-creation failures into the action-level error space.
    let ctx = vm.create_device_op_context(None).map_err(|err| {
        info!("create device op context error: {:?}", err);
        match err {
            StartMicroVmError::MicroVMAlreadyRunning => {
                VmmActionError::HostDeviceConfig(VfioDeviceError::UpdateNotAllowedPostBoot)
            }
            StartMicroVmError::UpcallServerNotReady => VmmActionError::UpcallServerNotReady,
            other => VmmActionError::StartMicroVm(other),
        }
    })?;

    let (sender, receiver) = unbounded();

    // A poisoned lock is not expected here, hence the plain `unwrap()`.
    let vfio_manager = vm.device_manager.vfio_manager.lock().unwrap();
    vfio_manager
        .prepare_remove_device(&ctx, hostdev_id, sender.clone())
        .map_err(VmmActionError::HostDeviceConfig)?;

    Ok(VmmData::SyncHotplug((sender, receiver)))
}
/// Handles `VmmAction::RemoveHostDevice`: removes the VFIO device identified by
/// `hostdev_id` from the VFIO manager and then revalidates the io_manager
/// cache of all vCPUs.
///
/// # Errors
/// * `VmmActionError::HostDeviceConfig(VfioDeviceError::InvalidVMID)` when
///   `vmm.get_vm_mut()` yields no VM.
/// * `HostDeviceConfig(UpdateNotAllowedPostBoot)`, `UpcallServerNotReady` or
///   `StartMicroVm(..)` when the device operation context cannot be created.
/// * `VmmActionError::HostDeviceConfig(..)` wrapping any error returned by
///   `remove_device`.
/// * `VmmActionError::RemoveHostDevice(..)` when the vCPU manager cannot be
///   obtained or the cache revalidation fails.
#[cfg(feature = "host-device")]
fn remove_vfio_device(&self, vmm: &mut Vmm, hostdev_id: &str) -> VmmRequestResult {
    let no_vm = VmmActionError::HostDeviceConfig(VfioDeviceError::InvalidVMID);
    let vm = vmm.get_vm_mut().ok_or(no_vm)?;
    info!("remove_vfio_device: {:?}", hostdev_id);

    // Translate context-creation failures into the action-level error space.
    let mut ctx = vm.create_device_op_context(None).map_err(|err| {
        info!("create device op context error: {:?}", err);
        match err {
            StartMicroVmError::MicroVMAlreadyRunning => {
                VmmActionError::HostDeviceConfig(VfioDeviceError::UpdateNotAllowedPostBoot)
            }
            StartMicroVmError::UpcallServerNotReady => VmmActionError::UpcallServerNotReady,
            other => VmmActionError::StartMicroVm(other),
        }
    })?;

    // A poisoned lock is not expected here, hence the plain `unwrap()`.
    let mut vfio_manager = vm.device_manager.vfio_manager.lock().unwrap();
    vfio_manager
        .remove_device(&mut ctx, hostdev_id)
        .map_err(VmmActionError::HostDeviceConfig)?;

    // The io_manager cache of every vCPU has to be revalidated so that the
    // old io_manager is dropped and the device's fd gets closed.
    vm.vcpu_manager()
        .map_err(VmmActionError::RemoveHostDevice)?
        .revalidate_all_vcpus_cache()
        .map_err(VmmActionError::RemoveHostDevice)?;

    // FIXME: we should clear corresponding information because vfio module in
    // host kernel will clear iommu table in this scenario.
    Ok(VmmData::Empty)
}
#[cfg(feature = "hotplug")]
#[instrument(skip(self))]
fn resize_vcpu(&mut self, vmm: &mut Vmm, config: VcpuResizeInfo) -> VmmRequestResult {

View File

@ -202,7 +202,7 @@ pub enum Resource {
size: u32,
},
/// Network Interface Card MAC address.
MacAddresss(String),
MacAddress(String),
/// KVM memslot index.
KvmMemSlot(u32),
}
@ -310,7 +310,7 @@ impl DeviceResources {
/// Get the first resource information for NIC MAC address.
pub fn get_mac_address(&self) -> Option<String> {
for entry in self.0.iter().as_ref() {
if let Resource::MacAddresss(addr) = entry {
if let Resource::MacAddress(addr) = entry {
return Some(addr.clone());
}
}
@ -403,7 +403,7 @@ pub(crate) mod tests {
resource.append(entry.clone());
assert_eq!(entry, resource[6]);
let entry = Resource::MacAddresss(MAC_ADDRESS.to_string());
let entry = Resource::MacAddress(MAC_ADDRESS.to_string());
resource.append(entry.clone());
assert_eq!(entry, resource[7]);

View File

@ -40,6 +40,8 @@ pub use configuration::{
mod device;
pub use device::PciDevice;
#[cfg(target_arch = "aarch64")]
pub use device::{PciBusResources, ECAM_SPACE_LENGTH};
mod root_bus;
pub use root_bus::create_pci_root_bus;
@ -54,6 +56,7 @@ mod msix;
pub use msix::{MsixCap, MsixState, MSIX_TABLE_ENTRY_SIZE};
mod vfio;
pub use vfio::{VfioPciDevice, VfioPciError, VENDOR_NVIDIA};
/// Error codes related to PCI root/bus/device operations.
#[derive(Debug, thiserror::Error)]

View File

@ -3,6 +3,7 @@
//
// SPDX-License-Identifier: Apache-2.0
use std::any::Any;
use std::io;
use std::os::unix::io::AsRawFd;
use std::ptr::null_mut;
@ -228,7 +229,7 @@ impl Interrupt {
fn get_irq_pin(&self) -> u32 {
if let Some(legacy_irq) = self.legacy_irq {
(PciInterruptPin::IntA as u32) << 8 | self.legacy_irq.unwrap()
(PciInterruptPin::IntA as u32) << 8 | legacy_irq
} else {
0
}
@ -884,7 +885,7 @@ impl Region {
}
}
struct VfioPciDeviceState<C: PciSystemContext> {
pub struct VfioPciDeviceState<C: PciSystemContext> {
vfio_path: String,
interrupt: Interrupt,
vfio_dev: Arc<VfioDevice>,
@ -947,6 +948,10 @@ impl<C: PciSystemContext> VfioPciDeviceState<C> {
})
}
/// Returns a reference to the `Arc<VfioDevice>` stored in this device state.
pub fn vfio_dev(&self) -> &Arc<VfioDevice> {
&self.vfio_dev
}
fn read_config_byte(&self, offset: u32) -> u8 {
let mut data: [u8; 1] = [0];
self.vfio_dev
@ -1314,6 +1319,23 @@ impl<C: PciSystemContext> VfioPciDeviceState<C> {
Ok(())
}
/// Gathers the resources of every region of this device into one
/// `DeviceResources` set and hands that set to the owning bus via
/// `free_resources`.
///
/// # Errors
/// Returns `VfioPciError::BusIsDropped` when the weak reference to the bus can
/// no longer be upgraded.
fn free_register_resources(&self) -> Result<()> {
    // Collect a copy of each resource entry from all regions first.
    let mut collected = DeviceResources::new();
    for region in self.regions.iter() {
        for entry in region.to_resources().get_all_resources() {
            collected.append(entry.clone());
        }
    }

    // Only then try to reach the bus, which is held through a weak reference.
    let bus = self.bus.upgrade().ok_or(VfioPciError::BusIsDropped)?;
    bus.free_resources(collected);

    Ok(())
}
fn unregister_regions(&mut self, vm: &Arc<VmFd>) -> Result<()> {
// This routine handle VfioPciDevice dropped but not unmap memory
if self.context.upgrade().is_none() {
@ -1661,7 +1683,7 @@ impl<C: PciSystemContext> VfioPciDevice<C> {
Ok(())
}
fn state(&self) -> MutexGuard<VfioPciDeviceState<C>> {
pub fn state(&self) -> MutexGuard<VfioPciDeviceState<C>> {
// Don't expect poisoned lock
self.state
.lock()
@ -1687,6 +1709,14 @@ impl<C: PciSystemContext> VfioPciDevice<C> {
.expect("poisoned lock for VFIO PCI device")
.read_config_word(PCI_CONFIG_VENDOR_OFFSET)
}
/// Frees the resources registered for this device's regions and then
/// unregisters the regions from the VM.
///
/// # Errors
/// Propagates any error from `free_register_resources`. A failure of
/// `unregister_regions` is intentionally discarded.
pub fn clear_device(&self) -> Result<()> {
    let mut guard = self.state();
    guard.free_register_resources()?;
    // Best effort: the outcome of unregistering the regions is ignored.
    guard.unregister_regions(&self.vm_fd).ok();
    Ok(())
}
}
impl<C: 'static + PciSystemContext> DeviceIo for VfioPciDevice<C> {
@ -1784,7 +1814,8 @@ impl<C: 'static + PciSystemContext> DeviceIo for VfioPciDevice<C> {
fn get_trapped_io_resources(&self) -> DeviceResources {
self.state().trapped_resources.clone()
}
fn as_any(&self) -> &dyn std::any::Any {
fn as_any(&self) -> &dyn Any {
self
}
}

View File

@ -45,6 +45,16 @@ struct DevMgrMsgHeader {
pub msg_flags: u32,
}
/// Command struct to add/del a PCI Device.
///
/// Used as the payload of `DevMgrRequest::AddPciDev` and
/// `DevMgrRequest::DelPciDev`. The layout is `#[repr(C)]` because the value is
/// written directly into the request byte buffer after the message header.
///
/// Both fields are plain `u8`s, so equality is total; `Eq` is derived in
/// addition to `PartialEq`, matching the MMIO request struct in this module.
#[repr(C)]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct PciDevRequest {
    /// PCI bus number
    pub busno: u8,
    /// Combined device number and function number
    pub devfn: u8,
}
/// Command struct to add/del a MMIO Virtio Device.
#[repr(C)]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
@ -128,6 +138,10 @@ pub enum DevMgrRequest {
AddVcpu(CpuDevRequest),
/// Del a VCPU
DelVcpu(CpuDevRequest),
/// Add a PCI device
AddPciDev(PciDevRequest),
/// Delete a PCI device
DelPciDev(PciDevRequest),
}
impl DevMgrRequest {
@ -167,6 +181,18 @@ impl DevMgrRequest {
let vcpu_dev = unsafe { &mut *(buffer[size_hdr..].as_ptr() as *mut CpuDevRequest) };
*vcpu_dev = s.clone();
}
DevMgrRequest::AddPciDev(s) => {
msg_hdr.msg_type = DevMgrMsgType::AddPci as u32;
msg_hdr.msg_size = mem::size_of::<PciDevRequest>() as u32;
let pci_dev = unsafe { &mut *(buffer[size_hdr..].as_ptr() as *mut PciDevRequest) };
*pci_dev = *s;
}
DevMgrRequest::DelPciDev(s) => {
msg_hdr.msg_type = DevMgrMsgType::DelPci as u32;
msg_hdr.msg_size = mem::size_of::<PciDevRequest>() as u32;
let pci_dev = unsafe { &mut *(buffer[size_hdr..].as_ptr() as *mut PciDevRequest) };
*pci_dev = *s;
}
}
buffer

View File

@ -23,7 +23,7 @@ use log::{debug, error, info, trace, warn};
use timerfd::{SetTimeFlags, TimerFd, TimerState};
pub use crate::dev_mgr_service::{
CpuDevRequest, DevMgrRequest, DevMgrResponse, DevMgrService, MmioDevRequest,
CpuDevRequest, DevMgrRequest, DevMgrResponse, DevMgrService, MmioDevRequest, PciDevRequest,
};
const SERVER_PORT: u32 = 0xDB;

View File

@ -2,6 +2,7 @@
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
use std::any::Any;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, Mutex, MutexGuard};
@ -484,7 +485,7 @@ where
resources
}
fn as_any(&self) -> &dyn std::any::Any {
fn as_any(&self) -> &dyn Any {
self
}
}

View File

@ -20,6 +20,8 @@ use dbs_device::resources::Resource;
use dbs_device::DeviceIo;
use dbs_interrupt::KvmIrqManager;
use dbs_legacy_devices::ConsoleHandler;
#[cfg(all(feature = "host-device", target_arch = "aarch64"))]
use dbs_pci::PciBusResources;
use dbs_utils::epoll_manager::EpollManager;
use kvm_ioctls::VmFd;
@ -36,9 +38,11 @@ use dbs_virtio_devices::{
VirtioDevice,
};
#[cfg(feature = "host-device")]
use dbs_pci::VfioPciDevice;
#[cfg(all(feature = "hotplug", feature = "dbs-upcall"))]
use dbs_upcall::{
DevMgrRequest, DevMgrService, MmioDevRequest, UpcallClient, UpcallClientError,
DevMgrRequest, DevMgrService, MmioDevRequest, PciDevRequest, UpcallClient, UpcallClientError,
UpcallClientRequest, UpcallClientResponse,
};
#[cfg(feature = "hotplug")]
@ -46,6 +50,8 @@ use dbs_virtio_devices::vsock::backend::VsockInnerConnector;
use crate::address_space_manager::GuestAddressSpaceImpl;
use crate::api::v1::InstanceInfo;
#[cfg(feature = "host-device")]
use crate::device_manager::vfio_dev_mgr::PciSystemManager;
use crate::error::StartMicroVmError;
use crate::resource_manager::ResourceManager;
use crate::vm::{KernelConfigInfo, Vm, VmConfigInfo};
@ -107,6 +113,11 @@ use self::balloon_dev_mgr::BalloonDeviceMgr;
pub mod vhost_net_dev_mgr;
#[cfg(feature = "vhost-net")]
use self::vhost_net_dev_mgr::VhostNetDeviceMgr;
#[cfg(feature = "host-device")]
/// Device manager for PCI/MMIO VFIO devices.
pub mod vfio_dev_mgr;
#[cfg(feature = "host-device")]
use self::vfio_dev_mgr::VfioDeviceMgr;
#[cfg(feature = "vhost-user-net")]
/// Device manager for vhost-user-net devices.
@ -164,6 +175,11 @@ pub enum DeviceMgrError {
/// Failed to free device resource.
#[error("failed to free device resources: {0}")]
ResourceError(#[source] crate::resource_manager::ResourceError),
#[cfg(feature = "host-device")]
/// Error from Vfio Pci
#[error("failed to do vfio pci operation: {0:?}")]
VfioPci(#[source] dbs_pci::VfioPciError),
}
/// Specialized version of `std::result::Result` for device manager operations.
@ -268,11 +284,15 @@ pub struct DeviceOpContext {
address_space: Option<AddressSpace>,
logger: slog::Logger,
is_hotplug: bool,
#[cfg(all(feature = "hotplug", feature = "host-device"))]
pci_hotplug_enabled: bool,
#[cfg(all(feature = "hotplug", feature = "dbs-upcall"))]
upcall_client: Option<Arc<UpcallClient<DevMgrService>>>,
#[cfg(feature = "dbs-virtio-devices")]
virtio_devices: Vec<Arc<DbsMmioV2Device>>,
#[cfg(feature = "host-device")]
vfio_manager: Option<Arc<Mutex<VfioDeviceMgr>>>,
vm_config: Option<VmConfigInfo>,
shared_info: Arc<RwLock<InstanceInfo>>,
}
@ -297,6 +317,12 @@ impl DeviceOpContext {
};
let logger = device_mgr.logger.new(slog::o!());
#[cfg(all(feature = "hotplug", feature = "host-device"))]
let pci_hotplug_enabled = vm_config
.clone()
.map(|c| c.pci_hotplug_enabled)
.unwrap_or(false);
DeviceOpContext {
epoll_mgr,
io_context,
@ -307,12 +333,16 @@ impl DeviceOpContext {
address_space,
logger,
is_hotplug,
#[cfg(all(feature = "hotplug", feature = "host-device"))]
pci_hotplug_enabled,
#[cfg(all(feature = "hotplug", feature = "dbs-upcall"))]
upcall_client: None,
#[cfg(feature = "dbs-virtio-devices")]
virtio_devices: Vec::new(),
vm_config,
shared_info,
#[cfg(feature = "host-device")]
vfio_manager: None,
}
}
@ -435,6 +465,13 @@ impl DeviceOpContext {
}
}
#[cfg(feature = "host-device")]
impl DeviceOpContext {
/// Stores the shared VFIO device manager in this context, replacing any
/// previously stored value.
pub(crate) fn set_vfio_manager(&mut self, vfio_device_mgr: Arc<Mutex<VfioDeviceMgr>>) {
self.vfio_manager = Some(vfio_device_mgr);
}
}
#[cfg(all(feature = "hotplug", feature = "dbs-upcall"))]
impl DeviceOpContext {
pub(crate) fn create_hotplug_ctx(vm: &Vm, epoll_mgr: Option<EpollManager>) -> Self {
@ -510,6 +547,37 @@ impl DeviceOpContext {
self.call_hotplug_device(req, callback)
}
/// Builds a `DevMgrRequest::AddPciDev` request for `dev` and submits it
/// through `call_hotplug_device`, passing `callback` along unchanged.
///
/// # Errors
/// * `DeviceMgrError::InvalidOperation` unless this context is a hotplug
///   context and PCI hotplug is enabled.
/// * Any error from `DeviceManager::get_pci_device_info` or
///   `call_hotplug_device`.
#[cfg(feature = "host-device")]
pub(crate) fn insert_hotplug_pci_device(
    &self,
    dev: &Arc<dyn DeviceIo>,
    callback: Option<Box<dyn Fn(UpcallClientResponse) + Send>>,
) -> Result<()> {
    // Both conditions must hold before a PCI hot-add may be requested.
    if !(self.is_hotplug && self.pci_hotplug_enabled) {
        return Err(DeviceMgrError::InvalidOperation);
    }

    let (bus, dev_fn) = DeviceManager::get_pci_device_info(dev)?;
    let payload = PciDevRequest {
        busno: bus,
        devfn: dev_fn,
    };

    self.call_hotplug_device(DevMgrRequest::AddPciDev(payload), callback)
}
/// Build a `DelPciDev` request for `dev` and pass it, together with
/// `callback`, to `call_hotplug_device`.
///
/// Returns `DeviceMgrError::InvalidOperation` unless this context is a hotplug
/// context with PCI hotplug enabled. Errors from resolving the device's
/// bus number / devfn or from `call_hotplug_device` are propagated.
#[cfg(feature = "host-device")]
pub(crate) fn remove_hotplug_pci_device(
    &self,
    dev: &Arc<dyn DeviceIo>,
    callback: Option<Box<dyn Fn(UpcallClientResponse) + Send>>,
) -> Result<()> {
    let hotplug_allowed = self.is_hotplug && self.pci_hotplug_enabled;
    if !hotplug_allowed {
        return Err(DeviceMgrError::InvalidOperation);
    }

    let (busno, devfn) = DeviceManager::get_pci_device_info(dev)?;
    self.call_hotplug_device(
        DevMgrRequest::DelPciDev(PciDevRequest { busno, devfn }),
        callback,
    )
}
}
#[cfg(all(feature = "hotplug", feature = "acpi"))]
@ -555,6 +623,8 @@ pub struct DeviceManager {
#[cfg(feature = "vhost-user-net")]
vhost_user_net_manager: VhostUserNetDeviceMgr,
#[cfg(feature = "host-device")]
pub(crate) vfio_manager: Arc<Mutex<VfioDeviceMgr>>,
}
impl DeviceManager {
@ -571,7 +641,7 @@ impl DeviceManager {
io_lock: Arc::new(Mutex::new(())),
irq_manager: Arc::new(KvmIrqManager::new(vm_fd.clone())),
res_manager,
vm_fd,
vm_fd: vm_fd.clone(),
logger: logger.new(slog::o!()),
shared_info,
@ -595,6 +665,8 @@ impl DeviceManager {
vhost_net_manager: VhostNetDeviceMgr::default(),
#[cfg(feature = "vhost-user-net")]
vhost_user_net_manager: VhostUserNetDeviceMgr::default(),
#[cfg(feature = "host-device")]
vfio_manager: Arc::new(Mutex::new(VfioDeviceMgr::new(vm_fd, logger))),
}
}
@ -775,6 +847,14 @@ impl DeviceManager {
.attach_devices(&mut ctx)
.map_err(StartMicroVmError::VhostUserNetDeviceError)?;
#[cfg(feature = "host-device")]
{
// It is safe because we don't expect poison lock.
let mut vfio_manager = self.vfio_manager.lock().unwrap();
vfio_manager.attach_devices(&mut ctx)?;
ctx.set_vfio_manager(self.vfio_manager.clone())
}
// Ensure that all devices are attached before kernel boot args are
// generated.
ctx.generate_kernel_boot_args(kernel_config)
@ -792,8 +872,17 @@ impl DeviceManager {
}
/// Start all registered devices when booting the associated virtual machine.
pub fn start_devices(&mut self) -> std::result::Result<(), StartMicroVmError> {
// TODO: add vfio support here. issue #4589.
pub fn start_devices(
    &mut self,
    vm_as: &GuestAddressSpaceImpl,
) -> std::result::Result<(), StartMicroVmError> {
    #[cfg(feature = "host-device")]
    {
        // Unwrapping is acceptable: a poisoned lock is not expected here.
        let mut vfio_manager = self.vfio_manager.lock().unwrap();
        vfio_manager
            .start_devices(vm_as)
            .map_err(StartMicroVmError::RegisterDMAAddress)?;
    }
    Ok(())
}
@ -943,6 +1032,23 @@ impl DeviceManager {
Err(DeviceMgrError::GetDeviceResource)
}
/// Get pci bus resources for creating fdt.
///
/// Returns `None` while the VFIO device manager has no PCI manager.
#[cfg(feature = "host-device")]
pub fn get_pci_bus_resources(&self) -> Option<PciBusResources> {
    // Panics only if the lock is poisoned.
    let mut vfio_dev_mgr = self.vfio_manager.lock().unwrap();
    let pci_manager = vfio_dev_mgr.get_pci_manager()?;
    Some(PciBusResources {
        ecam_space: pci_manager.get_ecam_space(),
        bar_space: pci_manager.get_bar_space(),
    })
}
}
#[cfg(feature = "dbs-virtio-devices")]
@ -1115,6 +1221,30 @@ impl DeviceManager {
Ok(())
}
}
/// Resolve the `(bus number, devfn)` pair of a VFIO PCI device.
///
/// Fails with `DeviceMgrError::GetDeviceResource` when `device` is not a
/// `VfioPciDevice<PciSystemManager>`, and with `DeviceMgrError::VfioPci` when
/// the device cannot report its bus id.
#[cfg(feature = "host-device")]
fn get_pci_device_info(device: &Arc<dyn DeviceIo>) -> Result<(u8, u8)> {
    // The function number is always reported as 0.
    const FUNC: u8 = 0;

    let pci_dev = device
        .as_any()
        .downcast_ref::<VfioPciDevice<PciSystemManager>>()
        .ok_or(DeviceMgrError::GetDeviceResource)?;

    // Encoding reference from the kernel: include/uapi/linux/pci.h
    let busno = pci_dev.bus_id().map_err(DeviceMgrError::VfioPci)?;
    let slot = pci_dev.device_id();
    // devfn packs slot and function into one byte:
    //   bits 7:3 = slot, bits 2:0 = function
    let devfn = ((slot & 0x1f) << 3) | (FUNC & 0x07);
    Ok((busno, devfn))
}
}
#[cfg(feature = "hotplug")]
@ -1202,6 +1332,8 @@ mod tests {
vhost_net_manager: VhostNetDeviceMgr::default(),
#[cfg(feature = "vhost-user-net")]
vhost_user_net_manager: VhostUserNetDeviceMgr::default(),
#[cfg(feature = "host-device")]
vfio_manager: Arc::new(Mutex::new(VfioDeviceMgr::new(vm_fd, &logger))),
logger,
shared_info,
@ -1243,6 +1375,7 @@ mod tests {
sockets: 1,
},
vpmu_feature: 0,
pci_hotplug_enabled: false,
};
vm.set_vm_config(vm_config.clone());
vm.init_guest_memory().unwrap();

View File

@ -0,0 +1,818 @@
// Copyright 2023 Alibaba, Inc. or its affiliates. All Rights Reserved.
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.
//! Device manager for host passthrough devices.
// we allow missing_docs temporarily, because rust can't use this declaration in macro
#![allow(missing_docs)]
mod pci_vfio;
pub use pci_vfio::PciSystemManager;
use std::collections::HashMap;
use std::ops::Deref;
use std::os::fd::RawFd;
use std::path::Path;
use std::sync::{Arc, Weak};
use crossbeam_channel::Sender;
use dbs_device::resources::Resource::LegacyIrq;
use dbs_device::resources::{DeviceResources, Resource, ResourceConstraint};
use dbs_device::DeviceIo;
use dbs_interrupt::KvmIrqManager;
use dbs_pci::{VfioPciDevice, VENDOR_NVIDIA};
use dbs_upcall::{DevMgrResponse, UpcallClientResponse};
use kvm_ioctls::{DeviceFd, VmFd};
use log::{debug, error};
use serde_derive::{Deserialize, Serialize};
use vfio_ioctls::{VfioContainer, VfioDevice};
use vm_memory::{
Address, GuestAddressSpace, GuestMemory, GuestMemoryRegion, GuestRegionMmap,
MemoryRegionAddress,
};
use super::StartMicroVmError;
use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl};
use crate::config_manager::{ConfigItem, DeviceConfigInfo, DeviceConfigInfos};
use crate::device_manager::{DeviceManagerContext, DeviceMgrError, DeviceOpContext};
use crate::resource_manager::{ResourceError, ResourceManager};
// The flag of whether to use the shared irq.
const USE_SHARED_IRQ: bool = true;
/// Errors associated with the operations allowed on a host (VFIO passthrough) device.
///
/// Variants carrying a `#[source]` wrap the underlying error of the subsystem
/// that failed; the remaining variants describe failures detected by the
/// VFIO device manager itself.
#[derive(Debug, thiserror::Error)]
pub enum VfioDeviceError {
/// Internal error.
#[error("VFIO subsystem internal error")]
InternalError,
/// The virtual machine instance ID is invalid.
#[error("the virtual machine instance ID is invalid")]
InvalidVMID,
/// Cannot open host device due to invalid bus::slot::function
#[error("can't open host device for VFIO")]
CannotOpenVfioDevice,
/// The host device ID (`hostdev_id`) is already used by another configuration.
#[error("the device ID {0} already exists")]
DeviceIDAlreadyExist(String),
/// Host device string (sysfs path or bus::slot::function) is already in use
#[error("device '{0}' is already in use")]
DeviceAlreadyInUse(String),
/// The configuration of vfio device is invalid.
#[error("The configuration of vfio device is invalid")]
InvalidConfig,
/// No resource available (guest device id or device resources could not be allocated)
#[error("no resource available for VFIO device")]
NoResource,
/// Cannot perform the requested operation after booting the microVM
#[error("update operation is not allowed after boot")]
UpdateNotAllowedPostBoot,
/// Failed to create kvm device
#[error("failed to create kvm device: {0:?}")]
CreateKvmDevice(#[source] vmm_sys_util::errno::Error),
/// Failed to restore vfio mlock count
#[error("failure while restoring vfio mlock count: {0:?}")]
RestoreMlockCount(#[source] std::io::Error),
/// Failure in device manager while managing VFIO device
#[error("failure in device manager while managing VFIO device, {0:?}")]
VfioDeviceMgr(#[source] DeviceMgrError),
/// Failure in VFIO IOCTL subsystem.
#[error("failure while configuring VFIO device, {0:?}")]
VfioIoctlError(#[source] vfio_ioctls::VfioError),
/// Failure in VFIO PCI subsystem.
#[error("failure while managing PCI VFIO device: {0:?}")]
VfioPciError(#[source] dbs_pci::VfioPciError),
/// Failure in PCI subsystem.
#[error("PCI subsystem failed to manage the device: {0:?}")]
PciError(#[source] dbs_pci::Error),
/// Failed to get vfio host info
#[error("PCI get host info failed: {0}")]
GetHostInfo(String),
/// Invalid PCI device ID
#[error("invalid PCI device ID: {0}")]
InvalidDeviceID(u32),
/// Failed to allocate device resource
#[error("failure while allocate device resource: {0:?}")]
AllocateDeviceResource(#[source] ResourceError),
/// Failed to free device resource
#[error("failure while freeing device resource: {0:?}")]
FreeDeviceResource(#[source] ResourceError),
/// Vfio container not found
#[error("vfio container not found")]
VfioContainerNotFound,
/// Generic IO error.
#[error("Generic IO error, {0}")]
IoError(#[source] std::io::Error),
}
type Result<T> = std::result::Result<T, VfioDeviceError>;
/// Host info for vfio device
// NOTE(review): the field meanings below are inferred from their names only;
// nothing in this file reads or writes them — confirm against the users.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct VfioDeviceHostInfo {
// Presumably the id of the VFIO group the device belongs to.
pub group_id: u32,
// Presumably the raw fd of the opened VFIO group.
pub group_fd: RawFd,
// Presumably the raw fd of the opened VFIO device.
pub device_fd: RawFd,
}
/// Configuration information for a VFIO PCI device.
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize, Default)]
pub struct VfioPciDeviceConfig {
/// PCI device information: "bus:slot:function"
pub bus_slot_func: String,
/// PCI vendor and device id
/// high 16bit : low 16bit = device_id : vendor_id
/// (0 is accepted by `valid_vendor_device`)
pub vendor_device_id: u32,
/// Device ID used in guest, guest_dev_id = slot
pub guest_dev_id: Option<u8>,
/// Clique ID for Nvidia GPUs and RDMA NICs
pub clique_id: Option<u8>,
}
impl VfioPciDeviceConfig {
    /// PCI domain of the host device; this always reports the default domain 0.
    pub fn host_pci_domain(&self) -> u32 {
        0
    }

    /// Check whether `vendor_device_id` is acceptable.
    ///
    /// A value of 0 is accepted as-is. Any other value must carry both a
    /// non-zero vendor id (low 16 bits) and a non-zero device id (high 16 bits).
    pub fn valid_vendor_device(&self) -> bool {
        let vendor_id = self.vendor_device_id & 0xffff;
        let device_id = (self.vendor_device_id >> 16) & 0xffff;
        self.vendor_device_id == 0 || (vendor_id != 0 && device_id != 0)
    }
}
/// Configuration for a specific Vfio Device
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
pub enum VfioDevConfig {
// PCI is the only device kind defined here.
Pci(VfioPciDeviceConfig),
}
impl Default for VfioDevConfig {
fn default() -> Self {
Self::Pci(Default::default())
}
}
/// Configuration information for a VFIO device.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize, Default)]
pub struct HostDeviceConfig {
/// Unique identifier of the hostdev
pub hostdev_id: String,
/// Sysfs path for device; when empty, `build_sysfs_path` derives the path
/// from `dev_config.bus_slot_func` instead
pub sysfs_path: String,
/// Device specific config
pub dev_config: VfioPciDeviceConfig,
}
impl ConfigItem for HostDeviceConfig {
    type Err = VfioDeviceError;

    /// The config is identified by its `hostdev_id`.
    fn id(&self) -> &str {
        self.hostdev_id.as_str()
    }

    /// Reject `other` when it shares this config's id, its non-empty sysfs
    /// path, or its non-empty bus:slot:function string.
    fn check_conflicts(&self, other: &Self) -> Result<()> {
        if self.hostdev_id == other.hostdev_id {
            let id = self.hostdev_id.clone();
            return Err(VfioDeviceError::DeviceIDAlreadyExist(id));
        }

        let sysfs_path = &self.sysfs_path;
        if !sysfs_path.is_empty() && *sysfs_path == other.sysfs_path {
            return Err(VfioDeviceError::DeviceAlreadyInUse(sysfs_path.clone()));
        }

        let bdf = &self.dev_config.bus_slot_func;
        if !bdf.is_empty() && *bdf == other.dev_config.bus_slot_func {
            return Err(VfioDeviceError::DeviceAlreadyInUse(bdf.clone()));
        }

        Ok(())
    }
}
/// Vfio device info
pub type VfioDeviceInfo = DeviceConfigInfo<HostDeviceConfig>;
/// A device manager to manage all VFIO devices.
pub struct VfioDeviceMgr {
// KVM VM handle; `get_kvm_dev_fd` uses it to create the KVM VFIO device.
vm_fd: Arc<VmFd>,
// Configured host devices, each with its attached device object once created.
info_list: DeviceConfigInfos<HostDeviceConfig>,
// Running total of bytes DMA-mapped through `register_region`.
locked_vm_size: u64,
// VFIO container, created on first use by `get_vfio_container`.
vfio_container: Option<Arc<VfioContainer>>,
// PCI system manager, created on first use by `create_pci_manager`.
pci_vfio_manager: Option<Arc<PciSystemManager>>,
// Legacy IRQ of each attached device, keyed by the device id.
pci_legacy_irqs: Option<HashMap<u8, u8>>,
// Legacy IRQ shared by NVIDIA devices; recorded when the first one is attached.
nvidia_shared_irq: Option<u32>,
// Child logger derived from the logger passed to `new`.
logger: slog::Logger,
}
impl VfioDeviceMgr {
/// Create a new VFIO device manager.
pub fn new(vm_fd: Arc<VmFd>, logger: &slog::Logger) -> Self {
VfioDeviceMgr {
vm_fd,
info_list: DeviceConfigInfos::new(),
locked_vm_size: 0,
vfio_container: None,
pci_vfio_manager: None,
pci_legacy_irqs: Some(HashMap::new()),
nvidia_shared_irq: None,
logger: logger.new(slog::o!()),
}
}
/// Insert or update a VFIO device into the manager.
///
/// The configuration is always recorded in the device list. When `ctx` is a
/// hotplug context the device is additionally created and attached right
/// away; without the `hotplug` feature such a request is rejected with
/// `VfioDeviceError::UpdateNotAllowedPostBoot`.
pub fn insert_device(
    &mut self,
    ctx: &mut DeviceOpContext,
    config: HostDeviceConfig,
) -> Result<()> {
    if ctx.is_hotplug && !cfg!(feature = "hotplug") {
        return Err(VfioDeviceError::UpdateNotAllowedPostBoot);
    }

    slog::info!(
        ctx.logger(),
        "add VFIO device configuration";
        "subsystem" => "vfio_dev_mgr",
        "hostdev_id" => &config.hostdev_id,
        "bdf" => &config.dev_config.bus_slot_func,
    );
    let device_index = self.info_list.insert_or_update(&config)?;

    // Before boot only the configuration is recorded.
    if !ctx.is_hotplug {
        return Ok(());
    }

    // Handle device hotplug case
    slog::info!(
        ctx.logger(),
        "attach VFIO device";
        "subsystem" => "vfio_dev_mgr",
        "hostdev_id" => &config.hostdev_id,
        "bdf" => &config.dev_config.bus_slot_func,
    );
    self.add_device(ctx, &config, device_index)
}
/// Attach all configured VFIO device to the virtual machine instance.
///
/// Every failure is reported as `StartMicroVmError::CreateVfioDevice`.
pub fn attach_devices(
    &mut self,
    ctx: &mut DeviceOpContext,
) -> std::result::Result<(), StartMicroVmError> {
    // Create the PCI manager (and with it the root bus) up front when PCI
    // hotplug is enabled.
    #[cfg(all(feature = "hotplug", feature = "host-device"))]
    if ctx.pci_hotplug_enabled {
        let _ = self
            .create_pci_manager(
                ctx.irq_manager.clone(),
                ctx.io_context.clone(),
                ctx.res_manager.clone(),
            )
            .map_err(StartMicroVmError::CreateVfioDevice)?;
    }

    // Walk a snapshot of the list: `create_device` needs `&mut self`.
    let configured = self.info_list.clone();
    for (idx, info) in configured.iter().enumerate() {
        self.create_device(&info.config, ctx, idx)
            .map_err(StartMicroVmError::CreateVfioDevice)?;
    }
    Ok(())
}
/// Remove the VFIO device identified by `hostdev_id`.
///
/// The entry is taken out of the device list first and its device is then
/// torn down. Returns `VfioDeviceError::InvalidConfig` when no entry has that
/// id, and `VfioDeviceError::UpdateNotAllowedPostBoot` when the `hotplug`
/// feature is disabled.
pub fn remove_device(&mut self, ctx: &mut DeviceOpContext, hostdev_id: &str) -> Result<()> {
    if !cfg!(feature = "hotplug") {
        return Err(VfioDeviceError::UpdateNotAllowedPostBoot);
    }

    slog::info!(
        ctx.logger(),
        "remove VFIO device";
        "subsystem" => "vfio_dev_mgr",
        "hostdev_id" => hostdev_id,
    );

    let mut info = self
        .get_index_of_hostdev_id(hostdev_id)
        .and_then(|device_index| self.info_list.remove(device_index))
        .ok_or(VfioDeviceError::InvalidConfig)?;
    self.remove_vfio_device(ctx, &mut info)
}
/// Prepare to remove a device: send the PCI hot-unplug request for it.
///
/// Looks up the entry named `hostdev_id` (`VfioDeviceError::InvalidConfig` if
/// there is none). If the entry has an attached device, a removal request is
/// issued through `ctx.remove_hotplug_pci_device` with a callback that reports
/// the outcome on `result_sender`. If the entry has no attached device,
/// nothing is sent and `Ok(())` is returned.
///
/// Returns `VfioDeviceError::UpdateNotAllowedPostBoot` when the `hotplug`
/// feature is disabled.
pub fn prepare_remove_device(
&self,
ctx: &DeviceOpContext,
hostdev_id: &str,
result_sender: Sender<Option<i32>>,
) -> Result<()> {
if !cfg!(feature = "hotplug") {
return Err(VfioDeviceError::UpdateNotAllowedPostBoot);
}
slog::info!(
ctx.logger(),
"prepare remove VFIO device";
"subsystem" => "vfio_dev_mgr",
"hostdev_id" => hostdev_id,
);
let device_index = self
.get_index_of_hostdev_id(hostdev_id)
.ok_or(VfioDeviceError::InvalidConfig)?;
let info = &self.info_list[device_index];
if let Some(dev) = info.device.as_ref() {
// The callback owns `result_sender` and forwards the upcall outcome to it.
let callback: Option<Box<dyn Fn(UpcallClientResponse) + Send>> =
Some(Box::new(move |result| match result {
UpcallClientResponse::DevMgr(response) => {
// Only `DevMgrResponse::Other` carries a result code to forward;
// any other device-manager response is dropped without a message.
if let DevMgrResponse::Other(resp) = response {
if let Err(e) = result_sender.send(Some(resp.result)) {
error!("send upcall result failed, due to {:?}!", e);
}
}
}
// An upcall reset is reported to the receiver as `None`.
UpcallClientResponse::UpcallReset => {
if let Err(e) = result_sender.send(None) {
error!("send upcall result failed, due to {:?}!", e);
}
}
#[allow(unreachable_patterns)]
_ => {
debug!("this arm should only be triggered under test");
}
}));
ctx.remove_hotplug_pci_device(dev, callback)
.map_err(VfioDeviceError::VfioDeviceMgr)?
}
Ok(())
}
/// Detach the device held by `info` and tear it down.
///
/// The device is taken out of `info`; `VfioDeviceError::InvalidConfig` is
/// returned when `info` holds no device.
fn remove_vfio_device(
    &mut self,
    ctx: &mut DeviceOpContext,
    info: &mut DeviceConfigInfo<HostDeviceConfig>,
) -> Result<()> {
    match info.device.take() {
        Some(device) => self.remove_pci_vfio_device(&device, ctx),
        None => Err(VfioDeviceError::InvalidConfig),
    }
}
/// Start all VFIO devices.
///
/// When a VFIO container exists, every region of the guest memory behind
/// `vm_as` is registered with it; otherwise this does nothing.
pub fn start_devices(&mut self, vm_as: &GuestAddressSpaceImpl) -> Result<()> {
    if self.vfio_container.is_none() {
        return Ok(());
    }

    let vm_memory = vm_as.memory();
    self.register_memory(vm_memory.deref())
}
/// Create a KVM device of type `KVM_DEV_TYPE_VFIO` on the VM and return its fd.
///
/// A failure is converted to `VfioDeviceError::IoError` built from the
/// reported errno.
pub(crate) fn get_kvm_dev_fd(&self) -> Result<DeviceFd> {
    let mut kvm_vfio_dev = kvm_bindings::kvm_create_device {
        type_: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_VFIO,
        fd: 0,
        flags: 0,
    };

    self.vm_fd.create_device(&mut kvm_vfio_dev).map_err(|err| {
        let io_err = std::io::Error::from_raw_os_error(err.errno());
        VfioDeviceError::IoError(io_err)
    })
}
/// Get the vfio container object, creating it on first use.
///
/// The first call creates a KVM VFIO device, builds the container on top of
/// it and caches the container; later calls return the cached one.
pub fn get_vfio_container(&mut self) -> Result<Arc<VfioContainer>> {
    if let Some(existing) = self.vfio_container.as_ref() {
        return Ok(existing.clone());
    }

    let kvm_dev_fd = Arc::new(self.get_kvm_dev_fd()?);
    let container = VfioContainer::new(kvm_dev_fd).map_err(VfioDeviceError::VfioIoctlError)?;
    let container = Arc::new(container);
    self.vfio_container = Some(container.clone());
    Ok(container)
}
/// Create and attach the device described by `cfg`, and record it in the
/// device list entry at index `idx`.
fn create_device(
    &mut self,
    cfg: &HostDeviceConfig,
    ctx: &mut DeviceOpContext,
    idx: usize,
) -> Result<Arc<dyn DeviceIo>> {
    let path = Self::build_sysfs_path(cfg)?;
    let attached = self.attach_pci_vfio_device(ctx, path, &cfg.dev_config)?;
    self.info_list[idx].device = Some(Arc::clone(&attached));
    Ok(attached)
}
/// Hot-add path: create the device, make sure guest memory is registered
/// with the VFIO container, then issue the PCI hotplug request for it.
fn add_device(
    &mut self,
    ctx: &mut DeviceOpContext,
    cfg: &HostDeviceConfig,
    idx: usize,
) -> Result<()> {
    let dev = self.create_device(cfg, ctx, idx)?;

    // Guest memory is registered only while nothing is accounted as mapped.
    let needs_dma_mapping = self.vfio_container.is_some() && self.locked_vm_size == 0;
    if needs_dma_mapping {
        let vm_as = ctx
            .get_vm_as()
            .map_err(|_| VfioDeviceError::InternalError)?;
        let vm_memory = vm_as.memory();
        self.register_memory(vm_memory.deref())?;
    }

    ctx.insert_hotplug_pci_device(&dev, None)
        .map_err(VfioDeviceError::VfioDeviceMgr)
}
/// Find the position in the device list of the entry whose config id equals
/// `id`; `None` when there is no such entry.
fn get_index_of_hostdev_id(&self, id: &str) -> Option<usize> {
    let mut entries = self.info_list.iter();
    entries.position(|info| info.config.id() == id)
}
/// Register guest memory to the VFIO container.
///
/// Regions are registered one by one; the first failure stops the walk and
/// is returned.
///
/// # Arguments
/// * `vm_memory`: guest memory whose regions are registered.
pub(crate) fn register_memory(&mut self, vm_memory: &GuestMemoryImpl) -> Result<()> {
    vm_memory
        .iter()
        .try_for_each(|region| self.register_memory_region(region))
}
/// Register one guest memory region: its guest start address, length, host
/// address and write protection are read and handed to `register_region`.
///
/// Panics if the region has no host address.
pub(crate) fn register_memory_region(&mut self, region: &GuestRegionMmap) -> Result<()> {
    let host_addr = region
        .get_host_address(MemoryRegionAddress(0))
        .expect("guest memory region should be mapped and has HVA.");
    // The region is read-only when PROT_WRITE is absent from its protection.
    let readonly = (region.prot() & libc::PROT_WRITE) == 0;

    self.register_region(
        region.start_addr().raw_value(),
        region.len(),
        host_addr as u64,
        readonly,
    )
}
/// DMA-map `size` bytes at host address `user_addr` to `iova` in the VFIO
/// container, and add `size` to the mapped-bytes counter on success.
///
/// `readonly` is only logged for now; the mapping call does not take it.
pub(crate) fn register_region(
    &mut self,
    iova: u64,
    size: u64,
    user_addr: u64,
    readonly: bool,
) -> Result<()> {
    slog::info!(
        self.logger,
        "map guest physical memory";
        "subsystem" => "vfio_dev_mgr",
        "iova" => iova,
        "size" => size,
        "user_addr" => user_addr,
        "readonly" => readonly,
    );

    //FIXME: add readonly flag when related commit is pushed to upstream vfio-ioctls
    let container = self.get_vfio_container()?;
    container
        .vfio_dma_map(iova, size, user_addr)
        .map_err(VfioDeviceError::VfioIoctlError)?;

    self.locked_vm_size += size;
    Ok(())
}
/// Clear locked size because iommu table is cleared
///
/// Only the counter is reset to 0; no unmap is issued here.
pub(crate) fn clear_locked_size(&mut self) {
self.locked_vm_size = 0;
}
/// Remove the DMA mapping of `region` from the VFIO container and subtract
/// its length from the mapped-bytes counter.
///
/// A failed unmap is returned as `VfioDeviceError::VfioIoctlError` and leaves
/// the counter untouched.
pub(crate) fn unregister_region(&mut self, region: &GuestRegionMmap) -> Result<()> {
    let gpa = region.start_addr().raw_value();
    let size = region.len();

    self.get_vfio_container()?
        .vfio_dma_unmap(gpa, size)
        .map_err(VfioDeviceError::VfioIoctlError)?;

    // The counter can already be below `size`, e.g. after `clear_locked_size`
    // reset it. Saturate at zero instead of underflowing, which would panic
    // in debug builds and wrap to a huge value in release builds.
    self.locked_vm_size = self.locked_vm_size.saturating_sub(size);
    Ok(())
}
/// Register `region` with the VFIO container, but only while the
/// mapped-bytes counter is non-zero; otherwise this does nothing.
pub(crate) fn update_memory(&mut self, region: &GuestRegionMmap) -> Result<()> {
    if self.locked_vm_size == 0 {
        return Ok(());
    }
    self.register_memory_region(region)
}
/// Work out the sysfs path of the host device described by `cfg`.
///
/// An explicit `cfg.sysfs_path` wins. Otherwise the path is built from
/// `dev_config.bus_slot_func`: a two-part string (one `:`) gets the host PCI
/// domain prepended, anything else is used as given.
///
/// Returns `VfioDeviceError::InvalidConfig` when both the sysfs path and the
/// bus:slot:function string are empty.
pub(crate) fn build_sysfs_path(cfg: &HostDeviceConfig) -> Result<String> {
    if !cfg.sysfs_path.is_empty() {
        return Ok(cfg.sysfs_path.clone());
    }

    let bdf = &cfg.dev_config.bus_slot_func;
    // `split` always yields at least one item, so an empty string has to be
    // rejected explicitly; counting the parts can never report zero.
    if bdf.is_empty() {
        return Err(VfioDeviceError::InvalidConfig);
    }

    match bdf.split(':').count() {
        // The domain is missing: prepend it. sysfs prints the domain as four
        // hex digits.
        2 => Ok(format!(
            "/sys/bus/pci/devices/{:04x}:{}",
            cfg.dev_config.host_pci_domain(),
            bdf
        )),
        _ => Ok(format!("/sys/bus/pci/devices/{}", bdf)),
    }
}
/// Get all PCI devices' legacy irqs
///
/// The map is keyed by device id and holds the legacy IRQ recorded when the
/// device was attached.
pub fn get_pci_legacy_irqs(&self) -> Option<&HashMap<u8, u8>> {
self.pci_legacy_irqs.as_ref()
}
}
impl VfioDeviceMgr {
/// Create a VFIO PCI device for the host device at `sysfs_path`, give it
/// resources, activate it and register it on the PCI root bus.
///
/// # Arguments
/// * `ctx`: device operation context supplying the IRQ manager, I/O context,
///   resource manager and VM fd.
/// * `sysfs_path`: sysfs path of the host device to pass through.
/// * `cfg`: PCI specific configuration of the device.
///
/// # Errors
/// * `InvalidConfig` when `cfg.vendor_device_id` fails validation.
/// * `NoResource` when no guest device id or device resources are available.
/// * `VfioIoctlError`, `VfioPciError`, `PciError` from the respective subsystems.
pub(super) fn attach_pci_vfio_device(
    &mut self,
    ctx: &mut DeviceOpContext,
    sysfs_path: String,
    cfg: &VfioPciDeviceConfig,
) -> Result<Arc<dyn DeviceIo>> {
    slog::info!(
        ctx.logger(),
        "attach vfio pci device";
        "subsystem" => "vfio_dev_mgr",
        "host_bdf" => &cfg.bus_slot_func,
    );

    // Validate the configuration before anything is allocated. Rejecting it
    // after `new_device_id` would leave the guest device id reserved.
    if !cfg.valid_vendor_device() {
        return Err(VfioDeviceError::InvalidConfig);
    }

    let pci_manager = self.create_pci_manager(
        ctx.irq_manager.clone(),
        ctx.io_context.clone(),
        ctx.res_manager.clone(),
    )?;
    let pci_bus = pci_manager.pci_root_bus();
    // Take the weak handle now, while the manager is in hand, so it does not
    // have to be looked up (and unwrapped) again later.
    let pci_manager_weak = Arc::downgrade(pci_manager);
    let id = pci_manager
        .new_device_id(cfg.guest_dev_id)
        .ok_or(VfioDeviceError::NoResource)?;
    // NOTE(review): `id` stays reserved if any step below fails; releasing it
    // on those paths needs confirmation of how `free_device_id` treats an id
    // that was allocated but never registered.
    slog::info!(
        ctx.logger(),
        "PCI:{} vfio pci device id: {}, vendor_device: 0x{:x}",
        &sysfs_path, id, cfg.vendor_device_id;
        "subsystem" => "vfio_dev_mgr",
        "guest_bdf" => id,
    );

    let vfio_container = self.get_vfio_container()?;
    let vfio_dev = VfioDevice::new(Path::new(&sysfs_path), vfio_container.clone())
        .map_err(VfioDeviceError::VfioIoctlError)?;

    // Use Weak::clone to break cycle reference:
    //
    // reference 1: VfioPciDevice reference to PciBus
    // reference 2: VfioPciDevice -> PciManager -> PciBus -> VfioPciDevice
    let vfio_pci_device = Arc::new(
        VfioPciDevice::create(
            id,
            sysfs_path,
            Arc::downgrade(&pci_bus),
            vfio_dev,
            pci_manager_weak,
            ctx.vm_fd.clone(),
            cfg.vendor_device_id,
            cfg.clique_id,
            vfio_container,
        )
        .map_err(VfioDeviceError::VfioPciError)?,
    );

    let mut requires = Vec::new();
    vfio_pci_device.get_resource_requirements(&mut requires);

    // NVIDIA devices share one legacy IRQ: once it is known, do not request
    // another one from the resource manager.
    let vendor_id = vfio_pci_device.vendor_id();
    if vendor_id == VENDOR_NVIDIA && self.nvidia_shared_irq.is_some() {
        requires.retain(|x| !matches!(x, ResourceConstraint::LegacyIrq { irq: _ }));
    }
    let mut resource = ctx
        .res_manager
        .allocate_device_resources(&requires, USE_SHARED_IRQ)
        .or(Err(VfioDeviceError::NoResource))?;
    if vendor_id == VENDOR_NVIDIA {
        if let Some(irq) = self.nvidia_shared_irq {
            // Reuse the IRQ recorded for the first NVIDIA device.
            resource.append(LegacyIrq(irq));
        } else {
            // First NVIDIA device: remember its IRQ for the ones that follow.
            self.nvidia_shared_irq = resource.get_legacy_irq();
        }
    }

    vfio_pci_device
        .activate(
            Arc::downgrade(&vfio_pci_device) as Weak<dyn DeviceIo>,
            resource,
        )
        .map_err(VfioDeviceError::VfioPciError)?;

    // Record the legacy IRQ finally assigned to the device, keyed by device id.
    if let Some(irq) = vfio_pci_device.get_assigned_resources().get_legacy_irq() {
        if let Some(legacy_irqs) = self.pci_legacy_irqs.as_mut() {
            legacy_irqs.insert(vfio_pci_device.device_id(), irq as u8);
        }
    }

    // PciBus reference to VfioPciDevice
    pci_bus
        .register_device(vfio_pci_device.clone())
        .map_err(VfioDeviceError::PciError)?;

    Ok(vfio_pci_device)
}
/// Tear down an attached VFIO PCI device.
///
/// Steps, in order: release the guest device id on the PCI manager, return
/// the device's assigned resources to the resource manager, then clear the
/// device itself.
///
/// Panics if `device` is not a `VfioPciDevice<PciSystemManager>` or if the
/// PCI manager has not been created.
// NOTE(review): the device's entry in `pci_legacy_irqs` is not removed here,
// and `nvidia_shared_irq` is never reset — confirm whether that is intended.
fn remove_pci_vfio_device(
&mut self,
device: &Arc<dyn DeviceIo>,
ctx: &mut DeviceOpContext,
) -> Result<()> {
// safe to unwrap because type is decided
let vfio_pci_device = device
.as_any()
.downcast_ref::<VfioPciDevice<PciSystemManager>>()
.unwrap();
let device_id = vfio_pci_device.device_id() as u32;
// safe to unwrap because pci vfio manager is already created
let _ = self
.pci_vfio_manager
.as_mut()
.unwrap()
.free_device_id(device_id)
.ok_or(VfioDeviceError::InvalidDeviceID(device_id))?;
let resources = vfio_pci_device.get_assigned_resources();
let vendor_id = vfio_pci_device.vendor_id();
// For NVIDIA devices the legacy IRQ is left out of the set handed back to
// the resource manager; every other resource is freed as usual.
let filtered_resources = if vendor_id == VENDOR_NVIDIA {
let mut filtered_resources = DeviceResources::new();
for resource in resources.get_all_resources() {
if let Resource::LegacyIrq(_) = resource {
continue;
} else {
filtered_resources.append(resource.clone())
}
}
filtered_resources
} else {
resources
};
ctx.res_manager
.free_device_resources(&filtered_resources)
.map_err(VfioDeviceError::FreeDeviceResource)?;
vfio_pci_device
.clear_device()
.map_err(VfioDeviceError::VfioPciError)?;
Ok(())
}
/// Return the PCI system manager, creating and activating it on first use.
///
/// On creation, the manager's resource requirements are allocated from
/// `res_manager` (`VfioDeviceError::NoResource` on failure) and handed to
/// `activate`. Later calls return the cached manager and ignore their
/// arguments.
pub(crate) fn create_pci_manager(
    &mut self,
    irq_manager: Arc<KvmIrqManager>,
    io_context: DeviceManagerContext,
    res_manager: Arc<ResourceManager>,
) -> Result<&mut Arc<PciSystemManager>> {
    if self.pci_vfio_manager.is_none() {
        let mut manager = PciSystemManager::new(irq_manager, io_context, res_manager.clone())?;
        let wanted = manager.resource_requirements();
        let granted = res_manager
            .allocate_device_resources(&wanted, USE_SHARED_IRQ)
            .map_err(|_| VfioDeviceError::NoResource)?;
        manager.activate(granted)?;
        self.pci_vfio_manager = Some(Arc::new(manager));
    }

    // The option was filled above if it was empty, so this cannot fail.
    Ok(self.pci_vfio_manager.as_mut().unwrap())
}
/// Get the PCI manager to support PCI device passthrough
///
/// Returns `None` until `create_pci_manager` has created it.
pub fn get_pci_manager(&mut self) -> Option<&mut Arc<PciSystemManager>> {
self.pci_vfio_manager.as_mut()
}
}
// Unit tests for the VFIO device manager; built only with the `test-mock`
// feature.
// NOTE(review): `VfioDeviceConfigInfo`, `DeviceInfoGroup` and
// `attach_pci_vfio_devices` are not defined in the visible part of this
// module, and the `VfioContainer::default()` / `vfio_dma_map = true` usage
// presumably relies on a mocked `VfioContainer` — confirm that these tests
// still compile against the current API.
#[cfg(all(test, feature = "test-mock"))]
mod tests {
use kvm_ioctls::Kvm;
use logger::LOGGER;
use vm_memory::{GuestAddress, GuestMemoryMmap, MmapRegion};
use super::*;
use crate::config_manager::DeviceInfoGroup;
use crate::test_utils::tests::create_vm_for_test;
type VfioDeviceInfo = DeviceInfoGroup<VfioDeviceConfigInfo, VfioDeviceError>;
// Build a manager on top of a freshly created KVM VM.
fn get_vfio_dev_mgr() -> VfioDeviceMgr {
let kvm = Kvm::new().unwrap();
let vm_fd = Arc::new(kvm.create_vm().unwrap());
let logger = Arc::new(LOGGER.new_logger(slog::o!()));
VfioDeviceMgr::new(vm_fd, &logger)
}
// Registering two regions accounts both sizes; unregistering them brings
// the counter back to zero.
#[test]
fn test_register_memory() {
let mut mgr = get_vfio_dev_mgr();
// mock for vfio_dma_map.
let mut vfio_container = VfioContainer::default();
vfio_container.vfio_dma_map = true;
vfio_container.vfio_dma_unmap = true;
mgr.vfio_container = Some(Arc::new(vfio_container));
let region_size = 0x1000;
let region1 =
GuestRegionMmap::new(MmapRegion::new(region_size).unwrap(), GuestAddress(0x4000))
.unwrap();
let region2 =
GuestRegionMmap::new(MmapRegion::new(region_size).unwrap(), GuestAddress(0xc000))
.unwrap();
let regions = vec![region1, region2];
let gmm = Arc::new(GuestMemoryMmap::from_regions(regions).unwrap());
assert!(mgr.register_memory(&gmm.clone()).is_ok());
assert_eq!(mgr.locked_vm_size, region_size as u64 * 2);
for region in gmm.iter() {
mgr.unregister_region(region).unwrap();
}
assert_eq!(mgr.locked_vm_size, 0);
}
// A single region registered through `register_region` is accounted and
// released symmetrically.
#[test]
fn test_register_region() {
let kvm = Kvm::new().unwrap();
let vm_fd = Arc::new(kvm.create_vm().unwrap());
let logger = Arc::new(LOGGER.new_logger(slog::o!()));
let mut mgr = VfioDeviceMgr::new(vm_fd, &logger);
// mock for vfio_dma_map.
let mut vfio_container = VfioContainer::default();
vfio_container.vfio_dma_map = true;
vfio_container.vfio_dma_unmap = true;
mgr.vfio_container = Some(Arc::new(vfio_container));
let region_size = 0x400000;
let region =
GuestRegionMmap::new(MmapRegion::new(region_size).unwrap(), GuestAddress(0x0000))
.unwrap();
let gpa = region.start_addr().raw_value();
let size = region.len() as u64;
let user_addr = region.get_host_address(MemoryRegionAddress(0)).unwrap() as u64;
let readonly = region.prot() & libc::PROT_WRITE == 0;
mgr.register_region(gpa, size, user_addr, readonly).unwrap();
assert_eq!(mgr.locked_vm_size, region_size as u64);
assert!(mgr.unregister_region(&region).is_ok());
assert_eq!(mgr.locked_vm_size, 0);
}
// Attaching a device whose host path does not exist is expected to fail.
#[test]
fn test_vfio_attach_pci_vfio_devices() {
let vm = create_vm_for_test();
let mut mgr = vm.device_manager.vfio_manager.lock().unwrap();
let config = VfioDeviceConfigInfo {
hostdev_id: "hostdev_1".to_string(),
sysfs_path: "uuid1".to_string(),
bus_slot_func: "0:0:1".to_string(),
mode: "pci".to_string(),
vendor_device_id: 0,
guest_dev_id: None,
clique_id: None,
};
let mut device_op_ctx = DeviceOpContext::new(
Some(vm.epoll_manager.clone()),
&vm.device_manager,
Some(vm.vm_as().unwrap().clone()),
vm.address_space.address_space.clone(),
false,
None,
vm.address_space.get_base_to_slot_map(),
vm.shared_info().clone(),
);
// Invalid resources.
assert!(matches!(
mgr.attach_pci_vfio_devices(&mut device_op_ctx, &config),
Err(VfioDeviceError::VfioPciError(_))
));
}
}

View File

@ -0,0 +1,169 @@
// Copyright (C) 2023 Alibaba Cloud. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use std::sync::Arc;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use dbs_device::resources::Resource;
use dbs_device::resources::{DeviceResources, ResourceConstraint};
use dbs_interrupt::KvmIrqManager;
#[cfg(target_arch = "aarch64")]
use dbs_pci::ECAM_SPACE_LENGTH;
use dbs_pci::{create_pci_root_bus, PciBus, PciDevice, PciRootDevice, PciSystemContext};
use super::{Result, VfioDeviceError};
#[cfg(target_arch = "aarch64")]
use crate::device_manager::vfio_dev_mgr::USE_SHARED_IRQ;
use crate::device_manager::DeviceManagerContext;
use crate::resource_manager::ResourceManager;
/// we only support one pci bus
pub const PCI_BUS_DEFAULT: u8 = 0;
/// PCI pass-through device manager.
///
/// Bundles the PCI root device and the single root bus with the IRQ manager
/// and I/O context they are activated with.
#[derive(Clone)]
pub struct PciSystemManager {
// Returned by `get_interrupt_manager`.
pub irq_manager: Arc<KvmIrqManager>,
// Passed to `PciRootDevice::activate`; also returned by `get_device_manager_context`.
pub io_context: DeviceManagerContext,
// PCI root device, created for bus `PCI_BUS_DEFAULT`.
pub pci_root: Arc<PciRootDevice>,
// The only PCI bus; attached to `pci_root` by `activate`.
pub pci_root_bus: Arc<PciBus>,
}
impl PciSystemManager {
/// Create a new PCI pass-through device manager.
pub fn new(
irq_manager: Arc<KvmIrqManager>,
io_context: DeviceManagerContext,
res_manager: Arc<ResourceManager>,
) -> std::result::Result<Self, VfioDeviceError> {
let resources = PciSystemManager::allocate_root_device_resources(res_manager)?;
let pci_root = Arc::new(
PciRootDevice::create(PCI_BUS_DEFAULT, resources).map_err(VfioDeviceError::PciError)?,
);
let pci_root_bus =
create_pci_root_bus(PCI_BUS_DEFAULT).map_err(VfioDeviceError::PciError)?;
Ok(PciSystemManager {
irq_manager,
io_context,
pci_root,
pci_root_bus,
})
}
// On x86 the pci root device is a pio device at a fixed port range, so
// nothing is taken from the resource manager.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn allocate_root_device_resources(
    _res_manager: Arc<ResourceManager>,
) -> Result<DeviceResources> {
    // PCI CONFIG_ADDRESS is port 0xcf8 and PCI CONFIG_DATA is port 0xcfc,
    // each 32 bits wide, so the range starts at 0xcf8 and spans 8 bytes.
    let config_ports = Resource::PioAddressRange {
        base: 0xcf8,
        size: 0x8,
    };

    let mut resources = DeviceResources::new();
    resources.append(config_ports);
    Ok(resources)
}
// On arm the pci root device is a mmio device whose register range is the
// ECAM space, which is used to enumerate and identify PCI devices. That
// space is allocated dynamically from the resource pool: below 4G, 4096-byte
// aligned, `ECAM_SPACE_LENGTH` bytes long.
#[cfg(target_arch = "aarch64")]
fn allocate_root_device_resources(
    res_manager: Arc<ResourceManager>,
) -> Result<DeviceResources> {
    let ecam_request = ResourceConstraint::MmioAddress {
        range: Some((0x0, 0xffff_ffff)),
        align: 4096,
        size: ECAM_SPACE_LENGTH,
    };

    res_manager
        .allocate_device_resources(&vec![ecam_request], USE_SHARED_IRQ)
        .map_err(VfioDeviceError::AllocateDeviceResource)
}
/// Activate the PCI subsystem.
///
/// Attaches the root bus to the root device, activates the root device with
/// this manager's I/O context, then assigns `resources` to the root bus. Any
/// failure is reported as `VfioDeviceError::PciError`.
pub fn activate(&mut self, resources: DeviceResources) -> Result<()> {
let bus_id = self.pci_root_bus.bus_id();
// Attach the root bus to the root device under the bus's own id.
self.pci_root
.add_bus(self.pci_root_bus.clone(), bus_id)
.map_err(VfioDeviceError::PciError)?;
PciRootDevice::activate(self.pci_root.clone(), &mut self.io_context)
.map_err(VfioDeviceError::PciError)?;
// Hand the caller-provided resources to the root bus.
self.pci_root_bus
.assign_resources(resources)
.map_err(VfioDeviceError::PciError)?;
Ok(())
}
/// Get resource requirements of the PCI subsystem.
///
/// Three constraints are returned, in this order: MMIO space below 4G, MMIO
/// space above 4G, and an I/O port range.
pub fn resource_requirements(&self) -> Vec<ResourceConstraint> {
    vec![
        // 512MB of MMIO address space below 4G.
        ResourceConstraint::MmioAddress {
            range: Some((0x0, 0xffff_ffff)),
            align: 4096,
            size: 512u64 << 20,
        },
        // 2048GB of MMIO address space above 4G.
        ResourceConstraint::MmioAddress {
            range: Some((0x1_0000_0000, 0xffff_ffff_ffff_ffff)),
            align: 4096,
            size: 2048u64 << 30,
        },
        // 8KB of IO ports.
        ResourceConstraint::PioAddress {
            range: None,
            align: 1,
            size: 8u16 << 10,
        },
    ]
}
/// Get a new shared handle to the PCI root bus.
pub fn pci_root_bus(&self) -> Arc<PciBus> {
    Arc::clone(&self.pci_root_bus)
}
/// Allocate a PCI device id.
///
/// Delegates to the root bus's `allocate_device_id`; `None` means no id
/// could be allocated.
// NOTE(review): presumably `Some(id)` requests that specific id and `None`
// lets the bus choose — confirm against `PciBus::allocate_device_id`.
pub fn new_device_id(&self, device_id: Option<u8>) -> Option<u8> {
self.pci_root_bus.allocate_device_id(device_id)
}
/// Release a PCI device id on the root bus.
///
/// Delegates to the root bus's `free_device_id` and returns its result
/// unchanged.
pub fn free_device_id(&self, device_id: u32) -> Option<Arc<dyn PciDevice>> {
self.pci_root_bus.free_device_id(device_id)
}
/// Obtain ECAM space resources, that is, pci root device resources.
///
/// Only available on aarch64.
#[cfg(target_arch = "aarch64")]
pub fn get_ecam_space(&self) -> DeviceResources {
self.pci_root.get_device_resources()
}
/// Obtain BAR space resources, that is, pci root bus resources.
///
/// Only available on aarch64.
#[cfg(target_arch = "aarch64")]
pub fn get_bar_space(&self) -> DeviceResources {
self.pci_root_bus.get_device_resources()
}
}
impl PciSystemContext for PciSystemManager {
    type D = DeviceManagerContext;

    /// Hand out a copy of the device manager I/O context.
    fn get_device_manager_context(&self) -> Self::D {
        let io_context = &self.io_context;
        io_context.clone()
    }

    /// Hand out a shared handle to the KVM IRQ manager.
    fn get_interrupt_manager(&self) -> Arc<KvmIrqManager> {
        Arc::clone(&self.irq_manager)
    }
}

View File

@ -14,6 +14,8 @@ use dbs_arch::pmu::PmuError;
#[cfg(feature = "dbs-virtio-devices")]
use dbs_virtio_devices::Error as VirtioError;
#[cfg(feature = "host-device")]
use crate::device_manager::vfio_dev_mgr::VfioDeviceError;
use crate::{address_space_manager, device_manager, resource_manager, vcpu, vm};
/// Shorthand result type for internal VMM commands.
@ -205,6 +207,14 @@ pub enum StartMicroVmError {
VhostUserNetDeviceError(
#[source] device_manager::vhost_user_net_dev_mgr::VhostUserNetDeviceError,
),
#[cfg(feature = "host-device")]
/// Failed to create VFIO device
#[error("cannot create VFIO device {0:?}")]
CreateVfioDevice(#[source] VfioDeviceError),
#[cfg(feature = "host-device")]
/// Failed to register DMA memory address range.
#[error("failure while registering DMA address range: {0:?}")]
RegisterDMAAddress(#[source] VfioDeviceError),
}
/// Errors associated with starting the instance.

View File

@ -565,7 +565,7 @@ impl ResourceManager {
Resource::LegacyIrq(base) => self.free_legacy_irq(*base),
Resource::MsiIrq { ty: _, base, size } => self.free_msi_irq(*base, *size),
Resource::KvmMemSlot(slot) => self.free_kvm_mem_slot(*slot),
Resource::MacAddresss(_) => Ok(()),
Resource::MacAddress(_) => Ok(()),
};
result?;
}

View File

@ -39,6 +39,7 @@ pub mod tests {
sockets: 1,
},
vpmu_feature: 0,
pci_hotplug_enabled: false,
};
vm.set_vm_config(vm_config);
vm.init_guest_memory().unwrap();

View File

@ -1133,6 +1133,7 @@ mod tests {
sockets: 1,
},
vpmu_feature: 0,
pci_hotplug_enabled: false,
};
vm.set_vm_config(vm_config);
vm.init_guest_memory().unwrap();
@ -1181,6 +1182,7 @@ mod tests {
sockets: 1,
},
vpmu_feature: 0,
pci_hotplug_enabled: false,
};
vm.set_vm_config(vm_config.clone());
vm.init_guest_memory().unwrap();

View File

@ -138,6 +138,9 @@ pub struct VmConfigInfo {
/// sock path
pub serial_path: Option<String>,
/// Enable PCI device hotplug or not
pub pci_hotplug_enabled: bool,
}
impl Default for VmConfigInfo {
@ -157,6 +160,7 @@ impl Default for VmConfigInfo {
mem_file_path: String::from(""),
mem_size_mib: 128,
serial_path: None,
pci_hotplug_enabled: false,
}
}
}
@ -182,7 +186,8 @@ pub struct Vm {
shared_info: Arc<RwLock<InstanceInfo>>,
address_space: AddressSpaceMgr,
device_manager: DeviceManager,
/// device manager for Dragonball
pub device_manager: DeviceManager,
dmesg_fifo: Option<Box<dyn io::Write + Send>>,
kernel_config: Option<KernelConfigInfo>,
logger: slog::Logger,
@ -494,7 +499,7 @@ impl Vm {
)?;
info!(self.logger, "VM: start devices");
self.device_manager.start_devices()?;
self.device_manager.start_devices(vm_as)?;
info!(self.logger, "VM: initializing devices done");
Ok(())
@ -928,6 +933,7 @@ pub mod tests {
sockets: 1,
},
vpmu_feature: 0,
pci_hotplug_enabled: false,
};
let mut vm = create_vm_instance();
@ -960,6 +966,7 @@ pub mod tests {
sockets: 1,
},
vpmu_feature: 0,
pci_hotplug_enabled: false,
};
vm.set_vm_config(vm_config);
assert!(vm.init_guest_memory().is_ok());
@ -1008,6 +1015,7 @@ pub mod tests {
sockets: 1,
},
vpmu_feature: 0,
pci_hotplug_enabled: false,
};
vm.set_vm_config(vm_config);
@ -1084,6 +1092,7 @@ pub mod tests {
sockets: 1,
},
vpmu_feature: 0,
pci_hotplug_enabled: false,
};
vm.set_vm_config(vm_config);

View File

@ -6,6 +6,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.
use std::collections::HashMap;
use std::convert::TryInto;
use std::ops::Deref;
@ -48,6 +49,7 @@ fn configure_system<M: GuestMemory>(
initrd: &Option<InitrdConfig>,
boot_cpus: u8,
max_cpus: u8,
pci_legacy_irqs: Option<&HashMap<u8, u8>>,
) -> super::Result<()> {
const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55;
const KERNEL_HDR_MAGIC: u32 = 0x5372_6448;
@ -59,7 +61,8 @@ fn configure_system<M: GuestMemory>(
let himem_start = GuestAddress(layout::HIMEM_START);
// Note that this puts the mptable at the last 1k of Linux's 640k base RAM
mptable::setup_mptable(guest_mem, boot_cpus, max_cpus, None).map_err(Error::MpTableSetup)?;
mptable::setup_mptable(guest_mem, boot_cpus, max_cpus, pci_legacy_irqs)
.map_err(Error::MpTableSetup)?;
let mut params: BootParamsWrapper = BootParamsWrapper(bootparam::boot_params::default());
@ -219,6 +222,24 @@ impl Vm {
.as_bytes_with_nul()
.len();
#[cfg(feature = "host-device")]
{
// Don't expect poisoned lock here.
let vfio_manager = self.device_manager.vfio_manager.lock().unwrap();
configure_system(
vm_memory,
self.address_space.address_space(),
cmdline_addr,
cmdline_size,
&initrd,
self.vm_config.vcpu_count,
self.vm_config.max_vcpu_count,
vfio_manager.get_pci_legacy_irqs(),
)
.map_err(StartMicroVmError::ConfigureSystem)
}
#[cfg(not(feature = "host-device"))]
configure_system(
vm_memory,
self.address_space.address_space(),
@ -227,6 +248,7 @@ impl Vm {
&initrd,
self.vm_config.vcpu_count,
self.vm_config.max_vcpu_count,
None,
)
.map_err(StartMicroVmError::ConfigureSystem)
}

View File

@ -705,7 +705,7 @@ impl Annotation {
}
// Hypervisor Memory related annotations
KATA_ANNO_CFG_HYPERVISOR_DEFAULT_MEMORY => {
match byte_unit::Byte::parse_str(value,true) {
match byte_unit::Byte::parse_str(value, true) {
Ok(mem_bytes) => {
let memory_size = mem_bytes
.get_adjusted_unit(byte_unit::Unit::MiB)

View File

@ -775,6 +775,26 @@ dependencies = [
"vmm-sys-util 0.11.1",
]
[[package]]
name = "dbs-pci"
version = "0.1.0"
dependencies = [
"byteorder",
"dbs-allocator",
"dbs-boot",
"dbs-device",
"dbs-interrupt",
"downcast-rs",
"kvm-bindings",
"kvm-ioctls",
"libc",
"log",
"thiserror",
"vfio-bindings",
"vfio-ioctls",
"vm-memory",
]
[[package]]
name = "dbs-upcall"
version = "0.3.0"
@ -829,6 +849,7 @@ dependencies = [
"serde_json",
"thiserror",
"threadpool",
"timerfd",
"vhost",
"virtio-bindings",
"virtio-queue",
@ -905,6 +926,12 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0688c2a7f92e427f44895cd63841bff7b29f8d7a1648b9e7e07a4a365b2e1257"
[[package]]
name = "downcast-rs"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650"
[[package]]
name = "dragonball"
version = "0.1.0"
@ -920,6 +947,7 @@ dependencies = [
"dbs-device",
"dbs-interrupt",
"dbs-legacy-devices",
"dbs-pci",
"dbs-upcall",
"dbs-utils",
"dbs-virtio-devices",
@ -942,6 +970,8 @@ dependencies = [
"slog-scope",
"thiserror",
"tracing",
"vfio-bindings",
"vfio-ioctls",
"virtio-queue",
"vm-memory",
"vmm-sys-util 0.11.1",
@ -4146,6 +4176,29 @@ version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "vfio-bindings"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43449b404c488f70507dca193debd4bea361fe8089869b947adc19720e464bce"
[[package]]
name = "vfio-ioctls"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "068bac78842164a8ecc1d1a84a8d8a9168ab29fa3c96942689e286a30ae22ac4"
dependencies = [
"byteorder",
"kvm-bindings",
"kvm-ioctls",
"libc",
"log",
"thiserror",
"vfio-bindings",
"vm-memory",
"vmm-sys-util 0.11.1",
]
[[package]]
name = "vhost"
version = "0.6.1"

View File

@ -35,7 +35,7 @@ kata-types = { path = "../../../libs/kata-types" }
logging = { path = "../../../libs/logging" }
shim-interface = { path = "../../../libs/shim-interface" }
dragonball = { path = "../../../dragonball", features = ["atomic-guest-memory", "virtio-vsock", "hotplug", "virtio-blk", "virtio-net", "virtio-fs", "vhost-net", "dbs-upcall","virtio-mem", "virtio-balloon", "vhost-user-net"] }
dragonball = { path = "../../../dragonball", features = ["atomic-guest-memory", "virtio-vsock", "hotplug", "virtio-blk", "virtio-net", "virtio-fs", "vhost-net", "dbs-upcall", "virtio-mem", "virtio-balloon", "vhost-user-net", "host-device"] }
ch-config = { path = "ch-config", optional = true }
tests_utils = { path = "../../tests/utils" }

View File

@ -4,3 +4,4 @@ CONFIG_DRAGONBALL_UPCALL_SRV=y
CONFIG_DRAGONBALL_DEVICE_MANAGER=y
CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO=y
CONFIG_DRAGONBALL_HOTPLUG_CPU=y
CONFIG_DRAGONBALL_HOTPLUG_PCI=y

View File

@ -1 +1 @@
119
120

View File

@ -0,0 +1,173 @@
From 4ed40d8ce3793129ba9c0b7b663a5e137aceb70c Mon Sep 17 00:00:00 2001
From: Chao Wu <chaowu@linux.alibaba.com>
Date: Wed, 27 Dec 2023 14:43:47 +0800
Subject: [PATCH] upcall: add pci hotplug / hot-unplug support
add two new upcall functions add_pci_dev and del_pci_dev, mainly for hotplugging
and hot-unplugging pci device in the guest kernel through the upcall server.
Users could implement upcall client side with add_pci or del_pci command and trigger
those commands in the hypervisor side.
As always, Dragonball hypervisor will implement the client side to do pci hotplug and
hot-unplug as an example
Signed-off-by: Gerry Liu <gerry@linux.alibaba.com>
Signed-off-by: Helin Guo <helinguo@linux.alibaba.com>
Signed-off-by: Chao Wu <chaowu@linux.alibaba.com>
---
drivers/misc/dragonball/upcall_srv/Kconfig | 11 +++
.../upcall_srv/dragonball_device_manager.c | 90 +++++++++++++++++++
2 files changed, 101 insertions(+)
diff --git a/drivers/misc/dragonball/upcall_srv/Kconfig b/drivers/misc/dragonball/upcall_srv/Kconfig
index fc83f03c2edd..19a6ca957ea6 100644
--- a/drivers/misc/dragonball/upcall_srv/Kconfig
+++ b/drivers/misc/dragonball/upcall_srv/Kconfig
@@ -47,3 +47,14 @@ config DRAGONBALL_HOTPLUG_CPU
structure with command and parameter to hot-pluging an vCPU.
If unsure, say N.
+
+config DRAGONBALL_HOTPLUG_PCI
+ bool "PCI hotplug/hotunplug support"
+ depends on DRAGONBALL_DEVICE_MANAGER
+ default y
+ help
+ This option implements PCI hotplug/hot-unplug support. The VMM
+ should send the hotplug request over vsock, using a dedicated data
+ structure that carries the command and parameters, to hot-plug a PCI device.
+
+ If unsure, say N.
diff --git a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
index 088d38623b8d..3544afefa2a9 100644
--- a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+++ b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
@@ -22,6 +22,7 @@
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpuhotplug.h>
+#include <linux/pci.h>
#include <asm/cpu.h>
#include <dragonball/upcall_srv.h>
#include <dragonball/device_manager.h>
@@ -90,6 +91,12 @@ struct devmgr_req {
uint8_t apic_ids[256];
#endif
} cpu_dev_info;
+#endif
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_PCI)
+ struct {
+ uint8_t busno;
+ uint8_t devfn;
+ } pci_dev_info;
#endif
} msg_load;
};
@@ -117,6 +124,9 @@ struct devmgr_reply {
#endif
#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
struct cpu_dev_reply_info cpu_dev_info;
+#endif
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_PCI)
+ struct {} pci_dev_info;
#endif
} msg_load;
};
@@ -286,6 +296,82 @@ static int del_mmio_dev(struct devmgr_req *req,
}
#endif
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_PCI)
+static int add_pci_dev(struct devmgr_req *req,
+ struct devmgr_reply *rep)
+{
+ int ret = 0;
+ struct devmgr_msg_header *rep_mh = &rep->msg_header;
+ uint8_t busno = req->msg_load.pci_dev_info.busno;
+ uint8_t devfn = req->msg_load.pci_dev_info.devfn;
+ struct pci_bus *bus;
+ struct pci_dev *dev;
+ /* Handle ADD_PCI: scan slot devfn on bus busno and register what is found. */
+ pr_info("add pci device of busno: %02x, devfn: %02x\n", busno, devfn);
+
+ pci_lock_rescan_remove();
+
+ /* Similar to pci_rescan_bus(), but limited to one slot of a bus in domain 0. */
+
+ bus = pci_find_bus(0, busno);
+ if (!bus) {
+ pr_err("Could not find PCI bus for busno %02x\n", busno);
+ ret = -ENODEV;
+ goto out;
+ }
+
+ pci_scan_slot(bus, devfn);
+ dev = pci_get_slot(bus, devfn);
+ if (!dev) {
+ pr_err("Could not find PCI device for slot %02x\n", devfn);
+ ret = -ENODEV;
+ goto out;
+ }
+
+ pci_bus_claim_resources(bus);
+
+ pci_bus_add_devices(bus);
+ /* Drop the reference obtained from pci_get_slot() above. */
+ pci_dev_put(dev);
+ /* Every path unlocks at "out"; the reply header is filled only on success. */
+out:
+ pci_unlock_rescan_remove();
+ if (!ret)
+ _fill_msg_header(rep_mh, 0, ADD_PCI, 0);
+ return ret;
+}
+
+static int del_pci_dev(struct devmgr_req *req,
+ struct devmgr_reply *rep)
+{
+ int ret = 0;
+ struct devmgr_msg_header *rep_mh = &rep->msg_header;
+ uint8_t busno = req->msg_load.pci_dev_info.busno;
+ uint8_t devfn = req->msg_load.pci_dev_info.devfn;
+ struct pci_dev *dev;
+ /* Handle DEL_PCI: stop and remove the device at busno/devfn in domain 0. */
+ pr_info("remove pci device of busno: %02x, devfn: %02x\n", busno, devfn);
+
+ pci_lock_rescan_remove();
+
+ dev = pci_get_domain_bus_and_slot(0, busno, devfn);
+
+ if (!dev) {
+ pr_err("Could not find PCI device for slot %02x\n", devfn);
+ ret = -ENODEV;
+ goto out;
+ }
+
+ pci_stop_and_remove_bus_device(dev);
+ /* Drop the reference obtained from pci_get_domain_bus_and_slot() above. */
+ pci_dev_put(dev);
+out:
+ pci_unlock_rescan_remove();
+ if (!ret)
+ _fill_msg_header(rep_mh, 0, DEL_PCI, 0);
+ return ret;
+}
+#endif
#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
#if defined(CONFIG_X86_64)
@@ -522,6 +608,10 @@ static struct {
{ADD_CPU, add_cpu_dev},
{DEL_CPU, del_cpu_dev},
#endif
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_PCI)
+ {ADD_PCI, add_pci_dev},
+ {DEL_PCI, del_pci_dev},
+#endif
};
static action_route_t get_action(struct devmgr_req *req)
--
2.31.1