diff --git a/Cargo.lock b/Cargo.lock index 1d6a6d20be..f45ee5b033 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8615,6 +8615,7 @@ dependencies = [ "nix 0.26.4", "oci-spec 0.8.4", "persist", + "pod-resources-rs", "protobuf", "resource", "runtime-spec", diff --git a/src/libs/kata-types/src/config/hypervisor/mod.rs b/src/libs/kata-types/src/config/hypervisor/mod.rs index 776b781a4c..8edfeaacd7 100644 --- a/src/libs/kata-types/src/config/hypervisor/mod.rs +++ b/src/libs/kata-types/src/config/hypervisor/mod.rs @@ -787,6 +787,14 @@ pub struct DeviceInfo { #[serde(default)] pub hotplug_vfio_on_root_bus: bool, + /// Cold-plug VFIO devices to a PCIe port type. + /// + /// Accepted values: `"no-port"` (default, disabled), `"root-port"`. + /// In confidential compute environments hot-plugging can compromise + /// security, so devices are cold-plugged instead. + #[serde(default)] + pub cold_plug_vfio: String, + /// Number of PCIe root ports to create during VM creation. /// /// Valid when `hotplug_vfio_on_root_bus = true` and `machine_type = "q35"`. diff --git a/src/runtime-rs/crates/runtimes/virt_container/Cargo.toml b/src/runtime-rs/crates/runtimes/virt_container/Cargo.toml index 13e9524c40..74ff8a3163 100644 --- a/src/runtime-rs/crates/runtimes/virt_container/Cargo.toml +++ b/src/runtime-rs/crates/runtimes/virt_container/Cargo.toml @@ -30,6 +30,7 @@ agent = { workspace = true } common = { workspace = true } hypervisor = { workspace = true, features = ["cloud-hypervisor"] } kata-sys-util = { workspace = true } +pod-resources-rs = { workspace = true } kata-types = { workspace = true } logging = { workspace = true } runtime-spec = { workspace = true } diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs b/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs index d887de1ece..4de5cf1de9 100644 --- a/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs +++ b/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs @@ -24,6 +24,7 @@ use common::{ }; use containerd_shim_protos::events::task::{TaskExit, TaskOOM}; +use hypervisor::device::topology::PCIePort; use hypervisor::VsockConfig; #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use hypervisor::{firecracker::Firecracker, HYPERVISOR_FIRECRACKER}; @@ -47,6 +48,7 @@ use hypervisor::{ use hypervisor::{BlockConfig, Hypervisor}; use hypervisor::{BlockDeviceAio, PortDeviceConfig}; use hypervisor::{ProtectionDeviceConfig, SevSnpConfig, TdxConfig}; +use hypervisor::VfioDeviceBase; use kata_sys_util::hooks::HookStates; use kata_sys_util::protection::{available_guest_protection, GuestProtection}; use kata_sys_util::spec::load_oci_spec; @@ -61,6 +63,7 @@ use kata_types::config::{hypervisor::Factory, TomlConfig}; use kata_types::initdata::{calculate_initdata_digest, ProtectedPlatform}; use oci_spec::runtime as oci; use persist::{self, sandbox_persist::Persist}; +use pod_resources_rs::handle_cdi_devices; use protobuf::SpecialFields; use resource::coco_data::initdata::{ kata_shared_init_data_path, InitDataConfig, KATA_INIT_DATA_IMAGE, @@ -70,7 +73,7 @@ use resource::manager::ManagerArgs; use resource::network::{dan_config_path, DanNetworkConfig, NetworkConfig, NetworkWithNetNsConfig}; use resource::{ResourceConfig, ResourceManager}; use runtime_spec as spec; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::sync::Arc; use strum::Display; use tokio::sync::{mpsc::Sender, Mutex, RwLock}; @@ -175,7 +178,7 @@ impl VirtSandbox { async fn prepare_for_start_sandbox( &self, id: &str, - network_env: SandboxNetworkEnv, + sandbox_config: &SandboxConfig, ) -> Result> { let mut resource_configs = vec![]; @@ -186,6 +189,7 @@ impl VirtSandbox { .context("failed to prepare vm socket config")?; resource_configs.push(vm_socket_config); + let network_env: SandboxNetworkEnv = sandbox_config.network_env.clone(); // prepare network config if !network_env.network_created { if let Some(network_resource) = self.prepare_network_resource(&network_env).await { @@ -221,6 +225,17 @@ impl VirtSandbox { None }; + let vfio_devices = self.prepare_coldplug_cdi_devices(sandbox_config).await?; + if !vfio_devices.is_empty() { + info!( + sl!(), + "prepare pod devices {vfio_devices:?} for sandbox done." + ); + resource_configs.extend(vfio_devices); + } else { + info!(sl!(), "no pod devices to prepare for sandbox."); + } + // prepare protection device config if let Some(protection_dev_config) = self .prepare_protection_device_config(&self.hypervisor.hypervisor_config().await, init_data) @@ -266,6 +281,77 @@ impl VirtSandbox { } } + async fn prepare_coldplug_cdi_devices( + &self, + sandbox_config: &SandboxConfig, + ) -> Result> { + let hypervisor_config = self.hypervisor.hypervisor_config().await; + let cold_plug_vfio = &hypervisor_config.device_info.cold_plug_vfio; + if cold_plug_vfio.is_empty() || cold_plug_vfio == "no-port" { + return Ok(Vec::new()); + } + + let port = match cold_plug_vfio.as_str() { + "root-port" => PCIePort::RootPort, + other => { + return Err(anyhow!( + "unsupported cold_plug_vfio value {:?}; only \"root-port\" is supported", + other + )) + } + }; + + let config = self.resource_manager.config().await; + let pod_resource_socket = &config.runtime.pod_resource_api_sock; + info!( + sl!(), + "sandbox pod_resource_socket: {:?}", pod_resource_socket + ); + if pod_resource_socket.is_empty() || !Path::new(pod_resource_socket).exists() { + return Ok(Vec::new()); + } + + let annotations = &sandbox_config.annotations; + debug!( + sl!(), + "cold-plug: sandbox-name={:?} sandbox-namespace={:?}", + annotations.get("io.kubernetes.cri.sandbox-name"), + annotations.get("io.kubernetes.cri.sandbox-namespace") + ); + + let cdi_devices = pod_resources_rs::pod_resources::get_pod_cdi_devices( + pod_resource_socket, + annotations, + ) + .await + .context("failed to query Pod Resources CDI devices")?; + info!(sl!(), "pod cdi devices: {:?}", cdi_devices); + + let device_nodes = handle_cdi_devices(&cdi_devices).await?; + let paths: Vec = device_nodes + .iter() + .filter_map(pod_resources_rs::device_node_host_path) + .collect(); + + let mut vfio_configs = Vec::new(); + for path in paths.iter() { + let dev_info = VfioDeviceBase { + host_path: path.clone(), + iommu_group_devnode: PathBuf::from(path), + dev_type: "c".to_string(), + port, + hostdev_prefix: "vfio_device".to_owned(), + ..Default::default() + }; + vfio_configs.push(dev_info); + } + + Ok(vfio_configs + .into_iter() + .map(ResourceConfig::VfioDeviceModern) + .collect()) + } + async fn prepare_network_resource( &self, network_env: &SandboxNetworkEnv, @@ -656,9 +742,7 @@ impl Sandbox for VirtSandbox { // generate device and setup before start vm // should after hypervisor.prepare_vm - let resources = self - .prepare_for_start_sandbox(id, sandbox_config.network_env.clone()) - .await?; + let resources = self.prepare_for_start_sandbox(id, sandbox_config).await?; self.resource_manager .prepare_before_start_vm(resources) @@ -841,7 +925,7 @@ impl Sandbox for VirtSandbox { // generate device and setup before start vm // should after hypervisor.prepare_vm let resources = self - .prepare_for_start_sandbox(id, sandbox_config.network_env.clone()) + .prepare_for_start_sandbox(id, sandbox_config) .await .context("prepare resources before start vm")?; diff --git a/src/tools/agent-ctl/Cargo.lock b/src/tools/agent-ctl/Cargo.lock index 8522a5a35c..72be9c7019 100644 --- a/src/tools/agent-ctl/Cargo.lock +++ b/src/tools/agent-ctl/Cargo.lock @@ -2139,6 +2139,7 @@ dependencies = [ "libc", "logging", "nix 0.26.4", + "once_cell", "path-clean", "persist", "protocols", @@ -2146,6 +2147,7 @@ dependencies = [ "qapi-qmp", "qapi-spec", "rand 0.10.1", + "regex", "rust-ini", "safe-path 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "seccompiler",