From d217f2efdaa3b1975f4a038f875faebf1f769429 Mon Sep 17 00:00:00 2001 From: Alex Lyn Date: Sun, 12 Apr 2026 16:30:29 +0200 Subject: [PATCH] runtime-rs: Add resource manager VFIO modern handling and CDI wiring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend the resource manager to handle VfioModern and BlockModern device types when building the agent's device list and storage list. For VFIO modern devices, the manager resolves the container path and sets the agent Device.id to match what genpolicy expects. Rework CDI device annotation handling in container_device.rs: - Strip the "vfio" prefix from device names when building CDI annotation keys (cdi.k8s.io/vfio0, cdi.k8s.io/vfio1, etc.) - Remove the per-device index suffix that caused policy mismatches - Add iommufd cdev path support alongside legacy VFIO group paths Update the vfio driver to detect iommufd cdev vs legacy group from the CDI device node path. Signed-off-by: Alex Lyn Signed-off-by: Fabiano Fidêncio --- .../hypervisor/src/device/driver/vfio.rs | 12 +- .../src/cdi_devices/container_device.rs | 78 +++++---- .../crates/resource/src/cdi_devices/mod.rs | 4 +- src/runtime-rs/crates/resource/src/lib.rs | 4 +- .../crates/resource/src/manager_inner.rs | 153 ++++++++++++++---- 5 files changed, 166 insertions(+), 85 deletions(-) diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs index b09328f6ee..9e329186a7 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs @@ -177,6 +177,9 @@ pub struct HostDevice { /// PCI device information (Domain) pub domain: String, + // iommufd for vfio device + pub iommufd: String, + /// PCI device information (BDF): "bus:slot:function" pub bus_slot_func: String, @@ -531,14 +534,7 @@ impl Device for VfioDevice { // do add device for vfio device match 
h.add_device(DeviceType::Vfio(self.clone())).await { - Ok(dev) => { - // Update device info with the one received from device attach - if let DeviceType::Vfio(vfio) = dev { - self.config = vfio.config; - self.devices = vfio.devices; - self.allocated = true; - } - + Ok(_dev) => { update_pcie_device!(self, pcie_topo)?; Ok(()) diff --git a/src/runtime-rs/crates/resource/src/cdi_devices/container_device.rs b/src/runtime-rs/crates/resource/src/cdi_devices/container_device.rs index 8a4158be6f..13d3367138 100644 --- a/src/runtime-rs/crates/resource/src/cdi_devices/container_device.rs +++ b/src/runtime-rs/crates/resource/src/cdi_devices/container_device.rs @@ -74,32 +74,26 @@ pub fn annotate_container_devices( grouped_devices .iter_mut() .for_each(|(vendor_class, container_devices)| { - // The *offset* is a monotonically increasing counter that keeps track of the number of devices - // within an IOMMU group. It increments by total_of whenever a new IOMMU group is processed. - let offset: &mut usize = &mut 0; - sort_devices_by_guest_pcipath(container_devices); container_devices .iter() .enumerate() - .for_each(|(base, container_device)| { - let total_of = container_device.device.options.len(); - // annotate device with cdi information in OCI Spec. - for index in 0..total_of { - if let Some(iommu_grpid) = - Path::new(&container_device.device.container_path) - .file_name() - .and_then(|name| name.to_str()) - { - spec.annotations_mut().as_mut().unwrap().insert( - format!("{CDI_PREFIX}/vfio{iommu_grpid}.{index}"), // cdi.k8s.io/vfioX.y - format!("{}={}", vendor_class, base + *offset), // vendor/class=name - ); - } + .for_each(|(index, container_device)| { + if let Some(iommu_grpid) = + Path::new(&container_device.device.container_path) + .file_name() + .and_then(|name| name.to_str()) + { + // iommufd cdev paths use "/dev/vfio/devices/vfio<N>" — + // strip the "vfio" prefix so the annotation key is + // "cdi.k8s.io/vfio<N>", matching the Go runtime. 
+ let vfio_num = + iommu_grpid.strip_prefix("vfio").unwrap_or(iommu_grpid); + spec.annotations_mut().as_mut().unwrap().insert( + format!("{CDI_PREFIX}/vfio{vfio_num}"), + format!("{}={}", vendor_class, index), + ); } - - // update the offset with *total_of*. - *offset += total_of - 1; }); }); @@ -234,10 +228,10 @@ mod tests { device_info: Some(DeviceInfo { vendor_id: "0x1002".to_string(), class_id: "0x0302".to_string(), - host_path: PathBuf::from("/dev/device2"), + host_path: PathBuf::from("/dev/vfio/devices/vfio2"), }), device: Device { - container_path: "/dev/device2".to_string(), + container_path: "/dev/vfio/devices/vfio2".to_string(), options: vec!["pci_host_path02=BB:DD02.F02".to_string()], ..Default::default() }, @@ -246,10 +240,10 @@ mod tests { device_info: Some(DeviceInfo { vendor_id: "0x1002".to_string(), class_id: "0x0302".to_string(), - host_path: PathBuf::from("/dev/device3"), + host_path: PathBuf::from("/dev/vfio/devices/vfio0"), }), device: Device { - container_path: "/dev/device3".to_string(), + container_path: "/dev/vfio/devices/vfio0".to_string(), options: vec!["pci_host_path03=BB:DD03.F03".to_string()], ..Default::default() }, @@ -258,10 +252,10 @@ mod tests { device_info: Some(DeviceInfo { vendor_id: "0x1002".to_string(), class_id: "0x0302".to_string(), - host_path: PathBuf::from("/dev/device1"), + host_path: PathBuf::from("/dev/vfio/devices/vfio1"), }), device: Device { - container_path: "/dev/device1".to_string(), + container_path: "/dev/vfio/devices/vfio1".to_string(), options: vec!["pci_host_path01=BB:DD01.F01".to_string()], ..Default::default() }, @@ -289,15 +283,15 @@ mod tests { let expected_annotations: HashMap = vec![ ( - "cdi.k8s.io/vfiodevice3.0".to_owned(), + "cdi.k8s.io/vfio0".to_owned(), "amd.com/gpu=2".to_owned(), ), ( - "cdi.k8s.io/vfiodevice1.0".to_owned(), + "cdi.k8s.io/vfio1".to_owned(), "amd.com/gpu=0".to_owned(), ), ( - "cdi.k8s.io/vfiodevice2.0".to_owned(), + "cdi.k8s.io/vfio2".to_owned(), "amd.com/gpu=1".to_owned(), ), ] 
@@ -324,10 +318,10 @@ mod tests { device_info: Some(DeviceInfo { vendor_id: "0x10de".to_string(), class_id: "0x0302".to_string(), - host_path: PathBuf::from("/dev/device2"), + host_path: PathBuf::from("/dev/vfio/devices/vfio1"), }), device: Device { - container_path: "/dev/device2".to_string(), + container_path: "/dev/vfio/devices/vfio1".to_string(), options: vec!["pci_host_path02=BB:DD02.F02".to_string()], ..Default::default() }, @@ -336,10 +330,10 @@ mod tests { device_info: Some(DeviceInfo { vendor_id: "0x10de".to_string(), class_id: "0x0302".to_string(), - host_path: PathBuf::from("/dev/device3"), + host_path: PathBuf::from("/dev/vfio/devices/vfio2"), }), device: Device { - container_path: "/dev/device3".to_string(), + container_path: "/dev/vfio/devices/vfio2".to_string(), options: vec!["pci_host_path03=BB:DD03.F03".to_string()], ..Default::default() }, @@ -348,10 +342,10 @@ mod tests { device_info: Some(DeviceInfo { vendor_id: "0x8086".to_string(), class_id: "0x0302".to_string(), - host_path: PathBuf::from("/dev/device1"), + host_path: PathBuf::from("/dev/vfio/devices/vfio0"), }), device: Device { - container_path: "/dev/device1".to_string(), + container_path: "/dev/vfio/devices/vfio0".to_string(), options: vec!["pci_host_path01=BB:DD01.F01".to_string()], ..Default::default() }, @@ -360,10 +354,10 @@ mod tests { device_info: Some(DeviceInfo { vendor_id: "0x8086".to_string(), class_id: "0x0302".to_string(), - host_path: PathBuf::from("/dev/device4"), + host_path: PathBuf::from("/dev/vfio/devices/vfio3"), }), device: Device { - container_path: "/dev/device4".to_string(), + container_path: "/dev/vfio/devices/vfio3".to_string(), options: vec!["pci_host_path04=BB:DD01.F04".to_string()], ..Default::default() }, @@ -390,19 +384,19 @@ mod tests { let expected_annotations: HashMap = vec![ ( - "cdi.k8s.io/vfiodevice1.0".to_owned(), + "cdi.k8s.io/vfio0".to_owned(), "intel.com/gpu=0".to_owned(), ), ( - "cdi.k8s.io/vfiodevice2.0".to_owned(), + 
"cdi.k8s.io/vfio1".to_owned(), "nvidia.com/gpu=0".to_owned(), ), ( - "cdi.k8s.io/vfiodevice3.0".to_owned(), + "cdi.k8s.io/vfio2".to_owned(), "nvidia.com/gpu=1".to_owned(), ), ( - "cdi.k8s.io/vfiodevice4.0".to_owned(), + "cdi.k8s.io/vfio3".to_owned(), "intel.com/gpu=1".to_owned(), ), ] diff --git a/src/runtime-rs/crates/resource/src/cdi_devices/mod.rs b/src/runtime-rs/crates/resource/src/cdi_devices/mod.rs index 395c6c4381..f432bacf0f 100644 --- a/src/runtime-rs/crates/resource/src/cdi_devices/mod.rs +++ b/src/runtime-rs/crates/resource/src/cdi_devices/mod.rs @@ -10,14 +10,14 @@ use agent::types::Device; use std::collections::HashMap; use std::path::PathBuf; -#[derive(Clone, Default)] +#[derive(Clone, Debug, Default)] pub struct DeviceInfo { pub class_id: String, pub vendor_id: String, pub host_path: PathBuf, } -#[derive(Clone, Default)] +#[derive(Clone, Debug, Default)] pub struct ContainerDevice { pub device_info: Option, pub device: Device, diff --git a/src/runtime-rs/crates/resource/src/lib.rs b/src/runtime-rs/crates/resource/src/lib.rs index a91aa0c8cf..a1f9d06466 100644 --- a/src/runtime-rs/crates/resource/src/lib.rs +++ b/src/runtime-rs/crates/resource/src/lib.rs @@ -18,7 +18,8 @@ mod manager_inner; pub mod network; pub mod resource_persist; use hypervisor::{ - BlockConfig, HybridVsockConfig, PortDeviceConfig, ProtectionDeviceConfig, VsockConfig, + vfio_device::VfioDeviceBase, BlockConfig, HybridVsockConfig, PortDeviceConfig, + ProtectionDeviceConfig, VsockConfig, }; use network::NetworkConfig; pub mod rootfs; @@ -39,6 +40,7 @@ pub enum ResourceConfig { HybridVsock(HybridVsockConfig), Vsock(VsockConfig), Protection(ProtectionDeviceConfig), + VfioDeviceModern(VfioDeviceBase), PortDevice(PortDeviceConfig), InitData(BlockConfig), } diff --git a/src/runtime-rs/crates/resource/src/manager_inner.rs b/src/runtime-rs/crates/resource/src/manager_inner.rs index cecbd9c4a8..68bfb02c02 100644 --- a/src/runtime-rs/crates/resource/src/manager_inner.rs +++ 
b/src/runtime-rs/crates/resource/src/manager_inner.rs @@ -211,6 +211,11 @@ impl ResourceManagerInner { .await .context("do handle initdata block device failed.")?; } + ResourceConfig::VfioDeviceModern(vfiobase) => { + do_handle_device(&self.device_manager, &DeviceConfig::VfioModernCfg(vfiobase)) + .await + .context("do handle vfio device failed.")?; + } }; } @@ -483,50 +488,134 @@ impl ResourceManagerInner { .await .context("do handle device")?; - // vfio mode: vfio-pci and vfio-pci-gk for x86_64 - // - vfio-pci, devices appear as VFIO character devices under /dev/vfio in container. - // - vfio-pci-gk, devices are managed by whatever driver in Guest kernel. - // - vfio-ap, devices appear as VFIO character devices under /dev/vfio in container for ccw devices. - let vfio_mode = match self.toml_config.runtime.vfio_mode.as_str() { - "vfio" => { - if bus_type == "ccw" { - "vfio-ap".to_string() - } else { - "vfio-pci".to_string() - } - } - _ => "vfio-pci-gk".to_string(), - }; + if let DeviceType::VfioModern(vfio_dev) = device_info.clone() { + info!(sl!(), "device info: {:?}", vfio_dev.lock().await); + let vfio_device = vfio_dev.lock().await; + let guest_pci_path = vfio_device + .config + .guest_pci_path + .clone() + .unwrap_or_default(); + let host_bdf = vfio_device.device.primary.addr.to_string(); + info!( + sl!(), + "vfio device guest pci path: {:?}, host bdf: {:?}", + guest_pci_path, + &host_bdf + ); - // create agent device - if let DeviceType::Vfio(device) = device_info { - let device_options = sort_options_by_pcipath(device.device_options); + // vfio mode: vfio-pci and vfio-pci-gk for x86_64 + // - vfio-pci, devices appear as VFIO character devices under /dev/vfio in container. + // - vfio-pci-gk, devices are managed by whatever driver in Guest kernel. + // - vfio-ap, devices appear as VFIO character devices under /dev/vfio in container for ccw devices. 
+ let vfio_mode = match self.toml_config.runtime.vfio_mode.as_str() { + "vfio" => { + if bus_type == "ccw" { + "vfio-ap".to_string() + } else { + "vfio-pci".to_string() + } + } + _ => "vfio-pci-gk".to_string(), + }; + let device_options = vec![format!("{}={}", host_bdf, guest_pci_path)]; + // The Go runtime sets the device Id to + // filepath.Base(dev.ContainerPath), e.g. "vfio0". + // The agent policy validates this with: + // i_vfio_device.id == concat("", ["vfio", suffix]) + let group_num = d + .path() + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or_default() + .to_string(); let agent_device = Device { - id: device.device_id, // just for kata-agent + id: group_num, container_path: d.path().display().to_string().clone(), field_type: vfio_mode, options: device_options, ..Default::default() }; - let device_info = if let Some(device_vendor_class) = - &device.devices.first().unwrap().device_vendor_class - { - let vendor_class = device_vendor_class - .get_vendor_class_id() - .context("get vendor class failed")?; - Some(DeviceInfo { - vendor_id: vendor_class.0.to_owned(), - class_id: vendor_class.1.to_owned(), - host_path: d.path().clone(), - }) - } else { - None - }; + let device_info = Some(DeviceInfo { + vendor_id: vfio_device + .device + .primary + .vendor_id + .clone() + .unwrap_or_default(), + class_id: format!( + "{:#08x}", + vfio_device.device.primary.class_code.unwrap_or_default() + ), + host_path: d.path().clone(), + }); + info!( + sl!(), + "vfio device info for agent: {:?}", + device_info.clone() + ); + info!( + sl!(), + "agent device info for agent: {:?}", + agent_device.clone() + ); devices.push(ContainerDevice { device_info, device: agent_device, }); + } else { + // vfio mode: vfio-pci and vfio-pci-gk for x86_64 + // - vfio-pci, devices appear as VFIO character devices under /dev/vfio in container. + // - vfio-pci-gk, devices are managed by whatever driver in Guest kernel. 
+ // - vfio-ap, devices appear as VFIO character devices under /dev/vfio in container for ccw devices. + let vfio_mode = match self.toml_config.runtime.vfio_mode.as_str() { + "vfio" => { + if bus_type == "ccw" { + "vfio-ap".to_string() + } else { + "vfio-pci".to_string() + } + } + _ => "vfio-pci-gk".to_string(), + }; + + // create agent device + if let DeviceType::Vfio(device) = device_info { + let device_options = sort_options_by_pcipath(device.device_options); + let group_num = d + .path() + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or_default() + .to_string(); + let agent_device = Device { + id: group_num, + container_path: d.path().display().to_string().clone(), + field_type: vfio_mode, + options: device_options, + ..Default::default() + }; + + let device_info = if let Some(device_vendor_class) = + &device.devices.first().unwrap().device_vendor_class + { + let vendor_class = device_vendor_class + .get_vendor_class_id() + .context("get vendor class failed")?; + Some(DeviceInfo { + vendor_id: vendor_class.0.to_owned(), + class_id: vendor_class.1.to_owned(), + host_path: d.path().clone(), + }) + } else { + None + }; + devices.push(ContainerDevice { + device_info, + device: agent_device, + }); + } } } _ => {