diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs index b09328f6ee..9e329186a7 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs @@ -177,6 +177,9 @@ pub struct HostDevice { /// PCI device information (Domain) pub domain: String, + // iommufd for vfio device + pub iommufd: String, + /// PCI device information (BDF): "bus:slot:function" pub bus_slot_func: String, @@ -531,14 +534,7 @@ impl Device for VfioDevice { // do add device for vfio device match h.add_device(DeviceType::Vfio(self.clone())).await { - Ok(dev) => { - // Update device info with the one received from device attach - if let DeviceType::Vfio(vfio) = dev { - self.config = vfio.config; - self.devices = vfio.devices; - self.allocated = true; - } - + Ok(_dev) => { update_pcie_device!(self, pcie_topo)?; Ok(()) diff --git a/src/runtime-rs/crates/resource/src/cdi_devices/container_device.rs b/src/runtime-rs/crates/resource/src/cdi_devices/container_device.rs index 8a4158be6f..13d3367138 100644 --- a/src/runtime-rs/crates/resource/src/cdi_devices/container_device.rs +++ b/src/runtime-rs/crates/resource/src/cdi_devices/container_device.rs @@ -74,32 +74,26 @@ pub fn annotate_container_devices( grouped_devices .iter_mut() .for_each(|(vendor_class, container_devices)| { - // The *offset* is a monotonically increasing counter that keeps track of the number of devices - // within an IOMMU group. It increments by total_of whenever a new IOMMU group is processed. - let offset: &mut usize = &mut 0; - sort_devices_by_guest_pcipath(container_devices); container_devices .iter() .enumerate() - .for_each(|(base, container_device)| { - let total_of = container_device.device.options.len(); - // annotate device with cdi information in OCI Spec. - for index in 0..total_of { - if let Some(iommu_grpid) = - Path::new(&container_device.device.container_path) - .file_name() - .and_then(|name| name.to_str()) - { - spec.annotations_mut().as_mut().unwrap().insert( - format!("{CDI_PREFIX}/vfio{iommu_grpid}.{index}"), // cdi.k8s.io/vfioX.y - format!("{}={}", vendor_class, base + *offset), // vendor/class=name - ); - } + .for_each(|(index, container_device)| { + if let Some(iommu_grpid) = + Path::new(&container_device.device.container_path) + .file_name() + .and_then(|name| name.to_str()) + { + // iommufd cdev paths use "/dev/vfio/devices/vfio" — + // strip the "vfio" prefix so the annotation key is + // "cdi.k8s.io/vfio", matching the Go runtime. + let vfio_num = + iommu_grpid.strip_prefix("vfio").unwrap_or(iommu_grpid); + spec.annotations_mut().as_mut().unwrap().insert( + format!("{CDI_PREFIX}/vfio{vfio_num}"), + format!("{}={}", vendor_class, index), + ); } - - // update the offset with *total_of*. - *offset += total_of - 1; }); }); @@ -234,10 +228,10 @@ mod tests { device_info: Some(DeviceInfo { vendor_id: "0x1002".to_string(), class_id: "0x0302".to_string(), - host_path: PathBuf::from("/dev/device2"), + host_path: PathBuf::from("/dev/vfio/devices/vfio2"), }), device: Device { - container_path: "/dev/device2".to_string(), + container_path: "/dev/vfio/devices/vfio2".to_string(), options: vec!["pci_host_path02=BB:DD02.F02".to_string()], ..Default::default() }, @@ -246,10 +240,10 @@ mod tests { device_info: Some(DeviceInfo { vendor_id: "0x1002".to_string(), class_id: "0x0302".to_string(), - host_path: PathBuf::from("/dev/device3"), + host_path: PathBuf::from("/dev/vfio/devices/vfio0"), }), device: Device { - container_path: "/dev/device3".to_string(), + container_path: "/dev/vfio/devices/vfio0".to_string(), options: vec!["pci_host_path03=BB:DD03.F03".to_string()], ..Default::default() }, @@ -258,10 +252,10 @@ mod tests { device_info: Some(DeviceInfo { vendor_id: "0x1002".to_string(), class_id: "0x0302".to_string(), - host_path: PathBuf::from("/dev/device1"), + host_path: PathBuf::from("/dev/vfio/devices/vfio1"), }), device: Device { - container_path: "/dev/device1".to_string(), + container_path: "/dev/vfio/devices/vfio1".to_string(), options: vec!["pci_host_path01=BB:DD01.F01".to_string()], ..Default::default() }, @@ -289,15 +283,15 @@ mod tests { let expected_annotations: HashMap = vec![ ( - "cdi.k8s.io/vfiodevice3.0".to_owned(), + "cdi.k8s.io/vfio0".to_owned(), "amd.com/gpu=2".to_owned(), ), ( - "cdi.k8s.io/vfiodevice1.0".to_owned(), + "cdi.k8s.io/vfio1".to_owned(), "amd.com/gpu=0".to_owned(), ), ( - "cdi.k8s.io/vfiodevice2.0".to_owned(), + "cdi.k8s.io/vfio2".to_owned(), "amd.com/gpu=1".to_owned(), ), ] @@ -324,10 +318,10 @@ mod tests { device_info: Some(DeviceInfo { vendor_id: "0x10de".to_string(), class_id: "0x0302".to_string(), - host_path: PathBuf::from("/dev/device2"), + host_path: PathBuf::from("/dev/vfio/devices/vfio1"), }), device: Device { - container_path: "/dev/device2".to_string(), + container_path: "/dev/vfio/devices/vfio1".to_string(), options: vec!["pci_host_path02=BB:DD02.F02".to_string()], ..Default::default() }, @@ -336,10 +330,10 @@ mod tests { device_info: Some(DeviceInfo { vendor_id: "0x10de".to_string(), class_id: "0x0302".to_string(), - host_path: PathBuf::from("/dev/device3"), + host_path: PathBuf::from("/dev/vfio/devices/vfio2"), }), device: Device { - container_path: "/dev/device3".to_string(), + container_path: "/dev/vfio/devices/vfio2".to_string(), options: vec!["pci_host_path03=BB:DD03.F03".to_string()], ..Default::default() }, @@ -348,10 +342,10 @@ mod tests { device_info: Some(DeviceInfo { vendor_id: "0x8086".to_string(), class_id: "0x0302".to_string(), - host_path: PathBuf::from("/dev/device1"), + host_path: PathBuf::from("/dev/vfio/devices/vfio0"), }), device: Device { - container_path: "/dev/device1".to_string(), + container_path: "/dev/vfio/devices/vfio0".to_string(), options: vec!["pci_host_path01=BB:DD01.F01".to_string()], ..Default::default() }, @@ -360,10 +354,10 @@ mod tests { device_info: Some(DeviceInfo { vendor_id: "0x8086".to_string(), class_id: "0x0302".to_string(), - host_path: PathBuf::from("/dev/device4"), + host_path: PathBuf::from("/dev/vfio/devices/vfio3"), }), device: Device { - container_path: "/dev/device4".to_string(), + container_path: "/dev/vfio/devices/vfio3".to_string(), options: vec!["pci_host_path04=BB:DD01.F04".to_string()], ..Default::default() }, @@ -390,19 +384,19 @@ mod tests { let expected_annotations: HashMap = vec![ ( - "cdi.k8s.io/vfiodevice1.0".to_owned(), + "cdi.k8s.io/vfio0".to_owned(), "intel.com/gpu=0".to_owned(), ), ( - "cdi.k8s.io/vfiodevice2.0".to_owned(), + "cdi.k8s.io/vfio1".to_owned(), "nvidia.com/gpu=0".to_owned(), ), ( - "cdi.k8s.io/vfiodevice3.0".to_owned(), + "cdi.k8s.io/vfio2".to_owned(), "nvidia.com/gpu=1".to_owned(), ), ( - "cdi.k8s.io/vfiodevice4.0".to_owned(), + "cdi.k8s.io/vfio3".to_owned(), "intel.com/gpu=1".to_owned(), ), ] diff --git a/src/runtime-rs/crates/resource/src/cdi_devices/mod.rs b/src/runtime-rs/crates/resource/src/cdi_devices/mod.rs index 395c6c4381..f432bacf0f 100644 --- a/src/runtime-rs/crates/resource/src/cdi_devices/mod.rs +++ b/src/runtime-rs/crates/resource/src/cdi_devices/mod.rs @@ -10,14 +10,14 @@ use agent::types::Device; use std::collections::HashMap; use std::path::PathBuf; -#[derive(Clone, Default)] +#[derive(Clone, Debug, Default)] pub struct DeviceInfo { pub class_id: String, pub vendor_id: String, pub host_path: PathBuf, } -#[derive(Clone, Default)] +#[derive(Clone, Debug, Default)] pub struct ContainerDevice { pub device_info: Option, pub device: Device, diff --git a/src/runtime-rs/crates/resource/src/lib.rs b/src/runtime-rs/crates/resource/src/lib.rs index a91aa0c8cf..a1f9d06466 100644 --- a/src/runtime-rs/crates/resource/src/lib.rs +++ b/src/runtime-rs/crates/resource/src/lib.rs @@ -18,7 +18,8 @@ mod manager_inner; pub mod network; pub mod resource_persist; use hypervisor::{ - BlockConfig, HybridVsockConfig, PortDeviceConfig, ProtectionDeviceConfig, VsockConfig, + vfio_device::VfioDeviceBase, BlockConfig, HybridVsockConfig, PortDeviceConfig, + ProtectionDeviceConfig, VsockConfig, }; use network::NetworkConfig; pub mod rootfs; @@ -39,6 +40,7 @@ pub enum ResourceConfig { HybridVsock(HybridVsockConfig), Vsock(VsockConfig), Protection(ProtectionDeviceConfig), + VfioDeviceModern(VfioDeviceBase), PortDevice(PortDeviceConfig), InitData(BlockConfig), } diff --git a/src/runtime-rs/crates/resource/src/manager_inner.rs b/src/runtime-rs/crates/resource/src/manager_inner.rs index 903c401f94..2947bbef1a 100644 --- a/src/runtime-rs/crates/resource/src/manager_inner.rs +++ b/src/runtime-rs/crates/resource/src/manager_inner.rs @@ -211,6 +211,11 @@ impl ResourceManagerInner { .await .context("do handle initdata block device failed.")?; } + ResourceConfig::VfioDeviceModern(vfiobase) => { + do_handle_device(&self.device_manager, &DeviceConfig::VfioModernCfg(vfiobase)) + .await + .context("do handle vfio device failed.")?; + } }; } @@ -529,50 +534,134 @@ impl ResourceManagerInner { .await .context("do handle device")?; - // vfio mode: vfio-pci and vfio-pci-gk for x86_64 - // - vfio-pci, devices appear as VFIO character devices under /dev/vfio in container. - // - vfio-pci-gk, devices are managed by whatever driver in Guest kernel. - // - vfio-ap, devices appear as VFIO character devices under /dev/vfio in container for ccw devices. - let vfio_mode = match self.toml_config.runtime.vfio_mode.as_str() { - "vfio" => { - if bus_type == "ccw" { - "vfio-ap".to_string() - } else { - "vfio-pci".to_string() - } - } - _ => "vfio-pci-gk".to_string(), - }; + if let DeviceType::VfioModern(vfio_dev) = device_info.clone() { + info!(sl!(), "device info: {:?}", vfio_dev.lock().await); + let vfio_device = vfio_dev.lock().await; + let guest_pci_path = vfio_device + .config + .guest_pci_path + .clone() + .context("VFIO device has no guest PCI path assigned")?; + let host_bdf = vfio_device.device.primary.addr.to_string(); + info!( + sl!(), + "vfio device guest pci path: {:?}, host bdf: {:?}", + guest_pci_path, + &host_bdf + ); - // create agent device - if let DeviceType::Vfio(device) = device_info { - let device_options = sort_options_by_pcipath(device.device_options); + // vfio mode: vfio-pci and vfio-pci-gk for x86_64 + // - vfio-pci, devices appear as VFIO character devices under /dev/vfio in container. + // - vfio-pci-gk, devices are managed by whatever driver in Guest kernel. + // - vfio-ap, devices appear as VFIO character devices under /dev/vfio in container for ccw devices. + let vfio_mode = match self.toml_config.runtime.vfio_mode.as_str() { + "vfio" => { + if bus_type == "ccw" { + "vfio-ap".to_string() + } else { + "vfio-pci".to_string() + } + } + _ => "vfio-pci-gk".to_string(), + }; + let device_options = vec![format!("{}={}", host_bdf, guest_pci_path)]; + // The Go runtime sets the device Id to + // filepath.Base(dev.ContainerPath), e.g. "vfio0". + // The agent policy validates this with: + // i_vfio_device.id == concat("", ["vfio", suffix]) + let group_num = d + .path() + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or_default() + .to_string(); let agent_device = Device { - id: device.device_id, // just for kata-agent + id: group_num, container_path: d.path().display().to_string().clone(), field_type: vfio_mode, options: device_options, ..Default::default() }; - let device_info = if let Some(device_vendor_class) = - &device.devices.first().unwrap().device_vendor_class - { - let vendor_class = device_vendor_class - .get_vendor_class_id() - .context("get vendor class failed")?; - Some(DeviceInfo { - vendor_id: vendor_class.0.to_owned(), - class_id: vendor_class.1.to_owned(), - host_path: d.path().clone(), - }) - } else { - None - }; + let device_info = Some(DeviceInfo { + vendor_id: vfio_device + .device + .primary + .vendor_id + .clone() + .unwrap_or_default(), + class_id: format!( + "{:#08x}", + vfio_device.device.primary.class_code.unwrap_or_default() + ), + host_path: d.path().clone(), + }); + info!( + sl!(), + "vfio device info for agent: {:?}", + device_info.clone() + ); + info!( + sl!(), + "agent device info for agent: {:?}", + agent_device.clone() + ); devices.push(ContainerDevice { device_info, device: agent_device, }); + } else { + // vfio mode: vfio-pci and vfio-pci-gk for x86_64 + // - vfio-pci, devices appear as VFIO character devices under /dev/vfio in container. + // - vfio-pci-gk, devices are managed by whatever driver in Guest kernel. + // - vfio-ap, devices appear as VFIO character devices under /dev/vfio in container for ccw devices. + let vfio_mode = match self.toml_config.runtime.vfio_mode.as_str() { + "vfio" => { + if bus_type == "ccw" { + "vfio-ap".to_string() + } else { + "vfio-pci".to_string() + } + } + _ => "vfio-pci-gk".to_string(), + }; + + // create agent device + if let DeviceType::Vfio(device) = device_info { + let device_options = sort_options_by_pcipath(device.device_options); + let group_num = d + .path() + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or_default() + .to_string(); + let agent_device = Device { + id: group_num, + container_path: d.path().display().to_string().clone(), + field_type: vfio_mode, + options: device_options, + ..Default::default() + }; + + let device_info = if let Some(device_vendor_class) = + &device.devices.first().unwrap().device_vendor_class + { + let vendor_class = device_vendor_class + .get_vendor_class_id() + .context("get vendor class failed")?; + Some(DeviceInfo { + vendor_id: vendor_class.0.to_owned(), + class_id: vendor_class.1.to_owned(), + host_path: d.path().clone(), + }) + } else { + None + }; + devices.push(ContainerDevice { + device_info, + device: agent_device, + }); + } } } _ => {