runtime-rs: Add resource manager VFIO modern handling and CDI wiring

Extend the resource manager to handle VfioModern and BlockModern device
types when building the agent's device list and storage list. For VFIO
modern devices, the manager resolves the container path and sets the
agent Device.id to the base name of that path (e.g. "vfio0"), which is
the value genpolicy expects.

Rework CDI device annotation handling in container_device.rs:
- Strip the "vfio" prefix from device names when building CDI annotation
  keys (cdi.k8s.io/vfio0, cdi.k8s.io/vfio1, etc.)
- Remove the per-device index suffix (the ".y" in cdi.k8s.io/vfioX.y)
  that caused policy mismatches
- Add iommufd cdev path support alongside legacy VFIO group paths

Update the vfio driver to detect iommufd cdev vs legacy group from
the CDI device node path.

Signed-off-by: Alex Lyn <alex.lyn@antgroup.com>
Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
This commit is contained in:
Alex Lyn
2026-04-12 16:30:29 +02:00
committed by Fabiano Fidêncio
parent ae7b83980e
commit d217f2efda
5 changed files with 166 additions and 85 deletions

View File

@@ -177,6 +177,9 @@ pub struct HostDevice {
/// PCI device information (Domain)
pub domain: String,
/// iommufd for the VFIO device
pub iommufd: String,
/// PCI device information (BDF): "bus:slot:function"
pub bus_slot_func: String,
@@ -531,14 +534,7 @@ impl Device for VfioDevice {
// do add device for vfio device
match h.add_device(DeviceType::Vfio(self.clone())).await {
Ok(dev) => {
// Update device info with the one received from device attach
if let DeviceType::Vfio(vfio) = dev {
self.config = vfio.config;
self.devices = vfio.devices;
self.allocated = true;
}
Ok(_dev) => {
update_pcie_device!(self, pcie_topo)?;
Ok(())

View File

@@ -74,32 +74,26 @@ pub fn annotate_container_devices(
grouped_devices
.iter_mut()
.for_each(|(vendor_class, container_devices)| {
// The *offset* is a monotonically increasing counter that keeps track of the number of devices
// within an IOMMU group. It increments by total_of whenever a new IOMMU group is processed.
let offset: &mut usize = &mut 0;
sort_devices_by_guest_pcipath(container_devices);
container_devices
.iter()
.enumerate()
.for_each(|(base, container_device)| {
let total_of = container_device.device.options.len();
// annotate device with cdi information in OCI Spec.
for index in 0..total_of {
if let Some(iommu_grpid) =
Path::new(&container_device.device.container_path)
.file_name()
.and_then(|name| name.to_str())
{
spec.annotations_mut().as_mut().unwrap().insert(
format!("{CDI_PREFIX}/vfio{iommu_grpid}.{index}"), // cdi.k8s.io/vfioX.y
format!("{}={}", vendor_class, base + *offset), // vendor/class=name
);
}
.for_each(|(index, container_device)| {
if let Some(iommu_grpid) =
Path::new(&container_device.device.container_path)
.file_name()
.and_then(|name| name.to_str())
{
// iommufd cdev paths use "/dev/vfio/devices/vfio<N>" —
// strip the "vfio" prefix so the annotation key is
// "cdi.k8s.io/vfio<N>", matching the Go runtime.
let vfio_num =
iommu_grpid.strip_prefix("vfio").unwrap_or(iommu_grpid);
spec.annotations_mut().as_mut().unwrap().insert(
format!("{CDI_PREFIX}/vfio{vfio_num}"),
format!("{}={}", vendor_class, index),
);
}
// update the offset with *total_of*.
*offset += total_of - 1;
});
});
@@ -234,10 +228,10 @@ mod tests {
device_info: Some(DeviceInfo {
vendor_id: "0x1002".to_string(),
class_id: "0x0302".to_string(),
host_path: PathBuf::from("/dev/device2"),
host_path: PathBuf::from("/dev/vfio/devices/vfio2"),
}),
device: Device {
container_path: "/dev/device2".to_string(),
container_path: "/dev/vfio/devices/vfio2".to_string(),
options: vec!["pci_host_path02=BB:DD02.F02".to_string()],
..Default::default()
},
@@ -246,10 +240,10 @@ mod tests {
device_info: Some(DeviceInfo {
vendor_id: "0x1002".to_string(),
class_id: "0x0302".to_string(),
host_path: PathBuf::from("/dev/device3"),
host_path: PathBuf::from("/dev/vfio/devices/vfio0"),
}),
device: Device {
container_path: "/dev/device3".to_string(),
container_path: "/dev/vfio/devices/vfio0".to_string(),
options: vec!["pci_host_path03=BB:DD03.F03".to_string()],
..Default::default()
},
@@ -258,10 +252,10 @@ mod tests {
device_info: Some(DeviceInfo {
vendor_id: "0x1002".to_string(),
class_id: "0x0302".to_string(),
host_path: PathBuf::from("/dev/device1"),
host_path: PathBuf::from("/dev/vfio/devices/vfio1"),
}),
device: Device {
container_path: "/dev/device1".to_string(),
container_path: "/dev/vfio/devices/vfio1".to_string(),
options: vec!["pci_host_path01=BB:DD01.F01".to_string()],
..Default::default()
},
@@ -289,15 +283,15 @@ mod tests {
let expected_annotations: HashMap<String, String> = vec![
(
"cdi.k8s.io/vfiodevice3.0".to_owned(),
"cdi.k8s.io/vfio0".to_owned(),
"amd.com/gpu=2".to_owned(),
),
(
"cdi.k8s.io/vfiodevice1.0".to_owned(),
"cdi.k8s.io/vfio1".to_owned(),
"amd.com/gpu=0".to_owned(),
),
(
"cdi.k8s.io/vfiodevice2.0".to_owned(),
"cdi.k8s.io/vfio2".to_owned(),
"amd.com/gpu=1".to_owned(),
),
]
@@ -324,10 +318,10 @@ mod tests {
device_info: Some(DeviceInfo {
vendor_id: "0x10de".to_string(),
class_id: "0x0302".to_string(),
host_path: PathBuf::from("/dev/device2"),
host_path: PathBuf::from("/dev/vfio/devices/vfio1"),
}),
device: Device {
container_path: "/dev/device2".to_string(),
container_path: "/dev/vfio/devices/vfio1".to_string(),
options: vec!["pci_host_path02=BB:DD02.F02".to_string()],
..Default::default()
},
@@ -336,10 +330,10 @@ mod tests {
device_info: Some(DeviceInfo {
vendor_id: "0x10de".to_string(),
class_id: "0x0302".to_string(),
host_path: PathBuf::from("/dev/device3"),
host_path: PathBuf::from("/dev/vfio/devices/vfio2"),
}),
device: Device {
container_path: "/dev/device3".to_string(),
container_path: "/dev/vfio/devices/vfio2".to_string(),
options: vec!["pci_host_path03=BB:DD03.F03".to_string()],
..Default::default()
},
@@ -348,10 +342,10 @@ mod tests {
device_info: Some(DeviceInfo {
vendor_id: "0x8086".to_string(),
class_id: "0x0302".to_string(),
host_path: PathBuf::from("/dev/device1"),
host_path: PathBuf::from("/dev/vfio/devices/vfio0"),
}),
device: Device {
container_path: "/dev/device1".to_string(),
container_path: "/dev/vfio/devices/vfio0".to_string(),
options: vec!["pci_host_path01=BB:DD01.F01".to_string()],
..Default::default()
},
@@ -360,10 +354,10 @@ mod tests {
device_info: Some(DeviceInfo {
vendor_id: "0x8086".to_string(),
class_id: "0x0302".to_string(),
host_path: PathBuf::from("/dev/device4"),
host_path: PathBuf::from("/dev/vfio/devices/vfio3"),
}),
device: Device {
container_path: "/dev/device4".to_string(),
container_path: "/dev/vfio/devices/vfio3".to_string(),
options: vec!["pci_host_path04=BB:DD01.F04".to_string()],
..Default::default()
},
@@ -390,19 +384,19 @@ mod tests {
let expected_annotations: HashMap<String, String> = vec![
(
"cdi.k8s.io/vfiodevice1.0".to_owned(),
"cdi.k8s.io/vfio0".to_owned(),
"intel.com/gpu=0".to_owned(),
),
(
"cdi.k8s.io/vfiodevice2.0".to_owned(),
"cdi.k8s.io/vfio1".to_owned(),
"nvidia.com/gpu=0".to_owned(),
),
(
"cdi.k8s.io/vfiodevice3.0".to_owned(),
"cdi.k8s.io/vfio2".to_owned(),
"nvidia.com/gpu=1".to_owned(),
),
(
"cdi.k8s.io/vfiodevice4.0".to_owned(),
"cdi.k8s.io/vfio3".to_owned(),
"intel.com/gpu=1".to_owned(),
),
]

View File

@@ -10,14 +10,14 @@ use agent::types::Device;
use std::collections::HashMap;
use std::path::PathBuf;
#[derive(Clone, Default)]
#[derive(Clone, Debug, Default)]
pub struct DeviceInfo {
pub class_id: String,
pub vendor_id: String,
pub host_path: PathBuf,
}
#[derive(Clone, Default)]
#[derive(Clone, Debug, Default)]
pub struct ContainerDevice {
pub device_info: Option<DeviceInfo>,
pub device: Device,

View File

@@ -18,7 +18,8 @@ mod manager_inner;
pub mod network;
pub mod resource_persist;
use hypervisor::{
BlockConfig, HybridVsockConfig, PortDeviceConfig, ProtectionDeviceConfig, VsockConfig,
vfio_device::VfioDeviceBase, BlockConfig, HybridVsockConfig, PortDeviceConfig,
ProtectionDeviceConfig, VsockConfig,
};
use network::NetworkConfig;
pub mod rootfs;
@@ -39,6 +40,7 @@ pub enum ResourceConfig {
HybridVsock(HybridVsockConfig),
Vsock(VsockConfig),
Protection(ProtectionDeviceConfig),
VfioDeviceModern(VfioDeviceBase),
PortDevice(PortDeviceConfig),
InitData(BlockConfig),
}

View File

@@ -211,6 +211,11 @@ impl ResourceManagerInner {
.await
.context("do handle initdata block device failed.")?;
}
ResourceConfig::VfioDeviceModern(vfiobase) => {
do_handle_device(&self.device_manager, &DeviceConfig::VfioModernCfg(vfiobase))
.await
.context("do handle vfio device failed.")?;
}
};
}
@@ -483,50 +488,134 @@ impl ResourceManagerInner {
.await
.context("do handle device")?;
// vfio mode: vfio-pci and vfio-pci-gk for x86_64
// - vfio-pci, devices appear as VFIO character devices under /dev/vfio in container.
// - vfio-pci-gk, devices are managed by whatever driver in Guest kernel.
// - vfio-ap, devices appear as VFIO character devices under /dev/vfio in container for ccw devices.
let vfio_mode = match self.toml_config.runtime.vfio_mode.as_str() {
"vfio" => {
if bus_type == "ccw" {
"vfio-ap".to_string()
} else {
"vfio-pci".to_string()
}
}
_ => "vfio-pci-gk".to_string(),
};
if let DeviceType::VfioModern(vfio_dev) = device_info.clone() {
info!(sl!(), "device info: {:?}", vfio_dev.lock().await);
let vfio_device = vfio_dev.lock().await;
let guest_pci_path = vfio_device
.config
.guest_pci_path
.clone()
.unwrap_or_default();
let host_bdf = vfio_device.device.primary.addr.to_string();
info!(
sl!(),
"vfio device guest pci path: {:?}, host bdf: {:?}",
guest_pci_path,
&host_bdf
);
// create agent device
if let DeviceType::Vfio(device) = device_info {
let device_options = sort_options_by_pcipath(device.device_options);
// vfio mode: vfio-pci and vfio-pci-gk for x86_64
// - vfio-pci, devices appear as VFIO character devices under /dev/vfio in container.
// - vfio-pci-gk, devices are managed by whatever driver in Guest kernel.
// - vfio-ap, devices appear as VFIO character devices under /dev/vfio in container for ccw devices.
let vfio_mode = match self.toml_config.runtime.vfio_mode.as_str() {
"vfio" => {
if bus_type == "ccw" {
"vfio-ap".to_string()
} else {
"vfio-pci".to_string()
}
}
_ => "vfio-pci-gk".to_string(),
};
let device_options = vec![format!("{}={}", host_bdf, guest_pci_path)];
// The Go runtime sets the device Id to
// filepath.Base(dev.ContainerPath), e.g. "vfio0".
// The agent policy validates this with:
// i_vfio_device.id == concat("", ["vfio", suffix])
let group_num = d
.path()
.file_name()
.and_then(|n| n.to_str())
.unwrap_or_default()
.to_string();
let agent_device = Device {
id: device.device_id, // just for kata-agent
id: group_num,
container_path: d.path().display().to_string().clone(),
field_type: vfio_mode,
options: device_options,
..Default::default()
};
let device_info = if let Some(device_vendor_class) =
&device.devices.first().unwrap().device_vendor_class
{
let vendor_class = device_vendor_class
.get_vendor_class_id()
.context("get vendor class failed")?;
Some(DeviceInfo {
vendor_id: vendor_class.0.to_owned(),
class_id: vendor_class.1.to_owned(),
host_path: d.path().clone(),
})
} else {
None
};
let device_info = Some(DeviceInfo {
vendor_id: vfio_device
.device
.primary
.vendor_id
.clone()
.unwrap_or_default(),
class_id: format!(
"{:#08x}",
vfio_device.device.primary.class_code.unwrap_or_default()
),
host_path: d.path().clone(),
});
info!(
sl!(),
"vfio device info for agent: {:?}",
device_info.clone()
);
info!(
sl!(),
"agent device info for agent: {:?}",
agent_device.clone()
);
devices.push(ContainerDevice {
device_info,
device: agent_device,
});
} else {
// vfio mode: vfio-pci and vfio-pci-gk for x86_64
// - vfio-pci, devices appear as VFIO character devices under /dev/vfio in container.
// - vfio-pci-gk, devices are managed by whatever driver in Guest kernel.
// - vfio-ap, devices appear as VFIO character devices under /dev/vfio in container for ccw devices.
let vfio_mode = match self.toml_config.runtime.vfio_mode.as_str() {
"vfio" => {
if bus_type == "ccw" {
"vfio-ap".to_string()
} else {
"vfio-pci".to_string()
}
}
_ => "vfio-pci-gk".to_string(),
};
// create agent device
if let DeviceType::Vfio(device) = device_info {
let device_options = sort_options_by_pcipath(device.device_options);
let group_num = d
.path()
.file_name()
.and_then(|n| n.to_str())
.unwrap_or_default()
.to_string();
let agent_device = Device {
id: group_num,
container_path: d.path().display().to_string().clone(),
field_type: vfio_mode,
options: device_options,
..Default::default()
};
let device_info = if let Some(device_vendor_class) =
&device.devices.first().unwrap().device_vendor_class
{
let vendor_class = device_vendor_class
.get_vendor_class_id()
.context("get vendor class failed")?;
Some(DeviceInfo {
vendor_id: vendor_class.0.to_owned(),
class_id: vendor_class.1.to_owned(),
host_path: d.path().clone(),
})
} else {
None
};
devices.push(ContainerDevice {
device_info,
device: agent_device,
});
}
}
}
_ => {