From e6777f0866f20264a323d3ccb0327464ce655972 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 27 May 2026 15:32:05 +0200 Subject: [PATCH] runtime: keep cold-plug VFIO devices in guest-kernel mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Container.createDevices was dropping cold-plug VFIO entries from the container's deviceInfos whenever vfio_mode = "guest-kernel", which in turn meant the agent's CreateContainer request carried no vfio-pci-gk device entry and sandbox.pcimap[cid] stayed empty. The SR-IOV device plugin still set PCIDEVICE_<resource>=<host-BDF> on the workload container, so update_env_pci then aborted with "No PCI mapping found for container <cid>" and the container failed with CrashLoopBackOff. Include cold-plug VFIO devices in deviceInfos for both VFIO modes. The existing vfio-pci-gk agent handler returns dev: None (so /dev/vfio/<group> is not materialised in the container spec, and constrainGRPCSpec(stripVfio=true) already strips it from the grpc spec for guest-kernel mode), while still recording the host->guest PCI mapping into sandbox.pcimap[cid] so env-var translation works. devManager.NewDevice calls FindDevice first, which matches the already cold-plugged sandbox-level device by HostPath / major / minor, so this does not double-attach. Signed-off-by: Fabiano Fidêncio Assisted-by: Cursor --- src/runtime/virtcontainers/container.go | 17 ++++++++++++++++- src/runtime/virtcontainers/kata_agent_test.go | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go index c3784712f3..1716389272 100644 --- a/src/runtime/virtcontainers/container.go +++ b/src/runtime/virtcontainers/container.go @@ -1106,7 +1106,22 @@ func (c *Container) createDevices(ctx context.Context, contConfig *ContainerConf // device /dev/vfio/vfio an 2nd the actuall device(s) afterwards. 
// Sort the devices starting with device #1 being the VFIO control group // device and the next the actuall device(s) /dev/vfio/ - if coldPlugVFIO && c.sandbox.config.VfioMode == config.VFIOModeVFIO { + // + // Cold-plug VFIO devices must also reach the agent in + // `VfioMode == GuestKernel`. The agent's `vfio-pci-gk` handler + // returns `dev: None` (so /dev/vfio/<group> is *not* materialised in + // the container spec — `constrainGRPCSpec(stripVfio=true)` will have + // already removed it from `grpcSpec.Linux.Devices`), but it still + // records the host->guest PCI mapping into `sandbox.pcimap[cid]`. + // Without that mapping, `update_env_pci` cannot translate the + // `PCIDEVICE_<resource>=<host-BDF>` env vars set by the SR-IOV device + // plugin and aborts the container creation with + // "No PCI mapping found for container <cid>". + // + // `devManager.NewDevice` calls `FindDevice` first, which matches the + // already-cold-plugged sandbox-level device by HostPath/major/minor, + // so this does not double-attach. + if coldPlugVFIO { // DeviceInfo should still be added to the sandbox's device manager // if vfio_mode is VFIO and coldPlugVFIO is true (e.g. vfio-ap-cold). // This ensures that ociSpec.Linux.Devices is updated with diff --git a/src/runtime/virtcontainers/kata_agent_test.go b/src/runtime/virtcontainers/kata_agent_test.go index 4b27f0c07e..5b08271ad4 100644 --- a/src/runtime/virtcontainers/kata_agent_test.go +++ b/src/runtime/virtcontainers/kata_agent_test.go @@ -1284,7 +1284,7 @@ func TestKataAgentCreateContainerVFIODevices(t *testing.T) { hotPlugVFIO: config.NoPort, coldPlugVFIO: config.BridgePort, vfioMode: config.VFIOModeGuestKernel, - expectVFIODev: false, + expectVFIODev: true, }, }