From 992a7233925edcff85828796fccb742fded5817d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 27 May 2026 16:34:25 +0200 Subject: [PATCH] runtime: resolve cold-plug VFIO guest PCI path via QMP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For QEMU cold-plug + guest-kernel mode the guest BDF of a cold-plugged VFIO device is auto-allocated at boot (each pcie-root-port is added with chassis=N,slot=N but no pinned addr=, so QEMU picks the next free slot on pcie.0). The hot-plug path already queries QMP via qomGetPciPath; reuse that same mechanism for cold-plugged devices. Add ResolveColdPlugVFIOGuestPciPaths to the Hypervisor interface. Implement it in qemu.go using qomGetPciPath. Add no-op stubs for all other hypervisors. Call it at the start of setupNetworks so that the PCI paths are resolved before generateVCNetworkStructures emits the agent Interface proto. Also stamp the resolved path onto PhysicalEndpoints (used by SR-IOV VFs exposed as physical network devices) so that update_interface carries a non-empty devicePath. Without devicePath the agent falls back to a by-MAC link lookup which fails when the VF firmware MAC differs from the CNI-assigned MAC after the vfio-pci unbind/rebind cycle. 
Signed-off-by: Fabiano Fidêncio Assisted-by: Cursor --- src/runtime/virtcontainers/clh.go | 4 ++ src/runtime/virtcontainers/fc.go | 4 ++ src/runtime/virtcontainers/hypervisor.go | 5 +++ src/runtime/virtcontainers/kata_agent.go | 45 +++++++++++++++++++ src/runtime/virtcontainers/mock_hypervisor.go | 5 +++ src/runtime/virtcontainers/qemu.go | 42 +++++++++++++++++ src/runtime/virtcontainers/remote.go | 5 +++ src/runtime/virtcontainers/stratovirt.go | 4 ++ src/runtime/virtcontainers/virtframework.go | 5 +++ 9 files changed, 119 insertions(+) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 51e3577ab6..bffaa34385 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -1984,3 +1984,7 @@ func pathExists(path string) bool { } return true } + +func (clh *cloudHypervisor) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error { + return nil +} diff --git a/src/runtime/virtcontainers/fc.go b/src/runtime/virtcontainers/fc.go index 29a6e20ce4..d5e2741e9c 100644 --- a/src/runtime/virtcontainers/fc.go +++ b/src/runtime/virtcontainers/fc.go @@ -1334,3 +1334,7 @@ func (fc *firecracker) GenerateSocket(id string) (interface{}, error) { func (fc *firecracker) IsRateLimiterBuiltin() bool { return true } + +func (fc *firecracker) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error { + return nil +} diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 8b93b31428..9f2bc71a6a 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -1316,6 +1316,11 @@ type Hypervisor interface { AddDevice(ctx context.Context, devInfo interface{}, devType DeviceType) error HotplugAddDevice(ctx context.Context, devInfo interface{}, devType DeviceType) (interface{}, error) HotplugRemoveDevice(ctx context.Context, devInfo interface{}, devType DeviceType) (interface{}, error) + // 
ResolveColdPlugVFIOGuestPciPaths resolves the in-guest PCI path for each + // VFIODev with IsPCIe=true and an empty GuestPciPath, writing the result + // back onto the device. Hypervisors that do not require this (e.g. CLH, + // which already populates GuestPciPath during hot-plug) return nil. + ResolveColdPlugVFIOGuestPciPaths(ctx context.Context, vfioDevs []*config.VFIODev) error ResizeMemory(ctx context.Context, memMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, MemoryDevice, error) ResizeVCPUs(ctx context.Context, vcpus uint32) (uint32, uint32, error) GetTotalMemoryMB(ctx context.Context) uint32 diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index f720c4e3e3..74e00afad0 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -1419,6 +1419,31 @@ func (k *kataAgent) setupNetworks(ctx context.Context, sandbox *Sandbox, c *Cont return nil } + // Resolve the guest PCI path for cold-plugged VFIO PCIe devices whose + // GuestPciPath is not yet known. The path is needed below to stamp + // Interface.devicePath in the agent proto; without it the agent falls + // back to a by-MAC link lookup, which fails for SR-IOV VFs whose + // firmware MAC differs from the CNI-assigned MAC after the + // vfio-pci unbind/rebind cycle. 
+ var coldPlugVFIODevs []*config.VFIODev + for _, dev := range sandbox.devManager.GetAllDevices() { + if dev.DeviceType() != config.DeviceVFIO { + continue + } + if vfioDevs, ok := dev.GetDeviceInfo().([]*config.VFIODev); ok { + for _, vfioDev := range vfioDevs { + if vfioDev.IsPCIe && vfioDev.GuestPciPath.IsNil() && vfioDev.ID != "" { + coldPlugVFIODevs = append(coldPlugVFIODevs, vfioDev) + } + } + } + } + if len(coldPlugVFIODevs) > 0 { + if err := sandbox.hypervisor.ResolveColdPlugVFIOGuestPciPaths(ctx, coldPlugVFIODevs); err != nil { + k.Logger().WithError(err).Warn("setupNetworks: failed to resolve guest PCI paths for cold-plug VFIO devices") + } + } + var err error var endpoints []Endpoint if c == nil || c.id == sandbox.id { @@ -1429,6 +1454,26 @@ func (k *kataAgent) setupNetworks(ctx context.Context, sandbox *Sandbox, c *Cont // creation, so no need to skip them here anymore. for _, ep := range sandbox.network.Endpoints() { if ep.Type() != VfioEndpointType { + // For cold-plugged SR-IOV VFs that appear as PhysicalEndpoints, + // the guest PCI path is known after ResolveColdPlugVFIOGuestPciPaths + // has run (earlier in setupNetworks). Look it up and stamp it on the + // endpoint so that generateVCNetworkStructures emits a non-empty + // devicePath in the agent Interface proto. Without this the agent + // receives devicePath="" and falls back to a by-MAC link lookup, + // which fails when the VF firmware MAC differs from the CNI-assigned MAC. 
+ if ep.Type() == PhysicalEndpointType && ep.PciPath().IsNil() { + if pe, ok := ep.(*PhysicalEndpoint); ok && pe.BDF != "" { + guestPath := sandbox.GetVfioDeviceGuestPciPath(pe.BDF) + if !guestPath.IsNil() { + ep.SetPciPath(guestPath) + k.Logger().WithFields(logrus.Fields{ + "endpoint-name": ep.Name(), + "host-bdf": pe.BDF, + "guest-pci-path": guestPath.String(), + }).Info("setupNetworks: filled guest PCI path for PhysicalEndpoint cold-plug") + } + } + } endpoints = append(endpoints, ep) } } diff --git a/src/runtime/virtcontainers/mock_hypervisor.go b/src/runtime/virtcontainers/mock_hypervisor.go index 7d6da561fa..90f2de2ebc 100644 --- a/src/runtime/virtcontainers/mock_hypervisor.go +++ b/src/runtime/virtcontainers/mock_hypervisor.go @@ -10,6 +10,7 @@ import ( "errors" "os" + "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" ) @@ -155,3 +156,7 @@ func (m *mockHypervisor) GenerateSocket(id string) (interface{}, error) { func (m *mockHypervisor) IsRateLimiterBuiltin() bool { return false } + +func (m *mockHypervisor) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error { + return nil +} diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 74818ff5d6..fe99945f56 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -1848,6 +1848,48 @@ func (q *qemu) togglePauseSandbox(ctx context.Context, pause bool) error { return q.qmpMonitorCh.qmp.ExecuteCont(q.qmpMonitorCh.ctx) } +// ResolveColdPlugVFIOGuestPciPaths implements Hypervisor. For each VFIODev +// with IsPCIe=true and an empty GuestPciPath, it queries QMP to find the +// in-guest PCI path and writes it back onto the device. 
+func (q *qemu) ResolveColdPlugVFIOGuestPciPaths(ctx context.Context, vfioDevs []*config.VFIODev) error { + if len(vfioDevs) == 0 { + return nil + } + if err := q.qmpSetup(); err != nil { + return fmt.Errorf("ResolveColdPlugVFIOGuestPciPaths: qmpSetup: %w", err) + } + for _, vfioDev := range vfioDevs { + if vfioDev == nil || !vfioDev.IsPCIe { + continue + } + if !vfioDev.GuestPciPath.IsNil() { + q.Logger().WithFields(logrus.Fields{ + "qemu-device-id": vfioDev.ID, + "host-bdf": vfioDev.BDF, + "guest-pci-path": vfioDev.GuestPciPath.String(), + }).Debug("ResolveColdPlugVFIOGuestPciPaths: skipping device with pre-computed guest PCI path") + continue + } + guestPath, err := q.arch.qomGetPciPath(vfioDev.ID, &q.qmpMonitorCh) + if err != nil { + q.Logger().WithFields(logrus.Fields{ + "qemu-device-id": vfioDev.ID, + "host-bdf": vfioDev.BDF, + }).WithError(err).Warn("ResolveColdPlugVFIOGuestPciPaths: failed to resolve guest PCI path") + continue + } + vfioDev.GuestPciPath = guestPath + q.Logger().WithFields(logrus.Fields{ + "qemu-device-id": vfioDev.ID, + "host-bdf": vfioDev.BDF, + "port": vfioDev.Port, + "bus": vfioDev.Bus, + "guest-pci-path": guestPath.String(), + }).Info("ResolveColdPlugVFIOGuestPciPaths: resolved guest PCI path") + } + return nil +} + func (q *qemu) qmpSetup() error { q.qmpMonitorCh.Lock() defer q.qmpMonitorCh.Unlock() diff --git a/src/runtime/virtcontainers/remote.go b/src/runtime/virtcontainers/remote.go index d88a3ca4e9..caf431ac72 100644 --- a/src/runtime/virtcontainers/remote.go +++ b/src/runtime/virtcontainers/remote.go @@ -13,6 +13,7 @@ import ( cri "github.com/containerd/containerd/pkg/cri/annotations" "github.com/containerd/ttrpc" + "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" persistapi "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors" pb "github.com/kata-containers/kata-containers/src/runtime/protocols/hypervisor" hypannotations 
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations" @@ -296,3 +297,7 @@ func (rh *remoteHypervisor) Load(persistapi.HypervisorState) { func (rh *remoteHypervisor) IsRateLimiterBuiltin() bool { return false } + +func (rh *remoteHypervisor) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error { + return nil +} diff --git a/src/runtime/virtcontainers/stratovirt.go b/src/runtime/virtcontainers/stratovirt.go index 3f6a4c1b18..52043c6681 100644 --- a/src/runtime/virtcontainers/stratovirt.go +++ b/src/runtime/virtcontainers/stratovirt.go @@ -1309,3 +1309,7 @@ func (s *stratovirt) GenerateSocket(id string) (interface{}, error) { func (s *stratovirt) IsRateLimiterBuiltin() bool { return false } + +func (s *stratovirt) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error { + return nil +} diff --git a/src/runtime/virtcontainers/virtframework.go b/src/runtime/virtcontainers/virtframework.go index 9196e291c2..295f8302a7 100644 --- a/src/runtime/virtcontainers/virtframework.go +++ b/src/runtime/virtcontainers/virtframework.go @@ -12,6 +12,7 @@ package virtcontainers import ( "context" + "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" "github.com/pkg/errors" @@ -135,3 +136,7 @@ func (vfw *virtFramework) GenerateSocket(id string) (interface{}, error) { func (vfw *virtFramework) IsRateLimiterBuiltin() bool { return false } + +func (vfw *virtFramework) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error { + return nil +}