runtime: resolve cold-plug VFIO guest PCI path via QMP

For QEMU cold-plug + guest-kernel mode the guest BDF of a cold-plugged
VFIO device is auto-allocated at boot (each pcie-root-port is added with
chassis=N,slot=N but no pinned addr=, so QEMU picks the next free slot
on pcie.0). The hot-plug path already queries QMP via qomGetPciPath;
reuse that same mechanism for cold-plugged devices.

Add ResolveColdPlugVFIOGuestPciPaths to the Hypervisor interface.
Implement it in qemu.go using qomGetPciPath. Add no-op stubs for all
other hypervisors.

Call it at the start of setupNetworks so that the PCI paths are resolved
before generateVCNetworkStructures emits the agent Interface proto. Also
stamp the resolved path onto PhysicalEndpoints (used by SR-IOV VFs
exposed as physical network devices) so that update_interface carries a
non-empty devicePath. Without devicePath the agent falls back to a
by-MAC link lookup which fails when the VF firmware MAC differs from the
CNI-assigned MAC after the vfio-pci unbind/rebind cycle.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
Assisted-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Fabiano Fidêncio
2026-05-27 16:34:25 +02:00
parent 23c5250933
commit 992a723392
9 changed files with 119 additions and 0 deletions

View File

@@ -1984,3 +1984,7 @@ func pathExists(path string) bool {
}
return true
}
// ResolveColdPlugVFIOGuestPciPaths implements Hypervisor. It is a no-op for
// Cloud Hypervisor: both arguments are ignored and nil is always returned.
func (clh *cloudHypervisor) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error {
return nil
}

View File

@@ -1334,3 +1334,7 @@ func (fc *firecracker) GenerateSocket(id string) (interface{}, error) {
// IsRateLimiterBuiltin unconditionally returns true for firecracker.
// NOTE(review): presumably this signals that the hypervisor ships its own
// rate limiter — confirm against the Hypervisor interface documentation.
func (fc *firecracker) IsRateLimiterBuiltin() bool {
return true
}
// ResolveColdPlugVFIOGuestPciPaths implements Hypervisor. It is a no-op for
// firecracker: both arguments are ignored and nil is always returned.
func (fc *firecracker) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error {
return nil
}

View File

@@ -1316,6 +1316,11 @@ type Hypervisor interface {
AddDevice(ctx context.Context, devInfo interface{}, devType DeviceType) error
HotplugAddDevice(ctx context.Context, devInfo interface{}, devType DeviceType) (interface{}, error)
HotplugRemoveDevice(ctx context.Context, devInfo interface{}, devType DeviceType) (interface{}, error)
// ResolveColdPlugVFIOGuestPciPaths resolves the in-guest PCI path for each
// VFIODev with IsPCIe=true and an empty GuestPciPath, writing the result
// back onto the device. Hypervisors that do not require this (e.g. CLH,
// which already populates GuestPciPath during hot-plug) return nil.
ResolveColdPlugVFIOGuestPciPaths(ctx context.Context, vfioDevs []*config.VFIODev) error
ResizeMemory(ctx context.Context, memMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, MemoryDevice, error)
ResizeVCPUs(ctx context.Context, vcpus uint32) (uint32, uint32, error)
GetTotalMemoryMB(ctx context.Context) uint32

View File

@@ -1419,6 +1419,31 @@ func (k *kataAgent) setupNetworks(ctx context.Context, sandbox *Sandbox, c *Cont
return nil
}
// Resolve the guest PCI path for cold-plugged VFIO PCIe devices whose
// GuestPciPath is not yet known. The path is needed below to stamp
// Interface.devicePath in the agent proto; without it the agent falls
// back to a by-MAC link lookup, which fails for SR-IOV VFs whose
// firmware MAC differs from the CNI-assigned MAC after the
// vfio-pci unbind/rebind cycle.
var coldPlugVFIODevs []*config.VFIODev
for _, dev := range sandbox.devManager.GetAllDevices() {
if dev.DeviceType() != config.DeviceVFIO {
continue
}
if vfioDevs, ok := dev.GetDeviceInfo().([]*config.VFIODev); ok {
for _, vfioDev := range vfioDevs {
if vfioDev.IsPCIe && vfioDev.GuestPciPath.IsNil() && vfioDev.ID != "" {
coldPlugVFIODevs = append(coldPlugVFIODevs, vfioDev)
}
}
}
}
if len(coldPlugVFIODevs) > 0 {
if err := sandbox.hypervisor.ResolveColdPlugVFIOGuestPciPaths(ctx, coldPlugVFIODevs); err != nil {
k.Logger().WithError(err).Warn("setupNetworks: failed to resolve guest PCI paths for cold-plug VFIO devices")
}
}
var err error
var endpoints []Endpoint
if c == nil || c.id == sandbox.id {
@@ -1429,6 +1454,26 @@ func (k *kataAgent) setupNetworks(ctx context.Context, sandbox *Sandbox, c *Cont
// creation, so no need to skip them here anymore.
for _, ep := range sandbox.network.Endpoints() {
if ep.Type() != VfioEndpointType {
// For cold-plugged SR-IOV VFs that appear as PhysicalEndpoints,
// the guest PCI path is known once ResolveColdPlugVFIOGuestPciPaths
// has run (at the start of setupNetworks). Look it up and stamp it on the
// endpoint so that generateVCNetworkStructures emits a non-empty
// devicePath in the agent Interface proto. Without this the agent
// receives devicePath="" and falls back to a by-MAC link lookup,
// which fails when the VF firmware MAC differs from the CNI-assigned
// (e.g. OVN) MAC.
if ep.Type() == PhysicalEndpointType && ep.PciPath().IsNil() {
if pe, ok := ep.(*PhysicalEndpoint); ok && pe.BDF != "" {
guestPath := sandbox.GetVfioDeviceGuestPciPath(pe.BDF)
if !guestPath.IsNil() {
ep.SetPciPath(guestPath)
k.Logger().WithFields(logrus.Fields{
"endpoint-name": ep.Name(),
"host-bdf": pe.BDF,
"guest-pci-path": guestPath.String(),
}).Info("setupNetworks: filled guest PCI path for PhysicalEndpoint cold-plug")
}
}
}
endpoints = append(endpoints, ep)
}
}

View File

@@ -10,6 +10,7 @@ import (
"errors"
"os"
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
)
@@ -155,3 +156,7 @@ func (m *mockHypervisor) GenerateSocket(id string) (interface{}, error) {
// IsRateLimiterBuiltin unconditionally returns false for the mock hypervisor.
// NOTE(review): presumably false means no built-in rate limiter is
// available — confirm against the Hypervisor interface documentation.
func (m *mockHypervisor) IsRateLimiterBuiltin() bool {
return false
}
// ResolveColdPlugVFIOGuestPciPaths implements Hypervisor. The mock ignores
// both arguments and always returns nil.
func (m *mockHypervisor) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error {
return nil
}

View File

@@ -1848,6 +1848,48 @@ func (q *qemu) togglePauseSandbox(ctx context.Context, pause bool) error {
return q.qmpMonitorCh.qmp.ExecuteCont(q.qmpMonitorCh.ctx)
}
// ResolveColdPlugVFIOGuestPciPaths implements Hypervisor.
//
// It walks vfioDevs and, for every non-nil PCIe device whose GuestPciPath is
// still unset, asks QEMU (through qomGetPciPath on the QMP monitor channel)
// for the in-guest PCI path and stores the answer in the device's
// GuestPciPath field.
//
// Resolution is best-effort per device: a failed lookup is logged as a
// warning and the loop moves on to the next device. The only error returned
// is a failure of qmpSetup, wrapped with the function name. An empty slice
// returns nil without calling qmpSetup.
func (q *qemu) ResolveColdPlugVFIOGuestPciPaths(ctx context.Context, vfioDevs []*config.VFIODev) error {
	if len(vfioDevs) == 0 {
		return nil
	}

	if setupErr := q.qmpSetup(); setupErr != nil {
		return fmt.Errorf("ResolveColdPlugVFIOGuestPciPaths: qmpSetup: %w", setupErr)
	}

	for _, dev := range vfioDevs {
		switch {
		case dev == nil || !dev.IsPCIe:
			// Nothing to resolve for missing or non-PCIe entries.
			continue
		case !dev.GuestPciPath.IsNil():
			// The path was already computed elsewhere; keep it untouched.
			q.Logger().WithFields(logrus.Fields{
				"qemu-device-id": dev.ID,
				"host-bdf":       dev.BDF,
				"guest-pci-path": dev.GuestPciPath.String(),
			}).Debug("ResolveColdPlugVFIOGuestPciPaths: skipping device with pre-computed guest PCI path")
			continue
		}

		resolved, lookupErr := q.arch.qomGetPciPath(dev.ID, &q.qmpMonitorCh)
		if lookupErr != nil {
			// Best-effort: report the failure and carry on with the rest.
			q.Logger().WithFields(logrus.Fields{
				"qemu-device-id": dev.ID,
				"host-bdf":       dev.BDF,
			}).WithError(lookupErr).Warn("ResolveColdPlugVFIOGuestPciPaths: failed to resolve guest PCI path")
			continue
		}

		// Write the resolved path back so later consumers see it on the device.
		dev.GuestPciPath = resolved

		q.Logger().WithFields(logrus.Fields{
			"qemu-device-id": dev.ID,
			"host-bdf":       dev.BDF,
			"port":           dev.Port,
			"bus":            dev.Bus,
			"guest-pci-path": resolved.String(),
		}).Info("ResolveColdPlugVFIOGuestPciPaths: resolved guest PCI path")
	}

	return nil
}
func (q *qemu) qmpSetup() error {
q.qmpMonitorCh.Lock()
defer q.qmpMonitorCh.Unlock()

View File

@@ -13,6 +13,7 @@ import (
cri "github.com/containerd/containerd/pkg/cri/annotations"
"github.com/containerd/ttrpc"
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
persistapi "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
pb "github.com/kata-containers/kata-containers/src/runtime/protocols/hypervisor"
hypannotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations"
@@ -296,3 +297,7 @@ func (rh *remoteHypervisor) Load(persistapi.HypervisorState) {
// IsRateLimiterBuiltin unconditionally returns false for the remote hypervisor.
// NOTE(review): presumably false means no built-in rate limiter is
// available — confirm against the Hypervisor interface documentation.
func (rh *remoteHypervisor) IsRateLimiterBuiltin() bool {
return false
}
// ResolveColdPlugVFIOGuestPciPaths implements Hypervisor. It is a no-op for
// the remote hypervisor: both arguments are ignored and nil is always returned.
func (rh *remoteHypervisor) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error {
return nil
}

View File

@@ -1309,3 +1309,7 @@ func (s *stratovirt) GenerateSocket(id string) (interface{}, error) {
// IsRateLimiterBuiltin unconditionally returns false for stratovirt.
// NOTE(review): presumably false means no built-in rate limiter is
// available — confirm against the Hypervisor interface documentation.
func (s *stratovirt) IsRateLimiterBuiltin() bool {
return false
}
// ResolveColdPlugVFIOGuestPciPaths implements Hypervisor. It is a no-op for
// stratovirt: both arguments are ignored and nil is always returned.
func (s *stratovirt) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error {
return nil
}

View File

@@ -12,6 +12,7 @@ package virtcontainers
import (
"context"
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
"github.com/pkg/errors"
@@ -135,3 +136,7 @@ func (vfw *virtFramework) GenerateSocket(id string) (interface{}, error) {
// IsRateLimiterBuiltin unconditionally returns false for virtFramework.
// NOTE(review): presumably false means no built-in rate limiter is
// available — confirm against the Hypervisor interface documentation.
func (vfw *virtFramework) IsRateLimiterBuiltin() bool {
return false
}
// ResolveColdPlugVFIOGuestPciPaths implements Hypervisor. It is a no-op for
// virtFramework: both arguments are ignored and nil is always returned.
func (vfw *virtFramework) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error {
return nil
}