mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-06-30 22:21:05 +00:00
runtime: resolve cold-plug VFIO guest PCI path via QMP
For QEMU cold-plug + guest-kernel mode, the guest BDF of a cold-plugged VFIO device is auto-allocated at boot (each pcie-root-port is added with chassis=N,slot=N but no pinned addr=, so QEMU picks the next free slot on pcie.0). The hot-plug path already queries QMP via qomGetPciPath; reuse that same mechanism for cold-plugged devices. Add ResolveColdPlugVFIOGuestPciPaths to the Hypervisor interface. Implement it in qemu.go using qomGetPciPath. Add no-op stubs for all other hypervisors. Call it at the start of setupNetworks so that the PCI paths are resolved before generateVCNetworkStructures emits the agent Interface proto. Also stamp the resolved path onto PhysicalEndpoints (used by SR-IOV VFs exposed as physical network devices) so that update_interface carries a non-empty devicePath. Without devicePath, the agent falls back to a by-MAC link lookup, which fails when the VF firmware MAC differs from the CNI-assigned MAC after the vfio-pci unbind/rebind cycle. Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com> Assisted-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -1984,3 +1984,7 @@ func pathExists(path string) bool {
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// ResolveColdPlugVFIOGuestPciPaths is a no-op for Cloud Hypervisor: both
// arguments are ignored and nil is always returned.
func (clh *cloudHypervisor) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1334,3 +1334,7 @@ func (fc *firecracker) GenerateSocket(id string) (interface{}, error) {
|
||||
// IsRateLimiterBuiltin always reports true for Firecracker.
// NOTE(review): presumably this tells callers that rate limiting is provided
// by the hypervisor itself — confirm against the callers.
func (fc *firecracker) IsRateLimiterBuiltin() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// ResolveColdPlugVFIOGuestPciPaths is a no-op for Firecracker: both
// arguments are ignored and nil is always returned.
func (fc *firecracker) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1316,6 +1316,11 @@ type Hypervisor interface {
|
||||
AddDevice(ctx context.Context, devInfo interface{}, devType DeviceType) error
|
||||
HotplugAddDevice(ctx context.Context, devInfo interface{}, devType DeviceType) (interface{}, error)
|
||||
HotplugRemoveDevice(ctx context.Context, devInfo interface{}, devType DeviceType) (interface{}, error)
|
||||
// ResolveColdPlugVFIOGuestPciPaths resolves the in-guest PCI path for each
|
||||
// VFIODev with IsPCIe=true and an empty GuestPciPath, writing the result
|
||||
// back onto the device. Hypervisors that do not require this (e.g. CLH,
|
||||
// which already populates GuestPciPath during hot-plug) return nil.
|
||||
ResolveColdPlugVFIOGuestPciPaths(ctx context.Context, vfioDevs []*config.VFIODev) error
|
||||
ResizeMemory(ctx context.Context, memMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, MemoryDevice, error)
|
||||
ResizeVCPUs(ctx context.Context, vcpus uint32) (uint32, uint32, error)
|
||||
GetTotalMemoryMB(ctx context.Context) uint32
|
||||
|
||||
@@ -1419,6 +1419,31 @@ func (k *kataAgent) setupNetworks(ctx context.Context, sandbox *Sandbox, c *Cont
|
||||
return nil
|
||||
}
|
||||
|
||||
// Resolve the guest PCI path for cold-plugged VFIO PCIe devices whose
|
||||
// GuestPciPath is not yet known. The path is needed below to stamp
|
||||
// Interface.devicePath in the agent proto; without it the agent falls
|
||||
// back to a by-MAC link lookup, which fails for SR-IOV VFs whose
|
||||
// firmware MAC differs from the CNI-assigned MAC after the
|
||||
// vfio-pci unbind/rebind cycle.
|
||||
var coldPlugVFIODevs []*config.VFIODev
|
||||
for _, dev := range sandbox.devManager.GetAllDevices() {
|
||||
if dev.DeviceType() != config.DeviceVFIO {
|
||||
continue
|
||||
}
|
||||
if vfioDevs, ok := dev.GetDeviceInfo().([]*config.VFIODev); ok {
|
||||
for _, vfioDev := range vfioDevs {
|
||||
if vfioDev.IsPCIe && vfioDev.GuestPciPath.IsNil() && vfioDev.ID != "" {
|
||||
coldPlugVFIODevs = append(coldPlugVFIODevs, vfioDev)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(coldPlugVFIODevs) > 0 {
|
||||
if err := sandbox.hypervisor.ResolveColdPlugVFIOGuestPciPaths(ctx, coldPlugVFIODevs); err != nil {
|
||||
k.Logger().WithError(err).Warn("setupNetworks: failed to resolve guest PCI paths for cold-plug VFIO devices")
|
||||
}
|
||||
}
|
||||
|
||||
var err error
|
||||
var endpoints []Endpoint
|
||||
if c == nil || c.id == sandbox.id {
|
||||
@@ -1429,6 +1454,26 @@ func (k *kataAgent) setupNetworks(ctx context.Context, sandbox *Sandbox, c *Cont
|
||||
// creation, so no need to skip them here anymore.
|
||||
for _, ep := range sandbox.network.Endpoints() {
|
||||
if ep.Type() != VfioEndpointType {
|
||||
// For cold-plugged SR-IOV VFs that appear as PhysicalEndpoints,
|
||||
// the guest PCI path is known after ResolveColdPlugVFIOGuestPciPaths
|
||||
// has run (during createContainers). Look it up and stamp it on the
|
||||
// endpoint so that generateVCNetworkStructures emits a non-empty
|
||||
// devicePath in the agent Interface proto. Without this the agent
|
||||
// receives devicePath="" and falls back to a by-MAC link lookup,
|
||||
// which fails when the VF firmware MAC differs from the OVN MAC.
|
||||
if ep.Type() == PhysicalEndpointType && ep.PciPath().IsNil() {
|
||||
if pe, ok := ep.(*PhysicalEndpoint); ok && pe.BDF != "" {
|
||||
guestPath := sandbox.GetVfioDeviceGuestPciPath(pe.BDF)
|
||||
if !guestPath.IsNil() {
|
||||
ep.SetPciPath(guestPath)
|
||||
k.Logger().WithFields(logrus.Fields{
|
||||
"endpoint-name": ep.Name(),
|
||||
"host-bdf": pe.BDF,
|
||||
"guest-pci-path": guestPath.String(),
|
||||
}).Info("setupNetworks: filled guest PCI path for PhysicalEndpoint cold-plug")
|
||||
}
|
||||
}
|
||||
}
|
||||
endpoints = append(endpoints, ep)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"errors"
|
||||
"os"
|
||||
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
||||
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
|
||||
)
|
||||
@@ -155,3 +156,7 @@ func (m *mockHypervisor) GenerateSocket(id string) (interface{}, error) {
|
||||
// IsRateLimiterBuiltin always reports false for the mock hypervisor.
func (m *mockHypervisor) IsRateLimiterBuiltin() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// ResolveColdPlugVFIOGuestPciPaths is a no-op for the mock hypervisor: both
// arguments are ignored and nil is always returned.
func (m *mockHypervisor) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1848,6 +1848,48 @@ func (q *qemu) togglePauseSandbox(ctx context.Context, pause bool) error {
|
||||
return q.qmpMonitorCh.qmp.ExecuteCont(q.qmpMonitorCh.ctx)
|
||||
}
|
||||
|
||||
// ResolveColdPlugVFIOGuestPciPaths implements Hypervisor. For each VFIODev
|
||||
// with IsPCIe=true and an empty GuestPciPath, it queries QMP to find the
|
||||
// in-guest PCI path and writes it back onto the device.
|
||||
func (q *qemu) ResolveColdPlugVFIOGuestPciPaths(ctx context.Context, vfioDevs []*config.VFIODev) error {
|
||||
if len(vfioDevs) == 0 {
|
||||
return nil
|
||||
}
|
||||
if err := q.qmpSetup(); err != nil {
|
||||
return fmt.Errorf("ResolveColdPlugVFIOGuestPciPaths: qmpSetup: %w", err)
|
||||
}
|
||||
for _, vfioDev := range vfioDevs {
|
||||
if vfioDev == nil || !vfioDev.IsPCIe {
|
||||
continue
|
||||
}
|
||||
if !vfioDev.GuestPciPath.IsNil() {
|
||||
q.Logger().WithFields(logrus.Fields{
|
||||
"qemu-device-id": vfioDev.ID,
|
||||
"host-bdf": vfioDev.BDF,
|
||||
"guest-pci-path": vfioDev.GuestPciPath.String(),
|
||||
}).Debug("ResolveColdPlugVFIOGuestPciPaths: skipping device with pre-computed guest PCI path")
|
||||
continue
|
||||
}
|
||||
guestPath, err := q.arch.qomGetPciPath(vfioDev.ID, &q.qmpMonitorCh)
|
||||
if err != nil {
|
||||
q.Logger().WithFields(logrus.Fields{
|
||||
"qemu-device-id": vfioDev.ID,
|
||||
"host-bdf": vfioDev.BDF,
|
||||
}).WithError(err).Warn("ResolveColdPlugVFIOGuestPciPaths: failed to resolve guest PCI path")
|
||||
continue
|
||||
}
|
||||
vfioDev.GuestPciPath = guestPath
|
||||
q.Logger().WithFields(logrus.Fields{
|
||||
"qemu-device-id": vfioDev.ID,
|
||||
"host-bdf": vfioDev.BDF,
|
||||
"port": vfioDev.Port,
|
||||
"bus": vfioDev.Bus,
|
||||
"guest-pci-path": guestPath.String(),
|
||||
}).Info("ResolveColdPlugVFIOGuestPciPaths: resolved guest PCI path")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (q *qemu) qmpSetup() error {
|
||||
q.qmpMonitorCh.Lock()
|
||||
defer q.qmpMonitorCh.Unlock()
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
|
||||
cri "github.com/containerd/containerd/pkg/cri/annotations"
|
||||
"github.com/containerd/ttrpc"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
||||
persistapi "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
||||
pb "github.com/kata-containers/kata-containers/src/runtime/protocols/hypervisor"
|
||||
hypannotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations"
|
||||
@@ -296,3 +297,7 @@ func (rh *remoteHypervisor) Load(persistapi.HypervisorState) {
|
||||
// IsRateLimiterBuiltin always reports false for the remote hypervisor.
func (rh *remoteHypervisor) IsRateLimiterBuiltin() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// ResolveColdPlugVFIOGuestPciPaths is a no-op for the remote hypervisor: both
// arguments are ignored and nil is always returned.
func (rh *remoteHypervisor) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1309,3 +1309,7 @@ func (s *stratovirt) GenerateSocket(id string) (interface{}, error) {
|
||||
// IsRateLimiterBuiltin always reports false for StratoVirt.
func (s *stratovirt) IsRateLimiterBuiltin() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// ResolveColdPlugVFIOGuestPciPaths is a no-op for StratoVirt: both
// arguments are ignored and nil is always returned.
func (s *stratovirt) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@ package virtcontainers
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
||||
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
|
||||
"github.com/pkg/errors"
|
||||
@@ -135,3 +136,7 @@ func (vfw *virtFramework) GenerateSocket(id string) (interface{}, error) {
|
||||
// IsRateLimiterBuiltin always reports false for the virtFramework hypervisor.
func (vfw *virtFramework) IsRateLimiterBuiltin() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// ResolveColdPlugVFIOGuestPciPaths is a no-op for the virtFramework
// hypervisor: both arguments are ignored and nil is always returned.
func (vfw *virtFramework) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user