diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go
index 51e3577ab6..bffaa34385 100644
--- a/src/runtime/virtcontainers/clh.go
+++ b/src/runtime/virtcontainers/clh.go
@@ -1984,3 +1984,9 @@ func pathExists(path string) bool {
 	}
 	return true
 }
+
+// ResolveColdPlugVFIOGuestPciPaths is a no-op for Cloud Hypervisor: it
+// ignores its arguments and always returns nil.
+func (clh *cloudHypervisor) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error {
+	return nil
+}
diff --git a/src/runtime/virtcontainers/fc.go b/src/runtime/virtcontainers/fc.go
index 29a6e20ce4..d5e2741e9c 100644
--- a/src/runtime/virtcontainers/fc.go
+++ b/src/runtime/virtcontainers/fc.go
@@ -1334,3 +1334,9 @@ func (fc *firecracker) GenerateSocket(id string) (interface{}, error) {
 func (fc *firecracker) IsRateLimiterBuiltin() bool {
 	return true
 }
+
+// ResolveColdPlugVFIOGuestPciPaths is a no-op for Firecracker: it ignores
+// its arguments and always returns nil.
+func (fc *firecracker) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error {
+	return nil
+}
diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go
index 8b93b31428..9f2bc71a6a 100644
--- a/src/runtime/virtcontainers/hypervisor.go
+++ b/src/runtime/virtcontainers/hypervisor.go
@@ -1316,6 +1316,11 @@ type Hypervisor interface {
 	AddDevice(ctx context.Context, devInfo interface{}, devType DeviceType) error
 	HotplugAddDevice(ctx context.Context, devInfo interface{}, devType DeviceType) (interface{}, error)
 	HotplugRemoveDevice(ctx context.Context, devInfo interface{}, devType DeviceType) (interface{}, error)
+	// ResolveColdPlugVFIOGuestPciPaths resolves the in-guest PCI path for each
+	// VFIODev with IsPCIe=true and an empty GuestPciPath, writing the result
+	// back onto the device. Hypervisors that do not require this (e.g. CLH,
+	// which already populates GuestPciPath during hot-plug) return nil.
+	ResolveColdPlugVFIOGuestPciPaths(ctx context.Context, vfioDevs []*config.VFIODev) error
 	ResizeMemory(ctx context.Context, memMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, MemoryDevice, error)
 	ResizeVCPUs(ctx context.Context, vcpus uint32) (uint32, uint32, error)
 	GetTotalMemoryMB(ctx context.Context) uint32
diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go
index f720c4e3e3..74e00afad0 100644
--- a/src/runtime/virtcontainers/kata_agent.go
+++ b/src/runtime/virtcontainers/kata_agent.go
@@ -1419,6 +1419,31 @@ func (k *kataAgent) setupNetworks(ctx context.Context, sandbox *Sandbox, c *Cont
 		return nil
 	}
 
+	// Resolve the guest PCI path for cold-plugged VFIO PCIe devices whose
+	// GuestPciPath is not yet known. The path is needed below to stamp
+	// Interface.devicePath in the agent proto; without it the agent falls
+	// back to a by-MAC link lookup, which fails for SR-IOV VFs whose
+	// firmware MAC differs from the CNI-assigned MAC after the
+	// vfio-pci unbind/rebind cycle.
+	var coldPlugVFIODevs []*config.VFIODev
+	for _, dev := range sandbox.devManager.GetAllDevices() {
+		if dev.DeviceType() != config.DeviceVFIO {
+			continue
+		}
+		if vfioDevs, ok := dev.GetDeviceInfo().([]*config.VFIODev); ok {
+			for _, vfioDev := range vfioDevs {
+				if vfioDev != nil && vfioDev.IsPCIe && vfioDev.GuestPciPath.IsNil() && vfioDev.ID != "" {
+					coldPlugVFIODevs = append(coldPlugVFIODevs, vfioDev)
+				}
+			}
+		}
+	}
+	if len(coldPlugVFIODevs) > 0 {
+		if err := sandbox.hypervisor.ResolveColdPlugVFIOGuestPciPaths(ctx, coldPlugVFIODevs); err != nil {
+			k.Logger().WithError(err).Warn("setupNetworks: failed to resolve guest PCI paths for cold-plug VFIO devices")
+		}
+	}
+
 	var err error
 	var endpoints []Endpoint
 	if c == nil || c.id == sandbox.id {
@@ -1429,6 +1454,26 @@ func (k *kataAgent) setupNetworks(ctx context.Context, sandbox *Sandbox, c *Cont
 		// creation, so no need to skip them here anymore.
 		for _, ep := range sandbox.network.Endpoints() {
 			if ep.Type() != VfioEndpointType {
+				// For cold-plugged SR-IOV VFs that appear as PhysicalEndpoints,
+				// the guest PCI path is known after resolveColdPlugVFIOGuestPciPaths
+				// has run (during createContainers). Look it up and stamp it on the
+				// endpoint so that generateVCNetworkStructures emits a non-empty
+				// devicePath in the agent Interface proto. Without this the agent
+				// receives devicePath="" and falls back to a by-MAC link lookup,
+				// which fails when the VF firmware MAC differs from the OVN MAC.
+				if ep.Type() == PhysicalEndpointType && ep.PciPath().IsNil() {
+					if pe, ok := ep.(*PhysicalEndpoint); ok && pe.BDF != "" {
+						guestPath := sandbox.GetVfioDeviceGuestPciPath(pe.BDF)
+						if !guestPath.IsNil() {
+							ep.SetPciPath(guestPath)
+							k.Logger().WithFields(logrus.Fields{
+								"endpoint-name":  ep.Name(),
+								"host-bdf":       pe.BDF,
+								"guest-pci-path": guestPath.String(),
+							}).Info("setupNetworks: filled guest PCI path for PhysicalEndpoint cold-plug")
+						}
+					}
+				}
 				endpoints = append(endpoints, ep)
 			}
 		}
diff --git a/src/runtime/virtcontainers/mock_hypervisor.go b/src/runtime/virtcontainers/mock_hypervisor.go
index 7d6da561fa..90f2de2ebc 100644
--- a/src/runtime/virtcontainers/mock_hypervisor.go
+++ b/src/runtime/virtcontainers/mock_hypervisor.go
@@ -10,6 +10,7 @@ import (
 	"errors"
 	"os"
 
+	"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
 	hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
 )
@@ -155,3 +156,9 @@ func (m *mockHypervisor) GenerateSocket(id string) (interface{}, error) {
 func (m *mockHypervisor) IsRateLimiterBuiltin() bool {
 	return false
 }
+
+// ResolveColdPlugVFIOGuestPciPaths is a no-op for the mock hypervisor: it
+// ignores its arguments and always returns nil.
+func (m *mockHypervisor) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error {
+	return nil
+}
diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go
index 74818ff5d6..fe99945f56 100644
--- a/src/runtime/virtcontainers/qemu.go
+++ b/src/runtime/virtcontainers/qemu.go
@@ -1848,6 +1848,53 @@ func (q *qemu) togglePauseSandbox(ctx context.Context, pause bool) error {
 	return q.qmpMonitorCh.qmp.ExecuteCont(q.qmpMonitorCh.ctx)
 }
 
+// ResolveColdPlugVFIOGuestPciPaths implements Hypervisor. For each VFIODev
+// with IsPCIe=true and an empty GuestPciPath, it queries QMP to find the
+// in-guest PCI path and writes it back onto the device.
+//
+// Resolution is best-effort: a failed lookup for one device is logged as a
+// warning and that device is skipped, so the only error returned is a QMP
+// setup failure. Nil entries, non-PCIe devices and devices with an empty ID
+// are ignored. ctx is currently unused.
+func (q *qemu) ResolveColdPlugVFIOGuestPciPaths(ctx context.Context, vfioDevs []*config.VFIODev) error {
+	if len(vfioDevs) == 0 {
+		return nil
+	}
+	if err := q.qmpSetup(); err != nil {
+		return fmt.Errorf("ResolveColdPlugVFIOGuestPciPaths: qmpSetup: %w", err)
+	}
+	for _, vfioDev := range vfioDevs {
+		if vfioDev == nil || !vfioDev.IsPCIe || vfioDev.ID == "" {
+			continue
+		}
+		if !vfioDev.GuestPciPath.IsNil() {
+			q.Logger().WithFields(logrus.Fields{
+				"qemu-device-id": vfioDev.ID,
+				"host-bdf":       vfioDev.BDF,
+				"guest-pci-path": vfioDev.GuestPciPath.String(),
+			}).Debug("ResolveColdPlugVFIOGuestPciPaths: skipping device with pre-computed guest PCI path")
+			continue
+		}
+		guestPath, err := q.arch.qomGetPciPath(vfioDev.ID, &q.qmpMonitorCh)
+		if err != nil {
+			q.Logger().WithFields(logrus.Fields{
+				"qemu-device-id": vfioDev.ID,
+				"host-bdf":       vfioDev.BDF,
+			}).WithError(err).Warn("ResolveColdPlugVFIOGuestPciPaths: failed to resolve guest PCI path")
+			continue
+		}
+		vfioDev.GuestPciPath = guestPath
+		q.Logger().WithFields(logrus.Fields{
+			"qemu-device-id": vfioDev.ID,
+			"host-bdf":       vfioDev.BDF,
+			"port":           vfioDev.Port,
+			"bus":            vfioDev.Bus,
+			"guest-pci-path": guestPath.String(),
+		}).Info("ResolveColdPlugVFIOGuestPciPaths: resolved guest PCI path")
+	}
+	return nil
+}
+
 func (q *qemu) qmpSetup() error {
 	q.qmpMonitorCh.Lock()
 	defer q.qmpMonitorCh.Unlock()
diff --git a/src/runtime/virtcontainers/remote.go b/src/runtime/virtcontainers/remote.go
index d88a3ca4e9..caf431ac72 100644
--- a/src/runtime/virtcontainers/remote.go
+++ b/src/runtime/virtcontainers/remote.go
@@ -13,6 +13,7 @@ import (
 
 	cri "github.com/containerd/containerd/pkg/cri/annotations"
 	"github.com/containerd/ttrpc"
+	"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
 	persistapi "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
 	pb "github.com/kata-containers/kata-containers/src/runtime/protocols/hypervisor"
 	hypannotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations"
@@ -296,3 +297,9 @@ func (rh *remoteHypervisor) Load(persistapi.HypervisorState) {
 func (rh *remoteHypervisor) IsRateLimiterBuiltin() bool {
 	return false
 }
+
+// ResolveColdPlugVFIOGuestPciPaths is a no-op for the remote hypervisor: it
+// ignores its arguments and always returns nil.
+func (rh *remoteHypervisor) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error {
+	return nil
+}
diff --git a/src/runtime/virtcontainers/stratovirt.go b/src/runtime/virtcontainers/stratovirt.go
index 3f6a4c1b18..52043c6681 100644
--- a/src/runtime/virtcontainers/stratovirt.go
+++ b/src/runtime/virtcontainers/stratovirt.go
@@ -1309,3 +1309,9 @@ func (s *stratovirt) GenerateSocket(id string) (interface{}, error) {
 func (s *stratovirt) IsRateLimiterBuiltin() bool {
 	return false
 }
+
+// ResolveColdPlugVFIOGuestPciPaths is a no-op for StratoVirt: it ignores its
+// arguments and always returns nil.
+func (s *stratovirt) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error {
+	return nil
+}
diff --git a/src/runtime/virtcontainers/virtframework.go b/src/runtime/virtcontainers/virtframework.go
index 9196e291c2..295f8302a7 100644
--- a/src/runtime/virtcontainers/virtframework.go
+++ b/src/runtime/virtcontainers/virtframework.go
@@ -12,6 +12,7 @@ package virtcontainers
 import (
 	"context"
 
+	"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
 	hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
 	"github.com/pkg/errors"
@@ -135,3 +136,9 @@ func (vfw *virtFramework) GenerateSocket(id string) (interface{}, error) {
 func (vfw *virtFramework) IsRateLimiterBuiltin() bool {
 	return false
 }
+
+// ResolveColdPlugVFIOGuestPciPaths is a no-op for virtFramework: it ignores
+// its arguments and always returns nil.
+func (vfw *virtFramework) ResolveColdPlugVFIOGuestPciPaths(_ context.Context, _ []*config.VFIODev) error {
+	return nil
+}