agent: Add VFIO-AP device handling

Initial VFIO-AP support (#578) was simple, but somewhat hacky; a
different code path would be chosen for performing the hotplug, and
agent-side device handling was bound to knowing the assigned queue
numbers (APQNs) through some other means; plus the code for awaiting
them was written for the Go agent and never released. This code also
artificially increased the hotplug timeout to wait for the (relatively
expensive, thus limited to 5 seconds at the quickest) AP rescan, which
is impractical for e.g. common k8s timeouts.

Since then, the general handling logic was improved (#1190), but it
assumed PCI in several places.

In the runtime, introduce and parse AP devices. Annotate them as such
when passing to the agent, and include information about the associated
APQNs.

The agent awaits the passed APQNs through uevents and triggers a
rescan directly.

Fixes: #3678
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
This commit is contained in:
Jakob Naucke
2022-03-08 18:51:08 +01:00
committed by Hyounggyu Choi
parent b546eca26f
commit f666f8e2df
11 changed files with 335 additions and 74 deletions

View File

@@ -867,7 +867,7 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error {
if err != nil {
return fmt.Errorf("Failed to hotplug device %+v %s", device, openAPIClientError(err))
}
clh.devicesIds[device.ID] = pciInfo.GetId()
clh.devicesIds[*(*device).GetID()] = pciInfo.GetId()
// clh doesn't use bridges, so the PCI path is simply the slot
// number of the device. This will break if clh starts using
@@ -884,7 +884,7 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error {
return fmt.Errorf("Unexpected PCI address %q from clh hotplug", pciInfo.Bdf)
}
guestPciPath, err := vcTypes.PciPathFromString(tokens[0])
guestPciPath, err := types.PciPathFromString(tokens[0])
pciDevice.GuestPciPath = guestPciPath
*device = pciDevice

View File

@@ -77,38 +77,39 @@ const (
)
var (
checkRequestTimeout = 30 * time.Second
defaultRequestTimeout = 60 * time.Second
errorMissingOCISpec = errors.New("Missing OCI specification")
defaultKataHostSharedDir = "/run/kata-containers/shared/sandboxes/"
defaultKataGuestSharedDir = "/run/kata-containers/shared/containers/"
defaultKataGuestNydusRootDir = "/run/kata-containers/shared/"
mountGuestTag = "kataShared"
defaultKataGuestSandboxDir = "/run/kata-containers/sandbox/"
type9pFs = "9p"
typeVirtioFS = "virtiofs"
typeOverlayFS = "overlay"
kata9pDevType = "9p"
kataMmioBlkDevType = "mmioblk"
kataBlkDevType = "blk"
kataBlkCCWDevType = "blk-ccw"
kataSCSIDevType = "scsi"
kataNvdimmDevType = "nvdimm"
kataVirtioFSDevType = "virtio-fs"
kataOverlayDevType = "overlayfs"
kataWatchableBindDevType = "watchable-bind"
kataVfioDevType = "vfio" // VFIO device to used as VFIO in the container
kataVfioGuestKernelDevType = "vfio-gk" // VFIO device for consumption by the guest kernel
sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"}
sharedDirVirtioFSOptions = []string{}
sharedDirVirtioFSDaxOptions = "dax"
shmDir = "shm"
kataEphemeralDevType = "ephemeral"
defaultEphemeralPath = filepath.Join(defaultKataGuestSandboxDir, kataEphemeralDevType)
grpcMaxDataSize = int64(1024 * 1024)
localDirOptions = []string{"mode=0777"}
maxHostnameLen = 64
GuestDNSFile = "/etc/resolv.conf"
checkRequestTimeout = 30 * time.Second
defaultRequestTimeout = 60 * time.Second
errorMissingOCISpec = errors.New("Missing OCI specification")
defaultKataHostSharedDir = "/run/kata-containers/shared/sandboxes/"
defaultKataGuestSharedDir = "/run/kata-containers/shared/containers/"
defaultKataGuestNydusRootDir = "/run/kata-containers/shared/"
mountGuestTag = "kataShared"
defaultKataGuestSandboxDir = "/run/kata-containers/sandbox/"
type9pFs = "9p"
typeVirtioFS = "virtiofs"
typeOverlayFS = "overlay"
kata9pDevType = "9p"
kataMmioBlkDevType = "mmioblk"
kataBlkDevType = "blk"
kataBlkCCWDevType = "blk-ccw"
kataSCSIDevType = "scsi"
kataNvdimmDevType = "nvdimm"
kataVirtioFSDevType = "virtio-fs"
kataOverlayDevType = "overlayfs"
kataWatchableBindDevType = "watchable-bind"
kataVfioPciDevType = "vfio-pci" // VFIO PCI device to used as VFIO in the container
kataVfioPciGuestKernelDevType = "vfio-pci-gk" // VFIO PCI device for consumption by the guest kernel
kataVfioApDevType = "vfio-ap"
sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"}
sharedDirVirtioFSOptions = []string{}
sharedDirVirtioFSDaxOptions = "dax"
shmDir = "shm"
kataEphemeralDevType = "ephemeral"
defaultEphemeralPath = filepath.Join(defaultKataGuestSandboxDir, kataEphemeralDevType)
grpcMaxDataSize = int64(1024 * 1024)
localDirOptions = []string{"mode=0777"}
maxHostnameLen = 64
GuestDNSFile = "/etc/resolv.conf"
)
const (
@@ -1126,6 +1127,11 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c *
// DDDD:BB:DD.F is the device's PCI address on the
// *host*. <pcipath> is the device's PCI path in the guest
// (see qomGetPciPath() for details).
//
// For VFIO-AP, one VFIO group could include several queue devices. They are
// identified by APQNs (Adjunct Processor Queue Numbers), which do not differ
// between host and guest. They are passed as options so they can be awaited
// by the agent.
kataDevice := &grpc.Device{
ContainerPath: dev.ContainerPath,
Type: kataVfioPciDevType,
@@ -1141,10 +1147,15 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c *
kataDevice.Type = kataVfioPciGuestKernelDevType
}
kataDevice.Options = make([]string, len(devList))
for i, device := range devList {
pciDevice := (*device).(config.VFIOPCIDev)
kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", pciDevice.BDF, pciDevice.GuestPciPath)
if (*devList[0]).GetType() == config.VFIOAPDeviceMediatedType {
kataDevice.Type = kataVfioApDevType
kataDevice.Options = (*devList[0]).(config.VFIOAPDev).APDevices
} else {
kataDevice.Options = make([]string, len(devList))
for i, device := range devList {
pciDevice := (*device).(config.VFIOPCIDev)
kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", pciDevice.BDF, pciDevice.GuestPciPath)
}
}
return kataDevice

View File

@@ -1753,6 +1753,8 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op
} else {
err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, *(*device).GetSysfsDev(), "", pciDevice.Bus, romFile)
}
case config.VFIOAPDeviceMediatedType:
err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, *(*device).GetSysfsDev())
}
} else {
addr, bridge, err := q.arch.addDeviceToBridge(ctx, devID, types.PCI)
@@ -1775,6 +1777,8 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op
err = q.qmpMonitorCh.qmp.ExecutePCIVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, pciDevice.BDF, addr, bridge.ID, romFile)
case config.VFIOPCIDeviceMediatedType:
err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, *(*device).GetSysfsDev(), addr, bridge.ID, romFile)
case config.VFIOAPDeviceMediatedType:
err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, *(*device).GetSysfsDev())
default:
return fmt.Errorf("Incorrect VFIO device type found")
}