From b546eca26f0ef8e35288afe6cab0021fdc75ed46 Mon Sep 17 00:00:00 2001 From: Jakob Naucke Date: Tue, 8 Mar 2022 18:51:08 +0100 Subject: [PATCH] runtime: Generalize VFIO devices Generalize VFIO devices to allow for adding AP in the next patch. The logic for VFIOPciDeviceMediatedType() has been changed and IsAPVFIOMediatedDevice() has been removed. The rationale for the revomal is: - VFIODeviceMediatedType is divided into 2 subtypes for AP and PCI - Logic of checking a subtype of mediated device is included in GetVFIODeviceType() - VFIOPciDeviceMediatedType() can simply fulfill the device addition based on a type categorized by GetVFIODeviceType() Signed-off-by: Jakob Naucke --- src/runtime/pkg/device/config/config.go | 22 +++++- src/runtime/pkg/device/drivers/vfio.go | 50 ++++++++---- src/runtime/virtcontainers/clh.go | 11 ++- src/runtime/virtcontainers/clh_test.go | 2 +- src/runtime/virtcontainers/kata_agent.go | 6 +- src/runtime/virtcontainers/qemu.go | 79 +++++++++++-------- src/runtime/virtcontainers/qemu_arch_base.go | 11 +-- .../virtcontainers/qemu_arch_base_test.go | 4 +- src/runtime/virtcontainers/sandbox.go | 16 +++- .../virtcontainers/utils/utils_linux.go | 3 +- .../virtcontainers/utils/utils_linux_test.go | 16 ---- 11 files changed, 135 insertions(+), 85 deletions(-) diff --git a/src/runtime/pkg/device/config/config.go b/src/runtime/pkg/device/config/config.go index a44723dac6..6bcbed32c3 100644 --- a/src/runtime/pkg/device/config/config.go +++ b/src/runtime/pkg/device/config/config.go @@ -265,8 +265,14 @@ const ( VFIOPCIDeviceMediatedType ) -// VFIODev represents a VFIO drive used for hotplugging -type VFIODev struct { +type VFIODev interface { + GetID() *string + GetType() VFIODeviceType + GetSysfsDev() *string +} + +// VFIOPCIDev represents a VFIO PCI device used for hotplugging +type VFIOPCIDev struct { // ID is used to identify this drive in the hypervisor options. ID string @@ -298,6 +304,18 @@ type VFIODev struct { IsPCIe bool } +func (d VFIOPCIDev) GetID() *string { + return &d.ID +} + +func (d VFIOPCIDev) GetType() VFIODeviceType { + return d.Type +} + +func (d VFIOPCIDev) GetSysfsDev() *string { + return &d.SysfsDev +} + // RNGDev represents a random number generator device type RNGDev struct { // ID is used to identify the device in the hypervisor options. diff --git a/src/runtime/pkg/device/drivers/vfio.go b/src/runtime/pkg/device/drivers/vfio.go index 86033aa736..a07a7df734 100644 --- a/src/runtime/pkg/device/drivers/vfio.go +++ b/src/runtime/pkg/device/drivers/vfio.go @@ -85,19 +85,29 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece if err != nil { return err } - vfio := &config.VFIODev{ - ID: utils.MakeNameID("vfio", device.DeviceInfo.ID+strconv.Itoa(i), maxDevIDSize), - Type: vfioDeviceType, - BDF: deviceBDF, - SysfsDev: deviceSysfsDev, - IsPCIe: isPCIeDevice(deviceBDF), - Class: getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass), - } - device.VfioDevs = append(device.VfioDevs, vfio) - if vfio.IsPCIe { - vfio.Bus = fmt.Sprintf("%s%d", pcieRootPortPrefix, len(AllPCIeDevs)) - AllPCIeDevs[vfio.BDF] = true + id := utils.MakeNameID("vfio", device.DeviceInfo.ID+strconv.Itoa(i), maxDevIDSize) + + var vfio config.VFIODev + + switch vfioDeviceType { + case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType: + isPCIe := isPCIeDevice(deviceBDF) + // Do not directly assign to `vfio` -- need to access field still + vfioPCI := config.VFIOPCIDev{ + ID: id, + Type: vfioDeviceType, + BDF: deviceBDF, + SysfsDev: deviceSysfsDev, + IsPCIe: isPCIe, + Class: getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass), + } + if isPCIe { + vfioPCI.Bus = fmt.Sprintf("%s%d", pcieRootPortPrefix, len(AllPCIeDevs)) + AllPCIeDevs[deviceBDF] = true + } + vfio = vfioPCI } + device.VfioDevs = append(device.VfioDevs, &vfio) } coldPlug := device.DeviceInfo.ColdPlug @@ -180,7 +190,16 @@ func (device *VFIODevice) Save() config.DeviceState { devs := device.VfioDevs for _, dev := range devs { if dev != nil { - ds.VFIODevs = append(ds.VFIODevs, dev) + bdf := "" + if pciDev, ok := (*dev).(config.VFIOPCIDev); ok { + bdf = pciDev.BDF + } + ds.VFIODevs = append(ds.VFIODevs, &persistapi.VFIODev{ + ID: *(*dev).GetID(), + Type: uint32((*dev).GetType()), + BDF: bdf, + SysfsDev: *(*dev).GetSysfsDev(), + }) } } return ds @@ -192,12 +211,13 @@ func (device *VFIODevice) Load(ds config.DeviceState) { device.GenericDevice.Load(ds) for _, dev := range ds.VFIODevs { - device.VfioDevs = append(device.VfioDevs, &config.VFIODev{ + var vfioDev config.VFIODev = config.VFIOPCIDev{ ID: dev.ID, Type: config.VFIODeviceType(dev.Type), BDF: dev.BDF, SysfsDev: dev.SysfsDev, - }) + } + device.VfioDevs = append(device.VfioDevs, &vfioDev) } } diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 71bd931dce..d9be15082a 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -856,6 +856,11 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error { ctx, cancel := context.WithTimeout(context.Background(), clhHotPlugAPITimeout*time.Second) defer cancel() + pciDevice, ok := (*device).(config.VFIOPCIDev) + if !ok { + return fmt.Errorf("VFIO device %+v is not PCI, only PCI is supported in Cloud Hypervisor", device) + } + // Create the clh device config via the constructor to ensure default values are properly assigned clhDevice := *chclient.NewDeviceConfig(device.SysfsDev) pciInfo, _, err := cl.VmAddDevicePut(ctx, clhDevice) @@ -879,7 +884,9 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error { return fmt.Errorf("Unexpected PCI address %q from clh hotplug", pciInfo.Bdf) } - device.GuestPciPath, err = types.PciPathFromString(tokens[0]) + guestPciPath, err := vcTypes.PciPathFromString(tokens[0]) + pciDevice.GuestPciPath = guestPciPath + *device = pciDevice return err } @@ -923,7 +930,7 @@ func (clh *cloudHypervisor) HotplugRemoveDevice(ctx context.Context, devInfo int case BlockDev: deviceID = clhDriveIndexToID(devInfo.(*config.BlockDrive).Index) case VfioDev: - deviceID = devInfo.(*config.VFIODev).ID + deviceID = *devInfo.(config.VFIODev).GetID() default: clh.Logger().WithFields(log.Fields{"devInfo": devInfo, "deviceType": devType}).Error("HotplugRemoveDevice: unsupported device") diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index 20e6935d85..a1c3c4b643 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -624,7 +624,7 @@ func TestCloudHypervisorHotplugRemoveDevice(t *testing.T) { _, err = clh.HotplugRemoveDevice(context.Background(), &config.BlockDrive{}, BlockDev) assert.NoError(err, "Hotplug remove block device expected no error") - _, err = clh.HotplugRemoveDevice(context.Background(), &config.VFIODev{}, VfioDev) + _, err = clh.HotplugRemoveDevice(context.Background(), &config.VFIOPCIDev{}, VfioDev) assert.NoError(err, "Hotplug remove vfio block device expected no error") _, err = clh.HotplugRemoveDevice(context.Background(), nil, NetDev) diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 98411aa458..60010a14a6 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -1141,8 +1141,10 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c * kataDevice.Type = kataVfioPciGuestKernelDevType } - for i, pciDev := range devList { - kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", pciDev.BDF, pciDev.GuestPciPath) + kataDevice.Options = make([]string, len(devList)) + for i, device := range devList { + pciDevice := (*device).(config.VFIOPCIDev) + kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", pciDevice.BDF, pciDevice.GuestPciPath) } return kataDevice diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 17b3edbe78..6354b9d8a4 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -1713,7 +1713,7 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op return err } - devID := device.ID + devID := *(*device).GetID() machineType := q.HypervisorConfig().HypervisorMachineType if op == AddDevice { @@ -1730,29 +1730,29 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op // for pc machine type instead of bridge. This is useful for devices that require // a large PCI BAR which is a currently a limitation with PCI bridges. if q.state.HotplugVFIOOnRootBus { - - // In case MachineType is q35, a PCIe device is hotplugged on a PCIe Root Port. - switch machineType { - case QemuQ35: - if device.IsPCIe && q.state.PCIeRootPort <= 0 { - q.Logger().WithField("dev-id", device.ID).Warn("VFIO device is a PCIe device. It's recommended to add the PCIe Root Port by setting the pcie_root_port parameter in the configuration for q35") - device.Bus = "" + switch (*device).GetType() { + case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType: + // In case MachineType is q35, a PCIe device is hotplugged on a PCIe Root Port. + pciDevice, ok := (*device).(config.VFIOPCIDev) + if !ok { + return fmt.Errorf("VFIO device %+v is not PCI, but its Type said otherwise", device) } - default: - device.Bus = "" - } + switch machineType { + case QemuQ35: + if pciDevice.IsPCIe && q.state.PCIeRootPort <= 0 { + q.Logger().WithField("dev-id", (*device).GetID()).Warn("VFIO device is a PCIe device. It's recommended to add the PCIe Root Port by setting the pcie_root_port parameter in the configuration for q35") + pciDevice.Bus = "" + } + default: + pciDevice.Bus = "" + } + *device = pciDevice - switch device.Type { - case config.VFIOPCIDeviceNormalType: - err = q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, device.Bus, romFile) - case config.VFIOPCIDeviceMediatedType: - if utils.IsAPVFIOMediatedDevice(device.SysfsDev) { - err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev) + if pciDevice.Type == config.VFIOPCIDeviceNormalType { + err = q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, pciDevice.BDF, pciDevice.Bus, romFile) } else { - err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, "", device.Bus, romFile) + err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, *(*device).GetSysfsDev(), "", pciDevice.Bus, romFile) } - default: - return fmt.Errorf("Incorrect VFIO device type found") } } else { addr, bridge, err := q.arch.addDeviceToBridge(ctx, devID, types.PCI) @@ -1766,15 +1766,15 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op } }() - switch device.Type { + switch (*device).GetType() { case config.VFIOPCIDeviceNormalType: - err = q.qmpMonitorCh.qmp.ExecutePCIVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, addr, bridge.ID, romFile) - case config.VFIOPCIDeviceMediatedType: - if utils.IsAPVFIOMediatedDevice(device.SysfsDev) { - err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev) - } else { - err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, addr, bridge.ID, romFile) + pciDevice, ok := (*device).(config.VFIOPCIDev) + if !ok { + return fmt.Errorf("VFIO device %+v is not PCI, but its Type said otherwise", device) } + err = q.qmpMonitorCh.qmp.ExecutePCIVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, pciDevice.BDF, addr, bridge.ID, romFile) + case config.VFIOPCIDeviceMediatedType: + err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, *(*device).GetSysfsDev(), addr, bridge.ID, romFile) default: return fmt.Errorf("Incorrect VFIO device type found") } @@ -1782,13 +1782,24 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op if err != nil { return err } - // XXX: Depending on whether we're doing root port or - // bridge hotplug, and how the bridge is set up in - // other parts of the code, we may or may not already - // have information about the slot number of the - // bridge and or the device. For simplicity, just - // query both of them back from qemu - device.GuestPciPath, err = q.qomGetPciPath(devID) + + switch (*device).GetType() { + case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType: + pciDevice, ok := (*device).(config.VFIOPCIDev) + if !ok { + return fmt.Errorf("VFIO device %+v is not PCI, but its Type said otherwise", device) + } + // XXX: Depending on whether we're doing root port or + // bridge hotplug, and how the bridge is set up in + // other parts of the code, we may or may not already + // have information about the slot number of the + // bridge and or the device. For simplicity, just + // query both of them back from qemu + guestPciPath, err := q.qomGetPciPath(devID) + pciDevice.GuestPciPath = guestPciPath + *device = pciDevice + return err + } return err } else { q.Logger().WithField("dev-id", devID).Info("Start hot-unplug VFIO device") diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go index 5a03b659f6..9aff1e76c2 100644 --- a/src/runtime/virtcontainers/qemu_arch_base.go +++ b/src/runtime/virtcontainers/qemu_arch_base.go @@ -675,16 +675,17 @@ func (q *qemuArchBase) appendVhostUserDevice(ctx context.Context, devices []govm } func (q *qemuArchBase) appendVFIODevice(devices []govmmQemu.Device, vfioDev config.VFIODev) []govmmQemu.Device { - if vfioDev.BDF == "" { + pciDevice := vfioDev.(config.VFIOPCIDev) + if pciDevice.BDF == "" { return devices } devices = append(devices, govmmQemu.VFIODevice{ - BDF: vfioDev.BDF, - VendorID: vfioDev.VendorID, - DeviceID: vfioDev.DeviceID, - Bus: vfioDev.Bus, + BDF: pciDevice.BDF, + VendorID: pciDevice.VendorID, + DeviceID: pciDevice.DeviceID, + Bus: pciDevice.Bus, }, ) diff --git a/src/runtime/virtcontainers/qemu_arch_base_test.go b/src/runtime/virtcontainers/qemu_arch_base_test.go index 37611bb5bb..51c11bd91d 100644 --- a/src/runtime/virtcontainers/qemu_arch_base_test.go +++ b/src/runtime/virtcontainers/qemu_arch_base_test.go @@ -463,7 +463,7 @@ func TestQemuArchBaseAppendVFIODevice(t *testing.T) { }, } - vfDevice := config.VFIODev{ + vfDevice := config.VFIOPCIDev{ BDF: bdf, } @@ -483,7 +483,7 @@ func TestQemuArchBaseAppendVFIODeviceWithVendorDeviceID(t *testing.T) { }, } - vfDevice := config.VFIODev{ + vfDevice := config.VFIOPCIDev{ BDF: bdf, VendorID: vendorID, DeviceID: deviceID, diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index dcc93048f8..e7c52fcc22 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -1856,11 +1856,15 @@ func (s *Sandbox) HotplugAddDevice(ctx context.Context, device api.Device, devTy // adding a group of VFIO devices for _, dev := range vfioDevices { if _, err := s.hypervisor.HotplugAddDevice(ctx, dev, VfioDev); err != nil { + bdf := "" + if pciDevice, ok := (*dev).(config.VFIOPCIDev); ok { + bdf = pciDevice.BDF + } s.Logger(). WithFields(logrus.Fields{ "sandbox": s.id, - "vfio-device-ID": dev.ID, - "vfio-device-BDF": dev.BDF, + "vfio-device-ID": (*dev).GetID(), + "vfio-device-BDF": bdf, }).WithError(err).Error("failed to hotplug VFIO device") return err } @@ -1909,11 +1913,15 @@ func (s *Sandbox) HotplugRemoveDevice(ctx context.Context, device api.Device, de // remove a group of VFIO devices for _, dev := range vfioDevices { if _, err := s.hypervisor.HotplugRemoveDevice(ctx, dev, VfioDev); err != nil { + bdf := "" + if pciDevice, ok := (*dev).(config.VFIOPCIDev); ok { + bdf = pciDevice.BDF + } s.Logger().WithError(err). WithFields(logrus.Fields{ "sandbox": s.id, - "vfio-device-ID": dev.ID, - "vfio-device-BDF": dev.BDF, + "vfio-device-ID": (*dev).GetID(), + "vfio-device-BDF": bdf, }).Error("failed to hot unplug VFIO device") return err } diff --git a/src/runtime/virtcontainers/utils/utils_linux.go b/src/runtime/virtcontainers/utils/utils_linux.go index 40c5c360ea..0b46d2572e 100644 --- a/src/runtime/virtcontainers/utils/utils_linux.go +++ b/src/runtime/virtcontainers/utils/utils_linux.go @@ -89,8 +89,7 @@ func FindContextID() (*os.File, uint64, error) { const ( procMountsFile = "/proc/mounts" - fieldsPerLine = 6 - vfioAPSysfsDir = "vfio_ap" + fieldsPerLine = 6 ) const ( diff --git a/src/runtime/virtcontainers/utils/utils_linux_test.go b/src/runtime/virtcontainers/utils/utils_linux_test.go index dbf9fde38b..c8e213c2c7 100644 --- a/src/runtime/virtcontainers/utils/utils_linux_test.go +++ b/src/runtime/virtcontainers/utils/utils_linux_test.go @@ -63,19 +63,3 @@ func TestGetDevicePathAndFsTypeOptionsSuccessful(t *testing.T) { assert.Equal(fstype, fstypeOut) assert.Equal(fsOptions, optsOut) } - -func TestIsAPVFIOMediatedDeviceFalse(t *testing.T) { - assert := assert.New(t) - - // Should be false for a PCI device - isAPMdev := IsAPVFIOMediatedDevice("/sys/bus/pci/devices/0000:00:02.0/a297db4a-f4c2-11e6-90f6-d3b88d6c9525") - assert.False(isAPMdev) -} - -func TestIsAPVFIOMediatedDeviceTrue(t *testing.T) { - assert := assert.New(t) - - // Typical AP sysfsdev - isAPMdev := IsAPVFIOMediatedDevice("/sys/devices/vfio_ap/matrix/a297db4a-f4c2-11e6-90f6-d3b88d6c9525") - assert.True(isAPMdev) -}