mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-04-29 20:24:31 +00:00
s390x: Fixing device.Bus assignment
The device.Bus was reset if a specific combination of configuration parameters was not met. With the new PCIe topology this should not happen anymore. Fixes: #7381 Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
This commit is contained in:
parent
b6307c2744
commit
114542e2ba
@ -178,22 +178,22 @@ func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo) ([]*config.VFIODe
|
||||
}
|
||||
id := utils.MakeNameID("vfio", device.ID+strconv.Itoa(i), maxDevIDSize)
|
||||
|
||||
pciClass := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass)
|
||||
// We need to ignore Host or PCI Bridges that are in the same IOMMU group as the
|
||||
// passed-through devices. One CANNOT pass-through a PCI bridge or Host bridge.
|
||||
// Class 0x0604 is PCI bridge, 0x0600 is Host bridge
|
||||
ignorePCIDevice, err := checkIgnorePCIClass(pciClass, deviceBDF, 0x0600)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if ignorePCIDevice {
|
||||
continue
|
||||
}
|
||||
|
||||
var vfio config.VFIODev
|
||||
|
||||
switch vfioDeviceType {
|
||||
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
|
||||
// This is vfio-pci and vfio-mdev specific
|
||||
pciClass := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass)
|
||||
// We need to ignore Host or PCI Bridges that are in the same IOMMU group as the
|
||||
// passed-through devices. One CANNOT pass-through a PCI bridge or Host bridge.
|
||||
// Class 0x0604 is PCI bridge, 0x0600 is Host bridge
|
||||
ignorePCIDevice, err := checkIgnorePCIClass(pciClass, deviceBDF, 0x0600)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if ignorePCIDevice {
|
||||
continue
|
||||
}
|
||||
// Do not directly assign to `vfio` -- need to access field still
|
||||
vfio = config.VFIODev{
|
||||
ID: id,
|
||||
|
@ -70,6 +70,12 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece
|
||||
return err
|
||||
}
|
||||
for _, vfio := range device.VfioDevs {
|
||||
// If vfio.Port is not set we bail out, users should set
|
||||
// explicitly the port in the config file
|
||||
if vfio.Port == "" {
|
||||
return fmt.Errorf("cold_plug_vfio= or hot_plug_vfio= port is not set for device %s (BridgePort | RootPort | SwitchPort)", vfio.BDF)
|
||||
}
|
||||
|
||||
if vfio.IsPCIe {
|
||||
busIndex := len(config.PCIeDevices[vfio.Port])
|
||||
vfio.Bus = fmt.Sprintf("%s%d", config.PCIePortPrefixMapping[vfio.Port], busIndex)
|
||||
|
@ -132,6 +132,8 @@ func TestAttachVFIODevice(t *testing.T) {
|
||||
HostPath: path,
|
||||
ContainerPath: path,
|
||||
DevType: "c",
|
||||
ColdPlug: false,
|
||||
Port: config.RootPort,
|
||||
}
|
||||
|
||||
device, err := dm.NewDevice(deviceInfo)
|
||||
|
@ -163,6 +163,9 @@ const (
|
||||
|
||||
// TransportMMIO is the MMIO transport for virtio devices.
|
||||
TransportMMIO VirtioTransport = "mmio"
|
||||
|
||||
// TransportAP is the AP transport for virtio devices.
|
||||
TransportAP VirtioTransport = "ap"
|
||||
)
|
||||
|
||||
// defaultTransport returns the default transport for the current combination
|
||||
@ -199,6 +202,14 @@ func (transport VirtioTransport) isVirtioCCW(config *Config) bool {
|
||||
return transport == TransportCCW
|
||||
}
|
||||
|
||||
func (transport VirtioTransport) isVirtioAP(config *Config) bool {
|
||||
if transport == "" {
|
||||
transport = transport.defaultTransport(config)
|
||||
}
|
||||
|
||||
return transport == TransportAP
|
||||
}
|
||||
|
||||
// getName returns the name of the current transport.
|
||||
func (transport VirtioTransport) getName(config *Config) string {
|
||||
if transport == "" {
|
||||
@ -1811,6 +1822,9 @@ type VFIODevice struct {
|
||||
|
||||
// Transport is the virtio transport for this device.
|
||||
Transport VirtioTransport
|
||||
|
||||
// SysfsDev specifies the sysfs matrix entry for the AP device
|
||||
SysfsDev string
|
||||
}
|
||||
|
||||
// VFIODeviceTransport is a map of the vfio device name that corresponds to
|
||||
@ -1819,11 +1833,13 @@ var VFIODeviceTransport = map[VirtioTransport]string{
|
||||
TransportPCI: "vfio-pci",
|
||||
TransportCCW: "vfio-ccw",
|
||||
TransportMMIO: "vfio-device",
|
||||
TransportAP: "vfio-ap",
|
||||
}
|
||||
|
||||
// Valid returns true if the VFIODevice structure is valid and complete.
|
||||
// s390x architecture requires SysfsDev to be set.
|
||||
func (vfioDev VFIODevice) Valid() bool {
|
||||
return vfioDev.BDF != ""
|
||||
return vfioDev.BDF != "" || vfioDev.SysfsDev != ""
|
||||
}
|
||||
|
||||
// QemuParams returns the qemu parameters built out of this vfio device.
|
||||
@ -1833,6 +1849,15 @@ func (vfioDev VFIODevice) QemuParams(config *Config) []string {
|
||||
|
||||
driver := vfioDev.deviceName(config)
|
||||
|
||||
if vfioDev.Transport.isVirtioAP(config) {
|
||||
deviceParams = append(deviceParams, fmt.Sprintf("%s,sysfsdev=%s", driver, vfioDev.SysfsDev))
|
||||
|
||||
qemuParams = append(qemuParams, "-device")
|
||||
qemuParams = append(qemuParams, strings.Join(deviceParams, ","))
|
||||
|
||||
return qemuParams
|
||||
}
|
||||
|
||||
deviceParams = append(deviceParams, fmt.Sprintf("%s,host=%s", driver, vfioDev.BDF))
|
||||
if vfioDev.Transport.isVirtioPCI(config) {
|
||||
if vfioDev.VendorID != "" {
|
||||
@ -2837,10 +2862,9 @@ func (config *Config) appendDevices(logger QMPLog) {
|
||||
|
||||
for _, d := range config.Devices {
|
||||
if !d.Valid() {
|
||||
logger.Errorf("vm device is not valid: %+v", config.Devices)
|
||||
logger.Errorf("vm device is not valid: %+v", d)
|
||||
continue
|
||||
}
|
||||
|
||||
config.qemuParams = append(config.qemuParams, d.QemuParams(config)...)
|
||||
}
|
||||
}
|
||||
|
@ -1217,10 +1217,11 @@ func (q *QMP) ExecutePCIVFIOMediatedDeviceAdd(ctx context.Context, devID, sysfsd
|
||||
}
|
||||
|
||||
// ExecuteAPVFIOMediatedDeviceAdd adds a VFIO mediated AP device to a QEMU instance using the device_add command.
|
||||
func (q *QMP) ExecuteAPVFIOMediatedDeviceAdd(ctx context.Context, sysfsdev string) error {
|
||||
func (q *QMP) ExecuteAPVFIOMediatedDeviceAdd(ctx context.Context, sysfsdev string, devID string) error {
|
||||
args := map[string]interface{}{
|
||||
"driver": VfioAP,
|
||||
"sysfsdev": sysfsdev,
|
||||
"id": devID,
|
||||
}
|
||||
return q.executeCommand(ctx, "device_add", args, nil)
|
||||
}
|
||||
|
@ -1128,7 +1128,7 @@ func TestQMPAPVFIOMediatedDeviceAdd(t *testing.T) {
|
||||
q := startQMPLoop(buf, cfg, connectedCh, disconnectedCh)
|
||||
checkVersion(t, connectedCh)
|
||||
sysfsDev := "/sys/devices/vfio_ap/matrix/a297db4a-f4c2-11e6-90f6-d3b88d6c9525"
|
||||
err := q.ExecuteAPVFIOMediatedDeviceAdd(context.Background(), sysfsDev)
|
||||
err := q.ExecuteAPVFIOMediatedDeviceAdd(context.Background(), sysfsDev, "test-id")
|
||||
if err != nil {
|
||||
t.Fatalf("Unexpected error %v", err)
|
||||
}
|
||||
|
@ -869,6 +869,23 @@ func (c *Container) create(ctx context.Context) (err error) {
|
||||
}
|
||||
}
|
||||
|
||||
// If cold-plug we've attached the devices already, do not try to
|
||||
// attach them a second time.
|
||||
coldPlugVFIO := (c.sandbox.config.HypervisorConfig.ColdPlugVFIO != config.NoPort)
|
||||
if coldPlugVFIO {
|
||||
var cntDevices []ContainerDevice
|
||||
for _, dev := range c.devices {
|
||||
if strings.HasPrefix(dev.ContainerPath, vfioPath) {
|
||||
c.Logger().WithFields(logrus.Fields{
|
||||
"device": dev,
|
||||
}).Info("Remvoing device since we're cold-plugging no Attach needed")
|
||||
continue
|
||||
}
|
||||
cntDevices = append(cntDevices, dev)
|
||||
}
|
||||
c.devices = cntDevices
|
||||
}
|
||||
|
||||
c.Logger().WithFields(logrus.Fields{
|
||||
"devices": c.devices,
|
||||
}).Info("Attach devices")
|
||||
|
@ -65,11 +65,6 @@ const romFile = ""
|
||||
// Default value is false.
|
||||
const defaultDisableModern = false
|
||||
|
||||
// A deeper PCIe topology than 5 is already not advisable just for the sake
|
||||
// of having enough buffer we limit ourselves to 10 and exit if we reach
|
||||
// the root bus
|
||||
const maxPCIeTopoDepth = 10
|
||||
|
||||
type qmpChannel struct {
|
||||
qmp *govmmQemu.QMP
|
||||
ctx context.Context
|
||||
@ -80,15 +75,14 @@ type qmpChannel struct {
|
||||
|
||||
// QemuState keeps Qemu's state
|
||||
type QemuState struct {
|
||||
UUID string
|
||||
HotPlugVFIO config.PCIePort
|
||||
Bridges []types.Bridge
|
||||
HotpluggedVCPUs []hv.CPUDevice
|
||||
HotpluggedMemory int
|
||||
VirtiofsDaemonPid int
|
||||
HotplugVFIOOnRootBus bool
|
||||
HotplugVFIO config.PCIePort
|
||||
ColdPlugVFIO config.PCIePort
|
||||
UUID string
|
||||
HotPlugVFIO config.PCIePort
|
||||
Bridges []types.Bridge
|
||||
HotpluggedVCPUs []hv.CPUDevice
|
||||
HotpluggedMemory int
|
||||
VirtiofsDaemonPid int
|
||||
HotplugVFIO config.PCIePort
|
||||
ColdPlugVFIO config.PCIePort
|
||||
}
|
||||
|
||||
// qemu is an Hypervisor interface implementation for the Linux qemu hypervisor.
|
||||
@ -289,7 +283,6 @@ func (q *qemu) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso
|
||||
q.state.UUID = uuid.Generate().String()
|
||||
q.state.HotPlugVFIO = q.config.HotPlugVFIO
|
||||
q.state.ColdPlugVFIO = q.config.ColdPlugVFIO
|
||||
q.state.HotplugVFIOOnRootBus = q.config.HotplugVFIOOnRootBus
|
||||
q.state.HotPlugVFIO = q.config.HotPlugVFIO
|
||||
|
||||
// The path might already exist, but in case of VM templating,
|
||||
@ -792,7 +785,7 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
|
||||
}
|
||||
}
|
||||
}
|
||||
vfioOnRootPort := (q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort || q.state.HotplugVFIOOnRootBus)
|
||||
vfioOnRootPort := (q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort)
|
||||
vfioOnSwitchPort := (q.state.HotPlugVFIO == config.SwitchPort || q.state.ColdPlugVFIO == config.SwitchPort)
|
||||
|
||||
numOfVhostUserBlockDevices := len(hypervisorConfig.VhostUserBlkDevices)
|
||||
@ -1638,7 +1631,7 @@ func (q *qemu) hotplugAddVhostUserBlkDevice(ctx context.Context, vAttr *config.V
|
||||
config.PCIeDevices[config.RootPort][devID] = true
|
||||
|
||||
bridgeQomPath := fmt.Sprintf("%s%s", qomPathPrefix, bridgeID)
|
||||
bridgeSlot, err := q.qomGetSlot(bridgeQomPath)
|
||||
bridgeSlot, err := q.arch.qomGetSlot(bridgeQomPath, &q.qmpMonitorCh)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -1741,88 +1734,6 @@ func (q *qemu) hotplugVhostUserDevice(ctx context.Context, vAttr *config.VhostUs
|
||||
}
|
||||
}
|
||||
|
||||
// Query QMP to find the PCI slot of a device, given its QOM path or ID
|
||||
func (q *qemu) qomGetSlot(qomPath string) (types.PciSlot, error) {
|
||||
addr, err := q.qmpMonitorCh.qmp.ExecQomGet(q.qmpMonitorCh.ctx, qomPath, "addr")
|
||||
if err != nil {
|
||||
return types.PciSlot{}, err
|
||||
}
|
||||
addrf, ok := addr.(float64)
|
||||
// XXX going via float makes no real sense, but that's how
|
||||
// JSON works, and we'll get away with it for the small values
|
||||
// we have here
|
||||
if !ok {
|
||||
return types.PciSlot{}, fmt.Errorf("addr QOM property of %q is %T not a number", qomPath, addr)
|
||||
}
|
||||
addri := int(addrf)
|
||||
|
||||
slotNum, funcNum := addri>>3, addri&0x7
|
||||
if funcNum != 0 {
|
||||
return types.PciSlot{}, fmt.Errorf("Unexpected non-zero PCI function (%02x.%1x) on %q",
|
||||
slotNum, funcNum, qomPath)
|
||||
}
|
||||
|
||||
return types.PciSlotFromInt(slotNum)
|
||||
}
|
||||
|
||||
// Query QMP to find a device's PCI path given its QOM path or ID
|
||||
func (q *qemu) qomGetPciPath(qemuID string) (types.PciPath, error) {
|
||||
|
||||
var slots []types.PciSlot
|
||||
|
||||
devSlot, err := q.qomGetSlot(qemuID)
|
||||
if err != nil {
|
||||
return types.PciPath{}, err
|
||||
}
|
||||
slots = append(slots, devSlot)
|
||||
|
||||
// This only works for Q35 and Virt
|
||||
r, _ := regexp.Compile(`^/machine/.*/pcie.0`)
|
||||
|
||||
var parentPath = qemuID
|
||||
// We do not want to use a forever loop here, a deeper PCIe topology
|
||||
// than 5 is already not advisable just for the sake of having enough
|
||||
// buffer we limit ourselves to 10 and leave the loop early if we hit
|
||||
// the root bus.
|
||||
for i := 1; i <= maxPCIeTopoDepth; i++ {
|
||||
parenBusQOM, err := q.qmpMonitorCh.qmp.ExecQomGet(q.qmpMonitorCh.ctx, parentPath, "parent_bus")
|
||||
if err != nil {
|
||||
return types.PciPath{}, err
|
||||
}
|
||||
|
||||
busQOM, ok := parenBusQOM.(string)
|
||||
if !ok {
|
||||
return types.PciPath{}, fmt.Errorf("parent_bus QOM property of %s is %t not a string", qemuID, parenBusQOM)
|
||||
}
|
||||
|
||||
// If we hit /machine/q35/pcie.0 we're done this is the root bus
|
||||
// we climbed the complete hierarchy
|
||||
if r.Match([]byte(busQOM)) {
|
||||
break
|
||||
}
|
||||
|
||||
// `bus` is the QOM path of the QOM bus object, but we need
|
||||
// the PCI parent_bus which manages that bus. There doesn't seem
|
||||
// to be a way to get that other than to simply drop the last
|
||||
// path component.
|
||||
idx := strings.LastIndex(busQOM, "/")
|
||||
if idx == -1 {
|
||||
return types.PciPath{}, fmt.Errorf("Bus has unexpected QOM path %s", busQOM)
|
||||
}
|
||||
parentBus := busQOM[:idx]
|
||||
|
||||
parentSlot, err := q.qomGetSlot(parentBus)
|
||||
if err != nil {
|
||||
return types.PciPath{}, err
|
||||
}
|
||||
|
||||
// Prepend the slots, since we're climbing the hierarchy
|
||||
slots = append([]types.PciSlot{parentSlot}, slots...)
|
||||
parentPath = parentBus
|
||||
}
|
||||
return types.PciPathFromSlots(slots...)
|
||||
}
|
||||
|
||||
func (q *qemu) hotplugVFIODeviceRootPort(ctx context.Context, device *config.VFIODev) (err error) {
|
||||
return q.executeVFIODeviceAdd(device)
|
||||
}
|
||||
@ -1852,7 +1763,7 @@ func (q *qemu) executePCIVFIODeviceAdd(device *config.VFIODev, addr string, brid
|
||||
case config.VFIOPCIDeviceMediatedType:
|
||||
return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.ID, device.SysfsDev, addr, bridgeID, romFile)
|
||||
case config.VFIOAPDeviceMediatedType:
|
||||
return q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev)
|
||||
return q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev, device.ID)
|
||||
default:
|
||||
return fmt.Errorf("Incorrect VFIO device type found")
|
||||
}
|
||||
@ -1865,7 +1776,7 @@ func (q *qemu) executeVFIODeviceAdd(device *config.VFIODev) error {
|
||||
case config.VFIOPCIDeviceMediatedType:
|
||||
return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.ID, device.SysfsDev, "", device.Bus, romFile)
|
||||
case config.VFIOAPDeviceMediatedType:
|
||||
return q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev)
|
||||
return q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev, device.ID)
|
||||
default:
|
||||
return fmt.Errorf("Incorrect VFIO device type found")
|
||||
}
|
||||
@ -1883,46 +1794,43 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op
|
||||
"hot-plug-vfio": q.state.HotPlugVFIO,
|
||||
"device-info": string(buf),
|
||||
}).Info("Start hot-plug VFIO device")
|
||||
// In case MachineType is q35, a PCIe device is hotplugged on
|
||||
// a PCIe Root Port or alternatively on a PCIe Switch Port
|
||||
if q.HypervisorConfig().HypervisorMachineType != QemuQ35 && q.HypervisorConfig().HypervisorMachineType != QemuVirt {
|
||||
device.Bus = ""
|
||||
} else {
|
||||
var err error
|
||||
// In case HotplugVFIOOnRootBus is true, devices are hotplugged on the root bus
|
||||
// for pc machine type instead of bridge. This is useful for devices that require
|
||||
// a large PCI BAR which is currently a limitation with PCI bridges.
|
||||
if q.state.HotPlugVFIO == config.RootPort || q.state.HotplugVFIOOnRootBus {
|
||||
err = q.hotplugVFIODeviceRootPort(ctx, device)
|
||||
} else if q.state.HotPlugVFIO == config.SwitchPort {
|
||||
err = q.hotplugVFIODeviceSwitchPort(ctx, device)
|
||||
} else {
|
||||
err = q.hotplugVFIODeviceBridgePort(ctx, device)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = fmt.Errorf("Incorrect hot plug configuration %v for device %v found", q.state.HotPlugVFIO, device)
|
||||
// In case HotplugVFIOOnRootBus is true, devices are hotplugged on the root bus
|
||||
// for pc machine type instead of bridge. This is useful for devices that require
|
||||
// a large PCI BAR which is currently a limitation with PCI bridges.
|
||||
if q.state.HotPlugVFIO == config.RootPort {
|
||||
err = q.hotplugVFIODeviceRootPort(ctx, device)
|
||||
} else if q.state.HotPlugVFIO == config.SwitchPort {
|
||||
err = q.hotplugVFIODeviceSwitchPort(ctx, device)
|
||||
} else if q.state.HotPlugVFIO == config.BridgePort {
|
||||
err = q.hotplugVFIODeviceBridgePort(ctx, device)
|
||||
}
|
||||
// XXX: Depending on whether we're doing root port or
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Depending on whether we're doing root port or
|
||||
// bridge hotplug, and how the bridge is set up in
|
||||
// other parts of the code, we may or may not already
|
||||
// have information about the slot number of the
|
||||
// bridge and or the device. For simplicity, just
|
||||
// query both of them back from qemu
|
||||
device.GuestPciPath, err = q.qomGetPciPath(device.ID)
|
||||
// query both of them back from qemu based on the arch
|
||||
device.GuestPciPath, err = q.arch.qomGetPciPath(device.ID, &q.qmpMonitorCh)
|
||||
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
|
||||
q.Logger().WithField("dev-id", device.ID).Info("Start hot-unplug VFIO device")
|
||||
q.Logger().WithField("dev-id", device.ID).Info("Start hot-unplug VFIO device")
|
||||
|
||||
if !q.state.HotplugVFIOOnRootBus {
|
||||
if err := q.arch.removeDeviceFromBridge(device.ID); err != nil {
|
||||
return err
|
||||
if q.state.HotPlugVFIO == config.BridgePort {
|
||||
if err := q.arch.removeDeviceFromBridge(device.ID); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return q.qmpMonitorCh.qmp.ExecuteDeviceDel(q.qmpMonitorCh.ctx, device.ID)
|
||||
}
|
||||
|
||||
return q.qmpMonitorCh.qmp.ExecuteDeviceDel(q.qmpMonitorCh.ctx, device.ID)
|
||||
|
||||
}
|
||||
|
||||
func (q *qemu) hotAddNetDevice(name, hardAddr string, VMFds, VhostFds []*os.File) error {
|
||||
@ -2881,7 +2789,6 @@ func (q *qemu) Save() (s hv.HypervisorState) {
|
||||
s.Type = string(QemuHypervisor)
|
||||
s.UUID = q.state.UUID
|
||||
s.HotpluggedMemory = q.state.HotpluggedMemory
|
||||
s.HotplugVFIOOnRootBus = q.state.HotplugVFIOOnRootBus
|
||||
|
||||
for _, bridge := range q.arch.getBridges() {
|
||||
s.Bridges = append(s.Bridges, hv.Bridge{
|
||||
@ -2903,7 +2810,6 @@ func (q *qemu) Save() (s hv.HypervisorState) {
|
||||
func (q *qemu) Load(s hv.HypervisorState) {
|
||||
q.state.UUID = s.UUID
|
||||
q.state.HotpluggedMemory = s.HotpluggedMemory
|
||||
q.state.HotplugVFIOOnRootBus = s.HotplugVFIOOnRootBus
|
||||
q.state.VirtiofsDaemonPid = s.VirtiofsDaemonPid
|
||||
|
||||
for _, bridge := range s.Bridges {
|
||||
|
@ -13,6 +13,7 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
@ -24,6 +25,11 @@ import (
|
||||
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
|
||||
)
|
||||
|
||||
// A deeper PCIe topology than 5 is already not advisable just for the sake
|
||||
// of having enough buffer we limit ourselves to 10 and exit if we reach
|
||||
// the root bus
|
||||
const maxPCIeTopoDepth = 10
|
||||
|
||||
type qemuArch interface {
|
||||
// enableNestingChecks nesting checks will be honoured
|
||||
enableNestingChecks()
|
||||
@ -158,6 +164,12 @@ type qemuArch interface {
|
||||
// scans the PCIe space and returns the biggest BAR sizes for 32-bit
|
||||
// and 64-bit addressable memory
|
||||
getBARsMaxAddressableMemory() (uint64, uint64)
|
||||
|
||||
// Query QMP to find a device's PCI path given its QOM path or ID
|
||||
qomGetPciPath(qemuID string, qmpCh *qmpChannel) (types.PciPath, error)
|
||||
|
||||
// Query QMP to find the PCI slot of a device, given its QOM path or ID
|
||||
qomGetSlot(qomPath string, qmpCh *qmpChannel) (types.PciSlot, error)
|
||||
}
|
||||
|
||||
type qemuArchBase struct {
|
||||
@ -881,3 +893,85 @@ func (q *qemuArchBase) appendProtectionDevice(devices []govmmQemu.Device, firmwa
|
||||
hvLogger.WithField("arch", runtime.GOARCH).Warnf("Confidential Computing has not been implemented for this architecture")
|
||||
return devices, firmware, nil
|
||||
}
|
||||
|
||||
// Query QMP to find the PCI slot of a device, given its QOM path or ID
|
||||
func (q *qemuArchBase) qomGetSlot(qomPath string, qmpCh *qmpChannel) (types.PciSlot, error) {
|
||||
addr, err := qmpCh.qmp.ExecQomGet(qmpCh.ctx, qomPath, "addr")
|
||||
if err != nil {
|
||||
return types.PciSlot{}, err
|
||||
}
|
||||
addrf, ok := addr.(float64)
|
||||
// XXX going via float makes no real sense, but that's how
|
||||
// JSON works, and we'll get away with it for the small values
|
||||
// we have here
|
||||
if !ok {
|
||||
return types.PciSlot{}, fmt.Errorf("addr QOM property of %q is %T not a number", qomPath, addr)
|
||||
}
|
||||
addri := int(addrf)
|
||||
|
||||
slotNum, funcNum := addri>>3, addri&0x7
|
||||
if funcNum != 0 {
|
||||
return types.PciSlot{}, fmt.Errorf("Unexpected non-zero PCI function (%02x.%1x) on %q",
|
||||
slotNum, funcNum, qomPath)
|
||||
}
|
||||
|
||||
return types.PciSlotFromInt(slotNum)
|
||||
}
|
||||
|
||||
// Query QMP to find a device's PCI path given its QOM path or ID
|
||||
func (q *qemuArchBase) qomGetPciPath(qemuID string, qmpCh *qmpChannel) (types.PciPath, error) {
|
||||
|
||||
var slots []types.PciSlot
|
||||
|
||||
devSlot, err := q.qomGetSlot(qemuID, qmpCh)
|
||||
if err != nil {
|
||||
return types.PciPath{}, err
|
||||
}
|
||||
slots = append(slots, devSlot)
|
||||
|
||||
// This only works for Q35 and Virt
|
||||
r, _ := regexp.Compile(`^/machine/.*/pcie.0`)
|
||||
|
||||
var parentPath = qemuID
|
||||
// We do not want to use a forever loop here, a deeper PCIe topology
|
||||
// than 5 is already not advisable just for the sake of having enough
|
||||
// buffer we limit ourselves to 10 and leave the loop early if we hit
|
||||
// the root bus.
|
||||
for i := 1; i <= maxPCIeTopoDepth; i++ {
|
||||
parenBusQOM, err := qmpCh.qmp.ExecQomGet(qmpCh.ctx, parentPath, "parent_bus")
|
||||
if err != nil {
|
||||
return types.PciPath{}, err
|
||||
}
|
||||
|
||||
busQOM, ok := parenBusQOM.(string)
|
||||
if !ok {
|
||||
return types.PciPath{}, fmt.Errorf("parent_bus QOM property of %s is %t not a string", qemuID, parenBusQOM)
|
||||
}
|
||||
|
||||
// If we hit /machine/q35/pcie.0 we're done this is the root bus
|
||||
// we climbed the complete hierarchy
|
||||
if r.Match([]byte(busQOM)) {
|
||||
break
|
||||
}
|
||||
|
||||
// `bus` is the QOM path of the QOM bus object, but we need
|
||||
// the PCI parent_bus which manages that bus. There doesn't seem
|
||||
// to be a way to get that other than to simply drop the last
|
||||
// path component.
|
||||
idx := strings.LastIndex(busQOM, "/")
|
||||
if idx == -1 {
|
||||
return types.PciPath{}, fmt.Errorf("Bus has unexpected QOM path %s", busQOM)
|
||||
}
|
||||
parentBus := busQOM[:idx]
|
||||
|
||||
parentSlot, err := q.qomGetSlot(parentBus, qmpCh)
|
||||
if err != nil {
|
||||
return types.PciPath{}, err
|
||||
}
|
||||
|
||||
// Prepend the slots, since we're climbing the hierarchy
|
||||
slots = append([]types.PciSlot{parentSlot}, slots...)
|
||||
parentPath = parentBus
|
||||
}
|
||||
return types.PciPathFromSlots(slots...)
|
||||
}
|
||||
|
@ -351,3 +351,32 @@ func (q *qemuS390x) appendProtectionDevice(devices []govmmQemu.Device, firmware,
|
||||
return devices, firmware, fmt.Errorf("Unsupported guest protection technology: %v", q.protection)
|
||||
}
|
||||
}
|
||||
|
||||
func (q *qemuS390x) appendVFIODevice(devices []govmmQemu.Device, vfioDev config.VFIODev) []govmmQemu.Device {
|
||||
if vfioDev.SysfsDev == "" {
|
||||
return devices
|
||||
}
|
||||
|
||||
if len(vfioDev.APDevices) > 0 {
|
||||
devices = append(devices,
|
||||
govmmQemu.VFIODevice{
|
||||
SysfsDev: vfioDev.SysfsDev,
|
||||
Transport: govmmQemu.TransportAP,
|
||||
},
|
||||
)
|
||||
return devices
|
||||
|
||||
}
|
||||
devices = append(devices,
|
||||
govmmQemu.VFIODevice{
|
||||
SysfsDev: vfioDev.SysfsDev,
|
||||
},
|
||||
)
|
||||
return devices
|
||||
}
|
||||
|
||||
// Query QMP to find a device's PCI path given its QOM path or ID
|
||||
func (q *qemuArchBase) qomGetPciPath(qemuID string, qmpCh *qmpChannel) (types.PciPath, error) {
|
||||
hvLogger.Warnf("qomGetPciPath not implemented for s390x")
|
||||
return types.PciPath{}, nil
|
||||
}
|
||||
|
@ -619,6 +619,7 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
|
||||
// Aggregate all the container devices for hot-plug and use them to deduce
|
||||
// the correct amount of ports to reserve for the hypervisor.
|
||||
hotPlugVFIO := (sandboxConfig.HypervisorConfig.HotPlugVFIO != config.NoPort)
|
||||
stripVFIO := sandboxConfig.VfioMode == config.VFIOModeGuestKernel
|
||||
|
||||
var vfioDevices []config.DeviceInfo
|
||||
// vhost-user-block device is a PCIe device in Virt, keep track of it
|
||||
@ -644,7 +645,9 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
|
||||
// We need to remove the devices marked for cold-plug
|
||||
// otherwise at the container level the kata-agent
|
||||
// will try to hot-plug them.
|
||||
sandboxConfig.Containers[cnt].DeviceInfos[dev].ID = "remove-we-are-cold-plugging"
|
||||
if stripVFIO {
|
||||
sandboxConfig.Containers[cnt].DeviceInfos[dev].ID = "remove-we-are-cold-plugging"
|
||||
}
|
||||
}
|
||||
}
|
||||
var filteredDevices []config.DeviceInfo
|
||||
@ -656,6 +659,7 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
|
||||
sandboxConfig.Containers[cnt].DeviceInfos = filteredDevices
|
||||
|
||||
}
|
||||
|
||||
sandboxConfig.HypervisorConfig.VFIODevices = vfioDevices
|
||||
sandboxConfig.HypervisorConfig.VhostUserBlkDevices = vhostUserBlkDevices
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user