mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-04-30 20:54:26 +00:00
s390x: Fixing device.Bus assignment
The device.Bus was reset if a specific combination of configuration parameters was not met. With the new PCIe topology this should no longer happen. Fixes: #7381 Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
This commit is contained in:
parent
b6307c2744
commit
114542e2ba
@ -178,6 +178,11 @@ func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo) ([]*config.VFIODe
|
|||||||
}
|
}
|
||||||
id := utils.MakeNameID("vfio", device.ID+strconv.Itoa(i), maxDevIDSize)
|
id := utils.MakeNameID("vfio", device.ID+strconv.Itoa(i), maxDevIDSize)
|
||||||
|
|
||||||
|
var vfio config.VFIODev
|
||||||
|
|
||||||
|
switch vfioDeviceType {
|
||||||
|
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
|
||||||
|
// This is vfio-pci and vfio-mdev specific
|
||||||
pciClass := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass)
|
pciClass := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass)
|
||||||
// We need to ignore Host or PCI Bridges that are in the same IOMMU group as the
|
// We need to ignore Host or PCI Bridges that are in the same IOMMU group as the
|
||||||
// passed-through devices. One CANNOT pass-through a PCI bridge or Host bridge.
|
// passed-through devices. One CANNOT pass-through a PCI bridge or Host bridge.
|
||||||
@ -189,11 +194,6 @@ func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo) ([]*config.VFIODe
|
|||||||
if ignorePCIDevice {
|
if ignorePCIDevice {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
var vfio config.VFIODev
|
|
||||||
|
|
||||||
switch vfioDeviceType {
|
|
||||||
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
|
|
||||||
// Do not directly assign to `vfio` -- need to access field still
|
// Do not directly assign to `vfio` -- need to access field still
|
||||||
vfio = config.VFIODev{
|
vfio = config.VFIODev{
|
||||||
ID: id,
|
ID: id,
|
||||||
|
@ -70,6 +70,12 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
for _, vfio := range device.VfioDevs {
|
for _, vfio := range device.VfioDevs {
|
||||||
|
// If vfio.Port is not set we bail out, users should set
|
||||||
|
// explicitly the port in the config file
|
||||||
|
if vfio.Port == "" {
|
||||||
|
return fmt.Errorf("cold_plug_vfio= or hot_plug_vfio= port is not set for device %s (BridgePort | RootPort | SwitchPort)", vfio.BDF)
|
||||||
|
}
|
||||||
|
|
||||||
if vfio.IsPCIe {
|
if vfio.IsPCIe {
|
||||||
busIndex := len(config.PCIeDevices[vfio.Port])
|
busIndex := len(config.PCIeDevices[vfio.Port])
|
||||||
vfio.Bus = fmt.Sprintf("%s%d", config.PCIePortPrefixMapping[vfio.Port], busIndex)
|
vfio.Bus = fmt.Sprintf("%s%d", config.PCIePortPrefixMapping[vfio.Port], busIndex)
|
||||||
|
@ -132,6 +132,8 @@ func TestAttachVFIODevice(t *testing.T) {
|
|||||||
HostPath: path,
|
HostPath: path,
|
||||||
ContainerPath: path,
|
ContainerPath: path,
|
||||||
DevType: "c",
|
DevType: "c",
|
||||||
|
ColdPlug: false,
|
||||||
|
Port: config.RootPort,
|
||||||
}
|
}
|
||||||
|
|
||||||
device, err := dm.NewDevice(deviceInfo)
|
device, err := dm.NewDevice(deviceInfo)
|
||||||
|
@ -163,6 +163,9 @@ const (
|
|||||||
|
|
||||||
// TransportMMIO is the MMIO transport for virtio devices.
|
// TransportMMIO is the MMIO transport for virtio devices.
|
||||||
TransportMMIO VirtioTransport = "mmio"
|
TransportMMIO VirtioTransport = "mmio"
|
||||||
|
|
||||||
|
// TransportAP is the AP transport for virtio devices.
|
||||||
|
TransportAP VirtioTransport = "ap"
|
||||||
)
|
)
|
||||||
|
|
||||||
// defaultTransport returns the default transport for the current combination
|
// defaultTransport returns the default transport for the current combination
|
||||||
@ -199,6 +202,14 @@ func (transport VirtioTransport) isVirtioCCW(config *Config) bool {
|
|||||||
return transport == TransportCCW
|
return transport == TransportCCW
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (transport VirtioTransport) isVirtioAP(config *Config) bool {
|
||||||
|
if transport == "" {
|
||||||
|
transport = transport.defaultTransport(config)
|
||||||
|
}
|
||||||
|
|
||||||
|
return transport == TransportAP
|
||||||
|
}
|
||||||
|
|
||||||
// getName returns the name of the current transport.
|
// getName returns the name of the current transport.
|
||||||
func (transport VirtioTransport) getName(config *Config) string {
|
func (transport VirtioTransport) getName(config *Config) string {
|
||||||
if transport == "" {
|
if transport == "" {
|
||||||
@ -1811,6 +1822,9 @@ type VFIODevice struct {
|
|||||||
|
|
||||||
// Transport is the virtio transport for this device.
|
// Transport is the virtio transport for this device.
|
||||||
Transport VirtioTransport
|
Transport VirtioTransport
|
||||||
|
|
||||||
|
// SysfsDev specifies the sysfs matrix entry for the AP device
|
||||||
|
SysfsDev string
|
||||||
}
|
}
|
||||||
|
|
||||||
// VFIODeviceTransport is a map of the vfio device name that corresponds to
|
// VFIODeviceTransport is a map of the vfio device name that corresponds to
|
||||||
@ -1819,11 +1833,13 @@ var VFIODeviceTransport = map[VirtioTransport]string{
|
|||||||
TransportPCI: "vfio-pci",
|
TransportPCI: "vfio-pci",
|
||||||
TransportCCW: "vfio-ccw",
|
TransportCCW: "vfio-ccw",
|
||||||
TransportMMIO: "vfio-device",
|
TransportMMIO: "vfio-device",
|
||||||
|
TransportAP: "vfio-ap",
|
||||||
}
|
}
|
||||||
|
|
||||||
// Valid returns true if the VFIODevice structure is valid and complete.
|
// Valid returns true if the VFIODevice structure is valid and complete.
|
||||||
|
// s390x architecture requires SysfsDev to be set.
|
||||||
func (vfioDev VFIODevice) Valid() bool {
|
func (vfioDev VFIODevice) Valid() bool {
|
||||||
return vfioDev.BDF != ""
|
return vfioDev.BDF != "" || vfioDev.SysfsDev != ""
|
||||||
}
|
}
|
||||||
|
|
||||||
// QemuParams returns the qemu parameters built out of this vfio device.
|
// QemuParams returns the qemu parameters built out of this vfio device.
|
||||||
@ -1833,6 +1849,15 @@ func (vfioDev VFIODevice) QemuParams(config *Config) []string {
|
|||||||
|
|
||||||
driver := vfioDev.deviceName(config)
|
driver := vfioDev.deviceName(config)
|
||||||
|
|
||||||
|
if vfioDev.Transport.isVirtioAP(config) {
|
||||||
|
deviceParams = append(deviceParams, fmt.Sprintf("%s,sysfsdev=%s", driver, vfioDev.SysfsDev))
|
||||||
|
|
||||||
|
qemuParams = append(qemuParams, "-device")
|
||||||
|
qemuParams = append(qemuParams, strings.Join(deviceParams, ","))
|
||||||
|
|
||||||
|
return qemuParams
|
||||||
|
}
|
||||||
|
|
||||||
deviceParams = append(deviceParams, fmt.Sprintf("%s,host=%s", driver, vfioDev.BDF))
|
deviceParams = append(deviceParams, fmt.Sprintf("%s,host=%s", driver, vfioDev.BDF))
|
||||||
if vfioDev.Transport.isVirtioPCI(config) {
|
if vfioDev.Transport.isVirtioPCI(config) {
|
||||||
if vfioDev.VendorID != "" {
|
if vfioDev.VendorID != "" {
|
||||||
@ -2837,10 +2862,9 @@ func (config *Config) appendDevices(logger QMPLog) {
|
|||||||
|
|
||||||
for _, d := range config.Devices {
|
for _, d := range config.Devices {
|
||||||
if !d.Valid() {
|
if !d.Valid() {
|
||||||
logger.Errorf("vm device is not valid: %+v", config.Devices)
|
logger.Errorf("vm device is not valid: %+v", d)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
config.qemuParams = append(config.qemuParams, d.QemuParams(config)...)
|
config.qemuParams = append(config.qemuParams, d.QemuParams(config)...)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1217,10 +1217,11 @@ func (q *QMP) ExecutePCIVFIOMediatedDeviceAdd(ctx context.Context, devID, sysfsd
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ExecuteAPVFIOMediatedDeviceAdd adds a VFIO mediated AP device to a QEMU instance using the device_add command.
|
// ExecuteAPVFIOMediatedDeviceAdd adds a VFIO mediated AP device to a QEMU instance using the device_add command.
|
||||||
func (q *QMP) ExecuteAPVFIOMediatedDeviceAdd(ctx context.Context, sysfsdev string) error {
|
func (q *QMP) ExecuteAPVFIOMediatedDeviceAdd(ctx context.Context, sysfsdev string, devID string) error {
|
||||||
args := map[string]interface{}{
|
args := map[string]interface{}{
|
||||||
"driver": VfioAP,
|
"driver": VfioAP,
|
||||||
"sysfsdev": sysfsdev,
|
"sysfsdev": sysfsdev,
|
||||||
|
"id": devID,
|
||||||
}
|
}
|
||||||
return q.executeCommand(ctx, "device_add", args, nil)
|
return q.executeCommand(ctx, "device_add", args, nil)
|
||||||
}
|
}
|
||||||
|
@ -1128,7 +1128,7 @@ func TestQMPAPVFIOMediatedDeviceAdd(t *testing.T) {
|
|||||||
q := startQMPLoop(buf, cfg, connectedCh, disconnectedCh)
|
q := startQMPLoop(buf, cfg, connectedCh, disconnectedCh)
|
||||||
checkVersion(t, connectedCh)
|
checkVersion(t, connectedCh)
|
||||||
sysfsDev := "/sys/devices/vfio_ap/matrix/a297db4a-f4c2-11e6-90f6-d3b88d6c9525"
|
sysfsDev := "/sys/devices/vfio_ap/matrix/a297db4a-f4c2-11e6-90f6-d3b88d6c9525"
|
||||||
err := q.ExecuteAPVFIOMediatedDeviceAdd(context.Background(), sysfsDev)
|
err := q.ExecuteAPVFIOMediatedDeviceAdd(context.Background(), sysfsDev, "test-id")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Unexpected error %v", err)
|
t.Fatalf("Unexpected error %v", err)
|
||||||
}
|
}
|
||||||
|
@ -869,6 +869,23 @@ func (c *Container) create(ctx context.Context) (err error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If cold-plug we've attached the devices already, do not try to
|
||||||
|
// attach them a second time.
|
||||||
|
coldPlugVFIO := (c.sandbox.config.HypervisorConfig.ColdPlugVFIO != config.NoPort)
|
||||||
|
if coldPlugVFIO {
|
||||||
|
var cntDevices []ContainerDevice
|
||||||
|
for _, dev := range c.devices {
|
||||||
|
if strings.HasPrefix(dev.ContainerPath, vfioPath) {
|
||||||
|
c.Logger().WithFields(logrus.Fields{
|
||||||
|
"device": dev,
|
||||||
|
}).Info("Remvoing device since we're cold-plugging no Attach needed")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
cntDevices = append(cntDevices, dev)
|
||||||
|
}
|
||||||
|
c.devices = cntDevices
|
||||||
|
}
|
||||||
|
|
||||||
c.Logger().WithFields(logrus.Fields{
|
c.Logger().WithFields(logrus.Fields{
|
||||||
"devices": c.devices,
|
"devices": c.devices,
|
||||||
}).Info("Attach devices")
|
}).Info("Attach devices")
|
||||||
|
@ -65,11 +65,6 @@ const romFile = ""
|
|||||||
// Default value is false.
|
// Default value is false.
|
||||||
const defaultDisableModern = false
|
const defaultDisableModern = false
|
||||||
|
|
||||||
// A deeper PCIe topology than 5 is already not advisable just for the sake
|
|
||||||
// of having enough buffer we limit ourselves to 10 and exit if we reach
|
|
||||||
// the root bus
|
|
||||||
const maxPCIeTopoDepth = 10
|
|
||||||
|
|
||||||
type qmpChannel struct {
|
type qmpChannel struct {
|
||||||
qmp *govmmQemu.QMP
|
qmp *govmmQemu.QMP
|
||||||
ctx context.Context
|
ctx context.Context
|
||||||
@ -86,7 +81,6 @@ type QemuState struct {
|
|||||||
HotpluggedVCPUs []hv.CPUDevice
|
HotpluggedVCPUs []hv.CPUDevice
|
||||||
HotpluggedMemory int
|
HotpluggedMemory int
|
||||||
VirtiofsDaemonPid int
|
VirtiofsDaemonPid int
|
||||||
HotplugVFIOOnRootBus bool
|
|
||||||
HotplugVFIO config.PCIePort
|
HotplugVFIO config.PCIePort
|
||||||
ColdPlugVFIO config.PCIePort
|
ColdPlugVFIO config.PCIePort
|
||||||
}
|
}
|
||||||
@ -289,7 +283,6 @@ func (q *qemu) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso
|
|||||||
q.state.UUID = uuid.Generate().String()
|
q.state.UUID = uuid.Generate().String()
|
||||||
q.state.HotPlugVFIO = q.config.HotPlugVFIO
|
q.state.HotPlugVFIO = q.config.HotPlugVFIO
|
||||||
q.state.ColdPlugVFIO = q.config.ColdPlugVFIO
|
q.state.ColdPlugVFIO = q.config.ColdPlugVFIO
|
||||||
q.state.HotplugVFIOOnRootBus = q.config.HotplugVFIOOnRootBus
|
|
||||||
q.state.HotPlugVFIO = q.config.HotPlugVFIO
|
q.state.HotPlugVFIO = q.config.HotPlugVFIO
|
||||||
|
|
||||||
// The path might already exist, but in case of VM templating,
|
// The path might already exist, but in case of VM templating,
|
||||||
@ -792,7 +785,7 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
vfioOnRootPort := (q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort || q.state.HotplugVFIOOnRootBus)
|
vfioOnRootPort := (q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort)
|
||||||
vfioOnSwitchPort := (q.state.HotPlugVFIO == config.SwitchPort || q.state.ColdPlugVFIO == config.SwitchPort)
|
vfioOnSwitchPort := (q.state.HotPlugVFIO == config.SwitchPort || q.state.ColdPlugVFIO == config.SwitchPort)
|
||||||
|
|
||||||
numOfVhostUserBlockDevices := len(hypervisorConfig.VhostUserBlkDevices)
|
numOfVhostUserBlockDevices := len(hypervisorConfig.VhostUserBlkDevices)
|
||||||
@ -1638,7 +1631,7 @@ func (q *qemu) hotplugAddVhostUserBlkDevice(ctx context.Context, vAttr *config.V
|
|||||||
config.PCIeDevices[config.RootPort][devID] = true
|
config.PCIeDevices[config.RootPort][devID] = true
|
||||||
|
|
||||||
bridgeQomPath := fmt.Sprintf("%s%s", qomPathPrefix, bridgeID)
|
bridgeQomPath := fmt.Sprintf("%s%s", qomPathPrefix, bridgeID)
|
||||||
bridgeSlot, err := q.qomGetSlot(bridgeQomPath)
|
bridgeSlot, err := q.arch.qomGetSlot(bridgeQomPath, &q.qmpMonitorCh)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -1741,88 +1734,6 @@ func (q *qemu) hotplugVhostUserDevice(ctx context.Context, vAttr *config.VhostUs
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Query QMP to find the PCI slot of a device, given its QOM path or ID
|
|
||||||
func (q *qemu) qomGetSlot(qomPath string) (types.PciSlot, error) {
|
|
||||||
addr, err := q.qmpMonitorCh.qmp.ExecQomGet(q.qmpMonitorCh.ctx, qomPath, "addr")
|
|
||||||
if err != nil {
|
|
||||||
return types.PciSlot{}, err
|
|
||||||
}
|
|
||||||
addrf, ok := addr.(float64)
|
|
||||||
// XXX going via float makes no real sense, but that's how
|
|
||||||
// JSON works, and we'll get away with it for the small values
|
|
||||||
// we have here
|
|
||||||
if !ok {
|
|
||||||
return types.PciSlot{}, fmt.Errorf("addr QOM property of %q is %T not a number", qomPath, addr)
|
|
||||||
}
|
|
||||||
addri := int(addrf)
|
|
||||||
|
|
||||||
slotNum, funcNum := addri>>3, addri&0x7
|
|
||||||
if funcNum != 0 {
|
|
||||||
return types.PciSlot{}, fmt.Errorf("Unexpected non-zero PCI function (%02x.%1x) on %q",
|
|
||||||
slotNum, funcNum, qomPath)
|
|
||||||
}
|
|
||||||
|
|
||||||
return types.PciSlotFromInt(slotNum)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Query QMP to find a device's PCI path given its QOM path or ID
|
|
||||||
func (q *qemu) qomGetPciPath(qemuID string) (types.PciPath, error) {
|
|
||||||
|
|
||||||
var slots []types.PciSlot
|
|
||||||
|
|
||||||
devSlot, err := q.qomGetSlot(qemuID)
|
|
||||||
if err != nil {
|
|
||||||
return types.PciPath{}, err
|
|
||||||
}
|
|
||||||
slots = append(slots, devSlot)
|
|
||||||
|
|
||||||
// This only works for Q35 and Virt
|
|
||||||
r, _ := regexp.Compile(`^/machine/.*/pcie.0`)
|
|
||||||
|
|
||||||
var parentPath = qemuID
|
|
||||||
// We do not want to use a forever loop here, a deeper PCIe topology
|
|
||||||
// than 5 is already not advisable just for the sake of having enough
|
|
||||||
// buffer we limit ourselves to 10 and leave the loop early if we hit
|
|
||||||
// the root bus.
|
|
||||||
for i := 1; i <= maxPCIeTopoDepth; i++ {
|
|
||||||
parenBusQOM, err := q.qmpMonitorCh.qmp.ExecQomGet(q.qmpMonitorCh.ctx, parentPath, "parent_bus")
|
|
||||||
if err != nil {
|
|
||||||
return types.PciPath{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
busQOM, ok := parenBusQOM.(string)
|
|
||||||
if !ok {
|
|
||||||
return types.PciPath{}, fmt.Errorf("parent_bus QOM property of %s is %t not a string", qemuID, parenBusQOM)
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we hit /machine/q35/pcie.0 we're done this is the root bus
|
|
||||||
// we climbed the complete hierarchy
|
|
||||||
if r.Match([]byte(busQOM)) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
// `bus` is the QOM path of the QOM bus object, but we need
|
|
||||||
// the PCI parent_bus which manages that bus. There doesn't seem
|
|
||||||
// to be a way to get that other than to simply drop the last
|
|
||||||
// path component.
|
|
||||||
idx := strings.LastIndex(busQOM, "/")
|
|
||||||
if idx == -1 {
|
|
||||||
return types.PciPath{}, fmt.Errorf("Bus has unexpected QOM path %s", busQOM)
|
|
||||||
}
|
|
||||||
parentBus := busQOM[:idx]
|
|
||||||
|
|
||||||
parentSlot, err := q.qomGetSlot(parentBus)
|
|
||||||
if err != nil {
|
|
||||||
return types.PciPath{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Prepend the slots, since we're climbing the hierarchy
|
|
||||||
slots = append([]types.PciSlot{parentSlot}, slots...)
|
|
||||||
parentPath = parentBus
|
|
||||||
}
|
|
||||||
return types.PciPathFromSlots(slots...)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (q *qemu) hotplugVFIODeviceRootPort(ctx context.Context, device *config.VFIODev) (err error) {
|
func (q *qemu) hotplugVFIODeviceRootPort(ctx context.Context, device *config.VFIODev) (err error) {
|
||||||
return q.executeVFIODeviceAdd(device)
|
return q.executeVFIODeviceAdd(device)
|
||||||
}
|
}
|
||||||
@ -1852,7 +1763,7 @@ func (q *qemu) executePCIVFIODeviceAdd(device *config.VFIODev, addr string, brid
|
|||||||
case config.VFIOPCIDeviceMediatedType:
|
case config.VFIOPCIDeviceMediatedType:
|
||||||
return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.ID, device.SysfsDev, addr, bridgeID, romFile)
|
return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.ID, device.SysfsDev, addr, bridgeID, romFile)
|
||||||
case config.VFIOAPDeviceMediatedType:
|
case config.VFIOAPDeviceMediatedType:
|
||||||
return q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev)
|
return q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev, device.ID)
|
||||||
default:
|
default:
|
||||||
return fmt.Errorf("Incorrect VFIO device type found")
|
return fmt.Errorf("Incorrect VFIO device type found")
|
||||||
}
|
}
|
||||||
@ -1865,7 +1776,7 @@ func (q *qemu) executeVFIODeviceAdd(device *config.VFIODev) error {
|
|||||||
case config.VFIOPCIDeviceMediatedType:
|
case config.VFIOPCIDeviceMediatedType:
|
||||||
return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.ID, device.SysfsDev, "", device.Bus, romFile)
|
return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.ID, device.SysfsDev, "", device.Bus, romFile)
|
||||||
case config.VFIOAPDeviceMediatedType:
|
case config.VFIOAPDeviceMediatedType:
|
||||||
return q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev)
|
return q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev, device.ID)
|
||||||
default:
|
default:
|
||||||
return fmt.Errorf("Incorrect VFIO device type found")
|
return fmt.Errorf("Incorrect VFIO device type found")
|
||||||
}
|
}
|
||||||
@ -1883,46 +1794,43 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op
|
|||||||
"hot-plug-vfio": q.state.HotPlugVFIO,
|
"hot-plug-vfio": q.state.HotPlugVFIO,
|
||||||
"device-info": string(buf),
|
"device-info": string(buf),
|
||||||
}).Info("Start hot-plug VFIO device")
|
}).Info("Start hot-plug VFIO device")
|
||||||
// In case MachineType is q35, a PCIe device is hotplugged on
|
|
||||||
// a PCIe Root Port or alternatively on a PCIe Switch Port
|
err = fmt.Errorf("Incorrect hot plug configuration %v for device %v found", q.state.HotPlugVFIO, device)
|
||||||
if q.HypervisorConfig().HypervisorMachineType != QemuQ35 && q.HypervisorConfig().HypervisorMachineType != QemuVirt {
|
|
||||||
device.Bus = ""
|
|
||||||
} else {
|
|
||||||
var err error
|
|
||||||
// In case HotplugVFIOOnRootBus is true, devices are hotplugged on the root bus
|
// In case HotplugVFIOOnRootBus is true, devices are hotplugged on the root bus
|
||||||
// for pc machine type instead of bridge. This is useful for devices that require
|
// for pc machine type instead of bridge. This is useful for devices that require
|
||||||
// a large PCI BAR which is a currently a limitation with PCI bridges.
|
// a large PCI BAR which is a currently a limitation with PCI bridges.
|
||||||
if q.state.HotPlugVFIO == config.RootPort || q.state.HotplugVFIOOnRootBus {
|
if q.state.HotPlugVFIO == config.RootPort {
|
||||||
err = q.hotplugVFIODeviceRootPort(ctx, device)
|
err = q.hotplugVFIODeviceRootPort(ctx, device)
|
||||||
} else if q.state.HotPlugVFIO == config.SwitchPort {
|
} else if q.state.HotPlugVFIO == config.SwitchPort {
|
||||||
err = q.hotplugVFIODeviceSwitchPort(ctx, device)
|
err = q.hotplugVFIODeviceSwitchPort(ctx, device)
|
||||||
} else {
|
} else if q.state.HotPlugVFIO == config.BridgePort {
|
||||||
err = q.hotplugVFIODeviceBridgePort(ctx, device)
|
err = q.hotplugVFIODeviceBridgePort(ctx, device)
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
|
||||||
// XXX: Depending on whether we're doing root port or
|
// Depending on whether we're doing root port or
|
||||||
// bridge hotplug, and how the bridge is set up in
|
// bridge hotplug, and how the bridge is set up in
|
||||||
// other parts of the code, we may or may not already
|
// other parts of the code, we may or may not already
|
||||||
// have information about the slot number of the
|
// have information about the slot number of the
|
||||||
// bridge and or the device. For simplicity, just
|
// bridge and or the device. For simplicity, just
|
||||||
// query both of them back from qemu
|
// query both of them back from qemu based on the arch
|
||||||
device.GuestPciPath, err = q.qomGetPciPath(device.ID)
|
device.GuestPciPath, err = q.arch.qomGetPciPath(device.ID, &q.qmpMonitorCh)
|
||||||
|
|
||||||
return err
|
return err
|
||||||
}
|
} else {
|
||||||
|
|
||||||
q.Logger().WithField("dev-id", device.ID).Info("Start hot-unplug VFIO device")
|
q.Logger().WithField("dev-id", device.ID).Info("Start hot-unplug VFIO device")
|
||||||
|
|
||||||
if !q.state.HotplugVFIOOnRootBus {
|
if q.state.HotPlugVFIO == config.BridgePort {
|
||||||
if err := q.arch.removeDeviceFromBridge(device.ID); err != nil {
|
if err := q.arch.removeDeviceFromBridge(device.ID); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return q.qmpMonitorCh.qmp.ExecuteDeviceDel(q.qmpMonitorCh.ctx, device.ID)
|
return q.qmpMonitorCh.qmp.ExecuteDeviceDel(q.qmpMonitorCh.ctx, device.ID)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (q *qemu) hotAddNetDevice(name, hardAddr string, VMFds, VhostFds []*os.File) error {
|
func (q *qemu) hotAddNetDevice(name, hardAddr string, VMFds, VhostFds []*os.File) error {
|
||||||
@ -2881,7 +2789,6 @@ func (q *qemu) Save() (s hv.HypervisorState) {
|
|||||||
s.Type = string(QemuHypervisor)
|
s.Type = string(QemuHypervisor)
|
||||||
s.UUID = q.state.UUID
|
s.UUID = q.state.UUID
|
||||||
s.HotpluggedMemory = q.state.HotpluggedMemory
|
s.HotpluggedMemory = q.state.HotpluggedMemory
|
||||||
s.HotplugVFIOOnRootBus = q.state.HotplugVFIOOnRootBus
|
|
||||||
|
|
||||||
for _, bridge := range q.arch.getBridges() {
|
for _, bridge := range q.arch.getBridges() {
|
||||||
s.Bridges = append(s.Bridges, hv.Bridge{
|
s.Bridges = append(s.Bridges, hv.Bridge{
|
||||||
@ -2903,7 +2810,6 @@ func (q *qemu) Save() (s hv.HypervisorState) {
|
|||||||
func (q *qemu) Load(s hv.HypervisorState) {
|
func (q *qemu) Load(s hv.HypervisorState) {
|
||||||
q.state.UUID = s.UUID
|
q.state.UUID = s.UUID
|
||||||
q.state.HotpluggedMemory = s.HotpluggedMemory
|
q.state.HotpluggedMemory = s.HotpluggedMemory
|
||||||
q.state.HotplugVFIOOnRootBus = s.HotplugVFIOOnRootBus
|
|
||||||
q.state.VirtiofsDaemonPid = s.VirtiofsDaemonPid
|
q.state.VirtiofsDaemonPid = s.VirtiofsDaemonPid
|
||||||
|
|
||||||
for _, bridge := range s.Bridges {
|
for _, bridge := range s.Bridges {
|
||||||
|
@ -13,6 +13,7 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
"regexp"
|
||||||
"runtime"
|
"runtime"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
@ -24,6 +25,11 @@ import (
|
|||||||
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
|
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// A deeper PCIe topology than 5 is already not advisable just for the sake
|
||||||
|
// of having enough buffer we limit ourselves to 10 and exit if we reach
|
||||||
|
// the root bus
|
||||||
|
const maxPCIeTopoDepth = 10
|
||||||
|
|
||||||
type qemuArch interface {
|
type qemuArch interface {
|
||||||
// enableNestingChecks nesting checks will be honoured
|
// enableNestingChecks nesting checks will be honoured
|
||||||
enableNestingChecks()
|
enableNestingChecks()
|
||||||
@ -158,6 +164,12 @@ type qemuArch interface {
|
|||||||
// scans the PCIe space and returns the biggest BAR sizes for 32-bit
|
// scans the PCIe space and returns the biggest BAR sizes for 32-bit
|
||||||
// and 64-bit addressable memory
|
// and 64-bit addressable memory
|
||||||
getBARsMaxAddressableMemory() (uint64, uint64)
|
getBARsMaxAddressableMemory() (uint64, uint64)
|
||||||
|
|
||||||
|
// Query QMP to find a device's PCI path given its QOM path or ID
|
||||||
|
qomGetPciPath(qemuID string, qmpCh *qmpChannel) (types.PciPath, error)
|
||||||
|
|
||||||
|
// Query QMP to find the PCI slot of a device, given its QOM path or ID
|
||||||
|
qomGetSlot(qomPath string, qmpCh *qmpChannel) (types.PciSlot, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
type qemuArchBase struct {
|
type qemuArchBase struct {
|
||||||
@ -881,3 +893,85 @@ func (q *qemuArchBase) appendProtectionDevice(devices []govmmQemu.Device, firmwa
|
|||||||
hvLogger.WithField("arch", runtime.GOARCH).Warnf("Confidential Computing has not been implemented for this architecture")
|
hvLogger.WithField("arch", runtime.GOARCH).Warnf("Confidential Computing has not been implemented for this architecture")
|
||||||
return devices, firmware, nil
|
return devices, firmware, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Query QMP to find the PCI slot of a device, given its QOM path or ID
|
||||||
|
func (q *qemuArchBase) qomGetSlot(qomPath string, qmpCh *qmpChannel) (types.PciSlot, error) {
|
||||||
|
addr, err := qmpCh.qmp.ExecQomGet(qmpCh.ctx, qomPath, "addr")
|
||||||
|
if err != nil {
|
||||||
|
return types.PciSlot{}, err
|
||||||
|
}
|
||||||
|
addrf, ok := addr.(float64)
|
||||||
|
// XXX going via float makes no real sense, but that's how
|
||||||
|
// JSON works, and we'll get away with it for the small values
|
||||||
|
// we have here
|
||||||
|
if !ok {
|
||||||
|
return types.PciSlot{}, fmt.Errorf("addr QOM property of %q is %T not a number", qomPath, addr)
|
||||||
|
}
|
||||||
|
addri := int(addrf)
|
||||||
|
|
||||||
|
slotNum, funcNum := addri>>3, addri&0x7
|
||||||
|
if funcNum != 0 {
|
||||||
|
return types.PciSlot{}, fmt.Errorf("Unexpected non-zero PCI function (%02x.%1x) on %q",
|
||||||
|
slotNum, funcNum, qomPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
return types.PciSlotFromInt(slotNum)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Query QMP to find a device's PCI path given its QOM path or ID
|
||||||
|
func (q *qemuArchBase) qomGetPciPath(qemuID string, qmpCh *qmpChannel) (types.PciPath, error) {
|
||||||
|
|
||||||
|
var slots []types.PciSlot
|
||||||
|
|
||||||
|
devSlot, err := q.qomGetSlot(qemuID, qmpCh)
|
||||||
|
if err != nil {
|
||||||
|
return types.PciPath{}, err
|
||||||
|
}
|
||||||
|
slots = append(slots, devSlot)
|
||||||
|
|
||||||
|
// This only works for Q35 and Virt
|
||||||
|
r, _ := regexp.Compile(`^/machine/.*/pcie.0`)
|
||||||
|
|
||||||
|
var parentPath = qemuID
|
||||||
|
// We do not want to use a forever loop here, a deeper PCIe topology
|
||||||
|
// than 5 is already not advisable just for the sake of having enough
|
||||||
|
// buffer we limit ourselves to 10 and leave the loop early if we hit
|
||||||
|
// the root bus.
|
||||||
|
for i := 1; i <= maxPCIeTopoDepth; i++ {
|
||||||
|
parenBusQOM, err := qmpCh.qmp.ExecQomGet(qmpCh.ctx, parentPath, "parent_bus")
|
||||||
|
if err != nil {
|
||||||
|
return types.PciPath{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
busQOM, ok := parenBusQOM.(string)
|
||||||
|
if !ok {
|
||||||
|
return types.PciPath{}, fmt.Errorf("parent_bus QOM property of %s is %t not a string", qemuID, parenBusQOM)
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we hit /machine/q35/pcie.0 we're done this is the root bus
|
||||||
|
// we climbed the complete hierarchy
|
||||||
|
if r.Match([]byte(busQOM)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// `bus` is the QOM path of the QOM bus object, but we need
|
||||||
|
// the PCI parent_bus which manages that bus. There doesn't seem
|
||||||
|
// to be a way to get that other than to simply drop the last
|
||||||
|
// path component.
|
||||||
|
idx := strings.LastIndex(busQOM, "/")
|
||||||
|
if idx == -1 {
|
||||||
|
return types.PciPath{}, fmt.Errorf("Bus has unexpected QOM path %s", busQOM)
|
||||||
|
}
|
||||||
|
parentBus := busQOM[:idx]
|
||||||
|
|
||||||
|
parentSlot, err := q.qomGetSlot(parentBus, qmpCh)
|
||||||
|
if err != nil {
|
||||||
|
return types.PciPath{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prepend the slots, since we're climbing the hierarchy
|
||||||
|
slots = append([]types.PciSlot{parentSlot}, slots...)
|
||||||
|
parentPath = parentBus
|
||||||
|
}
|
||||||
|
return types.PciPathFromSlots(slots...)
|
||||||
|
}
|
||||||
|
@ -351,3 +351,32 @@ func (q *qemuS390x) appendProtectionDevice(devices []govmmQemu.Device, firmware,
|
|||||||
return devices, firmware, fmt.Errorf("Unsupported guest protection technology: %v", q.protection)
|
return devices, firmware, fmt.Errorf("Unsupported guest protection technology: %v", q.protection)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (q *qemuS390x) appendVFIODevice(devices []govmmQemu.Device, vfioDev config.VFIODev) []govmmQemu.Device {
|
||||||
|
if vfioDev.SysfsDev == "" {
|
||||||
|
return devices
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(vfioDev.APDevices) > 0 {
|
||||||
|
devices = append(devices,
|
||||||
|
govmmQemu.VFIODevice{
|
||||||
|
SysfsDev: vfioDev.SysfsDev,
|
||||||
|
Transport: govmmQemu.TransportAP,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return devices
|
||||||
|
|
||||||
|
}
|
||||||
|
devices = append(devices,
|
||||||
|
govmmQemu.VFIODevice{
|
||||||
|
SysfsDev: vfioDev.SysfsDev,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return devices
|
||||||
|
}
|
||||||
|
|
||||||
|
// Query QMP to find a device's PCI path given its QOM path or ID
|
||||||
|
func (q *qemuArchBase) qomGetPciPath(qemuID string, qmpCh *qmpChannel) (types.PciPath, error) {
|
||||||
|
hvLogger.Warnf("qomGetPciPath not implemented for s390x")
|
||||||
|
return types.PciPath{}, nil
|
||||||
|
}
|
||||||
|
@ -619,6 +619,7 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
|
|||||||
// Aggregate all the containner devices for hot-plug and use them to dedcue
|
// Aggregate all the containner devices for hot-plug and use them to dedcue
|
||||||
// the correct amount of ports to reserve for the hypervisor.
|
// the correct amount of ports to reserve for the hypervisor.
|
||||||
hotPlugVFIO := (sandboxConfig.HypervisorConfig.HotPlugVFIO != config.NoPort)
|
hotPlugVFIO := (sandboxConfig.HypervisorConfig.HotPlugVFIO != config.NoPort)
|
||||||
|
stripVFIO := sandboxConfig.VfioMode == config.VFIOModeGuestKernel
|
||||||
|
|
||||||
var vfioDevices []config.DeviceInfo
|
var vfioDevices []config.DeviceInfo
|
||||||
// vhost-user-block device is a PCIe device in Virt, keep track of it
|
// vhost-user-block device is a PCIe device in Virt, keep track of it
|
||||||
@ -644,9 +645,11 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
|
|||||||
// We need to remove the devices marked for cold-plug
|
// We need to remove the devices marked for cold-plug
|
||||||
// otherwise at the container level the kata-agent
|
// otherwise at the container level the kata-agent
|
||||||
// will try to hot-plug them.
|
// will try to hot-plug them.
|
||||||
|
if stripVFIO {
|
||||||
sandboxConfig.Containers[cnt].DeviceInfos[dev].ID = "remove-we-are-cold-plugging"
|
sandboxConfig.Containers[cnt].DeviceInfos[dev].ID = "remove-we-are-cold-plugging"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
var filteredDevices []config.DeviceInfo
|
var filteredDevices []config.DeviceInfo
|
||||||
for _, device := range containers.DeviceInfos {
|
for _, device := range containers.DeviceInfos {
|
||||||
if device.ID != "remove-we-are-cold-plugging" {
|
if device.ID != "remove-we-are-cold-plugging" {
|
||||||
@ -656,6 +659,7 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
|
|||||||
sandboxConfig.Containers[cnt].DeviceInfos = filteredDevices
|
sandboxConfig.Containers[cnt].DeviceInfos = filteredDevices
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sandboxConfig.HypervisorConfig.VFIODevices = vfioDevices
|
sandboxConfig.HypervisorConfig.VFIODevices = vfioDevices
|
||||||
sandboxConfig.HypervisorConfig.VhostUserBlkDevices = vhostUserBlkDevices
|
sandboxConfig.HypervisorConfig.VhostUserBlkDevices = vhostUserBlkDevices
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user