gpu: Various fixes for virt machine type

The PCI QOM path was not deduced correctly; added a regex for correct
path walking.

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
This commit is contained in:
Zvonko Kaiser 2023-05-12 09:58:54 +00:00
parent 40101ea7db
commit b11246c3aa
5 changed files with 45 additions and 28 deletions

View File

@ -214,14 +214,18 @@ func GetVFIODetails(deviceFileName, iommuDevicesPath string) (deviceBDF, deviceS
switch vfioDeviceType {
case config.VFIOPCIDeviceNormalType:
// Get bdf of device eg. 0000:00:1c.0
deviceBDF = getBDF(deviceFileName)
//deviceBDF = getBDF(deviceFileName)
// The old implementation did not consider the case where
// vfio devices are located on different root buses. The
// kata-agent will handle that case now; here we use the full PCI addr
deviceBDF = deviceFileName
// Get sysfs path used by cloud-hypervisor
deviceSysfsDev = filepath.Join(config.SysBusPciDevicesPath, deviceFileName)
case config.VFIOPCIDeviceMediatedType:
// Get sysfsdev of device eg. /sys/devices/pci0000:00/0000:00:02.0/f79944e4-5a3d-11e8-99ce-479cbab002e4
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
deviceSysfsDev, err = GetSysfsDev(sysfsDevStr)
deviceBDF = getBDF(getMediatedBDF(deviceSysfsDev))
deviceBDF = GetBDF(getMediatedBDF(deviceSysfsDev))
case config.VFIOAPDeviceMediatedType:
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
deviceSysfsDev, err = GetSysfsDev(sysfsDevStr)
@ -244,7 +248,7 @@ func getMediatedBDF(deviceSysfsDev string) string {
// getBDF returns the BDF of pci device
// Expected input string format is [<domain>]:[<bus>][<slot>].[<func>] eg. 0000:02:10.0
func getBDF(deviceSysStr string) string {
func GetBDF(deviceSysStr string) string {
tokens := strings.SplitN(deviceSysStr, ":", 2)
if len(tokens) == 1 {
return ""

View File

@ -21,6 +21,7 @@ import (
"github.com/docker/go-units"
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/api"
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/drivers"
volume "github.com/kata-containers/kata-containers/src/runtime/pkg/direct-volume"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
"github.com/kata-containers/kata-containers/src/runtime/pkg/uuid"
@ -1152,7 +1153,9 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c *
kataDevice.Type = kataVfioApDevType
kataDevice.Options = dev.APDevices
} else {
kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", dev.BDF, dev.GuestPciPath)
devBDF := drivers.GetBDF(dev.BDF)
kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", devBDF, dev.GuestPciPath)
}
}

View File

@ -239,32 +239,40 @@ func evalMountPath(source, destination string) (string, string, error) {
// Mount describes a container mount.
// nolint: govet
type Mount struct {
// FSGroup a group ID that the group ownership of the files for the mounted volume
// will need to be changed when set.
FSGroup *int
// Source is the source of the mount.
Source string
// Destination is the destination of the mount (within the container).
Destination string
// Type specifies the type of filesystem to mount.
Type string
// HostPath used to store host side bind mount path
HostPath string
// GuestDeviceMount represents the path within the VM that the device
// is mounted. Only relevant for block devices. This is tracked in the event
// runtime wants to query the agent for mount stats.
GuestDeviceMount string
// BlockDeviceID represents block device that is attached to the
// VM in case this mount is a block device file or a directory
// backed by a block device.
BlockDeviceID string
// Type specifies the type of filesystem to mount.
Type string
// Options list all the mount options of the filesystem.
Options []string
// ReadOnly specifies if the mount should be read only or not
ReadOnly bool
// FSGroup a group ID that the group ownership of the files for the mounted volume
// will need to be changed when set.
FSGroup *int
// FSGroupChangePolicy specifies the policy that will be used when applying
// group id ownership change for a volume.
FSGroupChangePolicy volume.FSGroupChangePolicy
// Options list all the mount options of the filesystem.
Options []string
// ReadOnly specifies if the mount should be read only or not
ReadOnly bool
}
func isSymlink(path string) bool {

View File

@ -701,7 +701,7 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
}
}
if machine.Type == QemuQ35 {
if machine.Type == QemuQ35 || machine.Type == QemuVirt {
if err := q.createPCIeTopology(&qemuConfig, hypervisorConfig); err != nil {
q.Logger().WithError(err).Errorf("Cannot create PCIe topology")
return err
@ -747,7 +747,6 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
return nil
}
q.Logger().Info("### PCIe Topology ###")
// Add PCIe Root Port or PCIe Switches to the hypervisor
// The pcie.0 bus does not support hot-plug, but a PCIe device can be hot-plugged
// into a PCIe Root Port or PCIe Switch.
@ -780,16 +779,12 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
if err != nil {
return fmt.Errorf("Cannot get all VFIO devices from IOMMU group with device: %v err: %v", dev, err)
}
q.Logger().Info("### PCIe Topology devices ", devicesPerIOMMUGroup)
for _, vfioDevice := range devicesPerIOMMUGroup {
q.Logger().Info("### PCIe Topology vfioDevice ", vfioDevice)
if drivers.IsPCIeDevice(vfioDevice.BDF) {
numOfPluggablePorts = numOfPluggablePorts + 1
}
}
}
q.Logger().Info("### PCIe Topology numOfPluggablePorts ", numOfPluggablePorts)
// If number of PCIe root ports > 16 then bail out otherwise we may
// use up all slots or IO memory on the root bus and vfio-XXX-pci devices
// cannot be added which are crucial for Kata max slots on root bus is 32
@ -798,7 +793,7 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort)
}
if numOfPluggablePorts > maxPCIeSwitchPort {
return fmt.Errorf("Number of PCIe Switch Ports exceeed allowed max of %d", maxPCIeRootPort)
return fmt.Errorf("Number of PCIe Switch Ports exceeed allowed max of %d", maxPCIeSwitchPort)
}
if q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort || q.state.HotplugVFIOOnRootBus {
@ -1757,6 +1752,8 @@ func (q *qemu) qomGetPciPath(qemuID string) (types.PciPath, error) {
}
slots = append(slots, devSlot)
r, _ := regexp.Compile(`^/machine/.*/pcie.0`)
var parentPath = qemuID
// We do not want to use a forever loop here, a deeper PCIe topology
// than 5 is already not advisable just for the sake of having enough
@ -1775,7 +1772,7 @@ func (q *qemu) qomGetPciPath(qemuID string) (types.PciPath, error) {
// If we hit /machine/q35/pcie.0 we're done this is the root bus
// we climbed the complete hierarchy
if strings.Contains(busQOM, "/machine/q35/pcie.0") {
if r.Match([]byte(busQOM)) {
break
}
@ -1863,7 +1860,7 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op
}).Info("Start hot-plug VFIO device")
// In case MachineType is q35, a PCIe device is hotplugged on
// a PCIe Root Port or alternatively on a PCIe Switch Port
if q.HypervisorConfig().HypervisorMachineType != QemuQ35 {
if q.HypervisorConfig().HypervisorMachineType != QemuQ35 && q.HypervisorConfig().HypervisorMachineType != QemuVirt {
device.Bus = ""
} else {
var err error
@ -2636,9 +2633,9 @@ func genericAppendPCIeRootPort(devices []govmmQemu.Device, number uint32, machin
// genericAppendPCIeSwitchPort adds a PCIe Switch
func genericAppendPCIeSwitchPort(devices []govmmQemu.Device, number uint32, machineType string, memSize32bit uint64, memSize64bit uint64) []govmmQemu.Device {
// Q35 has the correct PCIe support,
// Q35, Virt have the correct PCIe support,
// hence ignore all other machines
if machineType != QemuQ35 {
if machineType != QemuQ35 && machineType != QemuVirt {
return devices
}

View File

@ -630,18 +630,23 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
sandboxConfig.Containers[cnt].DeviceInfos[dev].Port = sandboxConfig.HypervisorConfig.HotPlugVFIO
}
if coldPlugVFIO && isVFIO {
s.Logger().Info("### coldplug and vfio ", device, "coldplug ", sandboxConfig.HypervisorConfig.ColdPlugVFIO)
device.ColdPlug = true
device.Port = sandboxConfig.HypervisorConfig.ColdPlugVFIO
vfioDevices = append(vfioDevices, device)
// We need to remove the devices marked for cold-plug
// otherwise at the container level the kata-agent
// will try to hot-plug them.
infos := sandboxConfig.Containers[cnt].DeviceInfos
infos = append(infos[:dev], infos[dev+1:]...)
sandboxConfig.Containers[cnt].DeviceInfos = infos
sandboxConfig.Containers[cnt].DeviceInfos[dev].ID = "remove-we-are-cold-plugging"
}
}
var filteredDevices []config.DeviceInfo
for _, device := range containers.DeviceInfos {
if device.ID != "remove-we-are-cold-plugging" {
filteredDevices = append(filteredDevices, device)
}
}
sandboxConfig.Containers[cnt].DeviceInfos = filteredDevices
}
sandboxConfig.HypervisorConfig.VFIODevices = vfioDevices