gpu: Various fixes for virt machine type

The PCI QOM path was not deduced correctly; added a regex for correct
path walking.

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
This commit is contained in:
Zvonko Kaiser 2023-05-12 09:58:54 +00:00
parent 40101ea7db
commit b11246c3aa
5 changed files with 45 additions and 28 deletions

View File

@ -214,14 +214,18 @@ func GetVFIODetails(deviceFileName, iommuDevicesPath string) (deviceBDF, deviceS
switch vfioDeviceType { switch vfioDeviceType {
case config.VFIOPCIDeviceNormalType: case config.VFIOPCIDeviceNormalType:
// Get bdf of device eg. 0000:00:1c.0 // Get bdf of device eg. 0000:00:1c.0
deviceBDF = getBDF(deviceFileName) //deviceBDF = getBDF(deviceFileName)
// The old implementation did not consider the case where
// vfio devices are located on different root buses. The
// kata-agent will handle the case now, here use the full PCI addr
deviceBDF = deviceFileName
// Get sysfs path used by cloud-hypervisor // Get sysfs path used by cloud-hypervisor
deviceSysfsDev = filepath.Join(config.SysBusPciDevicesPath, deviceFileName) deviceSysfsDev = filepath.Join(config.SysBusPciDevicesPath, deviceFileName)
case config.VFIOPCIDeviceMediatedType: case config.VFIOPCIDeviceMediatedType:
// Get sysfsdev of device eg. /sys/devices/pci0000:00/0000:00:02.0/f79944e4-5a3d-11e8-99ce-479cbab002e4 // Get sysfsdev of device eg. /sys/devices/pci0000:00/0000:00:02.0/f79944e4-5a3d-11e8-99ce-479cbab002e4
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName) sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
deviceSysfsDev, err = GetSysfsDev(sysfsDevStr) deviceSysfsDev, err = GetSysfsDev(sysfsDevStr)
deviceBDF = getBDF(getMediatedBDF(deviceSysfsDev)) deviceBDF = GetBDF(getMediatedBDF(deviceSysfsDev))
case config.VFIOAPDeviceMediatedType: case config.VFIOAPDeviceMediatedType:
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName) sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
deviceSysfsDev, err = GetSysfsDev(sysfsDevStr) deviceSysfsDev, err = GetSysfsDev(sysfsDevStr)
@ -244,7 +248,7 @@ func getMediatedBDF(deviceSysfsDev string) string {
// getBDF returns the BDF of pci device // getBDF returns the BDF of pci device
// Expected input string format is [<domain>]:[<bus>][<slot>].[<func>] eg. 0000:02:10.0 // Expected input string format is [<domain>]:[<bus>][<slot>].[<func>] eg. 0000:02:10.0
func getBDF(deviceSysStr string) string { func GetBDF(deviceSysStr string) string {
tokens := strings.SplitN(deviceSysStr, ":", 2) tokens := strings.SplitN(deviceSysStr, ":", 2)
if len(tokens) == 1 { if len(tokens) == 1 {
return "" return ""

View File

@ -21,6 +21,7 @@ import (
"github.com/docker/go-units" "github.com/docker/go-units"
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/api" "github.com/kata-containers/kata-containers/src/runtime/pkg/device/api"
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/drivers"
volume "github.com/kata-containers/kata-containers/src/runtime/pkg/direct-volume" volume "github.com/kata-containers/kata-containers/src/runtime/pkg/direct-volume"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
"github.com/kata-containers/kata-containers/src/runtime/pkg/uuid" "github.com/kata-containers/kata-containers/src/runtime/pkg/uuid"
@ -1152,7 +1153,9 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c *
kataDevice.Type = kataVfioApDevType kataDevice.Type = kataVfioApDevType
kataDevice.Options = dev.APDevices kataDevice.Options = dev.APDevices
} else { } else {
kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", dev.BDF, dev.GuestPciPath)
devBDF := drivers.GetBDF(dev.BDF)
kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", devBDF, dev.GuestPciPath)
} }
} }

View File

@ -239,32 +239,40 @@ func evalMountPath(source, destination string) (string, string, error) {
// Mount describes a container mount. // Mount describes a container mount.
// nolint: govet // nolint: govet
type Mount struct { type Mount struct {
// FSGroup a group ID that the group ownership of the files for the mounted volume
// will need to be changed when set.
FSGroup *int
// Source is the source of the mount. // Source is the source of the mount.
Source string Source string
// Destination is the destination of the mount (within the container). // Destination is the destination of the mount (within the container).
Destination string Destination string
// Type specifies the type of filesystem to mount.
Type string
// HostPath used to store host side bind mount path // HostPath used to store host side bind mount path
HostPath string HostPath string
// GuestDeviceMount represents the path within the VM that the device // GuestDeviceMount represents the path within the VM that the device
// is mounted. Only relevant for block devices. This is tracked in the event // is mounted. Only relevant for block devices. This is tracked in the event
// runtime wants to query the agent for mount stats. // runtime wants to query the agent for mount stats.
GuestDeviceMount string GuestDeviceMount string
// BlockDeviceID represents block device that is attached to the // BlockDeviceID represents block device that is attached to the
// VM in case this mount is a block device file or a directory // VM in case this mount is a block device file or a directory
// backed by a block device. // backed by a block device.
BlockDeviceID string BlockDeviceID string
// Type specifies the type of filesystem to mount.
Type string // Options list all the mount options of the filesystem.
Options []string
// ReadOnly specifies if the mount should be read only or not
ReadOnly bool
// FSGroup a group ID that the group ownership of the files for the mounted volume
// will need to be changed when set.
FSGroup *int
// FSGroupChangePolicy specifies the policy that will be used when applying // FSGroupChangePolicy specifies the policy that will be used when applying
// group id ownership change for a volume. // group id ownership change for a volume.
FSGroupChangePolicy volume.FSGroupChangePolicy FSGroupChangePolicy volume.FSGroupChangePolicy
// Options list all the mount options of the filesystem.
Options []string
// ReadOnly specifies if the mount should be read only or not
ReadOnly bool
} }
func isSymlink(path string) bool { func isSymlink(path string) bool {

View File

@ -701,7 +701,7 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
} }
} }
if machine.Type == QemuQ35 { if machine.Type == QemuQ35 || machine.Type == QemuVirt {
if err := q.createPCIeTopology(&qemuConfig, hypervisorConfig); err != nil { if err := q.createPCIeTopology(&qemuConfig, hypervisorConfig); err != nil {
q.Logger().WithError(err).Errorf("Cannot create PCIe topology") q.Logger().WithError(err).Errorf("Cannot create PCIe topology")
return err return err
@ -747,7 +747,6 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
return nil return nil
} }
q.Logger().Info("### PCIe Topology ###")
// Add PCIe Root Port or PCIe Switches to the hypervisor // Add PCIe Root Port or PCIe Switches to the hypervisor
// The pcie.0 bus do not support hot-plug, but PCIe device can be hot-plugged // The pcie.0 bus do not support hot-plug, but PCIe device can be hot-plugged
// into a PCIe Root Port or PCIe Switch. // into a PCIe Root Port or PCIe Switch.
@ -780,16 +779,12 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
if err != nil { if err != nil {
return fmt.Errorf("Cannot get all VFIO devices from IOMMU group with device: %v err: %v", dev, err) return fmt.Errorf("Cannot get all VFIO devices from IOMMU group with device: %v err: %v", dev, err)
} }
q.Logger().Info("### PCIe Topology devices ", devicesPerIOMMUGroup)
for _, vfioDevice := range devicesPerIOMMUGroup { for _, vfioDevice := range devicesPerIOMMUGroup {
q.Logger().Info("### PCIe Topology vfioDevice ", vfioDevice)
if drivers.IsPCIeDevice(vfioDevice.BDF) { if drivers.IsPCIeDevice(vfioDevice.BDF) {
numOfPluggablePorts = numOfPluggablePorts + 1 numOfPluggablePorts = numOfPluggablePorts + 1
} }
} }
} }
q.Logger().Info("### PCIe Topology numOfPluggablePorts ", numOfPluggablePorts)
// If number of PCIe root ports > 16 then bail out otherwise we may // If number of PCIe root ports > 16 then bail out otherwise we may
// use up all slots or IO memory on the root bus and vfio-XXX-pci devices // use up all slots or IO memory on the root bus and vfio-XXX-pci devices
// cannot be added which are crucial for Kata max slots on root bus is 32 // cannot be added which are crucial for Kata max slots on root bus is 32
@ -798,7 +793,7 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort) return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort)
} }
if numOfPluggablePorts > maxPCIeSwitchPort { if numOfPluggablePorts > maxPCIeSwitchPort {
return fmt.Errorf("Number of PCIe Switch Ports exceeed allowed max of %d", maxPCIeRootPort) return fmt.Errorf("Number of PCIe Switch Ports exceeed allowed max of %d", maxPCIeSwitchPort)
} }
if q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort || q.state.HotplugVFIOOnRootBus { if q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort || q.state.HotplugVFIOOnRootBus {
@ -1757,6 +1752,8 @@ func (q *qemu) qomGetPciPath(qemuID string) (types.PciPath, error) {
} }
slots = append(slots, devSlot) slots = append(slots, devSlot)
r, _ := regexp.Compile(`^/machine/.*/pcie.0`)
var parentPath = qemuID var parentPath = qemuID
// We do not want to use a forever loop here, a deeper PCIe topology // We do not want to use a forever loop here, a deeper PCIe topology
// than 5 is already not advisable just for the sake of having enough // than 5 is already not advisable just for the sake of having enough
@ -1775,7 +1772,7 @@ func (q *qemu) qomGetPciPath(qemuID string) (types.PciPath, error) {
// If we hit /machine/q35/pcie.0 we're done this is the root bus // If we hit /machine/q35/pcie.0 we're done this is the root bus
// we climbed the complete hierarchy // we climbed the complete hierarchy
if strings.Contains(busQOM, "/machine/q35/pcie.0") { if r.Match([]byte(busQOM)) {
break break
} }
@ -1863,7 +1860,7 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op
}).Info("Start hot-plug VFIO device") }).Info("Start hot-plug VFIO device")
// In case MachineType is q35, a PCIe device is hotplugged on // In case MachineType is q35, a PCIe device is hotplugged on
// a PCIe Root Port or alternatively on a PCIe Switch Port // a PCIe Root Port or alternatively on a PCIe Switch Port
if q.HypervisorConfig().HypervisorMachineType != QemuQ35 { if q.HypervisorConfig().HypervisorMachineType != QemuQ35 && q.HypervisorConfig().HypervisorMachineType != QemuVirt {
device.Bus = "" device.Bus = ""
} else { } else {
var err error var err error
@ -2636,9 +2633,9 @@ func genericAppendPCIeRootPort(devices []govmmQemu.Device, number uint32, machin
// genericAppendPCIeSwitch adds a PCIe Swtich // genericAppendPCIeSwitch adds a PCIe Swtich
func genericAppendPCIeSwitchPort(devices []govmmQemu.Device, number uint32, machineType string, memSize32bit uint64, memSize64bit uint64) []govmmQemu.Device { func genericAppendPCIeSwitchPort(devices []govmmQemu.Device, number uint32, machineType string, memSize32bit uint64, memSize64bit uint64) []govmmQemu.Device {
// Q35 has the correct PCIe support, // Q35, Virt have the correct PCIe support,
// hence ignore all other machines // hence ignore all other machines
if machineType != QemuQ35 { if machineType != QemuQ35 && machineType != QemuVirt {
return devices return devices
} }

View File

@ -630,18 +630,23 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
sandboxConfig.Containers[cnt].DeviceInfos[dev].Port = sandboxConfig.HypervisorConfig.HotPlugVFIO sandboxConfig.Containers[cnt].DeviceInfos[dev].Port = sandboxConfig.HypervisorConfig.HotPlugVFIO
} }
if coldPlugVFIO && isVFIO { if coldPlugVFIO && isVFIO {
s.Logger().Info("### coldplug and vfio ", device, "coldplug ", sandboxConfig.HypervisorConfig.ColdPlugVFIO)
device.ColdPlug = true device.ColdPlug = true
device.Port = sandboxConfig.HypervisorConfig.ColdPlugVFIO device.Port = sandboxConfig.HypervisorConfig.ColdPlugVFIO
vfioDevices = append(vfioDevices, device) vfioDevices = append(vfioDevices, device)
// We need to remove the devices marked for cold-plug // We need to remove the devices marked for cold-plug
// otherwise at the container level the kata-agent // otherwise at the container level the kata-agent
// will try to hot-plug them. // will try to hot-plug them.
infos := sandboxConfig.Containers[cnt].DeviceInfos sandboxConfig.Containers[cnt].DeviceInfos[dev].ID = "remove-we-are-cold-plugging"
infos = append(infos[:dev], infos[dev+1:]...)
sandboxConfig.Containers[cnt].DeviceInfos = infos
} }
} }
var filteredDevices []config.DeviceInfo
for _, device := range containers.DeviceInfos {
if device.ID != "remove-we-are-cold-plugging" {
filteredDevices = append(filteredDevices, device)
}
}
sandboxConfig.Containers[cnt].DeviceInfos = filteredDevices
} }
sandboxConfig.HypervisorConfig.VFIODevices = vfioDevices sandboxConfig.HypervisorConfig.VFIODevices = vfioDevices