From b11246c3aab45b4bb37a8c8acdf7e0a3612a212d Mon Sep 17 00:00:00 2001 From: Zvonko Kaiser Date: Fri, 12 May 2023 09:58:54 +0000 Subject: [PATCH] gpu: Various fixes for virt machine type The PCI QOM path was not deduced correctly; add a regex for correct path walking. Signed-off-by: Zvonko Kaiser --- src/runtime/pkg/device/drivers/vfio.go | 10 ++++++--- src/runtime/virtcontainers/kata_agent.go | 5 ++++- src/runtime/virtcontainers/mount.go | 26 ++++++++++++++++-------- src/runtime/virtcontainers/qemu.go | 19 ++++++++--------- src/runtime/virtcontainers/sandbox.go | 13 ++++++++---- 5 files changed, 45 insertions(+), 28 deletions(-) diff --git a/src/runtime/pkg/device/drivers/vfio.go b/src/runtime/pkg/device/drivers/vfio.go index 8f63b2fc6c..5e5f2a10bd 100644 --- a/src/runtime/pkg/device/drivers/vfio.go +++ b/src/runtime/pkg/device/drivers/vfio.go @@ -214,14 +214,18 @@ func GetVFIODetails(deviceFileName, iommuDevicesPath string) (deviceBDF, deviceS switch vfioDeviceType { case config.VFIOPCIDeviceNormalType: // Get bdf of device eg. 0000:00:1c.0 - deviceBDF = getBDF(deviceFileName) + //deviceBDF = getBDF(deviceFileName) + // The old implementation did not consider the case where + // vfio devices are located on different root busses. The + // kata-agent will handle the case now, here use the full PCI addr + deviceBDF = deviceFileName // Get sysfs path used by cloud-hypervisor deviceSysfsDev = filepath.Join(config.SysBusPciDevicesPath, deviceFileName) case config.VFIOPCIDeviceMediatedType: // Get sysfsdev of device eg. 
/sys/devices/pci0000:00/0000:00:02.0/f79944e4-5a3d-11e8-99ce-479cbab002e4 sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName) deviceSysfsDev, err = GetSysfsDev(sysfsDevStr) - deviceBDF = getBDF(getMediatedBDF(deviceSysfsDev)) + deviceBDF = GetBDF(getMediatedBDF(deviceSysfsDev)) case config.VFIOAPDeviceMediatedType: sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName) deviceSysfsDev, err = GetSysfsDev(sysfsDevStr) @@ -244,7 +248,7 @@ func getMediatedBDF(deviceSysfsDev string) string { // getBDF returns the BDF of pci device // Expected input string format is []:[][].[] eg. 0000:02:10.0 -func getBDF(deviceSysStr string) string { +func GetBDF(deviceSysStr string) string { tokens := strings.SplitN(deviceSysStr, ":", 2) if len(tokens) == 1 { return "" diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index a350c1a486..a3e5f5d421 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -21,6 +21,7 @@ import ( "github.com/docker/go-units" "github.com/kata-containers/kata-containers/src/runtime/pkg/device/api" "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" + "github.com/kata-containers/kata-containers/src/runtime/pkg/device/drivers" volume "github.com/kata-containers/kata-containers/src/runtime/pkg/direct-volume" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" "github.com/kata-containers/kata-containers/src/runtime/pkg/uuid" @@ -1152,7 +1153,9 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c * kataDevice.Type = kataVfioApDevType kataDevice.Options = dev.APDevices } else { - kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", dev.BDF, dev.GuestPciPath) + + devBDF := drivers.GetBDF(dev.BDF) + kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", devBDF, dev.GuestPciPath) } } diff --git a/src/runtime/virtcontainers/mount.go b/src/runtime/virtcontainers/mount.go index 
acf4f05f6f..6c2e204208 100644 --- a/src/runtime/virtcontainers/mount.go +++ b/src/runtime/virtcontainers/mount.go @@ -239,32 +239,40 @@ func evalMountPath(source, destination string) (string, string, error) { // Mount describes a container mount. // nolint: govet type Mount struct { - // FSGroup a group ID that the group ownership of the files for the mounted volume - // will need to be changed when set. - FSGroup *int // Source is the source of the mount. Source string // Destination is the destination of the mount (within the container). Destination string + + // Type specifies the type of filesystem to mount. + Type string + // HostPath used to store host side bind mount path HostPath string + // GuestDeviceMount represents the path within the VM that the device // is mounted. Only relevant for block devices. This is tracked in the event // runtime wants to query the agent for mount stats. GuestDeviceMount string + // BlockDeviceID represents block device that is attached to the // VM in case this mount is a block device file or a directory // backed by a block device. BlockDeviceID string - // Type specifies the type of filesystem to mount. - Type string + + // Options list all the mount options of the filesystem. + Options []string + + // ReadOnly specifies if the mount should be read only or not + ReadOnly bool + + // FSGroup a group ID that the group ownership of the files for the mounted volume + // will need to be changed when set. + FSGroup *int + // FSGroupChangePolicy specifies the policy that will be used when applying // group id ownership change for a volume. FSGroupChangePolicy volume.FSGroupChangePolicy - // Options list all the mount options of the filesystem. 
- Options []string - // ReadOnly specifies if the mount should be read only or not - ReadOnly bool } func isSymlink(path string) bool { diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 284ce2d793..79dfb286e7 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -701,7 +701,7 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi } } - if machine.Type == QemuQ35 { + if machine.Type == QemuQ35 || machine.Type == QemuVirt { if err := q.createPCIeTopology(&qemuConfig, hypervisorConfig); err != nil { q.Logger().WithError(err).Errorf("Cannot create PCIe topology") return err @@ -747,7 +747,6 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig return nil } - q.Logger().Info("### PCIe Topology ###") // Add PCIe Root Port or PCIe Switches to the hypervisor // The pcie.0 bus do not support hot-plug, but PCIe device can be hot-plugged // into a PCIe Root Port or PCIe Switch. 
@@ -780,16 +779,12 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig if err != nil { return fmt.Errorf("Cannot get all VFIO devices from IOMMU group with device: %v err: %v", dev, err) } - q.Logger().Info("### PCIe Topology devices ", devicesPerIOMMUGroup) for _, vfioDevice := range devicesPerIOMMUGroup { - q.Logger().Info("### PCIe Topology vfioDevice ", vfioDevice) if drivers.IsPCIeDevice(vfioDevice.BDF) { numOfPluggablePorts = numOfPluggablePorts + 1 } } } - q.Logger().Info("### PCIe Topology numOfPluggablePorts ", numOfPluggablePorts) - // If number of PCIe root ports > 16 then bail out otherwise we may // use up all slots or IO memory on the root bus and vfio-XXX-pci devices // cannot be added which are crucial for Kata max slots on root bus is 32 @@ -798,7 +793,7 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort) } if numOfPluggablePorts > maxPCIeSwitchPort { - return fmt.Errorf("Number of PCIe Switch Ports exceeed allowed max of %d", maxPCIeRootPort) + return fmt.Errorf("Number of PCIe Switch Ports exceeed allowed max of %d", maxPCIeSwitchPort) } if q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort || q.state.HotplugVFIOOnRootBus { @@ -1757,6 +1752,8 @@ func (q *qemu) qomGetPciPath(qemuID string) (types.PciPath, error) { } slots = append(slots, devSlot) + r, _ := regexp.Compile(`^/machine/.*/pcie.0`) + var parentPath = qemuID // We do not want to use a forever loop here, a deeper PCIe topology // than 5 is already not advisable just for the sake of having enough @@ -1775,7 +1772,7 @@ func (q *qemu) qomGetPciPath(qemuID string) (types.PciPath, error) { // If we hit /machine/q35/pcie.0 we're done this is the root bus // we climbed the complete hierarchy - if strings.Contains(busQOM, "/machine/q35/pcie.0") { + if r.Match([]byte(busQOM)) { break } @@ -1863,7 +1860,7 @@ func (q 
*qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op }).Info("Start hot-plug VFIO device") // In case MachineType is q35, a PCIe device is hotplugged on // a PCIe Root Port or alternatively on a PCIe Switch Port - if q.HypervisorConfig().HypervisorMachineType != QemuQ35 { + if q.HypervisorConfig().HypervisorMachineType != QemuQ35 && q.HypervisorConfig().HypervisorMachineType != QemuVirt { device.Bus = "" } else { var err error @@ -2636,9 +2633,9 @@ func genericAppendPCIeRootPort(devices []govmmQemu.Device, number uint32, machin // genericAppendPCIeSwitch adds a PCIe Swtich func genericAppendPCIeSwitchPort(devices []govmmQemu.Device, number uint32, machineType string, memSize32bit uint64, memSize64bit uint64) []govmmQemu.Device { - // Q35 has the correct PCIe support, + // Q35, Virt have the correct PCIe support, // hence ignore all other machines - if machineType != QemuQ35 { + if machineType != QemuQ35 && machineType != QemuVirt { return devices } diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index 5244fdf00d..ed03c092d6 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -630,18 +630,23 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor sandboxConfig.Containers[cnt].DeviceInfos[dev].Port = sandboxConfig.HypervisorConfig.HotPlugVFIO } if coldPlugVFIO && isVFIO { - s.Logger().Info("### coldplug and vfio ", device, "coldplug ", sandboxConfig.HypervisorConfig.ColdPlugVFIO) device.ColdPlug = true device.Port = sandboxConfig.HypervisorConfig.ColdPlugVFIO vfioDevices = append(vfioDevices, device) // We need to remove the devices marked for cold-plug // otherwise at the container level the kata-agent // will try to hot-plug them. - infos := sandboxConfig.Containers[cnt].DeviceInfos - infos = append(infos[:dev], infos[dev+1:]...) 
- sandboxConfig.Containers[cnt].DeviceInfos = infos + sandboxConfig.Containers[cnt].DeviceInfos[dev].ID = "remove-we-are-cold-plugging" } } + var filteredDevices []config.DeviceInfo + for _, device := range containers.DeviceInfos { + if device.ID != "remove-we-are-cold-plugging" { + filteredDevices = append(filteredDevices, device) + } + } + sandboxConfig.Containers[cnt].DeviceInfos = filteredDevices + } sandboxConfig.HypervisorConfig.VFIODevices = vfioDevices