mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-06-25 06:52:13 +00:00
gpu: Various fixes for virt machine type
The PCI QOM path was not deduced correctly; added a regex for correct path walking. Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
This commit is contained in:
parent
40101ea7db
commit
b11246c3aa
@ -214,14 +214,18 @@ func GetVFIODetails(deviceFileName, iommuDevicesPath string) (deviceBDF, deviceS
|
|||||||
switch vfioDeviceType {
|
switch vfioDeviceType {
|
||||||
case config.VFIOPCIDeviceNormalType:
|
case config.VFIOPCIDeviceNormalType:
|
||||||
// Get bdf of device eg. 0000:00:1c.0
|
// Get bdf of device eg. 0000:00:1c.0
|
||||||
deviceBDF = getBDF(deviceFileName)
|
//deviceBDF = getBDF(deviceFileName)
|
||||||
|
// The old implementation did not consider the case where
|
||||||
|
// vfio devices are located on different root buses. The
|
||||||
|
// kata-agent will handle the case now, here use the full PCI addr
|
||||||
|
deviceBDF = deviceFileName
|
||||||
// Get sysfs path used by cloud-hypervisor
|
// Get sysfs path used by cloud-hypervisor
|
||||||
deviceSysfsDev = filepath.Join(config.SysBusPciDevicesPath, deviceFileName)
|
deviceSysfsDev = filepath.Join(config.SysBusPciDevicesPath, deviceFileName)
|
||||||
case config.VFIOPCIDeviceMediatedType:
|
case config.VFIOPCIDeviceMediatedType:
|
||||||
// Get sysfsdev of device eg. /sys/devices/pci0000:00/0000:00:02.0/f79944e4-5a3d-11e8-99ce-479cbab002e4
|
// Get sysfsdev of device eg. /sys/devices/pci0000:00/0000:00:02.0/f79944e4-5a3d-11e8-99ce-479cbab002e4
|
||||||
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
|
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
|
||||||
deviceSysfsDev, err = GetSysfsDev(sysfsDevStr)
|
deviceSysfsDev, err = GetSysfsDev(sysfsDevStr)
|
||||||
deviceBDF = getBDF(getMediatedBDF(deviceSysfsDev))
|
deviceBDF = GetBDF(getMediatedBDF(deviceSysfsDev))
|
||||||
case config.VFIOAPDeviceMediatedType:
|
case config.VFIOAPDeviceMediatedType:
|
||||||
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
|
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
|
||||||
deviceSysfsDev, err = GetSysfsDev(sysfsDevStr)
|
deviceSysfsDev, err = GetSysfsDev(sysfsDevStr)
|
||||||
@ -244,7 +248,7 @@ func getMediatedBDF(deviceSysfsDev string) string {
|
|||||||
|
|
||||||
// getBDF returns the BDF of pci device
|
// getBDF returns the BDF of pci device
|
||||||
// Expected input string format is [<domain>]:[<bus>][<slot>].[<func>] eg. 0000:02:10.0
|
// Expected input string format is [<domain>]:[<bus>][<slot>].[<func>] eg. 0000:02:10.0
|
||||||
func getBDF(deviceSysStr string) string {
|
func GetBDF(deviceSysStr string) string {
|
||||||
tokens := strings.SplitN(deviceSysStr, ":", 2)
|
tokens := strings.SplitN(deviceSysStr, ":", 2)
|
||||||
if len(tokens) == 1 {
|
if len(tokens) == 1 {
|
||||||
return ""
|
return ""
|
||||||
|
@ -21,6 +21,7 @@ import (
|
|||||||
"github.com/docker/go-units"
|
"github.com/docker/go-units"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/api"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/api"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
||||||
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/drivers"
|
||||||
volume "github.com/kata-containers/kata-containers/src/runtime/pkg/direct-volume"
|
volume "github.com/kata-containers/kata-containers/src/runtime/pkg/direct-volume"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/uuid"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/uuid"
|
||||||
@ -1152,7 +1153,9 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c *
|
|||||||
kataDevice.Type = kataVfioApDevType
|
kataDevice.Type = kataVfioApDevType
|
||||||
kataDevice.Options = dev.APDevices
|
kataDevice.Options = dev.APDevices
|
||||||
} else {
|
} else {
|
||||||
kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", dev.BDF, dev.GuestPciPath)
|
|
||||||
|
devBDF := drivers.GetBDF(dev.BDF)
|
||||||
|
kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", devBDF, dev.GuestPciPath)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -239,32 +239,40 @@ func evalMountPath(source, destination string) (string, string, error) {
|
|||||||
// Mount describes a container mount.
|
// Mount describes a container mount.
|
||||||
// nolint: govet
|
// nolint: govet
|
||||||
type Mount struct {
|
type Mount struct {
|
||||||
// FSGroup a group ID that the group ownership of the files for the mounted volume
|
|
||||||
// will need to be changed when set.
|
|
||||||
FSGroup *int
|
|
||||||
// Source is the source of the mount.
|
// Source is the source of the mount.
|
||||||
Source string
|
Source string
|
||||||
// Destination is the destination of the mount (within the container).
|
// Destination is the destination of the mount (within the container).
|
||||||
Destination string
|
Destination string
|
||||||
|
|
||||||
|
// Type specifies the type of filesystem to mount.
|
||||||
|
Type string
|
||||||
|
|
||||||
// HostPath used to store host side bind mount path
|
// HostPath used to store host side bind mount path
|
||||||
HostPath string
|
HostPath string
|
||||||
|
|
||||||
// GuestDeviceMount represents the path within the VM that the device
|
// GuestDeviceMount represents the path within the VM that the device
|
||||||
// is mounted. Only relevant for block devices. This is tracked in the event
|
// is mounted. Only relevant for block devices. This is tracked in the event
|
||||||
// runtime wants to query the agent for mount stats.
|
// runtime wants to query the agent for mount stats.
|
||||||
GuestDeviceMount string
|
GuestDeviceMount string
|
||||||
|
|
||||||
// BlockDeviceID represents block device that is attached to the
|
// BlockDeviceID represents block device that is attached to the
|
||||||
// VM in case this mount is a block device file or a directory
|
// VM in case this mount is a block device file or a directory
|
||||||
// backed by a block device.
|
// backed by a block device.
|
||||||
BlockDeviceID string
|
BlockDeviceID string
|
||||||
// Type specifies the type of filesystem to mount.
|
|
||||||
Type string
|
// Options list all the mount options of the filesystem.
|
||||||
|
Options []string
|
||||||
|
|
||||||
|
// ReadOnly specifies if the mount should be read only or not
|
||||||
|
ReadOnly bool
|
||||||
|
|
||||||
|
// FSGroup a group ID that the group ownership of the files for the mounted volume
|
||||||
|
// will need to be changed when set.
|
||||||
|
FSGroup *int
|
||||||
|
|
||||||
// FSGroupChangePolicy specifies the policy that will be used when applying
|
// FSGroupChangePolicy specifies the policy that will be used when applying
|
||||||
// group id ownership change for a volume.
|
// group id ownership change for a volume.
|
||||||
FSGroupChangePolicy volume.FSGroupChangePolicy
|
FSGroupChangePolicy volume.FSGroupChangePolicy
|
||||||
// Options list all the mount options of the filesystem.
|
|
||||||
Options []string
|
|
||||||
// ReadOnly specifies if the mount should be read only or not
|
|
||||||
ReadOnly bool
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func isSymlink(path string) bool {
|
func isSymlink(path string) bool {
|
||||||
|
@ -701,7 +701,7 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if machine.Type == QemuQ35 {
|
if machine.Type == QemuQ35 || machine.Type == QemuVirt {
|
||||||
if err := q.createPCIeTopology(&qemuConfig, hypervisorConfig); err != nil {
|
if err := q.createPCIeTopology(&qemuConfig, hypervisorConfig); err != nil {
|
||||||
q.Logger().WithError(err).Errorf("Cannot create PCIe topology")
|
q.Logger().WithError(err).Errorf("Cannot create PCIe topology")
|
||||||
return err
|
return err
|
||||||
@ -747,7 +747,6 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
q.Logger().Info("### PCIe Topology ###")
|
|
||||||
// Add PCIe Root Port or PCIe Switches to the hypervisor
|
// Add PCIe Root Port or PCIe Switches to the hypervisor
|
||||||
// The pcie.0 bus does not support hot-plug, but PCIe devices can be hot-plugged
|
// The pcie.0 bus does not support hot-plug, but PCIe devices can be hot-plugged
|
||||||
// into a PCIe Root Port or PCIe Switch.
|
// into a PCIe Root Port or PCIe Switch.
|
||||||
@ -780,16 +779,12 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Cannot get all VFIO devices from IOMMU group with device: %v err: %v", dev, err)
|
return fmt.Errorf("Cannot get all VFIO devices from IOMMU group with device: %v err: %v", dev, err)
|
||||||
}
|
}
|
||||||
q.Logger().Info("### PCIe Topology devices ", devicesPerIOMMUGroup)
|
|
||||||
for _, vfioDevice := range devicesPerIOMMUGroup {
|
for _, vfioDevice := range devicesPerIOMMUGroup {
|
||||||
q.Logger().Info("### PCIe Topology vfioDevice ", vfioDevice)
|
|
||||||
if drivers.IsPCIeDevice(vfioDevice.BDF) {
|
if drivers.IsPCIeDevice(vfioDevice.BDF) {
|
||||||
numOfPluggablePorts = numOfPluggablePorts + 1
|
numOfPluggablePorts = numOfPluggablePorts + 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
q.Logger().Info("### PCIe Topology numOfPluggablePorts ", numOfPluggablePorts)
|
|
||||||
|
|
||||||
// If number of PCIe root ports > 16 then bail out otherwise we may
|
// If number of PCIe root ports > 16 then bail out otherwise we may
|
||||||
// use up all slots or IO memory on the root bus and vfio-XXX-pci devices
|
// use up all slots or IO memory on the root bus and vfio-XXX-pci devices
|
||||||
// cannot be added which are crucial for Kata max slots on root bus is 32
|
// cannot be added which are crucial for Kata max slots on root bus is 32
|
||||||
@ -798,7 +793,7 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
|
|||||||
return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort)
|
return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort)
|
||||||
}
|
}
|
||||||
if numOfPluggablePorts > maxPCIeSwitchPort {
|
if numOfPluggablePorts > maxPCIeSwitchPort {
|
||||||
return fmt.Errorf("Number of PCIe Switch Ports exceeed allowed max of %d", maxPCIeRootPort)
|
return fmt.Errorf("Number of PCIe Switch Ports exceeed allowed max of %d", maxPCIeSwitchPort)
|
||||||
}
|
}
|
||||||
|
|
||||||
if q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort || q.state.HotplugVFIOOnRootBus {
|
if q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort || q.state.HotplugVFIOOnRootBus {
|
||||||
@ -1757,6 +1752,8 @@ func (q *qemu) qomGetPciPath(qemuID string) (types.PciPath, error) {
|
|||||||
}
|
}
|
||||||
slots = append(slots, devSlot)
|
slots = append(slots, devSlot)
|
||||||
|
|
||||||
|
r, _ := regexp.Compile(`^/machine/.*/pcie.0`)
|
||||||
|
|
||||||
var parentPath = qemuID
|
var parentPath = qemuID
|
||||||
// We do not want to use a forever loop here, a deeper PCIe topology
|
// We do not want to use a forever loop here, a deeper PCIe topology
|
||||||
// than 5 is already not advisable just for the sake of having enough
|
// than 5 is already not advisable just for the sake of having enough
|
||||||
@ -1775,7 +1772,7 @@ func (q *qemu) qomGetPciPath(qemuID string) (types.PciPath, error) {
|
|||||||
|
|
||||||
// If we hit /machine/q35/pcie.0 we're done this is the root bus
|
// If we hit /machine/q35/pcie.0 we're done this is the root bus
|
||||||
// we climbed the complete hierarchy
|
// we climbed the complete hierarchy
|
||||||
if strings.Contains(busQOM, "/machine/q35/pcie.0") {
|
if r.Match([]byte(busQOM)) {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1863,7 +1860,7 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op
|
|||||||
}).Info("Start hot-plug VFIO device")
|
}).Info("Start hot-plug VFIO device")
|
||||||
// In case MachineType is q35, a PCIe device is hotplugged on
|
// In case MachineType is q35, a PCIe device is hotplugged on
|
||||||
// a PCIe Root Port or alternatively on a PCIe Switch Port
|
// a PCIe Root Port or alternatively on a PCIe Switch Port
|
||||||
if q.HypervisorConfig().HypervisorMachineType != QemuQ35 {
|
if q.HypervisorConfig().HypervisorMachineType != QemuQ35 && q.HypervisorConfig().HypervisorMachineType != QemuVirt {
|
||||||
device.Bus = ""
|
device.Bus = ""
|
||||||
} else {
|
} else {
|
||||||
var err error
|
var err error
|
||||||
@ -2636,9 +2633,9 @@ func genericAppendPCIeRootPort(devices []govmmQemu.Device, number uint32, machin
|
|||||||
// genericAppendPCIeSwitch adds a PCIe Switch
|
// genericAppendPCIeSwitch adds a PCIe Switch
|
||||||
func genericAppendPCIeSwitchPort(devices []govmmQemu.Device, number uint32, machineType string, memSize32bit uint64, memSize64bit uint64) []govmmQemu.Device {
|
func genericAppendPCIeSwitchPort(devices []govmmQemu.Device, number uint32, machineType string, memSize32bit uint64, memSize64bit uint64) []govmmQemu.Device {
|
||||||
|
|
||||||
// Q35 has the correct PCIe support,
|
// Q35, Virt have the correct PCIe support,
|
||||||
// hence ignore all other machines
|
// hence ignore all other machines
|
||||||
if machineType != QemuQ35 {
|
if machineType != QemuQ35 && machineType != QemuVirt {
|
||||||
return devices
|
return devices
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -630,18 +630,23 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
|
|||||||
sandboxConfig.Containers[cnt].DeviceInfos[dev].Port = sandboxConfig.HypervisorConfig.HotPlugVFIO
|
sandboxConfig.Containers[cnt].DeviceInfos[dev].Port = sandboxConfig.HypervisorConfig.HotPlugVFIO
|
||||||
}
|
}
|
||||||
if coldPlugVFIO && isVFIO {
|
if coldPlugVFIO && isVFIO {
|
||||||
s.Logger().Info("### coldplug and vfio ", device, "coldplug ", sandboxConfig.HypervisorConfig.ColdPlugVFIO)
|
|
||||||
device.ColdPlug = true
|
device.ColdPlug = true
|
||||||
device.Port = sandboxConfig.HypervisorConfig.ColdPlugVFIO
|
device.Port = sandboxConfig.HypervisorConfig.ColdPlugVFIO
|
||||||
vfioDevices = append(vfioDevices, device)
|
vfioDevices = append(vfioDevices, device)
|
||||||
// We need to remove the devices marked for cold-plug
|
// We need to remove the devices marked for cold-plug
|
||||||
// otherwise at the container level the kata-agent
|
// otherwise at the container level the kata-agent
|
||||||
// will try to hot-plug them.
|
// will try to hot-plug them.
|
||||||
infos := sandboxConfig.Containers[cnt].DeviceInfos
|
sandboxConfig.Containers[cnt].DeviceInfos[dev].ID = "remove-we-are-cold-plugging"
|
||||||
infos = append(infos[:dev], infos[dev+1:]...)
|
|
||||||
sandboxConfig.Containers[cnt].DeviceInfos = infos
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
var filteredDevices []config.DeviceInfo
|
||||||
|
for _, device := range containers.DeviceInfos {
|
||||||
|
if device.ID != "remove-we-are-cold-plugging" {
|
||||||
|
filteredDevices = append(filteredDevices, device)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sandboxConfig.Containers[cnt].DeviceInfos = filteredDevices
|
||||||
|
|
||||||
}
|
}
|
||||||
sandboxConfig.HypervisorConfig.VFIODevices = vfioDevices
|
sandboxConfig.HypervisorConfig.VFIODevices = vfioDevices
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user