Merge pull request #10743 from zvonkok/iommufd-gpu-fix

IOMMUFD GPU enhancement
This commit is contained in:
Hyounggyu Choi 2025-02-20 23:43:00 +01:00 committed by GitHub
commit 58647bb654
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 144 additions and 7 deletions

View File

@ -396,6 +396,9 @@ type VFIODev struct {
// sysfsdev of VFIO mediated device
SysfsDev string
// DevfsDev is used to identify a VFIO Group device or IOMMUFD VFIO device
DevfsDev string
// VendorID specifies vendor id
VendorID string

View File

@ -9,9 +9,12 @@ package drivers
import (
"fmt"
"os"
"path"
"path/filepath"
"regexp"
"strconv"
"strings"
"syscall"
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/api"
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
@ -157,6 +160,93 @@ func checkIgnorePCIClass(pciClass string, deviceBDF string, bitmask uint64) (boo
return false, nil
}
// getMajorMinorFromDevPath stats devPath and returns the major and minor
// numbers of the device node it refers to.
//
// An error is returned when devPath cannot be stat'ed, when it is not a
// device node, or when the platform stat data is not a *syscall.Stat_t.
func getMajorMinorFromDevPath(devPath string) (uint32, uint32, error) {
	fi, err := os.Stat(devPath)
	if err != nil {
		return 0, 0, err
	}

	// Rdev is only meaningful for device nodes; for anything else it is 0
	// and would silently be reported as device 0:0.
	if fi.Mode()&os.ModeDevice == 0 {
		return 0, 0, fmt.Errorf("%s is not a device node", devPath)
	}

	// Checked assertion: an unchecked one panics if Sys() is nil or of a
	// different type.
	st, ok := fi.Sys().(*syscall.Stat_t)
	if !ok || st == nil {
		return 0, 0, fmt.Errorf("cannot read device numbers of %s: unexpected stat type %T", devPath, fi.Sys())
	}

	// Rdev has a different integer width on different architectures, so
	// widen it before decoding.
	major, minor := decodeLinuxDevNumber(uint64(st.Rdev))
	return major, minor, nil
}

// decodeLinuxDevNumber splits a Linux dev_t value into its major and minor
// numbers. The encoding is not a plain 8/8 bit split: the low 12 bits of the
// major sit in bits 8-19 and its remaining bits in bits 44-63, while the low
// 8 bits of the minor sit in bits 0-7 and its remaining bits in bits 20-43.
func decodeLinuxDevNumber(rdev uint64) (uint32, uint32) {
	major := ((rdev >> 8) & 0xfff) | ((rdev >> 32) &^ 0xfff)
	minor := (rdev & 0xff) | ((rdev >> 12) &^ 0xff)
	return uint32(major), uint32(minor)
}
// extractIndex returns the numeric index of a VFIO device node, i.e. the
// "0" in /dev/vfio/devices/vfio0.
//
// The base name of devicePath must be "vfio" followed by one or more decimal
// digits. Anything else is rejected, because the index is later embedded
// verbatim in identifiers built from it. On error the returned string is "0"
// and must not be used.
func extractIndex(devicePath string) (string, error) {
	const prefix = "vfio"

	base := filepath.Base(devicePath)
	if !strings.HasPrefix(base, prefix) {
		return "0", fmt.Errorf("unexpected device name format: %s", base)
	}

	index := strings.TrimPrefix(base, prefix)
	if index == "" {
		return "0", fmt.Errorf("unexpected device name format: %s", base)
	}
	for i := 0; i < len(index); i++ {
		if index[i] < '0' || index[i] > '9' {
			return "0", fmt.Errorf("unexpected device name format: %s", base)
		}
	}

	return index, nil
}
// vfioDevBDFRegex matches a PCI address written as domain:bus:device.function
// (for example 0000:65:00.0). It is compiled once at package scope instead of
// on every lookup.
var vfioDevBDFRegex = regexp.MustCompile(`([0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F])`)

// getBdfFromVFIODev returns the PCI BDF of the device behind the character
// device major:minor. It resolves the /sys/dev/char/<major>:<minor> symlink
// and takes the last BDF found in the resolved path.
//
// An error is returned when the symlink cannot be resolved or when the
// resolved path contains no BDF.
func getBdfFromVFIODev(major uint32, minor uint32) (string, error) {
	devPath := fmt.Sprintf("/sys/dev/char/%d:%d", major, minor)

	realPath, err := filepath.EvalSymlinks(devPath)
	if err != nil {
		return "", fmt.Errorf("Failed to resolve symlink for %s: %w", devPath, err)
	}

	bdf, ok := lastBDFInSysfsPath(realPath)
	if !ok {
		return "", fmt.Errorf("No BDF found in resolved path: %s", realPath)
	}

	return bdf, nil
}

// lastBDFInSysfsPath returns the last PCI BDF that appears in sysfsPath. A
// resolved path may contain several (one per bridge, e.g.
// .../0000:64:00.0/0000:65:00.0/vfio-dev/vfio0), and the last one is the
// device itself. The boolean is false when no BDF is present.
func lastBDFInSysfsPath(sysfsPath string) (string, bool) {
	matches := vfioDevBDFRegex.FindAllString(sysfsPath, -1)
	if len(matches) == 0 {
		return "", false
	}
	return matches[len(matches)-1], true
}
// GetDeviceFromVFIODev returns the host device associated with an IOMMUFD
// VFIO device node such as /dev/vfio/devices/vfio0. With IOMMUFD there is
// only one device per VFIO device, so on success the returned slice holds a
// single entry.
func GetDeviceFromVFIODev(device config.DeviceInfo) ([]*config.VFIODev, error) {
	hostPath := device.HostPath

	// The host BDF is found by following the symlink that sysfs keeps for
	// the char device under /sys/dev/char/<major>:<minor>, e.g.
	//   $ ls -l /dev/vfio/devices/vfio0
	//   crw------- 1 root root 237, 0 Jan 15 16:53 /dev/vfio/devices/vfio0
	devMajor, devMinor, err := getMajorMinorFromDevPath(hostPath)
	if err != nil {
		return nil, fmt.Errorf("Failed to get major:minor from %s: %v", hostPath, err)
	}

	//   $ ls -l /sys/dev/char/237:0
	//   /sys/dev/char/237:0 -> ../../devices/pci0000:64/0000:64:00.0/0000:65:00.0/vfio-dev/vfio0
	bdf, err := getBdfFromVFIODev(devMajor, devMinor)
	if err != nil {
		return nil, err
	}

	sysfsDev := path.Join(config.SysBusPciDevicesPath, bdf)
	devType, err := GetVFIODeviceType(sysfsDev)
	if err != nil {
		return nil, err
	}

	// PCI identity of the device, read from its sysfs properties.
	vendor := getPCIDeviceProperty(bdf, PCISysFsDevicesVendor)
	devID := getPCIDeviceProperty(bdf, PCISysFsDevicesDevice)
	class := getPCIDeviceProperty(bdf, PCISysFsDevicesClass)

	// The VFIO index is the numeric suffix of the device node name.
	index, err := extractIndex(hostPath)
	if err != nil {
		return nil, err
	}

	dev := &config.VFIODev{
		ID:       index,
		Type:     devType,
		BDF:      bdf,
		SysfsDev: sysfsDev,
		DevfsDev: hostPath,
		HostPath: hostPath,
		Port:     device.Port,
		IsPCIe:   IsPCIeDevice(bdf),
		Class:    class,
		VendorID: vendor,
		DeviceID: devID,
	}

	return []*config.VFIODev{dev}, nil
}
// GetAllVFIODevicesFromIOMMUGroup returns all the VFIO devices in the IOMMU group
// We can reuse this function at various levels, sandbox, container.
func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo) ([]*config.VFIODev, error) {

View File

@ -28,6 +28,7 @@ const (
iommuGroupPath = "/sys/bus/pci/devices/%s/iommu_group"
vfioDevPath = "/dev/vfio/%s"
vfioAPSysfsDir = "/sys/devices/vfio_ap"
IommufdDevPath = "/dev/vfio/devices"
)
// VFIODevice is a vfio device meant to be passed to the hypervisor
@ -64,9 +65,21 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece
}
}()
device.VfioDevs, err = GetAllVFIODevicesFromIOMMUGroup(*device.DeviceInfo)
if err != nil {
return err
// This works for IOMMUFD enabled kernels > 6.x
// In the case of IOMMUFD the device.HostPath will look like
// /dev/vfio/devices/vfio0
// (1) Check if we have the new IOMMUFD or old container based VFIO
if strings.HasPrefix(device.DeviceInfo.HostPath, IommufdDevPath) {
device.VfioDevs, err = GetDeviceFromVFIODev(*device.DeviceInfo)
if err != nil {
return err
}
} else {
// Old container based VFIO: collect all the VFIO devices in the IOMMU group
device.VfioDevs, err = GetAllVFIODevicesFromIOMMUGroup(*device.DeviceInfo)
if err != nil {
return err
}
}
for _, vfio := range device.VfioDevs {

View File

@ -25,6 +25,8 @@ import (
"strconv"
"strings"
"syscall"
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/drivers"
)
// Machine describes the machine type qemu will emulate.
@ -1859,6 +1861,9 @@ func (b PCIeSwitchDownstreamPortDevice) Valid() bool {
// VFIODevice represents a qemu vfio device meant for direct access by guest OS.
type VFIODevice struct {
// ID index of the vfio device in devfs or sysfs used for IOMMUFD
ID string
// Bus-Device-Function of device
BDF string
@ -1882,6 +1887,9 @@ type VFIODevice struct {
// SysfsDev specifies the sysfs matrix entry for the AP device
SysfsDev string
// DevfsDev is used to identify a VFIO Group device or IOMMUFD VFIO device
DevfsDev string
}
// VFIODeviceTransport is a map of the vfio device name that corresponds to
@ -1936,6 +1944,12 @@ func (vfioDev VFIODevice) QemuParams(config *Config) []string {
deviceParams = append(deviceParams, fmt.Sprintf("devno=%s", vfioDev.DevNo))
}
if strings.HasPrefix(vfioDev.DevfsDev, drivers.IommufdDevPath) {
qemuParams = append(qemuParams, "-object")
qemuParams = append(qemuParams, fmt.Sprintf("iommufd,id=iommufd%s", vfioDev.ID))
deviceParams = append(deviceParams, fmt.Sprintf("iommufd=iommufd%s", vfioDev.ID))
}
qemuParams = append(qemuParams, "-device")
qemuParams = append(qemuParams, strings.Join(deviceParams, ","))

View File

@ -792,11 +792,25 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
return fmt.Errorf("Cannot get host path for device: %v err: %v", dev, err)
}
devicesPerIOMMUGroup, err := drivers.GetAllVFIODevicesFromIOMMUGroup(dev)
if err != nil {
return fmt.Errorf("Cannot get all VFIO devices from IOMMU group with device: %v err: %v", dev, err)
var vfioDevices []*config.VFIODev
// This works for IOMMUFD enabled kernels > 6.x
// In the case of IOMMUFD the device.HostPath will look like
// /dev/vfio/devices/vfio0
// (1) Check if we have the new IOMMUFD or old container based VFIO
if strings.HasPrefix(dev.HostPath, drivers.IommufdDevPath) {
q.Logger().Infof("### IOMMUFD Path: %s", dev.HostPath)
vfioDevices, err = drivers.GetDeviceFromVFIODev(dev)
if err != nil {
return fmt.Errorf("Cannot get VFIO device from IOMMUFD with device: %v err: %v", dev, err)
}
} else {
vfioDevices, err = drivers.GetAllVFIODevicesFromIOMMUGroup(dev)
if err != nil {
return fmt.Errorf("Cannot get all VFIO devices from IOMMU group with device: %v err: %v", dev, err)
}
}
for _, vfioDevice := range devicesPerIOMMUGroup {
for _, vfioDevice := range vfioDevices {
if drivers.IsPCIeDevice(vfioDevice.BDF) {
numOfPluggablePorts = numOfPluggablePorts + 1
}

View File

@ -708,10 +708,13 @@ func (q *qemuArchBase) appendVFIODevice(devices []govmmQemu.Device, vfioDev conf
devices = append(devices,
govmmQemu.VFIODevice{
ID: vfioDev.ID,
BDF: vfioDev.BDF,
VendorID: vfioDev.VendorID,
DeviceID: vfioDev.DeviceID,
Bus: vfioDev.Bus,
SysfsDev: vfioDev.SysfsDev,
DevfsDev: vfioDev.DevfsDev,
},
)