runtime: Add proper IOMMUFD parsing

Newer kernels provide a new backend for VFIO called IOMMUFD.
This is a departure from VFIO IOMMU groups, since only one
device is associated with an IOMMUFD entry.

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
This commit is contained in:
Zvonko Kaiser 2025-01-15 20:33:42 +00:00
parent 121ac0c5c0
commit e82fdee20f
3 changed files with 120 additions and 7 deletions

View File

@ -9,9 +9,12 @@ package drivers
import (
"fmt"
"os"
"path"
"path/filepath"
"regexp"
"strconv"
"strings"
"syscall"
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/api"
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
@ -157,6 +160,92 @@ func checkIgnorePCIClass(pciClass string, deviceBDF string, bitmask uint64) (boo
return false, nil
}
// getMajorMinorFromDevPath stats devPath and returns the major and minor
// numbers stored in the st_rdev field of the resulting stat structure.
//
// The numbers are decoded using the Linux dev_t layout, in which the major
// occupies bits 8-19 and 44-63 and the minor occupies bits 0-7 and 20-43.
// A plain ">> 8" / "& 0xff" split is only correct while the minor is below
// 256; beyond that the upper minor bits would be misread as part of the major.
//
// An error is returned when the path cannot be stat'ed or when the platform
// does not expose a *syscall.Stat_t for it.
func getMajorMinorFromDevPath(devPath string) (uint32, uint32, error) {
	fi, err := os.Stat(devPath)
	if err != nil {
		return 0, 0, err
	}

	// Checked assertion: an unchecked one would panic if Sys() returned
	// anything other than *syscall.Stat_t.
	st, ok := fi.Sys().(*syscall.Stat_t)
	if !ok || st == nil {
		return 0, 0, fmt.Errorf("no stat information available for %s", devPath)
	}

	rdev := uint64(st.Rdev)

	major := uint32((rdev & 0x00000000000fff00) >> 8)
	major |= uint32((rdev & 0xfffff00000000000) >> 32)

	minor := uint32(rdev & 0x00000000000000ff)
	minor |= uint32((rdev & 0x00000ffffff00000) >> 12)

	return major, minor, nil
}
// extractIndex returns the numeric suffix of a VFIO device node name, e.g.
// "0" for "/dev/vfio/devices/vfio0".
//
// Only the last path element is inspected. It must consist of the literal
// prefix "vfio" followed by an unsigned decimal number; anything else
// (missing prefix, empty suffix, non-digit characters) yields an error.
// On error the string "0" is returned alongside it, as before; callers must
// check the error and not rely on that value.
func extractIndex(devicePath string) (string, error) {
	const prefix = "vfio"

	base := filepath.Base(devicePath)
	if !strings.HasPrefix(base, prefix) {
		return "0", fmt.Errorf("unexpected device name format: %s", base)
	}

	index := strings.TrimPrefix(base, prefix)
	// Reject "vfio" (empty index) and names such as "vfioabc" so that a
	// malformed host path cannot produce a bogus device ID.
	if _, err := strconv.ParseUint(index, 10, 32); err != nil {
		return "0", fmt.Errorf("unexpected device name format: %s", base)
	}

	return index, nil
}
// vfioSysfsPathBDFRegex matches a PCI address in domain:bus:device.function
// form (e.g. "0000:65:00.0"). It is compiled once at package scope rather
// than on every call.
var vfioSysfsPathBDFRegex = regexp.MustCompile(`([0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F])`)

// getBdfFromVFIODev resolves the /sys/dev/char/<major>:<minor> symlink and
// returns the last PCI BDF that appears in the resolved path.
//
// The resolved path may contain several BDFs (for example one per upstream
// bridge); the last match is returned.
//
// An error is returned when the symlink cannot be resolved or when the
// resolved path contains no BDF at all.
func getBdfFromVFIODev(major uint32, minor uint32) (string, error) {
	devPath := fmt.Sprintf("/sys/dev/char/%d:%d", major, minor)

	realPath, err := filepath.EvalSymlinks(devPath)
	if err != nil {
		// %w keeps the underlying error inspectable via errors.Is/As.
		return "", fmt.Errorf("Failed to resolve symlink for %s: %w", devPath, err)
	}

	matches := vfioSysfsPathBDFRegex.FindAllString(realPath, -1)
	if len(matches) == 0 {
		return "", fmt.Errorf("No BDF found in resolved path: %s", realPath)
	}

	return matches[len(matches)-1], nil
}
// GetDeviceFromVFIODev returns the host device associated with the VFIO
// device node given in device.HostPath.
//
// With IOMMUFD there is only one device per VFIO device node, so the
// returned slice always holds exactly one entry on success. An error is
// returned when the node cannot be stat'ed, when no BDF can be derived from
// its sysfs entry, when the VFIO device type cannot be determined, or when
// the node name does not carry a device index.
func GetDeviceFromVFIODev(device config.DeviceInfo) ([]*config.VFIODev, error) {
	// The host BDF is obtained by resolving the symlink of the char device
	// entry /sys/dev/char/<major>:<minor>.
	// $ ls -l /dev/vfio/devices/vfio0
	// crw------- 1 root root 237, 0 Jan 15 16:53 /dev/vfio/devices/vfio0
	major, minor, err := getMajorMinorFromDevPath(device.HostPath)
	if err != nil {
		return nil, fmt.Errorf("Failed to get major:minor from %s: %w", device.HostPath, err)
	}

	// $ ls -l /sys/dev/char/237:0
	// /sys/dev/char/237:0 -> ../../devices/pci0000:64/0000:64:00.0/0000:65:00.0/vfio-dev/vfio0
	deviceBDF, err := getBdfFromVFIODev(major, minor)
	if err != nil {
		return nil, err
	}

	deviceSysfsDev := path.Join(config.SysBusPciDevicesPath, deviceBDF)
	vfioDeviceType, err := GetVFIODeviceType(deviceSysfsDev)
	if err != nil {
		return nil, err
	}

	// The device index is taken from the node name (vfio0 -> "0") and used
	// as the VFIODev ID.
	id, err := extractIndex(device.HostPath)
	if err != nil {
		return nil, err
	}

	vfio := &config.VFIODev{
		ID:       id,
		Type:     vfioDeviceType,
		BDF:      deviceBDF,
		SysfsDev: deviceSysfsDev,
		IsPCIe:   IsPCIeDevice(deviceBDF),
		Class:    getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass),
		VendorID: getPCIDeviceProperty(deviceBDF, PCISysFsDevicesVendor),
		DeviceID: getPCIDeviceProperty(deviceBDF, PCISysFsDevicesDevice),
		Port:     device.Port,
		HostPath: device.HostPath,
	}

	return []*config.VFIODev{vfio}, nil
}
// GetAllVFIODevicesFromIOMMUGroup returns all the VFIO devices in the IOMMU group
// We can reuse this function at various levels, sandbox, container.
func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo) ([]*config.VFIODev, error) {

View File

@ -28,6 +28,7 @@ const (
iommuGroupPath = "/sys/bus/pci/devices/%s/iommu_group"
vfioDevPath = "/dev/vfio/%s"
vfioAPSysfsDir = "/sys/devices/vfio_ap"
IommufdDevPath = "/dev/vfio/devices"
)
// VFIODevice is a vfio device meant to be passed to the hypervisor
@ -64,9 +65,18 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece
}
}()
device.VfioDevs, err = GetAllVFIODevicesFromIOMMUGroup(*device.DeviceInfo)
if err != nil {
return err
// This works for IOMMUFD enabled kernels > 6.x
// In the case of IOMMUFD the device.HostPath will look like
// /dev/vfio/devices/vfio0
// (1) Check if we have the new IOMMUFD or old container based VFIO
if strings.HasPrefix(device.DeviceInfo.HostPath, IommufdDevPath) {
device.VfioDevs, err = GetDeviceFromVFIODev(*device.DeviceInfo)
} else {
// Old container based VFIO: collect all devices of the IOMMU group
device.VfioDevs, err = GetAllVFIODevicesFromIOMMUGroup(*device.DeviceInfo)
if err != nil {
return err
}
}
for _, vfio := range device.VfioDevs {

View File

@ -792,11 +792,25 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
return fmt.Errorf("Cannot get host path for device: %v err: %v", dev, err)
}
devicesPerIOMMUGroup, err := drivers.GetAllVFIODevicesFromIOMMUGroup(dev)
if err != nil {
return fmt.Errorf("Cannot get all VFIO devices from IOMMU group with device: %v err: %v", dev, err)
vfioDevices := []*config.VFIODev{}
// This works for IOMMUFD enabled kernels > 6.x
// In the case of IOMMUFD the device.HostPath will look like
// /dev/vfio/devices/vfio0
// (1) Check if we have the new IOMMUFD or old container based VFIO
if strings.HasPrefix(dev.HostPath, drivers.IommufdDevPath) {
q.Logger().Infof("### IOMMUFD Path: %s", dev.HostPath)
vfioDevices, err = drivers.GetDeviceFromVFIODev(dev)
if err != nil {
return fmt.Errorf("Cannot get VFIO device from IOMMUFD with device: %v err: %v", dev, err)
}
} else {
vfioDevices, err = drivers.GetAllVFIODevicesFromIOMMUGroup(dev)
if err != nil {
return fmt.Errorf("Cannot get all VFIO devices from IOMMU group with device: %v err: %v", dev, err)
}
}
for _, vfioDevice := range devicesPerIOMMUGroup {
for _, vfioDevice := range vfioDevices {
if drivers.IsPCIeDevice(vfioDevice.BDF) {
numOfPluggablePorts = numOfPluggablePorts + 1
}