diff --git a/src/runtime/pkg/device/drivers/utils.go b/src/runtime/pkg/device/drivers/utils.go
index a89ec9b7e0..8e99aad663 100644
--- a/src/runtime/pkg/device/drivers/utils.go
+++ b/src/runtime/pkg/device/drivers/utils.go
@@ -9,9 +9,11 @@ package drivers
 import (
 	"fmt"
 	"os"
 	"path/filepath"
+	"regexp"
 	"strconv"
 	"strings"
+	"syscall"
 
 	"github.com/kata-containers/kata-containers/src/runtime/pkg/device/api"
 	"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
@@ -157,6 +159,127 @@ func checkIgnorePCIClass(pciClass string, deviceBDF string, bitmask uint64) (boo
 	return false, nil
 }
 
+// vfioDevBDFRegexp matches a PCI address in domain:bus:device.function form,
+// e.g. 0000:65:00.0. It is compiled once at package scope rather than on
+// every lookup.
+var vfioDevBDFRegexp = regexp.MustCompile(`[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F]`)
+
+// getMajorMinorFromDevPath stats devPath and returns the major and minor
+// numbers of the device node it refers to.
+func getMajorMinorFromDevPath(devPath string) (uint32, uint32, error) {
+	fi, err := os.Stat(devPath)
+	if err != nil {
+		return 0, 0, err
+	}
+
+	// The checked assertion turns an unexpected Sys() type into an error
+	// instead of a panic.
+	st, ok := fi.Sys().(*syscall.Stat_t)
+	if !ok {
+		return 0, 0, fmt.Errorf("cannot get stat information for %s", devPath)
+	}
+
+	// Linux spreads both numbers over the 64 bit dev_t: the major number
+	// lives in bits 8-19 and 44-63, the minor number in bits 0-7 and 20-43.
+	// A plain ">> 8" / "& 0xff" split is only right for majors < 4096 and
+	// minors < 256.
+	rdev := uint64(st.Rdev)
+	major := uint32((rdev & 0x00000000000fff00) >> 8)
+	major |= uint32((rdev & 0xfffff00000000000) >> 32)
+	minor := uint32(rdev & 0x00000000000000ff)
+	minor |= uint32((rdev & 0x00000ffffff00000) >> 12)
+
+	return major, minor, nil
+}
+
+// extractIndex returns the numeric suffix of a VFIO character device name,
+// e.g. "0" for /dev/vfio/devices/vfio0.
+func extractIndex(devicePath string) (string, error) {
+	base := filepath.Base(devicePath)
+
+	const prefix = "vfio"
+	if !strings.HasPrefix(base, prefix) {
+		return "", fmt.Errorf("unexpected device name format: %s", base)
+	}
+
+	index := strings.TrimPrefix(base, prefix)
+	// The expected name is vfio<N>; reject anything else (including a bare
+	// "vfio") so an empty or bogus ID is never handed back.
+	if _, err := strconv.ParseUint(index, 10, 32); err != nil {
+		return "", fmt.Errorf("unexpected device name format: %s", base)
+	}
+
+	return index, nil
+}
+
+// getBdfFromVFIODev resolves the /sys/dev/char/<major>:<minor> symlink of a
+// VFIO character device and returns the BDF of the PCI device backing it.
+func getBdfFromVFIODev(major uint32, minor uint32) (string, error) {
+	devPath := fmt.Sprintf("/sys/dev/char/%d:%d", major, minor)
+	realPath, err := filepath.EvalSymlinks(devPath)
+	if err != nil {
+		return "", fmt.Errorf("Failed to resolve symlink for %s: %w", devPath, err)
+	}
+
+	matches := vfioDevBDFRegexp.FindAllString(realPath, -1)
+	if len(matches) == 0 {
+		return "", fmt.Errorf("No BDF found in resolved path: %s", realPath)
+	}
+
+	// The resolved path also contains the BDFs of the bridges above the
+	// device, so the device itself is the last BDF in the path.
+	return matches[len(matches)-1], nil
+}
+
+// GetDeviceFromVFIODev returns the host device associated with the VFIO device.
+// There is only one device per VFIO device in the case of IOMMUFD.
+func GetDeviceFromVFIODev(device config.DeviceInfo) ([]*config.VFIODev, error) {
+	// The way we get the host BDF is by reading the symlink of the char
+	// device major:minor entries in /sys/dev/char/major:minor
+	// $ ls -l /dev/vfio/devices/vfio0
+	// crw------- 1 root root 237, 0 Jan 15 16:53 /dev/vfio/devices/vfio0
+	major, minor, err := getMajorMinorFromDevPath(device.HostPath)
+	if err != nil {
+		return nil, fmt.Errorf("Failed to get major:minor from %s: %v", device.HostPath, err)
+	}
+	// $ ls -l /sys/dev/char/237:0
+	// /sys/dev/char/237:0 -> ../../devices/pci0000:64/0000:64:00.0/0000:65:00.0/vfio-dev/vfio0
+	deviceBDF, err := getBdfFromVFIODev(major, minor)
+	if err != nil {
+		return nil, err
+	}
+
+	deviceSysfsDev := filepath.Join(config.SysBusPciDevicesPath, deviceBDF)
+	vfioDeviceType, err := GetVFIODeviceType(deviceSysfsDev)
+	if err != nil {
+		return nil, err
+	}
+
+	vendorID := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesVendor)
+	deviceID := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesDevice)
+	pciClass := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass)
+
+	id, err := extractIndex(device.HostPath)
+	if err != nil {
+		return nil, err
+	}
+
+	vfio := config.VFIODev{
+		ID:       id,
+		Type:     vfioDeviceType,
+		BDF:      deviceBDF,
+		SysfsDev: deviceSysfsDev,
+		IsPCIe:   IsPCIeDevice(deviceBDF),
+		Class:    pciClass,
+		VendorID: vendorID,
+		DeviceID: deviceID,
+		Port:     device.Port,
+		HostPath: device.HostPath,
+	}
+
+	return []*config.VFIODev{&vfio}, nil
+}
+
 // GetAllVFIODevicesFromIOMMUGroup returns all the VFIO devices in the IOMMU group
 // We can reuse this function at various levels, sandbox, container.
 func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo) ([]*config.VFIODev, error) {
diff --git a/src/runtime/pkg/device/drivers/vfio.go b/src/runtime/pkg/device/drivers/vfio.go
index f315fddaf0..9e4df43f6d 100644
--- a/src/runtime/pkg/device/drivers/vfio.go
+++ b/src/runtime/pkg/device/drivers/vfio.go
@@ -28,6 +28,9 @@ const (
 	iommuGroupPath = "/sys/bus/pci/devices/%s/iommu_group"
 	vfioDevPath    = "/dev/vfio/%s"
 	vfioAPSysfsDir = "/sys/devices/vfio_ap"
+	// IommufdDevPath is the directory holding the per-device VFIO character
+	// devices (e.g. /dev/vfio/devices/vfio0) used with IOMMUFD.
+	IommufdDevPath = "/dev/vfio/devices"
 )
 
 // VFIODevice is a vfio device meant to be passed to the hypervisor
@@ -64,9 +67,16 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece
 		}
 	}()
 
-	device.VfioDevs, err = GetAllVFIODevicesFromIOMMUGroup(*device.DeviceInfo)
+	// This works for IOMMUFD enabled kernels > 6.x
+	// In the case of IOMMUFD the device.HostPath will look like
+	// /dev/vfio/devices/vfio0, otherwise we have the old container based VFIO.
+	if strings.HasPrefix(device.DeviceInfo.HostPath, IommufdDevPath) {
+		device.VfioDevs, err = GetDeviceFromVFIODev(*device.DeviceInfo)
+	} else {
+		device.VfioDevs, err = GetAllVFIODevicesFromIOMMUGroup(*device.DeviceInfo)
+	}
 	if err != nil {
 		return err
 	}
 
 	for _, vfio := range device.VfioDevs {
diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go
index ba86c3d63a..22f22ab8c9 100644
--- a/src/runtime/virtcontainers/qemu.go
+++ b/src/runtime/virtcontainers/qemu.go
@@ -792,11 +792,25 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
 			return fmt.Errorf("Cannot get host path for device: %v err: %v", dev, err)
 		}
 
-		devicesPerIOMMUGroup, err := drivers.GetAllVFIODevicesFromIOMMUGroup(dev)
-		if err != nil {
-			return fmt.Errorf("Cannot get all VFIO devices from IOMMU group with device: %v err: %v", dev, err)
+		// This works for IOMMUFD enabled kernels > 6.x
+		// In the case of IOMMUFD the dev.HostPath will look like
+		// /dev/vfio/devices/vfio0, otherwise we have the old container
+		// based VFIO.
+		var vfioDevices []*config.VFIODev
+		if strings.HasPrefix(dev.HostPath, drivers.IommufdDevPath) {
+			q.Logger().Debugf("IOMMUFD path: %s", dev.HostPath)
+			vfioDevices, err = drivers.GetDeviceFromVFIODev(dev)
+			if err != nil {
+				return fmt.Errorf("Cannot get VFIO device from IOMMUFD with device: %v err: %v", dev, err)
+			}
+		} else {
+			vfioDevices, err = drivers.GetAllVFIODevicesFromIOMMUGroup(dev)
+			if err != nil {
+				return fmt.Errorf("Cannot get all VFIO devices from IOMMU group with device: %v err: %v", dev, err)
+			}
 		}
-		for _, vfioDevice := range devicesPerIOMMUGroup {
+
+		for _, vfioDevice := range vfioDevices {
 			if drivers.IsPCIeDevice(vfioDevice.BDF) {
 				numOfPluggablePorts = numOfPluggablePorts + 1
 			}