From 68a586e52c88914f97519f6f664e16525dcfc4fd Mon Sep 17 00:00:00 2001 From: Jakob Naucke Date: Wed, 16 Feb 2022 15:18:07 +0000 Subject: [PATCH 1/6] agent: Use a constant for CCW root bus path used a function like PCI does, but this is not necessary Signed-off-by: Jakob Naucke --- src/agent/src/device.rs | 4 ++-- src/agent/src/linux_abi.rs | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index ec277f78eb..2d01b63abf 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -280,7 +280,7 @@ pub async fn get_virtio_blk_ccw_device_name( sandbox: &Arc>, device: &ccw::Device, ) -> Result { - let matcher = VirtioBlkCCWMatcher::new(&create_ccw_root_bus_path(), device); + let matcher = VirtioBlkCCWMatcher::new(CCW_ROOT_BUS_PATH, device); let uev = wait_for_uevent(sandbox, matcher).await?; let devname = uev.devname; return match Path::new(SYSTEM_DEV_PATH).join(&devname).to_str() { @@ -1378,7 +1378,7 @@ mod tests { #[cfg(target_arch = "s390x")] #[tokio::test] async fn test_virtio_blk_ccw_matcher() { - let root_bus = create_ccw_root_bus_path(); + let root_bus = CCW_ROOT_BUS_PATH; let subsystem = "block"; let devname = "vda"; let relpath = "0.0.0002"; diff --git a/src/agent/src/linux_abi.rs b/src/agent/src/linux_abi.rs index c4d304d280..fbfb9b3771 100644 --- a/src/agent/src/linux_abi.rs +++ b/src/agent/src/linux_abi.rs @@ -65,9 +65,8 @@ pub fn create_pci_root_bus_path() -> String { } #[cfg(target_arch = "s390x")] -pub fn create_ccw_root_bus_path() -> String { - String::from("/devices/css0") -} +pub const CCW_ROOT_BUS_PATH: &str = "/devices/css0"; + // From https://www.kernel.org/doc/Documentation/acpi/namespace.txt // The Linux kernel's core ACPI subsystem creates struct acpi_device // objects for ACPI namespace objects representing devices, power resources From db89c88f4fcb3d59456595932adf8fd82a32fab5 Mon Sep 17 00:00:00 2001 From: Jakob Naucke Date: Tue, 15 Feb 2022 17:15:21 +0100 Subject: [PATCH 2/6] agent: Use cfg-if for s390x CCW Uses fewer lines in upcoming VFIO-AP support. Signed-off-by: Jakob Naucke --- src/agent/Cargo.lock | 1 + src/agent/Cargo.toml | 1 + src/agent/src/device.rs | 9 +++++++-- src/agent/src/linux_abi.rs | 9 +++++++-- src/agent/src/main.rs | 9 +++++++-- 5 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/agent/Cargo.lock b/src/agent/Cargo.lock index 0de4591161..119183cb4c 100644 --- a/src/agent/Cargo.lock +++ b/src/agent/Cargo.lock @@ -801,6 +801,7 @@ dependencies = [ "async-recursion", "async-trait", "capctl", + "cfg-if 1.0.0", "cgroups-rs", "clap", "futures", diff --git a/src/agent/Cargo.toml b/src/agent/Cargo.toml index 8851798fd0..b5a82f257b 100644 --- a/src/agent/Cargo.toml +++ b/src/agent/Cargo.toml @@ -48,6 +48,7 @@ slog-scope = "4.1.2" slog-stdlog = "4.0.0" log = "0.4.11" +cfg-if = "1.0.0" prometheus = { version = "0.13.0", features = ["process"] } procfs = "0.12.0" anyhow = "1.0.32" diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index 2d01b63abf..620163ee00 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -16,13 +16,12 @@ use std::str::FromStr; use std::sync::Arc; use tokio::sync::Mutex; -#[cfg(target_arch = "s390x")] -use crate::ccw; use crate::linux_abi::*; use crate::pci; use crate::sandbox::Sandbox; use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher}; use anyhow::{anyhow, Context, Result}; +use cfg_if::cfg_if; use oci::{LinuxDeviceCgroup, LinuxResources, Spec}; use protocols::agent::Device; use tracing::instrument; @@ -54,6 +53,12 @@ pub const DRIVER_VFIO_TYPE: &str = "vfio"; pub const DRIVER_OVERLAYFS_TYPE: &str = "overlayfs"; pub const FS_TYPE_HUGETLB: &str = "hugetlbfs"; +cfg_if! { + if #[cfg(target_arch = "s390x")] { + use crate::ccw; + } +} + #[instrument] pub fn online_device(path: &str) -> Result<()> { fs::write(path, "1")?; diff --git a/src/agent/src/linux_abi.rs b/src/agent/src/linux_abi.rs index fbfb9b3771..9df398d615 100644 --- a/src/agent/src/linux_abi.rs +++ b/src/agent/src/linux_abi.rs @@ -3,6 +3,8 @@ // SPDX-License-Identifier: Apache-2.0 // +use cfg_if::cfg_if; + /// Linux ABI related constants. #[cfg(target_arch = "aarch64")] @@ -64,8 +66,11 @@ pub fn create_pci_root_bus_path() -> String { ret } -#[cfg(target_arch = "s390x")] -pub const CCW_ROOT_BUS_PATH: &str = "/devices/css0"; +cfg_if! { + if #[cfg(target_arch = "s390x")] { + pub const CCW_ROOT_BUS_PATH: &str = "/devices/css0"; + } +} // From https://www.kernel.org/doc/Documentation/acpi/namespace.txt // The Linux kernel's core ACPI subsystem creates struct acpi_device diff --git a/src/agent/src/main.rs b/src/agent/src/main.rs index d8e9fc828b..4ca89a9a15 100644 --- a/src/agent/src/main.rs +++ b/src/agent/src/main.rs @@ -20,6 +20,7 @@ extern crate scopeguard; extern crate slog; use anyhow::{anyhow, Context, Result}; +use cfg_if::cfg_if; use clap::{AppSettings, Parser}; use nix::fcntl::OFlag; use nix::sys::socket::{self, AddressFamily, SockFlag, SockType, VsockAddr}; @@ -34,8 +35,6 @@ use std::process::exit; use std::sync::Arc; use tracing::{instrument, span}; -#[cfg(target_arch = "s390x")] -mod ccw; mod config; mod console; mod device; @@ -74,6 +73,12 @@ use tokio::{ mod rpc; mod tracer; +cfg_if! { + if #[cfg(target_arch = "s390x")] { + mod ccw; + } +} + const NAME: &str = "kata-agent"; lazy_static! { From 4c527d00c7b747b8ffc66b8126d57f3fdaa61206 Mon Sep 17 00:00:00 2001 From: Jakob Naucke Date: Tue, 8 Mar 2022 18:50:58 +0100 Subject: [PATCH 3/6] agent: Rename VFIO handling to VFIO PCI handling e.g., split_vfio_option is PCI-specific and should instead be named split_vfio_pci_option. This mutually affects the runtime, most notably how the labels are named for the agent. Signed-off-by: Jakob Naucke --- src/agent/src/device.rs | 32 ++++++++++++--------- src/runtime/pkg/device/config/config.go | 8 +++--- src/runtime/pkg/device/drivers/utils.go | 4 +-- src/runtime/pkg/device/drivers/vfio.go | 4 +-- src/runtime/pkg/device/drivers/vfio_test.go | 4 +-- src/runtime/virtcontainers/kata_agent.go | 14 ++++----- src/runtime/virtcontainers/qemu.go | 8 +++--- 7 files changed, 40 insertions(+), 34 deletions(-) diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index 620163ee00..23bc154a78 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -45,11 +45,11 @@ pub const DRIVER_NVDIMM_TYPE: &str = "nvdimm"; pub const DRIVER_EPHEMERAL_TYPE: &str = "ephemeral"; pub const DRIVER_LOCAL_TYPE: &str = "local"; pub const DRIVER_WATCHABLE_BIND_TYPE: &str = "watchable-bind"; -// VFIO device to be bound to a guest kernel driver -pub const DRIVER_VFIO_GK_TYPE: &str = "vfio-gk"; -// VFIO device to be bound to vfio-pci and made available inside the +// VFIO PCI device to be bound to a guest kernel driver +pub const DRIVER_VFIO_PCI_GK_TYPE: &str = "vfio-pci-gk"; +// VFIO PCI device to be bound to vfio-pci and made available inside the // container as a VFIO device node -pub const DRIVER_VFIO_TYPE: &str = "vfio"; +pub const DRIVER_VFIO_PCI_TYPE: &str = "vfio-pci"; pub const DRIVER_OVERLAYFS_TYPE: &str = "overlayfs"; pub const FS_TYPE_HUGETLB: &str = "hugetlbfs"; @@ -704,7 +704,7 @@ async fn virtio_nvdimm_device_handler( Ok(DevNumUpdate::from_vm_path(&device.vm_path)?.into()) } -fn split_vfio_option(opt: &str) -> Option<(&str, &str)> { +fn split_vfio_pci_option(opt: &str) -> Option<(&str, &str)> { let mut tokens = opt.split('='); let hostbdf = tokens.next()?; let path = tokens.next()?; @@ -719,14 +719,18 @@ fn split_vfio_option(opt: &str) -> Option<(&str, &str)> { // Each option should have the form "DDDD:BB:DD.F=" // DDDD:BB:DD.F is the device's PCI address in the host // is a PCI path to the device in the guest (see pci.rs) -async fn vfio_device_handler(device: &Device, sandbox: &Arc>) -> Result { - let vfio_in_guest = device.field_type != DRIVER_VFIO_GK_TYPE; +#[instrument] +async fn vfio_pci_device_handler( + device: &Device, + sandbox: &Arc>, +) -> Result { + let vfio_in_guest = device.field_type != DRIVER_VFIO_PCI_GK_TYPE; let mut pci_fixups = Vec::<(pci::Address, pci::Address)>::new(); let mut group = None; for opt in device.options.iter() { let (host, pcipath) = - split_vfio_option(opt).ok_or_else(|| anyhow!("Malformed VFIO option {:?}", opt))?; + split_vfio_pci_option(opt).ok_or_else(|| anyhow!("Malformed VFIO option {:?}", opt))?; let host = pci::Address::from_str(host).context("Bad host PCI address in VFIO option {:?}")?; let pcipath = pci::Path::from_str(pcipath)?; @@ -833,7 +837,9 @@ async fn add_device(device: &Device, sandbox: &Arc>) -> Result virtiommio_blk_device_handler(device, sandbox).await, DRIVER_NVDIMM_TYPE => virtio_nvdimm_device_handler(device, sandbox).await, DRIVER_SCSI_TYPE => virtio_scsi_device_handler(device, sandbox).await, - DRIVER_VFIO_GK_TYPE | DRIVER_VFIO_TYPE => vfio_device_handler(device, sandbox).await, + DRIVER_VFIO_PCI_GK_TYPE | DRIVER_VFIO_PCI_TYPE => { + vfio_pci_device_handler(device, sandbox).await + } _ => Err(anyhow!("Unknown device type {}", device.field_type)), } } @@ -1492,13 +1498,13 @@ mod tests { } #[test] - fn test_split_vfio_option() { + fn test_split_vfio_pci_option() { assert_eq!( - split_vfio_option("0000:01:00.0=02/01"), + split_vfio_pci_option("0000:01:00.0=02/01"), Some(("0000:01:00.0", "02/01")) ); - assert_eq!(split_vfio_option("0000:01:00.0=02/01=rubbish"), None); - assert_eq!(split_vfio_option("0000:01:00.0"), None); + assert_eq!(split_vfio_pci_option("0000:01:00.0=02/01=rubbish"), None); + assert_eq!(split_vfio_pci_option("0000:01:00.0"), None); } #[test] diff --git a/src/runtime/pkg/device/config/config.go b/src/runtime/pkg/device/config/config.go index 35af6afec1..a44723dac6 100644 --- a/src/runtime/pkg/device/config/config.go +++ b/src/runtime/pkg/device/config/config.go @@ -258,11 +258,11 @@ const ( // VFIODeviceErrorType is the error type of VFIO device VFIODeviceErrorType VFIODeviceType = iota - // VFIODeviceNormalType is a normal VFIO device type - VFIODeviceNormalType + // VFIOPCIDeviceNormalType is a normal VFIO PCI device type + VFIOPCIDeviceNormalType - // VFIODeviceMediatedType is a VFIO mediated device type - VFIODeviceMediatedType + // VFIOPCIDeviceMediatedType is a VFIO PCI mediated device type + VFIOPCIDeviceMediatedType ) // VFIODev represents a VFIO drive used for hotplugging diff --git a/src/runtime/pkg/device/drivers/utils.go b/src/runtime/pkg/device/drivers/utils.go index 25f021eda0..abdf5d816b 100644 --- a/src/runtime/pkg/device/drivers/utils.go +++ b/src/runtime/pkg/device/drivers/utils.go @@ -94,12 +94,12 @@ func GetVFIODeviceType(deviceFileName string) config.VFIODeviceType { tokens := strings.Split(deviceFileName, ":") vfioDeviceType := config.VFIODeviceErrorType if len(tokens) == 3 { - vfioDeviceType = config.VFIODeviceNormalType + vfioDeviceType = config.VFIOPCIDeviceNormalType } else { //For example, 83b8f4f2-509f-382f-3c1e-e6bfe0fa1001 tokens = strings.Split(deviceFileName, "-") if len(tokens) == 5 { - vfioDeviceType = config.VFIODeviceMediatedType + vfioDeviceType = config.VFIOPCIDeviceMediatedType } } return vfioDeviceType diff --git a/src/runtime/pkg/device/drivers/vfio.go b/src/runtime/pkg/device/drivers/vfio.go index 58658b0b88..86033aa736 100644 --- a/src/runtime/pkg/device/drivers/vfio.go +++ b/src/runtime/pkg/device/drivers/vfio.go @@ -207,12 +207,12 @@ func getVFIODetails(deviceFileName, iommuDevicesPath string) (deviceBDF, deviceS vfioDeviceType = GetVFIODeviceType(deviceFileName) switch vfioDeviceType { - case config.VFIODeviceNormalType: + case config.VFIOPCIDeviceNormalType: // Get bdf of device eg. 0000:00:1c.0 deviceBDF = getBDF(deviceFileName) // Get sysfs path used by cloud-hypervisor deviceSysfsDev = filepath.Join(config.SysBusPciDevicesPath, deviceFileName) - case config.VFIODeviceMediatedType: + case config.VFIOPCIDeviceMediatedType: // Get sysfsdev of device eg. /sys/devices/pci0000:00/0000:00:02.0/f79944e4-5a3d-11e8-99ce-479cbab002e4 sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName) deviceSysfsDev, err = getSysfsDev(sysfsDevStr) diff --git a/src/runtime/pkg/device/drivers/vfio_test.go b/src/runtime/pkg/device/drivers/vfio_test.go index 3c25a64c3c..8ee008e3b7 100644 --- a/src/runtime/pkg/device/drivers/vfio_test.go +++ b/src/runtime/pkg/device/drivers/vfio_test.go @@ -32,9 +32,9 @@ func TestGetVFIODetails(t *testing.T) { deviceBDF, deviceSysfsDev, vfioDeviceType, err := getVFIODetails(d.deviceStr, "") switch vfioDeviceType { - case config.VFIODeviceNormalType: + case config.VFIOPCIDeviceNormalType: assert.Equal(t, d.expectedStr, deviceBDF) - case config.VFIODeviceMediatedType: + case config.VFIOPCIDeviceMediatedType: assert.Equal(t, d.expectedStr, deviceSysfsDev) default: assert.NotNil(t, err) diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 66784404f3..98411aa458 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -1117,10 +1117,10 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c * groupNum := filepath.Base(dev.ContainerPath) - // Each /dev/vfio/NN device represents a VFIO group, which - // could include several PCI devices. So we give group - // information in the main structure, then list each - // individual PCI device in the Options array. + // For VFIO-PCI, each /dev/vfio/NN device represents a VFIO group, + // which could include several PCI devices. So we give group + // information in the main structure, then list each individual PCI + // device in the Options array. // // Each option is formatted as "DDDD:BB:DD.F=" // DDDD:BB:DD.F is the device's PCI address on the @@ -1128,9 +1128,9 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c * // (see qomGetPciPath() for details). kataDevice := &grpc.Device{ ContainerPath: dev.ContainerPath, - Type: kataVfioDevType, + Type: kataVfioPciDevType, Id: groupNum, - Options: make([]string, len(devList)), + Options: nil, } // We always pass the device information to the agent, since @@ -1138,7 +1138,7 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c * // on the vfio_mode, we need to use a different device type so // the agent can handle it properly if c.sandbox.config.VfioMode == config.VFIOModeGuestKernel { - kataDevice.Type = kataVfioGuestKernelDevType + kataDevice.Type = kataVfioPciGuestKernelDevType } for i, pciDev := range devList { diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 0d4fa3e85b..17b3edbe78 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -1743,9 +1743,9 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op } switch device.Type { - case config.VFIODeviceNormalType: + case config.VFIOPCIDeviceNormalType: err = q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, device.Bus, romFile) - case config.VFIODeviceMediatedType: + case config.VFIOPCIDeviceMediatedType: if utils.IsAPVFIOMediatedDevice(device.SysfsDev) { err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev) } else { @@ -1767,9 +1767,9 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op }() switch device.Type { - case config.VFIODeviceNormalType: + case config.VFIOPCIDeviceNormalType: err = q.qmpMonitorCh.qmp.ExecutePCIVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, addr, bridge.ID, romFile) - case config.VFIODeviceMediatedType: + case config.VFIOPCIDeviceMediatedType: if utils.IsAPVFIOMediatedDevice(device.SysfsDev) { err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev) } else { From b546eca26f0ef8e35288afe6cab0021fdc75ed46 Mon Sep 17 00:00:00 2001 From: Jakob Naucke Date: Tue, 8 Mar 2022 18:51:08 +0100 Subject: [PATCH 4/6] runtime: Generalize VFIO devices Generalize VFIO devices to allow for adding AP in the next patch. The logic for VFIOPciDeviceMediatedType() has been changed and IsAPVFIOMediatedDevice() has been removed. The rationale for the revomal is: - VFIODeviceMediatedType is divided into 2 subtypes for AP and PCI - Logic of checking a subtype of mediated device is included in GetVFIODeviceType() - VFIOPciDeviceMediatedType() can simply fulfill the device addition based on a type categorized by GetVFIODeviceType() Signed-off-by: Jakob Naucke --- src/runtime/pkg/device/config/config.go | 22 +++++- src/runtime/pkg/device/drivers/vfio.go | 50 ++++++++---- src/runtime/virtcontainers/clh.go | 11 ++- src/runtime/virtcontainers/clh_test.go | 2 +- src/runtime/virtcontainers/kata_agent.go | 6 +- src/runtime/virtcontainers/qemu.go | 79 +++++++++++-------- src/runtime/virtcontainers/qemu_arch_base.go | 11 +-- .../virtcontainers/qemu_arch_base_test.go | 4 +- src/runtime/virtcontainers/sandbox.go | 16 +++- .../virtcontainers/utils/utils_linux.go | 3 +- .../virtcontainers/utils/utils_linux_test.go | 16 ---- 11 files changed, 135 insertions(+), 85 deletions(-) diff --git a/src/runtime/pkg/device/config/config.go b/src/runtime/pkg/device/config/config.go index a44723dac6..6bcbed32c3 100644 --- a/src/runtime/pkg/device/config/config.go +++ b/src/runtime/pkg/device/config/config.go @@ -265,8 +265,14 @@ const ( VFIOPCIDeviceMediatedType ) -// VFIODev represents a VFIO drive used for hotplugging -type VFIODev struct { +type VFIODev interface { + GetID() *string + GetType() VFIODeviceType + GetSysfsDev() *string +} + +// VFIOPCIDev represents a VFIO PCI device used for hotplugging +type VFIOPCIDev struct { // ID is used to identify this drive in the hypervisor options. ID string @@ -298,6 +304,18 @@ type VFIODev struct { IsPCIe bool } +func (d VFIOPCIDev) GetID() *string { + return &d.ID +} + +func (d VFIOPCIDev) GetType() VFIODeviceType { + return d.Type +} + +func (d VFIOPCIDev) GetSysfsDev() *string { + return &d.SysfsDev +} + // RNGDev represents a random number generator device type RNGDev struct { // ID is used to identify the device in the hypervisor options. diff --git a/src/runtime/pkg/device/drivers/vfio.go b/src/runtime/pkg/device/drivers/vfio.go index 86033aa736..a07a7df734 100644 --- a/src/runtime/pkg/device/drivers/vfio.go +++ b/src/runtime/pkg/device/drivers/vfio.go @@ -85,19 +85,29 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece if err != nil { return err } - vfio := &config.VFIODev{ - ID: utils.MakeNameID("vfio", device.DeviceInfo.ID+strconv.Itoa(i), maxDevIDSize), - Type: vfioDeviceType, - BDF: deviceBDF, - SysfsDev: deviceSysfsDev, - IsPCIe: isPCIeDevice(deviceBDF), - Class: getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass), - } - device.VfioDevs = append(device.VfioDevs, vfio) - if vfio.IsPCIe { - vfio.Bus = fmt.Sprintf("%s%d", pcieRootPortPrefix, len(AllPCIeDevs)) - AllPCIeDevs[vfio.BDF] = true + id := utils.MakeNameID("vfio", device.DeviceInfo.ID+strconv.Itoa(i), maxDevIDSize) + + var vfio config.VFIODev + + switch vfioDeviceType { + case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType: + isPCIe := isPCIeDevice(deviceBDF) + // Do not directly assign to `vfio` -- need to access field still + vfioPCI := config.VFIOPCIDev{ + ID: id, + Type: vfioDeviceType, + BDF: deviceBDF, + SysfsDev: deviceSysfsDev, + IsPCIe: isPCIe, + Class: getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass), + } + if isPCIe { + vfioPCI.Bus = fmt.Sprintf("%s%d", pcieRootPortPrefix, len(AllPCIeDevs)) + AllPCIeDevs[deviceBDF] = true + } + vfio = vfioPCI } + device.VfioDevs = append(device.VfioDevs, &vfio) } coldPlug := device.DeviceInfo.ColdPlug @@ -180,7 +190,16 @@ func (device *VFIODevice) Save() config.DeviceState { devs := device.VfioDevs for _, dev := range devs { if dev != nil { - ds.VFIODevs = append(ds.VFIODevs, dev) + bdf := "" + if pciDev, ok := (*dev).(config.VFIOPCIDev); ok { + bdf = pciDev.BDF + } + ds.VFIODevs = append(ds.VFIODevs, &persistapi.VFIODev{ + ID: *(*dev).GetID(), + Type: uint32((*dev).GetType()), + BDF: bdf, + SysfsDev: *(*dev).GetSysfsDev(), + }) } } return ds @@ -192,12 +211,13 @@ func (device *VFIODevice) Load(ds config.DeviceState) { device.GenericDevice.Load(ds) for _, dev := range ds.VFIODevs { - device.VfioDevs = append(device.VfioDevs, &config.VFIODev{ + var vfioDev config.VFIODev = config.VFIOPCIDev{ ID: dev.ID, Type: config.VFIODeviceType(dev.Type), BDF: dev.BDF, SysfsDev: dev.SysfsDev, - }) + } + device.VfioDevs = append(device.VfioDevs, &vfioDev) } } diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 71bd931dce..d9be15082a 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -856,6 +856,11 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error { ctx, cancel := context.WithTimeout(context.Background(), clhHotPlugAPITimeout*time.Second) defer cancel() + pciDevice, ok := (*device).(config.VFIOPCIDev) + if !ok { + return fmt.Errorf("VFIO device %+v is not PCI, only PCI is supported in Cloud Hypervisor", device) + } + // Create the clh device config via the constructor to ensure default values are properly assigned clhDevice := *chclient.NewDeviceConfig(device.SysfsDev) pciInfo, _, err := cl.VmAddDevicePut(ctx, clhDevice) @@ -879,7 +884,9 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error { return fmt.Errorf("Unexpected PCI address %q from clh hotplug", pciInfo.Bdf) } - device.GuestPciPath, err = types.PciPathFromString(tokens[0]) + guestPciPath, err := vcTypes.PciPathFromString(tokens[0]) + pciDevice.GuestPciPath = guestPciPath + *device = pciDevice return err } @@ -923,7 +930,7 @@ func (clh *cloudHypervisor) HotplugRemoveDevice(ctx context.Context, devInfo int case BlockDev: deviceID = clhDriveIndexToID(devInfo.(*config.BlockDrive).Index) case VfioDev: - deviceID = devInfo.(*config.VFIODev).ID + deviceID = *devInfo.(config.VFIODev).GetID() default: clh.Logger().WithFields(log.Fields{"devInfo": devInfo, "deviceType": devType}).Error("HotplugRemoveDevice: unsupported device") diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index 20e6935d85..a1c3c4b643 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -624,7 +624,7 @@ func TestCloudHypervisorHotplugRemoveDevice(t *testing.T) { _, err = clh.HotplugRemoveDevice(context.Background(), &config.BlockDrive{}, BlockDev) assert.NoError(err, "Hotplug remove block device expected no error") - _, err = clh.HotplugRemoveDevice(context.Background(), &config.VFIODev{}, VfioDev) + _, err = clh.HotplugRemoveDevice(context.Background(), &config.VFIOPCIDev{}, VfioDev) assert.NoError(err, "Hotplug remove vfio block device expected no error") _, err = clh.HotplugRemoveDevice(context.Background(), nil, NetDev) diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 98411aa458..60010a14a6 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -1141,8 +1141,10 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c * kataDevice.Type = kataVfioPciGuestKernelDevType } - for i, pciDev := range devList { - kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", pciDev.BDF, pciDev.GuestPciPath) + kataDevice.Options = make([]string, len(devList)) + for i, device := range devList { + pciDevice := (*device).(config.VFIOPCIDev) + kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", pciDevice.BDF, pciDevice.GuestPciPath) } return kataDevice diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 17b3edbe78..6354b9d8a4 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -1713,7 +1713,7 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op return err } - devID := device.ID + devID := *(*device).GetID() machineType := q.HypervisorConfig().HypervisorMachineType if op == AddDevice { @@ -1730,29 +1730,29 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op // for pc machine type instead of bridge. This is useful for devices that require // a large PCI BAR which is a currently a limitation with PCI bridges. if q.state.HotplugVFIOOnRootBus { - - // In case MachineType is q35, a PCIe device is hotplugged on a PCIe Root Port. - switch machineType { - case QemuQ35: - if device.IsPCIe && q.state.PCIeRootPort <= 0 { - q.Logger().WithField("dev-id", device.ID).Warn("VFIO device is a PCIe device. It's recommended to add the PCIe Root Port by setting the pcie_root_port parameter in the configuration for q35") - device.Bus = "" + switch (*device).GetType() { + case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType: + // In case MachineType is q35, a PCIe device is hotplugged on a PCIe Root Port. + pciDevice, ok := (*device).(config.VFIOPCIDev) + if !ok { + return fmt.Errorf("VFIO device %+v is not PCI, but its Type said otherwise", device) } - default: - device.Bus = "" - } + switch machineType { + case QemuQ35: + if pciDevice.IsPCIe && q.state.PCIeRootPort <= 0 { + q.Logger().WithField("dev-id", (*device).GetID()).Warn("VFIO device is a PCIe device. It's recommended to add the PCIe Root Port by setting the pcie_root_port parameter in the configuration for q35") + pciDevice.Bus = "" + } + default: + pciDevice.Bus = "" + } + *device = pciDevice - switch device.Type { - case config.VFIOPCIDeviceNormalType: - err = q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, device.Bus, romFile) - case config.VFIOPCIDeviceMediatedType: - if utils.IsAPVFIOMediatedDevice(device.SysfsDev) { - err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev) + if pciDevice.Type == config.VFIOPCIDeviceNormalType { + err = q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, pciDevice.BDF, pciDevice.Bus, romFile) } else { - err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, "", device.Bus, romFile) + err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, *(*device).GetSysfsDev(), "", pciDevice.Bus, romFile) } - default: - return fmt.Errorf("Incorrect VFIO device type found") } } else { addr, bridge, err := q.arch.addDeviceToBridge(ctx, devID, types.PCI) @@ -1766,15 +1766,15 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op } }() - switch device.Type { + switch (*device).GetType() { case config.VFIOPCIDeviceNormalType: - err = q.qmpMonitorCh.qmp.ExecutePCIVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, addr, bridge.ID, romFile) - case config.VFIOPCIDeviceMediatedType: - if utils.IsAPVFIOMediatedDevice(device.SysfsDev) { - err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev) - } else { - err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, addr, bridge.ID, romFile) + pciDevice, ok := (*device).(config.VFIOPCIDev) + if !ok { + return fmt.Errorf("VFIO device %+v is not PCI, but its Type said otherwise", device) } + err = q.qmpMonitorCh.qmp.ExecutePCIVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, pciDevice.BDF, addr, bridge.ID, romFile) + case config.VFIOPCIDeviceMediatedType: + err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, *(*device).GetSysfsDev(), addr, bridge.ID, romFile) default: return fmt.Errorf("Incorrect VFIO device type found") } @@ -1782,13 +1782,24 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op if err != nil { return err } - // XXX: Depending on whether we're doing root port or - // bridge hotplug, and how the bridge is set up in - // other parts of the code, we may or may not already - // have information about the slot number of the - // bridge and or the device. For simplicity, just - // query both of them back from qemu - device.GuestPciPath, err = q.qomGetPciPath(devID) + + switch (*device).GetType() { + case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType: + pciDevice, ok := (*device).(config.VFIOPCIDev) + if !ok { + return fmt.Errorf("VFIO device %+v is not PCI, but its Type said otherwise", device) + } + // XXX: Depending on whether we're doing root port or + // bridge hotplug, and how the bridge is set up in + // other parts of the code, we may or may not already + // have information about the slot number of the + // bridge and or the device. For simplicity, just + // query both of them back from qemu + guestPciPath, err := q.qomGetPciPath(devID) + pciDevice.GuestPciPath = guestPciPath + *device = pciDevice + return err + } return err } else { q.Logger().WithField("dev-id", devID).Info("Start hot-unplug VFIO device") diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go index 5a03b659f6..9aff1e76c2 100644 --- a/src/runtime/virtcontainers/qemu_arch_base.go +++ b/src/runtime/virtcontainers/qemu_arch_base.go @@ -675,16 +675,17 @@ func (q *qemuArchBase) appendVhostUserDevice(ctx context.Context, devices []govm } func (q *qemuArchBase) appendVFIODevice(devices []govmmQemu.Device, vfioDev config.VFIODev) []govmmQemu.Device { - if vfioDev.BDF == "" { + pciDevice := vfioDev.(config.VFIOPCIDev) + if pciDevice.BDF == "" { return devices } devices = append(devices, govmmQemu.VFIODevice{ - BDF: vfioDev.BDF, - VendorID: vfioDev.VendorID, - DeviceID: vfioDev.DeviceID, - Bus: vfioDev.Bus, + BDF: pciDevice.BDF, + VendorID: pciDevice.VendorID, + DeviceID: pciDevice.DeviceID, + Bus: pciDevice.Bus, }, ) diff --git a/src/runtime/virtcontainers/qemu_arch_base_test.go b/src/runtime/virtcontainers/qemu_arch_base_test.go index 37611bb5bb..51c11bd91d 100644 --- a/src/runtime/virtcontainers/qemu_arch_base_test.go +++ b/src/runtime/virtcontainers/qemu_arch_base_test.go @@ -463,7 +463,7 @@ func TestQemuArchBaseAppendVFIODevice(t *testing.T) { }, } - vfDevice := config.VFIODev{ + vfDevice := config.VFIOPCIDev{ BDF: bdf, } @@ -483,7 +483,7 @@ func TestQemuArchBaseAppendVFIODeviceWithVendorDeviceID(t *testing.T) { }, } - vfDevice := config.VFIODev{ + vfDevice := config.VFIOPCIDev{ BDF: bdf, VendorID: vendorID, DeviceID: deviceID, diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index dcc93048f8..e7c52fcc22 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -1856,11 +1856,15 @@ func (s *Sandbox) HotplugAddDevice(ctx context.Context, device api.Device, devTy // adding a group of VFIO devices for _, dev := range vfioDevices { if _, err := s.hypervisor.HotplugAddDevice(ctx, dev, VfioDev); err != nil { + bdf := "" + if pciDevice, ok := (*dev).(config.VFIOPCIDev); ok { + bdf = pciDevice.BDF + } s.Logger(). WithFields(logrus.Fields{ "sandbox": s.id, - "vfio-device-ID": dev.ID, - "vfio-device-BDF": dev.BDF, + "vfio-device-ID": (*dev).GetID(), + "vfio-device-BDF": bdf, }).WithError(err).Error("failed to hotplug VFIO device") return err } @@ -1909,11 +1913,15 @@ func (s *Sandbox) HotplugRemoveDevice(ctx context.Context, device api.Device, de // remove a group of VFIO devices for _, dev := range vfioDevices { if _, err := s.hypervisor.HotplugRemoveDevice(ctx, dev, VfioDev); err != nil { + bdf := "" + if pciDevice, ok := (*dev).(config.VFIOPCIDev); ok { + bdf = pciDevice.BDF + } s.Logger().WithError(err). WithFields(logrus.Fields{ "sandbox": s.id, - "vfio-device-ID": dev.ID, - "vfio-device-BDF": dev.BDF, + "vfio-device-ID": (*dev).GetID(), + "vfio-device-BDF": bdf, }).Error("failed to hot unplug VFIO device") return err } diff --git a/src/runtime/virtcontainers/utils/utils_linux.go b/src/runtime/virtcontainers/utils/utils_linux.go index 40c5c360ea..0b46d2572e 100644 --- a/src/runtime/virtcontainers/utils/utils_linux.go +++ b/src/runtime/virtcontainers/utils/utils_linux.go @@ -89,8 +89,7 @@ func FindContextID() (*os.File, uint64, error) { const ( procMountsFile = "/proc/mounts" - fieldsPerLine = 6 - vfioAPSysfsDir = "vfio_ap" + fieldsPerLine = 6 ) const ( diff --git a/src/runtime/virtcontainers/utils/utils_linux_test.go b/src/runtime/virtcontainers/utils/utils_linux_test.go index dbf9fde38b..c8e213c2c7 100644 --- a/src/runtime/virtcontainers/utils/utils_linux_test.go +++ b/src/runtime/virtcontainers/utils/utils_linux_test.go @@ -63,19 +63,3 @@ func TestGetDevicePathAndFsTypeOptionsSuccessful(t *testing.T) { assert.Equal(fstype, fstypeOut) assert.Equal(fsOptions, optsOut) } - -func TestIsAPVFIOMediatedDeviceFalse(t *testing.T) { - assert := assert.New(t) - - // Should be false for a PCI device - isAPMdev := IsAPVFIOMediatedDevice("/sys/bus/pci/devices/0000:00:02.0/a297db4a-f4c2-11e6-90f6-d3b88d6c9525") - assert.False(isAPMdev) -} - -func TestIsAPVFIOMediatedDeviceTrue(t *testing.T) { - assert := assert.New(t) - - // Typical AP sysfsdev - isAPMdev := IsAPVFIOMediatedDevice("/sys/devices/vfio_ap/matrix/a297db4a-f4c2-11e6-90f6-d3b88d6c9525") - assert.True(isAPMdev) -} From f666f8e2df6bfee78c34d1bf449865bd9c1f4b8f Mon Sep 17 00:00:00 2001 From: Jakob Naucke Date: Tue, 8 Mar 2022 18:51:08 +0100 Subject: [PATCH 5/6] agent: Add VFIO-AP device handling Initial VFIO-AP support (#578) was simple, but somewhat hacky; a different code path would be chosen for performing the hotplug, and agent-side device handling was bound to knowing the assigned queue numbers (APQNs) through some other means; plus the code for awaiting them was written for the Go agent and never released. This code also artificially increased the hotplug timeout to wait for the (relatively expensive, thus limited to 5 seconds at the quickest) AP rescan, which is impractical for e.g. common k8s timeouts. Since then, the general handling logic was improved (#1190), but it assumed PCI in several places. In the runtime, introduce and parse AP devices. Annotate them as such when passing to the agent, and include information about the associated APQNs. The agent awaits the passed APQNs through uevents and triggers a rescan directly. Fixes: #3678 Signed-off-by: Jakob Naucke --- src/agent/src/ap.rs | 79 ++++++++++++++++++ src/agent/src/device.rs | 89 +++++++++++++++++++++ src/agent/src/linux_abi.rs | 2 + src/agent/src/main.rs | 1 + src/runtime/pkg/device/config/config.go | 29 +++++++ src/runtime/pkg/device/drivers/utils.go | 49 +++++++++--- src/runtime/pkg/device/drivers/vfio.go | 67 ++++++++++------ src/runtime/pkg/device/drivers/vfio_test.go | 2 +- src/runtime/virtcontainers/clh.go | 4 +- src/runtime/virtcontainers/kata_agent.go | 83 ++++++++++--------- src/runtime/virtcontainers/qemu.go | 4 + 11 files changed, 335 insertions(+), 74 deletions(-) create mode 100644 src/agent/src/ap.rs diff --git a/src/agent/src/ap.rs b/src/agent/src/ap.rs new file mode 100644 index 0000000000..454ada9ff0 --- /dev/null +++ b/src/agent/src/ap.rs @@ -0,0 +1,79 @@ +// Copyright (c) IBM Corp. 2022 +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::fmt; +use std::str::FromStr; + +use anyhow::{anyhow, Context}; + +// IBM Adjunct Processor (AP) is the bus used by IBM Crypto Express hardware security modules on +// IBM Z & LinuxONE (s390x) +// AP bus ID follow the format . [1, p. 476], where +// - is the adapter ID, i.e. the card and +// - is the adapter domain. +// [1] https://www.ibm.com/docs/en/linuxonibm/pdf/lku5dd05.pdf + +#[derive(Debug)] +pub struct Address { + pub adapter_id: u8, + pub adapter_domain: u16, +} + +impl Address { + pub fn new(adapter_id: u8, adapter_domain: u16) -> Address { + Address { + adapter_id, + adapter_domain, + } + } +} + +impl FromStr for Address { + type Err = anyhow::Error; + + fn from_str(s: &str) -> anyhow::Result { + let split: Vec<&str> = s.split('.').collect(); + if split.len() != 2 { + return Err(anyhow!( + "Wrong AP bus format. It needs to be in the form ., got {:?}", + s + )); + } + + let adapter_id = u8::from_str_radix(split[0], 16).context(format!( + "Wrong AP bus format. AP ID needs to be in the form , got {:?}", + split[0] + ))?; + let adapter_domain = u16::from_str_radix(split[1], 16).context(format!( + "Wrong AP bus format. AP domain needs to be in the form , got {:?}", + split[1] + ))?; + + Ok(Address::new(adapter_id, adapter_domain)) + } +} + +impl fmt::Display for Address { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!(f, "{:02x}.{:04x}", self.adapter_id, self.adapter_domain) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_from_str() { + let device = Address::from_str("a.1").unwrap(); + assert_eq!(format!("{}", device), "0a.0001"); + + assert!(Address::from_str("").is_err()); + assert!(Address::from_str(".").is_err()); + assert!(Address::from_str("0.0.0").is_err()); + assert!(Address::from_str("0g.0000").is_err()); + assert!(Address::from_str("0a.10000").is_err()); + } +} diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index 23bc154a78..b95e304fbd 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -50,11 +50,13 @@ pub const DRIVER_VFIO_PCI_GK_TYPE: &str = "vfio-pci-gk"; // VFIO PCI device to be bound to vfio-pci and made available inside the // container as a VFIO device node pub const DRIVER_VFIO_PCI_TYPE: &str = "vfio-pci"; +pub const DRIVER_VFIO_AP_TYPE: &str = "vfio-ap"; pub const DRIVER_OVERLAYFS_TYPE: &str = "overlayfs"; pub const FS_TYPE_HUGETLB: &str = "hugetlbfs"; cfg_if! { if #[cfg(target_arch = "s390x")] { + use crate::ap; use crate::ccw; } } @@ -406,6 +408,39 @@ async fn get_vfio_device_name(sandbox: &Arc>, grp: IommuGroup) -> Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname)) } +#[cfg(target_arch = "s390x")] +#[derive(Debug)] +struct ApMatcher { + syspath: String, +} + +#[cfg(target_arch = "s390x")] +impl ApMatcher { + fn new(address: ap::Address) -> ApMatcher { + ApMatcher { + syspath: format!( + "{}/card{:02x}/{}", + AP_ROOT_BUS_PATH, address.adapter_id, address + ), + } + } +} + +#[cfg(target_arch = "s390x")] +impl UeventMatcher for ApMatcher { + fn is_match(&self, uev: &Uevent) -> bool { + uev.action == "add" && uev.devpath == self.syspath + } +} + +#[cfg(target_arch = "s390x")] +#[instrument] +async fn wait_for_ap_device(sandbox: &Arc>, address: ap::Address) -> Result<()> { + let matcher = ApMatcher::new(address); + wait_for_uevent(sandbox, matcher).await?; + Ok(()) +} + /// Scan SCSI bus for the given SCSI address(SCSI-Id and LUN) #[instrument] fn scan_scsi_bus(scsi_addr: &str) -> Result<()> { @@ -772,6 +807,28 @@ async fn vfio_pci_device_handler( }) } +// The VFIO AP (Adjunct Processor) device handler takes all the APQNs provided as device options +// and awaits them. It sets the minimum AP rescan time of 5 seconds and temporarily adds that +// amoutn to the hotplug timeout. +#[cfg(target_arch = "s390x")] +#[instrument] +async fn vfio_ap_device_handler( + device: &Device, + sandbox: &Arc>, +) -> Result { + // Force AP bus rescan + fs::write(AP_SCANS_PATH, "1")?; + for apqn in device.options.iter() { + wait_for_ap_device(sandbox, ap::Address::from_str(apqn)?).await?; + } + Ok(Default::default()) +} + +#[cfg(not(target_arch = "s390x"))] +async fn vfio_ap_device_handler(_: &Device, _: &Arc>) -> Result { + Err(anyhow!("AP is only supported on s390x")) +} + #[instrument] pub async fn add_devices( devices: &[Device], @@ -840,6 +897,7 @@ async fn add_device(device: &Device, sandbox: &Arc>) -> Result { vfio_pci_device_handler(device, sandbox).await } + DRIVER_VFIO_AP_TYPE => vfio_ap_device_handler(device, sandbox).await, _ => Err(anyhow!("Unknown device type {}", device.field_type)), } } @@ -1583,4 +1641,35 @@ mod tests { // Test dev2 assert!(pci_iommu_group(&syspci, dev2).is_err()); } + + #[cfg(target_arch = "s390x")] + #[tokio::test] + async fn test_vfio_ap_matcher() { + let subsystem = "ap"; + let card = "0a"; + let relpath = format!("{}.0001", card); + + let mut uev = Uevent::default(); + uev.action = U_EVENT_ACTION_ADD.to_string(); + uev.subsystem = subsystem.to_string(); + uev.devpath = format!("{}/card{}/{}", AP_ROOT_BUS_PATH, card, relpath); + + let ap_address = ap::Address::from_str(&relpath).unwrap(); + let matcher = ApMatcher::new(ap_address); + + assert!(matcher.is_match(&uev)); + + let mut uev_remove = uev.clone(); + uev_remove.action = U_EVENT_ACTION_REMOVE.to_string(); + assert!(!matcher.is_match(&uev_remove)); + + let mut uev_other_device = uev.clone(); + uev_other_device.devpath = format!( + "{}/card{}/{}", + AP_ROOT_BUS_PATH, + card, + format!("{}.0002", card) + ); + assert!(!matcher.is_match(&uev_other_device)); + } } diff --git a/src/agent/src/linux_abi.rs b/src/agent/src/linux_abi.rs index 9df398d615..042acd0aed 100644 --- a/src/agent/src/linux_abi.rs +++ b/src/agent/src/linux_abi.rs @@ -69,6 +69,8 @@ pub fn create_pci_root_bus_path() -> String { cfg_if! { if #[cfg(target_arch = "s390x")] { pub const CCW_ROOT_BUS_PATH: &str = "/devices/css0"; + pub const AP_ROOT_BUS_PATH: &str = "/devices/ap"; + pub const AP_SCANS_PATH: &str = "/sys/bus/ap/scans"; } } diff --git a/src/agent/src/main.rs b/src/agent/src/main.rs index 4ca89a9a15..085ea396dc 100644 --- a/src/agent/src/main.rs +++ b/src/agent/src/main.rs @@ -75,6 +75,7 @@ mod tracer; cfg_if! { if #[cfg(target_arch = "s390x")] { + mod ap; mod ccw; } } diff --git a/src/runtime/pkg/device/config/config.go b/src/runtime/pkg/device/config/config.go index 6bcbed32c3..dee6291ed9 100644 --- a/src/runtime/pkg/device/config/config.go +++ b/src/runtime/pkg/device/config/config.go @@ -263,6 +263,9 @@ const ( // VFIOPCIDeviceMediatedType is a VFIO PCI mediated device type VFIOPCIDeviceMediatedType + + // VFIOAPDeviceMediatedType is a VFIO AP mediated device type + VFIOAPDeviceMediatedType ) type VFIODev interface { @@ -316,6 +319,32 @@ func (d VFIOPCIDev) GetSysfsDev() *string { return &d.SysfsDev } +type VFIOAPDev struct { + // ID is used to identify this drive in the hypervisor options. + ID string + + // sysfsdev of VFIO mediated device + SysfsDev string + + // APDevices are the Adjunct Processor devices assigned to the mdev + APDevices []string + + // Type of VFIO device + Type VFIODeviceType +} + +func (d VFIOAPDev) GetID() *string { + return &d.ID +} + +func (d VFIOAPDev) GetType() VFIODeviceType { + return d.Type +} + +func (d VFIOAPDev) GetSysfsDev() *string { + return &d.SysfsDev +} + // RNGDev represents a random number generator device type RNGDev struct { // ID is used to identify the device in the hypervisor options. diff --git a/src/runtime/pkg/device/drivers/utils.go b/src/runtime/pkg/device/drivers/utils.go index abdf5d816b..79e00adbd1 100644 --- a/src/runtime/pkg/device/drivers/utils.go +++ b/src/runtime/pkg/device/drivers/utils.go @@ -89,18 +89,47 @@ func readPCIProperty(propertyPath string) (string, error) { return strings.Split(string(buf), "\n")[0], nil } -func GetVFIODeviceType(deviceFileName string) config.VFIODeviceType { +func GetVFIODeviceType(deviceFilePath string) (config.VFIODeviceType, error) { + deviceFileName := filepath.Base(deviceFilePath) + //For example, 0000:04:00.0 tokens := strings.Split(deviceFileName, ":") - vfioDeviceType := config.VFIODeviceErrorType if len(tokens) == 3 { - vfioDeviceType = config.VFIOPCIDeviceNormalType - } else { - //For example, 83b8f4f2-509f-382f-3c1e-e6bfe0fa1001 - tokens = strings.Split(deviceFileName, "-") - if len(tokens) == 5 { - vfioDeviceType = config.VFIOPCIDeviceMediatedType - } + return config.VFIOPCIDeviceNormalType, nil } - return vfioDeviceType + + //For example, 83b8f4f2-509f-382f-3c1e-e6bfe0fa1001 + tokens = strings.Split(deviceFileName, "-") + if len(tokens) != 5 { + return config.VFIODeviceErrorType, fmt.Errorf("Incorrect tokens found while parsing VFIO details: %s", deviceFileName) + } + + deviceSysfsDev, err := GetSysfsDev(deviceFilePath) + if err != nil { + return config.VFIODeviceErrorType, err + } + + if strings.HasPrefix(deviceSysfsDev, vfioAPSysfsDir) { + return config.VFIOAPDeviceMediatedType, nil + } + + return config.VFIOPCIDeviceMediatedType, nil +} + +// GetSysfsDev returns the sysfsdev of mediated device +// Expected input string format is absolute path to the sysfs dev node +// eg. /sys/kernel/iommu_groups/0/devices/f79944e4-5a3d-11e8-99ce-479cbab002e4 +func GetSysfsDev(sysfsDevStr string) (string, error) { + return filepath.EvalSymlinks(sysfsDevStr) +} + +// GetAPVFIODevices retrieves all APQNs associated with a mediated VFIO-AP +// device +func GetAPVFIODevices(sysfsdev string) ([]string, error) { + data, err := os.ReadFile(filepath.Join(sysfsdev, "matrix")) + if err != nil { + return []string{}, err + } + // Split by newlines, omitting final newline + return strings.Split(string(data[:len(data)-1]), "\n"), nil } diff --git a/src/runtime/pkg/device/drivers/vfio.go b/src/runtime/pkg/device/drivers/vfio.go index a07a7df734..3450e61d9c 100644 --- a/src/runtime/pkg/device/drivers/vfio.go +++ b/src/runtime/pkg/device/drivers/vfio.go @@ -30,6 +30,7 @@ const ( iommuGroupPath = "/sys/bus/pci/devices/%s/iommu_group" vfioDevPath = "/dev/vfio/%s" pcieRootPortPrefix = "rp" + vfioAPSysfsDir = "/sys/devices/vfio_ap" ) var ( @@ -106,6 +107,17 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece AllPCIeDevs[deviceBDF] = true } vfio = vfioPCI + case config.VFIOAPDeviceMediatedType: + devices, err := GetAPVFIODevices(deviceSysfsDev) + if err != nil { + return err + } + vfio = config.VFIOAPDev{ + ID: id, + SysfsDev: deviceSysfsDev, + Type: config.VFIOAPDeviceMediatedType, + APDevices: devices, + } } device.VfioDevs = append(device.VfioDevs, &vfio) } @@ -190,16 +202,7 @@ func (device *VFIODevice) Save() config.DeviceState { devs := device.VfioDevs for _, dev := range devs { if dev != nil { - bdf := "" - if pciDev, ok := (*dev).(config.VFIOPCIDev); ok { - bdf = pciDev.BDF - } - ds.VFIODevs = append(ds.VFIODevs, &persistapi.VFIODev{ - ID: *(*dev).GetID(), - Type: uint32((*dev).GetType()), - BDF: bdf, - SysfsDev: *(*dev).GetSysfsDev(), - }) + ds.VFIODevs = append(ds.VFIODevs, dev) } } return ds @@ -211,20 +214,38 @@ func (device *VFIODevice) Load(ds config.DeviceState) { device.GenericDevice.Load(ds) for _, dev := range ds.VFIODevs { - var vfioDev config.VFIODev = config.VFIOPCIDev{ - ID: dev.ID, - Type: config.VFIODeviceType(dev.Type), - BDF: dev.BDF, - SysfsDev: dev.SysfsDev, + var vfio config.VFIODev + + if (*device.VfioDevs[0]).GetType() == config.VFIOAPDeviceMediatedType { + vfio = config.VFIOAPDev{ + ID: *(*dev).GetID(), + SysfsDev: *(*dev).GetSysfsDev(), + } + } else { + bdf := "" + if pciDev, ok := (*dev).(config.VFIOPCIDev); ok { + bdf = pciDev.BDF + } + vfio = config.VFIOPCIDev{ + ID: *(*dev).GetID(), + Type: config.VFIODeviceType((*dev).GetType()), + BDF: bdf, + SysfsDev: *(*dev).GetSysfsDev(), + } } - device.VfioDevs = append(device.VfioDevs, &vfioDev) + + device.VfioDevs = append(device.VfioDevs, &vfio) } } // It should implement GetAttachCount() and DeviceID() as api.Device implementation // here it shares function from *GenericDevice so we don't need duplicate codes func getVFIODetails(deviceFileName, iommuDevicesPath string) (deviceBDF, deviceSysfsDev string, vfioDeviceType config.VFIODeviceType, err error) { - vfioDeviceType = GetVFIODeviceType(deviceFileName) + sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName) + vfioDeviceType, err = GetVFIODeviceType(sysfsDevStr) + if err != nil { + return deviceBDF, deviceSysfsDev, vfioDeviceType, err + } switch vfioDeviceType { case config.VFIOPCIDeviceNormalType: @@ -235,8 +256,11 @@ func getVFIODetails(deviceFileName, iommuDevicesPath string) (deviceBDF, deviceS case config.VFIOPCIDeviceMediatedType: // Get sysfsdev of device eg. /sys/devices/pci0000:00/0000:00:02.0/f79944e4-5a3d-11e8-99ce-479cbab002e4 sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName) - deviceSysfsDev, err = getSysfsDev(sysfsDevStr) + deviceSysfsDev, err = GetSysfsDev(sysfsDevStr) deviceBDF = getBDF(getMediatedBDF(deviceSysfsDev)) + case config.VFIOAPDeviceMediatedType: + sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName) + deviceSysfsDev, err = GetSysfsDev(sysfsDevStr) default: err = fmt.Errorf("Incorrect tokens found while parsing vfio details: %s", deviceFileName) } @@ -264,13 +288,6 @@ func getBDF(deviceSysStr string) string { return tokens[1] } -// getSysfsDev returns the sysfsdev of mediated device -// Expected input string format is absolute path to the sysfs dev node -// eg. /sys/kernel/iommu_groups/0/devices/f79944e4-5a3d-11e8-99ce-479cbab002e4 -func getSysfsDev(sysfsDevStr string) (string, error) { - return filepath.EvalSymlinks(sysfsDevStr) -} - // BindDevicetoVFIO binds the device to vfio driver after unbinding from host. // Will be called by a network interface or a generic pcie device. func BindDevicetoVFIO(bdf, hostDriver, vendorDeviceID string) (string, error) { diff --git a/src/runtime/pkg/device/drivers/vfio_test.go b/src/runtime/pkg/device/drivers/vfio_test.go index 8ee008e3b7..6a1ab61eb6 100644 --- a/src/runtime/pkg/device/drivers/vfio_test.go +++ b/src/runtime/pkg/device/drivers/vfio_test.go @@ -34,7 +34,7 @@ func TestGetVFIODetails(t *testing.T) { switch vfioDeviceType { case config.VFIOPCIDeviceNormalType: assert.Equal(t, d.expectedStr, deviceBDF) - case config.VFIOPCIDeviceMediatedType: + case config.VFIOPCIDeviceMediatedType, config.VFIOAPDeviceMediatedType: assert.Equal(t, d.expectedStr, deviceSysfsDev) default: assert.NotNil(t, err) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index d9be15082a..8799d6a93d 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -867,7 +867,7 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error { if err != nil { return fmt.Errorf("Failed to hotplug device %+v %s", device, openAPIClientError(err)) } - clh.devicesIds[device.ID] = pciInfo.GetId() + clh.devicesIds[*(*device).GetID()] = pciInfo.GetId() // clh doesn't use bridges, so the PCI path is simply the slot // number of the device. This will break if clh starts using @@ -884,7 +884,7 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error { return fmt.Errorf("Unexpected PCI address %q from clh hotplug", pciInfo.Bdf) } - guestPciPath, err := vcTypes.PciPathFromString(tokens[0]) + guestPciPath, err := types.PciPathFromString(tokens[0]) pciDevice.GuestPciPath = guestPciPath *device = pciDevice diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 60010a14a6..9e5c8b34f4 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -77,38 +77,39 @@ const ( ) var ( - checkRequestTimeout = 30 * time.Second - defaultRequestTimeout = 60 * time.Second - errorMissingOCISpec = errors.New("Missing OCI specification") - defaultKataHostSharedDir = "/run/kata-containers/shared/sandboxes/" - defaultKataGuestSharedDir = "/run/kata-containers/shared/containers/" - defaultKataGuestNydusRootDir = "/run/kata-containers/shared/" - mountGuestTag = "kataShared" - defaultKataGuestSandboxDir = "/run/kata-containers/sandbox/" - type9pFs = "9p" - typeVirtioFS = "virtiofs" - typeOverlayFS = "overlay" - kata9pDevType = "9p" - kataMmioBlkDevType = "mmioblk" - kataBlkDevType = "blk" - kataBlkCCWDevType = "blk-ccw" - kataSCSIDevType = "scsi" - kataNvdimmDevType = "nvdimm" - kataVirtioFSDevType = "virtio-fs" - kataOverlayDevType = "overlayfs" - kataWatchableBindDevType = "watchable-bind" - kataVfioDevType = "vfio" // VFIO device to used as VFIO in the container - kataVfioGuestKernelDevType = "vfio-gk" // VFIO device for consumption by the guest kernel - sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"} - sharedDirVirtioFSOptions = []string{} - sharedDirVirtioFSDaxOptions = "dax" - shmDir = "shm" - kataEphemeralDevType = "ephemeral" - defaultEphemeralPath = filepath.Join(defaultKataGuestSandboxDir, kataEphemeralDevType) - grpcMaxDataSize = int64(1024 * 1024) - localDirOptions = []string{"mode=0777"} - maxHostnameLen = 64 - GuestDNSFile = "/etc/resolv.conf" + checkRequestTimeout = 30 * time.Second + defaultRequestTimeout = 60 * time.Second + errorMissingOCISpec = errors.New("Missing OCI specification") + defaultKataHostSharedDir = "/run/kata-containers/shared/sandboxes/" + defaultKataGuestSharedDir = "/run/kata-containers/shared/containers/" + defaultKataGuestNydusRootDir = "/run/kata-containers/shared/" + mountGuestTag = "kataShared" + defaultKataGuestSandboxDir = "/run/kata-containers/sandbox/" + type9pFs = "9p" + typeVirtioFS = "virtiofs" + typeOverlayFS = "overlay" + kata9pDevType = "9p" + kataMmioBlkDevType = "mmioblk" + kataBlkDevType = "blk" + kataBlkCCWDevType = "blk-ccw" + kataSCSIDevType = "scsi" + kataNvdimmDevType = "nvdimm" + kataVirtioFSDevType = "virtio-fs" + kataOverlayDevType = "overlayfs" + kataWatchableBindDevType = "watchable-bind" + kataVfioPciDevType = "vfio-pci" // VFIO PCI device to used as VFIO in the container + kataVfioPciGuestKernelDevType = "vfio-pci-gk" // VFIO PCI device for consumption by the guest kernel + kataVfioApDevType = "vfio-ap" + sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"} + sharedDirVirtioFSOptions = []string{} + sharedDirVirtioFSDaxOptions = "dax" + shmDir = "shm" + kataEphemeralDevType = "ephemeral" + defaultEphemeralPath = filepath.Join(defaultKataGuestSandboxDir, kataEphemeralDevType) + grpcMaxDataSize = int64(1024 * 1024) + localDirOptions = []string{"mode=0777"} + maxHostnameLen = 64 + GuestDNSFile = "/etc/resolv.conf" ) const ( @@ -1126,6 +1127,11 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c * // DDDD:BB:DD.F is the device's PCI address on the // *host*. is the device's PCI path in the guest // (see qomGetPciPath() for details). + // + // For VFIO-AP, one VFIO group could include several queue devices. They are + // identified by APQNs (Adjunct Processor Queue Numbers), which do not differ + // between host and guest. They are passed as options so they can be awaited + // by the agent. kataDevice := &grpc.Device{ ContainerPath: dev.ContainerPath, Type: kataVfioPciDevType, @@ -1141,10 +1147,15 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c * kataDevice.Type = kataVfioPciGuestKernelDevType } - kataDevice.Options = make([]string, len(devList)) - for i, device := range devList { - pciDevice := (*device).(config.VFIOPCIDev) - kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", pciDevice.BDF, pciDevice.GuestPciPath) + if (*devList[0]).GetType() == config.VFIOAPDeviceMediatedType { + kataDevice.Type = kataVfioApDevType + kataDevice.Options = (*devList[0]).(config.VFIOAPDev).APDevices + } else { + kataDevice.Options = make([]string, len(devList)) + for i, device := range devList { + pciDevice := (*device).(config.VFIOPCIDev) + kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", pciDevice.BDF, pciDevice.GuestPciPath) + } } return kataDevice diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 6354b9d8a4..16872c1265 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -1753,6 +1753,8 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op } else { err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, *(*device).GetSysfsDev(), "", pciDevice.Bus, romFile) } + case config.VFIOAPDeviceMediatedType: + err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, *(*device).GetSysfsDev()) } } else { addr, bridge, err := q.arch.addDeviceToBridge(ctx, devID, types.PCI) @@ -1775,6 +1777,8 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op err = q.qmpMonitorCh.qmp.ExecutePCIVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, pciDevice.BDF, addr, bridge.ID, romFile) case config.VFIOPCIDeviceMediatedType: err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, *(*device).GetSysfsDev(), addr, bridge.ID, romFile) + case config.VFIOAPDeviceMediatedType: + err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, *(*device).GetSysfsDev()) default: return fmt.Errorf("Incorrect VFIO device type found") } From 96baa838952594c506805c7cc809d7645cf84f18 Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Mon, 4 Jul 2022 17:41:00 +0200 Subject: [PATCH 6/6] agent: Bring in VFIO-AP device handling again This PR is a continuing work for (kata-containers#3679). This generalizes the previous VFIO device handling which only focuses on PCI to include AP (IBM Z specific). Fixes: kata-containers#3678 Signed-off-by: Hyounggyu Choi --- src/agent/src/ap.rs | 20 +++++++++--------- src/agent/src/device.rs | 6 +++--- src/runtime/pkg/device/drivers/vfio.go | 21 +++++++++++++------ src/runtime/virtcontainers/clh.go | 12 +++++------ .../virtcontainers/utils/utils_linux.go | 12 ----------- 5 files changed, 34 insertions(+), 37 deletions(-) diff --git a/src/agent/src/ap.rs b/src/agent/src/ap.rs index 454ada9ff0..202d330640 100644 --- a/src/agent/src/ap.rs +++ b/src/agent/src/ap.rs @@ -1,18 +1,18 @@ -// Copyright (c) IBM Corp. 2022 +// Copyright (c) IBM Corp. 2023 // // SPDX-License-Identifier: Apache-2.0 // - use std::fmt; use std::str::FromStr; use anyhow::{anyhow, Context}; -// IBM Adjunct Processor (AP) is the bus used by IBM Crypto Express hardware security modules on -// IBM Z & LinuxONE (s390x) -// AP bus ID follow the format . [1, p. 476], where -// - is the adapter ID, i.e. the card and -// - is the adapter domain. +// IBM Adjunct Processor (AP) is used for cryptographic operations +// by IBM Crypto Express hardware security modules on IBM zSystem & LinuxONE (s390x). +// In Linux, virtual cryptographic devices are called AP queues. +// The name of an AP queue respects a format . in hexadecimal notation [1, p.467]: +// - is an adapter ID +// - is an adapter domain ID // [1] https://www.ibm.com/docs/en/linuxonibm/pdf/lku5dd05.pdf #[derive(Debug)] @@ -37,17 +37,17 @@ impl FromStr for Address { let split: Vec<&str> = s.split('.').collect(); if split.len() != 2 { return Err(anyhow!( - "Wrong AP bus format. It needs to be in the form ., got {:?}", + "Wrong AP bus format. It needs to be in the form . (e.g. 0a.003f), got {:?}", s )); } let adapter_id = u8::from_str_radix(split[0], 16).context(format!( - "Wrong AP bus format. AP ID needs to be in the form , got {:?}", + "Wrong AP bus format. AP ID needs to be in the form (e.g. 0a), got {:?}", split[0] ))?; let adapter_domain = u16::from_str_radix(split[1], 16).context(format!( - "Wrong AP bus format. AP domain needs to be in the form , got {:?}", + "Wrong AP bus format. AP domain needs to be in the form (e.g. 003f), got {:?}", split[1] ))?; diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index b95e304fbd..535a729f60 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -764,8 +764,8 @@ async fn vfio_pci_device_handler( let mut group = None; for opt in device.options.iter() { - let (host, pcipath) = - split_vfio_pci_option(opt).ok_or_else(|| anyhow!("Malformed VFIO option {:?}", opt))?; + let (host, pcipath) = split_vfio_pci_option(opt) + .ok_or_else(|| anyhow!("Malformed VFIO PCI option {:?}", opt))?; let host = pci::Address::from_str(host).context("Bad host PCI address in VFIO option {:?}")?; let pcipath = pci::Path::from_str(pcipath)?; @@ -809,7 +809,7 @@ async fn vfio_pci_device_handler( // The VFIO AP (Adjunct Processor) device handler takes all the APQNs provided as device options // and awaits them. It sets the minimum AP rescan time of 5 seconds and temporarily adds that -// amoutn to the hotplug timeout. +// amount to the hotplug timeout. #[cfg(target_arch = "s390x")] #[instrument] async fn vfio_ap_device_handler( diff --git a/src/runtime/pkg/device/drivers/vfio.go b/src/runtime/pkg/device/drivers/vfio.go index 3450e61d9c..94139aaa2f 100644 --- a/src/runtime/pkg/device/drivers/vfio.go +++ b/src/runtime/pkg/device/drivers/vfio.go @@ -118,6 +118,8 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece Type: config.VFIOAPDeviceMediatedType, APDevices: devices, } + default: + return fmt.Errorf("Failed to append device: VFIO device type unrecognized") } device.VfioDevs = append(device.VfioDevs, &vfio) } @@ -216,12 +218,9 @@ func (device *VFIODevice) Load(ds config.DeviceState) { for _, dev := range ds.VFIODevs { var vfio config.VFIODev - if (*device.VfioDevs[0]).GetType() == config.VFIOAPDeviceMediatedType { - vfio = config.VFIOAPDev{ - ID: *(*dev).GetID(), - SysfsDev: *(*dev).GetSysfsDev(), - } - } else { + vfioDeviceType := (*device.VfioDevs[0]).GetType() + switch vfioDeviceType { + case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType: bdf := "" if pciDev, ok := (*dev).(config.VFIOPCIDev); ok { bdf = pciDev.BDF @@ -232,6 +231,16 @@ func (device *VFIODevice) Load(ds config.DeviceState) { BDF: bdf, SysfsDev: *(*dev).GetSysfsDev(), } + case config.VFIOAPDeviceMediatedType: + vfio = config.VFIOAPDev{ + ID: *(*dev).GetID(), + SysfsDev: *(*dev).GetSysfsDev(), + } + default: + deviceLogger().WithError( + fmt.Errorf("VFIO device type unrecognized"), + ).Error("Failed to append device") + return } device.VfioDevs = append(device.VfioDevs, &vfio) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 8799d6a93d..1f493772df 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -856,13 +856,8 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error { ctx, cancel := context.WithTimeout(context.Background(), clhHotPlugAPITimeout*time.Second) defer cancel() - pciDevice, ok := (*device).(config.VFIOPCIDev) - if !ok { - return fmt.Errorf("VFIO device %+v is not PCI, only PCI is supported in Cloud Hypervisor", device) - } - // Create the clh device config via the constructor to ensure default values are properly assigned - clhDevice := *chclient.NewDeviceConfig(device.SysfsDev) + clhDevice := *chclient.NewDeviceConfig(*(*device).GetSysfsDev()) pciInfo, _, err := cl.VmAddDevicePut(ctx, clhDevice) if err != nil { return fmt.Errorf("Failed to hotplug device %+v %s", device, openAPIClientError(err)) @@ -885,6 +880,11 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error { } guestPciPath, err := types.PciPathFromString(tokens[0]) + + pciDevice, ok := (*device).(config.VFIOPCIDev) + if !ok { + return fmt.Errorf("VFIO device %+v is not PCI, only PCI is supported in Cloud Hypervisor", device) + } pciDevice.GuestPciPath = guestPciPath *device = pciDevice diff --git a/src/runtime/virtcontainers/utils/utils_linux.go b/src/runtime/virtcontainers/utils/utils_linux.go index 0b46d2572e..a31b8d3511 100644 --- a/src/runtime/virtcontainers/utils/utils_linux.go +++ b/src/runtime/virtcontainers/utils/utils_linux.go @@ -141,18 +141,6 @@ func GetDevicePathAndFsTypeOptions(mountPoint string) (devicePath, fsType string } } -// IsAPVFIOMediatedDevice decides whether a device is a VFIO-AP device -// by checking for the existence of "vfio_ap" in the path -func IsAPVFIOMediatedDevice(sysfsdev string) bool { - split := strings.Split(sysfsdev, string(os.PathSeparator)) - for _, el := range split { - if el == vfioAPSysfsDir { - return true - } - } - return false -} - func waitProcessUsingPidfd(pid int, timeoutSecs uint, logger *logrus.Entry) (bool, error) { pidfd, err := unix.PidfdOpen(pid, 0)