From 07f104085a6b9f58b02d965021deae3c51f51eb3 Mon Sep 17 00:00:00 2001 From: ChengyuZhu6 Date: Tue, 27 Aug 2024 06:13:37 +0800 Subject: [PATCH] agent: Move vfio device code to vfio_device_handler Move vfio device code to vfio_device_handler to simplify the code. Signed-off-by: ChengyuZhu6 --- src/agent/src/device.rs | 279 ------------------ src/agent/src/device/vfio_device_handler.rs | 305 ++++++++++++++++++++ 2 files changed, 305 insertions(+), 279 deletions(-) create mode 100644 src/agent/src/device/vfio_device_handler.rs diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index 37fd65c061..f929c255be 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -28,8 +28,6 @@ use oci_spec::runtime as oci; use protocols::agent::Device; use tracing::instrument; -use kata_types::device::{DRIVER_VFIO_AP_TYPE, DRIVER_VFIO_PCI_GK_TYPE, DRIVER_VFIO_PCI_TYPE}; - // Convenience function to obtain the scope logger. fn sl() -> slog::Logger { slog_scope::logger().new(o!("subsystem" => "device")) @@ -37,95 +35,12 @@ fn sl() -> slog::Logger { const BLOCK: &str = "block"; -cfg_if! { - if #[cfg(target_arch = "s390x")] { - use crate::ap; - } -} - #[instrument] pub fn online_device(path: &str) -> Result<()> { fs::write(path, "1")?; Ok(()) } -// Force a given PCI device to bind to the given driver, does -// basically the same thing as -// driverctl set-override -#[instrument] -pub fn pci_driver_override(syspci: T, dev: pci::Address, drv: U) -> Result<()> -where - T: AsRef + std::fmt::Debug, - U: AsRef + std::fmt::Debug, -{ - let syspci = Path::new(&syspci); - let drv = drv.as_ref(); - info!(sl(), "rebind_pci_driver: {} => {:?}", dev, drv); - - let devpath = syspci.join("devices").join(dev.to_string()); - let overridepath = &devpath.join("driver_override"); - - fs::write(overridepath, drv.as_bytes())?; - - let drvpath = &devpath.join("driver"); - let need_unbind = match fs::read_link(drvpath) { - Ok(d) if d.file_name() == Some(drv) => return Ok(()), // Nothing to do - Err(e) if e.kind() == std::io::ErrorKind::NotFound => false, // No current driver - Err(e) => return Err(anyhow!("Error checking driver on {}: {}", dev, e)), - Ok(_) => true, // Current driver needs unbinding - }; - if need_unbind { - let unbindpath = &drvpath.join("unbind"); - fs::write(unbindpath, dev.to_string())?; - } - let probepath = syspci.join("drivers_probe"); - fs::write(probepath, dev.to_string())?; - Ok(()) -} - -// Represents an IOMMU group -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct IommuGroup(u32); - -impl fmt::Display for IommuGroup { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - write!(f, "{}", self.0) - } -} - -// Determine the IOMMU group of a PCI device -#[instrument] -fn pci_iommu_group(syspci: T, dev: pci::Address) -> Result> -where - T: AsRef + std::fmt::Debug, -{ - let syspci = Path::new(&syspci); - let grouppath = syspci - .join("devices") - .join(dev.to_string()) - .join("iommu_group"); - - match fs::read_link(&grouppath) { - // Device has no group - Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None), - Err(e) => Err(anyhow!("Error reading link {:?}: {}", &grouppath, e)), - Ok(group) => { - if let Some(group) = group.file_name() { - if let Some(group) = group.to_str() { - if let Ok(group) = group.parse::() { - return Ok(Some(IommuGroup(group))); - } - } - } - Err(anyhow!( - "Unexpected IOMMU group link {:?} => {:?}", - grouppath, - group - )) - } - } -} - // pcipath_to_sysfs fetches the sysfs path for a PCI path, relative to // the sysfs path for the PCI host bridge, based on the PCI path // provided. @@ -169,45 +84,6 @@ pub fn pcipath_to_sysfs(root_bus_sysfs: &str, pcipath: &pci::Path) -> Result Result { - let root_bus = create_pci_root_bus_path(); - Ok(PciMatcher { - devpath: format!("{}{}", root_bus, relpath), - }) - } -} - -impl UeventMatcher for PciMatcher { - fn is_match(&self, uev: &Uevent) -> bool { - uev.devpath == self.devpath - } -} - -pub async fn wait_for_pci_device( - sandbox: &Arc>, - pcipath: &pci::Path, -) -> Result { - let root_bus_sysfs = format!("{}{}", SYSFS_DIR, create_pci_root_bus_path()); - let sysfs_rel_path = pcipath_to_sysfs(&root_bus_sysfs, pcipath)?; - let matcher = PciMatcher::new(&sysfs_rel_path)?; - - let uev = wait_for_uevent(sandbox, matcher).await?; - - let addr = uev - .devpath - .rsplit('/') - .next() - .ok_or_else(|| anyhow!("Bad device path {:?} in uevent", &uev.devpath))?; - let addr = pci::Address::from_str(addr)?; - Ok(addr) -} - #[derive(Debug)] struct NetPciMatcher { devpath: String, @@ -267,66 +143,6 @@ pub async fn wait_for_net_interface( Ok(()) } -#[derive(Debug)] -struct VfioMatcher { - syspath: String, -} - -impl VfioMatcher { - fn new(grp: IommuGroup) -> VfioMatcher { - VfioMatcher { - syspath: format!("/devices/virtual/vfio/{}", grp), - } - } -} - -impl UeventMatcher for VfioMatcher { - fn is_match(&self, uev: &Uevent) -> bool { - uev.devpath == self.syspath - } -} - -#[instrument] -async fn get_vfio_device_name(sandbox: &Arc>, grp: IommuGroup) -> Result { - let matcher = VfioMatcher::new(grp); - - let uev = wait_for_uevent(sandbox, matcher).await?; - Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname)) -} - -#[cfg(target_arch = "s390x")] -#[derive(Debug)] -struct ApMatcher { - syspath: String, -} - -#[cfg(target_arch = "s390x")] -impl ApMatcher { - fn new(address: ap::Address) -> ApMatcher { - ApMatcher { - syspath: format!( - "{}/card{:02x}/{}", - AP_ROOT_BUS_PATH, address.adapter_id, address - ), - } - } -} - -#[cfg(target_arch = "s390x")] -impl UeventMatcher for ApMatcher { - fn is_match(&self, uev: &Uevent) -> bool { - uev.action == "add" && uev.devpath == self.syspath - } -} - -#[cfg(target_arch = "s390x")] -#[instrument] -async fn wait_for_ap_device(sandbox: &Arc>, address: ap::Address) -> Result<()> { - let matcher = ApMatcher::new(address); - wait_for_uevent(sandbox, matcher).await?; - Ok(()) -} - #[derive(Debug, Clone)] pub struct DeviceInfo { // Device type, "b" for block device and "c" for character device @@ -586,101 +402,6 @@ pub fn update_env_pci( Ok(()) } -fn split_vfio_pci_option(opt: &str) -> Option<(&str, &str)> { - let mut tokens = opt.split('='); - let hostbdf = tokens.next()?; - let path = tokens.next()?; - if tokens.next().is_some() { - None - } else { - Some((hostbdf, path)) - } -} - -// device.options should have one entry for each PCI device in the VFIO group -// Each option should have the form "DDDD:BB:DD.F=" -// DDDD:BB:DD.F is the device's PCI address in the host -// is a PCI path to the device in the guest (see pci.rs) -#[instrument] -async fn vfio_pci_device_handler( - device: &Device, - sandbox: &Arc>, -) -> Result { - let vfio_in_guest = device.type_ != DRIVER_VFIO_PCI_GK_TYPE; - let mut pci_fixups = Vec::<(pci::Address, pci::Address)>::new(); - let mut group = None; - - for opt in device.options.iter() { - let (host, pcipath) = split_vfio_pci_option(opt) - .ok_or_else(|| anyhow!("Malformed VFIO PCI option {:?}", opt))?; - let host = - pci::Address::from_str(host).context("Bad host PCI address in VFIO option {:?}")?; - let pcipath = pci::Path::from_str(pcipath)?; - - let guestdev = wait_for_pci_device(sandbox, &pcipath).await?; - if vfio_in_guest { - pci_driver_override(SYSFS_BUS_PCI_PATH, guestdev, "vfio-pci")?; - - // Devices must have an IOMMU group to be usable via VFIO - let devgroup = pci_iommu_group(SYSFS_BUS_PCI_PATH, guestdev)? - .ok_or_else(|| anyhow!("{} has no IOMMU group", guestdev))?; - - if let Some(g) = group { - if g != devgroup { - return Err(anyhow!("{} is not in guest IOMMU group {}", guestdev, g)); - } - } - - group = Some(devgroup); - } - - // collect PCI address mapping for both vfio-pci-gk and vfio-pci device - pci_fixups.push((host, guestdev)); - } - - let dev_update = if vfio_in_guest { - // If there are any devices at all, logic above ensures that group is not None - let group = group.ok_or_else(|| anyhow!("failed to get VFIO group"))?; - - let vm_path = get_vfio_device_name(sandbox, group).await?; - - Some(DevUpdate::new(&vm_path, &vm_path)?) - } else { - None - }; - - Ok(SpecUpdate { - dev: dev_update, - pci: pci_fixups, - }) -} - -// The VFIO AP (Adjunct Processor) device handler takes all the APQNs provided as device options -// and awaits them. It sets the minimum AP rescan time of 5 seconds and temporarily adds that -// amount to the hotplug timeout. -#[cfg(target_arch = "s390x")] -#[instrument] -async fn vfio_ap_device_handler( - device: &Device, - sandbox: &Arc>, -) -> Result { - // Force AP bus rescan - fs::write(AP_SCANS_PATH, "1")?; - for apqn in device.options.iter() { - wait_for_ap_device(sandbox, ap::Address::from_str(apqn)?).await?; - } - let dev_update = Some(DevUpdate::new(Z9_CRYPT_DEV_PATH, Z9_CRYPT_DEV_PATH)?); - Ok(SpecUpdate { - dev: dev_update, - pci: Vec::new(), - }) -} - -#[cfg(not(target_arch = "s390x"))] -async fn vfio_ap_device_handler(_: &Device, _: &Arc>) -> Result { - Err(anyhow!("AP is only supported on s390x")) -} - #[instrument] pub async fn add_devices( devices: &[Device], diff --git a/src/agent/src/device/vfio_device_handler.rs b/src/agent/src/device/vfio_device_handler.rs new file mode 100644 index 0000000000..3be0e8b8ca --- /dev/null +++ b/src/agent/src/device/vfio_device_handler.rs @@ -0,0 +1,305 @@ +// Copyright (c) 2019 Ant Financial +// Copyright (c) 2024 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +#[cfg(target_arch = "s390x")] +use crate::ap; +use crate::device::{ + pcipath_to_sysfs, DevUpdate, DeviceContext, DeviceHandler, SpecUpdate, DRIVER_VFIO_PCI_GK_TYPE, +}; +use crate::linux_abi::*; +use crate::pci; +use crate::sandbox::Sandbox; +use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher}; +use anyhow::{anyhow, Context, Result}; +use protocols::agent::Device; +use slog::Logger; +use std::ffi::OsStr; +use std::fmt; +use std::fs; +use std::os::unix::ffi::OsStrExt; +use std::path::Path; +use std::str::FromStr; +use std::sync::Arc; +use tokio::sync::Mutex; +use tracing::instrument; + +#[derive(Debug)] +pub struct VfioPciDeviceHandler {} + +#[derive(Debug)] +pub struct VfioApDeviceHandler {} + +#[async_trait::async_trait] +impl DeviceHandler for VfioPciDeviceHandler { + #[instrument] + async fn device_handler(&self, device: &Device, ctx: &mut DeviceContext) -> Result { + let vfio_in_guest = device.type_ != DRIVER_VFIO_PCI_GK_TYPE; + let mut pci_fixups = Vec::<(pci::Address, pci::Address)>::new(); + let mut group = None; + + for opt in device.options.iter() { + let (host, pcipath) = split_vfio_pci_option(opt) + .ok_or_else(|| anyhow!("Malformed VFIO PCI option {:?}", opt))?; + let host = + pci::Address::from_str(host).context("Bad host PCI address in VFIO option {:?}")?; + let pcipath = pci::Path::from_str(pcipath)?; + + let guestdev = wait_for_pci_device(ctx.sandbox, &pcipath).await?; + if vfio_in_guest { + pci_driver_override(ctx.logger, SYSFS_BUS_PCI_PATH, guestdev, "vfio-pci")?; + + // Devices must have an IOMMU group to be usable via VFIO + let devgroup = pci_iommu_group(SYSFS_BUS_PCI_PATH, guestdev)? + .ok_or_else(|| anyhow!("{} has no IOMMU group", guestdev))?; + + if let Some(g) = group { + if g != devgroup { + return Err(anyhow!("{} is not in guest IOMMU group {}", guestdev, g)); + } + } + + group = Some(devgroup); + } + + // collect PCI address mapping for both vfio-pci-gk and vfio-pci device + pci_fixups.push((host, guestdev)); + } + + let dev_update = if vfio_in_guest { + // If there are any devices at all, logic above ensures that group is not None + let group = group.ok_or_else(|| anyhow!("failed to get VFIO group"))?; + + let vm_path = get_vfio_pci_device_name(group, ctx.sandbox).await?; + + Some(DevUpdate::new(&vm_path, &vm_path)?) + } else { + None + }; + + Ok(SpecUpdate { + dev: dev_update, + pci: pci_fixups, + }) + } +} + +#[async_trait::async_trait] +impl DeviceHandler for VfioApDeviceHandler { + #[cfg(target_arch = "s390x")] + #[instrument] + async fn device_handler(&self, device: &Device, ctx: &mut DeviceContext) -> Result { + // Force AP bus rescan + fs::write(AP_SCANS_PATH, "1")?; + for apqn in device.options.iter() { + wait_for_ap_device(ctx.sandbox, ap::Address::from_str(apqn)?).await?; + } + let dev_update = Some(DevUpdate::new(Z9_CRYPT_DEV_PATH, Z9_CRYPT_DEV_PATH)?); + Ok(SpecUpdate { + dev: dev_update, + pci: Vec::new(), + }) + } + + #[cfg(not(target_arch = "s390x"))] + #[instrument] + async fn device_handler(&self, _: &Device, _: &mut DeviceContext) -> Result { + Err(anyhow!("VFIO-AP is only supported on s390x")) + } +} + +async fn get_vfio_pci_device_name( + grp: IommuGroup, + sandbox: &Arc>, +) -> Result { + let matcher = VfioMatcher::new(grp); + + let uev = wait_for_uevent(sandbox, matcher).await?; + Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname)) +} + +#[derive(Debug)] +pub struct VfioMatcher { + syspath: String, +} + +impl VfioMatcher { + pub fn new(grp: IommuGroup) -> VfioMatcher { + VfioMatcher { + syspath: format!("/devices/virtual/vfio/{}", grp), + } + } +} + +impl UeventMatcher for VfioMatcher { + fn is_match(&self, uev: &Uevent) -> bool { + uev.devpath == self.syspath + } +} + +#[cfg(target_arch = "s390x")] +#[derive(Debug)] +pub struct ApMatcher { + syspath: String, +} + +#[cfg(target_arch = "s390x")] +impl ApMatcher { + pub fn new(address: ap::Address) -> ApMatcher { + ApMatcher { + syspath: format!( + "{}/card{:02x}/{}", + AP_ROOT_BUS_PATH, address.adapter_id, address + ), + } + } +} + +#[cfg(target_arch = "s390x")] +impl UeventMatcher for ApMatcher { + fn is_match(&self, uev: &Uevent) -> bool { + uev.action == "add" && uev.devpath == self.syspath + } +} + +#[derive(Debug)] +pub struct PciMatcher { + devpath: String, +} + +impl PciMatcher { + pub fn new(relpath: &str) -> Result { + let root_bus = create_pci_root_bus_path(); + Ok(PciMatcher { + devpath: format!("{}{}", root_bus, relpath), + }) + } +} + +impl UeventMatcher for PciMatcher { + fn is_match(&self, uev: &Uevent) -> bool { + uev.devpath == self.devpath + } +} + +#[cfg(target_arch = "s390x")] +#[instrument] +async fn wait_for_ap_device(sandbox: &Arc>, address: ap::Address) -> Result<()> { + let matcher = ApMatcher::new(address); + wait_for_uevent(sandbox, matcher).await?; + Ok(()) +} + +pub async fn wait_for_pci_device( + sandbox: &Arc>, + pcipath: &pci::Path, +) -> Result { + let root_bus_sysfs = format!("{}{}", SYSFS_DIR, create_pci_root_bus_path()); + let sysfs_rel_path = pcipath_to_sysfs(&root_bus_sysfs, pcipath)?; + let matcher = PciMatcher::new(&sysfs_rel_path)?; + + let uev = wait_for_uevent(sandbox, matcher).await?; + + let addr = uev + .devpath + .rsplit('/') + .next() + .ok_or_else(|| anyhow!("Bad device path {:?} in uevent", &uev.devpath))?; + let addr = pci::Address::from_str(addr)?; + Ok(addr) +} + +// Represents an IOMMU group +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct IommuGroup(u32); + +impl fmt::Display for IommuGroup { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!(f, "{}", self.0) + } +} + +// Determine the IOMMU group of a PCI device +#[instrument] +fn pci_iommu_group(syspci: T, dev: pci::Address) -> Result> +where + T: AsRef + std::fmt::Debug, +{ + let syspci = Path::new(&syspci); + let grouppath = syspci + .join("devices") + .join(dev.to_string()) + .join("iommu_group"); + + match fs::read_link(&grouppath) { + // Device has no group + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None), + Err(e) => Err(anyhow!("Error reading link {:?}: {}", &grouppath, e)), + Ok(group) => { + if let Some(group) = group.file_name() { + if let Some(group) = group.to_str() { + if let Ok(group) = group.parse::() { + return Ok(Some(IommuGroup(group))); + } + } + } + Err(anyhow!( + "Unexpected IOMMU group link {:?} => {:?}", + grouppath, + group + )) + } + } +} + +fn split_vfio_pci_option(opt: &str) -> Option<(&str, &str)> { + let mut tokens = opt.split('='); + let hostbdf = tokens.next()?; + let path = tokens.next()?; + if tokens.next().is_some() { + None + } else { + Some((hostbdf, path)) + } +} + +// Force a given PCI device to bind to the given driver, does +// basically the same thing as +// driverctl set-override +#[instrument] +pub fn pci_driver_override( + logger: &Logger, + syspci: T, + dev: pci::Address, + drv: U, +) -> Result<()> +where + T: AsRef + std::fmt::Debug, + U: AsRef + std::fmt::Debug, +{ + let syspci = Path::new(&syspci); + let drv = drv.as_ref(); + info!(logger, "rebind_pci_driver: {} => {:?}", dev, drv); + + let devpath = syspci.join("devices").join(dev.to_string()); + let overridepath = &devpath.join("driver_override"); + + fs::write(overridepath, drv.as_bytes())?; + + let drvpath = &devpath.join("driver"); + let need_unbind = match fs::read_link(drvpath) { + Ok(d) if d.file_name() == Some(drv) => return Ok(()), // Nothing to do + Err(e) if e.kind() == std::io::ErrorKind::NotFound => false, // No current driver + Err(e) => return Err(anyhow!("Error checking driver on {}: {}", dev, e)), + Ok(_) => true, // Current driver needs unbinding + }; + if need_unbind { + let unbindpath = &drvpath.join("unbind"); + fs::write(unbindpath, dev.to_string())?; + } + let probepath = syspci.join("drivers_probe"); + fs::write(probepath, dev.to_string())?; + Ok(()) +}