diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index 35f7847608..a2fc73d88d 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -3,472 +3,11 @@ // SPDX-License-Identifier: Apache-2.0 // -use nix::sys::stat; -use regex::Regex; -use std::collections::HashMap; -use std::ffi::OsStr; -use std::fmt; -use std::fs; -use std::os::unix::ffi::OsStrExt; -use std::os::unix::fs::MetadataExt; -use std::os::unix::prelude::FileTypeExt; -use std::path::{Path, PathBuf}; -use std::str::FromStr; -use std::sync::Arc; -use tokio::sync::Mutex; - -use crate::linux_abi::*; -use crate::pci; -use crate::sandbox::Sandbox; -use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher}; -use anyhow::{anyhow, Context, Result}; -use cfg_if::cfg_if; -use oci::{LinuxDeviceCgroup, Spec}; -use oci_spec::runtime as oci; -use protocols::agent::Device; -use tracing::instrument; - // Convenience function to obtain the scope logger. fn sl() -> slog::Logger { slog_scope::logger().new(o!("subsystem" => "device")) } -const BLOCK: &str = "block"; - -#[instrument] -pub fn online_device(path: &str) -> Result<()> { - fs::write(path, "1")?; - Ok(()) -} - -// pcipath_to_sysfs fetches the sysfs path for a PCI path, relative to -// the sysfs path for the PCI host bridge, based on the PCI path -// provided. -#[instrument] -pub fn pcipath_to_sysfs(root_bus_sysfs: &str, pcipath: &pci::Path) -> Result { - let mut bus = "0000:00".to_string(); - let mut relpath = String::new(); - - for i in 0..pcipath.len() { - let bdf = format!("{}:{}", bus, pcipath[i]); - - relpath = format!("{}/{}", relpath, bdf); - - if i == pcipath.len() - 1 { - // Final device need not be a bridge - break; - } - - // Find out the bus exposed by bridge - let bridgebuspath = format!("{}{}/pci_bus", root_bus_sysfs, relpath); - let mut files: Vec<_> = fs::read_dir(&bridgebuspath)?.collect(); - - match files.pop() { - Some(busfile) if files.is_empty() => { - bus = busfile? - .file_name() - .into_string() - .map_err(|e| anyhow!("Bad filename under {}: {:?}", &bridgebuspath, e))?; - } - _ => { - return Err(anyhow!( - "Expected exactly one PCI bus in {}, got {} instead", - bridgebuspath, - // Adjust to original value as we've already popped - files.len() + 1 - )); - } - }; - } - - Ok(relpath) -} - -#[derive(Debug, Clone)] -pub struct DeviceInfo { - // Device type, "b" for block device and "c" for character device - cgroup_type: String, - // The major and minor numbers for the device within the guest - guest_major: i64, - guest_minor: i64, -} - -impl DeviceInfo { - /// Create a device info. - /// - /// # Arguments - /// - /// * `vm_path` - Device's vm path. - /// * `is_rdev` - If the vm_path is a device, set to true. If the - /// vm_path is a file in a device, set to false. - pub fn new(vm_path: &str, is_rdev: bool) -> Result { - let cgroup_type; - let devid; - - let vm_path = PathBuf::from(vm_path); - if !vm_path.exists() { - return Err(anyhow!("VM device path {:?} doesn't exist", vm_path)); - } - - let metadata = fs::metadata(&vm_path)?; - - if is_rdev { - devid = metadata.rdev(); - let file_type = metadata.file_type(); - if file_type.is_block_device() { - cgroup_type = String::from("b"); - } else if file_type.is_char_device() { - cgroup_type = String::from("c"); - } else { - return Err(anyhow!("Unknown device {:?}'s cgroup type", vm_path)); - } - } else { - devid = metadata.dev(); - cgroup_type = String::from("b"); - } - - let guest_major = stat::major(devid) as i64; - let guest_minor = stat::minor(devid) as i64; - - Ok(DeviceInfo { - cgroup_type, - guest_major, - guest_minor, - }) - } -} - -// Represents the device-node and resource related updates to the OCI -// spec needed for a particular device -#[derive(Debug, Clone)] -struct DevUpdate { - info: DeviceInfo, - // an optional new path to update the device to in the "inner" container - // specification - final_path: Option, -} - -impl DevUpdate { - fn new(vm_path: &str, final_path: &str) -> Result { - Ok(DevUpdate { - final_path: Some(final_path.to_owned()), - ..DeviceInfo::new(vm_path, true)?.into() - }) - } -} - -impl From for DevUpdate { - fn from(info: DeviceInfo) -> Self { - DevUpdate { - info, - final_path: None, - } - } -} - -// Represents the updates to the OCI spec needed for a particular device -#[derive(Debug, Clone, Default)] -struct SpecUpdate { - dev: Option, - // optional corrections for PCI addresses - pci: Vec<(pci::Address, pci::Address)>, -} - -impl> From for SpecUpdate { - fn from(dev: T) -> Self { - SpecUpdate { - dev: Some(dev.into()), - pci: Vec::new(), - } - } -} - -// update_spec_devices updates the device list in the OCI spec to make -// it include details appropriate for the VM, instead of the host. It -// is given a map of (container_path => update) where: -// container_path: the path to the device in the original OCI spec -// update: information on changes to make to the device -#[instrument] -fn update_spec_devices(spec: &mut Spec, mut updates: HashMap<&str, DevUpdate>) -> Result<()> { - let linux = spec - .linux_mut() - .as_mut() - .ok_or_else(|| anyhow!("Spec didn't contain linux field"))?; - let mut res_updates = HashMap::<(String, i64, i64), DeviceInfo>::with_capacity(updates.len()); - - let mut default_devices = Vec::new(); - let linux_devices = linux.devices_mut().as_mut().unwrap_or(&mut default_devices); - for specdev in linux_devices.iter_mut() { - let devtype = specdev.typ().as_str().to_string(); - if let Some(update) = updates.remove(specdev.path().clone().display().to_string().as_str()) - { - let host_major = specdev.major(); - let host_minor = specdev.minor(); - - info!( - sl(), - "update_spec_devices() updating device"; - "container_path" => &specdev.path().display().to_string(), - "type" => &devtype, - "host_major" => host_major, - "host_minor" => host_minor, - "guest_major" => update.info.guest_major, - "guest_minor" => update.info.guest_minor, - "final_path" => update.final_path.as_ref(), - ); - - specdev.set_major(update.info.guest_major); - specdev.set_minor(update.info.guest_minor); - if let Some(final_path) = update.final_path { - specdev.set_path(PathBuf::from(&final_path)); - } - - if res_updates - .insert((devtype, host_major, host_minor), update.info) - .is_some() - { - return Err(anyhow!( - "Conflicting resource updates for host_major={} host_minor={}", - host_major, - host_minor - )); - } - } - } - - // Make sure we applied all of our updates - if !updates.is_empty() { - return Err(anyhow!( - "Missing devices in OCI spec: {:?}", - updates - .keys() - .map(|d| format!("{:?}", d)) - .collect::>() - .join(" ") - )); - } - - if let Some(resources) = linux.resources_mut().as_mut() { - if let Some(resources_devices) = resources.devices_mut().as_mut() { - for d in resources_devices.iter_mut() { - let dev_type = d.typ().unwrap_or_default().as_str().to_string(); - if let (Some(host_major), Some(host_minor)) = (d.major(), d.minor()) { - if let Some(update) = - res_updates.get(&(dev_type.clone(), host_major, host_minor)) - { - info!( - sl(), - "update_spec_devices() updating resource"; - "type" => &dev_type, - "host_major" => host_major, - "host_minor" => host_minor, - "guest_major" => update.guest_major, - "guest_minor" => update.guest_minor, - ); - - d.set_major(Some(update.guest_major)); - d.set_minor(Some(update.guest_minor)); - } - } - } - } - } - - Ok(()) -} - -// update_env_pci alters PCI addresses in a set of environment -// variables to be correct for the VM instead of the host. It is -// given a map of (host address => guest address) -#[instrument] -pub fn update_env_pci( - env: &mut [String], - pcimap: &HashMap, -) -> Result<()> { - // SR-IOV device plugin may add two environment variables for one resource: - // - PCIDEVICE__: a list of PCI device ids separated by comma - // - PCIDEVICE___INFO: detailed info in JSON for above PCI devices - // Both environment variables hold information about the same set of PCI devices. - // Below code updates both of them in two passes: - // - 1st pass updates PCIDEVICE__ and collects host to guest PCI address mapping - let mut pci_dev_map: HashMap> = HashMap::new(); - for envvar in env.iter_mut() { - let eqpos = envvar - .find('=') - .ok_or_else(|| anyhow!("Malformed OCI env entry {:?}", envvar))?; - - let (name, eqval) = envvar.split_at(eqpos); - let val = &eqval[1..]; - - if !name.starts_with("PCIDEVICE_") || name.ends_with("_INFO") { - continue; - } - - let mut addr_map: HashMap = HashMap::new(); - let mut guest_addrs = Vec::::new(); - for host_addr_str in val.split(',') { - let host_addr = pci::Address::from_str(host_addr_str) - .with_context(|| format!("Can't parse {} environment variable", name))?; - let guest_addr = pcimap - .get(&host_addr) - .ok_or_else(|| anyhow!("Unable to translate host PCI address {}", host_addr))?; - - guest_addrs.push(format!("{}", guest_addr)); - addr_map.insert(host_addr_str.to_string(), format!("{}", guest_addr)); - } - - pci_dev_map.insert(format!("{}_INFO", name), addr_map); - - envvar.replace_range(eqpos + 1.., guest_addrs.join(",").as_str()); - } - - // - 2nd pass update PCIDEVICE___INFO if it exists - for envvar in env.iter_mut() { - let eqpos = envvar - .find('=') - .ok_or_else(|| anyhow!("Malformed OCI env entry {:?}", envvar))?; - - let (name, _) = envvar.split_at(eqpos); - if !(name.starts_with("PCIDEVICE_") && name.ends_with("_INFO")) { - continue; - } - - if let Some(addr_map) = pci_dev_map.get(name) { - for (host_addr, guest_addr) in addr_map { - *envvar = envvar.replace(host_addr, guest_addr); - } - } - } - - Ok(()) -} - -#[instrument] -pub async fn add_devices( - devices: &[Device], - spec: &mut Spec, - sandbox: &Arc>, -) -> Result<()> { - let mut dev_updates = HashMap::<&str, DevUpdate>::with_capacity(devices.len()); - - for device in devices.iter() { - let update = add_device(device, sandbox).await?; - if let Some(dev_update) = update.dev { - if dev_updates - .insert(&device.container_path, dev_update.clone()) - .is_some() - { - return Err(anyhow!( - "Conflicting device updates for {}", - &device.container_path - )); - } - - // Update cgroup to allow all devices added to guest. - insert_devices_cgroup_rule(spec, &dev_update.info, true, "rwm") - .context("Update device cgroup")?; - } - - let mut sb = sandbox.lock().await; - for (host, guest) in update.pci { - if let Some(other_guest) = sb.pcimap.insert(host, guest) { - return Err(anyhow!( - "Conflicting guest address for host device {} ({} versus {})", - host, - guest, - other_guest - )); - } - } - } - - if let Some(process) = spec.process_mut() { - let env_vec: &mut Vec = - &mut process.env_mut().get_or_insert_with(Vec::new).to_vec(); - update_env_pci(env_vec, &sandbox.lock().await.pcimap)? - } - update_spec_devices(spec, dev_updates) -} - -#[instrument] -async fn add_device(device: &Device, sandbox: &Arc>) -> Result { - // log before validation to help with debugging gRPC protocol version differences. - info!(sl(), "device-id: {}, device-type: {}, device-vm-path: {}, device-container-path: {}, device-options: {:?}", - device.id, device.type_, device.vm_path, device.container_path, device.options); - - if device.type_.is_empty() { - return Err(anyhow!("invalid type for device {:?}", device)); - } - - if device.id.is_empty() && device.vm_path.is_empty() { - return Err(anyhow!("invalid ID and VM path for device {:?}", device)); - } - - if device.container_path.is_empty() { - return Err(anyhow!("invalid container path for device {:?}", device)); - } - - match device.type_.as_str() { - DRIVER_BLK_PCI_TYPE => virtio_blk_device_handler(device, sandbox).await, - DRIVER_BLK_CCW_TYPE => virtio_blk_ccw_device_handler(device, sandbox).await, - DRIVER_BLK_MMIO_TYPE => virtiommio_blk_device_handler(device, sandbox).await, - DRIVER_NVDIMM_TYPE => virtio_nvdimm_device_handler(device, sandbox).await, - DRIVER_SCSI_TYPE => virtio_scsi_device_handler(device, sandbox).await, - DRIVER_VFIO_PCI_GK_TYPE | DRIVER_VFIO_PCI_TYPE => { - vfio_pci_device_handler(device, sandbox).await - } - DRIVER_VFIO_AP_TYPE => vfio_ap_device_handler(device, sandbox).await, - _ => Err(anyhow!("Unknown device type {}", device.type_)), - } -} - -// Insert a devices cgroup rule to control access to device. -#[instrument] -pub fn insert_devices_cgroup_rule( - spec: &mut Spec, - dev_info: &DeviceInfo, - allow: bool, - access: &str, -) -> Result<()> { - let linux = spec - .linux_mut() - .as_mut() - .ok_or_else(|| anyhow!("Spec didn't container linux field"))?; - let devcgrp_type = dev_info - .cgroup_type - .parse::() - .context(format!( - "Failed to parse {:?} to Enum LinuxDeviceType", - dev_info.cgroup_type - ))?; - let linux_resource = &mut oci::LinuxResources::default(); - let resource = linux.resources_mut().as_mut().unwrap_or(linux_resource); - let mut device_cgrp = LinuxDeviceCgroup::default(); - device_cgrp.set_allow(allow); - device_cgrp.set_major(Some(dev_info.guest_major)); - device_cgrp.set_minor(Some(dev_info.guest_minor)); - device_cgrp.set_typ(Some(devcgrp_type)); - device_cgrp.set_access(Some(access.to_owned())); - - debug!( - sl(), - "Insert a devices cgroup rule"; - "linux_device_cgroup" => device_cgrp.allow(), - "guest_major" => device_cgrp.major(), - "guest_minor" => device_cgrp.minor(), - "type" => device_cgrp.typ().unwrap().as_str(), - "access" => device_cgrp.access().as_ref().unwrap().as_str(), - ); - - if let Some(devices) = resource.devices_mut() { - devices.push(device_cgrp); - } else { - resource.set_devices(Some(vec![device_cgrp])); - } - - Ok(()) -} - #[cfg(test)] mod tests { use super::*; diff --git a/src/agent/src/device/block_device_handler.rs b/src/agent/src/device/block_device_handler.rs index cd7567e582..914b9a25e6 100644 --- a/src/agent/src/device/block_device_handler.rs +++ b/src/agent/src/device/block_device_handler.rs @@ -16,6 +16,7 @@ use crate::pci; use crate::sandbox::Sandbox; use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher}; use anyhow::{anyhow, Context, Result}; +use kata_types::device::{DRIVER_BLK_CCW_TYPE, DRIVER_BLK_MMIO_TYPE, DRIVER_BLK_PCI_TYPE}; use protocols::agent::Device; use regex::Regex; use std::path::Path; @@ -35,6 +36,11 @@ pub struct VirtioBlkMmioDeviceHandler {} #[async_trait::async_trait] impl DeviceHandler for VirtioBlkPciDeviceHandler { + #[instrument] + fn driver_types(&self) -> &[&str] { + &[DRIVER_BLK_PCI_TYPE] + } + #[instrument] async fn device_handler(&self, device: &Device, ctx: &mut DeviceContext) -> Result { let pcipath = pci::Path::from_str(&device.id)?; @@ -48,6 +54,11 @@ impl DeviceHandler for VirtioBlkPciDeviceHandler { #[async_trait::async_trait] impl DeviceHandler for VirtioBlkCcwDeviceHandler { + #[instrument] + fn driver_types(&self) -> &[&str] { + &[DRIVER_BLK_CCW_TYPE] + } + #[cfg(target_arch = "s390x")] #[instrument] async fn device_handler(&self, device: &Device, ctx: &mut DeviceContext) -> Result { @@ -71,6 +82,11 @@ impl DeviceHandler for VirtioBlkCcwDeviceHandler { #[async_trait::async_trait] impl DeviceHandler for VirtioBlkMmioDeviceHandler { + #[instrument] + fn driver_types(&self) -> &[&str] { + &[DRIVER_BLK_MMIO_TYPE] + } + #[instrument] async fn device_handler(&self, device: &Device, ctx: &mut DeviceContext) -> Result { if device.vm_path.is_empty() { diff --git a/src/agent/src/device/mod.rs b/src/agent/src/device/mod.rs new file mode 100644 index 0000000000..f8fcaf8309 --- /dev/null +++ b/src/agent/src/device/mod.rs @@ -0,0 +1,527 @@ +// Copyright (c) 2019 Ant Financial +// Copyright (c) 2024 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +use self::block_device_handler::{VirtioBlkMmioDeviceHandler, VirtioBlkPciDeviceHandler}; +use self::nvdimm_device_handler::VirtioNvdimmDeviceHandler; +use self::scsi_device_handler::ScsiDeviceHandler; +use self::vfio_device_handler::{VfioApDeviceHandler, VfioPciDeviceHandler}; +use crate::pci; +use crate::sandbox::Sandbox; +use anyhow::{anyhow, Context, Result}; +use kata_types::device::DeviceHandlerManager; +use nix::sys::stat; +use oci::{LinuxDeviceCgroup, Spec}; +use oci_spec::runtime as oci; +use protocols::agent::Device; +use slog::Logger; +use std::collections::HashMap; +use std::fs; +use std::os::unix::fs::MetadataExt; +use std::os::unix::prelude::FileTypeExt; +use std::path::PathBuf; +use std::str::FromStr; +use std::sync::Arc; +use tokio::sync::Mutex; +use tracing::instrument; + +pub mod block_device_handler; +pub mod network_device_handler; +pub mod nvdimm_device_handler; +pub mod scsi_device_handler; +pub mod vfio_device_handler; + +pub const BLOCK: &str = "block"; + +#[derive(Debug, Clone)] +pub struct DeviceInfo { + // Device type, "b" for block device and "c" for character device + cgroup_type: String, + // The major and minor numbers for the device within the guest + guest_major: i64, + guest_minor: i64, +} + +impl DeviceInfo { + /// Create a device info. + /// + /// # Arguments + /// + /// * `vm_path` - Device's vm path. + /// * `is_rdev` - If the vm_path is a device, set to true. If the + /// vm_path is a file in a device, set to false. + pub fn new(vm_path: &str, is_rdev: bool) -> Result { + let cgroup_type; + let devid; + + let vm_path = PathBuf::from(vm_path); + if !vm_path.exists() { + return Err(anyhow!("VM device path {:?} doesn't exist", vm_path)); + } + + let metadata = fs::metadata(&vm_path)?; + + if is_rdev { + devid = metadata.rdev(); + let file_type = metadata.file_type(); + if file_type.is_block_device() { + cgroup_type = String::from("b"); + } else if file_type.is_char_device() { + cgroup_type = String::from("c"); + } else { + return Err(anyhow!("Unknown device {:?}'s cgroup type", vm_path)); + } + } else { + devid = metadata.dev(); + cgroup_type = String::from("b"); + } + + let guest_major = stat::major(devid) as i64; + let guest_minor = stat::minor(devid) as i64; + + Ok(DeviceInfo { + cgroup_type, + guest_major, + guest_minor, + }) + } +} + +// Represents the device-node and resource related updates to the OCI +// spec needed for a particular device +#[derive(Debug, Clone)] +struct DevUpdate { + info: DeviceInfo, + // an optional new path to update the device to in the "inner" container + // specification + final_path: Option, +} + +impl DevUpdate { + fn new(vm_path: &str, final_path: &str) -> Result { + Ok(DevUpdate { + final_path: Some(final_path.to_owned()), + ..DeviceInfo::new(vm_path, true)?.into() + }) + } +} + +impl From for DevUpdate { + fn from(info: DeviceInfo) -> Self { + DevUpdate { + info, + final_path: None, + } + } +} + +// Represents the updates to the OCI spec needed for a particular device +#[derive(Debug, Clone, Default)] +pub struct SpecUpdate { + dev: Option, + // optional corrections for PCI addresses + pci: Vec<(pci::Address, pci::Address)>, +} + +impl> From for SpecUpdate { + fn from(dev: T) -> Self { + SpecUpdate { + dev: Some(dev.into()), + pci: Vec::new(), + } + } +} + +#[derive(Debug)] +pub struct DeviceContext<'a> { + logger: &'a Logger, + sandbox: &'a Arc>, +} + +/// Trait object to handle device. +#[async_trait::async_trait] +pub trait DeviceHandler: Send + Sync { + /// Handle the device + async fn device_handler(&self, device: &Device, ctx: &mut DeviceContext) -> Result; + + /// Return the driver types that the handler manages. + fn driver_types(&self) -> &[&str]; +} + +#[rustfmt::skip] +lazy_static! { + pub static ref DEVICE_HANDLERS: DeviceHandlerManager> = { + let mut manager: DeviceHandlerManager> = DeviceHandlerManager::new(); + + let handlers: Vec> = vec![ + Arc::new(VirtioBlkMmioDeviceHandler {}), + Arc::new(VirtioBlkPciDeviceHandler {}), + Arc::new(VirtioNvdimmDeviceHandler {}), + Arc::new(ScsiDeviceHandler {}), + Arc::new(VfioPciDeviceHandler {}), + Arc::new(VfioApDeviceHandler {}), + #[cfg(target_arch = "s390x")] + Arc::new(self::block_device_handler::VirtioBlkCcwDeviceHandler {}), + ]; + + for handler in handlers { + manager.add_handler(handler.driver_types(), handler.clone()).unwrap(); + } + manager + }; +} + +#[instrument] +pub async fn add_devices( + logger: &Logger, + devices: &[Device], + spec: &mut Spec, + sandbox: &Arc>, +) -> Result<()> { + let mut dev_updates = HashMap::<&str, DevUpdate>::with_capacity(devices.len()); + + for device in devices.iter() { + validate_device(logger, device, sandbox).await?; + if let Some(handler) = DEVICE_HANDLERS.handler(&device.type_) { + let mut ctx = DeviceContext { logger, sandbox }; + + match handler.device_handler(device, &mut ctx).await { + Ok(update) => { + if let Some(dev_update) = update.dev { + if dev_updates + .insert(&device.container_path, dev_update.clone()) + .is_some() + { + return Err(anyhow!( + "Conflicting device updates for {}", + &device.container_path + )); + } + + // Update cgroup to allow all devices added to guest. + insert_devices_cgroup_rule(logger, spec, &dev_update.info, true, "rwm") + .context("Update device cgroup")?; + } + + let mut sb = sandbox.lock().await; + for (host, guest) in update.pci { + if let Some(other_guest) = sb.pcimap.insert(host, guest) { + return Err(anyhow!( + "Conflicting guest address for host device {} ({} versus {})", + host, + guest, + other_guest + )); + } + } + } + Err(e) => { + error!(logger, "failed to add devices, error: {e:?}"); + return Err(e); + } + } + } else { + return Err(anyhow!( + "Failed to find the device handler {}", + device.type_ + )); + } + } + + if let Some(process) = spec.process_mut() { + let env_vec: &mut Vec = + &mut process.env_mut().get_or_insert_with(Vec::new).to_vec(); + update_env_pci(env_vec, &sandbox.lock().await.pcimap)? + } + update_spec_devices(logger, spec, dev_updates) +} + +#[instrument] +async fn validate_device( + logger: &Logger, + device: &Device, + sandbox: &Arc>, +) -> Result<()> { + // log before validation to help with debugging gRPC protocol version differences. + info!( + logger, + "device-id: {}, device-type: {}, device-vm-path: {}, device-container-path: {}, device-options: {:?}", + device.id, device.type_, device.vm_path, device.container_path, device.options + ); + + if device.type_.is_empty() { + return Err(anyhow!("invalid type for device {:?}", device)); + } + + if device.id.is_empty() && device.vm_path.is_empty() { + return Err(anyhow!("invalid ID and VM path for device {:?}", device)); + } + + if device.container_path.is_empty() { + return Err(anyhow!("invalid container path for device {:?}", device)); + } + return Ok(()); +} + +// Insert a devices cgroup rule to control access to device. +#[instrument] +pub fn insert_devices_cgroup_rule( + logger: &Logger, + spec: &mut Spec, + dev_info: &DeviceInfo, + allow: bool, + access: &str, +) -> Result<()> { + let linux = spec + .linux_mut() + .as_mut() + .ok_or_else(|| anyhow!("Spec didn't container linux field"))?; + let devcgrp_type = dev_info + .cgroup_type + .parse::() + .context(format!( + "Failed to parse {:?} to Enum LinuxDeviceType", + dev_info.cgroup_type + ))?; + let linux_resource = &mut oci::LinuxResources::default(); + let resource = linux.resources_mut().as_mut().unwrap_or(linux_resource); + let mut device_cgrp = LinuxDeviceCgroup::default(); + device_cgrp.set_allow(allow); + device_cgrp.set_major(Some(dev_info.guest_major)); + device_cgrp.set_minor(Some(dev_info.guest_minor)); + device_cgrp.set_typ(Some(devcgrp_type)); + device_cgrp.set_access(Some(access.to_owned())); + + debug!( + logger, + "Insert a devices cgroup rule"; + "linux_device_cgroup" => device_cgrp.allow(), + "guest_major" => device_cgrp.major(), + "guest_minor" => device_cgrp.minor(), + "type" => device_cgrp.typ().unwrap().as_str(), + "access" => device_cgrp.access().as_ref().unwrap().as_str(), + ); + + if let Some(devices) = resource.devices_mut() { + devices.push(device_cgrp); + } else { + resource.set_devices(Some(vec![device_cgrp])); + } + + Ok(()) +} + +// update_env_pci alters PCI addresses in a set of environment +// variables to be correct for the VM instead of the host. It is +// given a map of (host address => guest address) +#[instrument] +pub fn update_env_pci( + env: &mut [String], + pcimap: &HashMap, +) -> Result<()> { + // SR-IOV device plugin may add two environment variables for one resource: + // - PCIDEVICE__: a list of PCI device ids separated by comma + // - PCIDEVICE___INFO: detailed info in JSON for above PCI devices + // Both environment variables hold information about the same set of PCI devices. + // Below code updates both of them in two passes: + // - 1st pass updates PCIDEVICE__ and collects host to guest PCI address mapping + let mut pci_dev_map: HashMap> = HashMap::new(); + for envvar in env.iter_mut() { + let eqpos = envvar + .find('=') + .ok_or_else(|| anyhow!("Malformed OCI env entry {:?}", envvar))?; + + let (name, eqval) = envvar.split_at(eqpos); + let val = &eqval[1..]; + + if !name.starts_with("PCIDEVICE_") || name.ends_with("_INFO") { + continue; + } + + let mut addr_map: HashMap = HashMap::new(); + let mut guest_addrs = Vec::::new(); + for host_addr_str in val.split(',') { + let host_addr = pci::Address::from_str(host_addr_str) + .with_context(|| format!("Can't parse {} environment variable", name))?; + let guest_addr = pcimap + .get(&host_addr) + .ok_or_else(|| anyhow!("Unable to translate host PCI address {}", host_addr))?; + + guest_addrs.push(format!("{}", guest_addr)); + addr_map.insert(host_addr_str.to_string(), format!("{}", guest_addr)); + } + + pci_dev_map.insert(format!("{}_INFO", name), addr_map); + + envvar.replace_range(eqpos + 1.., guest_addrs.join(",").as_str()); + } + + // - 2nd pass update PCIDEVICE___INFO if it exists + for envvar in env.iter_mut() { + let eqpos = envvar + .find('=') + .ok_or_else(|| anyhow!("Malformed OCI env entry {:?}", envvar))?; + + let (name, _) = envvar.split_at(eqpos); + if !(name.starts_with("PCIDEVICE_") && name.ends_with("_INFO")) { + continue; + } + + if let Some(addr_map) = pci_dev_map.get(name) { + for (host_addr, guest_addr) in addr_map { + *envvar = envvar.replace(host_addr, guest_addr); + } + } + } + + Ok(()) +} + +// update_spec_devices updates the device list in the OCI spec to make +// it include details appropriate for the VM, instead of the host. It +// is given a map of (container_path => update) where: +// container_path: the path to the device in the original OCI spec +// update: information on changes to make to the device +#[instrument] +fn update_spec_devices( + logger: &Logger, + spec: &mut Spec, + mut updates: HashMap<&str, DevUpdate>, +) -> Result<()> { + let linux = spec + .linux_mut() + .as_mut() + .ok_or_else(|| anyhow!("Spec didn't contain linux field"))?; + let mut res_updates = HashMap::<(String, i64, i64), DeviceInfo>::with_capacity(updates.len()); + + let mut default_devices = Vec::new(); + let linux_devices = linux.devices_mut().as_mut().unwrap_or(&mut default_devices); + for specdev in linux_devices.iter_mut() { + let devtype = specdev.typ().as_str().to_string(); + if let Some(update) = updates.remove(specdev.path().clone().display().to_string().as_str()) + { + let host_major = specdev.major(); + let host_minor = specdev.minor(); + + info!( + logger, + "update_spec_devices() updating device"; + "container_path" => &specdev.path().display().to_string(), + "type" => &devtype, + "host_major" => host_major, + "host_minor" => host_minor, + "guest_major" => update.info.guest_major, + "guest_minor" => update.info.guest_minor, + "final_path" => update.final_path.as_ref(), + ); + + specdev.set_major(update.info.guest_major); + specdev.set_minor(update.info.guest_minor); + if let Some(final_path) = update.final_path { + specdev.set_path(PathBuf::from(&final_path)); + } + + if res_updates + .insert((devtype, host_major, host_minor), update.info) + .is_some() + { + return Err(anyhow!( + "Conflicting resource updates for host_major={} host_minor={}", + host_major, + host_minor + )); + } + } + } + + // Make sure we applied all of our updates + if !updates.is_empty() { + return Err(anyhow!( + "Missing devices in OCI spec: {:?}", + updates + .keys() + .map(|d| format!("{:?}", d)) + .collect::>() + .join(" ") + )); + } + + if let Some(resources) = linux.resources_mut().as_mut() { + if let Some(resources_devices) = resources.devices_mut().as_mut() { + for d in resources_devices.iter_mut() { + let dev_type = d.typ().unwrap_or_default().as_str().to_string(); + if let (Some(host_major), Some(host_minor)) = (d.major(), d.minor()) { + if let Some(update) = + res_updates.get(&(dev_type.clone(), host_major, host_minor)) + { + info!( + logger, + "update_spec_devices() updating resource"; + "type" => &dev_type, + "host_major" => host_major, + "host_minor" => host_minor, + "guest_major" => update.guest_major, + "guest_minor" => update.guest_minor, + ); + + d.set_major(Some(update.guest_major)); + d.set_minor(Some(update.guest_minor)); + } + } + } + } + } + + Ok(()) +} + +// pcipath_to_sysfs fetches the sysfs path for a PCI path, relative to +// the sysfs path for the PCI host bridge, based on the PCI path +// provided. +#[instrument] +pub fn pcipath_to_sysfs(root_bus_sysfs: &str, pcipath: &pci::Path) -> Result { + let mut bus = "0000:00".to_string(); + let mut relpath = String::new(); + + for i in 0..pcipath.len() { + let bdf = format!("{}:{}", bus, pcipath[i]); + + relpath = format!("{}/{}", relpath, bdf); + + if i == pcipath.len() - 1 { + // Final device need not be a bridge + break; + } + + // Find out the bus exposed by bridge + let bridgebuspath = format!("{}{}/pci_bus", root_bus_sysfs, relpath); + let mut files: Vec<_> = fs::read_dir(&bridgebuspath)?.collect(); + + match files.pop() { + Some(busfile) if files.is_empty() => { + bus = busfile? + .file_name() + .into_string() + .map_err(|e| anyhow!("Bad filename under {}: {:?}", &bridgebuspath, e))?; + } + _ => { + return Err(anyhow!( + "Expected exactly one PCI bus in {}, got {} instead", + bridgebuspath, + // Adjust to original value as we've already popped + files.len() + 1 + )); + } + }; + } + + Ok(relpath) +} + +#[instrument] +pub fn online_device(path: &str) -> Result<()> { + fs::write(path, "1")?; + Ok(()) +} diff --git a/src/agent/src/device/nvdimm_device_handler.rs b/src/agent/src/device/nvdimm_device_handler.rs index 8d5c41439b..473b7a1491 100644 --- a/src/agent/src/device/nvdimm_device_handler.rs +++ b/src/agent/src/device/nvdimm_device_handler.rs @@ -9,6 +9,7 @@ use crate::linux_abi::ACPI_DEV_PATH; use crate::sandbox::Sandbox; use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher}; use anyhow::{anyhow, Context, Result}; +use kata_types::device::DRIVER_NVDIMM_TYPE; use protocols::agent::Device; use std::sync::Arc; use tokio::sync::Mutex; @@ -19,6 +20,11 @@ pub struct VirtioNvdimmDeviceHandler {} #[async_trait::async_trait] impl DeviceHandler for VirtioNvdimmDeviceHandler { + #[instrument] + fn driver_types(&self) -> &[&str] { + &[DRIVER_NVDIMM_TYPE] + } + #[instrument] async fn device_handler(&self, device: &Device, ctx: &mut DeviceContext) -> Result { if device.vm_path.is_empty() { diff --git a/src/agent/src/device/scsi_device_handler.rs b/src/agent/src/device/scsi_device_handler.rs index c65fd431ae..5370ad85d1 100644 --- a/src/agent/src/device/scsi_device_handler.rs +++ b/src/agent/src/device/scsi_device_handler.rs @@ -9,6 +9,7 @@ use crate::linux_abi::*; use crate::sandbox::Sandbox; use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher}; use anyhow::{anyhow, Context, Result}; +use kata_types::device::DRIVER_SCSI_TYPE; use protocols::agent::Device; use std::fs; use std::path::PathBuf; @@ -21,6 +22,11 @@ pub struct ScsiDeviceHandler {} #[async_trait::async_trait] impl DeviceHandler for ScsiDeviceHandler { + #[instrument] + fn driver_types(&self) -> &[&str] { + &[DRIVER_SCSI_TYPE] + } + #[instrument] async fn device_handler(&self, device: &Device, ctx: &mut DeviceContext) -> Result { let vm_path = get_scsi_device_name(ctx.sandbox, &device.id).await?; diff --git a/src/agent/src/device/vfio_device_handler.rs b/src/agent/src/device/vfio_device_handler.rs index 3be0e8b8ca..59b7911308 100644 --- a/src/agent/src/device/vfio_device_handler.rs +++ b/src/agent/src/device/vfio_device_handler.rs @@ -6,14 +6,13 @@ #[cfg(target_arch = "s390x")] use crate::ap; -use crate::device::{ - pcipath_to_sysfs, DevUpdate, DeviceContext, DeviceHandler, SpecUpdate, DRIVER_VFIO_PCI_GK_TYPE, -}; +use crate::device::{pcipath_to_sysfs, DevUpdate, DeviceContext, DeviceHandler, SpecUpdate}; use crate::linux_abi::*; use crate::pci; use crate::sandbox::Sandbox; use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher}; use anyhow::{anyhow, Context, Result}; +use kata_types::device::{DRIVER_VFIO_AP_TYPE, DRIVER_VFIO_PCI_GK_TYPE, DRIVER_VFIO_PCI_TYPE}; use protocols::agent::Device; use slog::Logger; use std::ffi::OsStr; @@ -34,6 +33,11 @@ pub struct VfioApDeviceHandler {} #[async_trait::async_trait] impl DeviceHandler for VfioPciDeviceHandler { + #[instrument] + fn driver_types(&self) -> &[&str] { + &[DRIVER_VFIO_PCI_GK_TYPE, DRIVER_VFIO_PCI_TYPE] + } + #[instrument] async fn device_handler(&self, device: &Device, ctx: &mut DeviceContext) -> Result { let vfio_in_guest = device.type_ != DRIVER_VFIO_PCI_GK_TYPE; @@ -88,6 +92,11 @@ impl DeviceHandler for VfioPciDeviceHandler { #[async_trait::async_trait] impl DeviceHandler for VfioApDeviceHandler { + #[instrument] + fn driver_types(&self) -> &[&str] { + &[DRIVER_VFIO_AP_TYPE] + } + #[cfg(target_arch = "s390x")] #[instrument] async fn device_handler(&self, device: &Device, ctx: &mut DeviceContext) -> Result {