diff --git a/src/runtime-rs/Cargo.lock b/src/runtime-rs/Cargo.lock index 358776fab0..21128c5ec3 100644 --- a/src/runtime-rs/Cargo.lock +++ b/src/runtime-rs/Cargo.lock @@ -1354,9 +1354,11 @@ dependencies = [ "go-flag", "kata-sys-util", "kata-types", + "lazy_static", "libc", "logging", "nix 0.24.3", + "path-clean", "persist", "rand 0.8.5", "rust-ini", @@ -2124,6 +2126,12 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d01a5bd0424d00070b0098dd17ebca6f961a959dead1dbcbbbc1d1cd8d3deeba" +[[package]] +name = "path-clean" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17359afc20d7ab31fdb42bb844c8b3bb1dabd7dcf7e68428492da7f16966fcef" + [[package]] name = "percent-encoding" version = "2.2.0" diff --git a/src/runtime-rs/crates/hypervisor/Cargo.toml b/src/runtime-rs/crates/hypervisor/Cargo.toml index 6a61faea45..f629c3bd06 100644 --- a/src/runtime-rs/crates/hypervisor/Cargo.toml +++ b/src/runtime-rs/crates/hypervisor/Cargo.toml @@ -26,6 +26,8 @@ thiserror = "1.0" tokio = { version = "1.28.1", features = ["sync", "fs"] } vmm-sys-util = "0.11.0" rand = "0.8.4" +path-clean = "1.0.1" +lazy_static = "1.4" kata-sys-util = { path = "../../../libs/kata-sys-util" } kata-types = { path = "../../../libs/kata-types" } diff --git a/src/runtime-rs/crates/hypervisor/src/device/device_manager.rs b/src/runtime-rs/crates/hypervisor/src/device/device_manager.rs index 62d9f7e52f..74adaac0d0 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/device_manager.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/device_manager.rs @@ -10,18 +10,17 @@ use anyhow::{anyhow, Context, Result}; use kata_sys_util::rand::RandomBytes; use tokio::sync::{Mutex, RwLock}; -use super::{ - util::{get_host_path, get_virt_drive_name}, - Device, DeviceConfig, DeviceType, -}; use crate::{ - BlockConfig, BlockDevice, Hypervisor, KATA_BLK_DEV_TYPE, KATA_MMIO_BLK_DEV_TYPE, + BlockConfig, BlockDevice, Hypervisor, 
VfioDevice, KATA_BLK_DEV_TYPE, KATA_MMIO_BLK_DEV_TYPE, VIRTIO_BLOCK_MMIO, VIRTIO_BLOCK_PCI, }; -pub type ArcMutexDevice = Arc>; +use super::{ + util::{get_host_path, get_virt_drive_name, DEVICE_TYPE_BLOCK}, + Device, DeviceConfig, DeviceType, +}; -const DEVICE_TYPE_BLOCK: &str = "b"; +pub type ArcMutexDevice = Arc>; /// block_index and released_block_index are used to search an available block index /// in Sandbox. @@ -90,9 +89,24 @@ impl DeviceManager { // handle attach error if let Err(e) = result { - if let DeviceType::Block(device) = device_guard.get_device_info().await { - self.shared_info.release_device_index(device.config.index); - }; + match device_guard.get_device_info().await { + DeviceType::Block(device) => { + self.shared_info.release_device_index(device.config.index); + } + DeviceType::Vfio(device) => { + // safe here: + // Only when vfio dev_type is `b`, virt_path MUST be Some(X), + // and needs do release_device_index. otherwise, let it go. + if device.config.dev_type == DEVICE_TYPE_BLOCK { + self.shared_info + .release_device_index(device.config.virt_path.unwrap().0); + } + } + _ => { + debug!(sl!(), "no need to do release device index."); + } + } + drop(device_guard); self.devices.remove(device_id); return Err(e); @@ -149,6 +163,11 @@ impl DeviceManager { return Some(device_id.to_string()); } } + DeviceType::Vfio(device) => { + if device.config.host_path == host_path { + return Some(device_id.to_string()); + } + } _ => { // TODO: support find other device type continue; @@ -168,7 +187,7 @@ impl DeviceManager { Some((current_index, virt_path_name)) } else { - // only dev_type is block, otherwise, it's useless. + // only dev_type is block, otherwise, it's None. None }; @@ -181,22 +200,31 @@ impl DeviceManager { let device_id = self.new_device_id()?; let dev: ArcMutexDevice = match device_config { DeviceConfig::BlockCfg(config) => { - // try to find the device, found and just return id. 
- if let Some(dev_id_matched) = self.find_device(config.path_on_host.clone()).await { - info!( - sl!(), - "device with host path:{:?} found. just return device id: {:?}", - config.path_on_host.clone(), - dev_id_matched - ); - - return Ok(dev_id_matched); + // try to find the device, if found and just return id. + if let Some(device_matched_id) = self.find_device(config.path_on_host.clone()).await + { + return Ok(device_matched_id); } self.create_block_device(config, device_id.clone()) .await .context("failed to create device")? } + DeviceConfig::VfioCfg(config) => { + let mut vfio_dev_config = config.clone(); + let dev_host_path = vfio_dev_config.host_path.clone(); + if let Some(device_matched_id) = self.find_device(dev_host_path).await { + return Ok(device_matched_id); + } + + let virt_path = self.get_dev_virt_path(vfio_dev_config.dev_type.as_str())?; + vfio_dev_config.virt_path = virt_path; + + Arc::new(Mutex::new(VfioDevice::new( + device_id.clone(), + &vfio_dev_config, + ))) + } _ => { return Err(anyhow!("invliad device type")); } @@ -230,8 +258,7 @@ impl DeviceManager { }; block_config.driver_option = block_driver; - // generate block device index and virt path - // safe here, Block device always has virt_path. + // generate virt path if let Some(virt_path) = self.get_dev_virt_path(DEVICE_TYPE_BLOCK)? { block_config.index = virt_path.0; block_config.virt_path = virt_path.1; @@ -239,10 +266,10 @@ impl DeviceManager { // if the path on host is empty, we need to get device host path from the device major and minor number // Otherwise, it might be rawfile based block device, the host path is already passed from the runtime, - // so we don't need to do anything here + // so we don't need to do anything here. 
if block_config.path_on_host.is_empty() { block_config.path_on_host = - get_host_path(DEVICE_TYPE_BLOCK.to_owned(), config.major, config.minor) + get_host_path(DEVICE_TYPE_BLOCK, config.major, config.minor) .context("failed to get host path")?; } diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/mod.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/mod.rs index 45487e3b5f..eabf9b5f1c 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/mod.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/mod.rs @@ -4,20 +4,210 @@ // SPDX-License-Identifier: Apache-2.0 // +mod vfio; mod vhost_user; mod virtio_blk; +mod virtio_fs; +mod virtio_net; +mod virtio_vsock; + +pub use vfio::{ + bind_device_to_host, bind_device_to_vfio, get_host_guest_map, get_vfio_device, HostDevice, + VfioBusMode, VfioConfig, VfioDevice, +}; pub use virtio_blk::{ BlockConfig, BlockDevice, KATA_BLK_DEV_TYPE, KATA_MMIO_BLK_DEV_TYPE, VIRTIO_BLOCK_MMIO, VIRTIO_BLOCK_PCI, }; -mod virtio_net; -pub use virtio_net::{Address, NetworkConfig, NetworkDevice}; -mod vfio; -pub use vfio::{bind_device_to_host, bind_device_to_vfio, VfioBusMode, VfioConfig, VfioDevice}; -mod virtio_fs; pub use virtio_fs::{ ShareFsDevice, ShareFsDeviceConfig, ShareFsMountConfig, ShareFsMountDevice, ShareFsMountType, ShareFsOperation, }; -mod virtio_vsock; +pub use virtio_net::{Address, NetworkConfig, NetworkDevice}; pub use virtio_vsock::{HybridVsockConfig, HybridVsockDevice, VsockConfig, VsockDevice}; + +use anyhow::{anyhow, Context, Result}; + +// Tips: +// The Re-write `PciSlot` and `PciPath` with rust that it origins from `pcipath.go`: +// + +// The PCI spec reserves 5 bits for slot number (a.k.a. 
device +// number), giving slots 0..31 +const PCI_SLOT_BITS: u32 = 5; +const MAX_PCI_SLOTS: u32 = (1 << PCI_SLOT_BITS) - 1; + +// A PciSlot describes where a PCI device sits on a single bus +// +// This encapsulates the PCI slot number a.k.a device number, which is +// limited to a 5 bit value [0x00..0x1f] by the PCI specification +// +// To support multifunction device's, It's needed to extend +// this to include the PCI 3-bit function number as well. +#[derive(Clone, Debug, Default, PartialEq)] +pub struct PciSlot(pub u8); + +impl PciSlot { + pub fn convert_from_string(s: &str) -> Result { + if s.is_empty() || s.len() > 2 { + return Err(anyhow!("string given is invalid.")); + } + + let base = 16; + let n = u64::from_str_radix(s, base).context("convert string to number failed")?; + if n >> PCI_SLOT_BITS > 0 { + return Err(anyhow!( + "number {:?} exceeds MAX:{:?}, failed.", + n, + MAX_PCI_SLOTS + )); + } + + Ok(PciSlot(n as u8)) + } + + pub fn convert_from_u32(v: u32) -> Result { + if v > MAX_PCI_SLOTS { + return Err(anyhow!("value {:?} exceeds MAX: {:?}", v, MAX_PCI_SLOTS)); + } + + Ok(PciSlot(v as u8)) + } + + pub fn convert_to_string(&self) -> String { + format!("{:02x}", self.0) + } +} + +// A PciPath describes where a PCI sits in a PCI hierarchy. +// +// Consists of a list of PCI slots, giving the slot of each bridge +// that must be traversed from the PCI root to reach the device, +// followed by the slot of the device itself. +// +// When formatted into a string is written as "xx/.../yy/zz". Here, +// zz is the slot of the device on its PCI bridge, yy is the slot of +// the bridge on its parent bridge and so forth until xx is the slot +// of the "most upstream" bridge on the root bus. +// +// If a device is directly connected to the root bus, which used in +// lightweight hypervisors, such as dragonball/firecracker/clh, and +// its PciPath.slots will contains only one PciSlot. 
+#[derive(Clone, Debug, Default, PartialEq)] +pub struct PciPath { + // list of PCI slots + slots: Vec, +} + +impl PciPath { + // method to format the PciPath into a string + pub fn convert_to_string(&self) -> String { + self.slots + .iter() + .map(|pci_slot| format!("{:02x}", pci_slot.0)) + .collect::>() + .join("/") + } + + // method to parse a PciPath from a string + pub fn convert_from_string(path: &str) -> Result { + if path.is_empty() { + return Err(anyhow!("path given is empty.")); + } + + let mut pci_slots: Vec = Vec::new(); + let slots: Vec<&str> = path.split('/').collect(); + for slot in slots { + match PciSlot::convert_from_string(slot) { + Ok(s) => pci_slots.push(s), + Err(e) => return Err(anyhow!("slot is invalid with: {:?}", e)), + } + } + + Ok(PciPath { slots: pci_slots }) + } + + pub fn from_pci_slots(slots: Vec) -> Option { + if slots.is_empty() { + return None; + } + + Some(PciPath { slots }) + } + + // device_slot to get the slot of the device on its PCI bridge + pub fn get_device_slot(&self) -> Option { + self.slots.last().cloned() + } + + // root_slot to get the slot of the "most upstream" bridge on the root bus + pub fn get_root_slot(&self) -> Option { + self.slots.first().cloned() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_pci_slot() { + // min + let pci_slot_01 = PciSlot::convert_from_string("00"); + assert!(pci_slot_01.is_ok()); + // max + let pci_slot_02 = PciSlot::convert_from_string("1f"); + assert!(pci_slot_02.is_ok()); + + // exceed + let pci_slot_03 = PciSlot::convert_from_string("20"); + assert!(pci_slot_03.is_err()); + + // valid number + let pci_slot_04 = PciSlot::convert_from_u32(1_u32); + assert!(pci_slot_04.is_ok()); + assert_eq!(pci_slot_04.as_ref().unwrap().0, 1_u8); + let pci_slot_str = pci_slot_04.as_ref().unwrap().convert_to_string(); + assert_eq!(pci_slot_str, format!("{:02x}", pci_slot_04.unwrap().0)); + + // max number + let pci_slot_05 = PciSlot::convert_from_u32(31_u32); + 
assert!(pci_slot_05.is_ok()); + assert_eq!(pci_slot_05.unwrap().0, 31_u8); + + // exceed and error + let pci_slot_06 = PciSlot::convert_from_u32(32_u32); + assert!(pci_slot_06.is_err()); + } + + #[test] + fn test_pci_patch() { + let pci_path_0 = PciPath::convert_from_string("01/0a/05"); + assert!(pci_path_0.is_ok()); + let pci_path_unwrap = pci_path_0.unwrap(); + assert_eq!(pci_path_unwrap.slots[0].0, 1); + assert_eq!(pci_path_unwrap.slots[1].0, 10); + assert_eq!(pci_path_unwrap.slots[2].0, 5); + + let pci_path_01 = PciPath::from_pci_slots(vec![PciSlot(1), PciSlot(10), PciSlot(5)]); + assert!(pci_path_01.is_some()); + let pci_path = pci_path_01.unwrap(); + let pci_path_02 = pci_path.convert_to_string(); + assert_eq!(pci_path_02, "01/0a/05".to_string()); + + let dev_slot = pci_path.get_device_slot(); + assert!(dev_slot.is_some()); + assert_eq!(dev_slot.unwrap().0, 5); + + let root_slot = pci_path.get_root_slot(); + assert!(root_slot.is_some()); + assert_eq!(root_slot.unwrap().0, 1); + } + + #[test] + fn test_get_host_guest_map() { + // test unwrap is fine, no panic occurs. 
+ let hg_map = get_host_guest_map("".to_owned()); + assert!(hg_map.is_none()); + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs index 63fe400226..7d71e4e1ef 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs @@ -1,18 +1,98 @@ -// Copyright (c) 2019-2022 Alibaba Cloud -// Copyright (c) 2019-2022 Ant Group +// Copyright (c) 2022-2023 Alibaba Cloud +// Copyright (c) 2022-2023 Ant Group // // SPDX-License-Identifier: Apache-2.0 // -use std::{fs, path::Path, process::Command}; - -use crate::device::Device; -use crate::device::DeviceType; -use crate::Hypervisor as hypervisor; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -use anyhow::anyhow; -use anyhow::{Context, Result}; +use std::{ + collections::HashMap, + fs, + path::{Path, PathBuf}, + process::Command, + sync::{ + atomic::{AtomicU8, Ordering}, + Arc, RwLock, + }, +}; + +use anyhow::{anyhow, Context, Result}; use async_trait::async_trait; +use lazy_static::lazy_static; +use path_clean::PathClean; + +use crate::{ + device::{hypervisor, Device, DeviceType}, + PciPath, PciSlot, +}; +use kata_sys_util::fs::get_base_name; + +pub const SYS_BUS_PCI_DRIVER_PROBE: &str = "/sys/bus/pci/drivers_probe"; +pub const SYS_BUS_PCI_DEVICES: &str = "/sys/bus/pci/devices"; +pub const SYS_KERN_IOMMU_GROUPS: &str = "/sys/kernel/iommu_groups"; +pub const VFIO_PCI_DRIVER: &str = "vfio-pci"; +pub const DRIVER_MMIO_BLK_TYPE: &str = "mmioblk"; +pub const DRIVER_VFIO_PCI_TYPE: &str = "vfio-pci"; +pub const MAX_DEV_ID_SIZE: usize = 31; + +const VFIO_PCI_DRIVER_NEW_ID: &str = "/sys/bus/pci/drivers/vfio-pci/new_id"; +const VFIO_PCI_DRIVER_UNBIND: &str = "/sys/bus/pci/drivers/vfio-pci/unbind"; +const SYS_CLASS_IOMMU: &str = "/sys/class/iommu"; +const INTEL_IOMMU_PREFIX: &str = "dmar"; +const AMD_IOMMU_PREFIX: &str = "ivhd"; + +lazy_static! 
{ + static ref GUEST_DEVICE_ID: Arc = Arc::new(AtomicU8::new(0_u8)); + static ref HOST_GUEST_MAP: Arc>> = + Arc::new(RwLock::new(HashMap::new())); +} + +// map host/guest bdf and the mapping saved into `HOST_GUEST_MAP`, +// and return PciPath. +pub fn generate_guest_pci_path(bdf: String) -> Result { + let hg_map = HOST_GUEST_MAP.clone(); + let current_id = GUEST_DEVICE_ID.clone(); + + current_id.fetch_add(1, Ordering::SeqCst); + let slot = current_id.load(Ordering::SeqCst); + + // In some Hypervisors, dragonball, cloud-hypervisor or firecracker, + // the device is directly connected to the bus without intermediary bus. + // FIXME: Qemu's pci path needs to be implemented; + let host_bdf = normalize_device_bdf(bdf.as_str()); + let guest_bdf = format!("0000:00:{:02x}.0", slot); + + // safe, just do unwrap as `HOST_GUEST_MAP` is always valid. + hg_map.write().unwrap().insert(host_bdf, guest_bdf); + + Ok(PciPath { + slots: vec![PciSlot::convert_from_u32(slot.into()).context("pci slot convert failed.")?], + }) +} + +// get host/guest mapping for info +pub fn get_host_guest_map(host_bdf: String) -> Option { + // safe, just do unwrap as `HOST_GUEST_MAP` is always valid. + HOST_GUEST_MAP.read().unwrap().get(&host_bdf).cloned() +} + +pub fn do_check_iommu_on() -> Result { + let element = std::fs::read_dir(SYS_CLASS_IOMMU)? + .filter_map(|e| e.ok()) + .last(); + + if element.is_none() { + return Err(anyhow!("iommu is not enabled")); + } + + // safe here, the result of map is always be Some(true) or Some(false). 
+ Ok(element + .map(|e| { + let x = e.file_name().to_string_lossy().into_owned(); + x.starts_with(INTEL_IOMMU_PREFIX) || x.starts_with(AMD_IOMMU_PREFIX) + }) + .unwrap()) +} fn override_driver(bdf: &str, driver: &str) -> Result<()> { let driver_override = format!("/sys/bus/pci/devices/{}/driver_override", bdf); @@ -22,56 +102,470 @@ fn override_driver(bdf: &str, driver: &str) -> Result<()> { Ok(()) } -const SYS_PCI_DEVICES_PATH: &str = "/sys/bus/pci/devices"; -const PCI_DRIVER_PROBE: &str = "/sys/bus/pci/drivers_probe"; -const VFIO_NEW_ID_PATH: &str = "/sys/bus/pci/drivers/vfio-pci/new_id"; -const VFIO_UNBIND_PATH: &str = "/sys/bus/pci/drivers/vfio-pci/unbind"; - -pub const VFIO_PCI: &str = "vfio-pci"; - -#[derive(Debug, Clone)] +#[derive(Clone, Debug, Default, PartialEq)] pub enum VfioBusMode { - PCI, + #[default] MMIO, + PCI, } impl VfioBusMode { - pub fn new(mode: &str) -> Result { - Ok(match mode { + pub fn new(mode: &str) -> Self { + match mode { "mmio" => VfioBusMode::MMIO, _ => VfioBusMode::PCI, - }) + } + } + + pub fn to_string(mode: VfioBusMode) -> String { + match mode { + VfioBusMode::MMIO => "mmio".to_owned(), + _ => "pci".to_owned(), + } + } + + // driver_type used for kata-agent + // (1) vfio-pci for add device handler, + // (2) mmioblk for add storage handler, + pub fn driver_type(mode: &str) -> &str { + match mode { + "b" => DRIVER_MMIO_BLK_TYPE, + _ => DRIVER_VFIO_PCI_TYPE, + } } } -#[derive(Debug, Clone)] -pub struct VfioConfig { +#[derive(Clone, Debug, Default)] +pub enum VfioDeviceType { + /// error type of VFIO device + Error, + + /// normal VFIO device type + #[default] + Normal, + + /// mediated VFIO device type + Mediated, +} + +// DeviceVendor represents a PCI device's device id and vendor id +// DeviceVendor: (device, vendor) +#[derive(Clone, Debug)] +pub struct DeviceVendor(String, String); + +impl DeviceVendor { + pub fn get_device_vendor(&self) -> Result<(u32, u32)> { + // default value is 0 when vendor_id or device_id is empty + if 
self.0.is_empty() || self.1.is_empty() { + return Ok((0, 0)); + } + + let do_convert = |id: &String| { + u32::from_str_radix( + id.trim_start_matches("0x") + .trim_matches(char::is_whitespace), + 16, + ) + .with_context(|| anyhow!("invalid id {:?}", id)) + }; + + let device = do_convert(&self.0).context("convert device failed")?; + let vendor = do_convert(&self.1).context("convert vendor failed")?; + + Ok((device, vendor)) + } + + pub fn get_device_vendor_id(&self) -> Result { + let (device, vendor) = self + .get_device_vendor() + .context("get device and vendor failed")?; + + Ok(((device & 0xffff) << 16) | (vendor & 0xffff)) + } +} + +// HostDevice represents a VFIO drive used to hotplug +#[derive(Clone, Debug, Default)] +pub struct HostDevice { + /// unique identifier of the device + pub hostdev_id: String, + /// Sysfs path for mdev bus type device pub sysfs_path: String, - /// PCI device information: "bus:slot:function" + /// PCI device information (BDF): "bus:slot:function" pub bus_slot_func: String, - /// Bus Mode, PCI or MMIO - pub mode: VfioBusMode, + /// device_vendor: device id and vendor id + pub device_vendor: Option, + + /// type of vfio device + pub vfio_type: VfioDeviceType, + + /// guest PCI path of device + pub guest_pci_path: Option, + + /// vfio_vendor for vendor's some special cases. 
+ #[cfg(feature = "enable-vendor")] + pub vfio_vendor: VfioVendor, } -#[derive(Debug, Clone)] +// VfioConfig represents a VFIO drive used for hotplugging +#[derive(Clone, Debug, Default)] +pub struct VfioConfig { + /// usually host path will be /dev/vfio/N + pub host_path: String, + + /// device as block or char + pub dev_type: String, + + /// hostdev_prefix for devices, such as: + /// (1) phisycial endpoint: "physical_nic_" + /// (2) vfio mdev: "vfio_mdev_" + /// (3) vfio pci: "vfio_device_" + /// (4) vfio volume: "vfio_vol_" + /// (5) vfio nvme: "vfio_nvme_" + pub hostdev_prefix: String, + + /// device in guest which it appears inside the VM, + /// outside of the container mount namespace + /// virt_path: Option<(index, virt_path_name)> + pub virt_path: Option<(u64, String)>, +} + +#[derive(Clone, Debug, Default)] pub struct VfioDevice { - /// Unique identifier of the device - pub id: String, + pub device_id: String, + pub attach_count: u64, - /// Config info for Vfio Device + /// Bus Mode, PCI or MMIO + pub bus_mode: VfioBusMode, + /// driver type + pub driver_type: String, + + /// vfio config from business pub config: VfioConfig, + + // host device with multi-funtions + pub devices: Vec, + // options for vfio pci handler in kata-agent + pub device_options: Vec, } -/// binds the device to vfio driver after unbinding from host. -/// Will be called by a network interface or a generic pcie device. +impl VfioDevice { + // new with VfioConfig + pub fn new(device_id: String, dev_info: &VfioConfig) -> Self { + // devices and device_options are in a 1-1 mapping, used in + // vfio-pci handler for kata-agent. 
+ let devices: Vec = Vec::with_capacity(MAX_DEV_ID_SIZE); + let device_options: Vec = Vec::with_capacity(MAX_DEV_ID_SIZE); + + // get bus mode and driver type based on the device type + let dev_type = dev_info.dev_type.as_str(); + let driver_type = VfioBusMode::driver_type(dev_type).to_owned(); + + Self { + device_id, + attach_count: 0, + bus_mode: VfioBusMode::PCI, + driver_type, + config: dev_info.clone(), + devices, + device_options, + } + } + + fn get_host_path(&self) -> String { + self.config.host_path.clone() + } + + fn get_vfio_prefix(&self) -> String { + self.config.hostdev_prefix.clone() + } + + // nornaml VFIO BDF: 0000:04:00.0 + // mediated VFIO BDF: 83b8f4f2-509f-382f-3c1e-e6bfe0fa1001 + fn get_vfio_device_type(&self, device_sys_path: String) -> Result { + let mut tokens: Vec<&str> = device_sys_path.as_str().split(':').collect(); + let vfio_type = match tokens.len() { + 3 => VfioDeviceType::Normal, + _ => { + tokens = device_sys_path.split('-').collect(); + if tokens.len() == 5 { + VfioDeviceType::Mediated + } else { + VfioDeviceType::Error + } + } + }; + + Ok(vfio_type) + } + + // get_sysfs_device returns the sysfsdev of mediated device + // expected input string format is absolute path to the sysfs dev node + // eg. /sys/kernel/iommu_groups/0/devices/f79944e4-5a3d-11e8-99ce-479cbab002e4 + fn get_sysfs_device(&self, sysfs_dev_path: PathBuf) -> Result { + let mut buf = + fs::canonicalize(sysfs_dev_path.clone()).context("sysfs device path not exist")?; + let mut resolved = false; + + // resolve symbolic links until there's no more to resolve + while buf.symlink_metadata()?.file_type().is_symlink() { + let link = fs::read_link(&buf)?; + buf.pop(); + buf.push(link); + resolved = true; + } + + // If a symbolic link was resolved, the resulting path may be relative to the original path + if resolved { + // If the original path is relative and the resolved path is not, the resolved path + // should be returned as absolute. 
+ if sysfs_dev_path.is_relative() && buf.is_absolute() { + buf = fs::canonicalize(&buf)?; + } + } + + Ok(buf.clean().display().to_string()) + } + + // vfio device details: (device BDF, device SysfsDev, vfio Device Type) + fn get_vfio_device_details( + &self, + dev_file_name: String, + iommu_dev_path: PathBuf, + ) -> Result<(Option, String, VfioDeviceType)> { + let vfio_type = self.get_vfio_device_type(dev_file_name.clone())?; + match vfio_type { + VfioDeviceType::Normal => { + let dev_bdf = get_device_bdf(dev_file_name.clone()); + let dev_sys = [SYS_BUS_PCI_DEVICES, dev_file_name.as_str()].join("/"); + Ok((dev_bdf, dev_sys, vfio_type)) + } + VfioDeviceType::Mediated => { + // sysfsdev eg. /sys/devices/pci0000:00/0000:00:02.0/f79944e4-5a3d-11e8-99ce-479cbab002e4 + let sysfs_dev = Path::new(&iommu_dev_path).join(dev_file_name); + let dev_sys = self + .get_sysfs_device(sysfs_dev) + .context("get sysfs device failed")?; + + let dev_bdf = if let Some(dev_s) = get_mediated_device_bdf(dev_sys.clone()) { + get_device_bdf(dev_s) + } else { + None + }; + + Ok((dev_bdf, dev_sys, vfio_type)) + } + _ => Err(anyhow!("unsupported vfio type : {:?}", vfio_type)), + } + } + + // read vendor and deviceor from /sys/bus/pci/devices/BDF/X + fn get_vfio_device_vendor(&self, bdf: &str) -> Result { + let device = + get_device_property(bdf, "device").context("get device from syspath failed")?; + let vendor = + get_device_property(bdf, "vendor").context("get vendor from syspath failed")?; + + Ok(DeviceVendor(device, vendor)) + } + + async fn set_vfio_config( + &mut self, + iommu_devs_path: PathBuf, + device_name: &str, + ) -> Result { + let vfio_dev_details = self + .get_vfio_device_details(device_name.to_owned(), iommu_devs_path) + .context("get vfio device details failed")?; + + // It's safe as BDF really exists. 
+ let dev_bdf = vfio_dev_details.0.unwrap(); + let dev_vendor = self + .get_vfio_device_vendor(&dev_bdf) + .context("get property device and vendor failed")?; + + let mut vfio_dev = HostDevice { + bus_slot_func: dev_bdf.clone(), + device_vendor: Some(dev_vendor), + sysfs_path: vfio_dev_details.1, + vfio_type: vfio_dev_details.2, + ..Default::default() + }; + + // when vfio pci, kata-agent handles with device_options, and its + // format: "DDDD:BB:DD.F=" + // DDDD:BB:DD.F is the device's PCI address on host + // is the device's PCI path in the guest + if self.bus_mode == VfioBusMode::PCI { + let pci_path = + generate_guest_pci_path(dev_bdf.clone()).context("generate pci path failed")?; + vfio_dev.guest_pci_path = Some(pci_path.clone()); + self.device_options + .push(format!("0000:{}={}", dev_bdf, pci_path.convert_to_string())); + } + + Ok(vfio_dev) + } + + // filter Host or PCI Bridges that are in the same IOMMU group as the + // passed-through devices. One CANNOT pass-through a PCI bridge or Host + // bridge. Class 0x0604 is PCI bridge, 0x0600 is Host bridge + fn filter_bridge_device(&self, bdf: &str, bitmask: u64) -> Option { + let device_class = match get_device_property(bdf, "class") { + Ok(dev_class) => dev_class, + Err(_) => "".to_string(), + }; + + if device_class.is_empty() { + return None; + } + + match device_class.parse::() { + Ok(cid_u32) => { + // class code is 16 bits, remove the two trailing zeros + let class_code = u64::from(cid_u32) >> 8; + if class_code & bitmask == bitmask { + Some(class_code) + } else { + None + } + } + _ => None, + } + } +} + +#[async_trait] +impl Device for VfioDevice { + async fn attach(&mut self, h: &dyn hypervisor) -> Result<()> { + // host path: /dev/vfio/X + let host_path = self.get_host_path(); + // vfio group: X + let vfio_group = get_base_name(host_path.clone())? 
+ .into_string() + .map_err(|e| anyhow!("failed to get base name {:?}", e))?; + + // /sys/kernel/iommu_groups/X/devices + let iommu_devs_path = Path::new(SYS_KERN_IOMMU_GROUPS) + .join(vfio_group.as_str()) + .join("devices"); + + // /sys/kernel/iommu_groups/X/devices + // DDDD:BB:DD.F0 DDDD:BB:DD.F1 + let iommu_devices = fs::read_dir(iommu_devs_path.clone())? + .filter_map(|e| { + let x = e.ok()?.file_name().to_string_lossy().into_owned(); + Some(x) + }) + .collect::>(); + if iommu_devices.len() > 1 { + warn!(sl!(), "vfio device {} with multi-function", host_path); + } + + // pass all devices in iommu group, and use index to identify device. + for (index, device) in iommu_devices.iter().enumerate() { + // filter host or PCI bridge + if self.filter_bridge_device(device, 0x0600).is_some() { + continue; + } + + let mut hostdev: HostDevice = self + .set_vfio_config(iommu_devs_path.clone(), device) + .await + .context("set vfio config failed")?; + let dev_prefix = self.get_vfio_prefix(); + hostdev.hostdev_id = make_device_nameid(&dev_prefix, index, MAX_DEV_ID_SIZE); + + self.devices.push(hostdev); + } + + if self + .increase_attach_count() + .await + .context("failed to increase attach count")? + { + return Err(anyhow!("attach count increased failed as some reason.")); + } + + // do add device for vfio deivce + if let Err(e) = h.add_device(DeviceType::Vfio(self.clone())).await { + self.decrease_attach_count().await?; + + return Err(e); + } + + Ok(()) + } + + async fn detach(&mut self, h: &dyn hypervisor) -> Result> { + if self + .decrease_attach_count() + .await + .context("failed to decrease attach count")? 
+ { + return Ok(None); + } + + if let Err(e) = h.remove_device(DeviceType::Vfio(self.clone())).await { + self.increase_attach_count().await?; + return Err(e); + } + + // only virt_path is Some, there's a device index + let device_index = if let Some(virt_path) = self.config.virt_path.clone() { + Some(virt_path.0) + } else { + None + }; + + Ok(device_index) + } + + async fn increase_attach_count(&mut self) -> Result { + match self.attach_count { + 0 => { + // do real attach + self.attach_count += 1; + Ok(false) + } + std::u64::MAX => Err(anyhow!("device was attached too many times")), + _ => { + self.attach_count += 1; + Ok(true) + } + } + } + + async fn decrease_attach_count(&mut self) -> Result { + match self.attach_count { + 0 => Err(anyhow!("detaching a device that wasn't attached")), + 1 => { + // do real wrok + self.attach_count -= 1; + Ok(false) + } + _ => { + self.attach_count -= 1; + Ok(true) + } + } + } + + async fn get_device_info(&self) -> DeviceType { + DeviceType::Vfio(self.clone()) + } +} + +// binds the device to vfio driver after unbinding from host. +// Will be called by a network interface or a generic pcie device. pub fn bind_device_to_vfio(bdf: &str, host_driver: &str, _vendor_device_id: &str) -> Result<()> { // modprobe vfio-pci - if !Path::new(VFIO_NEW_ID_PATH).exists() { + if !Path::new(VFIO_PCI_DRIVER_NEW_ID).exists() { Command::new("modprobe") - .arg(VFIO_PCI) + .arg(VFIO_PCI_DRIVER) .output() .expect("Failed to run modprobe vfio-pci"); } @@ -84,19 +578,22 @@ pub fn bind_device_to_vfio(bdf: &str, host_driver: &str, _vendor_device_id: &str if cmdline.contains("iommu=off") || !cmdline.contains("iommu=") { return Err(anyhow!("iommu isn't set on kernel cmdline")); } + + if !do_check_iommu_on().context("check iommu on failed")? 
{ + return Err(anyhow!("IOMMU not enabled yet.")); + } } // if it's already bound to vfio - if is_equal_driver(bdf, VFIO_PCI) { + if is_equal_driver(bdf, VFIO_PCI_DRIVER) { info!(sl!(), "bdf : {} was already bound to vfio-pci", bdf); return Ok(()); } info!(sl!(), "host driver : {}", host_driver); - override_driver(bdf, VFIO_PCI).context("override driver")?; + override_driver(bdf, VFIO_PCI_DRIVER).context("override driver")?; let unbind_path = format!("/sys/bus/pci/devices/{}/driver/unbind", bdf); - // echo bdf > /sys/bus/pci/drivers/virtio-pci/unbind" fs::write(&unbind_path, bdf) .with_context(|| format!("Failed to echo {} > {}", bdf, &unbind_path))?; @@ -104,15 +601,16 @@ pub fn bind_device_to_vfio(bdf: &str, host_driver: &str, _vendor_device_id: &str info!(sl!(), "{} is unbound from {}", bdf, host_driver); // echo bdf > /sys/bus/pci/drivers_probe - fs::write(PCI_DRIVER_PROBE, bdf) - .with_context(|| format!("Failed to echo {} > {}", bdf, PCI_DRIVER_PROBE))?; + fs::write(SYS_BUS_PCI_DRIVER_PROBE, bdf) + .with_context(|| format!("Failed to echo {} > {}", bdf, SYS_BUS_PCI_DRIVER_PROBE))?; info!(sl!(), "echo {} > /sys/bus/pci/drivers_probe", bdf); + Ok(()) } pub fn is_equal_driver(bdf: &str, host_driver: &str) -> bool { - let sys_pci_devices_path = Path::new(SYS_PCI_DEVICES_PATH); + let sys_pci_devices_path = Path::new(SYS_BUS_PCI_DEVICES); let driver_file = sys_pci_devices_path.join(bdf).join("driver"); if driver_file.exists() { @@ -126,10 +624,9 @@ pub fn is_equal_driver(bdf: &str, host_driver: &str) -> bool { false } -/// bind_device_to_host binds the device to the host driver after unbinding from vfio-pci. +// bind_device_to_host binds the device to the host driver after unbinding from vfio-pci. 
pub fn bind_device_to_host(bdf: &str, host_driver: &str, _vendor_device_id: &str) -> Result<()> { // Unbind from vfio-pci driver to the original host driver - info!(sl!(), "bind {} to {}", bdf, host_driver); // if it's already bound to host_driver @@ -144,37 +641,136 @@ pub fn bind_device_to_host(bdf: &str, host_driver: &str, _vendor_device_id: &str override_driver(bdf, host_driver).context("override driver")?; // echo bdf > /sys/bus/pci/drivers/vfio-pci/unbind" - std::fs::write(VFIO_UNBIND_PATH, bdf) - .with_context(|| format!("echo {}> {}", bdf, VFIO_UNBIND_PATH))?; - info!(sl!(), "echo {} > {}", bdf, VFIO_UNBIND_PATH); + std::fs::write(VFIO_PCI_DRIVER_UNBIND, bdf) + .with_context(|| format!("echo {}> {}", bdf, VFIO_PCI_DRIVER_UNBIND))?; + info!(sl!(), "echo {} > {}", bdf, VFIO_PCI_DRIVER_UNBIND); // echo bdf > /sys/bus/pci/drivers_probe - std::fs::write(PCI_DRIVER_PROBE, bdf) - .with_context(|| format!("echo {} > {}", bdf, PCI_DRIVER_PROBE))?; - info!(sl!(), "echo {} > {}", bdf, PCI_DRIVER_PROBE); + std::fs::write(SYS_BUS_PCI_DRIVER_PROBE, bdf) + .with_context(|| format!("echo {} > {}", bdf, SYS_BUS_PCI_DRIVER_PROBE))?; + info!(sl!(), "echo {} > {}", bdf, SYS_BUS_PCI_DRIVER_PROBE); Ok(()) } -#[async_trait] -impl Device for VfioConfig { - async fn attach(&mut self, _h: &dyn hypervisor) -> Result<()> { - todo!() +// get_vfio_device_bdf returns the BDF of pci device +// expected format :. eg. 
02:10.0 +fn get_device_bdf(dev_sys_str: String) -> Option { + let dev_sys = dev_sys_str; + if !dev_sys.starts_with("0000:") { + return Some(dev_sys); } - async fn detach(&mut self, _h: &dyn hypervisor) -> Result> { - todo!() + let parts: Vec<&str> = dev_sys.as_str().splitn(2, ':').collect(); + if parts.len() < 2 { + return None; } - async fn get_device_info(&self) -> DeviceType { - todo!() - } + parts.get(1).copied().map(|bdf| bdf.to_owned()) +} - async fn increase_attach_count(&mut self) -> Result { - todo!() - } - - async fn decrease_attach_count(&mut self) -> Result { - todo!() +// expected format DDDD:BB:DD.F, e.g. 0000:02:10.0 +fn normalize_device_bdf(bdf: &str) -> String { + if !bdf.starts_with("0000") { + format!("0000:{}", bdf) + } else { + bdf.to_string() } } + +// make_device_nameid: generate an ID for the hypervisor commandline +fn make_device_nameid(name_type: &str, id: usize, max_len: usize) -> String { + let name_id = format!("{}_{}", name_type, id); + + if name_id.len() > max_len { + name_id[0..max_len].to_string() + } else { + name_id + } +} + +// get_mediated_device_bdf returns the MDEV BDF +// expected input string /sys/devices/pci0000:d7/BDF0/BDF1/.../MDEVBDF/UUID +fn get_mediated_device_bdf(dev_sys_str: String) -> Option { + let dev_sys = dev_sys_str; + let parts: Vec<&str> = dev_sys.as_str().split('/').collect(); + if parts.len() < 4 { + return None; + } + + parts + .get(parts.len() - 2) + .copied() + .map(|bdf| bdf.to_owned()) +} + +// dev_sys_path: /sys/bus/pci/devices/DDDD:BB:DD.F +// cfg_path: /sys/bus/pci/devices/DDDD:BB:DD.F/xxx +fn get_device_property(bdf: &str, property: &str) -> Result { + let device_name = normalize_device_bdf(bdf); + + let dev_sys_path = Path::new(SYS_BUS_PCI_DEVICES).join(device_name); + let cfg_path = fs::read_to_string(dev_sys_path.join(property)).with_context(|| { + format!( + "failed to read {}", + dev_sys_path.join(property).to_str().unwrap() + ) + })?; + + Ok(cfg_path.as_str().trim_end_matches('\n').to_string()) +} + 
+pub fn get_vfio_iommu_group(bdf: String) -> Result { + // /sys/bus/pci/devices/DDDD:BB:DD.F/iommu_group + let dbdf = normalize_device_bdf(bdf.as_str()); + let iommugrp_path = Path::new(SYS_BUS_PCI_DEVICES) + .join(dbdf.as_str()) + .join("iommu_group"); + if !iommugrp_path.exists() { + warn!( + sl!(), + "IOMMU group path: {:?} not found, do bind device to vfio first.", iommugrp_path + ); + return Err(anyhow!("please do bind device to vfio")); + } + + // iommu group symlink: ../../../../../../kernel/iommu_groups/X + let iommugrp_symlink = fs::read_link(&iommugrp_path) + .map_err(|e| anyhow!("read iommu group symlink failed {:?}", e))?; + + // get base name from iommu group symlink: X + let iommu_group = get_base_name(iommugrp_symlink)? + .into_string() + .map_err(|e| anyhow!("failed to get iommu group {:?}", e))?; + + // we'd better verify the path to ensure it does exist. + if !Path::new(SYS_KERN_IOMMU_GROUPS) + .join(&iommu_group) + .join("devices") + .join(dbdf.as_str()) + .exists() + { + return Err(anyhow!( + "device dbdf {:?} dosn't exist in {}/{}/devices.", + dbdf.as_str(), + SYS_KERN_IOMMU_GROUPS, + iommu_group + )); + } + + Ok(format!("/dev/vfio/{}", iommu_group)) +} + +pub fn get_vfio_device(device: String) -> Result { + // support both /dev/vfio/X and a BDF in DDDD:BB:DD.F or BB:DD.F form + let mut vfio_device = device; + + let bdf_vec: Vec<&str> = vfio_device.as_str().split(&[':', '.'][..]).collect(); + if bdf_vec.len() >= 3 && bdf_vec.len() < 5 { + // DDDD:BB:DD.F -> /dev/vfio/X + vfio_device = + get_vfio_iommu_group(vfio_device.clone()).context("get vfio iommu group failed")?; + } + + Ok(vfio_device) +} diff --git a/src/runtime-rs/crates/hypervisor/src/device/util.rs b/src/runtime-rs/crates/hypervisor/src/device/util.rs index 3aa5f7b0a9..5d999d8f6c 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/util.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/util.rs @@ -9,11 +9,14 @@ use ini::Ini; const SYS_DEV_PREFIX: &str = "/sys/dev"; +pub const 
DEVICE_TYPE_BLOCK: &str = 
"b"; +pub const DEVICE_TYPE_CHAR: &str = "c"; + // get_host_path is used to fetch the host path for the device. // The path passed in the spec refers to the path that should appear inside the container. // We need to find the actual device path on the host based on the major-minor numbers of the device. -pub fn get_host_path(dev_type: String, major: i64, minor: i64) -> Result { - let path_comp = match dev_type.as_str() { +pub fn get_host_path(dev_type: &str, major: i64, minor: i64) -> Result { + let path_comp = match dev_type { "c" | "u" => "char", "b" => "block", // for device type p will return an empty string diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs index 10e13b5787..80b4f33d36 100644 --- a/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs @@ -16,7 +16,7 @@ use dragonball::api::v1::{ use super::DragonballInner; use crate::{ device::DeviceType, HybridVsockConfig, NetworkConfig, ShareFsDeviceConfig, ShareFsMountConfig, - ShareFsMountType, ShareFsOperation, VmmState, + ShareFsMountType, ShareFsOperation, VfioBusMode, VfioDevice, VmmState, }; const MB_TO_B: u32 = 1024 * 1024; @@ -47,9 +47,7 @@ impl DragonballInner { DeviceType::Network(network) => self .add_net_device(&network.config, network.id) .context("add net device"), - DeviceType::Vfio(_) => { - todo!() - } + DeviceType::Vfio(hostdev) => self.add_vfio_device(&hostdev).context("add vfio device"), DeviceType::Block(block) => self .add_block_device( block.config.path_on_host.as_str(), @@ -80,13 +78,77 @@ impl DragonballInner { self.remove_block_drive(drive_id.as_str()) .context("remove block drive") } - DeviceType::Vfio(_config) => { - todo!() + DeviceType::Vfio(hostdev) => { + let primary_device = hostdev.devices.first().unwrap().clone(); + let hostdev_id = primary_device.hostdev_id; + + self.remove_vfio_device(hostdev_id) } _ 
=> Err(anyhow!("unsupported device {:?}", device)), } } + fn add_vfio_device(&mut self, device: &VfioDevice) -> Result<()> { + let vfio_device = device.clone(); + + // FIXME: + // A device with multiple functions, or an IOMMU group with more than one + // device, the Primary device is selected to be passed to VM. + // And the first one is the Primary device. + // safe here, devices is not empty. + let primary_device = vfio_device.devices.first().unwrap().clone(); + + let vendor_device_id = if let Some(vd) = primary_device.device_vendor { + vd.get_device_vendor_id()? + } else { + 0 + }; + + let guest_dev_id = if let Some(pci_path) = primary_device.guest_pci_path { + // safe here, dragonball's pci device directly connects to root bus. + // usually, it has been assigned in vfio device manager. + pci_path.get_device_slot().unwrap().0 + } else { + 0 + }; + + let bus_mode = VfioBusMode::to_string(vfio_device.bus_mode); + + info!(sl!(), "Mock for dragonball insert host device."); + info!( + sl!(), + " Mock for dragonball insert host device. 
+ host device id: {:?}, + bus_slot_func: {:?}, + bus mod: {:?}, + guest device id: {:?}, + vendor/device id: {:?}", + primary_device.hostdev_id, + primary_device.bus_slot_func, + bus_mode, + guest_dev_id, + vendor_device_id, + ); + + // FIXME: + // interface implementation to be done when dragonball supports + // self.vmm_instance.insert_host_device(host_cfg)?; + + Ok(()) + } + + fn remove_vfio_device(&mut self, hostdev_id: String) -> Result<()> { + info!( + sl!(), + "Mock for dragonball remove host_device with hostdev id {:?}", hostdev_id + ); + // FIXME: + // interface implementation to be done when dragonball supports + // self.vmm_instance.remove_host_device(hostdev_id)?; + + Ok(()) + } + fn add_block_device( &mut self, path: &str, diff --git a/src/runtime-rs/crates/resource/src/manager_inner.rs b/src/runtime-rs/crates/resource/src/manager_inner.rs index b60925c111..4744da40fe 100644 --- a/src/runtime-rs/crates/resource/src/manager_inner.rs +++ b/src/runtime-rs/crates/resource/src/manager_inner.rs @@ -12,9 +12,10 @@ use async_trait::async_trait; use hypervisor::{ device::{ device_manager::{do_handle_device, DeviceManager}, + util::{get_host_path, DEVICE_TYPE_CHAR}, DeviceConfig, DeviceType, }, - BlockConfig, Hypervisor, + BlockConfig, Hypervisor, VfioConfig, }; use kata_types::config::TomlConfig; use kata_types::mount::Mount; @@ -277,14 +278,15 @@ impl ResourceManagerInner { ..Default::default() }); - let device_info = do_handle_device(&self.device_manager, &dev_info) + let device_info = do_handle_device(&self.device_manager.clone(), &dev_info) .await .context("do handle device")?; - // create agent device + // create block device for kata agent, + // if driver is virtio-blk-pci, the id will be pci address. 
if let DeviceType::Block(device) = device_info { let agent_device = Device { - id: device.device_id.clone(), + id: device.config.virt_path.clone(), container_path: d.path.clone(), field_type: device.config.driver_option, vm_path: device.config.virt_path, @@ -293,6 +295,45 @@ impl ResourceManagerInner { devices.push(agent_device); } } + "c" => { + let host_path = get_host_path(DEVICE_TYPE_CHAR, d.major, d.minor) + .context("get host path failed")?; + // First of all, filter vfio devices. + if !host_path.starts_with("/dev/vfio") { + continue; + } + + let dev_info = DeviceConfig::VfioCfg(VfioConfig { + host_path, + dev_type: "c".to_string(), + hostdev_prefix: "vfio_device".to_owned(), + ..Default::default() + }); + + let device_info = do_handle_device(&self.device_manager.clone(), &dev_info) + .await + .context("do handle device")?; + + // vfio mode: vfio-pci and vfio-pci-gk for x86_64 + // - vfio-pci, devices appear as VFIO character devices under /dev/vfio in container. + // - vfio-pci-gk, devices are managed by whatever driver in Guest kernel. 
+ let vfio_mode = match self.toml_config.runtime.vfio_mode.as_str() { + "vfio" => "vfio-pci".to_string(), + _ => "vfio-pci-gk".to_string(), + }; + + // create agent device + if let DeviceType::Vfio(device) = device_info { + let agent_device = Device { + id: device.device_id, // just for kata-agent + container_path: d.path.clone(), + field_type: vfio_mode, + options: device.device_options, + ..Default::default() + }; + devices.push(agent_device); + } + } _ => { // TODO enable other devices type continue; diff --git a/src/runtime-rs/crates/resource/src/network/endpoint/physical_endpoint.rs b/src/runtime-rs/crates/resource/src/network/endpoint/physical_endpoint.rs index 4db8b865f8..fd73190b07 100644 --- a/src/runtime-rs/crates/resource/src/network/endpoint/physical_endpoint.rs +++ b/src/runtime-rs/crates/resource/src/network/endpoint/physical_endpoint.rs @@ -10,7 +10,7 @@ use anyhow::{anyhow, Context, Result}; use async_trait::async_trait; use hypervisor::device::DeviceType; use hypervisor::{device::driver, Hypervisor}; -use hypervisor::{VfioConfig, VfioDevice}; +use hypervisor::{HostDevice, VfioDevice}; use super::endpoint_persist::{EndpointState, PhysicalEndpointState}; use super::Endpoint; @@ -111,13 +111,14 @@ impl Endpoint for PhysicalEndpoint { // add vfio device let d = DeviceType::Vfio(VfioDevice { - id: format!("physical_nic_{}", self.name().await), - config: VfioConfig { - sysfs_path: "".to_string(), + attach_count: 0, + bus_mode: driver::VfioBusMode::new(mode), + devices: vec![HostDevice { + hostdev_id: format!("physical_nic_{}", self.name().await), bus_slot_func: self.bdf.clone(), - mode: driver::VfioBusMode::new(mode) - .with_context(|| format!("new vfio bus mode {:?}", mode))?, - }, + ..Default::default() + }], + ..Default::default() }); hypervisor.add_device(d).await.context("add device")?; Ok(())