mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-04-28 19:54:35 +00:00
runtime-rs: add support vfio device manager
Limitations: As no Rust VMM VFIO manager is ready yet, this commit only supports part of VFIO in runtime-rs. The remaining part is to call the VMM interfaces related to VFIO add/remove. Once the VMM/VFIO manager is ready, a new PR will be pushed to close the gap. Fixes: #6525 Signed-off-by: alex.lyn <alex.lyn@antgroup.com>
This commit is contained in:
parent
61e819ea8e
commit
1e3b372bbb
8
src/runtime-rs/Cargo.lock
generated
8
src/runtime-rs/Cargo.lock
generated
@ -1354,9 +1354,11 @@ dependencies = [
|
||||
"go-flag",
|
||||
"kata-sys-util",
|
||||
"kata-types",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"logging",
|
||||
"nix 0.24.3",
|
||||
"path-clean",
|
||||
"persist",
|
||||
"rand 0.8.5",
|
||||
"rust-ini",
|
||||
@ -2124,6 +2126,12 @@ version = "1.0.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d01a5bd0424d00070b0098dd17ebca6f961a959dead1dbcbbbc1d1cd8d3deeba"
|
||||
|
||||
[[package]]
|
||||
name = "path-clean"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "17359afc20d7ab31fdb42bb844c8b3bb1dabd7dcf7e68428492da7f16966fcef"
|
||||
|
||||
[[package]]
|
||||
name = "percent-encoding"
|
||||
version = "2.2.0"
|
||||
|
@ -26,6 +26,8 @@ thiserror = "1.0"
|
||||
tokio = { version = "1.28.1", features = ["sync", "fs"] }
|
||||
vmm-sys-util = "0.11.0"
|
||||
rand = "0.8.4"
|
||||
path-clean = "1.0.1"
|
||||
lazy_static = "1.4"
|
||||
|
||||
kata-sys-util = { path = "../../../libs/kata-sys-util" }
|
||||
kata-types = { path = "../../../libs/kata-types" }
|
||||
|
@ -10,18 +10,17 @@ use anyhow::{anyhow, Context, Result};
|
||||
use kata_sys_util::rand::RandomBytes;
|
||||
use tokio::sync::{Mutex, RwLock};
|
||||
|
||||
use super::{
|
||||
util::{get_host_path, get_virt_drive_name},
|
||||
Device, DeviceConfig, DeviceType,
|
||||
};
|
||||
use crate::{
|
||||
BlockConfig, BlockDevice, Hypervisor, KATA_BLK_DEV_TYPE, KATA_MMIO_BLK_DEV_TYPE,
|
||||
BlockConfig, BlockDevice, Hypervisor, VfioDevice, KATA_BLK_DEV_TYPE, KATA_MMIO_BLK_DEV_TYPE,
|
||||
VIRTIO_BLOCK_MMIO, VIRTIO_BLOCK_PCI,
|
||||
};
|
||||
|
||||
pub type ArcMutexDevice = Arc<Mutex<dyn Device>>;
|
||||
use super::{
|
||||
util::{get_host_path, get_virt_drive_name, DEVICE_TYPE_BLOCK},
|
||||
Device, DeviceConfig, DeviceType,
|
||||
};
|
||||
|
||||
const DEVICE_TYPE_BLOCK: &str = "b";
|
||||
pub type ArcMutexDevice = Arc<Mutex<dyn Device>>;
|
||||
|
||||
/// block_index and released_block_index are used to search an available block index
|
||||
/// in Sandbox.
|
||||
@ -90,9 +89,24 @@ impl DeviceManager {
|
||||
|
||||
// handle attach error
|
||||
if let Err(e) = result {
|
||||
if let DeviceType::Block(device) = device_guard.get_device_info().await {
|
||||
self.shared_info.release_device_index(device.config.index);
|
||||
};
|
||||
match device_guard.get_device_info().await {
|
||||
DeviceType::Block(device) => {
|
||||
self.shared_info.release_device_index(device.config.index);
|
||||
}
|
||||
DeviceType::Vfio(device) => {
|
||||
// safe here:
|
||||
// Only when vfio dev_type is `b`, virt_path MUST be Some(X),
|
||||
// and needs do release_device_index. otherwise, let it go.
|
||||
if device.config.dev_type == DEVICE_TYPE_BLOCK {
|
||||
self.shared_info
|
||||
.release_device_index(device.config.virt_path.unwrap().0);
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
debug!(sl!(), "no need to do release device index.");
|
||||
}
|
||||
}
|
||||
|
||||
drop(device_guard);
|
||||
self.devices.remove(device_id);
|
||||
return Err(e);
|
||||
@ -149,6 +163,11 @@ impl DeviceManager {
|
||||
return Some(device_id.to_string());
|
||||
}
|
||||
}
|
||||
DeviceType::Vfio(device) => {
|
||||
if device.config.host_path == host_path {
|
||||
return Some(device_id.to_string());
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
// TODO: support find other device type
|
||||
continue;
|
||||
@ -168,7 +187,7 @@ impl DeviceManager {
|
||||
|
||||
Some((current_index, virt_path_name))
|
||||
} else {
|
||||
// only dev_type is block, otherwise, it's useless.
|
||||
// only dev_type is block, otherwise, it's None.
|
||||
None
|
||||
};
|
||||
|
||||
@ -181,22 +200,31 @@ impl DeviceManager {
|
||||
let device_id = self.new_device_id()?;
|
||||
let dev: ArcMutexDevice = match device_config {
|
||||
DeviceConfig::BlockCfg(config) => {
|
||||
// try to find the device, found and just return id.
|
||||
if let Some(dev_id_matched) = self.find_device(config.path_on_host.clone()).await {
|
||||
info!(
|
||||
sl!(),
|
||||
"device with host path:{:?} found. just return device id: {:?}",
|
||||
config.path_on_host.clone(),
|
||||
dev_id_matched
|
||||
);
|
||||
|
||||
return Ok(dev_id_matched);
|
||||
// try to find the device, if found and just return id.
|
||||
if let Some(device_matched_id) = self.find_device(config.path_on_host.clone()).await
|
||||
{
|
||||
return Ok(device_matched_id);
|
||||
}
|
||||
|
||||
self.create_block_device(config, device_id.clone())
|
||||
.await
|
||||
.context("failed to create device")?
|
||||
}
|
||||
DeviceConfig::VfioCfg(config) => {
|
||||
let mut vfio_dev_config = config.clone();
|
||||
let dev_host_path = vfio_dev_config.host_path.clone();
|
||||
if let Some(device_matched_id) = self.find_device(dev_host_path).await {
|
||||
return Ok(device_matched_id);
|
||||
}
|
||||
|
||||
let virt_path = self.get_dev_virt_path(vfio_dev_config.dev_type.as_str())?;
|
||||
vfio_dev_config.virt_path = virt_path;
|
||||
|
||||
Arc::new(Mutex::new(VfioDevice::new(
|
||||
device_id.clone(),
|
||||
&vfio_dev_config,
|
||||
)))
|
||||
}
|
||||
_ => {
|
||||
return Err(anyhow!("invliad device type"));
|
||||
}
|
||||
@ -230,8 +258,7 @@ impl DeviceManager {
|
||||
};
|
||||
block_config.driver_option = block_driver;
|
||||
|
||||
// generate block device index and virt path
|
||||
// safe here, Block device always has virt_path.
|
||||
// generate virt path
|
||||
if let Some(virt_path) = self.get_dev_virt_path(DEVICE_TYPE_BLOCK)? {
|
||||
block_config.index = virt_path.0;
|
||||
block_config.virt_path = virt_path.1;
|
||||
@ -239,10 +266,10 @@ impl DeviceManager {
|
||||
|
||||
// if the path on host is empty, we need to get device host path from the device major and minor number
|
||||
// Otherwise, it might be rawfile based block device, the host path is already passed from the runtime,
|
||||
// so we don't need to do anything here
|
||||
// so we don't need to do anything here.
|
||||
if block_config.path_on_host.is_empty() {
|
||||
block_config.path_on_host =
|
||||
get_host_path(DEVICE_TYPE_BLOCK.to_owned(), config.major, config.minor)
|
||||
get_host_path(DEVICE_TYPE_BLOCK, config.major, config.minor)
|
||||
.context("failed to get host path")?;
|
||||
}
|
||||
|
||||
|
@ -4,20 +4,210 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
mod vfio;
|
||||
mod vhost_user;
|
||||
mod virtio_blk;
|
||||
mod virtio_fs;
|
||||
mod virtio_net;
|
||||
mod virtio_vsock;
|
||||
|
||||
pub use vfio::{
|
||||
bind_device_to_host, bind_device_to_vfio, get_host_guest_map, get_vfio_device, HostDevice,
|
||||
VfioBusMode, VfioConfig, VfioDevice,
|
||||
};
|
||||
pub use virtio_blk::{
|
||||
BlockConfig, BlockDevice, KATA_BLK_DEV_TYPE, KATA_MMIO_BLK_DEV_TYPE, VIRTIO_BLOCK_MMIO,
|
||||
VIRTIO_BLOCK_PCI,
|
||||
};
|
||||
mod virtio_net;
|
||||
pub use virtio_net::{Address, NetworkConfig, NetworkDevice};
|
||||
mod vfio;
|
||||
pub use vfio::{bind_device_to_host, bind_device_to_vfio, VfioBusMode, VfioConfig, VfioDevice};
|
||||
mod virtio_fs;
|
||||
pub use virtio_fs::{
|
||||
ShareFsDevice, ShareFsDeviceConfig, ShareFsMountConfig, ShareFsMountDevice, ShareFsMountType,
|
||||
ShareFsOperation,
|
||||
};
|
||||
mod virtio_vsock;
|
||||
pub use virtio_net::{Address, NetworkConfig, NetworkDevice};
|
||||
pub use virtio_vsock::{HybridVsockConfig, HybridVsockDevice, VsockConfig, VsockDevice};
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
|
||||
// Tips:
|
||||
// `PciSlot` and `PciPath` below are Rust rewrites of the types defined in `pcipath.go`:
|
||||
//
|
||||
|
||||
// The PCI spec reserves 5 bits for slot number (a.k.a. device
|
||||
// number), giving slots 0..31
|
||||
const PCI_SLOT_BITS: u32 = 5;
|
||||
const MAX_PCI_SLOTS: u32 = (1 << PCI_SLOT_BITS) - 1;
|
||||
|
||||
// A PciSlot describes where a PCI device sits on a single bus
|
||||
//
|
||||
// This encapsulates the PCI slot number a.k.a device number, which is
|
||||
// limited to a 5 bit value [0x00..0x1f] by the PCI specification
|
||||
//
|
||||
// To support multifunction device's, It's needed to extend
|
||||
// this to include the PCI 3-bit function number as well.
|
||||
#[derive(Clone, Debug, Default, PartialEq)]
|
||||
pub struct PciSlot(pub u8);
|
||||
|
||||
impl PciSlot {
|
||||
pub fn convert_from_string(s: &str) -> Result<PciSlot> {
|
||||
if s.is_empty() || s.len() > 2 {
|
||||
return Err(anyhow!("string given is invalid."));
|
||||
}
|
||||
|
||||
let base = 16;
|
||||
let n = u64::from_str_radix(s, base).context("convert string to number failed")?;
|
||||
if n >> PCI_SLOT_BITS > 0 {
|
||||
return Err(anyhow!(
|
||||
"number {:?} exceeds MAX:{:?}, failed.",
|
||||
n,
|
||||
MAX_PCI_SLOTS
|
||||
));
|
||||
}
|
||||
|
||||
Ok(PciSlot(n as u8))
|
||||
}
|
||||
|
||||
pub fn convert_from_u32(v: u32) -> Result<PciSlot> {
|
||||
if v > MAX_PCI_SLOTS {
|
||||
return Err(anyhow!("value {:?} exceeds MAX: {:?}", v, MAX_PCI_SLOTS));
|
||||
}
|
||||
|
||||
Ok(PciSlot(v as u8))
|
||||
}
|
||||
|
||||
pub fn convert_to_string(&self) -> String {
|
||||
format!("{:02x}", self.0)
|
||||
}
|
||||
}
|
||||
|
||||
// A PciPath describes where a PCI sits in a PCI hierarchy.
|
||||
//
|
||||
// Consists of a list of PCI slots, giving the slot of each bridge
|
||||
// that must be traversed from the PCI root to reach the device,
|
||||
// followed by the slot of the device itself.
|
||||
//
|
||||
// When formatted into a string is written as "xx/.../yy/zz". Here,
|
||||
// zz is the slot of the device on its PCI bridge, yy is the slot of
|
||||
// the bridge on its parent bridge and so forth until xx is the slot
|
||||
// of the "most upstream" bridge on the root bus.
|
||||
//
|
||||
// If a device is directly connected to the root bus, which used in
|
||||
// lightweight hypervisors, such as dragonball/firecracker/clh, and
|
||||
// its PciPath.slots will contains only one PciSlot.
|
||||
#[derive(Clone, Debug, Default, PartialEq)]
|
||||
pub struct PciPath {
|
||||
// list of PCI slots
|
||||
slots: Vec<PciSlot>,
|
||||
}
|
||||
|
||||
impl PciPath {
|
||||
// method to format the PciPath into a string
|
||||
pub fn convert_to_string(&self) -> String {
|
||||
self.slots
|
||||
.iter()
|
||||
.map(|pci_slot| format!("{:02x}", pci_slot.0))
|
||||
.collect::<Vec<String>>()
|
||||
.join("/")
|
||||
}
|
||||
|
||||
// method to parse a PciPath from a string
|
||||
pub fn convert_from_string(path: &str) -> Result<PciPath> {
|
||||
if path.is_empty() {
|
||||
return Err(anyhow!("path given is empty."));
|
||||
}
|
||||
|
||||
let mut pci_slots: Vec<PciSlot> = Vec::new();
|
||||
let slots: Vec<&str> = path.split('/').collect();
|
||||
for slot in slots {
|
||||
match PciSlot::convert_from_string(slot) {
|
||||
Ok(s) => pci_slots.push(s),
|
||||
Err(e) => return Err(anyhow!("slot is invalid with: {:?}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(PciPath { slots: pci_slots })
|
||||
}
|
||||
|
||||
pub fn from_pci_slots(slots: Vec<PciSlot>) -> Option<PciPath> {
|
||||
if slots.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(PciPath { slots })
|
||||
}
|
||||
|
||||
// device_slot to get the slot of the device on its PCI bridge
|
||||
pub fn get_device_slot(&self) -> Option<PciSlot> {
|
||||
self.slots.last().cloned()
|
||||
}
|
||||
|
||||
// root_slot to get the slot of the "most upstream" bridge on the root bus
|
||||
pub fn get_root_slot(&self) -> Option<PciSlot> {
|
||||
self.slots.first().cloned()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_pci_slot() {
        // min
        let pci_slot_01 = PciSlot::convert_from_string("00");
        assert!(pci_slot_01.is_ok());
        // max
        let pci_slot_02 = PciSlot::convert_from_string("1f");
        assert!(pci_slot_02.is_ok());

        // exceed
        let pci_slot_03 = PciSlot::convert_from_string("20");
        assert!(pci_slot_03.is_err());

        // valid number
        let pci_slot_04 = PciSlot::convert_from_u32(1_u32);
        assert!(pci_slot_04.is_ok());
        let pci_slot_04 = pci_slot_04.unwrap();
        assert_eq!(pci_slot_04.0, 1_u8);
        assert_eq!(pci_slot_04.convert_to_string(), "01".to_string());

        // max number
        let pci_slot_05 = PciSlot::convert_from_u32(31_u32);
        assert!(pci_slot_05.is_ok());
        assert_eq!(pci_slot_05.unwrap().0, 31_u8);

        // exceed and error
        let pci_slot_06 = PciSlot::convert_from_u32(32_u32);
        assert!(pci_slot_06.is_err());
    }

    #[test]
    fn test_pci_path() {
        // string -> path
        let pci_path_0 = PciPath::convert_from_string("01/0a/05");
        assert!(pci_path_0.is_ok());
        let pci_path_unwrap = pci_path_0.unwrap();
        assert_eq!(pci_path_unwrap.slots[0].0, 1);
        assert_eq!(pci_path_unwrap.slots[1].0, 10);
        assert_eq!(pci_path_unwrap.slots[2].0, 5);

        // slots -> path -> string
        let pci_path_01 = PciPath::from_pci_slots(vec![PciSlot(1), PciSlot(10), PciSlot(5)]);
        assert!(pci_path_01.is_some());
        let pci_path = pci_path_01.unwrap();
        let pci_path_02 = pci_path.convert_to_string();
        assert_eq!(pci_path_02, "01/0a/05".to_string());

        // the device slot is the last element of the path
        let dev_slot = pci_path.get_device_slot();
        assert!(dev_slot.is_some());
        assert_eq!(dev_slot.unwrap().0, 5);

        // the root slot is the first element of the path
        let root_slot = pci_path.get_root_slot();
        assert!(root_slot.is_some());
        assert_eq!(root_slot.unwrap().0, 1);
    }

    #[test]
    fn test_get_host_guest_map() {
        // test unwrap is fine, no panic occurs.
        let hg_map = get_host_guest_map("".to_owned());
        assert!(hg_map.is_none());
    }
}
|
||||
|
@ -1,18 +1,98 @@
|
||||
// Copyright (c) 2019-2022 Alibaba Cloud
|
||||
// Copyright (c) 2019-2022 Ant Group
|
||||
// Copyright (c) 2022-2023 Alibaba Cloud
|
||||
// Copyright (c) 2022-2023 Ant Group
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use std::{fs, path::Path, process::Command};
|
||||
|
||||
use crate::device::Device;
|
||||
use crate::device::DeviceType;
|
||||
use crate::Hypervisor as hypervisor;
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
use anyhow::anyhow;
|
||||
use anyhow::{Context, Result};
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
fs,
|
||||
path::{Path, PathBuf},
|
||||
process::Command,
|
||||
sync::{
|
||||
atomic::{AtomicU8, Ordering},
|
||||
Arc, RwLock,
|
||||
},
|
||||
};
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use lazy_static::lazy_static;
|
||||
use path_clean::PathClean;
|
||||
|
||||
use crate::{
|
||||
device::{hypervisor, Device, DeviceType},
|
||||
PciPath, PciSlot,
|
||||
};
|
||||
use kata_sys_util::fs::get_base_name;
|
||||
|
||||
pub const SYS_BUS_PCI_DRIVER_PROBE: &str = "/sys/bus/pci/drivers_probe";
|
||||
pub const SYS_BUS_PCI_DEVICES: &str = "/sys/bus/pci/devices";
|
||||
pub const SYS_KERN_IOMMU_GROUPS: &str = "/sys/kernel/iommu_groups";
|
||||
pub const VFIO_PCI_DRIVER: &str = "vfio-pci";
|
||||
pub const DRIVER_MMIO_BLK_TYPE: &str = "mmioblk";
|
||||
pub const DRIVER_VFIO_PCI_TYPE: &str = "vfio-pci";
|
||||
pub const MAX_DEV_ID_SIZE: usize = 31;
|
||||
|
||||
const VFIO_PCI_DRIVER_NEW_ID: &str = "/sys/bus/pci/drivers/vfio-pci/new_id";
|
||||
const VFIO_PCI_DRIVER_UNBIND: &str = "/sys/bus/pci/drivers/vfio-pci/unbind";
|
||||
const SYS_CLASS_IOMMU: &str = "/sys/class/iommu";
|
||||
const INTEL_IOMMU_PREFIX: &str = "dmar";
|
||||
const AMD_IOMMU_PREFIX: &str = "ivhd";
|
||||
|
||||
lazy_static! {
|
||||
static ref GUEST_DEVICE_ID: Arc<AtomicU8> = Arc::new(AtomicU8::new(0_u8));
|
||||
static ref HOST_GUEST_MAP: Arc<RwLock<HashMap<String, String>>> =
|
||||
Arc::new(RwLock::new(HashMap::new()));
|
||||
}
|
||||
|
||||
// map host/guest bdf and the mapping saved into `HOST_GUEST_MAP`,
|
||||
// and return PciPath.
|
||||
pub fn generate_guest_pci_path(bdf: String) -> Result<PciPath> {
|
||||
let hg_map = HOST_GUEST_MAP.clone();
|
||||
let current_id = GUEST_DEVICE_ID.clone();
|
||||
|
||||
current_id.fetch_add(1, Ordering::SeqCst);
|
||||
let slot = current_id.load(Ordering::SeqCst);
|
||||
|
||||
// In some Hypervisors, dragonball, cloud-hypervisor or firecracker,
|
||||
// the device is directly connected to the bus without intermediary bus.
|
||||
// FIXME: Qemu's pci path needs to be implemented;
|
||||
let host_bdf = normalize_device_bdf(bdf.as_str());
|
||||
let guest_bdf = format!("0000:00:{:02x}.0", slot);
|
||||
|
||||
// safe, just do unwrap as `HOST_GUEST_MAP` is always valid.
|
||||
hg_map.write().unwrap().insert(host_bdf, guest_bdf);
|
||||
|
||||
Ok(PciPath {
|
||||
slots: vec![PciSlot::convert_from_u32(slot.into()).context("pci slot convert failed.")?],
|
||||
})
|
||||
}
|
||||
|
||||
// get host/guest mapping for info
|
||||
pub fn get_host_guest_map(host_bdf: String) -> Option<String> {
|
||||
// safe, just do unwrap as `HOST_GUEST_MAP` is always valid.
|
||||
HOST_GUEST_MAP.read().unwrap().get(&host_bdf).cloned()
|
||||
}
|
||||
|
||||
pub fn do_check_iommu_on() -> Result<bool> {
|
||||
let element = std::fs::read_dir(SYS_CLASS_IOMMU)?
|
||||
.filter_map(|e| e.ok())
|
||||
.last();
|
||||
|
||||
if element.is_none() {
|
||||
return Err(anyhow!("iommu is not enabled"));
|
||||
}
|
||||
|
||||
// safe here, the result of map is always be Some(true) or Some(false).
|
||||
Ok(element
|
||||
.map(|e| {
|
||||
let x = e.file_name().to_string_lossy().into_owned();
|
||||
x.starts_with(INTEL_IOMMU_PREFIX) || x.starts_with(AMD_IOMMU_PREFIX)
|
||||
})
|
||||
.unwrap())
|
||||
}
|
||||
|
||||
fn override_driver(bdf: &str, driver: &str) -> Result<()> {
|
||||
let driver_override = format!("/sys/bus/pci/devices/{}/driver_override", bdf);
|
||||
@ -22,56 +102,470 @@ fn override_driver(bdf: &str, driver: &str) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
const SYS_PCI_DEVICES_PATH: &str = "/sys/bus/pci/devices";
|
||||
const PCI_DRIVER_PROBE: &str = "/sys/bus/pci/drivers_probe";
|
||||
const VFIO_NEW_ID_PATH: &str = "/sys/bus/pci/drivers/vfio-pci/new_id";
|
||||
const VFIO_UNBIND_PATH: &str = "/sys/bus/pci/drivers/vfio-pci/unbind";
|
||||
|
||||
pub const VFIO_PCI: &str = "vfio-pci";
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Clone, Debug, Default, PartialEq)]
|
||||
pub enum VfioBusMode {
|
||||
PCI,
|
||||
#[default]
|
||||
MMIO,
|
||||
PCI,
|
||||
}
|
||||
|
||||
impl VfioBusMode {
|
||||
pub fn new(mode: &str) -> Result<Self> {
|
||||
Ok(match mode {
|
||||
pub fn new(mode: &str) -> Self {
|
||||
match mode {
|
||||
"mmio" => VfioBusMode::MMIO,
|
||||
_ => VfioBusMode::PCI,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_string(mode: VfioBusMode) -> String {
|
||||
match mode {
|
||||
VfioBusMode::MMIO => "mmio".to_owned(),
|
||||
_ => "pci".to_owned(),
|
||||
}
|
||||
}
|
||||
|
||||
// driver_type used for kata-agent
|
||||
// (1) vfio-pci for add device handler,
|
||||
// (2) mmioblk for add storage handler,
|
||||
pub fn driver_type(mode: &str) -> &str {
|
||||
match mode {
|
||||
"b" => DRIVER_MMIO_BLK_TYPE,
|
||||
_ => DRIVER_VFIO_PCI_TYPE,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct VfioConfig {
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub enum VfioDeviceType {
|
||||
/// error type of VFIO device
|
||||
Error,
|
||||
|
||||
/// normal VFIO device type
|
||||
#[default]
|
||||
Normal,
|
||||
|
||||
/// mediated VFIO device type
|
||||
Mediated,
|
||||
}
|
||||
|
||||
// DeviceVendor represents a PCI device's device id and vendor id
|
||||
// DeviceVendor: (device, vendor)
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct DeviceVendor(String, String);
|
||||
|
||||
impl DeviceVendor {
|
||||
pub fn get_device_vendor(&self) -> Result<(u32, u32)> {
|
||||
// default value is 0 when vendor_id or device_id is empty
|
||||
if self.0.is_empty() || self.1.is_empty() {
|
||||
return Ok((0, 0));
|
||||
}
|
||||
|
||||
let do_convert = |id: &String| {
|
||||
u32::from_str_radix(
|
||||
id.trim_start_matches("0x")
|
||||
.trim_matches(char::is_whitespace),
|
||||
16,
|
||||
)
|
||||
.with_context(|| anyhow!("invalid id {:?}", id))
|
||||
};
|
||||
|
||||
let device = do_convert(&self.0).context("convert device failed")?;
|
||||
let vendor = do_convert(&self.1).context("convert vendor failed")?;
|
||||
|
||||
Ok((device, vendor))
|
||||
}
|
||||
|
||||
pub fn get_device_vendor_id(&self) -> Result<u32> {
|
||||
let (device, vendor) = self
|
||||
.get_device_vendor()
|
||||
.context("get device and vendor failed")?;
|
||||
|
||||
Ok(((device & 0xffff) << 16) | (vendor & 0xffff))
|
||||
}
|
||||
}
|
||||
|
||||
// HostDevice represents a VFIO drive used to hotplug
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct HostDevice {
|
||||
/// unique identifier of the device
|
||||
pub hostdev_id: String,
|
||||
|
||||
/// Sysfs path for mdev bus type device
|
||||
pub sysfs_path: String,
|
||||
|
||||
/// PCI device information: "bus:slot:function"
|
||||
/// PCI device information (BDF): "bus:slot:function"
|
||||
pub bus_slot_func: String,
|
||||
|
||||
/// Bus Mode, PCI or MMIO
|
||||
pub mode: VfioBusMode,
|
||||
/// device_vendor: device id and vendor id
|
||||
pub device_vendor: Option<DeviceVendor>,
|
||||
|
||||
/// type of vfio device
|
||||
pub vfio_type: VfioDeviceType,
|
||||
|
||||
/// guest PCI path of device
|
||||
pub guest_pci_path: Option<PciPath>,
|
||||
|
||||
/// vfio_vendor for vendor's some special cases.
|
||||
#[cfg(feature = "enable-vendor")]
|
||||
pub vfio_vendor: VfioVendor,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
// VfioConfig represents a VFIO drive used for hotplugging
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct VfioConfig {
|
||||
/// usually host path will be /dev/vfio/N
|
||||
pub host_path: String,
|
||||
|
||||
/// device as block or char
|
||||
pub dev_type: String,
|
||||
|
||||
/// hostdev_prefix for devices, such as:
|
||||
/// (1) physical endpoint: "physical_nic_"
|
||||
/// (2) vfio mdev: "vfio_mdev_"
|
||||
/// (3) vfio pci: "vfio_device_"
|
||||
/// (4) vfio volume: "vfio_vol_"
|
||||
/// (5) vfio nvme: "vfio_nvme_"
|
||||
pub hostdev_prefix: String,
|
||||
|
||||
/// device in guest which it appears inside the VM,
|
||||
/// outside of the container mount namespace
|
||||
/// virt_path: Option<(index, virt_path_name)>
|
||||
pub virt_path: Option<(u64, String)>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct VfioDevice {
|
||||
/// Unique identifier of the device
|
||||
pub id: String,
|
||||
pub device_id: String,
|
||||
pub attach_count: u64,
|
||||
|
||||
/// Config info for Vfio Device
|
||||
/// Bus Mode, PCI or MMIO
|
||||
pub bus_mode: VfioBusMode,
|
||||
/// driver type
|
||||
pub driver_type: String,
|
||||
|
||||
/// vfio config from business
|
||||
pub config: VfioConfig,
|
||||
|
||||
// host devices, one per function of a multi-function device
|
||||
pub devices: Vec<HostDevice>,
|
||||
// options for vfio pci handler in kata-agent
|
||||
pub device_options: Vec<String>,
|
||||
}
|
||||
|
||||
/// binds the device to vfio driver after unbinding from host.
|
||||
/// Will be called by a network interface or a generic pcie device.
|
||||
impl VfioDevice {
|
||||
// new with VfioConfig
|
||||
pub fn new(device_id: String, dev_info: &VfioConfig) -> Self {
|
||||
// devices and device_options are in a 1-1 mapping, used in
|
||||
// vfio-pci handler for kata-agent.
|
||||
let devices: Vec<HostDevice> = Vec::with_capacity(MAX_DEV_ID_SIZE);
|
||||
let device_options: Vec<String> = Vec::with_capacity(MAX_DEV_ID_SIZE);
|
||||
|
||||
// get bus mode and driver type based on the device type
|
||||
let dev_type = dev_info.dev_type.as_str();
|
||||
let driver_type = VfioBusMode::driver_type(dev_type).to_owned();
|
||||
|
||||
Self {
|
||||
device_id,
|
||||
attach_count: 0,
|
||||
bus_mode: VfioBusMode::PCI,
|
||||
driver_type,
|
||||
config: dev_info.clone(),
|
||||
devices,
|
||||
device_options,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_host_path(&self) -> String {
|
||||
self.config.host_path.clone()
|
||||
}
|
||||
|
||||
fn get_vfio_prefix(&self) -> String {
|
||||
self.config.hostdev_prefix.clone()
|
||||
}
|
||||
|
||||
// nornaml VFIO BDF: 0000:04:00.0
|
||||
// mediated VFIO BDF: 83b8f4f2-509f-382f-3c1e-e6bfe0fa1001
|
||||
fn get_vfio_device_type(&self, device_sys_path: String) -> Result<VfioDeviceType> {
|
||||
let mut tokens: Vec<&str> = device_sys_path.as_str().split(':').collect();
|
||||
let vfio_type = match tokens.len() {
|
||||
3 => VfioDeviceType::Normal,
|
||||
_ => {
|
||||
tokens = device_sys_path.split('-').collect();
|
||||
if tokens.len() == 5 {
|
||||
VfioDeviceType::Mediated
|
||||
} else {
|
||||
VfioDeviceType::Error
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(vfio_type)
|
||||
}
|
||||
|
||||
// get_sysfs_device returns the sysfsdev of mediated device
|
||||
// expected input string format is absolute path to the sysfs dev node
|
||||
// eg. /sys/kernel/iommu_groups/0/devices/f79944e4-5a3d-11e8-99ce-479cbab002e4
|
||||
fn get_sysfs_device(&self, sysfs_dev_path: PathBuf) -> Result<String> {
|
||||
let mut buf =
|
||||
fs::canonicalize(sysfs_dev_path.clone()).context("sysfs device path not exist")?;
|
||||
let mut resolved = false;
|
||||
|
||||
// resolve symbolic links until there's no more to resolve
|
||||
while buf.symlink_metadata()?.file_type().is_symlink() {
|
||||
let link = fs::read_link(&buf)?;
|
||||
buf.pop();
|
||||
buf.push(link);
|
||||
resolved = true;
|
||||
}
|
||||
|
||||
// If a symbolic link was resolved, the resulting path may be relative to the original path
|
||||
if resolved {
|
||||
// If the original path is relative and the resolved path is not, the resolved path
|
||||
// should be returned as absolute.
|
||||
if sysfs_dev_path.is_relative() && buf.is_absolute() {
|
||||
buf = fs::canonicalize(&buf)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(buf.clean().display().to_string())
|
||||
}
|
||||
|
||||
// vfio device details: (device BDF, device SysfsDev, vfio Device Type)
|
||||
fn get_vfio_device_details(
|
||||
&self,
|
||||
dev_file_name: String,
|
||||
iommu_dev_path: PathBuf,
|
||||
) -> Result<(Option<String>, String, VfioDeviceType)> {
|
||||
let vfio_type = self.get_vfio_device_type(dev_file_name.clone())?;
|
||||
match vfio_type {
|
||||
VfioDeviceType::Normal => {
|
||||
let dev_bdf = get_device_bdf(dev_file_name.clone());
|
||||
let dev_sys = [SYS_BUS_PCI_DEVICES, dev_file_name.as_str()].join("/");
|
||||
Ok((dev_bdf, dev_sys, vfio_type))
|
||||
}
|
||||
VfioDeviceType::Mediated => {
|
||||
// sysfsdev eg. /sys/devices/pci0000:00/0000:00:02.0/f79944e4-5a3d-11e8-99ce-479cbab002e4
|
||||
let sysfs_dev = Path::new(&iommu_dev_path).join(dev_file_name);
|
||||
let dev_sys = self
|
||||
.get_sysfs_device(sysfs_dev)
|
||||
.context("get sysfs device failed")?;
|
||||
|
||||
let dev_bdf = if let Some(dev_s) = get_mediated_device_bdf(dev_sys.clone()) {
|
||||
get_device_bdf(dev_s)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok((dev_bdf, dev_sys, vfio_type))
|
||||
}
|
||||
_ => Err(anyhow!("unsupported vfio type : {:?}", vfio_type)),
|
||||
}
|
||||
}
|
||||
|
||||
// read vendor and deviceor from /sys/bus/pci/devices/BDF/X
|
||||
fn get_vfio_device_vendor(&self, bdf: &str) -> Result<DeviceVendor> {
|
||||
let device =
|
||||
get_device_property(bdf, "device").context("get device from syspath failed")?;
|
||||
let vendor =
|
||||
get_device_property(bdf, "vendor").context("get vendor from syspath failed")?;
|
||||
|
||||
Ok(DeviceVendor(device, vendor))
|
||||
}
|
||||
|
||||
async fn set_vfio_config(
|
||||
&mut self,
|
||||
iommu_devs_path: PathBuf,
|
||||
device_name: &str,
|
||||
) -> Result<HostDevice> {
|
||||
let vfio_dev_details = self
|
||||
.get_vfio_device_details(device_name.to_owned(), iommu_devs_path)
|
||||
.context("get vfio device details failed")?;
|
||||
|
||||
// It's safe as BDF really exists.
|
||||
let dev_bdf = vfio_dev_details.0.unwrap();
|
||||
let dev_vendor = self
|
||||
.get_vfio_device_vendor(&dev_bdf)
|
||||
.context("get property device and vendor failed")?;
|
||||
|
||||
let mut vfio_dev = HostDevice {
|
||||
bus_slot_func: dev_bdf.clone(),
|
||||
device_vendor: Some(dev_vendor),
|
||||
sysfs_path: vfio_dev_details.1,
|
||||
vfio_type: vfio_dev_details.2,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// when vfio pci, kata-agent handles with device_options, and its
|
||||
// format: "DDDD:BB:DD.F=<pcipath>"
|
||||
// DDDD:BB:DD.F is the device's PCI address on host
|
||||
// <pcipath> is the device's PCI path in the guest
|
||||
if self.bus_mode == VfioBusMode::PCI {
|
||||
let pci_path =
|
||||
generate_guest_pci_path(dev_bdf.clone()).context("generate pci path failed")?;
|
||||
vfio_dev.guest_pci_path = Some(pci_path.clone());
|
||||
self.device_options
|
||||
.push(format!("0000:{}={}", dev_bdf, pci_path.convert_to_string()));
|
||||
}
|
||||
|
||||
Ok(vfio_dev)
|
||||
}
|
||||
|
||||
// filter Host or PCI Bridges that are in the same IOMMU group as the
|
||||
// passed-through devices. One CANNOT pass-through a PCI bridge or Host
|
||||
// bridge. Class 0x0604 is PCI bridge, 0x0600 is Host bridge
|
||||
fn filter_bridge_device(&self, bdf: &str, bitmask: u64) -> Option<u64> {
|
||||
let device_class = match get_device_property(bdf, "class") {
|
||||
Ok(dev_class) => dev_class,
|
||||
Err(_) => "".to_string(),
|
||||
};
|
||||
|
||||
if device_class.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
match device_class.parse::<u32>() {
|
||||
Ok(cid_u32) => {
|
||||
// class code is 16 bits, remove the two trailing zeros
|
||||
let class_code = u64::from(cid_u32) >> 8;
|
||||
if class_code & bitmask == bitmask {
|
||||
Some(class_code)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Device for VfioDevice {
|
||||
async fn attach(&mut self, h: &dyn hypervisor) -> Result<()> {
|
||||
// host path: /dev/vfio/X
|
||||
let host_path = self.get_host_path();
|
||||
// vfio group: X
|
||||
let vfio_group = get_base_name(host_path.clone())?
|
||||
.into_string()
|
||||
.map_err(|e| anyhow!("failed to get base name {:?}", e))?;
|
||||
|
||||
// /sys/kernel/iommu_groups/X/devices
|
||||
let iommu_devs_path = Path::new(SYS_KERN_IOMMU_GROUPS)
|
||||
.join(vfio_group.as_str())
|
||||
.join("devices");
|
||||
|
||||
// /sys/kernel/iommu_groups/X/devices
|
||||
// DDDD:BB:DD.F0 DDDD:BB:DD.F1
|
||||
let iommu_devices = fs::read_dir(iommu_devs_path.clone())?
|
||||
.filter_map(|e| {
|
||||
let x = e.ok()?.file_name().to_string_lossy().into_owned();
|
||||
Some(x)
|
||||
})
|
||||
.collect::<Vec<String>>();
|
||||
if iommu_devices.len() > 1 {
|
||||
warn!(sl!(), "vfio device {} with multi-function", host_path);
|
||||
}
|
||||
|
||||
// pass all devices in iommu group, and use index to identify device.
|
||||
for (index, device) in iommu_devices.iter().enumerate() {
|
||||
// filter host or PCI bridge
|
||||
if self.filter_bridge_device(device, 0x0600).is_some() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut hostdev: HostDevice = self
|
||||
.set_vfio_config(iommu_devs_path.clone(), device)
|
||||
.await
|
||||
.context("set vfio config failed")?;
|
||||
let dev_prefix = self.get_vfio_prefix();
|
||||
hostdev.hostdev_id = make_device_nameid(&dev_prefix, index, MAX_DEV_ID_SIZE);
|
||||
|
||||
self.devices.push(hostdev);
|
||||
}
|
||||
|
||||
if self
|
||||
.increase_attach_count()
|
||||
.await
|
||||
.context("failed to increase attach count")?
|
||||
{
|
||||
return Err(anyhow!("attach count increased failed as some reason."));
|
||||
}
|
||||
|
||||
// do add device for vfio deivce
|
||||
if let Err(e) = h.add_device(DeviceType::Vfio(self.clone())).await {
|
||||
self.decrease_attach_count().await?;
|
||||
|
||||
return Err(e);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn detach(&mut self, h: &dyn hypervisor) -> Result<Option<u64>> {
|
||||
if self
|
||||
.decrease_attach_count()
|
||||
.await
|
||||
.context("failed to decrease attach count")?
|
||||
{
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if let Err(e) = h.remove_device(DeviceType::Vfio(self.clone())).await {
|
||||
self.increase_attach_count().await?;
|
||||
return Err(e);
|
||||
}
|
||||
|
||||
// only virt_path is Some, there's a device index
|
||||
let device_index = if let Some(virt_path) = self.config.virt_path.clone() {
|
||||
Some(virt_path.0)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(device_index)
|
||||
}
|
||||
|
||||
async fn increase_attach_count(&mut self) -> Result<bool> {
|
||||
match self.attach_count {
|
||||
0 => {
|
||||
// do real attach
|
||||
self.attach_count += 1;
|
||||
Ok(false)
|
||||
}
|
||||
std::u64::MAX => Err(anyhow!("device was attached too many times")),
|
||||
_ => {
|
||||
self.attach_count += 1;
|
||||
Ok(true)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn decrease_attach_count(&mut self) -> Result<bool> {
|
||||
match self.attach_count {
|
||||
0 => Err(anyhow!("detaching a device that wasn't attached")),
|
||||
1 => {
|
||||
// do real wrok
|
||||
self.attach_count -= 1;
|
||||
Ok(false)
|
||||
}
|
||||
_ => {
|
||||
self.attach_count -= 1;
|
||||
Ok(true)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_device_info(&self) -> DeviceType {
|
||||
DeviceType::Vfio(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
// binds the device to vfio driver after unbinding from host.
|
||||
// Will be called by a network interface or a generic pcie device.
|
||||
pub fn bind_device_to_vfio(bdf: &str, host_driver: &str, _vendor_device_id: &str) -> Result<()> {
|
||||
// modprobe vfio-pci
|
||||
if !Path::new(VFIO_NEW_ID_PATH).exists() {
|
||||
if !Path::new(VFIO_PCI_DRIVER_NEW_ID).exists() {
|
||||
Command::new("modprobe")
|
||||
.arg(VFIO_PCI)
|
||||
.arg(VFIO_PCI_DRIVER)
|
||||
.output()
|
||||
.expect("Failed to run modprobe vfio-pci");
|
||||
}
|
||||
@ -84,19 +578,22 @@ pub fn bind_device_to_vfio(bdf: &str, host_driver: &str, _vendor_device_id: &str
|
||||
if cmdline.contains("iommu=off") || !cmdline.contains("iommu=") {
|
||||
return Err(anyhow!("iommu isn't set on kernel cmdline"));
|
||||
}
|
||||
|
||||
if !do_check_iommu_on().context("check iommu on failed")? {
|
||||
return Err(anyhow!("IOMMU not enabled yet."));
|
||||
}
|
||||
}
|
||||
|
||||
// if it's already bound to vfio
|
||||
if is_equal_driver(bdf, VFIO_PCI) {
|
||||
if is_equal_driver(bdf, VFIO_PCI_DRIVER) {
|
||||
info!(sl!(), "bdf : {} was already bound to vfio-pci", bdf);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
info!(sl!(), "host driver : {}", host_driver);
|
||||
override_driver(bdf, VFIO_PCI).context("override driver")?;
|
||||
override_driver(bdf, VFIO_PCI_DRIVER).context("override driver")?;
|
||||
|
||||
let unbind_path = format!("/sys/bus/pci/devices/{}/driver/unbind", bdf);
|
||||
|
||||
// echo bdf > /sys/bus/pci/drivers/virtio-pci/unbind"
|
||||
fs::write(&unbind_path, bdf)
|
||||
.with_context(|| format!("Failed to echo {} > {}", bdf, &unbind_path))?;
|
||||
@ -104,15 +601,16 @@ pub fn bind_device_to_vfio(bdf: &str, host_driver: &str, _vendor_device_id: &str
|
||||
info!(sl!(), "{} is unbound from {}", bdf, host_driver);
|
||||
|
||||
// echo bdf > /sys/bus/pci/drivers_probe
|
||||
fs::write(PCI_DRIVER_PROBE, bdf)
|
||||
.with_context(|| format!("Failed to echo {} > {}", bdf, PCI_DRIVER_PROBE))?;
|
||||
fs::write(SYS_BUS_PCI_DRIVER_PROBE, bdf)
|
||||
.with_context(|| format!("Failed to echo {} > {}", bdf, SYS_BUS_PCI_DRIVER_PROBE))?;
|
||||
|
||||
info!(sl!(), "echo {} > /sys/bus/pci/drivers_probe", bdf);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn is_equal_driver(bdf: &str, host_driver: &str) -> bool {
|
||||
let sys_pci_devices_path = Path::new(SYS_PCI_DEVICES_PATH);
|
||||
let sys_pci_devices_path = Path::new(SYS_BUS_PCI_DEVICES);
|
||||
let driver_file = sys_pci_devices_path.join(bdf).join("driver");
|
||||
|
||||
if driver_file.exists() {
|
||||
@ -126,10 +624,9 @@ pub fn is_equal_driver(bdf: &str, host_driver: &str) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
/// bind_device_to_host binds the device to the host driver after unbinding from vfio-pci.
|
||||
// bind_device_to_host binds the device to the host driver after unbinding from vfio-pci.
|
||||
pub fn bind_device_to_host(bdf: &str, host_driver: &str, _vendor_device_id: &str) -> Result<()> {
|
||||
// Unbind from vfio-pci driver to the original host driver
|
||||
|
||||
info!(sl!(), "bind {} to {}", bdf, host_driver);
|
||||
|
||||
// if it's already bound to host_driver
|
||||
@ -144,37 +641,136 @@ pub fn bind_device_to_host(bdf: &str, host_driver: &str, _vendor_device_id: &str
|
||||
override_driver(bdf, host_driver).context("override driver")?;
|
||||
|
||||
// echo bdf > /sys/bus/pci/drivers/vfio-pci/unbind"
|
||||
std::fs::write(VFIO_UNBIND_PATH, bdf)
|
||||
.with_context(|| format!("echo {}> {}", bdf, VFIO_UNBIND_PATH))?;
|
||||
info!(sl!(), "echo {} > {}", bdf, VFIO_UNBIND_PATH);
|
||||
std::fs::write(VFIO_PCI_DRIVER_UNBIND, bdf)
|
||||
.with_context(|| format!("echo {}> {}", bdf, VFIO_PCI_DRIVER_UNBIND))?;
|
||||
info!(sl!(), "echo {} > {}", bdf, VFIO_PCI_DRIVER_UNBIND);
|
||||
|
||||
// echo bdf > /sys/bus/pci/drivers_probe
|
||||
std::fs::write(PCI_DRIVER_PROBE, bdf)
|
||||
.with_context(|| format!("echo {} > {}", bdf, PCI_DRIVER_PROBE))?;
|
||||
info!(sl!(), "echo {} > {}", bdf, PCI_DRIVER_PROBE);
|
||||
std::fs::write(SYS_BUS_PCI_DRIVER_PROBE, bdf)
|
||||
.with_context(|| format!("echo {} > {}", bdf, SYS_BUS_PCI_DRIVER_PROBE))?;
|
||||
info!(sl!(), "echo {} > {}", bdf, SYS_BUS_PCI_DRIVER_PROBE);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Device for VfioConfig {
|
||||
async fn attach(&mut self, _h: &dyn hypervisor) -> Result<()> {
|
||||
todo!()
|
||||
// get_vfio_device_bdf returns the BDF of pci device
|
||||
// expected format <bus>:<slot>.<func> eg. 02:10.0
|
||||
fn get_device_bdf(dev_sys_str: String) -> Option<String> {
|
||||
let dev_sys = dev_sys_str;
|
||||
if !dev_sys.starts_with("0000:") {
|
||||
return Some(dev_sys);
|
||||
}
|
||||
|
||||
async fn detach(&mut self, _h: &dyn hypervisor) -> Result<Option<u64>> {
|
||||
todo!()
|
||||
let parts: Vec<&str> = dev_sys.as_str().splitn(2, ':').collect();
|
||||
if parts.len() < 2 {
|
||||
return None;
|
||||
}
|
||||
|
||||
async fn get_device_info(&self) -> DeviceType {
|
||||
todo!()
|
||||
}
|
||||
parts.get(1).copied().map(|bdf| bdf.to_owned())
|
||||
}
|
||||
|
||||
async fn increase_attach_count(&mut self) -> Result<bool> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn decrease_attach_count(&mut self) -> Result<bool> {
|
||||
todo!()
|
||||
/// Returns `bdf` in the domain-qualified form `<domain>:<bus>:<slot>.<func>`,
/// e.g. `0000:02:10.0`.
///
/// A short address such as `02:10.0` gets the default domain `0000:` prepended.
/// An address that already carries a domain is returned unchanged, including
/// non-zero domains such as `0001:02:10.0`, which must not be prefixed again.
fn normalize_device_bdf(bdf: &str) -> String {
    // Two ':' separators mean the domain is already present; the `0000` prefix
    // check is kept so that previously accepted inputs behave exactly as before.
    let has_domain = bdf.matches(':').count() >= 2 || bdf.starts_with("0000");
    if has_domain {
        bdf.to_string()
    } else {
        format!("0000:{}", bdf)
    }
}
|
||||
|
||||
/// Generates an ID of the form `<name_type>_<id>` for the hypervisor command line,
/// limited to at most `max_len` bytes.
///
/// When the combined name is too long it is cut at the nearest character boundary at
/// or below `max_len`, so a multi-byte character in `name_type` can never cause a
/// panic.
///
/// NOTE(review): truncation can cut off part or all of the `_<id>` suffix, so two long
/// names may collapse to the same ID; confirm callers keep `name_type` short enough.
fn make_device_nameid(name_type: &str, id: usize, max_len: usize) -> String {
    let mut name_id = format!("{}_{}", name_type, id);

    if name_id.len() > max_len {
        // Index 0 is always a character boundary, so this loop terminates.
        let mut end = max_len;
        while !name_id.is_char_boundary(end) {
            end -= 1;
        }
        name_id.truncate(end);
    }

    name_id
}
|
||||
|
||||
/// Returns the MDEV BDF taken from a mediated device sysfs path.
///
/// Expected input: `/sys/devices/pci0000:d7/BDF0/BDF1/.../MDEVBDF/UUID`.
/// The result is the second-to-last `/`-separated component; inputs that split into
/// fewer than four components yield `None`.
fn get_mediated_device_bdf(dev_sys_str: String) -> Option<String> {
    let segments: Vec<&str> = dev_sys_str.split('/').collect();

    match segments.len() {
        0..=3 => None,
        count => Some(segments[count - 2].to_string()),
    }
}
|
||||
|
||||
// dev_sys_path: /sys/bus/pci/devices/DDDD:BB:DD.F
|
||||
// cfg_path: : /sys/bus/pci/devices/DDDD:BB:DD.F/xxx
|
||||
fn get_device_property(bdf: &str, property: &str) -> Result<String> {
|
||||
let device_name = normalize_device_bdf(bdf);
|
||||
|
||||
let dev_sys_path = Path::new(SYS_BUS_PCI_DEVICES).join(device_name);
|
||||
let cfg_path = fs::read_to_string(dev_sys_path.join(property)).with_context(|| {
|
||||
format!(
|
||||
"failed to read {}",
|
||||
dev_sys_path.join(property).to_str().unwrap()
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok(cfg_path.as_str().trim_end_matches('\n').to_string())
|
||||
}
|
||||
|
||||
pub fn get_vfio_iommu_group(bdf: String) -> Result<String> {
|
||||
// /sys/bus/pci/devices/DDDD:BB:DD.F/iommu_group
|
||||
let dbdf = normalize_device_bdf(bdf.as_str());
|
||||
let iommugrp_path = Path::new(SYS_BUS_PCI_DEVICES)
|
||||
.join(dbdf.as_str())
|
||||
.join("iommu_group");
|
||||
if !iommugrp_path.exists() {
|
||||
warn!(
|
||||
sl!(),
|
||||
"IOMMU group path: {:?} not found, do bind device to vfio first.", iommugrp_path
|
||||
);
|
||||
return Err(anyhow!("please do bind device to vfio"));
|
||||
}
|
||||
|
||||
// iommu group symlink: ../../../../../../kernel/iommu_groups/X
|
||||
let iommugrp_symlink = fs::read_link(&iommugrp_path)
|
||||
.map_err(|e| anyhow!("read iommu group symlink failed {:?}", e))?;
|
||||
|
||||
// get base name from iommu group symlink: X
|
||||
let iommu_group = get_base_name(iommugrp_symlink)?
|
||||
.into_string()
|
||||
.map_err(|e| anyhow!("failed to get iommu group {:?}", e))?;
|
||||
|
||||
// we'd better verify the path to ensure it dose exist.
|
||||
if !Path::new(SYS_KERN_IOMMU_GROUPS)
|
||||
.join(&iommu_group)
|
||||
.join("devices")
|
||||
.join(dbdf.as_str())
|
||||
.exists()
|
||||
{
|
||||
return Err(anyhow!(
|
||||
"device dbdf {:?} dosn't exist in {}/{}/devices.",
|
||||
dbdf.as_str(),
|
||||
SYS_KERN_IOMMU_GROUPS,
|
||||
iommu_group
|
||||
));
|
||||
}
|
||||
|
||||
Ok(format!("/dev/vfio/{}", iommu_group))
|
||||
}
|
||||
|
||||
pub fn get_vfio_device(device: String) -> Result<String> {
|
||||
// support both /dev/vfio/X and BDF<DDDD:BB:DD.F> or BDF<BB:DD.F2>
|
||||
let mut vfio_device = device;
|
||||
|
||||
let bdf_vec: Vec<&str> = vfio_device.as_str().split(&[':', '.'][..]).collect();
|
||||
if bdf_vec.len() >= 3 && bdf_vec.len() < 5 {
|
||||
// DDDD:BB:DD.F -> /dev/vfio/X
|
||||
vfio_device =
|
||||
get_vfio_iommu_group(vfio_device.clone()).context("get vfio iommu group failed")?;
|
||||
}
|
||||
|
||||
Ok(vfio_device)
|
||||
}
|
||||
|
@ -9,11 +9,14 @@ use ini::Ini;
|
||||
|
||||
const SYS_DEV_PREFIX: &str = "/sys/dev";
|
||||
|
||||
pub const DEVICE_TYPE_BLOCK: &str = "b";
|
||||
pub const DEVICE_TYPE_CHAR: &str = "c";
|
||||
|
||||
// get_host_path is used to fetch the host path for the device.
|
||||
// The path passed in the spec refers to the path that should appear inside the container.
|
||||
// We need to find the actual device path on the host based on the major-minor numbers of the device.
|
||||
pub fn get_host_path(dev_type: String, major: i64, minor: i64) -> Result<String> {
|
||||
let path_comp = match dev_type.as_str() {
|
||||
pub fn get_host_path(dev_type: &str, major: i64, minor: i64) -> Result<String> {
|
||||
let path_comp = match dev_type {
|
||||
"c" | "u" => "char",
|
||||
"b" => "block",
|
||||
// for device type p will return an empty string
|
||||
|
@ -16,7 +16,7 @@ use dragonball::api::v1::{
|
||||
use super::DragonballInner;
|
||||
use crate::{
|
||||
device::DeviceType, HybridVsockConfig, NetworkConfig, ShareFsDeviceConfig, ShareFsMountConfig,
|
||||
ShareFsMountType, ShareFsOperation, VmmState,
|
||||
ShareFsMountType, ShareFsOperation, VfioBusMode, VfioDevice, VmmState,
|
||||
};
|
||||
|
||||
const MB_TO_B: u32 = 1024 * 1024;
|
||||
@ -47,9 +47,7 @@ impl DragonballInner {
|
||||
DeviceType::Network(network) => self
|
||||
.add_net_device(&network.config, network.id)
|
||||
.context("add net device"),
|
||||
DeviceType::Vfio(_) => {
|
||||
todo!()
|
||||
}
|
||||
DeviceType::Vfio(hostdev) => self.add_vfio_device(&hostdev).context("add vfio device"),
|
||||
DeviceType::Block(block) => self
|
||||
.add_block_device(
|
||||
block.config.path_on_host.as_str(),
|
||||
@ -80,13 +78,77 @@ impl DragonballInner {
|
||||
self.remove_block_drive(drive_id.as_str())
|
||||
.context("remove block drive")
|
||||
}
|
||||
DeviceType::Vfio(_config) => {
|
||||
todo!()
|
||||
DeviceType::Vfio(hostdev) => {
|
||||
let primary_device = hostdev.devices.first().unwrap().clone();
|
||||
let hostdev_id = primary_device.hostdev_id;
|
||||
|
||||
self.remove_vfio_device(hostdev_id)
|
||||
}
|
||||
_ => Err(anyhow!("unsupported device {:?}", device)),
|
||||
}
|
||||
}
|
||||
|
||||
fn add_vfio_device(&mut self, device: &VfioDevice) -> Result<()> {
|
||||
let vfio_device = device.clone();
|
||||
|
||||
// FIXME:
|
||||
// A device with multi-funtions, or a IOMMU group with one more
|
||||
// devices, the Primary device is selected to be passed to VM.
|
||||
// And the the first one is Primary device.
|
||||
// safe here, devices is not empty.
|
||||
let primary_device = vfio_device.devices.first().unwrap().clone();
|
||||
|
||||
let vendor_device_id = if let Some(vd) = primary_device.device_vendor {
|
||||
vd.get_device_vendor_id()?
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
let guest_dev_id = if let Some(pci_path) = primary_device.guest_pci_path {
|
||||
// safe here, dragonball's pci device directly connects to root bus.
|
||||
// usually, it has been assigned in vfio device manager.
|
||||
pci_path.get_device_slot().unwrap().0
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
let bus_mode = VfioBusMode::to_string(vfio_device.bus_mode);
|
||||
|
||||
info!(sl!(), "Mock for dragonball insert host device.");
|
||||
info!(
|
||||
sl!(),
|
||||
" Mock for dragonball insert host device.
|
||||
host device id: {:?},
|
||||
bus_slot_func: {:?},
|
||||
bus mod: {:?},
|
||||
guest device id: {:?},
|
||||
vendor/device id: {:?}",
|
||||
primary_device.hostdev_id,
|
||||
primary_device.bus_slot_func,
|
||||
bus_mode,
|
||||
guest_dev_id,
|
||||
vendor_device_id,
|
||||
);
|
||||
|
||||
// FIXME:
|
||||
// interface implementation to be done when dragonball supports
|
||||
// self.vmm_instance.insert_host_device(host_cfg)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn remove_vfio_device(&mut self, hostdev_id: String) -> Result<()> {
|
||||
info!(
|
||||
sl!(),
|
||||
"Mock for dragonball remove host_device with hostdev id {:?}", hostdev_id
|
||||
);
|
||||
// FIXME:
|
||||
// interface implementation to be done when dragonball supports
|
||||
// self.vmm_instance.remove_host_device(hostdev_id)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn add_block_device(
|
||||
&mut self,
|
||||
path: &str,
|
||||
|
@ -12,9 +12,10 @@ use async_trait::async_trait;
|
||||
use hypervisor::{
|
||||
device::{
|
||||
device_manager::{do_handle_device, DeviceManager},
|
||||
util::{get_host_path, DEVICE_TYPE_CHAR},
|
||||
DeviceConfig, DeviceType,
|
||||
},
|
||||
BlockConfig, Hypervisor,
|
||||
BlockConfig, Hypervisor, VfioConfig,
|
||||
};
|
||||
use kata_types::config::TomlConfig;
|
||||
use kata_types::mount::Mount;
|
||||
@ -277,14 +278,15 @@ impl ResourceManagerInner {
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
let device_info = do_handle_device(&self.device_manager, &dev_info)
|
||||
let device_info = do_handle_device(&self.device_manager.clone(), &dev_info)
|
||||
.await
|
||||
.context("do handle device")?;
|
||||
|
||||
// create agent device
|
||||
// create block device for kata agent,
|
||||
// if driver is virtio-blk-pci, the id will be pci address.
|
||||
if let DeviceType::Block(device) = device_info {
|
||||
let agent_device = Device {
|
||||
id: device.device_id.clone(),
|
||||
id: device.config.virt_path.clone(),
|
||||
container_path: d.path.clone(),
|
||||
field_type: device.config.driver_option,
|
||||
vm_path: device.config.virt_path,
|
||||
@ -293,6 +295,45 @@ impl ResourceManagerInner {
|
||||
devices.push(agent_device);
|
||||
}
|
||||
}
|
||||
"c" => {
|
||||
let host_path = get_host_path(DEVICE_TYPE_CHAR, d.major, d.minor)
|
||||
.context("get host path failed")?;
|
||||
// First of all, filter vfio devices.
|
||||
if !host_path.starts_with("/dev/vfio") {
|
||||
continue;
|
||||
}
|
||||
|
||||
let dev_info = DeviceConfig::VfioCfg(VfioConfig {
|
||||
host_path,
|
||||
dev_type: "c".to_string(),
|
||||
hostdev_prefix: "vfio_device".to_owned(),
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
let device_info = do_handle_device(&self.device_manager.clone(), &dev_info)
|
||||
.await
|
||||
.context("do handle device")?;
|
||||
|
||||
// vfio mode: vfio-pci and vfio-pci-gk for x86_64
|
||||
// - vfio-pci, devices appear as VFIO character devices under /dev/vfio in container.
|
||||
// - vfio-pci-gk, devices are managed by whatever driver in Guest kernel.
|
||||
let vfio_mode = match self.toml_config.runtime.vfio_mode.as_str() {
|
||||
"vfio" => "vfio-pci".to_string(),
|
||||
_ => "vfio-pci-gk".to_string(),
|
||||
};
|
||||
|
||||
// create agent device
|
||||
if let DeviceType::Vfio(device) = device_info {
|
||||
let agent_device = Device {
|
||||
id: device.device_id, // just for kata-agent
|
||||
container_path: d.path.clone(),
|
||||
field_type: vfio_mode,
|
||||
options: device.device_options,
|
||||
..Default::default()
|
||||
};
|
||||
devices.push(agent_device);
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
// TODO enable other devices type
|
||||
continue;
|
||||
|
@ -10,7 +10,7 @@ use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use hypervisor::device::DeviceType;
|
||||
use hypervisor::{device::driver, Hypervisor};
|
||||
use hypervisor::{VfioConfig, VfioDevice};
|
||||
use hypervisor::{HostDevice, VfioDevice};
|
||||
|
||||
use super::endpoint_persist::{EndpointState, PhysicalEndpointState};
|
||||
use super::Endpoint;
|
||||
@ -111,13 +111,14 @@ impl Endpoint for PhysicalEndpoint {
|
||||
|
||||
// add vfio device
|
||||
let d = DeviceType::Vfio(VfioDevice {
|
||||
id: format!("physical_nic_{}", self.name().await),
|
||||
config: VfioConfig {
|
||||
sysfs_path: "".to_string(),
|
||||
attach_count: 0,
|
||||
bus_mode: driver::VfioBusMode::new(mode),
|
||||
devices: vec![HostDevice {
|
||||
hostdev_id: format!("physical_nic_{}", self.name().await),
|
||||
bus_slot_func: self.bdf.clone(),
|
||||
mode: driver::VfioBusMode::new(mode)
|
||||
.with_context(|| format!("new vfio bus mode {:?}", mode))?,
|
||||
},
|
||||
..Default::default()
|
||||
}],
|
||||
..Default::default()
|
||||
});
|
||||
hypervisor.add_device(d).await.context("add device")?;
|
||||
Ok(())
|
||||
|
Loading…
Reference in New Issue
Block a user