Merge pull request #7489 from Apokleos/pci_path

runtime-rs: add pci topology for pci devices
This commit is contained in:
Chao Wu 2023-12-27 18:52:06 +08:00 committed by GitHub
commit cbd4481bc1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 588 additions and 88 deletions

View File

@ -492,6 +492,38 @@ impl DeviceInfo {
}
}
/// Virtual machine PCIe Topology configuration.
#[derive(Clone, Debug, Default)]
pub struct TopologyConfigInfo {
    /// Hypervisor name
    pub hypervisor_name: String,
    /// Device Info
    pub device_info: DeviceInfo,
}

impl TopologyConfigInfo {
    /// Build the topology config info from the parsed toml config.
    ///
    /// Returns `None` when the configured hypervisor does not support a PCIe
    /// topology (e.g. Firecracker), or when no matching hypervisor section
    /// exists in the config.
    pub fn new(toml_config: &TomlConfig) -> Option<Self> {
        // Firecracker does not support PCIe devices, so we should not
        // initialize a PCIe topology for it; only the hypervisors listed
        // below qualify.
        let supported = [
            HYPERVISOR_NAME_QEMU,
            HYPERVISOR_NAME_CH,
            HYPERVISOR_NAME_DRAGONBALL,
        ];
        let name = toml_config.runtime.hypervisor_name.as_str();
        if !supported.iter().any(|&candidate| candidate == name) {
            return None;
        }

        toml_config.hypervisor.get(name).map(|hv| Self {
            hypervisor_name: name.to_string(),
            device_info: hv.device_info.clone(),
        })
    }
}
/// Configuration information for virtual machine.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct MachineInfo {

View File

@ -8,6 +8,7 @@ use std::{collections::HashMap, sync::Arc};
use anyhow::{anyhow, Context, Result};
use kata_sys_util::rand::RandomBytes;
use kata_types::config::hypervisor::TopologyConfigInfo;
use tokio::sync::{Mutex, RwLock};
use crate::{
@ -18,6 +19,7 @@ use crate::{
};
use super::{
topology::PCIeTopology,
util::{get_host_path, get_virt_drive_name, DEVICE_TYPE_BLOCK},
Device, DeviceConfig, DeviceType,
};
@ -93,15 +95,20 @@ pub struct DeviceManager {
devices: HashMap<String, ArcMutexDevice>,
hypervisor: Arc<dyn Hypervisor>,
shared_info: SharedInfo,
pcie_topology: Option<PCIeTopology>,
}
impl DeviceManager {
pub async fn new(hypervisor: Arc<dyn Hypervisor>) -> Result<Self> {
pub async fn new(
hypervisor: Arc<dyn Hypervisor>,
topo_config: Option<&TopologyConfigInfo>,
) -> Result<Self> {
let devices = HashMap::<String, ArcMutexDevice>::new();
Ok(DeviceManager {
devices,
hypervisor,
shared_info: SharedInfo::new().await,
pcie_topology: PCIeTopology::new(topo_config),
})
}
@ -119,9 +126,12 @@ impl DeviceManager {
.devices
.get(device_id)
.context("failed to find device")?;
let mut device_guard = device.lock().await;
// attach device
let result = device_guard.attach(self.hypervisor.as_ref()).await;
let result = device_guard
.attach(&mut self.pcie_topology.as_mut(), self.hypervisor.as_ref())
.await;
// handle attach error
if let Err(e) = result {
match device_guard.get_device_info().await {
@ -161,7 +171,10 @@ impl DeviceManager {
pub async fn try_remove_device(&mut self, device_id: &str) -> Result<()> {
if let Some(dev) = self.devices.get(device_id) {
let mut device_guard = dev.lock().await;
let result = match device_guard.detach(self.hypervisor.as_ref()).await {
let result = match device_guard
.detach(&mut self.pcie_topology.as_mut(), self.hypervisor.as_ref())
.await
{
Ok(index) => {
if let Some(i) = index {
// release the declared device index
@ -599,6 +612,7 @@ mod tests {
BlockConfig, KATA_BLK_DEV_TYPE,
};
use anyhow::{anyhow, Context, Result};
use kata_types::config::hypervisor::TopologyConfigInfo;
use std::sync::Arc;
use tests_utils::load_test_config;
use tokio::sync::RwLock;
@ -606,6 +620,7 @@ mod tests {
async fn new_device_manager() -> Result<Arc<RwLock<DeviceManager>>> {
let hypervisor_name: &str = "qemu";
let toml_config = load_test_config(hypervisor_name.to_owned())?;
let topo_config = TopologyConfigInfo::new(&toml_config);
let hypervisor_config = toml_config
.hypervisor
.get(hypervisor_name)
@ -617,7 +632,7 @@ mod tests {
.await;
let dm = Arc::new(RwLock::new(
DeviceManager::new(Arc::new(hypervisor))
DeviceManager::new(Arc::new(hypervisor), topo_config.as_ref())
.await
.context("device manager")?,
));

View File

@ -5,27 +5,24 @@
//
use std::{
collections::HashMap,
fs,
path::{Path, PathBuf},
process::Command,
sync::{
atomic::{AtomicU8, Ordering},
Arc, RwLock,
},
};
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use lazy_static::lazy_static;
use path_clean::PathClean;
use kata_sys_util::fs::get_base_name;
use crate::device::{
hypervisor,
pci_path::{PciPath, PciSlot},
Device, DeviceType,
use crate::{
device::{
pci_path::PciPath,
topology::{do_add_pcie_endpoint, PCIeTopology},
Device, DeviceType, PCIeDevice,
},
register_pcie_device, unregister_pcie_device, update_pcie_device, Hypervisor as hypervisor,
};
pub const SYS_BUS_PCI_DRIVER_PROBE: &str = "/sys/bus/pci/drivers_probe";
@ -43,35 +40,6 @@ const INTEL_IOMMU_PREFIX: &str = "dmar";
const AMD_IOMMU_PREFIX: &str = "ivhd";
const ARM_IOMMU_PREFIX: &str = "smmu";
lazy_static! {
static ref GUEST_DEVICE_ID: Arc<AtomicU8> = Arc::new(AtomicU8::new(0_u8));
static ref HOST_GUEST_MAP: Arc<RwLock<HashMap<String, String>>> =
Arc::new(RwLock::new(HashMap::new()));
}
// map host/guest bdf and the mapping saved into `HOST_GUEST_MAP`,
// and return PciPath.
pub fn generate_guest_pci_path(bdf: String) -> Result<PciPath> {
let hg_map = HOST_GUEST_MAP.clone();
let current_id = GUEST_DEVICE_ID.clone();
current_id.fetch_add(1, Ordering::SeqCst);
let slot = current_id.load(Ordering::SeqCst);
// In some Hypervisors, dragonball, cloud-hypervisor or firecracker,
// the device is directly connected to the bus without intermediary bus.
// FIXME: Qemu's pci path needs to be implemented;
let host_bdf = normalize_device_bdf(bdf.as_str());
let guest_bdf = format!("0000:00:{:02x}.0", slot);
// safe, just do unwrap as `HOST_GUEST_MAP` is always valid.
hg_map.write().unwrap().insert(host_bdf, guest_bdf);
Ok(PciPath {
slots: vec![PciSlot::new(slot)],
})
}
pub fn do_check_iommu_on() -> Result<bool> {
let element = std::fs::read_dir(SYS_CLASS_IOMMU)?
.filter_map(|e| e.ok())
@ -476,7 +444,13 @@ impl VfioDevice {
#[async_trait]
impl Device for VfioDevice {
async fn attach(&mut self, h: &dyn hypervisor) -> Result<()> {
async fn attach(
&mut self,
pcie_topo: &mut Option<&mut PCIeTopology>,
h: &dyn hypervisor,
) -> Result<()> {
register_pcie_device!(self, pcie_topo)?;
if self
.increase_attach_count()
.await
@ -494,37 +468,23 @@ impl Device for VfioDevice {
self.devices = vfio.devices;
}
if self.bus_mode == VfioBusMode::PCI {
for hostdev in self.devices.iter_mut() {
if hostdev.guest_pci_path.is_none() {
// guest_pci_path may be empty for certain hypervisors such as
// dragonball
hostdev.guest_pci_path = Some(
generate_guest_pci_path(hostdev.bus_slot_func.clone())
.map_err(|e| anyhow!("generate pci path failed: {:?}", e))?,
);
}
// Safe to call unwrap here because of previous assignment.
let pci_path = hostdev.guest_pci_path.clone().unwrap();
self.device_options.push(format!(
"0000:{}={}",
hostdev.bus_slot_func.clone(),
pci_path.to_string()
));
}
}
update_pcie_device!(self, pcie_topo)?;
Ok(())
}
Err(e) => {
self.decrease_attach_count().await?;
unregister_pcie_device!(self, pcie_topo)?;
return Err(e);
}
}
}
async fn detach(&mut self, h: &dyn hypervisor) -> Result<Option<u64>> {
async fn detach(
&mut self,
pcie_topo: &mut Option<&mut PCIeTopology>,
h: &dyn hypervisor,
) -> Result<Option<u64>> {
if self
.decrease_attach_count()
.await
@ -545,6 +505,8 @@ impl Device for VfioDevice {
None
};
unregister_pcie_device!(self, pcie_topo)?;
Ok(device_index)
}
@ -588,6 +550,48 @@ impl Device for VfioDevice {
}
}
#[async_trait]
impl PCIeDevice for VfioDevice {
    /// Register this VFIO device's host devices into the PCIe topology,
    /// allocating (or refreshing) a guest PCI path for each of them and
    /// rebuilding `device_options` accordingly.
    ///
    /// Devices not in PCI bus mode do not take part in the topology and are
    /// accepted as a no-op.
    async fn register(&mut self, pcie_topo: &mut PCIeTopology) -> Result<()> {
        if self.bus_mode != VfioBusMode::PCI {
            return Ok(());
        }

        // device_options is rebuilt from scratch on every (re-)registration.
        self.device_options.clear();
        for hostdev in self.devices.iter_mut() {
            let pci_path = do_add_pcie_endpoint(
                self.device_id.clone(),
                hostdev.guest_pci_path.clone(),
                pcie_topo,
            )
            // with_context: only format the message on the error path.
            .with_context(|| {
                format!(
                    "add pcie endpoint for host device {:?} in PCIe Topology failed",
                    self.device_id
                )
            })?;

            hostdev.guest_pci_path = Some(pci_path.clone());
            // Option format: "0000:<bus_slot_func>=<guest pci path>".
            self.device_options.push(format!(
                "0000:{}={}",
                hostdev.bus_slot_func,
                pci_path.to_string()
            ));
        }

        Ok(())
    }

    /// Remove this device from the PCIe topology; errors when the device was
    /// not present in the topology.
    async fn unregister(&mut self, pcie_topo: &mut PCIeTopology) -> Result<()> {
        // No clone needed: remove_device only borrows the id.
        if pcie_topo.remove_device(&self.device_id).is_some() {
            Ok(())
        } else {
            Err(anyhow!("vfio device with {:?} not found.", self.device_id))
        }
    }
}
// binds the device to vfio driver after unbinding from host.
// Will be called by a network interface or a generic pcie device.
pub fn bind_device_to_vfio(bdf: &str, host_driver: &str, _vendor_device_id: &str) -> Result<()> {

View File

@ -9,7 +9,7 @@ use async_trait::async_trait;
use super::VhostUserConfig;
use crate::{
device::{Device, DeviceType},
device::{topology::PCIeTopology, Device, DeviceType},
Hypervisor as hypervisor,
};
@ -45,7 +45,11 @@ impl VhostUserBlkDevice {
#[async_trait]
impl Device for VhostUserBlkDevice {
async fn attach(&mut self, h: &dyn hypervisor) -> Result<()> {
async fn attach(
&mut self,
_pcie_topo: &mut Option<&mut PCIeTopology>,
h: &dyn hypervisor,
) -> Result<()> {
// increase attach count, skip attach the device if the device is already attached
if self
.increase_attach_count()
@ -64,7 +68,11 @@ impl Device for VhostUserBlkDevice {
return Ok(());
}
async fn detach(&mut self, h: &dyn hypervisor) -> Result<Option<u64>> {
async fn detach(
&mut self,
_pcie_topo: &mut Option<&mut PCIeTopology>,
h: &dyn hypervisor,
) -> Result<Option<u64>> {
// get the count of device detached, and detach once it reaches 0
if self
.decrease_attach_count()

View File

@ -4,6 +4,7 @@
use anyhow::{Context, Result};
use async_trait::async_trait;
use crate::device::topology::PCIeTopology;
use crate::device::{Device, DeviceType};
use crate::{Hypervisor, VhostUserConfig};
@ -22,14 +23,22 @@ impl VhostUserNetDevice {
#[async_trait]
impl Device for VhostUserNetDevice {
async fn attach(&mut self, h: &dyn Hypervisor) -> Result<()> {
async fn attach(
&mut self,
_pcie_topo: &mut Option<&mut PCIeTopology>,
h: &dyn Hypervisor,
) -> Result<()> {
h.add_device(DeviceType::VhostUserNetwork(self.clone()))
.await
.context("add vhost-user-net device to hypervisor")?;
Ok(())
}
async fn detach(&mut self, h: &dyn Hypervisor) -> Result<Option<u64>> {
async fn detach(
&mut self,
_pcie_topo: &mut Option<&mut PCIeTopology>,
h: &dyn Hypervisor,
) -> Result<Option<u64>> {
h.remove_device(DeviceType::VhostUserNetwork(self.clone()))
.await
.context("remove vhost-user-net device from hypervisor")?;

View File

@ -5,6 +5,7 @@
//
use crate::device::pci_path::PciPath;
use crate::device::topology::PCIeTopology;
use crate::device::Device;
use crate::device::DeviceType;
use crate::Hypervisor as hypervisor;
@ -73,7 +74,11 @@ impl BlockDevice {
#[async_trait]
impl Device for BlockDevice {
async fn attach(&mut self, h: &dyn hypervisor) -> Result<()> {
async fn attach(
&mut self,
_pcie_topo: &mut Option<&mut PCIeTopology>,
h: &dyn hypervisor,
) -> Result<()> {
// increase attach count, skip attach the device if the device is already attached
if self
.increase_attach_count()
@ -98,7 +103,11 @@ impl Device for BlockDevice {
}
}
async fn detach(&mut self, h: &dyn hypervisor) -> Result<Option<u64>> {
async fn detach(
&mut self,
_pcie_topo: &mut Option<&mut PCIeTopology>,
h: &dyn hypervisor,
) -> Result<Option<u64>> {
// get the count of device detached, skip detach once it reaches the 0
if self
.decrease_attach_count()

View File

@ -7,7 +7,7 @@
use anyhow::{Context, Result};
use async_trait::async_trait;
use crate::device::{hypervisor, Device, DeviceType};
use crate::device::{hypervisor, topology::PCIeTopology, Device, DeviceType};
#[derive(Copy, Clone, Debug, Default)]
pub enum ShareFsMountOperation {
@ -99,7 +99,11 @@ impl ShareFsDevice {
#[async_trait]
impl Device for ShareFsDevice {
async fn attach(&mut self, h: &dyn hypervisor) -> Result<()> {
async fn attach(
&mut self,
_pcie_topo: &mut Option<&mut PCIeTopology>,
h: &dyn hypervisor,
) -> Result<()> {
h.add_device(DeviceType::ShareFs(self.clone()))
.await
.context("add share-fs device.")?;
@ -107,7 +111,11 @@ impl Device for ShareFsDevice {
Ok(())
}
async fn detach(&mut self, _h: &dyn hypervisor) -> Result<Option<u64>> {
async fn detach(
&mut self,
_pcie_topo: &mut Option<&mut PCIeTopology>,
_h: &dyn hypervisor,
) -> Result<Option<u64>> {
// no need to detach share-fs device
Ok(None)

View File

@ -9,6 +9,7 @@ use std::fmt;
use anyhow::{Context, Result};
use async_trait::async_trait;
use crate::device::topology::PCIeTopology;
use crate::device::{Device, DeviceType};
use crate::Hypervisor as hypervisor;
@ -70,7 +71,11 @@ impl NetworkDevice {
#[async_trait]
impl Device for NetworkDevice {
async fn attach(&mut self, h: &dyn hypervisor) -> Result<()> {
async fn attach(
&mut self,
_pcie_topo: &mut Option<&mut PCIeTopology>,
h: &dyn hypervisor,
) -> Result<()> {
h.add_device(DeviceType::Network(self.clone()))
.await
.context("add network device.")?;
@ -78,7 +83,11 @@ impl Device for NetworkDevice {
return Ok(());
}
async fn detach(&mut self, h: &dyn hypervisor) -> Result<Option<u64>> {
async fn detach(
&mut self,
_pcie_topo: &mut Option<&mut PCIeTopology>,
h: &dyn hypervisor,
) -> Result<Option<u64>> {
h.remove_device(DeviceType::Network(self.clone()))
.await
.context("remove network device.")?;

View File

@ -12,7 +12,7 @@ use tokio::fs::{File, OpenOptions};
use async_trait::async_trait;
use crate::{
device::{Device, DeviceType},
device::{topology::PCIeTopology, Device, DeviceType},
Hypervisor as hypervisor,
};
@ -49,7 +49,11 @@ impl HybridVsockDevice {
#[async_trait]
impl Device for HybridVsockDevice {
async fn attach(&mut self, h: &dyn hypervisor) -> Result<()> {
async fn attach(
&mut self,
_pcie_topo: &mut Option<&mut PCIeTopology>,
h: &dyn hypervisor,
) -> Result<()> {
h.add_device(DeviceType::HybridVsock(self.clone()))
.await
.context("add hybrid vsock device.")?;
@ -57,7 +61,11 @@ impl Device for HybridVsockDevice {
return Ok(());
}
async fn detach(&mut self, _h: &dyn hypervisor) -> Result<Option<u64>> {
async fn detach(
&mut self,
_pcie_topo: &mut Option<&mut PCIeTopology>,
_h: &dyn hypervisor,
) -> Result<Option<u64>> {
// no need to do detach, just return Ok(None)
Ok(None)
}
@ -135,7 +143,11 @@ impl VsockDevice {
#[async_trait]
impl Device for VsockDevice {
async fn attach(&mut self, h: &dyn hypervisor) -> Result<()> {
async fn attach(
&mut self,
_pcie_topo: &mut Option<&mut PCIeTopology>,
h: &dyn hypervisor,
) -> Result<()> {
h.add_device(DeviceType::Vsock(self.clone()))
.await
.context("add vsock device.")?;
@ -143,7 +155,11 @@ impl Device for VsockDevice {
return Ok(());
}
async fn detach(&mut self, _h: &dyn hypervisor) -> Result<Option<u64>> {
async fn detach(
&mut self,
_pcie_topo: &mut Option<&mut PCIeTopology>,
_h: &dyn hypervisor,
) -> Result<Option<u64>> {
// no need to do detach, just return Ok(None)
Ok(None)
}

View File

@ -15,9 +15,12 @@ use crate::{
use anyhow::Result;
use async_trait::async_trait;
use self::topology::PCIeTopology;
pub mod device_manager;
pub mod driver;
pub mod pci_path;
pub mod topology;
pub mod util;
#[derive(Debug)]
@ -53,9 +56,17 @@ impl fmt::Display for DeviceType {
#[async_trait]
pub trait Device: std::fmt::Debug + Send + Sync {
// attach is to plug device into VM
async fn attach(&mut self, h: &dyn hypervisor) -> Result<()>;
async fn attach(
&mut self,
pcie_topo: &mut Option<&mut PCIeTopology>,
h: &dyn hypervisor,
) -> Result<()>;
// detach is to unplug device from VM
async fn detach(&mut self, h: &dyn hypervisor) -> Result<Option<u64>>;
async fn detach(
&mut self,
pcie_topo: &mut Option<&mut PCIeTopology>,
h: &dyn hypervisor,
) -> Result<Option<u64>>;
// update is to do update for some device
async fn update(&mut self, h: &dyn hypervisor) -> Result<()>;
// get_device_info returns device config
@ -71,3 +82,11 @@ pub trait Device: std::fmt::Debug + Send + Sync {
// * err error: error while do decrease attach count
async fn decrease_attach_count(&mut self) -> Result<bool>;
}
/// PCIeDevice is implemented by devices that occupy a slot in the guest PCIe
/// topology (virtio-pci devices and passthrough PCI/PCIe devices).
#[async_trait]
pub trait PCIeDevice: std::fmt::Debug + Send + Sync {
    /// Register the pcie device into the PCIe Topology for a virtio-pci
    /// device or a PCI/PCIe device.
    async fn register(&mut self, topology: &mut PCIeTopology) -> Result<()>;
    /// Unregister the pcie device from the PCIe Topology.
    async fn unregister(&mut self, topology: &mut PCIeTopology) -> Result<()>;
}

View File

@ -0,0 +1,366 @@
//
// Copyright (c) 2019-2023 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
/*
The design originates from https://github.com/qemu/qemu/blob/master/docs/pcie.txt
In order to better support the PCIe topologies of different VMMs, we adopt a layered approach.
The first layer is the base layer(the flatten PCIe topology), which mainly consists of the root bus,
which is mainly used by VMMs that only support devices being directly attached to the root bus.
However, not all VMMs have such simple PCIe topologies. For example, Qemu, which can fully simulate
the PCIe topology of the host, has a complex PCIe topology. In this case, we need to add PCIe RootPort,
PCIe Switch, and PCIe-PCI Bridge or pxb-pcie on top of the base layer, which is The Complex PCIe Topology.
The design graphs are shown below:
(1) The flatten PCIe Topology
pcie.0 bus (Root Complex)
----------------------------------------------------------------------------
| | | | | | | | | | | | | | | .. |
--|--------------------|------------------|-------------------------|-------
| | | |
V V V V
----------- ----------- ----------- -----------
| PCI Dev | | PCI Dev | | PCI Dev | | PCI Dev |
----------- ----------- ----------- -----------
(2) The Complex PCIe Topology(It'll be implemented when Qemu is ready in runtime-rs)
pcie.0 bus (Root Complex)
----------------------------------------------------------------------------
| | | | | | | | | | | | | | | .. |
------|----------------|--------------------------------------|-------------
| | |
V V V
------------- ------------- -------------
| Root Port | | Root Port | | Root Port |
------------- ------------- -------------
| |
| -------------------------|-----------------------
------------ | ----------------- |
| PCIe Dev | | PCI Express | Upstream Port | |
------------ | Switch ----------------- |
| | | |
| ------------------- ------------------- |
| | Downstream Port | | Downstream Port | |
| ------------------- ------------------- |
-------------|-----------------------|-----------
------------
| PCIe Dev |
------------
*/
use std::collections::{hash_map::Entry, HashMap};
use anyhow::{anyhow, Result};
use crate::device::pci_path::PciSlot;
use kata_types::config::hypervisor::TopologyConfigInfo;
use super::pci_path::PciPath;
const DEFAULT_PCIE_ROOT_BUS: &str = "pcie.0";
// Currently, CLH and Dragonball support device attachment solely on the root bus.
const DEFAULT_PCIE_ROOT_BUS_ADDRESS: &str = "0000:00";
pub const PCIE_ROOT_BUS_SLOTS_CAPACITY: u32 = 32;
// register_pcie_device: pre-registers a device into the PCIe Topology. It is
// called in a device driver's attach() before the device is actually attached
// to the VM, and allocates one available PCI path for the device.
// register_pcie_device can be expanded as below:
// register_pcie_device {
//     match pcie_topology {
//         Some(topology) => self.register(topology).await,
//         None => Ok(())
//     }
// }
#[macro_export]
macro_rules! register_pcie_device {
    ($self:ident, $opt:expr) => {
        match $opt {
            Some(topology) => $self.register(topology).await,
            None => Ok(()),
        }
    };
}
// update_pcie_device: updates the device info, as some VMMs are able to
// return device info containing a guest PCI path which differs from the one
// allocated in the runtime. So we need to compare the two PCI paths, and
// finally update it or not based on the difference between them.
// Note that it deliberately expands to the same `register` call: re-registering
// an already-known device refreshes its position in the topology.
// update_pcie_device can be expanded as below:
// update_pcie_device {
//     match pcie_topology {
//         Some(topology) => self.register(topology).await,
//         None => Ok(())
//     }
// }
#[macro_export]
macro_rules! update_pcie_device {
    ($self:ident, $opt:expr) => {
        match $opt {
            Some(topology) => $self.register(topology).await,
            None => Ok(()),
        }
    };
}
// unregister_pcie_device: unregisters a device from the PCIe topology,
// called from a device driver's detach (and on a failed attach).
// unregister_pcie_device can be expanded as below:
// unregister_pcie_device {
//     match pcie_topology {
//         Some(topology) => self.unregister(topology).await,
//         None => Ok(())
//     }
// }
#[macro_export]
macro_rules! unregister_pcie_device {
    ($self:ident, $opt:expr) => {
        match $opt {
            Some(topology) => $self.unregister(topology).await,
            None => Ok(()),
        }
    };
}
/// Identity trait for devices tracked in the PCIe topology.
pub trait PCIeDevice: Send + Sync {
    /// Returns the device's unique ID in the device manager.
    fn device_id(&self) -> &str;
}
/// A device occupying one slot in the guest PCIe topology.
#[derive(Clone, Debug, Default)]
pub struct PCIeEndpoint {
    /// device_id for device in device manager
    pub device_id: String,
    /// device's PCI Path in Guest
    pub pci_path: PciPath,
    /// root_port for PCIe Device
    pub root_port: Option<PCIeRootPort>,
    /// device_type is for device virtio-pci/PCI or PCIe
    pub device_type: String,
}
impl PCIeDevice for PCIeEndpoint {
    /// The endpoint's unique ID in the device manager.
    fn device_id(&self) -> &str {
        &self.device_id
    }
}
/// Reserved resources for a PCIe root port (used by some VMMs, such as Qemu).
#[derive(Clone, Debug, Default)]
pub struct ResourceReserved {
    /// Secondary bus reservation.
    /// NOTE: for this to work, patches to QEMU are needed; the PCIE-PCI
    /// bridge can be hot-plugged only into a pcie-root-port that has a
    /// 'bus-reserve' property value to provide a secondary bus for the
    /// hot-plugged bridge.
    pub bus_reserve: String,
    /// reserve prefetched MMIO aperture, 64-bit
    pub pref64_reserve: String,
    /// reserve prefetched MMIO aperture, 32-bit
    pub pref32_reserve: String,
    /// reserve non-prefetched MMIO aperture, 32-bit *only*
    pub memory_reserve: String,
    /// IO reservation
    pub io_reserve: String,
}
/// PCIe Root Port configuration.
#[derive(Clone, Debug, Default)]
pub struct PCIeRootPort {
    /// Identifier, format: rp{n}, n >= 0.
    pub id: String,
    /// Bus the root port is attached to; default is pcie.0.
    pub bus: String,
    /// Address on the bus, >= 0; default is 0x00.
    pub address: String,
    /// The (slot, chassis) pair is mandatory and must be unique for each
    /// pcie-root-port; chassis >= 0, default is 0x00.
    pub chassis: u8,
    /// slot >= 0, default is 0x00.
    pub slot: u8,
    /// multi_function is for PCIe Device passthrough;
    /// true => "on", false => "off", default is off.
    pub multi_function: bool,
    /// Reserved resources for some VMMs, such as Qemu.
    pub resource_reserved: ResourceReserved,
    /// romfile specifies the ROM file being used for this device.
    pub romfile: String,
}
/// PCIe Root Complex: the root bus plus the endpoints attached to it.
#[derive(Clone, Debug, Default)]
pub struct PCIeRootComplex {
    /// Root bus name, e.g. "pcie.0".
    pub root_bus: String,
    /// Root bus address prefix, e.g. "0000:00".
    pub root_bus_address: String,
    /// Endpoints attached to the root bus, keyed by their guest PCI path
    /// rendered as a string.
    pub root_bus_devices: HashMap<String, PCIeEndpoint>,
}
/// The guest PCIe topology (currently the flattened, root-bus-only layout).
#[derive(Debug, Default)]
pub struct PCIeTopology {
    /// Name of the hypervisor this topology is built for.
    pub hypervisor_name: String,
    /// Root complex holding the devices attached to the root bus.
    pub root_complex: PCIeRootComplex,
    /// Number of bridges, taken from device_info.default_bridges.
    pub bridges: u32,
    /// Number of PCIe root ports, taken from device_info.pcie_root_port.
    pub pcie_root_ports: u32,
    /// Whether VFIO devices are hotplugged on the root bus, taken from
    /// device_info.hotplug_vfio_on_root_bus.
    pub hotplug_vfio_on_root_bus: bool,
}
impl PCIeTopology {
    /// Build a PCIe topology from the hypervisor's topology configuration.
    ///
    /// As some special cases don't support PCIe devices, there's no need to
    /// build a PCIe Topology for them: when `config_info` is `None`, this
    /// returns `None`.
    pub fn new(config_info: Option<&TopologyConfigInfo>) -> Option<Self> {
        let topo_config = config_info?;

        let root_complex = PCIeRootComplex {
            root_bus: DEFAULT_PCIE_ROOT_BUS.to_owned(),
            root_bus_address: DEFAULT_PCIE_ROOT_BUS_ADDRESS.to_owned(),
            root_bus_devices: HashMap::with_capacity(PCIE_ROOT_BUS_SLOTS_CAPACITY as usize),
        };

        Some(Self {
            hypervisor_name: topo_config.hypervisor_name.to_owned(),
            root_complex,
            bridges: topo_config.device_info.default_bridges,
            pcie_root_ports: topo_config.device_info.pcie_root_port,
            hotplug_vfio_on_root_bus: topo_config.device_info.hotplug_vfio_on_root_bus,
        })
    }

    /// Insert an endpoint onto the root bus, allocating the first available
    /// slot. On success, `ep.pci_path` is set to the allocated path, which is
    /// also returned; `None` means no free slot is left on the root bus.
    pub fn insert_device(&mut self, ep: &mut PCIeEndpoint) -> Option<PciPath> {
        let to_pcipath = |v: u32| -> PciPath {
            PciPath {
                slots: vec![PciSlot(v as u8)],
            }
        };
        let to_string = |v: u32| -> String { to_pcipath(v).to_string() };

        // find the first available index as the allocated slot.
        let allocated_slot = (0..PCIE_ROOT_BUS_SLOTS_CAPACITY).find(|&i| {
            !self
                .root_complex
                .root_bus_devices
                .contains_key(&to_string(i))
        })?;
        let pcipath = to_string(allocated_slot);

        // update pci_path in Endpoint
        ep.pci_path = to_pcipath(allocated_slot);
        // convert the allocated slot to pci path and then insert it with ep
        self.root_complex
            .root_bus_devices
            .insert(pcipath, ep.clone());

        Some(to_pcipath(allocated_slot))
    }

    /// Remove the endpoint with `device_id` from the root bus, returning the
    /// PCI path key it occupied, or `None` when it was not present.
    pub fn remove_device(&mut self, device_id: &str) -> Option<String> {
        let mut target_device: Option<String> = None;
        self.root_complex.root_bus_devices.retain(|k, v| {
            if v.device_id() != device_id {
                true
            } else {
                target_device = Some(k.clone());
                false
            }
        });

        target_device
    }

    /// Update an endpoint's position in the topology.
    ///
    /// First, find the PCIe Endpoint in the Hash Map based on its PCI path.
    /// If found, the device's position in the Hash Map needs no update and
    /// `None` is returned. If not found, the PCI Path corresponding to the
    /// device has changed: the stale entry is removed and the endpoint is
    /// re-inserted under the new path, which is returned.
    pub fn update_device(&mut self, ep: &PCIeEndpoint) -> Option<PciPath> {
        let pci_addr = ep.pci_path.clone();

        match self
            .root_complex
            .root_bus_devices
            .entry(pci_addr.to_string())
        {
            Entry::Occupied(_) => None,
            Entry::Vacant(_entry) => {
                self.remove_device(&ep.device_id);
                self.root_complex
                    .root_bus_devices
                    .insert(pci_addr.to_string(), ep.clone());

                Some(pci_addr)
            }
        }
    }

    /// Return whether a device with `device_id` is present in the topology.
    // Takes `&self` (not `&mut self`): lookup does not mutate the topology,
    // and `&mut` callers can still call it through a reborrow.
    pub fn find_device(&self, device_id: &str) -> bool {
        for v in self.root_complex.root_bus_devices.values() {
            info!(
                sl!(),
                "find_device with: {:?}, {:?}.",
                &device_id,
                v.device_id()
            );
            if v.device_id() == device_id {
                return true;
            }
        }

        false
    }

    /// Ensure `pciep` is tracked by the topology: insert it when absent,
    /// otherwise refresh its position, returning the effective PCI path.
    pub fn do_insert_or_update(&mut self, pciep: &mut PCIeEndpoint) -> Result<PciPath> {
        // Try to check whether the device is present in the PCIe Topology.
        // If the device doesn't exist, it proceeds to register it within the topology.
        if !self.find_device(&pciep.device_id) {
            // Register a device within the PCIe topology, allocating and assigning it
            // an available PCI Path. Upon successful insertion, the pci_path in
            // PCIeEndpoint is updated and the allocated path is returned.
            self.insert_device(pciep)
                .ok_or_else(|| anyhow!("pci path allocated failed."))
        } else {
            // If the device exists, update its pcipath within the topology; when its
            // position is unchanged, keep the endpoint's current pci_path.
            Ok(self
                .update_device(pciep)
                .unwrap_or_else(|| pciep.pci_path.clone()))
        }
    }
}
// do_add_pcie_endpoint adds a device into the PCIe topology as a PCIe endpoint.
// device_id: device's Unique ID in Device Manager.
// allocated_pcipath: pci path allocated before add_device, if any.
// topology: PCIe Topology for devices to build a PCIe Topology in Guest.
pub fn do_add_pcie_endpoint(
    device_id: String,
    allocated_pcipath: Option<PciPath>,
    topology: &mut PCIeTopology,
) -> Result<PciPath> {
    let mut pcie_endpoint = PCIeEndpoint {
        device_id,
        device_type: "PCIe".to_string(),
        // Fall back to the default path when none was pre-allocated.
        pci_path: allocated_pcipath.unwrap_or_default(),
        ..Default::default()
    };

    topology.do_insert_or_update(&mut pcie_endpoint)
}

View File

@ -17,7 +17,7 @@ use hypervisor::{
},
BlockConfig, Hypervisor, VfioConfig,
};
use kata_types::config::TomlConfig;
use kata_types::config::{hypervisor::TopologyConfigInfo, TomlConfig};
use kata_types::mount::Mount;
use oci::{Linux, LinuxCpu, LinuxResources};
use persist::sandbox_persist::Persist;
@ -59,8 +59,9 @@ impl ResourceManagerInner {
toml_config: Arc<TomlConfig>,
init_size_manager: InitialSizeManager,
) -> Result<Self> {
let topo_config = TopologyConfigInfo::new(&toml_config);
// create device manager
let dev_manager = DeviceManager::new(hypervisor.clone())
let dev_manager = DeviceManager::new(hypervisor.clone(), topo_config.as_ref())
.await
.context("failed to create device manager")?;
@ -510,12 +511,14 @@ impl Persist for ResourceManagerInner {
sid: resource_args.sid.clone(),
config: resource_args.config,
};
let topo_config = TopologyConfigInfo::new(&args.config);
Ok(Self {
sid: resource_args.sid,
agent: resource_args.agent,
hypervisor: resource_args.hypervisor.clone(),
device_manager: Arc::new(RwLock::new(
DeviceManager::new(resource_args.hypervisor).await?,
DeviceManager::new(resource_args.hypervisor, topo_config.as_ref()).await?,
)),
network: None,
share_fs: None,

View File

@ -9,6 +9,7 @@ mod tests {
use std::sync::Arc;
use anyhow::{anyhow, Context, Result};
use kata_types::config::hypervisor::TopologyConfigInfo;
use netlink_packet_route::MACVLAN_MODE_PRIVATE;
use scopeguard::defer;
use tests_utils::load_test_config;
@ -29,6 +30,7 @@ mod tests {
async fn get_device_manager() -> Result<Arc<RwLock<DeviceManager>>> {
let hypervisor_name: &str = "qemu";
let toml_config = load_test_config(hypervisor_name.to_owned())?;
let topo_config = TopologyConfigInfo::new(&toml_config);
let hypervisor_config = toml_config
.hypervisor
.get(hypervisor_name)
@ -40,7 +42,7 @@ mod tests {
.await;
let dm = Arc::new(RwLock::new(
DeviceManager::new(Arc::new(hypervisor))
DeviceManager::new(Arc::new(hypervisor), topo_config.as_ref())
.await
.context("device manager")?,
));