runtime-rs: Add QEMU VFIO command-line parameter structs

Add QEMU command-line parameter types for VFIO device cold-plug:

- ObjectIommufd: /dev/iommu object for iommufd-backed passthrough
- PCIeVfioDevice: vfio-pci device on a PCIe root port or switch port,
  supporting both legacy VFIO group and iommufd cdev backends
- FWCfgDevice: firmware config device for fw_cfg blob injection
- VfioDeviceBase/VfioDeviceConfig/VfioDeviceGroup: high-level wrappers
  that compose the above into complete QEMU argument sets, resolving
  IOMMU groups, device nodes, and per-device fw_cfg entries

Refactor existing cmdline structs (BalloonDevice, VirtioNetDevice,
VirtioBlkDevice, etc.) to use a shared devices_to_params() helper
and align the ToQemuParams implementations.

Signed-off-by: Alex Lyn <alex.lyn@antgroup.com>
Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
This commit is contained in:
Alex Lyn
2026-04-12 16:26:48 +02:00
committed by Fabiano Fidêncio
parent 748252ba15
commit 75ed85205a

View File

@@ -3,7 +3,7 @@
// SPDX-License-Identifier: Apache-2.0
//
use crate::device::topology::{PCIePortBusPrefix, TopologyPortDevice, DEFAULT_PCIE_ROOT_BUS};
use crate::device::topology::{TopologyPortDevice, DEFAULT_PCIE_ROOT_BUS};
use crate::qemu::qmp::get_qmp_socket_path;
use crate::utils::{
chown_to_parent, clear_cloexec, create_vhost_net_fds, open_named_tuntap, uses_native_ccw_bus,
@@ -11,6 +11,7 @@ use crate::utils::{
};
use crate::{kernel_param::KernelParams, Address, HypervisorConfig};
use std::borrow::Cow;
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
@@ -19,11 +20,12 @@ use kata_types::rootless::is_rootless;
use serde::{Deserialize, Serialize};
use serde_json;
use std::collections::HashMap;
use std::fmt::Display;
use std::fmt::{Display, Write};
use std::fs::{read_to_string, File};
use std::os::fd::{AsRawFd, FromRawFd, IntoRawFd};
use std::os::unix::net::UnixListener;
use std::path::PathBuf;
use std::str;
use tokio;
// These should have been called MiB and GiB for better readability but the
@@ -510,7 +512,7 @@ impl Machine {
accel: "kvm".to_owned(),
options: config.machine_info.machine_accelerators.clone(),
nvdimm: false,
kernel_irqchip: None,
// NOTE(review): the value below requests kernel_irqchip "on" unconditionally, whereas the
// earlier inline remark described the setting as defaulting to off and being turned on only
// when VFIO devices need it — confirm which default is intended.
kernel_irqchip: Some("on".to_owned()),
confidential_guest_support: "".to_owned(),
is_nvdimm_supported,
memory_backend: None,
@@ -2012,108 +2014,146 @@ impl ToQemuParams for ObjectTdxGuest {
}
}
/// PCIeRootPortDevice directly attached onto the root bus
/// -device pcie-root-port,id=rp0,bus=pcie.0,chassis=0,slot=0,multifunction=off,pref64-reserve=<X>B,mem-reserve=<Y>B
#[derive(Debug, Default)]
/// Address string that `PCIeRootPortDevice::new` assigns to a freshly created root port
/// until `with_addr` overrides it.
const DEFAULT_START_ADDR: &str = "0x5";
/// QEMU `iommufd` object backend, identified by the id that is emitted on the
/// `-object iommufd,id=<id>` argument.
#[derive(Debug, Clone)]
pub struct ObjectIommufd {
    /// Object id rendered as `id=`.
    id: String,
}

impl ObjectIommufd {
    /// Builds the object from anything convertible into an owned id string.
    pub fn new(id: impl Into<String>) -> Self {
        let id = id.into();
        Self { id }
    }
}
#[async_trait]
impl ToQemuParams for ObjectIommufd {
    /// Renders the object as the two arguments `-object` and `iommufd,id=<id>`.
    async fn qemu_params(&self) -> Result<Vec<String>> {
        let object_spec = format!("iommufd,id={}", self.id);
        Ok(vec![String::from("-object"), object_spec])
    }
}
/// Representation of a PCIe Root Port device in QEMU.
#[derive(Debug, Clone)]
pub struct PCIeRootPortDevice {
id: String,
bus: String,
chassis: String,
slot: String,
multifunction: String,
bus: Cow<'static, str>,
port: Option<u16>,
/// Numerical identifier for the chassis.
chassis: u32,
/// Optional slot identifier.
slot: Option<u32>,
/// Whether the device supports multiple functions.
multifunction: bool,
/// PCI address; supports simple ("0x5") and complex multifunction ("0x5.0x1") formats.
addr: String,
mem_reserve: String,
pref64_reserve: String,
}
impl PCIeRootPortDevice {
fn new(id: &str, bus: &str, chassis: &str, slot: &str, multifunc: bool, addr: &str) -> Self {
PCIeRootPortDevice {
id: id.to_string(),
bus: if bus.is_empty() {
DEFAULT_PCIE_ROOT_BUS.to_owned()
} else {
bus.to_string()
/// Creates a new PCIe Root Port device instance.
pub fn new(id: impl Into<String>, bus: impl Into<String>) -> Self {
Self {
id: id.into(),
bus: {
let bus_str = bus.into();
if bus_str.is_empty() {
Cow::Borrowed(DEFAULT_PCIE_ROOT_BUS)
} else {
Cow::Owned(bus_str)
}
},
chassis: if chassis.is_empty() {
"0x00".to_owned()
} else {
chassis.to_owned()
},
slot: if slot.is_empty() {
"0x00".to_owned()
} else {
slot.to_owned()
},
multifunction: if multifunc {
"on".to_owned()
} else {
"off".to_owned()
},
addr: if addr.is_empty() {
"0x00".to_owned()
} else {
addr.to_owned()
},
..Default::default()
port: None,
chassis: 1,
slot: None,
multifunction: false,
addr: DEFAULT_START_ADDR.to_string(),
}
}
fn set_mem_reserve(&mut self, mem_reserve: u64) -> &mut Self {
if mem_reserve > 0 {
self.mem_reserve = format!("{mem_reserve}B");
}
pub fn with_port(mut self, port: u16) -> Self {
self.port = Some(port);
self
}
fn set_pref64_reserve(&mut self, pref64_reserve: u64) -> &mut Self {
if pref64_reserve > 0 {
self.pref64_reserve = format!("{pref64_reserve}B");
}
pub fn with_chassis(mut self, chassis: u32) -> Self {
self.chassis = chassis;
self
}
pub fn with_slot(mut self, slot: u32) -> Self {
self.slot = Some(slot);
self
}
pub fn with_multifunction(mut self, multifunction: bool) -> Self {
self.multifunction = multifunction;
self
}
/// Sets the PCI address. Supports standard ("0x5") and multifunction ("0x5.0x1") strings.
pub fn with_addr(mut self, addr: impl Into<String>) -> Self {
self.addr = addr.into();
self
}
}
#[async_trait]
impl ToQemuParams for PCIeRootPortDevice {
async fn qemu_params(&self) -> Result<Vec<String>> {
let mut device_params = Vec::new();
let mut params = String::with_capacity(256);
// -device pcie-root-port,id=rp0
device_params.push(format!("{},id={}", "pcie-root-port", self.id));
device_params.push(format!("bus={}", self.bus));
device_params.push(format!("chassis={}", self.chassis));
device_params.push(format!("slot={}", self.slot));
device_params.push(format!("multifunction={}", self.multifunction));
if self.multifunction.as_str() == "on" {
device_params.push(format!("addr={}", self.addr));
}
if !self.mem_reserve.is_empty() {
device_params.push(format!("mem-reserve={}", self.mem_reserve));
}
if !self.pref64_reserve.is_empty() {
device_params.push(format!("pref64-reserve={}", self.pref64_reserve));
// Example: -device pcie-root-port,id=rp0
write!(params, "pcie-root-port,id={}", self.id).unwrap();
if let Some(port) = self.port {
write!(params, ",port={}", port).unwrap();
}
Ok(vec!["-device".to_string(), device_params.join(",")])
// Match govmm: only pass `addr=` for multifunction ports, or when a concrete address
// is required (VFIO cold-plug uses e.g. 0x09). Placeholder pool ports use addr "0" from
// `add_pcie_root_ports`; emitting `addr=0` for every `pcie-root-port` collides on
// `pcie.0` ("slot 0 ... in use by mch").
if self.multifunction || self.addr != "0" {
write!(params, ",addr={}", self.addr).unwrap();
}
write!(params, ",chassis={}", self.chassis).unwrap();
if let Some(slot) = self.slot {
write!(params, ",slot={}", slot).unwrap();
}
write!(params, ",bus={}", self.bus).unwrap();
write!(
params,
",multifunction={}",
if self.multifunction { "on" } else { "off" }
)
.unwrap();
Ok(vec!["-device".to_string(), params])
}
}
/// PCIeSwitchUpstreamPortDevice is the port attached to the root port.
#[derive(Debug, Default)]
/// PCIe Switch Upstream Port device, which must be connected to a PCIe Root Port,
/// and can have PCIe devices or downstream ports connected to it.
#[derive(Debug, Clone)]
pub struct PCIeSwitchUpstreamPortDevice {
id: String,
bus: String,
}
impl PCIeSwitchUpstreamPortDevice {
fn new(id: &str, bus: &str) -> Self {
PCIeSwitchUpstreamPortDevice {
id: id.to_string(),
bus: bus.to_string(),
pub fn new(id: impl Into<String>, bus: impl Into<String>) -> Self {
Self {
id: id.into(),
bus: bus.into(),
}
}
}
@@ -2121,17 +2161,16 @@ impl PCIeSwitchUpstreamPortDevice {
#[async_trait]
impl ToQemuParams for PCIeSwitchUpstreamPortDevice {
async fn qemu_params(&self) -> Result<Vec<String>> {
let mut device_params = Vec::new();
device_params.push(format!("{},id={}", "x3130-upstream", self.id));
device_params.push(format!("bus={}", self.bus));
Ok(vec!["-device".to_string(), device_params.join(",")])
Ok(vec![
"-device".to_string(),
format!("x3130-upstream,id={},bus={}", self.id, self.bus),
])
}
}
/// PCIeSwitchDownstreamPortDevice is the port attached to the root port.
#[derive(Debug, Default)]
/// PCIe Switch Downstream Port device, which must be connected to a PCIe Root Port or another downstream port,
/// and can have PCIe devices or another switch's downstream ports connected to it.
#[derive(Debug, Clone)]
pub struct PCIeSwitchDownstreamPortDevice {
// format: sup{n}, n>=0
pub id: String,
@@ -2140,36 +2179,312 @@ pub struct PCIeSwitchDownstreamPortDevice {
pub bus: String,
// (slot, chassis) pair is mandatory and must be unique for each downstream port, >=0, default is 0x00
pub chassis: String,
pub chassis: u32,
// >=0, default is 0x00
pub slot: String,
pub slot: u32,
}
impl PCIeSwitchDownstreamPortDevice {
fn new(bus: &str, chassis: u32, index: u32) -> Self {
PCIeSwitchDownstreamPortDevice {
// "swdp{i}"
id: format!("{}{}", PCIePortBusPrefix::SwitchDownstreamPort, index),
// "swup0"
bus: bus.to_string(),
chassis: chassis.to_string(),
slot: index.to_string(),
pub fn new(bus: impl Into<String>, chassis: u32, slot: u32) -> Self {
Self {
id: format!("swdp{}", slot),
bus: bus.into(),
chassis,
slot,
}
}
#[allow(dead_code)]
pub fn with_id(mut self, id: impl Into<String>) -> Self {
self.id = id.into();
self
}
}
#[async_trait]
impl ToQemuParams for PCIeSwitchDownstreamPortDevice {
async fn qemu_params(&self) -> Result<Vec<String>> {
let mut device_params = Vec::new();
Ok(vec![
"-device".to_string(),
format!(
"xio3130-downstream,id={},bus={},chassis={},slot={}",
self.id, self.bus, self.chassis, self.slot
),
])
}
}
device_params.push(format!("{},id={}", "xio3130-downstream", self.id));
device_params.push(format!("bus={}", self.bus));
device_params.push(format!("chassis={}", self.chassis));
device_params.push(format!("slot={}", self.slot));
/// VFIO PCI device, rendered as a `-device vfio-pci,...` argument.
///
/// Each field below becomes one property of that argument; the optional
/// fields are only emitted when they are `Some`.
#[derive(Debug, Clone)]
pub struct PCIeVfioDevice {
/// Host device address, emitted as `host=`.
host_bdf: String,
/// Id of the guest bus (e.g. a root port id) the device plugs into, emitted as `bus=`.
bus: String,
/// Device address on that bus, emitted as `addr=`; the constructor sets it to "0x0".
addr: String,
/// Id of the iommufd object to use, emitted as `iommufd=` when present.
iommufd: Option<String>,
/// Optional override emitted as `x-pci-vendor-id=`.
x_pci_vendor_id: Option<String>,
/// Optional override emitted as `x-pci-device-id=`.
x_pci_device_id: Option<String>,
}
Ok(vec!["-device".to_string(), device_params.join(",")])
impl PCIeVfioDevice {
    /// Creates a `vfio-pci` device description.
    ///
    /// * `host_bdf` - host address of the device to pass through.
    /// * `bus` - id of the guest bus (typically a PCIe root port) to attach to.
    /// * `iommufd` - id of the iommufd object backing the device. An empty
    ///   string means "no iommufd object": the `iommufd=` property is then
    ///   omitted instead of being emitted with an empty value, mirroring how
    ///   `add_iommufd` ignores empty ids.
    ///
    /// The device address defaults to "0x0" and no vendor/device id override
    /// is set.
    pub fn new(
        host_bdf: impl Into<String>,
        bus: impl Into<String>,
        iommufd: impl Into<String>,
    ) -> Self {
        Self {
            host_bdf: host_bdf.into(),
            bus: bus.into(),
            addr: "0x0".to_string(),
            // `Some("")` would render as a dangling `,iommufd=`.
            iommufd: Some(iommufd.into()).filter(|id| !id.is_empty()),
            x_pci_vendor_id: None,
            x_pci_device_id: None,
        }
    }

    /// Overrides the device address on its bus.
    // No caller is visible in this part of the file, hence the lint exemption.
    #[allow(dead_code)]
    pub fn with_addr(mut self, addr: impl Into<String>) -> Self {
        self.addr = addr.into();
        self
    }

    /// Sets the value emitted as `x-pci-vendor-id=`.
    pub fn with_vendor_id(mut self, vendor_id: impl Into<String>) -> Self {
        self.x_pci_vendor_id = Some(vendor_id.into());
        self
    }

    /// Sets the value emitted as `x-pci-device-id=`.
    pub fn with_device_id(mut self, device_id: impl Into<String>) -> Self {
        self.x_pci_device_id = Some(device_id.into());
        self
    }
}
#[async_trait]
impl ToQemuParams for PCIeVfioDevice {
    /// Renders the device as `-device vfio-pci,host=..,bus=..,addr=..` followed by the
    /// optional `iommufd=`, `x-pci-vendor-id=` and `x-pci-device-id=` properties, in that order.
    async fn qemu_params(&self) -> Result<Vec<String>> {
        // Mandatory properties first; the driver name leads the list.
        let mut props = vec![
            format!("vfio-pci,host={}", self.host_bdf),
            format!("bus={}", self.bus),
            format!("addr={}", self.addr),
        ];

        // Optional properties are appended only when they carry a value.
        props.extend(self.iommufd.iter().map(|id| format!("iommufd={}", id)));
        props.extend(
            self.x_pci_vendor_id
                .iter()
                .map(|vendor| format!("x-pci-vendor-id={}", vendor)),
        );
        props.extend(
            self.x_pci_device_id
                .iter()
                .map(|device| format!("x-pci-device-id={}", device)),
        );

        Ok(vec!["-device".to_string(), props.join(",")])
    }
}
/// Basic description of a VFIO passthrough device: where it lives on the
/// host, which guest bus it attaches to and, optionally, which iommufd
/// object backs it.
// Not referenced by the code visible in this part of the file.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct VfioDeviceBase {
    /// Host BDF address (e.g., "0000:21:00.0" or the short form "21:00.0").
    pub host_bdf: String,
    /// The bus to which the device is attached (e.g., "pci.1").
    pub bus: String,
    /// IOMMU file descriptor ID (e.g., "iommufd0").
    pub iommufd: Option<String>,
}
/// Comprehensive configuration for a VFIO device.
///
/// Describes both the passed-through host device and the PCIe root port
/// that is created for it on the guest side.
#[derive(Debug, Clone, Default)]
#[allow(dead_code)]
pub struct VfioDeviceConfig {
    /// Host BDF address (e.g., "0000:21:00.0" or "21:00.0").
    pub host_bdf: String,

    /// The bus to which the device is attached (e.g., "pci.1").
    pub bus: String,

    /// Port number of the associated PCIe Root Port.
    pub port: u16,

    /// Chassis number of the associated PCIe Root Port.
    pub chassis: u32,

    /// Whether to enable multifunction support on the PCIe Root Port.
    pub multifunction: bool,

    /// Indicates if this is the primary device (function 0) in a multifunction group.
    /// If true, the Root Port will be configured with `multifunction=on`.
    pub is_multifunction_primary: bool,

    /// Address of the PCIe Root Port on the system bus (e.g., "0x5" or "0x5.0x1").
    pub root_port_addr: String,

    /// Device address for the VFIO device itself (typically "0x0").
    #[allow(dead_code)]
    pub vfio_addr: String,

    /// Optional PCI Vendor ID override.
    pub x_pci_vendor_id: Option<String>,

    /// Optional PCI Device ID override.
    pub x_pci_device_id: Option<String>,
}

impl VfioDeviceConfig {
    /// Creates a new VFIO device configuration.
    ///
    /// The bus id is derived from `chassis` (`pci.<chassis>`), multifunction
    /// is enabled and the device is marked as the primary function. Both
    /// address fields default to `port` rendered as a hexadecimal literal
    /// (port 16 becomes "0x10"), so the digits agree with the `0x` prefix.
    pub fn new(host_bdf: impl Into<String>, port: u16, chassis: u32) -> Self {
        // Formatting the port in decimal behind a "0x" prefix would turn
        // port 16 into "0x16", i.e. 22.
        let port_addr = format!("0x{:x}", port);
        Self {
            host_bdf: host_bdf.into(),
            bus: format!("pci.{}", chassis),
            port,
            chassis,
            multifunction: true,
            is_multifunction_primary: true,
            root_port_addr: port_addr.clone(),
            // NOTE(review): the field is documented as typically "0x0" and is not read by the
            // code visible here — confirm whether a port-derived default is really wanted.
            vfio_addr: port_addr,
            x_pci_vendor_id: None,
            x_pci_device_id: None,
        }
    }

    /// Enables or disables multifunction on the root port.
    pub fn with_multifunction(mut self, multifunction: bool) -> Self {
        self.multifunction = multifunction;
        self
    }

    /// Overrides the bus id the VFIO device attaches to.
    pub fn with_vfio_bus(mut self, bus: impl Into<String>) -> Self {
        self.bus = bus.into();
        self
    }

    /// Sets a specific root port address (used for non-multifunction modes).
    ///
    /// As a side effect the configuration is no longer flagged as the
    /// primary function of a multifunction group.
    pub fn with_root_port_addr(mut self, addr: impl Into<String>) -> Self {
        self.root_port_addr = addr.into();
        self.is_multifunction_primary = false;
        self
    }

    /// Configures the device as the primary device (function 0) in a multifunction group.
    ///
    /// Every field other than the root port address and the primary flag is
    /// left at its `Default` value.
    #[allow(dead_code)]
    pub fn as_multifunction_primary(base_addr: impl Into<String>) -> Self {
        Self {
            is_multifunction_primary: true,
            root_port_addr: base_addr.into(),
            ..Default::default()
        }
    }

    /// Configures the device as a secondary device (functions 1-7) in a multifunction group.
    ///
    /// The root port address becomes `<base_addr>.0x<function>`.
    ///
    /// # Panics
    ///
    /// Panics when `function` is 0 or greater than 7.
    #[allow(dead_code)]
    pub fn as_multifunction_secondary(base_addr: impl Into<String>, function: u8) -> Self {
        assert!(
            (1..=7).contains(&function),
            "Function number must be between 1 and 7 for secondary devices"
        );

        Self {
            is_multifunction_primary: false,
            root_port_addr: format!("{}.0x{:x}", base_addr.into(), function),
            ..Default::default()
        }
    }

    /// Overrides the address of the VFIO device itself.
    #[allow(dead_code)]
    pub fn with_vfio_addr(mut self, addr: impl Into<String>) -> Self {
        self.vfio_addr = addr.into();
        self
    }

    /// Sets the PCI vendor id override.
    #[allow(dead_code)]
    pub fn with_vendor_id(mut self, vendor_id: impl Into<String>) -> Self {
        self.x_pci_vendor_id = Some(vendor_id.into());
        self
    }

    /// Sets the PCI device id override.
    #[allow(dead_code)]
    pub fn with_device_id(mut self, device_id: impl Into<String>) -> Self {
        self.x_pci_device_id = Some(device_id.into());
        self
    }
}
/// Configuration for a group of VFIO devices, typically used to manage multiple
/// devices sharing the same PCI slot via multifunction support.
#[derive(Debug, Clone)]
pub struct VfioDeviceGroup {
    /// Base PCI slot address (e.g., "0x5").
    pub base_addr: String,

    /// Identifier for the IOMMU file descriptor (IOMMUFD) backend.
    #[allow(dead_code)]
    pub iommufd: String,

    /// Starting port number for the assigned PCIe root ports.
    pub start_port: u16,

    /// Starting chassis number for the assigned PCIe root ports.
    pub start_chassis: u32,

    /// List of host BDF (Bus-Device-Function) addresses.
    pub devices: Vec<String>,

    /// Indicates whether to enable PCI multifunction support for this group.
    pub multifunction: bool,
}

impl VfioDeviceGroup {
    /// Creates an empty group with multifunction disabled.
    pub fn new(
        base_addr: impl Into<String>,
        iommufd: impl Into<String>,
        start_port: u16,
        start_chassis: u32,
    ) -> Self {
        Self {
            base_addr: base_addr.into(),
            iommufd: iommufd.into(),
            start_port,
            start_chassis,
            devices: Vec::new(),
            multifunction: false,
        }
    }

    /// Replaces the list of host BDF addresses in the group.
    pub fn with_devices(mut self, devices: Vec<String>) -> Self {
        self.devices = devices;
        self
    }

    /// Enables or disables multifunction placement for the group.
    pub fn with_multifunction(mut self, multifunction: bool) -> Self {
        self.multifunction = multifunction;
        self
    }

    /// Generates a list of configuration objects for all devices in the group.
    ///
    /// Device `idx` gets port `start_port + idx` and chassis
    /// `start_chassis + idx`. Only the first device of a multifunction group
    /// has `multifunction` set. The root port address is chosen as follows:
    ///
    /// * first device: `base_addr` unchanged;
    /// * multifunction group: `<base_addr>.0x<idx>`, i.e. function `idx` of the shared slot;
    /// * otherwise: the slot `idx` positions after `base_addr`, so that every
    ///   root port gets an address of its own. When `base_addr` cannot be
    ///   read as a hexadecimal slot number it is used verbatim.
    pub fn generate_configs(&self) -> Vec<VfioDeviceConfig> {
        self.devices
            .iter()
            .enumerate()
            .map(|(idx, bdf)| {
                // Groups are a handful of devices, so the narrowing casts cannot truncate in
                // practice.
                let port = self.start_port + idx as u16;
                let chassis = self.start_chassis + idx as u32;

                VfioDeviceConfig::new(bdf, port, chassis)
                    .with_multifunction(idx == 0 && self.multifunction)
                    .with_root_port_addr(self.root_port_addr_for(idx))
            })
            .collect()
    }

    /// Computes the root port address for the device at position `idx`.
    fn root_port_addr_for(&self, idx: usize) -> String {
        if idx == 0 {
            return self.base_addr.clone();
        }

        if self.multifunction {
            // NOTE(review): positions above 7 yield a function number outside 0..=7 —
            // confirm callers never build larger multifunction groups.
            return format!("{}.0x{:x}", self.base_addr, idx);
        }

        // Independent root ports must not share one slot: step to the next slot per device.
        match Self::parse_slot(&self.base_addr).and_then(|slot| slot.checked_add(idx as u32)) {
            Some(slot) => format!("0x{:x}", slot),
            None => self.base_addr.clone(),
        }
    }

    /// Reads `addr` as a bare hexadecimal slot number with an optional `0x` prefix.
    fn parse_slot(addr: &str) -> Option<u32> {
        let digits = addr
            .strip_prefix("0x")
            .or_else(|| addr.strip_prefix("0X"))
            .unwrap_or(addr);
        u32::from_str_radix(digits, 16).ok()
    }
}
@@ -2615,6 +2930,165 @@ impl<'a> QemuCmdLine<'a> {
.set_nvdimm(false);
}
/// Queues an `iommufd` object with the given id for the command line.
///
/// An empty id is ignored. No de-duplication happens here: every call with
/// a non-empty id appends another object.
pub fn add_iommufd(&mut self, id: impl Into<String>) -> Result<()> {
    let id = id.into();
    if id.is_empty() {
        return Ok(());
    }

    self.devices.push(Box::new(ObjectIommufd::new(id)));
    Ok(())
}
/// Adds one VFIO device together with a dedicated PCIe root port.
///
/// Three entries are queued, in this order:
///
/// 1. an `iommufd` object with the fixed id `iommufd0`,
/// 2. a `pcie-root-port` named `pci.<chassis>` on the default PCIe root bus, using the
///    port, chassis, multifunction flag and root port address from `config`,
/// 3. a `vfio-pci` device for `config.host_bdf` plugged into that root port and bound to
///    `iommufd0`, with the optional vendor/device id overrides applied.
///
/// NOTE(review): the `iommufd0` object is queued on every call, so adding several devices
/// queues it several times — confirm that repeated object ids are acceptable to QEMU.
pub fn add_vfio_device(&mut self, config: VfioDeviceConfig) -> Result<()> {
    const IOMMUFD_ID: &str = "iommufd0";

    self.add_iommufd(IOMMUFD_ID)?;

    let root_port_id = format!("pci.{}", config.chassis);
    let root_port = PCIeRootPortDevice::new(&root_port_id, DEFAULT_PCIE_ROOT_BUS)
        .with_port(config.port)
        .with_chassis(config.chassis)
        .with_multifunction(config.multifunction)
        .with_addr(&config.root_port_addr);

    // The root port id doubles as the bus the VFIO device sits on.
    let vfio_device = PCIeVfioDevice::new(&config.host_bdf, root_port_id, IOMMUFD_ID);
    let vfio_device = match &config.x_pci_vendor_id {
        Some(vendor_id) => vfio_device.with_vendor_id(vendor_id),
        None => vfio_device,
    };
    let vfio_device = match &config.x_pci_device_id {
        Some(device_id) => vfio_device.with_device_id(device_id),
        None => vfio_device,
    };

    self.devices.reserve(2);
    self.devices.push(Box::new(root_port));
    self.devices.push(Box::new(vfio_device));

    Ok(())
}
/// Adds a VFIO device behind its own PCIe root port, with a per-bus iommufd object.
///
/// Only the `q35` and `virt` machine types are handled; for any other type a message is
/// logged and nothing is added. Otherwise the following entries are queued:
///
/// * an `iommufd` object named `iommufd<config.bus>`,
/// * a non-multifunction `pcie-root-port` whose id is `config.bus`, with chassis
///   `config.chassis`, slot `config.port` and address `0x<port as two hex digits>`,
/// * a `vfio-pci` device for `config.host_bdf` on that root port, bound to the iommufd
///   object above, with the optional vendor/device id overrides applied.
pub fn add_pcie_vfio_device(&mut self, config: VfioDeviceConfig) -> Result<()> {
    let machine_type = &self.config.machine_info.machine_type;
    if !matches!(machine_type.as_str(), "q35" | "virt") {
        info!(
            sl!(),
            "PCIe root ports not supported for machine type: {}", machine_type
        );
        return Ok(());
    }

    let iommufd_name = format!("iommufd{}", config.bus);
    self.add_iommufd(&iommufd_name)?;

    let root_port_id = config.bus.clone();
    let root_port = PCIeRootPortDevice::new(&root_port_id, DEFAULT_PCIE_ROOT_BUS)
        .with_chassis(config.chassis)
        // Lossless widening; the port number is reused as the slot number.
        .with_slot(u32::from(config.port))
        .with_multifunction(false)
        .with_addr(format!("0x{:02x}", config.port));

    info!(sl!(), "PCIe Root Port: {:?}", &root_port);

    let mut vfio_device = PCIeVfioDevice::new(&config.host_bdf, root_port_id, &iommufd_name);
    if let Some(vendor_id) = &config.x_pci_vendor_id {
        vfio_device = vfio_device.with_vendor_id(vendor_id);
    }
    if let Some(device_id) = &config.x_pci_device_id {
        vfio_device = vfio_device.with_device_id(device_id);
    }

    self.devices.reserve(2);
    self.devices.push(Box::new(root_port));
    self.devices.push(Box::new(vfio_device));

    Ok(())
}
/// Adds every configuration in `configs` through `add_vfio_device`, in order.
///
/// Stops at, and returns, the first error. An empty list is a no-op.
pub fn add_vfio_devices(
    &mut self,
    configs: Vec<VfioDeviceConfig>,
) -> Result<()> {
    if !configs.is_empty() {
        // Make room up front for the root port and device of each entry.
        self.devices.reserve(configs.len() * 2);
    }

    configs
        .into_iter()
        .try_for_each(|config| self.add_vfio_device(config))
}
/// Expands `group` into per-device configurations and adds all of them.
pub fn add_vfio_device_group(&mut self, group: VfioDeviceGroup) -> Result<()> {
    self.add_vfio_devices(group.generate_configs())
}
/// Convenience helper that adds a set of GPUs and a set of NVSwitches as VFIO devices.
///
/// * GPUs form one multifunction group at base address "0x5", starting at port 16 and
///   chassis 1.
/// * NVSwitches start at port 24 and chassis 9; the first sits at "0x4" with
///   multifunction enabled, the following ones at "0x4.0x<idx>". NVSwitch addresses
///   lacking the "0000:" prefix get it prepended.
///
/// NOTE(review): `iommufd` is registered here and stored in the GPU group, but the devices
/// themselves are added through `add_vfio_device`, which binds them to the fixed id
/// `iommufd0` — confirm callers always pass "iommufd0".
#[allow(dead_code)]
pub fn add_gpu_nvswitch_setup(
    &mut self,
    gpus: Vec<&str>,
    nvswitches: Vec<&str>,
    iommufd: &str,
) -> Result<()> {
    self.add_iommufd(iommufd)?;

    if !gpus.is_empty() {
        let gpu_bdfs: Vec<String> = gpus.iter().map(|s| s.to_string()).collect();
        let gpu_group = VfioDeviceGroup::new("0x5", iommufd, 16, 1)
            .with_devices(gpu_bdfs)
            .with_multifunction(true);
        self.add_vfio_device_group(gpu_group)?;
    }

    if nvswitches.is_empty() {
        return Ok(());
    }

    let mut nvswitch_configs: Vec<VfioDeviceConfig> = Vec::with_capacity(nvswitches.len());
    for (idx, bdf) in nvswitches.iter().enumerate() {
        let port = 24 + idx as u16;
        let chassis = 9 + idx as u32;

        // Function 0 uses the bare slot; later switches become further functions of it.
        let addr = match idx {
            0 => "0x4".to_string(),
            function => format!("0x4.0x{:x}", function),
        };

        let full_bdf = if bdf.starts_with("0000:") {
            bdf.to_string()
        } else {
            format!("0000:{}", bdf)
        };

        nvswitch_configs.push(
            VfioDeviceConfig::new(full_bdf, port, chassis)
                .with_multifunction(idx == 0)
                .with_root_port_addr(addr),
        );
    }

    self.add_vfio_devices(nvswitch_configs)
}
/// Note: add_pcie_root_port and add_pcie_switch_port follow kata-runtime's related implementations of vfio devices.
/// The design originates from https://github.com/qemu/qemu/blob/master/docs/pcie.txt
///
@@ -2645,15 +3119,13 @@ impl<'a> QemuCmdLine<'a> {
pub fn add_pcie_root_ports(
&mut self,
root_ports: HashMap<u32, TopologyPortDevice>,
mem_reserve: u64,
pref64_reserve: u64,
) -> Result<()> {
if root_ports.is_empty() {
return Ok(());
}
let machine_type: &str = &self.config.machine_info.machine_type;
let (addr, multi_function) = match machine_type {
let machine_type = &self.config.machine_info.machine_type;
let (addr, multi_function) = match machine_type.as_str() {
"q35" | "virt" => ("0", false),
_ => {
info!(
@@ -2664,20 +3136,27 @@ impl<'a> QemuCmdLine<'a> {
}
};
// -device pcie-root-port,id=root_port1,multifunction=on,chassis=x,addr=z.0[,slot=y][,bus=pcie.0]
for (index, rp) in root_ports.iter() {
let (chassis, slot) = (format!("{}", index + 1), format!("{index}"));
let mut root_port_dev = PCIeRootPortDevice::new(
&rp.port_id(), // rpX
&rp.bus, // pcie.0
&chassis,
&slot,
multi_function,
addr,
);
root_port_dev
.set_mem_reserve(mem_reserve)
.set_pref64_reserve(pref64_reserve);
self.devices.reserve(root_ports.len());
for (index, rp) in root_ports {
// VFIO cold-plug (see `add_pcie_vfio_device`) runs before this when resource order
// is CDI VFIO then port pool; it already emits `pcie-root-port,id=rpN` for reserved
// slots (`TopologyPortDevice::allocated`). Skip placeholders for those IDs or QEMU
// errors with duplicate device id (e.g. two `id=rp0`).
if rp.allocated {
debug!(
sl!(),
"skip add_pcie_root_ports for {} (already allocated / emitted)",
rp.port_id()
);
continue;
}
let root_port_dev = PCIeRootPortDevice::new(rp.port_id(), &rp.bus)
.with_chassis(index + 1)
.with_slot(index)
.with_multifunction(multi_function)
.with_addr(addr);
self.devices.push(Box::new(root_port_dev));
}
@@ -2698,15 +3177,13 @@ impl<'a> QemuCmdLine<'a> {
pub fn add_pcie_switch_ports(
&mut self,
switch_ports: HashMap<u32, TopologyPortDevice>,
mem_reserve: u64,
pref64_reserve: u64,
) -> Result<()> {
if switch_ports.is_empty() {
return Ok(());
}
let machine_type = &self.config.machine_info.machine_type;
if machine_type != "q35" && machine_type != "virt" {
if !matches!(machine_type.as_str(), "q35" | "virt") {
info!(
sl!(),
"PCIe switch ports not supported for machine type: {}", machine_type
@@ -2714,43 +3191,41 @@ impl<'a> QemuCmdLine<'a> {
return Ok(());
}
for (index, rp) in switch_ports.iter() {
// 1. Create Root Port
// -device pcie-root-port,id=root_port1,chassis=x,slot=y[,bus=pcie.0][,addr=z]
let estimated_devices: usize = switch_ports
.values()
.map(|rp| {
2 + rp
.connected_switch
.as_ref()
.map_or(0, |s| s.switch_ports.len())
})
.sum();
self.devices.reserve(estimated_devices);
for (index, rp) in switch_ports {
let chassis = index + 1;
// Root Port
let pcie_root_port = PCIeRootPortDevice::new(rp.port_id(), &rp.bus)
.with_chassis(chassis)
.with_slot(index)
.with_multifunction(false)
.with_addr("0");
// (slot, chassis) pair is mandatory and must be unique for each PCI Express Root Port
let (slot, chassis) = (format!("{index}"), format!("{}", index + 1));
let mut pcie_root_port = PCIeRootPortDevice::new(
&rp.port_id(),
&rp.bus, // pcie.0
&chassis,
&slot,
false,
"0",
);
pcie_root_port
.set_mem_reserve(mem_reserve)
.set_pref64_reserve(pref64_reserve);
self.devices.push(Box::new(pcie_root_port));
if let Some(switch) = &rp.connected_switch {
// 2. Create Upstream Port
// -device x3130-upstream,id=upstream_port1,bus=root_port1[,addr=x]
let upstream_port_id = switch.port_id();
let pcie_switch_upstream_port =
PCIeSwitchUpstreamPortDevice::new(&upstream_port_id, &switch.bus);
self.devices.push(Box::new(pcie_switch_upstream_port));
// Upstream Port
let upstream_port =
PCIeSwitchUpstreamPortDevice::new(switch.port_id(), &switch.bus);
self.devices.push(Box::new(upstream_port));
// 3. Create Downstream Ports
// -device xio3130-downstream,id=downstream_port1,bus=upstream_port1,chassis=x1,slot=y1[,addr=z1]]
let next_chassis = chassis.parse::<u32>()? + 1;
for (index, swdp) in switch.switch_ports.iter() {
let pcie_switch_downstream_port = PCIeSwitchDownstreamPortDevice::new(
&swdp.bus,
next_chassis + index,
*index,
);
self.devices.push(Box::new(pcie_switch_downstream_port));
// Downstream Ports
let next_chassis = chassis + 1;
for (idx, swdp) in &switch.switch_ports {
let downstream_port =
PCIeSwitchDownstreamPortDevice::new(&swdp.bus, next_chassis + idx, *idx);
self.devices.push(Box::new(downstream_port));
}
}
}