mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-05-04 04:00:07 +00:00
runtime-rs: Add QEMU VFIO command-line parameter structs
Add QEMU command-line parameter types for VFIO device cold-plug: - ObjectIommufd: /dev/iommu object for iommufd-backed passthrough - PCIeVfioDevice: vfio-pci device on a PCIe root port or switch port, supporting both legacy VFIO group and iommufd cdev backends - FWCfgDevice: firmware config device for fw_cfg blob injection - VfioDeviceBase/VfioDeviceConfig/VfioDeviceGroup: high-level wrappers that compose the above into complete QEMU argument sets, resolving IOMMU groups, device nodes, and per-device fw_cfg entries Refactor existing cmdline structs (BalloonDevice, VirtioNetDevice, VirtioBlkDevice, etc.) to use a shared devices_to_params() helper and align the ToQemuParams implementations. Signed-off-by: Alex Lyn <alex.lyn@antgroup.com> Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
This commit is contained in:
committed by
Fabiano Fidêncio
parent
748252ba15
commit
75ed85205a
@@ -3,7 +3,7 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use crate::device::topology::{PCIePortBusPrefix, TopologyPortDevice, DEFAULT_PCIE_ROOT_BUS};
|
||||
use crate::device::topology::{TopologyPortDevice, DEFAULT_PCIE_ROOT_BUS};
|
||||
use crate::qemu::qmp::get_qmp_socket_path;
|
||||
use crate::utils::{
|
||||
chown_to_parent, clear_cloexec, create_vhost_net_fds, open_named_tuntap, uses_native_ccw_bus,
|
||||
@@ -11,6 +11,7 @@ use crate::utils::{
|
||||
};
|
||||
|
||||
use crate::{kernel_param::KernelParams, Address, HypervisorConfig};
|
||||
use std::borrow::Cow;
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
@@ -19,11 +20,12 @@ use kata_types::rootless::is_rootless;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Display;
|
||||
use std::fmt::{Display, Write};
|
||||
use std::fs::{read_to_string, File};
|
||||
use std::os::fd::{AsRawFd, FromRawFd, IntoRawFd};
|
||||
use std::os::unix::net::UnixListener;
|
||||
use std::path::PathBuf;
|
||||
use std::str;
|
||||
use tokio;
|
||||
|
||||
// These should have been called MiB and GiB for better readability but the
|
||||
@@ -510,7 +512,7 @@ impl Machine {
|
||||
accel: "kvm".to_owned(),
|
||||
options: config.machine_info.machine_accelerators.clone(),
|
||||
nvdimm: false,
|
||||
kernel_irqchip: None,
|
||||
kernel_irqchip: Some("on".to_owned()), // default to off, will be turned on if needed by VFIO devices
|
||||
confidential_guest_support: "".to_owned(),
|
||||
is_nvdimm_supported,
|
||||
memory_backend: None,
|
||||
@@ -2012,108 +2014,146 @@ impl ToQemuParams for ObjectTdxGuest {
|
||||
}
|
||||
}
|
||||
|
||||
/// PCIeRootPortDevice directly attached onto the root bus
|
||||
/// -device pcie-root-port,id=rp0,bus=pcie.0,chassis=0,slot=0,multifunction=off,pref64-reserve=<X>B,mem-reserve=<Y>B
|
||||
#[derive(Debug, Default)]
|
||||
const DEFAULT_START_ADDR: &str = "0x5";
|
||||
//const DEFAULT_ADDR: &str = "0x0";
|
||||
|
||||
/// Configuration for the IOMMUFD object backend.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ObjectIommufd {
|
||||
id: String,
|
||||
}
|
||||
|
||||
impl ObjectIommufd {
|
||||
pub fn new(id: impl Into<String>) -> Self {
|
||||
Self { id: id.into() }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ToQemuParams for ObjectIommufd {
|
||||
async fn qemu_params(&self) -> Result<Vec<String>> {
|
||||
Ok(vec![
|
||||
"-object".to_string(),
|
||||
format!("iommufd,id={}", self.id),
|
||||
])
|
||||
}
|
||||
}
|
||||
|
||||
/// Representation of a PCIe Root Port device in QEMU.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PCIeRootPortDevice {
|
||||
id: String,
|
||||
bus: String,
|
||||
chassis: String,
|
||||
slot: String,
|
||||
multifunction: String,
|
||||
bus: Cow<'static, str>,
|
||||
port: Option<u16>,
|
||||
/// Numerical identifier for the chassis.
|
||||
chassis: u32,
|
||||
/// Optional slot identifier.
|
||||
slot: Option<u32>,
|
||||
/// Whether the device supports multiple functions.
|
||||
multifunction: bool,
|
||||
/// PCI address; supports simple ("0x5") and complex multifunction ("0x5.0x1") formats.
|
||||
addr: String,
|
||||
|
||||
mem_reserve: String,
|
||||
pref64_reserve: String,
|
||||
}
|
||||
|
||||
impl PCIeRootPortDevice {
|
||||
fn new(id: &str, bus: &str, chassis: &str, slot: &str, multifunc: bool, addr: &str) -> Self {
|
||||
PCIeRootPortDevice {
|
||||
id: id.to_string(),
|
||||
bus: if bus.is_empty() {
|
||||
DEFAULT_PCIE_ROOT_BUS.to_owned()
|
||||
} else {
|
||||
bus.to_string()
|
||||
/// Creates a new PCIe Root Port device instance.
|
||||
pub fn new(id: impl Into<String>, bus: impl Into<String>) -> Self {
|
||||
Self {
|
||||
id: id.into(),
|
||||
bus: {
|
||||
let bus_str = bus.into();
|
||||
if bus_str.is_empty() {
|
||||
Cow::Borrowed(DEFAULT_PCIE_ROOT_BUS)
|
||||
} else {
|
||||
Cow::Owned(bus_str)
|
||||
}
|
||||
},
|
||||
chassis: if chassis.is_empty() {
|
||||
"0x00".to_owned()
|
||||
} else {
|
||||
chassis.to_owned()
|
||||
},
|
||||
slot: if slot.is_empty() {
|
||||
"0x00".to_owned()
|
||||
} else {
|
||||
slot.to_owned()
|
||||
},
|
||||
multifunction: if multifunc {
|
||||
"on".to_owned()
|
||||
} else {
|
||||
"off".to_owned()
|
||||
},
|
||||
addr: if addr.is_empty() {
|
||||
"0x00".to_owned()
|
||||
} else {
|
||||
addr.to_owned()
|
||||
},
|
||||
..Default::default()
|
||||
port: None,
|
||||
chassis: 1,
|
||||
slot: None,
|
||||
multifunction: false,
|
||||
addr: DEFAULT_START_ADDR.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn set_mem_reserve(&mut self, mem_reserve: u64) -> &mut Self {
|
||||
if mem_reserve > 0 {
|
||||
self.mem_reserve = format!("{mem_reserve}B");
|
||||
}
|
||||
|
||||
pub fn with_port(mut self, port: u16) -> Self {
|
||||
self.port = Some(port);
|
||||
self
|
||||
}
|
||||
|
||||
fn set_pref64_reserve(&mut self, pref64_reserve: u64) -> &mut Self {
|
||||
if pref64_reserve > 0 {
|
||||
self.pref64_reserve = format!("{pref64_reserve}B");
|
||||
}
|
||||
|
||||
pub fn with_chassis(mut self, chassis: u32) -> Self {
|
||||
self.chassis = chassis;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_slot(mut self, slot: u32) -> Self {
|
||||
self.slot = Some(slot);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_multifunction(mut self, multifunction: bool) -> Self {
|
||||
self.multifunction = multifunction;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the PCI address. Supports standard ("0x5") and multifunction ("0x5.0x1") strings.
|
||||
pub fn with_addr(mut self, addr: impl Into<String>) -> Self {
|
||||
self.addr = addr.into();
|
||||
self
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ToQemuParams for PCIeRootPortDevice {
|
||||
async fn qemu_params(&self) -> Result<Vec<String>> {
|
||||
let mut device_params = Vec::new();
|
||||
let mut params = String::with_capacity(256);
|
||||
|
||||
// -device pcie-root-port,id=rp0
|
||||
device_params.push(format!("{},id={}", "pcie-root-port", self.id));
|
||||
device_params.push(format!("bus={}", self.bus));
|
||||
device_params.push(format!("chassis={}", self.chassis));
|
||||
device_params.push(format!("slot={}", self.slot));
|
||||
device_params.push(format!("multifunction={}", self.multifunction));
|
||||
if self.multifunction.as_str() == "on" {
|
||||
device_params.push(format!("addr={}", self.addr));
|
||||
}
|
||||
if !self.mem_reserve.is_empty() {
|
||||
device_params.push(format!("mem-reserve={}", self.mem_reserve));
|
||||
}
|
||||
if !self.pref64_reserve.is_empty() {
|
||||
device_params.push(format!("pref64-reserve={}", self.pref64_reserve));
|
||||
// Example: -device pcie-root-port,id=rp0
|
||||
write!(params, "pcie-root-port,id={}", self.id).unwrap();
|
||||
|
||||
if let Some(port) = self.port {
|
||||
write!(params, ",port={}", port).unwrap();
|
||||
}
|
||||
|
||||
Ok(vec!["-device".to_string(), device_params.join(",")])
|
||||
// Match govmm: only pass `addr=` for multifunction ports, or when a concrete address
|
||||
// is required (VFIO cold-plug uses e.g. 0x09). Placeholder pool ports use addr "0" from
|
||||
// `add_pcie_root_ports`; emitting `addr=0` for every `pcie-root-port` collides on
|
||||
// `pcie.0` ("slot 0 ... in use by mch").
|
||||
if self.multifunction || self.addr != "0" {
|
||||
write!(params, ",addr={}", self.addr).unwrap();
|
||||
}
|
||||
write!(params, ",chassis={}", self.chassis).unwrap();
|
||||
|
||||
if let Some(slot) = self.slot {
|
||||
write!(params, ",slot={}", slot).unwrap();
|
||||
}
|
||||
|
||||
write!(params, ",bus={}", self.bus).unwrap();
|
||||
write!(
|
||||
params,
|
||||
",multifunction={}",
|
||||
if self.multifunction { "on" } else { "off" }
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
Ok(vec!["-device".to_string(), params])
|
||||
}
|
||||
}
|
||||
|
||||
/// PCIeSwitchUpstreamPortDevice is the port attached to the root port.
|
||||
#[derive(Debug, Default)]
|
||||
/// PCIe Switch Upstream Port device, which must be connected to a PCIe Root Port,
|
||||
/// and can have PCIe devices or downstream ports connected to it.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PCIeSwitchUpstreamPortDevice {
|
||||
id: String,
|
||||
bus: String,
|
||||
}
|
||||
|
||||
impl PCIeSwitchUpstreamPortDevice {
|
||||
fn new(id: &str, bus: &str) -> Self {
|
||||
PCIeSwitchUpstreamPortDevice {
|
||||
id: id.to_string(),
|
||||
bus: bus.to_string(),
|
||||
pub fn new(id: impl Into<String>, bus: impl Into<String>) -> Self {
|
||||
Self {
|
||||
id: id.into(),
|
||||
bus: bus.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2121,17 +2161,16 @@ impl PCIeSwitchUpstreamPortDevice {
|
||||
#[async_trait]
|
||||
impl ToQemuParams for PCIeSwitchUpstreamPortDevice {
|
||||
async fn qemu_params(&self) -> Result<Vec<String>> {
|
||||
let mut device_params = Vec::new();
|
||||
|
||||
device_params.push(format!("{},id={}", "x3130-upstream", self.id));
|
||||
device_params.push(format!("bus={}", self.bus));
|
||||
|
||||
Ok(vec!["-device".to_string(), device_params.join(",")])
|
||||
Ok(vec![
|
||||
"-device".to_string(),
|
||||
format!("x3130-upstream,id={},bus={}", self.id, self.bus),
|
||||
])
|
||||
}
|
||||
}
|
||||
|
||||
/// PCIeSwitchDownstreamPortDevice is the port attached to the root port.
|
||||
#[derive(Debug, Default)]
|
||||
/// PCIe Switch Downstream Port device, which must be connected to a PCIe Root Port or another downstream port,
|
||||
/// and can have PCIe devices or another switch's downstream ports connected to it.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PCIeSwitchDownstreamPortDevice {
|
||||
// format: sup{n}, n>=0
|
||||
pub id: String,
|
||||
@@ -2140,36 +2179,312 @@ pub struct PCIeSwitchDownstreamPortDevice {
|
||||
pub bus: String,
|
||||
|
||||
// (slot, chassis) pair is mandatory and must be unique for each downstream port, >=0, default is 0x00
|
||||
pub chassis: String,
|
||||
pub chassis: u32,
|
||||
|
||||
// >=0, default is 0x00
|
||||
pub slot: String,
|
||||
pub slot: u32,
|
||||
}
|
||||
|
||||
impl PCIeSwitchDownstreamPortDevice {
|
||||
fn new(bus: &str, chassis: u32, index: u32) -> Self {
|
||||
PCIeSwitchDownstreamPortDevice {
|
||||
// "swdp{i}"
|
||||
id: format!("{}{}", PCIePortBusPrefix::SwitchDownstreamPort, index),
|
||||
// "swup0"
|
||||
bus: bus.to_string(),
|
||||
chassis: chassis.to_string(),
|
||||
slot: index.to_string(),
|
||||
pub fn new(bus: impl Into<String>, chassis: u32, slot: u32) -> Self {
|
||||
Self {
|
||||
id: format!("swdp{}", slot),
|
||||
bus: bus.into(),
|
||||
chassis,
|
||||
slot,
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn with_id(mut self, id: impl Into<String>) -> Self {
|
||||
self.id = id.into();
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ToQemuParams for PCIeSwitchDownstreamPortDevice {
|
||||
async fn qemu_params(&self) -> Result<Vec<String>> {
|
||||
let mut device_params = Vec::new();
|
||||
Ok(vec![
|
||||
"-device".to_string(),
|
||||
format!(
|
||||
"xio3130-downstream,id={},bus={},chassis={},slot={}",
|
||||
self.id, self.bus, self.chassis, self.slot
|
||||
),
|
||||
])
|
||||
}
|
||||
}
|
||||
|
||||
device_params.push(format!("{},id={}", "xio3130-downstream", self.id));
|
||||
device_params.push(format!("bus={}", self.bus));
|
||||
device_params.push(format!("chassis={}", self.chassis));
|
||||
device_params.push(format!("slot={}", self.slot));
|
||||
/// VFIO PCI device
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PCIeVfioDevice {
|
||||
host_bdf: String,
|
||||
bus: String,
|
||||
addr: String,
|
||||
iommufd: Option<String>,
|
||||
x_pci_vendor_id: Option<String>,
|
||||
x_pci_device_id: Option<String>,
|
||||
}
|
||||
|
||||
Ok(vec!["-device".to_string(), device_params.join(",")])
|
||||
impl PCIeVfioDevice {
|
||||
pub fn new(
|
||||
host_bdf: impl Into<String>,
|
||||
bus: impl Into<String>,
|
||||
iommufd: impl Into<String>,
|
||||
) -> Self {
|
||||
Self {
|
||||
host_bdf: host_bdf.into(),
|
||||
bus: bus.into(),
|
||||
addr: "0x0".to_string(),
|
||||
iommufd: Some(iommufd.into()),
|
||||
x_pci_vendor_id: None,
|
||||
x_pci_device_id: None,
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn with_addr(mut self, addr: impl Into<String>) -> Self {
|
||||
self.addr = addr.into();
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_vendor_id(mut self, vendor_id: impl Into<String>) -> Self {
|
||||
self.x_pci_vendor_id = Some(vendor_id.into());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_device_id(mut self, device_id: impl Into<String>) -> Self {
|
||||
self.x_pci_device_id = Some(device_id.into());
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ToQemuParams for PCIeVfioDevice {
|
||||
async fn qemu_params(&self) -> Result<Vec<String>> {
|
||||
let mut params = String::with_capacity(256);
|
||||
|
||||
write!(params, "vfio-pci,host={}", self.host_bdf).unwrap();
|
||||
write!(params, ",bus={}", self.bus).unwrap();
|
||||
write!(params, ",addr={}", self.addr).unwrap();
|
||||
|
||||
if let Some(iommufd) = &self.iommufd {
|
||||
write!(params, ",iommufd={}", iommufd).unwrap();
|
||||
}
|
||||
|
||||
if let Some(vendor) = &self.x_pci_vendor_id {
|
||||
write!(params, ",x-pci-vendor-id={}", vendor).unwrap();
|
||||
}
|
||||
|
||||
if let Some(device) = &self.x_pci_device_id {
|
||||
write!(params, ",x-pci-device-id={}", device).unwrap();
|
||||
}
|
||||
|
||||
Ok(vec!["-device".to_string(), params])
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub struct VfioDeviceBase {
|
||||
/// Host BDF address (e.g., "0000:21:00.0" or the short form "21:00.0").
|
||||
pub host_bdf: String,
|
||||
|
||||
/// The bus to which the device is attached (e.g., "pci.1").
|
||||
pub bus: String,
|
||||
|
||||
/// IOMMU file descriptor ID (e.g., "iommufd0").
|
||||
pub iommufd: Option<String>,
|
||||
}
|
||||
|
||||
/// Comprehensive configuration for a VFIO device.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
#[allow(dead_code)]
|
||||
pub struct VfioDeviceConfig {
|
||||
/// Host BDF address (e.g., "0000:21:00.0" or "21:00.0").
|
||||
pub host_bdf: String,
|
||||
|
||||
/// The bus to which the device is attached (e.g., "pci.1").
|
||||
pub bus: String,
|
||||
|
||||
/// Port number of the associated PCIe Root Port.
|
||||
pub port: u16,
|
||||
|
||||
/// Chassis number of the associated PCIe Root Port.
|
||||
pub chassis: u32,
|
||||
|
||||
/// Whether to enable multifunction support on the PCIe Root Port.
|
||||
pub multifunction: bool,
|
||||
|
||||
/// Indicates if this is the primary device (function 0) in a multifunction group.
|
||||
/// If true, the Root Port will be configured with `multifunction=on`.
|
||||
pub is_multifunction_primary: bool,
|
||||
|
||||
/// Address of the PCIe Root Port on the system bus (e.g., "0x5" or "0x5.0x1").
|
||||
pub root_port_addr: String,
|
||||
|
||||
/// Device address for the VFIO device itself (typically "0x0").
|
||||
#[allow(dead_code)]
|
||||
pub vfio_addr: String,
|
||||
|
||||
/// Optional PCI Vendor ID override.
|
||||
pub x_pci_vendor_id: Option<String>,
|
||||
|
||||
/// Optional PCI Device ID override.
|
||||
pub x_pci_device_id: Option<String>,
|
||||
}
|
||||
|
||||
impl VfioDeviceConfig {
|
||||
/// Creates a new VFIO device configuration.
|
||||
pub fn new(host_bdf: impl Into<String>, port: u16, chassis: u32) -> Self {
|
||||
let chassis_val = chassis;
|
||||
Self {
|
||||
host_bdf: host_bdf.into(),
|
||||
bus: format!("pci.{}", chassis_val),
|
||||
port,
|
||||
chassis: chassis_val,
|
||||
multifunction: true,
|
||||
is_multifunction_primary: true,
|
||||
// Defaults to 0x5 based on port offset; subsequent devices increment from here.
|
||||
root_port_addr: format!("0x{}", port),
|
||||
vfio_addr: format!("0x{}", port),
|
||||
x_pci_vendor_id: None,
|
||||
x_pci_device_id: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_multifunction(mut self, multifunction: bool) -> Self {
|
||||
self.multifunction = multifunction;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_vfio_bus(mut self, bus: impl Into<String>) -> Self {
|
||||
self.bus = bus.into();
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets a specific root port address (used for non-multifunction modes).
|
||||
pub fn with_root_port_addr(mut self, addr: impl Into<String>) -> Self {
|
||||
self.root_port_addr = addr.into();
|
||||
self.is_multifunction_primary = false;
|
||||
self
|
||||
}
|
||||
|
||||
/// Configures the device as the primary device (function 0) in a multifunction group.
|
||||
#[allow(dead_code)]
|
||||
pub fn as_multifunction_primary(base_addr: impl Into<String>) -> Self {
|
||||
Self {
|
||||
is_multifunction_primary: true,
|
||||
root_port_addr: base_addr.into(),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Configures the device as a secondary device (functions 1-7) in a multifunction group.
|
||||
#[allow(dead_code)]
|
||||
pub fn as_multifunction_secondary(base_addr: impl Into<String>, function: u8) -> Self {
|
||||
if function == 0 || function > 7 {
|
||||
panic!("Function number must be between 1 and 7 for secondary devices");
|
||||
}
|
||||
Self {
|
||||
is_multifunction_primary: false,
|
||||
root_port_addr: format!("{}.0x{:x}", base_addr.into(), function),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
#[allow(dead_code)]
|
||||
pub fn with_vfio_addr(mut self, addr: impl Into<String>) -> Self {
|
||||
self.vfio_addr = addr.into();
|
||||
self
|
||||
}
|
||||
#[allow(dead_code)]
|
||||
pub fn with_vendor_id(mut self, vendor_id: impl Into<String>) -> Self {
|
||||
self.x_pci_vendor_id = Some(vendor_id.into());
|
||||
self
|
||||
}
|
||||
#[allow(dead_code)]
|
||||
pub fn with_device_id(mut self, device_id: impl Into<String>) -> Self {
|
||||
self.x_pci_device_id = Some(device_id.into());
|
||||
self
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// Configuration for a group of VFIO devices, typically used to manage multiple
|
||||
/// devices sharing the same PCI slot via multifunction support.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct VfioDeviceGroup {
|
||||
/// Base PCI slot address (e.g., "0x5").
|
||||
pub base_addr: String,
|
||||
|
||||
/// Identifier for the IOMMU file descriptor (IOMMUFD) backend.
|
||||
#[allow(dead_code)]
|
||||
pub iommufd: String,
|
||||
|
||||
/// Starting port number for the assigned PCIe root ports.
|
||||
pub start_port: u16,
|
||||
|
||||
/// Starting chassis number for the assigned PCIe root ports.
|
||||
pub start_chassis: u32,
|
||||
|
||||
/// List of host BDF (Bus-Device-Function) addresses.
|
||||
pub devices: Vec<String>,
|
||||
|
||||
/// Indicates whether to enable PCI multifunction support for this group.
|
||||
pub multifunction: bool,
|
||||
}
|
||||
|
||||
impl VfioDeviceGroup {
|
||||
pub fn new(
|
||||
base_addr: impl Into<String>,
|
||||
iommufd: impl Into<String>,
|
||||
start_port: u16,
|
||||
start_chassis: u32,
|
||||
) -> Self {
|
||||
Self {
|
||||
base_addr: base_addr.into(),
|
||||
iommufd: iommufd.into(),
|
||||
start_port,
|
||||
start_chassis,
|
||||
devices: Vec::new(),
|
||||
multifunction: false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_devices(mut self, devices: Vec<String>) -> Self {
|
||||
self.devices = devices;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_multifunction(mut self, multifunction: bool) -> Self {
|
||||
self.multifunction = multifunction;
|
||||
self
|
||||
}
|
||||
|
||||
/// Generates a list of configuration objects for all devices in the group.
|
||||
pub fn generate_configs(&self) -> Vec<VfioDeviceConfig> {
|
||||
self.devices
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(idx, bdf)| {
|
||||
let port = self.start_port + idx as u16;
|
||||
let chassis = self.start_chassis + idx as u32;
|
||||
|
||||
let addr = if idx == 0 && self.multifunction {
|
||||
// Use the base address for the primary device (function 0)
|
||||
self.base_addr.clone()
|
||||
} else if self.multifunction && idx > 0 {
|
||||
// Map subsequent devices to specific PCI functions (e.g., 0x5.0x1)
|
||||
format!("{}.0x{:x}", self.base_addr, idx)
|
||||
} else {
|
||||
// In non-multifunction mode, use the base address independently
|
||||
self.base_addr.clone()
|
||||
};
|
||||
|
||||
VfioDeviceConfig::new(bdf, port, chassis)
|
||||
.with_multifunction(idx == 0 && self.multifunction)
|
||||
.with_root_port_addr(addr)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2615,6 +2930,165 @@ impl<'a> QemuCmdLine<'a> {
|
||||
.set_nvdimm(false);
|
||||
}
|
||||
|
||||
pub fn add_iommufd(&mut self, id: impl Into<String>) -> Result<()> {
|
||||
let id_str = id.into();
|
||||
if !id_str.is_empty() {
|
||||
let iommufd = ObjectIommufd::new(id_str);
|
||||
self.devices.push(Box::new(iommufd));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// add_vfio_device
|
||||
/// "-object", "iommufd,id=iommufd0",
|
||||
///
|
||||
/// -device pcie-root-port,port=24,chassis=9,id=pci.9,bus=pcie.0,multifunction=on,addr=0x4
|
||||
/// -device vfio-pci,host=0000:21:00.0,x-pci-vendor-id=0x10de,x-pci-device-id=0x2321,bus=pci.1,addr=0x0,iommufd=iommufd0
|
||||
pub fn add_vfio_device(&mut self, config: VfioDeviceConfig) -> Result<()> {
|
||||
self.add_iommufd("iommufd0")?;
|
||||
|
||||
let root_port_id = format!("pci.{}", config.chassis);
|
||||
let root_port = PCIeRootPortDevice::new(&root_port_id, DEFAULT_PCIE_ROOT_BUS)
|
||||
.with_port(config.port)
|
||||
.with_chassis(config.chassis)
|
||||
.with_multifunction(config.multifunction)
|
||||
.with_addr(&config.root_port_addr);
|
||||
|
||||
let mut vfio_device = PCIeVfioDevice::new(&config.host_bdf, root_port_id, "iommufd0");
|
||||
|
||||
if let Some(vendor_id) = &config.x_pci_vendor_id {
|
||||
vfio_device = vfio_device.with_vendor_id(vendor_id);
|
||||
}
|
||||
|
||||
if let Some(device_id) = &config.x_pci_device_id {
|
||||
vfio_device = vfio_device.with_device_id(device_id);
|
||||
}
|
||||
|
||||
self.devices.reserve(2);
|
||||
self.devices.push(Box::new(root_port));
|
||||
self.devices.push(Box::new(vfio_device));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Configures PCIe VFIO devices using multifunction Root Ports for optimized address space
|
||||
/// -device pcie-root-port,id=root_port1,multifunction=on,chassis=x,addr=z.0 \
|
||||
/// -device pcie-root-port,id=root_port2,chassis=x1,addr=z.1 \
|
||||
pub fn add_pcie_vfio_device(&mut self, config: VfioDeviceConfig) -> Result<()> {
|
||||
let machine_type = &self.config.machine_info.machine_type;
|
||||
let (_start_addr, multi_function) = match machine_type.as_str() {
|
||||
"q35" | "virt" => (DEFAULT_START_ADDR, false),
|
||||
_ => {
|
||||
info!(
|
||||
sl!(),
|
||||
"PCIe root ports not supported for machine type: {}", machine_type
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
|
||||
let iommufd_name = format!("iommufd{}", config.bus);
|
||||
self.add_iommufd(&iommufd_name)?;
|
||||
|
||||
let root_port_id = config.bus.clone();
|
||||
let root_port = PCIeRootPortDevice::new(&root_port_id, DEFAULT_PCIE_ROOT_BUS)
|
||||
.with_chassis(config.chassis)
|
||||
.with_slot(config.port as u32)
|
||||
.with_multifunction(multi_function)
|
||||
.with_addr(format!("0x{:02x}", config.port));
|
||||
info!(sl!(), "PCIe Root Port: {:?}", root_port.clone());
|
||||
|
||||
let mut vfio_device = PCIeVfioDevice::new(&config.host_bdf, root_port_id, &iommufd_name);
|
||||
|
||||
if let Some(vendor_id) = &config.x_pci_vendor_id {
|
||||
vfio_device = vfio_device.with_vendor_id(vendor_id);
|
||||
}
|
||||
|
||||
if let Some(device_id) = &config.x_pci_device_id {
|
||||
vfio_device = vfio_device.with_device_id(device_id);
|
||||
}
|
||||
|
||||
self.devices.reserve(2);
|
||||
self.devices.push(Box::new(root_port));
|
||||
self.devices.push(Box::new(vfio_device));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Batch adds multiple VFIO devices to the QEMU command line.
|
||||
pub fn add_vfio_devices(
|
||||
&mut self,
|
||||
configs: Vec<VfioDeviceConfig>,
|
||||
) -> Result<()> {
|
||||
if configs.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
self.devices.reserve(configs.len() * 2);
|
||||
|
||||
for config in configs {
|
||||
self.add_vfio_device(config)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Adds a group of VFIO devices that share the same PCI slot (Multifunction configuration).
|
||||
pub fn add_vfio_device_group(&mut self, group: VfioDeviceGroup) -> Result<()> {
|
||||
let configs = group.generate_configs();
|
||||
self.add_vfio_devices(configs)
|
||||
}
|
||||
|
||||
/// Convenience method to configure a standard high-performance GPU and NVSwitch topology.
|
||||
#[allow(dead_code)]
|
||||
pub fn add_gpu_nvswitch_setup(
|
||||
&mut self,
|
||||
gpus: Vec<&str>,
|
||||
nvswitches: Vec<&str>,
|
||||
iommufd: &str,
|
||||
) -> Result<()> {
|
||||
self.add_iommufd(iommufd)?;
|
||||
|
||||
if !gpus.is_empty() {
|
||||
let gpu_group = VfioDeviceGroup::new("0x5", iommufd, 16, 1)
|
||||
.with_devices(gpus.iter().map(|s| s.to_string()).collect())
|
||||
.with_multifunction(true);
|
||||
|
||||
self.add_vfio_device_group(gpu_group)?;
|
||||
}
|
||||
|
||||
if !nvswitches.is_empty() {
|
||||
let nvswitch_configs: Vec<VfioDeviceConfig> = nvswitches
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(idx, bdf)| {
|
||||
let port = 24 + idx as u16;
|
||||
let chassis = 9 + idx as u32;
|
||||
let addr = if idx == 0 {
|
||||
"0x4".to_string()
|
||||
} else {
|
||||
format!("0x4.0x{:x}", idx)
|
||||
};
|
||||
|
||||
let full_bdf = if bdf.starts_with("0000:") {
|
||||
bdf.to_string()
|
||||
} else {
|
||||
format!("0000:{}", bdf)
|
||||
};
|
||||
|
||||
VfioDeviceConfig::new(full_bdf, port, chassis)
|
||||
.with_multifunction(idx == 0)
|
||||
.with_root_port_addr(addr)
|
||||
})
|
||||
.collect();
|
||||
|
||||
self.add_vfio_devices(nvswitch_configs)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Note: add_pcie_root_port and add_pcie_switch_port follow kata-runtime's related implementations of vfio devices.
|
||||
/// The design origins from https://github.com/qemu/qemu/blob/master/docs/pcie.txt
|
||||
///
|
||||
@@ -2645,15 +3119,13 @@ impl<'a> QemuCmdLine<'a> {
|
||||
pub fn add_pcie_root_ports(
|
||||
&mut self,
|
||||
root_ports: HashMap<u32, TopologyPortDevice>,
|
||||
mem_reserve: u64,
|
||||
pref64_reserve: u64,
|
||||
) -> Result<()> {
|
||||
if root_ports.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let machine_type: &str = &self.config.machine_info.machine_type;
|
||||
let (addr, multi_function) = match machine_type {
|
||||
let machine_type = &self.config.machine_info.machine_type;
|
||||
let (addr, multi_function) = match machine_type.as_str() {
|
||||
"q35" | "virt" => ("0", false),
|
||||
_ => {
|
||||
info!(
|
||||
@@ -2664,20 +3136,27 @@ impl<'a> QemuCmdLine<'a> {
|
||||
}
|
||||
};
|
||||
|
||||
// -device pcie-root-port,id=root_port1,multifunction=on,chassis=x,addr=z.0[,slot=y][,bus=pcie.0]
|
||||
for (index, rp) in root_ports.iter() {
|
||||
let (chassis, slot) = (format!("{}", index + 1), format!("{index}"));
|
||||
let mut root_port_dev = PCIeRootPortDevice::new(
|
||||
&rp.port_id(), // rpX
|
||||
&rp.bus, // pcie.0
|
||||
&chassis,
|
||||
&slot,
|
||||
multi_function,
|
||||
addr,
|
||||
);
|
||||
root_port_dev
|
||||
.set_mem_reserve(mem_reserve)
|
||||
.set_pref64_reserve(pref64_reserve);
|
||||
self.devices.reserve(root_ports.len());
|
||||
|
||||
for (index, rp) in root_ports {
|
||||
// VFIO cold-plug (see `add_pcie_vfio_device`) runs before this when resource order
|
||||
// is CDI VFIO then port pool; it already emits `pcie-root-port,id=rpN` for reserved
|
||||
// slots (`TopologyPortDevice::allocated`). Skip placeholders for those IDs or QEMU
|
||||
// errors with duplicate device id (e.g. two `id=rp0`).
|
||||
if rp.allocated {
|
||||
debug!(
|
||||
sl!(),
|
||||
"skip add_pcie_root_ports for {} (already allocated / emitted)",
|
||||
rp.port_id()
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
let root_port_dev = PCIeRootPortDevice::new(rp.port_id(), &rp.bus)
|
||||
.with_chassis(index + 1)
|
||||
.with_slot(index)
|
||||
.with_multifunction(multi_function)
|
||||
.with_addr(addr);
|
||||
|
||||
self.devices.push(Box::new(root_port_dev));
|
||||
}
|
||||
@@ -2698,15 +3177,13 @@ impl<'a> QemuCmdLine<'a> {
|
||||
pub fn add_pcie_switch_ports(
|
||||
&mut self,
|
||||
switch_ports: HashMap<u32, TopologyPortDevice>,
|
||||
mem_reserve: u64,
|
||||
pref64_reserve: u64,
|
||||
) -> Result<()> {
|
||||
if switch_ports.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let machine_type = &self.config.machine_info.machine_type;
|
||||
if machine_type != "q35" && machine_type != "virt" {
|
||||
if !matches!(machine_type.as_str(), "q35" | "virt") {
|
||||
info!(
|
||||
sl!(),
|
||||
"PCIe switch ports not supported for machine type: {}", machine_type
|
||||
@@ -2714,43 +3191,41 @@ impl<'a> QemuCmdLine<'a> {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
for (index, rp) in switch_ports.iter() {
|
||||
// 1. Create Root Port
|
||||
// -device pcie-root-port,id=root_port1,chassis=x,slot=y[,bus=pcie.0][,addr=z]
|
||||
let estimated_devices: usize = switch_ports
|
||||
.values()
|
||||
.map(|rp| {
|
||||
2 + rp
|
||||
.connected_switch
|
||||
.as_ref()
|
||||
.map_or(0, |s| s.switch_ports.len())
|
||||
})
|
||||
.sum();
|
||||
self.devices.reserve(estimated_devices);
|
||||
|
||||
for (index, rp) in switch_ports {
|
||||
let chassis = index + 1;
|
||||
|
||||
// Root Port
|
||||
let pcie_root_port = PCIeRootPortDevice::new(rp.port_id(), &rp.bus)
|
||||
.with_chassis(chassis)
|
||||
.with_slot(index)
|
||||
.with_multifunction(false)
|
||||
.with_addr("0");
|
||||
|
||||
// (slot, chassis) pair is mandatory and must be unique for each PCI Express Root Port
|
||||
let (slot, chassis) = (format!("{index}"), format!("{}", index + 1));
|
||||
let mut pcie_root_port = PCIeRootPortDevice::new(
|
||||
&rp.port_id(),
|
||||
&rp.bus, // pcie.0
|
||||
&chassis,
|
||||
&slot,
|
||||
false,
|
||||
"0",
|
||||
);
|
||||
pcie_root_port
|
||||
.set_mem_reserve(mem_reserve)
|
||||
.set_pref64_reserve(pref64_reserve);
|
||||
self.devices.push(Box::new(pcie_root_port));
|
||||
|
||||
if let Some(switch) = &rp.connected_switch {
|
||||
// 2. Create Upstream Port
|
||||
// -device x3130-upstream,id=upstream_port1,bus=root_port1[,addr=x]
|
||||
let upstream_port_id = switch.port_id();
|
||||
let pcie_switch_upstream_port =
|
||||
PCIeSwitchUpstreamPortDevice::new(&upstream_port_id, &switch.bus);
|
||||
self.devices.push(Box::new(pcie_switch_upstream_port));
|
||||
// Upstream Port
|
||||
let upstream_port =
|
||||
PCIeSwitchUpstreamPortDevice::new(switch.port_id(), &switch.bus);
|
||||
self.devices.push(Box::new(upstream_port));
|
||||
|
||||
// 3. Create Downstream Ports
|
||||
// -device xio3130-downstream,id=downstream_port1,bus=upstream_port1,chassis=x1,slot=y1[,addr=z1]]
|
||||
let next_chassis = chassis.parse::<u32>()? + 1;
|
||||
for (index, swdp) in switch.switch_ports.iter() {
|
||||
let pcie_switch_downstream_port = PCIeSwitchDownstreamPortDevice::new(
|
||||
&swdp.bus,
|
||||
next_chassis + index,
|
||||
*index,
|
||||
);
|
||||
self.devices.push(Box::new(pcie_switch_downstream_port));
|
||||
// Downstream Ports
|
||||
let next_chassis = chassis + 1;
|
||||
for (idx, swdp) in &switch.switch_ports {
|
||||
let downstream_port =
|
||||
PCIeSwitchDownstreamPortDevice::new(&swdp.bus, next_chassis + idx, *idx);
|
||||
self.devices.push(Box::new(downstream_port));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user