kata-sys-util: Add PCI helpers for VFIO cold-plug paths

The VFIO cold-plug path needs to resolve a PCI device's sysfs address
from its /dev/vfio/ group or iommufd cdev node. Extend the PCI helpers
in kata-sys-util to support this: add a function that walks
/sys/bus/pci/devices to find a device by its IOMMU group, and expose the
guest BDF that the QEMU command line will reference.

These helpers are consumed by the runtime-rs hypervisor crate when
building VFIO device descriptors for the QEMU command line.

Signed-off-by: Alex Lyn <alex.lyn@antgroup.com>
Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
This commit is contained in:
Alex Lyn
2026-04-04 11:33:23 +02:00
committed by Fabiano Fidêncio
parent f95503b4ab
commit c00e46bcbc
6 changed files with 4 additions and 470 deletions

41
Cargo.lock generated
View File

@@ -1860,12 +1860,6 @@ dependencies = [
"serde_json",
]
[[package]]
name = "downcast"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1"
[[package]]
name = "dragonball"
version = "0.1.0"
@@ -2272,12 +2266,6 @@ dependencies = [
"num",
]
[[package]]
name = "fragile"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28dd6caf6059519a65843af8fe2a3ae298b14b80179855aeb4adc2c1934ee619"
[[package]]
name = "fs2"
version = "0.4.3"
@@ -3684,7 +3672,6 @@ dependencies = [
"kata-types",
"lazy_static",
"libc",
"mockall",
"nix 0.26.4",
"oci-spec 0.8.4",
"pci-ids",
@@ -4214,32 +4201,6 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "mockall"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39a6bfcc6c8c7eed5ee98b9c3e33adc726054389233e201c95dab2d41a3839d2"
dependencies = [
"cfg-if 1.0.4",
"downcast",
"fragile",
"mockall_derive",
"predicates",
"predicates-tree",
]
[[package]]
name = "mockall_derive"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25ca3004c2efe9011bd4e461bd8256445052b9615405b4f7ea43fc8ca5c20898"
dependencies = [
"cfg-if 1.0.4",
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "msvc_spectre_libs"
version = "0.1.3"
@@ -5725,7 +5686,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b"
dependencies = [
"anyhow",
"itertools",
"itertools 0.10.5",
"proc-macro2",
"quote",
"syn 2.0.117",

View File

@@ -25,8 +25,7 @@ subprocess = "0.2.8"
rand = { version = "0.10.1", features = ["std", "std_rng", "thread_rng"] }
thiserror = "1.0.30"
hex = "0.4.3"
pci-ids = "0.2.5"
mockall = "0.13.1"
pci-ids = "0.2.6"
kata-types = { path = "../kata-types" }
oci-spec = { version = "0.8.1", features = ["runtime"] }

View File

@@ -1,160 +0,0 @@
// Copyright (c) 2024 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
#![allow(dead_code)]
use super::pci_manager::{MemoryResourceTrait, PCIDevice, PCIDeviceManager, PCIDevices};
const PCI_DEVICES_ROOT: &str = "/sys/bus/pci/devices";
const PCI_NVIDIA_VENDOR_ID: u16 = 0x10DE;
const PCI3D_CONTROLLER_CLASS: u32 = 0x030200;
/// Vendor/class pair used to select PCI devices from the host's PCI device list.
struct NvidiaPCIDevice {
// PCI vendor id handed to `get_pci_devices` as the vendor filter.
vendor_id: u16,
// PCI class id compared against each candidate device's `class` field.
class_id: u32,
}
impl NvidiaPCIDevice {
    /// Builds a selector from a PCI vendor id and a PCI class id.
    pub fn new(vendor_id: u16, class_id: u32) -> Self {
        Self {
            vendor_id,
            class_id,
        }
    }

    /// Returns the largest 32-bit and 64-bit addressable memory totals found
    /// across the devices reported by `get_pci_devices` for this vendor.
    ///
    /// Both values start from a 2 MiB floor, per-device totals are requested
    /// rounded up to a power of two, and the 32-bit result is doubled before
    /// it is returned.
    pub fn get_bars_max_addressable_memory(&self) -> (u64, u64) {
        // Lower bound used for both address widths.
        let floor: u64 = 2 * 1024 * 1024;

        let (largest_32bit, largest_64bit) = self
            .get_pci_devices(Some(self.vendor_id))
            .iter()
            .map(|gpu| gpu.resources.get_total_addressable_memory(true))
            .fold((floor, floor), |(best_32, best_64), (size_32, size_64)| {
                (best_32.max(size_32), best_64.max(size_64))
            });

        (largest_32bit * 2, largest_64bit)
    }

    /// Reports whether `device` carries the configured class id.
    ///
    /// NOTE(review): this predicate is identical to `is_3d_controller`; it
    /// presumably should test a distinct VGA class id — confirm intent.
    fn is_vga_controller(&self, device: &PCIDevice) -> bool {
        device.class == self.class_id
    }

    /// Reports whether `device` carries the configured class id.
    fn is_3d_controller(&self, device: &PCIDevice) -> bool {
        device.class == self.class_id
    }

    /// A device counts as a GPU when either controller predicate matches.
    fn is_gpu(&self, device: &PCIDevice) -> bool {
        if self.is_vga_controller(device) {
            return true;
        }
        self.is_3d_controller(device)
    }
}
impl PCIDevices for NvidiaPCIDevice {
    /// Lists the devices under `PCI_DEVICES_ROOT` that pass `is_gpu`,
    /// forwarding `vendor` to the device manager as its filter.
    ///
    /// A failure to enumerate devices is treated as "no devices".
    fn get_pci_devices(&self, vendor: Option<u16>) -> Vec<PCIDevice> {
        PCIDeviceManager::new(PCI_DEVICES_ROOT)
            .get_all_devices(vendor)
            .unwrap_or_default()
            .into_iter()
            .filter(|candidate| self.is_gpu(candidate))
            .collect()
    }
}
/// Convenience entry point: evaluates the BAR memory maxima for the selector
/// built from `PCI_NVIDIA_VENDOR_ID` and `PCI3D_CONTROLLER_CLASS`.
///
/// Returns `(max_32bit, max_64bit)` exactly as produced by
/// `NvidiaPCIDevice::get_bars_max_addressable_memory`.
pub fn get_bars_max_addressable_memory() -> (u64, u64) {
    NvidiaPCIDevice::new(PCI_NVIDIA_VENDOR_ID, PCI3D_CONTROLLER_CLASS)
        .get_bars_max_addressable_memory()
}
// Unit tests for the BAR memory maxima computation.
#[cfg(test)]
mod tests {
use std::collections::HashMap;
use std::path::PathBuf;
use super::*;
use crate::pcilibs::pci_manager::{
MemoryResource, MemoryResources, MockPCIDevices, PCI_BASE_ADDRESS_MEM_TYPE32,
PCI_BASE_ADDRESS_MEM_TYPE64,
};
use mockall::predicate::*;
// Checks the values returned by `NvidiaPCIDevice::get_bars_max_addressable_memory`.
#[test]
fn test_get_bars_max_addressable_memory() {
// Baseline device description with no memory resources attached.
let pci_device = PCIDevice {
device_path: PathBuf::new(),
address: "0000:00:00.0".to_string(),
vendor: PCI_NVIDIA_VENDOR_ID,
class: PCI3D_CONTROLLER_CLASS,
class_name: "3D Controller".to_string(),
device: 0x1c82,
device_name: "NVIDIA Device".to_string(),
driver: "nvidia".to_string(),
iommu_group: 0,
numa_node: 0,
resources: MemoryResources::default(),
};
let devices = vec![pci_device.clone()];
// Mock implementation of the `PCIDevices` trait
let mut mock_pci_manager = MockPCIDevices::default();
// First expectation: return the resource-less device for the NVIDIA vendor id
mock_pci_manager
.expect_get_pci_devices()
.with(eq(Some(PCI_NVIDIA_VENDOR_ID)))
.returning(move |_| devices.clone());
// Object under test
let nvidia_device = NvidiaPCIDevice::new(PCI_NVIDIA_VENDOR_ID, PCI3D_CONTROLLER_CLASS);
// Prepare memory resources
let mut resources: MemoryResources = HashMap::new();
// resource0: 32-bit region, size = end - start + 1 = 1024
resources.insert(
0,
MemoryResource {
start: 0,
end: 1023,
flags: PCI_BASE_ADDRESS_MEM_TYPE32,
path: PathBuf::from("/fake/path/resource0"),
},
);
// resource1: 64-bit region, size = end - start + 1 = 1024
resources.insert(
1,
MemoryResource {
start: 1024,
end: 2047,
flags: PCI_BASE_ADDRESS_MEM_TYPE64,
path: PathBuf::from("/fake/path/resource1"),
},
);
// Same device as above, now carrying the two regions.
let pci_device_with_resources = PCIDevice {
resources: resources.clone(),
..pci_device
};
// Second expectation: return the device that carries resources
mock_pci_manager
.expect_get_pci_devices()
.with(eq(Some(PCI_NVIDIA_VENDOR_ID)))
.returning(move |_| vec![pci_device_with_resources.clone()]);
// NOTE(review): `mock_pci_manager` is never passed to `nvidia_device`; the call
// below goes through `NvidiaPCIDevice`'s own `get_pci_devices`, which scans
// `PCI_DEVICES_ROOT`, so the result presumably depends on the host — confirm.
let (max_32bit, max_64bit) = nvidia_device.get_bars_max_addressable_memory();
// Expected values equal the 2 MiB floor (doubled for the 32-bit result)
assert_eq!(max_32bit, 2 * 2 * 1024 * 1024);
assert_eq!(max_64bit, 2 * 1024 * 1024);
}
}

View File

@@ -2,5 +2,4 @@
//
// SPDX-License-Identifier: Apache-2.0
//
mod devices;
mod pci_manager;

View File

@@ -7,9 +7,8 @@
use std::collections::HashMap;
use std::fs;
use std::io;
use std::path::{Path, PathBuf};
use std::path::PathBuf;
use mockall::automock;
use pci_ids::{Classes, Vendors};
const PCI_DEV_DOMAIN: &str = "0000";
@@ -18,89 +17,11 @@ const PCI_CONFIG_SPACE_SZ: u64 = 256;
const UNKNOWN_DEVICE: &str = "UNKNOWN_DEVICE";
const UNKNOWN_CLASS: &str = "UNKNOWN_CLASS";
const PCI_IOV_NUM_BAR: usize = 6;
const PCI_BASE_ADDRESS_MEM_TYPE_MASK: u64 = 0x06;
pub(crate) const PCI_BASE_ADDRESS_MEM_TYPE32: u64 = 0x00; // 32 bit address
pub(crate) const PCI_BASE_ADDRESS_MEM_TYPE64: u64 = 0x04; // 64 bit address
/// Turns a PCI address string into a number by dropping every `:` and `.`
/// and reading what remains as hexadecimal.
///
/// Yields `0` when the remaining text is not a valid hexadecimal `u64`.
fn address_to_id(address: &str) -> u64 {
    let hex_digits: String = address
        .chars()
        .filter(|c| !matches!(c, ':' | '.'))
        .collect();

    match u64::from_str_radix(&hex_digits, 16) {
        Ok(id) => id,
        Err(_) => 0,
    }
}
/// Returns the smallest power of two that is greater than or equal to `n`.
///
/// `0` maps to `1`. Values above `2^63` have no representable result; as with
/// the previous hand-rolled bit smearing, that overflows (panic in debug
/// builds, `0` in release builds).
fn calc_next_power_of_2(n: u64) -> u64 {
    // `max(1)` keeps the documented `0 -> 1` mapping explicit; the standard
    // library routine replaces the manual shift-and-or cascade.
    n.max(1).next_power_of_two()
}
/// One memory region of a PCI device, as read from a line of the device's
/// `resource` file.
#[derive(Clone, Debug, Default)]
pub(crate) struct MemoryResource {
// First address of the region.
pub(crate) start: u64,
// Last address of the region; the region size is computed as `end - start + 1`.
pub(crate) end: u64,
// Flags word; masked with `PCI_BASE_ADDRESS_MEM_TYPE_MASK` to tell 32-bit
// regions from 64-bit ones.
pub(crate) flags: u64,
// Path of the matching `resource{i}` entry inside the device directory.
pub(crate) path: PathBuf,
}
/// Memory regions of one device, keyed by the zero-based line index of the
/// region in the `resource` file.
pub(crate) type MemoryResources = HashMap<usize, MemoryResource>;
/// Aggregation of addressable memory over a set of memory regions.
pub(crate) trait MemoryResourceTrait {
// Returns `(total_32bit, total_64bit)`; when `round_up` is true each total is
// rounded up to a power of two.
fn get_total_addressable_memory(&self, round_up: bool) -> (u64, u64);
}
impl MemoryResourceTrait for MemoryResources {
fn get_total_addressable_memory(&self, round_up: bool) -> (u64, u64) {
let mut mem_size_32bit = 0u64;
let mut mem_size_64bit = 0u64;
let mut keys: Vec<_> = self.keys().cloned().collect();
keys.sort();
for (num_bar, key) in keys.into_iter().enumerate() {
if key >= PCI_IOV_NUM_BAR || num_bar == PCI_IOV_NUM_BAR {
break;
}
if let Some(region) = self.get(&key) {
let flags = region.flags & PCI_BASE_ADDRESS_MEM_TYPE_MASK;
let mem_type_32bit = flags == PCI_BASE_ADDRESS_MEM_TYPE32;
let mem_type_64bit = flags == PCI_BASE_ADDRESS_MEM_TYPE64;
let mem_size = region.end - region.start + 1;
if mem_type_32bit {
mem_size_32bit += mem_size;
}
if mem_type_64bit {
mem_size_64bit += mem_size;
}
}
}
if round_up {
mem_size_32bit = calc_next_power_of_2(mem_size_32bit);
mem_size_64bit = calc_next_power_of_2(mem_size_64bit);
}
(mem_size_32bit, mem_size_64bit)
}
}
/// Source of PCI devices. `#[automock]` generates the `MockPCIDevices` type
/// used by the unit tests.
#[automock]
pub trait PCIDevices {
// Returns the PCI devices known to the implementor; `vendor` is presumably an
// optional vendor-id filter — confirm against implementors.
fn get_pci_devices(&self, vendor: Option<u16>) -> Vec<PCIDevice>;
}
#[derive(Clone, Debug, Default)]
pub struct PCIDevice {
pub(crate) device_path: PathBuf,
@@ -113,7 +34,6 @@ pub struct PCIDevice {
pub(crate) driver: String,
pub(crate) iommu_group: i64,
pub(crate) numa_node: i64,
pub(crate) resources: MemoryResources,
}
pub struct PCIDeviceManager {
@@ -148,7 +68,7 @@ impl PCIDeviceManager {
Ok(pci_devices)
}
fn get_device_by_pci_bus_id(
pub fn get_device_by_pci_bus_id(
&self,
address: &str,
vendor: Option<u16>,
@@ -197,8 +117,6 @@ impl PCIDeviceManager {
.map(|numa| numa.trim().parse::<i64>().unwrap_or(-1))
.unwrap_or(-1);
let resources = self.parse_resources(&device_path)?;
let mut device_name = UNKNOWN_DEVICE.to_string();
for vendor in Vendors::iter() {
for device in vendor.devices() {
@@ -226,7 +144,6 @@ impl PCIDeviceManager {
driver,
iommu_group,
numa_node,
resources,
device_name,
class_name,
};
@@ -235,41 +152,6 @@ impl PCIDeviceManager {
Ok(Some(pci_device))
}
/// Parses the `resource` file inside `device_path` into a map of memory regions.
///
/// Every line must hold exactly three whitespace-separated hexadecimal values
/// (start, end, flags), each optionally prefixed with `0x`. A line is stored
/// under its zero-based line index only when `end > start`, so unused entries
/// such as all-zero lines are skipped.
///
/// # Errors
///
/// Returns the underlying I/O error when the file cannot be read, and an
/// `io::ErrorKind::InvalidData` error when a line does not contain exactly
/// three fields or a field is not valid hexadecimal.
fn parse_resources(&self, device_path: &Path) -> io::Result<MemoryResources> {
    // One parser for all three columns instead of three copies of the same chain.
    fn parse_hex(field: &str) -> io::Result<u64> {
        u64::from_str_radix(field.trim_start_matches("0x"), 16)
            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
    }

    let content = fs::read_to_string(device_path.join("resource"))?;
    let mut resources: MemoryResources = MemoryResources::new();

    for (i, line) in content.lines().enumerate() {
        let values: Vec<&str> = line.split_whitespace().collect();
        if values.len() != 3 {
            // The check rejects too few as well as too many fields, so the
            // message reports the actual count rather than "more than 3".
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                format!("expected 3 entries in line '{i}', found {}", values.len()),
            ));
        }

        let mem_start = parse_hex(values[0])?;
        let mem_end = parse_hex(values[1])?;
        let mem_flags = parse_hex(values[2])?;

        if mem_end > mem_start {
            resources.insert(
                i,
                MemoryResource {
                    start: mem_start,
                    end: mem_end,
                    flags: mem_flags,
                    path: device_path.join(format!("resource{i}")),
                },
            );
        }
    }

    Ok(resources)
}
}
/// Checks if the given BDF corresponds to a PCIe device.
@@ -312,63 +194,9 @@ mod tests {
fs::write(device_path.join("device"), "0x1234").unwrap();
fs::write(device_path.join("class"), "0x060100").unwrap();
fs::write(device_path.join("numa_node"), "0").unwrap();
fs::write(
device_path.join("resource"),
"0x00000000 0x0000ffff 0x00000404\n",
)
.unwrap();
dir
}
/// Table-driven check of `calc_next_power_of_2` on small inputs.
#[test]
fn test_calc_next_power_of_2() {
    // (input, expected next power of two)
    let cases: [(u64, u64); 7] = [
        (0, 1),
        (1, 1),
        (6, 8),
        (9, 16),
        (15, 16),
        (16, 16),
        (17, 32),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(calc_next_power_of_2(input), expected);
    }
}
/// One 32-bit and one 64-bit region of 1024 bytes each must be summed
/// separately, with and without rounding.
#[test]
fn test_get_total_addressable_memory() {
    let region = |start: u64, end: u64, flags: u64, path: &str| MemoryResource {
        start,
        end,
        flags,
        path: PathBuf::from(path),
    };

    let mut bars: MemoryResources = HashMap::new();
    // 32-bit region
    bars.insert(
        0,
        region(0, 1023, PCI_BASE_ADDRESS_MEM_TYPE32, "/path/resource0"),
    );
    // 64-bit region
    bars.insert(
        1,
        region(1024, 2047, PCI_BASE_ADDRESS_MEM_TYPE64, "/path/resource1"),
    );

    let (plain_32, plain_64) = bars.get_total_addressable_memory(false);
    assert_eq!(plain_32, 1024);
    assert_eq!(plain_64, 1024);

    // 1024 is already a power of two, so rounding leaves the totals unchanged.
    let (rounded_32, rounded_64) = bars.get_total_addressable_memory(true);
    assert_eq!(rounded_32, 1024);
    assert_eq!(rounded_64, 1024);
}
#[test]
fn test_get_all_devices() {
// Setup mock data
@@ -390,25 +218,6 @@ mod tests {
assert_eq!(device.class, 0x060100);
}
/// Parses the `resource` file of the mock device and checks the single
/// region it yields.
#[test]
fn test_parse_resources() {
    let mock_root = setup_mock_device_files();
    let manager = PCIDeviceManager::new(&mock_root.path().to_string_lossy());

    let parsed = manager.parse_resources(&mock_root.path().join("0000:ff:1f.0"));
    assert!(parsed.is_ok());

    let resources = parsed.unwrap();
    assert_eq!(resources.len(), 1);

    // The only line of the mock file is line index 0.
    let first = resources.get(&0).unwrap();
    assert_eq!(first.start, 0x00000000);
    assert_eq!(first.end, 0x0000ffff);
    assert_eq!(first.flags, 0x00000404);
}
#[test]
fn test_is_pcie_device() {
// Create a mock PCI device config file

View File

@@ -1307,12 +1307,6 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0688c2a7f92e427f44895cd63841bff7b29f8d7a1648b9e7e07a4a365b2e1257"
[[package]]
name = "downcast"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1"
[[package]]
name = "dsa"
version = "0.6.3"
@@ -1605,15 +1599,6 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "fragile"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8878864ba14bb86e818a412bfd6f18f9eabd4ec0f008a28e8f7eb61db532fcf9"
dependencies = [
"futures-core",
]
[[package]]
name = "fs_extra"
version = "1.3.0"
@@ -2595,7 +2580,6 @@ dependencies = [
"kata-types",
"lazy_static",
"libc",
"mockall",
"nix 0.26.4",
"oci-spec 0.8.4",
"pci-ids",
@@ -2908,32 +2892,6 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "mockall"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39a6bfcc6c8c7eed5ee98b9c3e33adc726054389233e201c95dab2d41a3839d2"
dependencies = [
"cfg-if 1.0.4",
"downcast",
"fragile",
"mockall_derive",
"predicates",
"predicates-tree",
]
[[package]]
name = "mockall_derive"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25ca3004c2efe9011bd4e461bd8256445052b9615405b4f7ea43fc8ca5c20898"
dependencies = [
"cfg-if 1.0.4",
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "multimap"
version = "0.8.3"
@@ -3648,32 +3606,6 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "predicates"
version = "3.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ada8f2932f28a27ee7b70dd6c1c39ea0675c55a36879ab92f3a715eaa1e63cfe"
dependencies = [
"anstyle",
"predicates-core",
]
[[package]]
name = "predicates-core"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cad38746f3166b4031b1a0d39ad9f954dd291e7854fcc0eed52ee41a0b50d144"
[[package]]
name = "predicates-tree"
version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0de1b847b39c8131db0467e9df1ff60e6d0562ab8e9a16e568ad0fdb372e2f2"
dependencies = [
"predicates-core",
"termtree",
]
[[package]]
name = "prettyplease"
version = "0.2.37"
@@ -5364,12 +5296,6 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "termtree"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683"
[[package]]
name = "tests_utils"
version = "0.1.0"