kata-sys-util: Add PCI helpers for VFIO cold-plug paths

The VFIO cold-plug path needs to resolve a PCI device's sysfs address
from its /dev/vfio/ group or iommufd cdev node. Extend the PCI helpers
in kata-sys-util to support this: add a function that walks
/sys/bus/pci/devices to find a device by its IOMMU group, and expose the
guest BDF that the QEMU command line will reference.

These helpers are consumed by the runtime-rs hypervisor crate when
building VFIO device descriptors for the QEMU command line.

Signed-off-by: Alex Lyn <alex.lyn@antgroup.com>
Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
This commit is contained in:
Alex Lyn
2026-04-04 11:33:23 +02:00
committed by Fabiano Fidêncio
parent 8493d73507
commit 2eccf08a76
4 changed files with 75 additions and 27 deletions

View File

@@ -25,7 +25,7 @@ subprocess = "0.2.8"
rand = "0.8.5"
thiserror = "1.0.30"
hex = "0.4.3"
pci-ids = "0.2.5"
pci-ids = "0.2.6"
mockall = "0.13.1"
kata-types = { path = "../kata-types" }

View File

@@ -2,7 +2,12 @@
//
// SPDX-License-Identifier: Apache-2.0
//
#![allow(dead_code)]
use std::collections::HashMap;
use crate::pcilibs::pci_manager::{
calc_next_power_of_2, PCI_BASE_ADDRESS_MEM_TYPE64, PCI_BASE_ADDRESS_MEM_TYPE_MASK,
};
use super::pci_manager::{MemoryResourceTrait, PCIDevice, PCIDeviceManager, PCIDevices};
@@ -24,21 +29,24 @@ impl NvidiaPCIDevice {
}
pub fn get_bars_max_addressable_memory(&self) -> (u64, u64) {
let mut max_32bit = 2 * 1024 * 1024;
let mut max_64bit = 2 * 1024 * 1024;
let mut total_32bit = 0u64;
let mut total_64bit = 0u64;
let nvgpu_devices = self.get_pci_devices(Some(self.vendor_id));
for dev in nvgpu_devices {
let (mem_size_32bit, mem_size_64bit) = dev.resources.get_total_addressable_memory(true);
if max_32bit < mem_size_32bit {
max_32bit = mem_size_32bit;
}
if max_64bit < mem_size_64bit {
max_64bit = mem_size_64bit;
}
let (mem_size_32bit, mem_size_64bit) =
dev.resources.get_total_addressable_memory(false);
total_32bit += mem_size_32bit;
total_64bit += mem_size_64bit;
}
(max_32bit * 2, max_64bit)
total_32bit = total_32bit.max(2 * 1024 * 1024);
total_64bit = total_64bit.max(2 * 1024 * 1024);
(
calc_next_power_of_2(total_32bit) * 2,
calc_next_power_of_2(total_64bit),
)
}
fn is_vga_controller(&self, device: &PCIDevice) -> bool {
@@ -77,6 +85,46 @@ pub fn get_bars_max_addressable_memory() -> (u64, u64) {
(max_32bit, max_64bit)
}
/// Computes a 64-bit MMIO size, expressed in MiB, for the PCI device at
/// `pci_addr`.
///
/// The device is looked up under `/sys/bus/pci/devices`. The byte sizes of all
/// of its non-empty resource regions whose memory-type flag bits equal
/// `PCI_BASE_ADDRESS_MEM_TYPE64` are added together, the total is passed
/// through `calc_next_power_of_2`, and the result is converted to MiB with an
/// integer division.
///
/// Returns the fallback of `256 * 1024` MiB (256GB) when the lookup fails,
/// when no device is found, or when the device exposes no 64-bit memory
/// regions.
///
/// NOTE(review): a rounded total below 1 MiB yields 0 because the final
/// division truncates — confirm that callers accept a zero result.
pub fn calc_fw_cfg_mmio64_mb(pci_addr: &str) -> u64 {
    const FALLBACK_MB: u64 = 256 * 1024; // 256GB
    const BYTES_PER_MB: u64 = 1024 * 1024;

    let manager = PCIDeviceManager::new("/sys/bus/pci/devices");
    let mut cache = HashMap::new();

    // A lookup error and a missing device are treated identically: fall back.
    let device = match manager.get_device_by_pci_bus_id(pci_addr, None, &mut cache) {
        Ok(Some(found)) => found,
        _ => return FALLBACK_MB,
    };

    // Accumulate the size of every populated region flagged as 64-bit memory.
    let mut total_64bit_bytes: u64 = 0;
    for (_, region) in device.resources.iter() {
        // Regions whose end does not lie beyond their start carry no size.
        if region.end <= region.start {
            continue;
        }
        let mem_type = region.flags & PCI_BASE_ADDRESS_MEM_TYPE_MASK;
        if mem_type == PCI_BASE_ADDRESS_MEM_TYPE64 {
            // `end` is inclusive, hence the +1.
            total_64bit_bytes += region.end - region.start + 1;
        }
    }

    if total_64bit_bytes == 0 {
        return FALLBACK_MB;
    }

    // Round up exactly once, on the byte count, and only then convert to MiB
    // (strictly aligned with pref64-reserve source); no second round-up is
    // applied to the MiB value.
    calc_next_power_of_2(total_64bit_bytes) / BYTES_PER_MB
}
#[cfg(test)]
mod tests {
use std::collections::HashMap;

View File

@@ -4,3 +4,6 @@
//
mod devices;
mod pci_manager;
pub use devices::calc_fw_cfg_mmio64_mb;
pub use devices::get_bars_max_addressable_memory;

View File

@@ -19,7 +19,7 @@ const UNKNOWN_DEVICE: &str = "UNKNOWN_DEVICE";
const UNKNOWN_CLASS: &str = "UNKNOWN_CLASS";
const PCI_IOV_NUM_BAR: usize = 6;
const PCI_BASE_ADDRESS_MEM_TYPE_MASK: u64 = 0x06;
pub const PCI_BASE_ADDRESS_MEM_TYPE_MASK: u64 = 0x06;
pub(crate) const PCI_BASE_ADDRESS_MEM_TYPE32: u64 = 0x00; // 32 bit address
pub(crate) const PCI_BASE_ADDRESS_MEM_TYPE64: u64 = 0x04; // 64 bit address
@@ -30,7 +30,7 @@ fn address_to_id(address: &str) -> u64 {
}
// Calculate the next power of 2.
fn calc_next_power_of_2(mut n: u64) -> u64 {
pub fn calc_next_power_of_2(mut n: u64) -> u64 {
if n < 1 {
return 1_u64;
}
@@ -67,22 +67,19 @@ impl MemoryResourceTrait for MemoryResources {
let mut keys: Vec<_> = self.keys().cloned().collect();
keys.sort();
for (num_bar, key) in keys.into_iter().enumerate() {
if key >= PCI_IOV_NUM_BAR || num_bar == PCI_IOV_NUM_BAR {
break;
}
for key in keys.into_iter() {
if let Some(region) = self.get(&key) {
if region.end <= region.start {
continue;
}
let flags = region.flags & PCI_BASE_ADDRESS_MEM_TYPE_MASK;
let mem_type_32bit = flags == PCI_BASE_ADDRESS_MEM_TYPE32;
let mem_type_64bit = flags == PCI_BASE_ADDRESS_MEM_TYPE64;
let mem_size = region.end - region.start + 1;
if mem_type_32bit {
mem_size_32bit += mem_size;
}
if mem_type_64bit {
mem_size_64bit += mem_size;
match flags {
PCI_BASE_ADDRESS_MEM_TYPE32 => mem_size_32bit += mem_size,
PCI_BASE_ADDRESS_MEM_TYPE64 => mem_size_64bit += mem_size,
_ => {}
}
}
}
@@ -148,7 +145,7 @@ impl PCIDeviceManager {
Ok(pci_devices)
}
fn get_device_by_pci_bus_id(
pub fn get_device_by_pci_bus_id(
&self,
address: &str,
vendor: Option<u16>,