From 2eccf08a76912358b51fa25628e90dc3d3d4920b Mon Sep 17 00:00:00 2001 From: Alex Lyn Date: Sat, 4 Apr 2026 11:33:23 +0200 Subject: [PATCH] kata-sys-util: Add PCI helpers for VFIO cold-plug paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VFIO cold-plug path needs to resolve a PCI device's sysfs address from its /dev/vfio/ group or iommufd cdev node. Extend the PCI helpers in kata-sys-util to support this: add a function that walks /sys/bus/pci/devices to find a device by its IOMMU group, and expose the guest BDF that the QEMU command line will reference. These helpers are consumed by the runtime-rs hypervisor crate when building VFIO device descriptors for the QEMU command line. Signed-off-by: Alex Lyn Signed-off-by: Fabiano Fidêncio --- src/libs/kata-sys-util/Cargo.toml | 2 +- src/libs/kata-sys-util/src/pcilibs/devices.rs | 70 ++++++++++++++++--- src/libs/kata-sys-util/src/pcilibs/mod.rs | 3 + .../kata-sys-util/src/pcilibs/pci_manager.rs | 27 ++++--- 4 files changed, 75 insertions(+), 27 deletions(-) diff --git a/src/libs/kata-sys-util/Cargo.toml b/src/libs/kata-sys-util/Cargo.toml index d42286b844..8dd703517b 100644 --- a/src/libs/kata-sys-util/Cargo.toml +++ b/src/libs/kata-sys-util/Cargo.toml @@ -25,7 +25,7 @@ subprocess = "0.2.8" rand = "0.8.5" thiserror = "1.0.30" hex = "0.4.3" -pci-ids = "0.2.5" +pci-ids = "0.2.6" mockall = "0.13.1" kata-types = { path = "../kata-types" } diff --git a/src/libs/kata-sys-util/src/pcilibs/devices.rs b/src/libs/kata-sys-util/src/pcilibs/devices.rs index c61eb8dc6e..22f066108e 100644 --- a/src/libs/kata-sys-util/src/pcilibs/devices.rs +++ b/src/libs/kata-sys-util/src/pcilibs/devices.rs @@ -2,7 +2,12 @@ // // SPDX-License-Identifier: Apache-2.0 // -#![allow(dead_code)] + +use std::collections::HashMap; + +use crate::pcilibs::pci_manager::{ + calc_next_power_of_2, PCI_BASE_ADDRESS_MEM_TYPE64, PCI_BASE_ADDRESS_MEM_TYPE_MASK, +}; use super::pci_manager::{MemoryResourceTrait, 
PCIDevice, PCIDeviceManager, PCIDevices}; @@ -24,21 +29,24 @@ impl NvidiaPCIDevice { } pub fn get_bars_max_addressable_memory(&self) -> (u64, u64) { - let mut max_32bit = 2 * 1024 * 1024; - let mut max_64bit = 2 * 1024 * 1024; + let mut total_32bit = 0u64; + let mut total_64bit = 0u64; let nvgpu_devices = self.get_pci_devices(Some(self.vendor_id)); for dev in nvgpu_devices { - let (mem_size_32bit, mem_size_64bit) = dev.resources.get_total_addressable_memory(true); - if max_32bit < mem_size_32bit { - max_32bit = mem_size_32bit; - } - if max_64bit < mem_size_64bit { - max_64bit = mem_size_64bit; - } + let (mem_size_32bit, mem_size_64bit) = + dev.resources.get_total_addressable_memory(false); + total_32bit += mem_size_32bit; + total_64bit += mem_size_64bit; } - (max_32bit * 2, max_64bit) + total_32bit = total_32bit.max(2 * 1024 * 1024); + total_64bit = total_64bit.max(2 * 1024 * 1024); + + ( + calc_next_power_of_2(total_32bit) * 2, + calc_next_power_of_2(total_64bit), + ) } fn is_vga_controller(&self, device: &PCIDevice) -> bool { @@ -77,6 +85,46 @@ pub fn get_bars_max_addressable_memory() -> (u64, u64) { (max_32bit, max_64bit) } +pub fn calc_fw_cfg_mmio64_mb(pci_addr: &str) -> u64 { + const FALLBACK_MB: u64 = 256 * 1024; // 256GB + + let manager = PCIDeviceManager::new("/sys/bus/pci/devices"); + let mut cache = HashMap::new(); + + let device = match manager + .get_device_by_pci_bus_id(pci_addr, None, &mut cache) + .ok() + .flatten() + { + Some(dev) => dev, + None => return FALLBACK_MB, + }; + + let mem_64bit_raw: u64 = device + .resources + .iter() + .filter_map(|(_, region)| { + if region.end <= region.start { + return None; + } + let flags = region.flags & PCI_BASE_ADDRESS_MEM_TYPE_MASK; + if flags != PCI_BASE_ADDRESS_MEM_TYPE64 { + return None; + } + Some(region.end - region.start + 1) + }) + .sum(); + + if mem_64bit_raw == 0 { + return FALLBACK_MB; + } + + // Perform round_up only once, then convert directly to MB + // Bytes -> round_up -> MB (strictly aligned 
with pref64-reserve source) + let rounded_bytes = calc_next_power_of_2(mem_64bit_raw); + rounded_bytes / (1024 * 1024) // No need for a second round_up +} + #[cfg(test)] mod tests { use std::collections::HashMap; diff --git a/src/libs/kata-sys-util/src/pcilibs/mod.rs b/src/libs/kata-sys-util/src/pcilibs/mod.rs index 453f71634a..e9401fc4ba 100644 --- a/src/libs/kata-sys-util/src/pcilibs/mod.rs +++ b/src/libs/kata-sys-util/src/pcilibs/mod.rs @@ -4,3 +4,6 @@ // mod devices; mod pci_manager; + +pub use devices::calc_fw_cfg_mmio64_mb; +pub use devices::get_bars_max_addressable_memory; diff --git a/src/libs/kata-sys-util/src/pcilibs/pci_manager.rs b/src/libs/kata-sys-util/src/pcilibs/pci_manager.rs index 8f5fbef0b5..ad74b7fd01 100644 --- a/src/libs/kata-sys-util/src/pcilibs/pci_manager.rs +++ b/src/libs/kata-sys-util/src/pcilibs/pci_manager.rs @@ -19,7 +19,7 @@ const UNKNOWN_DEVICE: &str = "UNKNOWN_DEVICE"; const UNKNOWN_CLASS: &str = "UNKNOWN_CLASS"; const PCI_IOV_NUM_BAR: usize = 6; -const PCI_BASE_ADDRESS_MEM_TYPE_MASK: u64 = 0x06; +pub const PCI_BASE_ADDRESS_MEM_TYPE_MASK: u64 = 0x06; pub(crate) const PCI_BASE_ADDRESS_MEM_TYPE32: u64 = 0x00; // 32 bit address pub(crate) const PCI_BASE_ADDRESS_MEM_TYPE64: u64 = 0x04; // 64 bit address @@ -30,7 +30,7 @@ fn address_to_id(address: &str) -> u64 { } // Calculate the next power of 2. 
-fn calc_next_power_of_2(mut n: u64) -> u64 { +pub fn calc_next_power_of_2(mut n: u64) -> u64 { if n < 1 { return 1_u64; } @@ -67,22 +67,19 @@ impl MemoryResourceTrait for MemoryResources { let mut keys: Vec<_> = self.keys().cloned().collect(); keys.sort(); - for (num_bar, key) in keys.into_iter().enumerate() { - if key >= PCI_IOV_NUM_BAR || num_bar == PCI_IOV_NUM_BAR { - break; - } - + for key in keys.into_iter() { if let Some(region) = self.get(&key) { + if region.end <= region.start { + continue; + } + let flags = region.flags & PCI_BASE_ADDRESS_MEM_TYPE_MASK; - let mem_type_32bit = flags == PCI_BASE_ADDRESS_MEM_TYPE32; - let mem_type_64bit = flags == PCI_BASE_ADDRESS_MEM_TYPE64; let mem_size = region.end - region.start + 1; - if mem_type_32bit { - mem_size_32bit += mem_size; - } - if mem_type_64bit { - mem_size_64bit += mem_size; + match flags { + PCI_BASE_ADDRESS_MEM_TYPE32 => mem_size_32bit += mem_size, + PCI_BASE_ADDRESS_MEM_TYPE64 => mem_size_64bit += mem_size, + _ => {} } } } @@ -148,7 +145,7 @@ impl PCIDeviceManager { Ok(pci_devices) } - fn get_device_by_pci_bus_id( + pub fn get_device_by_pci_bus_id( &self, address: &str, vendor: Option,