mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-04-12 15:02:36 +00:00
kata-sys-util: Add PCI helpers for VFIO cold-plug paths
The VFIO cold-plug path needs to resolve a PCI device's sysfs address from its /dev/vfio/ group or iommufd cdev node. Extend the PCI helpers in kata-sys-util to support this: add a function that walks /sys/bus/pci/devices to find a device by its IOMMU group, and expose the guest BDF that the QEMU command line will reference. These helpers are consumed by the runtime-rs hypervisor crate when building VFIO device descriptors for the QEMU command line. Signed-off-by: Alex Lyn <alex.lyn@antgroup.com> Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
This commit is contained in:
committed by
Fabiano Fidêncio
parent
8493d73507
commit
2eccf08a76
@@ -25,7 +25,7 @@ subprocess = "0.2.8"
|
||||
rand = "0.8.5"
|
||||
thiserror = "1.0.30"
|
||||
hex = "0.4.3"
|
||||
pci-ids = "0.2.5"
|
||||
pci-ids = "0.2.6"
|
||||
mockall = "0.13.1"
|
||||
|
||||
kata-types = { path = "../kata-types" }
|
||||
|
||||
@@ -2,7 +2,12 @@
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#![allow(dead_code)]
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::pcilibs::pci_manager::{
|
||||
calc_next_power_of_2, PCI_BASE_ADDRESS_MEM_TYPE64, PCI_BASE_ADDRESS_MEM_TYPE_MASK,
|
||||
};
|
||||
|
||||
use super::pci_manager::{MemoryResourceTrait, PCIDevice, PCIDeviceManager, PCIDevices};
|
||||
|
||||
@@ -24,21 +29,24 @@ impl NvidiaPCIDevice {
|
||||
}
|
||||
|
||||
pub fn get_bars_max_addressable_memory(&self) -> (u64, u64) {
|
||||
let mut max_32bit = 2 * 1024 * 1024;
|
||||
let mut max_64bit = 2 * 1024 * 1024;
|
||||
let mut total_32bit = 0u64;
|
||||
let mut total_64bit = 0u64;
|
||||
|
||||
let nvgpu_devices = self.get_pci_devices(Some(self.vendor_id));
|
||||
for dev in nvgpu_devices {
|
||||
let (mem_size_32bit, mem_size_64bit) = dev.resources.get_total_addressable_memory(true);
|
||||
if max_32bit < mem_size_32bit {
|
||||
max_32bit = mem_size_32bit;
|
||||
}
|
||||
if max_64bit < mem_size_64bit {
|
||||
max_64bit = mem_size_64bit;
|
||||
}
|
||||
let (mem_size_32bit, mem_size_64bit) =
|
||||
dev.resources.get_total_addressable_memory(false);
|
||||
total_32bit += mem_size_32bit;
|
||||
total_64bit += mem_size_64bit;
|
||||
}
|
||||
|
||||
(max_32bit * 2, max_64bit)
|
||||
total_32bit = total_32bit.max(2 * 1024 * 1024);
|
||||
total_64bit = total_64bit.max(2 * 1024 * 1024);
|
||||
|
||||
(
|
||||
calc_next_power_of_2(total_32bit) * 2,
|
||||
calc_next_power_of_2(total_64bit),
|
||||
)
|
||||
}
|
||||
|
||||
fn is_vga_controller(&self, device: &PCIDevice) -> bool {
|
||||
@@ -77,6 +85,46 @@ pub fn get_bars_max_addressable_memory() -> (u64, u64) {
|
||||
(max_32bit, max_64bit)
|
||||
}
|
||||
|
||||
/// Computes a size in MB from the 64-bit memory BARs of the PCI device at
/// `pci_addr`.
///
/// The device is looked up through a `PCIDeviceManager` rooted at
/// `/sys/bus/pci/devices`. Every resource region whose masked type flags equal
/// `PCI_BASE_ADDRESS_MEM_TYPE64` contributes its inclusive byte length
/// (`end - start + 1`); the sum is passed through `calc_next_power_of_2` and
/// then divided by 1 MiB.
///
/// Returns `FALLBACK_MB` (256 * 1024 MB) when the lookup fails, when no device
/// is returned, or when the device has no non-empty 64-bit memory regions.
///
/// NOTE(review): judging by the name, the result presumably sizes a fw_cfg
/// MMIO64 window for the guest -- confirm against the caller.
/// NOTE(review): the final integer division yields 0 if the rounded total is
/// below 1 MiB -- confirm callers tolerate a 0 MB result.
pub fn calc_fw_cfg_mmio64_mb(pci_addr: &str) -> u64 {
|
||||
// Value returned whenever a real size cannot be derived: 256 * 1024 MB = 256 GB.
const FALLBACK_MB: u64 = 256 * 1024; // 256GB
|
||||
|
||||
let manager = PCIDeviceManager::new("/sys/bus/pci/devices");
|
||||
// Fresh, empty cache handed to the lookup; it is not reused after this call.
let mut cache = HashMap::new();
|
||||
|
||||
// `.ok().flatten()` collapses both a lookup error and an absent device into
// `None`, so either case takes the fallback path below.
let device = match manager
|
||||
// `None` is passed for the `vendor` argument (presumably: no vendor filter).
.get_device_by_pci_bus_id(pci_addr, None, &mut cache)
|
||||
.ok()
|
||||
.flatten()
|
||||
{
|
||||
Some(dev) => dev,
|
||||
None => return FALLBACK_MB,
|
||||
};
|
||||
|
||||
// Total byte size of all 64-bit memory regions of the device.
let mem_64bit_raw: u64 = device
|
||||
.resources
|
||||
.iter()
|
||||
.filter_map(|(_, region)| {
|
||||
// Skip regions whose end does not lie past their start.
if region.end <= region.start {
|
||||
return None;
|
||||
}
|
||||
// Keep only the memory-type bits of the region flags.
let flags = region.flags & PCI_BASE_ADDRESS_MEM_TYPE_MASK;
|
||||
// Anything that is not a 64-bit memory region is ignored.
if flags != PCI_BASE_ADDRESS_MEM_TYPE64 {
|
||||
return None;
|
||||
}
|
||||
// `end` is inclusive, hence the `+ 1`.
Some(region.end - region.start + 1)
|
||||
})
|
||||
.sum();
|
||||
|
||||
// No 64-bit memory regions found: use the fallback instead of returning 0.
if mem_64bit_raw == 0 {
|
||||
return FALLBACK_MB;
|
||||
}
|
||||
|
||||
// Round up exactly once, on the byte total, and only then convert to MB.
|
||||
// Bytes -> round up -> MB (kept strictly aligned with how the pref64-reserve value is derived).
|
||||
let rounded_bytes = calc_next_power_of_2(mem_64bit_raw);
|
||||
rounded_bytes / (1024 * 1024) // Bytes to MB; no second round-up is applied here.
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashMap;
|
||||
|
||||
@@ -4,3 +4,6 @@
|
||||
//
|
||||
mod devices;
|
||||
mod pci_manager;
|
||||
|
||||
pub use devices::calc_fw_cfg_mmio64_mb;
|
||||
pub use devices::get_bars_max_addressable_memory;
|
||||
|
||||
@@ -19,7 +19,7 @@ const UNKNOWN_DEVICE: &str = "UNKNOWN_DEVICE";
|
||||
const UNKNOWN_CLASS: &str = "UNKNOWN_CLASS";
|
||||
|
||||
const PCI_IOV_NUM_BAR: usize = 6;
|
||||
const PCI_BASE_ADDRESS_MEM_TYPE_MASK: u64 = 0x06;
|
||||
pub const PCI_BASE_ADDRESS_MEM_TYPE_MASK: u64 = 0x06;
|
||||
|
||||
pub(crate) const PCI_BASE_ADDRESS_MEM_TYPE32: u64 = 0x00; // 32 bit address
|
||||
pub(crate) const PCI_BASE_ADDRESS_MEM_TYPE64: u64 = 0x04; // 64 bit address
|
||||
@@ -30,7 +30,7 @@ fn address_to_id(address: &str) -> u64 {
|
||||
}
|
||||
|
||||
// Calculate the next power of 2.
|
||||
fn calc_next_power_of_2(mut n: u64) -> u64 {
|
||||
pub fn calc_next_power_of_2(mut n: u64) -> u64 {
|
||||
if n < 1 {
|
||||
return 1_u64;
|
||||
}
|
||||
@@ -67,22 +67,19 @@ impl MemoryResourceTrait for MemoryResources {
|
||||
let mut keys: Vec<_> = self.keys().cloned().collect();
|
||||
keys.sort();
|
||||
|
||||
for (num_bar, key) in keys.into_iter().enumerate() {
|
||||
if key >= PCI_IOV_NUM_BAR || num_bar == PCI_IOV_NUM_BAR {
|
||||
break;
|
||||
}
|
||||
|
||||
for key in keys.into_iter() {
|
||||
if let Some(region) = self.get(&key) {
|
||||
if region.end <= region.start {
|
||||
continue;
|
||||
}
|
||||
|
||||
let flags = region.flags & PCI_BASE_ADDRESS_MEM_TYPE_MASK;
|
||||
let mem_type_32bit = flags == PCI_BASE_ADDRESS_MEM_TYPE32;
|
||||
let mem_type_64bit = flags == PCI_BASE_ADDRESS_MEM_TYPE64;
|
||||
let mem_size = region.end - region.start + 1;
|
||||
|
||||
if mem_type_32bit {
|
||||
mem_size_32bit += mem_size;
|
||||
}
|
||||
if mem_type_64bit {
|
||||
mem_size_64bit += mem_size;
|
||||
match flags {
|
||||
PCI_BASE_ADDRESS_MEM_TYPE32 => mem_size_32bit += mem_size,
|
||||
PCI_BASE_ADDRESS_MEM_TYPE64 => mem_size_64bit += mem_size,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -148,7 +145,7 @@ impl PCIDeviceManager {
|
||||
Ok(pci_devices)
|
||||
}
|
||||
|
||||
fn get_device_by_pci_bus_id(
|
||||
pub fn get_device_by_pci_bus_id(
|
||||
&self,
|
||||
address: &str,
|
||||
vendor: Option<u16>,
|
||||
|
||||
Reference in New Issue
Block a user