runtime-rs: resolve cold-plug VFIO guest PCI path via QMP

The PCIe topology pre-computes a wrong path for cold-plugged physical-
endpoint VFs because the root port has no explicit addr and QEMU auto-
assigns its slot. The pre-computed PciPath { slots: [PciSlot(0)] }
resolves to 0000:00:00.0 (the Q35 MCH), causing
wait_for_pci_net_interface to time out looking for a netdev there.

Add resolve_vfio_device_pci_path(hostdev_id) to the Hypervisor trait.
Implement it in QemuInner using qmp.get_device_by_qdev_id(), which
runs QEMU's query-pci command to find the full guest PCIe path (e.g.
"05/00" = slot 5 on pcie.0 / slot 0 on the root port bus).

Store the QEMU device ID (hostdev_id) in PhysicalEndpoint during
attach(). Add vfio_hostdev_id() and set_guest_pci_path() to the
Endpoint trait and add an endpoints() accessor to the Network trait.

In setup_after_start_vm(), call resolve_physical_endpoint_pci_paths()
before apply_network_to_agent() to populate the correct path from QMP
into each PhysicalEndpoint's guest_pci_path field. The field is then
consumed by network_with_netns::interfaces() to fill Interface.device_path
before update_interface is sent to the agent.

This is the runtime-rs counterpart of the Go runtime's
ResolveColdPlugVFIOGuestPciPaths / qomGetPciPath.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
Assisted-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Fabiano Fidêncio
2026-05-27 20:36:11 +02:00
parent f8ee9133e5
commit be2ec02c9a
8 changed files with 127 additions and 9 deletions

View File

@@ -13,6 +13,7 @@ pub mod device;
pub mod hypervisor_persist;
pub use device::driver::*;
use device::DeviceType;
pub use device::pci_path::PciPath;
#[cfg(all(
feature = "dragonball",
any(target_arch = "x86_64", target_arch = "aarch64")
@@ -161,4 +162,15 @@ pub trait Hypervisor: std::fmt::Debug + Send + Sync {
async fn set_guest_memory_block_size(&self, size: u32);
async fn guest_memory_block_size(&self) -> u32;
async fn get_passfd_listener_addr(&self) -> Result<(String, u32)>;
/// Look up the in-guest PCIe path of a cold-plugged physical-endpoint VF.
///
/// `hostdev_id` is the QEMU device ID of the VF. The lookup is done by
/// querying QMP (query-pci + device search by QEMU device ID), so it is only
/// meaningful after the VM has started and QMP is initialised.
///
/// # Errors
///
/// This default implementation always returns an error: hypervisors that do
/// not override it (non-QEMU) do not support the lookup.
async fn resolve_vfio_device_pci_path(&self, hostdev_id: &str) -> Result<PciPath> {
    let unsupported = anyhow::anyhow!(
        "resolve_vfio_device_pci_path not supported for this hypervisor (device: {})",
        hostdev_id
    );
    Err(unsupported)
}
}

View File

@@ -1154,6 +1154,18 @@ impl QemuInner {
Ok(())
}
/// Ask QMP (query-pci) for the in-guest PCIe path of the cold-plugged
/// physical-endpoint VF whose QEMU device ID is `hostdev_id`.
///
/// Only valid once the VM has started and QMP is initialised. This is the
/// runtime-rs pair of the Go runtime's
/// `ResolveColdPlugVFIOGuestPciPaths` / `qomGetPciPath` call.
///
/// # Errors
///
/// Fails when QMP has not been initialised yet, or when the QMP lookup
/// itself returns an error.
pub(crate) fn resolve_vfio_device_pci_path(&mut self, hostdev_id: &str) -> Result<PciPath> {
    match self.qmp.as_mut() {
        Some(qmp) => qmp.get_device_by_qdev_id(hostdev_id),
        // Without a QMP connection there is nothing to query.
        None => Err(anyhow!(
            "QMP not initialised; cannot resolve PCI path for {}",
            hostdev_id
        )),
    }
}
}
#[async_trait]

View File

@@ -7,6 +7,7 @@ mod cmdline_generator;
mod inner;
mod qmp;
use crate::device::pci_path::PciPath;
use crate::device::DeviceType;
use crate::hypervisor_persist::HypervisorState;
use crate::{Hypervisor, MemoryConfig};
@@ -212,6 +213,13 @@ impl Hypervisor for Qemu {
async fn get_passfd_listener_addr(&self) -> Result<(String, u32)> {
Err(anyhow::anyhow!("Not yet supported"))
}
/// Resolve the guest PCI path for `hostdev_id` by delegating to the inner
/// QEMU state, which is accessed under its write lock.
async fn resolve_vfio_device_pci_path(&self, hostdev_id: &str) -> Result<PciPath> {
    let mut inner = self.inner.write().await;
    inner.resolve_vfio_device_pci_path(hostdev_id)
}
}
#[async_trait]

View File

@@ -345,6 +345,17 @@ impl ResourceManagerInner {
}
if let Some(network) = self.network.as_ref() {
// For cold-plugged physical-endpoint VFs, the PCIe topology
// pre-computes a wrong path because the root port has no explicit
// addr and QEMU auto-assigns its slot. Resolve the actual path
// via QMP (query-pci + device search) before sending
// update_interface to the agent.
resolve_physical_endpoint_pci_paths(
network.as_ref(),
self.hypervisor.as_ref(),
)
.await;
self.apply_network_to_agent(network.as_ref()).await?;
}
@@ -899,3 +910,41 @@ impl Persist for ResourceManagerInner {
})
}
}
/// Refresh the `guest_pci_path` of every physical-endpoint VF in `network`
/// with the path the hypervisor actually reports via QMP (query-pci).
///
/// Call this after the VM has started (QMP is initialised) and before
/// `apply_network_to_agent`: the PCIe topology pre-computes a wrong path
/// because the root port has no explicit addr, so QEMU auto-assigns its slot
/// and only QMP can reveal the actual assignment.
///
/// Resolution is best-effort: a failure for one endpoint is logged as a
/// warning and the remaining endpoints are still processed.
async fn resolve_physical_endpoint_pci_paths(
    network: &dyn crate::network::Network,
    hypervisor: &dyn hypervisor::Hypervisor,
) {
    for endpoint in network.endpoints().await {
        // Endpoints that report no hostdev ID have nothing to resolve.
        let hostdev_id = match endpoint.vfio_hostdev_id().await {
            Some(id) => id,
            None => continue,
        };

        let resolved = hypervisor.resolve_vfio_device_pci_path(&hostdev_id).await;
        let pci_path = match resolved {
            Ok(path) => path,
            Err(e) => {
                warn!(
                    sl!(),
                    "failed to resolve guest PCI path for hostdev {}: {}",
                    hostdev_id,
                    e
                );
                continue;
            }
        };

        let path_str = pci_path.to_string();
        info!(
            sl!(),
            "resolved physical endpoint guest PCI path: \
hostdev_id={} path={}",
            hostdev_id,
            path_str
        );
        endpoint.set_guest_pci_path(path_str).await;
    }
}

View File

@@ -42,4 +42,12 @@ pub trait Endpoint: std::fmt::Debug + Send + Sync {
async fn guest_pci_path(&self) -> Option<String> {
None
}
/// Returns the QEMU device ID of the cold-plugged VF backing this endpoint
/// (e.g. `"physical_nic__346_0"`), which is used to resolve the actual guest
/// PCI path via QMP after VM start.
///
/// The default implementation returns `None`, which is the answer for
/// non-physical endpoints.
async fn vfio_hostdev_id(&self) -> Option<String> {
None
}
/// Update the guest PCI path (called after QMP resolution).
///
/// The default implementation is a no-op that discards `_path`; endpoint
/// types that cache a guest PCI path override it.
async fn set_guest_pci_path(&self, _path: String) {}
}

View File

@@ -62,10 +62,14 @@ pub struct PhysicalEndpoint {
driver: String,
vendor_device_id: VendorDevice,
d: Arc<RwLock<DeviceManager>>,
/// Guest PCI path computed by do_add_pcie_endpoint() at attach() time.
/// Populated after attach() succeeds; used to set device_path in the
/// agent's update_interface request for IB/RoCE GID table population.
/// Guest PCI path — populated after QMP resolution in setup_after_start_vm.
/// The pre-computed topology path from attach() is WRONG for physical
/// endpoints because the root port has no explicit addr; the correct path
/// requires QMP query-pci after VM boots.
guest_pci_path: std::sync::Mutex<Option<String>>,
/// QEMU device ID for the cold-plugged VF (e.g. "physical_nic__346_0").
/// Stored during attach() for use in QMP-based path resolution.
hostdev_id: std::sync::Mutex<Option<String>>,
}
impl PhysicalEndpoint {
@@ -98,6 +102,7 @@ impl PhysicalEndpoint {
bdf,
d,
guest_pci_path: std::sync::Mutex::new(None),
hostdev_id: std::sync::Mutex::new(None),
})
}
}
@@ -163,14 +168,14 @@ impl Endpoint for PhysicalEndpoint {
.await
.context("do handle device failed.")?;
// Extract and cache the guest PCI path so guest_pci_path() can
// expose it to handle_interfaces() for device_path in update_interface.
// Store the QEMU hostdev_id for later QMP-based PCI path resolution.
// The topology-computed guest_pci_path from do_add_pcie_endpoint() is
// WRONG for physical endpoints (root port has no explicit addr so QEMU
// auto-assigns its slot; the correct path requires QMP after VM boot).
if let hypervisor::device::DeviceType::Vfio(vfio_dev) = device_type {
if let Some(hostdev) = vfio_dev.devices.first() {
if let Some(pci_path) = &hostdev.guest_pci_path {
if let Ok(mut guard) = self.guest_pci_path.lock() {
*guard = Some(pci_path.to_string());
}
if let Ok(mut guard) = self.hostdev_id.lock() {
*guard = Some(hostdev.hostdev_id.clone());
}
}
}
@@ -217,6 +222,16 @@ impl Endpoint for PhysicalEndpoint {
/// Returns a copy of the cached guest PCI path, or `None` when no path has
/// been stored yet or the lock guarding it is poisoned.
async fn guest_pci_path(&self) -> Option<String> {
    match self.guest_pci_path.lock() {
        Ok(cached) => cached.clone(),
        Err(_) => None,
    }
}
/// Returns a copy of the stored QEMU device ID, or `None` when no ID has
/// been stored yet or the lock guarding it is poisoned.
async fn vfio_hostdev_id(&self) -> Option<String> {
    match self.hostdev_id.lock() {
        Ok(stored) => stored.clone(),
        Err(_) => None,
    }
}
/// Overwrites the cached guest PCI path with `path`.
///
/// If the lock guarding the cache is poisoned the update is skipped and the
/// previous value is left untouched.
async fn set_guest_pci_path(&self, path: String) {
    if let Ok(mut slot) = self.guest_pci_path.lock() {
        // The previously cached path (if any) is simply discarded.
        let _previous = slot.replace(path);
    }
}
}
// ---------------------------------------------------------------------------

View File

@@ -44,6 +44,11 @@ pub trait Network: Send + Sync {
async fn neighs(&self) -> Result<Vec<agent::ARPNeighbor>>;
async fn save(&self) -> Option<Vec<EndpointState>>;
async fn remove(&self, h: &dyn Hypervisor) -> Result<()>;
/// Returns the network's endpoints so their PCI paths can be resolved via
/// QMP before update_interface is sent to the agent.
///
/// The default implementation reports no endpoints.
async fn endpoints(&self) -> Vec<std::sync::Arc<dyn endpoint::Endpoint>> {
    Vec::new()
}
}
pub async fn new(

View File

@@ -168,6 +168,15 @@ impl Network for NetworkWithNetns {
fs::remove_dir_all(inner.netns_path.clone()).context("failed to remove netns path")?;
Ok(())
}
/// Returns a shared handle to the endpoint of every entity in the network,
/// in `entity_list` order, collected under the inner read lock.
async fn endpoints(&self) -> Vec<std::sync::Arc<dyn crate::network::endpoint::Endpoint>> {
    let guard = self.inner.read().await;
    let mut handles = Vec::new();
    for entity in guard.entity_list.iter() {
        handles.push(entity.endpoint.clone());
    }
    handles
}
}
/// Lightweight probe: enter the netns and check whether any non-loopback