diff --git a/src/runtime-rs/crates/hypervisor/src/lib.rs b/src/runtime-rs/crates/hypervisor/src/lib.rs
index e4bf7919cd..ae5377f859 100644
--- a/src/runtime-rs/crates/hypervisor/src/lib.rs
+++ b/src/runtime-rs/crates/hypervisor/src/lib.rs
@@ -13,6 +13,7 @@ pub mod device;
 pub mod hypervisor_persist;
 pub use device::driver::*;
 use device::DeviceType;
+pub use device::pci_path::PciPath;
 #[cfg(all(
     feature = "dragonball",
     any(target_arch = "x86_64", target_arch = "aarch64")
@@ -161,4 +162,15 @@ pub trait Hypervisor: std::fmt::Debug + Send + Sync {
     async fn set_guest_memory_block_size(&self, size: u32);
     async fn guest_memory_block_size(&self) -> u32;
     async fn get_passfd_listener_addr(&self) -> Result<(String, u32)>;
+
+    /// Resolve the in-guest PCIe path for a cold-plugged physical-endpoint VF
+    /// by querying QMP (query-pci + device search by QEMU device ID).
+    /// Only meaningful after the VM has started and QMP is initialised.
+    /// Default: Err (non-QEMU hypervisors do not support this).
+    async fn resolve_vfio_device_pci_path(&self, hostdev_id: &str) -> Result<PciPath> {
+        Err(anyhow::anyhow!(
+            "resolve_vfio_device_pci_path not supported for this hypervisor (device: {})",
+            hostdev_id
+        ))
+    }
 }
diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs b/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs
index 248296faf0..97b16a095c 100644
--- a/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs
+++ b/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs
@@ -1154,6 +1154,18 @@ impl QemuInner {
 
         Ok(())
     }
+
+    /// Resolve the in-guest PCIe path for a cold-plugged physical-endpoint VF
+    /// via QMP query-pci. Must be called after the VM has started and QMP is
+    /// initialised. This is the runtime-rs counterpart of the Go runtime's
+    /// `ResolveColdPlugVFIOGuestPciPaths` / `qomGetPciPath` call.
+    pub(crate) fn resolve_vfio_device_pci_path(&mut self, hostdev_id: &str) -> Result<PciPath> {
+        let qmp = self
+            .qmp
+            .as_mut()
+            .ok_or_else(|| anyhow!("QMP not initialised; cannot resolve PCI path for {}", hostdev_id))?;
+        qmp.get_device_by_qdev_id(hostdev_id)
+    }
 }
 
 #[async_trait]
diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/mod.rs b/src/runtime-rs/crates/hypervisor/src/qemu/mod.rs
index fe99c4993f..9800f5ccec 100644
--- a/src/runtime-rs/crates/hypervisor/src/qemu/mod.rs
+++ b/src/runtime-rs/crates/hypervisor/src/qemu/mod.rs
@@ -7,6 +7,7 @@ mod cmdline_generator;
 mod inner;
 mod qmp;
 
+use crate::device::pci_path::PciPath;
 use crate::device::DeviceType;
 use crate::hypervisor_persist::HypervisorState;
 use crate::{Hypervisor, MemoryConfig};
@@ -212,6 +213,13 @@ impl Hypervisor for Qemu {
     async fn get_passfd_listener_addr(&self) -> Result<(String, u32)> {
         Err(anyhow::anyhow!("Not yet supported"))
     }
+
+    async fn resolve_vfio_device_pci_path(&self, hostdev_id: &str) -> Result<PciPath> {
+        self.inner
+            .write()
+            .await
+            .resolve_vfio_device_pci_path(hostdev_id)
+    }
 }
 
 #[async_trait]
diff --git a/src/runtime-rs/crates/resource/src/manager_inner.rs b/src/runtime-rs/crates/resource/src/manager_inner.rs
index 9b58bf7470..2d2aedcc81 100644
--- a/src/runtime-rs/crates/resource/src/manager_inner.rs
+++ b/src/runtime-rs/crates/resource/src/manager_inner.rs
@@ -345,6 +345,17 @@ impl ResourceManagerInner {
         }
 
         if let Some(network) = self.network.as_ref() {
+            // For cold-plugged physical-endpoint VFs, the PCIe topology
+            // pre-computes a wrong path because the root port has no explicit
+            // addr and QEMU auto-assigns its slot. Resolve the actual path
+            // via QMP (query-pci + device search) before sending
+            // update_interface to the agent.
+            resolve_physical_endpoint_pci_paths(
+                network.as_ref(),
+                self.hypervisor.as_ref(),
+            )
+            .await;
+
             self.apply_network_to_agent(network.as_ref()).await?;
         }
 
@@ -899,3 +910,41 @@ impl Persist for ResourceManagerInner {
         })
     }
 }
+
+/// For each physical-endpoint VF in the network, resolve the actual in-guest
+/// PCIe path via QMP (query-pci) and update the endpoint's `guest_pci_path`.
+///
+/// This must be called after the VM has started (QMP is initialised) and
+/// before `apply_network_to_agent`, because the PCIe topology pre-computes
+/// a wrong path (root port has no explicit addr → QEMU auto-assigns its slot;
+/// only QMP can reveal the actual assignment).
+async fn resolve_physical_endpoint_pci_paths(
+    network: &dyn crate::network::Network,
+    hypervisor: &dyn hypervisor::Hypervisor,
+) {
+    for endpoint in network.endpoints().await {
+        if let Some(hostdev_id) = endpoint.vfio_hostdev_id().await {
+            match hypervisor.resolve_vfio_device_pci_path(&hostdev_id).await {
+                Ok(pci_path) => {
+                    let path_str = pci_path.to_string();
+                    info!(
+                        sl!(),
+                        "resolved physical endpoint guest PCI path: \
+                         hostdev_id={} path={}",
+                        hostdev_id,
+                        path_str
+                    );
+                    endpoint.set_guest_pci_path(path_str).await;
+                }
+                Err(e) => {
+                    warn!(
+                        sl!(),
+                        "failed to resolve guest PCI path for hostdev {}: {}",
+                        hostdev_id,
+                        e
+                    );
+                }
+            }
+        }
+    }
+}
diff --git a/src/runtime-rs/crates/resource/src/network/endpoint/mod.rs b/src/runtime-rs/crates/resource/src/network/endpoint/mod.rs
index f0aa9ffb37..e11e84b0e3 100644
--- a/src/runtime-rs/crates/resource/src/network/endpoint/mod.rs
+++ b/src/runtime-rs/crates/resource/src/network/endpoint/mod.rs
@@ -42,4 +42,12 @@ pub trait Endpoint: std::fmt::Debug + Send + Sync {
     async fn guest_pci_path(&self) -> Option<String> {
         None
     }
+    /// Returns the QEMU device ID for the cold-plugged VF (e.g.
+    /// `"physical_nic__346_0"`), or `None` for non-physical endpoints.
+    /// Used to resolve the actual guest PCI path via QMP after VM start.
+    async fn vfio_hostdev_id(&self) -> Option<String> {
+        None
+    }
+    /// Update the guest PCI path (called after QMP resolution).
+    async fn set_guest_pci_path(&self, _path: String) {}
 }
diff --git a/src/runtime-rs/crates/resource/src/network/endpoint/physical_endpoint.rs b/src/runtime-rs/crates/resource/src/network/endpoint/physical_endpoint.rs
index 4cb21772f8..49b1c0d97e 100644
--- a/src/runtime-rs/crates/resource/src/network/endpoint/physical_endpoint.rs
+++ b/src/runtime-rs/crates/resource/src/network/endpoint/physical_endpoint.rs
@@ -62,10 +62,14 @@ pub struct PhysicalEndpoint {
     driver: String,
     vendor_device_id: VendorDevice,
     d: Arc<RwLock<DeviceManager>>,
-    /// Guest PCI path computed by do_add_pcie_endpoint() at attach() time.
-    /// Populated after attach() succeeds; used to set device_path in the
-    /// agent's update_interface request for IB/RoCE GID table population.
+    /// Guest PCI path — populated after QMP resolution in setup_after_start_vm.
+    /// The pre-computed topology path from attach() is WRONG for physical
+    /// endpoints because the root port has no explicit addr; the correct path
+    /// requires QMP query-pci after VM boots.
     guest_pci_path: std::sync::Mutex<Option<String>>,
+    /// QEMU device ID for the cold-plugged VF (e.g. "physical_nic__346_0").
+    /// Stored during attach() for use in QMP-based path resolution.
+    hostdev_id: std::sync::Mutex<Option<String>>,
 }
 
 impl PhysicalEndpoint {
@@ -98,6 +102,7 @@ impl PhysicalEndpoint {
             bdf,
             d,
             guest_pci_path: std::sync::Mutex::new(None),
+            hostdev_id: std::sync::Mutex::new(None),
         })
     }
 }
@@ -163,14 +168,14 @@ impl Endpoint for PhysicalEndpoint {
             .await
             .context("do handle device failed.")?;
 
-        // Extract and cache the guest PCI path so guest_pci_path() can
-        // expose it to handle_interfaces() for device_path in update_interface.
+        // Store the QEMU hostdev_id for later QMP-based PCI path resolution.
+        // The topology-computed guest_pci_path from do_add_pcie_endpoint() is
+        // WRONG for physical endpoints (root port has no explicit addr so QEMU
+        // auto-assigns its slot; the correct path requires QMP after VM boot).
         if let hypervisor::device::DeviceType::Vfio(vfio_dev) = device_type {
             if let Some(hostdev) = vfio_dev.devices.first() {
-                if let Some(pci_path) = &hostdev.guest_pci_path {
-                    if let Ok(mut guard) = self.guest_pci_path.lock() {
-                        *guard = Some(pci_path.to_string());
-                    }
+                if let Ok(mut guard) = self.hostdev_id.lock() {
+                    *guard = Some(hostdev.hostdev_id.clone());
                 }
             }
         }
@@ -217,6 +222,16 @@ impl Endpoint for PhysicalEndpoint {
     async fn guest_pci_path(&self) -> Option<String> {
         self.guest_pci_path.lock().ok()?.clone()
     }
+
+    async fn vfio_hostdev_id(&self) -> Option<String> {
+        self.hostdev_id.lock().ok()?.clone()
+    }
+
+    async fn set_guest_pci_path(&self, path: String) {
+        if let Ok(mut guard) = self.guest_pci_path.lock() {
+            *guard = Some(path);
+        }
+    }
 }
 
 // ---------------------------------------------------------------------------
diff --git a/src/runtime-rs/crates/resource/src/network/mod.rs b/src/runtime-rs/crates/resource/src/network/mod.rs
index f7441262b2..703cb1797f 100644
--- a/src/runtime-rs/crates/resource/src/network/mod.rs
+++ b/src/runtime-rs/crates/resource/src/network/mod.rs
@@ -44,6 +44,11 @@ pub trait Network: Send + Sync {
     async fn neighs(&self) -> Result<Vec<agent::ARPNeighbor>>;
     async fn save(&self) -> Option<Vec<EndpointState>>;
     async fn remove(&self, h: &dyn Hypervisor) -> Result<()>;
+    /// Returns the list of network endpoints. Used to resolve PCI paths
+    /// via QMP before sending update_interface to the agent.
+    async fn endpoints(&self) -> Vec<Arc<dyn Endpoint>> {
+        vec![]
+    }
 }
 
 pub async fn new(
diff --git a/src/runtime-rs/crates/resource/src/network/network_with_netns.rs b/src/runtime-rs/crates/resource/src/network/network_with_netns.rs
index 86d7308438..f606568756 100644
--- a/src/runtime-rs/crates/resource/src/network/network_with_netns.rs
+++ b/src/runtime-rs/crates/resource/src/network/network_with_netns.rs
@@ -168,6 +168,15 @@ impl Network for NetworkWithNetns {
         fs::remove_dir_all(inner.netns_path.clone()).context("failed to remove netns path")?;
         Ok(())
     }
+
+    async fn endpoints(&self) -> Vec<Arc<dyn Endpoint>> {
+        let inner = self.inner.read().await;
+        inner
+            .entity_list
+            .iter()
+            .map(|e| e.endpoint.clone())
+            .collect()
+    }
 }
 
 /// Lightweight probe: enter the netns and check whether any non-loopback