mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-05-01 05:14:56 +00:00
runtime-rs: rescan network at Start RPC for Docker 26+
Docker 26+ configures the container's veth pair between the Create
and Start RPCs by bind-mounting `/proc/<vmm_pid>/ns/net`. The Rust
shim's network scan during sandbox creation finds no interfaces
because they don't exist yet.
The Go shim (commit f7878cc) solves this with `detectHypervisorNetns`
inside `addAllEndpoints`: when the placeholder netns is empty, it
switches to the hypervisor's network namespace and rescans there.
Port this approach to the Rust shim:
- Add `rescan_network()` to the `Sandbox` trait
- Implement it on `VirtSandbox`: build a rescan config that always
targets the hypervisor's netns (`/proc/<vmm_pid>/ns/net`),
bypassing the placeholder netns and the `network_created` flag
- Call `sandbox.rescan_network()` synchronously in the `StartProcess`
handler, before `cm.start_process()`, so interfaces are wired
before the container process runs
Fixes: #9340
Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
Made-with: Cursor
This commit is contained in:
@@ -51,6 +51,10 @@ pub trait Sandbox: Send + Sync {
|
||||
shim_pid: u32,
|
||||
) -> Result<()>;
|
||||
|
||||
// Docker 26+ network rescan: discover interfaces that Docker configured
|
||||
// between the Create and Start RPCs.
|
||||
async fn rescan_network(&self) -> Result<()>;
|
||||
|
||||
// metrics function
|
||||
async fn agent_metrics(&self) -> Result<String>;
|
||||
async fn hypervisor_metrics(&self) -> Result<String>;
|
||||
|
||||
@@ -655,6 +655,20 @@ impl RuntimeHandlerManager {
|
||||
Ok(TaskResponse::WaitProcess(exit_status))
|
||||
}
|
||||
TaskRequest::StartProcess(process_id) => {
|
||||
// Docker 26+ configures the veth between the Create and Start
|
||||
// RPCs. Rescan now so interfaces are wired before the process
|
||||
// starts. The rescan uses a lightweight netlink probe during
|
||||
// polling and only does the expensive endpoint setup once
|
||||
// interfaces are detected.
|
||||
if process_id.process_type == ProcessType::Container {
|
||||
if let Err(e) = sandbox.rescan_network().await {
|
||||
error!(
|
||||
sl!(),
|
||||
"network rescan failed; container may lack networking: {:?}", e
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let shim_pid = cm
|
||||
.start_process(&process_id)
|
||||
.await
|
||||
|
||||
@@ -563,6 +563,46 @@ impl VirtSandbox {
|
||||
) -> bool {
|
||||
!prestart_hooks.is_empty() || !create_runtime_hooks.is_empty()
|
||||
}
|
||||
|
||||
/// Build a network rescan config targeting the hypervisor's network
|
||||
/// namespace. Docker 26+ bind-mounts `/proc/<vmm_pid>/ns/net` and
|
||||
/// configures veth pairs there between Create and Start, so the
|
||||
/// hypervisor netns is where the interfaces will appear — regardless
|
||||
/// of whether we earlier created a placeholder netns (network_created)
|
||||
/// or not. This mirrors the Go shim's `detectHypervisorNetns` logic
|
||||
/// inside `addAllEndpoints` (commit f7878cc).
|
||||
async fn netns_rescan_config(&self) -> Option<NetworkWithNetNsConfig> {
|
||||
let toml = self.resource_manager.config().await;
|
||||
if toml.runtime.disable_new_netns {
|
||||
return None;
|
||||
}
|
||||
if dan_config_path(&toml, &self.sid).exists() {
|
||||
return None;
|
||||
}
|
||||
self.sandbox_config.as_ref()?;
|
||||
|
||||
let vmm_pid = match self.hypervisor.get_vmm_master_tid().await {
|
||||
Ok(pid) => pid,
|
||||
Err(e) => {
|
||||
warn!(sl!(), "netns_rescan_config: cannot get VMM PID: {:?}", e);
|
||||
return None;
|
||||
}
|
||||
};
|
||||
let netns_path = format!("/proc/{}/ns/net", vmm_pid);
|
||||
|
||||
let queues = self
|
||||
.hypervisor
|
||||
.hypervisor_config()
|
||||
.await
|
||||
.network_info
|
||||
.network_queues as usize;
|
||||
Some(NetworkWithNetNsConfig {
|
||||
network_model: toml.runtime.internetworking_model.clone(),
|
||||
netns_path,
|
||||
queues,
|
||||
network_created: false,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -748,6 +788,7 @@ impl Sandbox for VirtSandbox {
|
||||
});
|
||||
self.monitor.start(id, self.agent.clone());
|
||||
self.save().await.context("save state")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -877,6 +918,20 @@ impl Sandbox for VirtSandbox {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn rescan_network(&self) -> Result<()> {
|
||||
if let Some(net_cfg) = self.netns_rescan_config().await {
|
||||
info!(
|
||||
sl!(),
|
||||
"rescan_network: scanning netns={}", net_cfg.netns_path
|
||||
);
|
||||
self.resource_manager
|
||||
.rescan_network_if_unconfigured(net_cfg)
|
||||
.await
|
||||
.context("network rescan during start")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn wait_process(
|
||||
&self,
|
||||
cm: Arc<dyn ContainerManager>,
|
||||
|
||||
Reference in New Issue
Block a user