Compare commits

...

5 Commits

Author SHA1 Message Date
Fabiano Fidêncio
6031a1219b runtime-rs: fix Docker 26+ networking by rescanning after Start
Docker 26+ configures networking after the Start response rather than
through prestart hooks, which means the network namespace may not have
any interfaces when the sandbox is first created. This is the runtime-rs
counterpart of the Go runtime fix in PR #12754.

Three changes are made:

1. Discover Docker's pre-created network namespace from OCI hook args
   (libnetwork-setkey) during sandbox creation, avoiding a placeholder
   netns when the real one is already available.

2. Add an async rescan_network method to VirtSandbox that polls the
   network namespace for up to 5 seconds (50ms interval) looking for
   late-appearing interfaces, then pushes them to the guest agent.

3. Spawn the async rescan after StartProcess for sandbox containers,
   matching the timing of the Go runtime's RescanNetwork goroutine.

Fixes: #9340

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
Made-with: Cursor
2026-04-05 14:48:23 +02:00
Fabiano Fidêncio
f074ceec6d Merge pull request #12682 from PiotrProkop/fix-direct-io-kata
runtime-rs: fix setting directio via config file
2026-04-03 16:11:57 +02:00
Fabiano Fidêncio
945aa5b43f Merge pull request #12774 from zvonkok/bump-nvrc
nvrc: Bump to the latest Release
2026-04-03 15:39:01 +02:00
Zvonko Kaiser
3e23ee9998 nvrc: Bump to the latest Release
v0.1.4 includes a bugfix for the nvrc.log=trace setting, which is now
optional.

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
2026-04-02 17:40:47 -04:00
PiotrProkop
67af63a540 runtime-rs: fix setting directio via config file
This fix applies the config file value as a fallback when block_device_cache_direct annotation is not explicitly set on the pod.

Signed-off-by: PiotrProkop <pprokop@nvidia.com>
2026-04-01 16:59:04 +02:00
11 changed files with 285 additions and 5 deletions

View File

@@ -279,6 +279,7 @@ jobs:
matrix:
vmm:
- qemu
- qemu-runtime-rs
runs-on: ubuntu-22.04
env:
KATA_HYPERVISOR: ${{ matrix.vmm }}

View File

@@ -132,7 +132,9 @@ jobs:
# all due to a single flaky instance.
fail-fast: false
matrix:
vmm: ['qemu']
vmm:
- qemu
- qemu-runtime-rs
runs-on: s390x-large
env:
KATA_HYPERVISOR: ${{ matrix.vmm }}

1
Cargo.lock generated
View File

@@ -5824,6 +5824,7 @@ dependencies = [
"protobuf",
"protocols",
"resource",
"rstest",
"runtime-spec",
"serde_json",
"shim-interface",

View File

@@ -858,7 +858,12 @@ impl QemuInner {
block_device.config.index,
&block_device.config.path_on_host,
&block_device.config.blkdev_aio.to_string(),
block_device.config.is_direct,
Some(
block_device
.config
.is_direct
.unwrap_or(self.config.blockdev_info.block_device_cache_direct),
),
block_device.config.is_readonly,
block_device.config.no_drop,
)

View File

@@ -84,6 +84,16 @@ impl ResourceManager {
inner.handle_network(network_config).await
}
/// Returns whether the managed network currently exposes at least one
/// interface endpoint. Takes a read lock on the inner manager.
pub async fn has_network_endpoints(&self) -> bool {
    self.inner.read().await.has_network_endpoints().await
}
/// Pushes the host-side network state into the guest via the inner
/// manager. Takes a read lock on the inner manager.
pub async fn setup_network_in_guest(&self) -> Result<()> {
    self.inner.read().await.setup_network_in_guest().await
}
#[instrument]
pub async fn setup_after_start_vm(&self) -> Result<()> {
let mut inner = self.inner.write().await;

View File

@@ -296,6 +296,33 @@ impl ResourceManagerInner {
Ok(())
}
/// Returns true when the sandbox network reports at least one interface.
///
/// A failure while enumerating interfaces is treated as "no endpoints"
/// rather than propagated — callers use this only as a readiness probe.
/// Replaces the verbose fully-qualified `std::result::Result::Ok` match
/// with the idiomatic `matches!` guard form.
pub async fn has_network_endpoints(&self) -> bool {
    match &self.network {
        Some(network) => matches!(
            network.interfaces().await,
            Ok(interfaces) if !interfaces.is_empty()
        ),
        // No network configured at all — trivially no endpoints.
        None => false,
    }
}
/// Propagates the already-discovered network state to the guest:
/// interfaces first, then neighbours, then routes. A sandbox without a
/// network is a no-op success.
pub async fn setup_network_in_guest(&self) -> Result<()> {
    let Some(network) = self.network.as_ref() else {
        return Ok(());
    };
    let network = network.as_ref();
    self.handle_interfaces(network)
        .await
        .context("handle interfaces during network rescan")?;
    self.handle_neighbours(network)
        .await
        .context("handle neighbours during network rescan")?;
    self.handle_routes(network)
        .await
        .context("handle routes during network rescan")?;
    Ok(())
}
pub async fn setup_after_start_vm(&mut self) -> Result<()> {
self.cgroups_resource
.setup_after_start_vm(self.hypervisor.as_ref())

View File

@@ -53,6 +53,9 @@ linux_container = { workspace = true, optional = true }
virt_container = { workspace = true, optional = true }
wasm_container = { workspace = true, optional = true }
[dev-dependencies]
rstest = { workspace = true }
[features]
default = ["virt"]
linux = ["linux_container"]

View File

@@ -51,6 +51,13 @@ pub trait Sandbox: Send + Sync {
shim_pid: u32,
) -> Result<()>;
/// Re-scan the network namespace for late-discovered endpoints.
/// This handles runtimes like Docker 26+ that configure networking
/// after the Start response. The default implementation is a no-op.
async fn rescan_network(&self) -> Result<()> {
    // No-op by default: only sandbox implementations that manage a
    // network namespace (e.g. VirtSandbox) need to override this.
    Ok(())
}
// metrics function
async fn agent_metrics(&self) -> Result<String>;
async fn hypervisor_metrics(&self) -> Result<String>;

View File

@@ -69,6 +69,53 @@ use crate::{
tracer::{KataTracer, ROOTSPAN},
};
const DOCKER_LIBNETWORK_SETKEY: &str = "libnetwork-setkey";
const DOCKER_NETNS_PREFIXES: &[&str] = &["/var/run/docker/netns/", "/run/docker/netns/"];
/// Returns true when `id` has the shape of a Docker network-sandbox ID:
/// exactly 64 lowercase hexadecimal characters (digits and `a`-`f`).
fn is_valid_docker_sandbox_id(id: &str) -> bool {
    id.len() == 64
        && id
            .chars()
            .all(|c| c.is_ascii_digit() || ('a'..='f').contains(&c))
}
/// Discover Docker's pre-created network namespace path from OCI spec hooks.
///
/// Docker's libnetwork-setkey hook carries the sandbox ID as the argument
/// immediately following "libnetwork-setkey"; that ID names a netns file
/// under /var/run/docker/netns/<sandbox_id> or /run/docker/netns/<sandbox_id>.
/// Returns the first such path that exists as a regular file, or None.
fn docker_netns_path(spec: &oci::Spec) -> Option<String> {
    let hooks = spec.hooks().as_ref()?;
    // Docker may register the hook as either a prestart or a
    // create_runtime hook depending on version; scan both sets.
    let hook_sets: [&[oci::Hook]; 2] = [
        hooks.prestart().as_deref().unwrap_or_default(),
        hooks.create_runtime().as_deref().unwrap_or_default(),
    ];
    for hook in hook_sets.iter().flat_map(|set| set.iter()) {
        let Some(args) = hook.args() else {
            continue;
        };
        // Each adjacent pair (key, candidate-id) is inspected; an
        // invalid candidate does not stop the scan.
        for pair in args.windows(2) {
            if pair[0] != DOCKER_LIBNETWORK_SETKEY || !is_valid_docker_sandbox_id(&pair[1]) {
                continue;
            }
            for prefix in DOCKER_NETNS_PREFIXES {
                let ns_path = format!("{}{}", prefix, pair[1]);
                // symlink_metadata avoids following symlinks; only a
                // regular file at the expected path is accepted.
                match std::fs::symlink_metadata(&ns_path) {
                    Ok(metadata) if metadata.is_file() => return Some(ns_path),
                    _ => {}
                }
            }
        }
    }
    None
}
fn convert_string_to_slog_level(string_level: &str) -> slog::Level {
match string_level {
"trace" => slog::Level::Trace,
@@ -377,8 +424,17 @@ impl RuntimeHandlerManager {
if ns.path().is_some() {
netns = ns.path().clone().map(|p| p.display().to_string());
}
// if we get empty netns from oci spec, we need to create netns for the VM
else {
// Docker 26+ may configure networking outside of the OCI
// spec namespace path. Try to discover the netns from hook
// args before falling back to creating a placeholder.
else if let Some(docker_ns) = docker_netns_path(spec) {
info!(
sl!(),
"discovered Docker network namespace from hook args";
"netns" => &docker_ns
);
netns = Some(docker_ns);
} else {
let ns_name = generate_netns_name();
let raw_netns = NetNs::new(ns_name)?;
let path = Some(PathBuf::from(raw_netns.path()).display().to_string());
@@ -639,6 +695,7 @@ impl RuntimeHandlerManager {
Ok(TaskResponse::WaitProcess(exit_status))
}
TaskRequest::StartProcess(process_id) => {
let is_sandbox_container = cm.is_sandbox_container(&process_id).await;
let shim_pid = cm
.start_process(&process_id)
.await
@@ -647,6 +704,25 @@ impl RuntimeHandlerManager {
let pid = shim_pid.pid;
let process_type = process_id.process_type;
let container_id = process_id.container_id().to_string();
// Schedule an async network rescan for sandbox containers.
// This handles runtimes that configure networking after the
// Start response (e.g. Docker 26+). rescan_network is
// idempotent — it returns immediately if endpoints already
// exist.
if is_sandbox_container {
let sandbox_rescan = sandbox.clone();
tokio::spawn(async move {
if let Err(e) = sandbox_rescan.rescan_network().await {
error!(
sl!(),
"async network rescan failed — container may lack networking: {:?}",
e
);
}
});
}
tokio::spawn(async move {
let result = sandbox.wait_process(cm, process_id, pid).await;
if let Err(e) = result {
@@ -920,3 +996,85 @@ fn configure_non_root_hypervisor(config: &mut Hypervisor) -> Result<()> {
Ok(())
}
#[cfg(test)]
mod tests {
    //! Unit tests for the Docker netns-discovery helpers
    //! (`is_valid_docker_sandbox_id` and `docker_netns_path`).
    use super::*;
    use oci_spec::runtime::{HookBuilder, HooksBuilder, SpecBuilder};
    use rstest::rstest;

    // A syntactically valid Docker sandbox ID: 64 lowercase hex chars.
    const VALID_SANDBOX_ID: &str =
        "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2";

    // Table-driven validation of the sandbox-ID format check, including
    // the path-traversal case that motivates validating the ID at all.
    #[rstest]
    #[case::all_lowercase_hex(VALID_SANDBOX_ID, true)]
    #[case::all_zeros("0000000000000000000000000000000000000000000000000000000000000000", true)]
    #[case::uppercase_hex("A1B2C3D4E5F6A1B2C3D4E5F6A1B2C3D4E5F6A1B2C3D4E5F6A1B2C3D4E5F6A1B2", false)]
    #[case::too_short("a1b2c3d4", false)]
    #[case::non_hex("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", false)]
    #[case::path_traversal("../../../etc/passwd", false)]
    #[case::empty("", false)]
    fn test_is_valid_docker_sandbox_id(#[case] id: &str, #[case] expected: bool) {
        assert_eq!(is_valid_docker_sandbox_id(id), expected);
    }

    // Builds a hook with the given argv; the hook path itself is not
    // inspected by docker_netns_path, only the args.
    fn make_hook_with_args(args: Vec<&str>) -> oci::Hook {
        HookBuilder::default()
            .path("/usr/bin/test")
            .args(args.into_iter().map(String::from).collect::<Vec<_>>())
            .build()
            .unwrap()
    }

    // All cases below expect None: either the hooks are absent/unrelated,
    // the sandbox ID is malformed or missing, or the ID is valid but no
    // netns file exists on the test host under the Docker netns prefixes.
    #[rstest]
    #[case::no_hooks(None, None)]
    #[case::unrelated_hooks(
        Some(HooksBuilder::default()
            .prestart(vec![make_hook_with_args(vec!["some-hook", "arg1"])])
            .build().unwrap()),
        None
    )]
    #[case::invalid_sandbox_id(
        Some(HooksBuilder::default()
            .prestart(vec![make_hook_with_args(vec![
                "/usr/bin/dockerd", "libnetwork-setkey", "not-a-valid-id",
            ])])
            .build().unwrap()),
        None
    )]
    #[case::setkey_at_end_of_args(
        Some(HooksBuilder::default()
            .prestart(vec![make_hook_with_args(vec![
                "/usr/bin/dockerd", "libnetwork-setkey",
            ])])
            .build().unwrap()),
        None
    )]
    #[case::valid_prestart_but_no_file(
        Some(HooksBuilder::default()
            .prestart(vec![make_hook_with_args(vec![
                "/usr/bin/dockerd", "libnetwork-setkey", VALID_SANDBOX_ID,
            ])])
            .build().unwrap()),
        None
    )]
    #[case::valid_create_runtime_but_no_file(
        Some(HooksBuilder::default()
            .create_runtime(vec![make_hook_with_args(vec![
                "/usr/bin/dockerd", "libnetwork-setkey", VALID_SANDBOX_ID,
            ])])
            .build().unwrap()),
        None
    )]
    fn test_docker_netns_path(
        #[case] hooks: Option<oci::Hooks>,
        #[case] expected: Option<String>,
    ) {
        let mut builder = SpecBuilder::default();
        if let Some(h) = hooks {
            builder = builder.hooks(h);
        }
        let spec = builder.build().unwrap();
        assert_eq!(docker_netns_path(&spec), expected);
    }
}

View File

@@ -58,6 +58,7 @@ use resource::{ResourceConfig, ResourceManager};
use runtime_spec as spec;
use std::path::Path;
use std::sync::Arc;
use std::time::Duration;
use strum::Display;
use tokio::sync::{mpsc::Sender, Mutex, RwLock};
use tracing::instrument;
@@ -973,6 +974,71 @@ impl Sandbox for VirtSandbox {
self.hypervisor.get_hypervisor_metrics().await
}
/// Re-scan the network namespace for late-appearing interfaces and push
/// them to the guest. Polls for up to MAX_WAIT at POLL_INTERVAL; returns
/// Ok(()) immediately when a rescan is unnecessary or impossible.
async fn rescan_network(&self) -> Result<()> {
    let config = self.resource_manager.config().await;
    // No netns is managed when new-netns creation is disabled.
    if config.runtime.disable_new_netns {
        return Ok(());
    }
    // A DAN config file means networking is directly attached via a
    // static config, not discovered from the namespace — skip.
    if dan_config_path(&config, &self.sid).exists() {
        return Ok(());
    }
    // Idempotence: endpoints already discovered means nothing to do.
    if self.resource_manager.has_network_endpoints().await {
        return Ok(());
    }
    // Without a sandbox config or a netns path there is nothing to scan.
    let sandbox_config = match &self.sandbox_config {
        Some(c) => c,
        None => return Ok(()),
    };
    let netns_path = match &sandbox_config.network_env.netns {
        Some(p) => p.clone(),
        None => return Ok(()),
    };
    const MAX_WAIT: Duration = Duration::from_secs(5);
    const POLL_INTERVAL: Duration = Duration::from_millis(50);
    let deadline = tokio::time::Instant::now() + MAX_WAIT;
    info!(sl!(), "waiting for network interfaces in namespace");
    loop {
        // Rebuild the netns network config each attempt and re-handle it
        // so newly created interfaces inside the namespace are picked up.
        let network_config = NetworkConfig::NetNs(NetworkWithNetNsConfig {
            network_model: config.runtime.internetworking_model.clone(),
            netns_path: netns_path.clone(),
            queues: self
                .hypervisor
                .hypervisor_config()
                .await
                .network_info
                .network_queues as usize,
            network_created: sandbox_config.network_env.network_created,
        });
        // A failed attempt is logged but not fatal — the next poll may
        // succeed once the runtime finishes configuring the namespace.
        if let Err(e) = self.resource_manager.handle_network(network_config).await {
            warn!(sl!(), "network rescan attempt failed: {:?}", e);
        }
        if self.resource_manager.has_network_endpoints().await {
            info!(sl!(), "network interfaces discovered during rescan");
            // Push the discovered interfaces/neighbours/routes into the guest.
            return self
                .resource_manager
                .setup_network_in_guest()
                .await
                .context("setup network in guest after rescan");
        }
        // Timing out is not an error: some workloads legitimately have
        // no netns interfaces, or configure networking even later.
        if tokio::time::Instant::now() >= deadline {
            warn!(
                sl!(),
                "no network interfaces found after timeout — networking may be configured later"
            );
            return Ok(());
        }
        // Async sleep so the runtime is not blocked between polls.
        tokio::time::sleep(POLL_INTERVAL).await;
    }
}
async fn set_policy(&self, policy: &str) -> Result<()> {
if policy.is_empty() {
debug!(sl!(), "sb: set_policy skipped without policy");

View File

@@ -234,7 +234,7 @@ externals:
nvrc:
# yamllint disable-line rule:line-length
desc: "The NVRC project provides a Rust binary that implements a simple init system for microVMs"
version: "v0.1.3"
version: "v0.1.4"
url: "https://github.com/NVIDIA/nvrc/releases/download/"
nvidia: