Compare commits

..

11 Commits

Author SHA1 Message Date
Fabiano Fidêncio
6031a1219b runtime-rs: fix Docker 26+ networking by rescanning after Start
Docker 26+ configures networking after the Start response rather than
through prestart hooks, which means the network namespace may not have
any interfaces when the sandbox is first created. This is the runtime-rs
counterpart of the Go runtime fix in PR #12754.

Three changes are made:

1. Discover Docker's pre-created network namespace from OCI hook args
   (libnetwork-setkey) during sandbox creation, avoiding a placeholder
   netns when the real one is already available.

2. Add an async rescan_network method to VirtSandbox that polls the
   network namespace for up to 5 seconds (50ms interval) looking for
   late-appearing interfaces, then pushes them to the guest agent.

3. Spawn the async rescan after StartProcess for sandbox containers,
   matching the timing of the Go runtime's RescanNetwork goroutine.

Fixes: #9340

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
Made-with: Cursor
2026-04-05 14:48:23 +02:00
Fabiano Fidêncio
f074ceec6d Merge pull request #12682 from PiotrProkop/fix-direct-io-kata
runtime-rs: fix setting directio via config file
2026-04-03 16:11:57 +02:00
Fabiano Fidêncio
945aa5b43f Merge pull request #12774 from zvonkok/bump-nvrc
nvrc: Bump to the latest Release
2026-04-03 15:39:01 +02:00
Fabiano Fidêncio
ccfdf5e11b Merge pull request #12754 from llink5/fix/docker26-networking-9340
runtime: fix Docker 26+ networking by rescanning after Start
2026-04-03 13:15:38 +02:00
RuoqingHe
26bd5ad754 Merge pull request #12762 from YutingNie/fix-runtime-rs-shared-fs-typo
runtime-rs: Fix typo in share_fs error message
2026-04-03 15:24:33 +08:00
Yuting Nie
517882f93d runtime-rs: Fix typo in share_fs error message
There's a typo in the error message which gets prompted when an
unsupported share_fs was configured. Fixed shred -> shared.

Signed-off-by: Yuting Nie <yuting.nie@spacemit.com>
2026-04-03 05:23:46 +00:00
Alex Lyn
4a1c2b6620 Merge pull request #12309 from kata-containers/stale-issues-by-date
workflows: Create workflow to stale issues based on date
2026-04-03 09:31:34 +08:00
Zvonko Kaiser
3e23ee9998 nvrc: Bump to the latest Release
v0.1.4 has a bugfix for nvrc.log=trace which is now
optional.

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
2026-04-02 17:40:47 -04:00
llink5
f7878cc385 runtime: fix Docker 26+ networking by rescanning after Start
Docker 26+ configures container networking (veth pair, IP addresses,
routes) after task creation rather than before. Kata's endpoint scan
runs during CreateSandbox, before the interfaces exist, resulting in
VMs starting without network connectivity (no -netdev passed to QEMU).

Add RescanNetwork() which runs asynchronously after the Start RPC.
It polls the network namespace until Docker's interfaces appear, then
hotplugs them to QEMU and informs the guest agent to configure them
inside the VM.

Additional fixes:
- mountinfo parser: find fs type dynamically instead of hardcoded
  field index, fixing parsing with optional mount tags (shared:,
  master:)
- IsDockerContainer: check CreateRuntime hooks for Docker 26+
- DockerNetnsPath: extract netns path from libnetwork-setkey hook
  args with path traversal protection
- detectHypervisorNetns: verify PID ownership via /proc/pid/cmdline
  to guard against PID recycling
- startVM guard: rescan when len(endpoints)==0 after VM start

Fixes: #9340

Signed-off-by: llink5 <llink5@users.noreply.github.com>
2026-04-02 21:23:16 +02:00
PiotrProkop
67af63a540 runtime-rs: fix setting directio via config file
This fix applies the config file value as a fallback when block_device_cache_direct annotation is not explicitly set on the pod.

Signed-off-by: PiotrProkop <pprokop@nvidia.com>
2026-04-01 16:59:04 +02:00
stevenhorsman
99eaa8fcb1 workflows: Create workflow to stale issues based on date
The standard stale/action is intended to be run regularly with
a date offset, but we want to have one we can run against a specific
date in order to run the stale bot against issues created since a particular
release milestone, so calculate the offset in one step and use it in the next.

At the moment we want to run this to stale issues before 9th October 2022 when Kata 3.0 was released, so default to this.

Note the stale action only processes a few issues at a time to avoid rate limiting, which is why we want a cron job — so it can get through
the backlog, but also to stale/unstale issues that are commented on.

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-03-31 15:57:37 +01:00
14 changed files with 333 additions and 11 deletions

View File

@@ -279,11 +279,12 @@ jobs:
matrix:
vmm:
- qemu
- qemu-runtime-rs
runs-on: ubuntu-22.04
env:
KATA_HYPERVISOR: ${{ matrix.vmm }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
ref: ${{ inputs.commit-hash }}
fetch-depth: 0

View File

@@ -132,12 +132,14 @@ jobs:
# all due to a single flaky instance.
fail-fast: false
matrix:
vmm: ['qemu']
vmm:
- qemu
- qemu-runtime-rs
runs-on: s390x-large
env:
KATA_HYPERVISOR: ${{ matrix.vmm }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
ref: ${{ inputs.commit-hash }}
fetch-depth: 0

42
.github/workflows/stale_issues.yaml vendored Normal file
View File

@@ -0,0 +1,42 @@
name: 'Stale issues with activity before a fixed date'
on:
schedule:
- cron: '0 0 * * *'
workflow_dispatch:
inputs:
date:
description: "Date of stale cut-off. All issues not updated since this date will be marked as stale. Format: YYYY-MM-DD e.g. 2022-10-09"
default: "2022-10-09"
required: false
type: string
permissions: {}
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
stale:
name: stale
runs-on: ubuntu-24.04
permissions:
actions: write # Needed to manage caches for state persistence across runs
issues: write # Needed to add/remove labels, post comments, or close issues
steps:
- name: Calculate the age to stale
run: |
echo AGE=$(( ( $(date +%s) - $(date -d "${DATE:-2022-10-09}" +%s) ) / 86400 )) >> "$GITHUB_ENV"
env:
DATE: ${{ inputs.date }}
- name: Run the stale action
uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0
with:
stale-pr-message: 'This issue has had no activity since before ${DATE}. Please comment on the issue, or it will be closed in 30 days'
days-before-pr-stale: -1
days-before-pr-close: -1
days-before-issue-stale: ${AGE}
days-before-issue-close: 30
env:
DATE: ${{ inputs.date }}

1
Cargo.lock generated
View File

@@ -5824,6 +5824,7 @@ dependencies = [
"protobuf",
"protocols",
"resource",
"rstest",
"runtime-spec",
"serde_json",
"shim-interface",

View File

@@ -858,7 +858,12 @@ impl QemuInner {
block_device.config.index,
&block_device.config.path_on_host,
&block_device.config.blkdev_aio.to_string(),
block_device.config.is_direct,
Some(
block_device
.config
.is_direct
.unwrap_or(self.config.blockdev_info.block_device_cache_direct),
),
block_device.config.is_readonly,
block_device.config.no_drop,
)

View File

@@ -84,6 +84,16 @@ impl ResourceManager {
inner.handle_network(network_config).await
}
pub async fn has_network_endpoints(&self) -> bool {
let inner = self.inner.read().await;
inner.has_network_endpoints().await
}
pub async fn setup_network_in_guest(&self) -> Result<()> {
let inner = self.inner.read().await;
inner.setup_network_in_guest().await
}
#[instrument]
pub async fn setup_after_start_vm(&self) -> Result<()> {
let mut inner = self.inner.write().await;

View File

@@ -296,6 +296,33 @@ impl ResourceManagerInner {
Ok(())
}
pub async fn has_network_endpoints(&self) -> bool {
if let Some(network) = &self.network {
match network.interfaces().await {
std::result::Result::Ok(interfaces) => !interfaces.is_empty(),
Err(_) => false,
}
} else {
false
}
}
pub async fn setup_network_in_guest(&self) -> Result<()> {
if let Some(network) = self.network.as_ref() {
let network = network.as_ref();
self.handle_interfaces(network)
.await
.context("handle interfaces during network rescan")?;
self.handle_neighbours(network)
.await
.context("handle neighbours during network rescan")?;
self.handle_routes(network)
.await
.context("handle routes during network rescan")?;
}
Ok(())
}
pub async fn setup_after_start_vm(&mut self) -> Result<()> {
self.cgroups_resource
.setup_after_start_vm(self.hypervisor.as_ref())

View File

@@ -165,6 +165,6 @@ pub fn new(id: &str, config: &SharedFsInfo) -> Result<Arc<dyn ShareFs>> {
VIRTIO_FS => Ok(Arc::new(
ShareVirtioFsStandalone::new(id, config).context("new standalone virtio fs")?,
)),
_ => Err(anyhow!("unsupported shred fs {:?}", &shared_fs)),
_ => Err(anyhow!("unsupported shared fs {:?}", &shared_fs)),
}
}

View File

@@ -53,6 +53,9 @@ linux_container = { workspace = true, optional = true }
virt_container = { workspace = true, optional = true }
wasm_container = { workspace = true, optional = true }
[dev-dependencies]
rstest = { workspace = true }
[features]
default = ["virt"]
linux = ["linux_container"]

View File

@@ -51,6 +51,13 @@ pub trait Sandbox: Send + Sync {
shim_pid: u32,
) -> Result<()>;
/// Re-scan the network namespace for late-discovered endpoints.
/// This handles runtimes like Docker 26+ that configure networking
/// after the Start response. The default implementation is a no-op.
async fn rescan_network(&self) -> Result<()> {
Ok(())
}
// metrics function
async fn agent_metrics(&self) -> Result<String>;
async fn hypervisor_metrics(&self) -> Result<String>;

View File

@@ -69,6 +69,53 @@ use crate::{
tracer::{KataTracer, ROOTSPAN},
};
const DOCKER_LIBNETWORK_SETKEY: &str = "libnetwork-setkey";
const DOCKER_NETNS_PREFIXES: &[&str] = &["/var/run/docker/netns/", "/run/docker/netns/"];
fn is_valid_docker_sandbox_id(id: &str) -> bool {
id.len() == 64 && id.bytes().all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f'))
}
/// Discover Docker's pre-created network namespace path from OCI spec hooks.
///
/// Docker's libnetwork-setkey hook contains the sandbox ID as its
/// argument following "libnetwork-setkey", which maps to a netns file
/// under /var/run/docker/netns/<sandbox_id> or /run/docker/netns/<sandbox_id>.
fn docker_netns_path(spec: &oci::Spec) -> Option<String> {
let hooks = spec.hooks().as_ref()?;
let hook_sets: [&[oci::Hook]; 2] = [
hooks.prestart().as_deref().unwrap_or_default(),
hooks.create_runtime().as_deref().unwrap_or_default(),
];
for hooks in &hook_sets {
for hook in *hooks {
if let Some(args) = hook.args() {
for (i, arg) in args.iter().enumerate() {
if arg == DOCKER_LIBNETWORK_SETKEY && i + 1 < args.len() {
let sandbox_id = &args[i + 1];
if !is_valid_docker_sandbox_id(sandbox_id) {
continue;
}
for prefix in DOCKER_NETNS_PREFIXES {
let ns_path = format!("{}{}", prefix, sandbox_id);
if let Ok(metadata) = std::fs::symlink_metadata(&ns_path) {
if metadata.is_file() {
return Some(ns_path);
}
}
}
}
}
}
}
}
None
}
fn convert_string_to_slog_level(string_level: &str) -> slog::Level {
match string_level {
"trace" => slog::Level::Trace,
@@ -377,8 +424,17 @@ impl RuntimeHandlerManager {
if ns.path().is_some() {
netns = ns.path().clone().map(|p| p.display().to_string());
}
// if we get empty netns from oci spec, we need to create netns for the VM
else {
// Docker 26+ may configure networking outside of the OCI
// spec namespace path. Try to discover the netns from hook
// args before falling back to creating a placeholder.
else if let Some(docker_ns) = docker_netns_path(spec) {
info!(
sl!(),
"discovered Docker network namespace from hook args";
"netns" => &docker_ns
);
netns = Some(docker_ns);
} else {
let ns_name = generate_netns_name();
let raw_netns = NetNs::new(ns_name)?;
let path = Some(PathBuf::from(raw_netns.path()).display().to_string());
@@ -639,6 +695,7 @@ impl RuntimeHandlerManager {
Ok(TaskResponse::WaitProcess(exit_status))
}
TaskRequest::StartProcess(process_id) => {
let is_sandbox_container = cm.is_sandbox_container(&process_id).await;
let shim_pid = cm
.start_process(&process_id)
.await
@@ -647,6 +704,25 @@ impl RuntimeHandlerManager {
let pid = shim_pid.pid;
let process_type = process_id.process_type;
let container_id = process_id.container_id().to_string();
// Schedule an async network rescan for sandbox containers.
// This handles runtimes that configure networking after the
// Start response (e.g. Docker 26+). rescan_network is
// idempotent — it returns immediately if endpoints already
// exist.
if is_sandbox_container {
let sandbox_rescan = sandbox.clone();
tokio::spawn(async move {
if let Err(e) = sandbox_rescan.rescan_network().await {
error!(
sl!(),
"async network rescan failed — container may lack networking: {:?}",
e
);
}
});
}
tokio::spawn(async move {
let result = sandbox.wait_process(cm, process_id, pid).await;
if let Err(e) = result {
@@ -920,3 +996,85 @@ fn configure_non_root_hypervisor(config: &mut Hypervisor) -> Result<()> {
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use oci_spec::runtime::{HookBuilder, HooksBuilder, SpecBuilder};
use rstest::rstest;
const VALID_SANDBOX_ID: &str =
"a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2";
#[rstest]
#[case::all_lowercase_hex(VALID_SANDBOX_ID, true)]
#[case::all_zeros("0000000000000000000000000000000000000000000000000000000000000000", true)]
#[case::uppercase_hex("A1B2C3D4E5F6A1B2C3D4E5F6A1B2C3D4E5F6A1B2C3D4E5F6A1B2C3D4E5F6A1B2", false)]
#[case::too_short("a1b2c3d4", false)]
#[case::non_hex("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", false)]
#[case::path_traversal("../../../etc/passwd", false)]
#[case::empty("", false)]
fn test_is_valid_docker_sandbox_id(#[case] id: &str, #[case] expected: bool) {
assert_eq!(is_valid_docker_sandbox_id(id), expected);
}
fn make_hook_with_args(args: Vec<&str>) -> oci::Hook {
HookBuilder::default()
.path("/usr/bin/test")
.args(args.into_iter().map(String::from).collect::<Vec<_>>())
.build()
.unwrap()
}
#[rstest]
#[case::no_hooks(None, None)]
#[case::unrelated_hooks(
Some(HooksBuilder::default()
.prestart(vec![make_hook_with_args(vec!["some-hook", "arg1"])])
.build().unwrap()),
None
)]
#[case::invalid_sandbox_id(
Some(HooksBuilder::default()
.prestart(vec![make_hook_with_args(vec![
"/usr/bin/dockerd", "libnetwork-setkey", "not-a-valid-id",
])])
.build().unwrap()),
None
)]
#[case::setkey_at_end_of_args(
Some(HooksBuilder::default()
.prestart(vec![make_hook_with_args(vec![
"/usr/bin/dockerd", "libnetwork-setkey",
])])
.build().unwrap()),
None
)]
#[case::valid_prestart_but_no_file(
Some(HooksBuilder::default()
.prestart(vec![make_hook_with_args(vec![
"/usr/bin/dockerd", "libnetwork-setkey", VALID_SANDBOX_ID,
])])
.build().unwrap()),
None
)]
#[case::valid_create_runtime_but_no_file(
Some(HooksBuilder::default()
.create_runtime(vec![make_hook_with_args(vec![
"/usr/bin/dockerd", "libnetwork-setkey", VALID_SANDBOX_ID,
])])
.build().unwrap()),
None
)]
fn test_docker_netns_path(
#[case] hooks: Option<oci::Hooks>,
#[case] expected: Option<String>,
) {
let mut builder = SpecBuilder::default();
if let Some(h) = hooks {
builder = builder.hooks(h);
}
let spec = builder.build().unwrap();
assert_eq!(docker_netns_path(&spec), expected);
}
}

View File

@@ -58,6 +58,7 @@ use resource::{ResourceConfig, ResourceManager};
use runtime_spec as spec;
use std::path::Path;
use std::sync::Arc;
use std::time::Duration;
use strum::Display;
use tokio::sync::{mpsc::Sender, Mutex, RwLock};
use tracing::instrument;
@@ -973,6 +974,71 @@ impl Sandbox for VirtSandbox {
self.hypervisor.get_hypervisor_metrics().await
}
async fn rescan_network(&self) -> Result<()> {
let config = self.resource_manager.config().await;
if config.runtime.disable_new_netns {
return Ok(());
}
if dan_config_path(&config, &self.sid).exists() {
return Ok(());
}
if self.resource_manager.has_network_endpoints().await {
return Ok(());
}
let sandbox_config = match &self.sandbox_config {
Some(c) => c,
None => return Ok(()),
};
let netns_path = match &sandbox_config.network_env.netns {
Some(p) => p.clone(),
None => return Ok(()),
};
const MAX_WAIT: Duration = Duration::from_secs(5);
const POLL_INTERVAL: Duration = Duration::from_millis(50);
let deadline = tokio::time::Instant::now() + MAX_WAIT;
info!(sl!(), "waiting for network interfaces in namespace");
loop {
let network_config = NetworkConfig::NetNs(NetworkWithNetNsConfig {
network_model: config.runtime.internetworking_model.clone(),
netns_path: netns_path.clone(),
queues: self
.hypervisor
.hypervisor_config()
.await
.network_info
.network_queues as usize,
network_created: sandbox_config.network_env.network_created,
});
if let Err(e) = self.resource_manager.handle_network(network_config).await {
warn!(sl!(), "network rescan attempt failed: {:?}", e);
}
if self.resource_manager.has_network_endpoints().await {
info!(sl!(), "network interfaces discovered during rescan");
return self
.resource_manager
.setup_network_in_guest()
.await
.context("setup network in guest after rescan");
}
if tokio::time::Instant::now() >= deadline {
warn!(
sl!(),
"no network interfaces found after timeout — networking may be configured later"
);
return Ok(());
}
tokio::time::sleep(POLL_INTERVAL).await;
}
}
async fn set_policy(&self, policy: &str) -> Result<()> {
if policy.is_empty() {
debug!(sl!(), "sb: set_policy skipped without policy");

View File

@@ -635,9 +635,9 @@ func TestDockerNetnsPath(t *testing.T) {
assert := assert.New(t)
// Valid 64-char hex sandbox IDs for test cases.
validID := strings.Repeat("ab", 32) // 64 hex chars
validID2 := strings.Repeat("cd", 32) // another 64 hex chars
invalidShortID := "abc123" // too short
validID := strings.Repeat("ab", 32) // 64 hex chars
validID2 := strings.Repeat("cd", 32) // another 64 hex chars
invalidShortID := "abc123" // too short
invalidUpperID := strings.Repeat("AB", 32) // uppercase rejected
// nil spec

View File

@@ -234,7 +234,7 @@ externals:
nvrc:
# yamllint disable-line rule:line-length
desc: "The NVRC project provides a Rust binary that implements a simple init system for microVMs"
version: "v0.1.3"
version: "v0.1.4"
url: "https://github.com/NVIDIA/nvrc/releases/download/"
nvidia: