From 4dc288401e6b739c2b52222080144a07cf98c653 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?=
Date: Sun, 7 Jun 2026 13:38:13 +0200
Subject: [PATCH] runtime-rs: make sandbox cgroup runtime attach idempotent
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The dragonball nerdctl CI job can race when creating and attaching the
runtime process to the sandbox cgroup, surfacing an os error 17
(AlreadyExists) during shim task creation.

Let's retry add_proc once on this pre-existing cgroup condition so
startup remains robust.

Signed-off-by: Fabiano Fidêncio
Assisted-by: Codex
---
NOTE(review): the sender address is missing from the From: and
Signed-off-by: lines in this copy and could not be recovered; restore it
before running git am. Indentation of the hunks below was reconstructed
assuming rustfmt's 4-space style; confirm the context lines match the
tree (or apply with --ignore-whitespace).

 .../resource/src/cgroups/resource_inner.rs    | 47 ++++++++++++++++---
 1 file changed, 41 insertions(+), 6 deletions(-)

diff --git a/src/runtime-rs/crates/resource/src/cgroups/resource_inner.rs b/src/runtime-rs/crates/resource/src/cgroups/resource_inner.rs
index b6088bf8d0..bd56240b39 100644
--- a/src/runtime-rs/crates/resource/src/cgroups/resource_inner.rs
+++ b/src/runtime-rs/crates/resource/src/cgroups/resource_inner.rs
@@ -5,6 +5,7 @@
 //
 
 use std::collections::{HashMap, HashSet};
+use std::error::Error as _;
 use std::process;
 use std::str::FromStr;
 use std::time::Duration;
@@ -41,6 +42,36 @@ pub(crate) struct CgroupsResourceInner {
 }
 
 impl CgroupsResourceInner {
+    fn is_already_exists_error(err: &cgroups_rs::manager::Error) -> bool {
+        let mut source = err.source();
+
+        while let Some(inner_err) = source {
+            if let Some(io_err) = inner_err.downcast_ref::<std::io::Error>() {
+                if io_err.kind() == std::io::ErrorKind::AlreadyExists {
+                    return true;
+                }
+            }
+
+            source = inner_err.source();
+        }
+
+        false
+    }
+
+    fn add_proc_with_existing_retry(
+        cgroup: &mut CgroupManager,
+        pid: CgroupPid,
+        context: &str,
+    ) -> Result<()> {
+        match cgroup.add_proc(pid) {
+            Ok(_) => Ok(()),
+            Err(err) if Self::is_already_exists_error(&err) => cgroup
+                .add_proc(pid)
+                .with_context(|| format!("{context} (retry after pre-existing cgroup)")),
+            Err(err) => Err(err).context(context.to_string()),
+        }
+    }
+
     /// Create cgroup managers according to the cgroup configuration.
     ///
     /// # Returns
@@ -90,13 +121,17 @@ impl CgroupsResourceInner {
         // The runtime is prioritized to be added to the overhead cgroup.
         let pid = CgroupPid::from(process::id() as u64);
         if let Some(overhead_cgroup) = overhead_cgroup.as_mut() {
-            overhead_cgroup
-                .add_proc(pid)
-                .context("add runtime to overhead cgroup")?;
+            Self::add_proc_with_existing_retry(
+                overhead_cgroup,
+                pid,
+                "add runtime to overhead cgroup",
+            )?;
         } else {
-            sandbox_cgroup
-                .add_proc(pid)
-                .context("add runtime to sandbox cgroup")?;
+            Self::add_proc_with_existing_retry(
+                &mut sandbox_cgroup,
+                pid,
+                "add runtime to sandbox cgroup",
+            )?;
         }
 
         Ok(Self {