From 4e3d257dc02848a9cae695816dc35bee5e6763af Mon Sep 17 00:00:00 2001 From: Chris Ayoub Date: Tue, 23 Jun 2026 21:21:11 +0000 Subject: [PATCH] agent: Apply init subcgroup in cgroupfs manager When cgroup v2 is enabled, exec can fail with EBUSY while writing the process to cgroup.procs if the container process has been delegated to an init subcgroup. PR #10845 fixed this behavior for the systemd/D-Bus cgroup manager path, which was related to #10733. The cgroupfs manager still writes the process directly to the container cgroup, so apply the same init subcgroup handling there. Also fix the cgroupfs init-subcgroup existence check for absolute OCI cgroup paths by joining the trimmed cgroup path under the cgroup root. Fixes: #9701 Signed-off-by: Chris Ayoub Generated-By: OpenAI Codex --- src/agent/rustjail/src/cgroups/fs/mod.rs | 86 ++++++++++++++++++++---- 1 file changed, 73 insertions(+), 13 deletions(-) diff --git a/src/agent/rustjail/src/cgroups/fs/mod.rs b/src/agent/rustjail/src/cgroups/fs/mod.rs index 1549d49e39..0ddc8294e1 100644 --- a/src/agent/rustjail/src/cgroups/fs/mod.rs +++ b/src/agent/rustjail/src/cgroups/fs/mod.rs @@ -43,6 +43,8 @@ use std::sync::{Arc, RwLock}; use super::DevicesCgroupInfo; const GUEST_CPUS_PATH: &str = "/sys/devices/system/cpu/online"; +const ROOT_SUBCGROUP: &str = "/"; +const INIT_SUBCGROUP: &str = "/init/"; // Convenience function to obtain the scope logger. fn sl() -> slog::Logger { @@ -83,7 +85,17 @@ macro_rules! set_resource { impl CgroupManager for Manager { fn apply(&self, pid: pid_t) -> Result<()> { - self.cgroup.add_task_by_tgid(CgroupPid::from(pid as u64))?; + let cgroup_pid = CgroupPid::from(pid as u64); + if cgroup_has_init_subcgroup(&self.cpath) { + let cpath = self.cgroup_path_with_subcgroup(INIT_SUBCGROUP); + load_cgroup(cgroups::hierarchies::auto(), &cpath) + .add_task_by_tgid(cgroup_pid) + .with_context(|| format!("add task {} to cgroup {}", pid, cpath))?; + } else { + self.cgroup + .add_task_by_tgid(cgroup_pid) + .with_context(|| format!("add task {} to cgroup {}", pid, self.cpath))?; + } Ok(()) } @@ -1011,6 +1023,17 @@ pub fn get_paths() -> Result> { Ok(m) } +fn cgroup_path_under_root(root: impl AsRef, cpath: &str) -> PathBuf { + root.as_ref().join(cpath.trim_start_matches('/')) +} + +fn cgroup_has_init_subcgroup(cpath: &str) -> bool { + cgroups::hierarchies::is_cgroup2_unified_mode() + && cgroup_path_under_root(cgroups::hierarchies::auto().root(), cpath) + .join("init") + .exists() +} + pub fn get_mounts(paths: &HashMap) -> Result> { let mut m = HashMap::new(); @@ -1168,19 +1191,23 @@ impl Manager { } pub fn subcgroup(&self) -> &str { - // Check if we're in a Docker-in-Docker setup by verifying: + // Check if we're running with cgroup v2 delegation by verifying: // 1. We're using cgroups v2 (which restricts direct process control) - // 2. An "init" subdirectory exists (used by DinD for process delegation) - let is_dind = cgroups::hierarchies::is_cgroup2_unified_mode() - && cgroups::hierarchies::auto() - .root() - .join(&self.cpath) - .join("init") - .exists(); - if is_dind { - "/init/" + // 2. An "init" subdirectory exists for delegated process attachment + // (for example, Docker-in-Docker or systemd in a container) + if cgroup_has_init_subcgroup(&self.cpath) { + INIT_SUBCGROUP } else { - "/" + ROOT_SUBCGROUP + } + } + + fn cgroup_path_with_subcgroup(&self, subcgroup: &str) -> String { + let subcgroup = subcgroup.trim_matches('/'); + if subcgroup.is_empty() { + self.cpath.clone() + } else { + Path::new(&self.cpath).join(subcgroup).display().to_string() } } @@ -1386,6 +1413,7 @@ fn convert_memory_swap_to_v2_value(memory_swap: i64, memory: i64) -> Result #[cfg(test)] mod tests { use std::collections::HashMap; + use std::path::PathBuf; use std::process::Command; use std::sync::{Arc, RwLock}; use std::time::{SystemTime, UNIX_EPOCH}; @@ -1398,13 +1426,45 @@ mod tests { use oci_spec::runtime as oci; use test_utils::skip_if_not_root; - use super::default_allowed_devices; + use super::{cgroup_path_under_root, default_allowed_devices, load_cgroup}; use crate::cgroups::fs::{ line_to_vec, lines_to_map, Manager, DEFAULT_ALLOWED_DEVICES, WILDCARD, }; use crate::cgroups::DevicesCgroupInfo; use crate::container::DEFAULT_DEVICES; + #[test] + fn test_cgroup_path_under_root_trims_absolute_cpath() { + assert_eq!( + cgroup_path_under_root( + "/sys/fs/cgroup", + "/docker.slice/docker-containers.slice/container" + ), + PathBuf::from("/sys/fs/cgroup/docker.slice/docker-containers.slice/container") + ); + } + + #[test] + fn test_cgroup_path_with_subcgroup_preserves_absolute_cpath() { + let manager = Manager { + paths: HashMap::new(), + mounts: HashMap::new(), + cpath: "/docker.slice/docker-containers.slice/container".to_string(), + cgroup: load_cgroup(cgroups::hierarchies::auto(), "/"), + pod_cgroup: None, + devcg_allowed_all: false, + }; + + assert_eq!( + manager.cgroup_path_with_subcgroup("/init/"), + "/docker.slice/docker-containers.slice/container/init" + ); + assert_eq!( + manager.cgroup_path_with_subcgroup("/"), + "/docker.slice/docker-containers.slice/container" + ); + } + #[test] fn test_line_to_vec() { let test_cases = vec![