diff --git a/src/agent/rustjail/src/cgroups/fs/mod.rs b/src/agent/rustjail/src/cgroups/fs/mod.rs index f601f06069..e00650494c 100644 --- a/src/agent/rustjail/src/cgroups/fs/mod.rs +++ b/src/agent/rustjail/src/cgroups/fs/mod.rs @@ -18,7 +18,7 @@ use cgroups::{ DeviceResource, HugePageResource, MaxValue, NetworkPriority, }; -use crate::cgroups::{is_all_devices_rule, Manager as CgroupManager}; +use crate::cgroups::{rule_for_all_devices, Manager as CgroupManager}; use crate::container::DEFAULT_DEVICES; use anyhow::{anyhow, Context, Result}; use libc::{self, pid_t}; @@ -66,7 +66,7 @@ pub struct Manager { #[serde(skip)] pod_cgroup: Option, #[serde(skip)] - devcg_whitelist: bool, + devcg_allowed_all: bool, } // set_resource is used to set reources by cgroup controller. @@ -125,7 +125,7 @@ impl CgroupManager for Manager { } // set devices resources - if !self.devcg_whitelist { + if !self.devcg_allowed_all { set_devices_resources(&self.cgroup, &r.devices, res, pod_res); } debug!( @@ -329,7 +329,7 @@ fn set_devices_resources( let mut devices = vec![]; for d in device_resources.iter() { - if is_all_devices_rule(d) { + if rule_for_all_devices(d) { continue; } if let Some(dev) = linux_device_cgroup_to_device_resource(d) { @@ -1024,56 +1024,57 @@ impl Manager { // Cgroup path of parent of container let pod_cpath = PathBuf::from(cpath) .parent() - .context("Get parent of cgroup path")? + .unwrap_or(Path::new("/")) .display() .to_string(); - // Create a cgroup for the pod if not exists. - // Note that creating pod cgroup MUST be done before the pause - // container's cgroup created, since the upper node might have - // some excessive permissions, and children inherit upper - // node's rules. You'll feel painful to shrink upper nodes' - // permissions if the new permissions are subset of old. - pod_cgroup = Some(load_cgroup(cgroups::hierarchies::auto(), &pod_cpath)); - let pod_cg = pod_cgroup.as_ref().unwrap(); - - // The default is blacklist mode. - let is_whitelist_mode = Self::devcg_whitelist_rule(spec).unwrap_or(false); - if devices_group_info.inited { - // Do nothing for whitelist except for updating whitelist - // field of devices_group_info. - debug!(sl(), "Devices cgroup has been initialzied."); - - // Set allowed all devices to pod cgroup - if !devices_group_info.whitelist && is_whitelist_mode { - info!( - sl(), - "Pod devices cgroup is changed to whitelist mode, devices_group_info = {:?}", - devices_group_info - ); - Self::setup_whitelist_mode(pod_cg) - .with_context(|| format!("Setup whitelist mode for {}", pod_cpath))?; - devices_group_info.whitelist = true; - } + if pod_cpath.as_str() == "/" { + // Skip setting pod cgroup for cpath due to no parent path + pod_cgroup = None } else { - // This is the first container (aka pause container) - debug!(sl(), "Started to init devices cgroup"); + // Create a cgroup for the pod if not exists. + // Note that creating pod cgroup MUST be done before the pause + // container's cgroup created, since the upper node might have + // some excessive permissions, and children inherit upper + // node's rules. You'll feel painful to shrink upper nodes' + // permissions if the new permissions are subset of old. + pod_cgroup = Some(load_cgroup(cgroups::hierarchies::auto(), &pod_cpath)); + let pod_cg = pod_cgroup.as_ref().unwrap(); - pod_cg.create().context("Create pod cgroup")?; + let is_allowded_all = Self::has_allowed_all_devices_rule(spec); + if devices_group_info.inited { + debug!(sl(), "Devices cgroup has been initialzied."); - // Setup blacklist for pod cgroup - if !is_whitelist_mode { - // Setup blacklist rule and allowed default devices for - // blacklist. - Self::setup_blacklist_mode(pod_cg) - .with_context(|| format!("Setup blacklist mode for {}", pod_cpath))?; + // Set allowed all devices to pod cgroup + if !devices_group_info.allowed_all && is_allowded_all { + info!( + sl(), + "Pod devices cgroup is changed to allowed all devices mode, devices_group_info = {:?}", + devices_group_info + ); + Self::setup_allowed_all_mode(pod_cg).with_context(|| { + format!("Setup allowed all devices mode for {}", pod_cpath) + })?; + devices_group_info.allowed_all = true; + } } else { - Self::setup_whitelist_mode(pod_cg) - .with_context(|| format!("Setup whitelist mode for {}", pod_cpath))?; - devices_group_info.whitelist = true; - } + // This is the first container (aka pause container) + debug!(sl(), "Started to init devices cgroup"); - devices_group_info.inited = true + pod_cg.create().context("Create pod cgroup")?; + + if !is_allowded_all { + Self::setup_devcg_whitelist(pod_cg).with_context(|| { + format!("Setup device cgroup whitelist for {}", pod_cpath) + })?; + } else { + Self::setup_allowed_all_mode(pod_cg) + .with_context(|| format!("Setup allowed all mode for {}", pod_cpath))?; + devices_group_info.allowed_all = true; + } + + devices_group_info.inited = true + } } } else { pod_cgroup = None; @@ -1085,9 +1086,9 @@ impl Manager { // contains some permissions that the container doesn't need. // Therefore, resetting the container's devices cgroup is required. if let Some(devices_group_info) = devices_group_info.as_ref() { - if !devices_group_info.whitelist { - Self::setup_blacklist_mode(&cg) - .with_context(|| format!("Setup blacklist mode for {}", cpath))?; + if !devices_group_info.allowed_all { + Self::setup_devcg_whitelist(&cg) + .with_context(|| format!("Setup device cgroup whitelist for {}", cpath))?; } } @@ -1098,8 +1099,8 @@ impl Manager { cpath: cpath.to_string(), cgroup: cg, pod_cgroup, - devcg_whitelist: devices_group_info - .map(|info| info.whitelist) + devcg_allowed_all: devices_group_info + .map(|info| info.allowed_all) .unwrap_or(false), }) } @@ -1118,7 +1119,7 @@ impl Manager { cpath: cpath.to_string(), pod_cgroup: None, cgroup: cg, - devcg_whitelist: false, + devcg_allowed_all: false, }) } @@ -1143,7 +1144,7 @@ impl Manager { Ok((m, mounts)) } - fn setup_whitelist_mode(cgroup: &cgroups::Cgroup) -> Result<()> { + fn setup_allowed_all_mode(cgroup: &cgroups::Cgroup) -> Result<()> { // Insert two rules: `b *:* rwm` and `c *:* rwm`. // The reason of not inserting `a *:* rwm` is that the Linux kernel // will deny writing `a` to `devices.allow` once a cgroup has @@ -1183,7 +1184,10 @@ impl Manager { Ok(()) } - fn setup_blacklist_mode(cgroup: &cgroups::Cgroup) -> Result<()> { + /// Setup device cgroup whitelist: + /// - Deny all devices in order to cleanup device cgroup. + /// - Allow default devices and default allowed devices. + fn setup_devcg_whitelist(cgroup: &cgroups::Cgroup) -> Result<()> { #[allow(unused_mut)] let mut dev_res_list = vec![DeviceResource { allow: false, @@ -1212,27 +1216,22 @@ impl Manager { Ok(()) } - /// Check if OCI spec contains a whiltlist rule. - /// - /// # Returns - /// - /// * `Some(true)` - Contains whitelist all rule; - /// * `Some(false)` - Contains blacklist all rule; - /// * `None` - Neither whitelist rule nor blacklist rule found. - fn devcg_whitelist_rule(spec: &Spec) -> Option { + /// Check if OCI spec contains a rule of allowed all devices. + fn has_allowed_all_devices_rule(spec: &Spec) -> bool { let linux = match spec.linux.as_ref() { Some(linux) => linux, - None => return None, + None => return false, }; let resources = match linux.resources.as_ref() { Some(resource) => resource, - None => return None, + None => return false, }; resources .devices .iter() - .find(|dev| is_all_devices_rule(dev)) + .find(|dev| rule_for_all_devices(dev)) .map(|dev| dev.allow) + .unwrap_or_default() } } @@ -1404,7 +1403,7 @@ mod tests { struct TestCase { cpath: Vec, devices: Vec>, - whitelist: Vec, + allowed_all: Vec, pod_devices_list: Vec, container_devices_list: Vec, } @@ -1431,7 +1430,7 @@ mod tests { "/kata-agent-fs-manager-test/449ccd81-9320-4f3e-bb67-78f84700fac9", )], devices: vec![vec![allow_all.clone()]], - whitelist: vec![true], + allowed_all: vec![true], pod_devices_list: vec![String::from("a *:* rwm\n")], container_devices_list: vec![String::from("a *:* rwm\n")], }, @@ -1440,7 +1439,7 @@ mod tests { "/kata-agent-fs-manager-test/449ccd81-9320-4f3e-bb67-78f84700fac9", )], devices: vec![vec![deny_all.clone()]], - whitelist: vec![false], + allowed_all: vec![false], pod_devices_list: vec![String::new()], container_devices_list: vec![String::new()], }, @@ -1454,7 +1453,7 @@ mod tests { ), ], devices: vec![vec![deny_all.clone()], vec![allow_all.clone()]], - whitelist: vec![false, true], + allowed_all: vec![false, true], pod_devices_list: vec![String::new(), String::from("b *:* rwm\nc *:* rwm\n")], container_devices_list: vec![String::new(), String::from("b *:* rwm\nc *:* rwm\n")], }, @@ -1468,7 +1467,7 @@ mod tests { ), ], devices: vec![vec![allow_all], vec![deny_all]], - whitelist: vec![true, true], + allowed_all: vec![true, true], pod_devices_list: vec![String::from("a *:* rwm\n"), String::from("a *:* rwm\n")], container_devices_list: vec![ String::from("a *:* rwm\n"), @@ -1481,7 +1480,7 @@ mod tests { let sandbox = MockSandbox::new(); let devcg_info = sandbox.devcg_info.read().unwrap(); assert!(!devcg_info.inited); - assert!(!devcg_info.whitelist); + assert!(!devcg_info.allowed_all); drop(devcg_info); let mut managers = Vec::with_capacity(tc.devices.len()); @@ -1503,8 +1502,8 @@ mod tests { let devcg_info = sandbox.devcg_info.read().unwrap(); assert!(devcg_info.inited); assert_eq!( - devcg_info.whitelist, tc.whitelist[cid], - "Round {}, cid {} whitelist assertion failure", + devcg_info.allowed_all, tc.allowed_all[cid], + "Round {}, cid {} allowed all assertion failure", round, cid ); drop(devcg_info); diff --git a/src/agent/rustjail/src/cgroups/mod.rs b/src/agent/rustjail/src/cgroups/mod.rs index ec4727b406..7eb8a73763 100644 --- a/src/agent/rustjail/src/cgroups/mod.rs +++ b/src/agent/rustjail/src/cgroups/mod.rs @@ -22,7 +22,7 @@ pub struct DevicesCgroupInfo { inited: bool, /// Indicate if pod's devices cgroup is in whitelist mode. Returns true /// once one container requires `a *:* rwm` permission. - whitelist: bool, + allowed_all: bool, } pub trait Manager { @@ -71,15 +71,15 @@ impl Debug for dyn Manager + Send + Sync { } } -/// Check if device cgroup is an all rule from OCI spec. +/// Check if device cgroup is a rule for all devices from OCI spec. /// /// The formats representing all devices between OCI spec and cgroups-rs /// are different. -/// - OCI spec: major: 0, minor: 0, type: "" (All devices), access: "rwm"; +/// - OCI spec: major: 0, minor: 0, type: "", access: "rwm"; /// - Cgroups-rs: major: -1, minor: -1, type: "a", access: "rwm"; /// - Linux: a *:* rwm #[inline] -fn is_all_devices_rule(dev_cgroup: &LinuxDeviceCgroup) -> bool { +fn rule_for_all_devices(dev_cgroup: &LinuxDeviceCgroup) -> bool { dev_cgroup.major.unwrap_or(0) == 0 && dev_cgroup.minor.unwrap_or(0) == 0 && (dev_cgroup.r#type.as_str() == "" || dev_cgroup.r#type.as_str() == "a")