diff --git a/src/agent/Cargo.lock b/src/agent/Cargo.lock index 80632c589d..4b60694095 100644 --- a/src/agent/Cargo.lock +++ b/src/agent/Cargo.lock @@ -355,9 +355,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cgroups-rs" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b098e7c3a70d03c288fa0a96ccf13e770eb3d78c4cc0e1549b3c13215d5f965" +checksum = "1fb3af90c8d48ad5f432d8afb521b5b40c2a2fce46dd60e05912de51c47fba64" dependencies = [ "libc", "log", diff --git a/src/agent/Cargo.toml b/src/agent/Cargo.toml index 47e1d378bc..e7cb0f7c2d 100644 --- a/src/agent/Cargo.toml +++ b/src/agent/Cargo.toml @@ -53,7 +53,7 @@ cfg-if = "1.0.0" prometheus = { version = "0.13.0", features = ["process"] } procfs = "0.12.0" anyhow = "1.0.32" -cgroups = { package = "cgroups-rs", version = "0.3.2" } +cgroups = { package = "cgroups-rs", version = "0.3.3" } # Tracing tracing = "0.1.26" diff --git a/src/agent/rustjail/Cargo.toml b/src/agent/rustjail/Cargo.toml index 231fa353ab..9c3cabb72e 100644 --- a/src/agent/rustjail/Cargo.toml +++ b/src/agent/rustjail/Cargo.toml @@ -25,7 +25,7 @@ scan_fmt = "0.2.6" regex = "1.5.6" path-absolutize = "1.2.0" anyhow = "1.0.32" -cgroups = { package = "cgroups-rs", version = "0.3.2" } +cgroups = { package = "cgroups-rs", version = "0.3.3" } rlimit = "0.5.3" cfg-if = "0.1.0" diff --git a/src/agent/rustjail/src/cgroups/fs/mod.rs b/src/agent/rustjail/src/cgroups/fs/mod.rs index 6145f5f9c0..e00650494c 100644 --- a/src/agent/rustjail/src/cgroups/fs/mod.rs +++ b/src/agent/rustjail/src/cgroups/fs/mod.rs @@ -18,13 +18,13 @@ use cgroups::{ DeviceResource, HugePageResource, MaxValue, NetworkPriority, }; -use crate::cgroups::Manager as CgroupManager; +use crate::cgroups::{rule_for_all_devices, Manager as CgroupManager}; use crate::container::DEFAULT_DEVICES; use anyhow::{anyhow, Context, Result}; use libc::{self, pid_t}; use oci::{ LinuxBlockIo, LinuxCpu, LinuxDevice, LinuxDeviceCgroup, LinuxHugepageLimit, LinuxMemory, - LinuxNetwork, LinuxPids, LinuxResources, + LinuxNetwork, LinuxPids, LinuxResources, Spec, }; use protobuf::MessageField; @@ -35,7 +35,10 @@ use protocols::agent::{ use std::any::Any; use std::collections::HashMap; use std::fs; -use std::path::Path; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, RwLock}; + +use super::DevicesCgroupInfo; const GUEST_CPUS_PATH: &str = "/sys/devices/system/cpu/online"; @@ -60,6 +63,10 @@ pub struct Manager { pub cpath: String, #[serde(skip)] cgroup: cgroups::Cgroup, + #[serde(skip)] + pod_cgroup: Option, + #[serde(skip)] + devcg_allowed_all: bool, } // set_resource is used to set reources by cgroup controller. @@ -85,6 +92,7 @@ impl CgroupManager for Manager { ); let res = &mut cgroups::Resources::default(); + let pod_res = &mut cgroups::Resources::default(); // set cpuset and cpu reources if let Some(cpu) = &r.cpu { @@ -117,10 +125,18 @@ impl CgroupManager for Manager { } // set devices resources - set_devices_resources(&self.cgroup, &r.devices, res); - info!(sl(), "resources after processed {:?}", res); + if !self.devcg_allowed_all { + set_devices_resources(&self.cgroup, &r.devices, res, pod_res); + } + debug!( + sl(), + "Resources after processed, pod_res = {:?}, res = {:?}", pod_res, res + ); // apply resources + if let Some(pod_cg) = self.pod_cgroup.as_ref() { + pod_cg.apply(pod_res)?; + } self.cgroup.apply(res)?; Ok(()) @@ -179,7 +195,14 @@ impl CgroupManager for Manager { } fn destroy(&mut self) -> Result<()> { - let _ = self.cgroup.delete(); + if let Err(err) = self.cgroup.delete() { + warn!( + sl(), + "Failed to delete cgroup {}: {}", + self.cgroup.path(), + err + ); + } Ok(()) } @@ -300,28 +323,21 @@ fn set_devices_resources( _cg: &cgroups::Cgroup, device_resources: &[LinuxDeviceCgroup], res: &mut cgroups::Resources, + pod_res: &mut cgroups::Resources, ) { info!(sl(), "cgroup manager set devices"); let mut devices = vec![]; for d in device_resources.iter() { - if let Some(dev) = linux_device_group_to_cgroup_device(d) { - devices.push(dev); - } - } - - for d in DEFAULT_DEVICES.iter() { - if let Some(dev) = linux_device_to_cgroup_device(d) { - devices.push(dev); - } - } - - for d in DEFAULT_ALLOWED_DEVICES.iter() { - if let Some(dev) = linux_device_group_to_cgroup_device(d) { + if rule_for_all_devices(d) { + continue; + } + if let Some(dev) = linux_device_cgroup_to_device_resource(d) { devices.push(dev); } } + pod_res.devices.devices = devices.clone(); res.devices.devices = devices; } @@ -519,28 +535,7 @@ fn build_blk_io_device_throttle_resource( blk_io_device_throttle_resources } -fn linux_device_to_cgroup_device(d: &LinuxDevice) -> Option { - let dev_type = match DeviceType::from_char(d.r#type.chars().next()) { - Some(t) => t, - None => return None, - }; - - let permissions = vec![ - DevicePermissions::Read, - DevicePermissions::Write, - DevicePermissions::MkNod, - ]; - - Some(DeviceResource { - allow: true, - devtype: dev_type, - major: d.major, - minor: d.minor, - access: permissions, - }) -} - -fn linux_device_group_to_cgroup_device(d: &LinuxDeviceCgroup) -> Option { +fn linux_device_cgroup_to_device_resource(d: &LinuxDeviceCgroup) -> Option { let dev_type = match DeviceType::from_char(d.r#type.chars().next()) { Some(t) => t, None => return None, @@ -1001,13 +996,136 @@ pub fn get_mounts(paths: &HashMap) -> Result, path: &str) -> Result { let valid_path = path.trim_start_matches('/').to_string(); cgroups::Cgroup::new(h, valid_path.as_str()).map_err(anyhow::Error::from) } +#[inline] +fn load_cgroup(h: Box, path: &str) -> Cgroup { + let valid_path = path.trim_start_matches('/').to_string(); + cgroups::Cgroup::load(h, valid_path.as_str()) +} + impl Manager { - pub fn new(cpath: &str) -> Result { + pub fn new( + cpath: &str, + spec: &Spec, + devcg_info: Option>>, + ) -> Result { + let (paths, mounts) = Self::get_paths_and_mounts(cpath).context("Get paths and mounts")?; + + // Do not expect poisoning lock + let mut devices_group_info = devcg_info.as_ref().map(|i| i.write().unwrap()); + let pod_cgroup: Option; + + if let Some(devices_group_info) = devices_group_info.as_mut() { + // Cgroup path of parent of container + let pod_cpath = PathBuf::from(cpath) + .parent() + .unwrap_or(Path::new("/")) + .display() + .to_string(); + + if pod_cpath.as_str() == "/" { + // Skip setting pod cgroup for cpath due to no parent path + pod_cgroup = None + } else { + // Create a cgroup for the pod if not exists. + // Note that creating pod cgroup MUST be done before the pause + // container's cgroup created, since the upper node might have + // some excessive permissions, and children inherit upper + // node's rules. You'll feel painful to shrink upper nodes' + // permissions if the new permissions are subset of old. + pod_cgroup = Some(load_cgroup(cgroups::hierarchies::auto(), &pod_cpath)); + let pod_cg = pod_cgroup.as_ref().unwrap(); + + let is_allowded_all = Self::has_allowed_all_devices_rule(spec); + if devices_group_info.inited { + debug!(sl(), "Devices cgroup has been initialzied."); + + // Set allowed all devices to pod cgroup + if !devices_group_info.allowed_all && is_allowded_all { + info!( + sl(), + "Pod devices cgroup is changed to allowed all devices mode, devices_group_info = {:?}", + devices_group_info + ); + Self::setup_allowed_all_mode(pod_cg).with_context(|| { + format!("Setup allowed all devices mode for {}", pod_cpath) + })?; + devices_group_info.allowed_all = true; + } + } else { + // This is the first container (aka pause container) + debug!(sl(), "Started to init devices cgroup"); + + pod_cg.create().context("Create pod cgroup")?; + + if !is_allowded_all { + Self::setup_devcg_whitelist(pod_cg).with_context(|| { + format!("Setup device cgroup whitelist for {}", pod_cpath) + })?; + } else { + Self::setup_allowed_all_mode(pod_cg) + .with_context(|| format!("Setup allowed all mode for {}", pod_cpath))?; + devices_group_info.allowed_all = true; + } + + devices_group_info.inited = true + } + } + } else { + pod_cgroup = None; + } + + // Create a cgroup for the container. + let cg = new_cgroup(cgroups::hierarchies::auto(), cpath)?; + // The rules of container cgroup are copied from its parent, which + // contains some permissions that the container doesn't need. + // Therefore, resetting the container's devices cgroup is required. + if let Some(devices_group_info) = devices_group_info.as_ref() { + if !devices_group_info.allowed_all { + Self::setup_devcg_whitelist(&cg) + .with_context(|| format!("Setup device cgroup whitelist for {}", cpath))?; + } + } + + Ok(Self { + paths, + mounts, + // rels: paths, + cpath: cpath.to_string(), + cgroup: cg, + pod_cgroup, + devcg_allowed_all: devices_group_info + .map(|info| info.allowed_all) + .unwrap_or(false), + }) + } + + /// Create a cgroupfs manager for systemd cgroup. + /// The device cgroup is disabled in systemd cgroup, given that it is + /// implemented by eBPF. + pub fn new_systemd(cpath: &str) -> Result { + let (paths, mounts) = Self::get_paths_and_mounts(cpath).context("Get paths and mounts")?; + + let cg = new_cgroup(cgroups::hierarchies::auto(), cpath)?; + + Ok(Self { + paths, + mounts, + cpath: cpath.to_string(), + pod_cgroup: None, + cgroup: cg, + devcg_allowed_all: false, + }) + } + + fn get_paths_and_mounts( + cpath: &str, + ) -> Result<(HashMap, HashMap)> { let mut m = HashMap::new(); let paths = get_paths()?; @@ -1020,21 +1138,140 @@ impl Manager { continue; } - let p = format!("{}/{}", mnt.unwrap(), cpath); - - m.insert(key.to_string(), p); + m.insert(key.to_string(), format!("{}/{}", mnt.unwrap(), cpath)); } - let cg = new_cgroup(cgroups::hierarchies::auto(), cpath)?; - - Ok(Self { - paths: m, - mounts, - // rels: paths, - cpath: cpath.to_string(), - cgroup: cg, - }) + Ok((m, mounts)) } + + fn setup_allowed_all_mode(cgroup: &cgroups::Cgroup) -> Result<()> { + // Insert two rules: `b *:* rwm` and `c *:* rwm`. + // The reason of not inserting `a *:* rwm` is that the Linux kernel + // will deny writing `a` to `devices.allow` once a cgroup has + // children. You can refer to + // https://www.kernel.org/doc/Documentation/cgroup-v1/devices.txt. + let res = cgroups::Resources { + devices: cgroups::DeviceResources { + devices: vec![ + DeviceResource { + allow: true, + devtype: DeviceType::Block, + major: -1, + minor: -1, + access: vec![ + DevicePermissions::Read, + DevicePermissions::Write, + DevicePermissions::MkNod, + ], + }, + DeviceResource { + allow: true, + devtype: DeviceType::Char, + major: -1, + minor: -1, + access: vec![ + DevicePermissions::Read, + DevicePermissions::Write, + DevicePermissions::MkNod, + ], + }, + ], + }, + ..Default::default() + }; + cgroup.apply(&res)?; + + Ok(()) + } + + /// Setup device cgroup whitelist: + /// - Deny all devices in order to cleanup device cgroup. + /// - Allow default devices and default allowed devices. + fn setup_devcg_whitelist(cgroup: &cgroups::Cgroup) -> Result<()> { + #[allow(unused_mut)] + let mut dev_res_list = vec![DeviceResource { + allow: false, + devtype: DeviceType::All, + major: -1, + minor: -1, + access: vec![ + DevicePermissions::Read, + DevicePermissions::Write, + DevicePermissions::MkNod, + ], + }]; + // Do not append default allowed devices for simplicity while + // testing. + #[cfg(not(test))] + dev_res_list.append(&mut default_allowed_devices()); + + let res = cgroups::Resources { + devices: cgroups::DeviceResources { + devices: dev_res_list, + }, + ..Default::default() + }; + cgroup.apply(&res)?; + + Ok(()) + } + + /// Check if OCI spec contains a rule of allowed all devices. + fn has_allowed_all_devices_rule(spec: &Spec) -> bool { + let linux = match spec.linux.as_ref() { + Some(linux) => linux, + None => return false, + }; + let resources = match linux.resources.as_ref() { + Some(resource) => resource, + None => return false, + }; + resources + .devices + .iter() + .find(|dev| rule_for_all_devices(dev)) + .map(|dev| dev.allow) + .unwrap_or_default() + } +} + +/// Generate a list for allowed devices including `DEFAULT_DEVICES` and +/// `DEFAULT_ALLOWED_DEVICES`. +fn default_allowed_devices() -> Vec { + let mut dev_res_list = Vec::new(); + DEFAULT_DEVICES.iter().for_each(|dev| { + if let Some(dev_res) = linux_device_to_device_resource(dev) { + dev_res_list.push(dev_res) + } + }); + DEFAULT_ALLOWED_DEVICES.iter().for_each(|dev| { + if let Some(dev_res) = linux_device_cgroup_to_device_resource(dev) { + dev_res_list.push(dev_res) + } + }); + dev_res_list +} + +/// Convert LinuxDevice to DeviceResource. +fn linux_device_to_device_resource(d: &LinuxDevice) -> Option { + let dev_type = match DeviceType::from_char(d.r#type.chars().next()) { + Some(t) => t, + None => return None, + }; + + let permissions = vec![ + DevicePermissions::Read, + DevicePermissions::Write, + DevicePermissions::MkNod, + ]; + + Some(DeviceResource { + allow: true, + devtype: dev_type, + major: d.major, + minor: d.minor, + access: permissions, + }) } // get the guest's online cpus. @@ -1085,7 +1322,20 @@ fn convert_memory_swap_to_v2_value(memory_swap: i64, memory: i64) -> Result #[cfg(test)] mod tests { - use super::*; + use std::collections::HashMap; + use std::process::Command; + use std::sync::{Arc, RwLock}; + + use cgroups::devices::{DevicePermissions, DeviceType}; + use oci::{Linux, LinuxDeviceCgroup, LinuxResources, Spec}; + use test_utils::skip_if_not_root; + + use super::default_allowed_devices; + use crate::cgroups::fs::{ + line_to_vec, lines_to_map, Manager, DEFAULT_ALLOWED_DEVICES, WILDCARD, + }; + use crate::cgroups::DevicesCgroupInfo; + use crate::container::DEFAULT_DEVICES; #[test] fn test_line_to_vec() { @@ -1133,4 +1383,192 @@ mod tests { ); } } + + struct MockSandbox { + devcg_info: Arc>, + } + + impl MockSandbox { + fn new() -> Self { + Self { + devcg_info: Arc::new(RwLock::new(DevicesCgroupInfo::default())), + } + } + } + + #[test] + fn test_new_fs_manager() { + skip_if_not_root!(); + + struct TestCase { + cpath: Vec, + devices: Vec>, + allowed_all: Vec, + pod_devices_list: Vec, + container_devices_list: Vec, + } + + let allow_all = LinuxDeviceCgroup { + allow: true, + r#type: String::new(), + major: Some(0), + minor: Some(0), + access: String::from("rwm"), + }; + + let deny_all = LinuxDeviceCgroup { + allow: false, + r#type: String::new(), + major: Some(0), + minor: Some(0), + access: String::from("rwm"), + }; + + let test_cases = vec![ + TestCase { + cpath: vec![String::from( + "/kata-agent-fs-manager-test/449ccd81-9320-4f3e-bb67-78f84700fac9", + )], + devices: vec![vec![allow_all.clone()]], + allowed_all: vec![true], + pod_devices_list: vec![String::from("a *:* rwm\n")], + container_devices_list: vec![String::from("a *:* rwm\n")], + }, + TestCase { + cpath: vec![String::from( + "/kata-agent-fs-manager-test/449ccd81-9320-4f3e-bb67-78f84700fac9", + )], + devices: vec![vec![deny_all.clone()]], + allowed_all: vec![false], + pod_devices_list: vec![String::new()], + container_devices_list: vec![String::new()], + }, + TestCase { + cpath: vec![ + String::from( + "/kata-agent-fs-manager-test/449ccd81-9320-4f3e-bb67-78f84700fac9", + ), + String::from( + "/kata-agent-fs-manager-test/1c7affca-1f65-427c-ba92-caff1cea61f6", + ), + ], + devices: vec![vec![deny_all.clone()], vec![allow_all.clone()]], + allowed_all: vec![false, true], + pod_devices_list: vec![String::new(), String::from("b *:* rwm\nc *:* rwm\n")], + container_devices_list: vec![String::new(), String::from("b *:* rwm\nc *:* rwm\n")], + }, + TestCase { + cpath: vec![ + String::from( + "/kata-agent-fs-manager-test/449ccd81-9320-4f3e-bb67-78f84700fac9", + ), + String::from( + "/kata-agent-fs-manager-test/1c7affca-1f65-427c-ba92-caff1cea61f6", + ), + ], + devices: vec![vec![allow_all], vec![deny_all]], + allowed_all: vec![true, true], + pod_devices_list: vec![String::from("a *:* rwm\n"), String::from("a *:* rwm\n")], + container_devices_list: vec![ + String::from("a *:* rwm\n"), + String::from("a *:* rwm\n"), + ], + }, + ]; + + for (round, tc) in test_cases.iter().enumerate() { + let sandbox = MockSandbox::new(); + let devcg_info = sandbox.devcg_info.read().unwrap(); + assert!(!devcg_info.inited); + assert!(!devcg_info.allowed_all); + drop(devcg_info); + let mut managers = Vec::with_capacity(tc.devices.len()); + + for cid in 0..tc.devices.len() { + let spec = Spec { + linux: Some(Linux { + resources: Some(LinuxResources { + devices: tc.devices[cid].clone(), + ..Default::default() + }), + ..Default::default() + }), + ..Default::default() + }; + managers.push( + Manager::new(&tc.cpath[cid], &spec, Some(sandbox.devcg_info.clone())).unwrap(), + ); + + let devcg_info = sandbox.devcg_info.read().unwrap(); + assert!(devcg_info.inited); + assert_eq!( + devcg_info.allowed_all, tc.allowed_all[cid], + "Round {}, cid {} allowed all assertion failure", + round, cid + ); + drop(devcg_info); + + let pod_devices_list = Command::new("cat") + .arg("/sys/fs/cgroup/devices/kata-agent-fs-manager-test/devices.list") + .output() + .unwrap(); + let container_devices_list = Command::new("cat") + .arg(&format!( + "/sys/fs/cgroup/devices{}/devices.list", + tc.cpath[cid] + )) + .output() + .unwrap(); + + let pod_devices_list = String::from_utf8(pod_devices_list.stdout).unwrap(); + let container_devices_list = + String::from_utf8(container_devices_list.stdout).unwrap(); + + assert_eq!(&pod_devices_list, &tc.pod_devices_list[cid]); + assert_eq!(&container_devices_list, &tc.container_devices_list[cid]) + } + + // Clean up cgroups + managers + .iter() + .for_each(|manager| manager.cgroup.delete().unwrap()); + // The pod_cgroup must not be None + managers[0].pod_cgroup.as_ref().unwrap().delete().unwrap(); + } + } + + #[test] + fn test_default_allowed_devices() { + let allowed_devices = default_allowed_devices(); + assert_eq!( + allowed_devices.len(), + DEFAULT_DEVICES.len() + DEFAULT_ALLOWED_DEVICES.len() + ); + + let allowed_permissions = vec![ + DevicePermissions::Read, + DevicePermissions::Write, + DevicePermissions::MkNod, + ]; + + let default_devices_0 = &allowed_devices[0]; + assert!(default_devices_0.allow); + assert_eq!(default_devices_0.devtype, DeviceType::Char); + assert_eq!(default_devices_0.major, 1); + assert_eq!(default_devices_0.minor, 3); + assert!(default_devices_0 + .access + .iter() + .all(|&p| allowed_permissions.iter().any(|&ap| ap == p))); + + let default_allowed_devices_0 = &allowed_devices[DEFAULT_DEVICES.len()]; + assert!(default_allowed_devices_0.allow); + assert_eq!(default_allowed_devices_0.devtype, DeviceType::Char); + assert_eq!(default_allowed_devices_0.major, WILDCARD); + assert_eq!(default_allowed_devices_0.minor, WILDCARD); + assert_eq!( + default_allowed_devices_0.access, + vec![DevicePermissions::MkNod] + ); + } } diff --git a/src/agent/rustjail/src/cgroups/mock.rs b/src/agent/rustjail/src/cgroups/mock.rs index 8ac77c63b2..ce1cfb2bc7 100644 --- a/src/agent/rustjail/src/cgroups/mock.rs +++ b/src/agent/rustjail/src/cgroups/mock.rs @@ -10,10 +10,13 @@ use crate::protocols::agent::{BlkioStats, CgroupStats, CpuStats, MemoryStats, Pi use anyhow::Result; use cgroups::freezer::FreezerState; use libc::{self, pid_t}; -use oci::LinuxResources; +use oci::{LinuxResources, Spec}; use std::any::Any; use std::collections::HashMap; use std::string::String; +use std::sync::{Arc, RwLock}; + +use super::DevicesCgroupInfo; #[derive(Serialize, Deserialize, Debug, Clone)] pub struct Manager { @@ -72,7 +75,11 @@ impl CgroupManager for Manager { } impl Manager { - pub fn new(cpath: &str) -> Result { + pub fn new( + cpath: &str, + _spec: &Spec, + _devcg_info: Option>>, + ) -> Result { Ok(Self { paths: HashMap::new(), mounts: HashMap::new(), diff --git a/src/agent/rustjail/src/cgroups/mod.rs b/src/agent/rustjail/src/cgroups/mod.rs index c4e3b178b5..7eb8a73763 100644 --- a/src/agent/rustjail/src/cgroups/mod.rs +++ b/src/agent/rustjail/src/cgroups/mod.rs @@ -5,7 +5,7 @@ use anyhow::{anyhow, Result}; use core::fmt::Debug; -use oci::LinuxResources; +use oci::{LinuxDeviceCgroup, LinuxResources}; use protocols::agent::CgroupStats; use std::any::Any; @@ -16,6 +16,15 @@ pub mod mock; pub mod notifier; pub mod systemd; +#[derive(Default, Debug)] +pub struct DevicesCgroupInfo { + /// Indicate if the pod cgroup is initialized. + inited: bool, + /// Indicate if pod's devices cgroup is in whitelist mode. Returns true + /// once one container requires `a *:* rwm` permission. + allowed_all: bool, +} + pub trait Manager { fn apply(&self, _pid: i32) -> Result<()> { Err(anyhow!("not supported!".to_string())) @@ -61,3 +70,20 @@ impl Debug for dyn Manager + Send + Sync { write!(f, "{}", self.name()) } } + +/// Check if device cgroup is a rule for all devices from OCI spec. +/// +/// The formats representing all devices between OCI spec and cgroups-rs +/// are different. +/// - OCI spec: major: 0, minor: 0, type: "", access: "rwm"; +/// - Cgroups-rs: major: -1, minor: -1, type: "a", access: "rwm"; +/// - Linux: a *:* rwm +#[inline] +fn rule_for_all_devices(dev_cgroup: &LinuxDeviceCgroup) -> bool { + dev_cgroup.major.unwrap_or(0) == 0 + && dev_cgroup.minor.unwrap_or(0) == 0 + && (dev_cgroup.r#type.as_str() == "" || dev_cgroup.r#type.as_str() == "a") + && dev_cgroup.access.contains('r') + && dev_cgroup.access.contains('w') + && dev_cgroup.access.contains('m') +} diff --git a/src/agent/rustjail/src/cgroups/systemd/manager.rs b/src/agent/rustjail/src/cgroups/systemd/manager.rs index 19be1c9610..b4974d2bb7 100644 --- a/src/agent/rustjail/src/cgroups/systemd/manager.rs +++ b/src/agent/rustjail/src/cgroups/systemd/manager.rs @@ -113,7 +113,7 @@ impl Manager { let (parent_slice, unit_name) = cgroups_path.parse()?; let cpath = parent_slice + "/" + &unit_name; - let fs_manager = FsManager::new(cpath.as_str())?; + let fs_manager = FsManager::new_systemd(cpath.as_str())?; Ok(Manager { paths: fs_manager.paths.clone(), diff --git a/src/agent/rustjail/src/container.rs b/src/agent/rustjail/src/container.rs index e17cd32820..897c5a3c86 100644 --- a/src/agent/rustjail/src/container.rs +++ b/src/agent/rustjail/src/container.rs @@ -23,7 +23,7 @@ use crate::cgroups::fs::Manager as FsManager; #[cfg(test)] use crate::cgroups::mock::Manager as FsManager; use crate::cgroups::systemd::manager::Manager as SystemdManager; -use crate::cgroups::Manager; +use crate::cgroups::{DevicesCgroupInfo, Manager}; #[cfg(feature = "standard-oci-runtime")] use crate::console; use crate::log_child; @@ -55,7 +55,7 @@ use regex::Regex; use std::collections::HashMap; use std::os::unix::io::FromRawFd; use std::str::FromStr; -use std::sync::Arc; +use std::sync::{Arc, RwLock}; use slog::{info, o, Logger}; @@ -136,7 +136,7 @@ lazy_static! { m }; -// type to name hashmap, better to be in NAMESPACES + // type to name hashmap, better to be in NAMESPACES pub static ref TYPETONAME: HashMap<&'static str, &'static str> = { let mut m = HashMap::new(); m.insert("ipc", "ipc"); @@ -1494,6 +1494,7 @@ impl LinuxContainer { pub fn new + Display + Clone>( id: T, base: T, + devcg_info: Option>>, config: Config, logger: &Logger, ) -> Result { @@ -1535,19 +1536,12 @@ impl LinuxContainer { }; let cgroup_manager: Box = if config.use_systemd_cgroup { - Box::new(SystemdManager::new(cpath.as_str()).map_err(|e| { - anyhow!(format!( - "fail to create cgroup manager with path {}: {:}", - cpath, e - )) - })?) + Box::new(SystemdManager::new(cpath.as_str()).context("Create systemd manager")?) } else { - Box::new(FsManager::new(cpath.as_str()).map_err(|e| { - anyhow!(format!( - "fail to create cgroup manager with path {}: {:}", - cpath, e - )) - })?) + Box::new( + FsManager::new(cpath.as_str(), spec, devcg_info) + .context("Create cgroupfs manager")?, + ) }; info!(logger, "new cgroup_manager {:?}", &cgroup_manager); @@ -1611,12 +1605,16 @@ mod tests { use super::*; use crate::process::Process; use nix::unistd::Uid; + use oci::{LinuxDeviceCgroup, Root}; use std::fs; use std::os::unix::fs::MetadataExt; use std::os::unix::io::AsRawFd; + use std::time::UNIX_EPOCH; use tempfile::tempdir; use test_utils::skip_if_not_root; + const CGROUP_PARENT: &str = "kata.agent.test.k8s.io"; + fn sl() -> slog::Logger { slog_scope::logger() } @@ -1717,13 +1715,42 @@ mod tests { } fn create_dummy_opts() -> CreateOpts { - let mut root = oci::Root::default(); - root.path = "/tmp".to_string(); + let start = SystemTime::now(); + let since_the_epoch = start + .duration_since(UNIX_EPOCH) + .expect("Time went backwards"); - let linux = Linux::default(); - let mut spec = Spec::default(); - spec.root = Some(root).into(); - spec.linux = Some(linux).into(); + let root = Root { + path: String::from("/tmp"), + ..Default::default() + }; + + let linux_resources = LinuxResources { + devices: vec![LinuxDeviceCgroup { + allow: true, + r#type: String::new(), + major: None, + minor: None, + access: String::from("rwm"), + }], + ..Default::default() + }; + + let cgroups_path = format!( + "/{}/dummycontainer{}", + CGROUP_PARENT, + since_the_epoch.as_millis() + ); + + let spec = Spec { + linux: Some(Linux { + cgroups_path, + resources: Some(linux_resources), + ..Default::default() + }), + root: Some(root), + ..Default::default() + }; CreateOpts { cgroup_name: "".to_string(), @@ -1748,6 +1775,7 @@ mod tests { LinuxContainer::new( "some_id", &dir.path().join("rootfs").to_str().unwrap(), + None, create_dummy_opts(), &slog_scope::logger(), ), @@ -1777,9 +1805,10 @@ mod tests { #[test] fn test_linuxcontainer_pause() { let ret = new_linux_container_and_then(|mut c: LinuxContainer| { - c.cgroup_manager = Box::new(FsManager::new("").map_err(|e| { - anyhow!(format!("fail to create cgroup manager with path: {:}", e)) - })?); + c.cgroup_manager = + Box::new(FsManager::new("", &Spec::default(), None).map_err(|e| { + anyhow!(format!("fail to create cgroup manager with path: {:}", e)) + })?); c.pause().map_err(|e| anyhow!(e)) }); @@ -1801,9 +1830,10 @@ mod tests { #[test] fn test_linuxcontainer_resume() { let ret = new_linux_container_and_then(|mut c: LinuxContainer| { - c.cgroup_manager = Box::new(FsManager::new("").map_err(|e| { - anyhow!(format!("fail to create cgroup manager with path: {:}", e)) - })?); + c.cgroup_manager = + Box::new(FsManager::new("", &Spec::default(), None).map_err(|e| { + anyhow!(format!("fail to create cgroup manager with path: {:}", e)) + })?); // Change status to paused, this way we can resume it c.status.transition(ContainerState::Paused); c.resume().map_err(|e| anyhow!(e)) diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index 4e91bc1759..5587e77991 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -11,6 +11,7 @@ use std::fmt; use std::fs; use std::os::unix::ffi::OsStrExt; use std::os::unix::fs::MetadataExt; +use std::os::unix::prelude::FileTypeExt; use std::path::{Path, PathBuf}; use std::str::FromStr; use std::sync::Arc; @@ -31,7 +32,6 @@ fn sl() -> slog::Logger { slog_scope::logger().new(o!("subsystem" => "device")) } -const VM_ROOTFS: &str = "/"; const BLOCK: &str = "block"; pub const DRIVER_9P_TYPE: &str = "9p"; pub const DRIVER_VIRTIOFS_TYPE: &str = "virtio-fs"; @@ -515,25 +515,53 @@ fn scan_scsi_bus(scsi_addr: &str) -> Result<()> { } #[derive(Debug, Clone)] -struct DevNumUpdate { - // the major and minor numbers for the device within the guest +pub struct DeviceInfo { + // Device type, "b" for block device and "c" for character device + cgroup_type: String, + // The major and minor numbers for the device within the guest guest_major: i64, guest_minor: i64, } -impl DevNumUpdate { - fn from_vm_path>(vm_path: T) -> Result { - let vm_path = vm_path.as_ref(); +impl DeviceInfo { + /// Create a device info. + /// + /// # Arguments + /// + /// * `vm_path` - Device's vm path. + /// * `is_rdev` - If the vm_path is a device, set to true. If the + /// vm_path is a file in a device, set to false. + pub fn new(vm_path: &str, is_rdev: bool) -> Result { + let cgroup_type; + let devid; + let vm_path = PathBuf::from(vm_path); if !vm_path.exists() { return Err(anyhow!("VM device path {:?} doesn't exist", vm_path)); } - let devid = fs::metadata(vm_path)?.rdev(); + let metadata = fs::metadata(&vm_path)?; + + if is_rdev { + devid = metadata.rdev(); + let file_type = metadata.file_type(); + if file_type.is_block_device() { + cgroup_type = String::from("b"); + } else if file_type.is_char_device() { + cgroup_type = String::from("c"); + } else { + return Err(anyhow!("Unknown device {:?}'s cgroup type", vm_path)); + } + } else { + devid = metadata.dev(); + cgroup_type = String::from("b"); + } + let guest_major = stat::major(devid) as i64; let guest_minor = stat::minor(devid) as i64; - Ok(DevNumUpdate { + Ok(DeviceInfo { + cgroup_type, guest_major, guest_minor, }) @@ -544,25 +572,25 @@ impl DevNumUpdate { // spec needed for a particular device #[derive(Debug, Clone)] struct DevUpdate { - num: DevNumUpdate, + info: DeviceInfo, // an optional new path to update the device to in the "inner" container // specification final_path: Option, } impl DevUpdate { - fn from_vm_path>(vm_path: T, final_path: String) -> Result { + fn new(vm_path: &str, final_path: &str) -> Result { Ok(DevUpdate { - final_path: Some(final_path), - ..DevNumUpdate::from_vm_path(vm_path)?.into() + final_path: Some(final_path.to_owned()), + ..DeviceInfo::new(vm_path, true)?.into() }) } } -impl From for DevUpdate { - fn from(num: DevNumUpdate) -> Self { +impl From for DevUpdate { + fn from(info: DeviceInfo) -> Self { DevUpdate { - num, + info, final_path: None, } } @@ -596,7 +624,7 @@ fn update_spec_devices(spec: &mut Spec, mut updates: HashMap<&str, DevUpdate>) - .linux .as_mut() .ok_or_else(|| anyhow!("Spec didn't contain linux field"))?; - let mut res_updates = HashMap::<(&str, i64, i64), DevNumUpdate>::with_capacity(updates.len()); + let mut res_updates = HashMap::<(&str, i64, i64), DeviceInfo>::with_capacity(updates.len()); for specdev in &mut linux.devices { if let Some(update) = updates.remove(specdev.path.as_str()) { @@ -610,13 +638,13 @@ fn update_spec_devices(spec: &mut Spec, mut updates: HashMap<&str, DevUpdate>) - "type" => &specdev.r#type, "host_major" => host_major, "host_minor" => host_minor, - "guest_major" => update.num.guest_major, - "guest_minor" => update.num.guest_minor, + "guest_major" => update.info.guest_major, + "guest_minor" => update.info.guest_minor, "final_path" => update.final_path.as_ref(), ); - specdev.major = update.num.guest_major; - specdev.minor = update.num.guest_minor; + specdev.major = update.info.guest_major; + specdev.minor = update.info.guest_minor; if let Some(final_path) = update.final_path { specdev.path = final_path; } @@ -624,7 +652,7 @@ fn update_spec_devices(spec: &mut Spec, mut updates: HashMap<&str, DevUpdate>) - if res_updates .insert( (specdev.r#type.as_str(), host_major, host_minor), - update.num, + update.info, ) .is_some() { @@ -728,7 +756,9 @@ async fn virtiommio_blk_device_handler( .context("failed to get mmio device name")?; } - Ok(DevNumUpdate::from_vm_path(&device.vm_path)?.into()) + Ok(DeviceInfo::new(device.vm_path(), true) + .context("New device info")? + .into()) } // device.Id should be a PCI path string @@ -740,7 +770,9 @@ async fn virtio_blk_device_handler( let pcipath = pci::Path::from_str(&device.id)?; let vm_path = get_virtio_blk_pci_device_name(sandbox, &pcipath).await?; - Ok(DevNumUpdate::from_vm_path(vm_path)?.into()) + Ok(DeviceInfo::new(&vm_path, true) + .context("New device info")? + .into()) } // device.id should be a CCW path string @@ -753,7 +785,7 @@ async fn virtio_blk_ccw_device_handler( let ccw_device = ccw::Device::from_str(&device.id)?; let vm_path = get_virtio_blk_ccw_device_name(sandbox, &ccw_device).await?; - Ok(DevNumUpdate::from_vm_path(vm_path)?.into()) + Ok(DeviceInfo::new(&vm_path, true)?.into()) } #[cfg(not(target_arch = "s390x"))] @@ -770,7 +802,9 @@ async fn virtio_scsi_device_handler( ) -> Result { let vm_path = get_scsi_device_name(sandbox, &device.id).await?; - Ok(DevNumUpdate::from_vm_path(vm_path)?.into()) + Ok(DeviceInfo::new(&vm_path, true) + .context("New device info")? + .into()) } #[instrument] @@ -782,7 +816,9 @@ async fn virtio_nvdimm_device_handler( return Err(anyhow!("Invalid path for nvdimm device")); } - Ok(DevNumUpdate::from_vm_path(&device.vm_path)?.into()) + Ok(DeviceInfo::new(device.vm_path(), true) + .context("New device info")? + .into()) } fn split_vfio_pci_option(opt: &str) -> Option<(&str, &str)> { @@ -842,7 +878,7 @@ async fn vfio_pci_device_handler( let vm_path = get_vfio_device_name(sandbox, group).await?; - Some(DevUpdate::from_vm_path(&vm_path, vm_path.clone())?) + Some(DevUpdate::new(&vm_path, &vm_path)?) } else { None }; @@ -887,7 +923,7 @@ pub async fn add_devices( let update = add_device(device, sandbox).await?; if let Some(dev_update) = update.dev { if dev_updates - .insert(&device.container_path, dev_update) + .insert(&device.container_path, dev_update.clone()) .is_some() { return Err(anyhow!( @@ -907,6 +943,10 @@ pub async fn add_devices( )); } } + + // Update cgroup to allow all devices added to guest. + insert_devices_cgroup_rule(spec, &dev_update.info, true, "rwm") + .context("Update device cgroup")?; } } @@ -948,16 +988,14 @@ async fn add_device(device: &Device, sandbox: &Arc>) -> Result Result<()> { - let meta = fs::metadata(VM_ROOTFS)?; - let rdev = meta.dev(); - let major = stat::major(rdev) as i64; - let minor = stat::minor(rdev) as i64; - +pub fn insert_devices_cgroup_rule( + spec: &mut Spec, + dev_info: &DeviceInfo, + allow: bool, + access: &str, +) -> Result<()> { let linux = spec .linux .as_mut() @@ -965,13 +1003,25 @@ pub fn update_device_cgroup(spec: &mut Spec) -> Result<()> { let resources = linux.resources.get_or_insert(LinuxResources::default()); - resources.devices.push(LinuxDeviceCgroup { - allow: false, - major: Some(major), - minor: Some(minor), - r#type: String::from("b"), - access: String::from("rw"), - }); + let cgroup = LinuxDeviceCgroup { + allow, + major: Some(dev_info.guest_major), + minor: Some(dev_info.guest_minor), + r#type: dev_info.cgroup_type.clone(), + access: access.to_owned(), + }; + + debug!( + sl(), + "Insert a devices cgroup rule"; + "linux_device_cgroup" => cgroup.allow, + "guest_major" => cgroup.major, + "guest_minor" => cgroup.minor, + "type" => cgroup.r#type.as_str(), + "access" => cgroup.access.as_str(), + ); + + resources.devices.push(cgroup); Ok(()) } @@ -984,6 +1034,8 @@ mod tests { use std::iter::FromIterator; use tempfile::tempdir; + const VM_ROOTFS: &str = "/"; + #[test] fn test_update_device_cgroup() { let mut spec = Spec { @@ -991,7 +1043,8 @@ mod tests { ..Default::default() }; - update_device_cgroup(&mut spec).unwrap(); + let dev_info = DeviceInfo::new(VM_ROOTFS, false).unwrap(); + insert_devices_cgroup_rule(&mut spec, &dev_info, false, "rw").unwrap(); let devices = spec.linux.unwrap().resources.unwrap().devices; assert_eq!(devices.len(), 1); @@ -1011,7 +1064,7 @@ mod tests { let mut spec = Spec::default(); // vm_path empty - let update = DevNumUpdate::from_vm_path(""); + let update = DeviceInfo::new("", true); assert!(update.is_err()); // linux is empty @@ -1021,7 +1074,7 @@ mod tests { &mut spec, HashMap::from_iter(vec![( container_path, - DevNumUpdate::from_vm_path(vm_path).unwrap().into(), + DeviceInfo::new(vm_path, true).unwrap().into(), )]), ); assert!(res.is_err()); @@ -1033,7 +1086,7 @@ mod tests { &mut spec, HashMap::from_iter(vec![( container_path, - DevNumUpdate::from_vm_path(vm_path).unwrap().into(), + DeviceInfo::new(vm_path, true).unwrap().into(), )]), ); assert!(res.is_err()); @@ -1050,7 +1103,7 @@ mod tests { &mut spec, HashMap::from_iter(vec![( container_path, - DevNumUpdate::from_vm_path(vm_path).unwrap().into(), + DeviceInfo::new(vm_path, true).unwrap().into(), )]), ); assert!( @@ -1068,7 +1121,7 @@ mod tests { &mut spec, HashMap::from_iter(vec![( container_path, - DevNumUpdate::from_vm_path(vm_path).unwrap().into(), + DeviceInfo::new(vm_path, true).unwrap().into(), )]), ); assert!(res.is_ok()); @@ -1094,7 +1147,7 @@ mod tests { &mut spec, HashMap::from_iter(vec![( container_path, - DevNumUpdate::from_vm_path(vm_path).unwrap().into(), + DeviceInfo::new(vm_path, true).unwrap().into(), )]), ); assert!(res.is_ok()); @@ -1178,11 +1231,11 @@ mod tests { let updates = HashMap::from_iter(vec![ ( container_path_a, - DevNumUpdate::from_vm_path(vm_path_a).unwrap().into(), + DeviceInfo::new(vm_path_a, true).unwrap().into(), ), ( container_path_b, - DevNumUpdate::from_vm_path(vm_path_b).unwrap().into(), + DeviceInfo::new(vm_path_b, true).unwrap().into(), ), ]); let res = update_spec_devices(&mut spec, updates); @@ -1263,7 +1316,7 @@ mod tests { &mut spec, HashMap::from_iter(vec![( container_path, - DevNumUpdate::from_vm_path(vm_path).unwrap().into(), + DeviceInfo::new(vm_path, true).unwrap().into(), )]), ); assert!(res.is_ok()); @@ -1307,7 +1360,7 @@ mod tests { &mut spec, HashMap::from_iter(vec![( container_path, - DevUpdate::from_vm_path(vm_path, final_path.to_string()).unwrap(), + DevUpdate::new(vm_path, final_path).unwrap(), )]), ); assert!(res.is_ok()); diff --git a/src/agent/src/rpc.rs b/src/agent/src/rpc.rs index 822a68cb2f..276208bf3f 100644 --- a/src/agent/src/rpc.rs +++ b/src/agent/src/rpc.rs @@ -52,9 +52,7 @@ use nix::sys::{stat, statfs}; use nix::unistd::{self, Pid}; use rustjail::process::ProcessOperations; -use crate::device::{ - add_devices, get_virtio_blk_pci_device_name, update_device_cgroup, update_env_pci, -}; +use crate::device::{add_devices, get_virtio_blk_pci_device_name, update_env_pci}; use crate::linux_abi::*; use crate::metrics::get_metrics; use crate::mount::baremount; @@ -238,9 +236,6 @@ impl AgentService { update_container_namespaces(&s, &mut oci, use_sandbox_pidns)?; - // Add the root partition to the device cgroup to prevent access - update_device_cgroup(&mut oci)?; - // Append guest hooks append_guest_hooks(&s, &mut oci)?; @@ -272,8 +267,13 @@ impl AgentService { container_name, }; - let mut ctr: LinuxContainer = - LinuxContainer::new(cid.as_str(), CONTAINER_BASE, opts, &sl())?; + let mut ctr: LinuxContainer = LinuxContainer::new( + cid.as_str(), + CONTAINER_BASE, + Some(s.devcg_info.clone()), + opts, + &sl(), + )?; let pipe_size = AGENT_CONFIG.container_pipe_size; @@ -1981,16 +1981,20 @@ fn load_kernel_module(module: &protocols::agent::KernelModule) -> Result<()> { #[cfg(test)] mod tests { + use std::time::{SystemTime, UNIX_EPOCH}; + use super::*; use crate::{namespace::Namespace, protocols::agent_ttrpc_async::AgentService as _}; use nix::mount; use nix::sched::{unshare, CloneFlags}; - use oci::{Hook, Hooks, Linux, LinuxNamespace}; + use oci::{Hook, Hooks, Linux, LinuxDeviceCgroup, LinuxNamespace, LinuxResources}; use tempfile::{tempdir, TempDir}; use test_utils::{assert_result, skip_if_not_root}; use ttrpc::{r#async::TtrpcContext, MessageHeader}; use which::which; + const CGROUP_PARENT: &str = "kata.agent.test.k8s.io"; + fn check_command(cmd: &str) -> bool { which(cmd).is_ok() } @@ -2005,13 +2009,39 @@ mod tests { } fn create_dummy_opts() -> CreateOpts { + let start = SystemTime::now(); + let since_the_epoch = start + .duration_since(UNIX_EPOCH) + .expect("Time went backwards"); + let root = Root { path: String::from("/"), ..Default::default() }; + let linux_resources = LinuxResources { + devices: vec![LinuxDeviceCgroup { + allow: true, + r#type: String::new(), + major: None, + minor: None, + access: String::from("rwm"), + }], + ..Default::default() + }; + + let cgroups_path = format!( + "/{}/dummycontainer{}", + CGROUP_PARENT, + since_the_epoch.as_millis() + ); + let spec = Spec { - linux: Some(oci::Linux::default()), + linux: Some(Linux { + cgroups_path, + resources: Some(linux_resources), + ..Default::default() + }), root: Some(root), ..Default::default() }; @@ -2035,6 +2065,7 @@ mod tests { LinuxContainer::new( "some_id", dir.path().join("rootfs").to_str().unwrap(), + None, create_dummy_opts(), &slog_scope::logger(), ) @@ -2141,6 +2172,8 @@ mod tests { #[tokio::test] async fn test_do_write_stream() { + skip_if_not_root!(); + #[derive(Debug)] struct TestData<'a> { create_container: bool, diff --git a/src/agent/src/sandbox.rs b/src/agent/src/sandbox.rs index accf364559..8137939c8e 100644 --- a/src/agent/src/sandbox.rs +++ b/src/agent/src/sandbox.rs @@ -12,7 +12,7 @@ use std::os::unix::fs::PermissionsExt; use std::path::Path; use std::str::FromStr; use std::sync::atomic::{AtomicU32, Ordering}; -use std::sync::Arc; +use std::sync::{Arc, RwLock}; use std::time::{Duration, Instant}; use std::{thread, time}; @@ -26,7 +26,7 @@ use nix::sys::stat::Mode; use oci::{Hook, Hooks}; use protocols::agent::{OnlineCPUMemRequest, SharedMount}; use regex::Regex; -use rustjail::cgroups as rustjail_cgroups; +use rustjail::cgroups::{self as rustjail_cgroups, DevicesCgroupInfo}; use rustjail::container::BaseContainer; use rustjail::container::LinuxContainer; use rustjail::process::Process; @@ -118,6 +118,7 @@ pub struct Sandbox { pub event_tx: Option>, pub bind_watcher: BindWatcher, pub pcimap: HashMap, + pub devcg_info: Arc>, } impl Sandbox { @@ -151,6 +152,7 @@ impl Sandbox { event_tx: Some(tx), bind_watcher: BindWatcher::new(), pcimap: HashMap::new(), + devcg_info: Arc::new(RwLock::new(DevicesCgroupInfo::default())), }) } @@ -658,7 +660,7 @@ mod tests { use crate::mount::baremount; use anyhow::{anyhow, Error}; use nix::mount::MsFlags; - use oci::{Linux, Root, Spec}; + use oci::{Linux, LinuxDeviceCgroup, LinuxResources, Root, Spec}; use rustjail::container::LinuxContainer; use rustjail::process::Process; use rustjail::specconv::CreateOpts; @@ -667,9 +669,12 @@ mod tests { use std::io::prelude::*; use std::os::unix::fs::PermissionsExt; use std::path::Path; + use std::time::{SystemTime, UNIX_EPOCH}; use tempfile::{tempdir, Builder, TempDir}; use test_utils::skip_if_not_root; + const CGROUP_PARENT: &str = "kata.agent.test.k8s.io"; + fn bind_mount(src: &str, dst: &str, logger: &Logger) -> Result<(), Error> { let src_path = Path::new(src); let dst_path = Path::new(dst); @@ -824,13 +829,39 @@ mod tests { } fn create_dummy_opts() -> CreateOpts { + let start = SystemTime::now(); + let since_the_epoch = start + .duration_since(UNIX_EPOCH) + .expect("Time went backwards"); + let root = Root { path: String::from("/"), ..Default::default() }; + let linux_resources = LinuxResources { + devices: vec![LinuxDeviceCgroup { + allow: true, + r#type: String::new(), + major: None, + minor: None, + access: String::from("rwm"), + }], + ..Default::default() + }; + + let cgroups_path = format!( + "/{}/dummycontainer{}", + CGROUP_PARENT, + since_the_epoch.as_millis() + ); + let spec = Spec { - linux: Some(Linux::default()), + linux: Some(Linux { + cgroups_path, + resources: Some(linux_resources), + ..Default::default() + }), root: Some(root), ..Default::default() }; @@ -853,22 +884,24 @@ mod tests { .map_err(|e| anyhow!(e).context("tempdir failed")) .unwrap(); - // Create a new container - ( - LinuxContainer::new( - "some_id", - dir.path().join("rootfs").to_str().unwrap(), - create_dummy_opts(), - &slog_scope::logger(), - ) - .unwrap(), - dir, + let container = LinuxContainer::new( + "some_id", + dir.path().join("rootfs").to_str().unwrap(), + None, + create_dummy_opts(), + &slog_scope::logger(), ) + .unwrap(); + + // Create a new container + (container, dir) } #[tokio::test] #[serial] async fn get_container_entry_exist() { + skip_if_not_root!(); + let logger = slog::Logger::root(slog::Discard, o!()); let mut s = Sandbox::new(&logger).unwrap(); let (linux_container, _root) = create_linuxcontainer(); @@ -892,6 +925,8 @@ mod tests { #[tokio::test] #[serial] async fn add_and_get_container() { + skip_if_not_root!(); + let logger = slog::Logger::root(slog::Discard, o!()); let mut s = Sandbox::new(&logger).unwrap(); let (linux_container, _root) = create_linuxcontainer(); @@ -903,6 +938,8 @@ mod tests { #[tokio::test] #[serial] async fn update_shared_pidns() { + skip_if_not_root!(); + let logger = slog::Logger::root(slog::Discard, o!()); let mut s = Sandbox::new(&logger).unwrap(); let test_pid = 9999; @@ -953,6 +990,8 @@ mod tests { #[tokio::test] async fn test_find_container_process() { + skip_if_not_root!(); + let logger = slog::Logger::root(slog::Discard, o!()); let mut s = Sandbox::new(&logger).unwrap(); let cid = "container-123"; @@ -998,6 +1037,8 @@ mod tests { #[tokio::test] async fn test_find_process() { + skip_if_not_root!(); + let logger = slog::Logger::root(slog::Discard, o!()); let test_pids = [std::i32::MIN, -1, 0, 1, std::i32::MAX]; diff --git a/src/tools/runk/Cargo.lock b/src/tools/runk/Cargo.lock index 73f9db2d58..c8884f898e 100644 --- a/src/tools/runk/Cargo.lock +++ b/src/tools/runk/Cargo.lock @@ -292,9 +292,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cgroups-rs" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b098e7c3a70d03c288fa0a96ccf13e770eb3d78c4cc0e1549b3c13215d5f965" +checksum = "1fb3af90c8d48ad5f432d8afb521b5b40c2a2fce46dd60e05912de51c47fba64" dependencies = [ "libc", "log", diff --git a/src/tools/runk/libcontainer/src/container.rs b/src/tools/runk/libcontainer/src/container.rs index f8ddcdfe5f..c7c3e068be 100644 --- a/src/tools/runk/libcontainer/src/container.rs +++ b/src/tools/runk/libcontainer/src/container.rs @@ -361,6 +361,7 @@ pub fn create_linux_container( .map(|s| s.to_string()) .ok_or_else(|| anyhow!("failed to convert bundle path"))? .as_str(), + None, config, logger, )?; @@ -384,6 +385,7 @@ pub fn load_linux_container( .to_str() .map(|s| s.to_string()) .ok_or_else(|| anyhow!("failed to convert a root path"))?, + None, status.config.clone(), logger, )?; diff --git a/tests/integration/cri-containerd/integration-tests.sh b/tests/integration/cri-containerd/integration-tests.sh index 13caac55f7..58d28dcb71 100755 --- a/tests/integration/cri-containerd/integration-tests.sh +++ b/tests/integration/cri-containerd/integration-tests.sh @@ -430,6 +430,143 @@ function stop_containerd() { sudo systemctl stop containerd } +function mountLoopDevice() { + local loop_file="$1" + if [ -e "$loop_file" ]; then + sudo rm -f $loop_file + info "$loop_file was removed" + fi + + sudo dd if=/dev/zero of=$loop_file bs=100M count=2 + sudo mkfs.ext4 $loop_file + sudo losetup -fP $loop_file + local loinfo=$(sudo losetup -a | grep $loop_file) + local device=$(echo "$loinfo" | awk -F'[: ]' '{print $1}') + echo $device +} + +function startDeviceCgroupContainers() { + local pod_yaml=${REPORT_DIR}/device-cgroup-pod.yaml + local container1_yaml=${REPORT_DIR}/device-cgroup-container1.yaml + local container2_yaml=${REPORT_DIR}/device-cgroup-container2.yaml + local image="busybox:latest" + + cat > "$pod_yaml" < "$container1_yaml" < "$container2_yaml" <