From 87887026f60d6c7ebe2629ce18a7fc591e37e30f Mon Sep 17 00:00:00 2001 From: Liu Jiang Date: Thu, 9 Dec 2021 16:22:22 +0800 Subject: [PATCH] libs/sys-util: add utilities to manipulate cgroup Add utilities to manipulate cgroup, currently only v1 is supported. Signed-off-by: Liu Jiang Signed-off-by: He Rongguang Signed-off-by: Jiahuan Chao Signed-off-by: Qingyuan Hou Signed-off-by: Quanwei Zhou Signed-off-by: Tim Zhang --- src/libs/Cargo.lock | 97 ++++ src/libs/kata-sys-util/Cargo.toml | 5 + src/libs/kata-sys-util/src/cgroup.rs | 735 +++++++++++++++++++++++++++ src/libs/kata-sys-util/src/lib.rs | 1 + src/libs/kata-types/src/cpu.rs | 70 +++ src/libs/kata-types/src/lib.rs | 5 +- 6 files changed, 912 insertions(+), 1 deletion(-) create mode 100644 src/libs/kata-sys-util/src/cgroup.rs create mode 100644 src/libs/kata-types/src/cpu.rs diff --git a/src/libs/Cargo.lock b/src/libs/Cargo.lock index 7106c4bc1d..ac6b2b8def 100644 --- a/src/libs/Cargo.lock +++ b/src/libs/Cargo.lock @@ -80,6 +80,18 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cgroups-rs" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b827f9d9f6c2fff719d25f5d44cbc8d2ef6df1ef00d055c5c14d5dc25529579" +dependencies = [ + "libc", + "log", + "nix 0.23.1", + "regex", +] + [[package]] name = "chrono" version = "0.4.19" @@ -89,6 +101,7 @@ dependencies = [ "libc", "num-integer", "num-traits", + "time", "winapi", ] @@ -338,12 +351,17 @@ checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35" name = "kata-sys-util" version = "0.1.0" dependencies = [ + "cgroups-rs", + "chrono", "common-path", "fail", "kata-types", "lazy_static", "libc", "nix 0.23.1", + "num_cpus", + "once_cell", + "serial_test", "slog", "slog-scope", "tempfile", @@ -379,6 +397,16 @@ version = "0.2.124" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "21a41fed9d98f27ab1c6d161da622a4fa35e8a54a8adc24bbf3ddd0ef70b0e50" +[[package]] +name = "lock_api" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "log" version = "0.4.16" @@ -524,6 +552,31 @@ version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" +[[package]] +name = "parking_lot" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216" +dependencies = [ + "cfg-if", + "instant", + "libc", + "redox_syscall", + "smallvec", + "winapi", +] + [[package]] name = "petgraph" version = "0.5.1" @@ -741,6 +794,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + [[package]] name = "serde" version = "1.0.136" @@ -772,6 +831,28 @@ dependencies = [ "serde", ] 
+[[package]] +name = "serial_test" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0bccbcf40c8938196944a3da0e133e031a33f4d6b72db3bda3cc556e361905d" +dependencies = [ + "lazy_static", + "parking_lot", + "serial_test_derive", +] + +[[package]] +name = "serial_test_derive" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2acd6defeddb41eb60bb468f8825d0cfd0c2a76bc03bfd235b6a1dc4f6a1ad5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "slab" version = "0.4.6" @@ -819,6 +900,12 @@ dependencies = [ "slog", ] +[[package]] +name = "smallvec" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" + [[package]] name = "socket2" version = "0.4.4" @@ -889,6 +976,16 @@ dependencies = [ "once_cell", ] +[[package]] +name = "time" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "tokio" version = "1.17.0" diff --git a/src/libs/kata-sys-util/Cargo.toml b/src/libs/kata-sys-util/Cargo.toml index 264849468c..f56ff9627b 100644 --- a/src/libs/kata-sys-util/Cargo.toml +++ b/src/libs/kata-sys-util/Cargo.toml @@ -11,11 +11,14 @@ license = "Apache-2.0" edition = "2018" [dependencies] +cgroups = { package = "cgroups-rs", version = "0.2.7" } +chrono = "0.4.0" common-path = "=1.0.0" fail = "0.5.0" lazy_static = "1.4.0" libc = "0.2.100" nix = "0.23.0" +once_cell = "1.9.0" slog = "2.5.2" slog-scope = "4.4.0" thiserror = "1.0.30" @@ -23,4 +26,6 @@ thiserror = "1.0.30" kata-types = { path = "../kata-types" } [dev-dependencies] +num_cpus = "1.13.1" +serial_test = "0.5.1" tempfile = "3.2.0" diff --git a/src/libs/kata-sys-util/src/cgroup.rs b/src/libs/kata-sys-util/src/cgroup.rs new file mode 100644 index 0000000000..3edcf98245 --- /dev/null +++ b/src/libs/kata-sys-util/src/cgroup.rs @@ -0,0 +1,735 @@ +// Copyright (c) 2019-2021 Alibaba Cloud +// Copyright (c) 2019-2021 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::ops::Deref; +use std::path::{Component, Path, PathBuf}; +use std::sync::Mutex; + +use cgroups::{Cgroup, CgroupPid, Controllers, Hierarchy, Subsystem}; +use lazy_static::lazy_static; +use once_cell::sync::Lazy; + +use crate::sl; + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("Can not add tgid {0} to cgroup, {1:?}")] + AddTgid(u64, #[source] cgroups::error::Error), + #[error("failed to apply resources to cgroup: {0:?}")] + ApplyResource(#[source] cgroups::error::Error), + #[error("failed to delete cgroup after {0} retries")] + DeleteCgroup(u64), + #[error("Invalid cgroup path {0}")] + InvalidCgroupPath(String), +} + +pub type Result = std::result::Result; + +lazy_static! { + /// Disable cgroup v1 subsystems. + pub static ref DISABLED_HIERARCHIES: Mutex> = Mutex::new(Vec::new()); +} + +/// Update the disabled cgroup subsystems. +/// +/// Some cgroup controllers may be disabled by runtime configuration file. The sandbox may call +/// this method to disable those cgroup controllers once. 
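+///
+/// A minimal sketch of how a runtime might call this; the controller names below are
+/// illustrative, not taken from a real configuration:
+///
+/// ```ignore
+/// // Disable the perf_event and net_cls controllers for cgroups managed by this crate.
+/// update_disabled_cgroup_list(&["perf_event".to_string(), "net_cls".to_string()]);
+/// ```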
+pub fn update_disabled_cgroup_list(hierarchies: &[String]) {
+    let mut disabled_hierarchies = DISABLED_HIERARCHIES.lock().unwrap();
+    disabled_hierarchies.clear();
+    for hierarchy in hierarchies {
+        match hierarchy.as_str() {
+            "blkio" => disabled_hierarchies.push(cgroups::Controllers::BlkIo),
+            "cpu" => disabled_hierarchies.push(cgroups::Controllers::Cpu),
+            "cpuset" => disabled_hierarchies.push(cgroups::Controllers::CpuSet),
+            "cpuacct" => disabled_hierarchies.push(cgroups::Controllers::CpuAcct),
+            "devices" => disabled_hierarchies.push(cgroups::Controllers::Devices),
+            "freezer" => disabled_hierarchies.push(cgroups::Controllers::Freezer),
+            "hugetlb" => disabled_hierarchies.push(cgroups::Controllers::HugeTlb),
+            "memory" => disabled_hierarchies.push(cgroups::Controllers::Mem),
+            "net_cls" => disabled_hierarchies.push(cgroups::Controllers::NetCls),
+            "net_prio" => disabled_hierarchies.push(cgroups::Controllers::NetPrio),
+            "perf_event" => disabled_hierarchies.push(cgroups::Controllers::PerfEvent),
+            "pids" => disabled_hierarchies.push(cgroups::Controllers::Pids),
+            "systemd" => disabled_hierarchies.push(cgroups::Controllers::Systemd),
+            _ => warn!(sl!(), "unknown cgroup controller {}", hierarchy),
+        }
+    }
+    debug!(
+        sl!(),
+        "disabled cgroup list {:?} from {:?}", disabled_hierarchies, hierarchies
+    );
+}
+
+/// Filter out disabled cgroup subsystems.
+pub fn filter_disabled_cgroup(controllers: &mut Vec<cgroups::Controllers>) {
+    let disabled_hierarchies = DISABLED_HIERARCHIES.lock().unwrap();
+    controllers.retain(|x| !disabled_hierarchies.contains(x));
+}
+
+#[derive(Copy, Clone, Debug)]
+pub enum PidType {
+    /// Add pid to `tasks`
+    Tasks,
+    /// Add pid to `cgroup.procs`
+    CgroupProcs,
+}
+
+/// Get the singleton instance of the cgroup v1 hierarchy object.
+pub fn get_cgroup_hierarchies() -> &'static cgroups::hierarchies::V1 {
+    static GLOBAL: Lazy<cgroups::hierarchies::V1> = Lazy::new(cgroups::hierarchies::V1::new);
+    GLOBAL.deref()
+}
+
+// Prepend a Kata specific string to the OCI cgroup path to form a different cgroup path, so that
+// cAdvisor cannot find the Kata containers cgroup path on the host and grab its stats data.
+const CGROUP_KATA_PREFIX: &str = "kata";
+
+/// Convert to a Kata specific cgroup path.
+pub fn gen_kata_cgroup_path(path: &str) -> PathBuf {
+    // Be careful to trim off the possible '/' prefix. Joining an absolute path to a `Path` object
+    // will replace the old `Path` instead of concatenating.
+    Path::new(CGROUP_KATA_PREFIX).join(path.trim_start_matches('/'))
+}
+
+/// Convert to a cgroup path for a K8S sandbox.
+pub fn gen_sandbox_cgroup_path(path: &str) -> PathBuf {
+    PathBuf::from(path)
+}
+
+/// A customized cgroup v1 hierarchy object with configurable filters for supported subsystems.
+#[derive(Debug)]
+pub struct V1Customized {
+    mount_point: PathBuf,
+    controllers: Vec<Controllers>,
+}
+
+impl V1Customized {
+    /// Create a new instance of [`V1Customized`].
+    ///
+    /// The `controllers` argument configures the subsystems to enable.
+    ///
+    /// Notes:
+    /// 1. When enabling both the blkio and memory cgroups, the blkio cgroup must be enabled before
+    ///    the memory cgroup due to a limitation in writeback control of the blkio cgroup.
+    /// 2. cpu, cpuset and cpuacct should be adjacent to each other.
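+    ///
+    /// A minimal usage sketch, shown only to illustrate the ordering constraints above:
+    ///
+    /// ```ignore
+    /// use cgroups::Controllers;
+    ///
+    /// // blkio is listed before memory, and the CPU related controllers are adjacent.
+    /// let hierarchy = V1Customized::new(vec![
+    ///     Controllers::BlkIo,
+    ///     Controllers::Cpu,
+    ///     Controllers::CpuSet,
+    ///     Controllers::CpuAcct,
+    ///     Controllers::Mem,
+    /// ]);
+    /// ```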
+    pub fn new(controllers: Vec<Controllers>) -> Self {
+        let mount_point = get_cgroup_hierarchies().root();
+
+        V1Customized {
+            mount_point,
+            controllers,
+        }
+    }
+}
+
+impl Hierarchy for V1Customized {
+    fn subsystems(&self) -> Vec<Subsystem> {
+        let subsystems = get_cgroup_hierarchies().subsystems();
+
+        subsystems
+            .into_iter()
+            .filter(|sub| {
+                self.controllers
+                    .contains(&sub.to_controller().control_type())
+            })
+            .collect::<Vec<_>>()
+    }
+
+    fn root(&self) -> PathBuf {
+        self.mount_point.clone()
+    }
+
+    fn root_control_group(&self) -> Cgroup {
+        Cgroup::load(Box::new(V1Customized::new(self.controllers.clone())), "")
+    }
+
+    fn v2(&self) -> bool {
+        false
+    }
+}
+
+/// A boxed cgroup hierarchy object.
+pub type BoxedHierarchyObject = Box<dyn Hierarchy>;
+
+/// Create a cgroup hierarchy object with all subsystems disabled.
+pub fn get_empty_hierarchy() -> BoxedHierarchyObject {
+    Box::new(V1Customized::new(vec![]))
+}
+
+/// Create a cgroup hierarchy object for a pod sandbox.
+pub fn get_sandbox_hierarchy(no_mem: bool) -> BoxedHierarchyObject {
+    let mut controllers = vec![
+        cgroups::Controllers::BlkIo,
+        cgroups::Controllers::Cpu,
+        cgroups::Controllers::CpuSet,
+        cgroups::Controllers::CpuAcct,
+        cgroups::Controllers::PerfEvent,
+    ];
+
+    if !no_mem {
+        controllers.push(cgroups::Controllers::Mem);
+    }
+    filter_disabled_cgroup(&mut controllers);
+    Box::new(V1Customized::new(controllers))
+}
+
+/// Create a cgroup hierarchy object with the mem subsystem.
+///
+/// Note: the mem subsystem may have been disabled, so it will get filtered out.
+pub fn get_mem_hierarchy() -> BoxedHierarchyObject {
+    let mut controllers = vec![cgroups::Controllers::Mem];
+    filter_disabled_cgroup(&mut controllers);
+    Box::new(V1Customized::new(controllers))
+}
+
+/// Create a cgroup hierarchy object with the CPU related subsystems.
+///
+/// Note: the CPU related subsystems may have been disabled, so they will get filtered out.
+pub fn get_cpu_hierarchy() -> BoxedHierarchyObject {
+    let mut controllers = vec![
+        cgroups::Controllers::Cpu,
+        cgroups::Controllers::CpuSet,
+        cgroups::Controllers::CpuAcct,
+    ];
+    filter_disabled_cgroup(&mut controllers);
+    Box::new(V1Customized::new(controllers))
+}
+
+/// Get a cgroup hierarchy object from `path`.
+pub fn get_hierarchy_by_path(path: &str) -> Result<BoxedHierarchyObject> {
+    let v1 = get_cgroup_hierarchies().clone();
+    let valid_path = valid_cgroup_path(path)?;
+    let cg = cgroups::Cgroup::load(Box::new(v1), valid_path.as_str());
+
+    let mut hierarchy = vec![];
+    for subsys in cg.subsystems() {
+        let controller = subsys.to_controller();
+        if controller.exists() {
+            hierarchy.push(controller.control_type());
+        }
+    }
+
+    Ok(Box::new(V1Customized::new(hierarchy)))
+}
+
+/// Create or load a cgroup object from a path.
+pub fn create_or_load_cgroup(path: &str) -> Result<Cgroup> {
+    let hie = Box::new(get_cgroup_hierarchies().clone());
+
+    create_or_load_cgroup_with_hier(hie, path)
+}
+
+/// Create or load a cgroup v1 object from a path, with a given hierarchy object.
+pub fn create_or_load_cgroup_with_hier(hie: BoxedHierarchyObject, path: &str) -> Result<Cgroup> {
+    let valid_path = valid_cgroup_path(path)?;
+    if is_cgroup_exist(valid_path.as_str()) {
+        Ok(cgroups::Cgroup::load(hie, valid_path.as_str()))
+    } else {
+        Ok(cgroups::Cgroup::new(hie, valid_path.as_str()))
+    }
+}
+
+/// Check whether `path` hosts a cgroup hierarchy directory.
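+///
+/// A small sketch; the cgroup path here is hypothetical:
+///
+/// ```ignore
+/// if is_cgroup_exist("kata/sandbox1") {
+///     // Load the existing cgroup instead of creating a new one.
+/// }
+/// ```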
+pub fn is_cgroup_exist(path: &str) -> bool {
+    let valid_path = match valid_cgroup_path(path) {
+        Ok(v) => v,
+        Err(e) => {
+            warn!(sl!(), "{}", e);
+            return false;
+        }
+    };
+
+    let v1 = get_cgroup_hierarchies().clone();
+    let cg = cgroups::Cgroup::load(Box::new(v1), valid_path.as_str());
+    for subsys in cg.subsystems() {
+        if subsys.to_controller().exists() {
+            debug!(sl!(), "cgroup {} exists", path);
+            return true;
+        }
+    }
+
+    false
+}
+
+// Validate that the cgroup path is relative and contains no "." or ".." components.
+fn valid_cgroup_path(path: &str) -> Result<String> {
+    let path = path.trim_start_matches('/').to_string();
+
+    for comp in Path::new(&path).components() {
+        if !matches!(comp, Component::Normal(_)) {
+            return Err(Error::InvalidCgroupPath(path.to_string()));
+        }
+    }
+
+    Ok(path)
+}
+
+/// Remove all tasks from the cgroup and then delete the cgroup.
+pub fn force_delete_cgroup(cg: cgroups::Cgroup) -> Result<()> {
+    delete_cgroup_with_retry(cg, |cg: &Cgroup| {
+        // If tasks still exist, remove them before deleting the cgroup.
+        for cg_pid in cg.tasks() {
+            warn!(sl!(), "Delete cgroup task pid {}", cg_pid.pid);
+            cg.remove_task(cg_pid);
+        }
+    })
+}
+
+/// Try to delete a cgroup, calling the `do_process` handler before each deletion attempt.
+pub fn delete_cgroup_with_retry<F>(cg: Cgroup, mut do_process: F) -> Result<()>
+where
+    F: FnMut(&Cgroup),
+{
+    const SLEEP_MILLISECS: u64 = 10;
+    const RETRY_COUNT: u64 = 200;
+
+    // In case of deletion failures caused by "Resource busy", sleep SLEEP_MILLISECS and retry up
+    // to RETRY_COUNT times.
+    for index in 0..RETRY_COUNT {
+        do_process(&cg);
+
+        if cg.delete().is_ok() {
+            if index > 0 {
+                info!(
+                    sl!(),
+                    "cgroup deletion cost {} ms, retried {} times",
+                    index * SLEEP_MILLISECS,
+                    index,
+                );
+            }
+            return Ok(());
+        }
+        std::thread::sleep(std::time::Duration::from_millis(SLEEP_MILLISECS))
+    }
+
+    Err(Error::DeleteCgroup(RETRY_COUNT))
+}
+
+/// Move the process `pid` into the cgroup `to`.
+pub fn move_tgid(pid: u64, to: &Cgroup) -> Result<()> {
+    info!(sl!(), "try to move tgid {:?}", pid);
+    to.add_task_by_tgid(CgroupPid::from(pid))
+        .map_err(|e| Error::AddTgid(pid, e))
+}
+
+/// Move all tasks from `from` to `to`.
+pub fn move_cgroup_task(from: &Cgroup, to: &Cgroup) -> Result<()> {
+    info!(sl!(), "try to move tasks {:?}", from.tasks());
+    for cg_pid in from.tasks() {
+        from.remove_task(CgroupPid::from(cg_pid.pid));
+        // TODO: enhance cgroups to implement Copy for CgroupPid
+        // https://github.com/kata-containers/cgroups-rs/issues/70
+        let pid = cg_pid.pid;
+        to.add_task(cg_pid).map_err(|e| Error::AddTgid(pid, e))?;
+    }
+
+    Ok(())
+}
+
+/// Associate a group of tasks with a cgroup, and optionally configure resources for the cgroup.
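+///
+/// A minimal sketch; the cgroup path is hypothetical and no resource limits are applied:
+///
+/// ```ignore
+/// // Move the current process into the cgroup, creating the cgroup if needed.
+/// let hierarchy = get_cpu_hierarchy();
+/// let pid = nix::unistd::getpid().as_raw() as u64;
+/// update_cgroup_task_resources(hierarchy, "kata/sandbox1", &[pid], PidType::CgroupProcs, None)?;
+/// ```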
+pub fn update_cgroup_task_resources(
+    hierarchy: BoxedHierarchyObject,
+    path: &str,
+    pids: &[u64],
+    pid_type: PidType,
+    resources: Option<&cgroups::Resources>,
+) -> Result<()> {
+    if hierarchy.subsystems().is_empty() {
+        return Ok(());
+    }
+    fail::fail_point!("update_cgroup_task_resources", |_| Ok(()));
+
+    let cg = create_or_load_cgroup_with_hier(hierarchy, path)?;
+    for pid in pids {
+        let result = match pid_type {
+            PidType::Tasks => cg.add_task(CgroupPid { pid: *pid }),
+            PidType::CgroupProcs => cg.add_task_by_tgid(CgroupPid { pid: *pid }),
+        };
+        if let Err(err) = result {
+            return Err(Error::AddTgid(*pid, err));
+        }
+    }
+
+    if let Some(res) = resources {
+        cg.apply(res).map_err(Error::ApplyResource)?;
+    }
+
+    debug!(
+        sl!(),
+        "update {:?} {:?} resources {:?} for cgroup {}", pid_type, pids, resources, path
+    );
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use cgroups::Controllers;
+    use serial_test::serial;
+    use std::sync::atomic::{AtomicUsize, Ordering};
+
+    static GLOBAL_COUNTER: AtomicUsize = AtomicUsize::new(0);
+
+    fn gen_test_path() -> String {
+        let pid = nix::unistd::getpid().as_raw();
+        let index = GLOBAL_COUNTER.fetch_add(1, Ordering::SeqCst);
+        let path = format!("kata-tests-{}-{}", pid, index);
+        println!("test path {}", path);
+        path
+    }
+
+    fn get_hierarchy(controllers: Vec<Controllers>) -> Box<dyn Hierarchy> {
+        Box::new(V1Customized::new(controllers))
+    }
+
+    #[test]
+    fn test_v1_customized_cgroup() {
+        update_disabled_cgroup_list(&[]);
+
+        let c = V1Customized::new(vec![]);
+        assert_eq!(c.subsystems().len(), 0);
+        assert!(!c.v2());
+
+        let c = V1Customized::new(vec![Controllers::Cpu, Controllers::CpuSet]);
+        assert_eq!(c.subsystems().len(), 2);
+        assert!(!c.v2());
+    }
+
+    #[test]
+    #[serial]
+    fn test_filter_disabled_cgroup() {
+        update_disabled_cgroup_list(&[]);
+        assert_eq!(DISABLED_HIERARCHIES.lock().unwrap().len(), 0);
+
+        let disabled = ["perf_event".to_string()];
+        update_disabled_cgroup_list(&disabled);
+        assert_eq!(DISABLED_HIERARCHIES.lock().unwrap().len(), 1);
+        assert_eq!(
+            DISABLED_HIERARCHIES.lock().unwrap()[0],
+            Controllers::PerfEvent
+        );
+
+        let mut subsystems = vec![Controllers::BlkIo, Controllers::PerfEvent, Controllers::Cpu];
+        filter_disabled_cgroup(&mut subsystems);
+        assert_eq!(subsystems.len(), 2);
+        assert_eq!(subsystems[0], Controllers::BlkIo);
+        assert_eq!(subsystems[1], Controllers::Cpu);
+
+        let disabled = ["cpu".to_string(), "cpuset".to_string()];
+        update_disabled_cgroup_list(&disabled);
+        assert_eq!(DISABLED_HIERARCHIES.lock().unwrap().len(), 2);
+
+        let mut subsystems = vec![Controllers::BlkIo, Controllers::PerfEvent, Controllers::Cpu];
+        filter_disabled_cgroup(&mut subsystems);
+        assert_eq!(subsystems.len(), 2);
+        assert_eq!(subsystems[0], Controllers::BlkIo);
+        assert_eq!(subsystems[1], Controllers::PerfEvent);
+
+        update_disabled_cgroup_list(&[]);
+    }
+
+    #[test]
+    fn test_create_empty_hierarchy() {
+        update_disabled_cgroup_list(&[]);
+
+        let controller = get_empty_hierarchy();
+        assert_eq!(controller.subsystems().len(), 0);
+        assert!(!controller.root_control_group().v2());
+    }
+
+    #[test]
+    #[serial]
+    fn test_create_sandbox_hierarchy() {
+        update_disabled_cgroup_list(&[]);
+
+        let controller = get_sandbox_hierarchy(true);
+        assert_eq!(controller.subsystems().len(), 5);
+        assert!(!controller.root_control_group().v2());
+
+        let controller = get_sandbox_hierarchy(false);
+        assert_eq!(controller.subsystems().len(), 6);
+        assert!(!controller.root_control_group().v2());
+    }
+
+    #[test]
+    #[serial]
+    fn test_get_hierarchy() {
update_disabled_cgroup_list(&[]); + + let controller = get_mem_hierarchy(); + assert!(!controller.v2()); + assert_eq!(controller.subsystems().len(), 1); + + let controller = get_cpu_hierarchy(); + assert!(!controller.v2()); + assert_eq!(controller.subsystems().len(), 3); + } + + #[test] + #[serial] + fn test_create_cgroup_default() { + update_disabled_cgroup_list(&[]); + // test need root permission + if !nix::unistd::getuid().is_root() { + println!("test need root permission"); + return; + } + + let v1 = Box::new(cgroups::hierarchies::V1::new()); + let test_path = gen_test_path(); + let cg_path = test_path.as_str(); + assert!(!is_cgroup_exist(cg_path)); + + // new cgroup + let cg = cgroups::Cgroup::new(v1, cg_path); + assert!(is_cgroup_exist(cg_path)); + + // add task + let _ = cg.add_task(cgroups::CgroupPid { + pid: nix::unistd::getpid().as_raw() as u64, + }); + + // delete cgroup + force_delete_cgroup(cg).unwrap(); + assert!(!is_cgroup_exist(cg_path)); + } + + #[test] + #[serial] + fn test_create_cgroup_cpus() { + update_disabled_cgroup_list(&[]); + // test need root permission + if !nix::unistd::getuid().is_root() { + println!("test need root permission"); + return; + } + if num_cpus::get() <= 1 { + println!("The unit test is only supported on SMP systems."); + return; + } + + let test_path = gen_test_path(); + let cg_path = test_path.as_str(); + assert!(!is_cgroup_exist(cg_path)); + + // new cgroup + let cgroup = create_or_load_cgroup(cg_path).unwrap(); + let cpus: &cgroups::cpuset::CpuSetController = cgroup.controller_of().unwrap(); + cpus.set_cpus("0-1").unwrap(); + assert!(is_cgroup_exist(cg_path)); + + // current cgroup + let current_cgroup = create_or_load_cgroup(cg_path).unwrap(); + let current_cpus: &cgroups::cpuset::CpuSetController = + current_cgroup.controller_of().unwrap(); + // check value + assert_eq!(cpus.cpuset().cpus, current_cpus.cpuset().cpus); + + // delete cgroup + force_delete_cgroup(cgroup).unwrap(); + assert!(!is_cgroup_exist(cg_path)); + } + + #[test] + #[serial] + fn test_create_cgroup_with_parent() { + update_disabled_cgroup_list(&[]); + // test need root permission + if !nix::unistd::getuid().is_root() { + println!("test need root permission"); + return; + } + if num_cpus::get() <= 1 { + println!("The unit test is only supported on SMP systems."); + return; + } + + let test_path = gen_test_path(); + let cg_path = test_path.as_str(); + assert!(!is_cgroup_exist(cg_path)); + + // new cgroup + let cg = create_or_load_cgroup(cg_path).unwrap(); + let cpus: &cgroups::cpuset::CpuSetController = cg.controller_of().unwrap(); + cpus.set_cpus("0-1").unwrap(); + assert!(is_cgroup_exist(cg_path)); + + // new cgroup 1 + let cg_test_path_1 = format!("{}/vcpu0", test_path); + let cg_path_1 = cg_test_path_1.as_str(); + let cg1 = create_or_load_cgroup(cg_path_1).unwrap(); + let cpus1: &cgroups::cpuset::CpuSetController = cg1.controller_of().unwrap(); + cpus1.set_cpus("0").unwrap(); + assert!(is_cgroup_exist(cg_path_1)); + + // new cgroup 2 + let cg_test_path_2 = format!("{}/vcpu1", test_path); + let cg_path_2 = cg_test_path_2.as_str(); + // new cgroup + let cg2 = create_or_load_cgroup(cg_path_2).unwrap(); + let cpus2: &cgroups::cpuset::CpuSetController = cg2.controller_of().unwrap(); + cpus2.set_cpus("1").unwrap(); + assert!(is_cgroup_exist(cg_path_2)); + + // must delete sub dir first + force_delete_cgroup(cg1).unwrap(); + assert!(!is_cgroup_exist(cg_path_1)); + force_delete_cgroup(cg2).unwrap(); + assert!(!is_cgroup_exist(cg_path_2)); + force_delete_cgroup(cg).unwrap(); + 
assert!(!is_cgroup_exist(cg_path)); + } + + fn assert_customize_path_exist(path: &str, current_subsystems: &[Subsystem], expect: bool) { + println!("assert customize path {} exist expect {}", path, expect); + let v1 = Box::new(cgroups::hierarchies::V1::new()); + let v1_cg = Cgroup::load(v1, path); + let v1_subsystems = v1_cg.subsystems(); + + for v1_sub in v1_subsystems { + let check_expect = || -> bool { + for current_sub in current_subsystems { + if v1_sub.to_controller().control_type() + == current_sub.to_controller().control_type() + { + return expect; + } + } + false + }(); + assert_eq!( + check_expect, + v1_sub.to_controller().exists(), + "failed to check path {:?} subsystem {:?}", + path, + v1_sub + ) + } + } + + fn clean_cgroup_v1(path: &str) { + let v1 = Box::new(cgroups::hierarchies::V1::new()); + let cg = Cgroup::load(v1.clone(), path); + delete_cgroup_with_retry(cg, |_: &Cgroup| {}).unwrap(); + + let check_cg = Cgroup::load(v1, path); + assert_customize_path_exist(path, check_cg.subsystems(), false); + } + + #[test] + #[serial] + fn test_customize_hierarchies() { + update_disabled_cgroup_list(&[]); + // test need root permission + if !nix::unistd::getuid().is_root() { + println!("test need root permission"); + return; + } + + let cg_path_1 = "test_customize_hierarchies1"; + let cg_path_2 = "test_customize_hierarchies2"; + + // clean + clean_cgroup_v1(cg_path_1); + clean_cgroup_v1(cg_path_2); + + // check customized cgroup + // With some kernels, Cpu and CpuAcct are combined into one directory, so enable both + // to ease test code. + let controllers_1 = vec![Controllers::Cpu, Controllers::CpuAcct]; + let controllers_2 = vec![Controllers::Cpu, Controllers::CpuSet, Controllers::CpuAcct]; + let cg_1 = Cgroup::new(get_hierarchy(controllers_1.clone()), cg_path_1); + let cg_2 = Cgroup::new(get_hierarchy(controllers_2.clone()), cg_path_2); + + assert_customize_path_exist(cg_path_1, cg_1.subsystems(), true); + assert_customize_path_exist(cg_path_2, cg_2.subsystems(), true); + + // delete + let _ = cg_1.delete(); + let _ = cg_2.delete(); + + // check after delete + let check_cg_1 = Cgroup::load(get_hierarchy(controllers_1), cg_path_1); + let check_cg_2 = Cgroup::load(get_hierarchy(controllers_2), cg_path_2); + assert_customize_path_exist(cg_path_1, check_cg_1.subsystems(), false); + assert_customize_path_exist(cg_path_2, check_cg_2.subsystems(), false); + } + + #[test] + #[serial] + fn test_task_move() { + update_disabled_cgroup_list(&[]); + // test need root permission + if !nix::unistd::getuid().is_root() { + println!("test need root permission"); + return; + } + + let cg_path_1 = "test_task_move_before"; + let cg_path_2 = "test_task_move_after"; + + // clean + clean_cgroup_v1(cg_path_1); + clean_cgroup_v1(cg_path_2); + + // With some kernels, Cpu and CpuAcct are combined into one directory, so enable both + // to ease test code. 
+        let controllers = vec![Controllers::Cpu, Controllers::CpuAcct];
+        let cg_1 = Cgroup::new(get_hierarchy(controllers.clone()), cg_path_1);
+        let cg_2 = Cgroup::new(get_hierarchy(controllers.clone()), cg_path_2);
+
+        assert_customize_path_exist(cg_path_1, cg_1.subsystems(), true);
+        assert_customize_path_exist(cg_path_2, cg_2.subsystems(), true);
+
+        // add task
+        let pid = libc::pid_t::from(nix::unistd::getpid()) as u64;
+        let _ = cg_1.add_task(CgroupPid::from(pid)).unwrap();
+        let mut cg_task_1 = cg_1.tasks();
+        let mut cg_task_2 = cg_2.tasks();
+        assert_eq!(1, cg_task_1.len());
+        assert_eq!(0, cg_task_2.len());
+
+        // move task
+        let _ = cg_2.add_task(CgroupPid::from(pid)).unwrap();
+        cg_task_1 = cg_1.tasks();
+        cg_task_2 = cg_2.tasks();
+        assert_eq!(0, cg_task_1.len());
+        assert_eq!(1, cg_task_2.len());
+
+        cg_2.remove_task(CgroupPid::from(pid));
+
+        // delete
+        cg_1.delete().unwrap();
+        // Delete cg_2 with retry because deletion may fail with "Resource busy"; the production
+        // code retries in the same way, so it makes sense to do so in the test as well.
+        delete_cgroup_with_retry(cg_2, |_| {}).unwrap();
+
+        // check after delete
+        let check_cg_1 = Cgroup::load(get_hierarchy(controllers.clone()), cg_path_1);
+        let check_cg_2 = Cgroup::load(get_hierarchy(controllers), cg_path_2);
+        assert_customize_path_exist(cg_path_1, check_cg_1.subsystems(), false);
+        assert_customize_path_exist(cg_path_2, check_cg_2.subsystems(), false);
+    }
+
+    #[test]
+    fn test_gen_kata_cgroup_path() {
+        assert_eq!(
+            &gen_kata_cgroup_path("sandbox1/container2"),
+            Path::new("kata/sandbox1/container2")
+        );
+        assert_eq!(
+            &gen_kata_cgroup_path("/sandbox1/container2"),
+            Path::new("kata/sandbox1/container2")
+        );
+        assert_eq!(
+            &gen_kata_cgroup_path("/sandbox1:container2"),
+            Path::new("kata/sandbox1:container2")
+        );
+    }
+}
diff --git a/src/libs/kata-sys-util/src/lib.rs b/src/libs/kata-sys-util/src/lib.rs
index 62706c15de..a4786f5f1f 100644
--- a/src/libs/kata-sys-util/src/lib.rs
+++ b/src/libs/kata-sys-util/src/lib.rs
@@ -6,6 +6,7 @@
 #[macro_use]
 extern crate slog;
 
+pub mod cgroup;
 pub mod fs;
 pub mod mount;
diff --git a/src/libs/kata-types/src/cpu.rs b/src/libs/kata-types/src/cpu.rs
new file mode 100644
index 0000000000..b209834b3e
--- /dev/null
+++ b/src/libs/kata-types/src/cpu.rs
@@ -0,0 +1,70 @@
+// Copyright (c) 2022 Alibaba Cloud
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/// A list of CPU IDs.
+#[derive(Debug)]
+pub struct CpuSet(Vec<u32>);
+
+impl CpuSet {
+    /// Create a new instance of `CpuSet`.
+    pub fn new() -> Self {
+        CpuSet(vec![])
+    }
+
+    /// Add new CPUs into the set.
+    pub fn extend(&mut self, cpus: &[u32]) {
+        self.0.extend_from_slice(cpus);
+        self.0.sort_unstable();
+        self.0.dedup();
+    }
+
+    /// Returns true if the CPU set contains no elements.
+    pub fn is_empty(&self) -> bool {
+        self.0.is_empty()
+    }
+
+    /// Get the number of elements in the CPU set.
+    pub fn len(&self) -> usize {
+        self.0.len()
+    }
+}
+
+impl From<Vec<u32>> for CpuSet {
+    fn from(mut cpus: Vec<u32>) -> Self {
+        cpus.sort_unstable();
+        cpus.dedup();
+        CpuSet(cpus)
+    }
+}
+
+/// Test whether two CPU sets are equal.
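+///
+/// Because `CpuSet` stores a sorted, deduplicated list, ordering and duplicates in the input
+/// do not affect equality, for example:
+///
+/// ```ignore
+/// assert_eq!(CpuSet::from(vec![1, 2, 3]), CpuSet::from(vec![3, 2, 1, 1]));
+/// ```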
+impl PartialEq for CpuSet { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_is_cpu_list_equal() { + let cpuset1 = CpuSet::from(vec![1, 2, 3]); + let cpuset2 = CpuSet::from(vec![3, 2, 1]); + let cpuset3 = CpuSet::from(vec![]); + let cpuset4 = CpuSet::from(vec![3, 2, 4]); + let cpuset5 = CpuSet::from(vec![1, 2, 3, 3, 2, 1]); + + assert_eq!(cpuset1.len(), 3); + assert!(cpuset3.is_empty()); + assert_eq!(cpuset5.len(), 3); + + assert_eq!(cpuset1, cpuset2); + assert_eq!(cpuset1, cpuset5); + assert_ne!(cpuset1, cpuset3); + assert_ne!(cpuset1, cpuset4); + } +} diff --git a/src/libs/kata-types/src/lib.rs b/src/libs/kata-types/src/lib.rs index c05efd3207..06c8cf2956 100644 --- a/src/libs/kata-types/src/lib.rs +++ b/src/libs/kata-types/src/lib.rs @@ -11,7 +11,7 @@ extern crate slog; #[macro_use] extern crate serde; -/// Constants and data types annotations. +/// Constants and data types related to annotations. pub mod annotations; /// Kata configuration information from configuration file. @@ -20,6 +20,9 @@ pub mod config; /// Constants and data types related to container. pub mod container; +/// Constants and data types related to CPU. +pub mod cpu; + /// Constants and data types related to Kubernetes/kubelet. pub mod k8s;