diff --git a/src/agent/Cargo.lock b/src/agent/Cargo.lock index 987f16ffa2..6caa298b5e 100644 --- a/src/agent/Cargo.lock +++ b/src/agent/Cargo.lock @@ -1510,6 +1510,7 @@ dependencies = [ "slog", "slog-scope", "tempfile", + "test-utils", "tokio", ] diff --git a/src/agent/rustjail/Cargo.toml b/src/agent/rustjail/Cargo.toml index 6d0a3a5182..69de9016a1 100644 --- a/src/agent/rustjail/Cargo.toml +++ b/src/agent/rustjail/Cargo.toml @@ -36,6 +36,7 @@ libseccomp = { version = "0.2.3", optional = true } [dev-dependencies] serial_test = "0.5.0" tempfile = "3.1.0" +test-utils = { path = "../../libs/test-utils" } [features] seccomp = ["libseccomp"] diff --git a/src/agent/rustjail/src/container.rs b/src/agent/rustjail/src/container.rs index 2c360cf164..8f8c148476 100644 --- a/src/agent/rustjail/src/container.rs +++ b/src/agent/rustjail/src/container.rs @@ -1656,12 +1656,12 @@ fn valid_env(e: &str) -> Option<(&str, &str)> { mod tests { use super::*; use crate::process::Process; - use crate::skip_if_not_root; use nix::unistd::Uid; use std::fs; use std::os::unix::fs::MetadataExt; use std::os::unix::io::AsRawFd; use tempfile::tempdir; + use test_utils::skip_if_not_root; use tokio::process::Command; macro_rules! sl { diff --git a/src/agent/rustjail/src/lib.rs b/src/agent/rustjail/src/lib.rs index dafac6381e..fb51d9f395 100644 --- a/src/agent/rustjail/src/lib.rs +++ b/src/agent/rustjail/src/lib.rs @@ -514,15 +514,6 @@ pub fn grpc_to_oci(grpc: &grpc::Spec) -> oci::Spec { #[cfg(test)] mod tests { use super::*; - #[macro_export] - macro_rules! 
skip_if_not_root { - () => { - if !nix::unistd::Uid::effective().is_root() { - println!("INFO: skipping {} which needs root", module_path!()); - return; - } - }; - } // Parameters: // diff --git a/src/agent/rustjail/src/mount.rs b/src/agent/rustjail/src/mount.rs index dd980530d0..4670301b23 100644 --- a/src/agent/rustjail/src/mount.rs +++ b/src/agent/rustjail/src/mount.rs @@ -1072,7 +1072,6 @@ fn readonly_path(path: &str) -> Result<()> { mod tests { use super::*; use crate::assert_result; - use crate::skip_if_not_root; use std::fs::create_dir; use std::fs::create_dir_all; use std::fs::remove_dir_all; @@ -1080,6 +1079,7 @@ mod tests { use std::os::unix::fs; use std::os::unix::io::AsRawFd; use tempfile::tempdir; + use test_utils::skip_if_not_root; #[test] #[serial(chdir)] diff --git a/src/agent/rustjail/src/seccomp.rs b/src/agent/rustjail/src/seccomp.rs index fab0197873..d8edbcd004 100644 --- a/src/agent/rustjail/src/seccomp.rs +++ b/src/agent/rustjail/src/seccomp.rs @@ -122,10 +122,10 @@ pub fn init_seccomp(scmp: &LinuxSeccomp) -> Result<()> { #[cfg(test)] mod tests { use super::*; - use crate::skip_if_not_root; use libc::{dup3, process_vm_readv, EPERM, O_CLOEXEC}; use std::io::Error; use std::ptr::null; + use test_utils::skip_if_not_root; macro_rules! syscall_assert { ($e1: expr, $e2: expr) => { diff --git a/src/tools/runk/Cargo.lock b/src/tools/runk/Cargo.lock index 9a9ed7bc4b..e78e58058c 100644 --- a/src/tools/runk/Cargo.lock +++ b/src/tools/runk/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + [[package]] name = "aho-corasick" version = "0.7.18" @@ -111,13 +117,13 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cgroups-rs" -version = "0.2.9" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdae996d9638ba03253ffa1c93345a585974a97abbdeab9176c77922f3efc1e8" +checksum = "cf5525f2cf84d5113ab26bfb6474180eb63224b4b1e4be31ee87be4098f11399" dependencies = [ "libc", "log", - "nix", + "nix 0.24.2", "regex", ] @@ -174,6 +180,15 @@ dependencies = [ "os_str_bytes", ] +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if 1.0.0", +] + [[package]] name = "crossbeam-channel" version = "0.5.4" @@ -313,6 +328,16 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d" +[[package]] +name = "flate2" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f82b0f4c27ad9f8bfd1f3208d882da2b09c301bc1c828fd3a00d0216d2fbbff6" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "fnv" version = "1.0.7" @@ -438,6 +463,12 @@ dependencies = [ "libc", ] +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "ident_case" version = "1.0.1" @@ -485,6 +516,12 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "io-lifetimes" +version = "0.7.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "24c3f4eff5495aee4c0399d7b6a0dc2b6e81be84242ffbfcf253ebacccc1d0cb" + [[package]] name = "itertools" version = "0.10.3" @@ -508,27 +545,30 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.124" +version = "0.2.127" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21a41fed9d98f27ab1c6d161da622a4fa35e8a54a8adc24bbf3ddd0ef70b0e50" +checksum = "505e71a4706fa491e9b1b55f51b95d4037d0821ee40131190475f692b35b009b" [[package]] name = "libcontainer" version = "0.0.1" dependencies = [ "anyhow", + "cgroups-rs", "chrono", "derive_builder", "libc", "logging", - "nix", + "nix 0.23.1", "oci", + "procfs", "rustjail", "scopeguard", "serde", "serde_json", "slog", "tempfile", + "test-utils", ] [[package]] @@ -540,6 +580,12 @@ dependencies = [ "clap", ] +[[package]] +name = "linux-raw-sys" +version = "0.0.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4d2456c373231a208ad294c33dc5bff30051eafd954cd4caae83a712b12854d" + [[package]] name = "lock_api" version = "0.4.7" @@ -585,6 +631,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "miniz_oxide" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f5c75688da582b8ffc1f1799e9db273f32133c49e048f614d22ec3256773ccc" +dependencies = [ + "adler", +] + [[package]] name = "mio" version = "0.8.2" @@ -627,6 +682,18 @@ dependencies = [ "memoffset", ] +[[package]] +name = "nix" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "195cdbc1741b8134346d515b3a56a1c94b0912758009cfd53f99ea0f57b065fc" +dependencies = [ + "bitflags", + "cfg-if 1.0.0", + "libc", + "memoffset", +] + [[package]] name = "ntapi" version = "0.3.7" @@ -716,7 +783,7 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-sys", + "windows-sys 0.34.0", ] [[package]] @@ -793,6 
+860,21 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "procfs" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1391b61957e3b6f25a59ca2e057d22a44415917d87893986f6627fef109d32f" +dependencies = [ + "bitflags", + "byteorder", + "chrono", + "flate2", + "hex", + "lazy_static", + "rustix", +] + [[package]] name = "prost" version = "0.8.0" @@ -947,7 +1029,7 @@ dependencies = [ "libcontainer", "liboci-cli", "logging", - "nix", + "nix 0.23.1", "oci", "rustjail", "serde", @@ -960,6 +1042,20 @@ dependencies = [ "users", ] +[[package]] +name = "rustix" +version = "0.35.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d51cc38aa10f6bbb377ed28197aa052aa4e2b762c22be9d3153d01822587e787" +dependencies = [ + "bitflags", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys", + "windows-sys 0.36.1", +] + [[package]] name = "rustjail" version = "0.1.0" @@ -974,7 +1070,7 @@ dependencies = [ "inotify", "lazy_static", "libc", - "nix", + "nix 0.23.1", "oci", "path-absolutize", "protobuf", @@ -1176,6 +1272,13 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "test-utils" +version = "0.1.0" +dependencies = [ + "nix 0.24.2", +] + [[package]] name = "textwrap" version = "0.15.0" @@ -1273,7 +1376,7 @@ dependencies = [ "byteorder", "libc", "log", - "nix", + "nix 0.23.1", "protobuf", "protobuf-codegen-pure", "thiserror", @@ -1400,11 +1503,24 @@ version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5acdd78cb4ba54c0045ac14f62d8f94a03d10047904ae2a40afa1e99d8f70825" dependencies = [ - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_msvc", + "windows_aarch64_msvc 0.34.0", + "windows_i686_gnu 0.34.0", + "windows_i686_msvc 0.34.0", + "windows_x86_64_gnu 0.34.0", + "windows_x86_64_msvc 0.34.0", +] + +[[package]] +name = "windows-sys" +version = "0.36.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" +dependencies = [ + "windows_aarch64_msvc 0.36.1", + "windows_i686_gnu 0.36.1", + "windows_i686_msvc 0.36.1", + "windows_x86_64_gnu 0.36.1", + "windows_x86_64_msvc 0.36.1", ] [[package]] @@ -1413,26 +1529,56 @@ version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17cffbe740121affb56fad0fc0e421804adf0ae00891205213b5cecd30db881d" +[[package]] +name = "windows_aarch64_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" + [[package]] name = "windows_i686_gnu" version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2564fde759adb79129d9b4f54be42b32c89970c18ebf93124ca8870a498688ed" +[[package]] +name = "windows_i686_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" + [[package]] name = "windows_i686_msvc" version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9cd9d32ba70453522332c14d38814bceeb747d80b3958676007acadd7e166956" +[[package]] +name = "windows_i686_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" + [[package]] name = "windows_x86_64_gnu" version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfce6deae227ee8d356d19effc141a509cc503dfd1f850622ec4b0f84428e1f4" +[[package]] +name = "windows_x86_64_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" + [[package]] name = "windows_x86_64_msvc" version = 
"0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d19538ccc21819d01deaf88d6a17eae6596a12e9aafdbb97916fb49896d89de9" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" diff --git a/src/tools/runk/libcontainer/Cargo.toml b/src/tools/runk/libcontainer/Cargo.toml index ed96a4bfff..0db70d3d17 100644 --- a/src/tools/runk/libcontainer/Cargo.toml +++ b/src/tools/runk/libcontainer/Cargo.toml @@ -19,6 +19,9 @@ chrono = { version = "0.4.19", features = ["serde"] } serde = { version = "1.0.133", features = ["derive"] } serde_json = "1.0.74" scopeguard = "1.1.0" +cgroups = { package = "cgroups-rs", version = "0.2.10" } +procfs = "0.14.0" [dev-dependencies] tempfile = "3.3.0" +test-utils = { path = "../../../libs/test-utils" } diff --git a/src/tools/runk/libcontainer/src/builder.rs b/src/tools/runk/libcontainer/src/builder.rs index afb02b3565..c175c77c41 100644 --- a/src/tools/runk/libcontainer/src/builder.rs +++ b/src/tools/runk/libcontainer/src/builder.rs @@ -3,11 +3,8 @@ // SPDX-License-Identifier: Apache-2.0 // -use crate::container::{get_config_path, ContainerLauncher}; -use crate::{ - status::{get_current_container_state, Status}, - utils::validate_process_spec, -}; +use crate::container::{get_config_path, Container, ContainerLauncher}; +use crate::utils::validate_process_spec; use anyhow::{anyhow, Result}; use derive_builder::Builder; use oci::{ContainerState, Process as OCIProcess, Spec}; @@ -138,32 +135,35 @@ impl ActivatedContainer { logger, "enter ActivatedContainer::create_launcher {:?}", self ); - let status = Status::load(&self.root, &self.id)?; - let state = get_current_container_state(&status)?; + let container = Container::load(&self.root, &self.id)?; // If state is Created or Running, we can execute the process. 
- if state != ContainerState::Created && state != ContainerState::Running { - return Err(anyhow!("cannot exec in a stopped or paused container")); + if container.state != ContainerState::Created && container.state != ContainerState::Running + { + return Err(anyhow!( + "cannot exec in a stopped or paused container, state: {:?}", + container.state + )); } - let mut config = status.config; + let mut config = container.status.config; let spec = config.spec.as_mut().unwrap(); - self.adapt_exec_spec(spec, status.pid, logger)?; + self.adapt_exec_spec(spec, container.status.pid, logger)?; debug!(logger, "adapted spec: {:?}", spec); validate_spec(spec, &self.console_socket)?; debug!(logger, "create LinuxContainer with config: {:?}", config); // Maybe we should move some properties from status into LinuxContainer, // like pid, process_start_time, created, cgroup_manager, etc. But it works now. - let container = + let runner = create_linux_container(&self.id, &self.root, config, self.console_socket, logger)?; Ok(ContainerLauncher::new( &self.id, - &status.bundle, + &container.status.bundle, &self.root, false, - container, + runner, self.pid_file, )) } @@ -264,13 +264,14 @@ pub fn validate_spec(spec: &Spec, console_socket: &Option) -> Result<() mod tests { use super::*; use crate::container::CONFIG_FILE_NAME; - use crate::utils::test_utils::TEST_ROOTFS_PATH; + use crate::status::Status; + use crate::utils::test_utils::*; use chrono::DateTime; use nix::unistd::getpid; use oci::{self, Root, Spec}; use oci::{Linux, LinuxNamespace, User}; - use rustjail::cgroups::fs::Manager; use rustjail::container::TYPETONAME; + use scopeguard::defer; use slog::o; use std::fs::create_dir; use std::time::SystemTime; @@ -279,6 +280,7 @@ mod tests { path::PathBuf, }; use tempfile::tempdir; + use test_utils::skip_if_not_root; #[derive(Debug)] struct TestData { @@ -323,7 +325,9 @@ mod tests { .to_string_lossy() .to_string(); let test_data = TestData { - id: String::from("test"), + // Since tests are 
executed concurrently, container_id must be unique in tests with cgroup. + // Or the cgroup directory may be removed by other tests in advance. + id: String::from("test_init_container_create_launcher"), bundle: bundle_dir.path().to_path_buf(), root: root_dir.into_path(), console_socket: Some(PathBuf::from("test")), @@ -356,6 +360,10 @@ mod tests { Some(launcher.runner.console_socket), test_data.console_socket ); + // If it is run by root, create_launcher will create cgroup dirs successfully. So we need to do some cleanup stuff. + if nix::unistd::Uid::effective().is_root() { + clean_up_cgroup(Path::new(&test_data.id)); + } } #[test] @@ -454,6 +462,11 @@ mod tests { } fn create_dummy_status(id: &str, pid: i32, root: &Path, spec: &Spec) -> Status { + let start_time = procfs::process::Process::new(pid) + .unwrap() + .stat() + .unwrap() + .starttime; Status { oci_version: spec.version.clone(), id: id.to_string(), @@ -461,9 +474,9 @@ mod tests { root: root.to_path_buf(), bundle: PathBuf::from("/tmp"), rootfs: TEST_ROOTFS_PATH.to_string(), - process_start_time: 0, + process_start_time: start_time, created: DateTime::from(SystemTime::now()), - cgroup_manager: Manager::new("test").unwrap(), + cgroup_manager: serde_json::from_str(TEST_CGM_DATA).unwrap(), config: CreateOpts { spec: Some(spec.clone()), ..Default::default() @@ -498,11 +511,14 @@ mod tests { #[test] fn test_activated_container_create() { + // create cgroup directory needs root permission + skip_if_not_root!(); let logger = slog::Logger::root(slog::Discard, o!()); let bundle_dir = tempdir().unwrap(); let root = tempdir().unwrap(); - // let bundle = temp - let id = "test".to_string(); + // Since tests are executed concurrently, container_id must be unique in tests with cgroup. + // Or the cgroup directory may be removed by other tests in advance. 
+ let id = "test_activated_container_create".to_string(); create_activated_dirs(root.path(), &id, bundle_dir.path()); let pid = getpid().as_raw(); @@ -516,6 +532,10 @@ mod tests { let status = create_dummy_status(&id, pid, root.path(), &spec); status.save().unwrap(); + // create empty cgroup directory to avoid is_pause failing + let cgroup = create_dummy_cgroup(Path::new(id.as_str())); + defer!(cgroup.delete().unwrap()); + let result = ActivatedContainerBuilder::default() .id(id) .root(root.into_path()) @@ -575,6 +595,8 @@ mod tests { #[test] fn test_activated_container_create_with_process() { + // create cgroup directory needs root permission + skip_if_not_root!(); const PROCESS_FILE_NAME: &str = "process.json"; let bundle_dir = tempdir().unwrap(); let process_file = bundle_dir.path().join(PROCESS_FILE_NAME); @@ -588,7 +610,9 @@ mod tests { let logger = slog::Logger::root(slog::Discard, o!()); let root = tempdir().unwrap(); - let id = "test".to_string(); + // Since tests are executed concurrently, container_id must be unique in tests with cgroup. + // Or the cgroup directory may be removed by other tests in advance. 
+ let id = "test_activated_container_create_with_process".to_string(); let pid = getpid().as_raw(); let mut spec = create_dummy_spec(); spec.root.as_mut().unwrap().path = bundle_dir @@ -600,6 +624,10 @@ mod tests { let status = create_dummy_status(&id, pid, root.path(), &spec); status.save().unwrap(); + // create empty cgroup directory to avoid is_pause failing + let cgroup = create_dummy_cgroup(Path::new(id.as_str())); + defer!(cgroup.delete().unwrap()); + let launcher = ActivatedContainerBuilder::default() .id(id) .root(root.into_path()) diff --git a/src/tools/runk/libcontainer/src/cgroup.rs b/src/tools/runk/libcontainer/src/cgroup.rs index 9b53bb3689..586c6e8943 100644 --- a/src/tools/runk/libcontainer/src/cgroup.rs +++ b/src/tools/runk/libcontainer/src/cgroup.rs @@ -3,24 +3,15 @@ // SPDX-License-Identifier: Apache-2.0 // -use anyhow::{anyhow, Result}; -use rustjail::cgroups::fs::Manager as CgroupManager; -use std::{ - path::Path, - {fs, thread, time}, -}; - -pub fn destroy_cgroup(cgroup_mg: &CgroupManager) -> Result<()> { - for path in cgroup_mg.paths.values() { - remove_cgroup_dir(Path::new(path))?; - } - - Ok(()) -} +use anyhow::anyhow; +use anyhow::Result; +use cgroups; +use cgroups::freezer::{FreezerController, FreezerState}; +use std::{thread, time}; // Try to remove the provided cgroups path five times with increasing delay between tries. // If after all there are not removed cgroups, an appropriate error will be returned. 
-fn remove_cgroup_dir(path: &Path) -> Result<()> { +pub fn remove_cgroup_dir(cgroup: &cgroups::Cgroup) -> Result<()> { let mut retries = 5; let mut delay = time::Duration::from_millis(10); while retries != 0 { @@ -29,12 +20,58 @@ fn remove_cgroup_dir(path: &Path) -> Result<()> { thread::sleep(delay); } - if !path.exists() || fs::remove_dir(path).is_ok() { + if cgroup.delete().is_ok() { return Ok(()); } retries -= 1; } - return Err(anyhow!("failed to remove cgroups paths: {:?}", path)); + return Err(anyhow!("failed to remove cgroups paths")); +} + +// Make sure we get a stable freezer state, so retry if the cgroup is still undergoing freezing. +pub fn get_freezer_state(freezer: &FreezerController) -> Result { + let mut retries = 10; + while retries != 0 { + let state = freezer.state()?; + match state { + FreezerState::Thawed => return Ok(FreezerState::Thawed), + FreezerState::Frozen => return Ok(FreezerState::Frozen), + FreezerState::Freezing => { + // sleep for 10 ms, wait for the cgroup to finish freezing + thread::sleep(time::Duration::from_millis(10)); + retries -= 1; + } + } + } + Ok(FreezerState::Freezing) +} + +// check whether freezer state is frozen +pub fn is_paused(cgroup: &cgroups::Cgroup) -> Result { + let freezer_controller: &FreezerController = cgroup + .controller_of() + .ok_or_else(|| anyhow!("failed to get freezer controller"))?; + let freezer_state = get_freezer_state(freezer_controller)?; + match freezer_state { + FreezerState::Frozen => Ok(true), + _ => Ok(false), + } +} + +pub fn freeze(cgroup: &cgroups::Cgroup, state: FreezerState) -> Result<()> { + let freezer_controller: &FreezerController = cgroup + .controller_of() + .ok_or_else(|| anyhow!("failed to get freezer controller"))?; + match state { + FreezerState::Frozen => { + freezer_controller.freeze()?; + } + FreezerState::Thawed => { + freezer_controller.thaw()?; + } + _ => return Err(anyhow!("invalid freezer state")), + } + Ok(()) } diff --git 
a/src/tools/runk/libcontainer/src/container.rs b/src/tools/runk/libcontainer/src/container.rs index abc40fbba3..9a3c0fa61d 100644 --- a/src/tools/runk/libcontainer/src/container.rs +++ b/src/tools/runk/libcontainer/src/container.rs @@ -3,14 +3,20 @@ // SPDX-License-Identifier: Apache-2.0 // -use crate::status::{self, get_all_pid, get_current_container_state, Status}; +use crate::cgroup::{freeze, remove_cgroup_dir}; +use crate::status::{self, get_current_container_state, Status}; use anyhow::{anyhow, Result}; +use cgroups; +use cgroups::freezer::FreezerState; +use cgroups::hierarchies::is_cgroup2_unified_mode; use nix::sys::signal::kill; use nix::{ sys::signal::Signal, + sys::signal::SIGKILL, unistd::{chdir, unlink, Pid}, }; use oci::ContainerState; +use procfs; use rustjail::{ container::{BaseContainer, LinuxContainer, EXEC_FIFO_FILENAME}, process::{Process, ProcessOperations}, @@ -35,20 +41,55 @@ pub enum ContainerAction { pub struct Container { pub status: Status, pub state: ContainerState, + pub cgroup: cgroups::Cgroup, } +// Container represents a container that is created by the container runtime. 
impl Container { pub fn load(state_root: &Path, id: &str) -> Result { let status = Status::load(state_root, id)?; - let state = get_current_container_state(&status)?; - Ok(Self { status, state }) + let spec = status + .config + .spec + .as_ref() + .ok_or_else(|| anyhow!("spec config was not present"))?; + let linux = spec + .linux + .as_ref() + .ok_or_else(|| anyhow!("linux config was not present"))?; + let cpath = if linux.cgroups_path.is_empty() { + id.to_string() + } else { + linux + .cgroups_path + .clone() + .trim_start_matches('/') + .to_string() + }; + let cgroup = cgroups::Cgroup::load(cgroups::hierarchies::auto(), cpath); + let state = get_current_container_state(&status, &cgroup)?; + Ok(Self { + status, + state, + cgroup, + }) } pub fn processes(&self) -> Result> { - get_all_pid(&self.status.cgroup_manager) + let pids = self.cgroup.tasks(); + let result = pids.iter().map(|x| Pid::from_raw(x.pid as i32)).collect(); + Ok(result) } pub fn kill(&self, signal: Signal, all: bool) -> Result<()> { + if self.state == ContainerState::Stopped { + return Err(anyhow!( + "container {} can't be killed because it is {:?}", + self.status.id, + self.state + )); + } + if all { let pids = self.processes()?; for pid in pids { @@ -58,18 +99,46 @@ impl Container { kill(pid, signal)?; } } else { - if self.state == ContainerState::Stopped { - return Err(anyhow!("container {} not running", self.status.id)); - } let pid = Pid::from_raw(self.status.pid); if status::is_process_running(pid)? { kill(pid, signal)?; } } + // For cgroup v1, killing a process in a frozen cgroup does nothing until it's thawed. + // Only thaw the cgroup for SIGKILL. 
+ // Ref: https://github.com/opencontainers/runc/pull/3217 + if !is_cgroup2_unified_mode() && self.state == ContainerState::Paused && signal == SIGKILL { + freeze(&self.cgroup, FreezerState::Thawed)?; + } Ok(()) } - // TODO: add pause and resume + pub fn pause(&self) -> Result<()> { + if self.state != ContainerState::Running && self.state != ContainerState::Created { + return Err(anyhow!( + "failed to pause container: current status is: {:?}", + self.state + )); + } + freeze(&self.cgroup, FreezerState::Frozen)?; + Ok(()) + } + + pub fn resume(&self) -> Result<()> { + if self.state != ContainerState::Paused { + return Err(anyhow!( + "failed to resume container: current status is: {:?}", + self.state + )); + } + freeze(&self.cgroup, FreezerState::Thawed)?; + Ok(()) + } + + pub fn destroy(&self) -> Result<()> { + remove_cgroup_dir(&self.cgroup)?; + self.status.remove_dir() + } } /// Used to run a process. If init is set, it will create a container and run the process in it. @@ -190,11 +259,14 @@ impl ContainerLauncher { /// Generate runk specified Status fn get_status(&self) -> Result { let oci_state = self.runner.oci_state()?; + // read start time from /proc/<pid>/stat + let proc = procfs::process::Process::new(self.runner.init_process_pid)?; + let process_start_time = proc.stat()?.starttime; Status::new( &self.state_root, &self.bundle, oci_state, - self.runner.init_process_start_time, + process_start_time, self.runner.created, self.runner .cgroup_manager diff --git a/src/tools/runk/libcontainer/src/status.rs b/src/tools/runk/libcontainer/src/status.rs index 3cd9768b10..6a3480c591 100644 --- a/src/tools/runk/libcontainer/src/status.rs +++ b/src/tools/runk/libcontainer/src/status.rs @@ -3,6 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 // +use crate::cgroup::is_paused; use crate::container::get_fifo_path; use crate::utils::*; use anyhow::{anyhow, Result}; @@ -14,6 +15,7 @@ use nix::{ unistd::Pid, }; use oci::{ContainerState, State as OCIState}; +use 
procfs::process::ProcState; use rustjail::{cgroups::fs::Manager as CgroupManager, specconv::CreateOpts}; use serde::{Deserialize, Serialize}; use std::{ @@ -35,6 +37,10 @@ pub struct Status { pub rootfs: String, pub process_start_time: u64, pub created: DateTime, + // Methods of Manager traits in rustjail are invisible, and CgroupManager.cgroup can't be serialized. + // So it is cumbersome to manage cgroups by this field. Instead, we use cgroups-rs::cgroup directly in Container to manage cgroups. + // Another solution is making some methods public outside rustjail and adding getter/setter for CgroupManager.cgroup. + // Temporarily keep this field for compatibility. pub cgroup_manager: CgroupManager, pub config: CreateOpts, } @@ -143,53 +149,34 @@ pub fn is_process_running(pid: Pid) -> Result { } } -pub fn get_current_container_state(status: &Status) -> Result { - let running = is_process_running(Pid::from_raw(status.pid))?; - let mut has_fifo = false; - - if running { - let fifo = get_fifo_path(status); - if fifo.exists() { - has_fifo = true - } +// Returns the current state of a container. It will read cgroupfs and procfs to determine the state. +// https://github.com/opencontainers/runc/blob/86d6898f3052acba1ebcf83aa2eae3f6cc5fb471/libcontainer/container_linux.go#L1953 +pub fn get_current_container_state( + status: &Status, + cgroup: &cgroups::Cgroup, +) -> Result { + if is_paused(cgroup)? { + return Ok(ContainerState::Paused); } - - if running && !has_fifo { - // TODO: Check paused status. - // runk does not support pause command currently. 
+ let proc = procfs::process::Process::new(status.pid); + // if reading /proc/<pid> fails, then the process is not running + if proc.is_err() { + return Ok(ContainerState::Stopped); } - - if !running { - Ok(ContainerState::Stopped) - } else if has_fifo { - Ok(ContainerState::Created) - } else { - Ok(ContainerState::Running) + let proc_stat = proc.unwrap().stat()?; + // if start time is not equal, then the pid is reused, and the process is not running + if proc_stat.starttime != status.process_start_time { + return Ok(ContainerState::Stopped); } -} - -pub fn get_all_pid(cgm: &CgroupManager) -> Result> { - let cgroup_path = cgm.paths.get("devices"); - match cgroup_path { - Some(v) => { - let path = Path::new(v); - if !path.exists() { - return Err(anyhow!("cgroup devices file does not exist")); + match proc_stat.state()? { + ProcState::Zombie | ProcState::Dead => Ok(ContainerState::Stopped), + _ => { + let fifo = get_fifo_path(status); + if fifo.exists() { + return Ok(ContainerState::Created); } - - let procs_path = path.join("cgroup.procs"); - let pids: Vec = lines_from_file(&procs_path)? 
- .into_iter() - .map(|v| { - Pid::from_raw( - v.parse::() - .expect("failed to parse string into pid_t"), - ) - }) - .collect(); - Ok(pids) + Ok(ContainerState::Running) } - None => Err(anyhow!("cgroup devices file dose not exist")), } } @@ -197,10 +184,12 @@ pub fn get_all_pid(cgm: &CgroupManager) -> Result> { mod tests { use super::*; use crate::utils::test_utils::*; + use ::test_utils::skip_if_not_root; use chrono::{DateTime, Utc}; use nix::unistd::getpid; use oci::ContainerState; use rustjail::cgroups::fs::Manager as CgroupManager; + use scopeguard::defer; use std::path::Path; use std::time::SystemTime; @@ -235,14 +224,13 @@ mod tests { #[test] fn test_get_current_container_state() { - let status = create_dummy_status(); - let state = get_current_container_state(&status).unwrap(); + skip_if_not_root!(); + let mut status = create_dummy_status(); + status.id = "test_get_current_container_state".to_string(); + // create a dummy cgroup to make sure is_paused doesn't return an error + let cgroup = create_dummy_cgroup(Path::new(&status.id)); + defer!(cgroup.delete().unwrap()); + let state = get_current_container_state(&status, &cgroup).unwrap(); assert_eq!(state, ContainerState::Running); } - - #[test] - fn test_get_all_pid() { - let cgm: CgroupManager = serde_json::from_str(TEST_CGM_DATA).unwrap(); - assert!(get_all_pid(&cgm).is_ok()); - } } diff --git a/src/tools/runk/libcontainer/src/utils.rs b/src/tools/runk/libcontainer/src/utils.rs index bcb8b9748e..a65a3568d5 100644 --- a/src/tools/runk/libcontainer/src/utils.rs +++ b/src/tools/runk/libcontainer/src/utils.rs @@ -114,11 +114,16 @@ pub(crate) mod test_utils { let cgm: CgroupManager = serde_json::from_str(TEST_CGM_DATA).unwrap(); let oci_state = create_dummy_oci_state(); let created = SystemTime::now(); + let start_time = procfs::process::Process::new(oci_state.pid) + .unwrap() + .stat() + .unwrap() + .starttime; let status = Status::new( Path::new(TEST_STATE_ROOT_PATH), Path::new(TEST_BUNDLE_PATH), 
oci_state, - 1, + 
start_time, created, cgm, create_dummy_opts(), @@ -128,6 +133,15 @@ pub(crate) mod test_utils { status } + pub fn create_dummy_cgroup(cpath: &Path) -> cgroups::Cgroup { + cgroups::Cgroup::new(cgroups::hierarchies::auto(), cpath) + } + + pub fn clean_up_cgroup(cpath: &Path) { + let cgroup = cgroups::Cgroup::load(cgroups::hierarchies::auto(), cpath); + cgroup.delete().unwrap(); + } + #[test] pub fn test_validate_process_spec() { let valid_process = Process { diff --git a/src/tools/runk/src/commands/delete.rs b/src/tools/runk/src/commands/delete.rs index 4884c02914..ead8aa47eb 100644 --- a/src/tools/runk/src/commands/delete.rs +++ b/src/tools/runk/src/commands/delete.rs @@ -4,13 +4,10 @@ // use anyhow::{anyhow, Result}; -use libcontainer::{ - cgroup, - status::{get_current_container_state, Status}, -}; +use libcontainer::{container::Container, status::Status}; use liboci_cli::Delete; use nix::{ - errno::Errno, + sys::signal::SIGKILL, sys::signal::{kill, Signal}, unistd::Pid, }; @@ -26,13 +23,14 @@ pub async fn run(opts: Delete, root: &Path, logger: &Logger) -> Result<()> { return Err(anyhow!("container {} does not exist", container_id)); } - let status = if let Ok(value) = Status::load(root, container_id) { + let container = if let Ok(value) = Container::load(root, container_id) { value } else { fs::remove_dir_all(status_dir)?; return Ok(()); }; + let status = &container.status; let spec = status .config .spec @@ -42,7 +40,7 @@ pub async fn run(opts: Delete, root: &Path, logger: &Logger) -> Result<()> { let oci_state = OCIState { version: status.oci_version.clone(), id: status.id.clone(), - status: get_current_container_state(&status)?, + status: container.state, pid: status.pid, bundle: status .bundle @@ -64,20 +62,16 @@ pub async fn run(opts: Delete, root: &Path, logger: &Logger) -> Result<()> { match oci_state.status { ContainerState::Stopped => { - destroy_container(&status)?; + container.destroy()?; } ContainerState::Created => { kill(Pid::from_raw(status.pid), 
Some(Signal::SIGKILL))?; - destroy_container(&status)?; + container.destroy()?; } _ => { if opts.force { - if let Err(errno) = kill(Pid::from_raw(status.pid), Some(Signal::SIGKILL)) { - if errno != Errno::ESRCH { - return Err(anyhow!("{}", errno)); - } - } - destroy_container(&status)?; + container.kill(SIGKILL, true)?; + container.destroy()?; } else { return Err(anyhow!( "cannot delete container {} that is not stopped", @@ -91,10 +85,3 @@ pub async fn run(opts: Delete, root: &Path, logger: &Logger) -> Result<()> { Ok(()) } - -fn destroy_container(status: &Status) -> Result<()> { - cgroup::destroy_cgroup(&status.cgroup_manager)?; - status.remove_dir()?; - - Ok(()) -} diff --git a/src/tools/runk/src/commands/list.rs b/src/tools/runk/src/commands/list.rs index ef39042047..e3020cb5aa 100644 --- a/src/tools/runk/src/commands/list.rs +++ b/src/tools/runk/src/commands/list.rs @@ -5,7 +5,7 @@ use super::state::get_container_state_name; use anyhow::Result; -use libcontainer::status::{get_current_container_state, Status}; +use libcontainer::container::Container; use liboci_cli::List; use oci::ContainerState; use slog::{info, Logger}; @@ -19,7 +19,7 @@ pub fn run(_: List, root: &Path, logger: &Logger) -> Result<()> { let mut content = String::new(); for entry in fs::read_dir(root)? 
{ let entry = entry?; - // Possibly race with runk delete, so continue loop when any error occurs below + // Possibly race with other command of runk, so continue loop when any error occurs below let metadata = match entry.metadata() { Ok(metadata) => metadata, Err(_) => continue, @@ -31,18 +31,15 @@ pub fn run(_: List, root: &Path, logger: &Logger) -> Result<()> { Ok(id) => id, Err(_) => continue, }; - let status = match Status::load(root, &container_id) { - Ok(status) => status, - Err(_) => continue, - }; - let state = match get_current_container_state(&status) { - Ok(state) => state, + let container = match Container::load(root, &container_id) { + Ok(container) => container, Err(_) => continue, }; + let state = container.state; // Just like runc, pid of stopped container is 0 let pid = match state { ContainerState::Stopped => 0, - _ => status.pid, + _ => container.status.pid, }; // May replace get_user_by_uid with getpwuid(3) let owner = match get_user_by_uid(metadata.uid()) { @@ -55,8 +52,8 @@ pub fn run(_: List, root: &Path, logger: &Logger) -> Result<()> { container_id, pid, get_container_state_name(state), - status.bundle.display(), - status.created, + container.status.bundle.display(), + container.status.created, owner ); } diff --git a/src/tools/runk/src/commands/mod.rs b/src/tools/runk/src/commands/mod.rs index 4243035214..249b1440ab 100644 --- a/src/tools/runk/src/commands/mod.rs +++ b/src/tools/runk/src/commands/mod.rs @@ -8,7 +8,9 @@ pub mod delete; pub mod exec; pub mod kill; pub mod list; +pub mod pause; pub mod ps; +pub mod resume; pub mod run; pub mod spec; pub mod start; diff --git a/src/tools/runk/src/commands/pause.rs b/src/tools/runk/src/commands/pause.rs new file mode 100644 index 0000000000..fee9498a02 --- /dev/null +++ b/src/tools/runk/src/commands/pause.rs @@ -0,0 +1,18 @@ +// Copyright 2021-2022 Kata Contributors +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::Result; +use libcontainer::container::Container; +use 
liboci_cli::Pause; +use slog::{info, Logger}; +use std::path::Path; + +pub fn run(opts: Pause, root: &Path, logger: &Logger) -> Result<()> { + let container = Container::load(root, &opts.container_id)?; + container.pause()?; + + info!(&logger, "pause command finished successfully"); + Ok(()) +} diff --git a/src/tools/runk/src/commands/resume.rs b/src/tools/runk/src/commands/resume.rs new file mode 100644 index 0000000000..7d8b9d39d9 --- /dev/null +++ b/src/tools/runk/src/commands/resume.rs @@ -0,0 +1,18 @@ +// Copyright 2021-2022 Kata Contributors +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::Result; +use libcontainer::container::Container; +use liboci_cli::Resume; +use slog::{info, Logger}; +use std::path::Path; + +pub fn run(opts: Resume, root: &Path, logger: &Logger) -> Result<()> { + let container = Container::load(root, &opts.container_id)?; + container.resume()?; + + info!(&logger, "resume command finished successfully"); + Ok(()) +} diff --git a/src/tools/runk/src/commands/start.rs b/src/tools/runk/src/commands/start.rs index 7504936887..8176aa0fa7 100644 --- a/src/tools/runk/src/commands/start.rs +++ b/src/tools/runk/src/commands/start.rs @@ -5,39 +5,29 @@ use crate::commands::state::get_container_state_name; use anyhow::{anyhow, Result}; -use libcontainer::{ - container::get_fifo_path, - status::{get_current_container_state, Status}, -}; +use libcontainer::container::{get_fifo_path, Container}; use liboci_cli::Start; use nix::unistd::unlink; use oci::ContainerState; use slog::{info, Logger}; -use std::{fs::OpenOptions, io::prelude::*, path::Path, time::SystemTime}; +use std::{fs::OpenOptions, io::prelude::*, path::Path}; pub fn run(opts: Start, state_root: &Path, logger: &Logger) -> Result<()> { - let mut status = Status::load(state_root, &opts.container_id)?; - let state = get_current_container_state(&status)?; - if state != ContainerState::Created { + let container = Container::load(state_root, &opts.container_id)?; + if container.state 
!= ContainerState::Created { return Err(anyhow!( "cannot start a container in the {} state", - get_container_state_name(state) + get_container_state_name(container.state) )); }; - let fifo_path = get_fifo_path(&status); + let fifo_path = get_fifo_path(&container.status); let mut file = OpenOptions::new().write(true).open(&fifo_path)?; file.write_all("0".as_bytes())?; info!(&logger, "container started"); - status.process_start_time = SystemTime::now() - .duration_since(SystemTime::UNIX_EPOCH)? - .as_secs(); - - status.save()?; - if fifo_path.exists() { unlink(&fifo_path)?; } diff --git a/src/tools/runk/src/commands/state.rs b/src/tools/runk/src/commands/state.rs index eb6b87d492..4e3bf6f330 100644 --- a/src/tools/runk/src/commands/state.rs +++ b/src/tools/runk/src/commands/state.rs @@ -5,7 +5,7 @@ use anyhow::Result; use chrono::{DateTime, Utc}; -use libcontainer::status::{get_current_container_state, Status}; +use libcontainer::{container::Container, status::Status}; use liboci_cli::State; use oci::ContainerState; use serde::{Deserialize, Serialize}; @@ -37,9 +37,8 @@ impl RuntimeState { } pub fn run(opts: State, state_root: &Path, logger: &Logger) -> Result<()> { - let status = Status::load(state_root, &opts.container_id)?; - let state = get_current_container_state(&status)?; - let oci_state = RuntimeState::new(status, state); + let container = Container::load(state_root, &opts.container_id)?; + let oci_state = RuntimeState::new(container.status, container.state); let json_state = &serde_json::to_string_pretty(&oci_state)?; println!("{}", json_state); diff --git a/src/tools/runk/src/main.rs b/src/tools/runk/src/main.rs index 4565e6a369..9f338ec404 100644 --- a/src/tools/runk/src/main.rs +++ b/src/tools/runk/src/main.rs @@ -81,6 +81,8 @@ async fn cmd_run(subcmd: SubCommand, root_path: &Path, logger: &Logger) -> Resul CommonCmd::List(list) => commands::list::run(list, root_path, logger), CommonCmd::Exec(exec) => commands::exec::run(exec, root_path, logger).await, 
CommonCmd::Ps(ps) => commands::ps::run(ps, root_path, logger), + CommonCmd::Pause(pause) => commands::pause::run(pause, root_path, logger), + CommonCmd::Resume(resume) => commands::resume::run(resume, root_path, logger), _ => { return Err(anyhow!("command is not implemented yet")); }