From 15065e4472aea1a50eb96136e43cbb72aa75490d Mon Sep 17 00:00:00 2001 From: bin liu Date: Mon, 24 Aug 2020 19:44:16 +0800 Subject: [PATCH] agent: add cgroup v2 support This PR add basic cgroup v2 support for agent. Fixes: #146, #357 Signed-off-by: bin liu --- src/agent/Cargo.lock | 29 +- src/agent/Cargo.toml | 1 + src/agent/rustjail/Cargo.toml | 1 + src/agent/rustjail/src/cgroups/fs/mod.rs | 2177 +++++++++----------- src/agent/rustjail/src/cgroups/mod.rs | 17 +- src/agent/rustjail/src/cgroups/notifier.rs | 209 ++ src/agent/rustjail/src/container.rs | 64 +- src/agent/rustjail/src/mount.rs | 60 +- src/agent/src/config.rs | 88 +- src/agent/src/main.rs | 24 +- src/agent/src/mount.rs | 39 +- src/agent/src/rpc.rs | 46 +- src/agent/src/sandbox.rs | 29 +- src/runtime/containerd-shim-v2/wait.go | 2 +- src/runtime/virtcontainers/sandbox.go | 6 +- 15 files changed, 1454 insertions(+), 1338 deletions(-) create mode 100644 src/agent/rustjail/src/cgroups/notifier.rs diff --git a/src/agent/Cargo.lock b/src/agent/Cargo.lock index b8569e237b..c99d63c873 100644 --- a/src/agent/Cargo.lock +++ b/src/agent/Cargo.lock @@ -97,6 +97,17 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" +[[package]] +name = "cgroups" +version = "0.1.1-alpha.0" +source = "git+https://github.com/kata-containers/cgroups-rs?tag=0.1.1#3852d7c1805499cd6a0e37ec400d81a7085d91a7" +dependencies = [ + "libc", + "log", + "nix 0.18.0", + "regex", +] + [[package]] name = "chrono" version = "0.4.11" @@ -240,6 +251,7 @@ name = "kata-agent" version = "0.1.0" dependencies = [ "anyhow", + "cgroups", "lazy_static", "libc", "logging", @@ -271,9 +283,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.70" +version = "0.2.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3baa92041a6fec78c687fa0cc2b3fae8884f743d672cf551bed1d6dac6988d0f" +checksum = "f2f96b10ec2560088a8e76961b00d47107b3a625fecb76dedb29ee7ccbf98235" [[package]] name = "libflate" @@ -363,6 +375,18 @@ dependencies = [ "void", ] +[[package]] +name = "nix" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83450fe6a6142ddd95fb064b746083fc4ef1705fe81f64a64e1d4b39f54a1055" +dependencies = [ + "bitflags", + "cc", + "cfg-if", + "libc", +] + [[package]] name = "num-integer" version = "0.1.42" @@ -619,6 +643,7 @@ version = "0.1.0" dependencies = [ "anyhow", "caps", + "cgroups", "dirs", "lazy_static", "libc", diff --git a/src/agent/Cargo.toml b/src/agent/Cargo.toml index 7b4ef1f1eb..ac3db0d5c2 100644 --- a/src/agent/Cargo.toml +++ b/src/agent/Cargo.toml @@ -32,6 +32,7 @@ tempfile = "3.1.0" prometheus = { version = "0.9.0", features = ["process"] } procfs = "0.7.9" anyhow = "1.0.32" +cgroups = { git = "https://github.com/kata-containers/cgroups-rs", tag = "0.1.1"} [workspace] members = [ diff --git a/src/agent/rustjail/Cargo.toml b/src/agent/rustjail/Cargo.toml index c8481efa6b..97c03d1e59 100644 --- a/src/agent/rustjail/Cargo.toml +++ b/src/agent/rustjail/Cargo.toml @@ -24,3 +24,4 @@ regex = "1.1" path-absolutize = "1.2.0" dirs = "3.0.1" anyhow = "1.0.32" +cgroups = { git = "https://github.com/kata-containers/cgroups-rs", tag = "0.1.1"} diff --git a/src/agent/rustjail/src/cgroups/fs/mod.rs b/src/agent/rustjail/src/cgroups/fs/mod.rs index 70657398a8..37ead8d167 100644 --- a/src/agent/rustjail/src/cgroups/fs/mod.rs +++ b/src/agent/rustjail/src/cgroups/fs/mod.rs @@ -1,15 +1,34 @@ -// Copyright (c) 2019 Ant Financial +// Copyright (c) 2019, 2020 Ant Group // // SPDX-License-Identifier: Apache-2.0 // -use crate::cgroups::FreezerState; + +use cgroups::blkio::{BlkIo, BlkIoController, BlkIoData, IoService}; +use cgroups::cpu::CpuController; +use cgroups::cpuacct::CpuAcctController; +use cgroups::cpuset::CpuSetController; +use cgroups::devices::DevicePermissions; +use cgroups::devices::DeviceType; +use cgroups::freezer::{FreezerController, FreezerState}; +use cgroups::hugetlb::HugeTlbController; +use cgroups::memory::MemController; +use cgroups::pid::PidController; +use cgroups::{ + BlkIoDeviceResource, BlkIoDeviceThrottleResource, Cgroup, CgroupPid, Controller, + DeviceResource, DeviceResources, HugePageResource, MaxValue, NetworkPriority, +}; + use crate::cgroups::Manager as CgroupManager; use crate::container::DEFAULT_DEVICES; use anyhow::{anyhow, Context, Error, Result}; use lazy_static; use libc::{self, pid_t}; use nix::errno::Errno; -use oci::{LinuxDeviceCgroup, LinuxResources, LinuxThrottleDevice, LinuxWeightDevice}; +use oci::{ + LinuxBlockIO, LinuxCPU, LinuxDevice, LinuxDeviceCgroup, LinuxHugepageLimit, LinuxMemory, + LinuxNetwork, LinuxPids, LinuxResources, LinuxThrottleDevice, LinuxWeightDevice, +}; + use protobuf::{CachedSize, RepeatedField, SingularPtrField, UnknownFields}; use protocols::agent::{ BlkioStats, BlkioStatsEntry, CgroupStats, CpuStats, CpuUsage, HugetlbStats, MemoryData, @@ -27,40 +46,498 @@ macro_rules! sl { }; } -pub struct CpuSet(); -pub struct Cpu(); -pub struct Devices(); -pub struct Memory(); -pub struct CpuAcct(); -pub struct Pids(); -pub struct Blkio(); -pub struct HugeTLB(); -pub struct NetCls(); -pub struct NetPrio(); -pub struct PerfEvent(); -pub struct Freezer(); -pub struct Named(); - -pub trait Subsystem { - fn name(&self) -> String { - "unknown".to_string() - } - - fn set(&self, _dir: &str, _r: &LinuxResources, _update: bool) -> Result<()> { - Ok(()) - } - - fn apply(&self, path: &str, pid: pid_t) -> Result<()> { - write_file(path, CGROUP_PROCS, pid)?; - Ok(()) +pub fn load_or_create<'a>(h: Box<&'a dyn cgroups::Hierarchy>, path: &str) -> Cgroup<'a> { + let valid_path = path.trim_start_matches("/").to_string(); + let cg = load(h.clone(), &valid_path); + if cg.is_none() { + info!(sl!(), "create new cgroup: {}", &valid_path); + cgroups::Cgroup::new(h, valid_path.as_str()) + } else { + cg.unwrap() } } +pub fn load<'a>(h: Box<&'a dyn cgroups::Hierarchy>, path: &str) -> Option> { + let valid_path = path.trim_start_matches("/").to_string(); + let cg = cgroups::Cgroup::load(h, valid_path.as_str()); + let cpu_controller: &CpuController = cg.controller_of().unwrap(); + if cpu_controller.exists() { + Some(cg) + } else { + None + } +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Manager { + pub paths: HashMap, + pub mounts: HashMap, + // pub rels: HashMap, + pub cpath: String, +} + +// set_resource is used to set reources by cgroup controller. +macro_rules! set_resource { + ($cont:ident, $func:ident, $res:ident, $field:ident) => { + let resource_value = $res.$field.unwrap_or(0); + if resource_value != 0 { + $cont.$func(resource_value)?; + } + }; +} + +impl CgroupManager for Manager { + fn apply(&self, pid: pid_t) -> Result<()> { + let h = cgroups::hierarchies::auto(); + let h = Box::new(&*h); + let cg = load_or_create(h, &self.cpath); + cg.add_task(CgroupPid::from(pid as u64)); + Ok(()) + } + + fn set(&self, r: &LinuxResources, update: bool) -> Result<()> { + let h = cgroups::hierarchies::auto(); + let h = Box::new(&*h); + let cg = load_or_create(h, &self.cpath); + info!( + sl!(), + "cgroup manager set resources for container. Resources input {:?}", r + ); + + let res = &mut cgroups::Resources::default(); + + // set cpuset and cpu reources + if let Some(cpu) = &r.cpu { + set_cpu_resources(&cg, cpu)?; + } + + // set memory resources + if let Some(memory) = &r.memory { + set_memory_resources(&cg, memory, update)?; + } + + // set pids resources + if let Some(pids_resources) = &r.pids { + set_pids_resources(&cg, pids_resources)?; + } + + // set block_io resources + if let Some(blkio) = &r.block_io { + set_block_io_resources(&cg, blkio, res)?; + } + + // set hugepages resources + if r.hugepage_limits.len() > 0 { + set_hugepages_resources(&cg, &r.hugepage_limits, res)?; + } + + // set network resources + if let Some(network) = &r.network { + set_network_resources(&cg, network, res)?; + } + + // set devices resources + set_devices_resources(&cg, &r.devices, res)?; + info!(sl!(), "resources after processed {:?}", res); + + // apply resources + cg.apply(res)?; + + Ok(()) + } + + fn get_stats(&self) -> Result { + let h = cgroups::hierarchies::auto(); + let h = Box::new(&*h); + let cg = load_or_create(h, &self.cpath); + + // CpuStats + let cpu_usage = get_cpuacct_stats(&cg); + + let throttling_data = get_cpu_stats(&cg); + + let cpu_stats = SingularPtrField::some(CpuStats { + cpu_usage, + throttling_data, + unknown_fields: UnknownFields::default(), + cached_size: CachedSize::default(), + }); + + // Memorystats + let memory_stats = get_memory_stats(&cg); + + // PidsStats + let pids_stats = get_pids_stats(&cg); + + // BlkioStats + // note that virtiofs has no blkio stats + let blkio_stats = get_blkio_stats(&cg); + + // HugetlbStats + let hugetlb_stats = get_hugetlb_stats(&cg); + + Ok(CgroupStats { + cpu_stats, + memory_stats, + pids_stats, + blkio_stats, + hugetlb_stats, + unknown_fields: UnknownFields::default(), + cached_size: CachedSize::default(), + }) + } + + fn freeze(&self, state: FreezerState) -> Result<()> { + let h = cgroups::hierarchies::auto(); + let h = Box::new(&*h); + let cg = load_or_create(h, &self.cpath); + let freezer_controller: &FreezerController = cg.controller_of().unwrap(); + match state { + FreezerState::Thawed => { + freezer_controller.thaw(); + } + FreezerState::Frozen => { + freezer_controller.freeze(); + } + _ => { + return Err(nix::Error::Sys(Errno::EINVAL).into()); + } + } + + Ok(()) + } + + fn destroy(&mut self) -> Result<()> { + let h = cgroups::hierarchies::auto(); + let h = Box::new(&*h); + let cg = load(h, &self.cpath); + if cg.is_some() { + cg.unwrap().delete(); + } + Ok(()) + } + + fn get_pids(&self) -> Result> { + let h = cgroups::hierarchies::auto(); + let h = Box::new(&*h); + let cg = load_or_create(h, &self.cpath); + let mem_controller: &MemController = cg.controller_of().unwrap(); + let pids = mem_controller.tasks(); + let result = pids.iter().map(|x| x.pid as i32).collect::>(); + + Ok(result) + } +} + +fn set_network_resources( + cg: &cgroups::Cgroup, + network: &LinuxNetwork, + res: &mut cgroups::Resources, +) -> Result<()> { + info!(sl!(), "cgroup manager set network"); + + // set classid + // description can be found at https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v1/net_cls.html + let class_id = network.class_id.unwrap_or(0) as u64; + if class_id != 0 { + res.network.class_id = class_id; + } + + // set network priorities + // description can be found at https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v1/net_prio.html + let mut priorities = vec![]; + for p in network.priorities.iter() { + priorities.push(NetworkPriority { + name: p.name.clone(), + priority: p.priority as u64, + }); + } + + res.network.update_values = true; + res.network.priorities = priorities; + Ok(()) +} + +fn set_devices_resources( + cg: &cgroups::Cgroup, + device_resources: &Vec, + res: &mut cgroups::Resources, +) -> Result<()> { + info!(sl!(), "cgroup manager set devices"); + let mut devices = vec![]; + + for d in device_resources.iter() { + let dev = linux_device_group_to_cgroup_device(&d); + devices.push(dev); + } + + for d in DEFAULT_DEVICES.iter() { + let dev = linux_device_to_cgroup_device(&d); + devices.push(dev); + } + + for d in DEFAULT_ALLOWED_DEVICES.iter() { + let dev = linux_device_group_to_cgroup_device(&d); + devices.push(dev); + } + + res.devices.update_values = true; + res.devices.devices = devices; + + Ok(()) +} + +fn set_hugepages_resources( + cg: &cgroups::Cgroup, + hugepage_limits: &Vec, + res: &mut cgroups::Resources, +) -> Result<()> { + info!(sl!(), "cgroup manager set hugepage"); + res.hugepages.update_values = true; + let mut limits = vec![]; + + for l in hugepage_limits.iter() { + let hr = HugePageResource { + size: l.page_size.clone(), + limit: l.limit, + }; + limits.push(hr); + } + res.hugepages.limits = limits; + + Ok(()) +} + +fn set_block_io_resources( + cg: &cgroups::Cgroup, + blkio: &LinuxBlockIO, + res: &mut cgroups::Resources, +) -> Result<()> { + info!(sl!(), "cgroup manager set block io"); + res.blkio.update_values = true; + + if cg.v2() { + res.blkio.weight = convert_blk_io_to_v2_value(blkio.weight); + res.blkio.leaf_weight = convert_blk_io_to_v2_value(blkio.leaf_weight); + } else { + res.blkio.weight = blkio.weight; + res.blkio.leaf_weight = blkio.leaf_weight; + } + + let mut blk_device_resources = vec![]; + for d in blkio.weight_device.iter() { + let (w, lw) = if cg.v2() { + ( + convert_blk_io_to_v2_value(blkio.weight), + convert_blk_io_to_v2_value(blkio.leaf_weight), + ) + } else { + (blkio.weight, blkio.leaf_weight) + }; + + let dr = BlkIoDeviceResource { + major: d.blk.major as u64, + minor: d.blk.minor as u64, + weight: w, + leaf_weight: lw, + }; + blk_device_resources.push(dr); + } + res.blkio.weight_device = blk_device_resources; + + res.blkio.throttle_read_bps_device = + build_blk_io_device_throttle_resource(&blkio.throttle_read_bps_device); + res.blkio.throttle_write_bps_device = + build_blk_io_device_throttle_resource(&blkio.throttle_write_bps_device); + res.blkio.throttle_read_iops_device = + build_blk_io_device_throttle_resource(&blkio.throttle_read_iops_device); + res.blkio.throttle_write_iops_device = + build_blk_io_device_throttle_resource(&blkio.throttle_write_iops_device); + + Ok(()) +} + +fn set_cpu_resources(cg: &cgroups::Cgroup, cpu: &LinuxCPU) -> Result<()> { + info!(sl!(), "cgroup manager set cpu"); + + let cpuset_controller: &CpuSetController = cg.controller_of().unwrap(); + + if !cpu.cpus.is_empty() { + cpuset_controller.set_cpus(&cpu.cpus); + } + + if !cpu.mems.is_empty() { + cpuset_controller.set_mems(&cpu.mems); + } + + let cpu_controller: &CpuController = cg.controller_of().unwrap(); + + if let Some(shares) = cpu.shares { + let shares = if cg.v2() { + convert_shares_to_v2_value(shares) + } else { + shares + }; + if shares != 0 { + cpu_controller.set_shares(shares); + } + } + + cpu_controller.set_cfs_quota_and_period(cpu.quota, cpu.period); + + set_resource!(cpu_controller, set_rt_runtime, cpu, realtime_runtime); + set_resource!(cpu_controller, set_rt_period_us, cpu, realtime_period); + + Ok(()) +} + +fn set_memory_resources(cg: &cgroups::Cgroup, memory: &LinuxMemory, update: bool) -> Result<()> { + info!(sl!(), "cgroup manager set memory"); + let mem_controller: &MemController = cg.controller_of().unwrap(); + + if !update { + // initialize kmem limits for accounting + mem_controller.set_kmem_limit(1)?; + mem_controller.set_kmem_limit(-1)?; + } + + set_resource!(mem_controller, set_limit, memory, limit); + set_resource!(mem_controller, set_soft_limit, memory, reservation); + set_resource!(mem_controller, set_kmem_limit, memory, kernel); + set_resource!(mem_controller, set_tcp_limit, memory, kernel_tcp); + + if let Some(swap) = memory.swap { + // set memory swap + let swap = if cg.v2() { + convert_memory_swap_to_v2_value(swap, memory.limit.unwrap_or(0))? + } else { + swap + }; + if swap != 0 { + mem_controller.set_memswap_limit(swap)?; + } + } + + if let Some(swapiness) = memory.swapiness { + if swapiness >= 0 && swapiness <= 100 { + mem_controller.set_swappiness(swapiness as u64)?; + } else { + return Err(anyhow!( + "invalid value:{}. valid memory swappiness range is 0-100", + swapiness + )); + } + } + + if memory.disable_oom_killer.unwrap_or(false) { + mem_controller.disable_oom_killer()?; + } + + Ok(()) +} + +fn set_pids_resources(cg: &cgroups::Cgroup, pids: &LinuxPids) -> Result<()> { + info!(sl!(), "cgroup manager set pids"); + let pid_controller: &PidController = cg.controller_of().unwrap(); + let v = if pids.limit > 0 { + MaxValue::Value(pids.limit) + } else { + MaxValue::Max + }; + pid_controller + .set_pid_max(v) + .context("failed to set pids resources") +} + +fn build_blk_io_device_throttle_resource( + input: &Vec, +) -> Vec { + let mut blk_io_device_throttle_resources = vec![]; + for d in input.iter() { + let tr = BlkIoDeviceThrottleResource { + major: d.blk.major as u64, + minor: d.blk.minor as u64, + rate: d.rate, + }; + blk_io_device_throttle_resources.push(tr); + } + + blk_io_device_throttle_resources +} + +fn linux_device_to_cgroup_device(d: &LinuxDevice) -> DeviceResource { + let dev_type = DeviceType::from_char(d.r#type.chars().next()).unwrap(); + + let mut permissions = vec![ + DevicePermissions::Read, + DevicePermissions::Write, + DevicePermissions::MkNod, + ]; + + DeviceResource { + allow: true, + devtype: dev_type, + major: d.major, + minor: d.minor, + access: permissions, + } +} + +fn linux_device_group_to_cgroup_device(d: &LinuxDeviceCgroup) -> DeviceResource { + let dev_type = DeviceType::from_char(d.r#type.chars().next()).unwrap(); + + let mut permissions: Vec = vec![]; + for p in d.access.chars().collect::>() { + match p { + 'r' => permissions.push(DevicePermissions::Read), + 'w' => permissions.push(DevicePermissions::Write), + 'm' => permissions.push(DevicePermissions::MkNod), + _ => {} + } + } + + DeviceResource { + allow: d.allow, + devtype: dev_type, + major: d.major.unwrap_or(0), + minor: d.minor.unwrap_or(0), + access: permissions, + } +} + +// split space separated values into an vector of u64 +fn line_to_vec(line: &str) -> Vec { + line.split_whitespace() + .filter_map(|x| x.parse::().ok()) + .collect::>() +} + +// split flat keyed values into an hashmap of +fn lines_to_map(content: &str) -> HashMap { + content + .lines() + .map(|x| x.split_whitespace().collect::>()) + .filter(|x| x.len() == 2 && x[1].parse::().is_ok()) + .fold(HashMap::new(), |mut hm, mut x| { + hm.insert(x[0].to_string(), x[1].parse::().unwrap()); + hm + }) +} + +pub const NANO_PER_SECOND: u64 = 1000000000; pub const WILDCARD: i64 = -1; lazy_static! { + pub static ref CLOCK_TICKS: f64 = { + let n = unsafe { libc::sysconf(libc::_SC_CLK_TCK) }; + + n as f64 + }; + pub static ref DEFAULT_ALLOWED_DEVICES: Vec = { let mut v = Vec::new(); + + // all mknod to all char devices v.push(LinuxDeviceCgroup { allow: true, r#type: "c".to_string(), @@ -69,6 +546,7 @@ lazy_static! { access: "m".to_string(), }); + // all mknod to all block devices v.push(LinuxDeviceCgroup { allow: true, r#type: "b".to_string(), @@ -77,6 +555,7 @@ lazy_static! { access: "m".to_string(), }); + // all read/write/mknod to char device /dev/console v.push(LinuxDeviceCgroup { allow: true, r#type: "c".to_string(), @@ -85,6 +564,7 @@ lazy_static! { access: "rwm".to_string(), }); + // all read/write/mknod to char device /dev/pts/ v.push(LinuxDeviceCgroup { allow: true, r#type: "c".to_string(), @@ -93,6 +573,7 @@ lazy_static! { access: "rwm".to_string(), }); + // all read/write/mknod to char device /dev/ptmx v.push(LinuxDeviceCgroup { allow: true, r#type: "c".to_string(), @@ -101,6 +582,7 @@ lazy_static! { access: "rwm".to_string(), }); + // all read/write/mknod to char device /dev/net/tun v.push(LinuxDeviceCgroup { allow: true, r#type: "c".to_string(), @@ -111,1036 +593,338 @@ lazy_static! { v }; - pub static ref BMAP: HashMap = { - let mut m = HashMap::new(); - m.insert("k".to_string(), KiB); - m.insert("m".to_string(), MiB); - m.insert("g".to_string(), GiB); - m.insert("t".to_string(), TiB); - m.insert("p".to_string(), PiB); - m - }; - pub static ref DMAP: HashMap = { - let mut m = HashMap::new(); - m.insert("k".to_string(), KB); - m.insert("m".to_string(), MB); - m.insert("g".to_string(), GB); - m.insert("t".to_string(), TB); - m.insert("p".to_string(), PB); - m - }; - pub static ref DABBRS: Vec = { - let m = vec![ - "B".to_string(), - "KB".to_string(), - "MB".to_string(), - "GB".to_string(), - "TB".to_string(), - "PB".to_string(), - "EB".to_string(), - "ZB".to_string(), - "YB".to_string(), - ]; - m - }; - pub static ref BABBRS: Vec = { - let m = vec![ - "B".to_string(), - "KiB".to_string(), - "MiB".to_string(), - "GiB".to_string(), - "TiB".to_string(), - "PiB".to_string(), - "EiB".to_string(), - "ZiB".to_string(), - "YiB".to_string(), - ]; - m - }; - pub static ref HUGEPAGESIZES: Vec = { - let m = match get_hugepage_sizes() { - Err(_) => Vec::new(), - Ok(s) => s, - }; - - m - }; } -pub fn io_error_kind_eq(e: &Error, k: std::io::ErrorKind) -> bool { - if let Some(ref re) = e.downcast_ref::() { - return re.kind() == k; - } +fn get_cpu_stats(cg: &cgroups::Cgroup) -> SingularPtrField { + let cpu_controller: &CpuController = cg.controller_of().unwrap(); - return false; + let stat = cpu_controller.cpu().stat; + + let h = lines_to_map(&stat); + + SingularPtrField::some(ThrottlingData { + periods: *h.get("nr_periods").unwrap_or(&0), + throttled_periods: *h.get("nr_throttled").unwrap_or(&0), + throttled_time: *h.get("throttled_time").unwrap_or(&0), + unknown_fields: UnknownFields::default(), + cached_size: CachedSize::default(), + }) } -pub const KB: u128 = 1000; -pub const MB: u128 = 1000 * KB; -pub const GB: u128 = 1000 * MB; -pub const TB: u128 = 1000 * GB; -pub const PB: u128 = 1000 * TB; - -pub const KiB: u128 = 1024; -pub const MiB: u128 = 1024 * KiB; -pub const GiB: u128 = 1024 * MiB; -pub const TiB: u128 = 1024 * GiB; -pub const PiB: u128 = 1024 * TiB; - -pub const HUGETLB_BASE: &'static str = "hugetlb"; -pub const HUGETLB_USAGE: &'static str = "usage_in_bytes"; -pub const HUGETLB_MAX_USAGE: &'static str = "max_usage_in_bytes"; -pub const HUGETLB_FAILCNT: &'static str = "failcnt"; - -fn parse_size(s: &str, m: &HashMap) -> Result { - let re = Regex::new(r"(?P\d+)(?P[kKmMgGtTpP]?)[bB]?$")?; - let caps = re.captures(s).unwrap(); - - let num = caps.name("num"); - let size: u128 = if num.is_some() { - num.unwrap().as_str().trim().parse::()? - } else { - return Err(nix::Error::Sys(Errno::EINVAL).into()); - }; - - let q = caps.name("mul"); - let mul: u128 = if q.is_some() { - let t = m.get(q.unwrap().as_str()); - if t.is_some() { - *t.unwrap() - } else { - return Err(nix::Error::Sys(Errno::EINVAL).into()); - } - } else { - return Err(nix::Error::Sys(Errno::EINVAL).into()); - }; - - Ok(size * mul) -} - -fn custom_size(mut size: f64, base: f64, m: &Vec) -> String { - let mut i = 0; - while size >= base && i < m.len() - 1 { - size /= base; - i += 1; - } - - format!("{}{}", size, m[i].as_str()) -} - -pub const HUGEPAGESIZE_DIR: &'static str = "/sys/kernel/mm/hugepages"; - -fn get_hugepage_sizes() -> Result> { - let mut m = Vec::new(); - for e in fs::read_dir(HUGEPAGESIZE_DIR)? { - let name = e?.file_name().into_string().unwrap(); - let parts: Vec<&str> = name.split('-').collect(); - if parts.len() != 2 { - continue; - } - let size = parse_size(parts[1], &BMAP)?; - m.push(custom_size(size as f64, 1024.0, DABBRS.as_ref())); - } - - Ok(m) -} - -pub const CPUSET_CPUS: &'static str = "cpuset.cpus"; -pub const CPUSET_MEMS: &'static str = "cpuset.mems"; -pub const CGROUP_PROCS: &'static str = "cgroup.procs"; -pub const CPU_RT_PERIOD_US: &'static str = "cpu.rt_period_us"; -pub const CPU_RT_RUNTIME_US: &'static str = "cpu.rt_runtime_us"; -pub const CPU_SHARES: &'static str = "cpu.shares"; -pub const CPU_CFS_QUOTA_US: &'static str = "cpu.cfs_quota_us"; -pub const CPU_CFS_PERIOD_US: &'static str = "cpu.cfs_period_us"; -pub const DEVICES_ALLOW: &'static str = "devices.allow"; -pub const DEVICES_DENY: &'static str = "devices.deny"; -pub const MEMORY_LIMIT: &'static str = "memory.limit_in_bytes"; -pub const MEMORY_SOFT_LIMIT: &'static str = "memory.soft_limit_in_bytes"; -pub const MEMSW_LIMIT: &'static str = "memory.memsw.limit_in_bytes"; -pub const KMEM_LIMIT: &'static str = "memory.kmem.limit_in_bytes"; -pub const KMEM_TCP_LIMIT: &'static str = "memory.kmem.tcp.limit_in_bytes"; -pub const SWAPPINESS: &'static str = "memory.swappiness"; -pub const OOM_CONTROL: &'static str = "memory.oom_control"; -pub const PIDS_MAX: &'static str = "pids.max"; -pub const BLKIO_WEIGHT: &'static str = "blkio.weight"; -pub const BLKIO_LEAF_WEIGHT: &'static str = "blkio.leaf_weight"; -pub const BLKIO_WEIGHT_DEVICE: &'static str = "blkio.weight_device"; -pub const BLKIO_LEAF_WEIGHT_DEVICE: &'static str = "blkio.leaf_weight_device"; -pub const BLKIO_READ_BPS_DEVICE: &'static str = "blkio.throttle.read_bps_device"; -pub const BLKIO_WRITE_BPS_DEVICE: &'static str = "blkio.throttle.write_bps_device"; -pub const BLKIO_READ_IOPS_DEVICE: &'static str = "blkio.throttle.read_iops_device"; -pub const BLKIO_WRITE_IOPS_DEVICE: &'static str = "blkio.throttle.write_iops_device"; -pub const NET_CLS_CLASSID: &'static str = "net_cls.classid"; -pub const NET_PRIO_IFPRIOMAP: &'static str = "net_prio.ifpriomap"; - -pub const CPU_STAT: &'static str = "cpu.stat"; -pub const CPUACCT_STAT: &'static str = "cpuacct.stat"; -pub const NANO_PER_SECOND: u64 = 1000000000; -pub const CPUACCT_USAGE: &'static str = "cpuacct.usage"; -pub const CPUACCT_PERCPU: &'static str = "cpuacct.usage_percpu"; -pub const MEMORY_STAT: &'static str = "memory.stat"; -pub const MEM_USAGE: &'static str = "usage_in_bytes"; -pub const MEM_MAX_USAGE: &'static str = "max_usage_in_bytes"; -pub const MEM_FAILCNT: &'static str = "failcnt"; -pub const MEM_LIMIT: &'static str = "limit_in_bytes"; -pub const MEM_HIERARCHY: &'static str = "memory.use_hierarchy"; -pub const PIDS_CURRENT: &'static str = "pids.current"; -pub const BLKIO_SECTORS: &'static str = "blkio.sectors_recursive"; -pub const BLKIO_IO_SERVICE_BYTES: &'static str = "blkio.io_service_bytes_recursive"; -pub const BLKIO_IO_SERVICED: &'static str = "blkio.io_serviced_recursive"; -pub const BLKIO_IO_QUEUED: &'static str = "blkio.io_queued_recursive"; -pub const BLKIO_IO_SERVICE_TIME: &'static str = "blkio.io_service_time_recursive"; -pub const BLKIO_IO_WAIT_TIME: &'static str = "blkio.io_wait_time_recursive"; -pub const BLKIO_IO_MERGED: &'static str = "blkio.io_merged_recursive"; -pub const BLKIO_TIME: &'static str = "blkio.time_recursive"; -pub const BLKIO_THROTTLE_IO_SERVICE_BYTES: &'static str = "blkio.throttle.io_service_bytes"; -pub const BLKIO_THROTTLE_IO_SERVICED: &'static str = "blkio.throttle.io_serviced"; - -lazy_static! { - pub static ref CLOCK_TICKS: f64 = { - let n = unsafe { libc::sysconf(libc::_SC_CLK_TCK) }; - - n as f64 - }; -} - -pub fn init_static() { - lazy_static::initialize(&DEFAULT_ALLOWED_DEVICES); - lazy_static::initialize(&BMAP); - lazy_static::initialize(&DMAP); - lazy_static::initialize(&BABBRS); - lazy_static::initialize(&DABBRS); - lazy_static::initialize(&HUGEPAGESIZES); - lazy_static::initialize(&CLOCK_TICKS); -} - -fn write_file(dir: &str, file: &str, v: T) -> Result<()> -where - T: ToString, -{ - let p = format!("{}/{}", dir, file); - fs::write(p.as_str(), v.to_string().as_bytes()) - .context(format!("couldn't write to file: {:?}", p))?; - - Ok(()) -} - -fn read_file(dir: &str, file: &str) -> Result { - let p = format!("{}/{}", dir, file); - let ret = fs::read_to_string(p.as_str())?; - Ok(ret) -} - -fn copy_parent(dir: &str, file: &str) -> Result<()> { - let parent = if let Some(index) = dir.rfind('/') { - &dir[..index] - } else { - return Err(anyhow!("cannot copy file from parent")); - }; - - match read_file(parent, file) { - Ok(v) => { - if !v.trim().is_empty() { - info!(sl!(), "value: \"{}\"", v.as_str().trim()); - return write_file(dir, file, v); - } else { - copy_parent(parent, file)?; - return copy_parent(dir, file); - } - } - Err(ref e) if io_error_kind_eq(e, std::io::ErrorKind::NotFound) => { - copy_parent(parent, file)?; - return copy_parent(dir, file); - } - Err(e) => return Err(e.into()), - } -} - -fn write_nonzero(dir: &str, file: &str, v: i128) -> Result<()> { - if v != 0 { - write_file(dir, file, v.to_string())?; - } - - Ok(()) -} - -fn try_write_nonzero(dir: &str, file: &str, v: i128) -> Result<()> { - match write_nonzero(dir, file, v) { - Ok(_) => Ok(()), - Err(ref e) if io_error_kind_eq(e, std::io::ErrorKind::PermissionDenied) => Ok(()), - Err(e) => Err(anyhow!(e)), - } -} - -fn remove(dir: &str) -> Result<()> { - fs::remove_dir_all(dir)?; - Ok(()) -} - -fn apply(dir: &str, pid: pid_t) -> Result<()> { - write_file(dir, CGROUP_PROCS, pid)?; - Ok(()) -} - -fn try_write_file(dir: &str, file: &str, v: T) -> Result<()> { - match write_file(dir, file, v) { - Err(e) => { - let err = Errno::last(); - if err == Errno::EINVAL || err == Errno::ENODEV || err == Errno::ERANGE { - warn!(sl!(), "Invalid Arguments!"); - return Ok(()); - } - - info!(sl!(), "{}", err.desc()); - - return Err(anyhow!(e)); +fn get_cpuacct_stats(cg: &cgroups::Cgroup) -> SingularPtrField { + let cpuacct_controller: Option<&CpuAcctController> = cg.controller_of(); + if cpuacct_controller.is_none() { + if cg.v2() { + return SingularPtrField::some(CpuUsage { + total_usage: 0, + percpu_usage: vec![], + usage_in_kernelmode: 0, + usage_in_usermode: 0, + unknown_fields: UnknownFields::default(), + cached_size: CachedSize::default(), + }); } - Ok(_) => {} - } + // try to get from cpu controller + let cpu_controller: &CpuController = cg.controller_of().unwrap(); + let stat = cpu_controller.cpu().stat; + let h = lines_to_map(&stat); + let usage_in_usermode = *h.get("user_usec").unwrap(); + let usage_in_kernelmode = *h.get("system_usec").unwrap(); + let total_usage = *h.get("usage_usec").unwrap(); + let percpu_usage = vec![]; - Ok(()) -} - -impl Subsystem for CpuSet { - fn name(&self) -> String { - "cpuset".to_string() - } - - fn set(&self, dir: &str, r: &LinuxResources, update: bool) -> Result<()> { - let mut cpus: &str = ""; - let mut mems: &str = ""; - - if r.cpu.is_some() { - let cpu = r.cpu.as_ref().unwrap(); - cpus = cpu.cpus.as_str(); - mems = cpu.mems.as_str(); - } - - // For updatecontainer, just set the new value - if update { - if !cpus.is_empty() { - try_write_file(dir, CPUSET_CPUS, cpus)?; - } - - if !mems.is_empty() { - try_write_file(dir, CPUSET_MEMS, mems)?; - } - - return Ok(()); - } - - // for the first time - - if !update { - copy_parent(dir, CPUSET_CPUS)?; - copy_parent(dir, CPUSET_MEMS)?; - } - - // cpuset and mems can be invalid - // how to deal with it? Just ingore error for now - if !cpus.is_empty() { - try_write_file(dir, CPUSET_CPUS, cpus)?; - } - - if !mems.is_empty() { - info!(sl!(), "{}", mems); - try_write_file(dir, CPUSET_MEMS, mems)?; - } - - Ok(()) - } -} - -impl Subsystem for Cpu { - fn name(&self) -> String { - "cpu".to_string() - } - - fn set(&self, dir: &str, r: &LinuxResources, _update: bool) -> Result<()> { - if r.cpu.is_none() { - return Ok(()); - } - - let cpu = r.cpu.as_ref().unwrap(); - - try_write_nonzero( - dir, - CPU_RT_PERIOD_US, - cpu.realtime_period.unwrap_or(0) as i128, - )?; - try_write_nonzero( - dir, - CPU_RT_RUNTIME_US, - cpu.realtime_runtime.unwrap_or(0) as i128, - )?; - write_nonzero(dir, CPU_SHARES, cpu.shares.unwrap_or(0) as i128)?; - write_nonzero(dir, CPU_CFS_QUOTA_US, cpu.quota.unwrap_or(0) as i128)?; - write_nonzero(dir, CPU_CFS_PERIOD_US, cpu.period.unwrap_or(0) as i128)?; - - Ok(()) - } -} - -fn get_param_key_value(dir: &str, file: &str) -> Result> { - let mut m = HashMap::new(); - let p = format!("{}/{}", dir, file); - - for l in fs::read_to_string(p.as_str())?.lines() { - let t: Vec<&str> = l.split(' ').collect(); - if t.len() != 2 { - continue; - } - - m.insert(t[0].to_string(), t[1].to_string()); - } - - Ok(m) -} - -fn get_param_key_u64(dir: &str, file: &str) -> Result> { - let mut m = HashMap::new(); - let p = format!("{}/{}", dir, file); - - for l in fs::read_to_string(p.as_str())?.lines() { - let t: Vec<&str> = l.split(' ').collect(); - if t.len() != 2 { - continue; - } - - m.insert(t[0].to_string(), t[1].trim().parse::()?); - } - - Ok(m) -} - -fn get_param_u64(dir: &str, file: &str) -> Result { - let p = format!("{}/{}", dir, file); - let ret = fs::read_to_string(p.as_str())?.trim().parse::()?; - Ok(ret) -} - -impl Cpu { - fn get_stats(&self, dir: &str) -> Result { - let h = get_param_key_u64(dir, CPU_STAT)?; - - Ok(ThrottlingData { - periods: *h.get("nr_periods").unwrap(), - throttled_periods: *h.get("nr_throttled").unwrap(), - throttled_time: *h.get("throttled_time").unwrap(), - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), - }) - } -} - -impl Subsystem for CpuAcct { - fn name(&self) -> String { - "cpuacct".to_string() - } - - fn set(&self, _dir: &str, _r: &LinuxResources, _update: bool) -> Result<()> { - Ok(()) - } -} - -fn get_cpuacct_percpu_usage(dir: &str) -> Result> { - let mut m = Vec::new(); - let file = format!("{}/{}", dir, CPUACCT_PERCPU); - - for n in fs::read_to_string(file.as_str())?.split(' ') { - m.push(n.trim().parse::()?); - } - - Ok(m) -} - -fn get_percpu_usage(dir: &str, file: &str) -> Result> { - let mut m = Vec::new(); - let p = format!("{}/{}", dir, file); - info!(sl!(), "{}", p.as_str()); - - for n in fs::read_to_string(p.as_str())?.split(' ') { - info!(sl!(), "{}", n); - if !n.trim().is_empty() { - m.push(n.trim().parse::()?); - } - } - - Ok(m) -} - -impl CpuAcct { - fn get_stats(&self, dir: &str) -> Result { - let h = get_param_key_u64(dir, CPUACCT_STAT)?; - - let usage_in_usermode = - (((*h.get("user").unwrap() * NANO_PER_SECOND) as f64) / *CLOCK_TICKS) as u64; - let usage_in_kernelmode = - (((*h.get("system").unwrap() * NANO_PER_SECOND) as f64) / *CLOCK_TICKS) as u64; - - info!(sl!(), "stat"); - - let total_usage = get_param_u64(dir, CPUACCT_USAGE)?; - info!(sl!(), "usage"); - - let percpu_usage = get_percpu_usage(dir, CPUACCT_PERCPU)?; - info!(sl!(), "percpu"); - - Ok(CpuUsage { + return SingularPtrField::some(CpuUsage { total_usage, percpu_usage, usage_in_kernelmode, usage_in_usermode, unknown_fields: UnknownFields::default(), cached_size: CachedSize::default(), - }) - } -} - -fn write_device(d: &LinuxDeviceCgroup, dir: &str) -> Result<()> { - let file = if d.allow { DEVICES_ALLOW } else { DEVICES_DENY }; - - let major = if d.major.unwrap_or(0) == WILDCARD { - "*".to_string() - } else { - d.major.unwrap_or(0).to_string() - }; - - let minor = if d.minor.unwrap_or(0) == WILDCARD { - "*".to_string() - } else { - d.minor.unwrap_or(0).to_string() - }; - - let t = if d.r#type.is_empty() { - "a" - } else { - d.r#type.as_str() - }; - - let v = format!( - "{} {}:{} {}", - t, - major.as_str(), - minor.as_str(), - d.access.as_str() - ); - - info!(sl!(), "{}", v.as_str()); - - write_file(dir, file, v.as_str()) -} - -impl Subsystem for Devices { - fn name(&self) -> String { - "devices".to_string() + }); } - fn set(&self, dir: &str, r: &LinuxResources, _update: bool) -> Result<()> { - for d in r.devices.iter() { - write_device(d, dir)?; - } + let cpuacct_controller = cpuacct_controller.unwrap(); + let cpuacct = cpuacct_controller.cpuacct(); - for d in DEFAULT_DEVICES.iter() { - let td = LinuxDeviceCgroup { - allow: true, - r#type: d.r#type.clone(), - major: Some(d.major), - minor: Some(d.minor), - access: "rwm".to_string(), - }; + let h = lines_to_map(&cpuacct.stat); + let usage_in_usermode = + (((*h.get("user").unwrap() * NANO_PER_SECOND) as f64) / *CLOCK_TICKS) as u64; + let usage_in_kernelmode = + (((*h.get("system").unwrap() * NANO_PER_SECOND) as f64) / *CLOCK_TICKS) as u64; - write_device(&td, dir)?; - } + let total_usage = cpuacct.usage; - for d in DEFAULT_ALLOWED_DEVICES.iter() { - write_device(d, dir)?; - } + let percpu_usage = line_to_vec(&cpuacct.usage_percpu); - Ok(()) - } + SingularPtrField::some(CpuUsage { + total_usage, + percpu_usage, + usage_in_kernelmode, + usage_in_usermode, + unknown_fields: UnknownFields::default(), + cached_size: CachedSize::default(), + }) } -fn try_write(dir: &str, file: &str, v: T) -> Result<()> -where - T: ToString, -{ - match write_file(dir, file, v) { - Err(ref e) - if io_error_kind_eq(e, std::io::ErrorKind::PermissionDenied) - || io_error_kind_eq(e, std::io::ErrorKind::Other) => - { - return Ok(()); - } +fn get_memory_stats(cg: &cgroups::Cgroup) -> SingularPtrField { + let memory_controller: &MemController = cg.controller_of().unwrap(); - Err(e) => return Err(anyhow!(e)), + // cache from memory stat + let memory = memory_controller.memory_stat(); + let cache = memory.stat.cache; - Ok(_) => return Ok(()), - } -} + // use_hierarchy + let value = memory.use_hierarchy; + let use_hierarchy = if value == 1 { true } else { false }; -impl Subsystem for Memory { - fn name(&self) -> String { - "memory".to_string() - } + // gte memory datas + let usage = SingularPtrField::some(MemoryData { + usage: memory.usage_in_bytes, + max_usage: memory.max_usage_in_bytes, + failcnt: memory.fail_cnt, + limit: memory.limit_in_bytes as u64, + unknown_fields: UnknownFields::default(), + cached_size: CachedSize::default(), + }); - fn set(&self, dir: &str, r: &LinuxResources, update: bool) -> Result<()> { - if r.memory.is_none() { - return Ok(()); - } + // get swap usage + let memswap = memory_controller.memswap(); - let memory = r.memory.as_ref().unwrap(); - // initialize kmem limits for accounting - if !update { - try_write(dir, KMEM_LIMIT, 1)?; - try_write(dir, KMEM_LIMIT, -1)?; - } + let swap_usage = SingularPtrField::some(MemoryData { + usage: memswap.usage_in_bytes, + max_usage: memswap.max_usage_in_bytes, + failcnt: memswap.fail_cnt, + limit: memswap.limit_in_bytes as u64, + unknown_fields: UnknownFields::default(), + cached_size: CachedSize::default(), + }); - write_nonzero(dir, MEMORY_LIMIT, memory.limit.unwrap_or(0) as i128)?; - write_nonzero( - dir, - MEMORY_SOFT_LIMIT, - memory.reservation.unwrap_or(0) as i128, - )?; + // get kernel usage + let kmem_stat = memory_controller.kmem_stat(); - try_write_nonzero(dir, MEMSW_LIMIT, memory.swap.unwrap_or(0) as i128)?; - try_write_nonzero(dir, KMEM_LIMIT, memory.kernel.unwrap_or(0) as i128)?; + let kernel_usage = SingularPtrField::some(MemoryData { + usage: kmem_stat.usage_in_bytes, + max_usage: kmem_stat.max_usage_in_bytes, + failcnt: kmem_stat.fail_cnt, + limit: kmem_stat.limit_in_bytes as u64, + unknown_fields: UnknownFields::default(), + cached_size: CachedSize::default(), + }); - write_nonzero(dir, KMEM_TCP_LIMIT, memory.kernel_tcp.unwrap_or(0) as i128)?; - - if memory.swapiness.unwrap_or(0) <= 100 { - write_file(dir, SWAPPINESS, memory.swapiness.unwrap_or(0))?; - } - - if memory.disable_oom_killer.unwrap_or(false) { - write_file(dir, OOM_CONTROL, 1)?; - } - - Ok(()) - } -} - -fn get_exist_memory_data(dir: &str, sub: &str) -> Result> { - let res = match get_memory_data(dir, sub) { - Err(ref e) if io_error_kind_eq(e, std::io::ErrorKind::NotFound) => None, - - Ok(r) => Some(r), - - Err(e) => return Err(anyhow!(e)), - }; - - Ok(res) -} - -fn get_memory_data(dir: &str, sub: &str) -> Result { - let base = "memory"; - let (fusage, fmax_usage, ffailcnt, flimit) = if sub.is_empty() { - ( - format!("{}.{}", base, MEM_USAGE), - format!("{}.{}", base, MEM_MAX_USAGE), - format!("{}.{}", base, MEM_FAILCNT), - format!("{}.{}", base, MEM_LIMIT), - ) - } else { - ( - format!("{}.{}.{}", base, sub, MEM_USAGE), - format!("{}.{}.{}", base, sub, MEM_MAX_USAGE), - format!("{}.{}.{}", base, sub, MEM_FAILCNT), - format!("{}.{}.{}", base, sub, MEM_LIMIT), - ) - }; - - let usage = get_param_u64(dir, fusage.as_str())?; - let max_usage = get_param_u64(dir, fmax_usage.as_str())?; - let failcnt = get_param_u64(dir, ffailcnt.as_str())?; - let limit = get_param_u64(dir, flimit.as_str())?; - - Ok(MemoryData { + SingularPtrField::some(MemoryStats { + cache, usage, - max_usage, - failcnt, + swap_usage, + kernel_usage, + use_hierarchy, + stats: memory.stat.raw, + unknown_fields: UnknownFields::default(), + cached_size: CachedSize::default(), + }) +} + +fn get_pids_stats(cg: &cgroups::Cgroup) -> SingularPtrField { + let pid_controller: &PidController = cg.controller_of().unwrap(); + + let current = pid_controller.get_pid_current().unwrap_or(0); + let max = pid_controller.get_pid_max(); + + let limit = if max.is_err() { + 0 + } else { + match max.unwrap() { + MaxValue::Value(v) => v, + MaxValue::Max => 0, + } + } as u64; + + SingularPtrField::some(PidsStats { + current, limit, unknown_fields: UnknownFields::default(), cached_size: CachedSize::default(), }) } -impl Memory { - fn get_stats(&self, dir: &str) -> Result { - let h = get_param_key_u64(dir, MEMORY_STAT)?; - let cache = *h.get("cache").unwrap(); - info!(sl!(), "cache"); +/* +examples(from runc, cgroup v1): +https://github.com/opencontainers/runc/blob/a5847db387ae28c0ca4ebe4beee1a76900c86414/libcontainer/cgroups/fs/blkio.go - let value = get_param_u64(dir, MEM_HIERARCHY)?; - let use_hierarchy = if value == 1 { true } else { false }; + blkio.sectors + 8:0 6792 - info!(sl!(), "hierarchy"); + blkio.io_service_bytes + 8:0 Read 1282048 + 8:0 Write 2195456 + 8:0 Sync 2195456 + 8:0 Async 1282048 + 8:0 Total 3477504 + Total 3477504 - // gte memory datas - let usage = SingularPtrField::from_option(get_exist_memory_data(dir, "")?); - let swap_usage = SingularPtrField::from_option(get_exist_memory_data(dir, "memsw")?); - let kernel_usage = SingularPtrField::from_option(get_exist_memory_data(dir, "kmem")?); + blkio.io_serviced + 8:0 Read 124 + 8:0 Write 104 + 8:0 Sync 104 + 8:0 Async 124 + 8:0 Total 228 + Total 228 - Ok(MemoryStats { - cache, - usage, - swap_usage, - kernel_usage, - use_hierarchy, - stats: h, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), - }) - } -} + blkio.io_queued + 8:0 Read 0 + 8:0 Write 0 + 8:0 Sync 0 + 8:0 Async 0 + 8:0 Total 0 + Total 0 +*/ -impl Subsystem for Pids { - fn name(&self) -> String { - "pids".to_string() - } - - fn set(&self, dir: &str, r: &LinuxResources, _update: bool) -> Result<()> { - if r.pids.is_none() { - return Ok(()); - } - - let pids = r.pids.as_ref().unwrap(); - - let v = if pids.limit > 0 { - pids.limit.to_string() - } else { - "max".to_string() - }; - - write_file(dir, PIDS_MAX, v.as_str())?; - - Ok(()) - } -} - -fn get_param_string(dir: &str, file: &str) -> Result { - let p = format!("{}/{}", dir, file); - - let c = fs::read_to_string(p.as_str())?; - - Ok(c) -} - -impl Pids { - fn get_stats(&self, dir: &str) -> Result { - let current = get_param_u64(dir, PIDS_CURRENT)?; - let c = get_param_string(dir, PIDS_MAX)?; - - let limit = if c.contains("max") { - 0 - } else { - c.trim().parse::()? - }; - - Ok(PidsStats { - current, - limit, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), - }) - } -} - -#[inline] -fn weight(d: &LinuxWeightDevice) -> (String, String) { - ( - format!("{:?} {:?}", d.blk, d.weight), - format!("{:?} {:?}", d.blk, d.leaf_weight), - ) -} - -#[inline] -fn rate(d: &LinuxThrottleDevice) -> String { - format!("{:?} {}", d.blk, d.rate) -} - -fn write_blkio_device(dir: &str, file: &str, v: T) -> Result<()> { - match write_file(dir, file, v) { - Err(ref e) if io_error_kind_eq(e, std::io::ErrorKind::Other) => { - // only ignore ENODEV - let raw = std::io::Error::last_os_error().raw_os_error().unwrap(); - if Errno::from_i32(raw) == Errno::ENODEV { - return Ok(()); - } - } - - Err(e) => return Err(anyhow!(e)), - - Ok(_) => {} - } - - Ok(()) -} - -impl Subsystem for Blkio { - fn name(&self) -> String { - "blkio".to_string() - } - - fn set(&self, dir: &str, r: &LinuxResources, _update: bool) -> Result<()> { - if r.block_io.is_none() { - return Ok(()); - } - - let blkio = r.block_io.as_ref().unwrap(); - - write_nonzero(dir, BLKIO_WEIGHT, blkio.weight.unwrap_or(0) as i128)?; - write_nonzero( - dir, - BLKIO_LEAF_WEIGHT, - blkio.leaf_weight.unwrap_or(0) as i128, - )?; - - for d in blkio.weight_device.iter() { - let (w, lw) = weight(d); - write_blkio_device(dir, BLKIO_WEIGHT_DEVICE, w)?; - write_blkio_device(dir, BLKIO_LEAF_WEIGHT_DEVICE, lw)?; - } - - for d in blkio.throttle_read_bps_device.iter() { - write_blkio_device(dir, BLKIO_READ_BPS_DEVICE, rate(d))?; - } - - for d in blkio.throttle_write_bps_device.iter() { - write_blkio_device(dir, BLKIO_WRITE_BPS_DEVICE, rate(d))?; - } - - for d in blkio.throttle_read_iops_device.iter() { - write_blkio_device(dir, BLKIO_READ_IOPS_DEVICE, rate(d))?; - } - - for d in blkio.throttle_write_iops_device.iter() { - write_blkio_device(dir, BLKIO_WRITE_IOPS_DEVICE, rate(d))?; - } - - Ok(()) - } -} - -fn get_blkio_stat(dir: &str, file: &str) -> Result> { - let p = format!("{}/{}", dir, file); +fn get_blkio_stat_blkiodata(blkiodata: &Vec) -> RepeatedField { let mut m = RepeatedField::new(); - - // do as runc - if !Path::new(&p).exists() { - return Ok(RepeatedField::new()); + if blkiodata.len() == 0 { + return m; } - for l in fs::read_to_string(p.as_str())?.lines() { - let parts: Vec<&str> = l.split(' ').collect(); - - if parts.len() < 3 { - if parts.len() == 2 && parts[0].to_lowercase() == "total".to_string() { - continue; - } else { - return Err(nix::Error::Sys(Errno::EINVAL).into()); - } - } - - let op = parts[1].to_string(); - let value = parts[2].parse::()?; - - let devno: Vec<&str> = parts[0].split(':').collect(); - - if devno.len() != 2 { - return Err(nix::Error::Sys(Errno::EINVAL).into()); - } - - let major = devno[0].parse::()?; - let minor = devno[1].parse::()?; - + // blkio.time_recursive and blkio.sectors_recursive have no op field. + let op = "".to_string(); + for d in blkiodata { m.push(BlkioStatsEntry { - major, - minor, - op, - value, + major: d.major as u64, + minor: d.minor as u64, + op: op.clone(), + value: d.data, unknown_fields: UnknownFields::default(), cached_size: CachedSize::default(), }); } - /* - if m.len() == 0 { - // return Err here? not sure about it - return Err(nix::Error::Sys(Errno:ENODATA).into()); - } - */ - - Ok(m) + m } -impl Blkio { - fn get_stats(&self, dir: &str) -> Result { - let mut m = BlkioStats::new(); - let entry = get_blkio_stat(dir, BLKIO_IO_SERVICED)?; - if entry.len() == 0 { - // fall back to generic stats - // blkio.throttle.io_service_bytes, - // maybe io_service_bytes_recursive? - // stick to runc for now - m.io_service_bytes_recursive = get_blkio_stat(dir, BLKIO_THROTTLE_IO_SERVICE_BYTES)?; - m.io_serviced_recursive = get_blkio_stat(dir, BLKIO_THROTTLE_IO_SERVICED)?; - } else { - // cfq stats - m.sectors_recursive = get_blkio_stat(dir, BLKIO_SECTORS)?; - m.io_service_bytes_recursive = get_blkio_stat(dir, BLKIO_IO_SERVICE_BYTES)?; - m.io_serviced_recursive = get_blkio_stat(dir, BLKIO_IO_SERVICED)?; - m.io_queued_recursive = get_blkio_stat(dir, BLKIO_IO_QUEUED)?; - m.io_service_time_recursive = get_blkio_stat(dir, BLKIO_IO_SERVICE_TIME)?; - m.io_wait_time_recursive = get_blkio_stat(dir, BLKIO_IO_WAIT_TIME)?; - m.io_merged_recursive = get_blkio_stat(dir, BLKIO_IO_MERGED)?; - m.io_time_recursive = get_blkio_stat(dir, BLKIO_TIME)?; - } +fn get_blkio_stat_ioservice(services: &Vec) -> RepeatedField { + let mut m = RepeatedField::new(); - Ok(m) + if services.len() == 0 { + return m; + } + + for s in services { + m.push(build_blkio_stats_entry(s.major, s.minor, "read", s.read)); + m.push(build_blkio_stats_entry(s.major, s.minor, "write", s.write)); + m.push(build_blkio_stats_entry(s.major, s.minor, "sync", s.sync)); + m.push(build_blkio_stats_entry( + s.major, s.minor, "async", s.r#async, + )); + m.push(build_blkio_stats_entry(s.major, s.minor, "total", s.total)); + } + m +} + +fn build_blkio_stats_entry(major: i16, minor: i16, op: &str, value: u64) -> BlkioStatsEntry { + BlkioStatsEntry { + major: major as u64, + minor: minor as u64, + op: op.to_string(), + value: value, + unknown_fields: UnknownFields::default(), + cached_size: CachedSize::default(), } } -impl Subsystem for HugeTLB { - fn name(&self) -> String { - "hugetlb".to_string() +fn get_blkio_stats_v2(cg: &cgroups::Cgroup) -> SingularPtrField { + let blkio_controller: &BlkIoController = cg.controller_of().unwrap(); + let blkio = blkio_controller.blkio(); + + let mut resp = BlkioStats::new(); + let mut blkio_stats = RepeatedField::new(); + + let stat = blkio.io_stat; + for s in stat { + blkio_stats.push(build_blkio_stats_entry(s.major, s.minor, "read", s.rbytes)); + blkio_stats.push(build_blkio_stats_entry(s.major, s.minor, "write", s.wbytes)); + blkio_stats.push(build_blkio_stats_entry(s.major, s.minor, "rios", s.rios)); + blkio_stats.push(build_blkio_stats_entry(s.major, s.minor, "wios", s.wios)); + blkio_stats.push(build_blkio_stats_entry( + s.major, s.minor, "dbytes", s.dbytes, + )); + blkio_stats.push(build_blkio_stats_entry(s.major, s.minor, "dios", s.dios)); } - fn set(&self, dir: &str, r: &LinuxResources, _update: bool) -> Result<()> { - for l in r.hugepage_limits.iter() { - let file = format!("hugetlb.{}.limit_in_bytes", l.page_size); - write_file(dir, file.as_str(), l.limit)?; - } - Ok(()) - } + resp.io_service_bytes_recursive = blkio_stats; + + SingularPtrField::some(resp) } -impl HugeTLB { - fn get_stats(&self, dir: &str) -> Result> { - let mut h = HashMap::new(); - for pagesize in HUGEPAGESIZES.iter() { - let fusage = format!("{}.{}.{}", HUGETLB_BASE, pagesize, HUGETLB_USAGE); - let fmax = format!("{}.{}.{}", HUGETLB_BASE, pagesize, HUGETLB_MAX_USAGE); - let ffailcnt = format!("{}.{}.{}", HUGETLB_BASE, pagesize, HUGETLB_FAILCNT); - - let usage = get_param_u64(dir, fusage.as_str())?; - let max_usage = get_param_u64(dir, fmax.as_str())?; - let failcnt = get_param_u64(dir, ffailcnt.as_str())?; - - h.insert( - pagesize.to_string(), - HugetlbStats { - usage, - max_usage, - failcnt, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), - }, - ); - } - - Ok(h) +fn get_blkio_stats(cg: &cgroups::Cgroup) -> SingularPtrField { + if cg.v2() { + return get_blkio_stats_v2(&cg); } + + let blkio_controller: &BlkIoController = cg.controller_of().unwrap(); + let blkio = blkio_controller.blkio(); + + let mut m = BlkioStats::new(); + let io_serviced_recursive = blkio.io_serviced_recursive; + + if io_serviced_recursive.len() == 0 { + // fall back to generic stats + // blkio.throttle.io_service_bytes, + // maybe io_service_bytes_recursive? + // stick to runc for now + m.io_service_bytes_recursive = get_blkio_stat_ioservice(&blkio.throttle.io_service_bytes); + m.io_serviced_recursive = get_blkio_stat_ioservice(&blkio.throttle.io_serviced); + } else { + // Try to read CFQ stats available on all CFQ enabled kernels first + // IoService type data + m.io_service_bytes_recursive = get_blkio_stat_ioservice(&blkio.io_service_bytes_recursive); + m.io_serviced_recursive = get_blkio_stat_ioservice(&io_serviced_recursive); + m.io_queued_recursive = get_blkio_stat_ioservice(&blkio.io_queued_recursive); + m.io_service_time_recursive = get_blkio_stat_ioservice(&blkio.io_service_time_recursive); + m.io_wait_time_recursive = get_blkio_stat_ioservice(&blkio.io_wait_time_recursive); + m.io_merged_recursive = get_blkio_stat_ioservice(&blkio.io_merged_recursive); + + // BlkIoData type data + m.io_time_recursive = get_blkio_stat_blkiodata(&blkio.time_recursive); + m.sectors_recursive = get_blkio_stat_blkiodata(&blkio.sectors_recursive); + } + + SingularPtrField::some(m) } -impl Subsystem for NetCls { - fn name(&self) -> String { - "net_cls".to_string() +fn get_hugetlb_stats(cg: &cgroups::Cgroup) -> HashMap { + let mut h = HashMap::new(); + + let hugetlb_controller: Option<&HugeTlbController> = cg.controller_of(); + if hugetlb_controller.is_none() { + return h; + } + let hugetlb_controller = hugetlb_controller.unwrap(); + + let sizes = hugetlb_controller.get_sizes(); + for size in sizes { + let usage = hugetlb_controller.usage_in_bytes(&size).unwrap_or(0); + let max_usage = hugetlb_controller.max_usage_in_bytes(&size).unwrap_or(0); + let failcnt = hugetlb_controller.failcnt(&size).unwrap_or(0); + + h.insert( + size.to_string(), + HugetlbStats { + usage, + max_usage, + failcnt, + unknown_fields: UnknownFields::default(), + cached_size: CachedSize::default(), + }, + ); } - fn set(&self, dir: &str, r: &LinuxResources, _update: bool) -> Result<()> { - if r.network.is_none() { - return Ok(()); - } - - let network = r.network.as_ref().unwrap(); - - write_nonzero(dir, NET_CLS_CLASSID, network.class_id.unwrap_or(0) as i128)?; - - Ok(()) - } -} - -impl Subsystem for NetPrio { - fn name(&self) -> String { - "net_prio".to_string() - } - - fn set(&self, dir: &str, r: &LinuxResources, _update: bool) -> Result<()> { - if r.network.is_none() { - return Ok(()); - } - - let network = r.network.as_ref().unwrap(); - - for p in network.priorities.iter() { - let prio = format!("{} {}", p.name, p.priority); - - try_write_file(dir, NET_PRIO_IFPRIOMAP, prio)?; - } - - Ok(()) - } -} - -impl Subsystem for PerfEvent { - fn name(&self) -> String { - "perf_event".to_string() - } - - fn set(&self, _dir: &str, _r: &LinuxResources, _update: bool) -> Result<()> { - Ok(()) - } -} - -impl Subsystem for Freezer { - fn name(&self) -> String { - "freezer".to_string() - } - - fn set(&self, _dir: &str, _r: &LinuxResources, _update: bool) -> Result<()> { - Ok(()) - } -} - -impl Subsystem for Named { - fn name(&self) -> String { - "name=systemd".to_string() - } - - fn set(&self, _dir: &str, _r: &LinuxResources, _update: bool) -> Result<()> { - Ok(()) - } -} - -fn get_subsystem(name: &str) -> Option> { - match name { - "cpuset" => Some(Box::new(CpuSet())), - "cpu" => Some(Box::new(Cpu())), - "devices" => Some(Box::new(Devices())), - "memory" => Some(Box::new(Memory())), - "cpuacct" => Some(Box::new(CpuAcct())), - "pids" => Some(Box::new(Pids())), - "blkio" => Some(Box::new(Blkio())), - "hugetlb" => Some(Box::new(HugeTLB())), - "net_cls" => Some(Box::new(NetCls())), - "net_prio" => Some(Box::new(NetPrio())), - "perf_event" => Some(Box::new(PerfEvent())), - "freezer" => Some(Box::new(Freezer())), - "name=systemd" => Some(Box::new(Named())), - _ => { - info!(sl!(), "Found unexpected cgroup"; "cgroup" => name); - None - } - } + h } pub const PATHS: &'static str = "/proc/self/cgroup"; pub const MOUNTS: &'static str = "/proc/self/mountinfo"; -fn get_paths() -> Result> { +pub fn get_paths() -> Result> { let mut m = HashMap::new(); for l in fs::read_to_string(PATHS)?.lines() { let fl: Vec<&str> = l.split(':').collect(); @@ -1151,13 +935,19 @@ fn get_paths() -> Result> { let keys: Vec<&str> = fl[1].split(',').collect(); for key in &keys { - m.insert(key.to_string(), fl[2].to_string()); + // this is a workaround, cgroup file are using `name=systemd`, + // but if file system the name is `systemd` + if *key == "name=systemd" { + m.insert("systemd".to_string(), fl[2].to_string()); + } else { + m.insert(key.to_string(), fl[2].to_string()); + } } } Ok(m) } -fn get_mounts() -> Result> { +pub fn get_mounts() -> Result> { let mut m = HashMap::new(); let paths = get_paths()?; @@ -1187,204 +977,6 @@ fn get_mounts() -> Result> { Ok(m) } -fn get_procs(dir: &str) -> Result> { - let file = format!("{}/{}", dir, CGROUP_PROCS); - let mut m = Vec::new(); - - for l in fs::read_to_string(file.as_str())?.lines() { - m.push(l.trim().parse::()?); - } - - Ok(m) -} - -fn get_all_procs(dir: &str) -> Result> { - let mut m = Vec::new(); - - for e in fs::read_dir(dir)? { - let path = e?.path(); - - if path.is_dir() { - m.append(get_all_procs(path.to_str().unwrap())?.as_mut()); - } - - if path.is_file() && path.ends_with(CGROUP_PROCS) { - let dir = path.parent().unwrap().to_str().unwrap(); - - m.append(get_procs(dir)?.as_mut()); - - return Ok(m); - } - - if path.is_file() { - continue; - } - } - - Ok(m) -} - -#[derive(Serialize, Deserialize, Debug, Clone)] -pub struct Manager { - pub paths: HashMap, - pub mounts: HashMap, - pub rels: HashMap, - pub cpath: String, -} - -pub const THAWED: &'static str = "THAWED"; -pub const FROZEN: &'static str = "FROZEN"; - -impl CgroupManager for Manager { - fn apply(&self, pid: pid_t) -> Result<()> { - for (key, value) in &self.paths { - if let Some(sub) = get_subsystem(key) { - sub.apply(value, pid)?; - } - } - - Ok(()) - } - - fn set(&self, spec: &LinuxResources, update: bool) -> Result<()> { - for (key, value) in &self.paths { - let _ = fs::create_dir_all(value); - if let Some(sub) = get_subsystem(key) { - sub.set(value, spec, update)?; - } - } - - Ok(()) - } - - fn get_stats(&self) -> Result { - // CpuStats - info!(sl!(), "cpu_usage"); - let cpu_usage = if self.paths.get("cpuacct").is_some() { - SingularPtrField::some(CpuAcct().get_stats(self.paths.get("cpuacct").unwrap())?) - } else { - SingularPtrField::none() - }; - - info!(sl!(), "throttling_data"); - let throttling_data = if self.paths.get("cpu").is_some() { - SingularPtrField::some(Cpu().get_stats(self.paths.get("cpu").unwrap())?) - } else { - SingularPtrField::none() - }; - - info!(sl!(), "cpu_stats"); - let cpu_stats = if cpu_usage.is_none() && throttling_data.is_none() { - SingularPtrField::none() - } else { - SingularPtrField::some(CpuStats { - cpu_usage, - throttling_data, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), - }) - }; - - // Memorystats - info!(sl!(), "memory_stats"); - let memory_stats = if self.paths.get("memory").is_some() { - SingularPtrField::some(Memory().get_stats(self.paths.get("memory").unwrap())?) - } else { - SingularPtrField::none() - }; - - // PidsStats - info!(sl!(), "pids_stats"); - let pids_stats = if self.paths.get("pids").is_some() { - SingularPtrField::some(Pids().get_stats(self.paths.get("pids").unwrap())?) - } else { - SingularPtrField::none() - }; - - // BlkioStats - // note that virtiofs has no blkio stats - info!(sl!(), "blkio_stats"); - let blkio_stats = if self.paths.get("blkio").is_some() { - match Blkio().get_stats(self.paths.get("blkio").unwrap()) { - Ok(stat) => SingularPtrField::some(stat), - Err(e) => { - warn!(sl!(), "failed to get blkio stats"); - SingularPtrField::none() - } - } - } else { - SingularPtrField::none() - }; - - // HugetlbStats - info!(sl!(), "hugetlb_stats"); - let hugetlb_stats = if self.paths.get("hugetlb").is_some() { - HugeTLB().get_stats(self.paths.get("hugetlb").unwrap())? - } else { - HashMap::new() - }; - - Ok(CgroupStats { - cpu_stats, - memory_stats, - pids_stats, - blkio_stats, - hugetlb_stats, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), - }) - } - - fn get_paths(&self) -> Result> { - Ok(self.paths.clone()) - } - - fn freeze(&self, state: FreezerState) -> Result<()> { - if state == THAWED || state == FROZEN { - if self.paths.get("freezer").is_some() { - let dir = self.paths.get("freezer").unwrap(); - write_file(dir, "freezer.state", state)?; - } - } else { - if !state.is_empty() { - // invalid state - return Err(nix::Error::Sys(Errno::EINVAL).into()); - } - } - Ok(()) - } - - fn destroy(&mut self) -> Result<()> { - for (_, d) in &self.paths { - remove(d)?; - } - - self.paths = HashMap::new(); - - Ok(()) - } - - fn get_pids(&self) -> Result> { - let m = if self.paths.get("devices").is_some() { - get_procs(self.paths.get("devices").unwrap())? - } else { - return Err(anyhow!("no devices cgroup".to_string())); - }; - - Ok(m) - } - - fn get_all_pids(&self) -> Result> { - let m = if self.paths.get("devices").is_some() { - get_all_procs(self.paths.get("devices").unwrap())? - } else { - return Err(anyhow!("no devices cgroup")); - }; - - Ok(m) - } -} - impl Manager { pub fn new(cpath: &str) -> Result { let mut m = HashMap::new(); @@ -1415,59 +1007,200 @@ impl Manager { Ok(Self { paths: m, mounts, - rels: paths, + // rels: paths, cpath: cpath.to_string(), }) } - pub fn update_cpuset_path(&self, cpuset: &str) -> Result<()> { - let root = if self.mounts.get("cpuset").is_some() { - self.mounts.get("cpuset").unwrap() - } else { - return Err(nix::Error::Sys(Errno::ENOENT).into()); - }; - - let relss = if self.rels.get("cpuset").is_some() { - self.rels.get("cpuset").unwrap() - } else { - return Err(nix::Error::Sys(Errno::ENOENT).into()); - }; - - let mut dir: String = root.to_string(); - let rels: Vec<&str> = relss.split('/').collect(); - let cpaths: Vec<&str> = self.cpath.as_str().split('/').collect(); - - for d in rels.iter() { - if d.is_empty() { - continue; - } - - dir.push('/'); - dir.push_str(d); - write_file(dir.as_str(), CPUSET_CPUS, cpuset)?; + pub fn update_cpuset_path(&self, cpuset_cpus: &str) -> Result<()> { + if cpuset_cpus == "" { + return Ok(()); } + info!(sl!(), "update_cpuset_path to: {}", cpuset_cpus); - for d in cpaths.iter() { - if d.is_empty() { - continue; + let h = cgroups::hierarchies::auto(); + let h = Box::new(&*h); + let root_cg = load_or_create(h, ""); + + let root_cpuset_controller: &CpuSetController = root_cg.controller_of().unwrap(); + let path = root_cpuset_controller.path(); + let root_path = Path::new(path); + info!(sl!(), "root cpuset path: {:?}", &path); + + let h = cgroups::hierarchies::auto(); + let h = Box::new(&*h); + let cg = load_or_create(h, &self.cpath); + let cpuset_controller: &CpuSetController = cg.controller_of().unwrap(); + let path = cpuset_controller.path(); + let container_path = Path::new(path); + info!(sl!(), "container cpuset path: {:?}", &path); + + let mut paths = vec![]; + for ancestor in container_path.ancestors() { + if ancestor == root_path { + break; } + if ancestor != container_path { + paths.push(ancestor); + } + } + info!(sl!(), "paths to update cpuset: {:?}", &paths); - dir.push('/'); - dir.push_str(d); - write_file(dir.as_str(), CPUSET_CPUS, cpuset)?; + let mut i = paths.len(); + loop { + if i == 0 { + break; + } + i = i - 1; + let h = cgroups::hierarchies::auto(); + let h = Box::new(&*h); + + // remove cgroup root from path + let r_path = &paths[i] + .to_str() + .unwrap() + .trim_start_matches(root_path.to_str().unwrap()); + info!(sl!(), "updating cpuset for path {:?}", &r_path); + let cg = load_or_create(h, &r_path); + let cpuset_controller: &CpuSetController = cg.controller_of().unwrap(); + cpuset_controller.set_cpus(cpuset_cpus); } Ok(()) } + + pub fn get_cg_path(&self, cg: &str) -> Option { + if cgroups::hierarchies::is_cgroup2_unified_mode() { + let cg_path = format!("/sys/fs/cgroup/{}", self.cpath); + return Some(cg_path); + } + + // for cgroup v1 + self.paths.get(cg).map(|s| s.to_string()) + } } pub fn get_guest_cpuset() -> Result { - let m = get_mounts()?; + // for cgroup v2 + if cgroups::hierarchies::is_cgroup2_unified_mode() { + let c = fs::read_to_string("/sys/fs/cgroup/cpuset.cpus.effective")?; + return Ok(c); + } + // for cgroup v1 + let m = get_mounts()?; if m.get("cpuset").is_none() { warn!(sl!(), "no cpuset cgroup!"); return Err(nix::Error::Sys(Errno::ENOENT).into()); } - get_param_string(m.get("cpuset").unwrap(), CPUSET_CPUS) + let p = format!("{}/cpuset.cpus", m.get("cpuset").unwrap()); + let c = fs::read_to_string(p.as_str())?; + Ok(c) +} + +// Since the OCI spec is designed for cgroup v1, in some cases +// there is need to convert from the cgroup v1 configuration to cgroup v2 +// the formula for cpuShares is y = (1 + ((x - 2) * 9999) / 262142) +// convert from [2-262144] to [1-10000] +// 262144 comes from Linux kernel definition "#define MAX_SHARES (1UL << 18)" +// from https://github.com/opencontainers/runc/blob/a5847db387ae28c0ca4ebe4beee1a76900c86414/libcontainer/cgroups/utils.go#L394 +pub fn convert_shares_to_v2_value(shares: u64) -> u64 { + if shares == 0 { + return 0; + } + 1 + ((shares - 2) * 9999) / 262142 +} + +// ConvertMemorySwapToCgroupV2Value converts MemorySwap value from OCI spec +// for use by cgroup v2 drivers. A conversion is needed since Resources.MemorySwap +// is defined as memory+swap combined, while in cgroup v2 swap is a separate value. +fn convert_memory_swap_to_v2_value(memory_swap: i64, memory: i64) -> Result { + // for compatibility with cgroup1 controller, set swap to unlimited in + // case the memory is set to unlimited, and swap is not explicitly set, + // treating the request as "set both memory and swap to unlimited". + if memory == -1 && memory_swap == 0 { + return Ok(-1); + } + if memory_swap == -1 || memory_swap == 0 { + // -1 is "max", 0 is "unset", so treat as is + return Ok(memory_swap); + } + // sanity checks + if memory == 0 || memory == -1 { + return Err(anyhow!("unable to set swap limit without memory limit")); + } + if memory < 0 { + return Err(anyhow!("invalid memory value: {}", memory)); + } + if memory_swap < memory { + return Err(anyhow!("memory+swap limit should be >= memory limit")); + } + Ok(memory_swap - memory) +} + +// Since the OCI spec is designed for cgroup v1, in some cases +// there is need to convert from the cgroup v1 configuration to cgroup v2 +// the formula for BlkIOWeight is y = (1 + (x - 10) * 9999 / 990) +// convert linearly from [10-1000] to [1-10000] +// https://github.com/opencontainers/runc/blob/a5847db387ae28c0ca4ebe4beee1a76900c86414/libcontainer/cgroups/utils.go#L382 +fn convert_blk_io_to_v2_value(blk_io_weight: Option) -> Option { + let v = blk_io_weight.unwrap_or(0); + if v != 0 { + return None; + } + + Some(1 + (v - 10) * 9999 / 990 as u16) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_line_to_vec() { + let test_cases = vec![ + ("1 2 3", vec![1, 2, 3]), + ("a 1 b 2 3 c", vec![1, 2, 3]), + ("a b c", vec![]), + ]; + + for test_case in test_cases { + let result = line_to_vec(test_case.0); + assert_eq!( + result, test_case.1, + "except: {:?} for input {}", + test_case.1, test_case.0 + ); + } + } + + #[test] + fn test_lines_to_map() { + let hm1: HashMap = [ + ("a".to_string(), 1), + ("b".to_string(), 2), + ("c".to_string(), 3), + ("e".to_string(), 5), + ] + .iter() + .cloned() + .collect(); + let hm2: HashMap = [("a".to_string(), 1)].iter().cloned().collect(); + + let test_cases = vec![ + ("a 1\nb 2\nc 3\nd X\ne 5\n", hm1), + ("a 1", hm2), + ("a c", HashMap::new()), + ]; + + for test_case in test_cases { + let result = lines_to_map(test_case.0); + assert_eq!( + result, test_case.1, + "except: {:?} for input {}", + test_case.1, test_case.0 + ); + } + } } diff --git a/src/agent/rustjail/src/cgroups/mod.rs b/src/agent/rustjail/src/cgroups/mod.rs index 8165650752..0e6052542a 100644 --- a/src/agent/rustjail/src/cgroups/mod.rs +++ b/src/agent/rustjail/src/cgroups/mod.rs @@ -1,4 +1,4 @@ -// Copyright (c) 2019 Ant Financial +// Copyright (c) 2019,2020 Ant Financial // // SPDX-License-Identifier: Apache-2.0 // @@ -9,10 +9,11 @@ use oci::LinuxResources; use protocols::agent::CgroupStats; use std::collections::HashMap; -pub mod fs; -pub mod systemd; +use cgroups::freezer::FreezerState; -pub type FreezerState = &'static str; +pub mod fs; +pub mod notifier; +pub mod systemd; pub trait Manager { fn apply(&self, _pid: i32) -> Result<()> { @@ -23,10 +24,6 @@ pub trait Manager { Err(anyhow!("not supported!")) } - fn get_all_pids(&self) -> Result> { - Err(anyhow!("not supported!")) - } - fn get_stats(&self) -> Result { Err(anyhow!("not supported!")) } @@ -39,10 +36,6 @@ pub trait Manager { Err(anyhow!("not supported!")) } - fn get_paths(&self) -> Result> { - Err(anyhow!("not supported!")) - } - fn set(&self, _container: &LinuxResources, _update: bool) -> Result<()> { Err(anyhow!("not supported!")) } diff --git a/src/agent/rustjail/src/cgroups/notifier.rs b/src/agent/rustjail/src/cgroups/notifier.rs new file mode 100644 index 0000000000..0b343dc6a7 --- /dev/null +++ b/src/agent/rustjail/src/cgroups/notifier.rs @@ -0,0 +1,209 @@ +// Copyright (c) 2020 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{anyhow, Result}; +use eventfd::{eventfd, EfdFlags}; +use nix::sys::eventfd; +use nix::sys::inotify::{AddWatchFlags, InitFlags, Inotify}; +use std::fs::{self, File}; +use std::io::Read; +use std::os::unix::io::{AsRawFd, FromRawFd}; +use std::path::{Path, PathBuf}; +use std::sync::mpsc::{self, Receiver}; +use std::thread; + +// Convenience macro to obtain the scope logger +macro_rules! sl { + () => { + slog_scope::logger().new(o!("subsystem" => "cgroups_notifier")) + }; +} + +pub fn notify_oom(cid: &str, cg_dir: String) -> Result> { + if cgroups::hierarchies::is_cgroup2_unified_mode() { + return notify_on_oom_v2(cid, cg_dir); + } + notify_on_oom(cid, cg_dir) +} + +// get_value_from_cgroup parse cgroup file with `Flat keyed` +// and get the value of `key`. +// Flat keyed file format: +// KEY0 VAL0\n +// KEY1 VAL1\n +fn get_value_from_cgroup(path: &PathBuf, key: &str) -> Result { + let content = fs::read_to_string(path)?; + info!( + sl!(), + "get_value_from_cgroup file: {:?}, content: {}", &path, &content + ); + + for line in content.lines() { + let arr: Vec<&str> = line.split(" ").collect(); + if arr.len() == 2 && arr[0] == key { + let r = arr[1].parse::()?; + return Ok(r); + } + } + Ok(0) +} + +// notify_on_oom returns channel on which you can expect event about OOM, +// if process died without OOM this channel will be closed. +pub fn notify_on_oom_v2(containere_id: &str, cg_dir: String) -> Result> { + register_memory_event_v2(containere_id, cg_dir, "memory.events", "cgroup.events") +} + +fn register_memory_event_v2( + containere_id: &str, + cg_dir: String, + memory_event_name: &str, + cgroup_event_name: &str, +) -> Result> { + let event_control_path = Path::new(&cg_dir).join(memory_event_name); + let cgroup_event_control_path = Path::new(&cg_dir).join(cgroup_event_name); + info!( + sl!(), + "register_memory_event_v2 event_control_path: {:?}", &event_control_path + ); + info!( + sl!(), + "register_memory_event_v2 cgroup_event_control_path: {:?}", &cgroup_event_control_path + ); + + let fd = Inotify::init(InitFlags::empty()).unwrap(); + + // watching oom kill + let ev_fd = fd + .add_watch(&event_control_path, AddWatchFlags::IN_MODIFY) + .unwrap(); + // Because no `unix.IN_DELETE|unix.IN_DELETE_SELF` event for cgroup file system, so watching all process exited + let cg_fd = fd + .add_watch(&cgroup_event_control_path, AddWatchFlags::IN_MODIFY) + .unwrap(); + info!(sl!(), "ev_fd: {:?}", ev_fd); + info!(sl!(), "cg_fd: {:?}", cg_fd); + + let (sender, receiver) = mpsc::channel(); + let containere_id = containere_id.to_string(); + + thread::spawn(move || { + loop { + let events = fd.read_events().unwrap(); + info!( + sl!(), + "container[{}] get events for container: {:?}", &containere_id, &events + ); + + for event in events { + if event.mask & AddWatchFlags::IN_MODIFY != AddWatchFlags::IN_MODIFY { + continue; + } + info!(sl!(), "event.wd: {:?}", event.wd); + + match event.wd { + ev_fd => { + let oom = get_value_from_cgroup(&event_control_path, "oom_kill"); + if oom.unwrap_or(0) > 0 { + sender.send(containere_id.clone()).unwrap(); + return; + } + } + cg_fd => { + let pids = get_value_from_cgroup(&cgroup_event_control_path, "populated"); + if pids.unwrap_or(-1) == 0 { + return; + } + } + } + } + // When a cgroup is destroyed, an event is sent to eventfd. + // So if the control path is gone, return instead of notifying. + if !Path::new(&event_control_path).exists() { + return; + } + } + }); + + Ok(receiver) +} + +// notify_on_oom returns channel on which you can expect event about OOM, +// if process died without OOM this channel will be closed. +fn notify_on_oom(cid: &str, dir: String) -> Result> { + if dir == "" { + return Err(anyhow!("memory controller missing")); + } + + register_memory_event(cid, dir, "memory.oom_control", "") +} + +// level is one of "low", "medium", or "critical" +fn notify_memory_pressure(cid: &str, dir: String, level: &str) -> Result> { + if dir == "" { + return Err(anyhow!("memory controller missing")); + } + + if level != "low" && level != "medium" && level != "critical" { + return Err(anyhow!("invalid pressure level {}", level)); + } + + register_memory_event(cid, dir, "memory.pressure_level", level) +} + +fn register_memory_event( + cid: &str, + cg_dir: String, + event_name: &str, + arg: &str, +) -> Result> { + let path = Path::new(&cg_dir).join(event_name); + let event_file = File::open(path.clone())?; + + let eventfd = eventfd(0, EfdFlags::EFD_CLOEXEC)?; + + let event_control_path = Path::new(&cg_dir).join("cgroup.event_control"); + let data; + if arg == "" { + data = format!("{} {}", eventfd, event_file.as_raw_fd()); + } else { + data = format!("{} {} {}", eventfd, event_file.as_raw_fd(), arg); + } + + fs::write(&event_control_path, data)?; + + let mut eventfd_file = unsafe { File::from_raw_fd(eventfd) }; + + let (sender, receiver) = mpsc::channel(); + let containere_id = cid.to_string(); + + thread::spawn(move || { + loop { + let mut buf = [0; 8]; + match eventfd_file.read(&mut buf) { + Err(err) => { + warn!(sl!(), "failed to read from eventfd: {:?}", err); + return; + } + Ok(_) => { + let content = fs::read_to_string(path.clone()); + info!( + sl!(), + "OOM event for container: {}, content: {:?}", &containere_id, content + ); + } + } + + // When a cgroup is destroyed, an event is sent to eventfd. + // So if the control path is gone, return instead of notifying. + if !Path::new(&event_control_path).exists() { + return; + } + sender.send(containere_id.clone()).unwrap(); + } + }); + + Ok(receiver) +} diff --git a/src/agent/rustjail/src/container.rs b/src/agent/rustjail/src/container.rs index d61f32ea4b..b3b89fe0bf 100644 --- a/src/agent/rustjail/src/container.rs +++ b/src/agent/rustjail/src/container.rs @@ -1,4 +1,4 @@ -// Copyright (c) 2019 Ant Financial +// Copyright (c) 2019, 2020 Ant Group // // SPDX-License-Identifier: Apache-2.0 // @@ -21,8 +21,8 @@ use std::clone::Clone; use std::fmt::Display; use std::process::{Child, Command}; -// use crate::configs::namespaces::{NamespaceType}; -use crate::cgroups::Manager as CgroupManager; +use cgroups::freezer::FreezerState; + use crate::process::Process; // use crate::intelrdt::Manager as RdtManager; use crate::log_child; @@ -31,6 +31,7 @@ use crate::sync::*; // use crate::stats::Stats; use crate::capabilities::{self, CAPSMAP}; use crate::cgroups::fs::{self as fscgroup, Manager as FsManager}; +use crate::cgroups::Manager; use crate::{mount, validator}; use protocols::agent::StatsContainerResponse; @@ -242,9 +243,7 @@ pub trait BaseContainer { // Or use Mutex as a member of struct, like C? // a lot of String in the struct might be &str #[derive(Debug)] -pub struct LinuxContainer -// where T: CgroupManager -{ +pub struct LinuxContainer { pub id: String, pub root: String, pub config: Config, @@ -303,7 +302,7 @@ impl Container for LinuxContainer { self.cgroup_manager .as_ref() .unwrap() - .freeze(fscgroup::FROZEN)?; + .freeze(FreezerState::Frozen)?; self.status.transition(Status::PAUSED); return Ok(()); @@ -321,7 +320,7 @@ impl Container for LinuxContainer { self.cgroup_manager .as_ref() .unwrap() - .freeze(fscgroup::THAWED)?; + .freeze(FreezerState::Thawed)?; self.status.transition(Status::RUNNING); return Ok(()); @@ -352,6 +351,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> { lazy_static::initialize(&CAPSMAP); let init = std::env::var(INIT)?.eq(format!("{}", true).as_str()); + let no_pivot = std::env::var(NO_PIVOT)?.eq(format!("{}", true).as_str()); let crfd = std::env::var(CRFD_FD)?.parse::().unwrap(); let cfd_log = std::env::var(CLOG_FD)?.parse::().unwrap(); @@ -372,6 +372,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> { let buf = read_sync(crfd)?; let cm_str = std::str::from_utf8(&buf)?; + let cm: FsManager = serde_json::from_str(cm_str)?; let p = if spec.process.is_some() { @@ -752,8 +753,6 @@ impl BaseContainer for LinuxContainer { } info!(logger, "exec fifo opened!"); - fscgroup::init_static(); - if self.config.spec.is_none() { return Err(anyhow!("no spec")); } @@ -1125,7 +1124,7 @@ fn join_namespaces( p: &Process, cm: &FsManager, st: &OCIState, - child: &mut Child, + _child: &mut Child, pwfd: RawFd, prfd: RawFd, ) -> Result<()> { @@ -1326,6 +1325,7 @@ impl LinuxContainer { }; let cgroup_manager = FsManager::new(cpath.as_str())?; + info!(logger, "new cgroup_manager {:?}", &cgroup_manager); Ok(LinuxContainer { id: id.clone(), @@ -1349,48 +1349,6 @@ impl LinuxContainer { fn load>(_id: T, _base: T) -> Result { Err(anyhow!("not supported")) } - /* - fn new_parent_process(&self, p: &Process) -> Result> { - let (pfd, cfd) = socket::socketpair(AddressFamily::Unix, - SockType::Stream, SockProtocol::Tcp, - SockFlag::SOCK_CLOEXEC)?; - - let cmd = Command::new(self.init_path) - .args(self.init_args[1..]) - .env("_LIBCONTAINER_INITPIPE", format!("{}", - cfd)) - .env("_LIBCONTAINER_STATEDIR", self.root) - .current_dir(Path::new(self.config.rootfs)) - .stdin(p.stdin) - .stdout(p.stdout) - .stderr(p.stderr); - - if p.console_socket.is_some() { - cmd.env("_LIBCONTAINER_CONSOLE", format!("{}", - unsafe { p.console_socket.unwrap().as_raw_fd() })); - } - - if !p.init { - return self.new_setns_process(p, cmd, pfd, cfd); - } - - let fifo_file = format!("{}/{}", self.root, EXEC_FIFO_FILENAME); - let fifofd = fcntl::open(fifo_file, - OFlag::O_PATH | OFlag::O_CLOEXEC, - Mode::from_bits(0).unwrap())?; - - cmd.env("_LIBCONTAINER_FIFOFD", format!("{}", fifofd)); - - self.new_init_process(p, cmd, pfd, cfd) - } - - fn new_setns_process(&self, p: &Process, cmd: &mut Command, pfd: Rawfd, cfd: Rawfd) -> Result { - } - - fn new_init_process(&self, p: &Process, cmd: &mut Command, pfd: Rawfd, cfd: Rawfd) -> Result { - cmd.env("_LINCONTAINER_INITTYPE", INITSTANDARD); - } - */ } // Handle the differing rlimit types for different targets diff --git a/src/agent/rustjail/src/mount.rs b/src/agent/rustjail/src/mount.rs index 460ba2c56b..c48946ba34 100644 --- a/src/agent/rustjail/src/mount.rs +++ b/src/agent/rustjail/src/mount.rs @@ -176,6 +176,38 @@ pub fn init_rootfs( Ok(()) } +fn mount_cgroups_v2(cfd_log: RawFd, m: &Mount, rootfs: &str, flags: MsFlags) -> Result<()> { + let olddir = unistd::getcwd()?; + unistd::chdir(rootfs)?; + + // https://github.com/opencontainers/runc/blob/09ddc63afdde16d5fb859a1d3ab010bd45f08497/libcontainer/rootfs_linux.go#L287 + let bm = Mount { + source: "cgroup".to_string(), + r#type: "cgroup2".to_string(), + destination: m.destination.clone(), + options: Vec::new(), + }; + + let mount_flags: MsFlags = flags; + + mount_from(cfd_log, &bm, rootfs, mount_flags, "", "")?; + + unistd::chdir(&olddir)?; + + if flags.contains(MsFlags::MS_RDONLY) { + let dest = format!("{}{}", rootfs, m.destination.as_str()); + mount::mount( + Some(dest.as_str()), + dest.as_str(), + None::<&str>, + flags | MsFlags::MS_BIND | MsFlags::MS_REMOUNT, + None::<&str>, + )?; + } + + Ok(()) +} + fn mount_cgroups( cfd_log: RawFd, m: &Mount, @@ -185,6 +217,9 @@ fn mount_cgroups( cpath: &HashMap, mounts: &HashMap, ) -> Result<()> { + if cgroups::hierarchies::is_cgroup2_unified_mode() { + return mount_cgroups_v2(cfd_log, &m, rootfs, flags); + } // mount tmpfs let ctm = Mount { source: "tmpfs".to_string(), @@ -194,7 +229,6 @@ fn mount_cgroups( }; let cflags = MsFlags::MS_NOEXEC | MsFlags::MS_NOSUID | MsFlags::MS_NODEV; - // info!(logger, "tmpfs"); mount_from(cfd_log, &ctm, rootfs, cflags, "", "")?; let olddir = unistd::getcwd()?; @@ -527,29 +561,23 @@ fn mount_from( if src.is_file() { let _ = OpenOptions::new().create(true).write(true).open(&dest); } - src + src.to_str().unwrap().to_string() } else { let _ = fs::create_dir_all(&dest); - PathBuf::from(&m.source) - }; - - // ignore this check since some mount's src didn't been a directory - // such as tmpfs. - /* - match stat::stat(src.to_str().unwrap()) { - Ok(_) => {} - Err(e) => { - info!("{}: {}", src.to_str().unwrap(), e.as_errno().unwrap().desc()); - } + if m.r#type.as_str() == "cgroup2" { + "cgroup2".to_string() + } else { + let tmp = PathBuf::from(&m.source); + tmp.to_str().unwrap().to_string() } - */ + }; match stat::stat(dest.as_str()) { Ok(_) => {} Err(e) => { log_child!( cfd_log, - "{}: {}", + "dest stat error. {}: {}", dest.as_str(), e.as_errno().unwrap().desc() ); @@ -557,7 +585,7 @@ fn mount_from( } match mount::mount( - Some(src.to_str().unwrap()), + Some(src.as_str()), dest.as_str(), Some(m.r#type.as_str()), flags, diff --git a/src/agent/src/config.rs b/src/agent/src/config.rs index 1b854fe743..3efab76e4e 100644 --- a/src/agent/src/config.rs +++ b/src/agent/src/config.rs @@ -14,6 +14,7 @@ const HOTPLUG_TIMOUT_OPTION: &str = "agent.hotplug_timeout"; const DEBUG_CONSOLE_VPORT_OPTION: &str = "agent.debug_console_vport"; const LOG_VPORT_OPTION: &str = "agent.log_vport"; const CONTAINER_PIPE_SIZE_OPTION: &str = "agent.container_pipe_size"; +const UNIFIED_CGROUP_HIERARCHY_OPTION: &str = "agent.unified_cgroup_hierarchy"; const DEFAULT_LOG_LEVEL: slog::Level = slog::Level::Info; const DEFAULT_HOTPLUG_TIMEOUT: time::Duration = time::Duration::from_secs(3); @@ -36,6 +37,7 @@ pub struct agentConfig { pub log_vport: i32, pub container_pipe_size: i32, pub server_addr: String, + pub unified_cgroup_hierarchy: bool, } impl agentConfig { @@ -49,6 +51,7 @@ impl agentConfig { log_vport: 0, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, server_addr: format!("{}:{}", VSOCK_ADDR, VSOCK_PORT), + unified_cgroup_hierarchy: false, } } @@ -56,6 +59,7 @@ impl agentConfig { let cmdline = fs::read_to_string(file)?; let params: Vec<&str> = cmdline.split_ascii_whitespace().collect(); for param in params.iter() { + // parse cmdline flags if param.eq(&DEBUG_CONSOLE_FLAG) { self.debug_console = true; } @@ -64,6 +68,7 @@ impl agentConfig { self.dev_mode = true; } + // parse cmdline options if param.starts_with(format!("{}=", LOG_LEVEL_OPTION).as_str()) { let level = get_log_level(param)?; self.log_level = level; @@ -95,6 +100,11 @@ impl agentConfig { let container_pipe_size = get_container_pipe_size(param)?; self.container_pipe_size = container_pipe_size } + + if param.starts_with(format!("{}=", UNIFIED_CGROUP_HIERARCHY_OPTION).as_str()) { + let b = get_bool_value(param, false); + self.unified_cgroup_hierarchy = b; + } } if let Ok(addr) = env::var(SERVER_ADDR_ENV_VAR) { @@ -175,6 +185,34 @@ fn get_hotplug_timeout(param: &str) -> Result { Ok(time::Duration::from_secs(value.unwrap())) } +fn get_bool_value(param: &str, default: bool) -> bool { + let fields: Vec<&str> = param.split("=").collect(); + + if fields.len() != 2 { + return default; + } + + let v = fields[1]; + + // bool + let t: std::result::Result = v.parse(); + if t.is_ok() { + return t.unwrap(); + } + + // integer + let i: std::result::Result = v.parse(); + if i.is_err() { + return default; + } + + // only `0` returns false, otherwise returns true + match i.unwrap() { + 0 => false, + _ => true, + } +} + fn get_container_pipe_size(param: &str) -> Result { let fields: Vec<&str> = param.split("=").collect(); @@ -269,6 +307,7 @@ mod tests { log_level: slog::Level, hotplug_timeout: time::Duration, container_pipe_size: i32, + unified_cgroup_hierarchy: bool, } let tests = &[ @@ -279,6 +318,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: "agent.debug_console agent.devmodex", @@ -287,6 +327,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: "agent.logx=debug", @@ -295,6 +336,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: "agent.log=debug", @@ -303,6 +345,7 @@ mod tests { log_level: slog::Level::Debug, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: "", @@ -311,6 +354,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: "foo", @@ -319,6 +363,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: "foo bar", @@ -327,6 +372,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: "foo bar", @@ -335,6 +381,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: "foo agent bar", @@ -343,6 +390,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: "foo debug_console agent bar devmode", @@ -351,6 +399,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: "agent.debug_console", @@ -359,6 +408,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: " agent.debug_console ", @@ -367,6 +417,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: "agent.debug_console foo", @@ -375,6 +426,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: " agent.debug_console foo", @@ -383,6 +435,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: "foo agent.debug_console bar", @@ -391,6 +444,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: "foo agent.debug_console", @@ -399,6 +453,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: "foo agent.debug_console ", @@ -407,6 +462,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: "agent.devmode", @@ -415,6 +471,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: " agent.devmode ", @@ -423,6 +480,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: "agent.devmode foo", @@ -431,6 +489,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: " agent.devmode foo", @@ -439,6 +498,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: "foo agent.devmode bar", @@ -447,6 +507,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: "foo agent.devmode", @@ -455,6 +516,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: "foo agent.devmode ", @@ -463,6 +525,7 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { contents: "agent.devmode agent.debug_console", @@ -471,54 +534,61 @@ mod tests { log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { - contents: "agent.devmode agent.debug_console agent.hotplug_timeout=100", + contents: "agent.devmode agent.debug_console agent.hotplug_timeout=100 agent.unified_cgroup_hierarchy=a", debug_console: true, dev_mode: true, log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: time::Duration::from_secs(100), container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { - contents: "agent.devmode agent.debug_console agent.hotplug_timeout=0", + contents: "agent.devmode agent.debug_console agent.hotplug_timeout=0 agent.unified_cgroup_hierarchy=11", debug_console: true, dev_mode: true, log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: true, }, TestData { - contents: "agent.devmode agent.debug_console agent.container_pipe_size=2097152", + contents: "agent.devmode agent.debug_console agent.container_pipe_size=2097152 agent.unified_cgroup_hierarchy=false", debug_console: true, dev_mode: true, log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: 2097152, + unified_cgroup_hierarchy: false, }, TestData { - contents: "agent.devmode agent.debug_console agent.container_pipe_size=100", + contents: "agent.devmode agent.debug_console agent.container_pipe_size=100 agent.unified_cgroup_hierarchy=true", debug_console: true, dev_mode: true, log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: 100, + unified_cgroup_hierarchy: true, }, TestData { - contents: "agent.devmode agent.debug_console agent.container_pipe_size=0", + contents: "agent.devmode agent.debug_console agent.container_pipe_size=0 agent.unified_cgroup_hierarchy=0", debug_console: true, dev_mode: true, log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: false, }, TestData { - contents: "agent.devmode agent.debug_console agent.container_pip_siz=100", + contents: "agent.devmode agent.debug_console agent.container_pip_siz=100 agent.unified_cgroup_hierarchy=1", debug_console: true, dev_mode: true, log_level: DEFAULT_LOG_LEVEL, hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, + unified_cgroup_hierarchy: true, }, ]; @@ -550,6 +620,7 @@ mod tests { let mut config = agentConfig::new(); assert_eq!(config.debug_console, false, "{}", msg); assert_eq!(config.dev_mode, false, "{}", msg); + assert_eq!(config.unified_cgroup_hierarchy, false, "{}", msg); assert_eq!( config.hotplug_timeout, time::Duration::from_secs(3), @@ -563,6 +634,11 @@ mod tests { assert_eq!(d.debug_console, config.debug_console, "{}", msg); assert_eq!(d.dev_mode, config.dev_mode, "{}", msg); + assert_eq!( + d.unified_cgroup_hierarchy, config.unified_cgroup_hierarchy, + "{}", + msg + ); assert_eq!(d.log_level, config.log_level, "{}", msg); assert_eq!(d.hotplug_timeout, config.hotplug_timeout, "{}", msg); assert_eq!(d.container_pipe_size, config.container_pipe_size, "{}", msg); diff --git a/src/agent/src/main.rs b/src/agent/src/main.rs index 01fd71f7b7..1a46713815 100644 --- a/src/agent/src/main.rs +++ b/src/agent/src/main.rs @@ -131,6 +131,15 @@ fn main() -> Result<()> { let writer = unsafe { File::from_raw_fd(wfd) }; let agentConfig = AGENT_CONFIG.clone(); + // once parsed cmdline and set the config, release the write lock + // as soon as possible in case other thread would get read lock on + // it. + { + let mut config = agentConfig.write().unwrap(); + config.parse_cmdline(KERNEL_CMDLINE_FILE)?; + } + + let config = agentConfig.read().unwrap(); let init_mode = unistd::getpid() == Pid::from_raw(1); if init_mode { @@ -144,18 +153,9 @@ fn main() -> Result<()> { // since before do the base mount, it wouldn't access "/proc/cmdline" // to get the customzied debug level. let logger = logging::create_logger(NAME, "agent", slog::Level::Debug, writer); - init_agent_as_init(&logger)?; + init_agent_as_init(&logger, config.unified_cgroup_hierarchy)?; } - // once parsed cmdline and set the config, release the write lock - // as soon as possible in case other thread would get read lock on - // it. - { - let mut config = agentConfig.write().unwrap(); - config.parse_cmdline(KERNEL_CMDLINE_FILE)?; - } - - let config = agentConfig.read().unwrap(); let log_vport = config.log_vport as u32; let log_handle = thread::spawn(move || -> Result<()> { let mut reader = unsafe { File::from_raw_fd(rfd) }; @@ -339,9 +339,9 @@ fn setup_signal_handler(logger: &Logger, sandbox: Arc>) -> Result // init_agent_as_init will do the initializations such as setting up the rootfs // when this agent has been run as the init process. -fn init_agent_as_init(logger: &Logger) -> Result<()> { +fn init_agent_as_init(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result<()> { general_mount(logger)?; - cgroups_mount(logger)?; + cgroups_mount(logger, unified_cgroup_hierarchy)?; fs::remove_file(Path::new("/dev/ptmx"))?; unixfs::symlink(Path::new("/dev/pts/ptmx"), Path::new("/dev/ptmx"))?; diff --git a/src/agent/src/mount.rs b/src/agent/src/mount.rs index 7ef6d745b3..4b0c5d8303 100644 --- a/src/agent/src/mount.rs +++ b/src/agent/src/mount.rs @@ -542,7 +542,22 @@ pub fn get_mount_fs_type_from_file(mount_file: &str, mount_point: &str) -> Resul )) } -pub fn get_cgroup_mounts(logger: &Logger, cg_path: &str) -> Result> { +pub fn get_cgroup_mounts( + logger: &Logger, + cg_path: &str, + unified_cgroup_hierarchy: bool, +) -> Result> { + // cgroup v2 + // https://github.com/kata-containers/agent/blob/8c9bbadcd448c9a67690fbe11a860aaacc69813c/agent.go#L1249 + if unified_cgroup_hierarchy { + return Ok(vec![INIT_MOUNT { + fstype: "cgroup2", + src: "cgroup2", + dest: "/sys/fs/cgroup", + options: vec!["nosuid", "nodev", "noexec", "relatime", "nsdelegate"], + }]); + } + let file = File::open(&cg_path)?; let reader = BufReader::new(file); @@ -617,10 +632,10 @@ pub fn get_cgroup_mounts(logger: &Logger, cg_path: &str) -> Result Result<()> { +pub fn cgroups_mount(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result<()> { let logger = logger.new(o!("subsystem" => "mount")); - let cgroups = get_cgroup_mounts(&logger, PROC_CGROUPS)?; + let cgroups = get_cgroup_mounts(&logger, PROC_CGROUPS, unified_cgroup_hierarchy)?; for cg in cgroups.iter() { mount_to_rootfs(&logger, cg)?; @@ -1071,6 +1086,20 @@ mod tests { } } + #[test] + fn test_get_cgroup_v2_mounts() { + let dir = tempdir().expect("failed to create tmpdir"); + let drain = slog::Discard; + let logger = slog::Logger::root(drain, o!()); + let result = get_cgroup_mounts(&logger, "", true); + + assert_eq!(true, result.is_ok()); + let result = result.unwrap(); + assert_eq!(1, result.len()); + assert_eq!(result[0].fstype, "cgroup2"); + assert_eq!(result[0].src, "cgroup2"); + } + #[test] fn test_get_cgroup_mounts() { #[derive(Debug)] @@ -1175,7 +1204,7 @@ mod tests { ]; // First, test a missing file - let result = get_cgroup_mounts(&logger, enoent_filename); + let result = get_cgroup_mounts(&logger, enoent_filename, false); assert!(result.is_err()); let error_msg = format!("{}", result.unwrap_err()); @@ -1198,7 +1227,7 @@ mod tests { file.write_all(d.contents.as_bytes()) .expect(&format!("{}: failed to write file contents", msg)); - let result = get_cgroup_mounts(&logger, filename); + let result = get_cgroup_mounts(&logger, filename, false); let msg = format!("{}: result: {:?}", msg, result); if d.error_contains != "" { diff --git a/src/agent/src/rpc.rs b/src/agent/src/rpc.rs index 4e801e166c..1870949ede 100644 --- a/src/agent/src/rpc.rs +++ b/src/agent/src/rpc.rs @@ -12,7 +12,7 @@ use oci::{LinuxNamespace, Root, Spec}; use protobuf::{RepeatedField, SingularPtrField}; use protocols::agent::{ AgentDetails, CopyFileRequest, GuestDetailsResponse, Interfaces, ListProcessesResponse, - Metrics, ReadStreamResponse, Routes, StatsContainerResponse, WaitProcessResponse, + Metrics, OOMEvent, ReadStreamResponse, Routes, StatsContainerResponse, WaitProcessResponse, WriteStreamResponse, }; use protocols::empty::Empty; @@ -21,6 +21,7 @@ use protocols::health::{ }; use protocols::types::Interface; use rustjail; +use rustjail::cgroups::notifier; use rustjail::container::{BaseContainer, Container, LinuxContainer}; use rustjail::process::Process; use rustjail::specconv::CreateOpts; @@ -94,7 +95,7 @@ impl agentService { } }; - info!(sl!(), "receive createcontainer {}", &cid); + info!(sl!(), "receive createcontainer, spec: {:?}", &oci); // re-scan PCI bus // looking for hidden devices @@ -178,6 +179,7 @@ impl agentService { let sandbox = self.sandbox.clone(); let mut s = sandbox.lock().unwrap(); + let sid = s.id.clone(); let ctr: &mut LinuxContainer = match s.get_container(cid.as_str()) { Some(cr) => cr, @@ -188,6 +190,15 @@ impl agentService { ctr.exec()?; + // start oom event loop + if sid != cid && ctr.cgroup_manager.is_some() { + let cg_path = ctr.cgroup_manager.as_ref().unwrap().get_cg_path("memory"); + if cg_path.is_some() { + let rx = notifier::notify_oom(cid.as_str(), cg_path.unwrap())?; + s.run_oom_event_monitor(rx, cid); + } + } + Ok(()) } @@ -329,7 +340,7 @@ impl agentService { sl!(), "signal process"; "container-id" => cid.clone(), - "exec-id" => eid.clone() + "exec-id" => eid.clone(), ); if eid == "" { @@ -488,7 +499,6 @@ impl agentService { let eid = req.exec_id; let mut fd: RawFd = -1; - info!(sl!(), "read stdout for {}/{}", cid.clone(), eid.clone()); { let s = self.sandbox.clone(); let mut sandbox = s.lock().unwrap(); @@ -1293,6 +1303,34 @@ impl protocols::agent_ttrpc::AgentService for agentService { } } } + + fn get_oom_event( + &self, + _ctx: &ttrpc::TtrpcContext, + _req: protocols::agent::GetOOMEventRequest, + ) -> ttrpc::Result { + let sandbox = self.sandbox.clone(); + let s = sandbox.lock().unwrap(); + let event_rx = &s.event_rx.clone(); + let event_rx = event_rx.lock().unwrap(); + drop(s); + drop(sandbox); + + match event_rx.recv() { + Err(err) => { + return Err(ttrpc::Error::RpcStatus(ttrpc::get_status( + ttrpc::Code::INTERNAL, + err.to_string(), + ))) + } + Ok(container_id) => { + info!(sl!(), "get_oom_event return {}", &container_id); + let mut resp = OOMEvent::new(); + resp.container_id = container_id; + return Ok(resp); + } + } + } } #[derive(Clone)] diff --git a/src/agent/src/sandbox.rs b/src/agent/src/sandbox.rs index 26b6554fad..8c5eacbe49 100644 --- a/src/agent/src/sandbox.rs +++ b/src/agent/src/sandbox.rs @@ -10,12 +10,13 @@ use crate::namespace::Namespace; use crate::namespace::NSTYPEPID; use crate::network::Network; use anyhow::{anyhow, Context, Result}; +use cgroups; use libc::pid_t; use netlink::{RtnlHandle, NETLINK_ROUTE}; use oci::{Hook, Hooks}; use protocols::agent::OnlineCPUMemRequest; use regex::Regex; -use rustjail::cgroups; +use rustjail::cgroups as rustjail_cgroups; use rustjail::container::BaseContainer; use rustjail::container::LinuxContainer; use rustjail::process::Process; @@ -24,7 +25,8 @@ use std::collections::HashMap; use std::fs; use std::os::unix::fs::PermissionsExt; use std::path::Path; -use std::sync::mpsc::Sender; +use std::sync::mpsc::{self, Receiver, Sender}; +use std::sync::{Arc, Mutex}; use std::{thread, time}; #[derive(Debug)] @@ -46,12 +48,16 @@ pub struct Sandbox { pub sender: Option>, pub rtnl: Option, pub hooks: Option, + pub event_rx: Arc>>, + pub event_tx: Sender, } impl Sandbox { pub fn new(logger: &Logger) -> Result { let fs_type = get_mount_fs_type("/")?; let logger = logger.new(o!("subsystem" => "sandbox")); + let (tx, rx) = mpsc::channel::(); + let event_rx = Arc::new(Mutex::new(rx)); Ok(Sandbox { logger: logger.clone(), @@ -71,6 +77,8 @@ impl Sandbox { sender: None, rtnl: Some(RtnlHandle::new(NETLINK_ROUTE, 0).unwrap()), hooks: None, + event_rx: event_rx, + event_tx: tx, }) } @@ -239,7 +247,7 @@ impl Sandbox { online_memory(&self.logger)?; } - let cpuset = cgroups::fs::get_guest_cpuset()?; + let cpuset = rustjail_cgroups::fs::get_guest_cpuset()?; for (_, ctr) in self.containers.iter() { info!(self.logger, "updating {}", ctr.id.as_str()); @@ -302,6 +310,21 @@ impl Sandbox { Ok(hooks) } + + pub fn run_oom_event_monitor(&self, rx: Receiver, container_id: String) { + let tx = self.event_tx.clone(); + let logger = self.logger.clone(); + + thread::spawn(move || { + for event in rx { + info!(logger, "got an OOM event {:?}", event); + match tx.send(container_id.clone()) { + Err(err) => error!(logger, "failed to send message: {:?}", err), + Ok(_) => {} + } + } + }); + } } fn online_resources(logger: &Logger, path: &str, pattern: &str, num: i32) -> Result { diff --git a/src/runtime/containerd-shim-v2/wait.go b/src/runtime/containerd-shim-v2/wait.go index 8cf5ff7108..5688eddb83 100644 --- a/src/runtime/containerd-shim-v2/wait.go +++ b/src/runtime/containerd-shim-v2/wait.go @@ -146,7 +146,7 @@ func watchOOMEvents(ctx context.Context, s *service) { // If the GetOOMEvent call is not implemented, then the agent is most likely an older version, // stop attempting to get OOM events. // for rust agent, the response code is not found - if isGRPCErrorCode(codes.NotFound, err) { + if isGRPCErrorCode(codes.NotFound, err) || err.Error() == "Dead agent" { return } continue diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index f056ae902a..f588d472b7 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -1203,10 +1203,10 @@ func (s *Sandbox) CreateContainer(contConfig ContainerConfig) (VCContainer, erro // Rollback if error happens. if err != nil { logger := s.Logger().WithFields(logrus.Fields{"container-id": c.id, "sandox-id": s.id, "rollback": true}) - logger.Warning("Cleaning up partially created container") + logger.WithError(err).Error("Cleaning up partially created container") if err2 := c.stop(true); err2 != nil { - logger.WithError(err2).Warning("Could not delete container") + logger.WithError(err2).Error("Could not delete container") } logger.Debug("Removing stopped container from sandbox store") @@ -1894,9 +1894,11 @@ func (s *Sandbox) updateResources() error { return err } + s.Logger().Debugf("Request to hypervisor to update oldCPUs/newCPUs: %d/%d", oldCPUs, newCPUs) // If the CPUs were increased, ask agent to online them if oldCPUs < newCPUs { vcpusAdded := newCPUs - oldCPUs + s.Logger().Debugf("Request to onlineCPUMem with %d CPUs", vcpusAdded) if err := s.agent.onlineCPUMem(vcpusAdded, true); err != nil { return err }