diff --git a/src/libs/kata-types/src/annotations/cri_containerd.rs b/src/libs/kata-types/src/annotations/cri_containerd.rs
index db6462a8c8..8b2d63fafd 100644
--- a/src/libs/kata-types/src/annotations/cri_containerd.rs
+++ b/src/libs/kata-types/src/annotations/cri_containerd.rs
@@ -11,3 +11,14 @@ pub const SANDBOX: &str = "sandbox";
 pub const CONTAINER: &str = "container";
 
 pub const SANDBOX_ID_LABEL_KEY: &str = "io.kubernetes.cri.sandbox-id";
+
+// Ref: https://pkg.go.dev/github.com/containerd/containerd@v1.6.7/pkg/cri/annotations
+// The SandboxCPU annotations carry the initial CPU configuration of the sandbox. It is calculated as the sum
+// of the container CPU resources, optionally provided by Kubelet (introduced in 1.23) as part of the PodSandboxConfig
+pub const SANDBOX_CPU_QUOTA_KEY: &str = "io.kubernetes.cri.sandbox-cpu-quota";
+pub const SANDBOX_CPU_PERIOD_KEY: &str = "io.kubernetes.cri.sandbox-cpu-period";
+pub const SANDBOX_CPU_SHARE_KEY: &str = "io.kubernetes.cri.sandbox-cpu-shares";
+
+// The SandboxMemory annotation carries the initial amount of memory associated with the sandbox. It is calculated
+// as the sum of the container memory, optionally provided by Kubelet (introduced in 1.23) as part of the PodSandboxConfig
+pub const SANDBOX_MEM_KEY: &str = "io.kubernetes.cri.sandbox-memory";
diff --git a/src/libs/kata-types/src/annotations/mod.rs b/src/libs/kata-types/src/annotations/mod.rs
index 0a517e2216..07cdbe1a1d 100644
--- a/src/libs/kata-types/src/annotations/mod.rs
+++ b/src/libs/kata-types/src/annotations/mod.rs
@@ -16,6 +16,8 @@ use crate::config::hypervisor::get_hypervisor_plugin;
 use crate::config::TomlConfig;
 use crate::sl;
 
+use self::cri_containerd::{SANDBOX_CPU_PERIOD_KEY, SANDBOX_CPU_QUOTA_KEY, SANDBOX_MEM_KEY};
+
 /// CRI-containerd specific annotations.
pub mod cri_containerd; @@ -376,6 +378,28 @@ impl Annotation { self.get(CONTAINER_TYPE_KEY) } + /// Get the annotation of cpu quota for sandbox + pub fn get_sandbox_cpu_quota(&self) -> u64 { + let value = self + .get_value::(SANDBOX_CPU_QUOTA_KEY) + .unwrap_or(Some(0)); + value.unwrap_or(0) + } + + /// Get the annotation of cpu period for sandbox + pub fn get_sandbox_cpu_period(&self) -> i64 { + let value = self + .get_value::(SANDBOX_CPU_PERIOD_KEY) + .unwrap_or(Some(0)); + value.unwrap_or(0) + } + + /// Get the annotation of memory for sandbox + pub fn get_sandbox_mem(&self) -> i64 { + let value = self.get_value::(SANDBOX_MEM_KEY).unwrap_or(Some(0)); + value.unwrap_or(0) + } + /// Get the annotation to specify the Resources.Memory.Swappiness. pub fn get_container_resource_swappiness(&self) -> Result> { match self.get_value::(KATA_ANNO_CONTAINER_RES_SWAPPINESS) { diff --git a/src/libs/kata-types/src/config/runtime.rs b/src/libs/kata-types/src/config/runtime.rs index ce8e9efa59..a9fa3de9db 100644 --- a/src/libs/kata-types/src/config/runtime.rs +++ b/src/libs/kata-types/src/config/runtime.rs @@ -99,6 +99,10 @@ pub struct Runtime { #[serde(default)] pub enable_pprof: bool, + /// If enabled, static resource management will calculate the vcpu and memory for the sandbox/container + #[serde(default)] + pub static_resource_mgmt: bool, + /// Determines whether container seccomp profiles are passed to the virtual machine and /// applied by the kata agent. If set to true, seccomp is not applied within the guest. 
#[serde(default)] diff --git a/src/runtime-rs/Makefile b/src/runtime-rs/Makefile index 48c5703541..e8358b6760 100644 --- a/src/runtime-rs/Makefile +++ b/src/runtime-rs/Makefile @@ -122,7 +122,7 @@ DEFMSIZE9P := 8192 DEFVFIOMODE := guest-kernel # Default cgroup model DEFSANDBOXCGROUPONLY ?= false -DEFSTATICRESOURCEMGMT ?= false +DEFSTATICRESOURCEMGMT_DB ?= false DEFBINDMOUNTS := [] SED = sed CLI_DIR = cmd @@ -255,7 +255,7 @@ USER_VARS += DEFMSIZE9P USER_VARS += DEFENTROPYSOURCE USER_VARS += DEFVALIDENTROPYSOURCES USER_VARS += DEFSANDBOXCGROUPONLY -USER_VARS += DEFSTATICRESOURCEMGMT +USER_VARS += DEFSTATICRESOURCEMGMT_DB USER_VARS += DEFBINDMOUNTS USER_VARS += DEFVFIOMODE USER_VARS += BUILDFLAGS diff --git a/src/runtime-rs/config/configuration-dragonball.toml.in b/src/runtime-rs/config/configuration-dragonball.toml.in index bda6a8d3a1..6efb18ccf6 100644 --- a/src/runtime-rs/config/configuration-dragonball.toml.in +++ b/src/runtime-rs/config/configuration-dragonball.toml.in @@ -247,3 +247,6 @@ experimental=@DEFAULTEXPFEATURES@ # If enabled, user can run pprof tools with shim v2 process through kata-monitor. 
# (default: false) # enable_pprof = true + +static_resource_mgmt=@DEFSTATICRESOURCEMGMT_DB@ + diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs index a98d78efcf..786088633b 100644 --- a/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs @@ -176,6 +176,7 @@ impl DragonballInner { serial_path: Some(serial_path), mem_size_mib: self.config.memory_info.default_memory as usize, vcpu_count: self.config.cpu_info.default_vcpus as u8, + max_vcpu_count: self.config.cpu_info.default_maxvcpus as u8, ..Default::default() }; info!(sl!(), "vm config: {:?}", vm_config); diff --git a/src/runtime-rs/crates/runtimes/src/lib.rs b/src/runtime-rs/crates/runtimes/src/lib.rs index 0853ee9c56..d10b38c7fd 100644 --- a/src/runtime-rs/crates/runtimes/src/lib.rs +++ b/src/runtime-rs/crates/runtimes/src/lib.rs @@ -11,3 +11,4 @@ logging::logger_with_subsystem!(sl, "runtimes"); pub mod manager; pub use manager::RuntimeHandlerManager; +mod static_resource; diff --git a/src/runtime-rs/crates/runtimes/src/manager.rs b/src/runtime-rs/crates/runtimes/src/manager.rs index c2c6c84c65..390cbac159 100644 --- a/src/runtime-rs/crates/runtimes/src/manager.rs +++ b/src/runtime-rs/crates/runtimes/src/manager.rs @@ -7,6 +7,8 @@ use std::sync::Arc; use anyhow::{anyhow, Context, Result}; + +use crate::static_resource::StaticResourceManager; use common::{ message::Message, types::{Request, Response}, @@ -310,6 +312,23 @@ fn load_config(spec: &oci::Spec) -> Result { let (mut toml_config, _) = TomlConfig::load_from_file(&config_path).context("load toml config")?; annotation.update_config_by_annotation(&mut toml_config)?; + + // Sandbox sizing information *may* be provided in two scenarios: + // 1. The upper layer runtime (ie, containerd or crio) provide sandbox sizing information as an annotation + // in the 'sandbox container's' spec. 
This would typically be a scenario where as part of a create sandbox + // request the upper layer runtime receives this information as part of a pod, and makes it available to us + // for sizing purposes. + // 2. If this is not a sandbox infrastructure container, but instead a standalone single container (analogous to "docker run..."), + // then the container spec itself will contain appropriate sizing information for the entire sandbox (since it is + // a single container. + if toml_config.runtime.static_resource_mgmt { + info!(sl!(), "static resource management enabled"); + let static_resource_manager = StaticResourceManager::new(spec) + .context("failed to construct static resource manager")?; + static_resource_manager + .setup_config(&mut toml_config) + .context("failed to setup static resource mgmt config")?; + } info!(sl!(), "get config content {:?}", &toml_config); Ok(toml_config) } diff --git a/src/runtime-rs/crates/runtimes/src/static_resource.rs b/src/runtime-rs/crates/runtimes/src/static_resource.rs new file mode 100644 index 0000000000..0e04d21505 --- /dev/null +++ b/src/runtime-rs/crates/runtimes/src/static_resource.rs @@ -0,0 +1,167 @@ +// Copyright (c) 2019-2021 Alibaba Cloud +// Copyright (c) 2019-2021 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::convert::TryFrom; + +use anyhow::{Context, Result}; + +use kata_types::{ + annotations::Annotation, config::TomlConfig, container::ContainerType, + cpu::LinuxContainerCpuResources, k8s::container_type, +}; + +// static resource that StaticResourceManager needs, this is the spec for the +// sandbox/container's workload +#[derive(Clone, Copy, Debug)] +struct StaticResource { + vcpu: u32, + mem_mb: u32, +} + +// generate static resource(vcpu and memory in MiB) from spec's information +// used for static resource management +impl TryFrom<&oci::Spec> for StaticResource { + type Error = anyhow::Error; + fn try_from(spec: &oci::Spec) -> Result { + let mut vcpu: u32 = 0; + let mut mem_mb: 
u32 = 0; + match container_type(spec) { + // podsandbox, from annotation + ContainerType::PodSandbox => { + let annotation = Annotation::new(spec.annotations.clone()); + let (period, quota, memory) = + get_sizing_info(annotation).context("failed to get sizing info")?; + let cpu = oci::LinuxCpu { + period: Some(period), + quota: Some(quota), + ..Default::default() + }; + // although it may not be actually a linux container, we are only using the calculation inside + // LinuxContainerCpuResources::try_from to generate our vcpu number + if let Ok(cpu_resource) = LinuxContainerCpuResources::try_from(&cpu) { + vcpu = get_nr_vcpu(&cpu_resource); + } + mem_mb = convert_memory_to_mb(memory); + } + // single container, from container spec + _ => { + if let Some(linux) = &spec.linux { + if let Some(resource) = &linux.resources { + if let Some(cpu) = &resource.cpu { + if let Ok(cpu_resource) = LinuxContainerCpuResources::try_from(cpu) { + vcpu = get_nr_vcpu(&cpu_resource); + } + } + if let Some(mem) = &resource.memory { + let memory = mem.limit.unwrap_or(0); + mem_mb = convert_memory_to_mb(memory); + } + } + } + } + } + info!( + sl!(), + "static resource mgmt result: vcpu={}, mem_mb={}", vcpu, mem_mb + ); + Ok(Self { vcpu, mem_mb }) + } +} + +// StaticResourceManager is responsible for static resource management +// +// static resource management sizing information is optionally provided, either by +// upper layer runtime (containerd / crio) or by the container spec itself (when it +// is a standalone single container such as the one started with *docker run*) +// +// the sizing information uses three values, cpu quota, cpu period and memory limit, +// and with above values it calculates the # vcpus and memory for the workload and +// add them to default value of the config +#[derive(Clone, Copy, Debug)] +pub struct StaticResourceManager { + resource: StaticResource, +} + +impl StaticResourceManager { + pub fn new(spec: &oci::Spec) -> Result { + Ok(Self { + resource: 
StaticResource::try_from(spec) + .context("failed to construct static resource")?, + }) + } + + pub fn setup_config(&self, config: &mut TomlConfig) -> Result<()> { + // update this data to the hypervisor config for later use by hypervisor + let hypervisor_name = &config.runtime.hypervisor_name; + let mut hv = config + .hypervisor + .get_mut(hypervisor_name) + .context("failed to get hypervisor config")?; + hv.cpu_info.default_vcpus += self.resource.vcpu as i32; + hv.memory_info.default_memory += self.resource.mem_mb; + Ok(()) + } +} + +fn get_nr_vcpu(resource: &LinuxContainerCpuResources) -> u32 { + if let Some(v) = resource.get_vcpus() { + v as u32 + } else { + 0 + } +} + +fn convert_memory_to_mb(memory_in_byte: i64) -> u32 { + if memory_in_byte < 0 { + 0 + } else { + (memory_in_byte / 1024 / 1024) as u32 + } +} + +// from the upper layer runtime's annotation (e.g. crio, k8s), get the *cpu quota, +// cpu period and memory limit* for a sandbox/container +fn get_sizing_info(annotation: Annotation) -> Result<(u64, i64, i64)> { + // since we are *adding* our result to the config, a value of 0 will cause no change + // and if the annotation is not assigned (but static resource management is), we will + // log a *warning* to fill that with zero value + let period = annotation.get_sandbox_cpu_quota(); + let quota = annotation.get_sandbox_cpu_period(); + let memory = annotation.get_sandbox_mem(); + Ok((period, quota, memory)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_static_resource_mgmt() { + // data below should result in 2200 mCPU(round up to 3 vcpus) and 512 MiB of memory + let period: u64 = 100000; + let memory: i64 = 1048576 * 512; // 512 MiB + let quota: i64 = 220000; + + let cpu = oci::LinuxCpu { + period: Some(period), + quota: Some(quota), + ..Default::default() + }; + if let Ok(cpu_resource) = LinuxContainerCpuResources::try_from(&cpu) { + if let Some(v) = cpu_resource.get_vcpus() { + assert_eq!(v, 3); + } + } + + let mem_mb = if 
memory < 0 { + 0 + } else { + (memory / 1024 / 1024) as u32 + }; + + assert_eq!(mem_mb, 512); + } +}