From a34c74a2d4c802b8475bc9ac5c00ebb3c7d8090d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 24 Jun 2026 18:47:38 +0200 Subject: [PATCH] runtime-rs: size static sandboxes with overhead values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When static sandbox sizing is enabled, keep configured defaults when workloads do not specify CPU or memory limits. When limits are present, size the VM as requested resources plus overhead_vcpus/overhead_memory values derived from runtime-rs profile defaults. Limit-driven vCPU sizing is clamped to a minimum of one vCPU so a 0.0 result never yields an unbootable VM, and sandbox setup fails early with a clear, actionable error when the computed memory is 0 MiB (pointing at memory limits or non-zero default/overhead memory settings). This keeps static VM sizing predictable across runtime-rs profiles, including NVIDIA ones. Signed-off-by: Fabiano Fidêncio Assisted-by: Cursor --- .../kata-types/src/config/hypervisor/mod.rs | 57 +++++++ src/runtime-rs/Makefile | 28 +++ ...configuration-clh-azure-runtime-rs.toml.in | 17 ++ .../configuration-clh-runtime-rs.toml.in | 17 ++ .../config/configuration-dragonball.toml.in | 17 ++ ...iguration-qemu-coco-dev-runtime-rs.toml.in | 17 ++ ...uration-qemu-nvidia-gpu-runtime-rs.toml.in | 17 ++ ...ion-qemu-nvidia-gpu-snp-runtime-rs.toml.in | 17 ++ ...ion-qemu-nvidia-gpu-tdx-runtime-rs.toml.in | 17 ++ .../configuration-qemu-runtime-rs.toml.in | 17 ++ .../configuration-qemu-se-runtime-rs.toml.in | 17 ++ .../configuration-qemu-snp-runtime-rs.toml.in | 17 ++ .../configuration-qemu-tdx-runtime-rs.toml.in | 17 ++ .../resource/src/cpu_mem/initial_size.rs | 161 ++++++++++++++---- .../pod-guest-pull-in-trusted-storage.yaml.in | 1 + 15 files changed, 401 insertions(+), 33 deletions(-) diff --git a/src/libs/kata-types/src/config/hypervisor/mod.rs b/src/libs/kata-types/src/config/hypervisor/mod.rs index f40bed3b1f..eaab7eb82f 100644 --- 
a/src/libs/kata-types/src/config/hypervisor/mod.rs +++ b/src/libs/kata-types/src/config/hypervisor/mod.rs @@ -641,6 +641,13 @@ pub struct CpuInfo { /// - `> number of physical cores`: Set to actual number of physical cores #[serde(default)] pub default_vcpus: f32, + /// vCPU overhead to be added when sandbox/container CPU limits are provided. + /// + /// This value is used by runtime-rs static sandbox sizing as: + /// - if no CPU limits are provided: use `default_vcpus` + /// - if CPU limits are provided: use `overhead_vcpus + workload_vcpus` + #[serde(default)] + pub overhead_vcpus: f32, /// Default maximum number of vCPUs per SB/VM: /// - Unspecified or `0`: Set to actual number of physical cores or @@ -973,6 +980,14 @@ pub struct MemoryInfo { /// Default memory size in MiB for SB/VM. #[serde(default)] pub default_memory: u32, + /// Memory overhead in MiB to be added when sandbox/container memory + /// limits are provided. + /// + /// This value is used by runtime-rs static sandbox sizing as: + /// - if no memory limits are provided: use `default_memory` + /// - if memory limits are provided: use `overhead_memory + workload_memory` + #[serde(default)] + pub overhead_memory: u32, /// Default maximum memory in MiB per SB/VM: /// - Unspecified or `0`: Set to actual physical RAM @@ -1974,11 +1989,13 @@ mod tests { input: &mut CpuInfo { cpu_features: "".to_string(), default_vcpus: 0.0, + overhead_vcpus: 0.0, default_maxvcpus: 0, }, output: CpuInfo { cpu_features: "".to_string(), default_vcpus, + overhead_vcpus: 0.0, default_maxvcpus: node_cpus as u32, }, }, @@ -1987,11 +2004,13 @@ mod tests { input: &mut CpuInfo { cpu_features: "a,b,c".to_string(), default_vcpus: 9999999.0, + overhead_vcpus: 0.0, default_maxvcpus: 9999999, }, output: CpuInfo { cpu_features: "a,b,c".to_string(), default_vcpus: node_cpus, + overhead_vcpus: 0.0, default_maxvcpus: node_cpus as u32, }, }, @@ -2000,14 +2019,31 @@ mod tests { input: &mut CpuInfo { cpu_features: "a, b ,c".to_string(), 
default_vcpus: -1.0, + overhead_vcpus: 0.0, default_maxvcpus: 1, }, output: CpuInfo { cpu_features: "a,b,c".to_string(), default_vcpus: 1.0, + overhead_vcpus: 0.0, default_maxvcpus: 1, }, }, + TestData { + desc: "overhead_vcpus explicitly set keeps value", + input: &mut CpuInfo { + cpu_features: "x, y".to_string(), + default_vcpus: 0.0, + overhead_vcpus: 0.5, + default_maxvcpus: 2, + }, + output: CpuInfo { + cpu_features: "x,y".to_string(), + default_vcpus, + overhead_vcpus: 0.5, + default_maxvcpus: 2, + }, + }, ]; for tc in tests.iter_mut() { @@ -2029,9 +2065,30 @@ mod tests { "test[{}] default_maxvcpus", tc.desc ); + assert_eq!( + tc.input.overhead_vcpus, tc.output.overhead_vcpus, + "test[{}] overhead_vcpus", + tc.desc + ); } } + #[test] + fn test_memory_info_adjust_config_keeps_explicit_overhead_memory() { + let mut mem = MemoryInfo { + default_memory: 1024, + overhead_memory: 512, + default_maxmemory: 4096, + ..Default::default() + }; + + mem.adjust_config().unwrap(); + + assert_eq!(mem.overhead_memory, 512); + assert_eq!(mem.default_memory, 1024); + assert_eq!(mem.default_maxmemory, 4096); + } + #[cfg(all(target_arch = "powerpc64", target_endian = "little"))] use rstest::rstest; diff --git a/src/runtime-rs/Makefile b/src/runtime-rs/Makefile index edcdcf2ee3..aa79ae33a2 100644 --- a/src/runtime-rs/Makefile +++ b/src/runtime-rs/Makefile @@ -161,6 +161,22 @@ DEFVCPUS := 1 DEFMAXVCPUS := 0 ##VAR DEFMEMSZ= Default memory size in MiB DEFMEMSZ := 2048 +##VAR DEFOVERHEADVCPUS_QEMU= vCPU overhead for qemu runtimes +DEFOVERHEADVCPUS_QEMU := 0.2 +##VAR DEFOVERHEADMEMSZ_QEMU= Memory overhead (MiB) for qemu runtimes +DEFOVERHEADMEMSZ_QEMU := 32 +##VAR DEFOVERHEADVCPUS_CLH= vCPU overhead for clh runtimes +DEFOVERHEADVCPUS_CLH := 0.2 +##VAR DEFOVERHEADMEMSZ_CLH= Memory overhead (MiB) for clh runtimes +DEFOVERHEADMEMSZ_CLH := 32 +##VAR DEFOVERHEADVCPUS_DB= vCPU overhead for dragonball runtimes +DEFOVERHEADVCPUS_DB := 0.2 +##VAR DEFOVERHEADMEMSZ_DB= Memory overhead (MiB) for 
dragonball runtimes +DEFOVERHEADMEMSZ_DB := 32 +##VAR DEFOVERHEADVCPUS_TEE= vCPU overhead for TEE runtimes +DEFOVERHEADVCPUS_TEE := 0.4 +##VAR DEFOVERHEADMEMSZ_TEE= Memory overhead (MiB) for TEE runtimes +DEFOVERHEADMEMSZ_TEE := 128 ##VAR DEFMEMSLOTS= Default memory slots # Cases to consider : # - nvdimm rootfs image @@ -452,6 +468,8 @@ endif KERNELVERITYPARAMS_NV ?= DEFAULTVCPUS_NV := 1 DEFAULTMEMORY_NV := 8192 + DEFOVERHEADVCPUS_NV := 0.5 + DEFOVERHEADMEMSZ_NV := 512 DEFAULTTIMEOUT_NV := 1200 DEFAULTLAUNCHPROCESSTIMEOUT_NV := 15 DEFAULTPCIEROOTPORT_NV := 8 @@ -672,6 +690,14 @@ USER_VARS += SHAREDIR USER_VARS += SYSCONFDIR USER_VARS += DEFVCPUS USER_VARS += DEFVCPUS_QEMU +USER_VARS += DEFOVERHEADVCPUS_QEMU +USER_VARS += DEFOVERHEADMEMSZ_QEMU +USER_VARS += DEFOVERHEADVCPUS_CLH +USER_VARS += DEFOVERHEADMEMSZ_CLH +USER_VARS += DEFOVERHEADVCPUS_TEE +USER_VARS += DEFOVERHEADVCPUS_DB +USER_VARS += DEFOVERHEADMEMSZ_DB +USER_VARS += DEFOVERHEADMEMSZ_TEE USER_VARS += DEFMAXVCPUS USER_VARS += DEFMAXVCPUS_DB USER_VARS += DEFMAXVCPUS_QEMU @@ -760,6 +786,8 @@ USER_VARS += KERNELPARAMS_CONFIDENTIAL_NV USER_VARS += KERNELVERITYPARAMS_NV USER_VARS += DEFAULTVCPUS_NV USER_VARS += DEFAULTMEMORY_NV +USER_VARS += DEFOVERHEADVCPUS_NV +USER_VARS += DEFOVERHEADMEMSZ_NV USER_VARS += DEFAULTTIMEOUT_NV USER_VARS += DEFAULTLAUNCHPROCESSTIMEOUT_NV USER_VARS += DEFAULTPCIEROOTPORT_NV diff --git a/src/runtime-rs/config/configuration-clh-azure-runtime-rs.toml.in b/src/runtime-rs/config/configuration-clh-azure-runtime-rs.toml.in index 308ad7bbd9..668f5e527d 100644 --- a/src/runtime-rs/config/configuration-clh-azure-runtime-rs.toml.in +++ b/src/runtime-rs/config/configuration-clh-azure-runtime-rs.toml.in @@ -65,6 +65,15 @@ kernel_params = "@KERNELPARAMS@" # > number of physical cores --> will be set to the actual number of physical cores default_vcpus = @DEFVCPUS@ +# Guest-side vCPU overhead budget (fractional) used with static_sandbox_resource_mgmt. 
+# When workload limits are present, vm_vcpus = requested_vcpus + overhead_vcpus +# (rounded up at boot). If a workload limit is set on another dimension (for example +# memory) but CPU is missing, requested_vcpus is treated as 0 and vm_vcpus equals +# overhead_vcpus (minimum 1 at boot). When no workload limits are present, +# default_vcpus is used instead. +# See docs/how-to/how-to-size-sandbox-overhead-runtime-rs.md +overhead_vcpus = @DEFOVERHEADVCPUS_CLH@ + # Default maximum number of vCPUs per SB/VM: # unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number # of vCPUs supported by KVM if that number is exceeded @@ -85,6 +94,14 @@ default_maxvcpus = @DEFMAXVCPUS@ # If unspecified then it will be set @DEFMEMSZ@ MiB. default_memory = @DEFMEMSZ@ +# Guest-side memory overhead budget (MiB) used with static_sandbox_resource_mgmt. +# When workload limits are present, vm_memory = requested_memory + overhead_memory. +# If a workload limit is set on another dimension (for example CPU) but memory is +# missing, requested_memory is treated as 0, so vm_memory equals overhead_memory. +# When no workload limits are present, default_memory is used instead. +# See docs/how-to/how-to-size-sandbox-overhead-runtime-rs.md +overhead_memory = @DEFOVERHEADMEMSZ_CLH@ + # Shared file system type: # - virtio-fs # - virtio-fs-nydus diff --git a/src/runtime-rs/config/configuration-clh-runtime-rs.toml.in b/src/runtime-rs/config/configuration-clh-runtime-rs.toml.in index c34e95b152..842f77fcd9 100644 --- a/src/runtime-rs/config/configuration-clh-runtime-rs.toml.in +++ b/src/runtime-rs/config/configuration-clh-runtime-rs.toml.in @@ -65,6 +65,15 @@ kernel_params = "@KERNELPARAMS@" # > number of physical cores --> will be set to the actual number of physical cores default_vcpus = @DEFVCPUS@ +# Guest-side vCPU overhead budget (fractional) used with static_sandbox_resource_mgmt. 
+# When workload limits are present, vm_vcpus = requested_vcpus + overhead_vcpus +# (rounded up at boot). If a workload limit is set on another dimension (for example +# memory) but CPU is missing, requested_vcpus is treated as 0 and vm_vcpus equals +# overhead_vcpus (minimum 1 at boot). When no workload limits are present, +# default_vcpus is used instead. +# See docs/how-to/how-to-size-sandbox-overhead-runtime-rs.md +overhead_vcpus = @DEFOVERHEADVCPUS_CLH@ + # Default maximum number of vCPUs per SB/VM: # unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number # of vCPUs supported by KVM if that number is exceeded @@ -85,6 +94,14 @@ default_maxvcpus = @DEFMAXVCPUS@ # If unspecified then it will be set @DEFMEMSZ@ MiB. default_memory = @DEFMEMSZ@ +# Guest-side memory overhead budget (MiB) used with static_sandbox_resource_mgmt. +# When workload limits are present, vm_memory = requested_memory + overhead_memory. +# If a workload limit is set on another dimension (for example CPU) but memory is +# missing, requested_memory is treated as 0, so vm_memory equals overhead_memory. +# When no workload limits are present, default_memory is used instead. +# See docs/how-to/how-to-size-sandbox-overhead-runtime-rs.md +overhead_memory = @DEFOVERHEADMEMSZ_CLH@ + # Shared file system type: # - virtio-fs # - virtio-fs-nydus diff --git a/src/runtime-rs/config/configuration-dragonball.toml.in b/src/runtime-rs/config/configuration-dragonball.toml.in index 44e5c903f2..bc13e8427c 100644 --- a/src/runtime-rs/config/configuration-dragonball.toml.in +++ b/src/runtime-rs/config/configuration-dragonball.toml.in @@ -68,6 +68,15 @@ firmware = "@FIRMWAREPATH@" # > number of physical cores --> will be set to the actual number of physical cores default_vcpus = @DEFVCPUS@ +# Guest-side vCPU overhead budget (fractional) used with static_sandbox_resource_mgmt. 
+# When workload limits are present, vm_vcpus = requested_vcpus + overhead_vcpus +# (rounded up at boot). If a workload limit is set on another dimension (for example +# memory) but CPU is missing, requested_vcpus is treated as 0 and vm_vcpus equals +# overhead_vcpus (minimum 1 at boot). When no workload limits are present, +# default_vcpus is used instead. +# See docs/how-to/how-to-size-sandbox-overhead-runtime-rs.md +overhead_vcpus = @DEFOVERHEADVCPUS_DB@ + # Default maximum number of vCPUs per SB/VM: # unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number @@ -112,6 +121,14 @@ reclaim_guest_freed_memory = false # If unspecified then it will be set @DEFMEMSZ@ MiB. default_memory = @DEFMEMSZ@ +# Guest-side memory overhead budget (MiB) used with static_sandbox_resource_mgmt. +# When workload limits are present, vm_memory = requested_memory + overhead_memory. +# If a workload limit is set on another dimension (for example CPU) but memory is +# missing, requested_memory is treated as 0, so vm_memory equals overhead_memory. +# When no workload limits are present, default_memory is used instead. 
+# See docs/how-to/how-to-size-sandbox-overhead-runtime-rs.md +overhead_memory = @DEFOVERHEADMEMSZ_DB@ + # Default maximum memory in MiB per SB / VM # unspecified or == 0 --> will be set to the actual amount of physical RAM # > 0 <= amount of physical RAM --> will be set to the specified number diff --git a/src/runtime-rs/config/configuration-qemu-coco-dev-runtime-rs.toml.in b/src/runtime-rs/config/configuration-qemu-coco-dev-runtime-rs.toml.in index 437cd740c4..3002a8254d 100644 --- a/src/runtime-rs/config/configuration-qemu-coco-dev-runtime-rs.toml.in +++ b/src/runtime-rs/config/configuration-qemu-coco-dev-runtime-rs.toml.in @@ -107,6 +107,15 @@ cpu_features = "@CPUFEATURES@" # > number of physical cores --> will be set to the actual number of physical cores default_vcpus = @DEFVCPUS_QEMU@ +# Guest-side vCPU overhead budget (fractional) used with static_sandbox_resource_mgmt. +# When workload limits are present, vm_vcpus = requested_vcpus + overhead_vcpus +# (rounded up at boot). If a workload limit is set on another dimension (for example +# memory) but CPU is missing, requested_vcpus is treated as 0 and vm_vcpus equals +# overhead_vcpus (minimum 1 at boot). When no workload limits are present, +# default_vcpus is used instead. +# See docs/how-to/how-to-size-sandbox-overhead-runtime-rs.md +overhead_vcpus = @DEFOVERHEADVCPUS_TEE@ + # Default maximum number of vCPUs per SB/VM: # unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number # of vCPUs supported by KVM if that number is exceeded @@ -149,6 +158,14 @@ reclaim_guest_freed_memory = false # Default memory size in MiB for SB/VM. # If unspecified then it will be set @DEFMEMSZ@ MiB. default_memory = @DEFMEMSZ@ + +# Guest-side memory overhead budget (MiB) used with static_sandbox_resource_mgmt. +# When workload limits are present, vm_memory = requested_memory + overhead_memory. 
+# If a workload limit is set on another dimension (for example CPU) but memory is +# missing, requested_memory is treated as 0, so vm_memory equals overhead_memory. +# When no workload limits are present, default_memory is used instead. +# See docs/how-to/how-to-size-sandbox-overhead-runtime-rs.md +overhead_memory = @DEFOVERHEADMEMSZ_TEE@ # # Default memory slots per SB/VM. # If unspecified then it will be set @DEFMEMSLOTS@. diff --git a/src/runtime-rs/config/configuration-qemu-nvidia-gpu-runtime-rs.toml.in b/src/runtime-rs/config/configuration-qemu-nvidia-gpu-runtime-rs.toml.in index 3738301bcd..ff1785ad14 100644 --- a/src/runtime-rs/config/configuration-qemu-nvidia-gpu-runtime-rs.toml.in +++ b/src/runtime-rs/config/configuration-qemu-nvidia-gpu-runtime-rs.toml.in @@ -99,6 +99,15 @@ cpu_features = "@CPUFEATURES@" # > number of physical cores --> will be set to the actual number of physical cores default_vcpus = @DEFAULTVCPUS_NV@ +# Guest-side vCPU overhead budget (fractional) used with static_sandbox_resource_mgmt. +# When workload limits are present, vm_vcpus = requested_vcpus + overhead_vcpus +# (rounded up at boot). If a workload limit is set on another dimension (for example +# memory) but CPU is missing, requested_vcpus is treated as 0 and vm_vcpus equals +# overhead_vcpus (minimum 1 at boot). When no workload limits are present, +# default_vcpus is used instead. +# See docs/how-to/how-to-size-sandbox-overhead-runtime-rs.md +overhead_vcpus = @DEFOVERHEADVCPUS_NV@ + # Default maximum number of vCPUs per SB/VM: # unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number # of vCPUs supported by KVM if that number is exceeded @@ -141,6 +150,14 @@ reclaim_guest_freed_memory = false # Default memory size in MiB for SB/VM. # If unspecified then it will be set @DEFMEMSZ@ MiB. default_memory = @DEFAULTMEMORY_NV@ + +# Guest-side memory overhead budget (MiB) used with static_sandbox_resource_mgmt. 
+# When workload limits are present, vm_memory = requested_memory + overhead_memory. +# If a workload limit is set on another dimension (for example CPU) but memory is +# missing, requested_memory is treated as 0, so vm_memory equals overhead_memory. +# When no workload limits are present, default_memory is used instead. +# See docs/how-to/how-to-size-sandbox-overhead-runtime-rs.md +overhead_memory = @DEFOVERHEADMEMSZ_NV@ # # Default memory slots per SB/VM. # If unspecified then it will be set @DEFMEMSLOTS@. diff --git a/src/runtime-rs/config/configuration-qemu-nvidia-gpu-snp-runtime-rs.toml.in b/src/runtime-rs/config/configuration-qemu-nvidia-gpu-snp-runtime-rs.toml.in index 06f7a2e9f9..82814e2bf0 100644 --- a/src/runtime-rs/config/configuration-qemu-nvidia-gpu-snp-runtime-rs.toml.in +++ b/src/runtime-rs/config/configuration-qemu-nvidia-gpu-snp-runtime-rs.toml.in @@ -140,6 +140,15 @@ cpu_features = "@CPUFEATURES@" # > number of physical cores --> will be set to the actual number of physical cores default_vcpus = @DEFAULTVCPUS_NV@ +# Guest-side vCPU overhead budget (fractional) used with static_sandbox_resource_mgmt. +# When workload limits are present, vm_vcpus = requested_vcpus + overhead_vcpus +# (rounded up at boot). If a workload limit is set on another dimension (for example +# memory) but CPU is missing, requested_vcpus is treated as 0 and vm_vcpus equals +# overhead_vcpus (minimum 1 at boot). When no workload limits are present, +# default_vcpus is used instead. +# See docs/how-to/how-to-size-sandbox-overhead-runtime-rs.md +overhead_vcpus = @DEFOVERHEADVCPUS_NV@ + # Default maximum number of vCPUs per SB/VM: # unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number # of vCPUs supported by KVM if that number is exceeded @@ -182,6 +191,14 @@ reclaim_guest_freed_memory = false # Default memory size in MiB for SB/VM. # If unspecified then it will be set @DEFMEMSZ@ MiB. 
default_memory = @DEFAULTMEMORY_NV@ + +# Guest-side memory overhead budget (MiB) used with static_sandbox_resource_mgmt. +# When workload limits are present, vm_memory = requested_memory + overhead_memory. +# If a workload limit is set on another dimension (for example CPU) but memory is +# missing, requested_memory is treated as 0, so vm_memory equals overhead_memory. +# When no workload limits are present, default_memory is used instead. +# See docs/how-to/how-to-size-sandbox-overhead-runtime-rs.md +overhead_memory = @DEFOVERHEADMEMSZ_NV@ # # Default memory slots per SB/VM. # If unspecified then it will be set @DEFMEMSLOTS@. diff --git a/src/runtime-rs/config/configuration-qemu-nvidia-gpu-tdx-runtime-rs.toml.in b/src/runtime-rs/config/configuration-qemu-nvidia-gpu-tdx-runtime-rs.toml.in index 9ae7041cc5..1df79b54b9 100644 --- a/src/runtime-rs/config/configuration-qemu-nvidia-gpu-tdx-runtime-rs.toml.in +++ b/src/runtime-rs/config/configuration-qemu-nvidia-gpu-tdx-runtime-rs.toml.in @@ -116,6 +116,15 @@ cpu_features = "@CPUFEATURES@" # > number of physical cores --> will be set to the actual number of physical cores default_vcpus = @DEFAULTVCPUS_NV@ +# Guest-side vCPU overhead budget (fractional) used with static_sandbox_resource_mgmt. +# When workload limits are present, vm_vcpus = requested_vcpus + overhead_vcpus +# (rounded up at boot). If a workload limit is set on another dimension (for example +# memory) but CPU is missing, requested_vcpus is treated as 0 and vm_vcpus equals +# overhead_vcpus (minimum 1 at boot). When no workload limits are present, +# default_vcpus is used instead. 
+# See docs/how-to/how-to-size-sandbox-overhead-runtime-rs.md +overhead_vcpus = @DEFOVERHEADVCPUS_NV@ + # Default maximum number of vCPUs per SB/VM: # unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number # of vCPUs supported by KVM if that number is exceeded @@ -158,6 +167,14 @@ reclaim_guest_freed_memory = false # Default memory size in MiB for SB/VM. # If unspecified then it will be set @DEFMEMSZ@ MiB. default_memory = @DEFAULTMEMORY_NV@ + +# Guest-side memory overhead budget (MiB) used with static_sandbox_resource_mgmt. +# When workload limits are present, vm_memory = requested_memory + overhead_memory. +# If a workload limit is set on another dimension (for example CPU) but memory is +# missing, requested_memory is treated as 0, so vm_memory equals overhead_memory. +# When no workload limits are present, default_memory is used instead. +# See docs/how-to/how-to-size-sandbox-overhead-runtime-rs.md +overhead_memory = @DEFOVERHEADMEMSZ_NV@ # # Default memory slots per SB/VM. # If unspecified then it will be set @DEFMEMSLOTS@. diff --git a/src/runtime-rs/config/configuration-qemu-runtime-rs.toml.in b/src/runtime-rs/config/configuration-qemu-runtime-rs.toml.in index 47adda3409..897ae5166d 100644 --- a/src/runtime-rs/config/configuration-qemu-runtime-rs.toml.in +++ b/src/runtime-rs/config/configuration-qemu-runtime-rs.toml.in @@ -86,6 +86,15 @@ cpu_features = "@CPUFEATURES@" # > number of physical cores --> will be set to the actual number of physical cores default_vcpus = @DEFVCPUS_QEMU@ +# Guest-side vCPU overhead budget (fractional) used with static_sandbox_resource_mgmt. +# When workload limits are present, vm_vcpus = requested_vcpus + overhead_vcpus +# (rounded up at boot). If a workload limit is set on another dimension (for example +# memory) but CPU is missing, requested_vcpus is treated as 0 and vm_vcpus equals +# overhead_vcpus (minimum 1 at boot). 
When no workload limits are present, +# default_vcpus is used instead. +# See docs/how-to/how-to-size-sandbox-overhead-runtime-rs.md +overhead_vcpus = @DEFOVERHEADVCPUS_QEMU@ + # Default maximum number of vCPUs per SB/VM: # unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number # of vCPUs supported by KVM if that number is exceeded @@ -128,6 +137,14 @@ reclaim_guest_freed_memory = false # Default memory size in MiB for SB/VM. # If unspecified then it will be set @DEFMEMSZ@ MiB. default_memory = @DEFMEMSZ@ + +# Guest-side memory overhead budget (MiB) used with static_sandbox_resource_mgmt. +# When workload limits are present, vm_memory = requested_memory + overhead_memory. +# If a workload limit is set on another dimension (for example CPU) but memory is +# missing, requested_memory is treated as 0, so vm_memory equals overhead_memory. +# When no workload limits are present, default_memory is used instead. +# See docs/how-to/how-to-size-sandbox-overhead-runtime-rs.md +overhead_memory = @DEFOVERHEADMEMSZ_QEMU@ # # Default memory slots per SB/VM. # If unspecified then it will be set @DEFMEMSLOTS@. diff --git a/src/runtime-rs/config/configuration-qemu-se-runtime-rs.toml.in b/src/runtime-rs/config/configuration-qemu-se-runtime-rs.toml.in index 7b3d1649e4..21b8d27560 100644 --- a/src/runtime-rs/config/configuration-qemu-se-runtime-rs.toml.in +++ b/src/runtime-rs/config/configuration-qemu-se-runtime-rs.toml.in @@ -95,6 +95,15 @@ cpu_features = "@CPUFEATURES@" # > number of physical cores --> will be set to the actual number of physical cores default_vcpus = @DEFVCPUS_QEMU@ +# Guest-side vCPU overhead budget (fractional) used with static_sandbox_resource_mgmt. +# When workload limits are present, vm_vcpus = requested_vcpus + overhead_vcpus +# (rounded up at boot). 
If a workload limit is set on another dimension (for example +# memory) but CPU is missing, requested_vcpus is treated as 0 and vm_vcpus equals +# overhead_vcpus (minimum 1 at boot). When no workload limits are present, +# default_vcpus is used instead. +# See docs/how-to/how-to-size-sandbox-overhead-runtime-rs.md +overhead_vcpus = @DEFOVERHEADVCPUS_TEE@ + # Default maximum number of vCPUs per SB/VM: # unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number # of vCPUs supported by KVM if that number is exceeded @@ -127,6 +136,14 @@ default_bridges = @DEFBRIDGES@ # Default memory size in MiB for SB/VM. # If unspecified then it will be set @DEFMEMSZ@ MiB. default_memory = @DEFMEMSZ@ + +# Guest-side memory overhead budget (MiB) used with static_sandbox_resource_mgmt. +# When workload limits are present, vm_memory = requested_memory + overhead_memory. +# If a workload limit is set on another dimension (for example CPU) but memory is +# missing, requested_memory is treated as 0, so vm_memory equals overhead_memory. +# When no workload limits are present, default_memory is used instead. +# See docs/how-to/how-to-size-sandbox-overhead-runtime-rs.md +overhead_memory = @DEFOVERHEADMEMSZ_TEE@ # # Default memory slots per SB/VM. # If unspecified then it will be set @DEFMEMSLOTS@. diff --git a/src/runtime-rs/config/configuration-qemu-snp-runtime-rs.toml.in b/src/runtime-rs/config/configuration-qemu-snp-runtime-rs.toml.in index de39c6a424..2770269b00 100644 --- a/src/runtime-rs/config/configuration-qemu-snp-runtime-rs.toml.in +++ b/src/runtime-rs/config/configuration-qemu-snp-runtime-rs.toml.in @@ -133,6 +133,15 @@ cpu_features = "@CPUFEATURES@" # > number of physical cores --> will be set to the actual number of physical cores default_vcpus = @DEFVCPUS_QEMU@ +# Guest-side vCPU overhead budget (fractional) used with static_sandbox_resource_mgmt. 
+# When workload limits are present, vm_vcpus = requested_vcpus + overhead_vcpus +# (rounded up at boot). If a workload limit is set on another dimension (for example +# memory) but CPU is missing, requested_vcpus is treated as 0 and vm_vcpus equals +# overhead_vcpus (minimum 1 at boot). When no workload limits are present, +# default_vcpus is used instead. +# See docs/how-to/how-to-size-sandbox-overhead-runtime-rs.md +overhead_vcpus = @DEFOVERHEADVCPUS_TEE@ + # Default maximum number of vCPUs per SB/VM: # unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number # of vCPUs supported by KVM if that number is exceeded @@ -166,6 +175,14 @@ default_bridges = @DEFBRIDGES@ # If unspecified then it will be set @DEFMEMSZ@ MiB. default_memory = @DEFMEMSZ@ +# Guest-side memory overhead budget (MiB) used with static_sandbox_resource_mgmt. +# When workload limits are present, vm_memory = requested_memory + overhead_memory. +# If a workload limit is set on another dimension (for example CPU) but memory is +# missing, requested_memory is treated as 0, so vm_memory equals overhead_memory. +# When no workload limits are present, default_memory is used instead. +# See docs/how-to/how-to-size-sandbox-overhead-runtime-rs.md +overhead_memory = @DEFOVERHEADMEMSZ_TEE@ + # # Default memory slots per SB/VM. # If unspecified then it will be set @DEFMEMSLOTS@. diff --git a/src/runtime-rs/config/configuration-qemu-tdx-runtime-rs.toml.in b/src/runtime-rs/config/configuration-qemu-tdx-runtime-rs.toml.in index ccf5b4da37..d09e7583c4 100644 --- a/src/runtime-rs/config/configuration-qemu-tdx-runtime-rs.toml.in +++ b/src/runtime-rs/config/configuration-qemu-tdx-runtime-rs.toml.in @@ -111,6 +111,15 @@ cpu_features = "@CPUFEATURES@" # > number of physical cores --> will be set to the actual number of physical cores default_vcpus = 1 +# Guest-side vCPU overhead budget (fractional) used with static_sandbox_resource_mgmt. 
+# When workload limits are present, vm_vcpus = requested_vcpus + overhead_vcpus +# (rounded up at boot). If a workload limit is set on another dimension (for example +# memory) but CPU is missing, requested_vcpus is treated as 0 and vm_vcpus equals +# overhead_vcpus (minimum 1 at boot). When no workload limits are present, +# default_vcpus is used instead. +# See docs/how-to/how-to-size-sandbox-overhead-runtime-rs.md +overhead_vcpus = @DEFOVERHEADVCPUS_TEE@ + # Default maximum number of vCPUs per SB/VM: # unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number # of vCPUs supported by KVM if that number is exceeded @@ -143,6 +152,14 @@ default_bridges = @DEFBRIDGES@ # Default memory size in MiB for SB/VM. # If unspecified then it will be set @DEFMEMSZ@ MiB. default_memory = @DEFMEMSZ@ + +# Guest-side memory overhead budget (MiB) used with static_sandbox_resource_mgmt. +# When workload limits are present, vm_memory = requested_memory + overhead_memory. +# If a workload limit is set on another dimension (for example CPU) but memory is +# missing, requested_memory is treated as 0, so vm_memory equals overhead_memory. +# When no workload limits are present, default_memory is used instead. +# See docs/how-to/how-to-size-sandbox-overhead-runtime-rs.md +overhead_memory = @DEFOVERHEADMEMSZ_TEE@ # # Default memory slots per SB/VM. # If unspecified then it will be set @DEFMEMSLOTS@. 
diff --git a/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs b/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs index b886d9faae..934207d37c 100644 --- a/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs +++ b/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs @@ -6,7 +6,7 @@ use std::{collections::HashMap, convert::TryFrom}; -use anyhow::{Context, Result}; +use anyhow::{ensure, Context, Result}; use kata_types::{ annotations::Annotation, config::TomlConfig, container::ContainerType, cpu::LinuxContainerCpuResources, k8s::container_type, @@ -159,28 +159,36 @@ impl InitialSizeManager { .get_mut(hypervisor_name) .context("failed to get hypervisor config")?; - if self.resource.vcpu > 0.0 { - info!(sl!(), "resource with vcpu {}", self.resource.vcpu); - if config.runtime.static_sandbox_resource_mgmt { - hv.cpu_info.default_vcpus += self.resource.vcpu; - } - } - - if config.runtime.static_sandbox_resource_mgmt { - let new_vcpus_ceil = hv.cpu_info.default_vcpus.ceil() as u32; - hv.cpu_info.default_maxvcpus = new_vcpus_ceil; - } - self.resource.orig_toml_default_mem = hv.memory_info.default_memory; - if self.resource.mem_mb > 0 { - info!(sl!(), "resource with memory {}", self.resource.mem_mb); - if config.runtime.static_sandbox_resource_mgmt { - hv.memory_info.default_memory += self.resource.mem_mb; - if hv.memory_info.default_maxmemory < hv.memory_info.default_memory { - hv.memory_info.default_maxmemory = hv.memory_info.default_memory; - } - } + + // Non-static mode keeps configured defaults unchanged. 
+ if !config.runtime.static_sandbox_resource_mgmt { + validate_non_zero_sandbox_memory(hypervisor_name, hv.memory_info.default_memory)?; + return Ok(()); } + + if self.resource.vcpu > 0.0 || self.resource.mem_mb > 0 { + if self.resource.vcpu > 0.0 { + info!(sl!(), "resource with vcpu {}", self.resource.vcpu); + } + if self.resource.mem_mb > 0 { + info!(sl!(), "resource with memory {}", self.resource.mem_mb); + } + + hv.cpu_info.default_vcpus = + (hv.cpu_info.overhead_vcpus + self.resource.vcpu).max(1.0); + + hv.memory_info.default_memory = + hv.memory_info.overhead_memory + self.resource.mem_mb; + hv.memory_info.default_maxmemory = hv + .memory_info + .default_maxmemory + .max(hv.memory_info.default_memory); + } + + hv.cpu_info.default_maxvcpus = hv.cpu_info.default_vcpus.ceil() as u32; + + validate_non_zero_sandbox_memory(hypervisor_name, hv.memory_info.default_memory)?; Ok(()) } @@ -189,6 +197,15 @@ impl InitialSizeManager { } } +fn validate_non_zero_sandbox_memory(hypervisor_name: &str, memory_mib: u32) -> Result<()> { + ensure!( + memory_mib > 0, + "computed sandbox memory is 0 MiB for hypervisor '{}'; set a non-zero memory limit or configure non-zero default_memory/overhead_memory", + hypervisor_name + ); + Ok(()) +} + fn get_nr_vcpu(resource: &LinuxContainerCpuResources) -> f32 { if let Some(v) = resource.get_vcpus() { v as f32 @@ -227,6 +244,7 @@ mod tests { use super::*; use kata_types::annotations::cri_containerd; use oci_spec::runtime::{LinuxBuilder, LinuxMemory, LinuxMemoryBuilder, LinuxResourcesBuilder}; + use rstest::rstest; use std::collections::HashMap; #[derive(Clone)] struct InputData { @@ -398,8 +416,10 @@ mod tests { fn make_config( default_vcpus: f32, + overhead_vcpus: f32, default_maxvcpus: u32, default_memory: u32, + overhead_memory: u32, default_maxmemory: u32, static_sandbox_resource_mgmt: bool, ) -> TomlConfig { @@ -411,8 +431,10 @@ mod tests { .insert("qemu".to_owned(), Hypervisor::default()); 
config.hypervisor.entry("qemu".to_owned()).and_modify(|hv| { hv.cpu_info.default_vcpus = default_vcpus; + hv.cpu_info.overhead_vcpus = overhead_vcpus; hv.cpu_info.default_maxvcpus = default_maxvcpus; hv.memory_info.default_memory = default_memory; + hv.memory_info.overhead_memory = overhead_memory; hv.memory_info.default_maxmemory = default_maxmemory; }); config.runtime.hypervisor_name = "qemu".to_owned(); @@ -422,7 +444,7 @@ mod tests { #[test] fn test_setup_config_static_applies_vcpu_and_memory() { - let mut config = make_config(1.0, 4, 256, 4096, true); + let mut config = make_config(1.0, 0.5, 4, 256, 128, 4096, true); let mut mgr = InitialSizeManager { resource: InitialSize { vcpu: 1.2, @@ -433,13 +455,13 @@ mod tests { mgr.setup_config(&mut config).unwrap(); let hv = config.hypervisor.get("qemu").unwrap(); - assert_eq!(hv.cpu_info.default_vcpus, 2.2); - assert_eq!(hv.memory_info.default_memory, 768); + assert_eq!(hv.cpu_info.default_vcpus, 1.7); + assert_eq!(hv.memory_info.default_memory, 640); } #[test] fn test_setup_config_non_static_does_not_apply() { - let mut config = make_config(1.0, 4, 256, 4096, false); + let mut config = make_config(1.0, 0.5, 4, 256, 128, 4096, false); let mut mgr = InitialSizeManager { resource: InitialSize { vcpu: 1.2, @@ -456,7 +478,7 @@ mod tests { #[test] fn test_setup_config_clamps_maxvcpus() { - let mut config = make_config(1.0, 2, 256, 4096, true); + let mut config = make_config(1.0, 1.0, 2, 256, 128, 4096, true); let mut mgr = InitialSizeManager { resource: InitialSize { vcpu: 2.5, @@ -473,7 +495,7 @@ mod tests { #[test] fn test_setup_config_static_reduces_maxvcpus_to_static_total() { - let mut config = make_config(1.0, 8, 256, 4096, true); + let mut config = make_config(1.0, 0.5, 8, 256, 128, 4096, true); let mut mgr = InitialSizeManager { resource: InitialSize { vcpu: 1.2, @@ -484,13 +506,13 @@ mod tests { mgr.setup_config(&mut config).unwrap(); let hv = config.hypervisor.get("qemu").unwrap(); - 
assert_eq!(hv.cpu_info.default_vcpus, 2.2); - assert_eq!(hv.cpu_info.default_maxvcpus, 3); + assert_eq!(hv.cpu_info.default_vcpus, 1.7); + assert_eq!(hv.cpu_info.default_maxvcpus, 2); } #[test] fn test_setup_config_clamps_maxmemory() { - let mut config = make_config(1.0, 4, 256, 300, true); + let mut config = make_config(1.0, 0.5, 4, 256, 128, 300, true); let mut mgr = InitialSizeManager { resource: InitialSize { vcpu: 0.0, @@ -501,13 +523,13 @@ mod tests { mgr.setup_config(&mut config).unwrap(); let hv = config.hypervisor.get("qemu").unwrap(); - assert_eq!(hv.memory_info.default_memory, 768); - assert_eq!(hv.memory_info.default_maxmemory, 768); + assert_eq!(hv.memory_info.default_memory, 640); + assert_eq!(hv.memory_info.default_maxmemory, 640); } #[test] fn test_setup_config_preserves_orig_toml_default_mem() { - let mut config = make_config(1.0, 4, 256, 4096, true); + let mut config = make_config(1.0, 0.5, 4, 256, 128, 4096, true); let mut mgr = InitialSizeManager { resource: InitialSize { vcpu: 0.0, @@ -551,4 +573,77 @@ mod tests { assert!((mgr.resource.vcpu - 1.2).abs() < VCPU_TOLERANCE); assert_eq!(mgr.resource.mem_mb, 256); } + + #[test] + fn test_setup_config_static_without_limits_uses_toml_defaults() { + let mut config = make_config(2.0, 0.5, 8, 512, 128, 4096, true); + let mut mgr = InitialSizeManager { + resource: InitialSize { + vcpu: 0.0, + mem_mb: 0, + orig_toml_default_mem: 0, + }, + }; + + mgr.setup_config(&mut config).unwrap(); + let hv = config.hypervisor.get("qemu").unwrap(); + assert_eq!(hv.cpu_info.default_vcpus, 2.0); + assert_eq!(hv.memory_info.default_memory, 512); + } + + #[test] + fn test_setup_config_static_errors_on_zero_memory() { + let mut config = make_config(1.0, 0.5, 8, 1024, 0, 4096, true); + let mut mgr = InitialSizeManager { + resource: InitialSize { + vcpu: 1.0, + mem_mb: 0, + orig_toml_default_mem: 0, + }, + }; + + let err = mgr.setup_config(&mut config).unwrap_err().to_string(); + assert!(err.contains("computed sandbox memory 
is 0 MiB")); + assert!(err.contains("default_memory/overhead_memory")); + } + + #[rstest] + #[case::both_limits(3.0, 0.75, 1024, 256, 1.25, 1024, 2.0, 1280)] + #[case::cpu_only_limit(3.0, 0.5, 1024, 128, 1.5, 0, 2.0, 128)] + #[case::memory_only_limit(3.0, 0.5, 1024, 128, 0.0, 512, 1.0, 640)] + #[case::both_limits_zero_overhead(3.0, 0.0, 1024, 0, 1.25, 1024, 1.25, 1024)] + #[case::memory_only_zero_overhead(3.0, 0.0, 1024, 0, 0.0, 512, 1.0, 512)] + fn test_setup_config_static_requested_vs_defaults( + #[case] default_vcpus: f32, + #[case] overhead_vcpus: f32, + #[case] default_memory: u32, + #[case] overhead_memory: u32, + #[case] requested_vcpus: f32, + #[case] requested_mem_mb: u32, + #[case] expected_default_vcpus: f32, + #[case] expected_default_memory: u32, + ) { + let mut config = make_config( + default_vcpus, + overhead_vcpus, + 8, + default_memory, + overhead_memory, + 4096, + true, + ); + let mut mgr = InitialSizeManager { + resource: InitialSize { + vcpu: requested_vcpus, + mem_mb: requested_mem_mb, + orig_toml_default_mem: 0, + }, + }; + + mgr.setup_config(&mut config).unwrap(); + let hv = config.hypervisor.get("qemu").unwrap(); + + assert_eq!(hv.cpu_info.default_vcpus, expected_default_vcpus); + assert_eq!(hv.memory_info.default_memory, expected_default_memory); + } } diff --git a/tests/integration/kubernetes/runtimeclass_workloads/pod-guest-pull-in-trusted-storage.yaml.in b/tests/integration/kubernetes/runtimeclass_workloads/pod-guest-pull-in-trusted-storage.yaml.in index 8bc921b17f..a1fcb53393 100644 --- a/tests/integration/kubernetes/runtimeclass_workloads/pod-guest-pull-in-trusted-storage.yaml.in +++ b/tests/integration/kubernetes/runtimeclass_workloads/pod-guest-pull-in-trusted-storage.yaml.in @@ -31,6 +31,7 @@ spec: resources: limits: cpu: "2" + memory: "2Gi" volumeDevices: - devicePath: /dev/trusted_store name: trusted-storage