runtime: Enforce that OCI memory limit is at least the 128MiB baseline

For our Kata UVM, we know we need at least 128MB of memory to prevent instability in the guest.

Enforce this constraint with a descriptive error to prevent users from destabilizing the UVM with faulty k8s configurations.

Signed-off-by: Cameron Baird <cameronbaird@microsoft.com>
This commit is contained in:
Cameron Baird
2025-07-29 21:44:59 +00:00
committed by Cameron E Baird
parent f58fd1a726
commit 317ebb81de
5 changed files with 43 additions and 23 deletions

View File

@@ -278,6 +278,11 @@ DEFSTATICRESOURCEMGMT_TEE = true
DEFSTATICSANDBOXWORKLOADMEM ?= 2048
DEFSTATICSANDBOXWORKLOADVCPUS ?= 1
# If set, the runtime rejects pods deployed in a sandbox that explicitly
# set a memory limit (resources.limits.memory) below this amount, in MiB,
# so that the sandbox has enough memory to start properly.
DEFSANDBOXWORKLOADMEMMIN ?= 128
DEFDISABLEIMAGENVDIMM ?= false
DEFBINDMOUNTS := []
@@ -751,6 +756,7 @@ USER_VARS += DEFSTATICRESOURCEMGMT_FC
USER_VARS += DEFSTATICRESOURCEMGMT_STRATOVIRT
USER_VARS += DEFSTATICRESOURCEMGMT_TEE
USER_VARS += DEFSTATICSANDBOXWORKLOADMEM
USER_VARS += DEFSANDBOXWORKLOADMEMMIN
USER_VARS += DEFSTATICSANDBOXWORKLOADVCPUS
USER_VARS += DEFBINDMOUNTS
USER_VARS += DEFCREATECONTAINERTIMEOUT

View File

@@ -442,6 +442,11 @@ static_sandbox_default_workload_mem=@DEFSTATICSANDBOXWORKLOADMEM@
# default amount of vcpus available within the sandbox.
static_sandbox_default_workload_vcpus=@DEFSTATICSANDBOXWORKLOADVCPUS@
# The runtime rejects pods deployed in a sandbox that explicitly
# set a memory limit (resources.limits.memory) below this amount, in MiB,
# so that the sandbox has enough memory to start properly.
sandbox_workload_mem_min=@DEFSANDBOXWORKLOADMEMMIN@
# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro) into the sandbox's shared path.
# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`

View File

@@ -174,28 +174,29 @@ type hypervisor struct {
}
// runtime holds runtime-level settings decoded from the TOML configuration;
// each field's `toml` tag names the key it is read from.
//
// NOTE(review): most fields below are listed twice with identical names and
// tags. This looks like the before/after sides of a diff hunk rendered
// without +/- markers — confirm against the real file, where each field
// should be declared exactly once.
type runtime struct {
InterNetworkModel string `toml:"internetworking_model"`
JaegerEndpoint string `toml:"jaeger_endpoint"`
JaegerUser string `toml:"jaeger_user"`
JaegerPassword string `toml:"jaeger_password"`
VfioMode string `toml:"vfio_mode"`
GuestSeLinuxLabel string `toml:"guest_selinux_label"`
SandboxBindMounts []string `toml:"sandbox_bind_mounts"`
Experimental []string `toml:"experimental"`
Tracing bool `toml:"enable_tracing"`
DisableNewNetNs bool `toml:"disable_new_netns"`
DisableGuestSeccomp bool `toml:"disable_guest_seccomp"`
EnableVCPUsPinning bool `toml:"enable_vcpus_pinning"`
Debug bool `toml:"enable_debug"`
SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"`
StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"`
StaticSandboxWorkloadDefaultMem uint32 `toml:"static_sandbox_default_workload_mem"`
InterNetworkModel string `toml:"internetworking_model"`
JaegerEndpoint string `toml:"jaeger_endpoint"`
JaegerUser string `toml:"jaeger_user"`
JaegerPassword string `toml:"jaeger_password"`
VfioMode string `toml:"vfio_mode"`
GuestSeLinuxLabel string `toml:"guest_selinux_label"`
SandboxBindMounts []string `toml:"sandbox_bind_mounts"`
Experimental []string `toml:"experimental"`
Tracing bool `toml:"enable_tracing"`
DisableNewNetNs bool `toml:"disable_new_netns"`
DisableGuestSeccomp bool `toml:"disable_guest_seccomp"`
EnableVCPUsPinning bool `toml:"enable_vcpus_pinning"`
Debug bool `toml:"enable_debug"`
SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"`
StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"`
StaticSandboxWorkloadDefaultMem uint32 `toml:"static_sandbox_default_workload_mem"`
StaticSandboxWorkloadDefaultVcpus float32 `toml:"static_sandbox_default_workload_vcpus"`
EnablePprof bool `toml:"enable_pprof"`
DisableGuestEmptyDir bool `toml:"disable_guest_empty_dir"`
CreateContainerTimeout uint64 `toml:"create_container_timeout"`
DanConf string `toml:"dan_conf"`
ForceGuestPull bool `toml:"experimental_force_guest_pull"`
// Minimum workload memory, in MiB, read from sandbox_workload_mem_min.
SandboxWorkloadMemMin uint32 `toml:"sandbox_workload_mem_min"`
EnablePprof bool `toml:"enable_pprof"`
DisableGuestEmptyDir bool `toml:"disable_guest_empty_dir"`
CreateContainerTimeout uint64 `toml:"create_container_timeout"`
DanConf string `toml:"dan_conf"`
ForceGuestPull bool `toml:"experimental_force_guest_pull"`
}
type agent struct {
@@ -1565,6 +1566,7 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat
config.GuestSeLinuxLabel = tomlConf.Runtime.GuestSeLinuxLabel
config.StaticSandboxResourceMgmt = tomlConf.Runtime.StaticSandboxResourceMgmt
config.StaticSandboxWorkloadDefaultMem = tomlConf.Runtime.StaticSandboxWorkloadDefaultMem
config.SandboxWorkloadMemMin = tomlConf.Runtime.SandboxWorkloadMemMin
config.StaticSandboxWorkloadDefaultVcpus = tomlConf.Runtime.StaticSandboxWorkloadDefaultVcpus
config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly
config.DisableNewNetNs = tomlConf.Runtime.DisableNewNetNs

View File

@@ -159,6 +159,9 @@ type RuntimeConfig struct {
// vcpus to allocate for workloads within the sandbox when workload vcpus is unspecified
StaticSandboxWorkloadDefaultVcpus float32
// Minimum memory (in MiB) that must be allocated for workloads within the sandbox when workload memory is specified
SandboxWorkloadMemMin uint32
// Determines whether to create a netns for the hypervisor process
DisableNewNetNs bool
@@ -1202,6 +1205,10 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid st
}
if sandboxConfig.SandboxResources.WorkloadMemMB < runtime.SandboxWorkloadMemMin {
return vc.SandboxConfig{}, fmt.Errorf("pod memory limit too low: minimum %dMiB, got %dMiB", runtime.SandboxWorkloadMemMin, sandboxConfig.SandboxResources.WorkloadMemMB)
}
return sandboxConfig, nil
}

View File

@@ -29,9 +29,9 @@ runtime_make_flags="SKIP_GO_VERSION_CHECK=1 QEMUCMD= FCCMD= ACRNCMD= STRATOVIRTC
# - for ConfPods we explicitly set the cloud-hypervisor path. The path is independent of the PREFIX variable
# as we have a single CLH binary for both vanilla Kata and ConfPods
if [ "${CONF_PODS}" == "no" ]; then
runtime_make_flags+=" DEFSTATICRESOURCEMGMT_CLH=true KERNELPATH_CLH=${KERNEL_BINARY_LOCATION}"
runtime_make_flags+=" DEFSTATICRESOURCEMGMT_CLH=true KERNELPATH_CLH=${KERNEL_BINARY_LOCATION} DEFSANDBOXWORKLOADMEMMIN=128"
else
runtime_make_flags+=" CLHPATH=${CLOUD_HYPERVISOR_LOCATION}"
runtime_make_flags+=" CLHPATH=${CLOUD_HYPERVISOR_LOCATION} DEFSANDBOXWORKLOADMEMMIN=192"
fi
# On Mariner 3.0 we use cgroupsv2 with a single sandbox cgroup