diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 49635b15d5..7e6cbf1f12 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -278,6 +278,11 @@ DEFSTATICRESOURCEMGMT_TEE = true DEFSTATICSANDBOXWORKLOADMEM ?= 2048 DEFSTATICSANDBOXWORKLOADVCPUS ?= 1 +# If set, the runtime will enforce that pods deployed in a sandbox +# explicitly setting memory limits using resources.limits.memory +# allow at least this amount of memory in MiB so that the sandbox can properly start. +DEFSANDBOXWORKLOADMEMMIN ?= 128 + DEFDISABLEIMAGENVDIMM ?= false DEFBINDMOUNTS := [] @@ -751,6 +756,7 @@ USER_VARS += DEFSTATICRESOURCEMGMT_FC USER_VARS += DEFSTATICRESOURCEMGMT_STRATOVIRT USER_VARS += DEFSTATICRESOURCEMGMT_TEE USER_VARS += DEFSTATICSANDBOXWORKLOADMEM +USER_VARS += DEFSANDBOXWORKLOADMEMMIN USER_VARS += DEFSTATICSANDBOXWORKLOADVCPUS USER_VARS += DEFBINDMOUNTS USER_VARS += DEFCREATECONTAINERTIMEOUT diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index 44d0934225..6206f0a385 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -442,6 +442,11 @@ static_sandbox_default_workload_mem=@DEFSTATICSANDBOXWORKLOADMEM@ # default amount of vcpus available within the sandbox. static_sandbox_default_workload_vcpus=@DEFSTATICSANDBOXWORKLOADVCPUS@ +# The runtime will enforce that pods deployed in a sandbox +# explicitly setting memory limits using resources.limits.memory +# allow at least this amount of memory in MiB so that the sandbox can properly start. +sandbox_workload_mem_min=@DEFSANDBOXWORKLOADMEMMIN@ + # If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path. # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory. 
# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts` diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 357915917b..cd4461e0cc 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -174,28 +174,29 @@ type hypervisor struct { } type runtime struct { - InterNetworkModel string `toml:"internetworking_model"` - JaegerEndpoint string `toml:"jaeger_endpoint"` - JaegerUser string `toml:"jaeger_user"` - JaegerPassword string `toml:"jaeger_password"` - VfioMode string `toml:"vfio_mode"` - GuestSeLinuxLabel string `toml:"guest_selinux_label"` - SandboxBindMounts []string `toml:"sandbox_bind_mounts"` - Experimental []string `toml:"experimental"` - Tracing bool `toml:"enable_tracing"` - DisableNewNetNs bool `toml:"disable_new_netns"` - DisableGuestSeccomp bool `toml:"disable_guest_seccomp"` - EnableVCPUsPinning bool `toml:"enable_vcpus_pinning"` - Debug bool `toml:"enable_debug"` - SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"` - StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"` - StaticSandboxWorkloadDefaultMem uint32 `toml:"static_sandbox_default_workload_mem"` + InterNetworkModel string `toml:"internetworking_model"` + JaegerEndpoint string `toml:"jaeger_endpoint"` + JaegerUser string `toml:"jaeger_user"` + JaegerPassword string `toml:"jaeger_password"` + VfioMode string `toml:"vfio_mode"` + GuestSeLinuxLabel string `toml:"guest_selinux_label"` + SandboxBindMounts []string `toml:"sandbox_bind_mounts"` + Experimental []string `toml:"experimental"` + Tracing bool `toml:"enable_tracing"` + DisableNewNetNs bool `toml:"disable_new_netns"` + DisableGuestSeccomp bool `toml:"disable_guest_seccomp"` + EnableVCPUsPinning bool `toml:"enable_vcpus_pinning"` + Debug bool `toml:"enable_debug"` + SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"` + StaticSandboxResourceMgmt bool 
`toml:"static_sandbox_resource_mgmt"` + StaticSandboxWorkloadDefaultMem uint32 `toml:"static_sandbox_default_workload_mem"` StaticSandboxWorkloadDefaultVcpus float32 `toml:"static_sandbox_default_workload_vcpus"` - EnablePprof bool `toml:"enable_pprof"` - DisableGuestEmptyDir bool `toml:"disable_guest_empty_dir"` - CreateContainerTimeout uint64 `toml:"create_container_timeout"` - DanConf string `toml:"dan_conf"` - ForceGuestPull bool `toml:"experimental_force_guest_pull"` + SandboxWorkloadMemMin uint32 `toml:"sandbox_workload_mem_min"` + EnablePprof bool `toml:"enable_pprof"` + DisableGuestEmptyDir bool `toml:"disable_guest_empty_dir"` + CreateContainerTimeout uint64 `toml:"create_container_timeout"` + DanConf string `toml:"dan_conf"` + ForceGuestPull bool `toml:"experimental_force_guest_pull"` } type agent struct { @@ -1565,6 +1566,7 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat config.GuestSeLinuxLabel = tomlConf.Runtime.GuestSeLinuxLabel config.StaticSandboxResourceMgmt = tomlConf.Runtime.StaticSandboxResourceMgmt config.StaticSandboxWorkloadDefaultMem = tomlConf.Runtime.StaticSandboxWorkloadDefaultMem + config.SandboxWorkloadMemMin = tomlConf.Runtime.SandboxWorkloadMemMin config.StaticSandboxWorkloadDefaultVcpus = tomlConf.Runtime.StaticSandboxWorkloadDefaultVcpus config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly config.DisableNewNetNs = tomlConf.Runtime.DisableNewNetNs diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index aad04053d2..4ff3231069 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -159,6 +159,9 @@ type RuntimeConfig struct { // vcpus to allocate for workloads within the sandbox when workload vcpus is unspecified StaticSandboxWorkloadDefaultVcpus float32 + // Minimum memory (in MiB) that must be allocated for workloads within the sandbox when workload memory is specified + SandboxWorkloadMemMin uint32 + // Determines if create a netns for
hypervisor process DisableNewNetNs bool @@ -1202,6 +1205,10 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid st } + if sandboxConfig.SandboxResources.WorkloadMemMB < runtime.SandboxWorkloadMemMin { + return vc.SandboxConfig{}, fmt.Errorf("pod memory limit too low: minimum %dMiB, got %dMiB", runtime.SandboxWorkloadMemMin, sandboxConfig.SandboxResources.WorkloadMemMB) + } + return sandboxConfig, nil } diff --git a/tools/osbuilder/node-builder/azure-linux/package_build.sh b/tools/osbuilder/node-builder/azure-linux/package_build.sh index fb93eec197..346ba5a9f0 100755 --- a/tools/osbuilder/node-builder/azure-linux/package_build.sh +++ b/tools/osbuilder/node-builder/azure-linux/package_build.sh @@ -29,9 +29,9 @@ runtime_make_flags="SKIP_GO_VERSION_CHECK=1 QEMUCMD= FCCMD= ACRNCMD= STRATOVIRTC # - for ConfPods we explicitly set the cloud-hypervisor path. The path is independent of the PREFIX variable # as we have a single CLH binary for both vanilla Kata and ConfPods if [ "${CONF_PODS}" == "no" ]; then - runtime_make_flags+=" DEFSTATICRESOURCEMGMT_CLH=true KERNELPATH_CLH=${KERNEL_BINARY_LOCATION}" + runtime_make_flags+=" DEFSTATICRESOURCEMGMT_CLH=true KERNELPATH_CLH=${KERNEL_BINARY_LOCATION} DEFSANDBOXWORKLOADMEMMIN=128" else - runtime_make_flags+=" CLHPATH=${CLOUD_HYPERVISOR_LOCATION}" + runtime_make_flags+=" CLHPATH=${CLOUD_HYPERVISOR_LOCATION} DEFSANDBOXWORKLOADMEMMIN=192" fi # On Mariner 3.0 we use cgroupsv2 with a single sandbox cgroup