diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go b/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go index 7cb8d79b3e9..f14b6d1a6fc 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go @@ -107,18 +107,8 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerResources(pod *v1.Pod, lcr.HugepageLimits = GetHugepageLimitsFromResources(container.Resources) - if swapConfigurationHelper := newSwapConfigurationHelper(*m.machineInfo); utilfeature.DefaultFeatureGate.Enabled(kubefeatures.NodeSwap) { - // NOTE(ehashman): Behaviour is defined in the opencontainers runtime spec: - // https://github.com/opencontainers/runtime-spec/blob/1c3f411f041711bbeecf35ff7e93461ea6789220/config-linux.md#memory - switch m.memorySwapBehavior { - case kubelettypes.LimitedSwap: - swapConfigurationHelper.ConfigureLimitedSwap(lcr, pod, container) - default: - swapConfigurationHelper.ConfigureUnlimitedSwap(lcr) - } - } else { - swapConfigurationHelper.ConfigureNoSwap(lcr) - } + // Configure swap for the container + m.configureContainerSwapResources(lcr, pod, container) // Set memory.min and memory.high to enforce MemoryQoS if enforceMemoryQoS { @@ -170,6 +160,30 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerResources(pod *v1.Pod, return lcr } +// configureContainerSwapResources configures the swap resources for a specified (linux) container. +// Swap is only configured if a swap cgroup controller is available and the NodeSwap feature gate is enabled. +func (m *kubeGenericRuntimeManager) configureContainerSwapResources(lcr *runtimeapi.LinuxContainerResources, pod *v1.Pod, container *v1.Container) { + if !swapControllerAvailable() { + klog.InfoS("No swap cgroup controller present", "swapBehavior", m.memorySwapBehavior, "pod", klog.KObj(pod), "containerName", container.Name) + return + } + swapConfigurationHelper := newSwapConfigurationHelper(*m.machineInfo) + + if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.NodeSwap) { + swapConfigurationHelper.ConfigureNoSwap(lcr) + return + } + + // NOTE(ehashman): Behavior is defined in the opencontainers runtime spec: + // https://github.com/opencontainers/runtime-spec/blob/1c3f411f041711bbeecf35ff7e93461ea6789220/config-linux.md#memory + switch m.memorySwapBehavior { + case kubelettypes.LimitedSwap: + swapConfigurationHelper.ConfigureLimitedSwap(lcr, pod, container) + default: + swapConfigurationHelper.ConfigureUnlimitedSwap(lcr) + } +} + // generateContainerResources generates platform specific (linux) container resources config for runtime func (m *kubeGenericRuntimeManager) generateContainerResources(pod *v1.Pod, container *v1.Container) *runtimeapi.ContainerResources { enforceMemoryQoS := false @@ -315,7 +329,10 @@ var ( swapControllerAvailabilityOnce sync.Once ) -func swapControllerAvailable() bool { +// Note: this function variable is being added here so it would be possible to mock +// the swap controller availability for unit tests by assigning a new function to it. Without it, +// the swap controller availability would solely depend on the environment running the test. +var swapControllerAvailable = func() bool { // See https://github.com/containerd/containerd/pull/7838/ swapControllerAvailabilityOnce.Do(func() { const warn = "Failed to detect the availability of the swap controller, assuming not available" diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container_linux_test.go b/pkg/kubelet/kuberuntime/kuberuntime_container_linux_test.go index b50eee1d4ee..59be8cead50 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container_linux_test.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container_linux_test.go @@ -22,14 +22,15 @@ package kuberuntime import ( "context" "fmt" - "k8s.io/kubernetes/pkg/kubelet/cm" - "k8s.io/kubernetes/pkg/kubelet/types" "math" "os" "reflect" "strconv" "testing" + "k8s.io/kubernetes/pkg/kubelet/cm" + "k8s.io/kubernetes/pkg/kubelet/types" + "github.com/google/go-cmp/cmp" libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/stretchr/testify/assert" @@ -845,12 +846,12 @@ func TestGenerateLinuxContainerResources(t *testing.T) { }, } { t.Run(tc.name, func(t *testing.T) { + defer setSwapControllerAvailableDuringTest(false)() if tc.scalingFg { defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)() } setCgroupVersionDuringTest(cgroupV1) - tc.expected.MemorySwapLimitInBytes = tc.expected.MemoryLimitInBytes pod.Spec.Containers[0].Resources = v1.ResourceRequirements{Limits: tc.limits, Requests: tc.requests} if len(tc.cStatus) > 0 { @@ -891,6 +892,19 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) { Status: v1.PodStatus{}, } + expectSwapDisabled := func(cgroupVersion CgroupVersion, resources ...*runtimeapi.LinuxContainerResources) { + const msg = "container is expected to not have swap configured" + + for _, r := range resources { + switch cgroupVersion { + case cgroupV1: + assert.Equal(t, int64(0), r.MemorySwapLimitInBytes, msg) + case cgroupV2: + assert.NotContains(t, r.Unified, cm.Cgroup2MaxSwapFilename, msg) + } + } + } + expectNoSwap := func(cgroupVersion CgroupVersion, resources ...*runtimeapi.LinuxContainerResources) { const msg = "container is expected to not have swap access" @@ -939,6 +953,7 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) { name string cgroupVersion CgroupVersion qosClass v1.PodQOSClass + swapDisabledOnNode bool nodeSwapFeatureGateEnabled bool swapBehavior string addContainerWithoutRequests bool @@ -994,28 +1009,28 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) { // With Guaranteed and Best-effort QoS { - name: "Best-effort Qos, cgroups v2, LimitedSwap", + name: "Best-effort QoS, cgroups v2, LimitedSwap", cgroupVersion: cgroupV2, qosClass: v1.PodQOSBurstable, nodeSwapFeatureGateEnabled: true, swapBehavior: types.LimitedSwap, }, { - name: "Best-effort Qos, cgroups v2, UnlimitedSwap", + name: "Best-effort QoS, cgroups v2, UnlimitedSwap", cgroupVersion: cgroupV2, qosClass: v1.PodQOSBurstable, nodeSwapFeatureGateEnabled: true, swapBehavior: types.UnlimitedSwap, }, { - name: "Guaranteed Qos, cgroups v2, LimitedSwap", + name: "Guaranteed QoS, cgroups v2, LimitedSwap", cgroupVersion: cgroupV2, qosClass: v1.PodQOSGuaranteed, nodeSwapFeatureGateEnabled: true, swapBehavior: types.LimitedSwap, }, { - name: "Guaranteed Qos, cgroups v2, UnlimitedSwap", + name: "Guaranteed QoS, cgroups v2, UnlimitedSwap", cgroupVersion: cgroupV2, qosClass: v1.PodQOSGuaranteed, nodeSwapFeatureGateEnabled: true, @@ -1024,7 +1039,7 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) { // With a "guaranteed" container (when memory requests equal to limits) { - name: "Burstable Qos, cgroups v2, LimitedSwap, with a guaranteed container", + name: "Burstable QoS, cgroups v2, LimitedSwap, with a guaranteed container", cgroupVersion: cgroupV2, qosClass: v1.PodQOSBurstable, nodeSwapFeatureGateEnabled: true, @@ -1033,7 +1048,7 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) { addGuaranteedContainer: true, }, { - name: "Burstable Qos, cgroups v2, UnlimitedSwap, with a guaranteed container", + name: "Burstable QoS, cgroups v2, UnlimitedSwap, with a guaranteed container", cgroupVersion: cgroupV2, qosClass: v1.PodQOSBurstable, nodeSwapFeatureGateEnabled: true, @@ -1044,7 +1059,7 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) { // Swap is expected to be allocated { - name: "Burstable Qos, cgroups v2, LimitedSwap", + name: "Burstable QoS, cgroups v2, LimitedSwap", cgroupVersion: cgroupV2, qosClass: v1.PodQOSBurstable, nodeSwapFeatureGateEnabled: true, @@ -1053,7 +1068,7 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) { addGuaranteedContainer: false, }, { - name: "Burstable Qos, cgroups v2, UnlimitedSwap", + name: "Burstable QoS, cgroups v2, UnlimitedSwap", cgroupVersion: cgroupV2, qosClass: v1.PodQOSBurstable, nodeSwapFeatureGateEnabled: true, @@ -1062,7 +1077,7 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) { addGuaranteedContainer: false, }, { - name: "Burstable Qos, cgroups v2, LimitedSwap, with a container with no requests", + name: "Burstable QoS, cgroups v2, LimitedSwap, with a container with no requests", cgroupVersion: cgroupV2, qosClass: v1.PodQOSBurstable, nodeSwapFeatureGateEnabled: true, @@ -1071,7 +1086,158 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) { addGuaranteedContainer: false, }, { - name: "Burstable Qos, cgroups v2, UnlimitedSwap, with a container with no requests", + name: "Burstable QoS, cgroups v2, UnlimitedSwap, with a container with no requests", + cgroupVersion: cgroupV2, + qosClass: v1.PodQOSBurstable, + nodeSwapFeatureGateEnabled: true, + swapBehavior: types.UnlimitedSwap, + addContainerWithoutRequests: true, + addGuaranteedContainer: false, + }, + // All the above examples with Swap disabled on node + { + name: "Swap disabled on node, cgroups v1, LimitedSwap, Burstable QoS", + swapDisabledOnNode: true, + cgroupVersion: cgroupV1, + qosClass: v1.PodQOSBurstable, + nodeSwapFeatureGateEnabled: true, + swapBehavior: types.LimitedSwap, + }, + { + name: "Swap disabled on node, cgroups v1, UnlimitedSwap, Burstable QoS", + swapDisabledOnNode: true, + cgroupVersion: cgroupV1, + qosClass: v1.PodQOSBurstable, + nodeSwapFeatureGateEnabled: true, + swapBehavior: types.UnlimitedSwap, + }, + { + name: "Swap disabled on node, cgroups v1, LimitedSwap, Best-effort QoS", + swapDisabledOnNode: true, + cgroupVersion: cgroupV1, + qosClass: v1.PodQOSBestEffort, + nodeSwapFeatureGateEnabled: true, + swapBehavior: types.LimitedSwap, + }, + + // With feature gate turned off + { + name: "Swap disabled on node, NodeSwap feature gate turned off, cgroups v2, LimitedSwap", + swapDisabledOnNode: true, + cgroupVersion: cgroupV2, + qosClass: v1.PodQOSBurstable, + nodeSwapFeatureGateEnabled: false, + swapBehavior: types.LimitedSwap, + }, + { + name: "Swap disabled on node, NodeSwap feature gate turned off, cgroups v2, UnlimitedSwap", + swapDisabledOnNode: true, + cgroupVersion: cgroupV2, + qosClass: v1.PodQOSBurstable, + nodeSwapFeatureGateEnabled: false, + swapBehavior: types.UnlimitedSwap, + }, + + // With no swapBehavior, UnlimitedSwap should be the default + { + name: "Swap disabled on node, With no swapBehavior - UnlimitedSwap should be the default", + swapDisabledOnNode: true, + cgroupVersion: cgroupV2, + qosClass: v1.PodQOSBestEffort, + nodeSwapFeatureGateEnabled: true, + swapBehavior: "", + }, + + // With Guaranteed and Best-effort QoS + { + name: "Swap disabled on node, Best-effort QoS, cgroups v2, LimitedSwap", + swapDisabledOnNode: true, + cgroupVersion: cgroupV2, + qosClass: v1.PodQOSBurstable, + nodeSwapFeatureGateEnabled: true, + swapBehavior: types.LimitedSwap, + }, + { + name: "Swap disabled on node, Best-effort QoS, cgroups v2, UnlimitedSwap", + swapDisabledOnNode: true, + cgroupVersion: cgroupV2, + qosClass: v1.PodQOSBurstable, + nodeSwapFeatureGateEnabled: true, + swapBehavior: types.UnlimitedSwap, + }, + { + name: "Swap disabled on node, Guaranteed QoS, cgroups v2, LimitedSwap", + swapDisabledOnNode: true, + cgroupVersion: cgroupV2, + qosClass: v1.PodQOSGuaranteed, + nodeSwapFeatureGateEnabled: true, + swapBehavior: types.LimitedSwap, + }, + { + name: "Swap disabled on node, Guaranteed QoS, cgroups v2, UnlimitedSwap", + swapDisabledOnNode: true, + cgroupVersion: cgroupV2, + qosClass: v1.PodQOSGuaranteed, + nodeSwapFeatureGateEnabled: true, + swapBehavior: types.UnlimitedSwap, + }, + + // With a "guaranteed" container (when memory requests equal to limits) + { + name: "Swap disabled on node, Burstable QoS, cgroups v2, LimitedSwap, with a guaranteed container", + swapDisabledOnNode: true, + cgroupVersion: cgroupV2, + qosClass: v1.PodQOSBurstable, + nodeSwapFeatureGateEnabled: true, + swapBehavior: types.LimitedSwap, + addContainerWithoutRequests: false, + addGuaranteedContainer: true, + }, + { + name: "Swap disabled on node, Burstable QoS, cgroups v2, UnlimitedSwap, with a guaranteed container", + swapDisabledOnNode: true, + cgroupVersion: cgroupV2, + qosClass: v1.PodQOSBurstable, + nodeSwapFeatureGateEnabled: true, + swapBehavior: types.UnlimitedSwap, + addContainerWithoutRequests: false, + addGuaranteedContainer: true, + }, + + // Swap is expected to be allocated + { + name: "Swap disabled on node, Burstable QoS, cgroups v2, LimitedSwap", + swapDisabledOnNode: true, + cgroupVersion: cgroupV2, + qosClass: v1.PodQOSBurstable, + nodeSwapFeatureGateEnabled: true, + swapBehavior: types.LimitedSwap, + addContainerWithoutRequests: false, + addGuaranteedContainer: false, + }, + { + name: "Swap disabled on node, Burstable QoS, cgroups v2, UnlimitedSwap", + swapDisabledOnNode: true, + cgroupVersion: cgroupV2, + qosClass: v1.PodQOSBurstable, + nodeSwapFeatureGateEnabled: true, + swapBehavior: types.UnlimitedSwap, + addContainerWithoutRequests: false, + addGuaranteedContainer: false, + }, + { + name: "Swap disabled on node, Burstable QoS, cgroups v2, LimitedSwap, with a container with no requests", + swapDisabledOnNode: true, + cgroupVersion: cgroupV2, + qosClass: v1.PodQOSBurstable, + nodeSwapFeatureGateEnabled: true, + swapBehavior: types.LimitedSwap, + addContainerWithoutRequests: true, + addGuaranteedContainer: false, + }, + { + name: "Swap disabled on node, Burstable QoS, cgroups v2, UnlimitedSwap, with a container with no requests", + swapDisabledOnNode: true, cgroupVersion: cgroupV2, qosClass: v1.PodQOSBurstable, nodeSwapFeatureGateEnabled: true, @@ -1082,6 +1248,7 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) { } { t.Run(tc.name, func(t *testing.T) { setCgroupVersionDuringTest(tc.cgroupVersion) + defer setSwapControllerAvailableDuringTest(!tc.swapDisabledOnNode)() defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeSwap, tc.nodeSwapFeatureGateEnabled)() m.memorySwapBehavior = tc.swapBehavior @@ -1117,6 +1284,11 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) { resourcesC1 := m.generateLinuxContainerResources(pod, &pod.Spec.Containers[0], false) resourcesC2 := m.generateLinuxContainerResources(pod, &pod.Spec.Containers[1], false) + if tc.swapDisabledOnNode { + expectSwapDisabled(tc.cgroupVersion, resourcesC1, resourcesC2) + return + } + if !tc.nodeSwapFeatureGateEnabled || tc.cgroupVersion == cgroupV1 || (tc.swapBehavior == types.LimitedSwap && tc.qosClass != v1.PodQOSBurstable) { expectNoSwap(tc.cgroupVersion, resourcesC1, resourcesC2) return @@ -1151,3 +1323,14 @@ func setCgroupVersionDuringTest(version CgroupVersion) { return version == cgroupV2 } } + +func setSwapControllerAvailableDuringTest(available bool) func() { + original := swapControllerAvailable + swapControllerAvailable = func() bool { + return available + } + + return func() { + swapControllerAvailable = original + } +}