diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container_linux_test.go b/pkg/kubelet/kuberuntime/kuberuntime_container_linux_test.go
index 996754c2417..b50eee1d4ee 100644
--- a/pkg/kubelet/kuberuntime/kuberuntime_container_linux_test.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_container_linux_test.go
@@ -21,6 +21,7 @@ package kuberuntime
 
 import (
 	"context"
+	"fmt"
 	"math"
 	"os"
 	"reflect"
@@ -38,7 +39,8 @@ import (
 	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
 	"k8s.io/kubernetes/pkg/features"
+	"k8s.io/kubernetes/pkg/kubelet/cm"
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 	kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
 )
 
 func makeExpectedConfig(m *kubeGenericRuntimeManager, pod *v1.Pod, containerIndex int, enforceMemoryQoS bool) *runtimeapi.ContainerConfig {
@@ -695,96 +697,6 @@ func TestGenerateLinuxContainerConfigNamespaces(t *testing.T) {
 	}
 }
 
-func TestGenerateLinuxContainerConfigSwap(t *testing.T) {
-	defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeSwap, true)()
-	_, _, m, err := createTestRuntimeManager()
-	if err != nil {
-		t.Fatalf("error creating test RuntimeManager: %v", err)
-	}
-	m.machineInfo.MemoryCapacity = 1000000
-	containerName := "test"
-
-	for _, tc := range []struct {
-		name        string
-		swapSetting string
-		pod         *v1.Pod
-		expected    int64
-	}{
-		{
-			name: "config unset, memory limit set",
-			// no swap setting
-			pod: &v1.Pod{
-				Spec: v1.PodSpec{
-					Containers: []v1.Container{{
-						Name: containerName,
-						Resources: v1.ResourceRequirements{
-							Limits: v1.ResourceList{
-								"memory": resource.MustParse("1000"),
-							},
-							Requests: v1.ResourceList{
-								"memory": resource.MustParse("1000"),
-							},
-						},
-					}},
-				},
-			},
-			expected: 1000,
-		},
-		{
-			name: "config unset, no memory limit",
-			// no swap setting
-			pod: &v1.Pod{
-				Spec: v1.PodSpec{
-					Containers: []v1.Container{
-						{Name: containerName},
-					},
-				},
-			},
-			expected: 0,
-		},
-		{
-			// Note: behaviour will be the same as previous two cases
-			name:        "config set to LimitedSwap, memory limit set",
-			swapSetting: kubelettypes.LimitedSwap,
-			pod: &v1.Pod{
-				Spec: v1.PodSpec{
-					Containers: []v1.Container{{
-						Name: containerName,
-						Resources: v1.ResourceRequirements{
-							Limits: v1.ResourceList{
-								"memory": resource.MustParse("1000"),
-							},
-							Requests: v1.ResourceList{
-								"memory": resource.MustParse("1000"),
-							},
-						},
-					}},
-				},
-			},
-			expected: 1000,
-		},
-		{
-			name:        "UnlimitedSwap enabled",
-			swapSetting: kubelettypes.UnlimitedSwap,
-			pod: &v1.Pod{
-				Spec: v1.PodSpec{
-					Containers: []v1.Container{
-						{Name: containerName},
-					},
-				},
-			},
-			expected: -1,
-		},
-	} {
-		t.Run(tc.name, func(t *testing.T) {
-			m.memorySwapBehavior = tc.swapSetting
-			actual, err := m.generateLinuxContainerConfig(&tc.pod.Spec.Containers[0], tc.pod, nil, "", nil, false)
-			assert.NoError(t, err)
-			assert.Equal(t, tc.expected, actual.Resources.MemorySwapLimitInBytes, "memory swap config for %s", tc.name)
-		})
-	}
-}
-
 func TestGenerateLinuxContainerResources(t *testing.T) {
 	_, _, m, err := createTestRuntimeManager()
 	assert.NoError(t, err)
@@ -936,6 +848,11 @@ func TestGenerateLinuxContainerResources(t *testing.T) {
 			if tc.scalingFg {
 				defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
 			}
+
+			// Pin cgroup v1 and expect the swap limit to mirror the memory limit.
+			setCgroupVersionDuringTest(cgroupV1)
+			tc.expected.MemorySwapLimitInBytes = tc.expected.MemoryLimitInBytes
+
 			pod.Spec.Containers[0].Resources = v1.ResourceRequirements{Limits: tc.limits, Requests: tc.requests}
 			if len(tc.cStatus) > 0 {
 				pod.Status.ContainerStatuses = tc.cStatus
@@ -950,6 +867,299 @@ func TestGenerateLinuxContainerResources(t *testing.T) {
 	//TODO(vinaykul,InPlacePodVerticalScaling): Add unit tests for cgroup v1 & v2
 }
 
+// TestGenerateLinuxContainerResourcesWithSwap verifies the swap limits that
+// generateLinuxContainerResources computes for a two-container pod across
+// cgroup versions, pod QoS classes, NodeSwap feature gate states and swap
+// behaviors.
+func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
+	_, _, m, err := createTestRuntimeManager()
+	assert.NoError(t, err)
+	m.machineInfo.MemoryCapacity = 42949672960 // 40GiB == 40 * 1024^3
+	m.machineInfo.SwapCapacity = 5368709120    // 5GiB == 5 * 1024^3
+
+	pod := &v1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			UID:       "12345678",
+			Name:      "foo",
+			Namespace: "bar",
+		},
+		Spec: v1.PodSpec{
+			Containers: []v1.Container{
+				{
+					Name: "c1",
+				},
+				{
+					Name: "c2",
+				},
+			},
+		},
+		Status: v1.PodStatus{},
+	}
+
+	// The helpers below take the subtest's *testing.T so that a failed assertion
+	// is reported against the subtest that ran it rather than the parent test.
+
+	// expectNoSwap asserts that none of the given containers may use swap.
+	expectNoSwap := func(t *testing.T, cgroupVersion CgroupVersion, resources ...*runtimeapi.LinuxContainerResources) {
+		t.Helper()
+		const msg = "container is expected to not have swap access"
+
+		for _, r := range resources {
+			switch cgroupVersion {
+			case cgroupV1:
+				assert.Equal(t, r.MemoryLimitInBytes, r.MemorySwapLimitInBytes, msg)
+			case cgroupV2:
+				assert.Equal(t, "0", r.Unified[cm.Cgroup2MaxSwapFilename], msg)
+			}
+		}
+	}
+
+	// expectUnlimitedSwap asserts that swap usage of the given containers is unbounded.
+	expectUnlimitedSwap := func(t *testing.T, cgroupVersion CgroupVersion, resources ...*runtimeapi.LinuxContainerResources) {
+		t.Helper()
+		const msg = "container is expected to have unlimited swap access"
+
+		for _, r := range resources {
+			switch cgroupVersion {
+			case cgroupV1:
+				assert.Equal(t, int64(-1), r.MemorySwapLimitInBytes, msg)
+			case cgroupV2:
+				assert.Equal(t, "max", r.Unified[cm.Cgroup2MaxSwapFilename], msg)
+			}
+		}
+	}
+
+	// expectSwap asserts that the container may use exactly swapBytesExpected bytes of swap.
+	expectSwap := func(t *testing.T, cgroupVersion CgroupVersion, swapBytesExpected int64, resources *runtimeapi.LinuxContainerResources) {
+		t.Helper()
+		msg := fmt.Sprintf("container swap is expected to be limited by %d bytes", swapBytesExpected)
+
+		switch cgroupVersion {
+		case cgroupV1:
+			// The cgroup v1 expectation is expressed as memory limit plus swap.
+			assert.Equal(t, resources.MemoryLimitInBytes+swapBytesExpected, resources.MemorySwapLimitInBytes, msg)
+		case cgroupV2:
+			assert.Equal(t, fmt.Sprintf("%d", swapBytesExpected), resources.Unified[cm.Cgroup2MaxSwapFilename], msg)
+		}
+	}
+
+	// expectedBurstableSwap returns the swap that calcSwapForBurstablePods grants a
+	// container with the given memory request on this test's machine.
+	expectedBurstableSwap := func(t *testing.T, containerMemoryRequest int64) int64 {
+		t.Helper()
+		swapSize, err := calcSwapForBurstablePods(containerMemoryRequest, int64(m.machineInfo.MemoryCapacity), int64(m.machineInfo.SwapCapacity))
+		assert.NoError(t, err)
+
+		return swapSize
+	}
+
+	for _, tc := range []struct {
+		name                        string
+		cgroupVersion               CgroupVersion
+		qosClass                    v1.PodQOSClass
+		nodeSwapFeatureGateEnabled  bool
+		swapBehavior                string
+		addContainerWithoutRequests bool
+		addGuaranteedContainer      bool
+	}{
+		// With cgroup v1
+		{
+			name:                       "cgroups v1, LimitedSwap, Burstable QoS",
+			cgroupVersion:              cgroupV1,
+			qosClass:                   v1.PodQOSBurstable,
+			nodeSwapFeatureGateEnabled: true,
+			swapBehavior:               kubelettypes.LimitedSwap,
+		},
+		{
+			name:                       "cgroups v1, UnlimitedSwap, Burstable QoS",
+			cgroupVersion:              cgroupV1,
+			qosClass:                   v1.PodQOSBurstable,
+			nodeSwapFeatureGateEnabled: true,
+			swapBehavior:               kubelettypes.UnlimitedSwap,
+		},
+		{
+			name:                       "cgroups v1, LimitedSwap, Best-effort QoS",
+			cgroupVersion:              cgroupV1,
+			qosClass:                   v1.PodQOSBestEffort,
+			nodeSwapFeatureGateEnabled: true,
+			swapBehavior:               kubelettypes.LimitedSwap,
+		},
+
+		// With feature gate turned off
+		{
+			name:                       "NodeSwap feature gate turned off, cgroups v2, LimitedSwap",
+			cgroupVersion:              cgroupV2,
+			qosClass:                   v1.PodQOSBurstable,
+			nodeSwapFeatureGateEnabled: false,
+			swapBehavior:               kubelettypes.LimitedSwap,
+		},
+		{
+			name:                       "NodeSwap feature gate turned off, cgroups v2, UnlimitedSwap",
+			cgroupVersion:              cgroupV2,
+			qosClass:                   v1.PodQOSBurstable,
+			nodeSwapFeatureGateEnabled: false,
+			swapBehavior:               kubelettypes.UnlimitedSwap,
+		},
+
+		// With no swapBehavior, UnlimitedSwap should be the default
+		{
+			name:                       "With no swapBehavior - UnlimitedSwap should be the default",
+			cgroupVersion:              cgroupV2,
+			qosClass:                   v1.PodQOSBestEffort,
+			nodeSwapFeatureGateEnabled: true,
+			swapBehavior:               "",
+		},
+
+		// With Guaranteed and Best-effort QoS
+		{
+			name:                       "Best-effort Qos, cgroups v2, LimitedSwap",
+			cgroupVersion:              cgroupV2,
+			qosClass:                   v1.PodQOSBestEffort,
+			nodeSwapFeatureGateEnabled: true,
+			swapBehavior:               kubelettypes.LimitedSwap,
+		},
+		{
+			name:                       "Best-effort Qos, cgroups v2, UnlimitedSwap",
+			cgroupVersion:              cgroupV2,
+			qosClass:                   v1.PodQOSBestEffort,
+			nodeSwapFeatureGateEnabled: true,
+			swapBehavior:               kubelettypes.UnlimitedSwap,
+		},
+		{
+			name:                       "Guaranteed Qos, cgroups v2, LimitedSwap",
+			cgroupVersion:              cgroupV2,
+			qosClass:                   v1.PodQOSGuaranteed,
+			nodeSwapFeatureGateEnabled: true,
+			swapBehavior:               kubelettypes.LimitedSwap,
+		},
+		{
+			name:                       "Guaranteed Qos, cgroups v2, UnlimitedSwap",
+			cgroupVersion:              cgroupV2,
+			qosClass:                   v1.PodQOSGuaranteed,
+			nodeSwapFeatureGateEnabled: true,
+			swapBehavior:               kubelettypes.UnlimitedSwap,
+		},
+
+		// With a "guaranteed" container (when memory requests equal to limits)
+		{
+			name:                        "Burstable Qos, cgroups v2, LimitedSwap, with a guaranteed container",
+			cgroupVersion:               cgroupV2,
+			qosClass:                    v1.PodQOSBurstable,
+			nodeSwapFeatureGateEnabled:  true,
+			swapBehavior:                kubelettypes.LimitedSwap,
+			addContainerWithoutRequests: false,
+			addGuaranteedContainer:      true,
+		},
+		{
+			name:                        "Burstable Qos, cgroups v2, UnlimitedSwap, with a guaranteed container",
+			cgroupVersion:               cgroupV2,
+			qosClass:                    v1.PodQOSBurstable,
+			nodeSwapFeatureGateEnabled:  true,
+			swapBehavior:                kubelettypes.UnlimitedSwap,
+			addContainerWithoutRequests: false,
+			addGuaranteedContainer:      true,
+		},
+
+		// Swap is expected to be allocated
+		{
+			name:                        "Burstable Qos, cgroups v2, LimitedSwap",
+			cgroupVersion:               cgroupV2,
+			qosClass:                    v1.PodQOSBurstable,
+			nodeSwapFeatureGateEnabled:  true,
+			swapBehavior:                kubelettypes.LimitedSwap,
+			addContainerWithoutRequests: false,
+			addGuaranteedContainer:      false,
+		},
+		{
+			name:                        "Burstable Qos, cgroups v2, UnlimitedSwap",
+			cgroupVersion:               cgroupV2,
+			qosClass:                    v1.PodQOSBurstable,
+			nodeSwapFeatureGateEnabled:  true,
+			swapBehavior:                kubelettypes.UnlimitedSwap,
+			addContainerWithoutRequests: false,
+			addGuaranteedContainer:      false,
+		},
+		{
+			name:                        "Burstable Qos, cgroups v2, LimitedSwap, with a container with no requests",
+			cgroupVersion:               cgroupV2,
+			qosClass:                    v1.PodQOSBurstable,
+			nodeSwapFeatureGateEnabled:  true,
+			swapBehavior:                kubelettypes.LimitedSwap,
+			addContainerWithoutRequests: true,
+			addGuaranteedContainer:      false,
+		},
+		{
+			name:                        "Burstable Qos, cgroups v2, UnlimitedSwap, with a container with no requests",
+			cgroupVersion:               cgroupV2,
+			qosClass:                    v1.PodQOSBurstable,
+			nodeSwapFeatureGateEnabled:  true,
+			swapBehavior:                kubelettypes.UnlimitedSwap,
+			addContainerWithoutRequests: true,
+			addGuaranteedContainer:      false,
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			setCgroupVersionDuringTest(tc.cgroupVersion)
+			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeSwap, tc.nodeSwapFeatureGateEnabled)()
+			m.memorySwapBehavior = tc.swapBehavior
+
+			// A Best-effort pod leaves both containers without requests or limits.
+			var resourceReqsC1, resourceReqsC2 v1.ResourceRequirements
+			switch tc.qosClass {
+			case v1.PodQOSBurstable:
+				resourceReqsC1 = v1.ResourceRequirements{
+					Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi")},
+				}
+
+				if !tc.addContainerWithoutRequests {
+					resourceReqsC2 = v1.ResourceRequirements{
+						Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi")},
+					}
+
+					if tc.addGuaranteedContainer {
+						resourceReqsC2.Limits = v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi")}
+					}
+				}
+			case v1.PodQOSGuaranteed:
+				resourceReqsC1 = v1.ResourceRequirements{
+					Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi"), v1.ResourceCPU: resource.MustParse("1")},
+					Limits:   v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi"), v1.ResourceCPU: resource.MustParse("1")},
+				}
+				resourceReqsC2 = v1.ResourceRequirements{
+					Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi"), v1.ResourceCPU: resource.MustParse("1")},
+					Limits:   v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi"), v1.ResourceCPU: resource.MustParse("1")},
+				}
+			}
+			pod.Spec.Containers[0].Resources = resourceReqsC1
+			pod.Spec.Containers[1].Resources = resourceReqsC2
+
+			resourcesC1 := m.generateLinuxContainerResources(pod, &pod.Spec.Containers[0], false)
+			resourcesC2 := m.generateLinuxContainerResources(pod, &pod.Spec.Containers[1], false)
+
+			if !tc.nodeSwapFeatureGateEnabled || tc.cgroupVersion == cgroupV1 || (tc.swapBehavior == kubelettypes.LimitedSwap && tc.qosClass != v1.PodQOSBurstable) {
+				expectNoSwap(t, tc.cgroupVersion, resourcesC1, resourcesC2)
+				return
+			}
+
+			if tc.swapBehavior == kubelettypes.UnlimitedSwap || tc.swapBehavior == "" {
+				expectUnlimitedSwap(t, tc.cgroupVersion, resourcesC1, resourcesC2)
+				return
+			}
+
+			// In this table only LimitedSwap cases on a Burstable, cgroup v2 pod get here.
+			// c2 is expected to get swap only when it has a memory request and no equal limit.
+			c1ExpectedSwap := expectedBurstableSwap(t, resourceReqsC1.Requests.Memory().Value())
+			c2ExpectedSwap := int64(0)
+			if !tc.addContainerWithoutRequests && !tc.addGuaranteedContainer {
+				c2ExpectedSwap = expectedBurstableSwap(t, resourceReqsC2.Requests.Memory().Value())
+			}
+
+			expectSwap(t, tc.cgroupVersion, c1ExpectedSwap, resourcesC1)
+			expectSwap(t, tc.cgroupVersion, c2ExpectedSwap, resourcesC2)
+		})
+	}
+}
+
 type CgroupVersion string
 
 const (