Merge pull request #118764 from iholder101/Swap/burstableQoS-impl

Add full cgroup v2 swap support with automatically calculated swap limit for LimitedSwap and Burstable QoS Pods
Kubernetes Prow Robot 2023-07-17 19:49:07 -07:00, committed by GitHub
commit da2fdf8cc3
10 changed files with 831 additions and 169 deletions

View File

@@ -47,6 +47,8 @@ CGROUP_DRIVER=${CGROUP_DRIVER:-""}
CGROUP_ROOT=${CGROUP_ROOT:-""}
# owner of client certs, default to current user if not specified
USER=${USER:-$(whoami)}
+# if true, limited swap is being used instead of unlimited swap (default)
+LIMITED_SWAP=${LIMITED_SWAP:-""}
# required for cni installation
CNI_CONFIG_DIR=${CNI_CONFIG_DIR:-/etc/cni/net.d}
@@ -832,6 +834,13 @@ tracing:
EOF
fi

+if [[ "$LIMITED_SWAP" == "true" ]]; then
+  cat <<EOF >> "${TMP_DIR}"/kubelet.yaml
+memorySwap:
+  swapBehavior: LimitedSwap
+EOF
+fi
+
{
  # authentication
  echo "authentication:"

View File

@@ -596,8 +596,9 @@ const (
	// Allow pods to failover to a different node in case of non graceful node shutdown
	NodeOutOfServiceVolumeDetach featuregate.Feature = "NodeOutOfServiceVolumeDetach"

-	// owner: @ehashman
+	// owner: @iholder101
	// alpha: v1.22
+	// beta1: v1.28. For more info, please look at the KEP: https://kep.k8s.io/2400.
	//
	// Permits kubelet to run with swap enabled
	NodeSwap featuregate.Feature = "NodeSwap"
@@ -1074,7 +1075,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
	NodeOutOfServiceVolumeDetach: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.31
-	NodeSwap: {Default: false, PreRelease: featuregate.Alpha},
+	NodeSwap: {Default: false, PreRelease: featuregate.Beta},
	PDBUnhealthyPodEvictionPolicy: {Default: true, PreRelease: featuregate.Beta},
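Moving the gate to beta changes only its maturity, not its default-off posture (`Default: false`). To exercise swap, a node still has to opt in: swap must be provisioned on the host and the kubelet started with the gate enabled and swap tolerated, e.g. `--feature-gates=NodeSwap=true --fail-swap-on=false`; the `memorySwap.swapBehavior` kubelet-config field (see the kubelet.yaml stanza above) then selects `LimitedSwap` versus the default `UnlimitedSwap`.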

View File

@@ -45,11 +45,12 @@ import (
const (
	// systemdSuffix is the cgroup name suffix for systemd
	systemdSuffix string = ".slice"
-	// MemoryMin is memory.min for cgroup v2
-	MemoryMin string = "memory.min"
-	// MemoryHigh is memory.high for cgroup v2
-	MemoryHigh string = "memory.high"
+	// Cgroup2MemoryMin is memory.min for cgroup v2
+	Cgroup2MemoryMin string = "memory.min"
+	// Cgroup2MemoryHigh is memory.high for cgroup v2
+	Cgroup2MemoryHigh string = "memory.high"
	Cgroup2MaxCpuLimit string = "max"
+	Cgroup2MaxSwapFilename string = "memory.swap.max"
)

var RootCgroupName = CgroupName([]string{})
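For orientation, these constants are literal cgroup v2 file names: they become keys in the CRI `Unified` map, which the runtime writes verbatim into the container's cgroup directory. A minimal sketch of the pattern, with illustrative values (the real call sites appear in the hunks below):

```go
// Sketch only: how the keys above are typically consumed from the
// kuberuntime side. Values here are made up for illustration.
lcr := &runtimeapi.LinuxContainerResources{
	Unified: map[string]string{
		cm.Cgroup2MemoryMin:       "134217728", // memory.min: protect 128Mi from reclaim
		cm.Cgroup2MaxSwapFilename: "0",         // memory.swap.max: no swap for this container
	},
}
_ = lcr
```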

View File

@@ -196,7 +196,7 @@ func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64,
	}
	if memoryMin > 0 {
		result.Unified = map[string]string{
-			MemoryMin: strconv.FormatInt(memoryMin, 10),
+			Cgroup2MemoryMin: strconv.FormatInt(memoryMin, 10),
		}
	}
}

View File

@@ -147,7 +147,7 @@ func enforceExistingCgroup(cgroupManager CgroupManager, cName CgroupName, rl v1.
	if rp.Unified == nil {
		rp.Unified = make(map[string]string)
	}
-	rp.Unified[MemoryMin] = strconv.FormatInt(*rp.Memory, 10)
+	rp.Unified[Cgroup2MemoryMin] = strconv.FormatInt(*rp.Memory, 10)
	}
}

View File

@@ -292,7 +292,7 @@ func (m *qosContainerManagerImpl) setMemoryQoS(configs map[v1.PodQOSClass]*Cgrou
		if configs[v1.PodQOSBurstable].ResourceParameters.Unified == nil {
			configs[v1.PodQOSBurstable].ResourceParameters.Unified = make(map[string]string)
		}
-		configs[v1.PodQOSBurstable].ResourceParameters.Unified[MemoryMin] = strconv.FormatInt(burstableMin, 10)
+		configs[v1.PodQOSBurstable].ResourceParameters.Unified[Cgroup2MemoryMin] = strconv.FormatInt(burstableMin, 10)
		klog.V(4).InfoS("MemoryQoS config for qos", "qos", v1.PodQOSBurstable, "memoryMin", burstableMin)
	}
@@ -300,7 +300,7 @@ func (m *qosContainerManagerImpl) setMemoryQoS(configs map[v1.PodQOSClass]*Cgrou
		if configs[v1.PodQOSGuaranteed].ResourceParameters.Unified == nil {
			configs[v1.PodQOSGuaranteed].ResourceParameters.Unified = make(map[string]string)
		}
-		configs[v1.PodQOSGuaranteed].ResourceParameters.Unified[MemoryMin] = strconv.FormatInt(guaranteedMin, 10)
+		configs[v1.PodQOSGuaranteed].ResourceParameters.Unified[Cgroup2MemoryMin] = strconv.FormatInt(guaranteedMin, 10)
		klog.V(4).InfoS("MemoryQoS config for qos", "qos", v1.PodQOSGuaranteed, "memoryMin", guaranteedMin)
	}
}

View File

@@ -20,6 +20,9 @@ limitations under the License.
package kuberuntime

import (
+	"fmt"
+	cadvisorv1 "github.com/google/cadvisor/info/v1"
+	kubeapiqos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
	"math"
	"os"
	"strconv"
@@ -46,7 +49,7 @@ func (m *kubeGenericRuntimeManager) applyPlatformSpecificContainerConfig(config
	enforceMemoryQoS := false
	// Set memory.min and memory.high if MemoryQoS enabled with cgroups v2
	if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.MemoryQoS) &&
-		libcontainercgroups.IsCgroup2UnifiedMode() {
+		isCgroup2UnifiedMode() {
		enforceMemoryQoS = true
	}
	cl, err := m.generateLinuxContainerConfig(container, pod, uid, username, nsTarget, enforceMemoryQoS)
@@ -99,21 +102,17 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerResources(pod *v1.Pod,
	lcr.HugepageLimits = GetHugepageLimitsFromResources(container.Resources)

-	if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.NodeSwap) {
+	if swapConfigurationHelper := newSwapConfigurationHelper(*m.machineInfo); utilfeature.DefaultFeatureGate.Enabled(kubefeatures.NodeSwap) {
		// NOTE(ehashman): Behaviour is defined in the opencontainers runtime spec:
		// https://github.com/opencontainers/runtime-spec/blob/1c3f411f041711bbeecf35ff7e93461ea6789220/config-linux.md#memory
		switch m.memorySwapBehavior {
-		case kubelettypes.UnlimitedSwap:
-			// -1 = unlimited swap
-			lcr.MemorySwapLimitInBytes = -1
		case kubelettypes.LimitedSwap:
-			fallthrough
+			swapConfigurationHelper.ConfigureLimitedSwap(lcr, pod, container)
		default:
-			// memorySwapLimit = total permitted memory+swap; if equal to memory limit, => 0 swap above memory limit
-			// Some swapping is still possible.
-			// Note that if memory limit is 0, memory swap limit is ignored.
-			lcr.MemorySwapLimitInBytes = lcr.MemoryLimitInBytes
+			swapConfigurationHelper.ConfigureUnlimitedSwap(lcr)
		}
+	} else {
+		swapConfigurationHelper.ConfigureNoSwap(lcr)
	}
// Set memory.min and memory.high to enforce MemoryQoS // Set memory.min and memory.high to enforce MemoryQoS
@@ -122,7 +121,7 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerResources(pod *v1.Pod,
		memoryRequest := container.Resources.Requests.Memory().Value()
		memoryLimit := container.Resources.Limits.Memory().Value()
		if memoryRequest != 0 {
-			unified[cm.MemoryMin] = strconv.FormatInt(memoryRequest, 10)
+			unified[cm.Cgroup2MemoryMin] = strconv.FormatInt(memoryRequest, 10)
		}
// Guaranteed pods by their QoS definition requires that memory request equals memory limit and cpu request must equal cpu limit. // Guaranteed pods by their QoS definition requires that memory request equals memory limit and cpu request must equal cpu limit.
@@ -148,7 +147,7 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerResources(pod *v1.Pod,
			}
		}
		if memoryHigh != 0 && memoryHigh > memoryRequest {
-			unified[cm.MemoryHigh] = strconv.FormatInt(memoryHigh, 10)
+			unified[cm.Cgroup2MemoryHigh] = strconv.FormatInt(memoryHigh, 10)
		}
	}
	if len(unified) > 0 {
@@ -171,7 +170,7 @@ func (m *kubeGenericRuntimeManager) generateContainerResources(pod *v1.Pod, cont
	enforceMemoryQoS := false
	// Set memory.min and memory.high if MemoryQoS enabled with cgroups v2
	if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.MemoryQoS) &&
-		libcontainercgroups.IsCgroup2UnifiedMode() {
+		isCgroup2UnifiedMode() {
		enforceMemoryQoS = true
	}
	return &runtimeapi.ContainerResources{
@@ -216,7 +215,7 @@ func (m *kubeGenericRuntimeManager) calculateLinuxResources(cpuRequest, cpuLimit
	}

	// runc requires cgroupv2 for unified mode
-	if libcontainercgroups.IsCgroup2UnifiedMode() {
+	if isCgroup2UnifiedMode() {
		resources.Unified = map[string]string{
			// Ask the kernel to kill all processes in the container cgroup in case of OOM.
			// See memory.oom.group in https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html for
@@ -298,3 +297,94 @@ func toKubeContainerResources(statusResources *runtimeapi.ContainerResources) *k
	}
	return cStatusResources
}
// Note: this function variable is being added here so it would be possible to mock
// the cgroup version for unit tests by assigning a new mocked function into it. Without it,
// the cgroup version would solely depend on the environment running the test.
var isCgroup2UnifiedMode = func() bool {
	return libcontainercgroups.IsCgroup2UnifiedMode()
}
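Since `isCgroup2UnifiedMode` is a package-level `var`, a test can swap it out and restore it afterwards; a minimal sketch of the pattern (the PR's own helper, `setCgroupVersionDuringTest`, is defined in the test file below):

```go
// Sketch only: stub cgroup v2 detection for the duration of a test.
orig := isCgroup2UnifiedMode
defer func() { isCgroup2UnifiedMode = orig }()
isCgroup2UnifiedMode = func() bool { return true } // pretend the host runs cgroup v2
```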
type swapConfigurationHelper struct {
	machineInfo cadvisorv1.MachineInfo
}

func newSwapConfigurationHelper(machineInfo cadvisorv1.MachineInfo) *swapConfigurationHelper {
	return &swapConfigurationHelper{machineInfo: machineInfo}
}

func (m swapConfigurationHelper) ConfigureLimitedSwap(lcr *runtimeapi.LinuxContainerResources, pod *v1.Pod, container *v1.Container) {
	podQos := kubeapiqos.GetPodQOS(pod)
	containerDoesNotRequestMemory := container.Resources.Requests.Memory().IsZero() && container.Resources.Limits.Memory().IsZero()
	memoryRequestEqualsToLimit := container.Resources.Requests.Memory().Cmp(*container.Resources.Limits.Memory()) == 0

	if podQos != v1.PodQOSBurstable || containerDoesNotRequestMemory || !isCgroup2UnifiedMode() || memoryRequestEqualsToLimit {
		m.ConfigureNoSwap(lcr)
		return
	}

	containerMemoryRequest := container.Resources.Requests.Memory()
	swapLimit, err := calcSwapForBurstablePods(containerMemoryRequest.Value(), int64(m.machineInfo.MemoryCapacity), int64(m.machineInfo.SwapCapacity))
	if err != nil {
		klog.ErrorS(err, "cannot calculate swap allocation amount; disallowing swap")
		m.ConfigureNoSwap(lcr)
		return
	}

	m.configureSwap(lcr, swapLimit)
}

func (m swapConfigurationHelper) ConfigureNoSwap(lcr *runtimeapi.LinuxContainerResources) {
	if !isCgroup2UnifiedMode() {
		// memorySwapLimit = total permitted memory+swap; if equal to memory limit, => 0 swap above memory limit
		// Some swapping is still possible.
		// Note that if memory limit is 0, memory swap limit is ignored.
		lcr.MemorySwapLimitInBytes = lcr.MemoryLimitInBytes
		return
	}

	m.configureSwap(lcr, 0)
}

func (m swapConfigurationHelper) ConfigureUnlimitedSwap(lcr *runtimeapi.LinuxContainerResources) {
	if !isCgroup2UnifiedMode() {
		m.ConfigureNoSwap(lcr)
		return
	}

	if lcr.Unified == nil {
		lcr.Unified = map[string]string{}
	}

	lcr.Unified[cm.Cgroup2MaxSwapFilename] = "max"
}

func (m swapConfigurationHelper) configureSwap(lcr *runtimeapi.LinuxContainerResources, swapMemory int64) {
	if !isCgroup2UnifiedMode() {
		klog.ErrorS(fmt.Errorf("swap configuration is not supported with cgroup v1"), "swap configuration under cgroup v1 is unexpected")
		return
	}

	if lcr.Unified == nil {
		lcr.Unified = map[string]string{}
	}

	lcr.Unified[cm.Cgroup2MaxSwapFilename] = fmt.Sprintf("%d", swapMemory)
}

// The swap limit is calculated as (<containerMemoryRequest>/<nodeTotalMemory>)*<totalPodsSwapAvailable>.
// For more info, please look at the following KEP: https://kep.k8s.io/2400
func calcSwapForBurstablePods(containerMemoryRequest, nodeTotalMemory, totalPodsSwapAvailable int64) (int64, error) {
	if nodeTotalMemory <= 0 {
		return 0, fmt.Errorf("total node memory is 0")
	}
	if containerMemoryRequest > nodeTotalMemory {
		return 0, fmt.Errorf("container request %d is larger than total node memory %d", containerMemoryRequest, nodeTotalMemory)
	}

	containerMemoryProportion := float64(containerMemoryRequest) / float64(nodeTotalMemory)
	swapAllocation := containerMemoryProportion * float64(totalPodsSwapAvailable)

	return int64(swapAllocation), nil
}
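A worked example of this proportional split, using the same capacities as the unit test below (40GiB of memory, 5GiB of swap): a container requesting 1GiB gets 1/40 of the node's swap.

```go
// Sketch only: numbers are illustrative.
// proportion = 1GiB / 40GiB = 0.025; swap = 0.025 * 5GiB = 128MiB.
limit, err := calcSwapForBurstablePods(1<<30, 40<<30, 5<<30)
fmt.Println(limit, err) // 134217728 <nil>
```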

View File

@@ -21,6 +21,9 @@ package kuberuntime
import (
	"context"
+	"fmt"
+	"k8s.io/kubernetes/pkg/kubelet/cm"
+	"k8s.io/kubernetes/pkg/kubelet/types"
	"math"
	"os"
	"reflect"
@@ -38,7 +41,6 @@ import (
	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
	"k8s.io/kubernetes/pkg/features"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
-	kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
)

func makeExpectedConfig(m *kubeGenericRuntimeManager, pod *v1.Pod, containerIndex int, enforceMemoryQoS bool) *runtimeapi.ContainerConfig {
@@ -249,6 +251,7 @@ func TestCalculateLinuxResources(t *testing.T) {
		cpuLim        *resource.Quantity
		memLim        *resource.Quantity
		expected      *runtimeapi.LinuxContainerResources
+		cgroupVersion CgroupVersion
	}{
		{
			name: "Request128MBLimit256MB",
@@ -261,6 +264,7 @@ func TestCalculateLinuxResources(t *testing.T) {
				CpuShares:          1024,
				MemoryLimitInBytes: 134217728,
			},
+			cgroupVersion: cgroupV1,
		},
		{
			name: "RequestNoMemory",
@@ -273,6 +277,7 @@ func TestCalculateLinuxResources(t *testing.T) {
				CpuShares:          2048,
				MemoryLimitInBytes: 0,
			},
+			cgroupVersion: cgroupV1,
		},
		{
			name: "RequestNilCPU",
@@ -284,6 +289,7 @@ func TestCalculateLinuxResources(t *testing.T) {
				CpuShares:          2048,
				MemoryLimitInBytes: 0,
			},
+			cgroupVersion: cgroupV1,
		},
		{
			name: "RequestZeroCPU",
@@ -296,9 +302,66 @@ func TestCalculateLinuxResources(t *testing.T) {
				CpuShares:          2,
				MemoryLimitInBytes: 0,
			},
+			cgroupVersion: cgroupV1,
+		},
+		{
+			name:   "Request128MBLimit256MB",
+			cpuReq: generateResourceQuantity("1"),
+			cpuLim: generateResourceQuantity("2"),
+			memLim: generateResourceQuantity("128Mi"),
+			expected: &runtimeapi.LinuxContainerResources{
+				CpuPeriod:          100000,
+				CpuQuota:           200000,
+				CpuShares:          1024,
+				MemoryLimitInBytes: 134217728,
+				Unified:            map[string]string{"memory.oom.group": "1"},
+			},
+			cgroupVersion: cgroupV2,
+		},
+		{
+			name:   "RequestNoMemory",
+			cpuReq: generateResourceQuantity("2"),
+			cpuLim: generateResourceQuantity("8"),
+			memLim: generateResourceQuantity("0"),
+			expected: &runtimeapi.LinuxContainerResources{
+				CpuPeriod:          100000,
+				CpuQuota:           800000,
+				CpuShares:          2048,
+				MemoryLimitInBytes: 0,
+				Unified:            map[string]string{"memory.oom.group": "1"},
+			},
+			cgroupVersion: cgroupV2,
+		},
+		{
+			name:   "RequestNilCPU",
+			cpuLim: generateResourceQuantity("2"),
+			memLim: generateResourceQuantity("0"),
+			expected: &runtimeapi.LinuxContainerResources{
+				CpuPeriod:          100000,
+				CpuQuota:           200000,
+				CpuShares:          2048,
+				MemoryLimitInBytes: 0,
+				Unified:            map[string]string{"memory.oom.group": "1"},
+			},
+			cgroupVersion: cgroupV2,
+		},
+		{
+			name:   "RequestZeroCPU",
+			cpuReq: generateResourceQuantity("0"),
+			cpuLim: generateResourceQuantity("2"),
+			memLim: generateResourceQuantity("0"),
+			expected: &runtimeapi.LinuxContainerResources{
+				CpuPeriod:          100000,
+				CpuQuota:           200000,
+				CpuShares:          2,
+				MemoryLimitInBytes: 0,
+				Unified:            map[string]string{"memory.oom.group": "1"},
+			},
+			cgroupVersion: cgroupV2,
		},
	}

	for _, test := range tests {
+		setCgroupVersionDuringTest(test.cgroupVersion)
		linuxContainerResources := m.calculateLinuxResources(test.cpuReq, test.cpuLim, test.memLim)
		assert.Equal(t, test.expected, linuxContainerResources)
	}
@@ -634,96 +697,6 @@ func TestGenerateLinuxContainerConfigNamespaces(t *testing.T) {
	}
}

-func TestGenerateLinuxContainerConfigSwap(t *testing.T) {
-	defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeSwap, true)()
-	_, _, m, err := createTestRuntimeManager()
-	if err != nil {
-		t.Fatalf("error creating test RuntimeManager: %v", err)
-	}
-	m.machineInfo.MemoryCapacity = 1000000
-	containerName := "test"
-
-	for _, tc := range []struct {
-		name        string
-		swapSetting string
-		pod         *v1.Pod
-		expected    int64
-	}{
-		{
-			name: "config unset, memory limit set",
-			// no swap setting
-			pod: &v1.Pod{
-				Spec: v1.PodSpec{
-					Containers: []v1.Container{{
-						Name: containerName,
-						Resources: v1.ResourceRequirements{
-							Limits: v1.ResourceList{
-								"memory": resource.MustParse("1000"),
-							},
-							Requests: v1.ResourceList{
-								"memory": resource.MustParse("1000"),
-							},
-						},
-					}},
-				},
-			},
-			expected: 1000,
-		},
-		{
-			name: "config unset, no memory limit",
-			// no swap setting
-			pod: &v1.Pod{
-				Spec: v1.PodSpec{
-					Containers: []v1.Container{
-						{Name: containerName},
-					},
-				},
-			},
-			expected: 0,
-		},
-		{
-			// Note: behaviour will be the same as previous two cases
-			name:        "config set to LimitedSwap, memory limit set",
-			swapSetting: kubelettypes.LimitedSwap,
-			pod: &v1.Pod{
-				Spec: v1.PodSpec{
-					Containers: []v1.Container{{
-						Name: containerName,
-						Resources: v1.ResourceRequirements{
-							Limits: v1.ResourceList{
-								"memory": resource.MustParse("1000"),
-							},
-							Requests: v1.ResourceList{
-								"memory": resource.MustParse("1000"),
-							},
-						},
-					}},
-				},
-			},
-			expected: 1000,
-		},
-		{
-			name:        "UnlimitedSwap enabled",
-			swapSetting: kubelettypes.UnlimitedSwap,
-			pod: &v1.Pod{
-				Spec: v1.PodSpec{
-					Containers: []v1.Container{
-						{Name: containerName},
-					},
-				},
-			},
-			expected: -1,
-		},
-	} {
-		t.Run(tc.name, func(t *testing.T) {
-			m.memorySwapBehavior = tc.swapSetting
-			actual, err := m.generateLinuxContainerConfig(&tc.pod.Spec.Containers[0], tc.pod, nil, "", nil, false)
-			assert.NoError(t, err)
-			assert.Equal(t, tc.expected, actual.Resources.MemorySwapLimitInBytes, "memory swap config for %s", tc.name)
-		})
-	}
-}

func TestGenerateLinuxContainerResources(t *testing.T) {
	_, _, m, err := createTestRuntimeManager()
	assert.NoError(t, err)
@@ -875,6 +848,10 @@ func TestGenerateLinuxContainerResources(t *testing.T) {
		if tc.scalingFg {
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
		}
+
+		setCgroupVersionDuringTest(cgroupV1)
+		tc.expected.MemorySwapLimitInBytes = tc.expected.MemoryLimitInBytes
+
		pod.Spec.Containers[0].Resources = v1.ResourceRequirements{Limits: tc.limits, Requests: tc.requests}
		if len(tc.cStatus) > 0 {
			pod.Status.ContainerStatuses = tc.cStatus
@@ -888,3 +865,289 @@ func TestGenerateLinuxContainerResources(t *testing.T) {
	}
	//TODO(vinaykul,InPlacePodVerticalScaling): Add unit tests for cgroup v1 & v2
}
func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
_, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
m.machineInfo.MemoryCapacity = 42949672960 // 40GiB == 40 * 1024^3 bytes
m.machineInfo.SwapCapacity = 5368709120    // 5GiB == 5 * 1024^3 bytes
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "foo",
Namespace: "bar",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "c1",
},
{
Name: "c2",
},
},
},
Status: v1.PodStatus{},
}
expectNoSwap := func(cgroupVersion CgroupVersion, resources ...*runtimeapi.LinuxContainerResources) {
const msg = "container is expected to not have swap access"
for _, r := range resources {
switch cgroupVersion {
case cgroupV1:
assert.Equal(t, r.MemoryLimitInBytes, r.MemorySwapLimitInBytes, msg)
case cgroupV2:
assert.Equal(t, "0", r.Unified[cm.Cgroup2MaxSwapFilename], msg)
}
}
}
expectUnlimitedSwap := func(cgroupVersion CgroupVersion, resources ...*runtimeapi.LinuxContainerResources) {
const msg = "container is expected to have unlimited swap access"
for _, r := range resources {
switch cgroupVersion {
case cgroupV1:
assert.Equal(t, int64(-1), r.MemorySwapLimitInBytes, msg)
case cgroupV2:
assert.Equal(t, "max", r.Unified[cm.Cgroup2MaxSwapFilename], msg)
}
}
}
expectSwap := func(cgroupVersion CgroupVersion, swapBytesExpected int64, resources *runtimeapi.LinuxContainerResources) {
msg := fmt.Sprintf("container swap is expected to be limited by %d bytes", swapBytesExpected)
switch cgroupVersion {
case cgroupV1:
assert.Equal(t, resources.MemoryLimitInBytes+swapBytesExpected, resources.MemorySwapLimitInBytes, msg)
case cgroupV2:
assert.Equal(t, fmt.Sprintf("%d", swapBytesExpected), resources.Unified[cm.Cgroup2MaxSwapFilename], msg)
}
}
calcSwapForBurstablePods := func(containerMemoryRequest int64) int64 {
swapSize, err := calcSwapForBurstablePods(containerMemoryRequest, int64(m.machineInfo.MemoryCapacity), int64(m.machineInfo.SwapCapacity))
assert.NoError(t, err)
return swapSize
}
for _, tc := range []struct {
name string
cgroupVersion CgroupVersion
qosClass v1.PodQOSClass
nodeSwapFeatureGateEnabled bool
swapBehavior string
addContainerWithoutRequests bool
addGuaranteedContainer bool
}{
// With cgroup v1
{
name: "cgroups v1, LimitedSwap, Burstable QoS",
cgroupVersion: cgroupV1,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.LimitedSwap,
},
{
name: "cgroups v1, UnlimitedSwap, Burstable QoS",
cgroupVersion: cgroupV1,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.UnlimitedSwap,
},
{
name: "cgroups v1, LimitedSwap, Best-effort QoS",
cgroupVersion: cgroupV1,
qosClass: v1.PodQOSBestEffort,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.LimitedSwap,
},
// With feature gate turned off
{
name: "NodeSwap feature gate turned off, cgroups v2, LimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: false,
swapBehavior: types.LimitedSwap,
},
{
name: "NodeSwap feature gate turned off, cgroups v2, UnlimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: false,
swapBehavior: types.UnlimitedSwap,
},
// With no swapBehavior, UnlimitedSwap should be the default
{
name: "With no swapBehavior - UnlimitedSwap should be the default",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBestEffort,
nodeSwapFeatureGateEnabled: true,
swapBehavior: "",
},
// With Guaranteed and Best-effort QoS
{
name: "Best-effort Qos, cgroups v2, LimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.LimitedSwap,
},
{
name: "Best-effort Qos, cgroups v2, UnlimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.UnlimitedSwap,
},
{
name: "Guaranteed Qos, cgroups v2, LimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSGuaranteed,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.LimitedSwap,
},
{
name: "Guaranteed Qos, cgroups v2, UnlimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSGuaranteed,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.UnlimitedSwap,
},
// With a "guaranteed" container (when memory requests equal to limits)
{
name: "Burstable Qos, cgroups v2, LimitedSwap, with a guaranteed container",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.LimitedSwap,
addContainerWithoutRequests: false,
addGuaranteedContainer: true,
},
{
name: "Burstable Qos, cgroups v2, UnlimitedSwap, with a guaranteed container",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.UnlimitedSwap,
addContainerWithoutRequests: false,
addGuaranteedContainer: true,
},
// Swap is expected to be allocated
{
name: "Burstable Qos, cgroups v2, LimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.LimitedSwap,
addContainerWithoutRequests: false,
addGuaranteedContainer: false,
},
{
name: "Burstable Qos, cgroups v2, UnlimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.UnlimitedSwap,
addContainerWithoutRequests: false,
addGuaranteedContainer: false,
},
{
name: "Burstable Qos, cgroups v2, LimitedSwap, with a container with no requests",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.LimitedSwap,
addContainerWithoutRequests: true,
addGuaranteedContainer: false,
},
{
name: "Burstable Qos, cgroups v2, UnlimitedSwap, with a container with no requests",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.UnlimitedSwap,
addContainerWithoutRequests: true,
addGuaranteedContainer: false,
},
} {
t.Run(tc.name, func(t *testing.T) {
setCgroupVersionDuringTest(tc.cgroupVersion)
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeSwap, tc.nodeSwapFeatureGateEnabled)()
m.memorySwapBehavior = tc.swapBehavior
var resourceReqsC1, resourceReqsC2 v1.ResourceRequirements
switch tc.qosClass {
case v1.PodQOSBurstable:
resourceReqsC1 = v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi")},
}
if !tc.addContainerWithoutRequests {
resourceReqsC2 = v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi")},
}
if tc.addGuaranteedContainer {
resourceReqsC2.Limits = v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi")}
}
}
case v1.PodQOSGuaranteed:
resourceReqsC1 = v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi"), v1.ResourceCPU: resource.MustParse("1")},
Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi"), v1.ResourceCPU: resource.MustParse("1")},
}
resourceReqsC2 = v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi"), v1.ResourceCPU: resource.MustParse("1")},
Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi"), v1.ResourceCPU: resource.MustParse("1")},
}
}
pod.Spec.Containers[0].Resources = resourceReqsC1
pod.Spec.Containers[1].Resources = resourceReqsC2
resourcesC1 := m.generateLinuxContainerResources(pod, &pod.Spec.Containers[0], false)
resourcesC2 := m.generateLinuxContainerResources(pod, &pod.Spec.Containers[1], false)
if !tc.nodeSwapFeatureGateEnabled || tc.cgroupVersion == cgroupV1 || (tc.swapBehavior == types.LimitedSwap && tc.qosClass != v1.PodQOSBurstable) {
expectNoSwap(tc.cgroupVersion, resourcesC1, resourcesC2)
return
}
if tc.swapBehavior == types.UnlimitedSwap || tc.swapBehavior == "" {
expectUnlimitedSwap(tc.cgroupVersion, resourcesC1, resourcesC2)
return
}
c1ExpectedSwap := calcSwapForBurstablePods(resourceReqsC1.Requests.Memory().Value())
c2ExpectedSwap := int64(0)
if !tc.addContainerWithoutRequests && !tc.addGuaranteedContainer {
c2ExpectedSwap = calcSwapForBurstablePods(resourceReqsC2.Requests.Memory().Value())
}
expectSwap(tc.cgroupVersion, c1ExpectedSwap, resourcesC1)
expectSwap(tc.cgroupVersion, c2ExpectedSwap, resourcesC2)
})
}
}
type CgroupVersion string
const (
cgroupV1 CgroupVersion = "v1"
cgroupV2 CgroupVersion = "v2"
)
func setCgroupVersionDuringTest(version CgroupVersion) {
isCgroup2UnifiedMode = func() bool {
return version == cgroupV2
}
}

View File

@@ -38,17 +38,8 @@ func TestApplySandboxResources(t *testing.T) {
		Linux: &runtimeapi.LinuxPodSandboxConfig{},
	}
-	require.NoError(t, err)
-
-	tests := []struct {
-		description      string
-		pod              *v1.Pod
-		expectedResource *runtimeapi.LinuxContainerResources
-		expectedOverhead *runtimeapi.LinuxContainerResources
-	}{
-		{
-			description: "pod with overhead defined",
-			pod: &v1.Pod{
+	getPodWithOverhead := func() *v1.Pod {
+		return &v1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				UID:  "12345678",
				Name: "bar",
@@ -74,23 +65,10 @@ func TestApplySandboxResources(t *testing.T) {
					v1.ResourceCPU: resource.MustParse("1"),
				},
			},
-		},
-			expectedResource: &runtimeapi.LinuxContainerResources{
-				MemoryLimitInBytes: 268435456,
-				CpuPeriod:          100000,
-				CpuQuota:           400000,
-				CpuShares:          2048,
-			},
-			expectedOverhead: &runtimeapi.LinuxContainerResources{
-				MemoryLimitInBytes: 134217728,
-				CpuPeriod:          100000,
-				CpuQuota:           100000,
-				CpuShares:          1024,
-			},
-		},
-		{
-			description: "pod without overhead defined",
-			pod: &v1.Pod{
+		}
+	}
+	getPodWithoutOverhead := func() *v1.Pod {
+		return &v1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				UID:  "12345678",
				Name: "bar",
@@ -110,7 +88,38 @@ func TestApplySandboxResources(t *testing.T) {
				},
			},
		},
+	}
+
+	require.NoError(t, err)
+
+	tests := []struct {
+		description      string
+		pod              *v1.Pod
+		expectedResource *runtimeapi.LinuxContainerResources
+		expectedOverhead *runtimeapi.LinuxContainerResources
+		cgroupVersion    CgroupVersion
+	}{
+		{
+			description: "pod with overhead defined",
+			pod:         getPodWithOverhead(),
+			expectedResource: &runtimeapi.LinuxContainerResources{
+				MemoryLimitInBytes: 268435456,
+				CpuPeriod:          100000,
+				CpuQuota:           400000,
+				CpuShares:          2048,
+			},
+			expectedOverhead: &runtimeapi.LinuxContainerResources{
+				MemoryLimitInBytes: 134217728,
+				CpuPeriod:          100000,
+				CpuQuota:           100000,
+				CpuShares:          1024,
+			},
+			cgroupVersion: cgroupV1,
+		},
+		{
+			description: "pod without overhead defined",
+			pod:         getPodWithoutOverhead(),
			expectedResource: &runtimeapi.LinuxContainerResources{
				MemoryLimitInBytes: 268435456,
				CpuPeriod:          100000,
@@ -118,10 +127,45 @@ func TestApplySandboxResources(t *testing.T) {
				CpuShares:          2,
			},
			expectedOverhead: &runtimeapi.LinuxContainerResources{},
+			cgroupVersion:    cgroupV1,
+		},
+		{
+			description: "pod with overhead defined",
+			pod:         getPodWithOverhead(),
+			expectedResource: &runtimeapi.LinuxContainerResources{
+				MemoryLimitInBytes: 268435456,
+				CpuPeriod:          100000,
+				CpuQuota:           400000,
+				CpuShares:          2048,
+				Unified:            map[string]string{"memory.oom.group": "1"},
+			},
+			expectedOverhead: &runtimeapi.LinuxContainerResources{
+				MemoryLimitInBytes: 134217728,
+				CpuPeriod:          100000,
+				CpuQuota:           100000,
+				CpuShares:          1024,
+				Unified:            map[string]string{"memory.oom.group": "1"},
+			},
+			cgroupVersion: cgroupV2,
+		},
+		{
+			description: "pod without overhead defined",
+			pod:         getPodWithoutOverhead(),
+			expectedResource: &runtimeapi.LinuxContainerResources{
+				MemoryLimitInBytes: 268435456,
+				CpuPeriod:          100000,
+				CpuQuota:           0,
+				CpuShares:          2,
+				Unified:            map[string]string{"memory.oom.group": "1"},
+			},
+			expectedOverhead: &runtimeapi.LinuxContainerResources{},
+			cgroupVersion:    cgroupV2,
+		},
	}

	for i, test := range tests {
+		setCgroupVersionDuringTest(test.cgroupVersion)
		m.applySandboxResources(test.pod, config)
		assert.Equal(t, test.expectedResource, config.Linux.Resources, "TestCase[%d]: %s", i, test.description)
		assert.Equal(t, test.expectedOverhead, config.Linux.Overhead, "TestCase[%d]: %s", i, test.description)

test/e2e_node/swap_test.go (new file, +254 lines)
View File

@@ -0,0 +1,254 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package e2enode
import (
"context"
"fmt"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/rand"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/test/e2e/framework"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
testutils "k8s.io/kubernetes/test/utils"
admissionapi "k8s.io/pod-security-admission/api"
"path/filepath"
"strconv"
)
const (
cgroupBasePath = "/sys/fs/cgroup/"
cgroupV1SwapLimitFile = "/memory/memory.memsw.limit_in_bytes"
cgroupV2SwapLimitFile = "memory.swap.max"
cgroupV1MemLimitFile = "/memory/memory.limit_in_bytes"
)
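These two paths encode a real semantic difference between the cgroup versions: v1's `memory.memsw.limit_in_bytes` caps memory plus swap combined, so "no swap" means the memsw limit equals the memory limit, whereas v2's `memory.swap.max` caps swap alone, so "no swap" is simply `0` and "unlimited" is `max`. The expectations in the helpers below follow directly from this.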
var _ = SIGDescribe("Swap [NodeConformance][LinuxOnly]", func() {
f := framework.NewDefaultFramework("swap-test")
f.NamespacePodSecurityEnforceLevel = admissionapi.LevelBaseline
ginkgo.DescribeTable("with configuration", func(qosClass v1.PodQOSClass, memoryRequestEqualLimit bool) {
ginkgo.By(fmt.Sprintf("Creating a pod of QOS class %s. memoryRequestEqualLimit: %t", qosClass, memoryRequestEqualLimit))
pod := getSwapTestPod(f, qosClass, memoryRequestEqualLimit)
pod = runPodAndWaitUntilScheduled(f, pod)
isCgroupV2 := isPodCgroupV2(f, pod)
isLimitedSwap := isLimitedSwap(f, pod)
if !isSwapFeatureGateEnabled() || !isCgroupV2 || (isLimitedSwap && (qosClass != v1.PodQOSBurstable || memoryRequestEqualLimit)) {
ginkgo.By(fmt.Sprintf("Expecting no swap. feature gate on? %t isCgroupV2? %t is QoS burstable? %t", isSwapFeatureGateEnabled(), isCgroupV2, qosClass == v1.PodQOSBurstable))
expectNoSwap(f, pod, isCgroupV2)
return
}
if !isLimitedSwap {
ginkgo.By("expecting unlimited swap")
expectUnlimitedSwap(f, pod, isCgroupV2)
return
}
ginkgo.By("expecting limited swap")
expectedSwapLimit := calcSwapForBurstablePod(f, pod)
expectLimitedSwap(f, pod, expectedSwapLimit)
},
ginkgo.Entry("QOS Best-effort", v1.PodQOSBestEffort, false),
ginkgo.Entry("QOS Burstable", v1.PodQOSBurstable, false),
ginkgo.Entry("QOS Burstable with memory request equals to limit", v1.PodQOSBurstable, true),
ginkgo.Entry("QOS Guaranteed", v1.PodQOSGuaranteed, false),
)
})
// Note that memoryRequestEqualLimit is effective only when qosClass is PodQOSBurstable.
func getSwapTestPod(f *framework.Framework, qosClass v1.PodQOSClass, memoryRequestEqualLimit bool) *v1.Pod {
podMemoryAmount := resource.MustParse("128Mi")
var resources v1.ResourceRequirements
switch qosClass {
case v1.PodQOSBestEffort:
// nothing to do in this case
case v1.PodQOSBurstable:
resources = v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: podMemoryAmount,
},
}
if memoryRequestEqualLimit {
resources.Limits = resources.Requests
}
case v1.PodQOSGuaranteed:
resources = v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("200m"),
v1.ResourceMemory: podMemoryAmount,
},
}
resources.Requests = resources.Limits
}
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pod-swap-" + rand.String(5),
Namespace: f.Namespace.Name,
},
Spec: v1.PodSpec{
RestartPolicy: v1.RestartPolicyAlways,
Containers: []v1.Container{
{
Name: "busybox-container",
Image: busyboxImage,
Command: []string{"sleep", "600"},
Resources: resources,
},
},
},
}
return pod
}
func runPodAndWaitUntilScheduled(f *framework.Framework, pod *v1.Pod) *v1.Pod {
ginkgo.By("running swap test pod")
podClient := e2epod.NewPodClient(f)
pod = podClient.CreateSync(context.Background(), pod)
pod, err := podClient.Get(context.Background(), pod.Name, metav1.GetOptions{})
framework.ExpectNoError(err)
isReady, err := testutils.PodRunningReady(pod)
framework.ExpectNoError(err)
gomega.ExpectWithOffset(1, isReady).To(gomega.BeTrue(), "pod should be ready")
return pod
}
func isSwapFeatureGateEnabled() bool {
ginkgo.By("figuring if NodeSwap feature gate is turned on")
return utilfeature.DefaultFeatureGate.Enabled(features.NodeSwap)
}
func readCgroupFile(f *framework.Framework, pod *v1.Pod, filename string) string {
filePath := filepath.Join(cgroupBasePath, filename)
ginkgo.By("reading cgroup file " + filePath)
output := e2epod.ExecCommandInContainer(f, pod.Name, pod.Spec.Containers[0].Name, "/bin/sh", "-ec", "cat "+filePath)
return output
}
func isPodCgroupV2(f *framework.Framework, pod *v1.Pod) bool {
ginkgo.By("figuring is test pod runs cgroup v2")
output := e2epod.ExecCommandInContainer(f, pod.Name, pod.Spec.Containers[0].Name, "/bin/sh", "-ec", `if test -f "/sys/fs/cgroup/cgroup.controllers"; then echo "true"; else echo "false"; fi`)
return output == "true"
}
func expectNoSwap(f *framework.Framework, pod *v1.Pod, isCgroupV2 bool) {
if isCgroupV2 {
swapLimit := readCgroupFile(f, pod, cgroupV2SwapLimitFile)
gomega.ExpectWithOffset(1, swapLimit).To(gomega.Equal("0"), "max swap allowed should be zero")
} else {
swapPlusMemLimit := readCgroupFile(f, pod, cgroupV1SwapLimitFile)
memLimit := readCgroupFile(f, pod, cgroupV1MemLimitFile)
gomega.ExpectWithOffset(1, swapPlusMemLimit).ToNot(gomega.BeEmpty())
gomega.ExpectWithOffset(1, swapPlusMemLimit).To(gomega.Equal(memLimit))
}
}
func expectUnlimitedSwap(f *framework.Framework, pod *v1.Pod, isCgroupV2 bool) {
if isCgroupV2 {
swapLimit := readCgroupFile(f, pod, cgroupV2SwapLimitFile)
gomega.ExpectWithOffset(1, swapLimit).To(gomega.Equal("max"), "max swap allowed should be \"max\"")
} else {
swapPlusMemLimit := readCgroupFile(f, pod, cgroupV1SwapLimitFile)
gomega.ExpectWithOffset(1, swapPlusMemLimit).To(gomega.Equal("-1"))
}
}
// supports v2 only as v1 shouldn't support LimitedSwap
func expectLimitedSwap(f *framework.Framework, pod *v1.Pod, expectedSwapLimit int64) {
swapLimitStr := readCgroupFile(f, pod, cgroupV2SwapLimitFile)
swapLimit, err := strconv.Atoi(swapLimitStr)
framework.ExpectNoError(err, "cannot convert swap limit to int")
// cgroup values are always aligned w.r.t. the page size, which is usually 4Ki
const cgroupAlignment int64 = 4 * 1024 // 4Ki
const errMsg = "swap limitation is not as expected"
gomega.ExpectWithOffset(1, int64(swapLimit)).To(
	gomega.And(
		gomega.BeNumerically(">=", expectedSwapLimit-cgroupAlignment),
		gomega.BeNumerically("<=", expectedSwapLimit+cgroupAlignment),
	),
	errMsg,
)
}
func getSwapCapacity(f *framework.Framework, pod *v1.Pod) int64 {
output := e2epod.ExecCommandInContainer(f, pod.Name, pod.Spec.Containers[0].Name, "/bin/sh", "-ec", "free -b | grep Swap | xargs | cut -d\" \" -f2")
swapCapacity, err := strconv.Atoi(output)
framework.ExpectNoError(err, "cannot convert swap size to int")
ginkgo.By(fmt.Sprintf("providing swap capacity: %d", swapCapacity))
return int64(swapCapacity)
}
func getMemoryCapacity(f *framework.Framework, pod *v1.Pod) int64 {
nodes, err := f.ClientSet.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{})
framework.ExpectNoError(err, "failed listing nodes")
for _, node := range nodes.Items {
if node.Name != pod.Spec.NodeName {
continue
}
memCapacity := node.Status.Capacity[v1.ResourceMemory]
return memCapacity.Value()
}
framework.ExpectNoError(fmt.Errorf("node %s wasn't found", pod.Spec.NodeName))
return 0
}
func calcSwapForBurstablePod(f *framework.Framework, pod *v1.Pod) int64 {
nodeMemoryCapacity := getMemoryCapacity(f, pod)
nodeSwapCapacity := getSwapCapacity(f, pod)
containerMemoryRequest := pod.Spec.Containers[0].Resources.Requests.Memory().Value()
containerMemoryProportion := float64(containerMemoryRequest) / float64(nodeMemoryCapacity)
swapAllocation := containerMemoryProportion * float64(nodeSwapCapacity)
ginkgo.By(fmt.Sprintf("Calculating swap for burstable pods: nodeMemoryCapacity: %d, nodeSwapCapacity: %d, containerMemoryRequest: %d, swapAllocation: %d",
nodeMemoryCapacity, nodeSwapCapacity, containerMemoryRequest, int64(swapAllocation)))
return int64(swapAllocation)
}
func isLimitedSwap(f *framework.Framework, pod *v1.Pod) bool {
kubeletCfg, err := getCurrentKubeletConfig(context.Background())
framework.ExpectNoError(err, "cannot get kubelet config")
return kubeletCfg.MemorySwap.SwapBehavior == types.LimitedSwap
}