Merge pull request #118764 from iholder101/Swap/burstableQoS-impl

Add full cgroup v2 swap support with automatically calculated swap limit for LimitedSwap and Burstable QoS Pods
Kubernetes Prow Robot 2023-07-17 19:49:07 -07:00 committed by GitHub
commit da2fdf8cc3
10 changed files with 831 additions and 169 deletions
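For orientation: under LimitedSwap on cgroup v2, this PR grants swap only to Burstable QoS containers that request memory and whose request differs from their limit, in proportion to the container's memory request. A minimal standalone sketch of that decision logic (illustrative only, not the PR's own code; the real helpers appear in the kuberuntime hunks below):

func swapLimitFor(qos string, memRequest, memLimit, nodeMemory, nodeSwap int64) int64 {
	// No swap for non-Burstable pods, containers without a memory request,
	// or containers whose request equals their limit.
	if qos != "Burstable" || memRequest == 0 || memRequest == memLimit {
		return 0
	}
	// Proportional share: (request / node memory) * node swap capacity.
	return int64(float64(memRequest) / float64(nodeMemory) * float64(nodeSwap))
}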

@ -47,6 +47,8 @@ CGROUP_DRIVER=${CGROUP_DRIVER:-""}
CGROUP_ROOT=${CGROUP_ROOT:-""}
# owner of client certs, default to current user if not specified
USER=${USER:-$(whoami)}
# If true, LimitedSwap is used instead of UnlimitedSwap (the default)
LIMITED_SWAP=${LIMITED_SWAP:-""}
# required for cni installation
CNI_CONFIG_DIR=${CNI_CONFIG_DIR:-/etc/cni/net.d}
@ -832,6 +834,13 @@ tracing:
EOF
fi
if [[ "$LIMITED_SWAP" == "true" ]]; then
cat <<EOF >> "${TMP_DIR}"/kubelet.yaml
memorySwap:
swapBehavior: LimitedSwap
EOF
fi
{
# authentication
echo "authentication:"

@ -596,8 +596,9 @@ const (
// Allow pods to failover to a different node in case of non graceful node shutdown
NodeOutOfServiceVolumeDetach featuregate.Feature = "NodeOutOfServiceVolumeDetach"
-// owner: @ehashman
+// owner: @iholder101
// alpha: v1.22
+// beta1: v1.28. For more info, please look at the KEP: https://kep.k8s.io/2400.
//
// Permits kubelet to run with swap enabled
NodeSwap featuregate.Feature = "NodeSwap"
@ -1074,7 +1075,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
NodeOutOfServiceVolumeDetach: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.31
-NodeSwap: {Default: false, PreRelease: featuregate.Alpha},
+NodeSwap: {Default: false, PreRelease: featuregate.Beta},
PDBUnhealthyPodEvictionPolicy: {Default: true, PreRelease: featuregate.Beta},
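
Even at beta the gate stays default-off, so swap handling must be enabled explicitly and guarded in code. A trimmed sketch of the guard pattern the kuberuntime hunks below use:

// Assumes: utilfeature "k8s.io/apiserver/pkg/util/feature" and
// kubefeatures "k8s.io/kubernetes/pkg/features".
func swapFeatureEnabled() bool {
	return utilfeature.DefaultFeatureGate.Enabled(kubefeatures.NodeSwap)
}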

@ -45,11 +45,12 @@ import (
const (
// systemdSuffix is the cgroup name suffix for systemd
systemdSuffix string = ".slice"
-// MemoryMin is memory.min for cgroup v2
-MemoryMin string = "memory.min"
-// MemoryHigh is memory.high for cgroup v2
-MemoryHigh string = "memory.high"
+// Cgroup2MemoryMin is memory.min for cgroup v2
+Cgroup2MemoryMin string = "memory.min"
+// Cgroup2MemoryHigh is memory.high for cgroup v2
+Cgroup2MemoryHigh string = "memory.high"
Cgroup2MaxCpuLimit string = "max"
+Cgroup2MaxSwapFilename string = "memory.swap.max"
)
var RootCgroupName = CgroupName([]string{})
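
These constants name cgroup v2 interface files: entries placed in the CRI Unified map are materialized by the container runtime as files in the container's cgroup directory. A hedged illustration (the cgroup path is made up; in reality the runtime, not kubelet code like this, performs the writes):

unified := map[string]string{
	"memory.min":      "134217728", // Cgroup2MemoryMin: memory protected from reclaim
	"memory.swap.max": "67108864",  // Cgroup2MaxSwapFilename: hard swap limit
}
for file, value := range unified {
	// Sketch only; needs "os" and "path/filepath".
	path := filepath.Join("/sys/fs/cgroup/kubepods.slice/example.scope", file)
	_ = os.WriteFile(path, []byte(value), 0o644)
}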

@ -196,7 +196,7 @@ func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64,
}
if memoryMin > 0 {
result.Unified = map[string]string{
-MemoryMin: strconv.FormatInt(memoryMin, 10),
+Cgroup2MemoryMin: strconv.FormatInt(memoryMin, 10),
}
}
}

@ -147,7 +147,7 @@ func enforceExistingCgroup(cgroupManager CgroupManager, cName CgroupName, rl v1.
if rp.Unified == nil {
rp.Unified = make(map[string]string)
}
-rp.Unified[MemoryMin] = strconv.FormatInt(*rp.Memory, 10)
+rp.Unified[Cgroup2MemoryMin] = strconv.FormatInt(*rp.Memory, 10)
}
}

@ -292,7 +292,7 @@ func (m *qosContainerManagerImpl) setMemoryQoS(configs map[v1.PodQOSClass]*Cgrou
if configs[v1.PodQOSBurstable].ResourceParameters.Unified == nil {
configs[v1.PodQOSBurstable].ResourceParameters.Unified = make(map[string]string)
}
-configs[v1.PodQOSBurstable].ResourceParameters.Unified[MemoryMin] = strconv.FormatInt(burstableMin, 10)
+configs[v1.PodQOSBurstable].ResourceParameters.Unified[Cgroup2MemoryMin] = strconv.FormatInt(burstableMin, 10)
klog.V(4).InfoS("MemoryQoS config for qos", "qos", v1.PodQOSBurstable, "memoryMin", burstableMin)
}
@ -300,7 +300,7 @@ func (m *qosContainerManagerImpl) setMemoryQoS(configs map[v1.PodQOSClass]*Cgrou
if configs[v1.PodQOSGuaranteed].ResourceParameters.Unified == nil {
configs[v1.PodQOSGuaranteed].ResourceParameters.Unified = make(map[string]string)
}
-configs[v1.PodQOSGuaranteed].ResourceParameters.Unified[MemoryMin] = strconv.FormatInt(guaranteedMin, 10)
+configs[v1.PodQOSGuaranteed].ResourceParameters.Unified[Cgroup2MemoryMin] = strconv.FormatInt(guaranteedMin, 10)
klog.V(4).InfoS("MemoryQoS config for qos", "qos", v1.PodQOSGuaranteed, "memoryMin", guaranteedMin)
}
}

@ -20,6 +20,9 @@ limitations under the License.
package kuberuntime
import (
"fmt"
cadvisorv1 "github.com/google/cadvisor/info/v1"
kubeapiqos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
"math"
"os"
"strconv"
@ -46,7 +49,7 @@ func (m *kubeGenericRuntimeManager) applyPlatformSpecificContainerConfig(config
enforceMemoryQoS := false
// Set memory.min and memory.high if MemoryQoS enabled with cgroups v2
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.MemoryQoS) &&
-libcontainercgroups.IsCgroup2UnifiedMode() {
+isCgroup2UnifiedMode() {
enforceMemoryQoS = true
}
cl, err := m.generateLinuxContainerConfig(container, pod, uid, username, nsTarget, enforceMemoryQoS)
@ -99,21 +102,17 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerResources(pod *v1.Pod,
lcr.HugepageLimits = GetHugepageLimitsFromResources(container.Resources)
-if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.NodeSwap) {
+if swapConfigurationHelper := newSwapConfigurationHelper(*m.machineInfo); utilfeature.DefaultFeatureGate.Enabled(kubefeatures.NodeSwap) {
// NOTE(ehashman): Behaviour is defined in the opencontainers runtime spec:
// https://github.com/opencontainers/runtime-spec/blob/1c3f411f041711bbeecf35ff7e93461ea6789220/config-linux.md#memory
switch m.memorySwapBehavior {
-case kubelettypes.UnlimitedSwap:
-// -1 = unlimited swap
-lcr.MemorySwapLimitInBytes = -1
case kubelettypes.LimitedSwap:
-fallthrough
+swapConfigurationHelper.ConfigureLimitedSwap(lcr, pod, container)
default:
-// memorySwapLimit = total permitted memory+swap; if equal to memory limit, => 0 swap above memory limit
-// Some swapping is still possible.
-// Note that if memory limit is 0, memory swap limit is ignored.
-lcr.MemorySwapLimitInBytes = lcr.MemoryLimitInBytes
+swapConfigurationHelper.ConfigureUnlimitedSwap(lcr)
}
+} else {
+swapConfigurationHelper.ConfigureNoSwap(lcr)
}
// Set memory.min and memory.high to enforce MemoryQoS
@ -122,7 +121,7 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerResources(pod *v1.Pod,
memoryRequest := container.Resources.Requests.Memory().Value()
memoryLimit := container.Resources.Limits.Memory().Value()
if memoryRequest != 0 {
-unified[cm.MemoryMin] = strconv.FormatInt(memoryRequest, 10)
+unified[cm.Cgroup2MemoryMin] = strconv.FormatInt(memoryRequest, 10)
}
// Guaranteed pods by their QoS definition require that memory request equals memory limit and cpu request equals cpu limit.
@ -148,7 +147,7 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerResources(pod *v1.Pod,
}
}
if memoryHigh != 0 && memoryHigh > memoryRequest {
-unified[cm.MemoryHigh] = strconv.FormatInt(memoryHigh, 10)
+unified[cm.Cgroup2MemoryHigh] = strconv.FormatInt(memoryHigh, 10)
}
}
if len(unified) > 0 {
@ -171,7 +170,7 @@ func (m *kubeGenericRuntimeManager) generateContainerResources(pod *v1.Pod, cont
enforceMemoryQoS := false
// Set memory.min and memory.high if MemoryQoS enabled with cgroups v2
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.MemoryQoS) &&
-libcontainercgroups.IsCgroup2UnifiedMode() {
+isCgroup2UnifiedMode() {
enforceMemoryQoS = true
}
return &runtimeapi.ContainerResources{
@ -216,7 +215,7 @@ func (m *kubeGenericRuntimeManager) calculateLinuxResources(cpuRequest, cpuLimit
}
// runc requires cgroupv2 for unified mode
-if libcontainercgroups.IsCgroup2UnifiedMode() {
+if isCgroup2UnifiedMode() {
resources.Unified = map[string]string{
// Ask the kernel to kill all processes in the container cgroup in case of OOM.
// See memory.oom.group in https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html for
@ -298,3 +297,94 @@ func toKubeContainerResources(statusResources *runtimeapi.ContainerResources) *k
}
return cStatusResources
}
// Note: this function variable is defined here so that unit tests can mock the
// cgroup version by assigning a mocked function to it. Without it, the cgroup
// version would depend solely on the environment running the test.
var isCgroup2UnifiedMode = func() bool {
return libcontainercgroups.IsCgroup2UnifiedMode()
}
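
A hedged example of stubbing this hook in a test. The helper setCgroupVersionDuringTest, defined later in this PR, does the assignment; the save/restore shown here is an addition of this sketch:

func TestSomethingOnCgroupV2(t *testing.T) {
	orig := isCgroup2UnifiedMode
	t.Cleanup(func() { isCgroup2UnifiedMode = orig }) // restore the real probe

	isCgroup2UnifiedMode = func() bool { return true }
	// ... exercise code that branches on the cgroup version ...
}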
type swapConfigurationHelper struct {
machineInfo cadvisorv1.MachineInfo
}
func newSwapConfigurationHelper(machineInfo cadvisorv1.MachineInfo) *swapConfigurationHelper {
return &swapConfigurationHelper{machineInfo: machineInfo}
}
func (m swapConfigurationHelper) ConfigureLimitedSwap(lcr *runtimeapi.LinuxContainerResources, pod *v1.Pod, container *v1.Container) {
podQos := kubeapiqos.GetPodQOS(pod)
containerDoesNotRequestMemory := container.Resources.Requests.Memory().IsZero() && container.Resources.Limits.Memory().IsZero()
memoryRequestEqualsToLimit := container.Resources.Requests.Memory().Cmp(*container.Resources.Limits.Memory()) == 0
if podQos != v1.PodQOSBurstable || containerDoesNotRequestMemory || !isCgroup2UnifiedMode() || memoryRequestEqualsToLimit {
m.ConfigureNoSwap(lcr)
return
}
containerMemoryRequest := container.Resources.Requests.Memory()
swapLimit, err := calcSwapForBurstablePods(containerMemoryRequest.Value(), int64(m.machineInfo.MemoryCapacity), int64(m.machineInfo.SwapCapacity))
if err != nil {
klog.ErrorS(err, "cannot calculate swap allocation amount; disallowing swap")
m.ConfigureNoSwap(lcr)
return
}
m.configureSwap(lcr, swapLimit)
}
func (m swapConfigurationHelper) ConfigureNoSwap(lcr *runtimeapi.LinuxContainerResources) {
if !isCgroup2UnifiedMode() {
// memorySwapLimit = total permitted memory+swap; if equal to memory limit, => 0 swap above memory limit
// Some swapping is still possible.
// Note that if memory limit is 0, memory swap limit is ignored.
lcr.MemorySwapLimitInBytes = lcr.MemoryLimitInBytes
return
}
m.configureSwap(lcr, 0)
}
func (m swapConfigurationHelper) ConfigureUnlimitedSwap(lcr *runtimeapi.LinuxContainerResources) {
if !isCgroup2UnifiedMode() {
m.ConfigureNoSwap(lcr)
return
}
if lcr.Unified == nil {
lcr.Unified = map[string]string{}
}
lcr.Unified[cm.Cgroup2MaxSwapFilename] = "max"
}
func (m swapConfigurationHelper) configureSwap(lcr *runtimeapi.LinuxContainerResources, swapMemory int64) {
if !isCgroup2UnifiedMode() {
klog.ErrorS(fmt.Errorf("swap configuration is not supported with cgroup v1"), "swap configuration under cgroup v1 is unexpected")
return
}
if lcr.Unified == nil {
lcr.Unified = map[string]string{}
}
lcr.Unified[cm.Cgroup2MaxSwapFilename] = fmt.Sprintf("%d", swapMemory)
}
// The swap limit is calculated as (<containerMemoryRequest>/<nodeTotalMemory>)*<totalPodsSwapAvailable>.
// For more info, please look at the following KEP: https://kep.k8s.io/2400
func calcSwapForBurstablePods(containerMemoryRequest, nodeTotalMemory, totalPodsSwapAvailable int64) (int64, error) {
if nodeTotalMemory <= 0 {
return 0, fmt.Errorf("total node memory is 0")
}
if containerMemoryRequest > nodeTotalMemory {
return 0, fmt.Errorf("container request %d is larger than total node memory %d", containerMemoryRequest, nodeTotalMemory)
}
containerMemoryProportion := float64(containerMemoryRequest) / float64(nodeTotalMemory)
swapAllocation := containerMemoryProportion * float64(totalPodsSwapAvailable)
return int64(swapAllocation), nil
}
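
A worked example with the capacities the unit test below configures (40GiB of memory, 5GiB of swap): a container requesting 1GiB receives (1/40) * 5GiB = 128MiB of swap. As an example-test-style sketch:

func ExampleCalcSwapForBurstablePods() {
	const gib = int64(1 << 30)
	swap, err := calcSwapForBurstablePods(1*gib, 40*gib, 5*gib)
	fmt.Println(swap, err)
	// Output: 134217728 <nil>
}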

@ -21,6 +21,9 @@ package kuberuntime
import (
"context"
"fmt"
"k8s.io/kubernetes/pkg/kubelet/cm"
"k8s.io/kubernetes/pkg/kubelet/types"
"math"
"os"
"reflect"
@ -38,7 +41,6 @@ import (
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
"k8s.io/kubernetes/pkg/features"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
)
func makeExpectedConfig(m *kubeGenericRuntimeManager, pod *v1.Pod, containerIndex int, enforceMemoryQoS bool) *runtimeapi.ContainerConfig {
@ -244,11 +246,12 @@ func TestCalculateLinuxResources(t *testing.T) {
}
tests := []struct {
name string
cpuReq *resource.Quantity
cpuLim *resource.Quantity
memLim *resource.Quantity
expected *runtimeapi.LinuxContainerResources
+cgroupVersion CgroupVersion
}{
{
name: "Request128MBLimit256MB",
@ -261,6 +264,7 @@ func TestCalculateLinuxResources(t *testing.T) {
CpuShares: 1024,
MemoryLimitInBytes: 134217728,
},
cgroupVersion: cgroupV1,
},
{
name: "RequestNoMemory",
@ -273,6 +277,7 @@ func TestCalculateLinuxResources(t *testing.T) {
CpuShares: 2048,
MemoryLimitInBytes: 0,
},
cgroupVersion: cgroupV1,
},
{
name: "RequestNilCPU",
@ -284,6 +289,7 @@ func TestCalculateLinuxResources(t *testing.T) {
CpuShares: 2048,
MemoryLimitInBytes: 0,
},
cgroupVersion: cgroupV1,
},
{
name: "RequestZeroCPU",
@ -296,9 +302,66 @@ func TestCalculateLinuxResources(t *testing.T) {
CpuShares: 2,
MemoryLimitInBytes: 0,
},
cgroupVersion: cgroupV1,
},
{
name: "Request128MBLimit256MB",
cpuReq: generateResourceQuantity("1"),
cpuLim: generateResourceQuantity("2"),
memLim: generateResourceQuantity("128Mi"),
expected: &runtimeapi.LinuxContainerResources{
CpuPeriod: 100000,
CpuQuota: 200000,
CpuShares: 1024,
MemoryLimitInBytes: 134217728,
Unified: map[string]string{"memory.oom.group": "1"},
},
cgroupVersion: cgroupV2,
},
{
name: "RequestNoMemory",
cpuReq: generateResourceQuantity("2"),
cpuLim: generateResourceQuantity("8"),
memLim: generateResourceQuantity("0"),
expected: &runtimeapi.LinuxContainerResources{
CpuPeriod: 100000,
CpuQuota: 800000,
CpuShares: 2048,
MemoryLimitInBytes: 0,
Unified: map[string]string{"memory.oom.group": "1"},
},
cgroupVersion: cgroupV2,
},
{
name: "RequestNilCPU",
cpuLim: generateResourceQuantity("2"),
memLim: generateResourceQuantity("0"),
expected: &runtimeapi.LinuxContainerResources{
CpuPeriod: 100000,
CpuQuota: 200000,
CpuShares: 2048,
MemoryLimitInBytes: 0,
Unified: map[string]string{"memory.oom.group": "1"},
},
cgroupVersion: cgroupV2,
},
{
name: "RequestZeroCPU",
cpuReq: generateResourceQuantity("0"),
cpuLim: generateResourceQuantity("2"),
memLim: generateResourceQuantity("0"),
expected: &runtimeapi.LinuxContainerResources{
CpuPeriod: 100000,
CpuQuota: 200000,
CpuShares: 2,
MemoryLimitInBytes: 0,
Unified: map[string]string{"memory.oom.group": "1"},
},
cgroupVersion: cgroupV2,
},
}
for _, test := range tests {
setCgroupVersionDuringTest(test.cgroupVersion)
linuxContainerResources := m.calculateLinuxResources(test.cpuReq, test.cpuLim, test.memLim)
assert.Equal(t, test.expected, linuxContainerResources)
}
@ -634,96 +697,6 @@ func TestGenerateLinuxContainerConfigNamespaces(t *testing.T) {
}
}
-func TestGenerateLinuxContainerConfigSwap(t *testing.T) {
-defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeSwap, true)()
-_, _, m, err := createTestRuntimeManager()
-if err != nil {
-t.Fatalf("error creating test RuntimeManager: %v", err)
-}
-m.machineInfo.MemoryCapacity = 1000000
-containerName := "test"
-for _, tc := range []struct {
-name string
-swapSetting string
-pod *v1.Pod
-expected int64
-}{
-{
-name: "config unset, memory limit set",
-// no swap setting
-pod: &v1.Pod{
-Spec: v1.PodSpec{
-Containers: []v1.Container{{
-Name: containerName,
-Resources: v1.ResourceRequirements{
-Limits: v1.ResourceList{
-"memory": resource.MustParse("1000"),
-},
-Requests: v1.ResourceList{
-"memory": resource.MustParse("1000"),
-},
-},
-}},
-},
-},
-expected: 1000,
-},
-{
-name: "config unset, no memory limit",
-// no swap setting
-pod: &v1.Pod{
-Spec: v1.PodSpec{
-Containers: []v1.Container{
-{Name: containerName},
-},
-},
-},
-expected: 0,
-},
-{
-// Note: behaviour will be the same as previous two cases
-name: "config set to LimitedSwap, memory limit set",
-swapSetting: kubelettypes.LimitedSwap,
-pod: &v1.Pod{
-Spec: v1.PodSpec{
-Containers: []v1.Container{{
-Name: containerName,
-Resources: v1.ResourceRequirements{
-Limits: v1.ResourceList{
-"memory": resource.MustParse("1000"),
-},
-Requests: v1.ResourceList{
-"memory": resource.MustParse("1000"),
-},
-},
-}},
-},
-},
-expected: 1000,
-},
-{
-name: "UnlimitedSwap enabled",
-swapSetting: kubelettypes.UnlimitedSwap,
-pod: &v1.Pod{
-Spec: v1.PodSpec{
-Containers: []v1.Container{
-{Name: containerName},
-},
-},
-},
-expected: -1,
-},
-} {
-t.Run(tc.name, func(t *testing.T) {
-m.memorySwapBehavior = tc.swapSetting
-actual, err := m.generateLinuxContainerConfig(&tc.pod.Spec.Containers[0], tc.pod, nil, "", nil, false)
-assert.NoError(t, err)
-assert.Equal(t, tc.expected, actual.Resources.MemorySwapLimitInBytes, "memory swap config for %s", tc.name)
-})
-}
-}
func TestGenerateLinuxContainerResources(t *testing.T) {
_, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
@ -875,6 +848,10 @@ func TestGenerateLinuxContainerResources(t *testing.T) {
if tc.scalingFg {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
}
setCgroupVersionDuringTest(cgroupV1)
tc.expected.MemorySwapLimitInBytes = tc.expected.MemoryLimitInBytes
pod.Spec.Containers[0].Resources = v1.ResourceRequirements{Limits: tc.limits, Requests: tc.requests}
if len(tc.cStatus) > 0 {
pod.Status.ContainerStatuses = tc.cStatus
@ -888,3 +865,289 @@ func TestGenerateLinuxContainerResources(t *testing.T) {
}
//TODO(vinaykul,InPlacePodVerticalScaling): Add unit tests for cgroup v1 & v2
}
func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
_, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
m.machineInfo.MemoryCapacity = 42949672960 // 40GiB == 40 * 1024^3 bytes
m.machineInfo.SwapCapacity = 5368709120 // 5GiB == 5 * 1024^3 bytes
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "foo",
Namespace: "bar",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "c1",
},
{
Name: "c2",
},
},
},
Status: v1.PodStatus{},
}
expectNoSwap := func(cgroupVersion CgroupVersion, resources ...*runtimeapi.LinuxContainerResources) {
const msg = "container is expected to not have swap access"
for _, r := range resources {
switch cgroupVersion {
case cgroupV1:
assert.Equal(t, r.MemoryLimitInBytes, r.MemorySwapLimitInBytes, msg)
case cgroupV2:
assert.Equal(t, "0", r.Unified[cm.Cgroup2MaxSwapFilename], msg)
}
}
}
expectUnlimitedSwap := func(cgroupVersion CgroupVersion, resources ...*runtimeapi.LinuxContainerResources) {
const msg = "container is expected to have unlimited swap access"
for _, r := range resources {
switch cgroupVersion {
case cgroupV1:
assert.Equal(t, int64(-1), r.MemorySwapLimitInBytes, msg)
case cgroupV2:
assert.Equal(t, "max", r.Unified[cm.Cgroup2MaxSwapFilename], msg)
}
}
}
expectSwap := func(cgroupVersion CgroupVersion, swapBytesExpected int64, resources *runtimeapi.LinuxContainerResources) {
msg := fmt.Sprintf("container swap is expected to be limited by %d bytes", swapBytesExpected)
switch cgroupVersion {
case cgroupV1:
assert.Equal(t, resources.MemoryLimitInBytes+swapBytesExpected, resources.MemorySwapLimitInBytes, msg)
case cgroupV2:
assert.Equal(t, fmt.Sprintf("%d", swapBytesExpected), resources.Unified[cm.Cgroup2MaxSwapFilename], msg)
}
}
calcSwapForBurstablePods := func(containerMemoryRequest int64) int64 {
swapSize, err := calcSwapForBurstablePods(containerMemoryRequest, int64(m.machineInfo.MemoryCapacity), int64(m.machineInfo.SwapCapacity))
assert.NoError(t, err)
return swapSize
}
for _, tc := range []struct {
name string
cgroupVersion CgroupVersion
qosClass v1.PodQOSClass
nodeSwapFeatureGateEnabled bool
swapBehavior string
addContainerWithoutRequests bool
addGuaranteedContainer bool
}{
// With cgroup v1
{
name: "cgroups v1, LimitedSwap, Burstable QoS",
cgroupVersion: cgroupV1,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.LimitedSwap,
},
{
name: "cgroups v1, UnlimitedSwap, Burstable QoS",
cgroupVersion: cgroupV1,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.UnlimitedSwap,
},
{
name: "cgroups v1, LimitedSwap, Best-effort QoS",
cgroupVersion: cgroupV1,
qosClass: v1.PodQOSBestEffort,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.LimitedSwap,
},
// With feature gate turned off
{
name: "NodeSwap feature gate turned off, cgroups v2, LimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: false,
swapBehavior: types.LimitedSwap,
},
{
name: "NodeSwap feature gate turned off, cgroups v2, UnlimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: false,
swapBehavior: types.UnlimitedSwap,
},
// With no swapBehavior, UnlimitedSwap should be the default
{
name: "With no swapBehavior - UnlimitedSwap should be the default",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBestEffort,
nodeSwapFeatureGateEnabled: true,
swapBehavior: "",
},
// With Guaranteed and Best-effort QoS
{
name: "Best-effort Qos, cgroups v2, LimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBestEffort,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.LimitedSwap,
},
{
name: "Best-effort Qos, cgroups v2, UnlimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBestEffort,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.UnlimitedSwap,
},
{
name: "Guaranteed Qos, cgroups v2, LimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSGuaranteed,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.LimitedSwap,
},
{
name: "Guaranteed Qos, cgroups v2, UnlimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSGuaranteed,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.UnlimitedSwap,
},
// With a "guaranteed" container (when memory requests equal to limits)
{
name: "Burstable Qos, cgroups v2, LimitedSwap, with a guaranteed container",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.LimitedSwap,
addContainerWithoutRequests: false,
addGuaranteedContainer: true,
},
{
name: "Burstable Qos, cgroups v2, UnlimitedSwap, with a guaranteed container",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.UnlimitedSwap,
addContainerWithoutRequests: false,
addGuaranteedContainer: true,
},
// Swap is expected to be allocated
{
name: "Burstable Qos, cgroups v2, LimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.LimitedSwap,
addContainerWithoutRequests: false,
addGuaranteedContainer: false,
},
{
name: "Burstable Qos, cgroups v2, UnlimitedSwap",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.UnlimitedSwap,
addContainerWithoutRequests: false,
addGuaranteedContainer: false,
},
{
name: "Burstable Qos, cgroups v2, LimitedSwap, with a container with no requests",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.LimitedSwap,
addContainerWithoutRequests: true,
addGuaranteedContainer: false,
},
{
name: "Burstable Qos, cgroups v2, UnlimitedSwap, with a container with no requests",
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
nodeSwapFeatureGateEnabled: true,
swapBehavior: types.UnlimitedSwap,
addContainerWithoutRequests: true,
addGuaranteedContainer: false,
},
} {
t.Run(tc.name, func(t *testing.T) {
setCgroupVersionDuringTest(tc.cgroupVersion)
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeSwap, tc.nodeSwapFeatureGateEnabled)()
m.memorySwapBehavior = tc.swapBehavior
var resourceReqsC1, resourceReqsC2 v1.ResourceRequirements
switch tc.qosClass {
case v1.PodQOSBurstable:
resourceReqsC1 = v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi")},
}
if !tc.addContainerWithoutRequests {
resourceReqsC2 = v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi")},
}
if tc.addGuaranteedContainer {
resourceReqsC2.Limits = v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi")}
}
}
case v1.PodQOSGuaranteed:
resourceReqsC1 = v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi"), v1.ResourceCPU: resource.MustParse("1")},
Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi"), v1.ResourceCPU: resource.MustParse("1")},
}
resourceReqsC2 = v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi"), v1.ResourceCPU: resource.MustParse("1")},
Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi"), v1.ResourceCPU: resource.MustParse("1")},
}
}
pod.Spec.Containers[0].Resources = resourceReqsC1
pod.Spec.Containers[1].Resources = resourceReqsC2
resourcesC1 := m.generateLinuxContainerResources(pod, &pod.Spec.Containers[0], false)
resourcesC2 := m.generateLinuxContainerResources(pod, &pod.Spec.Containers[1], false)
if !tc.nodeSwapFeatureGateEnabled || tc.cgroupVersion == cgroupV1 || (tc.swapBehavior == types.LimitedSwap && tc.qosClass != v1.PodQOSBurstable) {
expectNoSwap(tc.cgroupVersion, resourcesC1, resourcesC2)
return
}
if tc.swapBehavior == types.UnlimitedSwap || tc.swapBehavior == "" {
expectUnlimitedSwap(tc.cgroupVersion, resourcesC1, resourcesC2)
return
}
c1ExpectedSwap := calcSwapForBurstablePods(resourceReqsC1.Requests.Memory().Value())
c2ExpectedSwap := int64(0)
if !tc.addContainerWithoutRequests && !tc.addGuaranteedContainer {
c2ExpectedSwap = calcSwapForBurstablePods(resourceReqsC2.Requests.Memory().Value())
}
expectSwap(tc.cgroupVersion, c1ExpectedSwap, resourcesC1)
expectSwap(tc.cgroupVersion, c2ExpectedSwap, resourcesC2)
})
}
}
type CgroupVersion string
const (
cgroupV1 CgroupVersion = "v1"
cgroupV2 CgroupVersion = "v2"
)
func setCgroupVersionDuringTest(version CgroupVersion) {
isCgroup2UnifiedMode = func() bool {
return version == cgroupV2
}
}

@ -38,6 +38,59 @@ func TestApplySandboxResources(t *testing.T) {
Linux: &runtimeapi.LinuxPodSandboxConfig{},
}
getPodWithOverhead := func() *v1.Pod {
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "bar",
Namespace: "new",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("128Mi"),
v1.ResourceCPU: resource.MustParse("2"),
},
Limits: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("256Mi"),
v1.ResourceCPU: resource.MustParse("4"),
},
},
},
},
Overhead: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("128Mi"),
v1.ResourceCPU: resource.MustParse("1"),
},
},
}
}
getPodWithoutOverhead := func() *v1.Pod {
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "bar",
Namespace: "new",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("128Mi"),
},
Limits: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("256Mi"),
},
},
},
},
},
}
}
require.NoError(t, err)
tests := []struct {
@ -45,36 +98,11 @@ func TestApplySandboxResources(t *testing.T) {
pod *v1.Pod
expectedResource *runtimeapi.LinuxContainerResources
expectedOverhead *runtimeapi.LinuxContainerResources
cgroupVersion CgroupVersion
}{
{
description: "pod with overhead defined",
-pod: &v1.Pod{
-ObjectMeta: metav1.ObjectMeta{
-UID: "12345678",
-Name: "bar",
-Namespace: "new",
-},
-Spec: v1.PodSpec{
-Containers: []v1.Container{
-{
-Resources: v1.ResourceRequirements{
-Requests: v1.ResourceList{
-v1.ResourceMemory: resource.MustParse("128Mi"),
-v1.ResourceCPU: resource.MustParse("2"),
-},
-Limits: v1.ResourceList{
-v1.ResourceMemory: resource.MustParse("256Mi"),
-v1.ResourceCPU: resource.MustParse("4"),
-},
-},
-},
-},
-Overhead: v1.ResourceList{
-v1.ResourceMemory: resource.MustParse("128Mi"),
-v1.ResourceCPU: resource.MustParse("1"),
-},
-},
-},
+pod: getPodWithOverhead(),
expectedResource: &runtimeapi.LinuxContainerResources{
MemoryLimitInBytes: 268435456,
CpuPeriod: 100000,
@ -87,30 +115,11 @@ func TestApplySandboxResources(t *testing.T) {
CpuQuota: 100000,
CpuShares: 1024,
},
cgroupVersion: cgroupV1,
},
{
description: "pod without overhead defined",
-pod: &v1.Pod{
-ObjectMeta: metav1.ObjectMeta{
-UID: "12345678",
-Name: "bar",
-Namespace: "new",
-},
-Spec: v1.PodSpec{
-Containers: []v1.Container{
-{
-Resources: v1.ResourceRequirements{
-Requests: v1.ResourceList{
-v1.ResourceMemory: resource.MustParse("128Mi"),
-},
-Limits: v1.ResourceList{
-v1.ResourceMemory: resource.MustParse("256Mi"),
-},
-},
-},
-},
-},
-},
+pod: getPodWithoutOverhead(),
expectedResource: &runtimeapi.LinuxContainerResources{
MemoryLimitInBytes: 268435456,
CpuPeriod: 100000,
@ -118,10 +127,45 @@ func TestApplySandboxResources(t *testing.T) {
CpuShares: 2,
},
expectedOverhead: &runtimeapi.LinuxContainerResources{},
cgroupVersion: cgroupV1,
},
{
description: "pod with overhead defined",
pod: getPodWithOverhead(),
expectedResource: &runtimeapi.LinuxContainerResources{
MemoryLimitInBytes: 268435456,
CpuPeriod: 100000,
CpuQuota: 400000,
CpuShares: 2048,
Unified: map[string]string{"memory.oom.group": "1"},
},
expectedOverhead: &runtimeapi.LinuxContainerResources{
MemoryLimitInBytes: 134217728,
CpuPeriod: 100000,
CpuQuota: 100000,
CpuShares: 1024,
Unified: map[string]string{"memory.oom.group": "1"},
},
cgroupVersion: cgroupV2,
},
{
description: "pod without overhead defined",
pod: getPodWithoutOverhead(),
expectedResource: &runtimeapi.LinuxContainerResources{
MemoryLimitInBytes: 268435456,
CpuPeriod: 100000,
CpuQuota: 0,
CpuShares: 2,
Unified: map[string]string{"memory.oom.group": "1"},
},
expectedOverhead: &runtimeapi.LinuxContainerResources{},
cgroupVersion: cgroupV2,
},
}
for i, test := range tests {
setCgroupVersionDuringTest(test.cgroupVersion)
m.applySandboxResources(test.pod, config)
assert.Equal(t, test.expectedResource, config.Linux.Resources, "TestCase[%d]: %s", i, test.description)
assert.Equal(t, test.expectedOverhead, config.Linux.Overhead, "TestCase[%d]: %s", i, test.description)

test/e2e_node/swap_test.go (new file, 254 lines)

@ -0,0 +1,254 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package e2enode
import (
"context"
"fmt"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/rand"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/test/e2e/framework"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
testutils "k8s.io/kubernetes/test/utils"
admissionapi "k8s.io/pod-security-admission/api"
"path/filepath"
"strconv"
)
const (
cgroupBasePath = "/sys/fs/cgroup/"
cgroupV1SwapLimitFile = "/memory/memory.memsw.limit_in_bytes"
cgroupV2SwapLimitFile = "memory.swap.max"
cgroupV1MemLimitFile = "/memory/memory.limit_in_bytes"
)
var _ = SIGDescribe("Swap [NodeConformance][LinuxOnly]", func() {
f := framework.NewDefaultFramework("swap-test")
f.NamespacePodSecurityEnforceLevel = admissionapi.LevelBaseline
ginkgo.DescribeTable("with configuration", func(qosClass v1.PodQOSClass, memoryRequestEqualLimit bool) {
ginkgo.By(fmt.Sprintf("Creating a pod of QOS class %s. memoryRequestEqualLimit: %t", qosClass, memoryRequestEqualLimit))
pod := getSwapTestPod(f, qosClass, memoryRequestEqualLimit)
pod = runPodAndWaitUntilScheduled(f, pod)
isCgroupV2 := isPodCgroupV2(f, pod)
isLimitedSwap := isLimitedSwap(f, pod)
if !isSwapFeatureGateEnabled() || !isCgroupV2 || (isLimitedSwap && (qosClass != v1.PodQOSBurstable || memoryRequestEqualLimit)) {
ginkgo.By(fmt.Sprintf("Expecting no swap. feature gate on? %t isCgroupV2? %t is QoS burstable? %t", isSwapFeatureGateEnabled(), isCgroupV2, qosClass == v1.PodQOSBurstable))
expectNoSwap(f, pod, isCgroupV2)
return
}
if !isLimitedSwap {
ginkgo.By("expecting unlimited swap")
expectUnlimitedSwap(f, pod, isCgroupV2)
return
}
ginkgo.By("expecting limited swap")
expectedSwapLimit := calcSwapForBurstablePod(f, pod)
expectLimitedSwap(f, pod, expectedSwapLimit)
},
ginkgo.Entry("QOS Best-effort", v1.PodQOSBestEffort, false),
ginkgo.Entry("QOS Burstable", v1.PodQOSBurstable, false),
ginkgo.Entry("QOS Burstable with memory request equals to limit", v1.PodQOSBurstable, true),
ginkgo.Entry("QOS Guaranteed", v1.PodQOSGuaranteed, false),
)
})
// Note that memoryRequestEqualLimit is effective only when qosClass is PodQOSBurstable.
func getSwapTestPod(f *framework.Framework, qosClass v1.PodQOSClass, memoryRequestEqualLimit bool) *v1.Pod {
podMemoryAmount := resource.MustParse("128Mi")
var resources v1.ResourceRequirements
switch qosClass {
case v1.PodQOSBestEffort:
// nothing to do in this case
case v1.PodQOSBurstable:
resources = v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: podMemoryAmount,
},
}
if memoryRequestEqualLimit {
resources.Limits = resources.Requests
}
case v1.PodQOSGuaranteed:
resources = v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("200m"),
v1.ResourceMemory: podMemoryAmount,
},
}
resources.Requests = resources.Limits
}
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "test-pod-swap-" + rand.String(5),
Namespace: f.Namespace.Name,
},
Spec: v1.PodSpec{
RestartPolicy: v1.RestartPolicyAlways,
Containers: []v1.Container{
{
Name: "busybox-container",
Image: busyboxImage,
Command: []string{"sleep", "600"},
Resources: resources,
},
},
},
}
return pod
}
func runPodAndWaitUntilScheduled(f *framework.Framework, pod *v1.Pod) *v1.Pod {
ginkgo.By("running swap test pod")
podClient := e2epod.NewPodClient(f)
pod = podClient.CreateSync(context.Background(), pod)
pod, err := podClient.Get(context.Background(), pod.Name, metav1.GetOptions{})
framework.ExpectNoError(err)
isReady, err := testutils.PodRunningReady(pod)
framework.ExpectNoError(err)
gomega.ExpectWithOffset(1, isReady).To(gomega.BeTrue(), "pod should be ready")
return pod
}
func isSwapFeatureGateEnabled() bool {
ginkgo.By("figuring if NodeSwap feature gate is turned on")
return utilfeature.DefaultFeatureGate.Enabled(features.NodeSwap)
}
func readCgroupFile(f *framework.Framework, pod *v1.Pod, filename string) string {
filePath := filepath.Join(cgroupBasePath, filename)
ginkgo.By("reading cgroup file " + filePath)
output := e2epod.ExecCommandInContainer(f, pod.Name, pod.Spec.Containers[0].Name, "/bin/sh", "-ec", "cat "+filePath)
return output
}
func isPodCgroupV2(f *framework.Framework, pod *v1.Pod) bool {
ginkgo.By("figuring is test pod runs cgroup v2")
output := e2epod.ExecCommandInContainer(f, pod.Name, pod.Spec.Containers[0].Name, "/bin/sh", "-ec", `if test -f "/sys/fs/cgroup/cgroup.controllers"; then echo "true"; else echo "false"; fi`)
return output == "true"
}
func expectNoSwap(f *framework.Framework, pod *v1.Pod, isCgroupV2 bool) {
if isCgroupV2 {
swapLimit := readCgroupFile(f, pod, cgroupV2SwapLimitFile)
gomega.ExpectWithOffset(1, swapLimit).To(gomega.Equal("0"), "max swap allowed should be zero")
} else {
swapPlusMemLimit := readCgroupFile(f, pod, cgroupV1SwapLimitFile)
memLimit := readCgroupFile(f, pod, cgroupV1MemLimitFile)
gomega.ExpectWithOffset(1, swapPlusMemLimit).ToNot(gomega.BeEmpty())
gomega.ExpectWithOffset(1, swapPlusMemLimit).To(gomega.Equal(memLimit))
}
}
func expectUnlimitedSwap(f *framework.Framework, pod *v1.Pod, isCgroupV2 bool) {
if isCgroupV2 {
swapLimit := readCgroupFile(f, pod, cgroupV2SwapLimitFile)
gomega.ExpectWithOffset(1, swapLimit).To(gomega.Equal("max"), "max swap allowed should be \"max\"")
} else {
swapPlusMemLimit := readCgroupFile(f, pod, cgroupV1SwapLimitFile)
gomega.ExpectWithOffset(1, swapPlusMemLimit).To(gomega.Equal("-1"))
}
}
// Supports cgroup v2 only, since LimitedSwap is not supported with cgroup v1.
func expectLimitedSwap(f *framework.Framework, pod *v1.Pod, expectedSwapLimit int64) {
swapLimitStr := readCgroupFile(f, pod, cgroupV2SwapLimitFile)
swapLimit, err := strconv.Atoi(swapLimitStr)
framework.ExpectNoError(err, "cannot convert swap limit to int")
// cgroup values are always aligned w.r.t. the page size, which is usually 4Ki
const cgroupAlignment int64 = 4 * 1024 // 4Ki
const errMsg = "swap limitation is not as expected"
gomega.ExpectWithOffset(1, int64(swapLimit)).To(
gomega.And(
gomega.BeNumerically(">=", expectedSwapLimit-cgroupAlignment),
gomega.BeNumerically("<=", expectedSwapLimit+cgroupAlignment),
),
errMsg,
)
}
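
Written out in plain arithmetic, the matcher above accepts any value within one 4KiB page of the expected limit; an equivalent hand-rolled check (a sketch, not part of the PR):

func withinOnePage(swapLimit, expectedSwapLimit int64) bool {
	const page = int64(4 * 1024)
	return swapLimit >= expectedSwapLimit-page && swapLimit <= expectedSwapLimit+page
}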
func getSwapCapacity(f *framework.Framework, pod *v1.Pod) int64 {
output := e2epod.ExecCommandInContainer(f, pod.Name, pod.Spec.Containers[0].Name, "/bin/sh", "-ec", "free -b | grep Swap | xargs | cut -d\" \" -f2")
swapCapacity, err := strconv.Atoi(output)
framework.ExpectNoError(err, "cannot convert swap size to int")
ginkgo.By(fmt.Sprintf("providing swap capacity: %d", swapCapacity))
return int64(swapCapacity)
}
func getMemoryCapacity(f *framework.Framework, pod *v1.Pod) int64 {
nodes, err := f.ClientSet.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{})
framework.ExpectNoError(err, "failed listing nodes")
for _, node := range nodes.Items {
if node.Name != pod.Spec.NodeName {
continue
}
memCapacity := node.Status.Capacity[v1.ResourceMemory]
return memCapacity.Value()
}
framework.ExpectNoError(fmt.Errorf("node %s wasn't found", pod.Spec.NodeName))
return 0
}
func calcSwapForBurstablePod(f *framework.Framework, pod *v1.Pod) int64 {
nodeMemoryCapacity := getMemoryCapacity(f, pod)
nodeSwapCapacity := getSwapCapacity(f, pod)
containerMemoryRequest := pod.Spec.Containers[0].Resources.Requests.Memory().Value()
containerMemoryProportion := float64(containerMemoryRequest) / float64(nodeMemoryCapacity)
swapAllocation := containerMemoryProportion * float64(nodeSwapCapacity)
ginkgo.By(fmt.Sprintf("Calculating swap for burstable pods: nodeMemoryCapacity: %d, nodeSwapCapacity: %d, containerMemoryRequest: %d, swapAllocation: %d",
nodeMemoryCapacity, nodeSwapCapacity, containerMemoryRequest, int64(swapAllocation)))
return int64(swapAllocation)
}
func isLimitedSwap(f *framework.Framework, pod *v1.Pod) bool {
kubeletCfg, err := getCurrentKubeletConfig(context.Background())
framework.ExpectNoError(err, "cannot get kubelet config")
return kubeletCfg.MemorySwap.SwapBehavior == types.LimitedSwap
}