mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-07 03:03:59 +00:00
Only configure swap if available on node
This change bypasses all logic to set swap in the linux container resources if a swap controller is not available on node. Failing to do so may cause errors in runc when starting a container with a swap configuration -- even if this is set to 0. Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
parent
d3d1827c05
commit
394bcaf182
@ -107,18 +107,8 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerResources(pod *v1.Pod,
|
||||
|
||||
lcr.HugepageLimits = GetHugepageLimitsFromResources(container.Resources)
|
||||
|
||||
if swapConfigurationHelper := newSwapConfigurationHelper(*m.machineInfo); utilfeature.DefaultFeatureGate.Enabled(kubefeatures.NodeSwap) {
|
||||
// NOTE(ehashman): Behaviour is defined in the opencontainers runtime spec:
|
||||
// https://github.com/opencontainers/runtime-spec/blob/1c3f411f041711bbeecf35ff7e93461ea6789220/config-linux.md#memory
|
||||
switch m.memorySwapBehavior {
|
||||
case kubelettypes.LimitedSwap:
|
||||
swapConfigurationHelper.ConfigureLimitedSwap(lcr, pod, container)
|
||||
default:
|
||||
swapConfigurationHelper.ConfigureUnlimitedSwap(lcr)
|
||||
}
|
||||
} else {
|
||||
swapConfigurationHelper.ConfigureNoSwap(lcr)
|
||||
}
|
||||
// Configure swap for the container
|
||||
m.configureContainerSwapResources(lcr, pod, container)
|
||||
|
||||
// Set memory.min and memory.high to enforce MemoryQoS
|
||||
if enforceMemoryQoS {
|
||||
@ -170,6 +160,30 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerResources(pod *v1.Pod,
|
||||
return lcr
|
||||
}
|
||||
|
||||
// configureContainerSwapResources configures the swap resources for a specified (linux) container.
|
||||
// Swap is only configured if a swap cgroup controller is available and the NodeSwap feature gate is enabled.
|
||||
func (m *kubeGenericRuntimeManager) configureContainerSwapResources(lcr *runtimeapi.LinuxContainerResources, pod *v1.Pod, container *v1.Container) {
|
||||
if !swapControllerAvailable() {
|
||||
klog.InfoS("No swap cgroup controller present", "swapBehavior", m.memorySwapBehavior, "pod", klog.KObj(pod), "containerName", container.Name)
|
||||
return
|
||||
}
|
||||
swapConfigurationHelper := newSwapConfigurationHelper(*m.machineInfo)
|
||||
|
||||
if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.NodeSwap) {
|
||||
swapConfigurationHelper.ConfigureNoSwap(lcr)
|
||||
return
|
||||
}
|
||||
|
||||
// NOTE(ehashman): Behavior is defined in the opencontainers runtime spec:
|
||||
// https://github.com/opencontainers/runtime-spec/blob/1c3f411f041711bbeecf35ff7e93461ea6789220/config-linux.md#memory
|
||||
switch m.memorySwapBehavior {
|
||||
case kubelettypes.LimitedSwap:
|
||||
swapConfigurationHelper.ConfigureLimitedSwap(lcr, pod, container)
|
||||
default:
|
||||
swapConfigurationHelper.ConfigureUnlimitedSwap(lcr)
|
||||
}
|
||||
}
|
||||
|
||||
// generateContainerResources generates platform specific (linux) container resources config for runtime
|
||||
func (m *kubeGenericRuntimeManager) generateContainerResources(pod *v1.Pod, container *v1.Container) *runtimeapi.ContainerResources {
|
||||
enforceMemoryQoS := false
|
||||
@ -315,7 +329,10 @@ var (
|
||||
swapControllerAvailabilityOnce sync.Once
|
||||
)
|
||||
|
||||
func swapControllerAvailable() bool {
|
||||
// Note: this function variable is being added here so it would be possible to mock
|
||||
// the swap controller availability for unit tests by assigning a new function to it. Without it,
|
||||
// the swap controller availability would solely depend on the environment running the test.
|
||||
var swapControllerAvailable = func() bool {
|
||||
// See https://github.com/containerd/containerd/pull/7838/
|
||||
swapControllerAvailabilityOnce.Do(func() {
|
||||
const warn = "Failed to detect the availability of the swap controller, assuming not available"
|
||||
|
@ -22,14 +22,15 @@ package kuberuntime
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm"
|
||||
"k8s.io/kubernetes/pkg/kubelet/types"
|
||||
"math"
|
||||
"os"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm"
|
||||
"k8s.io/kubernetes/pkg/kubelet/types"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/stretchr/testify/assert"
|
||||
@ -845,12 +846,12 @@ func TestGenerateLinuxContainerResources(t *testing.T) {
|
||||
},
|
||||
} {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
defer setSwapControllerAvailableDuringTest(false)()
|
||||
if tc.scalingFg {
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
|
||||
}
|
||||
|
||||
setCgroupVersionDuringTest(cgroupV1)
|
||||
tc.expected.MemorySwapLimitInBytes = tc.expected.MemoryLimitInBytes
|
||||
|
||||
pod.Spec.Containers[0].Resources = v1.ResourceRequirements{Limits: tc.limits, Requests: tc.requests}
|
||||
if len(tc.cStatus) > 0 {
|
||||
@ -891,6 +892,19 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
|
||||
Status: v1.PodStatus{},
|
||||
}
|
||||
|
||||
expectSwapDisabled := func(cgroupVersion CgroupVersion, resources ...*runtimeapi.LinuxContainerResources) {
|
||||
const msg = "container is expected to not have swap configured"
|
||||
|
||||
for _, r := range resources {
|
||||
switch cgroupVersion {
|
||||
case cgroupV1:
|
||||
assert.Equal(t, int64(0), r.MemorySwapLimitInBytes, msg)
|
||||
case cgroupV2:
|
||||
assert.NotContains(t, r.Unified, cm.Cgroup2MaxSwapFilename, msg)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
expectNoSwap := func(cgroupVersion CgroupVersion, resources ...*runtimeapi.LinuxContainerResources) {
|
||||
const msg = "container is expected to not have swap access"
|
||||
|
||||
@ -939,6 +953,7 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
|
||||
name string
|
||||
cgroupVersion CgroupVersion
|
||||
qosClass v1.PodQOSClass
|
||||
swapDisabledOnNode bool
|
||||
nodeSwapFeatureGateEnabled bool
|
||||
swapBehavior string
|
||||
addContainerWithoutRequests bool
|
||||
@ -994,28 +1009,28 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
|
||||
|
||||
// With Guaranteed and Best-effort QoS
|
||||
{
|
||||
name: "Best-effort Qos, cgroups v2, LimitedSwap",
|
||||
name: "Best-effort QoS, cgroups v2, LimitedSwap",
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSBurstable,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
swapBehavior: types.LimitedSwap,
|
||||
},
|
||||
{
|
||||
name: "Best-effort Qos, cgroups v2, UnlimitedSwap",
|
||||
name: "Best-effort QoS, cgroups v2, UnlimitedSwap",
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSBurstable,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
swapBehavior: types.UnlimitedSwap,
|
||||
},
|
||||
{
|
||||
name: "Guaranteed Qos, cgroups v2, LimitedSwap",
|
||||
name: "Guaranteed QoS, cgroups v2, LimitedSwap",
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSGuaranteed,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
swapBehavior: types.LimitedSwap,
|
||||
},
|
||||
{
|
||||
name: "Guaranteed Qos, cgroups v2, UnlimitedSwap",
|
||||
name: "Guaranteed QoS, cgroups v2, UnlimitedSwap",
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSGuaranteed,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
@ -1024,7 +1039,7 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
|
||||
|
||||
// With a "guaranteed" container (when memory requests equal to limits)
|
||||
{
|
||||
name: "Burstable Qos, cgroups v2, LimitedSwap, with a guaranteed container",
|
||||
name: "Burstable QoS, cgroups v2, LimitedSwap, with a guaranteed container",
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSBurstable,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
@ -1033,7 +1048,7 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
|
||||
addGuaranteedContainer: true,
|
||||
},
|
||||
{
|
||||
name: "Burstable Qos, cgroups v2, UnlimitedSwap, with a guaranteed container",
|
||||
name: "Burstable QoS, cgroups v2, UnlimitedSwap, with a guaranteed container",
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSBurstable,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
@ -1044,7 +1059,7 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
|
||||
|
||||
// Swap is expected to be allocated
|
||||
{
|
||||
name: "Burstable Qos, cgroups v2, LimitedSwap",
|
||||
name: "Burstable QoS, cgroups v2, LimitedSwap",
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSBurstable,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
@ -1053,7 +1068,7 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
|
||||
addGuaranteedContainer: false,
|
||||
},
|
||||
{
|
||||
name: "Burstable Qos, cgroups v2, UnlimitedSwap",
|
||||
name: "Burstable QoS, cgroups v2, UnlimitedSwap",
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSBurstable,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
@ -1062,7 +1077,7 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
|
||||
addGuaranteedContainer: false,
|
||||
},
|
||||
{
|
||||
name: "Burstable Qos, cgroups v2, LimitedSwap, with a container with no requests",
|
||||
name: "Burstable QoS, cgroups v2, LimitedSwap, with a container with no requests",
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSBurstable,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
@ -1071,7 +1086,158 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
|
||||
addGuaranteedContainer: false,
|
||||
},
|
||||
{
|
||||
name: "Burstable Qos, cgroups v2, UnlimitedSwap, with a container with no requests",
|
||||
name: "Burstable QoS, cgroups v2, UnlimitedSwap, with a container with no requests",
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSBurstable,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
swapBehavior: types.UnlimitedSwap,
|
||||
addContainerWithoutRequests: true,
|
||||
addGuaranteedContainer: false,
|
||||
},
|
||||
// All the above examples with Swap disabled on node
|
||||
{
|
||||
name: "Swap disabled on node, cgroups v1, LimitedSwap, Burstable QoS",
|
||||
swapDisabledOnNode: true,
|
||||
cgroupVersion: cgroupV1,
|
||||
qosClass: v1.PodQOSBurstable,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
swapBehavior: types.LimitedSwap,
|
||||
},
|
||||
{
|
||||
name: "Swap disabled on node, cgroups v1, UnlimitedSwap, Burstable QoS",
|
||||
swapDisabledOnNode: true,
|
||||
cgroupVersion: cgroupV1,
|
||||
qosClass: v1.PodQOSBurstable,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
swapBehavior: types.UnlimitedSwap,
|
||||
},
|
||||
{
|
||||
name: "Swap disabled on node, cgroups v1, LimitedSwap, Best-effort QoS",
|
||||
swapDisabledOnNode: true,
|
||||
cgroupVersion: cgroupV1,
|
||||
qosClass: v1.PodQOSBestEffort,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
swapBehavior: types.LimitedSwap,
|
||||
},
|
||||
|
||||
// With feature gate turned off
|
||||
{
|
||||
name: "Swap disabled on node, NodeSwap feature gate turned off, cgroups v2, LimitedSwap",
|
||||
swapDisabledOnNode: true,
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSBurstable,
|
||||
nodeSwapFeatureGateEnabled: false,
|
||||
swapBehavior: types.LimitedSwap,
|
||||
},
|
||||
{
|
||||
name: "Swap disabled on node, NodeSwap feature gate turned off, cgroups v2, UnlimitedSwap",
|
||||
swapDisabledOnNode: true,
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSBurstable,
|
||||
nodeSwapFeatureGateEnabled: false,
|
||||
swapBehavior: types.UnlimitedSwap,
|
||||
},
|
||||
|
||||
// With no swapBehavior, UnlimitedSwap should be the default
|
||||
{
|
||||
name: "Swap disabled on node, With no swapBehavior - UnlimitedSwap should be the default",
|
||||
swapDisabledOnNode: true,
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSBestEffort,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
swapBehavior: "",
|
||||
},
|
||||
|
||||
// With Guaranteed and Best-effort QoS
|
||||
{
|
||||
name: "Swap disabled on node, Best-effort QoS, cgroups v2, LimitedSwap",
|
||||
swapDisabledOnNode: true,
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSBurstable,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
swapBehavior: types.LimitedSwap,
|
||||
},
|
||||
{
|
||||
name: "Swap disabled on node, Best-effort QoS, cgroups v2, UnlimitedSwap",
|
||||
swapDisabledOnNode: true,
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSBurstable,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
swapBehavior: types.UnlimitedSwap,
|
||||
},
|
||||
{
|
||||
name: "Swap disabled on node, Guaranteed QoS, cgroups v2, LimitedSwap",
|
||||
swapDisabledOnNode: true,
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSGuaranteed,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
swapBehavior: types.LimitedSwap,
|
||||
},
|
||||
{
|
||||
name: "Swap disabled on node, Guaranteed QoS, cgroups v2, UnlimitedSwap",
|
||||
swapDisabledOnNode: true,
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSGuaranteed,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
swapBehavior: types.UnlimitedSwap,
|
||||
},
|
||||
|
||||
// With a "guaranteed" container (when memory requests equal to limits)
|
||||
{
|
||||
name: "Swap disabled on node, Burstable QoS, cgroups v2, LimitedSwap, with a guaranteed container",
|
||||
swapDisabledOnNode: true,
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSBurstable,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
swapBehavior: types.LimitedSwap,
|
||||
addContainerWithoutRequests: false,
|
||||
addGuaranteedContainer: true,
|
||||
},
|
||||
{
|
||||
name: "Swap disabled on node, Burstable QoS, cgroups v2, UnlimitedSwap, with a guaranteed container",
|
||||
swapDisabledOnNode: true,
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSBurstable,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
swapBehavior: types.UnlimitedSwap,
|
||||
addContainerWithoutRequests: false,
|
||||
addGuaranteedContainer: true,
|
||||
},
|
||||
|
||||
// Swap is expected to be allocated
|
||||
{
|
||||
name: "Swap disabled on node, Burstable QoS, cgroups v2, LimitedSwap",
|
||||
swapDisabledOnNode: true,
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSBurstable,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
swapBehavior: types.LimitedSwap,
|
||||
addContainerWithoutRequests: false,
|
||||
addGuaranteedContainer: false,
|
||||
},
|
||||
{
|
||||
name: "Swap disabled on node, Burstable QoS, cgroups v2, UnlimitedSwap",
|
||||
swapDisabledOnNode: true,
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSBurstable,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
swapBehavior: types.UnlimitedSwap,
|
||||
addContainerWithoutRequests: false,
|
||||
addGuaranteedContainer: false,
|
||||
},
|
||||
{
|
||||
name: "Swap disabled on node, Burstable QoS, cgroups v2, LimitedSwap, with a container with no requests",
|
||||
swapDisabledOnNode: true,
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSBurstable,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
swapBehavior: types.LimitedSwap,
|
||||
addContainerWithoutRequests: true,
|
||||
addGuaranteedContainer: false,
|
||||
},
|
||||
{
|
||||
name: "Swap disabled on node, Burstable QoS, cgroups v2, UnlimitedSwap, with a container with no requests",
|
||||
swapDisabledOnNode: true,
|
||||
cgroupVersion: cgroupV2,
|
||||
qosClass: v1.PodQOSBurstable,
|
||||
nodeSwapFeatureGateEnabled: true,
|
||||
@ -1082,6 +1248,7 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
|
||||
} {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
setCgroupVersionDuringTest(tc.cgroupVersion)
|
||||
defer setSwapControllerAvailableDuringTest(!tc.swapDisabledOnNode)()
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeSwap, tc.nodeSwapFeatureGateEnabled)()
|
||||
m.memorySwapBehavior = tc.swapBehavior
|
||||
|
||||
@ -1117,6 +1284,11 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
|
||||
resourcesC1 := m.generateLinuxContainerResources(pod, &pod.Spec.Containers[0], false)
|
||||
resourcesC2 := m.generateLinuxContainerResources(pod, &pod.Spec.Containers[1], false)
|
||||
|
||||
if tc.swapDisabledOnNode {
|
||||
expectSwapDisabled(tc.cgroupVersion, resourcesC1, resourcesC2)
|
||||
return
|
||||
}
|
||||
|
||||
if !tc.nodeSwapFeatureGateEnabled || tc.cgroupVersion == cgroupV1 || (tc.swapBehavior == types.LimitedSwap && tc.qosClass != v1.PodQOSBurstable) {
|
||||
expectNoSwap(tc.cgroupVersion, resourcesC1, resourcesC2)
|
||||
return
|
||||
@ -1151,3 +1323,14 @@ func setCgroupVersionDuringTest(version CgroupVersion) {
|
||||
return version == cgroupV2
|
||||
}
|
||||
}
|
||||
|
||||
func setSwapControllerAvailableDuringTest(available bool) func() {
|
||||
original := swapControllerAvailable
|
||||
swapControllerAvailable = func() bool {
|
||||
return available
|
||||
}
|
||||
|
||||
return func() {
|
||||
swapControllerAvailable = original
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user