disable in-place vertical pod scaling for non-restartable swappable containers

Ajay Sundar Karuppasamy 2025-03-19 03:32:53 +00:00
parent e3baee37cd
commit a09cb22e4d
14 changed files with 482 additions and 51 deletions

View File

@ -834,7 +834,7 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend
s.TopologyManagerPolicyOptions, features.TopologyManagerPolicyOptions)
}
if utilfeature.DefaultFeatureGate.Enabled(features.NodeSwap) {
if !kubeletutil.IsCgroup2UnifiedMode() && s.MemorySwap.SwapBehavior == kubelettypes.LimitedSwap {
if !kubeletutil.IsCgroup2UnifiedMode() && s.MemorySwap.SwapBehavior == string(kubelettypes.LimitedSwap) {
// This feature is not supported for cgroupv1 so we are failing early.
return fmt.Errorf("swap feature is enabled and LimitedSwap but it is only supported with cgroupv2")
}
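The check above fails early when LimitedSwap is requested on a cgroup v1 node. For reference, a minimal standalone sketch (not the kubelet's IsCgroup2UnifiedMode helper) of how the unified cgroup v2 hierarchy can be detected on Linux with golang.org/x/sys/unix:

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// isCgroup2Unified reports whether /sys/fs/cgroup is mounted as the cgroup v2
// unified hierarchy by comparing the filesystem magic number (Linux only).
func isCgroup2Unified() (bool, error) {
	var st unix.Statfs_t
	if err := unix.Statfs("/sys/fs/cgroup", &st); err != nil {
		return false, err
	}
	return st.Type == unix.CGROUP2_SUPER_MAGIC, nil
}

func main() {
	unified, err := isCgroup2Unified()
	if err != nil {
		fmt.Println("statfs failed:", err)
		return
	}
	fmt.Println("cgroup v2 unified mode:", unified)
}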

View File

@ -203,8 +203,8 @@ func ValidateKubeletConfiguration(kc *kubeletconfig.KubeletConfiguration, featur
if localFeatureGate.Enabled(features.NodeSwap) {
switch kc.MemorySwap.SwapBehavior {
case "":
case kubetypes.NoSwap:
case kubetypes.LimitedSwap:
case string(kubetypes.NoSwap):
case string(kubetypes.LimitedSwap):
default:
allErrors = append(allErrors, fmt.Errorf("invalid configuration: memorySwap.swapBehavior %q must be one of: \"\", %q or %q", kc.MemorySwap.SwapBehavior, kubetypes.LimitedSwap, kubetypes.NoSwap))
}

View File

@ -385,7 +385,7 @@ func TestValidateKubeletConfiguration(t *testing.T) {
name: "specify MemorySwap.SwapBehavior without enabling NodeSwap",
configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration {
conf.FeatureGates = map[string]bool{"NodeSwap": false}
conf.MemorySwap.SwapBehavior = kubetypes.LimitedSwap
conf.MemorySwap.SwapBehavior = string(kubetypes.LimitedSwap)
return conf
},
errMsg: "invalid configuration: memorySwap.swapBehavior cannot be set when NodeSwap feature flag is disabled",

View File

@ -34,6 +34,7 @@ import (
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/credentialprovider"
kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/volume"
)
@ -138,6 +139,9 @@ type Runtime interface {
ListPodSandboxMetrics(ctx context.Context) ([]*runtimeapi.PodSandboxMetrics, error)
// GetContainerStatus returns the status for the container.
GetContainerStatus(ctx context.Context, id ContainerID) (*Status, error)
// GetContainerSwapBehavior reports the effective swap behavior for a container (e.g. NoSwap or LimitedSwap).
// This is used to decide whether InPlacePodVerticalScaling should be handled for the container.
GetContainerSwapBehavior(pod *v1.Pod, container *v1.Container) kubelettypes.SwapBehavior
}
// StreamingRuntime is the interface implemented by runtimes that handle the serving of the
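A minimal sketch of how a caller of this interface might use the new method; podUsesSwap is a hypothetical helper, not part of this change, and it exercises the FakeRuntime introduced below:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
	kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
)

// podUsesSwap is a hypothetical helper that reports whether the runtime
// considers any container in the pod swappable.
func podUsesSwap(runtime kubecontainer.Runtime, pod *v1.Pod) bool {
	for i := range pod.Spec.Containers {
		if runtime.GetContainerSwapBehavior(pod, &pod.Spec.Containers[i]) != kubelettypes.NoSwap {
			return true
		}
	}
	return false
}

func main() {
	// The FakeRuntime from this change lets tests choose a behavior per container name.
	fake := &containertest.FakeRuntime{
		SwapBehavior: map[string]kubelettypes.SwapBehavior{"app": kubelettypes.LimitedSwap},
	}
	pod := &v1.Pod{Spec: v1.PodSpec{Containers: []v1.Container{{Name: "app"}}}}
	fmt.Println(podUsesSwap(fake, pod)) // true
}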

View File

@ -30,6 +30,7 @@ import (
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
"k8s.io/kubernetes/pkg/credentialprovider"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/volume"
)
@ -70,6 +71,7 @@ type FakeRuntime struct {
// from container runtime.
BlockImagePulls bool
imagePullTokenBucket chan bool
SwapBehavior map[string]kubetypes.SwapBehavior
T TB
}
@ -536,3 +538,10 @@ func (f *FakeRuntime) GetContainerStatus(_ context.Context, _ kubecontainer.Cont
f.CalledFunctions = append(f.CalledFunctions, "GetContainerStatus")
return nil, f.Err
}
func (f *FakeRuntime) GetContainerSwapBehavior(pod *v1.Pod, container *v1.Container) kubetypes.SwapBehavior {
if f.SwapBehavior != nil && f.SwapBehavior[container.Name] != "" {
return f.SwapBehavior[container.Name]
}
return kubetypes.NoSwap
}

View File

@ -33,7 +33,9 @@ import (
mock "github.com/stretchr/testify/mock"
types "k8s.io/apimachinery/pkg/types"
pkgtypes "k8s.io/apimachinery/pkg/types"
types "k8s.io/kubernetes/pkg/kubelet/types"
v1 "k8s.io/cri-api/pkg/apis/runtime/v1"
)
@ -419,6 +421,53 @@ func (_c *MockRuntime_GetContainerStatus_Call) RunAndReturn(run func(context.Con
return _c
}
// GetContainerSwapBehavior provides a mock function with given fields: pod, _a1
func (_m *MockRuntime) GetContainerSwapBehavior(pod *corev1.Pod, _a1 *corev1.Container) types.SwapBehavior {
ret := _m.Called(pod, _a1)
if len(ret) == 0 {
panic("no return value specified for GetContainerSwapBehavior")
}
var r0 types.SwapBehavior
if rf, ok := ret.Get(0).(func(*corev1.Pod, *corev1.Container) types.SwapBehavior); ok {
r0 = rf(pod, _a1)
} else {
r0 = ret.Get(0).(types.SwapBehavior)
}
return r0
}
// MockRuntime_GetContainerSwapBehavior_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetContainerSwapBehavior'
type MockRuntime_GetContainerSwapBehavior_Call struct {
*mock.Call
}
// GetContainerSwapBehavior is a helper method to define mock.On call
// - pod *corev1.Pod
// - _a1 *corev1.Container
func (_e *MockRuntime_Expecter) GetContainerSwapBehavior(pod interface{}, _a1 interface{}) *MockRuntime_GetContainerSwapBehavior_Call {
return &MockRuntime_GetContainerSwapBehavior_Call{Call: _e.mock.On("GetContainerSwapBehavior", pod, _a1)}
}
func (_c *MockRuntime_GetContainerSwapBehavior_Call) Run(run func(pod *corev1.Pod, _a1 *corev1.Container)) *MockRuntime_GetContainerSwapBehavior_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(*corev1.Pod), args[1].(*corev1.Container))
})
return _c
}
func (_c *MockRuntime_GetContainerSwapBehavior_Call) Return(_a0 types.SwapBehavior) *MockRuntime_GetContainerSwapBehavior_Call {
_c.Call.Return(_a0)
return _c
}
func (_c *MockRuntime_GetContainerSwapBehavior_Call) RunAndReturn(run func(*corev1.Pod, *corev1.Container) types.SwapBehavior) *MockRuntime_GetContainerSwapBehavior_Call {
_c.Call.Return(run)
return _c
}
// GetImageRef provides a mock function with given fields: ctx, image
func (_m *MockRuntime) GetImageRef(ctx context.Context, image container.ImageSpec) (string, error) {
ret := _m.Called(ctx, image)
@ -534,7 +583,7 @@ func (_c *MockRuntime_GetImageSize_Call) RunAndReturn(run func(context.Context,
}
// GetPodStatus provides a mock function with given fields: ctx, uid, name, namespace
func (_m *MockRuntime) GetPodStatus(ctx context.Context, uid types.UID, name string, namespace string) (*container.PodStatus, error) {
func (_m *MockRuntime) GetPodStatus(ctx context.Context, uid pkgtypes.UID, name string, namespace string) (*container.PodStatus, error) {
ret := _m.Called(ctx, uid, name, namespace)
if len(ret) == 0 {
@ -543,10 +592,10 @@ func (_m *MockRuntime) GetPodStatus(ctx context.Context, uid types.UID, name str
var r0 *container.PodStatus
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, types.UID, string, string) (*container.PodStatus, error)); ok {
if rf, ok := ret.Get(0).(func(context.Context, pkgtypes.UID, string, string) (*container.PodStatus, error)); ok {
return rf(ctx, uid, name, namespace)
}
if rf, ok := ret.Get(0).(func(context.Context, types.UID, string, string) *container.PodStatus); ok {
if rf, ok := ret.Get(0).(func(context.Context, pkgtypes.UID, string, string) *container.PodStatus); ok {
r0 = rf(ctx, uid, name, namespace)
} else {
if ret.Get(0) != nil {
@ -554,7 +603,7 @@ func (_m *MockRuntime) GetPodStatus(ctx context.Context, uid types.UID, name str
}
}
if rf, ok := ret.Get(1).(func(context.Context, types.UID, string, string) error); ok {
if rf, ok := ret.Get(1).(func(context.Context, pkgtypes.UID, string, string) error); ok {
r1 = rf(ctx, uid, name, namespace)
} else {
r1 = ret.Error(1)
@ -570,16 +619,16 @@ type MockRuntime_GetPodStatus_Call struct {
// GetPodStatus is a helper method to define mock.On call
// - ctx context.Context
// - uid types.UID
// - uid pkgtypes.UID
// - name string
// - namespace string
func (_e *MockRuntime_Expecter) GetPodStatus(ctx interface{}, uid interface{}, name interface{}, namespace interface{}) *MockRuntime_GetPodStatus_Call {
return &MockRuntime_GetPodStatus_Call{Call: _e.mock.On("GetPodStatus", ctx, uid, name, namespace)}
}
func (_c *MockRuntime_GetPodStatus_Call) Run(run func(ctx context.Context, uid types.UID, name string, namespace string)) *MockRuntime_GetPodStatus_Call {
func (_c *MockRuntime_GetPodStatus_Call) Run(run func(ctx context.Context, uid pkgtypes.UID, name string, namespace string)) *MockRuntime_GetPodStatus_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context), args[1].(types.UID), args[2].(string), args[3].(string))
run(args[0].(context.Context), args[1].(pkgtypes.UID), args[2].(string), args[3].(string))
})
return _c
}
@ -589,7 +638,7 @@ func (_c *MockRuntime_GetPodStatus_Call) Return(_a0 *container.PodStatus, _a1 er
return _c
}
func (_c *MockRuntime_GetPodStatus_Call) RunAndReturn(run func(context.Context, types.UID, string, string) (*container.PodStatus, error)) *MockRuntime_GetPodStatus_Call {
func (_c *MockRuntime_GetPodStatus_Call) RunAndReturn(run func(context.Context, pkgtypes.UID, string, string) (*container.PodStatus, error)) *MockRuntime_GetPodStatus_Call {
_c.Call.Return(run)
return _c
}

View File

@ -2920,6 +2920,40 @@ func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, string, string) {
return true, "", ""
}
func disallowResizeForSwappableContainers(runtime kubecontainer.Runtime, desiredPod, allocatedPod *v1.Pod) (bool, string) {
if desiredPod == nil || allocatedPod == nil {
return false, ""
}
restartableMemoryResizePolicy := func(resizePolicies []v1.ContainerResizePolicy) bool {
for _, policy := range resizePolicies {
if policy.ResourceName == v1.ResourceMemory {
return policy.RestartPolicy == v1.RestartContainer
}
}
return false
}
allocatedContainers := make(map[string]v1.Container)
for _, container := range append(allocatedPod.Spec.Containers, allocatedPod.Spec.InitContainers...) {
allocatedContainers[container.Name] = container
}
for _, desiredContainer := range append(desiredPod.Spec.Containers, desiredPod.Spec.InitContainers...) {
allocatedContainer, ok := allocatedContainers[desiredContainer.Name]
if !ok {
continue
}
origMemRequest := desiredContainer.Resources.Requests[v1.ResourceMemory]
newMemRequest := allocatedContainer.Resources.Requests[v1.ResourceMemory]
if !origMemRequest.Equal(newMemRequest) && !restartableMemoryResizePolicy(allocatedContainer.ResizePolicy) {
aSwapBehavior := runtime.GetContainerSwapBehavior(desiredPod, &desiredContainer)
bSwapBehavior := runtime.GetContainerSwapBehavior(allocatedPod, &allocatedContainer)
if aSwapBehavior != kubetypes.NoSwap || bSwapBehavior != kubetypes.NoSwap {
return true, "In-place resize of containers with swap is not supported."
}
}
}
return false, ""
}
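In plain terms, the resize is rejected for a container when its memory request changes, its memory resize policy does not require a restart, and the runtime reports a swap behavior other than NoSwap for either the desired or the allocated container. A minimal standalone sketch of that per-container rule (a hypothetical mirror, not the kubelet's unexported function):

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
)

// memoryResizeNeedsRestart mirrors the inner helper above: it reports whether
// the container's memory resize policy requires a container restart.
func memoryResizeNeedsRestart(policies []v1.ContainerResizePolicy) bool {
	for _, p := range policies {
		if p.ResourceName == v1.ResourceMemory {
			return p.RestartPolicy == v1.RestartContainer
		}
	}
	return false
}

// rejectSwappableInPlaceResize is a hypothetical per-container mirror of the
// rule above: reject the resize if the memory request changed, the container
// will not be restarted for the memory resize, and either side reports a swap
// behavior other than NoSwap.
func rejectSwappableInPlaceResize(desired, allocated v1.Container, desiredSwap, allocatedSwap kubelettypes.SwapBehavior) bool {
	desiredMem := desired.Resources.Requests[v1.ResourceMemory]
	allocatedMem := allocated.Resources.Requests[v1.ResourceMemory]
	if desiredMem.Equal(allocatedMem) || memoryResizeNeedsRestart(allocated.ResizePolicy) {
		return false
	}
	return desiredSwap != kubelettypes.NoSwap || allocatedSwap != kubelettypes.NoSwap
}

func main() {
	mem := func(s string) v1.ResourceList {
		return v1.ResourceList{v1.ResourceMemory: resource.MustParse(s)}
	}
	allocated := v1.Container{Name: "c1", Resources: v1.ResourceRequirements{Requests: mem("1Gi")}}
	desired := v1.Container{Name: "c1", Resources: v1.ResourceRequirements{Requests: mem("500Mi")}}

	// Memory request changes, no RestartContainer policy for memory, and the
	// allocated container is reported as LimitedSwap -> resize is rejected.
	fmt.Println(rejectSwappableInPlaceResize(desired, allocated, kubelettypes.NoSwap, kubelettypes.LimitedSwap)) // true
}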
// handlePodResourcesResize returns the "allocated pod", which should be used for all resource
// calculations after this function is called. It also updates the cached ResizeStatus according to
// the allocation decision and pod status.
@ -2949,6 +2983,10 @@ func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod, podStatus *kubecontaine
// If there is a pending resize but the resize is not allowed, always use the allocated resources.
kl.statusManager.SetPodResizePendingCondition(pod.UID, v1.PodReasonInfeasible, msg)
return podFromAllocation, nil
} else if resizeNotAllowed, msg := disallowResizeForSwappableContainers(kl.containerRuntime, pod, podFromAllocation); resizeNotAllowed {
// If this resize involves swap recalculation, mark it as infeasible, since in-place resize (IPPR) with swap is not supported for beta.
kl.statusManager.SetPodResizePendingCondition(pod.UID, v1.PodReasonInfeasible, msg)
return podFromAllocation, nil
}
kl.podResizeMutex.Lock()

View File

@ -2612,6 +2612,185 @@ func TestPodResourceAllocationReset(t *testing.T) {
}
}
func TestHandlePodResourcesResizeWithSwap(t *testing.T) {
if goruntime.GOOS == "windows" {
t.Skip("InPlacePodVerticalScaling is not currently supported for Windows")
}
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeSwap, true)
testKubelet := newTestKubelet(t, false)
defer testKubelet.Cleanup()
noSwapContainerName, swapContainerName := "test-container-noswap", "test-container-limitedswap"
runtime := testKubelet.fakeRuntime
runtime.SwapBehavior = map[string]kubetypes.SwapBehavior{
noSwapContainerName: kubetypes.NoSwap,
swapContainerName: kubetypes.LimitedSwap,
}
kubelet := testKubelet.kubelet
cpu500m := resource.MustParse("500m")
cpu1000m := resource.MustParse("1")
mem500M := resource.MustParse("500Mi")
mem1000M := resource.MustParse("1Gi")
nodes := []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{Name: testKubeletHostname},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("8"),
v1.ResourceMemory: resource.MustParse("8Gi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("4"),
v1.ResourceMemory: resource.MustParse("4Gi"),
v1.ResourcePods: *resource.NewQuantity(40, resource.DecimalSI),
},
},
},
}
kubelet.nodeLister = testNodeLister{nodes: nodes}
testPod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "1111",
Name: "pod1",
Namespace: "ns1",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "c1",
Image: "i1",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
},
},
},
},
Status: v1.PodStatus{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{
Name: "c1",
AllocatedResources: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
Resources: &v1.ResourceRequirements{},
},
},
},
}
testKubelet.fakeKubeClient = fake.NewSimpleClientset(testPod)
kubelet.kubeClient = testKubelet.fakeKubeClient
defer testKubelet.fakeKubeClient.ClearActions()
kubelet.podManager.AddPod(testPod)
kubelet.podWorkers.(*fakePodWorkers).running = map[types.UID]bool{
testPod.UID: true,
}
defer kubelet.podManager.RemovePod(testPod)
tests := []struct {
name string
newRequests v1.ResourceList
expectedAllocatedReqs v1.ResourceList
resizePolicy v1.ContainerResizePolicy
swapBehavior kubetypes.SwapBehavior
expectedResize []*v1.PodCondition
}{
{
name: "NoSwap Request Memory decrease ResizePolicy RestartContainer - expect InProgress",
newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
expectedAllocatedReqs: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
swapBehavior: kubetypes.NoSwap,
resizePolicy: v1.ContainerResizePolicy{ResourceName: v1.ResourceMemory, RestartPolicy: v1.RestartContainer},
expectedResize: []*v1.PodCondition{
{
Type: v1.PodResizeInProgress,
Status: "True",
},
},
},
{
name: "LimitedSwap Request Memory decrease with ResizePolicy RestartContainer - expect InProgress",
newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
expectedAllocatedReqs: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
swapBehavior: kubetypes.LimitedSwap,
resizePolicy: v1.ContainerResizePolicy{ResourceName: v1.ResourceMemory, RestartPolicy: v1.RestartContainer},
expectedResize: []*v1.PodCondition{
{
Type: v1.PodResizeInProgress,
Status: "True",
},
},
},
{
name: "LimitedSwap Request Memory decrease with ResizePolicy NotRequired - expect Infeasible",
newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
expectedAllocatedReqs: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
swapBehavior: kubetypes.LimitedSwap,
resizePolicy: v1.ContainerResizePolicy{ResourceName: v1.ResourceMemory, RestartPolicy: v1.NotRequired},
expectedResize: []*v1.PodCondition{
{
Type: v1.PodResizePending,
Status: "True",
Reason: "Infeasible",
Message: "In-place resize of containers with swap is not supported",
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
originalPod := testPod.DeepCopy()
originalPod.Spec.Containers[0].ResizePolicy = []v1.ContainerResizePolicy{tt.resizePolicy}
if tt.swapBehavior == kubetypes.NoSwap {
originalPod.Spec.Containers[0].Name = noSwapContainerName
} else {
originalPod.Spec.Containers[0].Name = swapContainerName
}
kubelet.podManager.UpdatePod(originalPod)
newPod := originalPod.DeepCopy()
newPod.Spec.Containers[0].Resources.Requests = tt.newRequests
require.NoError(t, kubelet.allocationManager.SetAllocatedResources(originalPod))
require.NoError(t, kubelet.allocationManager.SetActuatedResources(originalPod, nil))
t.Cleanup(func() { kubelet.allocationManager.RemovePod(originalPod.UID) })
podStatus := &kubecontainer.PodStatus{
ID: originalPod.UID,
Name: originalPod.Name,
Namespace: originalPod.Namespace,
}
setContainerStatus := func(podStatus *kubecontainer.PodStatus, c *v1.Container, idx int) {
podStatus.ContainerStatuses[idx] = &kubecontainer.Status{
Name: c.Name,
State: kubecontainer.ContainerStateRunning,
Resources: &kubecontainer.ContainerResources{
CPURequest: c.Resources.Requests.Cpu(),
CPULimit: c.Resources.Limits.Cpu(),
MemoryLimit: c.Resources.Limits.Memory(),
},
}
}
podStatus.ContainerStatuses = make([]*kubecontainer.Status, len(originalPod.Spec.Containers))
for i, c := range originalPod.Spec.Containers {
setContainerStatus(podStatus, &c, i)
}
updatedPod, err := kubelet.handlePodResourcesResize(newPod, podStatus)
require.NoError(t, err)
updatedPodCtr := updatedPod.Spec.Containers[0]
assert.Equal(t, tt.expectedAllocatedReqs, updatedPodCtr.Resources.Requests, "updated pod spec requests")
alloc, found := kubelet.allocationManager.GetContainerResourceAllocation(newPod.UID, updatedPodCtr.Name)
require.True(t, found, "container allocation")
assert.Equal(t, tt.expectedAllocatedReqs, alloc.Requests, "stored container request allocation")
resizeStatus := kubelet.statusManager.GetPodResizeConditions(newPod.UID)
for i := range resizeStatus {
// Ignore probe time and last transition time during comparison.
resizeStatus[i].LastProbeTime = metav1.Time{}
resizeStatus[i].LastTransitionTime = metav1.Time{}
assert.Contains(t, resizeStatus[i].Message, tt.expectedResize[i].Message)
resizeStatus[i].Message = tt.expectedResize[i].Message
}
assert.Equal(t, tt.expectedResize, resizeStatus)
})
}
}
func TestHandlePodResourcesResize(t *testing.T) {
if goruntime.GOOS == "windows" {
t.Skip("InPlacePodVerticalScaling is not currently supported for Windows")

View File

@ -45,7 +45,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/cm"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/qos"
kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/utils/ptr"
)
@ -206,35 +206,45 @@ func (m *kubeGenericRuntimeManager) configureContainerSwapResources(lcr *runtime
}
swapConfigurationHelper := newSwapConfigurationHelper(*m.machineInfo)
if m.memorySwapBehavior == kubelettypes.LimitedSwap {
if !isCgroup2UnifiedMode() {
swapConfigurationHelper.ConfigureNoSwap(lcr)
return
}
}
if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.NodeSwap) {
swapConfigurationHelper.ConfigureNoSwap(lcr)
return
}
if kubelettypes.IsCriticalPod(pod) {
swapConfigurationHelper.ConfigureNoSwap(lcr)
return
}
// NOTE(ehashman): Behavior is defined in the opencontainers runtime spec:
// https://github.com/opencontainers/runtime-spec/blob/1c3f411f041711bbeecf35ff7e93461ea6789220/config-linux.md#memory
switch m.memorySwapBehavior {
case kubelettypes.NoSwap:
switch m.GetContainerSwapBehavior(pod, container) {
case types.NoSwap:
swapConfigurationHelper.ConfigureNoSwap(lcr)
case kubelettypes.LimitedSwap:
case types.LimitedSwap:
swapConfigurationHelper.ConfigureLimitedSwap(lcr, pod, container)
default:
swapConfigurationHelper.ConfigureNoSwap(lcr)
}
}
// GetContainerSwapBehavior checks what swap behavior should be configured for a container,
// considering the requirements for enabling swap.
func (m *kubeGenericRuntimeManager) GetContainerSwapBehavior(pod *v1.Pod, container *v1.Container) types.SwapBehavior {
c := types.SwapBehavior(m.memorySwapBehavior)
if c == types.LimitedSwap {
if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.NodeSwap) || !swapControllerAvailable() {
return types.NoSwap
}
if !isCgroup2UnifiedMode() {
return types.NoSwap
}
if types.IsCriticalPod(pod) {
return types.NoSwap
}
podQos := kubeapiqos.GetPodQOS(pod)
containerDoesNotRequestMemory := container.Resources.Requests.Memory().IsZero() && container.Resources.Limits.Memory().IsZero()
memoryRequestEqualsToLimit := container.Resources.Requests.Memory().Cmp(*container.Resources.Limits.Memory()) == 0
if podQos != v1.PodQOSBurstable || containerDoesNotRequestMemory || memoryRequestEqualsToLimit {
return types.NoSwap
}
return c
}
return types.NoSwap
}
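For containers that do end up with LimitedSwap, ConfigureLimitedSwap below derives the swap limit from the container's memory request via calcSwapForBurstablePods. A minimal sketch of the proportional formula documented for LimitedSwap (swap limit = container memory request / node memory capacity * node swap capacity), assuming calcSwapForBurstablePods follows it:

package main

import "fmt"

// limitedSwapForBurstable sketches the proportional formula documented for
// LimitedSwap: a burstable container's swap limit is its share of node memory
// applied to the node's swap capacity. Illustration only; not the kubelet's
// calcSwapForBurstablePods.
func limitedSwapForBurstable(containerMemoryRequestBytes, nodeMemoryCapacityBytes, nodeSwapCapacityBytes int64) (int64, error) {
	if nodeMemoryCapacityBytes <= 0 {
		return 0, fmt.Errorf("node memory capacity must be positive")
	}
	containerShare := float64(containerMemoryRequestBytes) / float64(nodeMemoryCapacityBytes)
	return int64(containerShare * float64(nodeSwapCapacityBytes)), nil
}

func main() {
	// 1 GiB request on a node with 4 GiB of memory and 2 GiB of swap -> 512 MiB swap limit.
	limit, err := limitedSwapForBurstable(1<<30, 4<<30, 2<<30)
	fmt.Println(limit, err) // 536870912 <nil>
}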
// generateContainerResources generates platform specific (linux) container resources config for runtime
func (m *kubeGenericRuntimeManager) generateContainerResources(pod *v1.Pod, container *v1.Container) *runtimeapi.ContainerResources {
enforceMemoryQoS := false
@ -428,15 +438,6 @@ func newSwapConfigurationHelper(machineInfo cadvisorv1.MachineInfo) *swapConfigu
}
func (m swapConfigurationHelper) ConfigureLimitedSwap(lcr *runtimeapi.LinuxContainerResources, pod *v1.Pod, container *v1.Container) {
podQos := kubeapiqos.GetPodQOS(pod)
containerDoesNotRequestMemory := container.Resources.Requests.Memory().IsZero() && container.Resources.Limits.Memory().IsZero()
memoryRequestEqualsToLimit := container.Resources.Requests.Memory().Cmp(*container.Resources.Limits.Memory()) == 0
if podQos != v1.PodQOSBurstable || containerDoesNotRequestMemory || !isCgroup2UnifiedMode() || memoryRequestEqualsToLimit {
m.ConfigureNoSwap(lcr)
return
}
containerMemoryRequest := container.Resources.Requests.Memory()
swapLimit, err := calcSwapForBurstablePods(containerMemoryRequest.Value(), int64(m.machineInfo.MemoryCapacity), int64(m.machineInfo.SwapCapacity))
if err != nil {

View File

@ -924,6 +924,145 @@ func TestGenerateLinuxContainerResources(t *testing.T) {
}
}
func TestGetContainerSwapBehavior(t *testing.T) {
_, _, m, err := createTestRuntimeManager()
require.NoError(t, err)
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "foo",
Namespace: "bar",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "c1",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi")},
Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi")},
},
},
},
},
Status: v1.PodStatus{},
}
tests := []struct {
name string
configuredMemorySwap types.SwapBehavior
nodeSwapFeatureGateEnabled bool
isSwapControllerAvailable bool
cgroupVersion CgroupVersion
isCriticalPod bool
qosClass v1.PodQOSClass
containerResourceOverride func(container *v1.Container)
expected types.SwapBehavior
}{
{
name: "NoSwap, user set NoSwap behavior",
configuredMemorySwap: types.NoSwap,
nodeSwapFeatureGateEnabled: false,
expected: types.NoSwap,
},
{
name: "NoSwap, feature gate turned off",
configuredMemorySwap: types.LimitedSwap,
nodeSwapFeatureGateEnabled: false,
expected: types.NoSwap,
},
{
name: "NoSwap, swap controller unavailable",
configuredMemorySwap: types.LimitedSwap,
nodeSwapFeatureGateEnabled: true,
isSwapControllerAvailable: false,
expected: types.NoSwap,
},
{
name: "NoSwap, cgroup v1",
configuredMemorySwap: types.LimitedSwap,
nodeSwapFeatureGateEnabled: true,
isSwapControllerAvailable: true,
cgroupVersion: cgroupV1,
expected: types.NoSwap,
},
{
name: "NoSwap, qos is Best-Effort",
configuredMemorySwap: types.LimitedSwap,
nodeSwapFeatureGateEnabled: true,
isSwapControllerAvailable: true,
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBestEffort,
expected: types.NoSwap,
},
{
name: "NoSwap, qos is Guaranteed",
configuredMemorySwap: types.LimitedSwap,
nodeSwapFeatureGateEnabled: true,
isSwapControllerAvailable: true,
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSGuaranteed,
expected: types.NoSwap,
},
{
name: "NoSwap, zero memory",
configuredMemorySwap: types.LimitedSwap,
nodeSwapFeatureGateEnabled: true,
isSwapControllerAvailable: true,
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
containerResourceOverride: func(c *v1.Container) {
c.Resources.Requests = v1.ResourceList{
v1.ResourceMemory: resource.MustParse("0"),
}
c.Resources.Limits = v1.ResourceList{
v1.ResourceMemory: resource.MustParse("0"),
}
},
expected: types.NoSwap,
},
{
name: "NoSwap, memory request equal to limit",
configuredMemorySwap: types.LimitedSwap,
nodeSwapFeatureGateEnabled: true,
isSwapControllerAvailable: true,
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
containerResourceOverride: func(c *v1.Container) {
c.Resources.Requests = v1.ResourceList{
v1.ResourceMemory: resource.MustParse("100Mi"),
}
c.Resources.Limits = v1.ResourceList{
v1.ResourceMemory: resource.MustParse("100Mi"),
}
},
expected: types.NoSwap,
},
{
name: "LimitedSwap, cgroup v2",
configuredMemorySwap: types.LimitedSwap,
nodeSwapFeatureGateEnabled: true,
isSwapControllerAvailable: true,
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
expected: types.LimitedSwap,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
m.memorySwapBehavior = string(tt.configuredMemorySwap)
setCgroupVersionDuringTest(tt.cgroupVersion)
defer setSwapControllerAvailableDuringTest(tt.isSwapControllerAvailable)()
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeSwap, tt.nodeSwapFeatureGateEnabled)
testpod := pod.DeepCopy()
testpod.Status.QOSClass = tt.qosClass
if tt.containerResourceOverride != nil {
tt.containerResourceOverride(&testpod.Spec.Containers[0])
}
assert.Equal(t, tt.expected, m.GetContainerSwapBehavior(testpod, &testpod.Spec.Containers[0]))
})
}
}
func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
_, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
@ -999,7 +1138,7 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
qosClass v1.PodQOSClass
swapDisabledOnNode bool
nodeSwapFeatureGateEnabled bool
swapBehavior string
swapBehavior types.SwapBehavior
addContainerWithoutRequests bool
addGuaranteedContainer bool
isCriticalPod bool
@ -1195,7 +1334,7 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
setCgroupVersionDuringTest(tc.cgroupVersion)
defer setSwapControllerAvailableDuringTest(!tc.swapDisabledOnNode)()
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeSwap, tc.nodeSwapFeatureGateEnabled)
m.memorySwapBehavior = tc.swapBehavior
m.memorySwapBehavior = string(tc.swapBehavior)
var resourceReqsC1, resourceReqsC2 v1.ResourceRequirements
switch tc.qosClass {

View File

@ -24,6 +24,7 @@ import (
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
"k8s.io/kubernetes/pkg/kubelet/cm"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/types"
)
// applyPlatformSpecificContainerConfig applies platform specific configurations to runtimeapi.ContainerConfig.
@ -48,3 +49,7 @@ func toKubeContainerResources(statusResources *runtimeapi.ContainerResources) *k
func toKubeContainerUser(statusUser *runtimeapi.ContainerUser) *kubecontainer.ContainerUser {
return nil
}
func (m *kubeGenericRuntimeManager) GetContainerSwapBehavior(pod *v1.Pod, container *v1.Container) types.SwapBehavior {
return types.NoSwap
}

View File

@ -26,6 +26,7 @@ import (
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/kubelet/cm"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/kubelet/winstats"
"k8s.io/kubernetes/pkg/securitycontext"
)
@ -183,3 +184,7 @@ func toKubeContainerResources(statusResources *runtimeapi.ContainerResources) *k
func toKubeContainerUser(statusUser *runtimeapi.ContainerUser) *kubecontainer.ContainerUser {
return nil
}
func (m *kubeGenericRuntimeManager) GetContainerSwapBehavior(pod *v1.Pod, container *v1.Container) types.SwapBehavior {
return types.NoSwap
}

View File

@ -32,7 +32,9 @@ const (
)
// SwapBehavior types
type SwapBehavior string
const (
LimitedSwap = "LimitedSwap"
NoSwap = "NoSwap"
LimitedSwap SwapBehavior = "LimitedSwap"
NoSwap SwapBehavior = "NoSwap"
)
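Turning SwapBehavior into a defined string type is what drives the string(...) conversions elsewhere in this change: the KubeletConfiguration field remains a plain string, so comparisons and assignments across the two types need an explicit conversion. A minimal sketch of the pattern, with kubeletConfig as a hypothetical stand-in struct:

package main

import "fmt"

// SwapBehavior mirrors the defined string type introduced above.
type SwapBehavior string

const (
	LimitedSwap SwapBehavior = "LimitedSwap"
	NoSwap      SwapBehavior = "NoSwap"
)

// kubeletConfig stands in for the configuration struct, whose SwapBehavior
// field stays a plain string.
type kubeletConfig struct {
	SwapBehavior string
}

func main() {
	cfg := kubeletConfig{SwapBehavior: "LimitedSwap"}

	// A plain string and a SwapBehavior constant are different types, so one
	// side must be converted explicitly; this is why string(...) appears
	// throughout this change.
	if cfg.SwapBehavior == string(LimitedSwap) {
		fmt.Println("limited swap configured")
	}

	// Converting the other way yields the typed value for internal use.
	behavior := SwapBehavior(cfg.SwapBehavior)
	fmt.Println(behavior == NoSwap) // false
}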

View File

@ -76,7 +76,7 @@ var _ = SIGDescribe("Swap", "[LinuxOnly]", ginkgo.Ordered, feature.Swap, framewo
if !isPodCgroupV2(f, pod) {
e2eskipper.Skipf("swap tests require cgroup v2")
}
gomega.Expect(getSwapBehavior()).To(gomega.Or(gomega.Equal(types.NoSwap), gomega.BeEmpty()), "NodeConformance is expected to run with NoSwap")
gomega.Expect(getSwapBehavior()).To(gomega.Or(gomega.Equal(string(types.NoSwap)), gomega.BeEmpty()), "NodeConformance is expected to run with NoSwap")
expectNoSwap(f, pod)
},
@ -105,8 +105,8 @@ var _ = SIGDescribe("Swap", "[LinuxOnly]", ginkgo.Ordered, feature.Swap, framewo
enableLimitedSwap := func(ctx context.Context, initialConfig *config.KubeletConfiguration) {
msg := "swap behavior is already set to LimitedSwap"
if swapBehavior := initialConfig.MemorySwap.SwapBehavior; swapBehavior != types.LimitedSwap {
initialConfig.MemorySwap.SwapBehavior = types.LimitedSwap
if swapBehavior := initialConfig.MemorySwap.SwapBehavior; swapBehavior != string(types.LimitedSwap) {
initialConfig.MemorySwap.SwapBehavior = string(types.LimitedSwap)
msg = "setting swap behavior to LimitedSwap"
}
@ -124,7 +124,7 @@ var _ = SIGDescribe("Swap", "[LinuxOnly]", ginkgo.Ordered, feature.Swap, framewo
if !isPodCgroupV2(f, pod) {
e2eskipper.Skipf("swap tests require cgroup v2")
}
gomega.Expect(getSwapBehavior()).To(gomega.Equal(types.LimitedSwap))
gomega.Expect(getSwapBehavior()).To(gomega.Equal(string(types.LimitedSwap)))
expectedSwapLimit := calcSwapForBurstablePod(f, pod)
expectLimitedSwap(f, pod, expectedSwapLimit)