disable in-place vertical pod scaling for non-restartable swappable containers

This commit is contained in:
Ajay Sundar Karuppasamy 2025-03-19 03:32:53 +00:00
parent e3baee37cd
commit a09cb22e4d
14 changed files with 482 additions and 51 deletions

View File

@ -834,7 +834,7 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend
s.TopologyManagerPolicyOptions, features.TopologyManagerPolicyOptions) s.TopologyManagerPolicyOptions, features.TopologyManagerPolicyOptions)
} }
if utilfeature.DefaultFeatureGate.Enabled(features.NodeSwap) { if utilfeature.DefaultFeatureGate.Enabled(features.NodeSwap) {
if !kubeletutil.IsCgroup2UnifiedMode() && s.MemorySwap.SwapBehavior == kubelettypes.LimitedSwap { if !kubeletutil.IsCgroup2UnifiedMode() && s.MemorySwap.SwapBehavior == string(kubelettypes.LimitedSwap) {
// This feature is not supported for cgroupv1 so we are failing early. // This feature is not supported for cgroupv1 so we are failing early.
return fmt.Errorf("swap feature is enabled and LimitedSwap but it is only supported with cgroupv2") return fmt.Errorf("swap feature is enabled and LimitedSwap but it is only supported with cgroupv2")
} }

View File

@ -203,8 +203,8 @@ func ValidateKubeletConfiguration(kc *kubeletconfig.KubeletConfiguration, featur
if localFeatureGate.Enabled(features.NodeSwap) { if localFeatureGate.Enabled(features.NodeSwap) {
switch kc.MemorySwap.SwapBehavior { switch kc.MemorySwap.SwapBehavior {
case "": case "":
case kubetypes.NoSwap: case string(kubetypes.NoSwap):
case kubetypes.LimitedSwap: case string(kubetypes.LimitedSwap):
default: default:
allErrors = append(allErrors, fmt.Errorf("invalid configuration: memorySwap.swapBehavior %q must be one of: \"\", %q or %q", kc.MemorySwap.SwapBehavior, kubetypes.LimitedSwap, kubetypes.NoSwap)) allErrors = append(allErrors, fmt.Errorf("invalid configuration: memorySwap.swapBehavior %q must be one of: \"\", %q or %q", kc.MemorySwap.SwapBehavior, kubetypes.LimitedSwap, kubetypes.NoSwap))
} }

View File

@ -385,7 +385,7 @@ func TestValidateKubeletConfiguration(t *testing.T) {
name: "specify MemorySwap.SwapBehavior without enabling NodeSwap", name: "specify MemorySwap.SwapBehavior without enabling NodeSwap",
configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration {
conf.FeatureGates = map[string]bool{"NodeSwap": false} conf.FeatureGates = map[string]bool{"NodeSwap": false}
conf.MemorySwap.SwapBehavior = kubetypes.LimitedSwap conf.MemorySwap.SwapBehavior = string(kubetypes.LimitedSwap)
return conf return conf
}, },
errMsg: "invalid configuration: memorySwap.swapBehavior cannot be set when NodeSwap feature flag is disabled", errMsg: "invalid configuration: memorySwap.swapBehavior cannot be set when NodeSwap feature flag is disabled",

View File

@ -34,6 +34,7 @@ import (
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
"k8s.io/klog/v2" "k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/credentialprovider" "k8s.io/kubernetes/pkg/credentialprovider"
kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/volume" "k8s.io/kubernetes/pkg/volume"
) )
@ -138,6 +139,9 @@ type Runtime interface {
ListPodSandboxMetrics(ctx context.Context) ([]*runtimeapi.PodSandboxMetrics, error) ListPodSandboxMetrics(ctx context.Context) ([]*runtimeapi.PodSandboxMetrics, error)
// GetContainerStatus returns the status for the container. // GetContainerStatus returns the status for the container.
GetContainerStatus(ctx context.Context, id ContainerID) (*Status, error) GetContainerStatus(ctx context.Context, id ContainerID) (*Status, error)
// GetContainerSwapBehavior reports whether a container could be swappable.
// This is used to decide whether to handle InPlacePodVerticalScaling for containers.
GetContainerSwapBehavior(pod *v1.Pod, container *v1.Container) kubelettypes.SwapBehavior
} }
// StreamingRuntime is the interface implemented by runtimes that handle the serving of the // StreamingRuntime is the interface implemented by runtimes that handle the serving of the

View File

@ -30,6 +30,7 @@ import (
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
"k8s.io/kubernetes/pkg/credentialprovider" "k8s.io/kubernetes/pkg/credentialprovider"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/volume" "k8s.io/kubernetes/pkg/volume"
) )
@ -70,6 +71,7 @@ type FakeRuntime struct {
// from container runtime. // from container runtime.
BlockImagePulls bool BlockImagePulls bool
imagePullTokenBucket chan bool imagePullTokenBucket chan bool
SwapBehavior map[string]kubetypes.SwapBehavior
T TB T TB
} }
@ -536,3 +538,10 @@ func (f *FakeRuntime) GetContainerStatus(_ context.Context, _ kubecontainer.Cont
f.CalledFunctions = append(f.CalledFunctions, "GetContainerStatus") f.CalledFunctions = append(f.CalledFunctions, "GetContainerStatus")
return nil, f.Err return nil, f.Err
} }
func (f *FakeRuntime) GetContainerSwapBehavior(pod *v1.Pod, container *v1.Container) kubetypes.SwapBehavior {
if f.SwapBehavior != nil && f.SwapBehavior[container.Name] != "" {
return f.SwapBehavior[container.Name]
}
return kubetypes.NoSwap
}

View File

@ -33,7 +33,9 @@ import (
mock "github.com/stretchr/testify/mock" mock "github.com/stretchr/testify/mock"
types "k8s.io/apimachinery/pkg/types" pkgtypes "k8s.io/apimachinery/pkg/types"
types "k8s.io/kubernetes/pkg/kubelet/types"
v1 "k8s.io/cri-api/pkg/apis/runtime/v1" v1 "k8s.io/cri-api/pkg/apis/runtime/v1"
) )
@ -419,6 +421,53 @@ func (_c *MockRuntime_GetContainerStatus_Call) RunAndReturn(run func(context.Con
return _c return _c
} }
// GetContainerSwapBehavior provides a mock function with given fields: pod, _a1
func (_m *MockRuntime) GetContainerSwapBehavior(pod *corev1.Pod, _a1 *corev1.Container) types.SwapBehavior {
ret := _m.Called(pod, _a1)
if len(ret) == 0 {
panic("no return value specified for GetContainerSwapBehavior")
}
var r0 types.SwapBehavior
if rf, ok := ret.Get(0).(func(*corev1.Pod, *corev1.Container) types.SwapBehavior); ok {
r0 = rf(pod, _a1)
} else {
r0 = ret.Get(0).(types.SwapBehavior)
}
return r0
}
// MockRuntime_GetContainerSwapBehavior_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetContainerSwapBehavior'
type MockRuntime_GetContainerSwapBehavior_Call struct {
*mock.Call
}
// GetContainerSwapBehavior is a helper method to define mock.On call
// - pod *corev1.Pod
// - _a1 *corev1.Container
func (_e *MockRuntime_Expecter) GetContainerSwapBehavior(pod interface{}, _a1 interface{}) *MockRuntime_GetContainerSwapBehavior_Call {
return &MockRuntime_GetContainerSwapBehavior_Call{Call: _e.mock.On("GetContainerSwapBehavior", pod, _a1)}
}
func (_c *MockRuntime_GetContainerSwapBehavior_Call) Run(run func(pod *corev1.Pod, _a1 *corev1.Container)) *MockRuntime_GetContainerSwapBehavior_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(*corev1.Pod), args[1].(*corev1.Container))
})
return _c
}
func (_c *MockRuntime_GetContainerSwapBehavior_Call) Return(_a0 types.SwapBehavior) *MockRuntime_GetContainerSwapBehavior_Call {
_c.Call.Return(_a0)
return _c
}
func (_c *MockRuntime_GetContainerSwapBehavior_Call) RunAndReturn(run func(*corev1.Pod, *corev1.Container) types.SwapBehavior) *MockRuntime_GetContainerSwapBehavior_Call {
_c.Call.Return(run)
return _c
}
// GetImageRef provides a mock function with given fields: ctx, image // GetImageRef provides a mock function with given fields: ctx, image
func (_m *MockRuntime) GetImageRef(ctx context.Context, image container.ImageSpec) (string, error) { func (_m *MockRuntime) GetImageRef(ctx context.Context, image container.ImageSpec) (string, error) {
ret := _m.Called(ctx, image) ret := _m.Called(ctx, image)
@ -534,7 +583,7 @@ func (_c *MockRuntime_GetImageSize_Call) RunAndReturn(run func(context.Context,
} }
// GetPodStatus provides a mock function with given fields: ctx, uid, name, namespace // GetPodStatus provides a mock function with given fields: ctx, uid, name, namespace
func (_m *MockRuntime) GetPodStatus(ctx context.Context, uid types.UID, name string, namespace string) (*container.PodStatus, error) { func (_m *MockRuntime) GetPodStatus(ctx context.Context, uid pkgtypes.UID, name string, namespace string) (*container.PodStatus, error) {
ret := _m.Called(ctx, uid, name, namespace) ret := _m.Called(ctx, uid, name, namespace)
if len(ret) == 0 { if len(ret) == 0 {
@ -543,10 +592,10 @@ func (_m *MockRuntime) GetPodStatus(ctx context.Context, uid types.UID, name str
var r0 *container.PodStatus var r0 *container.PodStatus
var r1 error var r1 error
if rf, ok := ret.Get(0).(func(context.Context, types.UID, string, string) (*container.PodStatus, error)); ok { if rf, ok := ret.Get(0).(func(context.Context, pkgtypes.UID, string, string) (*container.PodStatus, error)); ok {
return rf(ctx, uid, name, namespace) return rf(ctx, uid, name, namespace)
} }
if rf, ok := ret.Get(0).(func(context.Context, types.UID, string, string) *container.PodStatus); ok { if rf, ok := ret.Get(0).(func(context.Context, pkgtypes.UID, string, string) *container.PodStatus); ok {
r0 = rf(ctx, uid, name, namespace) r0 = rf(ctx, uid, name, namespace)
} else { } else {
if ret.Get(0) != nil { if ret.Get(0) != nil {
@ -554,7 +603,7 @@ func (_m *MockRuntime) GetPodStatus(ctx context.Context, uid types.UID, name str
} }
} }
if rf, ok := ret.Get(1).(func(context.Context, types.UID, string, string) error); ok { if rf, ok := ret.Get(1).(func(context.Context, pkgtypes.UID, string, string) error); ok {
r1 = rf(ctx, uid, name, namespace) r1 = rf(ctx, uid, name, namespace)
} else { } else {
r1 = ret.Error(1) r1 = ret.Error(1)
@ -570,16 +619,16 @@ type MockRuntime_GetPodStatus_Call struct {
// GetPodStatus is a helper method to define mock.On call // GetPodStatus is a helper method to define mock.On call
// - ctx context.Context // - ctx context.Context
// - uid types.UID // - uid pkgtypes.UID
// - name string // - name string
// - namespace string // - namespace string
func (_e *MockRuntime_Expecter) GetPodStatus(ctx interface{}, uid interface{}, name interface{}, namespace interface{}) *MockRuntime_GetPodStatus_Call { func (_e *MockRuntime_Expecter) GetPodStatus(ctx interface{}, uid interface{}, name interface{}, namespace interface{}) *MockRuntime_GetPodStatus_Call {
return &MockRuntime_GetPodStatus_Call{Call: _e.mock.On("GetPodStatus", ctx, uid, name, namespace)} return &MockRuntime_GetPodStatus_Call{Call: _e.mock.On("GetPodStatus", ctx, uid, name, namespace)}
} }
func (_c *MockRuntime_GetPodStatus_Call) Run(run func(ctx context.Context, uid types.UID, name string, namespace string)) *MockRuntime_GetPodStatus_Call { func (_c *MockRuntime_GetPodStatus_Call) Run(run func(ctx context.Context, uid pkgtypes.UID, name string, namespace string)) *MockRuntime_GetPodStatus_Call {
_c.Call.Run(func(args mock.Arguments) { _c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context), args[1].(types.UID), args[2].(string), args[3].(string)) run(args[0].(context.Context), args[1].(pkgtypes.UID), args[2].(string), args[3].(string))
}) })
return _c return _c
} }
@ -589,7 +638,7 @@ func (_c *MockRuntime_GetPodStatus_Call) Return(_a0 *container.PodStatus, _a1 er
return _c return _c
} }
func (_c *MockRuntime_GetPodStatus_Call) RunAndReturn(run func(context.Context, types.UID, string, string) (*container.PodStatus, error)) *MockRuntime_GetPodStatus_Call { func (_c *MockRuntime_GetPodStatus_Call) RunAndReturn(run func(context.Context, pkgtypes.UID, string, string) (*container.PodStatus, error)) *MockRuntime_GetPodStatus_Call {
_c.Call.Return(run) _c.Call.Return(run)
return _c return _c
} }

View File

@ -2920,6 +2920,40 @@ func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, string, string) {
return true, "", "" return true, "", ""
} }
func disallowResizeForSwappableContainers(runtime kubecontainer.Runtime, desiredPod, allocatedPod *v1.Pod) (bool, string) {
if desiredPod == nil || allocatedPod == nil {
return false, ""
}
restartableMemoryResizePolicy := func(resizePolicies []v1.ContainerResizePolicy) bool {
for _, policy := range resizePolicies {
if policy.ResourceName == v1.ResourceMemory {
return policy.RestartPolicy == v1.RestartContainer
}
}
return false
}
allocatedContainers := make(map[string]v1.Container)
for _, container := range append(allocatedPod.Spec.Containers, allocatedPod.Spec.InitContainers...) {
allocatedContainers[container.Name] = container
}
for _, desiredContainer := range append(desiredPod.Spec.Containers, desiredPod.Spec.InitContainers...) {
allocatedContainer, ok := allocatedContainers[desiredContainer.Name]
if !ok {
continue
}
origMemRequest := desiredContainer.Resources.Requests[v1.ResourceMemory]
newMemRequest := allocatedContainer.Resources.Requests[v1.ResourceMemory]
if !origMemRequest.Equal(newMemRequest) && !restartableMemoryResizePolicy(allocatedContainer.ResizePolicy) {
aSwapBehavior := runtime.GetContainerSwapBehavior(desiredPod, &desiredContainer)
bSwapBehavior := runtime.GetContainerSwapBehavior(allocatedPod, &allocatedContainer)
if aSwapBehavior != kubetypes.NoSwap || bSwapBehavior != kubetypes.NoSwap {
return true, "In-place resize of containers with swap is not supported."
}
}
}
return false, ""
}
// handlePodResourcesResize returns the "allocated pod", which should be used for all resource // handlePodResourcesResize returns the "allocated pod", which should be used for all resource
// calculations after this function is called. It also updates the cached ResizeStatus according to // calculations after this function is called. It also updates the cached ResizeStatus according to
// the allocation decision and pod status. // the allocation decision and pod status.
@ -2949,6 +2983,10 @@ func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod, podStatus *kubecontaine
// If there is a pending resize but the resize is not allowed, always use the allocated resources. // If there is a pending resize but the resize is not allowed, always use the allocated resources.
kl.statusManager.SetPodResizePendingCondition(pod.UID, v1.PodReasonInfeasible, msg) kl.statusManager.SetPodResizePendingCondition(pod.UID, v1.PodReasonInfeasible, msg)
return podFromAllocation, nil return podFromAllocation, nil
} else if resizeNotAllowed, msg := disallowResizeForSwappableContainers(kl.containerRuntime, pod, podFromAllocation); resizeNotAllowed {
// If this resize involve swap recalculation, set as infeasible, as IPPR with swap is not supported for beta.
kl.statusManager.SetPodResizePendingCondition(pod.UID, v1.PodReasonInfeasible, msg)
return podFromAllocation, nil
} }
kl.podResizeMutex.Lock() kl.podResizeMutex.Lock()

View File

@ -2612,6 +2612,185 @@ func TestPodResourceAllocationReset(t *testing.T) {
} }
} }
func TestHandlePodResourcesResizeWithSwap(t *testing.T) {
if goruntime.GOOS == "windows" {
t.Skip("InPlacePodVerticalScaling is not currently supported for Windows")
}
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeSwap, true)
testKubelet := newTestKubelet(t, false)
defer testKubelet.Cleanup()
noSwapContainerName, swapContainerName := "test-container-noswap", "test-container-limitedswap"
runtime := testKubelet.fakeRuntime
runtime.SwapBehavior = map[string]kubetypes.SwapBehavior{
noSwapContainerName: kubetypes.NoSwap,
swapContainerName: kubetypes.LimitedSwap,
}
kubelet := testKubelet.kubelet
cpu500m := resource.MustParse("500m")
cpu1000m := resource.MustParse("1")
mem500M := resource.MustParse("500Mi")
mem1000M := resource.MustParse("1Gi")
nodes := []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{Name: testKubeletHostname},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("8"),
v1.ResourceMemory: resource.MustParse("8Gi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("4"),
v1.ResourceMemory: resource.MustParse("4Gi"),
v1.ResourcePods: *resource.NewQuantity(40, resource.DecimalSI),
},
},
},
}
kubelet.nodeLister = testNodeLister{nodes: nodes}
testPod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "1111",
Name: "pod1",
Namespace: "ns1",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "c1",
Image: "i1",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
},
},
},
},
Status: v1.PodStatus{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{
Name: "c1",
AllocatedResources: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
Resources: &v1.ResourceRequirements{},
},
},
},
}
testKubelet.fakeKubeClient = fake.NewSimpleClientset(testPod)
kubelet.kubeClient = testKubelet.fakeKubeClient
defer testKubelet.fakeKubeClient.ClearActions()
kubelet.podManager.AddPod(testPod)
kubelet.podWorkers.(*fakePodWorkers).running = map[types.UID]bool{
testPod.UID: true,
}
defer kubelet.podManager.RemovePod(testPod)
tests := []struct {
name string
newRequests v1.ResourceList
expectedAllocatedReqs v1.ResourceList
resizePolicy v1.ContainerResizePolicy
swapBehavior kubetypes.SwapBehavior
expectedResize []*v1.PodCondition
}{
{
name: "NoSwap Request Memory decrease ResizePolicy RestartContainer - expect InProgress",
newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
expectedAllocatedReqs: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
swapBehavior: kubetypes.NoSwap,
resizePolicy: v1.ContainerResizePolicy{ResourceName: v1.ResourceMemory, RestartPolicy: v1.RestartContainer},
expectedResize: []*v1.PodCondition{
{
Type: v1.PodResizeInProgress,
Status: "True",
},
},
},
{
name: "LimitedSwap Request Memory increase with ResizePolicy RestartContainer - expect InProgress",
newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
expectedAllocatedReqs: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
swapBehavior: kubetypes.LimitedSwap,
resizePolicy: v1.ContainerResizePolicy{ResourceName: v1.ResourceMemory, RestartPolicy: v1.RestartContainer},
expectedResize: []*v1.PodCondition{
{
Type: v1.PodResizeInProgress,
Status: "True",
},
},
},
{
name: "LimitedSwap Request Memory increase with ResizePolicy NotRequired - expect Infeasible",
newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
expectedAllocatedReqs: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
swapBehavior: kubetypes.LimitedSwap,
resizePolicy: v1.ContainerResizePolicy{ResourceName: v1.ResourceMemory, RestartPolicy: v1.NotRequired},
expectedResize: []*v1.PodCondition{
{
Type: v1.PodResizePending,
Status: "True",
Reason: "Infeasible",
Message: "In-place resize of containers with swap is not supported",
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
originalPod := testPod.DeepCopy()
originalPod.Spec.Containers[0].ResizePolicy = []v1.ContainerResizePolicy{tt.resizePolicy}
if tt.swapBehavior == kubetypes.NoSwap {
originalPod.Spec.Containers[0].Name = noSwapContainerName
} else {
originalPod.Spec.Containers[0].Name = swapContainerName
}
kubelet.podManager.UpdatePod(originalPod)
newPod := originalPod.DeepCopy()
newPod.Spec.Containers[0].Resources.Requests = tt.newRequests
require.NoError(t, kubelet.allocationManager.SetAllocatedResources(originalPod))
require.NoError(t, kubelet.allocationManager.SetActuatedResources(originalPod, nil))
t.Cleanup(func() { kubelet.allocationManager.RemovePod(originalPod.UID) })
podStatus := &kubecontainer.PodStatus{
ID: originalPod.UID,
Name: originalPod.Name,
Namespace: originalPod.Namespace,
}
setContainerStatus := func(podStatus *kubecontainer.PodStatus, c *v1.Container, idx int) {
podStatus.ContainerStatuses[idx] = &kubecontainer.Status{
Name: c.Name,
State: kubecontainer.ContainerStateRunning,
Resources: &kubecontainer.ContainerResources{
CPURequest: c.Resources.Requests.Cpu(),
CPULimit: c.Resources.Limits.Cpu(),
MemoryLimit: c.Resources.Limits.Memory(),
},
}
}
podStatus.ContainerStatuses = make([]*kubecontainer.Status, len(originalPod.Spec.Containers))
for i, c := range originalPod.Spec.Containers {
setContainerStatus(podStatus, &c, i)
}
updatedPod, err := kubelet.handlePodResourcesResize(newPod, podStatus)
require.NoError(t, err)
updatedPodCtr := updatedPod.Spec.Containers[0]
assert.Equal(t, tt.expectedAllocatedReqs, updatedPodCtr.Resources.Requests, "updated pod spec requests")
alloc, found := kubelet.allocationManager.GetContainerResourceAllocation(newPod.UID, updatedPodCtr.Name)
require.True(t, found, "container allocation")
assert.Equal(t, tt.expectedAllocatedReqs, alloc.Requests, "stored container request allocation")
resizeStatus := kubelet.statusManager.GetPodResizeConditions(newPod.UID)
for i := range resizeStatus {
// Ignore probe time and last transition time during comparison.
resizeStatus[i].LastProbeTime = metav1.Time{}
resizeStatus[i].LastTransitionTime = metav1.Time{}
assert.Contains(t, resizeStatus[i].Message, tt.expectedResize[i].Message)
resizeStatus[i].Message = tt.expectedResize[i].Message
}
assert.Equal(t, tt.expectedResize, resizeStatus)
})
}
}
func TestHandlePodResourcesResize(t *testing.T) { func TestHandlePodResourcesResize(t *testing.T) {
if goruntime.GOOS == "windows" { if goruntime.GOOS == "windows" {
t.Skip("InPlacePodVerticalScaling is not currently supported for Windows") t.Skip("InPlacePodVerticalScaling is not currently supported for Windows")

View File

@ -45,7 +45,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/cm" "k8s.io/kubernetes/pkg/kubelet/cm"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/qos" "k8s.io/kubernetes/pkg/kubelet/qos"
kubelettypes "k8s.io/kubernetes/pkg/kubelet/types" "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/utils/ptr" "k8s.io/utils/ptr"
) )
@ -206,35 +206,45 @@ func (m *kubeGenericRuntimeManager) configureContainerSwapResources(lcr *runtime
} }
swapConfigurationHelper := newSwapConfigurationHelper(*m.machineInfo) swapConfigurationHelper := newSwapConfigurationHelper(*m.machineInfo)
if m.memorySwapBehavior == kubelettypes.LimitedSwap {
if !isCgroup2UnifiedMode() {
swapConfigurationHelper.ConfigureNoSwap(lcr)
return
}
}
if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.NodeSwap) {
swapConfigurationHelper.ConfigureNoSwap(lcr)
return
}
if kubelettypes.IsCriticalPod(pod) {
swapConfigurationHelper.ConfigureNoSwap(lcr)
return
}
// NOTE(ehashman): Behavior is defined in the opencontainers runtime spec: // NOTE(ehashman): Behavior is defined in the opencontainers runtime spec:
// https://github.com/opencontainers/runtime-spec/blob/1c3f411f041711bbeecf35ff7e93461ea6789220/config-linux.md#memory // https://github.com/opencontainers/runtime-spec/blob/1c3f411f041711bbeecf35ff7e93461ea6789220/config-linux.md#memory
switch m.memorySwapBehavior { switch m.GetContainerSwapBehavior(pod, container) {
case kubelettypes.NoSwap: case types.NoSwap:
swapConfigurationHelper.ConfigureNoSwap(lcr) swapConfigurationHelper.ConfigureNoSwap(lcr)
case kubelettypes.LimitedSwap: case types.LimitedSwap:
swapConfigurationHelper.ConfigureLimitedSwap(lcr, pod, container) swapConfigurationHelper.ConfigureLimitedSwap(lcr, pod, container)
default: default:
swapConfigurationHelper.ConfigureNoSwap(lcr) swapConfigurationHelper.ConfigureNoSwap(lcr)
} }
} }
// GetContainerSwapBehavior checks what swap behavior should be configured for a container,
// considering the requirements for enabling swap.
func (m *kubeGenericRuntimeManager) GetContainerSwapBehavior(pod *v1.Pod, container *v1.Container) types.SwapBehavior {
c := types.SwapBehavior(m.memorySwapBehavior)
if c == types.LimitedSwap {
if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.NodeSwap) || !swapControllerAvailable() {
return types.NoSwap
}
if !isCgroup2UnifiedMode() {
return types.NoSwap
}
if types.IsCriticalPod(pod) {
return types.NoSwap
}
podQos := kubeapiqos.GetPodQOS(pod)
containerDoesNotRequestMemory := container.Resources.Requests.Memory().IsZero() && container.Resources.Limits.Memory().IsZero()
memoryRequestEqualsToLimit := container.Resources.Requests.Memory().Cmp(*container.Resources.Limits.Memory()) == 0
if podQos != v1.PodQOSBurstable || containerDoesNotRequestMemory || memoryRequestEqualsToLimit {
return types.NoSwap
}
return c
}
return types.NoSwap
}
// generateContainerResources generates platform specific (linux) container resources config for runtime // generateContainerResources generates platform specific (linux) container resources config for runtime
func (m *kubeGenericRuntimeManager) generateContainerResources(pod *v1.Pod, container *v1.Container) *runtimeapi.ContainerResources { func (m *kubeGenericRuntimeManager) generateContainerResources(pod *v1.Pod, container *v1.Container) *runtimeapi.ContainerResources {
enforceMemoryQoS := false enforceMemoryQoS := false
@ -428,15 +438,6 @@ func newSwapConfigurationHelper(machineInfo cadvisorv1.MachineInfo) *swapConfigu
} }
func (m swapConfigurationHelper) ConfigureLimitedSwap(lcr *runtimeapi.LinuxContainerResources, pod *v1.Pod, container *v1.Container) { func (m swapConfigurationHelper) ConfigureLimitedSwap(lcr *runtimeapi.LinuxContainerResources, pod *v1.Pod, container *v1.Container) {
podQos := kubeapiqos.GetPodQOS(pod)
containerDoesNotRequestMemory := container.Resources.Requests.Memory().IsZero() && container.Resources.Limits.Memory().IsZero()
memoryRequestEqualsToLimit := container.Resources.Requests.Memory().Cmp(*container.Resources.Limits.Memory()) == 0
if podQos != v1.PodQOSBurstable || containerDoesNotRequestMemory || !isCgroup2UnifiedMode() || memoryRequestEqualsToLimit {
m.ConfigureNoSwap(lcr)
return
}
containerMemoryRequest := container.Resources.Requests.Memory() containerMemoryRequest := container.Resources.Requests.Memory()
swapLimit, err := calcSwapForBurstablePods(containerMemoryRequest.Value(), int64(m.machineInfo.MemoryCapacity), int64(m.machineInfo.SwapCapacity)) swapLimit, err := calcSwapForBurstablePods(containerMemoryRequest.Value(), int64(m.machineInfo.MemoryCapacity), int64(m.machineInfo.SwapCapacity))
if err != nil { if err != nil {

View File

@ -924,6 +924,145 @@ func TestGenerateLinuxContainerResources(t *testing.T) {
} }
} }
func TestGetContainerSwapBehavior(t *testing.T) {
_, _, m, err := createTestRuntimeManager()
require.NoError(t, err)
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "foo",
Namespace: "bar",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "c1",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi")},
Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("2Gi")},
},
},
},
},
Status: v1.PodStatus{},
}
tests := []struct {
name string
configuredMemorySwap types.SwapBehavior
nodeSwapFeatureGateEnabled bool
isSwapControllerAvailable bool
cgroupVersion CgroupVersion
isCriticalPod bool
qosClass v1.PodQOSClass
containerResourceOverride func(container *v1.Container)
expected types.SwapBehavior
}{
{
name: "NoSwap, user set NoSwap behavior",
configuredMemorySwap: types.NoSwap,
nodeSwapFeatureGateEnabled: false,
expected: types.NoSwap,
},
{
name: "NoSwap, feature gate turned off",
configuredMemorySwap: types.LimitedSwap,
nodeSwapFeatureGateEnabled: false,
expected: types.NoSwap,
},
{
name: "NoSwap, swap controller unavailable",
configuredMemorySwap: types.LimitedSwap,
nodeSwapFeatureGateEnabled: true,
isSwapControllerAvailable: false,
expected: types.NoSwap,
},
{
name: "NoSwap, cgroup v1",
configuredMemorySwap: types.LimitedSwap,
nodeSwapFeatureGateEnabled: true,
isSwapControllerAvailable: true,
cgroupVersion: cgroupV1,
expected: types.NoSwap,
},
{
name: "NoSwap, qos is Best-Effort",
configuredMemorySwap: types.LimitedSwap,
nodeSwapFeatureGateEnabled: true,
isSwapControllerAvailable: true,
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBestEffort,
expected: types.NoSwap,
},
{
name: "NoSwap, qos is Guaranteed",
configuredMemorySwap: types.LimitedSwap,
nodeSwapFeatureGateEnabled: true,
isSwapControllerAvailable: true,
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSGuaranteed,
expected: types.NoSwap,
},
{
name: "NoSwap, zero memory",
configuredMemorySwap: types.LimitedSwap,
nodeSwapFeatureGateEnabled: true,
isSwapControllerAvailable: true,
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
containerResourceOverride: func(c *v1.Container) {
c.Resources.Requests = v1.ResourceList{
v1.ResourceMemory: resource.MustParse("0"),
}
c.Resources.Limits = v1.ResourceList{
v1.ResourceMemory: resource.MustParse("0"),
}
},
expected: types.NoSwap,
},
{
name: "NoSwap, memory request equal to limit",
configuredMemorySwap: types.LimitedSwap,
nodeSwapFeatureGateEnabled: true,
isSwapControllerAvailable: true,
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
containerResourceOverride: func(c *v1.Container) {
c.Resources.Requests = v1.ResourceList{
v1.ResourceMemory: resource.MustParse("100Mi"),
}
c.Resources.Limits = v1.ResourceList{
v1.ResourceMemory: resource.MustParse("100Mi"),
}
},
expected: types.NoSwap,
},
{
name: "LimitedSwap, cgroup v2",
configuredMemorySwap: types.LimitedSwap,
nodeSwapFeatureGateEnabled: true,
isSwapControllerAvailable: true,
cgroupVersion: cgroupV2,
qosClass: v1.PodQOSBurstable,
expected: types.LimitedSwap,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
m.memorySwapBehavior = string(tt.configuredMemorySwap)
setCgroupVersionDuringTest(tt.cgroupVersion)
defer setSwapControllerAvailableDuringTest(tt.isSwapControllerAvailable)()
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeSwap, tt.nodeSwapFeatureGateEnabled)
testpod := pod.DeepCopy()
testpod.Status.QOSClass = tt.qosClass
if tt.containerResourceOverride != nil {
tt.containerResourceOverride(&testpod.Spec.Containers[0])
}
assert.Equal(t, tt.expected, m.GetContainerSwapBehavior(testpod, &testpod.Spec.Containers[0]))
})
}
}
func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) { func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
_, _, m, err := createTestRuntimeManager() _, _, m, err := createTestRuntimeManager()
assert.NoError(t, err) assert.NoError(t, err)
@ -999,7 +1138,7 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
qosClass v1.PodQOSClass qosClass v1.PodQOSClass
swapDisabledOnNode bool swapDisabledOnNode bool
nodeSwapFeatureGateEnabled bool nodeSwapFeatureGateEnabled bool
swapBehavior string swapBehavior types.SwapBehavior
addContainerWithoutRequests bool addContainerWithoutRequests bool
addGuaranteedContainer bool addGuaranteedContainer bool
isCriticalPod bool isCriticalPod bool
@ -1195,7 +1334,7 @@ func TestGenerateLinuxContainerResourcesWithSwap(t *testing.T) {
setCgroupVersionDuringTest(tc.cgroupVersion) setCgroupVersionDuringTest(tc.cgroupVersion)
defer setSwapControllerAvailableDuringTest(!tc.swapDisabledOnNode)() defer setSwapControllerAvailableDuringTest(!tc.swapDisabledOnNode)()
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeSwap, tc.nodeSwapFeatureGateEnabled) featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeSwap, tc.nodeSwapFeatureGateEnabled)
m.memorySwapBehavior = tc.swapBehavior m.memorySwapBehavior = string(tc.swapBehavior)
var resourceReqsC1, resourceReqsC2 v1.ResourceRequirements var resourceReqsC1, resourceReqsC2 v1.ResourceRequirements
switch tc.qosClass { switch tc.qosClass {

View File

@ -24,6 +24,7 @@ import (
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
"k8s.io/kubernetes/pkg/kubelet/cm" "k8s.io/kubernetes/pkg/kubelet/cm"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/types"
) )
// applyPlatformSpecificContainerConfig applies platform specific configurations to runtimeapi.ContainerConfig. // applyPlatformSpecificContainerConfig applies platform specific configurations to runtimeapi.ContainerConfig.
@ -48,3 +49,7 @@ func toKubeContainerResources(statusResources *runtimeapi.ContainerResources) *k
func toKubeContainerUser(statusUser *runtimeapi.ContainerUser) *kubecontainer.ContainerUser { func toKubeContainerUser(statusUser *runtimeapi.ContainerUser) *kubecontainer.ContainerUser {
return nil return nil
} }
func (m *kubeGenericRuntimeManager) GetContainerSwapBehavior(pod *v1.Pod, container *v1.Container) types.SwapBehavior {
return types.NoSwap
}

View File

@ -26,6 +26,7 @@ import (
"k8s.io/klog/v2" "k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/kubelet/cm" "k8s.io/kubernetes/pkg/kubelet/cm"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/kubelet/winstats" "k8s.io/kubernetes/pkg/kubelet/winstats"
"k8s.io/kubernetes/pkg/securitycontext" "k8s.io/kubernetes/pkg/securitycontext"
) )
@ -183,3 +184,7 @@ func toKubeContainerResources(statusResources *runtimeapi.ContainerResources) *k
func toKubeContainerUser(statusUser *runtimeapi.ContainerUser) *kubecontainer.ContainerUser { func toKubeContainerUser(statusUser *runtimeapi.ContainerUser) *kubecontainer.ContainerUser {
return nil return nil
} }
func (m *kubeGenericRuntimeManager) GetContainerSwapBehavior(pod *v1.Pod, container *v1.Container) types.SwapBehavior {
return types.NoSwap
}

View File

@ -32,7 +32,9 @@ const (
) )
// SwapBehavior types // SwapBehavior types
type SwapBehavior string
const ( const (
LimitedSwap = "LimitedSwap" LimitedSwap SwapBehavior = "LimitedSwap"
NoSwap = "NoSwap" NoSwap SwapBehavior = "NoSwap"
) )

View File

@ -76,7 +76,7 @@ var _ = SIGDescribe("Swap", "[LinuxOnly]", ginkgo.Ordered, feature.Swap, framewo
if !isPodCgroupV2(f, pod) { if !isPodCgroupV2(f, pod) {
e2eskipper.Skipf("swap tests require cgroup v2") e2eskipper.Skipf("swap tests require cgroup v2")
} }
gomega.Expect(getSwapBehavior()).To(gomega.Or(gomega.Equal(types.NoSwap), gomega.BeEmpty()), "NodeConformance is expected to run with NoSwap") gomega.Expect(getSwapBehavior()).To(gomega.Or(gomega.Equal(string(types.NoSwap)), gomega.BeEmpty()), "NodeConformance is expected to run with NoSwap")
expectNoSwap(f, pod) expectNoSwap(f, pod)
}, },
@ -105,8 +105,8 @@ var _ = SIGDescribe("Swap", "[LinuxOnly]", ginkgo.Ordered, feature.Swap, framewo
enableLimitedSwap := func(ctx context.Context, initialConfig *config.KubeletConfiguration) { enableLimitedSwap := func(ctx context.Context, initialConfig *config.KubeletConfiguration) {
msg := "swap behavior is already set to LimitedSwap" msg := "swap behavior is already set to LimitedSwap"
if swapBehavior := initialConfig.MemorySwap.SwapBehavior; swapBehavior != types.LimitedSwap { if swapBehavior := initialConfig.MemorySwap.SwapBehavior; swapBehavior != string(types.LimitedSwap) {
initialConfig.MemorySwap.SwapBehavior = types.LimitedSwap initialConfig.MemorySwap.SwapBehavior = string(types.LimitedSwap)
msg = "setting swap behavior to LimitedSwap" msg = "setting swap behavior to LimitedSwap"
} }
@ -124,7 +124,7 @@ var _ = SIGDescribe("Swap", "[LinuxOnly]", ginkgo.Ordered, feature.Swap, framewo
if !isPodCgroupV2(f, pod) { if !isPodCgroupV2(f, pod) {
e2eskipper.Skipf("swap tests require cgroup v2") e2eskipper.Skipf("swap tests require cgroup v2")
} }
gomega.Expect(getSwapBehavior()).To(gomega.Equal(types.LimitedSwap)) gomega.Expect(getSwapBehavior()).To(gomega.Equal(string(types.LimitedSwap)))
expectedSwapLimit := calcSwapForBurstablePod(f, pod) expectedSwapLimit := calcSwapForBurstablePod(f, pod)
expectLimitedSwap(f, pod, expectedSwapLimit) expectLimitedSwap(f, pod, expectedSwapLimit)