From 16abcd78bd493c2694754e46f9227fef7976ed91 Mon Sep 17 00:00:00 2001
From: Natasha Sarkar
Date: Wed, 19 Mar 2025 22:12:54 -0500
Subject: [PATCH] [FG:InPlacePodVerticalScaling] surface pod resize actuation
 errors in pod resize conditions (#130902)

* surface pod resize actuation errors in pod resize conditions

* update kubelet unit tests

* fix all other kubelet unit tests

* fix linter error

* address feedback

* fix test failure
---
 pkg/kubelet/container/sync_result.go          |   4 +
 pkg/kubelet/container/testing/fake_runtime.go |   4 +
 pkg/kubelet/kubelet.go                        |  26 +++--
 pkg/kubelet/kubelet_test.go                   | 109 +++++++++++++++++-
 .../kuberuntime/kuberuntime_manager.go        |  38 +++---
 .../kuberuntime/kuberuntime_manager_test.go   |  43 +++++--
 pkg/kubelet/status/status_manager.go          |  16 ++-
 pkg/kubelet/status/status_manager_test.go     |  20 +++-
 8 files changed, 217 insertions(+), 43 deletions(-)

diff --git a/pkg/kubelet/container/sync_result.go b/pkg/kubelet/container/sync_result.go
index 39ab04a3cac..b04456e5314 100644
--- a/pkg/kubelet/container/sync_result.go
+++ b/pkg/kubelet/container/sync_result.go
@@ -45,6 +45,8 @@ var (
 	ErrConfigPodSandbox = errors.New("ConfigPodSandboxError")
 	// ErrKillPodSandbox returned when runtime failed to stop pod's sandbox.
 	ErrKillPodSandbox = errors.New("KillPodSandboxError")
+	// ErrResizePodInPlace returned when runtime failed to resize a pod.
+	ErrResizePodInPlace = errors.New("ResizePodInPlaceError")
 )
 
 // SyncAction indicates different kind of actions in SyncPod() and KillPod(). Now there are only actions
@@ -68,6 +70,8 @@ const (
 	ConfigPodSandbox SyncAction = "ConfigPodSandbox"
 	// KillPodSandbox action
 	KillPodSandbox SyncAction = "KillPodSandbox"
+	// ResizePodInPlace action is included whenever any containers in the pod are resized without restart
+	ResizePodInPlace SyncAction = "ResizePodInPlace"
 )
 
 // SyncResult is the result of sync action.
diff --git a/pkg/kubelet/container/testing/fake_runtime.go b/pkg/kubelet/container/testing/fake_runtime.go
index ca7002e4b91..e360ba40bb8 100644
--- a/pkg/kubelet/container/testing/fake_runtime.go
+++ b/pkg/kubelet/container/testing/fake_runtime.go
@@ -61,6 +61,7 @@ type FakeRuntime struct {
 	VersionInfo     string
 	APIVersionInfo  string
 	RuntimeType     string
+	SyncResults     *kubecontainer.PodSyncResult
 	Err             error
 	InspectErr      error
 	StatusErr       error
@@ -238,6 +239,9 @@ func (f *FakeRuntime) SyncPod(_ context.Context, pod *v1.Pod, _ *kubecontainer.P
 	for _, c := range pod.Spec.Containers {
 		f.StartedContainers = append(f.StartedContainers, c.Name)
 	}
+	if f.SyncResults != nil {
+		return *f.SyncResults
+	}
 	// TODO(random-liu): Add SyncResult for starting and killing containers
 	if f.Err != nil {
 		result.Fail(f.Err)
diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index ced300f6432..b327a713b9b 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -2066,20 +2066,19 @@ func (kl *Kubelet) SyncPod(ctx context.Context, updateType kubetypes.SyncPodType
 	sctx := context.WithoutCancel(ctx)
 	result := kl.containerRuntime.SyncPod(sctx, pod, podStatus, pullSecrets, kl.crashLoopBackOff)
 	kl.reasonCache.Update(pod.UID, result)
-	if err := result.Error(); err != nil {
-		// Do not return error if the only failures were pods in backoff
-		for _, r := range result.SyncResults {
-			if r.Error != kubecontainer.ErrCrashLoopBackOff && r.Error != images.ErrImagePullBackOff {
-				// Do not record an event here, as we keep all event logging for sync pod failures
-				// local to container runtime, so we get better errors.
-				return false, err
+
+	for _, r := range result.SyncResults {
+		if r.Action == kubecontainer.ResizePodInPlace {
+			if r.Error == nil {
+				// The pod was resized successfully, clear any pod resize errors in the PodResizeInProgress condition.
+				kl.statusManager.SetPodResizeInProgressCondition(pod.UID, "", "", true)
+			} else {
+				kl.statusManager.SetPodResizeInProgressCondition(pod.UID, v1.PodReasonError, r.Message, false)
 			}
 		}
-
-		return false, nil
 	}
 
-	return false, nil
+	return false, result.Error()
 }
 
 // SyncTerminatingPod is expected to terminate all running containers in a pod. Once this method
@@ -2932,7 +2931,7 @@ func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod, podStatus *kubecontaine
 	}
 	if kl.isPodResizeInProgress(allocatedPod, podStatus) {
 		// If a resize is in progress, make sure the cache has the correct state in case the Kubelet restarted.
-		kl.statusManager.SetPodResizeInProgressCondition(pod.UID, "", "")
+		kl.statusManager.SetPodResizeInProgressCondition(pod.UID, "", "", false)
 	} else {
 		// (Allocated == Actual) => clear the resize in-progress status.
 		kl.statusManager.ClearPodResizeInProgressCondition(pod.UID)
@@ -2961,6 +2960,11 @@ func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod, podStatus *kubecontaine
 		return nil, err
 	}
 	kl.statusManager.ClearPodResizePendingCondition(pod.UID)
+
+	// Clear any errors that may have been surfaced from a previous resize. The condition will be
+	// added back as needed in the defer block, but this prevents old errors from being preserved.
+	kl.statusManager.ClearPodResizeInProgressCondition(pod.UID)
+
 	for i, container := range pod.Spec.Containers {
 		if !apiequality.Semantic.DeepEqual(container.Resources, podFromAllocation.Spec.Containers[i].Resources) {
 			key := kuberuntime.GetStableKey(pod, &container)
diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go
index c8e3500d6d6..ea7dc956b35 100644
--- a/pkg/kubelet/kubelet_test.go
+++ b/pkg/kubelet/kubelet_test.go
@@ -4138,5 +4138,112 @@ func TestCrashLoopBackOffConfiguration(t *testing.T) {
 			assert.Equalf(t, tc.expectedInitial, resultInitial, "wrong base calculated, want: %v, got %v", tc.expectedInitial, resultInitial)
 		})
 	}
-
+}
+
+func TestSyncPodWithErrorsDuringInPlacePodResize(t *testing.T) {
+	testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
+	defer testKubelet.Cleanup()
+	kubelet := testKubelet.kubelet
+
+	pod := podWithUIDNameNsSpec("12345678", "foo", "new", v1.PodSpec{
+		Containers: []v1.Container{
+			{Name: "bar"},
+		},
+	})
+
+	testCases := []struct {
+		name                     string
+		syncResults              *kubecontainer.PodSyncResult
+		expectedErr              string
+		expectedResizeConditions []*v1.PodCondition
+	}{
+		{
+			name: "pod resize error returned from the runtime",
+			syncResults: &kubecontainer.PodSyncResult{
+				SyncResults: []*kubecontainer.SyncResult{{
+					Action:  kubecontainer.ResizePodInPlace,
+					Target:  pod.UID,
+					Error:   kubecontainer.ErrResizePodInPlace,
+					Message: "could not resize pod",
+				}},
+			},
+			expectedErr: "failed to \"ResizePodInPlace\" for \"12345678\" with ResizePodInPlaceError: \"could not resize pod\"",
+			expectedResizeConditions: []*v1.PodCondition{{
+				Type:    v1.PodResizeInProgress,
+				Status:  v1.ConditionTrue,
+				Reason:  v1.PodReasonError,
+				Message: "could not resize pod",
+			}},
+		},
+		{
+			name: "pod resize error cleared upon successful run",
+			syncResults: &kubecontainer.PodSyncResult{
+				SyncResults: []*kubecontainer.SyncResult{{
+					Action: kubecontainer.ResizePodInPlace,
+					Target: pod.UID,
+				}},
+			},
+			expectedResizeConditions: []*v1.PodCondition{{
+				Type:   v1.PodResizeInProgress,
+				Status: v1.ConditionTrue,
+			}},
+		},
+		{
+			name: "sync results have a non-resize error",
+			syncResults: &kubecontainer.PodSyncResult{
+				SyncResults: []*kubecontainer.SyncResult{{
+					Action:  kubecontainer.CreatePodSandbox,
+					Target:  pod.UID,
+					Error:   kubecontainer.ErrCreatePodSandbox,
+					Message: "could not create pod sandbox",
+				}},
+			},
+			expectedErr:              "failed to \"CreatePodSandbox\" for \"12345678\" with CreatePodSandboxError: \"could not create pod sandbox\"",
+			expectedResizeConditions: nil,
+		},
+		{
+			name: "sync results have a non-resize error and a successful pod resize action",
+			syncResults: &kubecontainer.PodSyncResult{
+				SyncResults: []*kubecontainer.SyncResult{
+					{
+						Action:  kubecontainer.CreatePodSandbox,
+						Target:  pod.UID,
+						Error:   kubecontainer.ErrCreatePodSandbox,
+						Message: "could not create pod sandbox",
+					},
+					{
+						Action: kubecontainer.ResizePodInPlace,
+						Target: pod.UID,
+					},
+				},
+			},
+			expectedErr: "failed to \"CreatePodSandbox\" for \"12345678\" with CreatePodSandboxError: \"could not create pod sandbox\"",
+			expectedResizeConditions: []*v1.PodCondition{{
+				Type:   v1.PodResizeInProgress,
+				Status: v1.ConditionTrue,
+			}},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			testKubelet.fakeRuntime.SyncResults = tc.syncResults
+			kubelet.podManager.SetPods([]*v1.Pod{pod})
+			isTerminal, err := kubelet.SyncPod(context.Background(), kubetypes.SyncPodUpdate, pod, nil, &kubecontainer.PodStatus{})
+			require.False(t, isTerminal)
+			if tc.expectedErr == "" {
+				require.NoError(t, err)
+			} else {
+				require.Error(t, err)
+				require.Equal(t, tc.expectedErr, err.Error())
+			}
+			gotResizeConditions := kubelet.statusManager.GetPodResizeConditions(pod.UID)
+			for _, c := range gotResizeConditions {
+				// ignore last probe and transition times for comparison
+				c.LastProbeTime = metav1.Time{}
+				c.LastTransitionTime = metav1.Time{}
+			}
+			require.Equal(t, tc.expectedResizeConditions, gotResizeConditions)
+		})
+	}
 }
diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go
index 988e754b84e..68d6129a7cf 100644
--- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go
@@ -693,7 +693,8 @@ func (m *kubeGenericRuntimeManager) computePodResizeAction(pod *v1.Pod, containe
 	return true
 }
 
-func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podContainerChanges podActions, result *kubecontainer.PodSyncResult) {
+func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podContainerChanges podActions) *kubecontainer.SyncResult {
+	resizeResult := kubecontainer.NewSyncResult(kubecontainer.ResizePodInPlace, format.Pod(pod))
 	pcm := m.containerManager.NewPodContainerManager()
 	//TODO(vinaykul,InPlacePodVerticalScaling): Figure out best way to get enforceMemoryQoS value (parameter #4 below) in platform-agnostic way
 	enforceCPULimits := m.cpuCFSQuota
@@ -704,20 +705,20 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podContainerC
 	podResources := cm.ResourceConfigForPod(pod, enforceCPULimits, uint64((m.cpuCFSQuotaPeriod.Duration)/time.Microsecond), false)
 	if podResources == nil {
 		klog.ErrorS(nil, "Unable to get resource configuration", "pod", klog.KObj(pod))
-		result.Fail(fmt.Errorf("unable to get resource configuration processing resize for pod %s", pod.Name))
-		return
+		resizeResult.Fail(kubecontainer.ErrResizePodInPlace, fmt.Sprintf("unable to get resource configuration processing resize for pod %s", pod.Name))
+		return resizeResult
 	}
 	currentPodMemoryConfig, err := pcm.GetPodCgroupConfig(pod, v1.ResourceMemory)
 	if err != nil {
 		klog.ErrorS(nil, "Unable to get pod cgroup memory config", "pod", klog.KObj(pod))
-		result.Fail(fmt.Errorf("unable to get pod cgroup memory config for pod %s", pod.Name))
-		return
+		resizeResult.Fail(kubecontainer.ErrResizePodInPlace, fmt.Sprintf("unable to get pod cgroup memory config for pod %s", pod.Name))
+		return resizeResult
 	}
 	currentPodCPUConfig, err := pcm.GetPodCgroupConfig(pod, v1.ResourceCPU)
 	if err != nil {
 		klog.ErrorS(nil, "Unable to get pod cgroup cpu config", "pod", klog.KObj(pod))
-		result.Fail(fmt.Errorf("unable to get pod cgroup cpu config for pod %s", pod.Name))
-		return
+		resizeResult.Fail(kubecontainer.ErrResizePodInPlace, fmt.Sprintf("unable to get pod cgroup cpu config for pod %s", pod.Name))
+		return resizeResult
 	}
 
 	currentPodResources := podResources
@@ -800,13 +801,13 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podContainerC
 		currentPodMemoryUsage, err := pcm.GetPodCgroupMemoryUsage(pod)
 		if err != nil {
 			klog.ErrorS(err, "GetPodCgroupMemoryUsage failed", "pod", pod.Name)
-			result.Fail(err)
-			return
+			resizeResult.Fail(kubecontainer.ErrResizePodInPlace, err.Error())
+			return resizeResult
 		}
 		if currentPodMemoryUsage >= uint64(*podResources.Memory) {
 			klog.ErrorS(nil, "Aborting attempt to set pod memory limit less than current memory usage", "pod", pod.Name)
-			result.Fail(fmt.Errorf("aborting attempt to set pod memory limit less than current memory usage for pod %s", pod.Name))
-			return
+			resizeResult.Fail(kubecontainer.ErrResizePodInPlace, fmt.Sprintf("aborting attempt to set pod memory limit less than current memory usage for pod %s", pod.Name))
+			return resizeResult
 		}
 	} else {
 		// Default pod memory limit to the current memory limit if unset to prevent it from updating.
@@ -814,16 +815,16 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podContainerC
 			podResources.Memory = currentPodMemoryConfig.Memory
 		}
 		if errResize := resizeContainers(v1.ResourceMemory, int64(*currentPodMemoryConfig.Memory), *podResources.Memory, 0, 0); errResize != nil {
-			result.Fail(errResize)
-			return
+			resizeResult.Fail(kubecontainer.ErrResizePodInPlace, errResize.Error())
+			return resizeResult
 		}
 	}
 	if len(podContainerChanges.ContainersToUpdate[v1.ResourceCPU]) > 0 || podContainerChanges.UpdatePodResources {
 		if podResources.CPUShares == nil {
 			// This shouldn't happen: ResourceConfigForPod always returns a non-nil value for CPUShares.
 			klog.ErrorS(nil, "podResources.CPUShares is nil", "pod", pod.Name)
-			result.Fail(fmt.Errorf("podResources.CPUShares is nil for pod %s", pod.Name))
-			return
+			resizeResult.Fail(kubecontainer.ErrResizePodInPlace, fmt.Sprintf("podResources.CPUShares is nil for pod %s", pod.Name))
+			return resizeResult
 		}
 
 		// Default pod CPUQuota to the current CPUQuota if no limit is set to prevent the pod limit
@@ -834,10 +835,11 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podContainerC
 		}
 		if errResize := resizeContainers(v1.ResourceCPU, *currentPodCPUConfig.CPUQuota, *podResources.CPUQuota,
 			int64(*currentPodCPUConfig.CPUShares), int64(*podResources.CPUShares)); errResize != nil {
-			result.Fail(errResize)
-			return
+			resizeResult.Fail(kubecontainer.ErrResizePodInPlace, errResize.Error())
+			return resizeResult
 		}
 	}
+	return resizeResult
 }
 
 func (m *kubeGenericRuntimeManager) updatePodContainerResources(pod *v1.Pod, resourceName v1.ResourceName, containersToUpdate []containerToUpdateInfo) error {
@@ -1401,7 +1403,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po
 	// Step 7: For containers in podContainerChanges.ContainersToUpdate[CPU,Memory] list, invoke UpdateContainerResources
 	if resizable, _ := IsInPlacePodVerticalScalingAllowed(pod); resizable {
 		if len(podContainerChanges.ContainersToUpdate) > 0 || podContainerChanges.UpdatePodResources {
-			m.doPodResizeAction(pod, podContainerChanges, &result)
+			result.SyncResults = append(result.SyncResults, m.doPodResizeAction(pod, podContainerChanges))
 		}
 	}
 
diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go
index baf0a6de8f6..80da54715f3 100644
--- a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go
@@ -67,7 +67,14 @@ var (
 )
 
 func createTestRuntimeManager() (*apitest.FakeRuntimeService, *apitest.FakeImageService, *kubeGenericRuntimeManager, error) {
+	return createTestRuntimeManagerWithErrors(nil)
+}
+
+func createTestRuntimeManagerWithErrors(errors map[string][]error) (*apitest.FakeRuntimeService, *apitest.FakeImageService, *kubeGenericRuntimeManager, error) {
 	fakeRuntimeService := apitest.NewFakeRuntimeService()
+	if errors != nil {
+		fakeRuntimeService.Errors = errors
+	}
 	fakeImageService := apitest.NewFakeImageService()
 	// Only an empty machineInfo is needed here, because in unit test all containers are besteffort,
 	// data in machineInfo is not used. If burstable containers are used in unit test in the future,
@@ -3186,9 +3193,6 @@ func TestDoPodResizeAction(t *testing.T) {
 	}
 	featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)
 
-	_, _, m, err := createTestRuntimeManager()
-	require.NoError(t, err)
-	m.cpuCFSQuota = true // Enforce CPU Limits
 
 	for _, tc := range []struct {
 		testName                  string
@@ -3196,7 +3200,9 @@ func TestDoPodResizeAction(t *testing.T) {
 		desiredResources          containerResources
 		updatedResources          []v1.ResourceName
 		otherContainersHaveLimits bool
+		runtimeErrors             map[string][]error
 		expectedError             string
+		expectedErrorMessage      string
 		expectPodCgroupUpdates    int
 	}{
 		{
@@ -3213,6 +3219,23 @@ func TestDoPodResizeAction(t *testing.T) {
 			updatedResources:       []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory},
 			expectPodCgroupUpdates: 3, // cpu req, cpu lim, mem lim
 		},
+		{
+			testName: "Increase cpu and memory requests and limits, with computed pod limits and set a runtime error",
+			currentResources: containerResources{
+				cpuRequest: 100, cpuLimit: 100,
+				memoryRequest: 100, memoryLimit: 100,
+			},
+			desiredResources: containerResources{
+				cpuRequest: 200, cpuLimit: 200,
+				memoryRequest: 200, memoryLimit: 200,
+			},
+			otherContainersHaveLimits: true,
+			updatedResources:          []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory},
+			expectPodCgroupUpdates:    1,
+			runtimeErrors:             map[string][]error{"UpdateContainerResources": {fmt.Errorf("error updating container resources")}},
+			expectedError:             "ResizePodInPlaceError",
+			expectedErrorMessage:      "error updating container resources",
+		},
 		{
 			testName: "Increase cpu and memory requests and limits, without computed pod limits",
 			currentResources: containerResources{
@@ -3296,6 +3319,10 @@ func TestDoPodResizeAction(t *testing.T) {
 		},
 	} {
 		t.Run(tc.testName, func(t *testing.T) {
+			_, _, m, err := createTestRuntimeManagerWithErrors(tc.runtimeErrors)
+			require.NoError(t, err)
+			m.cpuCFSQuota = true // Enforce CPU Limits
+
 			mockCM := cmtesting.NewMockContainerManager(t)
 			mockCM.EXPECT().PodHasExclusiveCPUs(mock.Anything).Return(false).Maybe()
 			mockCM.EXPECT().ContainerHasExclusiveCPUs(mock.Anything, mock.Anything).Return(false).Maybe()
@@ -3358,17 +3385,17 @@ func TestDoPodResizeAction(t *testing.T) {
 				containersToUpdate[r] = []containerToUpdateInfo{updateInfo}
 			}
 
-			syncResult := &kubecontainer.PodSyncResult{}
 			actions := podActions{
 				ContainersToUpdate: containersToUpdate,
 			}
-			m.doPodResizeAction(pod, actions, syncResult)
+			resizeResult := m.doPodResizeAction(pod, actions)
 
 			if tc.expectedError != "" {
-				require.Error(t, syncResult.Error())
-				require.EqualError(t, syncResult.Error(), tc.expectedError)
+				require.Error(t, resizeResult.Error)
+				require.EqualError(t, resizeResult.Error, tc.expectedError)
+				require.Equal(t, tc.expectedErrorMessage, resizeResult.Message)
 			} else {
-				require.NoError(t, syncResult.Error())
+				require.NoError(t, resizeResult.Error)
 			}
 
 			mock.AssertExpectationsForObjects(t, mockPCM)
diff --git a/pkg/kubelet/status/status_manager.go b/pkg/kubelet/status/status_manager.go
index e9a52c5298b..c86bef17ff3 100644
--- a/pkg/kubelet/status/status_manager.go
+++ b/pkg/kubelet/status/status_manager.go
@@ -159,7 +159,7 @@ type Manager interface {
 	SetPodResizePendingCondition(podUID types.UID, reason, message string)
 
 	// SetPodResizeInProgressCondition caches the last PodResizeInProgress condition for the pod.
-	SetPodResizeInProgressCondition(podUID types.UID, reason, message string)
+	SetPodResizeInProgressCondition(podUID types.UID, reason, message string, allowReasonToBeCleared bool)
 
 	// ClearPodResizePendingCondition clears the PodResizePending condition for the pod from the cache.
 	ClearPodResizePendingCondition(podUID types.UID)
@@ -266,10 +266,22 @@ func (m *manager) SetPodResizePendingCondition(podUID types.UID, reason, message
 }
 
 // SetPodResizeInProgressCondition caches the last PodResizeInProgress condition for the pod.
-func (m *manager) SetPodResizeInProgressCondition(podUID types.UID, reason, message string) {
+func (m *manager) SetPodResizeInProgressCondition(podUID types.UID, reason, message string, allowReasonToBeCleared bool) {
+	oldConditions := m.GetPodResizeConditions(podUID)
+
 	m.podStatusesLock.Lock()
 	defer m.podStatusesLock.Unlock()
 
+	if !allowReasonToBeCleared && reason == "" && message == "" {
+		// Preserve the old reason and message if there is one.
+		for _, c := range oldConditions {
+			if c.Type == v1.PodResizeInProgress {
+				reason = c.Reason
+				message = c.Message
+			}
+		}
+	}
+
 	m.podResizeConditions[podUID] = podResizeConditions{
 		PodResizeInProgress: updatedPodResizeCondition(v1.PodResizeInProgress, m.podResizeConditions[podUID].PodResizeInProgress, reason, message),
 		PodResizePending:    m.podResizeConditions[podUID].PodResizePending,
diff --git a/pkg/kubelet/status/status_manager_test.go b/pkg/kubelet/status/status_manager_test.go
index 413270b7fb6..57e0e45c541 100644
--- a/pkg/kubelet/status/status_manager_test.go
+++ b/pkg/kubelet/status/status_manager_test.go
@@ -2047,7 +2047,7 @@ func TestPodResizeConditions(t *testing.T) {
 		{
 			name: "set pod resize in progress condition with reason and message",
 			updateFunc: func(podUID types.UID) {
-				m.SetPodResizeInProgressCondition(podUID, "some-reason", "some-message")
+				m.SetPodResizeInProgressCondition(podUID, "some-reason", "some-message", false)
 			},
 			expected: []*v1.PodCondition{
 				{
@@ -2059,9 +2059,23 @@ func TestPodResizeConditions(t *testing.T) {
 			},
 		},
 		{
-			name: "set pod resize in progress condition without reason and message",
+			name: "set pod resize in progress condition without reason and message/allowReasonToBeCleared=false",
 			updateFunc: func(podUID types.UID) {
-				m.SetPodResizeInProgressCondition(podUID, "", "")
+				m.SetPodResizeInProgressCondition(podUID, "", "", false)
+			},
+			expected: []*v1.PodCondition{
+				{
+					Type:    v1.PodResizeInProgress,
+					Status:  v1.ConditionTrue,
+					Reason:  "some-reason",
+					Message: "some-message",
+				},
+			},
+		},
+		{
+			name: "set pod resize in progress condition without reason and message/allowReasonToBeCleared=true",
+			updateFunc: func(podUID types.UID) {
+				m.SetPodResizeInProgressCondition(podUID, "", "", true)
 			},
 			expected: []*v1.PodCondition{
 				{