diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index 4fedb38f2aa..3afbd6b21e4 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -1889,7 +1889,7 @@ func (kl *Kubelet) SyncPod(_ context.Context, updateType kubetypes.SyncPodType,
 		// TODO(vinaykul,InPlacePodVerticalScaling): Investigate doing this in HandlePodUpdates + periodic SyncLoop scan
 		// See: https://github.com/kubernetes/kubernetes/pull/102884#discussion_r663160060
 		if kl.podWorkers.CouldHaveRunningContainers(pod.UID) && !kubetypes.IsStaticPod(pod) {
-			kl.handlePodResourcesResize(pod)
+			pod = kl.handlePodResourcesResize(pod)
 		}
 	}
 
@@ -2631,13 +2631,14 @@ func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, *v1.Pod, v1.PodResizeStatus)
 		klog.ErrorS(err, "getNodeAnyway function failed")
 		return false, nil, ""
 	}
+	podCopy := pod.DeepCopy()
 	cpuAvailable := node.Status.Allocatable.Cpu().MilliValue()
 	memAvailable := node.Status.Allocatable.Memory().Value()
-	cpuRequests := resource.GetResourceRequest(pod, v1.ResourceCPU)
-	memRequests := resource.GetResourceRequest(pod, v1.ResourceMemory)
+	cpuRequests := resource.GetResourceRequest(podCopy, v1.ResourceCPU)
+	memRequests := resource.GetResourceRequest(podCopy, v1.ResourceMemory)
 	if cpuRequests > cpuAvailable || memRequests > memAvailable {
-		klog.V(3).InfoS("Resize is not feasible as request exceeds allocatable node resources", "pod", pod.Name)
-		return false, nil, v1.PodResizeStatusInfeasible
+		klog.V(3).InfoS("Resize is not feasible as request exceeds allocatable node resources", "pod", podCopy.Name)
+		return false, podCopy, v1.PodResizeStatusInfeasible
 	}
 
 	// Treat the existing pod needing resize as a new pod with desired resources seeking admit.
@@ -2649,13 +2650,12 @@ func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, *v1.Pod, v1.PodResizeStatus)
 		}
 	}
 
-	if ok, failReason, failMessage := kl.canAdmitPod(otherActivePods, pod); !ok {
+	if ok, failReason, failMessage := kl.canAdmitPod(otherActivePods, podCopy); !ok {
 		// Log reason and return. Let the next sync iteration retry the resize
-		klog.V(3).InfoS("Resize cannot be accommodated", "pod", pod.Name, "reason", failReason, "message", failMessage)
-		return false, nil, v1.PodResizeStatusDeferred
+		klog.V(3).InfoS("Resize cannot be accommodated", "pod", podCopy.Name, "reason", failReason, "message", failMessage)
+		return false, podCopy, v1.PodResizeStatusDeferred
 	}
 
-	podCopy := pod.DeepCopy()
 	for _, container := range podCopy.Spec.Containers {
 		idx, found := podutil.GetIndexOfContainerStatus(podCopy.Status.ContainerStatuses, container.Name)
 		if found {
@@ -2667,9 +2667,9 @@ func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, *v1.Pod, v1.PodResizeStatus)
 	return true, podCopy, v1.PodResizeStatusInProgress
 }
 
-func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod) {
+func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod) *v1.Pod {
 	if pod.Status.Phase != v1.PodRunning {
-		return
+		return pod
 	}
 	podResized := false
 	for _, container := range pod.Spec.Containers {
@@ -2691,31 +2691,35 @@ func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod) {
 		}
 	}
 	if !podResized {
-		return
+		return pod
 	}
 
 	kl.podResizeMutex.Lock()
 	defer kl.podResizeMutex.Unlock()
 	fit, updatedPod, resizeStatus := kl.canResizePod(pod)
+	if updatedPod == nil {
+		return pod
+	}
 	if fit {
 		// Update pod resource allocation checkpoint
 		if err := kl.statusManager.SetPodAllocation(updatedPod); err != nil {
 			//TODO(vinaykul,InPlacePodVerticalScaling): Can we recover from this in some way? Investigate
-			klog.ErrorS(err, "SetPodAllocation failed", "pod", klog.KObj(pod))
+			klog.ErrorS(err, "SetPodAllocation failed", "pod", klog.KObj(updatedPod))
+			return pod
 		}
-		*pod = *updatedPod
 	}
 	if resizeStatus != "" {
 		// Save resize decision to checkpoint
-		if err := kl.statusManager.SetPodResizeStatus(pod.UID, resizeStatus); err != nil {
+		if err := kl.statusManager.SetPodResizeStatus(updatedPod.UID, resizeStatus); err != nil {
 			//TODO(vinaykul,InPlacePodVerticalScaling): Can we recover from this in some way? Investigate
-			klog.ErrorS(err, "SetPodResizeStatus failed", "pod", klog.KObj(pod))
+			klog.ErrorS(err, "SetPodResizeStatus failed", "pod", klog.KObj(updatedPod))
+			return pod
 		}
-		pod.Status.Resize = resizeStatus
+		updatedPod.Status.Resize = resizeStatus
 	}
-	kl.podManager.UpdatePod(pod)
-	kl.statusManager.SetPodStatus(pod, pod.Status)
-	return
+	kl.podManager.UpdatePod(updatedPod)
+	kl.statusManager.SetPodStatus(updatedPod, updatedPod.Status)
+	return updatedPod
 }
 
 // LatestLoopEntryTime returns the last time in the sync loop monitor.
diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go
index 8d50c9ec189..57b31b7c1c7 100644
--- a/pkg/kubelet/kubelet_test.go
+++ b/pkg/kubelet/kubelet_test.go
@@ -2585,8 +2585,10 @@ func TestHandlePodResourcesResize(t *testing.T) {
 		tt.pod.Spec.Containers[0].Resources.Requests = tt.newRequests
 		tt.pod.Status.ContainerStatuses[0].AllocatedResources = v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}
 		kubelet.handlePodResourcesResize(tt.pod)
-		assert.Equal(t, tt.expectedAllocations, tt.pod.Status.ContainerStatuses[0].AllocatedResources, tt.name)
-		assert.Equal(t, tt.expectedResize, tt.pod.Status.Resize, tt.name)
+		updatedPod, found := kubelet.podManager.GetPodByName(tt.pod.Namespace, tt.pod.Name)
+		assert.True(t, found, "expected to find pod %s", tt.pod.Name)
+		assert.Equal(t, tt.expectedAllocations, updatedPod.Status.ContainerStatuses[0].AllocatedResources, tt.name)
+		assert.Equal(t, tt.expectedResize, updatedPod.Status.Resize, tt.name)
 		testKubelet.fakeKubeClient.ClearActions()
 	}
 }