From a15520fbeaf458e2fc0f2be6e1ae0f9572369495 Mon Sep 17 00:00:00 2001 From: Natasha Sarkar Date: Mon, 17 Mar 2025 22:01:05 +0000 Subject: [PATCH] Move pod resize status to pod conditions --- api/openapi-spec/swagger.json | 2 +- api/openapi-spec/v3/api__v1_openapi.json | 2 +- pkg/api/pod/testing/make.go | 6 - pkg/api/pod/util.go | 3 +- pkg/api/pod/util_test.go | 1 - pkg/apis/core/types.go | 16 +- pkg/generated/openapi/zz_generated.openapi.go | 2 +- pkg/kubelet/kubelet.go | 85 +++++------ pkg/kubelet/kubelet_pods.go | 18 ++- pkg/kubelet/kubelet_pods_test.go | 9 +- pkg/kubelet/kubelet_test.go | 129 ++++++++++++++-- pkg/kubelet/status/status_manager.go | 138 +++++++++++++++--- pkg/kubelet/status/status_manager_test.go | 102 +++++++++++++ pkg/kubelet/types/pod_status.go | 2 + pkg/registry/core/pod/strategy_test.go | 1 - pkg/scheduler/framework/types_test.go | 35 +++-- .../src/k8s.io/api/core/v1/generated.proto | 3 + staging/src/k8s.io/api/core/v1/types.go | 28 +++- .../core/v1/types_swagger_doc_generated.go | 2 +- .../component-helpers/resource/helpers.go | 24 ++- .../resource/helpers_test.go | 36 +++-- .../kubectl/pkg/util/resource/resource.go | 3 +- test/e2e/framework/pod/resize.go | 18 ++- test/e2e/node/pod_resize.go | 69 ++++++++- 24 files changed, 595 insertions(+), 139 deletions(-) diff --git a/api/openapi-spec/swagger.json b/api/openapi-spec/swagger.json index e07367312f2..f8fd5ec8064 100644 --- a/api/openapi-spec/swagger.json +++ b/api/openapi-spec/swagger.json @@ -10108,7 +10108,7 @@ "type": "string" }, "resize": { - "description": "Status of resources resize desired for pod's containers. It is empty if no resources resize is pending. Any changes to container resources will automatically set this to \"Proposed\"", + "description": "Status of resources resize desired for pod's containers. It is empty if no resources resize is pending. Any changes to container resources will automatically set this to \"Proposed\" Deprecated: Resize status is moved to two pod conditions PodResizePending and PodResizeInProgress. PodResizePending will track states where the spec has been resized, but the Kubelet has not yet allocated the resources. PodResizeInProgress will track in-progress resizes, and should be present whenever allocated resources != acknowledged resources.", "type": "string" }, "resourceClaimStatuses": { diff --git a/api/openapi-spec/v3/api__v1_openapi.json b/api/openapi-spec/v3/api__v1_openapi.json index 75f3b90f9d6..8675a2dc3d3 100644 --- a/api/openapi-spec/v3/api__v1_openapi.json +++ b/api/openapi-spec/v3/api__v1_openapi.json @@ -5940,7 +5940,7 @@ "type": "string" }, "resize": { - "description": "Status of resources resize desired for pod's containers. It is empty if no resources resize is pending. Any changes to container resources will automatically set this to \"Proposed\"", + "description": "Status of resources resize desired for pod's containers. It is empty if no resources resize is pending. Any changes to container resources will automatically set this to \"Proposed\" Deprecated: Resize status is moved to two pod conditions PodResizePending and PodResizeInProgress. PodResizePending will track states where the spec has been resized, but the Kubelet has not yet allocated the resources. 
PodResizeInProgress will track in-progress resizes, and should be present whenever allocated resources != acknowledged resources.", "type": "string" }, "resourceClaimStatuses": { diff --git a/pkg/api/pod/testing/make.go b/pkg/api/pod/testing/make.go index fd5819fc60e..8c83340c4e8 100644 --- a/pkg/api/pod/testing/make.go +++ b/pkg/api/pod/testing/make.go @@ -336,12 +336,6 @@ func MakeContainerStatus(name string, allocatedResources api.ResourceList) api.C return cs } -func SetResizeStatus(resizeStatus api.PodResizeStatus) TweakPodStatus { - return func(podstatus *api.PodStatus) { - podstatus.Resize = resizeStatus - } -} - // TweakContainers applies the container tweaks to all containers (regular & init) in the pod. // Note: this should typically be added to pod tweaks after all containers have been added. func TweakContainers(tweaks ...TweakContainer) Tweak { diff --git a/pkg/api/pod/util.go b/pkg/api/pod/util.go index f104e683e4e..67a641debc2 100644 --- a/pkg/api/pod/util.go +++ b/pkg/api/pod/util.go @@ -791,7 +791,7 @@ func dropDisabledPodStatusFields(podStatus, oldPodStatus *api.PodStatus, podSpec } if !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) && !inPlacePodVerticalScalingInUse(oldPodSpec) { - // Drop Resize and Resources fields + // Drop Resources fields dropResourcesField := func(csl []api.ContainerStatus) { for i := range csl { csl[i].Resources = nil @@ -800,7 +800,6 @@ func dropDisabledPodStatusFields(podStatus, oldPodStatus *api.PodStatus, podSpec dropResourcesField(podStatus.ContainerStatuses) dropResourcesField(podStatus.InitContainerStatuses) dropResourcesField(podStatus.EphemeralContainerStatuses) - podStatus.Resize = "" } if !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) || !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingAllocatedStatus) { diff --git a/pkg/api/pod/util_test.go b/pkg/api/pod/util_test.go index 5a831a36cd5..3f882483cc1 100644 --- a/pkg/api/pod/util_test.go +++ b/pkg/api/pod/util_test.go @@ -2675,7 +2675,6 @@ func TestDropInPlacePodVerticalScaling(t *testing.T) { }, }, Status: api.PodStatus{ - Resize: api.PodResizeStatusInProgress, ContainerStatuses: []api.ContainerStatus{ { Name: "c1", diff --git a/pkg/apis/core/types.go b/pkg/apis/core/types.go index 30874775929..c862ca5aa98 100644 --- a/pkg/apis/core/types.go +++ b/pkg/apis/core/types.go @@ -2951,6 +2951,17 @@ const ( // DisruptionTarget indicates the pod is about to be terminated due to a // disruption (such as preemption, eviction API or garbage-collection). DisruptionTarget PodConditionType = "DisruptionTarget" + // PodResizePending indicates that the pod has been resized, but kubelet has not + // yet allocated the resources. If both PodResizePending and PodResizeInProgress + // are set, it means that a new resize was requested in the middle of a previous + // pod resize that is still in progress. + PodResizePending PodConditionType = "PodResizePending" + // PodResizeInProgress indicates that a resize is in progress, and is present whenever + // the Kubelet has allocated resources for the resize, but has not yet actuated all of + // the required changes. + // If both PodResizePending and PodResizeInProgress are set, it means that a new resize was + // requested in the middle of a previous pod resize that is still in progress. 
+ PodResizeInProgress PodConditionType = "PodResizeInProgress" ) // PodCondition represents pod's condition @@ -2970,7 +2981,7 @@ type PodCondition struct { Message string } -// PodResizeStatus shows status of desired resize of a pod's containers. +// Deprecated: PodResizeStatus shows status of desired resize of a pod's containers. type PodResizeStatus string const ( @@ -4251,6 +4262,9 @@ type PodStatus struct { // Status of resources resize desired for pod's containers. // It is empty if no resources resize is pending. // Any changes to container resources will automatically set this to "Proposed" + // Deprecated: Resize status is moved to two pod conditions PodResizePending and PodResizeInProgress. + // PodResizePending will track states where the spec has been resized, but the Kubelet has not yet allocated the resources. + // PodResizeInProgress will track in-progress resizes, and should be present whenever allocated resources != acknowledged resources. // +featureGate=InPlacePodVerticalScaling // +optional Resize PodResizeStatus diff --git a/pkg/generated/openapi/zz_generated.openapi.go b/pkg/generated/openapi/zz_generated.openapi.go index 50b8ef06036..fcebf3a7a2b 100644 --- a/pkg/generated/openapi/zz_generated.openapi.go +++ b/pkg/generated/openapi/zz_generated.openapi.go @@ -29141,7 +29141,7 @@ func schema_k8sio_api_core_v1_PodStatus(ref common.ReferenceCallback) common.Ope }, "resize": { SchemaProps: spec.SchemaProps{ - Description: "Status of resources resize desired for pod's containers. It is empty if no resources resize is pending. Any changes to container resources will automatically set this to \"Proposed\"", + Description: "Status of resources resize desired for pod's containers. It is empty if no resources resize is pending. Any changes to container resources will automatically set this to \"Proposed\" Deprecated: Resize status is moved to two pod conditions PodResizePending and PodResizeInProgress. PodResizePending will track states where the spec has been resized, but the Kubelet has not yet allocated the resources. PodResizeInProgress will track in-progress resizes, and should be present whenever allocated resources != acknowledged resources.", Type: []string{"string"}, Format: "", }, diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index b03edd716db..ced300f6432 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -2863,21 +2863,23 @@ func (kl *Kubelet) HandlePodSyncs(pods []*v1.Pod) { // canResizePod determines if the requested resize is currently feasible. // pod should hold the desired (pre-allocated) spec. -// Returns true if the resize can proceed. -func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, v1.PodResizeStatus, string) { +// Returns true if the resize can proceed; returns a reason and message +// otherwise. 
+func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, string, string) { if v1qos.GetPodQOS(pod) == v1.PodQOSGuaranteed && !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) { if utilfeature.DefaultFeatureGate.Enabled(features.CPUManager) { if kl.containerManager.GetNodeConfig().CPUManagerPolicy == "static" { msg := "Resize is infeasible for Guaranteed Pods alongside CPU Manager static policy" klog.V(3).InfoS(msg, "pod", format.Pod(pod)) - return false, v1.PodResizeStatusInfeasible, msg + return false, v1.PodReasonInfeasible, msg } } if utilfeature.DefaultFeatureGate.Enabled(features.MemoryManager) { if kl.containerManager.GetNodeConfig().MemoryManagerPolicy == "Static" { msg := "Resize is infeasible for Guaranteed Pods alongside Memory Manager static policy" klog.V(3).InfoS(msg, "pod", format.Pod(pod)) - return false, v1.PodResizeStatusInfeasible, msg + return false, v1.PodReasonInfeasible, msg + } } } @@ -2900,7 +2902,8 @@ func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, v1.PodResizeStatus, string) } msg = "Node didn't have enough capacity: " + msg klog.V(3).InfoS(msg, "pod", klog.KObj(pod)) - return false, v1.PodResizeStatusInfeasible, msg + return false, v1.PodReasonInfeasible, msg + } // Treat the existing pod needing resize as a new pod with desired resources seeking admit. @@ -2911,83 +2914,75 @@ func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, v1.PodResizeStatus, string) if ok, failReason, failMessage := kl.canAdmitPod(allocatedPods, pod); !ok { // Log reason and return. Let the next sync iteration retry the resize klog.V(3).InfoS("Resize cannot be accommodated", "pod", klog.KObj(pod), "reason", failReason, "message", failMessage) - return false, v1.PodResizeStatusDeferred, failMessage + return false, v1.PodReasonDeferred, failMessage } - return true, v1.PodResizeStatusInProgress, "" + return true, "", "" } // handlePodResourcesResize returns the "allocated pod", which should be used for all resource // calculations after this function is called. It also updates the cached ResizeStatus according to // the allocation decision and pod status. -func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (*v1.Pod, error) { - allocatedPod, updated := kl.allocationManager.UpdatePodFromAllocation(pod) - - if !updated { - // Desired resources == allocated resources. Check whether a resize is in progress. +func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (allocatedPod *v1.Pod, err error) { + // Always check whether a resize is in progress so we can set the PodResizeInProgressCondition + // accordingly. + defer func() { + if err != nil { + return + } if kl.isPodResizeInProgress(allocatedPod, podStatus) { // If a resize is in progress, make sure the cache has the correct state in case the Kubelet restarted. - kl.statusManager.SetPodResizeStatus(pod.UID, v1.PodResizeStatusInProgress) + kl.statusManager.SetPodResizeInProgressCondition(pod.UID, "", "") } else { - // (Desired == Allocated == Actual) => clear the resize status. - kl.statusManager.SetPodResizeStatus(pod.UID, "") + // (Allocated == Actual) => clear the resize in-progress status. + kl.statusManager.ClearPodResizeInProgressCondition(pod.UID) } - // Pod allocation does not need to be updated. - return allocatedPod, nil + }() + + podFromAllocation, updated := kl.allocationManager.UpdatePodFromAllocation(pod) + if !updated { + // Desired resources == allocated resources. Pod allocation does not need to be updated. 
+ kl.statusManager.ClearPodResizePendingCondition(pod.UID) + return podFromAllocation, nil + } else if resizable, msg := kuberuntime.IsInPlacePodVerticalScalingAllowed(pod); !resizable { // If there is a pending resize but the resize is not allowed, always use the allocated resources. - kl.recorder.Eventf(pod, v1.EventTypeWarning, events.ResizeInfeasible, msg) - kl.statusManager.SetPodResizeStatus(pod.UID, v1.PodResizeStatusInfeasible) - return allocatedPod, nil + kl.statusManager.SetPodResizePendingCondition(pod.UID, v1.PodReasonInfeasible, msg) + return podFromAllocation, nil } kl.podResizeMutex.Lock() defer kl.podResizeMutex.Unlock() // Desired resources != allocated resources. Can we update the allocation to the desired resources? - fit, resizeStatus, resizeMsg := kl.canResizePod(pod) + fit, reason, message := kl.canResizePod(pod) if fit { // Update pod resource allocation checkpoint if err := kl.allocationManager.SetAllocatedResources(pod); err != nil { return nil, err } + kl.statusManager.ClearPodResizePendingCondition(pod.UID) for i, container := range pod.Spec.Containers { - if !apiequality.Semantic.DeepEqual(container.Resources, allocatedPod.Spec.Containers[i].Resources) { + if !apiequality.Semantic.DeepEqual(container.Resources, podFromAllocation.Spec.Containers[i].Resources) { key := kuberuntime.GetStableKey(pod, &container) kl.crashLoopBackOff.Reset(key) } } for i, container := range pod.Spec.InitContainers { if podutil.IsRestartableInitContainer(&container) { - if !apiequality.Semantic.DeepEqual(container.Resources, allocatedPod.Spec.InitContainers[i].Resources) { + if !apiequality.Semantic.DeepEqual(container.Resources, podFromAllocation.Spec.InitContainers[i].Resources) { key := kuberuntime.GetStableKey(pod, &container) kl.crashLoopBackOff.Reset(key) } } } - allocatedPod = pod + return pod, nil + } - // Special case when the updated allocation matches the actuated resources. This can occur - // when reverting a resize that hasn't been actuated, or when making an equivalent change - // (such as CPU requests below MinShares). This is an optimization to clear the resize - // status immediately, rather than waiting for the next SyncPod iteration. - if !kl.isPodResizeInProgress(allocatedPod, podStatus) { - // In this case, consider the resize complete. 
- kl.statusManager.SetPodResizeStatus(pod.UID, "") - return allocatedPod, nil - } + if reason != "" { + kl.statusManager.SetPodResizePendingCondition(pod.UID, reason, message) } - if resizeStatus != "" { - kl.statusManager.SetPodResizeStatus(pod.UID, resizeStatus) - if resizeMsg != "" { - switch resizeStatus { - case v1.PodResizeStatusDeferred: - kl.recorder.Eventf(pod, v1.EventTypeWarning, events.ResizeDeferred, resizeMsg) - case v1.PodResizeStatusInfeasible: - kl.recorder.Eventf(pod, v1.EventTypeWarning, events.ResizeInfeasible, resizeMsg) - } - } - } - return allocatedPod, nil + + return podFromAllocation, nil } // isPodResizingInProgress checks whether the actuated resizable resources differ from the allocated resources diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go index 31a7bb392f6..8db4ab4b459 100644 --- a/pkg/kubelet/kubelet_pods.go +++ b/pkg/kubelet/kubelet_pods.go @@ -1738,14 +1738,15 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal bool) v1.Pod } } -func (kl *Kubelet) determinePodResizeStatus(allocatedPod *v1.Pod, podStatus *kubecontainer.PodStatus, podIsTerminal bool) v1.PodResizeStatus { +func (kl *Kubelet) determinePodResizeStatus(allocatedPod *v1.Pod, podIsTerminal bool) []*v1.PodCondition { // If pod is terminal, clear the resize status. if podIsTerminal { - kl.statusManager.SetPodResizeStatus(allocatedPod.UID, "") - return "" + kl.statusManager.ClearPodResizeInProgressCondition(allocatedPod.UID) + kl.statusManager.ClearPodResizePendingCondition(allocatedPod.UID) + return nil } - resizeStatus := kl.statusManager.GetPodResizeStatus(allocatedPod.UID) + resizeStatus := kl.statusManager.GetPodResizeConditions(allocatedPod.UID) return resizeStatus } @@ -1759,9 +1760,6 @@ func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.Po oldPodStatus = pod.Status } s := kl.convertStatusToAPIStatus(pod, podStatus, oldPodStatus) - if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { - s.Resize = kl.determinePodResizeStatus(pod, podStatus, podIsTerminal) - } // calculate the next phase and preserve reason allStatus := append(append([]v1.ContainerStatus{}, s.ContainerStatuses...), s.InitContainerStatuses...) 
s.Phase = getPhase(pod, allStatus, podIsTerminal) @@ -1827,6 +1825,12 @@ func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.Po s.Conditions = append(s.Conditions, c) } } + if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { + resizeStatus := kl.determinePodResizeStatus(pod, podIsTerminal) + for _, c := range resizeStatus { + s.Conditions = append(s.Conditions, *c) + } + } // copy over the pod disruption conditions from state which is already // updated during the eviciton (due to either node resource pressure or diff --git a/pkg/kubelet/kubelet_pods_test.go b/pkg/kubelet/kubelet_pods_test.go index 3f81d46b32f..c3c865456ca 100644 --- a/pkg/kubelet/kubelet_pods_test.go +++ b/pkg/kubelet/kubelet_pods_test.go @@ -3881,7 +3881,6 @@ func Test_generateAPIPodStatusForInPlaceVPAEnabled(t *testing.T) { AllocatedResources: CPU1AndMem1GAndStorage2GAndCustomResource, }, }, - Resize: "InProgress", }, }, }, @@ -3912,7 +3911,6 @@ func Test_generateAPIPodStatusForInPlaceVPAEnabled(t *testing.T) { AllocatedResources: CPU1AndMem1GAndStorage2G, }, }, - Resize: "InProgress", }, }, }, @@ -3926,9 +3924,10 @@ func Test_generateAPIPodStatusForInPlaceVPAEnabled(t *testing.T) { oldStatus := test.pod.Status kl.statusManager.SetPodStatus(test.pod, oldStatus) actual := kl.generateAPIPodStatus(test.pod, &testKubecontainerPodStatus /* criStatus */, false /* test.isPodTerminal */) - - if actual.Resize != "" { - t.Fatalf("Unexpected Resize status: %s", actual.Resize) + for _, c := range actual.Conditions { + if c.Type == v1.PodResizePending || c.Type == v1.PodResizeInProgress { + t.Fatalf("unexpected resize status: %v", c) + } } }) } diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go index b431135d2f8..758df2b0bc0 100644 --- a/pkg/kubelet/kubelet_test.go +++ b/pkg/kubelet/kubelet_test.go @@ -2714,7 +2714,7 @@ func TestHandlePodResourcesResize(t *testing.T) { newResourcesAllocated bool // Whether the new requests have already been allocated (but not actuated) expectedAllocatedReqs v1.ResourceList expectedAllocatedLims v1.ResourceList - expectedResize v1.PodResizeStatus + expectedResize []*v1.PodCondition expectBackoffReset bool annotations map[string]string }{ @@ -2723,52 +2723,102 @@ func TestHandlePodResourcesResize(t *testing.T) { originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}, newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M}, expectedAllocatedReqs: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M}, - expectedResize: v1.PodResizeStatusInProgress, expectBackoffReset: true, + + expectedResize: []*v1.PodCondition{ + { + Type: v1.PodResizeInProgress, + Status: "True", + }, + }, }, { name: "Request CPU increase, memory decrease - expect InProgress", originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}, newRequests: v1.ResourceList{v1.ResourceCPU: cpu1500m, v1.ResourceMemory: mem500M}, expectedAllocatedReqs: v1.ResourceList{v1.ResourceCPU: cpu1500m, v1.ResourceMemory: mem500M}, - expectedResize: v1.PodResizeStatusInProgress, expectBackoffReset: true, + + expectedResize: []*v1.PodCondition{ + { + Type: v1.PodResizeInProgress, + Status: "True", + }, + }, }, { name: "Request CPU decrease, memory increase - expect InProgress", originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}, newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem1500M}, expectedAllocatedReqs: 
v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem1500M}, - expectedResize: v1.PodResizeStatusInProgress, expectBackoffReset: true, + + expectedResize: []*v1.PodCondition{ + { + Type: v1.PodResizeInProgress, + Status: "True", + }, + }, }, { name: "Request CPU and memory increase beyond current capacity - expect Deferred", originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}, newRequests: v1.ResourceList{v1.ResourceCPU: cpu2500m, v1.ResourceMemory: mem2500M}, expectedAllocatedReqs: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}, - expectedResize: v1.PodResizeStatusDeferred, + + expectedResize: []*v1.PodCondition{ + { + Type: v1.PodResizePending, + Status: "True", + Reason: "Deferred", + Message: "", + }, + }, }, { name: "Request CPU decrease and memory increase beyond current capacity - expect Deferred", originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}, newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem2500M}, expectedAllocatedReqs: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}, - expectedResize: v1.PodResizeStatusDeferred, + + expectedResize: []*v1.PodCondition{ + { + Type: v1.PodResizePending, + Status: "True", + Reason: "Deferred", + Message: "Node didn't have enough resource: memory", + }, + }, }, { name: "Request memory increase beyond node capacity - expect Infeasible", originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}, newRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem4500M}, expectedAllocatedReqs: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}, - expectedResize: v1.PodResizeStatusInfeasible, + + expectedResize: []*v1.PodCondition{ + { + Type: v1.PodResizePending, + Status: "True", + Reason: "Infeasible", + Message: "Node didn't have enough capacity: memory, requested: 4718592000, capacity: 4294967296", + }, + }, }, { name: "Request CPU increase beyond node capacity - expect Infeasible", originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}, newRequests: v1.ResourceList{v1.ResourceCPU: cpu5000m, v1.ResourceMemory: mem1000M}, expectedAllocatedReqs: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}, - expectedResize: v1.PodResizeStatusInfeasible, + + expectedResize: []*v1.PodCondition{ + { + Type: v1.PodResizePending, + Status: "True", + Reason: "Infeasible", + Message: "Node didn't have enough capacity: cpu, requested: 5000, capacity: 4000", + }, + }, }, { name: "CPU increase in progress - expect InProgress", @@ -2776,38 +2826,64 @@ func TestHandlePodResourcesResize(t *testing.T) { newRequests: v1.ResourceList{v1.ResourceCPU: cpu1500m, v1.ResourceMemory: mem1000M}, newResourcesAllocated: true, expectedAllocatedReqs: v1.ResourceList{v1.ResourceCPU: cpu1500m, v1.ResourceMemory: mem1000M}, - expectedResize: v1.PodResizeStatusInProgress, + + expectedResize: []*v1.PodCondition{ + { + Type: v1.PodResizeInProgress, + Status: "True", + }, + }, }, { name: "No resize", originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}, newRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}, expectedAllocatedReqs: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}, - expectedResize: "", + expectedResize: nil, }, { name: "static pod, expect Infeasible", originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, 
v1.ResourceMemory: mem1000M}, newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M}, expectedAllocatedReqs: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}, - expectedResize: v1.PodResizeStatusInfeasible, annotations: map[string]string{kubetypes.ConfigSourceAnnotationKey: kubetypes.FileSource}, + + expectedResize: []*v1.PodCondition{ + { + Type: v1.PodResizePending, + Status: "True", + Reason: "Infeasible", + Message: "In-place resize of static-pods is not supported", + }, + }, }, { name: "Increase CPU from min shares", originalRequests: v1.ResourceList{v1.ResourceCPU: cpu2m}, newRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m}, expectedAllocatedReqs: v1.ResourceList{v1.ResourceCPU: cpu1000m}, - expectedResize: v1.PodResizeStatusInProgress, expectBackoffReset: true, + + expectedResize: []*v1.PodCondition{ + { + Type: v1.PodResizeInProgress, + Status: "True", + }, + }, }, { name: "Decrease CPU to min shares", originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m}, newRequests: v1.ResourceList{v1.ResourceCPU: cpu2m}, expectedAllocatedReqs: v1.ResourceList{v1.ResourceCPU: cpu2m}, - expectedResize: v1.PodResizeStatusInProgress, expectBackoffReset: true, + + expectedResize: []*v1.PodCondition{ + { + Type: v1.PodResizeInProgress, + Status: "True", + }, + }, }, { name: "Increase CPU from min limit", @@ -2817,8 +2893,14 @@ func TestHandlePodResourcesResize(t *testing.T) { newLimits: v1.ResourceList{v1.ResourceCPU: resource.MustParse("20m")}, expectedAllocatedReqs: v1.ResourceList{v1.ResourceCPU: resource.MustParse("10m")}, expectedAllocatedLims: v1.ResourceList{v1.ResourceCPU: resource.MustParse("20m")}, - expectedResize: v1.PodResizeStatusInProgress, expectBackoffReset: true, + + expectedResize: []*v1.PodCondition{ + { + Type: v1.PodResizeInProgress, + Status: "True", + }, + }, }, { name: "Decrease CPU to min limit", @@ -2828,8 +2910,14 @@ func TestHandlePodResourcesResize(t *testing.T) { newLimits: v1.ResourceList{v1.ResourceCPU: resource.MustParse("10m")}, expectedAllocatedReqs: v1.ResourceList{v1.ResourceCPU: resource.MustParse("10m")}, expectedAllocatedLims: v1.ResourceList{v1.ResourceCPU: resource.MustParse("10m")}, - expectedResize: v1.PodResizeStatusInProgress, expectBackoffReset: true, + + expectedResize: []*v1.PodCondition{ + { + Type: v1.PodResizeInProgress, + Status: "True", + }, + }, }, } @@ -2917,7 +3005,16 @@ func TestHandlePodResourcesResize(t *testing.T) { assert.Equal(t, tt.expectedAllocatedReqs, alloc.Requests, "stored container request allocation") assert.Equal(t, tt.expectedAllocatedLims, alloc.Limits, "stored container limit allocation") - resizeStatus := kubelet.statusManager.GetPodResizeStatus(newPod.UID) + resizeStatus := kubelet.statusManager.GetPodResizeConditions(newPod.UID) + for i := range resizeStatus { + // Ignore probe time and last transition time during comparison. + resizeStatus[i].LastProbeTime = metav1.Time{} + resizeStatus[i].LastTransitionTime = metav1.Time{} + + // Message is a substring assertion, since it can change slightly. 
+ assert.Contains(t, resizeStatus[i].Message, tt.expectedResize[i].Message) + resizeStatus[i].Message = tt.expectedResize[i].Message + } assert.Equal(t, tt.expectedResize, resizeStatus) isInBackoff := kubelet.crashLoopBackOff.IsInBackOffSince(backoffKey, now) diff --git a/pkg/kubelet/status/status_manager.go b/pkg/kubelet/status/status_manager.go index f727a4be958..8c5887c7ca1 100644 --- a/pkg/kubelet/status/status_manager.go +++ b/pkg/kubelet/status/status_manager.go @@ -68,10 +68,10 @@ type manager struct { kubeClient clientset.Interface podManager PodManager // Map from pod UID to sync status of the corresponding pod. - podStatuses map[types.UID]versionedPodStatus - podResizeStatuses map[types.UID]v1.PodResizeStatus - podStatusesLock sync.RWMutex - podStatusChannel chan struct{} + podStatuses map[types.UID]versionedPodStatus + podResizeConditions map[types.UID]podResizeConditions + podStatusesLock sync.RWMutex + podStatusChannel chan struct{} // Map from (mirror) pod UID to latest status version successfully sent to the API server. // apiStatusVersions must only be accessed from the sync thread. apiStatusVersions map[kubetypes.MirrorPodUID]uint64 @@ -80,6 +80,22 @@ type manager struct { podStartupLatencyHelper PodStartupLatencyStateHelper } +type podResizeConditions struct { + PodResizePending *v1.PodCondition + PodResizeInProgress *v1.PodCondition +} + +func (prc podResizeConditions) List() []*v1.PodCondition { + var conditions []*v1.PodCondition + if prc.PodResizePending != nil { + conditions = append(conditions, prc.PodResizePending) + } + if prc.PodResizeInProgress != nil { + conditions = append(conditions, prc.PodResizeInProgress) + } + return conditions +} + // PodManager is the subset of methods the manager needs to observe the actual state of the kubelet. // See pkg/k8s.io/kubernetes/pkg/kubelet/pod.Manager for method godoc. type PodManager interface { @@ -136,11 +152,20 @@ type Manager interface { // the provided podUIDs. RemoveOrphanedStatuses(podUIDs map[types.UID]bool) - // GetPodResizeStatus returns cached PodStatus.Resize value - GetPodResizeStatus(podUID types.UID) v1.PodResizeStatus + // GetPodResizeConditions returns cached PodStatus Resize conditions value + GetPodResizeConditions(podUID types.UID) []*v1.PodCondition - // SetPodResizeStatus caches the last resizing decision for the pod. - SetPodResizeStatus(podUID types.UID, resize v1.PodResizeStatus) + // SetPodResizePendingCondition caches the last PodResizePending condition for the pod. + SetPodResizePendingCondition(podUID types.UID, reason, message string) + + // SetPodResizeInProgressCondition caches the last PodResizeInProgress condition for the pod. + SetPodResizeInProgressCondition(podUID types.UID, reason, message string) + + // ClearPodResizePendingCondition clears the PodResizePending condition for the pod from the cache. + ClearPodResizePendingCondition(podUID types.UID) + + // ClearPodResizeInProgressCondition clears the PodResizeInProgress condition for the pod from the cache. 
+ ClearPodResizeInProgressCondition(podUID types.UID) } const syncPeriod = 10 * time.Second @@ -151,7 +176,7 @@ func NewManager(kubeClient clientset.Interface, podManager PodManager, podDeleti kubeClient: kubeClient, podManager: podManager, podStatuses: make(map[types.UID]versionedPodStatus), - podResizeStatuses: make(map[types.UID]v1.PodResizeStatus), + podResizeConditions: make(map[types.UID]podResizeConditions), podStatusChannel: make(chan struct{}, 1), apiStatusVersions: make(map[kubetypes.MirrorPodUID]uint64), podDeletionSafety: podDeletionSafety, @@ -165,15 +190,30 @@ func NewManager(kubeClient clientset.Interface, podManager PodManager, podDeleti // changes will be ignored. func isPodStatusByKubeletEqual(oldStatus, status *v1.PodStatus) bool { oldCopy := oldStatus.DeepCopy() + + newConditions := make(map[v1.PodConditionType]*v1.PodCondition, len(status.Conditions)) + oldConditions := make(map[v1.PodConditionType]*v1.PodCondition, len(oldStatus.Conditions)) for _, c := range status.Conditions { - // both owned and shared conditions are used for kubelet status equality if kubetypes.PodConditionByKubelet(c.Type) || kubetypes.PodConditionSharedByKubelet(c.Type) { - _, oc := podutil.GetPodCondition(oldCopy, c.Type) - if oc == nil || oc.Status != c.Status || oc.Message != c.Message || oc.Reason != c.Reason { - return false - } + newConditions[c.Type] = &c } } + for _, c := range oldStatus.Conditions { + if kubetypes.PodConditionByKubelet(c.Type) || kubetypes.PodConditionSharedByKubelet(c.Type) { + oldConditions[c.Type] = &c + } + } + + if len(newConditions) != len(oldConditions) { + return false + } + for _, newCondition := range newConditions { + oldCondition := oldConditions[newCondition.Type] + if oldCondition == nil || oldCondition.Status != newCondition.Status || oldCondition.Message != newCondition.Message || oldCondition.Reason != newCondition.Reason { + return false + } + } + oldCopy.Conditions = status.Conditions return apiequality.Semantic.DeepEqual(oldCopy, status) } @@ -207,18 +247,53 @@ func (m *manager) Start() { }, 0) } -// GetPodResizeStatus returns the last cached ResizeStatus value. -func (m *manager) GetPodResizeStatus(podUID types.UID) v1.PodResizeStatus { +// GetPodResizeConditions returns the last cached ResizeStatus value. +func (m *manager) GetPodResizeConditions(podUID types.UID) []*v1.PodCondition { m.podStatusesLock.RLock() defer m.podStatusesLock.RUnlock() - return m.podResizeStatuses[podUID] + return m.podResizeConditions[podUID].List() } -// SetPodResizeStatus checkpoints the last resizing decision for the pod. -func (m *manager) SetPodResizeStatus(podUID types.UID, resizeStatus v1.PodResizeStatus) { +// SetPodResizePendingCondition caches the last PodResizePending condition for the pod. +func (m *manager) SetPodResizePendingCondition(podUID types.UID, reason, message string) { m.podStatusesLock.Lock() defer m.podStatusesLock.Unlock() - m.podResizeStatuses[podUID] = resizeStatus + + m.podResizeConditions[podUID] = podResizeConditions{ + PodResizePending: updatedPodResizeCondition(v1.PodResizePending, m.podResizeConditions[podUID].PodResizePending, reason, message), + PodResizeInProgress: m.podResizeConditions[podUID].PodResizeInProgress, + } +} + +// SetPodResizeInProgressCondition caches the last PodResizeInProgress condition for the pod. 
+func (m *manager) SetPodResizeInProgressCondition(podUID types.UID, reason, message string) { + m.podStatusesLock.Lock() + defer m.podStatusesLock.Unlock() + + m.podResizeConditions[podUID] = podResizeConditions{ + PodResizeInProgress: updatedPodResizeCondition(v1.PodResizeInProgress, m.podResizeConditions[podUID].PodResizeInProgress, reason, message), + PodResizePending: m.podResizeConditions[podUID].PodResizePending, + } +} + +// ClearPodResizePendingCondition clears the PodResizePending condition for the pod from the cache. +func (m *manager) ClearPodResizePendingCondition(podUID types.UID) { + m.podStatusesLock.Lock() + defer m.podStatusesLock.Unlock() + m.podResizeConditions[podUID] = podResizeConditions{ + PodResizePending: nil, + PodResizeInProgress: m.podResizeConditions[podUID].PodResizeInProgress, + } +} + +// ClearPodResizeInProgressCondition clears the PodResizeInProgress condition for the pod from the cache. +func (m *manager) ClearPodResizeInProgressCondition(podUID types.UID) { + m.podStatusesLock.Lock() + defer m.podStatusesLock.Unlock() + m.podResizeConditions[podUID] = podResizeConditions{ + PodResizePending: m.podResizeConditions[podUID].PodResizePending, + PodResizeInProgress: nil, + } } func (m *manager) GetPodStatus(uid types.UID) (v1.PodStatus, bool) { @@ -696,7 +771,7 @@ func (m *manager) deletePodStatus(uid types.UID) { delete(m.podStatuses, uid) m.podStartupLatencyHelper.DeletePodStartupState(uid) if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { - delete(m.podResizeStatuses, uid) + delete(m.podResizeConditions, uid) } } @@ -709,7 +784,7 @@ func (m *manager) RemoveOrphanedStatuses(podUIDs map[types.UID]bool) { klog.V(5).InfoS("Removing pod from status map.", "podUID", key) delete(m.podStatuses, key) if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { - delete(m.podResizeStatuses, key) + delete(m.podResizeConditions, key) } } } @@ -1086,3 +1161,22 @@ func NeedToReconcilePodReadiness(pod *v1.Pod) bool { } return false } + +func updatedPodResizeCondition(conditionType v1.PodConditionType, oldCondition *v1.PodCondition, reason, message string) *v1.PodCondition { + now := metav1.NewTime(time.Now()) + var lastTransitionTime metav1.Time + if oldCondition == nil || oldCondition.Reason != reason { + lastTransitionTime = now + } else { + lastTransitionTime = oldCondition.LastTransitionTime + } + + return &v1.PodCondition{ + Type: conditionType, + Status: v1.ConditionTrue, + LastProbeTime: now, + LastTransitionTime: lastTransitionTime, + Reason: reason, + Message: message, + } +} diff --git a/pkg/kubelet/status/status_manager_test.go b/pkg/kubelet/status/status_manager_test.go index dc0ed827a97..29eb6cebd34 100644 --- a/pkg/kubelet/status/status_manager_test.go +++ b/pkg/kubelet/status/status_manager_test.go @@ -28,12 +28,14 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/fake" core "k8s.io/client-go/testing" @@ -2024,7 +2026,107 @@ func TestMergePodStatus(t *testing.T) { } }) } +} +func TestPodResizeConditions(t *testing.T) { + m := NewManager(&fake.Clientset{}, kubepod.NewBasicPodManager(), 
&statustest.FakePodDeletionSafetyProvider{}, util.NewPodStartupLatencyTracker()) + podUID := types.UID("12345") + + testCases := []struct { + name string + updateFunc func(types.UID) + expected []*v1.PodCondition + }{ + { + name: "initial empty conditions", + updateFunc: nil, + expected: nil, + }, + { + name: "set pod resize in progress condition with reason and message", + updateFunc: func(podUID types.UID) { + m.SetPodResizeInProgressCondition(podUID, "some-reason", "some-message") + }, + expected: []*v1.PodCondition{ + { + Type: v1.PodResizeInProgress, + Status: v1.ConditionTrue, + Reason: "some-reason", + Message: "some-message", + }, + }, + }, + { + name: "set pod resize in progress condition without reason and message", + updateFunc: func(podUID types.UID) { + m.SetPodResizeInProgressCondition(podUID, "", "") + }, + expected: []*v1.PodCondition{ + { + Type: v1.PodResizeInProgress, + Status: v1.ConditionTrue, + }, + }, + }, + { + name: "set pod resize pending condition with reason and message", + updateFunc: func(podUID types.UID) { + m.SetPodResizePendingCondition(podUID, "some-reason", "some-message") + }, + expected: []*v1.PodCondition{ + { + Type: v1.PodResizePending, + Status: v1.ConditionTrue, + Reason: "some-reason", + Message: "some-message", + }, + { + Type: v1.PodResizeInProgress, + Status: v1.ConditionTrue, + }, + }, + }, + { + name: "clear pod resize in progress condition", + updateFunc: func(podUID types.UID) { + m.ClearPodResizeInProgressCondition(podUID) + }, + expected: []*v1.PodCondition{ + { + Type: v1.PodResizePending, + Status: v1.ConditionTrue, + Reason: "some-reason", + Message: "some-message", + }, + }, + }, + { + name: "clear pod resize pending condition", + updateFunc: func(podUID types.UID) { + m.ClearPodResizePendingCondition(podUID) + }, + expected: nil, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + if tc.updateFunc != nil { + tc.updateFunc(podUID) + } + resizeConditions := m.GetPodResizeConditions(podUID) + if tc.expected == nil { + require.Nil(t, resizeConditions) + } else { + // ignore the last probe and transition times + for _, c := range resizeConditions { + c.LastProbeTime = metav1.Time{} + c.LastTransitionTime = metav1.Time{} + } + require.Equal(t, tc.expected, resizeConditions) + } + }) + } } func statusEqual(left, right v1.PodStatus) bool { diff --git a/pkg/kubelet/types/pod_status.go b/pkg/kubelet/types/pod_status.go index 593e9ce4ec6..24612e40df5 100644 --- a/pkg/kubelet/types/pod_status.go +++ b/pkg/kubelet/types/pod_status.go @@ -28,6 +28,8 @@ var PodConditionsByKubelet = []v1.PodConditionType{ v1.PodReady, v1.PodInitialized, v1.ContainersReady, + v1.PodResizeInProgress, + v1.PodResizePending, } // PodConditionByKubelet returns if the pod condition type is owned by kubelet diff --git a/pkg/registry/core/pod/strategy_test.go b/pkg/registry/core/pod/strategy_test.go index 9caf0de4322..b327578a099 100644 --- a/pkg/registry/core/pod/strategy_test.go +++ b/pkg/registry/core/pod/strategy_test.go @@ -3072,7 +3072,6 @@ func TestPodResizePrepareForUpdate(t *testing.T) { ), podtest.SetGeneration(1), podtest.SetStatus(podtest.MakePodStatus( - podtest.SetResizeStatus(""), // Resize status not set podtest.SetContainerStatuses( podtest.MakeContainerStatus("init-container1", api.ResourceList{ diff --git a/pkg/scheduler/framework/types_test.go b/pkg/scheduler/framework/types_test.go index 1f8875ce78b..e829bba994c 100644 --- a/pkg/scheduler/framework/types_test.go +++ b/pkg/scheduler/framework/types_test.go @@ -1896,7 
+1896,7 @@ func TestCalculatePodResourcesWithResize(t *testing.T) { requests, statusResources, initRequests, initStatusResources, sidecarRequests, sidecarStatusResources *v1.ResourceList, - resizeStatus v1.PodResizeStatus) PodInfo { + resizeStatus []*v1.PodCondition) PodInfo { if requests != nil { pod.Spec.Containers = append(pod.Spec.Containers, @@ -1952,7 +1952,10 @@ func TestCalculatePodResourcesWithResize(t *testing.T) { }) } - pod.Status.Resize = resizeStatus + for _, c := range resizeStatus { + pod.Status.Conditions = append(pod.Status.Conditions, *c) + } + return PodInfo{Pod: &pod} } @@ -1962,16 +1965,15 @@ func TestCalculatePodResourcesWithResize(t *testing.T) { statusResources v1.ResourceList initRequests *v1.ResourceList initStatusResources *v1.ResourceList + resizeStatus []*v1.PodCondition sidecarRequests *v1.ResourceList sidecarStatusResources *v1.ResourceList - resizeStatus v1.PodResizeStatus expectedResource podResource }{ { name: "Pod with no pending resize", requests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M}, statusResources: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M}, - resizeStatus: "", expectedResource: podResource{ resource: Resource{ MilliCPU: cpu500m.MilliValue(), @@ -1985,7 +1987,12 @@ func TestCalculatePodResourcesWithResize(t *testing.T) { name: "Pod with resize in progress", requests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M}, statusResources: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M}, - resizeStatus: v1.PodResizeStatusInProgress, + resizeStatus: []*v1.PodCondition{ + { + Type: v1.PodResizeInProgress, + Status: v1.ConditionTrue, + }, + }, expectedResource: podResource{ resource: Resource{ MilliCPU: cpu500m.MilliValue(), @@ -1999,7 +2006,13 @@ func TestCalculatePodResourcesWithResize(t *testing.T) { name: "Pod with deferred resize", requests: v1.ResourceList{v1.ResourceCPU: cpu700m, v1.ResourceMemory: mem800M}, statusResources: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M}, - resizeStatus: v1.PodResizeStatusDeferred, + resizeStatus: []*v1.PodCondition{ + { + Type: v1.PodResizePending, + Status: v1.ConditionTrue, + Reason: v1.PodReasonDeferred, + }, + }, expectedResource: podResource{ resource: Resource{ MilliCPU: cpu700m.MilliValue(), @@ -2013,7 +2026,13 @@ func TestCalculatePodResourcesWithResize(t *testing.T) { name: "Pod with infeasible resize", requests: v1.ResourceList{v1.ResourceCPU: cpu700m, v1.ResourceMemory: mem800M}, statusResources: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M}, - resizeStatus: v1.PodResizeStatusInfeasible, + resizeStatus: []*v1.PodCondition{ + { + Type: v1.PodResizePending, + Status: v1.ConditionTrue, + Reason: v1.PodReasonInfeasible, + }, + }, expectedResource: podResource{ resource: Resource{ MilliCPU: cpu500m.MilliValue(), @@ -2029,7 +2048,6 @@ func TestCalculatePodResourcesWithResize(t *testing.T) { statusResources: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M}, initRequests: &v1.ResourceList{v1.ResourceCPU: cpu700m, v1.ResourceMemory: mem800M}, initStatusResources: &v1.ResourceList{v1.ResourceCPU: cpu700m, v1.ResourceMemory: mem800M}, - resizeStatus: "", expectedResource: podResource{ resource: Resource{ MilliCPU: cpu700m.MilliValue(), @@ -2047,7 +2065,6 @@ func TestCalculatePodResourcesWithResize(t *testing.T) { initStatusResources: &v1.ResourceList{v1.ResourceCPU: cpu700m, v1.ResourceMemory: mem800M}, sidecarRequests: &v1.ResourceList{v1.ResourceCPU: 
cpu700m, v1.ResourceMemory: mem800M}, sidecarStatusResources: &v1.ResourceList{v1.ResourceCPU: cpu700m, v1.ResourceMemory: mem800M}, - resizeStatus: "", expectedResource: podResource{ resource: Resource{ MilliCPU: cpu500m.MilliValue() + cpu700m.MilliValue(), diff --git a/staging/src/k8s.io/api/core/v1/generated.proto b/staging/src/k8s.io/api/core/v1/generated.proto index 21b6d098f1f..116c530ec4e 100644 --- a/staging/src/k8s.io/api/core/v1/generated.proto +++ b/staging/src/k8s.io/api/core/v1/generated.proto @@ -4640,6 +4640,9 @@ message PodStatus { // Status of resources resize desired for pod's containers. // It is empty if no resources resize is pending. // Any changes to container resources will automatically set this to "Proposed" + // Deprecated: Resize status is moved to two pod conditions PodResizePending and PodResizeInProgress. + // PodResizePending will track states where the spec has been resized, but the Kubelet has not yet allocated the resources. + // PodResizeInProgress will track in-progress resizes, and should be present whenever allocated resources != acknowledged resources. // +featureGate=InPlacePodVerticalScaling // +optional optional string resize = 14; diff --git a/staging/src/k8s.io/api/core/v1/types.go b/staging/src/k8s.io/api/core/v1/types.go index d7ac4e93415..34e7780851d 100644 --- a/staging/src/k8s.io/api/core/v1/types.go +++ b/staging/src/k8s.io/api/core/v1/types.go @@ -3278,6 +3278,17 @@ const ( // PodReadyToStartContainers pod sandbox is successfully configured and // the pod is ready to launch containers. PodReadyToStartContainers PodConditionType = "PodReadyToStartContainers" + // PodResizePending indicates that the pod has been resized, but kubelet has not + // yet allocated the resources. If both PodResizePending and PodResizeInProgress + // are set, it means that a new resize was requested in the middle of a previous + // pod resize that is still in progress. + PodResizePending PodConditionType = "PodResizePending" + // PodResizeInProgress indicates that a resize is in progress, and is present whenever + // the Kubelet has allocated resources for the resize, but has not yet actuated all of + // the required changes. + // If both PodResizePending and PodResizeInProgress are set, it means that a new resize was + // requested in the middle of a previous pod resize that is still in progress. + PodResizeInProgress PodConditionType = "PodResizeInProgress" ) // These are reasons for a pod's transition to a condition. @@ -3301,6 +3312,18 @@ const ( // PodReasonPreemptionByScheduler reason in DisruptionTarget pod condition indicates that the // disruption was initiated by scheduler's preemption. PodReasonPreemptionByScheduler = "PreemptionByScheduler" + + // PodReasonDeferred reason in PodResizePending pod condition indicates the proposed resize is feasible in + // theory (it fits on this node) but is not possible right now. + PodReasonDeferred = "Deferred" + + // PodReasonInfeasible reason in PodResizePending pod condition indicates the proposed resize is not + // feasible and is rejected; it may not be re-evaluated + PodReasonInfeasible = "Infeasible" + + // PodReasonError reason in PodResizeInProgress pod condition indicates that an error occurred while + // actuating the resize. + PodReasonError = "Error" ) // PodCondition contains details for the current condition of this pod. 
@@ -3331,7 +3354,7 @@ type PodCondition struct { Message string `json:"message,omitempty" protobuf:"bytes,6,opt,name=message"` } -// PodResizeStatus shows status of desired resize of a pod's containers. +// Deprecated: PodResizeStatus shows status of desired resize of a pod's containers. type PodResizeStatus string const ( @@ -4976,6 +4999,9 @@ type PodStatus struct { // Status of resources resize desired for pod's containers. // It is empty if no resources resize is pending. // Any changes to container resources will automatically set this to "Proposed" + // Deprecated: Resize status is moved to two pod conditions PodResizePending and PodResizeInProgress. + // PodResizePending will track states where the spec has been resized, but the Kubelet has not yet allocated the resources. + // PodResizeInProgress will track in-progress resizes, and should be present whenever allocated resources != acknowledged resources. // +featureGate=InPlacePodVerticalScaling // +optional Resize PodResizeStatus `json:"resize,omitempty" protobuf:"bytes,14,opt,name=resize,casttype=PodResizeStatus"` diff --git a/staging/src/k8s.io/api/core/v1/types_swagger_doc_generated.go b/staging/src/k8s.io/api/core/v1/types_swagger_doc_generated.go index 732f8712c6e..e35ecac9a46 100644 --- a/staging/src/k8s.io/api/core/v1/types_swagger_doc_generated.go +++ b/staging/src/k8s.io/api/core/v1/types_swagger_doc_generated.go @@ -1862,7 +1862,7 @@ var map_PodStatus = map[string]string{ "containerStatuses": "Statuses of containers in this pod. Each container in the pod should have at most one status in this list, and all statuses should be for containers in the pod. However this is not enforced. If a status for a non-existent container is present in the list, or the list has duplicate names, the behavior of various Kubernetes components is not defined and those statuses might be ignored. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#pod-and-container-status", "qosClass": "The Quality of Service (QOS) classification assigned to the pod based on resource requirements See PodQOSClass type for available QOS classes More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-qos/#quality-of-service-classes", "ephemeralContainerStatuses": "Statuses for any ephemeral containers that have run in this pod. Each ephemeral container in the pod should have at most one status in this list, and all statuses should be for containers in the pod. However this is not enforced. If a status for a non-existent container is present in the list, or the list has duplicate names, the behavior of various Kubernetes components is not defined and those statuses might be ignored. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#pod-and-container-status", - "resize": "Status of resources resize desired for pod's containers. It is empty if no resources resize is pending. Any changes to container resources will automatically set this to \"Proposed\"", + "resize": "Status of resources resize desired for pod's containers. It is empty if no resources resize is pending. Any changes to container resources will automatically set this to \"Proposed\" Deprecated: Resize status is moved to two pod conditions PodResizePending and PodResizeInProgress. PodResizePending will track states where the spec has been resized, but the Kubelet has not yet allocated the resources. 
PodResizeInProgress will track in-progress resizes, and should be present whenever allocated resources != acknowledged resources.", "resourceClaimStatuses": "Status of resource claims.", } diff --git a/staging/src/k8s.io/component-helpers/resource/helpers.go b/staging/src/k8s.io/component-helpers/resource/helpers.go index 683dc3ad8b8..b848603cb58 100644 --- a/staging/src/k8s.io/component-helpers/resource/helpers.go +++ b/staging/src/k8s.io/component-helpers/resource/helpers.go @@ -223,7 +223,7 @@ func AggregateContainerRequests(pod *v1.Pod, opts PodResourcesOptions) v1.Resour // determineContainerReqs will return a copy of the container requests based on if resizing is feasible or not. func determineContainerReqs(pod *v1.Pod, container *v1.Container, cs *v1.ContainerStatus) v1.ResourceList { - if pod.Status.Resize == v1.PodResizeStatusInfeasible { + if IsPodResizeInfeasible(pod) { return cs.Resources.Requests.DeepCopy() } return max(container.Resources.Requests, cs.Resources.Requests) @@ -231,12 +231,32 @@ func determineContainerReqs(pod *v1.Pod, container *v1.Container, cs *v1.Contain // determineContainerLimits will return a copy of the container limits based on if resizing is feasible or not. func determineContainerLimits(pod *v1.Pod, container *v1.Container, cs *v1.ContainerStatus) v1.ResourceList { - if pod.Status.Resize == v1.PodResizeStatusInfeasible { + if IsPodResizeInfeasible(pod) { return cs.Resources.Limits.DeepCopy() } return max(container.Resources.Limits, cs.Resources.Limits) } +// IsPodResizeInfeasible returns true if the pod condition PodResizePending is set to infeasible. +func IsPodResizeInfeasible(pod *v1.Pod) bool { + for _, condition := range pod.Status.Conditions { + if condition.Type == v1.PodResizePending { + return condition.Reason == v1.PodReasonInfeasible + } + } + return false +} + +// IsPodResizeDeferred returns true if the pod condition PodResizePending is set to deferred. 
+func IsPodResizeDeferred(pod *v1.Pod) bool { + for _, condition := range pod.Status.Conditions { + if condition.Type == v1.PodResizePending { + return condition.Reason == v1.PodReasonDeferred + } + } + return false +} + // applyNonMissing will return a copy of the given resource list with any missing values replaced by the nonMissing values func applyNonMissing(reqs v1.ResourceList, nonMissing v1.ResourceList) v1.ResourceList { cp := v1.ResourceList{} diff --git a/staging/src/k8s.io/component-helpers/resource/helpers_test.go b/staging/src/k8s.io/component-helpers/resource/helpers_test.go index a5d4657d21f..b1717f1e328 100644 --- a/staging/src/k8s.io/component-helpers/resource/helpers_test.go +++ b/staging/src/k8s.io/component-helpers/resource/helpers_test.go @@ -289,7 +289,7 @@ func TestPodResourceRequests(t *testing.T) { description string options PodResourcesOptions overhead v1.ResourceList - podResizeStatus v1.PodResizeStatus + podResizeStatus []v1.PodCondition initContainers []v1.Container initContainerStatuses []v1.ContainerStatus containers []v1.Container @@ -432,8 +432,12 @@ func TestPodResourceRequests(t *testing.T) { expectedRequests: v1.ResourceList{ v1.ResourceCPU: resource.MustParse("2"), }, - podResizeStatus: v1.PodResizeStatusInfeasible, - options: PodResourcesOptions{UseStatusResources: true}, + podResizeStatus: []v1.PodCondition{{ + Type: v1.PodResizePending, + Status: v1.ConditionTrue, + Reason: v1.PodReasonInfeasible, + }}, + options: PodResourcesOptions{UseStatusResources: true}, containers: []v1.Container{ { Name: "container-1", @@ -487,8 +491,12 @@ func TestPodResourceRequests(t *testing.T) { expectedRequests: v1.ResourceList{ v1.ResourceCPU: resource.MustParse("4"), }, - podResizeStatus: v1.PodResizeStatusInfeasible, - options: PodResourcesOptions{UseStatusResources: false}, + podResizeStatus: []v1.PodCondition{{ + Type: v1.PodResizePending, + Status: v1.ConditionTrue, + Reason: v1.PodReasonInfeasible, + }}, + options: PodResourcesOptions{UseStatusResources: false}, containers: []v1.Container{ { Name: "container-1", @@ -515,8 +523,12 @@ func TestPodResourceRequests(t *testing.T) { expectedRequests: v1.ResourceList{ v1.ResourceCPU: resource.MustParse("2"), }, - podResizeStatus: v1.PodResizeStatusInfeasible, - options: PodResourcesOptions{UseStatusResources: true}, + podResizeStatus: []v1.PodCondition{{ + Type: v1.PodResizePending, + Status: v1.ConditionTrue, + Reason: v1.PodReasonInfeasible, + }}, + options: PodResourcesOptions{UseStatusResources: true}, initContainers: []v1.Container{ { Name: "restartable-init-1", @@ -572,8 +584,12 @@ func TestPodResourceRequests(t *testing.T) { expectedRequests: v1.ResourceList{ v1.ResourceCPU: resource.MustParse("4"), }, - podResizeStatus: v1.PodResizeStatusInfeasible, - options: PodResourcesOptions{UseStatusResources: false}, + podResizeStatus: []v1.PodCondition{{ + Type: v1.PodResizePending, + Status: v1.ConditionTrue, + Reason: v1.PodReasonInfeasible, + }}, + options: PodResourcesOptions{UseStatusResources: false}, initContainers: []v1.Container{ { Name: "restartable-init-1", @@ -789,7 +805,7 @@ func TestPodResourceRequests(t *testing.T) { Status: v1.PodStatus{ ContainerStatuses: tc.containerStatus, InitContainerStatuses: tc.initContainerStatuses, - Resize: tc.podResizeStatus, + Conditions: tc.podResizeStatus, }, } request := PodRequests(p, tc.options) diff --git a/staging/src/k8s.io/kubectl/pkg/util/resource/resource.go b/staging/src/k8s.io/kubectl/pkg/util/resource/resource.go index 369277ba6dd..376c02dd05e 100644 --- 
a/staging/src/k8s.io/kubectl/pkg/util/resource/resource.go +++ b/staging/src/k8s.io/kubectl/pkg/util/resource/resource.go @@ -25,6 +25,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/util/sets" + helpers "k8s.io/component-helpers/resource" ) // PodRequestsAndLimits returns a dictionary of all defined resources summed up for all @@ -142,7 +143,7 @@ func podLimits(pod *corev1.Pod) corev1.ResourceList { // determineContainerReqs will return a copy of the container requests based on if resizing is feasible or not. func determineContainerReqs(pod *corev1.Pod, container *corev1.Container, cs *corev1.ContainerStatus) corev1.ResourceList { - if pod.Status.Resize == corev1.PodResizeStatusInfeasible { + if helpers.IsPodResizeInfeasible(pod) { return cs.Resources.Requests.DeepCopy() } return max(container.Resources.Requests, cs.Resources.Requests) diff --git a/test/e2e/framework/pod/resize.go b/test/e2e/framework/pod/resize.go index 9ee975c2726..2edc5ed4c90 100644 --- a/test/e2e/framework/pod/resize.go +++ b/test/e2e/framework/pod/resize.go @@ -28,6 +28,7 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" utilerrors "k8s.io/apimachinery/pkg/util/errors" + helpers "k8s.io/component-helpers/resource" kubecm "k8s.io/kubernetes/pkg/kubelet/cm" "k8s.io/kubernetes/test/e2e/framework" imageutils "k8s.io/kubernetes/test/utils/image" @@ -394,7 +395,7 @@ func WaitForPodResizeActuation(ctx context.Context, f *framework.Framework, podC Eventually(ctx, framework.RetryNotFound(framework.GetObject(f.ClientSet.CoreV1().Pods(pod.Namespace).Get, pod.Name, metav1.GetOptions{}))). WithTimeout(f.Timeouts.PodStart). Should(framework.MakeMatcher(func(pod *v1.Pod) (func() string, error) { - if pod.Status.Resize == v1.PodResizeStatusInfeasible { + if helpers.IsPodResizeInfeasible(pod) { // This is a terminal resize state return func() string { return "resize is infeasible" @@ -407,6 +408,14 @@ func WaitForPodResizeActuation(ctx context.Context, f *framework.Framework, podC return fmt.Sprintf("container status resources don't match expected: %v", formatErrors(resourceErrs)) }, nil } + // Wait for kubelet to clear the resize status conditions. + for _, c := range pod.Status.Conditions { + if c.Type == v1.PodResizePending || c.Type == v1.PodResizeInProgress { + return func() string { + return fmt.Sprintf("resize status %v is still present in the pod status", c) + }, nil + } + } return nil, nil })), ) @@ -431,6 +440,13 @@ func ExpectPodResized(ctx context.Context, f *framework.Framework, resizedPod *v errs = append(errs, fmt.Errorf("container restart counts don't match expected: %w", formatErrors(restartErrs))) } + // Verify Pod Resize conditions are empty. + for _, condition := range resizedPod.Status.Conditions { + if condition.Type == v1.PodResizeInProgress || condition.Type == v1.PodResizePending { + errs = append(errs, fmt.Errorf("unexpected resize condition type %s found in pod status", condition.Type)) + } + } + if len(errs) > 0 { resizedPod.ManagedFields = nil // Suppress managed fields in error output. 
framework.ExpectNoError(formatErrors(utilerrors.NewAggregate(errs)), diff --git a/test/e2e/node/pod_resize.go b/test/e2e/node/pod_resize.go index 1a03e592f51..4f883b2f691 100644 --- a/test/e2e/node/pod_resize.go +++ b/test/e2e/node/pod_resize.go @@ -26,6 +26,7 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + helpers "k8s.io/component-helpers/resource" resourceapi "k8s.io/kubernetes/pkg/api/v1/resource" "k8s.io/kubernetes/test/e2e/feature" "k8s.io/kubernetes/test/e2e/framework" @@ -231,7 +232,7 @@ func doPodResizeSchedulerTests(f *framework.Framework) { node.Name, nodeAllocatableMilliCPU, nodeAvailableMilliCPU) // - // Scheduler focussed pod resize E2E test case #1: + // Scheduler focused pod resize E2E test case #1: // 1. Create pod1 and pod2 on node such that pod1 has enough CPU to be scheduled, but pod2 does not. // 2. Resize pod2 down so that it fits on the node and can be scheduled. // 3. Verify that pod2 gets scheduled and comes up and running. @@ -295,7 +296,7 @@ func doPodResizeSchedulerTests(f *framework.Framework) { ginkgo.By(fmt.Sprintf("TEST1: Verify that pod '%s' is running after resize", testPod2.Name)) framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, testPod2)) - // Scheduler focussed pod resize E2E test case #2 + // Scheduler focused pod resize E2E test case #2 // 1. With pod1 + pod2 running on node above, create pod3 that requests more CPU than available, verify pending. // 2. Resize pod1 down so that pod3 gets room to be scheduled. // 3. Verify that pod3 is scheduled and running. @@ -348,13 +349,71 @@ func doPodResizeSchedulerTests(f *framework.Framework) { framework.Logf("TEST2: Pod '%s' CPU requests '%dm'", testPod3.Name, testPod3.Spec.Containers[0].Resources.Requests.Cpu().MilliValue()) framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, testPod3)) - ginkgo.By("deleting pods") - delErr1 := e2epod.DeletePodWithWait(ctx, f.ClientSet, testPod1) - framework.ExpectNoError(delErr1, "failed to delete pod %s", testPod1.Name) + // Scheduler focused pod resize E2E test case #3 + // 1. With pod1 + pod2 + pod3 running on node above, attempt to scale up pod1 to request more CPU than available, verify deferred. + // 2. Delete pod2 + pod3 to make room for pod1's resize. + // 3. Verify that pod1 resize has completed. + // 4. Attempt to scale up pod1 to request more CPU than the node has, verify infeasible.
+ patchTestpod1ExceedNodeCapacity := fmt.Sprintf(`{ + "spec": { + "containers": [ + { + "name": "c1", + "resources": {"requests": {"cpu": "%dm"},"limits": {"cpu": "%dm"}} + } + ] + } + }`, testPod1CPUQuantity.MilliValue(), testPod1CPUQuantity.MilliValue()) + + testPod1CPUQuantityResizedAgain := resource.NewMilliQuantity(nodeAvailableMilliCPU*2, resource.DecimalSI) + patchTestpod1AgainExceedNodeCapacity := fmt.Sprintf(`{ + "spec": { + "containers": [ + { + "name": "c1", + "resources": {"requests": {"cpu": "%dm"},"limits": {"cpu": "%dm"}} + } + ] + } + }`, testPod1CPUQuantityResizedAgain.MilliValue(), testPod1CPUQuantityResizedAgain.MilliValue()) + + ginkgo.By(fmt.Sprintf("TEST3: Resize pod '%s' to exceed node capacity", testPod1.Name)) + testPod1, p1Err = f.ClientSet.CoreV1().Pods(testPod1.Namespace).Patch(ctx, + testPod1.Name, types.StrategicMergePatchType, []byte(patchTestpod1ExceedNodeCapacity), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(p1Err, "failed to patch pod for resize") + gomega.Expect(testPod1.Generation).To(gomega.BeEquivalentTo(3)) + framework.ExpectNoError(e2epod.WaitForPodCondition(ctx, f.ClientSet, testPod1.Namespace, testPod1.Name, "display pod resize status as deferred", f.Timeouts.PodStart, func(pod *v1.Pod) (bool, error) { + return helpers.IsPodResizeDeferred(pod), nil + })) + + ginkgo.By("deleting pods 2 and 3") delErr2 := e2epod.DeletePodWithWait(ctx, f.ClientSet, testPod2) framework.ExpectNoError(delErr2, "failed to delete pod %s", testPod2.Name) delErr3 := e2epod.DeletePodWithWait(ctx, f.ClientSet, testPod3) framework.ExpectNoError(delErr3, "failed to delete pod %s", testPod3.Name) + + ginkgo.By(fmt.Sprintf("TEST3: Verify pod '%s' is resized successfully after deletion of pods '%s' and '%s'", testPod1.Name, testPod2.Name, testPod3.Name)) + expected := []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: testPod1CPUQuantity.String(), CPULim: testPod1CPUQuantity.String()}, + }, + } + resizedPod := e2epod.WaitForPodResizeActuation(ctx, f, podClient, testPod1, expected) + e2epod.ExpectPodResized(ctx, f, resizedPod, expected) + + ginkgo.By(fmt.Sprintf("TEST3: Resize pod '%s' to exceed the node capacity", testPod1.Name)) + testPod1, p1Err = f.ClientSet.CoreV1().Pods(testPod1.Namespace).Patch(ctx, + testPod1.Name, types.StrategicMergePatchType, []byte(patchTestpod1AgainExceedNodeCapacity), metav1.PatchOptions{}, "resize") + framework.ExpectNoError(p1Err, "failed to patch pod for resize") + gomega.Expect(testPod1.Generation).To(gomega.BeEquivalentTo(4)) + framework.ExpectNoError(e2epod.WaitForPodCondition(ctx, f.ClientSet, testPod1.Namespace, testPod1.Name, "display pod resize status as infeasible", f.Timeouts.PodStart, func(pod *v1.Pod) (bool, error) { + return helpers.IsPodResizeInfeasible(pod), nil + })) + + ginkgo.By("deleting pod 1") + delErr1 := e2epod.DeletePodWithWait(ctx, f.ClientSet, testPod1) + framework.ExpectNoError(delErr1, "failed to delete pod %s", testPod1.Name) }) }
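Reviewer note (illustration only, not part of this diff): a minimal sketch of how a client could consume the new PodResizePending / PodResizeInProgress conditions through the helpers added above in staging/src/k8s.io/component-helpers/resource. The standalone package, the describeResize helper, and the hard-coded pod literal are assumptions made for the example; only the condition types, reasons, and helper functions come from this patch.

// Sketch: summarizing a pod's in-place resize state from its status conditions.
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	helpers "k8s.io/component-helpers/resource"
)

// describeResize is a hypothetical helper that maps the resize conditions
// introduced by this patch to a human-readable summary.
func describeResize(pod *v1.Pod) string {
	switch {
	case helpers.IsPodResizeInfeasible(pod):
		// PodResizePending with Reason=Infeasible: the node can never satisfy the request.
		return "resize pending: Infeasible"
	case helpers.IsPodResizeDeferred(pod):
		// PodResizePending with Reason=Deferred: the resize may become feasible later.
		return "resize pending: Deferred"
	}
	for _, c := range pod.Status.Conditions {
		if c.Type == v1.PodResizeInProgress {
			// Resources have been allocated but not yet fully actuated.
			return "resize in progress"
		}
	}
	return "no resize pending"
}

func main() {
	// A pod whose resize the kubelet has deferred.
	pod := &v1.Pod{Status: v1.PodStatus{Conditions: []v1.PodCondition{{
		Type:   v1.PodResizePending,
		Status: v1.ConditionTrue,
		Reason: v1.PodReasonDeferred,
	}}}}
	fmt.Println(describeResize(pod)) // prints: resize pending: Deferred
}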