Merge pull request #66615 from jbartosik/cpu-warm-up

Automatic merge from submit-queue (batch tested with PRs 65730, 66615, 66684, 66519, 66510). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Speed up HPA reaction to metric changes by removing scale up forbidden window **What this PR does / why we need it**: Speed up HPA reaction to metric changes by removing scale up forbidden window. Scale up forbidden window was protecting HPA against making decision to scale up based on metrics gathered during pod initialisation (which may be invalid, for example pod may be using a lot of CPU despite not doing any "actual" work). To avoid that negative effect only use per pod metrics from pods that are: - ready (so metrics about them should be valid), or - unready but creation and last readiness change timestamps are apart more than 10s (pods that have formerly been ready and so metrics are in at least some cases (pod becoming unready because of overload) very useful). This PR only deals with scale up window. I'll send another PR dealing with scale down window soon. **Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*: **Special notes for your reviewer**: **Release note**: ```release-note Speed up HPA reaction to metric changes by removing scale up forbidden window. Scale up forbidden window was protecting HPA against making decision to scale up based on metrics gathered during pod initialisation (which may be invalid, for example pod may be using a lot of CPU despite not doing any "actual" work). 
To avoid that negative effect only use per pod metrics from pods that are: - ready (so metrics about them should be valid), or - unready but creation and last readiness change timestamps are apart more than 10s (pods that have formerly been ready and so metrics are in at least some cases (pod becoming unready because of overload) very useful). ```
2025-07-24 12:15:52 +00:00 · 2018-08-01 15:52:10 -07:00 · 2018-08-01 15:52:10 -07:00 · c32e0e84da
commit c32e0e84da
parent 22d0ef2a8e 8ef369ea2d
7 changed files with 588 additions and 435 deletions
--- a/cmd/kube-controller-manager/app/autoscaling.go
+++ b/cmd/kube-controller-manager/app/autoscaling.go
@ -93,7 +93,6 @@ func startHPAControllerWithMetricsClient(ctx ControllerContext, metricsClient me
 		replicaCalc,
 		ctx.InformerFactory.Autoscaling().V1().HorizontalPodAutoscalers(),
 		ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerSyncPeriod.Duration,
-		ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow.Duration,
 		ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration,
 	).Run(ctx.Stop)
 	return nil, true, nil
--- a/cmd/kube-controller-manager/app/options/hpacontroller.go
+++ b/cmd/kube-controller-manager/app/options/hpacontroller.go
@ -40,6 +40,7 @@ func (o *HPAControllerOptions) AddFlags(fs *pflag.FlagSet) {

 	fs.DurationVar(&o.HorizontalPodAutoscalerSyncPeriod.Duration, "horizontal-pod-autoscaler-sync-period", o.HorizontalPodAutoscalerSyncPeriod.Duration, "The period for syncing the number of pods in horizontal pod autoscaler.")
 	fs.DurationVar(&o.HorizontalPodAutoscalerUpscaleForbiddenWindow.Duration, "horizontal-pod-autoscaler-upscale-delay", o.HorizontalPodAutoscalerUpscaleForbiddenWindow.Duration, "The period since last upscale, before another upscale can be performed in horizontal pod autoscaler.")
+	fs.MarkDeprecated("horizontal-pod-autoscaler-upscale-delay", "This flag is currently no-op and will be deleted.")
 	fs.DurationVar(&o.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration, "horizontal-pod-autoscaler-downscale-delay", o.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration, "The period since last downscale, before another downscale can be performed in horizontal pod autoscaler.")
 	fs.Float64Var(&o.HorizontalPodAutoscalerTolerance, "horizontal-pod-autoscaler-tolerance", o.HorizontalPodAutoscalerTolerance, "The minimum change (from 1.0) in the desired-to-actual metrics ratio for the horizontal pod autoscaler to consider scaling.")
 	fs.BoolVar(&o.HorizontalPodAutoscalerUseRESTClients, "horizontal-pod-autoscaler-use-rest-clients", o.HorizontalPodAutoscalerUseRESTClients, "If set to true, causes the horizontal pod autoscaler controller to use REST clients through the kube-aggregator, instead of using the legacy metrics client through the API server proxy.  This is required for custom metrics support in the horizontal pod autoscaler.")
@ -52,7 +53,6 @@ func (o *HPAControllerOptions) ApplyTo(cfg *componentconfig.HPAControllerConfigu
 	}

 	cfg.HorizontalPodAutoscalerSyncPeriod = o.HorizontalPodAutoscalerSyncPeriod
-	cfg.HorizontalPodAutoscalerUpscaleForbiddenWindow = o.HorizontalPodAutoscalerUpscaleForbiddenWindow
 	cfg.HorizontalPodAutoscalerDownscaleForbiddenWindow = o.HorizontalPodAutoscalerDownscaleForbiddenWindow
 	cfg.HorizontalPodAutoscalerTolerance = o.HorizontalPodAutoscalerTolerance
 	cfg.HorizontalPodAutoscalerUseRESTClients = o.HorizontalPodAutoscalerUseRESTClients
--- a/pkg/controller/podautoscaler/horizontal.go
+++ b/pkg/controller/podautoscaler/horizontal.go
@ -64,7 +64,6 @@ type HorizontalController struct {
 	replicaCalc   *ReplicaCalculator
 	eventRecorder record.EventRecorder

-	upscaleForbiddenWindow   time.Duration
 	downscaleForbiddenWindow time.Duration

 	// hpaLister is able to list/get HPAs from the shared cache from the informer passed in to
@ -85,7 +84,6 @@ func NewHorizontalController(
 	replicaCalc *ReplicaCalculator,
 	hpaInformer autoscalinginformers.HorizontalPodAutoscalerInformer,
 	resyncPeriod time.Duration,
-	upscaleForbiddenWindow time.Duration,
 	downscaleForbiddenWindow time.Duration,

 ) *HorizontalController {
@ -99,7 +97,6 @@ func NewHorizontalController(
 		eventRecorder:            recorder,
 		scaleNamespacer:          scaleNamespacer,
 		hpaNamespacer:            hpaNamespacer,
-		upscaleForbiddenWindow:   upscaleForbiddenWindow,
 		downscaleForbiddenWindow: downscaleForbiddenWindow,
 		queue:  workqueue.NewNamedRateLimitingQueue(NewDefaultHPARateLimiter(resyncPeriod), "horizontalpodautoscaler"),
 		mapper: mapper,
@ -246,7 +243,6 @@ func (a *HorizontalController) computeReplicasForMetrics(hpa *autoscalingv2.Hori
 			setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "InvalidMetricSourceType", "the HPA was unable to compute the replica count: %s", errMsg)
 			return 0, "", nil, time.Time{}, fmt.Errorf(errMsg)
 		}
-
 		if replicas == 0 || replicaCountProposal > replicas {
 			timestamp = timestampProposal
 			replicas = replicaCountProposal
@ -472,6 +468,7 @@ func (a *HorizontalController) reconcileAutoscaler(hpav1Shared *autoscalingv1.Ho
 		rescaleReason = "Current number of replicas must be greater than 0"
 		desiredReplicas = 1
 	} else {
+
 		metricDesiredReplicas, metricName, metricStatuses, metricTimestamp, err = a.computeReplicasForMetrics(hpa, scale, hpa.Spec.Metrics)
 		if err != nil {
 			a.setCurrentReplicasInStatus(hpa, currentReplicas)
@ -507,15 +504,6 @@ func (a *HorizontalController) reconcileAutoscaler(hpav1Shared *autoscalingv1.Ho
 				setCondition(hpa, autoscalingv2.AbleToScale, v1.ConditionFalse, "BackoffDownscale", "the time since the previous scale is still within the downscale forbidden window")
 				backoffDown = true
 			}
-
-			if !hpa.Status.LastScaleTime.Add(a.upscaleForbiddenWindow).Before(timestamp) {
-				backoffUp = true
-				if backoffDown {
-					setCondition(hpa, autoscalingv2.AbleToScale, v1.ConditionFalse, "BackoffBoth", "the time since the previous scale is still within both the downscale and upscale forbidden windows")
-				} else {
-					setCondition(hpa, autoscalingv2.AbleToScale, v1.ConditionFalse, "BackoffUpscale", "the time since the previous scale is still within the upscale forbidden window")
-				}
-			}
 		}

 		if !backoffDown && !backoffUp {
@ -634,9 +622,8 @@ func (a *HorizontalController) shouldScale(hpa *autoscalingv2.HorizontalPodAutos
 		return true
 	}

-	// Going up only if the usage ratio increased significantly above the target
-	// and there was no rescaling in the last upscaleForbiddenWindow.
-	if desiredReplicas > currentReplicas && hpa.Status.LastScaleTime.Add(a.upscaleForbiddenWindow).Before(timestamp) {
+	// Going up only if the usage ratio increased significantly above the target.
+	if desiredReplicas > currentReplicas {
 		return true
 	}

--- a/pkg/controller/podautoscaler/horizontal_test.go
+++ b/pkg/controller/podautoscaler/horizontal_test.go
--- a/pkg/controller/podautoscaler/legacy_horizontal_test.go
+++ b/pkg/controller/podautoscaler/legacy_horizontal_test.go
@ -491,7 +491,6 @@ func (tc *legacyTestCase) runTest(t *testing.T) {
 	}

 	informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc())
-	defaultUpscaleForbiddenWindow := 3 * time.Minute
 	defaultDownscaleForbiddenWindow := 5 * time.Minute

 	hpaController := NewHorizontalController(
@ -502,7 +501,6 @@ func (tc *legacyTestCase) runTest(t *testing.T) {
 		replicaCalc,
 		informerFactory.Autoscaling().V1().HorizontalPodAutoscalers(),
 		controller.NoResyncPeriodFunc(),
-		defaultUpscaleForbiddenWindow,
 		defaultDownscaleForbiddenWindow,
 	)
 	hpaController.hpaListerSynced = alwaysReady
--- a/pkg/controller/podautoscaler/replica_calculator.go
+++ b/pkg/controller/podautoscaler/replica_calculator.go
@ -35,6 +35,10 @@ const (
 	// defaultTestingTolerance is default value for calculating when to
 	// scale up/scale down.
 	defaultTestingTolerance = 0.1
+
+	// Pod begins existence as unready. If pod is unready and timestamp of last pod readiness change is
+	// less than maxDelayOfInitialReadinessStatus after pod start we assume it has never been ready.
+	maxDelayOfInitialReadinessStatus = 10 * time.Second
 )

 type ReplicaCalculator struct {
@ -205,7 +209,7 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
 	missingPods := sets.NewString()

 	for _, pod := range podList.Items {
-		if pod.Status.Phase != v1.PodRunning || !podutil.IsPodReady(&pod) {
+		if pod.Status.Phase != v1.PodRunning || !hasPodBeenReadyBefore(&pod) {
 			// save this pod name for later, but pretend it doesn't exist for now
 			unreadyPods.Insert(pod.Name)
 			delete(metrics, pod.Name)
@ -381,3 +385,22 @@ func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(currentReplicas int3
 	utilization = int64(math.Ceil(float64(utilization) / float64(currentReplicas)))
 	return replicaCount, utilization, timestamp, nil
 }
+
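+// hasPodBeenReadyBefore returns true if the pod is ready, or if it is not ready but its last readiness transition happened more than maxDelayOfInitialReadinessStatus after the pod's start time (so it is assumed to have been ready in the past).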
+func hasPodBeenReadyBefore(pod *v1.Pod) bool {
+	_, readyCondition := podutil.GetPodCondition(&pod.Status, v1.PodReady)
+	if readyCondition == nil {
+		return false
+	}
+	if readyCondition.Status == v1.ConditionTrue {
+		return true
+	}
+	lastReady := readyCondition.LastTransitionTime.Time
+	if pod.Status.StartTime == nil {
+		return false
+	}
+	started := pod.Status.StartTime.Time
+	// If the last readiness transition happened more than maxDelayOfInitialReadinessStatus after
+	// the pod's StartTime, assume the pod was ready at some point in the past.
+	return lastReady.After(started.Add(maxDelayOfInitialReadinessStatus))
+}
--- a/pkg/controller/podautoscaler/replica_calculator_test.go
+++ b/pkg/controller/podautoscaler/replica_calculator_test.go
@ -1069,4 +1069,76 @@ func TestReplicaCalcComputedToleranceAlgImplementation(t *testing.T) {
 	tc.runTest(t)
 }

+func TestHasPodBeenReadyBefore(t *testing.T) {
+	tests := []struct {
+		name       string
+		conditions []v1.PodCondition
+		started    time.Time
+		expected   bool
+	}{
+		{
+			"initially unready",
+			[]v1.PodCondition{
+				{
+					Type: v1.PodReady,
+					LastTransitionTime: metav1.Time{
+						Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
+					},
+					Status: v1.ConditionFalse,
+				},
+			},
+			metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
+			false,
+		},
+		{
+			"currently unready",
+			[]v1.PodCondition{
+				{
+					Type: v1.PodReady,
+					LastTransitionTime: metav1.Time{
+						Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
+					},
+					Status: v1.ConditionFalse,
+				},
+			},
+			metav1.Date(2018, 7, 25, 17, 0, 0, 0, time.UTC).Time,
+			true,
+		},
+		{
+			"currently ready",
+			[]v1.PodCondition{
+				{
+					Type: v1.PodReady,
+					LastTransitionTime: metav1.Time{
+						Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
+					},
+					Status: v1.ConditionTrue,
+				},
+			},
+			metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
+			true,
+		},
+		{
+			"no ready status",
+			[]v1.PodCondition{},
+			metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
+			false,
+		},
+	}
+	for _, tc := range tests {
+		pod := &v1.Pod{
+			Status: v1.PodStatus{
+				Conditions: tc.conditions,
+				StartTime: &metav1.Time{
+					Time: tc.started,
+				},
+			},
+		}
+		got := hasPodBeenReadyBefore(pod)
+		if got != tc.expected {
+			t.Errorf("[TestHasPodBeenReadyBefore.%s] got %v, want %v", tc.name, got, tc.expected)
+		}
+	}
+}
+
 // TODO: add more tests