Update for scaling rules in HorizontalPodAutoscaler

This commit is contained in:
Marcin Wielgus 2015-09-07 12:25:04 +02:00
parent 01d11b9f8f
commit 02c30c5d61
2 changed files with 49 additions and 26 deletions

View File

@ -200,16 +200,20 @@ and adjust the count of the Scale if needed to match the target
 The target number of pods will be calculated from the following formula:

 ```
-TargetNumOfPods = sum(CurrentPodsConsumption) / Target
+TargetNumOfPods = ceil(sum(CurrentPodsConsumption) / Target)
 ```

-To make scaling more stable, scale-up will happen only when the floor of ```TargetNumOfPods``` is higher than
-the current number, while scale-down will happen only when the ceiling of ```TargetNumOfPods``` is lower than
-the current number.
-The decision to scale-up will be executed instantly.
-However, we will execute scale-down only if the sufficient time has passed from the last scale-up (e.g.: 10 minutes).
-Such approach has two benefits:
+Starting and stopping pods may introduce noise to the metrics (for instance, starting may temporarily increase
+CPU and decrease average memory consumption), so after each action the autoscaler should wait some time for reliable data.
+Scale-up will happen if there was no rescaling within the last 3 minutes.
+Scale-down will wait for 10 minutes from the last rescaling. Moreover, any scaling will only be made if
+
+```
+avg(CurrentPodsConsumption) / Target
+```
+
+drops below 0.9 or increases above 1.1 (10% tolerance). Such approach has two benefits:

 * Autoscaler works in a conservative way.
 If new user load appears, it is important for us to rapidly increase the number of pods,
@ -218,10 +222,6 @@ Such approach has two benefits:
 * Autoscaler avoids thrashing, i.e.: prevents rapid execution of conflicting decision if the load is not stable.

-As the CPU consumption of a pod immediately after start may be highly variable due to initialization/startup,
-autoscaler will skip metrics from the first minute of pod lifecycle.
-
 ## Relative vs. absolute metrics

 The question arises whether the values of the target metrics should be absolute (e.g.: 0.6 core, 100MB of RAM)
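
The scaling rule changed by this hunk can be read end to end as a small amount of arithmetic. Below is a minimal, self-contained Go sketch of the formula, the 10% tolerance band, and the two cool-down windows described above. All names here (`desiredReplicas`, `shouldRescale`, the window constants) are illustrative only and are not the controller's actual API.

```go
package main

import (
	"fmt"
	"math"
	"time"
)

const (
	tolerance                = 0.1              // 10% band around the target
	upscaleForbiddenWindow   = 3 * time.Minute  // no scale-up within 3 minutes of a rescale
	downscaleForbiddenWindow = 10 * time.Minute // no scale-down within 10 minutes of a rescale
)

// desiredReplicas applies TargetNumOfPods = ceil(sum(CurrentPodsConsumption) / Target).
func desiredReplicas(podConsumption []float64, target float64) int {
	sum := 0.0
	for _, c := range podConsumption {
		sum += c
	}
	return int(math.Ceil(sum / target))
}

// shouldRescale applies the 10% tolerance band and the two forbidden windows.
func shouldRescale(current, desired int, avgConsumption, target float64, lastScale, now time.Time) bool {
	usageRatio := avgConsumption / target
	if desired > current && usageRatio > 1+tolerance && now.After(lastScale.Add(upscaleForbiddenWindow)) {
		return true
	}
	if desired < current && usageRatio < 1-tolerance && now.After(lastScale.Add(downscaleForbiddenWindow)) {
		return true
	}
	return false
}

func main() {
	// Three pods at 500m, 700m and 900m CPU against a 600m target:
	// ceil(2100 / 600) = ceil(3.5) = 4 desired replicas.
	consumption := []float64{500, 700, 900}
	target := 600.0
	desired := desiredReplicas(consumption, target)
	avg := (500.0 + 700.0 + 900.0) / 3 // 700m, usage ratio ~1.17 > 1.1
	lastScale := time.Now().Add(-5 * time.Minute)
	fmt.Println(desired, shouldRescale(3, desired, avg, target, lastScale, time.Now()))
}
```

Running the sketch prints `4 true`: the summed consumption calls for four replicas, and the average ratio of about 1.17 clears the 1.1 threshold with the 3-minute scale-up window already elapsed.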

View File

@ -18,6 +18,7 @@ package autoscalercontroller
 import (
 	"fmt"
+	"math"
 	"time"

 	"github.com/golang/glog"
@ -30,6 +31,15 @@ import (
 	"k8s.io/kubernetes/pkg/util"
 )

+const (
+	heapsterNamespace = "kube-system"
+	heapsterService   = "monitoring-heapster"
+
+	// Usage should exceed the tolerance before we start downscaling or upscaling the pods.
+	// TODO: make it a flag or HPA spec element.
+	tolerance = 0.1
+)
+
 type HorizontalPodAutoscalerController struct {
 	client    client.Interface
 	expClient client.ExperimentalInterface
@ -79,29 +89,41 @@ func (a *HorizontalPodAutoscalerController) reconcileAutoscalers() error {
 			continue
 		}

-		// if the ratio is 1.2 we want to have 2 replicas
-		desiredReplicas := 1 + int((currentConsumption.Quantity.MilliValue()*int64(currentReplicas))/hpa.Spec.Target.Quantity.MilliValue())
+		usageRatio := float64(currentConsumption.Quantity.MilliValue()) / float64(hpa.Spec.Target.Quantity.MilliValue())
+		desiredReplicas := int(math.Ceil(usageRatio * float64(currentReplicas)))

 		if desiredReplicas < hpa.Spec.MinCount {
 			desiredReplicas = hpa.Spec.MinCount
 		}

+		// TODO: remove when pod idling is done.
+		if desiredReplicas == 0 {
+			desiredReplicas = 1
+		}
+
 		if desiredReplicas > hpa.Spec.MaxCount {
 			desiredReplicas = hpa.Spec.MaxCount
 		}

 		now := time.Now()
 		rescale := false

 		if desiredReplicas != currentReplicas {
-			// Going down
-			if desiredReplicas < currentReplicas && (hpa.Status == nil || hpa.Status.LastScaleTimestamp == nil ||
+			// Going down only if the usageRatio dropped significantly below the target
+			// and there was no rescaling in the last downscaleForbiddenWindow.
+			if desiredReplicas < currentReplicas && usageRatio < (1-tolerance) &&
+				(hpa.Status == nil || hpa.Status.LastScaleTimestamp == nil ||
 					hpa.Status.LastScaleTimestamp.Add(downscaleForbiddenWindow).Before(now)) {
 				rescale = true
 			}

-			// Going up
-			if desiredReplicas > currentReplicas && (hpa.Status == nil || hpa.Status.LastScaleTimestamp == nil ||
+			// Going up only if the usage ratio increased significantly above the target
+			// and there was no rescaling in the last upscaleForbiddenWindow.
+			if desiredReplicas > currentReplicas && usageRatio > (1+tolerance) &&
+				(hpa.Status == nil || hpa.Status.LastScaleTimestamp == nil ||
 					hpa.Status.LastScaleTimestamp.Add(upscaleForbiddenWindow).Before(now)) {
 				rescale = true
 			}
 		}

 		if rescale {
 			scale.Spec.Replicas = desiredReplicas
@ -110,7 +132,8 @@ func (a *HorizontalPodAutoscalerController) reconcileAutoscalers() error {
 				glog.Warningf("Failed to rescale %s: %v", reference, err)
 				continue
 			}
-		}
+		} else {
+			desiredReplicas = currentReplicas
 		}

 		status := expapi.HorizontalPodAutoscalerStatus{
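
For readers comparing the two calculations in the hunk above: the old `1 + int(...)` expression always steps past an exact fit, while the new `math.Ceil` form keeps the replica count unchanged when pods run exactly at the target. The snippet below is a hypothetical standalone comparison, not part of the commit; it treats `currentConsumption` as the average per-pod consumption, which is how both formulas use it.

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	// Two replicas, each consuming exactly the 600m target.
	currentReplicas := int64(2)
	avgConsumptionMilli := int64(600) // per-pod average, in milli-units
	targetMilli := int64(600)

	// Old: 1 + int(...) always rounds past an exact fit.
	oldCount := 1 + int((avgConsumptionMilli*currentReplicas)/targetMilli) // 1 + 2 = 3

	// New: ceil(usageRatio * currentReplicas) keeps the count stable at an exact fit.
	usageRatio := float64(avgConsumptionMilli) / float64(targetMilli)
	newCount := int(math.Ceil(usageRatio * float64(currentReplicas))) // ceil(2.0) = 2

	fmt.Println(oldCount, newCount) // prints: 3 2
}
```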