From 02c30c5d61bb282a1e9faf4734ddf1437444f102 Mon Sep 17 00:00:00 2001
From: Marcin Wielgus
Date: Mon, 7 Sep 2015 12:25:04 +0200
Subject: [PATCH] Update for scaling rules in HorizontalPodAutoscaler

---
 docs/proposals/horizontal-pod-autoscaler.md        | 22 ++++----
 .../horizontalpodautoscaler_controller.go          | 53 +++++++++++++------
 2 files changed, 49 insertions(+), 26 deletions(-)

diff --git a/docs/proposals/horizontal-pod-autoscaler.md b/docs/proposals/horizontal-pod-autoscaler.md
index 6ae84532fc7..924988d29d6 100644
--- a/docs/proposals/horizontal-pod-autoscaler.md
+++ b/docs/proposals/horizontal-pod-autoscaler.md
@@ -200,16 +200,20 @@ and adjust the count of the Scale if needed to match the target
 The target number of pods will be calculated from the following formula:
 
 ```
-TargetNumOfPods = sum(CurrentPodsConsumption) / Target
+TargetNumOfPods = ceil(sum(CurrentPodsConsumption) / Target)
 ```
 
-To make scaling more stable, scale-up will happen only when the floor of ```TargetNumOfPods``` is higher than
-the current number, while scale-down will happen only when the ceiling of ```TargetNumOfPods``` is lower than
-the current number.
+Starting and stopping pods may introduce noise to the metrics (for instance, starting may temporarily increase
+CPU and decrease average memory consumption), so after each action the autoscaler should wait some time for reliable data.
 
-The decision to scale-up will be executed instantly.
-However, we will execute scale-down only if the sufficient time has passed from the last scale-up (e.g.: 10 minutes).
-Such approach has two benefits:
+Scale-up will happen only if there was no rescaling within the last 3 minutes.
+Scale-down will wait 10 minutes from the last rescaling. Moreover, scaling will only happen if
+
+```
+avg(CurrentPodsConsumption) / Target
+```
+
+drops below 0.9 or rises above 1.1 (10% tolerance). Such an approach has two benefits:
 
 * Autoscaler works in a conservative way.
   If new user load appears, it is important for us to rapidly increase the number of pods,
@@ -218,10 +222,6 @@ Such approach has two benefits:
 * Autoscaler avoids thrashing, i.e.: prevents rapid execution of conflicting decision if the load is not
   stable.
 
-
-As the CPU consumption of a pod immediately after start may be highly variable due to initialization/startup,
-autoscaler will skip metrics from the first minute of pod lifecycle.
-
 ## Relative vs. absolute metrics
 
 The question arises whether the values of the target metrics should be absolute (e.g.: 0.6 core, 100MB of RAM)
diff --git a/pkg/controller/autoscaler/horizontalpodautoscaler_controller.go b/pkg/controller/autoscaler/horizontalpodautoscaler_controller.go
index a3cce83833b..abdb061e99d 100644
--- a/pkg/controller/autoscaler/horizontalpodautoscaler_controller.go
+++ b/pkg/controller/autoscaler/horizontalpodautoscaler_controller.go
@@ -18,6 +18,7 @@ package autoscalercontroller
 
 import (
 	"fmt"
+	"math"
 	"time"
 
 	"github.com/golang/glog"
@@ -30,6 +31,15 @@ import (
 	"k8s.io/kubernetes/pkg/util"
 )
 
+const (
+	heapsterNamespace = "kube-system"
+	heapsterService   = "monitoring-heapster"
+
+	// Usage should exceed the tolerance before we start downscaling or upscaling the pods.
+	// TODO: make it a flag or HPA spec element.
+	tolerance = 0.1
+)
+
 type HorizontalPodAutoscalerController struct {
 	client    client.Interface
 	expClient client.ExperimentalInterface
@@ -79,38 +89,51 @@ func (a *HorizontalPodAutoscalerController) reconcileAutoscalers() error {
 			continue
 		}
 
-		// if the ratio is 1.2 we want to have 2 replicas
-		desiredReplicas := 1 + int((currentConsumption.Quantity.MilliValue()*int64(currentReplicas))/hpa.Spec.Target.Quantity.MilliValue())
+		usageRatio := float64(currentConsumption.Quantity.MilliValue()) / float64(hpa.Spec.Target.Quantity.MilliValue())
+		desiredReplicas := int(math.Ceil(usageRatio * float64(currentReplicas)))
 
 		if desiredReplicas < hpa.Spec.MinCount {
 			desiredReplicas = hpa.Spec.MinCount
 		}
+
+		// TODO: remove when pod idling is done.
+		if desiredReplicas == 0 {
+			desiredReplicas = 1
+		}
+
 		if desiredReplicas > hpa.Spec.MaxCount {
 			desiredReplicas = hpa.Spec.MaxCount
 		}
 		now := time.Now()
 		rescale := false
+
 		if desiredReplicas != currentReplicas {
-			// Going down
-			if desiredReplicas < currentReplicas && (hpa.Status == nil || hpa.Status.LastScaleTimestamp == nil ||
-				hpa.Status.LastScaleTimestamp.Add(downscaleForbiddenWindow).Before(now)) {
+			// Going down only if the usageRatio dropped significantly below the target
+			// and there was no rescaling in the last downscaleForbiddenWindow.
+			if desiredReplicas < currentReplicas && usageRatio < (1-tolerance) &&
+				(hpa.Status == nil || hpa.Status.LastScaleTimestamp == nil ||
+					hpa.Status.LastScaleTimestamp.Add(downscaleForbiddenWindow).Before(now)) {
 				rescale = true
 			}
 
-			// Going up
-			if desiredReplicas > currentReplicas && (hpa.Status == nil || hpa.Status.LastScaleTimestamp == nil ||
-				hpa.Status.LastScaleTimestamp.Add(upscaleForbiddenWindow).Before(now)) {
+			// Going up only if the usage ratio increased significantly above the target
+			// and there was no rescaling in the last upscaleForbiddenWindow.
+			if desiredReplicas > currentReplicas && usageRatio > (1+tolerance) &&
+				(hpa.Status == nil || hpa.Status.LastScaleTimestamp == nil ||
+					hpa.Status.LastScaleTimestamp.Add(upscaleForbiddenWindow).Before(now)) {
 				rescale = true
 			}
+		}
 
-			if rescale {
-				scale.Spec.Replicas = desiredReplicas
-				_, err = a.expClient.Scales(hpa.Namespace).Update(hpa.Spec.ScaleRef.Kind, scale)
-				if err != nil {
-					glog.Warningf("Failed to rescale %s: %v", reference, err)
-					continue
-				}
+		if rescale {
+			scale.Spec.Replicas = desiredReplicas
+			_, err = a.expClient.Scales(hpa.Namespace).Update(hpa.Spec.ScaleRef.Kind, scale)
+			if err != nil {
+				glog.Warningf("Failed to rescale %s: %v", reference, err)
+				continue
 			}
+		} else {
+			desiredReplicas = currentReplicas
 		}
 
 		status := expapi.HorizontalPodAutoscalerStatus{
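
Note for reviewers: below is a minimal, self-contained Go sketch of the ceiling-plus-tolerance rule this patch introduces, shown in isolation. The function name, its arguments, and the millivalue inputs are illustrative only (they are not part of the controller's API), and the sketch deliberately omits the min/max clamping and the 3-minute/10-minute forbidden windows that the controller also applies.

```go
package main

import (
	"fmt"
	"math"
)

// tolerance mirrors the 10% band described in the proposal: no scaling
// happens while avg(CurrentPodsConsumption)/Target stays within [0.9, 1.1].
const tolerance = 0.1

// desiredReplicas applies the rule from the proposal:
// TargetNumOfPods = ceil(sum(CurrentPodsConsumption) / Target),
// expressed here via the per-pod usage ratio, as in the controller change.
func desiredReplicas(currentReplicas int, avgConsumptionMilli, targetMilli int64) int {
	usageRatio := float64(avgConsumptionMilli) / float64(targetMilli)
	if usageRatio > 1-tolerance && usageRatio < 1+tolerance {
		// Inside the tolerance band: keep the current replica count.
		return currentReplicas
	}
	return int(math.Ceil(usageRatio * float64(currentReplicas)))
}

func main() {
	// 3 pods averaging 900m against a 500m target -> usage ratio 1.8 -> 6 pods.
	fmt.Println(desiredReplicas(3, 900, 500))
	// 3 pods averaging 520m against a 500m target -> ratio 1.04, inside the
	// 10% tolerance -> stay at 3 pods.
	fmt.Println(desiredReplicas(3, 520, 500))
}
```

Running this prints 6 and 3: a usage ratio of 1.8 scales 3 pods up to 6, while a ratio of 1.04 stays inside the 10% band and leaves the replica count unchanged.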