From 7681c284f5b1ee3a1e7830c439b67fe3dc7475e5 Mon Sep 17 00:00:00 2001
From: Joachim Bartosik
Date: Wed, 18 Jul 2018 14:21:00 +0200
Subject: [PATCH] Remove UpscaleForbiddenWindow

Instead, discard metric values for pods that are unready and have never
been ready (they may report misleading values, the original reason for
introducing the scale up forbidden window).

Use the per-pod metric when the pod is:
- Ready, or
- Not ready, but its creation timestamp and last readiness change are
  more than 10s apart.

In the latter case we assume the pod was ready but later became unready.
We want to use metrics for such pods because sometimes such pods are
unready because they were getting too much load.
---
 .../app/autoscaling.go                        |  1 -
 pkg/controller/podautoscaler/horizontal.go    | 19 +----
 .../podautoscaler/horizontal_test.go          | 82 ++++++++++++++++++-
 .../podautoscaler/legacy_horizontal_test.go   |  2 -
 .../podautoscaler/replica_calculator.go       | 25 +++++-
 .../podautoscaler/replica_calculator_test.go  | 72 ++++++++++++++++
 6 files changed, 177 insertions(+), 24 deletions(-)

diff --git a/cmd/kube-controller-manager/app/autoscaling.go b/cmd/kube-controller-manager/app/autoscaling.go
index 124f369b517..dccdac49e06 100644
--- a/cmd/kube-controller-manager/app/autoscaling.go
+++ b/cmd/kube-controller-manager/app/autoscaling.go
@@ -93,7 +93,6 @@ func startHPAControllerWithMetricsClient(ctx ControllerContext, metricsClient me
 		replicaCalc,
 		ctx.InformerFactory.Autoscaling().V1().HorizontalPodAutoscalers(),
 		ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerSyncPeriod.Duration,
-		ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow.Duration,
 		ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration,
 	).Run(ctx.Stop)
 	return nil, true, nil
diff --git a/pkg/controller/podautoscaler/horizontal.go b/pkg/controller/podautoscaler/horizontal.go
index b3e4f87996d..ec4680ec2fa 100644
--- a/pkg/controller/podautoscaler/horizontal.go
+++ b/pkg/controller/podautoscaler/horizontal.go
@@ -64,7 +64,6 @@ type HorizontalController struct {
 	replicaCalc   *ReplicaCalculator
 	eventRecorder record.EventRecorder
 
-	upscaleForbiddenWindow   time.Duration
 	downscaleForbiddenWindow time.Duration
 
 	// hpaLister is able to list/get HPAs from the shared cache from the informer passed in to
@@ -85,7 +84,6 @@ func NewHorizontalController(
 	replicaCalc *ReplicaCalculator,
 	hpaInformer autoscalinginformers.HorizontalPodAutoscalerInformer,
 	resyncPeriod time.Duration,
-	upscaleForbiddenWindow time.Duration,
 	downscaleForbiddenWindow time.Duration,
 ) *HorizontalController {
 
@@ -99,7 +97,6 @@ func NewHorizontalController(
 		eventRecorder:            recorder,
 		scaleNamespacer:          scaleNamespacer,
 		hpaNamespacer:            hpaNamespacer,
-		upscaleForbiddenWindow:   upscaleForbiddenWindow,
 		downscaleForbiddenWindow: downscaleForbiddenWindow,
 		queue:                    workqueue.NewNamedRateLimitingQueue(NewDefaultHPARateLimiter(resyncPeriod), "horizontalpodautoscaler"),
 		mapper:                   mapper,
@@ -246,7 +243,6 @@ func (a *HorizontalController) computeReplicasForMetrics(hpa *autoscalingv2.Hori
 			setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "InvalidMetricSourceType", "the HPA was unable to compute the replica count: %s", errMsg)
 			return 0, "", nil, time.Time{}, fmt.Errorf(errMsg)
 		}
-
 		if replicas == 0 || replicaCountProposal > replicas {
 			timestamp = timestampProposal
 			replicas = replicaCountProposal
@@ -472,6 +468,7 @@ func (a *HorizontalController) reconcileAutoscaler(hpav1Shared *autoscalingv1.Ho
 		rescaleReason = "Current number of replicas must be greater than 0"
 		desiredReplicas = 1
 	} else {
+
 		metricDesiredReplicas, metricName, metricStatuses, metricTimestamp, err = a.computeReplicasForMetrics(hpa, scale, hpa.Spec.Metrics)
 		if err != nil {
 			a.setCurrentReplicasInStatus(hpa, currentReplicas)
@@ -507,15 +504,6 @@ func (a *HorizontalController) reconcileAutoscaler(hpav1Shared *autoscalingv1.Ho
 				setCondition(hpa, autoscalingv2.AbleToScale, v1.ConditionFalse, "BackoffDownscale", "the time since the previous scale is still within the downscale forbidden window")
 				backoffDown = true
 			}
-
-			if !hpa.Status.LastScaleTime.Add(a.upscaleForbiddenWindow).Before(timestamp) {
-				backoffUp = true
-				if backoffDown {
-					setCondition(hpa, autoscalingv2.AbleToScale, v1.ConditionFalse, "BackoffBoth", "the time since the previous scale is still within both the downscale and upscale forbidden windows")
-				} else {
-					setCondition(hpa, autoscalingv2.AbleToScale, v1.ConditionFalse, "BackoffUpscale", "the time since the previous scale is still within the upscale forbidden window")
-				}
-			}
 		}
 
 		if !backoffDown && !backoffUp {
@@ -634,9 +622,8 @@ func (a *HorizontalController) shouldScale(hpa *autoscalingv2.HorizontalPodAutos
 		return true
 	}
 
-	// Going up only if the usage ratio increased significantly above the target
-	// and there was no rescaling in the last upscaleForbiddenWindow.
-	if desiredReplicas > currentReplicas && hpa.Status.LastScaleTime.Add(a.upscaleForbiddenWindow).Before(timestamp) {
+	// Going up only if the usage ratio increased significantly above the target.
+	if desiredReplicas > currentReplicas {
 		return true
 	}
 
diff --git a/pkg/controller/podautoscaler/horizontal_test.go b/pkg/controller/podautoscaler/horizontal_test.go
index a2f6f060c55..dc4015b08b4 100644
--- a/pkg/controller/podautoscaler/horizontal_test.go
+++ b/pkg/controller/podautoscaler/horizontal_test.go
@@ -643,7 +643,6 @@ func (tc *testCase) setupController(t *testing.T) (*HorizontalController, inform
 	}
 
 	informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc())
-	defaultUpscaleForbiddenWindow := 3 * time.Minute
 	defaultDownscaleForbiddenWindow := 5 * time.Minute
 
 	hpaController := NewHorizontalController(
@@ -654,7 +653,6 @@ func (tc *testCase) setupController(t *testing.T) (*HorizontalController, inform
 		replicaCalc,
 		informerFactory.Autoscaling().V1().HorizontalPodAutoscalers(),
 		controller.NoResyncPeriodFunc(),
-		defaultUpscaleForbiddenWindow,
 		defaultDownscaleForbiddenWindow,
 	)
 	hpaController.hpaListerSynced = alwaysReady
@@ -1728,7 +1726,7 @@ func TestConditionFailedUpdateScale(t *testing.T) {
 	tc.runTest(t)
 }
 
-func TestBackoffUpscale(t *testing.T) {
+func NoTestBackoffUpscale(t *testing.T) {
 	time := metav1.Time{Time: time.Now()}
 	tc := testCase{
 		minReplicas:             1,
@@ -1746,8 +1744,84 @@ func TestBackoffUpscale(t *testing.T) {
 			Reason: "ReadyForNewScale",
 		}, autoscalingv2.HorizontalPodAutoscalerCondition{
 			Type:   autoscalingv2.AbleToScale,
+			Status: v1.ConditionTrue,
+			Reason: "SucceededRescale",
+		}),
+	}
+	tc.runTest(t)
+}
+
+func TestNoBackoffUpscaleCM(t *testing.T) {
+	time := metav1.Time{Time: time.Now()}
+	tc := testCase{
+		minReplicas:             1,
+		maxReplicas:             5,
+		initialReplicas:         3,
+		expectedDesiredReplicas: 4,
+		CPUTarget:               0,
+		metricsTarget: []autoscalingv2.MetricSpec{
+			{
+				Type: autoscalingv2.PodsMetricSourceType,
+				Pods: &autoscalingv2.PodsMetricSource{
+					MetricName:         "qps",
+					TargetAverageValue: resource.MustParse("15.0"),
+				},
+			},
+		},
+		reportedLevels:      []uint64{20000, 10000, 30000},
+		reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
resource.MustParse("1.0"), resource.MustParse("1.0")}, + //useMetricsAPI: true, + lastScaleTime: &time, + expectedConditions: statusOkWithOverrides(autoscalingv2.HorizontalPodAutoscalerCondition{ + Type: autoscalingv2.AbleToScale, + Status: v1.ConditionTrue, + Reason: "ReadyForNewScale", + }, autoscalingv2.HorizontalPodAutoscalerCondition{ + Type: autoscalingv2.AbleToScale, + Status: v1.ConditionTrue, + Reason: "SucceededRescale", + }, autoscalingv2.HorizontalPodAutoscalerCondition{ + Type: autoscalingv2.ScalingLimited, Status: v1.ConditionFalse, - Reason: "BackoffBoth", + Reason: "DesiredWithinRange", + }), + } + tc.runTest(t) +} + +func TestNoBackoffUpscaleCMNoBackoffCpu(t *testing.T) { + time := metav1.Time{Time: time.Now()} + tc := testCase{ + minReplicas: 1, + maxReplicas: 5, + initialReplicas: 3, + expectedDesiredReplicas: 5, + CPUTarget: 10, + metricsTarget: []autoscalingv2.MetricSpec{ + { + Type: autoscalingv2.PodsMetricSourceType, + Pods: &autoscalingv2.PodsMetricSource{ + MetricName: "qps", + TargetAverageValue: resource.MustParse("15.0"), + }, + }, + }, + reportedLevels: []uint64{20000, 10000, 30000}, + reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + useMetricsAPI: true, + lastScaleTime: &time, + expectedConditions: statusOkWithOverrides(autoscalingv2.HorizontalPodAutoscalerCondition{ + Type: autoscalingv2.AbleToScale, + Status: v1.ConditionTrue, + Reason: "ReadyForNewScale", + }, autoscalingv2.HorizontalPodAutoscalerCondition{ + Type: autoscalingv2.AbleToScale, + Status: v1.ConditionTrue, + Reason: "SucceededRescale", + }, autoscalingv2.HorizontalPodAutoscalerCondition{ + Type: autoscalingv2.ScalingLimited, + Status: v1.ConditionTrue, + Reason: "TooManyReplicas", }), } tc.runTest(t) diff --git a/pkg/controller/podautoscaler/legacy_horizontal_test.go b/pkg/controller/podautoscaler/legacy_horizontal_test.go index b6ffc18c312..01fc6986234 100644 --- a/pkg/controller/podautoscaler/legacy_horizontal_test.go +++ b/pkg/controller/podautoscaler/legacy_horizontal_test.go @@ -491,7 +491,6 @@ func (tc *legacyTestCase) runTest(t *testing.T) { } informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc()) - defaultUpscaleForbiddenWindow := 3 * time.Minute defaultDownscaleForbiddenWindow := 5 * time.Minute hpaController := NewHorizontalController( @@ -502,7 +501,6 @@ func (tc *legacyTestCase) runTest(t *testing.T) { replicaCalc, informerFactory.Autoscaling().V1().HorizontalPodAutoscalers(), controller.NoResyncPeriodFunc(), - defaultUpscaleForbiddenWindow, defaultDownscaleForbiddenWindow, ) hpaController.hpaListerSynced = alwaysReady diff --git a/pkg/controller/podautoscaler/replica_calculator.go b/pkg/controller/podautoscaler/replica_calculator.go index 7dc26f9a738..304c2afba82 100644 --- a/pkg/controller/podautoscaler/replica_calculator.go +++ b/pkg/controller/podautoscaler/replica_calculator.go @@ -35,6 +35,10 @@ const ( // defaultTestingTolerance is default value for calculating when to // scale up/scale down. defaultTestingTolerance = 0.1 + + // Pod begins existence as unready. If pod is unready and timestamp of last pod readiness change is + // less than maxDelayOfInitialReadinessStatus after pod start we assume it has never been ready. 
+	maxDelayOfInitialReadinessStatus = 10 * time.Second
 )
 
 type ReplicaCalculator struct {
@@ -205,7 +209,7 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
 	missingPods := sets.NewString()
 
 	for _, pod := range podList.Items {
-		if pod.Status.Phase != v1.PodRunning || !podutil.IsPodReady(&pod) {
+		if pod.Status.Phase != v1.PodRunning || !hasPodBeenReadyBefore(&pod) {
 			// save this pod name for later, but pretend it doesn't exist for now
 			unreadyPods.Insert(pod.Name)
 			delete(metrics, pod.Name)
@@ -381,3 +385,22 @@ func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(currentReplicas int3
 	utilization = int64(math.Ceil(float64(utilization) / float64(currentReplicas)))
 	return replicaCount, utilization, timestamp, nil
 }
+
+// hasPodBeenReadyBefore returns true if the pod is ready, or if it is not ready but its last readiness change happened long enough after it started that we assume it has been ready before.
+func hasPodBeenReadyBefore(pod *v1.Pod) bool {
+	_, readyCondition := podutil.GetPodCondition(&pod.Status, v1.PodReady)
+	if readyCondition == nil {
+		return false
+	}
+	if readyCondition.Status == v1.ConditionTrue {
+		return true
+	}
+	lastReady := readyCondition.LastTransitionTime.Time
+	if pod.Status.StartTime == nil {
+		return false
+	}
+	started := pod.Status.StartTime.Time
+	// If the last status change happened more than maxDelayOfInitialReadinessStatus after the pod
+	// started, assume it was ready in the past.
+	return lastReady.After(started.Add(maxDelayOfInitialReadinessStatus))
+}
diff --git a/pkg/controller/podautoscaler/replica_calculator_test.go b/pkg/controller/podautoscaler/replica_calculator_test.go
index 999070ba440..0dc43b0696a 100644
--- a/pkg/controller/podautoscaler/replica_calculator_test.go
+++ b/pkg/controller/podautoscaler/replica_calculator_test.go
@@ -1069,4 +1069,76 @@ func TestReplicaCalcComputedToleranceAlgImplementation(t *testing.T) {
 	tc.runTest(t)
 }
 
+func TestHasPodBeenReadyBefore(t *testing.T) {
+	tests := []struct {
+		name       string
+		conditions []v1.PodCondition
+		started    time.Time
+		expected   bool
+	}{
+		{
+			"initially unready",
+			[]v1.PodCondition{
+				{
+					Type: v1.PodReady,
+					LastTransitionTime: metav1.Time{
+						Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
+					},
+					Status: v1.ConditionFalse,
+				},
+			},
+			metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
+			false,
+		},
+		{
+			"currently unready",
+			[]v1.PodCondition{
+				{
+					Type: v1.PodReady,
+					LastTransitionTime: metav1.Time{
+						Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
+					},
+					Status: v1.ConditionFalse,
+				},
+			},
+			metav1.Date(2018, 7, 25, 17, 0, 0, 0, time.UTC).Time,
+			true,
+		},
+		{
+			"currently ready",
+			[]v1.PodCondition{
+				{
+					Type: v1.PodReady,
+					LastTransitionTime: metav1.Time{
+						Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
+					},
+					Status: v1.ConditionTrue,
+				},
+			},
+			metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
+			true,
+		},
+		{
+			"no ready status",
+			[]v1.PodCondition{},
+			metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
+			false,
+		},
+	}
+	for _, tc := range tests {
+		pod := &v1.Pod{
+			Status: v1.PodStatus{
+				Conditions: tc.conditions,
+				StartTime: &metav1.Time{
+					Time: tc.started,
+				},
+			},
+		}
+		got := hasPodBeenReadyBefore(pod)
+		if got != tc.expected {
+			t.Errorf("[TestHasPodBeenReadyBefore.%s] got %v, want %v", tc.name, got, tc.expected)
+		}
+	}
+}
+
 // TODO: add more tests
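
For illustration only (not part of the patch): a minimal, self-contained sketch of the readiness heuristic introduced above. It mirrors hasPodBeenReadyBefore, but it is rewritten to compile outside the podautoscaler package, so the helper name podBeenReadyBefore, the inline condition lookup (in place of the package-private podutil.GetPodCondition), and the two sample pods are assumptions made up for this example; the 10s cutoff matches maxDelayOfInitialReadinessStatus.

package main

import (
	"fmt"
	"time"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const maxDelayOfInitialReadinessStatus = 10 * time.Second

// podBeenReadyBefore reports whether the pod is ready now, or is unready but its
// last readiness transition happened long enough after it started that we assume
// it was ready at some point (so its metrics are still usable).
func podBeenReadyBefore(pod *corev1.Pod) bool {
	var ready *corev1.PodCondition
	for i := range pod.Status.Conditions {
		if pod.Status.Conditions[i].Type == corev1.PodReady {
			ready = &pod.Status.Conditions[i]
			break
		}
	}
	if ready == nil || pod.Status.StartTime == nil {
		return false
	}
	if ready.Status == corev1.ConditionTrue {
		return true
	}
	started := pod.Status.StartTime.Time
	return ready.LastTransitionTime.Time.After(started.Add(maxDelayOfInitialReadinessStatus))
}

func main() {
	started := metav1.Date(2018, 7, 25, 17, 0, 0, 0, time.UTC)

	// Unready 5s after start: treated as never ready, so its metrics would be discarded.
	fresh := &corev1.Pod{Status: corev1.PodStatus{
		StartTime: &started,
		Conditions: []corev1.PodCondition{{
			Type:               corev1.PodReady,
			Status:             corev1.ConditionFalse,
			LastTransitionTime: metav1.Date(2018, 7, 25, 17, 0, 5, 0, time.UTC),
		}},
	}}

	// Unready 10 minutes after start: assumed to have been ready before, so its
	// metrics would still be used (it may be unready because of overload).
	overloaded := &corev1.Pod{Status: corev1.PodStatus{
		StartTime: &started,
		Conditions: []corev1.PodCondition{{
			Type:               corev1.PodReady,
			Status:             corev1.ConditionFalse,
			LastTransitionTime: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC),
		}},
	}}

	fmt.Println(podBeenReadyBefore(fresh), podBeenReadyBefore(overloaded)) // false true
}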