From 7e3bce7b3e55bbd64babfe8de4ab1a9b987b87cc Mon Sep 17 00:00:00 2001 From: mattjmcnaughton Date: Tue, 6 Mar 2018 09:51:49 -0500 Subject: [PATCH 1/2] `GetObjectMetricReplicas` ignores unready pods Previously, when `GetObjectMetricReplicas` calculated the desired replica count, it multiplied the usage ratio by the current number of replicas. This method caused over-scaling when there were pods that were not ready for a long period of time. For example, if there were pods A, B, and C, and only pod A was ready, and the usage ratio was 500%, we would previously specify 15 pods as the desired replicas (even though really only one pod was handling the load). After this change, we now multiple the usage ratio by the number of ready pods for `GetObjectMetricReplicas`. In the example above, we'd only desire 5 replica pods. This change gives `GetObjectMetricReplicas` the same behavior as the other replica calculator methods. Only `GetExternalMetricReplicas` and `GetExternalPerPodMetricRepliacs` still allow unready pods to impact the number of desired replicas. I will fix this issue in the following commit. --- pkg/controller/podautoscaler/horizontal.go | 2 +- .../podautoscaler/horizontal_test.go | 36 ++++++++++++----- .../podautoscaler/replica_calculator.go | 40 ++++++++++++++++++- .../podautoscaler/replica_calculator_test.go | 22 +++++++++- 4 files changed, 87 insertions(+), 13 deletions(-) diff --git a/pkg/controller/podautoscaler/horizontal.go b/pkg/controller/podautoscaler/horizontal.go index 737ceb9bb5f..c25d6b81883 100644 --- a/pkg/controller/podautoscaler/horizontal.go +++ b/pkg/controller/podautoscaler/horizontal.go @@ -223,7 +223,7 @@ func (a *HorizontalController) computeReplicasForMetrics(hpa *autoscalingv2.Hori switch metricSpec.Type { case autoscalingv2.ObjectMetricSourceType: - replicaCountProposal, utilizationProposal, timestampProposal, err = a.replicaCalc.GetObjectMetricReplicas(currentReplicas, metricSpec.Object.TargetValue.MilliValue(), metricSpec.Object.MetricName, hpa.Namespace, &metricSpec.Object.Target) + replicaCountProposal, utilizationProposal, timestampProposal, err = a.replicaCalc.GetObjectMetricReplicas(currentReplicas, metricSpec.Object.TargetValue.MilliValue(), metricSpec.Object.MetricName, hpa.Namespace, &metricSpec.Object.Target, selector) if err != nil { a.eventRecorder.Event(hpa, v1.EventTypeWarning, "FailedGetObjectMetric", err.Error()) setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "FailedGetObjectMetric", "the HPA was unable to compute the replica count: %v", err) diff --git a/pkg/controller/podautoscaler/horizontal_test.go b/pkg/controller/podautoscaler/horizontal_test.go index 2d74a8aef9e..cfc21cd9fb5 100644 --- a/pkg/controller/podautoscaler/horizontal_test.go +++ b/pkg/controller/podautoscaler/horizontal_test.go @@ -246,16 +246,32 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa defer tc.Unlock() obj := &v1.PodList{} - for i := 0; i < len(tc.reportedCPURequests); i++ { + + specifiedCPURequests := tc.reportedCPURequests != nil + + numPodsToCreate := int(tc.initialReplicas) + if specifiedCPURequests { + numPodsToCreate = len(tc.reportedCPURequests) + } + + for i := 0; i < numPodsToCreate; i++ { podReadiness := v1.ConditionTrue if tc.reportedPodReadiness != nil { podReadiness = tc.reportedPodReadiness[i] } + podPhase := v1.PodRunning if tc.reportedPodPhase != nil { podPhase = tc.reportedPodPhase[i] } + podName := fmt.Sprintf("%s-%d", podNamePrefix, i) + + reportedCPURequest := resource.MustParse("1.0") + if specifiedCPURequests { + reportedCPURequest = tc.reportedCPURequests[i] + } + pod := v1.Pod{ Status: v1.PodStatus{ Phase: podPhase, @@ -273,12 +289,13 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa "name": podNamePrefix, }, }, + Spec: v1.PodSpec{ Containers: []v1.Container{ { Resources: v1.ResourceRequirements{ Requests: v1.ResourceList{ - v1.ResourceCPU: tc.reportedCPURequests[i], + v1.ResourceCPU: reportedCPURequest, }, }, }, @@ -1348,13 +1365,14 @@ func TestEmptyMetrics(t *testing.T) { func TestEmptyCPURequest(t *testing.T) { tc := testCase{ - minReplicas: 1, - maxReplicas: 5, - initialReplicas: 1, - desiredReplicas: 1, - CPUTarget: 100, - reportedLevels: []uint64{200}, - useMetricsAPI: true, + minReplicas: 1, + maxReplicas: 5, + initialReplicas: 1, + desiredReplicas: 1, + CPUTarget: 100, + reportedLevels: []uint64{200}, + reportedCPURequests: []resource.Quantity{}, + useMetricsAPI: true, expectedConditions: []autoscalingv1.HorizontalPodAutoscalerCondition{ {Type: autoscalingv1.AbleToScale, Status: v1.ConditionTrue, Reason: "SucceededGetScale"}, {Type: autoscalingv1.ScalingActive, Status: v1.ConditionFalse, Reason: "FailedGetResourceMetric"}, diff --git a/pkg/controller/podautoscaler/replica_calculator.go b/pkg/controller/podautoscaler/replica_calculator.go index a7a54677c02..2dd22c6c065 100644 --- a/pkg/controller/podautoscaler/replica_calculator.go +++ b/pkg/controller/podautoscaler/replica_calculator.go @@ -276,7 +276,7 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet // GetObjectMetricReplicas calculates the desired replica count based on a target metric utilization (as a milli-value) // for the given object in the given namespace, and the current replica count. -func (c *ReplicaCalculator) GetObjectMetricReplicas(currentReplicas int32, targetUtilization int64, metricName string, namespace string, objectRef *autoscaling.CrossVersionObjectReference) (replicaCount int32, utilization int64, timestamp time.Time, err error) { +func (c *ReplicaCalculator) GetObjectMetricReplicas(currentReplicas int32, targetUtilization int64, metricName string, namespace string, objectRef *autoscaling.CrossVersionObjectReference, selector labels.Selector) (replicaCount int32, utilization int64, timestamp time.Time, err error) { utilization, timestamp, err = c.metricsClient.GetObjectMetric(metricName, namespace, objectRef) if err != nil { return 0, 0, time.Time{}, fmt.Errorf("unable to get metric %s: %v on %s %s/%s", metricName, objectRef.Kind, namespace, objectRef.Name, err) @@ -287,11 +287,47 @@ func (c *ReplicaCalculator) GetObjectMetricReplicas(currentReplicas int32, targe // return the current replicas if the change would be too small return currentReplicas, utilization, timestamp, nil } - replicaCount = int32(math.Ceil(usageRatio * float64(currentReplicas))) + + // Calculate the total number of ready pods, as we want to base the + // replica count off of how many pods are ready, not how many pods + // exist. + readyPodCount, err := c.getReadyPodsCount(namespace, selector) + + if err != nil { + return 0, 0, time.Time{}, fmt.Errorf("unable to calculate ready pods: %s", err) + } + + replicaCount = int32(math.Ceil(usageRatio * float64(readyPodCount))) return replicaCount, utilization, timestamp, nil } +// @TODO(mattjmcnaughton) Many different functions in this module use variations +// of this function. Make this function generic, so we don't repeat the same +// logic in multiple places. +func (c *ReplicaCalculator) getReadyPodsCount(namespace string, selector labels.Selector) (int64, error) { + podList, err := c.podsGetter.Pods(namespace).List(metav1.ListOptions{LabelSelector: selector.String()}) + if err != nil { + return 0, fmt.Errorf("unable to get pods while calculating replica count: %v", err) + } + + if len(podList.Items) == 0 { + return 0, fmt.Errorf("no pods returned by selector while calculating replica count") + } + + readyPodCount := 0 + + for _, pod := range podList.Items { + if pod.Status.Phase != v1.PodRunning || !podutil.IsPodReady(&pod) { + continue + } + + readyPodCount++ + } + + return int64(readyPodCount), nil +} + // GetExternalMetricReplicas calculates the desired replica count based on a // target metric value (as a milli-value) for the external metric in the given // namespace, and the current replica count. diff --git a/pkg/controller/podautoscaler/replica_calculator_test.go b/pkg/controller/podautoscaler/replica_calculator_test.go index 2c5f8250df7..042eae2ab8e 100644 --- a/pkg/controller/podautoscaler/replica_calculator_test.go +++ b/pkg/controller/podautoscaler/replica_calculator_test.go @@ -323,7 +323,7 @@ func (tc *replicaCalcTestCase) runTest(t *testing.T) { var outTimestamp time.Time var err error if tc.metric.singleObject != nil { - outReplicas, outUtilization, outTimestamp, err = replicaCalc.GetObjectMetricReplicas(tc.currentReplicas, tc.metric.targetUtilization, tc.metric.name, testNamespace, tc.metric.singleObject) + outReplicas, outUtilization, outTimestamp, err = replicaCalc.GetObjectMetricReplicas(tc.currentReplicas, tc.metric.targetUtilization, tc.metric.name, testNamespace, tc.metric.singleObject, selector) } else if tc.metric.selector != nil { if tc.metric.targetUtilization > 0 { outReplicas, outUtilization, outTimestamp, err = replicaCalc.GetExternalMetricReplicas(tc.currentReplicas, tc.metric.targetUtilization, tc.metric.name, testNamespace, tc.metric.selector) @@ -497,6 +497,26 @@ func TestReplicaCalcScaleUpCMObject(t *testing.T) { tc.runTest(t) } +func TestReplicaCalcScaleUpCMObjectIgnoreUnreadyPods(t *testing.T) { + tc := replicaCalcTestCase{ + currentReplicas: 3, + expectedReplicas: 5, // If we did not ignore unready pods, we'd expect 15 replicas. + podReadiness: []v1.ConditionStatus{v1.ConditionFalse, v1.ConditionTrue, v1.ConditionFalse}, + metric: &metricInfo{ + name: "qps", + levels: []int64{50000}, + targetUtilization: 10000, + expectedUtilization: 50000, + singleObject: &autoscalingv2.CrossVersionObjectReference{ + Kind: "Deployment", + APIVersion: "extensions/v1beta1", + Name: "some-deployment", + }, + }, + } + tc.runTest(t) +} + func TestReplicaCalcScaleUpCMExternal(t *testing.T) { tc := replicaCalcTestCase{ currentReplicas: 1, From d33494d459020e0089ff3ae5b9d142d8b6cede9e Mon Sep 17 00:00:00 2001 From: mattjmcnaughton Date: Wed, 7 Mar 2018 08:57:11 -0500 Subject: [PATCH 2/2] `GetExternalMetricReplicas` ignores unready pods Similar to the change we made for `GetObjectMetricReplicas` in the previous commit. Ensure that `GetExternalMetricReplicas` does not include unready pods when its determining how many replica it desires. Including unready pods can lead to over-scaling. We did not change the behavior of `GetExternalPerPodMetricReplicas`, as it is slightly less clear what is the desired behavior. We did make some small naming refactorings to this method, which will make it easier to ignore unready pods if we decide we want to. --- pkg/controller/podautoscaler/horizontal.go | 2 +- .../podautoscaler/replica_calculator.go | 33 ++++++++++--------- .../podautoscaler/replica_calculator_test.go | 20 +++++++++-- 3 files changed, 36 insertions(+), 19 deletions(-) diff --git a/pkg/controller/podautoscaler/horizontal.go b/pkg/controller/podautoscaler/horizontal.go index c25d6b81883..c566d773203 100644 --- a/pkg/controller/podautoscaler/horizontal.go +++ b/pkg/controller/podautoscaler/horizontal.go @@ -317,7 +317,7 @@ func (a *HorizontalController) computeReplicasForMetrics(hpa *autoscalingv2.Hori }, } } else if metricSpec.External.TargetValue != nil { - replicaCountProposal, utilizationProposal, timestampProposal, err = a.replicaCalc.GetExternalMetricReplicas(currentReplicas, metricSpec.External.TargetValue.MilliValue(), metricSpec.External.MetricName, hpa.Namespace, metricSpec.External.MetricSelector) + replicaCountProposal, utilizationProposal, timestampProposal, err = a.replicaCalc.GetExternalMetricReplicas(currentReplicas, metricSpec.External.TargetValue.MilliValue(), metricSpec.External.MetricName, hpa.Namespace, metricSpec.External.MetricSelector, selector) if err != nil { a.eventRecorder.Event(hpa, v1.EventTypeWarning, "FailedGetExternalMetric", err.Error()) setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "FailedGetExternalMetric", "the HPA was unable to compute the replica count: %v", err) diff --git a/pkg/controller/podautoscaler/replica_calculator.go b/pkg/controller/podautoscaler/replica_calculator.go index 2dd22c6c065..7dc26f9a738 100644 --- a/pkg/controller/podautoscaler/replica_calculator.go +++ b/pkg/controller/podautoscaler/replica_calculator.go @@ -288,9 +288,6 @@ func (c *ReplicaCalculator) GetObjectMetricReplicas(currentReplicas int32, targe return currentReplicas, utilization, timestamp, nil } - // Calculate the total number of ready pods, as we want to base the - // replica count off of how many pods are ready, not how many pods - // exist. readyPodCount, err := c.getReadyPodsCount(namespace, selector) if err != nil { @@ -318,11 +315,9 @@ func (c *ReplicaCalculator) getReadyPodsCount(namespace string, selector labels. readyPodCount := 0 for _, pod := range podList.Items { - if pod.Status.Phase != v1.PodRunning || !podutil.IsPodReady(&pod) { - continue + if pod.Status.Phase == v1.PodRunning && podutil.IsPodReady(&pod) { + readyPodCount++ } - - readyPodCount++ } return int64(readyPodCount), nil @@ -331,40 +326,46 @@ func (c *ReplicaCalculator) getReadyPodsCount(namespace string, selector labels. // GetExternalMetricReplicas calculates the desired replica count based on a // target metric value (as a milli-value) for the external metric in the given // namespace, and the current replica count. -func (c *ReplicaCalculator) GetExternalMetricReplicas(currentReplicas int32, targetUtilization int64, metricName, namespace string, selector *metav1.LabelSelector) (replicaCount int32, utilization int64, timestamp time.Time, err error) { - labelSelector, err := metav1.LabelSelectorAsSelector(selector) +func (c *ReplicaCalculator) GetExternalMetricReplicas(currentReplicas int32, targetUtilization int64, metricName, namespace string, metricSelector *metav1.LabelSelector, podSelector labels.Selector) (replicaCount int32, utilization int64, timestamp time.Time, err error) { + metricLabelSelector, err := metav1.LabelSelectorAsSelector(metricSelector) if err != nil { return 0, 0, time.Time{}, err } - metrics, timestamp, err := c.metricsClient.GetExternalMetric(metricName, namespace, labelSelector) + metrics, timestamp, err := c.metricsClient.GetExternalMetric(metricName, namespace, metricLabelSelector) if err != nil { - return 0, 0, time.Time{}, fmt.Errorf("unable to get external metric %s/%s/%+v: %s", namespace, metricName, selector, err) + return 0, 0, time.Time{}, fmt.Errorf("unable to get external metric %s/%s/%+v: %s", namespace, metricName, metricSelector, err) } utilization = 0 for _, val := range metrics { utilization = utilization + val } + readyPodCount, err := c.getReadyPodsCount(namespace, podSelector) + + if err != nil { + return 0, 0, time.Time{}, fmt.Errorf("unable to calculate ready pods: %s", err) + } + usageRatio := float64(utilization) / float64(targetUtilization) if math.Abs(1.0-usageRatio) <= c.tolerance { // return the current replicas if the change would be too small return currentReplicas, utilization, timestamp, nil } - return int32(math.Ceil(usageRatio * float64(currentReplicas))), utilization, timestamp, nil + return int32(math.Ceil(usageRatio * float64(readyPodCount))), utilization, timestamp, nil } // GetExternalPerPodMetricReplicas calculates the desired replica count based on a // target metric value per pod (as a milli-value) for the external metric in the // given namespace, and the current replica count. -func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(currentReplicas int32, targetUtilizationPerPod int64, metricName, namespace string, selector *metav1.LabelSelector) (replicaCount int32, utilization int64, timestamp time.Time, err error) { - labelSelector, err := metav1.LabelSelectorAsSelector(selector) +func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(currentReplicas int32, targetUtilizationPerPod int64, metricName, namespace string, metricSelector *metav1.LabelSelector) (replicaCount int32, utilization int64, timestamp time.Time, err error) { + metricLabelSelector, err := metav1.LabelSelectorAsSelector(metricSelector) if err != nil { return 0, 0, time.Time{}, err } - metrics, timestamp, err := c.metricsClient.GetExternalMetric(metricName, namespace, labelSelector) + metrics, timestamp, err := c.metricsClient.GetExternalMetric(metricName, namespace, metricLabelSelector) if err != nil { - return 0, 0, time.Time{}, fmt.Errorf("unable to get external metric %s/%s/%+v: %s", namespace, metricName, selector, err) + return 0, 0, time.Time{}, fmt.Errorf("unable to get external metric %s/%s/%+v: %s", namespace, metricName, metricSelector, err) } utilization = 0 for _, val := range metrics { diff --git a/pkg/controller/podautoscaler/replica_calculator_test.go b/pkg/controller/podautoscaler/replica_calculator_test.go index 042eae2ab8e..b67bcc4710b 100644 --- a/pkg/controller/podautoscaler/replica_calculator_test.go +++ b/pkg/controller/podautoscaler/replica_calculator_test.go @@ -326,7 +326,7 @@ func (tc *replicaCalcTestCase) runTest(t *testing.T) { outReplicas, outUtilization, outTimestamp, err = replicaCalc.GetObjectMetricReplicas(tc.currentReplicas, tc.metric.targetUtilization, tc.metric.name, testNamespace, tc.metric.singleObject, selector) } else if tc.metric.selector != nil { if tc.metric.targetUtilization > 0 { - outReplicas, outUtilization, outTimestamp, err = replicaCalc.GetExternalMetricReplicas(tc.currentReplicas, tc.metric.targetUtilization, tc.metric.name, testNamespace, tc.metric.selector) + outReplicas, outUtilization, outTimestamp, err = replicaCalc.GetExternalMetricReplicas(tc.currentReplicas, tc.metric.targetUtilization, tc.metric.name, testNamespace, tc.metric.selector, selector) } else if tc.metric.perPodTargetUtilization > 0 { outReplicas, outUtilization, outTimestamp, err = replicaCalc.GetExternalPerPodMetricReplicas(tc.currentReplicas, tc.metric.perPodTargetUtilization, tc.metric.name, testNamespace, tc.metric.selector) } @@ -497,7 +497,7 @@ func TestReplicaCalcScaleUpCMObject(t *testing.T) { tc.runTest(t) } -func TestReplicaCalcScaleUpCMObjectIgnoreUnreadyPods(t *testing.T) { +func TestReplicaCalcScaleUpCMObjectIgnoresUnreadyPods(t *testing.T) { tc := replicaCalcTestCase{ currentReplicas: 3, expectedReplicas: 5, // If we did not ignore unready pods, we'd expect 15 replicas. @@ -532,6 +532,22 @@ func TestReplicaCalcScaleUpCMExternal(t *testing.T) { tc.runTest(t) } +func TestReplicaCalcScaleUpCMExternalIgnoresUnreadyPods(t *testing.T) { + tc := replicaCalcTestCase{ + currentReplicas: 3, + expectedReplicas: 2, // Would expect 6 if we didn't ignore unready pods + podReadiness: []v1.ConditionStatus{v1.ConditionFalse, v1.ConditionTrue, v1.ConditionFalse}, + metric: &metricInfo{ + name: "qps", + levels: []int64{8600}, + targetUtilization: 4400, + expectedUtilization: 8600, + selector: &metav1.LabelSelector{MatchLabels: map[string]string{"label": "value"}}, + }, + } + tc.runTest(t) +} + func TestReplicaCalcScaleUpCMExternalNoLabels(t *testing.T) { tc := replicaCalcTestCase{ currentReplicas: 1,