GetExternalMetricReplicas ignores unready pods

Similar to the change we made for `GetObjectMetricReplicas` in the previous commit. Ensure that `GetExternalMetricReplicas` does not include unready pods when its determining how many replica it desires. Including unready pods can lead to over-scaling. We did not change the behavior of `GetExternalPerPodMetricReplicas`, as it is slightly less clear what is the desired behavior. We did make some small naming refactorings to this method, which will make it easier to ignore unready pods if we decide we want to.
2025-07-24 12:15:52 +00:00 · 2018-03-07 08:57:11 -05:00 · 2018-03-07 08:57:11 -05:00 · d33494d459
commit d33494d459
parent 7e3bce7b3e
3 changed files with 36 additions and 19 deletions
--- a/pkg/controller/podautoscaler/horizontal.go
+++ b/pkg/controller/podautoscaler/horizontal.go
@ -317,7 +317,7 @@ func (a *HorizontalController) computeReplicasForMetrics(hpa *autoscalingv2.Hori
 					},
 				}
 			} else if metricSpec.External.TargetValue != nil {
-				replicaCountProposal, utilizationProposal, timestampProposal, err = a.replicaCalc.GetExternalMetricReplicas(currentReplicas, metricSpec.External.TargetValue.MilliValue(), metricSpec.External.MetricName, hpa.Namespace, metricSpec.External.MetricSelector)
+				replicaCountProposal, utilizationProposal, timestampProposal, err = a.replicaCalc.GetExternalMetricReplicas(currentReplicas, metricSpec.External.TargetValue.MilliValue(), metricSpec.External.MetricName, hpa.Namespace, metricSpec.External.MetricSelector, selector)
 				if err != nil {
 					a.eventRecorder.Event(hpa, v1.EventTypeWarning, "FailedGetExternalMetric", err.Error())
 					setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "FailedGetExternalMetric", "the HPA was unable to compute the replica count: %v", err)
--- a/pkg/controller/podautoscaler/replica_calculator.go
+++ b/pkg/controller/podautoscaler/replica_calculator.go
@ -288,9 +288,6 @@ func (c *ReplicaCalculator) GetObjectMetricReplicas(currentReplicas int32, targe
 		return currentReplicas, utilization, timestamp, nil
 	}

-	// Calculate the total number of ready pods, as we want to base the
-	// replica count off of how many pods are ready, not how many pods
-	// exist.
 	readyPodCount, err := c.getReadyPodsCount(namespace, selector)

 	if err != nil {
@ -318,11 +315,9 @@ func (c *ReplicaCalculator) getReadyPodsCount(namespace string, selector labels.
 	readyPodCount := 0

 	for _, pod := range podList.Items {
-		if pod.Status.Phase != v1.PodRunning || !podutil.IsPodReady(&pod) {
-			continue
+		if pod.Status.Phase == v1.PodRunning && podutil.IsPodReady(&pod) {
+			readyPodCount++
 		}
-
-		readyPodCount++
 	}

 	return int64(readyPodCount), nil
@ -331,40 +326,46 @@ func (c *ReplicaCalculator) getReadyPodsCount(namespace string, selector labels.
 // GetExternalMetricReplicas calculates the desired replica count based on a
 // target metric value (as a milli-value) for the external metric in the given
 // namespace, and the current replica count.
-func (c *ReplicaCalculator) GetExternalMetricReplicas(currentReplicas int32, targetUtilization int64, metricName, namespace string, selector *metav1.LabelSelector) (replicaCount int32, utilization int64, timestamp time.Time, err error) {
-	labelSelector, err := metav1.LabelSelectorAsSelector(selector)
+func (c *ReplicaCalculator) GetExternalMetricReplicas(currentReplicas int32, targetUtilization int64, metricName, namespace string, metricSelector *metav1.LabelSelector, podSelector labels.Selector) (replicaCount int32, utilization int64, timestamp time.Time, err error) {
+	metricLabelSelector, err := metav1.LabelSelectorAsSelector(metricSelector)
 	if err != nil {
 		return 0, 0, time.Time{}, err
 	}
-	metrics, timestamp, err := c.metricsClient.GetExternalMetric(metricName, namespace, labelSelector)
+	metrics, timestamp, err := c.metricsClient.GetExternalMetric(metricName, namespace, metricLabelSelector)
 	if err != nil {
-		return 0, 0, time.Time{}, fmt.Errorf("unable to get external metric %s/%s/%+v: %s", namespace, metricName, selector, err)
+		return 0, 0, time.Time{}, fmt.Errorf("unable to get external metric %s/%s/%+v: %s", namespace, metricName, metricSelector, err)
 	}
 	utilization = 0
 	for _, val := range metrics {
 		utilization = utilization + val
 	}

+	readyPodCount, err := c.getReadyPodsCount(namespace, podSelector)
+
+	if err != nil {
+		return 0, 0, time.Time{}, fmt.Errorf("unable to calculate ready pods: %s", err)
+	}
+
 	usageRatio := float64(utilization) / float64(targetUtilization)
 	if math.Abs(1.0-usageRatio) <= c.tolerance {
 		// return the current replicas if the change would be too small
 		return currentReplicas, utilization, timestamp, nil
 	}

-	return int32(math.Ceil(usageRatio * float64(currentReplicas))), utilization, timestamp, nil
+	return int32(math.Ceil(usageRatio * float64(readyPodCount))), utilization, timestamp, nil
 }

 // GetExternalPerPodMetricReplicas calculates the desired replica count based on a
 // target metric value per pod (as a milli-value) for the external metric in the
 // given namespace, and the current replica count.
-func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(currentReplicas int32, targetUtilizationPerPod int64, metricName, namespace string, selector *metav1.LabelSelector) (replicaCount int32, utilization int64, timestamp time.Time, err error) {
-	labelSelector, err := metav1.LabelSelectorAsSelector(selector)
+func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(currentReplicas int32, targetUtilizationPerPod int64, metricName, namespace string, metricSelector *metav1.LabelSelector) (replicaCount int32, utilization int64, timestamp time.Time, err error) {
+	metricLabelSelector, err := metav1.LabelSelectorAsSelector(metricSelector)
 	if err != nil {
 		return 0, 0, time.Time{}, err
 	}
-	metrics, timestamp, err := c.metricsClient.GetExternalMetric(metricName, namespace, labelSelector)
+	metrics, timestamp, err := c.metricsClient.GetExternalMetric(metricName, namespace, metricLabelSelector)
 	if err != nil {
-		return 0, 0, time.Time{}, fmt.Errorf("unable to get external metric %s/%s/%+v: %s", namespace, metricName, selector, err)
+		return 0, 0, time.Time{}, fmt.Errorf("unable to get external metric %s/%s/%+v: %s", namespace, metricName, metricSelector, err)
 	}
 	utilization = 0
 	for _, val := range metrics {
--- a/pkg/controller/podautoscaler/replica_calculator_test.go
+++ b/pkg/controller/podautoscaler/replica_calculator_test.go
@ -326,7 +326,7 @@ func (tc *replicaCalcTestCase) runTest(t *testing.T) {
 			outReplicas, outUtilization, outTimestamp, err = replicaCalc.GetObjectMetricReplicas(tc.currentReplicas, tc.metric.targetUtilization, tc.metric.name, testNamespace, tc.metric.singleObject, selector)
 		} else if tc.metric.selector != nil {
 			if tc.metric.targetUtilization > 0 {
-				outReplicas, outUtilization, outTimestamp, err = replicaCalc.GetExternalMetricReplicas(tc.currentReplicas, tc.metric.targetUtilization, tc.metric.name, testNamespace, tc.metric.selector)
+				outReplicas, outUtilization, outTimestamp, err = replicaCalc.GetExternalMetricReplicas(tc.currentReplicas, tc.metric.targetUtilization, tc.metric.name, testNamespace, tc.metric.selector, selector)
 			} else if tc.metric.perPodTargetUtilization > 0 {
 				outReplicas, outUtilization, outTimestamp, err = replicaCalc.GetExternalPerPodMetricReplicas(tc.currentReplicas, tc.metric.perPodTargetUtilization, tc.metric.name, testNamespace, tc.metric.selector)
 			}
@ -497,7 +497,7 @@ func TestReplicaCalcScaleUpCMObject(t *testing.T) {
 	tc.runTest(t)
 }

-func TestReplicaCalcScaleUpCMObjectIgnoreUnreadyPods(t *testing.T) {
+func TestReplicaCalcScaleUpCMObjectIgnoresUnreadyPods(t *testing.T) {
 	tc := replicaCalcTestCase{
 		currentReplicas:  3,
 		expectedReplicas: 5, // If we did not ignore unready pods, we'd expect 15 replicas.
@ -532,6 +532,22 @@ func TestReplicaCalcScaleUpCMExternal(t *testing.T) {
 	tc.runTest(t)
 }

+func TestReplicaCalcScaleUpCMExternalIgnoresUnreadyPods(t *testing.T) {
+	tc := replicaCalcTestCase{
+		currentReplicas:  3,
+		expectedReplicas: 2, // Would expect 6 if we didn't ignore unready pods
+		podReadiness:     []v1.ConditionStatus{v1.ConditionFalse, v1.ConditionTrue, v1.ConditionFalse},
+		metric: &metricInfo{
+			name:                "qps",
+			levels:              []int64{8600},
+			targetUtilization:   4400,
+			expectedUtilization: 8600,
+			selector:            &metav1.LabelSelector{MatchLabels: map[string]string{"label": "value"}},
+		},
+	}
+	tc.runTest(t)
+}
+
 func TestReplicaCalcScaleUpCMExternalNoLabels(t *testing.T) {
 	tc := replicaCalcTestCase{
 		currentReplicas:  1,