Ignore pending pods.

This change adds pending pods to the ignored set before selecting
pods that are missing metrics, so pending pods are always ignored when
calculating scale.

When the HPA decides which pods and metric values to take into account
when scaling, it divides the pods into three disjoint subsets:
1) ready, 2) missing metrics, and 3) ignored. First the HPA selects the
pods which are missing metrics. Then it selects the pods that should be
ignored because they are not ready yet, or are still consuming CPU
during initialization. All the remaining pods go into the ready set.
After the HPA has decided which direction it wants to scale based on
the ready pods, it considers what might have happened if it had the
missing metrics. It makes a conservative guess about what the missing
metrics might have been: 0% if it wants to scale up, 100% if it wants
to scale down.

This is a good thing when scaling up, because the newly added pods will
likely help reduce the usage ratio, even though their metrics are
missing at the moment; the HPA should wait to see the results of its
previous scale decision before it makes another one. However, when
scaling down, it means that many missing metrics can pin the HPA at a
high scale even when load is completely removed. In particular, when
there are many unschedulable pods due to insufficient cluster capacity,
the many missing metrics (assumed to be 100%) can cause the HPA to
avoid scaling down indefinitely.
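
To make the effect concrete, the following is a minimal, self-contained
sketch of that two-pass calculation. It is not the upstream replica
calculator; the helper name and numbers are illustrative only, chosen to
match the new test case added below (5 replicas, a 50% CPU target, one
ready pod at 10% utilization, four pending pods with no metrics).

// A rough sketch, NOT the real HPA code, of how missing metrics assumed
// at 100% can pin a proposed scale-down at the current replica count.
// All names and numbers are illustrative.
package main

import (
	"fmt"
	"math"
)

// desired first picks a direction from the ready pods only, then re-checks
// that direction after filling in the missing metrics with a conservative
// guess: 100% when a scale-down was proposed, 0% when a scale-up was
// proposed (adding 0 is implicit in the division below).
func desired(currentReplicas int, readyUtil []float64, missing int, targetUtil float64) int {
	sum := 0.0
	for _, u := range readyUtil {
		sum += u
	}
	usageRatio := (sum / float64(len(readyUtil))) / targetUtil

	if missing == 0 {
		// No missing metrics: scale directly on the ready pods.
		return int(math.Ceil(usageRatio * float64(len(readyUtil))))
	}

	assumed := sum
	if usageRatio < 1.0 {
		assumed += 100.0 * float64(missing)
	}
	newRatio := (assumed / float64(len(readyUtil)+missing)) / targetUtil

	// If the conservative guess flips the direction, hold the current scale.
	if (usageRatio < 1.0) != (newRatio < 1.0) {
		return currentReplicas
	}
	return int(math.Ceil(newRatio * float64(len(readyUtil)+missing)))
}

func main() {
	// Before this change: the four pending pods count as "missing metrics",
	// the 100% assumption flips the direction, and the HPA stays at 5.
	fmt.Println(desired(5, []float64{10}, 4, 50)) // 5

	// After this change: pending pods are ignored outright, so only the
	// ready pod's 10% utilization counts and the HPA can go down to 1.
	fmt.Println(desired(5, []float64{10}, 0, 50)) // 1
}

The real calculator also applies a tolerance check and only rebalances
the ignored (not-yet-ready) pods when scaling up; the sketch skips both,
but the shape of the problem is the same: as long as unschedulable pods
sit in the missing-metrics set, the 100% guess keeps reversing every
proposed scale-down, which is what the new
TestReplicaCalcScaleDownExcludeUnscheduledPods case below pins down.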
Author: Joseph Burnett, 2019-07-02 20:15:58 +02:00
parent 4f29960cb2
commit 80e279d353
2 changed files with 58 additions and 12 deletions

pkg/controller/podautoscaler/replica_calculator.go

@@ -22,7 +22,7 @@ import (
 	"time"
 
 	autoscaling "k8s.io/api/autoscaling/v2beta2"
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/util/sets"
@@ -365,11 +365,18 @@ func groupPods(pods []*v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1
 		if pod.DeletionTimestamp != nil || pod.Status.Phase == v1.PodFailed {
 			continue
 		}
+		// Pending pods are ignored.
+		if pod.Status.Phase == v1.PodPending {
+			ignoredPods.Insert(pod.Name)
+			continue
+		}
+		// Pods missing metrics.
 		metric, found := metrics[pod.Name]
 		if !found {
 			missingPods.Insert(pod.Name)
 			continue
 		}
+		// Unready pods are ignored.
 		if resource == v1.ResourceCPU {
 			var ignorePod bool
 			_, condition := podutil.GetPodCondition(&pod.Status, v1.PodReady)

pkg/controller/podautoscaler/replica_calculator_test.go

@@ -23,7 +23,7 @@ import (
 	"time"
 
 	autoscalingv2 "k8s.io/api/autoscaling/v2beta2"
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/meta/testrestmapper"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -862,6 +862,25 @@ func TestReplicaCalcScaleDownIncludeUnreadyPods(t *testing.T) {
 	tc.runTest(t)
 }
 
+func TestReplicaCalcScaleDownExcludeUnscheduledPods(t *testing.T) {
+	tc := replicaCalcTestCase{
+		currentReplicas:  5,
+		expectedReplicas: 1,
+		podReadiness:     []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse, v1.ConditionFalse, v1.ConditionFalse},
+		podPhase:         []v1.PodPhase{v1.PodRunning, v1.PodPending, v1.PodPending, v1.PodPending, v1.PodPending},
+		resource: &resourceInfo{
+			name:     v1.ResourceCPU,
+			requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
+			levels:   []int64{100},
+
+			targetUtilization:   50,
+			expectedUtilization: 10,
+			expectedValue:       numContainersPerPod * 100,
+		},
+	}
+	tc.runTest(t)
+}
+
 func TestReplicaCalcScaleDownIgnoreHotCpuPods(t *testing.T) {
 	tc := replicaCalcTestCase{
 		currentReplicas: 5,
@@ -1616,18 +1635,38 @@ func TestGroupPods(t *testing.T) {
 			sets.NewString("lucretius"),
 			sets.NewString("epicurus"),
 		},
+		{
+			name: "pending pods are ignored",
+			pods: []*v1.Pod{
+				{
+					ObjectMeta: metav1.ObjectMeta{
+						Name: "unscheduled",
+					},
+					Status: v1.PodStatus{
+						Phase: v1.PodPending,
+					},
+				},
+			},
+			metrics:             metricsclient.PodMetricsInfo{},
+			resource:            v1.ResourceCPU,
+			expectReadyPodCount: 0,
+			expectIgnoredPods:   sets.NewString("unscheduled"),
+			expectMissingPods:   sets.NewString(),
+		},
 	}
 	for _, tc := range tests {
-		readyPodCount, ignoredPods, missingPods := groupPods(tc.pods, tc.metrics, tc.resource, defaultTestingCpuInitializationPeriod, defaultTestingDelayOfInitialReadinessStatus)
-		if readyPodCount != tc.expectReadyPodCount {
-			t.Errorf("%s got readyPodCount %d, expected %d", tc.name, readyPodCount, tc.expectReadyPodCount)
-		}
-		if !ignoredPods.Equal(tc.expectIgnoredPods) {
-			t.Errorf("%s got unreadyPods %v, expected %v", tc.name, ignoredPods, tc.expectIgnoredPods)
-		}
-		if !missingPods.Equal(tc.expectMissingPods) {
-			t.Errorf("%s got missingPods %v, expected %v", tc.name, missingPods, tc.expectMissingPods)
-		}
+		t.Run(tc.name, func(t *testing.T) {
+			readyPodCount, ignoredPods, missingPods := groupPods(tc.pods, tc.metrics, tc.resource, defaultTestingCpuInitializationPeriod, defaultTestingDelayOfInitialReadinessStatus)
+			if readyPodCount != tc.expectReadyPodCount {
+				t.Errorf("%s got readyPodCount %d, expected %d", tc.name, readyPodCount, tc.expectReadyPodCount)
+			}
+			if !ignoredPods.Equal(tc.expectIgnoredPods) {
+				t.Errorf("%s got unreadyPods %v, expected %v", tc.name, ignoredPods, tc.expectIgnoredPods)
+			}
+			if !missingPods.Equal(tc.expectMissingPods) {
+				t.Errorf("%s got missingPods %v, expected %v", tc.name, missingPods, tc.expectMissingPods)
+			}
+		})
 	}
 }