Do not count failed pods as unready in HPA controller

Currently, when performing a scale-up, any failed pods (which can be present, for example, after evictions performed by the kubelet) are treated as unready. Unready pods are treated as if they had 0% utilization, which slows down or even blocks scale-up.

After this change, failed pods are ignored in all calculations, so they influence neither the scale-up nor the scale-down replica calculations.
Beata Skiba 2018-02-20 19:14:43 +01:00
parent 875f8c308f
commit e5f8bfa023
3 changed files with 98 additions and 5 deletions
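
To make the described behavior concrete, here is a minimal, self-contained sketch. It is illustrative only, not the controller's actual code: `filterPods` and `isReady` are hypothetical helpers that show failed pods being skipped entirely while merely-unready pods are still tracked (and, per the description above, would later be assumed to have 0% utilization during a scale-up decision).

```go
// Illustrative sketch only – not the HPA controller's real code.
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// filterPods returns the names of ready and unready pods,
// ignoring failed pods completely.
func filterPods(pods []v1.Pod) (ready, unready []string) {
	for _, pod := range pods {
		if pod.Status.Phase == v1.PodFailed {
			continue // a failed pod will never become running again
		}
		if pod.Status.Phase != v1.PodRunning || !isReady(&pod) {
			unready = append(unready, pod.Name)
			continue
		}
		ready = append(ready, pod.Name)
	}
	return ready, unready
}

func isReady(pod *v1.Pod) bool {
	for _, c := range pod.Status.Conditions {
		if c.Type == v1.PodReady {
			return c.Status == v1.ConditionTrue
		}
	}
	return false
}

func main() {
	pods := []v1.Pod{
		{ObjectMeta: metav1.ObjectMeta{Name: "web-0"}, Status: v1.PodStatus{
			Phase:      v1.PodRunning,
			Conditions: []v1.PodCondition{{Type: v1.PodReady, Status: v1.ConditionTrue}},
		}},
		{ObjectMeta: metav1.ObjectMeta{Name: "web-1"}, Status: v1.PodStatus{Phase: v1.PodFailed}},
	}
	ready, unready := filterPods(pods)
	fmt.Println(ready, unready) // [web-0] [] – the failed pod affects neither set
}
```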

pkg/controller/podautoscaler/horizontal_test.go

@@ -103,6 +103,7 @@ type testCase struct {
 	reportedLevels       []uint64
 	reportedCPURequests  []resource.Quantity
 	reportedPodReadiness []v1.ConditionStatus
+	reportedPodPhase     []v1.PodPhase
 	scaleUpdated         bool
 	statusUpdated        bool
 	eventCreated         bool
@@ -250,10 +251,14 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa
 		if tc.reportedPodReadiness != nil {
 			podReadiness = tc.reportedPodReadiness[i]
 		}
+		podPhase := v1.PodRunning
+		if tc.reportedPodPhase != nil {
+			podPhase = tc.reportedPodPhase[i]
+		}
 		podName := fmt.Sprintf("%s-%d", podNamePrefix, i)
 		pod := v1.Pod{
 			Status: v1.PodStatus{
-				Phase: v1.PodRunning,
+				Phase: podPhase,
 				Conditions: []v1.PodCondition{
 					{
 						Type: v1.PodReady,
@@ -713,6 +718,24 @@ func TestScaleUpUnreadyNoScale(t *testing.T) {
 	tc.runTest(t)
 }
 
+func TestScaleUpIgnoresFailedPods(t *testing.T) {
+	tc := testCase{
+		minReplicas:          2,
+		maxReplicas:          6,
+		initialReplicas:      2,
+		desiredReplicas:      4,
+		CPUTarget:            30,
+		CPUCurrent:           60,
+		verifyCPUCurrent:     true,
+		reportedLevels:       []uint64{500, 700},
+		reportedCPURequests:  []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
+		reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
+		reportedPodPhase:     []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodFailed, v1.PodFailed},
+		useMetricsAPI:        true,
+	}
+	tc.runTest(t)
+}
+
 func TestScaleUpDeployment(t *testing.T) {
 	tc := testCase{
 		minReplicas: 2,
@@ -1017,6 +1040,24 @@ func TestScaleDownIgnoresUnreadyPods(t *testing.T) {
 	tc.runTest(t)
 }
 
+func TestScaleDownIgnoresFailedPods(t *testing.T) {
+	tc := testCase{
+		minReplicas:          2,
+		maxReplicas:          6,
+		initialReplicas:      5,
+		desiredReplicas:      3,
+		CPUTarget:            50,
+		CPUCurrent:           28,
+		verifyCPUCurrent:     true,
+		reportedLevels:       []uint64{100, 300, 500, 250, 250},
+		reportedCPURequests:  []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
+		useMetricsAPI:        true,
+		reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
+		reportedPodPhase:     []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodFailed, v1.PodFailed},
+	}
+	tc.runTest(t)
+}
+
 func TestTolerance(t *testing.T) {
 	tc := testCase{
 		minReplicas: 1,
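
The expected replica counts in the two new tests above follow directly from the reported numbers: in the scale-up case only the two running pods report metrics, and (500m + 700m) / (2 × 1000m) = 60% against a 30% target, so 2 replicas grow to 4; in the scale-down case 1400m across five running pods is 28% against a 50% target, so 5 replicas shrink to 3. The sketch below is a back-of-the-envelope check using the basic proportional HPA rule desiredReplicas = ceil(currentReplicas × currentUtilization / targetUtilization); it deliberately ignores the readiness adjustments and tolerance band the real controller applies.

```go
// Rough sanity check of the desiredReplicas values in the tests above.
package main

import (
	"fmt"
	"math"
)

// desired applies the basic proportional scaling rule (a simplification of
// what the controller does; no readiness handling, no tolerance).
func desired(currentReplicas int, currentUtilization, targetUtilization float64) int {
	return int(math.Ceil(float64(currentReplicas) * currentUtilization / targetUtilization))
}

func main() {
	fmt.Println(desired(2, 60, 30)) // 4 – matches TestScaleUpIgnoresFailedPods
	fmt.Println(desired(5, 28, 50)) // 3 – matches TestScaleDownIgnoresFailedPods
}
```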

pkg/controller/podautoscaler/replica_calculator.go

@@ -88,7 +88,11 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti
 		if pod.Status.Phase != v1.PodRunning || !podutil.IsPodReady(&pod) {
 			// save this pod name for later, but pretend it doesn't exist for now
-			unreadyPods.Insert(pod.Name)
+			if pod.Status.Phase != v1.PodFailed {
+				// Failed pods should not be counted as unready pods as they will
+				// not become running anymore.
+				unreadyPods.Insert(pod.Name)
+			}
 			delete(metrics, pod.Name)
 			continue
 		}
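
The two code paths matter later in GetResourceReplicas: pods whose metrics are deleted simply drop out of the average, while pods recorded in unreadyPods are, as the commit message describes, treated as if they had 0% utilization when the numbers point toward a scale-up. The sketch below models that difference with plain maps; averageUtilization is a hypothetical helper illustrating the documented behavior, not the calculator's implementation.

```go
// Simplified model of why failed pods should not land in the unready set.
package main

import "fmt"

// averageUtilization counts unready pods as 0% when a scale-up is being
// considered (an assumption mirroring the commit message, not real HPA code).
func averageUtilization(metrics map[string]float64, unready []string, scalingUp bool) float64 {
	if scalingUp {
		for _, name := range unready {
			metrics[name] = 0 // pessimistic assumption for not-yet-ready pods
		}
	}
	sum := 0.0
	for _, u := range metrics {
		sum += u
	}
	return sum / float64(len(metrics))
}

func main() {
	// Two healthy pods at 60% utilization; a third pod has failed.
	// New behavior: the failed pod was never added to either structure.
	fmt.Println(averageUtilization(map[string]float64{"a": 60, "b": 60}, nil, true)) // 60
	// Old behavior: the failed pod sits in the unready set and is counted as 0%.
	fmt.Println(averageUtilization(map[string]float64{"a": 60, "b": 60}, []string{"c"}, true)) // 40
}
```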

pkg/controller/podautoscaler/replica_calculator_test.go

@@ -77,6 +77,7 @@ type replicaCalcTestCase struct {
 	metric       *metricInfo
 	podReadiness []v1.ConditionStatus
+	podPhase     []v1.PodPhase
 }
 
 const (
@@ -90,15 +91,24 @@ func (tc *replicaCalcTestCase) prepareTestClient(t *testing.T) (*fake.Clientset,
 	fakeClient := &fake.Clientset{}
 	fakeClient.AddReactor("list", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) {
 		obj := &v1.PodList{}
-		for i := 0; i < int(tc.currentReplicas); i++ {
+		podsCount := int(tc.currentReplicas)
+		// Failed pods are not included in tc.currentReplicas
+		if tc.podPhase != nil && len(tc.podPhase) > podsCount {
+			podsCount = len(tc.podPhase)
+		}
+		for i := 0; i < podsCount; i++ {
 			podReadiness := v1.ConditionTrue
-			if tc.podReadiness != nil {
+			if tc.podReadiness != nil && i < len(tc.podReadiness) {
 				podReadiness = tc.podReadiness[i]
 			}
+			podPhase := v1.PodRunning
+			if tc.podPhase != nil {
+				podPhase = tc.podPhase[i]
+			}
 			podName := fmt.Sprintf("%s-%d", podNamePrefix, i)
 			pod := v1.Pod{
 				Status: v1.PodStatus{
-					Phase: v1.PodRunning,
+					Phase: podPhase,
 					Conditions: []v1.PodCondition{
 						{
 							Type: v1.PodReady,
@@ -405,6 +415,25 @@ func TestReplicaCalcScaleUpUnreadyNoScale(t *testing.T) {
 	tc.runTest(t)
 }
 
+func TestReplicaCalcScaleUpIgnoresFailedPods(t *testing.T) {
+	tc := replicaCalcTestCase{
+		currentReplicas:  2,
+		expectedReplicas: 4,
+		podReadiness:     []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
+		podPhase:         []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodFailed, v1.PodFailed},
+		resource: &resourceInfo{
+			name:                v1.ResourceCPU,
+			requests:            []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
+			levels:              []int64{500, 700},
+			targetUtilization:   30,
+			expectedUtilization: 60,
+			expectedValue:       numContainersPerPod * 600,
+		},
+	}
+	tc.runTest(t)
+}
+
 func TestReplicaCalcScaleUpCM(t *testing.T) {
 	tc := replicaCalcTestCase{
 		currentReplicas: 3,
@@ -610,6 +639,25 @@ func TestReplicaCalcScaleDownIgnoresUnreadyPods(t *testing.T) {
 	tc.runTest(t)
 }
 
+func TestReplicaCalcScaleDownIgnoresFailedPods(t *testing.T) {
+	tc := replicaCalcTestCase{
+		currentReplicas:  5,
+		expectedReplicas: 3,
+		podReadiness:     []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
+		podPhase:         []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodFailed, v1.PodFailed},
+		resource: &resourceInfo{
+			name:                v1.ResourceCPU,
+			requests:            []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
+			levels:              []int64{100, 300, 500, 250, 250},
+			targetUtilization:   50,
+			expectedUtilization: 28,
+			expectedValue:       numContainersPerPod * 280,
+		},
+	}
+	tc.runTest(t)
+}
+
 func TestReplicaCalcTolerance(t *testing.T) {
 	tc := replicaCalcTestCase{
 		currentReplicas: 3,