diff --git a/pkg/controller/podautoscaler/horizontal_test.go b/pkg/controller/podautoscaler/horizontal_test.go
index c7a144e4394..2d74a8aef9e 100644
--- a/pkg/controller/podautoscaler/horizontal_test.go
+++ b/pkg/controller/podautoscaler/horizontal_test.go
@@ -103,6 +103,7 @@ type testCase struct {
 	reportedLevels       []uint64
 	reportedCPURequests  []resource.Quantity
 	reportedPodReadiness []v1.ConditionStatus
+	reportedPodPhase     []v1.PodPhase
 	scaleUpdated         bool
 	statusUpdated        bool
 	eventCreated         bool
@@ -250,10 +251,14 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa
 			if tc.reportedPodReadiness != nil {
 				podReadiness = tc.reportedPodReadiness[i]
 			}
+			podPhase := v1.PodRunning
+			if tc.reportedPodPhase != nil {
+				podPhase = tc.reportedPodPhase[i]
+			}
 			podName := fmt.Sprintf("%s-%d", podNamePrefix, i)
 			pod := v1.Pod{
 				Status: v1.PodStatus{
-					Phase: v1.PodRunning,
+					Phase: podPhase,
 					Conditions: []v1.PodCondition{
 						{
 							Type:   v1.PodReady,
@@ -713,6 +718,24 @@ func TestScaleUpUnreadyNoScale(t *testing.T) {
 	tc.runTest(t)
 }
 
+func TestScaleUpIgnoresFailedPods(t *testing.T) {
+	tc := testCase{
+		minReplicas:          2,
+		maxReplicas:          6,
+		initialReplicas:      2,
+		desiredReplicas:      4,
+		CPUTarget:            30,
+		CPUCurrent:           60,
+		verifyCPUCurrent:     true,
+		reportedLevels:       []uint64{500, 700},
+		reportedCPURequests:  []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
+		reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
+		reportedPodPhase:     []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodFailed, v1.PodFailed},
+		useMetricsAPI:        true,
+	}
+	tc.runTest(t)
+}
+
 func TestScaleUpDeployment(t *testing.T) {
 	tc := testCase{
 		minReplicas:     2,
@@ -1017,6 +1040,24 @@ func TestScaleDownIgnoresUnreadyPods(t *testing.T) {
 	tc.runTest(t)
 }
 
+func TestScaleDownIgnoresFailedPods(t *testing.T) {
+	tc := testCase{
+		minReplicas:          2,
+		maxReplicas:          6,
+		initialReplicas:      5,
+		desiredReplicas:      3,
+		CPUTarget:            50,
+		CPUCurrent:           28,
+		verifyCPUCurrent:     true,
+		reportedLevels:       []uint64{100, 300, 500, 250, 250},
+		reportedCPURequests:  []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
+		useMetricsAPI:        true,
+		reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
+		reportedPodPhase:     []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodFailed, v1.PodFailed},
+	}
+	tc.runTest(t)
+}
+
 func TestTolerance(t *testing.T) {
 	tc := testCase{
 		minReplicas:     1,
diff --git a/pkg/controller/podautoscaler/replica_calculator.go b/pkg/controller/podautoscaler/replica_calculator.go
index c08deded3fd..a7a54677c02 100644
--- a/pkg/controller/podautoscaler/replica_calculator.go
+++ b/pkg/controller/podautoscaler/replica_calculator.go
@@ -88,7 +88,11 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti
 
 		if pod.Status.Phase != v1.PodRunning || !podutil.IsPodReady(&pod) {
 			// save this pod name for later, but pretend it doesn't exist for now
-			unreadyPods.Insert(pod.Name)
+			if pod.Status.Phase != v1.PodFailed {
+				// Failed pods should not be counted as unready pods, as they
+				// will never become running again.
+				unreadyPods.Insert(pod.Name)
+			}
 			delete(metrics, pod.Name)
 			continue
 		}
diff --git a/pkg/controller/podautoscaler/replica_calculator_test.go b/pkg/controller/podautoscaler/replica_calculator_test.go
index 58904a085bb..2c5f8250df7 100644
--- a/pkg/controller/podautoscaler/replica_calculator_test.go
+++ b/pkg/controller/podautoscaler/replica_calculator_test.go
@@ -77,6 +77,7 @@ type replicaCalcTestCase struct {
 	metric *metricInfo
 
 	podReadiness []v1.ConditionStatus
+	podPhase     []v1.PodPhase
 }
 
 const (
@@ -90,15 +91,24 @@ func (tc *replicaCalcTestCase) prepareTestClient(t *testing.T) (*fake.Clientset,
 	fakeClient := &fake.Clientset{}
 	fakeClient.AddReactor("list", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) {
 		obj := &v1.PodList{}
-		for i := 0; i < int(tc.currentReplicas); i++ {
+		podsCount := int(tc.currentReplicas)
+		// Failed pods are not included in tc.currentReplicas.
+		if tc.podPhase != nil && len(tc.podPhase) > podsCount {
+			podsCount = len(tc.podPhase)
+		}
+		for i := 0; i < podsCount; i++ {
 			podReadiness := v1.ConditionTrue
-			if tc.podReadiness != nil {
+			if tc.podReadiness != nil && i < len(tc.podReadiness) {
 				podReadiness = tc.podReadiness[i]
 			}
+			podPhase := v1.PodRunning
+			if tc.podPhase != nil {
+				podPhase = tc.podPhase[i]
+			}
 			podName := fmt.Sprintf("%s-%d", podNamePrefix, i)
 			pod := v1.Pod{
 				Status: v1.PodStatus{
-					Phase: v1.PodRunning,
+					Phase: podPhase,
 					Conditions: []v1.PodCondition{
 						{
 							Type:   v1.PodReady,
@@ -405,6 +415,25 @@ func TestReplicaCalcScaleUpUnreadyNoScale(t *testing.T) {
 	tc.runTest(t)
 }
 
+func TestReplicaCalcScaleUpIgnoresFailedPods(t *testing.T) {
+	tc := replicaCalcTestCase{
+		currentReplicas:  2,
+		expectedReplicas: 4,
+		podReadiness:     []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
+		podPhase:         []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodFailed, v1.PodFailed},
+		resource: &resourceInfo{
+			name:     v1.ResourceCPU,
+			requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
+			levels:   []int64{500, 700},
+
+			targetUtilization:   30,
+			expectedUtilization: 60,
+			expectedValue:       numContainersPerPod * 600,
+		},
+	}
+	tc.runTest(t)
+}
+
 func TestReplicaCalcScaleUpCM(t *testing.T) {
 	tc := replicaCalcTestCase{
 		currentReplicas: 3,
@@ -610,6 +639,25 @@ func TestReplicaCalcScaleDownIgnoresUnreadyPods(t *testing.T) {
 	tc.runTest(t)
 }
 
+func TestReplicaCalcScaleDownIgnoresFailedPods(t *testing.T) {
+	tc := replicaCalcTestCase{
+		currentReplicas:  5,
+		expectedReplicas: 3,
+		podReadiness:     []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
+		podPhase:         []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodFailed, v1.PodFailed},
+		resource: &resourceInfo{
+			name:     v1.ResourceCPU,
+			requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
+			levels:   []int64{100, 300, 500, 250, 250},
+
+			targetUtilization:   50,
+			expectedUtilization: 28,
+			expectedValue:       numContainersPerPod * 280,
+		},
+	}
+	tc.runTest(t)
+}
+
 func TestReplicaCalcTolerance(t *testing.T) {
 	tc := replicaCalcTestCase{
 		currentReplicas: 3,
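
For reference, below is a minimal standalone sketch of the rule this patch adds to GetResourceReplicas: a pod that is not running and not ready is normally set aside as "unready" and revisited later, but a failed pod is dropped from the calculation entirely, since it will never return to running. The pod type, the PodPhase constants, and the classify function are local stand-ins written only for this illustration; they are not part of the patch or of the Kubernetes API packages it uses.

package main

import "fmt"

// PodPhase mirrors the k8s.io/api/core/v1 names used in the patch,
// redefined locally so this sketch runs on its own.
type PodPhase string

const (
	PodRunning PodPhase = "Running"
	PodFailed  PodPhase = "Failed"
)

type pod struct {
	name  string
	phase PodPhase
	ready bool
}

// classify reproduces the patched branch of GetResourceReplicas: a pod
// that is not running and not ready is tracked as unready only when it
// has not failed; failed pods are ignored outright.
func classify(pods []pod) (ready, unready, ignored []string) {
	for _, p := range pods {
		if p.phase != PodRunning || !p.ready {
			if p.phase != PodFailed {
				// May still become ready; revisit it later.
				unreadyPods := p.name
				unready = append(unready, unreadyPods)
			} else {
				// Failed: never counts toward the scaling decision.
				ignored = append(ignored, p.name)
			}
			continue
		}
		ready = append(ready, p.name)
	}
	return
}

func main() {
	pods := []pod{
		{name: "web-0", phase: PodRunning, ready: true},
		{name: "web-1", phase: PodRunning, ready: false}, // unready: dampens scale-up
		{name: "web-2", phase: PodFailed, ready: false},  // failed: excluded entirely
	}
	ready, unready, ignored := classify(pods)
	fmt.Println("ready:", ready, "unready:", unready, "ignored:", ignored)
}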