From e5f8bfa023ebbd3cd990a6b771ed28da563a0fd1 Mon Sep 17 00:00:00 2001
From: Beata Skiba
Date: Tue, 20 Feb 2018 19:14:43 +0100
Subject: [PATCH] Do not count failed pods as unready in HPA controller

Currently, when performing a scale up, any failed pods (which can be
present, for example, after evictions performed by the kubelet) are
treated as unready. Unready pods are treated as if they had 0%
utilization, which will slow down or even block scale up.

After this change, failed pods are ignored in all calculations. This
way they influence neither scale-up nor scale-down replica
calculations.
---
 .../podautoscaler/horizontal_test.go         | 43 ++++++++++++++-
 .../podautoscaler/replica_calculator.go      |  6 ++-
 .../podautoscaler/replica_calculator_test.go | 54 +++++++++++++++++--
 3 files changed, 98 insertions(+), 5 deletions(-)

diff --git a/pkg/controller/podautoscaler/horizontal_test.go b/pkg/controller/podautoscaler/horizontal_test.go
index c7a144e4394..2d74a8aef9e 100644
--- a/pkg/controller/podautoscaler/horizontal_test.go
+++ b/pkg/controller/podautoscaler/horizontal_test.go
@@ -103,6 +103,7 @@ type testCase struct {
     reportedLevels       []uint64
     reportedCPURequests  []resource.Quantity
     reportedPodReadiness []v1.ConditionStatus
+    reportedPodPhase     []v1.PodPhase
     scaleUpdated         bool
     statusUpdated        bool
     eventCreated         bool
@@ -250,10 +251,14 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa
             if tc.reportedPodReadiness != nil {
                 podReadiness = tc.reportedPodReadiness[i]
             }
+            podPhase := v1.PodRunning
+            if tc.reportedPodPhase != nil {
+                podPhase = tc.reportedPodPhase[i]
+            }
             podName := fmt.Sprintf("%s-%d", podNamePrefix, i)
             pod := v1.Pod{
                 Status: v1.PodStatus{
-                    Phase: v1.PodRunning,
+                    Phase: podPhase,
                     Conditions: []v1.PodCondition{
                         {
                             Type:   v1.PodReady,
@@ -713,6 +718,24 @@ func TestScaleUpUnreadyNoScale(t *testing.T) {
     tc.runTest(t)
 }
 
+func TestScaleUpIgnoresFailedPods(t *testing.T) {
+    tc := testCase{
+        minReplicas:          2,
+        maxReplicas:          6,
+        initialReplicas:      2,
+        desiredReplicas:      4,
+        CPUTarget:            30,
+        CPUCurrent:           60,
+        verifyCPUCurrent:     true,
+        reportedLevels:       []uint64{500, 700},
+        reportedCPURequests:  []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
+        reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
+        reportedPodPhase:     []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodFailed, v1.PodFailed},
+        useMetricsAPI:        true,
+    }
+    tc.runTest(t)
+}
+
 func TestScaleUpDeployment(t *testing.T) {
     tc := testCase{
         minReplicas:         2,
@@ -1017,6 +1040,24 @@ func TestScaleDownIgnoresUnreadyPods(t *testing.T) {
     tc.runTest(t)
 }
 
+func TestScaleDownIgnoresFailedPods(t *testing.T) {
+    tc := testCase{
+        minReplicas:          2,
+        maxReplicas:          6,
+        initialReplicas:      5,
+        desiredReplicas:      3,
+        CPUTarget:            50,
+        CPUCurrent:           28,
+        verifyCPUCurrent:     true,
+        reportedLevels:       []uint64{100, 300, 500, 250, 250},
+        reportedCPURequests:  []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
+        useMetricsAPI:        true,
+        reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
+        reportedPodPhase:     []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodFailed, v1.PodFailed},
+    }
+    tc.runTest(t)
+}
+
 func TestTolerance(t *testing.T) {
     tc := testCase{
         minReplicas:             1,
diff --git a/pkg/controller/podautoscaler/replica_calculator.go b/pkg/controller/podautoscaler/replica_calculator.go
index c08deded3fd..a7a54677c02 100644
--- a/pkg/controller/podautoscaler/replica_calculator.go
+++ b/pkg/controller/podautoscaler/replica_calculator.go
@@ -88,7 +88,11 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti
 
         if pod.Status.Phase != v1.PodRunning || !podutil.IsPodReady(&pod) {
             // save this pod name for later, but pretend it doesn't exist for now
-            unreadyPods.Insert(pod.Name)
+            if pod.Status.Phase != v1.PodFailed {
+                // Failed pods should not be counted as unready pods as they will
+                // not become running anymore.
+                unreadyPods.Insert(pod.Name)
+            }
             delete(metrics, pod.Name)
             continue
         }
diff --git a/pkg/controller/podautoscaler/replica_calculator_test.go b/pkg/controller/podautoscaler/replica_calculator_test.go
index 58904a085bb..2c5f8250df7 100644
--- a/pkg/controller/podautoscaler/replica_calculator_test.go
+++ b/pkg/controller/podautoscaler/replica_calculator_test.go
@@ -77,6 +77,7 @@ type replicaCalcTestCase struct {
     metric   *metricInfo
 
     podReadiness []v1.ConditionStatus
+    podPhase     []v1.PodPhase
 }
 
 const (
@@ -90,15 +91,24 @@ func (tc *replicaCalcTestCase) prepareTestClient(t *testing.T) (*fake.Clientset,
     fakeClient := &fake.Clientset{}
     fakeClient.AddReactor("list", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) {
         obj := &v1.PodList{}
-        for i := 0; i < int(tc.currentReplicas); i++ {
+        podsCount := int(tc.currentReplicas)
+        // Failed pods are not included in tc.currentReplicas
+        if tc.podPhase != nil && len(tc.podPhase) > podsCount {
+            podsCount = len(tc.podPhase)
+        }
+        for i := 0; i < podsCount; i++ {
             podReadiness := v1.ConditionTrue
-            if tc.podReadiness != nil {
+            if tc.podReadiness != nil && i < len(tc.podReadiness) {
                 podReadiness = tc.podReadiness[i]
             }
+            podPhase := v1.PodRunning
+            if tc.podPhase != nil {
+                podPhase = tc.podPhase[i]
+            }
             podName := fmt.Sprintf("%s-%d", podNamePrefix, i)
             pod := v1.Pod{
                 Status: v1.PodStatus{
-                    Phase: v1.PodRunning,
+                    Phase: podPhase,
                     Conditions: []v1.PodCondition{
                         {
                             Type:   v1.PodReady,
@@ -405,6 +415,25 @@ func TestReplicaCalcScaleUpUnreadyNoScale(t *testing.T) {
     tc.runTest(t)
 }
 
+func TestReplicaCalcScaleUpIgnoresFailedPods(t *testing.T) {
+    tc := replicaCalcTestCase{
+        currentReplicas:  2,
+        expectedReplicas: 4,
+        podReadiness:     []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
+        podPhase:         []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodFailed, v1.PodFailed},
+        resource: &resourceInfo{
+            name:     v1.ResourceCPU,
+            requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
+            levels:   []int64{500, 700},
+
+            targetUtilization:   30,
+            expectedUtilization: 60,
+            expectedValue:       numContainersPerPod * 600,
+        },
+    }
+    tc.runTest(t)
+}
+
 func TestReplicaCalcScaleUpCM(t *testing.T) {
     tc := replicaCalcTestCase{
         currentReplicas: 3,
@@ -610,6 +639,25 @@ func TestReplicaCalcScaleDownIgnoresUnreadyPods(t *testing.T) {
     tc.runTest(t)
 }
 
+func TestReplicaCalcScaleDownIgnoresFailedPods(t *testing.T) {
+    tc := replicaCalcTestCase{
+        currentReplicas:  5,
+        expectedReplicas: 3,
+        podReadiness:     []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
+        podPhase:         []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodFailed, v1.PodFailed},
+        resource: &resourceInfo{
+            name:     v1.ResourceCPU,
+            requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
+            levels:   []int64{100, 300, 500, 250, 250},
+
+            targetUtilization:   50,
+            expectedUtilization: 28,
+            expectedValue:       numContainersPerPod * 280,
+        },
+    }
+    tc.runTest(t)
+}
+
 func TestReplicaCalcTolerance(t *testing.T) {
     tc := replicaCalcTestCase{
         currentReplicas:  3,
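
Note (not part of the patch): as a quick illustration of the new pod bucketing described in the commit message, here is a minimal standalone Go sketch. The names (bucketPods, pod) are hypothetical and not the controller's API; the real logic lives in GetResourceReplicas above. Failed pods are dropped from the calculation entirely, while running-but-unready pods are still set aside and, per the commit message, assumed to be at 0% utilization during a scale up.

package main

import "fmt"

// Simplified stand-ins for the corev1 types used by the real controller.
type PodPhase string

const (
	PodRunning PodPhase = "Running"
	PodFailed  PodPhase = "Failed"
)

type pod struct {
	name  string
	phase PodPhase
	ready bool
}

// bucketPods mirrors the patched condition: failed pods are ignored
// outright, while pods that are not running or not ready are remembered
// as "unready" so a scale up can conservatively assume 0% utilization
// for them. Only running, ready pods contribute real metrics.
func bucketPods(pods []pod) (ready, unready, ignored []string) {
	for _, p := range pods {
		switch {
		case p.phase == PodFailed:
			// Failed pods will never become ready again, so they no
			// longer influence scale-up or scale-down calculations.
			ignored = append(ignored, p.name)
		case p.phase != PodRunning || !p.ready:
			unready = append(unready, p.name)
		default:
			ready = append(ready, p.name)
		}
	}
	return ready, unready, ignored
}

func main() {
	pods := []pod{
		{name: "web-0", phase: PodRunning, ready: true},
		{name: "web-1", phase: PodRunning, ready: false},
		{name: "web-2", phase: PodFailed, ready: false},
	}
	ready, unready, ignored := bucketPods(pods)
	fmt.Println("ready:", ready, "unready:", unready, "ignored:", ignored)
	// Prints: ready: [web-0] unready: [web-1] ignored: [web-2]
	// Before this patch, web-2 would have landed in "unready" and dragged
	// the average utilization down during a scale up.
}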