Do not count soft-deleted pods for scaling purposes in HPA controller

moonek 2018-08-28 16:27:47 +00:00
parent b2b3c36ecb
commit 3fedbe48e3
3 changed files with 115 additions and 20 deletions

View File

@@ -97,22 +97,23 @@ type testCase struct {
initialReplicas int32
// CPU target utilization as a percentage of the requested resources.
CPUTarget int32
CPUCurrent int32
verifyCPUCurrent bool
reportedLevels []uint64
reportedCPURequests []resource.Quantity
reportedPodReadiness []v1.ConditionStatus
reportedPodStartTime []metav1.Time
reportedPodPhase []v1.PodPhase
reportedPodDeletionTimestamp []bool
scaleUpdated bool
statusUpdated bool
eventCreated bool
verifyEvents bool
useMetricsAPI bool
metricsTarget []autoscalingv2.MetricSpec
expectedDesiredReplicas int32
expectedConditions []autoscalingv1.HorizontalPodAutoscalerCondition
// Channel with names of HPA objects which we have reconciled.
processed chan string
@@ -272,6 +273,11 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa
podPhase = tc.reportedPodPhase[i]
}
podDeletionTimestamp := false
if tc.reportedPodDeletionTimestamp != nil {
podDeletionTimestamp = tc.reportedPodDeletionTimestamp[i]
}
podName := fmt.Sprintf("%s-%d", podNamePrefix, i)
reportedCPURequest := resource.MustParse("1.0")
@@ -310,6 +316,9 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa
},
},
}
if podDeletionTimestamp {
pod.DeletionTimestamp = &metav1.Time{Time: time.Now()}
}
obj.Items = append(obj.Items, pod)
}
return true, obj, nil
@@ -806,6 +815,25 @@ func TestScaleUpIgnoresFailedPods(t *testing.T) {
tc.runTest(t)
}
func TestScaleUpIgnoresDeletionPods(t *testing.T) {
tc := testCase{
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 2,
expectedDesiredReplicas: 4,
CPUTarget: 30,
CPUCurrent: 60,
verifyCPUCurrent: true,
reportedLevels: []uint64{500, 700},
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
reportedPodPhase: []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning},
reportedPodDeletionTimestamp: []bool{false, false, true, true},
useMetricsAPI: true,
}
tc.runTest(t)
}
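(A worked check of the expected values in this test, assuming the standard HPA ratio formula desiredReplicas = ceil(currentReplicas * currentUtilization / targetUtilization): only the two pods without a deletion timestamp are counted, so current utilization is (500 + 700) / (2 * 1000m) = 60%, matching CPUCurrent. Against the 30% target the usage ratio is 2.0, and ceil(2 * 2.0) = 4, matching expectedDesiredReplicas.)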
func TestScaleUpDeployment(t *testing.T) {
tc := testCase{
minReplicas: 2,
@@ -1183,6 +1211,25 @@ func TestScaleDownIgnoresFailedPods(t *testing.T) {
tc.runTest(t)
}
func TestScaleDownIgnoresDeletionPods(t *testing.T) {
tc := testCase{
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 5,
expectedDesiredReplicas: 3,
CPUTarget: 50,
CPUCurrent: 28,
verifyCPUCurrent: true,
reportedLevels: []uint64{100, 300, 500, 250, 250},
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
useMetricsAPI: true,
reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
reportedPodPhase: []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning},
reportedPodDeletionTimestamp: []bool{false, false, false, false, false, true, true},
}
tc.runTest(t)
}
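(Same sanity check for the scale-down case, under the same assumed ratio formula: the two pods with a deletion timestamp are excluded, leaving five counted pods with utilization (100 + 300 + 500 + 250 + 250) / (5 * 1000m) = 28% against a 50% target, so ceil(5 * 28/50) = ceil(2.8) = 3, matching expectedDesiredReplicas.)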
func TestTolerance(t *testing.T) {
tc := testCase{
minReplicas: 1,

View File

@@ -349,7 +349,7 @@ func groupPods(pods []v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1.
ignoredPods = sets.NewString()
glog.Errorf("groupPods stack: %v", string(debug.Stack()))
for _, pod := range pods {
- if pod.Status.Phase == v1.PodFailed {
+ if pod.DeletionTimestamp != nil || pod.Status.Phase == v1.PodFailed {
continue
}
if _, found := metrics[pod.Name]; !found {
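The one-line change above is the heart of the commit: groupPods now treats any pod whose metadata.deletionTimestamp is set (a soft-deleted pod that is terminating gracefully) the same as a failed pod and leaves it out of the replica calculation, since such a pod can still report a Running phase and consume CPU while it drains. Below is a minimal, self-contained sketch of that filtering step; the helper name countablePods and the demo harness are illustrative, not the controller's actual API.

// Sketch only: a simplified stand-in for the filtering step in groupPods.
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// countablePods returns only the pods that should influence scaling:
// failed pods and pods that are being deleted (DeletionTimestamp set)
// are skipped, mirroring the new condition in groupPods.
func countablePods(pods []v1.Pod) []v1.Pod {
	var out []v1.Pod
	for _, pod := range pods {
		if pod.DeletionTimestamp != nil || pod.Status.Phase == v1.PodFailed {
			continue
		}
		out = append(out, pod)
	}
	return out
}

func main() {
	now := metav1.Now() // a set deletion timestamp marks the pod as soft-deleted
	pods := []v1.Pod{
		{ObjectMeta: metav1.ObjectMeta{Name: "running"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
		{ObjectMeta: metav1.ObjectMeta{Name: "terminating", DeletionTimestamp: &now}, Status: v1.PodStatus{Phase: v1.PodRunning}},
		{ObjectMeta: metav1.ObjectMeta{Name: "failed"}, Status: v1.PodStatus{Phase: v1.PodFailed}},
	}
	for _, pod := range countablePods(pods) {
		fmt.Println(pod.Name) // prints only "running"
	}
}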

View File

@@ -89,9 +89,10 @@ type replicaCalcTestCase struct {
resource *resourceInfo
metric *metricInfo
podReadiness []v1.ConditionStatus
podStartTime []metav1.Time
podPhase []v1.PodPhase
podDeletionTimestamp []bool
}
const (
@@ -122,6 +123,10 @@ func (tc *replicaCalcTestCase) prepareTestClientSet() *fake.Clientset {
if tc.podPhase != nil {
podPhase = tc.podPhase[i]
}
podDeletionTimestamp := false
if tc.podDeletionTimestamp != nil {
podDeletionTimestamp = tc.podDeletionTimestamp[i]
}
podName := fmt.Sprintf("%s-%d", podNamePrefix, i)
pod := v1.Pod{
Status: v1.PodStatus{
@@ -145,6 +150,9 @@ func (tc *replicaCalcTestCase) prepareTestClientSet() *fake.Clientset {
Containers: []v1.Container{{}, {}},
},
}
if podDeletionTimestamp {
pod.DeletionTimestamp = &metav1.Time{Time: time.Now()}
}
if tc.resource != nil && i < len(tc.resource.requests) {
pod.Spec.Containers[0].Resources = v1.ResourceRequirements{
@@ -517,6 +525,26 @@ func TestReplicaCalcScaleUpIgnoresFailedPods(t *testing.T) {
tc.runTest(t)
}
func TestReplicaCalcScaleUpIgnoresDeletionPods(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 2,
expectedReplicas: 4,
podReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
podPhase: []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning},
podDeletionTimestamp: []bool{false, false, true, true},
resource: &resourceInfo{
name: v1.ResourceCPU,
requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
levels: []int64{500, 700},
targetUtilization: 30,
expectedUtilization: 60,
expectedValue: numContainersPerPod * 600,
},
}
tc.runTest(t)
}
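(In the replica-calculator variant the two soft-deleted pods are likewise dropped, so expectedUtilization is the average of the two counted levels, (500 + 700) / 2 / 1000m = 60%, and expectedValue is that same 600m average multiplied by numContainersPerPod, the fixture's per-pod container count; the scale-down counterpart below follows the same pattern with a 280m average.)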
func TestReplicaCalcScaleUpCM(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 3,
@@ -807,6 +835,26 @@ func TestReplicaCalcScaleDownIgnoresFailedPods(t *testing.T) {
tc.runTest(t)
}
func TestReplicaCalcScaleDownIgnoresDeletionPods(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 5,
expectedReplicas: 3,
podReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
podPhase: []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning},
podDeletionTimestamp: []bool{false, false, false, false, false, true, true},
resource: &resourceInfo{
name: v1.ResourceCPU,
requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
levels: []int64{100, 300, 500, 250, 250},
targetUtilization: 50,
expectedUtilization: 28,
expectedValue: numContainersPerPod * 280,
},
}
tc.runTest(t)
}
func TestReplicaCalcTolerance(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 3,