Do not count soft-deleted pods for scaling purposes in HPA controller

moonek 2018-08-28 16:27:47 +00:00
parent b2b3c36ecb
commit 3fedbe48e3
3 changed files with 115 additions and 20 deletions
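A pod that is deleted gracefully is not removed from the API server right away: the object gets metadata.deletionTimestamp set and keeps existing (often still Running and still reporting metrics) until its grace period expires. Counting such soft-deleted pods skews the HPA's utilization math, so this change excludes them from the replica calculation the same way failed pods are excluded. A minimal sketch of the check, assuming only the standard k8s.io/api and k8s.io/apimachinery packages; the helper name is hypothetical and not part of the controller:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// excludedFromScaling mirrors the condition added in this commit: a pod is
// skipped when it has failed or has been soft-deleted, i.e. its
// DeletionTimestamp is set even though the object still exists.
func excludedFromScaling(pod *v1.Pod) bool {
	return pod.DeletionTimestamp != nil || pod.Status.Phase == v1.PodFailed
}

func main() {
	now := metav1.Now()
	terminating := v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "web-1", DeletionTimestamp: &now},
		Status:     v1.PodStatus{Phase: v1.PodRunning},
	}
	running := v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "web-0"},
		Status:     v1.PodStatus{Phase: v1.PodRunning},
	}
	fmt.Println(excludedFromScaling(&terminating)) // true: still Running, but marked for deletion
	fmt.Println(excludedFromScaling(&running))     // false: counted as usual
}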


@@ -97,22 +97,23 @@ type testCase struct {
initialReplicas int32
// CPU target utilization as a percentage of the requested resources.
CPUTarget int32
CPUCurrent int32
verifyCPUCurrent bool
reportedLevels []uint64
reportedCPURequests []resource.Quantity
reportedPodReadiness []v1.ConditionStatus
reportedPodStartTime []metav1.Time
reportedPodPhase []v1.PodPhase
scaleUpdated bool
statusUpdated bool
eventCreated bool
verifyEvents bool
useMetricsAPI bool
metricsTarget []autoscalingv2.MetricSpec
expectedDesiredReplicas int32
expectedConditions []autoscalingv1.HorizontalPodAutoscalerCondition
CPUTarget int32
CPUCurrent int32
verifyCPUCurrent bool
reportedLevels []uint64
reportedCPURequests []resource.Quantity
reportedPodReadiness []v1.ConditionStatus
reportedPodStartTime []metav1.Time
reportedPodPhase []v1.PodPhase
reportedPodDeletionTimestamp []bool
scaleUpdated bool
statusUpdated bool
eventCreated bool
verifyEvents bool
useMetricsAPI bool
metricsTarget []autoscalingv2.MetricSpec
expectedDesiredReplicas int32
expectedConditions []autoscalingv1.HorizontalPodAutoscalerCondition
// Channel with names of HPA objects which we have reconciled.
processed chan string
@@ -272,6 +273,11 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa
podPhase = tc.reportedPodPhase[i]
}
podDeletionTimestamp := false
if tc.reportedPodDeletionTimestamp != nil {
podDeletionTimestamp = tc.reportedPodDeletionTimestamp[i]
}
podName := fmt.Sprintf("%s-%d", podNamePrefix, i)
reportedCPURequest := resource.MustParse("1.0")
@@ -310,6 +316,9 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa
},
},
}
if podDeletionTimestamp {
pod.DeletionTimestamp = &metav1.Time{Time: time.Now()}
}
obj.Items = append(obj.Items, pod)
}
return true, obj, nil
@@ -806,6 +815,25 @@ func TestScaleUpIgnoresFailedPods(t *testing.T) {
tc.runTest(t)
}
func TestScaleUpIgnoresDeletionPods(t *testing.T) {
tc := testCase{
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 2,
expectedDesiredReplicas: 4,
CPUTarget: 30,
CPUCurrent: 60,
verifyCPUCurrent: true,
reportedLevels: []uint64{500, 700},
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
reportedPodPhase: []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning},
reportedPodDeletionTimestamp: []bool{false, false, true, true},
useMetricsAPI: true,
}
tc.runTest(t)
}
func TestScaleUpDeployment(t *testing.T) {
tc := testCase{
minReplicas: 2,
@@ -1183,6 +1211,25 @@ func TestScaleDownIgnoresFailedPods(t *testing.T) {
tc.runTest(t)
}
func TestScaleDownIgnoresDeletionPods(t *testing.T) {
tc := testCase{
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 5,
expectedDesiredReplicas: 3,
CPUTarget: 50,
CPUCurrent: 28,
verifyCPUCurrent: true,
reportedLevels: []uint64{100, 300, 500, 250, 250},
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
useMetricsAPI: true,
reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
reportedPodPhase: []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning},
reportedPodDeletionTimestamp: []bool{false, false, false, false, false, true, true},
}
tc.runTest(t)
}
func TestTolerance(t *testing.T) {
tc := testCase{
minReplicas: 1,


@@ -349,7 +349,7 @@ func groupPods(pods []v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1.
ignoredPods = sets.NewString()
for _, pod := range pods {
if pod.Status.Phase == v1.PodFailed {
if pod.DeletionTimestamp != nil || pod.Status.Phase == v1.PodFailed {
continue
}
if _, found := metrics[pod.Name]; !found {

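For context on the hunk above: groupPods already skipped pods in the Failed phase, and with this change a terminating pod (non-nil DeletionTimestamp) is skipped at the same point, so it is neither counted as ready nor added to the set of pods with missing metrics. A stripped-down sketch of that filtering order, reusing the core/v1 import from the example above and simplifying the metrics map to plain integers; the readiness and start-time handling of the real function is omitted:

// groupPodsSketch illustrates only the filtering relevant to this commit;
// it is not the real groupPods. Soft-deleted and failed pods are dropped
// first, then pods without a metric sample are recorded as missing.
func groupPodsSketch(pods []v1.Pod, metrics map[string]int64) (readyCount int, missing []string) {
	for _, pod := range pods {
		if pod.DeletionTimestamp != nil || pod.Status.Phase == v1.PodFailed {
			continue // excluded from the replica calculation entirely
		}
		if _, found := metrics[pod.Name]; !found {
			missing = append(missing, pod.Name)
			continue
		}
		readyCount++ // the real code additionally checks pod readiness here
	}
	return readyCount, missing
}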

@@ -89,9 +89,10 @@ type replicaCalcTestCase struct {
resource *resourceInfo
metric *metricInfo
podReadiness []v1.ConditionStatus
podStartTime []metav1.Time
podPhase []v1.PodPhase
podReadiness []v1.ConditionStatus
podStartTime []metav1.Time
podPhase []v1.PodPhase
podDeletionTimestamp []bool
}
const (
@@ -122,6 +123,10 @@ func (tc *replicaCalcTestCase) prepareTestClientSet() *fake.Clientset {
if tc.podPhase != nil {
podPhase = tc.podPhase[i]
}
podDeletionTimestamp := false
if tc.podDeletionTimestamp != nil {
podDeletionTimestamp = tc.podDeletionTimestamp[i]
}
podName := fmt.Sprintf("%s-%d", podNamePrefix, i)
pod := v1.Pod{
Status: v1.PodStatus{
@@ -145,6 +150,9 @@ func (tc *replicaCalcTestCase) prepareTestClientSet() *fake.Clientset {
Containers: []v1.Container{{}, {}},
},
}
if podDeletionTimestamp {
pod.DeletionTimestamp = &metav1.Time{Time: time.Now()}
}
if tc.resource != nil && i < len(tc.resource.requests) {
pod.Spec.Containers[0].Resources = v1.ResourceRequirements{
@@ -517,6 +525,26 @@ func TestReplicaCalcScaleUpIgnoresFailedPods(t *testing.T) {
tc.runTest(t)
}
func TestReplicaCalcScaleUpIgnoresDeletionPods(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 2,
expectedReplicas: 4,
podReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
podPhase: []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning},
podDeletionTimestamp: []bool{false, false, true, true},
resource: &resourceInfo{
name: v1.ResourceCPU,
requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
levels: []int64{500, 700},
targetUtilization: 30,
expectedUtilization: 60,
expectedValue: numContainersPerPod * 600,
},
}
tc.runTest(t)
}
func TestReplicaCalcScaleUpCM(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 3,
@@ -807,6 +835,26 @@ func TestReplicaCalcScaleDownIgnoresFailedPods(t *testing.T) {
tc.runTest(t)
}
func TestReplicaCalcScaleDownIgnoresDeletionPods(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 5,
expectedReplicas: 3,
podReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
podPhase: []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning},
podDeletionTimestamp: []bool{false, false, false, false, false, true, true},
resource: &resourceInfo{
name: v1.ResourceCPU,
requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
levels: []int64{100, 300, 500, 250, 250},
targetUtilization: 50,
expectedUtilization: 28,
expectedValue: numContainersPerPod * 280,
},
}
tc.runTest(t)
}
func TestReplicaCalcTolerance(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 3,