Merge pull request #39694 from DirectXMan12/bug/hpa-panic

Automatic merge from submit-queue (batch tested with PRs 39694, 39383, 39651, 39691, 39497) HPA Controller: Check for 0-sum request value In certain conditions in which the set of metrics returned by Heapster is completely disjoint from the set of pods returned by the API server, we can have a request sum of zero, which can cause a panic (due to division by zero). This checks for that condition. Fixes #39680 **Release note**: ```release-note Fixes an HPA-related panic due to division-by-zero. ```
2025-12-07 18:06:21 +00:00 · 2017-01-10 21:25:10 -08:00
parent ea18d5c32e c830d94dc4
commit bb7d07a33d
2 changed files with 33 additions and 1 deletions
--- a/pkg/controller/podautoscaler/metrics/utilization.go
+++ b/pkg/controller/podautoscaler/metrics/utilization.go
@@ -16,6 +16,10 @@ limitations under the License.

 package metrics

+import (
+	"fmt"
+)
+
 // GetResourceUtilizationRatio takes in a set of metrics, a set of matching requests,
 // and a target utilization percentage, and calcuates the the ratio of
 // desired to actual utilization (returning that and the actual utilization)
@@ -34,6 +38,12 @@ func GetResourceUtilizationRatio(metrics PodResourceInfo, requests map[string]in
 		requestsTotal += request
 	}

+	// if the set of requests is completely disjoint from the set of metrics,
+	// then we could have an issue where the requests total is zero
+	if requestsTotal == 0 {
+		return 0, 0, fmt.Errorf("no metrics returned matched known pods")
+	}
+
 	currentUtilization := int32((metricsTotal * 100) / requestsTotal)

 	return float64(currentUtilization) / float64(targetUtilization), currentUtilization, nil
--- a/pkg/controller/podautoscaler/replica_calculator_test.go
+++ b/pkg/controller/podautoscaler/replica_calculator_test.go
@@ -47,6 +47,8 @@ type resourceInfo struct {
 	name     v1.ResourceName
 	requests []resource.Quantity
 	levels   []int64
+	// only applies to pod names returned from "heapster"
+	podNames []string

 	targetUtilization   int32
 	expectedUtilization int32
@@ -134,9 +136,13 @@ func (tc *replicaCalcTestCase) prepareTestClient(t *testing.T) *fake.Clientset {
 		if tc.resource != nil {
 			metrics := metricsapi.PodMetricsList{}
 			for i, resValue := range tc.resource.levels {
+				podName := fmt.Sprintf("%s-%d", podNamePrefix, i)
+				if len(tc.resource.podNames) > i {
+					podName = tc.resource.podNames[i]
+				}
 				podMetric := metricsapi.PodMetrics{
 					ObjectMeta: v1.ObjectMeta{
-						Name:      fmt.Sprintf("%s-%d", podNamePrefix, i),
+						Name:      podName,
 						Namespace: testNamespace,
 					},
 					Timestamp: unversioned.Time{Time: tc.timestamp},
@@ -250,6 +256,22 @@ func (tc *replicaCalcTestCase) runTest(t *testing.T) {
 	}
 }

+func TestReplicaCalcDisjointResourcesMetrics(t *testing.T) {
+	tc := replicaCalcTestCase{
+		currentReplicas: 1,
+		expectedError:   fmt.Errorf("no metrics returned matched known pods"),
+		resource: &resourceInfo{
+			name:     v1.ResourceCPU,
+			requests: []resource.Quantity{resource.MustParse("1.0")},
+			levels:   []int64{100},
+			podNames: []string{"an-older-pod-name"},
+
+			targetUtilization: 100,
+		},
+	}
+	tc.runTest(t)
+}
+
 func TestReplicaCalcScaleUp(t *testing.T) {
 	tc := replicaCalcTestCase{
 		currentReplicas:  3,