Add events to improve understandability of HPA controller decisions.

Fixes #22174
Filip Grzadkowski 2016-03-02 10:08:17 +01:00
parent b5c9af9762
commit 69b3c6aa39
2 changed files with 48 additions and 25 deletions

pkg/controller/podautoscaler/horizontal.go

@@ -91,6 +91,11 @@ func NewHorizontalController(evtNamespacer unversionedcore.EventsGetter, scaleNa
framework.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
hpa := obj.(*extensions.HorizontalPodAutoscaler)
hasCPUPolicy := hpa.Spec.CPUUtilization != nil
_, hasCustomMetricsPolicy := hpa.Annotations[HpaCustomMetricsTargetAnnotationName]
if !hasCPUPolicy && !hasCustomMetricsPolicy {
controller.eventRecorder.Event(hpa, api.EventTypeNormal, "DefaultPolicy", "No scaling policy specified - will use the default one. See documentation for details")
}
err := controller.reconcileAutoscaler(hpa)
if err != nil {
glog.Warningf("Failed to reconcile %s: %v", hpa.Name, err)
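For context, the "DefaultPolicy" event fires when an HPA specifies neither Spec.CPUUtilization nor the custom-metrics annotation; the controller then behaves as if a default CPU target were set. A minimal sketch of that fallback, where both the constant name and the 80% value are assumptions, not part of this diff:

    // Sketch only: an assumed name and value for the built-in default;
    // the real constant lives in the controller package.
    const defaultTargetCPUUtilizationPercentage = 80

    // effectiveCPUTarget is a hypothetical helper showing the fallback that
    // the "DefaultPolicy" event warns about.
    func effectiveCPUTarget(specTarget *int) int {
    	if specTarget != nil {
    		return *specTarget
    	}
    	return defaultTargetCPUUtilizationPercentage
    }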
@@ -129,7 +134,7 @@ func (a *HorizontalController) computeReplicasForCPUUtilization(hpa *extensions.
// TODO: what to do on partial errors (like metrics obtained for 75% of pods).
if err != nil {
a.eventRecorder.Event(hpa, api.EventTypeWarning, "FailedGetMetrics", err.Error())
return 0, nil, time.Time{}, fmt.Errorf("failed to get cpu utilization: %v", err)
return 0, nil, time.Time{}, fmt.Errorf("failed to get CPU utilization: %v", err)
}
usageRatio := float64(*currentUtilization) / float64(targetUtilization)
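The usageRatio computed above feeds the replica proposal with round-up semantics. A self-contained sketch under that assumption (the helper name is illustrative; the rounding matches the documented HPA algorithm):

    package main

    import (
    	"fmt"
    	"math"
    )

    // proposedReplicas scales the current replica count by the ratio of observed
    // to target CPU utilization, rounding up so the target is never undershot.
    func proposedReplicas(currentReplicas, currentUtilization, targetUtilization int) int {
    	usageRatio := float64(currentUtilization) / float64(targetUtilization)
    	return int(math.Ceil(usageRatio * float64(currentReplicas)))
    }

    func main() {
    	// 4 pods averaging 90% CPU against an 80% target -> propose 5 replicas.
    	fmt.Println(proposedReplicas(4, 90, 80)) // 5
    }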
@@ -142,25 +147,29 @@ func (a *HorizontalController) computeReplicasForCPUUtilization(hpa *extensions.
// Computes the desired number of replicas based on the CustomMetrics passed in cmAnnotation as json-serialized
// extensions.CustomMetricTargetList.
// Returns number of replicas, status string (also json-serialized extensions.CustomMetricsCurrentStatusList),
// Returns the number of replicas, the metric that required the highest number of replicas,
// the status string (also a json-serialized extensions.CustomMetricCurrentStatusList),
// and the last timestamp of the metrics involved in the computations, or an error if one occurred.
func (a *HorizontalController) computeReplicasForCustomMetrics(hpa *extensions.HorizontalPodAutoscaler, scale *extensions.Scale,
cmAnnotation string) (int, string, time.Time, error) {
cmAnnotation string) (replicas int, metric string, status string, timestamp time.Time, err error) {
currentReplicas := scale.Status.Replicas
replicas := 0
timestamp := time.Time{}
replicas = 0
metric = ""
status = ""
timestamp = time.Time{}
err = nil
if cmAnnotation == "" {
return 0, "", time.Time{}, nil
return
}
var targetList extensions.CustomMetricTargetList
if err := json.Unmarshal([]byte(cmAnnotation), &targetList); err != nil {
return 0, "", time.Time{}, fmt.Errorf("failed to parse custom metrics annotation: %v", err)
return 0, "", "", time.Time{}, fmt.Errorf("failed to parse custom metrics annotation: %v", err)
}
if len(targetList.Items) == 0 {
return 0, "", time.Time{}, fmt.Errorf("no custom metrics in annotation")
return 0, "", "", time.Time{}, fmt.Errorf("no custom metrics in annotation")
}
statusList := extensions.CustomMetricCurrentStatusList{
@@ -172,7 +181,7 @@ func (a *HorizontalController) computeReplicasForCustomMetrics(hpa *extensions.H
// TODO: what to do on partial errors (like metrics obtained for 75% of pods).
if err != nil {
a.eventRecorder.Event(hpa, api.EventTypeWarning, "FailedGetCustomMetrics", err.Error())
return 0, "", time.Time{}, fmt.Errorf("failed to get custom metric value: %v", err)
return 0, "", "", time.Time{}, fmt.Errorf("failed to get custom metric value: %v", err)
}
floatTarget := float64(customMetricTarget.TargetValue.MilliValue()) / 1000.0
usageRatio := *value / floatTarget
@@ -186,10 +195,11 @@ func (a *HorizontalController) computeReplicasForCustomMetrics(hpa *extensions.H
if replicaCountProposal > replicas {
timestamp = currentTimestamp
replicas = replicaCountProposal
metric = fmt.Sprintf("Custom metric %s", customMetricTarget.Name)
}
quantity, err := resource.ParseQuantity(fmt.Sprintf("%.3f", *value))
if err != nil {
return 0, "", time.Time{}, fmt.Errorf("failed to set custom metric value: %v", err)
return 0, "", "", time.Time{}, fmt.Errorf("failed to set custom metric value: %v", err)
}
statusList.Items = append(statusList.Items, extensions.CustomMetricCurrentStatus{
Name: customMetricTarget.Name,
@@ -198,10 +208,10 @@ func (a *HorizontalController) computeReplicasForCustomMetrics(hpa *extensions.H
}
byteStatusList, err := json.Marshal(statusList)
if err != nil {
return 0, "", time.Time{}, fmt.Errorf("failed to serialize custom metric status: %v", err)
return 0, "", "", time.Time{}, fmt.Errorf("failed to serialize custom metric status: %v", err)
}
return replicas, string(byteStatusList), timestamp, nil
return replicas, metric, string(byteStatusList), timestamp, nil
}
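For reference, cmAnnotation carries a json-serialized extensions.CustomMetricTargetList. A standalone sketch of the shape this function parses (the metric name "qps" and its value are invented; local structs stand in for the extensions types):

    package main

    import (
    	"encoding/json"
    	"fmt"
    )

    // Stand-ins mirroring the json shape of extensions.CustomMetricTargetList.
    type customMetricTarget struct {
    	Name  string `json:"name"`
    	Value string `json:"value"` // a resource.Quantity in the real type
    }

    type customMetricTargetList struct {
    	Items []customMetricTarget `json:"items"`
    }

    func main() {
    	annotation := `{"items":[{"name":"qps","value":"10"}]}`
    	var targets customMetricTargetList
    	if err := json.Unmarshal([]byte(annotation), &targets); err != nil {
    		fmt.Println("failed to parse custom metrics annotation:", err)
    		return
    	}
    	fmt.Printf("%d target(s); first: %s -> %s\n", len(targets.Items), targets.Items[0].Name, targets.Items[0].Value)
    }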
func (a *HorizontalController) reconcileAutoscaler(hpa *extensions.HorizontalPodAutoscaler) error {
@@ -219,17 +229,22 @@ func (a *HorizontalController) reconcileAutoscaler(hpa *extensions.HorizontalPod
cpuTimestamp := time.Time{}
cmDesiredReplicas := 0
cmMetric := ""
cmStatus := ""
cmTimestamp := time.Time{}
desiredReplicas := 0
rescaleReason := ""
timestamp := time.Now()
if currentReplicas > hpa.Spec.MaxReplicas {
rescaleReason = "Current number of replicas above Spec.MaxReplicas"
desiredReplicas = hpa.Spec.MaxReplicas
} else if hpa.Spec.MinReplicas != nil && currentReplicas < *hpa.Spec.MinReplicas {
rescaleReason = "Current number of replicas below Spec.MinReplicas"
desiredReplicas = *hpa.Spec.MinReplicas
} else if currentReplicas == 0 {
rescaleReason = "Current number of replicas must be greater than 0"
desiredReplicas = 1
} else {
// All basic scenarios covered, the state should be sane, let's use metrics.
@@ -245,7 +260,7 @@ func (a *HorizontalController) reconcileAutoscaler(hpa *extensions.HorizontalPod
}
if cmAnnotationFound {
cmDesiredReplicas, cmStatus, cmTimestamp, err = a.computeReplicasForCustomMetrics(hpa, scale, cmAnnotation)
cmDesiredReplicas, cmMetric, cmStatus, cmTimestamp, err = a.computeReplicasForCustomMetrics(hpa, scale, cmAnnotation)
if err != nil {
a.updateCurrentReplicasInStatus(hpa, currentReplicas)
a.eventRecorder.Event(hpa, api.EventTypeWarning, "FailedComputeCMReplicas", err.Error())
@@ -253,13 +268,21 @@ func (a *HorizontalController) reconcileAutoscaler(hpa *extensions.HorizontalPod
}
}
rescaleMetric := ""
if cpuDesiredReplicas > desiredReplicas {
desiredReplicas = cpuDesiredReplicas
timestamp = cpuTimestamp
rescaleMetric = "CPU utilization"
}
if cmDesiredReplicas > desiredReplicas {
desiredReplicas = cmDesiredReplicas
timestamp = cmTimestamp
rescaleMetric = cmMetric
}
if desiredReplicas > currentReplicas {
rescaleReason = fmt.Sprintf("%s above target", rescaleMetric)
} else if desiredReplicas < currentReplicas {
rescaleReason = "All metrics below target"
}
if hpa.Spec.MinReplicas != nil && desiredReplicas < *hpa.Spec.MinReplicas {
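Distilled, the selection above takes the larger of the two proposals, remembers which metric drove it, and derives the rescale reason from that. A runnable reduction (function and variable names are illustrative):

    package main

    import "fmt"

    // chooseReplicas reduces the selection logic: the larger of the CPU-based and
    // custom-metric-based proposals wins, and the winning metric names the reason.
    func chooseReplicas(current, cpuProposal, cmProposal int, cmMetric string) (desired int, reason string) {
    	rescaleMetric := ""
    	if cpuProposal > desired {
    		desired = cpuProposal
    		rescaleMetric = "CPU utilization"
    	}
    	if cmProposal > desired {
    		desired = cmProposal
    		rescaleMetric = cmMetric
    	}
    	if desired > current {
    		reason = fmt.Sprintf("%s above target", rescaleMetric)
    	} else if desired < current {
    		reason = "All metrics below target"
    	}
    	return desired, reason
    }

    func main() {
    	fmt.Println(chooseReplicas(3, 5, 4, "Custom metric qps")) // 5 CPU utilization above target
    }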
@@ -281,12 +304,12 @@ func (a *HorizontalController) reconcileAutoscaler(hpa *extensions.HorizontalPod
scale.Spec.Replicas = desiredReplicas
_, err = a.scaleNamespacer.Scales(hpa.Namespace).Update(hpa.Spec.ScaleRef.Kind, scale)
if err != nil {
a.eventRecorder.Eventf(hpa, api.EventTypeWarning, "FailedRescale", "New size: %d; error: %v", desiredReplicas, err.Error())
a.eventRecorder.Eventf(hpa, api.EventTypeWarning, "FailedRescale", "New size: %d; reason: %s; error: %v", desiredReplicas, rescaleReason, err.Error())
return fmt.Errorf("failed to rescale %s: %v", reference, err)
}
a.eventRecorder.Eventf(hpa, api.EventTypeNormal, "SuccessfulRescale", "New size: %d", desiredReplicas)
glog.Infof("Successfull rescale of %s, old size: %d, new size: %d",
hpa.Name, currentReplicas, desiredReplicas)
a.eventRecorder.Eventf(hpa, api.EventTypeNormal, "SuccessfulRescale", "New size: %d; reason: %s", desiredReplicas, rescaleReason)
glog.Infof("Successfull rescale of %s, old size: %d, new size: %d, reason: %s",
hpa.Name, currentReplicas, desiredReplicas, rescaleReason)
} else {
desiredReplicas = currentReplicas
}
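The reworked events interpolate the rescale reason into both the failure and success messages; a tiny demo of the resulting string (the values are made up):

    package main

    import "fmt"

    func main() {
    	desiredReplicas, rescaleReason := 5, "CPU utilization above target"
    	fmt.Printf("New size: %d; reason: %s\n", desiredReplicas, rescaleReason)
    	// Output: New size: 5; reason: CPU utilization above target
    }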

pkg/controller/podautoscaler/horizontal_test.go

@@ -33,12 +33,10 @@ import (
"k8s.io/kubernetes/pkg/client/unversioned/testclient"
"k8s.io/kubernetes/pkg/controller/podautoscaler/metrics"
"k8s.io/kubernetes/pkg/runtime"
"k8s.io/kubernetes/pkg/util/wait"
"k8s.io/kubernetes/pkg/watch"
heapster "k8s.io/heapster/api/v1/types"
"github.com/golang/glog"
"github.com/stretchr/testify/assert"
)
@@ -74,6 +72,8 @@ type testCase struct {
statusUpdated bool
eventCreated bool
verifyEvents bool
// Channel with the names of HPA objects that we have reconciled.
processed chan string
}
func (tc *testCase) computeCPUCurrent() {
@@ -100,6 +100,7 @@ func (tc *testCase) prepareTestClient(t *testing.T) *fake.Clientset {
tc.scaleUpdated = false
tc.statusUpdated = false
tc.eventCreated = false
tc.processed = make(chan string, 100)
tc.computeCPUCurrent()
fakeClient := &fake.Clientset{}
@@ -223,6 +224,8 @@ func (tc *testCase) prepareTestClient(t *testing.T) *fake.Clientset {
assert.Equal(t, tc.CPUCurrent, *obj.Status.CurrentCPUUtilizationPercentage)
}
tc.statusUpdated = true
// Every time we reconcile an HPA object, we update its status.
tc.processed <- obj.Name
return true, obj, nil
})
@@ -230,7 +233,7 @@ func (tc *testCase) prepareTestClient(t *testing.T) *fake.Clientset {
obj := action.(testclient.CreateAction).GetObject().(*api.Event)
if tc.verifyEvents {
assert.Equal(t, "SuccessfulRescale", obj.Reason)
assert.Equal(t, fmt.Sprintf("New size: %d", tc.desiredReplicas), obj.Message)
assert.Equal(t, fmt.Sprintf("New size: %d; reason: CPU utilization above target", tc.desiredReplicas), obj.Message)
}
tc.eventCreated = true
return true, obj, nil
@@ -261,11 +264,8 @@ func (tc *testCase) runTest(t *testing.T) {
// We need to wait for events to be broadcasted (sleep for longer than record.sleepDuration).
time.Sleep(12 * time.Second)
}
// Each iteration for an HPA object ends with updating status.
wait.Poll(1*time.Second, 30*time.Second, func() (done bool, err error) {
glog.Infof("Status value: ", tc.statusUpdated)
return tc.statusUpdated, nil
})
// Wait for HPA to be processed.
<-tc.processed
tc.verifyResults(t)
}
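The polling loop is gone: the test now blocks on a channel that the fake status-update reactor signals once per reconciliation. A stripped-down, runnable sketch of the pattern (names are illustrative):

    package main

    import "fmt"

    func main() {
    	// Buffered so the reactor never blocks, even if the test is slow to read.
    	processed := make(chan string, 100)

    	go func() {
    		// Stands in for the fake client's status-update reactor.
    		processed <- "test-hpa"
    	}()

    	// Deterministic: returns as soon as one reconciliation has finished,
    	// instead of polling a flag on a timer.
    	name := <-processed
    	fmt.Printf("reconciled %s\n", name)
    }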