From 69b3c6aa39eb8d19c933d5fb7d1d90f88a4741d4 Mon Sep 17 00:00:00 2001
From: Filip Grzadkowski
Date: Wed, 2 Mar 2016 10:08:17 +0100
Subject: [PATCH] Add events to improve understandability of HPA controller
 decisions.

Fixes #22174
---
 pkg/controller/podautoscaler/horizontal.go | 57 +++++++++++++------
 .../podautoscaler/horizontal_test.go       | 16 +++---
 2 files changed, 48 insertions(+), 25 deletions(-)

diff --git a/pkg/controller/podautoscaler/horizontal.go b/pkg/controller/podautoscaler/horizontal.go
index f37d169d5bf..17093311cc4 100644
--- a/pkg/controller/podautoscaler/horizontal.go
+++ b/pkg/controller/podautoscaler/horizontal.go
@@ -91,6 +91,11 @@ func NewHorizontalController(evtNamespacer unversionedcore.EventsGetter, scaleNa
 		framework.ResourceEventHandlerFuncs{
 			AddFunc: func(obj interface{}) {
 				hpa := obj.(*extensions.HorizontalPodAutoscaler)
+				hasCPUPolicy := hpa.Spec.CPUUtilization != nil
+				_, hasCustomMetricsPolicy := hpa.Annotations[HpaCustomMetricsTargetAnnotationName]
+				if !hasCPUPolicy && !hasCustomMetricsPolicy {
+					controller.eventRecorder.Event(hpa, api.EventTypeNormal, "DefaultPolicy", "No scaling policy specified - will use the default policy. See documentation for details")
+				}
 				err := controller.reconcileAutoscaler(hpa)
 				if err != nil {
 					glog.Warningf("Failed to reconcile %s: %v", hpa.Name, err)
@@ -129,7 +134,7 @@ func (a *HorizontalController) computeReplicasForCPUUtilization(hpa *extensions.
 	// TODO: what to do on partial errors (like metrics obtained for 75% of pods).
 	if err != nil {
 		a.eventRecorder.Event(hpa, api.EventTypeWarning, "FailedGetMetrics", err.Error())
-		return 0, nil, time.Time{}, fmt.Errorf("failed to get cpu utilization: %v", err)
+		return 0, nil, time.Time{}, fmt.Errorf("failed to get CPU utilization: %v", err)
 	}
 
 	usageRatio := float64(*currentUtilization) / float64(targetUtilization)
@@ -142,25 +147,29 @@ func (a *HorizontalController) computeReplicasForCPUUtilization(hpa *extensions.
 
 // Computes the desired number of replicas based on the CustomMetrics passed in cmAnnotation as json-serialized
 // extensions.CustomMetricsTargetList.
-// Returns number of replicas, status string (also json-serialized extensions.CustomMetricsCurrentStatusList),
+// Returns the number of replicas, the metric which required the highest number of replicas,
+// the status string (also json-serialized extensions.CustomMetricsCurrentStatusList),
 // last timestamp of the metrics involved in computations or error, if occurred.
 func (a *HorizontalController) computeReplicasForCustomMetrics(hpa *extensions.HorizontalPodAutoscaler, scale *extensions.Scale,
-	cmAnnotation string) (int, string, time.Time, error) {
+	cmAnnotation string) (replicas int, metric string, status string, timestamp time.Time, err error) {
 	currentReplicas := scale.Status.Replicas
-	replicas := 0
-	timestamp := time.Time{}
+	replicas = 0
+	metric = ""
+	status = ""
+	timestamp = time.Time{}
+	err = nil
 	if cmAnnotation == "" {
-		return 0, "", time.Time{}, nil
+		return
 	}
 
 	var targetList extensions.CustomMetricTargetList
 	if err := json.Unmarshal([]byte(cmAnnotation), &targetList); err != nil {
-		return 0, "", time.Time{}, fmt.Errorf("failed to parse custom metrics annotation: %v", err)
+		return 0, "", "", time.Time{}, fmt.Errorf("failed to parse custom metrics annotation: %v", err)
 	}
 	if len(targetList.Items) == 0 {
-		return 0, "", time.Time{}, fmt.Errorf("no custom metrics in annotation")
+		return 0, "", "", time.Time{}, fmt.Errorf("no custom metrics in annotation")
 	}
 
 	statusList := extensions.CustomMetricCurrentStatusList{
@@ -172,7 +181,7 @@ func (a *HorizontalController) computeReplicasForCustomMetrics(hpa *extensions.H
 		// TODO: what to do on partial errors (like metrics obtained for 75% of pods).
 		if err != nil {
 			a.eventRecorder.Event(hpa, api.EventTypeWarning, "FailedGetCustomMetrics", err.Error())
-			return 0, "", time.Time{}, fmt.Errorf("failed to get custom metric value: %v", err)
+			return 0, "", "", time.Time{}, fmt.Errorf("failed to get custom metric value: %v", err)
 		}
 		floatTarget := float64(customMetricTarget.TargetValue.MilliValue()) / 1000.0
 		usageRatio := *value / floatTarget
@@ -186,10 +195,11 @@ func (a *HorizontalController) computeReplicasForCustomMetrics(hpa *extensions.H
 		if replicaCountProposal > replicas {
 			timestamp = currentTimestamp
 			replicas = replicaCountProposal
+			metric = fmt.Sprintf("Custom metric %s", customMetricTarget.Name)
 		}
 		quantity, err := resource.ParseQuantity(fmt.Sprintf("%.3f", *value))
 		if err != nil {
-			return 0, "", time.Time{}, fmt.Errorf("failed to set custom metric value: %v", err)
+			return 0, "", "", time.Time{}, fmt.Errorf("failed to set custom metric value: %v", err)
 		}
 		statusList.Items = append(statusList.Items, extensions.CustomMetricCurrentStatus{
 			Name:           customMetricTarget.Name,
@@ -198,10 +208,10 @@ func (a *HorizontalController) computeReplicasForCustomMetrics(hpa *extensions.H
 	}
 	byteStatusList, err := json.Marshal(statusList)
 	if err != nil {
-		return 0, "", time.Time{}, fmt.Errorf("failed to serialize custom metric status: %v", err)
+		return 0, "", "", time.Time{}, fmt.Errorf("failed to serialize custom metric status: %v", err)
 	}
 
-	return replicas, string(byteStatusList), timestamp, nil
+	return replicas, metric, string(byteStatusList), timestamp, nil
 }
 
 func (a *HorizontalController) reconcileAutoscaler(hpa *extensions.HorizontalPodAutoscaler) error {
@@ -219,17 +229,22 @@ func (a *HorizontalController) reconcileAutoscaler(hpa *extensions.HorizontalPod
 	cpuTimestamp := time.Time{}
 
 	cmDesiredReplicas := 0
+	cmMetric := ""
 	cmStatus := ""
 	cmTimestamp := time.Time{}
 
 	desiredReplicas := 0
+	rescaleReason := ""
 	timestamp := time.Now()
 
 	if currentReplicas > hpa.Spec.MaxReplicas {
+		rescaleReason = "Current number of replicas above Spec.MaxReplicas"
 		desiredReplicas = hpa.Spec.MaxReplicas
 	} else if hpa.Spec.MinReplicas != nil && currentReplicas < *hpa.Spec.MinReplicas {
+		rescaleReason = "Current number of replicas below Spec.MinReplicas"
 		desiredReplicas = *hpa.Spec.MinReplicas
 	} else if currentReplicas == 0 {
rescaleReason = "Current number of replicas must be greater than 0" desiredReplicas = 1 } else { // All basic scenarios covered, the state should be sane, lets use metrics. @@ -245,7 +260,7 @@ func (a *HorizontalController) reconcileAutoscaler(hpa *extensions.HorizontalPod } if cmAnnotationFound { - cmDesiredReplicas, cmStatus, cmTimestamp, err = a.computeReplicasForCustomMetrics(hpa, scale, cmAnnotation) + cmDesiredReplicas, cmMetric, cmStatus, cmTimestamp, err = a.computeReplicasForCustomMetrics(hpa, scale, cmAnnotation) if err != nil { a.updateCurrentReplicasInStatus(hpa, currentReplicas) a.eventRecorder.Event(hpa, api.EventTypeWarning, "FailedComputeCMReplicas", err.Error()) @@ -253,13 +268,21 @@ func (a *HorizontalController) reconcileAutoscaler(hpa *extensions.HorizontalPod } } + rescaleMetric := "" if cpuDesiredReplicas > desiredReplicas { desiredReplicas = cpuDesiredReplicas timestamp = cpuTimestamp + rescaleMetric = "CPU utilization" } if cmDesiredReplicas > desiredReplicas { desiredReplicas = cmDesiredReplicas timestamp = cmTimestamp + rescaleMetric = cmMetric + } + if desiredReplicas > currentReplicas { + rescaleReason = fmt.Sprintf("%s above target", rescaleMetric) + } else if desiredReplicas < currentReplicas { + rescaleReason = "All metrics below target" } if hpa.Spec.MinReplicas != nil && desiredReplicas < *hpa.Spec.MinReplicas { @@ -281,12 +304,12 @@ func (a *HorizontalController) reconcileAutoscaler(hpa *extensions.HorizontalPod scale.Spec.Replicas = desiredReplicas _, err = a.scaleNamespacer.Scales(hpa.Namespace).Update(hpa.Spec.ScaleRef.Kind, scale) if err != nil { - a.eventRecorder.Eventf(hpa, api.EventTypeWarning, "FailedRescale", "New size: %d; error: %v", desiredReplicas, err.Error()) + a.eventRecorder.Eventf(hpa, api.EventTypeWarning, "FailedRescale", "New size: %d; reason: %s; error: %v", desiredReplicas, rescaleReason, err.Error()) return fmt.Errorf("failed to rescale %s: %v", reference, err) } - a.eventRecorder.Eventf(hpa, api.EventTypeNormal, "SuccessfulRescale", "New size: %d", desiredReplicas) - glog.Infof("Successfull rescale of %s, old size: %d, new size: %d", - hpa.Name, currentReplicas, desiredReplicas) + a.eventRecorder.Eventf(hpa, api.EventTypeNormal, "SuccessfulRescale", "New size: %d; reason: %s", desiredReplicas, rescaleReason) + glog.Infof("Successfull rescale of %s, old size: %d, new size: %d, reason: %s", + hpa.Name, currentReplicas, desiredReplicas, rescaleReason) } else { desiredReplicas = currentReplicas } diff --git a/pkg/controller/podautoscaler/horizontal_test.go b/pkg/controller/podautoscaler/horizontal_test.go index a781da65078..fd3ad20fd31 100644 --- a/pkg/controller/podautoscaler/horizontal_test.go +++ b/pkg/controller/podautoscaler/horizontal_test.go @@ -33,12 +33,10 @@ import ( "k8s.io/kubernetes/pkg/client/unversioned/testclient" "k8s.io/kubernetes/pkg/controller/podautoscaler/metrics" "k8s.io/kubernetes/pkg/runtime" - "k8s.io/kubernetes/pkg/util/wait" "k8s.io/kubernetes/pkg/watch" heapster "k8s.io/heapster/api/v1/types" - "github.com/golang/glog" "github.com/stretchr/testify/assert" ) @@ -74,6 +72,8 @@ type testCase struct { statusUpdated bool eventCreated bool verifyEvents bool + // Channel with names of HPA objects which we have reconciled. 
+	processed chan string
 }
 
 func (tc *testCase) computeCPUCurrent() {
@@ -100,6 +100,7 @@ func (tc *testCase) prepareTestClient(t *testing.T) *fake.Clientset {
 	tc.scaleUpdated = false
 	tc.statusUpdated = false
 	tc.eventCreated = false
+	tc.processed = make(chan string, 100)
 	tc.computeCPUCurrent()
 
 	fakeClient := &fake.Clientset{}
@@ -223,6 +224,8 @@ func (tc *testCase) prepareTestClient(t *testing.T) *fake.Clientset {
 			assert.Equal(t, tc.CPUCurrent, *obj.Status.CurrentCPUUtilizationPercentage)
 		}
 		tc.statusUpdated = true
+		// Every time we reconcile an HPA object we update its status.
+		tc.processed <- obj.Name
 		return true, obj, nil
 	})
 
@@ -230,7 +233,7 @@ func (tc *testCase) prepareTestClient(t *testing.T) *fake.Clientset {
 		obj := action.(testclient.CreateAction).GetObject().(*api.Event)
 		if tc.verifyEvents {
 			assert.Equal(t, "SuccessfulRescale", obj.Reason)
-			assert.Equal(t, fmt.Sprintf("New size: %d", tc.desiredReplicas), obj.Message)
+			assert.Equal(t, fmt.Sprintf("New size: %d; reason: CPU utilization above target", tc.desiredReplicas), obj.Message)
 		}
 		tc.eventCreated = true
 		return true, obj, nil
@@ -261,11 +264,8 @@ func (tc *testCase) runTest(t *testing.T) {
 		// We need to wait for events to be broadcasted (sleep for longer than record.sleepDuration).
 		time.Sleep(12 * time.Second)
 	}
-	// Each iteration for an HPA object ends with updating status.
-	wait.Poll(1*time.Second, 30*time.Second, func() (done bool, err error) {
-		glog.Infof("Status value: ", tc.statusUpdated)
-		return tc.statusUpdated, nil
-	})
+	// Wait for the HPA to be processed.
+	<-tc.processed
 	tc.verifyResults(t)
 }
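
For readers of this patch, here is a simplified, self-contained sketch of the reason-selection logic that reconcileAutoscaler gains above. pickRescaleReason and the main wrapper are illustrative names for this sketch only, not part of the Kubernetes code base; the body mirrors the added lines in the diff with the surrounding controller stripped away.

	// Illustrative sketch: the winning (highest) replica proposal also names
	// the metric reported in the rescale event.
	package main

	import "fmt"

	func pickRescaleReason(currentReplicas, cpuProposal, cmProposal int, cmMetric string) (desired int, reason string) {
		rescaleMetric := ""
		if cpuProposal > desired {
			desired = cpuProposal
			rescaleMetric = "CPU utilization"
		}
		if cmProposal > desired {
			desired = cmProposal
			rescaleMetric = cmMetric
		}
		if desired > currentReplicas {
			// Scaling up: attribute the decision to the metric whose proposal won.
			reason = fmt.Sprintf("%s above target", rescaleMetric)
		} else if desired < currentReplicas {
			// Scaling down: every metric proposed fewer replicas.
			reason = "All metrics below target"
		}
		return desired, reason
	}

	func main() {
		d, r := pickRescaleReason(3, 4, 6, "Custom metric qps")
		fmt.Printf("desired=%d reason=%q\n", d, r) // desired=6 reason="Custom metric qps above target"

		d, r = pickRescaleReason(5, 2, 3, "Custom metric qps")
		fmt.Printf("desired=%d reason=%q\n", d, r) // desired=3 reason="All metrics below target"
	}

The asymmetry in the reported reason is deliberate: a scale-up can be blamed on the single metric whose proposal won, while a scale-down only happens when every metric proposes fewer replicas, hence the blanket "All metrics below target".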
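
The test change swaps a 30-second poll loop on a boolean flag for channel-based synchronization: the fake client's status-update reactor signals each reconciled object by name. Below is a minimal sketch of the pattern; testCase and reactor are simplified stand-ins for the fake-client reactor in horizontal_test.go, and the example names are assumptions for illustration.

	// Illustrative sketch of the synchronization pattern this patch adopts.
	package main

	import (
		"fmt"
		"time"
	)

	type testCase struct {
		processed chan string // names of HPA objects whose status was updated
	}

	// reactor plays the role of the fake client's status-update reactor: every
	// reconcile ends with a status update, so a send here signals completion.
	func (tc *testCase) reactor(name string) {
		tc.processed <- name
	}

	func main() {
		// A buffer of 100 mirrors the patch, so the reactor never blocks on send.
		tc := &testCase{processed: make(chan string, 100)}

		go func() {
			time.Sleep(10 * time.Millisecond) // stand-in for asynchronous reconcile work
			tc.reactor("test-hpa")
		}()

		// Deterministic wait: block until the object has been reconciled.
		fmt.Println("reconciled:", <-tc.processed)
	}

Blocking on the receive makes the test deterministic and removes both the polling interval and the timeout ceiling of the old wait.Poll loop.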