From 6adc18ed6707ece61fea5ef211d5331f63a47fba Mon Sep 17 00:00:00 2001
From: Jerzy Szczepkowski <jsz@google.com>
Date: Fri, 4 Nov 2016 15:38:06 +0100
Subject: [PATCH] Improved event generation for HPA.

Improved event generation for HPA: added grace-period before warning event is generated. Resolves #29799.
---
 pkg/controller/podautoscaler/horizontal.go | 47 +++++++++++++++++++---
 1 file changed, 41 insertions(+), 6 deletions(-)

diff --git a/pkg/controller/podautoscaler/horizontal.go b/pkg/controller/podautoscaler/horizontal.go
index 30ab629e482..5bec9a902ec 100644
--- a/pkg/controller/podautoscaler/horizontal.go
+++ b/pkg/controller/podautoscaler/horizontal.go
@@ -136,6 +136,15 @@ func (a *HorizontalController) Run(stopCh <-chan struct{}) {
 	glog.Infof("Shutting down HPA Controller")
 }
 
+// getLastScaleTime returns the hpa's last scale time or the hpa's creation time if the last scale time is nil.
+func getLastScaleTime(hpa *autoscaling.HorizontalPodAutoscaler) time.Time {
+	lastScaleTime := hpa.Status.LastScaleTime
+	if lastScaleTime == nil {
+		lastScaleTime = &hpa.CreationTimestamp
+	}
+	return lastScaleTime.Time
+}
+
 func (a *HorizontalController) computeReplicasForCPUUtilization(hpa *autoscaling.HorizontalPodAutoscaler, scale *extensions.Scale) (int32, *int32, time.Time, error) {
 	targetUtilization := int32(defaultTargetCPUUtilizationPercentage)
 	if hpa.Spec.TargetCPUUtilizationPercentage != nil {
@@ -159,7 +168,13 @@ func (a *HorizontalController) computeReplicasForCPUUtilization(hpa *autoscaling
 
 	// TODO: what to do on partial errors (like metrics obtained for 75% of pods).
 	if err != nil {
-		a.eventRecorder.Event(hpa, api.EventTypeWarning, "FailedGetMetrics", err.Error())
+		lastScaleTime := getLastScaleTime(hpa)
+		if time.Now().After(lastScaleTime.Add(upscaleForbiddenWindow)) {
+			a.eventRecorder.Event(hpa, api.EventTypeWarning, "FailedGetMetrics", err.Error())
+		} else {
+			a.eventRecorder.Event(hpa, api.EventTypeNormal, "MetricsNotAvailableYet", err.Error())
+		}
+
 		return 0, nil, time.Time{}, fmt.Errorf("failed to get CPU utilization: %v", err)
 	}
 
@@ -179,8 +194,8 @@ func (a *HorizontalController) computeReplicasForCPUUtilization(hpa *autoscaling
 	return int32(desiredReplicas), &utilization, timestamp, nil
 }
 
-// Computes the desired number of replicas based on the CustomMetrics passed in cmAnnotation as json-serialized
-// extensions.CustomMetricsTargetList.
+// computeReplicasForCustomMetrics computes the desired number of replicas based on the CustomMetrics passed in cmAnnotation
+// as json-serialized extensions.CustomMetricsTargetList.
 // Returns number of replicas, metric which required highest number of replicas,
 // status string (also json-serialized extensions.CustomMetricsCurrentStatusList),
 // last timestamp of the metrics involved in computations or error, if occurred.
@@ -221,7 +236,13 @@ func (a *HorizontalController) computeReplicasForCustomMetrics(hpa *autoscaling.
 		value, currentTimestamp, err := a.metricsClient.GetCustomMetric(customMetricTarget.Name, hpa.Namespace, selector)
 		// TODO: what to do on partial errors (like metrics obtained for 75% of pods).
 		if err != nil {
-			a.eventRecorder.Event(hpa, api.EventTypeWarning, "FailedGetCustomMetrics", err.Error())
+			lastScaleTime := getLastScaleTime(hpa)
+			if time.Now().After(lastScaleTime.Add(upscaleForbiddenWindow)) {
+				a.eventRecorder.Event(hpa, api.EventTypeWarning, "FailedGetCustomMetrics", err.Error())
+			} else {
+				a.eventRecorder.Event(hpa, api.EventTypeNormal, "CustomMetricsNotAvailableYet", err.Error())
+			}
+
 			return 0, "", "", time.Time{}, fmt.Errorf("failed to get custom metric value: %v", err)
 		}
 		floatTarget := float64(customMetricTarget.TargetValue.MilliValue()) / 1000.0
@@ -298,7 +319,14 @@ func (a *HorizontalController) reconcileAutoscaler(hpa *autoscaling.HorizontalPo
 			cpuDesiredReplicas, cpuCurrentUtilization, cpuTimestamp, err = a.computeReplicasForCPUUtilization(hpa, scale)
 			if err != nil {
 				a.updateCurrentReplicasInStatus(hpa, currentReplicas)
-				a.eventRecorder.Event(hpa, api.EventTypeWarning, "FailedComputeReplicas", err.Error())
+
+				lastScaleTime := getLastScaleTime(hpa)
+				if time.Now().After(lastScaleTime.Add(upscaleForbiddenWindow)) {
+					a.eventRecorder.Event(hpa, api.EventTypeWarning, "FailedComputeReplicas", err.Error())
+				} else {
+					a.eventRecorder.Event(hpa, api.EventTypeNormal, "FailedComputeReplicas", err.Error())
+				}
+
 				return fmt.Errorf("failed to compute desired number of replicas based on CPU utilization for %s: %v", reference, err)
 			}
 		}
@@ -307,7 +335,14 @@ func (a *HorizontalController) reconcileAutoscaler(hpa *autoscaling.HorizontalPo
 			cmDesiredReplicas, cmMetric, cmStatus, cmTimestamp, err = a.computeReplicasForCustomMetrics(hpa, scale, cmAnnotation)
 			if err != nil {
 				a.updateCurrentReplicasInStatus(hpa, currentReplicas)
-				a.eventRecorder.Event(hpa, api.EventTypeWarning, "FailedComputeCMReplicas", err.Error())
+
+				lastScaleTime := getLastScaleTime(hpa)
+				if time.Now().After(lastScaleTime.Add(upscaleForbiddenWindow)) {
+					a.eventRecorder.Event(hpa, api.EventTypeWarning, "FailedComputeCMReplicas", err.Error())
+				} else {
+					a.eventRecorder.Event(hpa, api.EventTypeNormal, "FailedComputeCMReplicas", err.Error())
+				}
+
 				return fmt.Errorf("failed to compute desired number of replicas based on Custom Metrics for %s: %v", reference, err)
 			}
 		}