mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-03 17:30:00 +00:00
Add events to improve understandability of HPA controller decisions.
Fixes #22174
This commit is contained in:
parent
b5c9af9762
commit
69b3c6aa39
@ -91,6 +91,11 @@ func NewHorizontalController(evtNamespacer unversionedcore.EventsGetter, scaleNa
|
|||||||
framework.ResourceEventHandlerFuncs{
|
framework.ResourceEventHandlerFuncs{
|
||||||
AddFunc: func(obj interface{}) {
|
AddFunc: func(obj interface{}) {
|
||||||
hpa := obj.(*extensions.HorizontalPodAutoscaler)
|
hpa := obj.(*extensions.HorizontalPodAutoscaler)
|
||||||
|
hasCPUPolicy := hpa.Spec.CPUUtilization != nil
|
||||||
|
_, hasCustomMetricsPolicy := hpa.Annotations[HpaCustomMetricsTargetAnnotationName]
|
||||||
|
if !hasCPUPolicy && !hasCustomMetricsPolicy {
|
||||||
|
controller.eventRecorder.Event(hpa, api.EventTypeNormal, "DefaultPolicy", "No scaling policy specified - will use default one. See documentation for details")
|
||||||
|
}
|
||||||
err := controller.reconcileAutoscaler(hpa)
|
err := controller.reconcileAutoscaler(hpa)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Warningf("Failed to reconcile %s: %v", hpa.Name, err)
|
glog.Warningf("Failed to reconcile %s: %v", hpa.Name, err)
|
||||||
@ -129,7 +134,7 @@ func (a *HorizontalController) computeReplicasForCPUUtilization(hpa *extensions.
|
|||||||
// TODO: what to do on partial errors (like metrics obtained for 75% of pods).
|
// TODO: what to do on partial errors (like metrics obtained for 75% of pods).
|
||||||
if err != nil {
|
if err != nil {
|
||||||
a.eventRecorder.Event(hpa, api.EventTypeWarning, "FailedGetMetrics", err.Error())
|
a.eventRecorder.Event(hpa, api.EventTypeWarning, "FailedGetMetrics", err.Error())
|
||||||
return 0, nil, time.Time{}, fmt.Errorf("failed to get cpu utilization: %v", err)
|
return 0, nil, time.Time{}, fmt.Errorf("failed to get CPU utilization: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
usageRatio := float64(*currentUtilization) / float64(targetUtilization)
|
usageRatio := float64(*currentUtilization) / float64(targetUtilization)
|
||||||
@ -142,25 +147,29 @@ func (a *HorizontalController) computeReplicasForCPUUtilization(hpa *extensions.
|
|||||||
|
|
||||||
// Computes the desired number of replicas based on the CustomMetrics passed in cmAnnotation as json-serialized
|
// Computes the desired number of replicas based on the CustomMetrics passed in cmAnnotation as json-serialized
|
||||||
// extensions.CustomMetricsTargetList.
|
// extensions.CustomMetricsTargetList.
|
||||||
// Returns number of replicas, status string (also json-serialized extensions.CustomMetricsCurrentStatusList),
|
// Returns number of replicas, metric which required highest number of replicas,
|
||||||
|
// status string (also json-serialized extensions.CustomMetricsCurrentStatusList),
|
||||||
// last timestamp of the metrics involved in computations, or an error if one occurred.
|
// last timestamp of the metrics involved in computations, or an error if one occurred.
|
||||||
func (a *HorizontalController) computeReplicasForCustomMetrics(hpa *extensions.HorizontalPodAutoscaler, scale *extensions.Scale,
|
func (a *HorizontalController) computeReplicasForCustomMetrics(hpa *extensions.HorizontalPodAutoscaler, scale *extensions.Scale,
|
||||||
cmAnnotation string) (int, string, time.Time, error) {
|
cmAnnotation string) (replicas int, metric string, status string, timestamp time.Time, err error) {
|
||||||
|
|
||||||
currentReplicas := scale.Status.Replicas
|
currentReplicas := scale.Status.Replicas
|
||||||
replicas := 0
|
replicas = 0
|
||||||
timestamp := time.Time{}
|
metric = ""
|
||||||
|
status = ""
|
||||||
|
timestamp = time.Time{}
|
||||||
|
err = nil
|
||||||
|
|
||||||
if cmAnnotation == "" {
|
if cmAnnotation == "" {
|
||||||
return 0, "", time.Time{}, nil
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
var targetList extensions.CustomMetricTargetList
|
var targetList extensions.CustomMetricTargetList
|
||||||
if err := json.Unmarshal([]byte(cmAnnotation), &targetList); err != nil {
|
if err := json.Unmarshal([]byte(cmAnnotation), &targetList); err != nil {
|
||||||
return 0, "", time.Time{}, fmt.Errorf("failed to parse custom metrics annotation: %v", err)
|
return 0, "", "", time.Time{}, fmt.Errorf("failed to parse custom metrics annotation: %v", err)
|
||||||
}
|
}
|
||||||
if len(targetList.Items) == 0 {
|
if len(targetList.Items) == 0 {
|
||||||
return 0, "", time.Time{}, fmt.Errorf("no custom metrics in annotation")
|
return 0, "", "", time.Time{}, fmt.Errorf("no custom metrics in annotation")
|
||||||
}
|
}
|
||||||
|
|
||||||
statusList := extensions.CustomMetricCurrentStatusList{
|
statusList := extensions.CustomMetricCurrentStatusList{
|
||||||
@ -172,7 +181,7 @@ func (a *HorizontalController) computeReplicasForCustomMetrics(hpa *extensions.H
|
|||||||
// TODO: what to do on partial errors (like metrics obtained for 75% of pods).
|
// TODO: what to do on partial errors (like metrics obtained for 75% of pods).
|
||||||
if err != nil {
|
if err != nil {
|
||||||
a.eventRecorder.Event(hpa, api.EventTypeWarning, "FailedGetCustomMetrics", err.Error())
|
a.eventRecorder.Event(hpa, api.EventTypeWarning, "FailedGetCustomMetrics", err.Error())
|
||||||
return 0, "", time.Time{}, fmt.Errorf("failed to get custom metric value: %v", err)
|
return 0, "", "", time.Time{}, fmt.Errorf("failed to get custom metric value: %v", err)
|
||||||
}
|
}
|
||||||
floatTarget := float64(customMetricTarget.TargetValue.MilliValue()) / 1000.0
|
floatTarget := float64(customMetricTarget.TargetValue.MilliValue()) / 1000.0
|
||||||
usageRatio := *value / floatTarget
|
usageRatio := *value / floatTarget
|
||||||
@ -186,10 +195,11 @@ func (a *HorizontalController) computeReplicasForCustomMetrics(hpa *extensions.H
|
|||||||
if replicaCountProposal > replicas {
|
if replicaCountProposal > replicas {
|
||||||
timestamp = currentTimestamp
|
timestamp = currentTimestamp
|
||||||
replicas = replicaCountProposal
|
replicas = replicaCountProposal
|
||||||
|
metric = fmt.Sprintf("Custom metric %s", customMetricTarget.Name)
|
||||||
}
|
}
|
||||||
quantity, err := resource.ParseQuantity(fmt.Sprintf("%.3f", *value))
|
quantity, err := resource.ParseQuantity(fmt.Sprintf("%.3f", *value))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, "", time.Time{}, fmt.Errorf("failed to set custom metric value: %v", err)
|
return 0, "", "", time.Time{}, fmt.Errorf("failed to set custom metric value: %v", err)
|
||||||
}
|
}
|
||||||
statusList.Items = append(statusList.Items, extensions.CustomMetricCurrentStatus{
|
statusList.Items = append(statusList.Items, extensions.CustomMetricCurrentStatus{
|
||||||
Name: customMetricTarget.Name,
|
Name: customMetricTarget.Name,
|
||||||
@ -198,10 +208,10 @@ func (a *HorizontalController) computeReplicasForCustomMetrics(hpa *extensions.H
|
|||||||
}
|
}
|
||||||
byteStatusList, err := json.Marshal(statusList)
|
byteStatusList, err := json.Marshal(statusList)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, "", time.Time{}, fmt.Errorf("failed to serialize custom metric status: %v", err)
|
return 0, "", "", time.Time{}, fmt.Errorf("failed to serialize custom metric status: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return replicas, string(byteStatusList), timestamp, nil
|
return replicas, metric, string(byteStatusList), timestamp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *HorizontalController) reconcileAutoscaler(hpa *extensions.HorizontalPodAutoscaler) error {
|
func (a *HorizontalController) reconcileAutoscaler(hpa *extensions.HorizontalPodAutoscaler) error {
|
||||||
@ -219,17 +229,22 @@ func (a *HorizontalController) reconcileAutoscaler(hpa *extensions.HorizontalPod
|
|||||||
cpuTimestamp := time.Time{}
|
cpuTimestamp := time.Time{}
|
||||||
|
|
||||||
cmDesiredReplicas := 0
|
cmDesiredReplicas := 0
|
||||||
|
cmMetric := ""
|
||||||
cmStatus := ""
|
cmStatus := ""
|
||||||
cmTimestamp := time.Time{}
|
cmTimestamp := time.Time{}
|
||||||
|
|
||||||
desiredReplicas := 0
|
desiredReplicas := 0
|
||||||
|
rescaleReason := ""
|
||||||
timestamp := time.Now()
|
timestamp := time.Now()
|
||||||
|
|
||||||
if currentReplicas > hpa.Spec.MaxReplicas {
|
if currentReplicas > hpa.Spec.MaxReplicas {
|
||||||
|
rescaleReason = "Current number of replicas above Spec.MaxReplicas"
|
||||||
desiredReplicas = hpa.Spec.MaxReplicas
|
desiredReplicas = hpa.Spec.MaxReplicas
|
||||||
} else if hpa.Spec.MinReplicas != nil && currentReplicas < *hpa.Spec.MinReplicas {
|
} else if hpa.Spec.MinReplicas != nil && currentReplicas < *hpa.Spec.MinReplicas {
|
||||||
|
rescaleReason = "Current number of replicas below Spec.MinReplicas"
|
||||||
desiredReplicas = *hpa.Spec.MinReplicas
|
desiredReplicas = *hpa.Spec.MinReplicas
|
||||||
} else if currentReplicas == 0 {
|
} else if currentReplicas == 0 {
|
||||||
|
rescaleReason = "Current number of replicas must be greater than 0"
|
||||||
desiredReplicas = 1
|
desiredReplicas = 1
|
||||||
} else {
|
} else {
|
||||||
// All basic scenarios covered, the state should be sane, let's use metrics.
|
// All basic scenarios covered, the state should be sane, let's use metrics.
|
||||||
@ -245,7 +260,7 @@ func (a *HorizontalController) reconcileAutoscaler(hpa *extensions.HorizontalPod
|
|||||||
}
|
}
|
||||||
|
|
||||||
if cmAnnotationFound {
|
if cmAnnotationFound {
|
||||||
cmDesiredReplicas, cmStatus, cmTimestamp, err = a.computeReplicasForCustomMetrics(hpa, scale, cmAnnotation)
|
cmDesiredReplicas, cmMetric, cmStatus, cmTimestamp, err = a.computeReplicasForCustomMetrics(hpa, scale, cmAnnotation)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
a.updateCurrentReplicasInStatus(hpa, currentReplicas)
|
a.updateCurrentReplicasInStatus(hpa, currentReplicas)
|
||||||
a.eventRecorder.Event(hpa, api.EventTypeWarning, "FailedComputeCMReplicas", err.Error())
|
a.eventRecorder.Event(hpa, api.EventTypeWarning, "FailedComputeCMReplicas", err.Error())
|
||||||
@ -253,13 +268,21 @@ func (a *HorizontalController) reconcileAutoscaler(hpa *extensions.HorizontalPod
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rescaleMetric := ""
|
||||||
if cpuDesiredReplicas > desiredReplicas {
|
if cpuDesiredReplicas > desiredReplicas {
|
||||||
desiredReplicas = cpuDesiredReplicas
|
desiredReplicas = cpuDesiredReplicas
|
||||||
timestamp = cpuTimestamp
|
timestamp = cpuTimestamp
|
||||||
|
rescaleMetric = "CPU utilization"
|
||||||
}
|
}
|
||||||
if cmDesiredReplicas > desiredReplicas {
|
if cmDesiredReplicas > desiredReplicas {
|
||||||
desiredReplicas = cmDesiredReplicas
|
desiredReplicas = cmDesiredReplicas
|
||||||
timestamp = cmTimestamp
|
timestamp = cmTimestamp
|
||||||
|
rescaleMetric = cmMetric
|
||||||
|
}
|
||||||
|
if desiredReplicas > currentReplicas {
|
||||||
|
rescaleReason = fmt.Sprintf("%s above target", rescaleMetric)
|
||||||
|
} else if desiredReplicas < currentReplicas {
|
||||||
|
rescaleReason = "All metrics below target"
|
||||||
}
|
}
|
||||||
|
|
||||||
if hpa.Spec.MinReplicas != nil && desiredReplicas < *hpa.Spec.MinReplicas {
|
if hpa.Spec.MinReplicas != nil && desiredReplicas < *hpa.Spec.MinReplicas {
|
||||||
@ -281,12 +304,12 @@ func (a *HorizontalController) reconcileAutoscaler(hpa *extensions.HorizontalPod
|
|||||||
scale.Spec.Replicas = desiredReplicas
|
scale.Spec.Replicas = desiredReplicas
|
||||||
_, err = a.scaleNamespacer.Scales(hpa.Namespace).Update(hpa.Spec.ScaleRef.Kind, scale)
|
_, err = a.scaleNamespacer.Scales(hpa.Namespace).Update(hpa.Spec.ScaleRef.Kind, scale)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
a.eventRecorder.Eventf(hpa, api.EventTypeWarning, "FailedRescale", "New size: %d; error: %v", desiredReplicas, err.Error())
|
a.eventRecorder.Eventf(hpa, api.EventTypeWarning, "FailedRescale", "New size: %d; reason: %s; error: %v", desiredReplicas, rescaleReason, err.Error())
|
||||||
return fmt.Errorf("failed to rescale %s: %v", reference, err)
|
return fmt.Errorf("failed to rescale %s: %v", reference, err)
|
||||||
}
|
}
|
||||||
a.eventRecorder.Eventf(hpa, api.EventTypeNormal, "SuccessfulRescale", "New size: %d", desiredReplicas)
|
a.eventRecorder.Eventf(hpa, api.EventTypeNormal, "SuccessfulRescale", "New size: %d; reason: %s", desiredReplicas, rescaleReason)
|
||||||
glog.Infof("Successfull rescale of %s, old size: %d, new size: %d",
|
glog.Infof("Successfull rescale of %s, old size: %d, new size: %d, reason: %s",
|
||||||
hpa.Name, currentReplicas, desiredReplicas)
|
hpa.Name, currentReplicas, desiredReplicas, rescaleReason)
|
||||||
} else {
|
} else {
|
||||||
desiredReplicas = currentReplicas
|
desiredReplicas = currentReplicas
|
||||||
}
|
}
|
||||||
|
@ -33,12 +33,10 @@ import (
|
|||||||
"k8s.io/kubernetes/pkg/client/unversioned/testclient"
|
"k8s.io/kubernetes/pkg/client/unversioned/testclient"
|
||||||
"k8s.io/kubernetes/pkg/controller/podautoscaler/metrics"
|
"k8s.io/kubernetes/pkg/controller/podautoscaler/metrics"
|
||||||
"k8s.io/kubernetes/pkg/runtime"
|
"k8s.io/kubernetes/pkg/runtime"
|
||||||
"k8s.io/kubernetes/pkg/util/wait"
|
|
||||||
"k8s.io/kubernetes/pkg/watch"
|
"k8s.io/kubernetes/pkg/watch"
|
||||||
|
|
||||||
heapster "k8s.io/heapster/api/v1/types"
|
heapster "k8s.io/heapster/api/v1/types"
|
||||||
|
|
||||||
"github.com/golang/glog"
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -74,6 +72,8 @@ type testCase struct {
|
|||||||
statusUpdated bool
|
statusUpdated bool
|
||||||
eventCreated bool
|
eventCreated bool
|
||||||
verifyEvents bool
|
verifyEvents bool
|
||||||
|
// Channel with names of HPA objects which we have reconciled.
|
||||||
|
processed chan string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tc *testCase) computeCPUCurrent() {
|
func (tc *testCase) computeCPUCurrent() {
|
||||||
@ -100,6 +100,7 @@ func (tc *testCase) prepareTestClient(t *testing.T) *fake.Clientset {
|
|||||||
tc.scaleUpdated = false
|
tc.scaleUpdated = false
|
||||||
tc.statusUpdated = false
|
tc.statusUpdated = false
|
||||||
tc.eventCreated = false
|
tc.eventCreated = false
|
||||||
|
tc.processed = make(chan string, 100)
|
||||||
tc.computeCPUCurrent()
|
tc.computeCPUCurrent()
|
||||||
|
|
||||||
fakeClient := &fake.Clientset{}
|
fakeClient := &fake.Clientset{}
|
||||||
@ -223,6 +224,8 @@ func (tc *testCase) prepareTestClient(t *testing.T) *fake.Clientset {
|
|||||||
assert.Equal(t, tc.CPUCurrent, *obj.Status.CurrentCPUUtilizationPercentage)
|
assert.Equal(t, tc.CPUCurrent, *obj.Status.CurrentCPUUtilizationPercentage)
|
||||||
}
|
}
|
||||||
tc.statusUpdated = true
|
tc.statusUpdated = true
|
||||||
|
// Every time we reconcile HPA object we are updating status.
|
||||||
|
tc.processed <- obj.Name
|
||||||
return true, obj, nil
|
return true, obj, nil
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -230,7 +233,7 @@ func (tc *testCase) prepareTestClient(t *testing.T) *fake.Clientset {
|
|||||||
obj := action.(testclient.CreateAction).GetObject().(*api.Event)
|
obj := action.(testclient.CreateAction).GetObject().(*api.Event)
|
||||||
if tc.verifyEvents {
|
if tc.verifyEvents {
|
||||||
assert.Equal(t, "SuccessfulRescale", obj.Reason)
|
assert.Equal(t, "SuccessfulRescale", obj.Reason)
|
||||||
assert.Equal(t, fmt.Sprintf("New size: %d", tc.desiredReplicas), obj.Message)
|
assert.Equal(t, fmt.Sprintf("New size: %d; reason: CPU utilization above target", tc.desiredReplicas), obj.Message)
|
||||||
}
|
}
|
||||||
tc.eventCreated = true
|
tc.eventCreated = true
|
||||||
return true, obj, nil
|
return true, obj, nil
|
||||||
@ -261,11 +264,8 @@ func (tc *testCase) runTest(t *testing.T) {
|
|||||||
// We need to wait for events to be broadcasted (sleep for longer than record.sleepDuration).
|
// We need to wait for events to be broadcasted (sleep for longer than record.sleepDuration).
|
||||||
time.Sleep(12 * time.Second)
|
time.Sleep(12 * time.Second)
|
||||||
}
|
}
|
||||||
// Each iteration for an HPA object ends with updating status.
|
// Wait for HPA to be processed.
|
||||||
wait.Poll(1*time.Second, 30*time.Second, func() (done bool, err error) {
|
<-tc.processed
|
||||||
glog.Infof("Status value: ", tc.statusUpdated)
|
|
||||||
return tc.statusUpdated, nil
|
|
||||||
})
|
|
||||||
tc.verifyResults(t)
|
tc.verifyResults(t)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user