diff --git a/pkg/controller/podautoscaler/horizontal.go b/pkg/controller/podautoscaler/horizontal.go
index 57bea5e1372..67f78b4b1a8 100644
--- a/pkg/controller/podautoscaler/horizontal.go
+++ b/pkg/controller/podautoscaler/horizontal.go
@@ -37,6 +37,7 @@ import (
 	"k8s.io/apimachinery/pkg/runtime/schema"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	"k8s.io/apimachinery/pkg/util/wait"
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	autoscalinginformers "k8s.io/client-go/informers/autoscaling/v2"
 	coreinformers "k8s.io/client-go/informers/core/v1"
 	"k8s.io/client-go/kubernetes/scheme"
@@ -53,6 +54,7 @@ import (
 	metricsclient "k8s.io/kubernetes/pkg/controller/podautoscaler/metrics"
 	"k8s.io/kubernetes/pkg/controller/podautoscaler/monitor"
 	"k8s.io/kubernetes/pkg/controller/util/selectors"
+	"k8s.io/kubernetes/pkg/features"
 )
 
 var (
@@ -86,6 +88,7 @@ type HorizontalController struct {
 	hpaNamespacer autoscalingclient.HorizontalPodAutoscalersGetter
 	mapper        apimeta.RESTMapper
 
+	tolerance     float64
 	replicaCalc   *ReplicaCalculator
 	eventRecorder record.EventRecorder
 
@@ -146,6 +149,7 @@ func NewHorizontalController(
 		eventRecorder:   recorder,
 		scaleNamespacer: scaleNamespacer,
 		hpaNamespacer:   hpaNamespacer,
+		tolerance:       tolerance,
 		downscaleStabilisationWindow: downscaleStabilisationWindow,
 		monitor:                      monitor.New(),
 		queue: workqueue.NewTypedRateLimitingQueueWithConfig(
@@ -181,7 +185,6 @@ func NewHorizontalController(
 	replicaCalc := NewReplicaCalculator(
 		metricsClient,
 		hpaController.podLister,
-		tolerance,
 		cpuInitializationPeriod,
 		delayOfInitialReadinessStatus,
 	)
@@ -539,8 +542,9 @@ func (a *HorizontalController) computeStatusForObjectMetric(specReplicas, status
 			},
 		},
 	}
+	tolerances := a.tolerancesForHpa(hpa)
 	if metricSpec.Object.Target.Type == autoscalingv2.ValueMetricType && metricSpec.Object.Target.Value != nil {
-		replicaCountProposal, usageProposal, timestampProposal, err := a.replicaCalc.GetObjectMetricReplicas(specReplicas, metricSpec.Object.Target.Value.MilliValue(), metricSpec.Object.Metric.Name, hpa.Namespace, &metricSpec.Object.DescribedObject, selector, metricSelector)
+		replicaCountProposal, usageProposal, timestampProposal, err := a.replicaCalc.GetObjectMetricReplicas(specReplicas, metricSpec.Object.Target.Value.MilliValue(), metricSpec.Object.Metric.Name, tolerances, hpa.Namespace, &metricSpec.Object.DescribedObject, selector, metricSelector)
 		if err != nil {
 			condition := a.getUnableComputeReplicaCountCondition(hpa, "FailedGetObjectMetric", err)
 			return 0, timestampProposal, "", condition, err
@@ -549,7 +553,7 @@ func (a *HorizontalController) computeStatusForObjectMetric(specReplicas, status
 		*status = metricStatus
 		return replicaCountProposal, timestampProposal, fmt.Sprintf("%s metric %s", metricSpec.Object.DescribedObject.Kind, metricSpec.Object.Metric.Name), autoscalingv2.HorizontalPodAutoscalerCondition{}, nil
 	} else if metricSpec.Object.Target.Type == autoscalingv2.AverageValueMetricType && metricSpec.Object.Target.AverageValue != nil {
-		replicaCountProposal, usageProposal, timestampProposal, err := a.replicaCalc.GetObjectPerPodMetricReplicas(statusReplicas, metricSpec.Object.Target.AverageValue.MilliValue(), metricSpec.Object.Metric.Name, hpa.Namespace, &metricSpec.Object.DescribedObject, metricSelector)
+		replicaCountProposal, usageProposal, timestampProposal, err := a.replicaCalc.GetObjectPerPodMetricReplicas(statusReplicas, metricSpec.Object.Target.AverageValue.MilliValue(), metricSpec.Object.Metric.Name, tolerances, hpa.Namespace, &metricSpec.Object.DescribedObject, metricSelector)
 		if err != nil {
 			condition := a.getUnableComputeReplicaCountCondition(hpa, "FailedGetObjectMetric", err)
 			return 0, time.Time{}, "", condition, fmt.Errorf("failed to get %s object metric: %v", metricSpec.Object.Metric.Name, err)
@@ -566,7 +570,8 @@ func (a *HorizontalController) computeStatusForObjectMetric(specReplicas, status
 
 // computeStatusForPodsMetric computes the desired number of replicas for the specified metric of type PodsMetricSourceType.
 func (a *HorizontalController) computeStatusForPodsMetric(currentReplicas int32, metricSpec autoscalingv2.MetricSpec, hpa *autoscalingv2.HorizontalPodAutoscaler, selector labels.Selector, status *autoscalingv2.MetricStatus, metricSelector labels.Selector) (replicaCountProposal int32, timestampProposal time.Time, metricNameProposal string, condition autoscalingv2.HorizontalPodAutoscalerCondition, err error) {
-	replicaCountProposal, usageProposal, timestampProposal, err := a.replicaCalc.GetMetricReplicas(currentReplicas, metricSpec.Pods.Target.AverageValue.MilliValue(), metricSpec.Pods.Metric.Name, hpa.Namespace, selector, metricSelector)
+	tolerances := a.tolerancesForHpa(hpa)
+	replicaCountProposal, usageProposal, timestampProposal, err := a.replicaCalc.GetMetricReplicas(currentReplicas, metricSpec.Pods.Target.AverageValue.MilliValue(), metricSpec.Pods.Metric.Name, tolerances, hpa.Namespace, selector, metricSelector)
 	if err != nil {
 		condition = a.getUnableComputeReplicaCountCondition(hpa, "FailedGetPodsMetric", err)
 		return 0, timestampProposal, "", condition, err
@@ -588,12 +593,14 @@ func (a *HorizontalController) computeStatusForPodsMetric(currentReplicas int32,
 }
 
 func (a *HorizontalController) computeStatusForResourceMetricGeneric(ctx context.Context, currentReplicas int32, target autoscalingv2.MetricTarget,
-	resourceName v1.ResourceName, namespace string, container string, selector labels.Selector, sourceType autoscalingv2.MetricSourceType) (replicaCountProposal int32,
+	resourceName v1.ResourceName, hpa *autoscalingv2.HorizontalPodAutoscaler, container string, selector labels.Selector, sourceType autoscalingv2.MetricSourceType) (replicaCountProposal int32,
 	metricStatus *autoscalingv2.MetricValueStatus, timestampProposal time.Time, metricNameProposal string,
 	condition autoscalingv2.HorizontalPodAutoscalerCondition, err error) {
+	namespace := hpa.Namespace
+	tolerances := a.tolerancesForHpa(hpa)
 	if target.AverageValue != nil {
 		var rawProposal int64
-		replicaCountProposal, rawProposal, timestampProposal, err := a.replicaCalc.GetRawResourceReplicas(ctx, currentReplicas, target.AverageValue.MilliValue(), resourceName, namespace, selector, container)
+		replicaCountProposal, rawProposal, timestampProposal, err := a.replicaCalc.GetRawResourceReplicas(ctx, currentReplicas, target.AverageValue.MilliValue(), resourceName, tolerances, namespace, selector, container)
 		if err != nil {
 			return 0, nil, time.Time{}, "", condition, fmt.Errorf("failed to get %s usage: %v", resourceName, err)
 		}
@@ -610,7 +617,7 @@ func (a *HorizontalController) computeStatusForResourceMetricGeneric(ctx context
 	}
 	targetUtilization := *target.AverageUtilization
-	replicaCountProposal, percentageProposal, rawProposal, timestampProposal, err := a.replicaCalc.GetResourceReplicas(ctx, currentReplicas, targetUtilization, resourceName, namespace, selector, container)
+	replicaCountProposal, percentageProposal, rawProposal, timestampProposal, err := a.replicaCalc.GetResourceReplicas(ctx, currentReplicas, targetUtilization, resourceName, tolerances, namespace, selector, container)
 	if err != nil {
 		return 0, nil, time.Time{}, "", condition, fmt.Errorf("failed to get %s utilization: %v", resourceName, err)
 	}
@@ -630,7 +637,7 @@ func (a *HorizontalController) computeStatusForResourceMetricGeneric(ctx context
 
 func (a *HorizontalController) computeStatusForResourceMetric(ctx context.Context, currentReplicas int32, metricSpec autoscalingv2.MetricSpec, hpa *autoscalingv2.HorizontalPodAutoscaler, selector labels.Selector, status *autoscalingv2.MetricStatus) (replicaCountProposal int32, timestampProposal time.Time, metricNameProposal string, condition autoscalingv2.HorizontalPodAutoscalerCondition, err error) {
-	replicaCountProposal, metricValueStatus, timestampProposal, metricNameProposal, condition, err := a.computeStatusForResourceMetricGeneric(ctx, currentReplicas, metricSpec.Resource.Target, metricSpec.Resource.Name, hpa.Namespace, "", selector, autoscalingv2.ResourceMetricSourceType)
+	replicaCountProposal, metricValueStatus, timestampProposal, metricNameProposal, condition, err := a.computeStatusForResourceMetricGeneric(ctx, currentReplicas, metricSpec.Resource.Target, metricSpec.Resource.Name, hpa, "", selector, autoscalingv2.ResourceMetricSourceType)
 	if err != nil {
 		condition = a.getUnableComputeReplicaCountCondition(hpa, "FailedGetResourceMetric", err)
 		return replicaCountProposal, timestampProposal, metricNameProposal, condition, err
@@ -649,7 +656,7 @@ func (a *HorizontalController) computeStatusForResourceMetric(ctx context.Contex
 
 func (a *HorizontalController) computeStatusForContainerResourceMetric(ctx context.Context, currentReplicas int32, metricSpec autoscalingv2.MetricSpec, hpa *autoscalingv2.HorizontalPodAutoscaler, selector labels.Selector, status *autoscalingv2.MetricStatus) (replicaCountProposal int32, timestampProposal time.Time, metricNameProposal string, condition autoscalingv2.HorizontalPodAutoscalerCondition, err error) {
-	replicaCountProposal, metricValueStatus, timestampProposal, metricNameProposal, condition, err := a.computeStatusForResourceMetricGeneric(ctx, currentReplicas, metricSpec.ContainerResource.Target, metricSpec.ContainerResource.Name, hpa.Namespace, metricSpec.ContainerResource.Container, selector, autoscalingv2.ContainerResourceMetricSourceType)
+	replicaCountProposal, metricValueStatus, timestampProposal, metricNameProposal, condition, err := a.computeStatusForResourceMetricGeneric(ctx, currentReplicas, metricSpec.ContainerResource.Target, metricSpec.ContainerResource.Name, hpa, metricSpec.ContainerResource.Container, selector, autoscalingv2.ContainerResourceMetricSourceType)
 	if err != nil {
 		condition = a.getUnableComputeReplicaCountCondition(hpa, "FailedGetContainerResourceMetric", err)
 		return replicaCountProposal, timestampProposal, metricNameProposal, condition, err
@@ -667,8 +674,9 @@ func (a *HorizontalController) computeStatusForContainerResourceMetric(ctx conte
 
 // computeStatusForExternalMetric computes the desired number of replicas for the specified metric of type ExternalMetricSourceType.
 func (a *HorizontalController) computeStatusForExternalMetric(specReplicas, statusReplicas int32, metricSpec autoscalingv2.MetricSpec, hpa *autoscalingv2.HorizontalPodAutoscaler, selector labels.Selector, status *autoscalingv2.MetricStatus) (replicaCountProposal int32, timestampProposal time.Time, metricNameProposal string, condition autoscalingv2.HorizontalPodAutoscalerCondition, err error) {
+	tolerances := a.tolerancesForHpa(hpa)
 	if metricSpec.External.Target.AverageValue != nil {
-		replicaCountProposal, usageProposal, timestampProposal, err := a.replicaCalc.GetExternalPerPodMetricReplicas(statusReplicas, metricSpec.External.Target.AverageValue.MilliValue(), metricSpec.External.Metric.Name, hpa.Namespace, metricSpec.External.Metric.Selector)
+		replicaCountProposal, usageProposal, timestampProposal, err := a.replicaCalc.GetExternalPerPodMetricReplicas(statusReplicas, metricSpec.External.Target.AverageValue.MilliValue(), metricSpec.External.Metric.Name, tolerances, hpa.Namespace, metricSpec.External.Metric.Selector)
 		if err != nil {
 			condition = a.getUnableComputeReplicaCountCondition(hpa, "FailedGetExternalMetric", err)
 			return 0, time.Time{}, "", condition, fmt.Errorf("failed to get %s external metric: %v", metricSpec.External.Metric.Name, err)
@@ -688,7 +696,7 @@ func (a *HorizontalController) computeStatusForExternalMetric(specReplicas, stat
 		return replicaCountProposal, timestampProposal, fmt.Sprintf("external metric %s(%+v)", metricSpec.External.Metric.Name, metricSpec.External.Metric.Selector), autoscalingv2.HorizontalPodAutoscalerCondition{}, nil
 	}
 	if metricSpec.External.Target.Value != nil {
-		replicaCountProposal, usageProposal, timestampProposal, err := a.replicaCalc.GetExternalMetricReplicas(specReplicas, metricSpec.External.Target.Value.MilliValue(), metricSpec.External.Metric.Name, hpa.Namespace, metricSpec.External.Metric.Selector, selector)
+		replicaCountProposal, usageProposal, timestampProposal, err := a.replicaCalc.GetExternalMetricReplicas(specReplicas, metricSpec.External.Target.Value.MilliValue(), metricSpec.External.Metric.Name, tolerances, hpa.Namespace, metricSpec.External.Metric.Selector, selector)
 		if err != nil {
 			condition = a.getUnableComputeReplicaCountCondition(hpa, "FailedGetExternalMetric", err)
 			return 0, time.Time{}, "", condition, fmt.Errorf("failed to get external metric %s: %v", metricSpec.External.Metric.Name, err)
@@ -835,6 +843,7 @@ func (a *HorizontalController) reconcileAutoscaler(ctx context.Context, hpaShare
 		logger.V(4).Info("Proposing desired replicas",
 			"desiredReplicas", metricDesiredReplicas,
 			"metric", metricName,
+			"tolerances", a.tolerancesForHpa(hpa),
 			"timestamp", metricTimestamp,
 			"scaleTarget", reference)
 
@@ -1384,6 +1393,25 @@ func (a *HorizontalController) updateStatus(ctx context.Context, hpa *autoscalin
 	return nil
 }
 
+// tolerancesForHpa returns the metrics usage ratio tolerances for a given HPA.
+// It ignores configurable tolerances set in the HPA spec.behavior field if the
+// HPAConfigurableTolerance feature gate is disabled.
+func (a *HorizontalController) tolerancesForHpa(hpa *autoscalingv2.HorizontalPodAutoscaler) Tolerances {
+	t := Tolerances{a.tolerance, a.tolerance}
+	behavior := hpa.Spec.Behavior
+	allowConfigurableTolerances := utilfeature.DefaultFeatureGate.Enabled(features.HPAConfigurableTolerance)
+	if behavior == nil || !allowConfigurableTolerances {
+		return t
+	}
+	if behavior.ScaleDown != nil && behavior.ScaleDown.Tolerance != nil {
+		t.scaleDown = behavior.ScaleDown.Tolerance.AsApproximateFloat64()
+	}
+	if behavior.ScaleUp != nil && behavior.ScaleUp.Tolerance != nil {
+		t.scaleUp = behavior.ScaleUp.Tolerance.AsApproximateFloat64()
+	}
+	return t
+}
+
 // setCondition sets the specific condition type on the given HPA to the specified value with the given reason
 // and message. The message and args are treated like a format string. The condition will be added if it is
 // not present.
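The resolution rule in tolerancesForHpa is worth seeing end to end. Below is a minimal standalone sketch (not part of the patch) of that rule: the controller-wide default from the --horizontal-pod-autoscaler-tolerance flag (0.1 unless overridden) applies in both directions, and spec.behavior may override each direction independently. It assumes a k8s.io/api release that already carries the HPAScalingRules.Tolerance field added alongside this controller change, and it omits the feature-gate check.

package main

import (
	"fmt"

	autoscalingv2 "k8s.io/api/autoscaling/v2"
	"k8s.io/apimachinery/pkg/api/resource"
)

// resolve mirrors tolerancesForHpa (minus the feature gate): start from the
// controller-wide default and let spec.behavior override each direction.
func resolve(defaultTolerance float64, behavior *autoscalingv2.HorizontalPodAutoscalerBehavior) (scaleDown, scaleUp float64) {
	scaleDown, scaleUp = defaultTolerance, defaultTolerance
	if behavior == nil {
		return scaleDown, scaleUp
	}
	if behavior.ScaleDown != nil && behavior.ScaleDown.Tolerance != nil {
		scaleDown = behavior.ScaleDown.Tolerance.AsApproximateFloat64()
	}
	if behavior.ScaleUp != nil && behavior.ScaleUp.Tolerance != nil {
		scaleUp = behavior.ScaleUp.Tolerance.AsApproximateFloat64()
	}
	return scaleDown, scaleUp
}

func main() {
	onePercent := resource.MustParse("0.01")
	behavior := &autoscalingv2.HorizontalPodAutoscalerBehavior{
		ScaleUp: &autoscalingv2.HPAScalingRules{Tolerance: &onePercent},
	}
	down, up := resolve(0.1, behavior)
	fmt.Printf("scaleDown=%.2f scaleUp=%.2f\n", down, up) // scaleDown=0.10 scaleUp=0.01
}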
diff --git a/pkg/controller/podautoscaler/horizontal_test.go b/pkg/controller/podautoscaler/horizontal_test.go
index 250e45d7682..0492bf47e77 100644
--- a/pkg/controller/podautoscaler/horizontal_test.go
+++ b/pkg/controller/podautoscaler/horizontal_test.go
@@ -37,16 +37,19 @@ import (
 	"k8s.io/apimachinery/pkg/runtime/schema"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/apimachinery/pkg/watch"
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	"k8s.io/client-go/informers"
 	"k8s.io/client-go/kubernetes/fake"
 	scalefake "k8s.io/client-go/scale/fake"
 	core "k8s.io/client-go/testing"
+	featuregatetesting "k8s.io/component-base/featuregate/testing"
 	"k8s.io/kubernetes/pkg/api/legacyscheme"
 	autoscalingapiv2 "k8s.io/kubernetes/pkg/apis/autoscaling/v2"
 	"k8s.io/kubernetes/pkg/controller"
 	"k8s.io/kubernetes/pkg/controller/podautoscaler/metrics"
 	"k8s.io/kubernetes/pkg/controller/podautoscaler/monitor"
 	"k8s.io/kubernetes/pkg/controller/util/selectors"
+	"k8s.io/kubernetes/pkg/features"
 	"k8s.io/kubernetes/test/utils/ktesting"
 	cmapi "k8s.io/metrics/pkg/apis/custom_metrics/v1beta2"
 	emapi "k8s.io/metrics/pkg/apis/external_metrics/v1beta1"
@@ -2216,6 +2219,107 @@ func TestTolerance(t *testing.T) {
 	tc.runTest(t)
 }
 
+func TestConfigurableTolerance(t *testing.T) {
+	onePercentQuantity := resource.MustParse("0.01")
+	ninetyPercentQuantity := resource.MustParse("0.9")
+
+	testCases := []struct {
+		name                      string
+		configurableToleranceGate bool
+		replicas                  int32
+		scaleUpRules              *autoscalingv2.HPAScalingRules
+		scaleDownRules            *autoscalingv2.HPAScalingRules
+		reportedLevels            []uint64
+		reportedCPURequests       []resource.Quantity
+		expectedDesiredReplicas   int32
+		expectedConditionReason   string
+		expectedActionLabel       monitor.ActionLabel
+	}{
+		{
+			name:                      "Scaling up because of a 1% configurable tolerance",
+			configurableToleranceGate: true,
+			replicas:                  3,
+			scaleUpRules: &autoscalingv2.HPAScalingRules{
+				Tolerance: &onePercentQuantity,
+			},
+			reportedLevels:          []uint64{1010, 1030, 1020},
+			reportedCPURequests:     []resource.Quantity{resource.MustParse("0.9"), resource.MustParse("1.0"), resource.MustParse("1.1")},
+			expectedDesiredReplicas: 4,
+			expectedConditionReason: "SucceededRescale",
+			expectedActionLabel:     monitor.ActionLabelScaleUp,
+		},
+		{
+			name:                      "No scale-down because of a 90% configurable tolerance",
+			configurableToleranceGate: true,
+			replicas:                  3,
+			scaleDownRules: &autoscalingv2.HPAScalingRules{
+				Tolerance: &ninetyPercentQuantity,
+			},
+			reportedLevels:          []uint64{300, 300, 300},
+			reportedCPURequests:     []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
+			expectedDesiredReplicas: 3,
+			expectedConditionReason: "ReadyForNewScale",
+			expectedActionLabel:     monitor.ActionLabelNone,
+		},
+		{
+			name:                      "No scaling because of the large default tolerance",
+			configurableToleranceGate: true,
+			replicas:                  3,
+			reportedLevels:            []uint64{1010, 1030, 1020},
+			reportedCPURequests:       []resource.Quantity{resource.MustParse("0.9"), resource.MustParse("1.0"), resource.MustParse("1.1")},
+			expectedDesiredReplicas:   3,
+			expectedConditionReason:   "ReadyForNewScale",
+			expectedActionLabel:       monitor.ActionLabelNone,
+		},
+		{
+			name:                      "No scaling because the configurable tolerance is ignored as the feature gate is disabled",
+			configurableToleranceGate: false,
+			replicas:                  3,
+			scaleUpRules: &autoscalingv2.HPAScalingRules{
+				Tolerance: &onePercentQuantity,
+			},
+			reportedLevels:          []uint64{1010, 1030, 1020},
+			reportedCPURequests:     []resource.Quantity{resource.MustParse("0.9"), resource.MustParse("1.0"), resource.MustParse("1.1")},
+			expectedDesiredReplicas: 3,
+			expectedConditionReason: "ReadyForNewScale",
+			expectedActionLabel:     monitor.ActionLabelNone,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.HPAConfigurableTolerance, tc.configurableToleranceGate)
+			tc := testCase{
+				minReplicas:             1,
+				maxReplicas:             5,
+				specReplicas:            tc.replicas,
+				statusReplicas:          tc.replicas,
+				scaleDownRules:          tc.scaleDownRules,
+				scaleUpRules:            tc.scaleUpRules,
+				expectedDesiredReplicas: tc.expectedDesiredReplicas,
+				CPUTarget:               100,
+				reportedLevels:          tc.reportedLevels,
+				reportedCPURequests:     tc.reportedCPURequests,
+				useMetricsAPI:           true,
+				expectedConditions: statusOkWithOverrides(autoscalingv2.HorizontalPodAutoscalerCondition{
+					Type:   autoscalingv2.AbleToScale,
+					Status: v1.ConditionTrue,
+					Reason: tc.expectedConditionReason,
+				}),
+				expectedReportedReconciliationActionLabel: tc.expectedActionLabel,
+				expectedReportedReconciliationErrorLabel:  monitor.ErrorLabelNone,
+				expectedReportedMetricComputationActionLabels: map[autoscalingv2.MetricSourceType]monitor.ActionLabel{
+					autoscalingv2.ResourceMetricSourceType: tc.expectedActionLabel,
+				},
+				expectedReportedMetricComputationErrorLabels: map[autoscalingv2.MetricSourceType]monitor.ErrorLabel{
+					autoscalingv2.ResourceMetricSourceType: monitor.ErrorLabelNone,
+				},
+			}
+			tc.runTest(t)
+		})
+	}
+}
+
 func TestToleranceCM(t *testing.T) {
 	averageValue := resource.MustParse("20.0")
 	tc := testCase{
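The first test case above is easy to check by hand: per-pod CPU requests are 0.9, 1.0 and 1.1 cores (3000m total), reported usage is 1010+1030+1020 = 3060m against a 100% utilization target, so the usage ratio is 1.02. Under the default 10% tolerance the controller holds at 3 replicas; under the 1% scale-up tolerance it rescales to ceil(3 * 1.02) = 4. A self-contained back-of-the-envelope check (hypothetical helper code, not from the patch), using the documented HPA formula desiredReplicas = ceil(currentReplicas * usageRatio) applied only outside tolerance:

package main

import (
	"fmt"
	"math"
)

func main() {
	requestsMilli := []float64{900, 1000, 1100} // per-pod CPU requests
	levelsMilli := []float64{1010, 1030, 1020}  // reported per-pod usage

	var request, usage float64
	for i := range requestsMilli {
		request += requestsMilli[i]
		usage += levelsMilli[i]
	}
	usageRatio := usage / request // 3060/3000 = 1.02 against a 100% target

	for _, tol := range []float64{0.10, 0.01} {
		desired := 3
		if math.Abs(1.0-usageRatio) > tol {
			desired = int(math.Ceil(3 * usageRatio)) // ceil(3.06) = 4
		}
		fmt.Printf("tolerance=%.2f -> desiredReplicas=%d\n", tol, desired)
	}
}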
diff --git a/pkg/controller/podautoscaler/replica_calculator.go b/pkg/controller/podautoscaler/replica_calculator.go
index f4f89b544a6..97bf1ead639 100644
--- a/pkg/controller/podautoscaler/replica_calculator.go
+++ b/pkg/controller/podautoscaler/replica_calculator.go
@@ -40,21 +40,33 @@ const (
 	defaultTestingDelayOfInitialReadinessStatus = 10 * time.Second
 )
 
+// Tolerances contains metric usage ratio scale-up and scale-down tolerances.
+type Tolerances struct {
+	scaleDown float64
+	scaleUp   float64
+}
+
+func (t Tolerances) String() string {
+	return fmt.Sprintf("[down:%.1f%%, up:%.1f%%]", t.scaleDown*100., t.scaleUp*100.)
+}
+
+func (t Tolerances) isWithin(usageRatio float64) bool {
+	return (1.0-t.scaleDown) <= usageRatio && usageRatio <= (1.0+t.scaleUp)
+}
+
 // ReplicaCalculator bundles all needed information to calculate the target amount of replicas
 type ReplicaCalculator struct {
 	metricsClient                 metricsclient.MetricsClient
 	podLister                     corelisters.PodLister
-	tolerance                     float64
 	cpuInitializationPeriod       time.Duration
 	delayOfInitialReadinessStatus time.Duration
 }
 
 // NewReplicaCalculator creates a new ReplicaCalculator and passes all necessary information to the new instance
-func NewReplicaCalculator(metricsClient metricsclient.MetricsClient, podLister corelisters.PodLister, tolerance float64, cpuInitializationPeriod, delayOfInitialReadinessStatus time.Duration) *ReplicaCalculator {
+func NewReplicaCalculator(metricsClient metricsclient.MetricsClient, podLister corelisters.PodLister, cpuInitializationPeriod, delayOfInitialReadinessStatus time.Duration) *ReplicaCalculator {
 	return &ReplicaCalculator{
 		metricsClient:                 metricsClient,
 		podLister:                     podLister,
-		tolerance:                     tolerance,
 		cpuInitializationPeriod:       cpuInitializationPeriod,
 		delayOfInitialReadinessStatus: delayOfInitialReadinessStatus,
 	}
@@ -62,7 +74,7 @@
 
 // GetResourceReplicas calculates the desired replica count based on a target resource utilization percentage
 // of the given resource for pods matching the given selector in the given namespace, and the current replica count
-func (c *ReplicaCalculator) GetResourceReplicas(ctx context.Context, currentReplicas int32, targetUtilization int32, resource v1.ResourceName, namespace string, selector labels.Selector, container string) (replicaCount int32, utilization int32, rawUtilization int64, timestamp time.Time, err error) {
+func (c *ReplicaCalculator) GetResourceReplicas(ctx context.Context, currentReplicas int32, targetUtilization int32, resource v1.ResourceName, tolerances Tolerances, namespace string, selector labels.Selector, container string) (replicaCount int32, utilization int32, rawUtilization int64, timestamp time.Time, err error) {
 	metrics, timestamp, err := c.metricsClient.GetResourceMetric(ctx, resource, namespace, selector, container)
 	if err != nil {
 		return 0, 0, 0, time.Time{}, fmt.Errorf("unable to get metrics for resource %s: %v", resource, err)
@@ -94,7 +106,7 @@
 	scaleUpWithUnready := len(unreadyPods) > 0 && usageRatio > 1.0
 	if !scaleUpWithUnready && len(missingPods) == 0 {
-		if math.Abs(1.0-usageRatio) <= c.tolerance {
+		if tolerances.isWithin(usageRatio) {
 			// return the current replicas if the change would be too small
 			return currentReplicas, utilization, rawUtilization, timestamp, nil
 		}
@@ -132,7 +144,7 @@
 		return 0, utilization, rawUtilization, time.Time{}, err
 	}
 
-	if math.Abs(1.0-newUsageRatio) <= c.tolerance || (usageRatio < 1.0 && newUsageRatio > 1.0) || (usageRatio > 1.0 && newUsageRatio < 1.0) {
+	if tolerances.isWithin(newUsageRatio) || (usageRatio < 1.0 && newUsageRatio > 1.0) || (usageRatio > 1.0 && newUsageRatio < 1.0) {
 		// return the current replicas if the change would be too small,
 		// or if the new usage ratio would cause a change in scale direction
 		return currentReplicas, utilization, rawUtilization, timestamp, nil
@@ -151,31 +163,31 @@
 
 // GetRawResourceReplicas calculates the desired replica count based on a target resource usage (as a raw milli-value)
 // for pods matching the given selector in the given namespace, and the current replica count
-func (c *ReplicaCalculator) GetRawResourceReplicas(ctx context.Context, currentReplicas int32, targetUsage int64, resource v1.ResourceName, namespace string, selector labels.Selector, container string) (replicaCount int32, usage int64, timestamp time.Time, err error) {
+func (c *ReplicaCalculator) GetRawResourceReplicas(ctx context.Context, currentReplicas int32, targetUsage int64, resource v1.ResourceName, tolerances Tolerances, namespace string, selector labels.Selector, container string) (replicaCount int32, usage int64, timestamp time.Time, err error) {
 	metrics, timestamp, err := c.metricsClient.GetResourceMetric(ctx, resource, namespace, selector, container)
 	if err != nil {
 		return 0, 0, time.Time{}, fmt.Errorf("unable to get metrics for resource %s: %v", resource, err)
 	}
 
-	replicaCount, usage, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUsage, namespace, selector, resource)
+	replicaCount, usage, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUsage, tolerances, namespace, selector, resource)
 	return replicaCount, usage, timestamp, err
 }
 
 // GetMetricReplicas calculates the desired replica count based on a target metric usage
 // (as a milli-value) for pods matching the given selector in the given namespace, and the
 // current replica count
-func (c *ReplicaCalculator) GetMetricReplicas(currentReplicas int32, targetUsage int64, metricName string, namespace string, selector labels.Selector, metricSelector labels.Selector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
+func (c *ReplicaCalculator) GetMetricReplicas(currentReplicas int32, targetUsage int64, metricName string, tolerances Tolerances, namespace string, selector labels.Selector, metricSelector labels.Selector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
 	metrics, timestamp, err := c.metricsClient.GetRawMetric(metricName, namespace, selector, metricSelector)
 	if err != nil {
 		return 0, 0, time.Time{}, fmt.Errorf("unable to get metric %s: %v", metricName, err)
 	}
 
-	replicaCount, usage, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUsage, namespace, selector, v1.ResourceName(""))
+	replicaCount, usage, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUsage, tolerances, namespace, selector, v1.ResourceName(""))
 	return replicaCount, usage, timestamp, err
 }
 
 // calcPlainMetricReplicas calculates the desired replicas for plain (i.e. non-utilization percentage) metrics.
-func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMetricsInfo, currentReplicas int32, targetUsage int64, namespace string, selector labels.Selector, resource v1.ResourceName) (replicaCount int32, usage int64, err error) {
+func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMetricsInfo, currentReplicas int32, targetUsage int64, tolerances Tolerances, namespace string, selector labels.Selector, resource v1.ResourceName) (replicaCount int32, usage int64, err error) {
 	podList, err := c.podLister.Pods(namespace).List(selector)
 	if err != nil {
@@ -199,7 +211,7 @@
 	scaleUpWithUnready := len(unreadyPods) > 0 && usageRatio > 1.0
 	if !scaleUpWithUnready && len(missingPods) == 0 {
-		if math.Abs(1.0-usageRatio) <= c.tolerance {
+		if tolerances.isWithin(usageRatio) {
 			// return the current replicas if the change would be too small
 			return currentReplicas, usage, nil
 		}
@@ -232,7 +244,7 @@
 
 	// re-run the usage calculation with our new numbers
 	newUsageRatio, _ := metricsclient.GetMetricUsageRatio(metrics, targetUsage)
-	if math.Abs(1.0-newUsageRatio) <= c.tolerance || (usageRatio < 1.0 && newUsageRatio > 1.0) || (usageRatio > 1.0 && newUsageRatio < 1.0) {
+	if tolerances.isWithin(newUsageRatio) || (usageRatio < 1.0 && newUsageRatio > 1.0) || (usageRatio > 1.0 && newUsageRatio < 1.0) {
 		// return the current replicas if the change would be too small,
 		// or if the new usage ratio would cause a change in scale direction
 		return currentReplicas, usage, nil
@@ -251,22 +263,22 @@
 
 // GetObjectMetricReplicas calculates the desired replica count based on a target metric usage (as a milli-value)
 // for the given object in the given namespace, and the current replica count.
-func (c *ReplicaCalculator) GetObjectMetricReplicas(currentReplicas int32, targetUsage int64, metricName string, namespace string, objectRef *autoscaling.CrossVersionObjectReference, selector labels.Selector, metricSelector labels.Selector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
+func (c *ReplicaCalculator) GetObjectMetricReplicas(currentReplicas int32, targetUsage int64, metricName string, tolerances Tolerances, namespace string, objectRef *autoscaling.CrossVersionObjectReference, selector labels.Selector, metricSelector labels.Selector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
 	usage, _, err = c.metricsClient.GetObjectMetric(metricName, namespace, objectRef, metricSelector)
 	if err != nil {
 		return 0, 0, time.Time{}, fmt.Errorf("unable to get metric %s: %v on %s %s/%s", metricName, objectRef.Kind, namespace, objectRef.Name, err)
 	}
 
 	usageRatio := float64(usage) / float64(targetUsage)
-	replicaCount, timestamp, err = c.getUsageRatioReplicaCount(currentReplicas, usageRatio, namespace, selector)
+	replicaCount, timestamp, err = c.getUsageRatioReplicaCount(currentReplicas, usageRatio, tolerances, namespace, selector)
 	return replicaCount, usage, timestamp, err
 }
 
 // getUsageRatioReplicaCount calculates the desired replica count based on usageRatio and ready pods count.
 // For currentReplicas=0 doesn't take into account ready pods count and tolerance to support scaling to zero pods.
-func (c *ReplicaCalculator) getUsageRatioReplicaCount(currentReplicas int32, usageRatio float64, namespace string, selector labels.Selector) (replicaCount int32, timestamp time.Time, err error) {
+func (c *ReplicaCalculator) getUsageRatioReplicaCount(currentReplicas int32, usageRatio float64, tolerances Tolerances, namespace string, selector labels.Selector) (replicaCount int32, timestamp time.Time, err error) {
 	if currentReplicas != 0 {
-		if math.Abs(1.0-usageRatio) <= c.tolerance {
+		if tolerances.isWithin(usageRatio) {
 			// return the current replicas if the change would be too small
 			return currentReplicas, timestamp, nil
 		}
@@ -286,7 +298,7 @@
 
 // GetObjectPerPodMetricReplicas calculates the desired replica count based on a target metric usage (as a milli-value)
 // for the given object in the given namespace, and the current replica count.
-func (c *ReplicaCalculator) GetObjectPerPodMetricReplicas(statusReplicas int32, targetAverageUsage int64, metricName string, namespace string, objectRef *autoscaling.CrossVersionObjectReference, metricSelector labels.Selector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
+func (c *ReplicaCalculator) GetObjectPerPodMetricReplicas(statusReplicas int32, targetAverageUsage int64, metricName string, tolerances Tolerances, namespace string, objectRef *autoscaling.CrossVersionObjectReference, metricSelector labels.Selector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
 	usage, timestamp, err = c.metricsClient.GetObjectMetric(metricName, namespace, objectRef, metricSelector)
 	if err != nil {
 		return 0, 0, time.Time{}, fmt.Errorf("unable to get metric %s: %v on %s %s/%s", metricName, objectRef.Kind, namespace, objectRef.Name, err)
@@ -294,7 +306,7 @@
 
 	replicaCount = statusReplicas
 	usageRatio := float64(usage) / (float64(targetAverageUsage) * float64(replicaCount))
-	if math.Abs(1.0-usageRatio) > c.tolerance {
+	if !tolerances.isWithin(usageRatio) {
 		// update number of replicas if change is large enough
 		replicaCount = int32(math.Ceil(float64(usage) / float64(targetAverageUsage)))
 	}
@@ -329,7 +341,7 @@ func (c *ReplicaCalculator) getReadyPodsCount(namespace string, selector labels.
 // GetExternalMetricReplicas calculates the desired replica count based on a
 // target metric value (as a milli-value) for the external metric in the given
 // namespace, and the current replica count.
-func (c *ReplicaCalculator) GetExternalMetricReplicas(currentReplicas int32, targetUsage int64, metricName, namespace string, metricSelector *metav1.LabelSelector, podSelector labels.Selector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
+func (c *ReplicaCalculator) GetExternalMetricReplicas(currentReplicas int32, targetUsage int64, metricName string, tolerances Tolerances, namespace string, metricSelector *metav1.LabelSelector, podSelector labels.Selector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
 	metricLabelSelector, err := metav1.LabelSelectorAsSelector(metricSelector)
 	if err != nil {
 		return 0, 0, time.Time{}, err
@@ -344,14 +356,14 @@
 	}
 
 	usageRatio := float64(usage) / float64(targetUsage)
-	replicaCount, timestamp, err = c.getUsageRatioReplicaCount(currentReplicas, usageRatio, namespace, podSelector)
+	replicaCount, timestamp, err = c.getUsageRatioReplicaCount(currentReplicas, usageRatio, tolerances, namespace, podSelector)
 	return replicaCount, usage, timestamp, err
 }
 
 // GetExternalPerPodMetricReplicas calculates the desired replica count based on a
 // target metric value per pod (as a milli-value) for the external metric in the
 // given namespace, and the current replica count.
-func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(statusReplicas int32, targetUsagePerPod int64, metricName, namespace string, metricSelector *metav1.LabelSelector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
+func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(statusReplicas int32, targetUsagePerPod int64, metricName string, tolerances Tolerances, namespace string, metricSelector *metav1.LabelSelector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
 	metricLabelSelector, err := metav1.LabelSelectorAsSelector(metricSelector)
 	if err != nil {
 		return 0, 0, time.Time{}, err
@@ -367,7 +379,7 @@
 
 	replicaCount = statusReplicas
 	usageRatio := float64(usage) / (float64(targetUsagePerPod) * float64(replicaCount))
-	if math.Abs(1.0-usageRatio) > c.tolerance {
+	if !tolerances.isWithin(usageRatio) {
 		// update number of replicas if the change is large enough
 		replicaCount = int32(math.Ceil(float64(usage) / float64(targetUsagePerPod)))
 	}
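Note how isWithin replaces the old symmetric check math.Abs(1.0-usageRatio) <= c.tolerance with an asymmetric band [1-scaleDown, 1+scaleUp]. A standalone restatement (not from the patch) with the numbers used by the scale-down test cases that follow: at 3 replicas with 650m per-pod usage against a 1000m target, the usage ratio is 0.65; that sits inside a 36% scale-down tolerance (band floor 0.64) but outside a 34% one (floor 0.66), where the calculator proposes ceil(3 * 0.65) = 2 replicas.

package main

import (
	"fmt"
	"math"
)

type tolerances struct{ scaleDown, scaleUp float64 }

// isWithin holds when usageRatio falls inside [1-scaleDown, 1+scaleUp],
// mirroring Tolerances.isWithin in the patch.
func (t tolerances) isWithin(usageRatio float64) bool {
	return (1.0-t.scaleDown) <= usageRatio && usageRatio <= (1.0+t.scaleUp)
}

func main() {
	const usageRatio = 0.65 // 650m per-pod usage vs a 1000m target
	for _, t := range []tolerances{{0.36, 0.1}, {0.34, 0.1}} {
		desired := 3
		if !t.isWithin(usageRatio) {
			desired = int(math.Ceil(3 * usageRatio)) // ceil(1.95) = 2
		}
		fmt.Printf("%+v -> desiredReplicas=%d\n", t, desired)
	}
}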
diff --git a/pkg/controller/podautoscaler/replica_calculator_test.go b/pkg/controller/podautoscaler/replica_calculator_test.go
index 2ca484d96e7..a91ffa4a09f 100644
--- a/pkg/controller/podautoscaler/replica_calculator_test.go
+++ b/pkg/controller/podautoscaler/replica_calculator_test.go
@@ -90,9 +90,10 @@ type replicaCalcTestCase struct {
 
 	timestamp time.Time
 
-	resource  *resourceInfo
-	metric    *metricInfo
-	container string
+	tolerances *Tolerances
+	resource   *resourceInfo
+	metric     *metricInfo
+	container  string
 
 	podReadiness []v1.ConditionStatus
 	podStartTime []metav1.Time
@@ -343,7 +344,7 @@ func (tc *replicaCalcTestCase) runTest(t *testing.T) {
 	informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc())
 	informer := informerFactory.Core().V1().Pods()
 
-	replicaCalc := NewReplicaCalculator(metricsClient, informer.Lister(), defaultTestingTolerance, defaultTestingCPUInitializationPeriod, defaultTestingDelayOfInitialReadinessStatus)
+	replicaCalc := NewReplicaCalculator(metricsClient, informer.Lister(), defaultTestingCPUInitializationPeriod, defaultTestingDelayOfInitialReadinessStatus)
 
 	stop := make(chan struct{})
 	defer close(stop)
@@ -357,8 +358,14 @@ func (tc *replicaCalcTestCase) runTest(t *testing.T) {
 	})
 	require.NoError(t, err, "something went horribly wrong...")
 
+	// Use default if tolerances are not specified in the test case.
+	tolerances := Tolerances{defaultTestingTolerance, defaultTestingTolerance}
+	if tc.tolerances != nil {
+		tolerances = *tc.tolerances
+	}
+
 	if tc.resource != nil {
-		outReplicas, outUtilization, outRawValue, outTimestamp, err := replicaCalc.GetResourceReplicas(context.TODO(), tc.currentReplicas, tc.resource.targetUtilization, tc.resource.name, testNamespace, selector, tc.container)
+		outReplicas, outUtilization, outRawValue, outTimestamp, err := replicaCalc.GetResourceReplicas(context.TODO(), tc.currentReplicas, tc.resource.targetUtilization, tc.resource.name, tolerances, testNamespace, selector, tc.container)
 		if tc.expectedError != nil {
 			require.Error(t, err, "there should be an error calculating the replica count")
 
@@ -381,12 +388,12 @@ func (tc *replicaCalcTestCase) runTest(t *testing.T) {
 		if tc.metric.singleObject == nil {
 			t.Fatal("Metric specified as objectMetric but metric.singleObject is nil.")
 		}
-		outReplicas, outUsage, outTimestamp, err = replicaCalc.GetObjectMetricReplicas(tc.currentReplicas, tc.metric.targetUsage, tc.metric.name, testNamespace, tc.metric.singleObject, selector, nil)
+		outReplicas, outUsage, outTimestamp, err = replicaCalc.GetObjectMetricReplicas(tc.currentReplicas, tc.metric.targetUsage, tc.metric.name, tolerances, testNamespace, tc.metric.singleObject, selector, nil)
 	case objectPerPodMetric:
 		if tc.metric.singleObject == nil {
 			t.Fatal("Metric specified as objectMetric but metric.singleObject is nil.")
 		}
-		outReplicas, outUsage, outTimestamp, err = replicaCalc.GetObjectPerPodMetricReplicas(tc.currentReplicas, tc.metric.perPodTargetUsage, tc.metric.name, testNamespace, tc.metric.singleObject, nil)
+		outReplicas, outUsage, outTimestamp, err = replicaCalc.GetObjectPerPodMetricReplicas(tc.currentReplicas, tc.metric.perPodTargetUsage, tc.metric.name, tolerances, testNamespace, tc.metric.singleObject, nil)
 	case externalMetric:
 		if tc.metric.selector == nil {
 			t.Fatal("Metric specified as externalMetric but metric.selector is nil.")
@@ -394,7 +401,7 @@ func (tc *replicaCalcTestCase) runTest(t *testing.T) {
 		if tc.metric.targetUsage <= 0 {
 			t.Fatalf("Metric specified as externalMetric but metric.targetUsage is %d which is <=0.", tc.metric.targetUsage)
 		}
-		outReplicas, outUsage, outTimestamp, err = replicaCalc.GetExternalMetricReplicas(tc.currentReplicas, tc.metric.targetUsage, tc.metric.name, testNamespace, tc.metric.selector, selector)
+		outReplicas, outUsage, outTimestamp, err = replicaCalc.GetExternalMetricReplicas(tc.currentReplicas, tc.metric.targetUsage, tc.metric.name, tolerances, testNamespace, tc.metric.selector, selector)
 	case externalPerPodMetric:
 		if tc.metric.selector == nil {
 			t.Fatal("Metric specified as externalPerPodMetric but metric.selector is nil.")
@@ -403,9 +410,9 @@ func (tc *replicaCalcTestCase) runTest(t *testing.T) {
 			t.Fatalf("Metric specified as externalPerPodMetric but metric.perPodTargetUsage is %d which is <=0.", tc.metric.perPodTargetUsage)
 		}
 
-		outReplicas, outUsage, outTimestamp, err = replicaCalc.GetExternalPerPodMetricReplicas(tc.currentReplicas, tc.metric.perPodTargetUsage, tc.metric.name, testNamespace, tc.metric.selector)
+		outReplicas, outUsage, outTimestamp, err = replicaCalc.GetExternalPerPodMetricReplicas(tc.currentReplicas, tc.metric.perPodTargetUsage, tc.metric.name, tolerances, testNamespace, tc.metric.selector)
 	case podMetric:
-		outReplicas, outUsage, outTimestamp, err = replicaCalc.GetMetricReplicas(tc.currentReplicas, tc.metric.targetUsage, tc.metric.name, testNamespace, selector, nil)
+		outReplicas, outUsage, outTimestamp, err = replicaCalc.GetMetricReplicas(tc.currentReplicas, tc.metric.targetUsage, tc.metric.name, tolerances, testNamespace, selector, nil)
 	default:
 		t.Fatalf("Unknown metric type: %d", tc.metric.metricType)
 	}
@@ -1263,6 +1270,188 @@ func TestReplicaCalcTolerancePerPodCMExternal(t *testing.T) {
 	tc.runTest(t)
 }
 
+func TestReplicaCalcConfigurableTolerance(t *testing.T) {
+	testCases := []struct {
+		name string
+		replicaCalcTestCase
+	}{
+		{
+			name: "Outside of a 0% tolerance",
+			replicaCalcTestCase: replicaCalcTestCase{
+				tolerances:       &Tolerances{0., 0.},
+				currentReplicas:  3,
+				expectedReplicas: 4,
+				resource: &resourceInfo{
+					name:                v1.ResourceCPU,
+					requests:            []resource.Quantity{resource.MustParse("0.9"), resource.MustParse("1.0"), resource.MustParse("1.1")},
+					levels:              makePodMetricLevels(909, 1010, 1111),
+					targetUtilization:   100,
+					expectedUtilization: 101,
+					expectedValue:       numContainersPerPod * 1010,
+				},
+			},
+		},
+		{
+			name: "Within a 200% scale-up tolerance",
+			replicaCalcTestCase: replicaCalcTestCase{
+				tolerances:       &Tolerances{defaultTestingTolerance, 2.},
+				currentReplicas:  3,
+				expectedReplicas: 3,
+				resource: &resourceInfo{
+					name:                v1.ResourceCPU,
+					requests:            []resource.Quantity{resource.MustParse("0.9"), resource.MustParse("1.0"), resource.MustParse("1.1")},
+					levels:              makePodMetricLevels(1890, 1910, 1900),
+					targetUtilization:   100,
+					expectedUtilization: 190,
+					expectedValue:       numContainersPerPod * 1900,
+				},
+			},
+		},
+		{
+			name: "Outside of an 8% scale-up tolerance (and superfluous scale-down tolerance)",
+			replicaCalcTestCase: replicaCalcTestCase{
+				tolerances:       &Tolerances{2., .08},
+				currentReplicas:  3,
+				expectedReplicas: 4,
+				resource: &resourceInfo{
+					name:                v1.ResourceCPU,
+					requests:            []resource.Quantity{resource.MustParse("0.9"), resource.MustParse("1.0"), resource.MustParse("1.1")},
+					levels:              makePodMetricLevels(1100, 1080, 1090),
+					targetUtilization:   100,
+					expectedUtilization: 109,
+					expectedValue:       numContainersPerPod * 1090,
+				},
+			},
+		},
+		{
+			name: "Within a 36% scale-down tolerance",
+			replicaCalcTestCase: replicaCalcTestCase{
+				tolerances:       &Tolerances{.36, defaultTestingTolerance},
+				currentReplicas:  3,
+				expectedReplicas: 3,
+				resource: &resourceInfo{
+					name:                v1.ResourceCPU,
+					requests:            []resource.Quantity{resource.MustParse("0.9"), resource.MustParse("1.0"), resource.MustParse("1.1")},
+					levels:              makePodMetricLevels(660, 640, 650),
+					targetUtilization:   100,
+					expectedUtilization: 65,
+					expectedValue:       numContainersPerPod * 650,
+				},
+			},
+		},
+		{
+			name: "Outside of a 34% scale-down tolerance",
+			replicaCalcTestCase: replicaCalcTestCase{
+				tolerances:       &Tolerances{.34, defaultTestingTolerance},
+				currentReplicas:  3,
+				expectedReplicas: 2,
+				resource: &resourceInfo{
+					name:                v1.ResourceCPU,
+					requests:            []resource.Quantity{resource.MustParse("0.9"), resource.MustParse("1.0"), resource.MustParse("1.1")},
+					levels:              makePodMetricLevels(660, 640, 650),
+					targetUtilization:   100,
+					expectedUtilization: 65,
+					expectedValue:       numContainersPerPod * 650,
+				},
+			},
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.name, tc.runTest)
+	}
+}
+
+func TestReplicaCalcConfigurableToleranceCM(t *testing.T) {
+	tc := replicaCalcTestCase{
+		tolerances:       &Tolerances{defaultTestingTolerance, .01},
+		currentReplicas:  3,
+		expectedReplicas: 4,
+		metric: &metricInfo{
+			name:          "qps",
+			levels:        []int64{20000, 21000, 21000},
+			targetUsage:   20000,
+			expectedUsage: 20666,
+			metricType:    podMetric,
+		},
+	}
+	tc.runTest(t)
+}
+
+func TestReplicaCalcConfigurableToleranceCMObject(t *testing.T) {
+	tc := replicaCalcTestCase{
+		tolerances:       &Tolerances{defaultTestingTolerance, .01},
+		currentReplicas:  3,
+		expectedReplicas: 4,
+		metric: &metricInfo{
+			name:          "qps",
+			levels:        []int64{20666},
+			targetUsage:   20000,
+			expectedUsage: 20666,
+			singleObject: &autoscalingv2.CrossVersionObjectReference{
+				Kind:       "Deployment",
+				APIVersion: "apps/v1",
+				Name:       "some-deployment",
+			},
+		},
+	}
+	tc.runTest(t)
+}
+
+func TestReplicaCalcConfigurableTolerancePerPodCMObject(t *testing.T) {
+	tc := replicaCalcTestCase{
+		tolerances:       &Tolerances{defaultTestingTolerance, .01},
+		currentReplicas:  4,
+		expectedReplicas: 5,
+		metric: &metricInfo{
+			metricType:        objectPerPodMetric,
+			name:              "qps",
+			levels:            []int64{20208},
+			perPodTargetUsage: 5000,
+			expectedUsage:     5052,
+			singleObject: &autoscalingv2.CrossVersionObjectReference{
+				Kind:       "Deployment",
+				APIVersion: "apps/v1",
+				Name:       "some-deployment",
+			},
+		},
+	}
+	tc.runTest(t)
+}
+
+func TestReplicaCalcConfigurableToleranceCMExternal(t *testing.T) {
+	tc := replicaCalcTestCase{
+		tolerances:       &Tolerances{defaultTestingTolerance, .01},
+		currentReplicas:  3,
+		expectedReplicas: 4,
+		metric: &metricInfo{
+			name:          "qps",
+			levels:        []int64{8900},
+			targetUsage:   8800,
+			expectedUsage: 8900,
+			selector:      &metav1.LabelSelector{MatchLabels: map[string]string{"label": "value"}},
+			metricType:    externalMetric,
+		},
+	}
+	tc.runTest(t)
+}
+
+func TestReplicaCalcConfigurableTolerancePerPodCMExternal(t *testing.T) {
+	tc := replicaCalcTestCase{
+		tolerances:       &Tolerances{defaultTestingTolerance, .01},
+		currentReplicas:  3,
+		expectedReplicas: 4,
+		metric: &metricInfo{
+			name:              "qps",
+			levels:            []int64{8600},
+			perPodTargetUsage: 2800,
+			expectedUsage:     2867,
+			selector:          &metav1.LabelSelector{MatchLabels: map[string]string{"label": "value"}},
+			metricType:        externalPerPodMetric,
+		},
+	}
+	tc.runTest(t)
+}
+
 func TestReplicaCalcSuperfluousMetrics(t *testing.T) {
 	tc := replicaCalcTestCase{
 		currentReplicas: 4,