Add configurable tolerance logic.

This commit is contained in:
Jean-Marc François 2025-03-21 16:57:31 -04:00
parent 11b6e2a375
commit 2dd9eda47f
4 changed files with 377 additions and 44 deletions

View File

@ -37,6 +37,7 @@ import (
"k8s.io/apimachinery/pkg/runtime/schema"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
autoscalinginformers "k8s.io/client-go/informers/autoscaling/v2"
coreinformers "k8s.io/client-go/informers/core/v1"
"k8s.io/client-go/kubernetes/scheme"
@ -53,6 +54,7 @@ import (
metricsclient "k8s.io/kubernetes/pkg/controller/podautoscaler/metrics"
"k8s.io/kubernetes/pkg/controller/podautoscaler/monitor"
"k8s.io/kubernetes/pkg/controller/util/selectors"
"k8s.io/kubernetes/pkg/features"
)
var (
@ -86,6 +88,7 @@ type HorizontalController struct {
hpaNamespacer autoscalingclient.HorizontalPodAutoscalersGetter
mapper apimeta.RESTMapper
tolerance float64
replicaCalc *ReplicaCalculator
eventRecorder record.EventRecorder
@ -146,6 +149,7 @@ func NewHorizontalController(
eventRecorder: recorder,
scaleNamespacer: scaleNamespacer,
hpaNamespacer: hpaNamespacer,
tolerance: tolerance,
downscaleStabilisationWindow: downscaleStabilisationWindow,
monitor: monitor.New(),
queue: workqueue.NewTypedRateLimitingQueueWithConfig(
@ -181,7 +185,6 @@ func NewHorizontalController(
replicaCalc := NewReplicaCalculator(
metricsClient,
hpaController.podLister,
tolerance,
cpuInitializationPeriod,
delayOfInitialReadinessStatus,
)
@ -539,8 +542,9 @@ func (a *HorizontalController) computeStatusForObjectMetric(specReplicas, status
},
},
}
tolerances := a.tolerancesForHpa(hpa)
if metricSpec.Object.Target.Type == autoscalingv2.ValueMetricType && metricSpec.Object.Target.Value != nil {
replicaCountProposal, usageProposal, timestampProposal, err := a.replicaCalc.GetObjectMetricReplicas(specReplicas, metricSpec.Object.Target.Value.MilliValue(), metricSpec.Object.Metric.Name, hpa.Namespace, &metricSpec.Object.DescribedObject, selector, metricSelector)
replicaCountProposal, usageProposal, timestampProposal, err := a.replicaCalc.GetObjectMetricReplicas(specReplicas, metricSpec.Object.Target.Value.MilliValue(), metricSpec.Object.Metric.Name, tolerances, hpa.Namespace, &metricSpec.Object.DescribedObject, selector, metricSelector)
if err != nil {
condition := a.getUnableComputeReplicaCountCondition(hpa, "FailedGetObjectMetric", err)
return 0, timestampProposal, "", condition, err
@ -549,7 +553,7 @@ func (a *HorizontalController) computeStatusForObjectMetric(specReplicas, status
*status = metricStatus
return replicaCountProposal, timestampProposal, fmt.Sprintf("%s metric %s", metricSpec.Object.DescribedObject.Kind, metricSpec.Object.Metric.Name), autoscalingv2.HorizontalPodAutoscalerCondition{}, nil
} else if metricSpec.Object.Target.Type == autoscalingv2.AverageValueMetricType && metricSpec.Object.Target.AverageValue != nil {
replicaCountProposal, usageProposal, timestampProposal, err := a.replicaCalc.GetObjectPerPodMetricReplicas(statusReplicas, metricSpec.Object.Target.AverageValue.MilliValue(), metricSpec.Object.Metric.Name, hpa.Namespace, &metricSpec.Object.DescribedObject, metricSelector)
replicaCountProposal, usageProposal, timestampProposal, err := a.replicaCalc.GetObjectPerPodMetricReplicas(statusReplicas, metricSpec.Object.Target.AverageValue.MilliValue(), metricSpec.Object.Metric.Name, tolerances, hpa.Namespace, &metricSpec.Object.DescribedObject, metricSelector)
if err != nil {
condition := a.getUnableComputeReplicaCountCondition(hpa, "FailedGetObjectMetric", err)
return 0, time.Time{}, "", condition, fmt.Errorf("failed to get %s object metric: %v", metricSpec.Object.Metric.Name, err)
@ -566,7 +570,8 @@ func (a *HorizontalController) computeStatusForObjectMetric(specReplicas, status
// computeStatusForPodsMetric computes the desired number of replicas for the specified metric of type PodsMetricSourceType.
func (a *HorizontalController) computeStatusForPodsMetric(currentReplicas int32, metricSpec autoscalingv2.MetricSpec, hpa *autoscalingv2.HorizontalPodAutoscaler, selector labels.Selector, status *autoscalingv2.MetricStatus, metricSelector labels.Selector) (replicaCountProposal int32, timestampProposal time.Time, metricNameProposal string, condition autoscalingv2.HorizontalPodAutoscalerCondition, err error) {
replicaCountProposal, usageProposal, timestampProposal, err := a.replicaCalc.GetMetricReplicas(currentReplicas, metricSpec.Pods.Target.AverageValue.MilliValue(), metricSpec.Pods.Metric.Name, hpa.Namespace, selector, metricSelector)
tolerances := a.tolerancesForHpa(hpa)
replicaCountProposal, usageProposal, timestampProposal, err := a.replicaCalc.GetMetricReplicas(currentReplicas, metricSpec.Pods.Target.AverageValue.MilliValue(), metricSpec.Pods.Metric.Name, tolerances, hpa.Namespace, selector, metricSelector)
if err != nil {
condition = a.getUnableComputeReplicaCountCondition(hpa, "FailedGetPodsMetric", err)
return 0, timestampProposal, "", condition, err
@ -588,12 +593,14 @@ func (a *HorizontalController) computeStatusForPodsMetric(currentReplicas int32,
}
func (a *HorizontalController) computeStatusForResourceMetricGeneric(ctx context.Context, currentReplicas int32, target autoscalingv2.MetricTarget,
resourceName v1.ResourceName, namespace string, container string, selector labels.Selector, sourceType autoscalingv2.MetricSourceType) (replicaCountProposal int32,
resourceName v1.ResourceName, hpa *autoscalingv2.HorizontalPodAutoscaler, container string, selector labels.Selector, sourceType autoscalingv2.MetricSourceType) (replicaCountProposal int32,
metricStatus *autoscalingv2.MetricValueStatus, timestampProposal time.Time, metricNameProposal string,
condition autoscalingv2.HorizontalPodAutoscalerCondition, err error) {
namespace := hpa.Namespace
tolerances := a.tolerancesForHpa(hpa)
if target.AverageValue != nil {
var rawProposal int64
replicaCountProposal, rawProposal, timestampProposal, err := a.replicaCalc.GetRawResourceReplicas(ctx, currentReplicas, target.AverageValue.MilliValue(), resourceName, namespace, selector, container)
replicaCountProposal, rawProposal, timestampProposal, err := a.replicaCalc.GetRawResourceReplicas(ctx, currentReplicas, target.AverageValue.MilliValue(), resourceName, tolerances, namespace, selector, container)
if err != nil {
return 0, nil, time.Time{}, "", condition, fmt.Errorf("failed to get %s usage: %v", resourceName, err)
}
@ -610,7 +617,7 @@ func (a *HorizontalController) computeStatusForResourceMetricGeneric(ctx context
}
targetUtilization := *target.AverageUtilization
replicaCountProposal, percentageProposal, rawProposal, timestampProposal, err := a.replicaCalc.GetResourceReplicas(ctx, currentReplicas, targetUtilization, resourceName, namespace, selector, container)
replicaCountProposal, percentageProposal, rawProposal, timestampProposal, err := a.replicaCalc.GetResourceReplicas(ctx, currentReplicas, targetUtilization, resourceName, tolerances, namespace, selector, container)
if err != nil {
return 0, nil, time.Time{}, "", condition, fmt.Errorf("failed to get %s utilization: %v", resourceName, err)
}
@ -630,7 +637,7 @@ func (a *HorizontalController) computeStatusForResourceMetricGeneric(ctx context
func (a *HorizontalController) computeStatusForResourceMetric(ctx context.Context, currentReplicas int32, metricSpec autoscalingv2.MetricSpec, hpa *autoscalingv2.HorizontalPodAutoscaler,
selector labels.Selector, status *autoscalingv2.MetricStatus) (replicaCountProposal int32, timestampProposal time.Time,
metricNameProposal string, condition autoscalingv2.HorizontalPodAutoscalerCondition, err error) {
replicaCountProposal, metricValueStatus, timestampProposal, metricNameProposal, condition, err := a.computeStatusForResourceMetricGeneric(ctx, currentReplicas, metricSpec.Resource.Target, metricSpec.Resource.Name, hpa.Namespace, "", selector, autoscalingv2.ResourceMetricSourceType)
replicaCountProposal, metricValueStatus, timestampProposal, metricNameProposal, condition, err := a.computeStatusForResourceMetricGeneric(ctx, currentReplicas, metricSpec.Resource.Target, metricSpec.Resource.Name, hpa, "", selector, autoscalingv2.ResourceMetricSourceType)
if err != nil {
condition = a.getUnableComputeReplicaCountCondition(hpa, "FailedGetResourceMetric", err)
return replicaCountProposal, timestampProposal, metricNameProposal, condition, err
@ -649,7 +656,7 @@ func (a *HorizontalController) computeStatusForResourceMetric(ctx context.Contex
func (a *HorizontalController) computeStatusForContainerResourceMetric(ctx context.Context, currentReplicas int32, metricSpec autoscalingv2.MetricSpec, hpa *autoscalingv2.HorizontalPodAutoscaler,
selector labels.Selector, status *autoscalingv2.MetricStatus) (replicaCountProposal int32, timestampProposal time.Time,
metricNameProposal string, condition autoscalingv2.HorizontalPodAutoscalerCondition, err error) {
replicaCountProposal, metricValueStatus, timestampProposal, metricNameProposal, condition, err := a.computeStatusForResourceMetricGeneric(ctx, currentReplicas, metricSpec.ContainerResource.Target, metricSpec.ContainerResource.Name, hpa.Namespace, metricSpec.ContainerResource.Container, selector, autoscalingv2.ContainerResourceMetricSourceType)
replicaCountProposal, metricValueStatus, timestampProposal, metricNameProposal, condition, err := a.computeStatusForResourceMetricGeneric(ctx, currentReplicas, metricSpec.ContainerResource.Target, metricSpec.ContainerResource.Name, hpa, metricSpec.ContainerResource.Container, selector, autoscalingv2.ContainerResourceMetricSourceType)
if err != nil {
condition = a.getUnableComputeReplicaCountCondition(hpa, "FailedGetContainerResourceMetric", err)
return replicaCountProposal, timestampProposal, metricNameProposal, condition, err
@ -667,8 +674,9 @@ func (a *HorizontalController) computeStatusForContainerResourceMetric(ctx conte
// computeStatusForExternalMetric computes the desired number of replicas for the specified metric of type ExternalMetricSourceType.
func (a *HorizontalController) computeStatusForExternalMetric(specReplicas, statusReplicas int32, metricSpec autoscalingv2.MetricSpec, hpa *autoscalingv2.HorizontalPodAutoscaler, selector labels.Selector, status *autoscalingv2.MetricStatus) (replicaCountProposal int32, timestampProposal time.Time, metricNameProposal string, condition autoscalingv2.HorizontalPodAutoscalerCondition, err error) {
tolerances := a.tolerancesForHpa(hpa)
if metricSpec.External.Target.AverageValue != nil {
replicaCountProposal, usageProposal, timestampProposal, err := a.replicaCalc.GetExternalPerPodMetricReplicas(statusReplicas, metricSpec.External.Target.AverageValue.MilliValue(), metricSpec.External.Metric.Name, hpa.Namespace, metricSpec.External.Metric.Selector)
replicaCountProposal, usageProposal, timestampProposal, err := a.replicaCalc.GetExternalPerPodMetricReplicas(statusReplicas, metricSpec.External.Target.AverageValue.MilliValue(), metricSpec.External.Metric.Name, tolerances, hpa.Namespace, metricSpec.External.Metric.Selector)
if err != nil {
condition = a.getUnableComputeReplicaCountCondition(hpa, "FailedGetExternalMetric", err)
return 0, time.Time{}, "", condition, fmt.Errorf("failed to get %s external metric: %v", metricSpec.External.Metric.Name, err)
@ -688,7 +696,7 @@ func (a *HorizontalController) computeStatusForExternalMetric(specReplicas, stat
return replicaCountProposal, timestampProposal, fmt.Sprintf("external metric %s(%+v)", metricSpec.External.Metric.Name, metricSpec.External.Metric.Selector), autoscalingv2.HorizontalPodAutoscalerCondition{}, nil
}
if metricSpec.External.Target.Value != nil {
replicaCountProposal, usageProposal, timestampProposal, err := a.replicaCalc.GetExternalMetricReplicas(specReplicas, metricSpec.External.Target.Value.MilliValue(), metricSpec.External.Metric.Name, hpa.Namespace, metricSpec.External.Metric.Selector, selector)
replicaCountProposal, usageProposal, timestampProposal, err := a.replicaCalc.GetExternalMetricReplicas(specReplicas, metricSpec.External.Target.Value.MilliValue(), metricSpec.External.Metric.Name, tolerances, hpa.Namespace, metricSpec.External.Metric.Selector, selector)
if err != nil {
condition = a.getUnableComputeReplicaCountCondition(hpa, "FailedGetExternalMetric", err)
return 0, time.Time{}, "", condition, fmt.Errorf("failed to get external metric %s: %v", metricSpec.External.Metric.Name, err)
@ -835,6 +843,7 @@ func (a *HorizontalController) reconcileAutoscaler(ctx context.Context, hpaShare
logger.V(4).Info("Proposing desired replicas",
"desiredReplicas", metricDesiredReplicas,
"metric", metricName,
"tolerances", a.tolerancesForHpa(hpa),
"timestamp", metricTimestamp,
"scaleTarget", reference)
@ -1384,6 +1393,25 @@ func (a *HorizontalController) updateStatus(ctx context.Context, hpa *autoscalin
return nil
}
// tolerancesForHpa returns the metrics usage ratio tolerances for a given HPA.
// It ignores configurable tolerances set in the HPA spec.behavior field if the
// HPAConfigurableTolerance feature gate is disabled.
func (a *HorizontalController) tolerancesForHpa(hpa *autoscalingv2.HorizontalPodAutoscaler) Tolerances {
t := Tolerances{a.tolerance, a.tolerance}
behavior := hpa.Spec.Behavior
allowConfigurableTolerances := utilfeature.DefaultFeatureGate.Enabled(features.HPAConfigurableTolerance)
if behavior == nil || !allowConfigurableTolerances {
return t
}
if behavior.ScaleDown != nil && behavior.ScaleDown.Tolerance != nil {
t.scaleDown = behavior.ScaleDown.Tolerance.AsApproximateFloat64()
}
if behavior.ScaleUp != nil && behavior.ScaleUp.Tolerance != nil {
t.scaleUp = behavior.ScaleUp.Tolerance.AsApproximateFloat64()
}
return t
}
// setCondition sets the specific condition type on the given HPA to the specified value with the given reason
// and message. The message and args are treated like a format string. The condition will be added if it is
// not present.

View File

@ -37,16 +37,19 @@ import (
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apimachinery/pkg/watch"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
scalefake "k8s.io/client-go/scale/fake"
core "k8s.io/client-go/testing"
featuregatetesting "k8s.io/component-base/featuregate/testing"
"k8s.io/kubernetes/pkg/api/legacyscheme"
autoscalingapiv2 "k8s.io/kubernetes/pkg/apis/autoscaling/v2"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/controller/podautoscaler/metrics"
"k8s.io/kubernetes/pkg/controller/podautoscaler/monitor"
"k8s.io/kubernetes/pkg/controller/util/selectors"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/test/utils/ktesting"
cmapi "k8s.io/metrics/pkg/apis/custom_metrics/v1beta2"
emapi "k8s.io/metrics/pkg/apis/external_metrics/v1beta1"
@ -2216,6 +2219,107 @@ func TestTolerance(t *testing.T) {
tc.runTest(t)
}
func TestConfigurableTolerance(t *testing.T) {
onePercentQuantity := resource.MustParse("0.01")
ninetyPercentQuantity := resource.MustParse("0.9")
testCases := []struct {
name string
configurableToleranceGate bool
replicas int32
scaleUpRules *autoscalingv2.HPAScalingRules
scaleDownRules *autoscalingv2.HPAScalingRules
reportedLevels []uint64
reportedCPURequests []resource.Quantity
expectedDesiredReplicas int32
expectedConditionReason string
expectedActionLabel monitor.ActionLabel
}{
{
name: "Scaling up because of a 1% configurable tolerance",
configurableToleranceGate: true,
replicas: 3,
scaleUpRules: &autoscalingv2.HPAScalingRules{
Tolerance: &onePercentQuantity,
},
reportedLevels: []uint64{1010, 1030, 1020},
reportedCPURequests: []resource.Quantity{resource.MustParse("0.9"), resource.MustParse("1.0"), resource.MustParse("1.1")},
expectedDesiredReplicas: 4,
expectedConditionReason: "SucceededRescale",
expectedActionLabel: monitor.ActionLabelScaleUp,
},
{
name: "No scale-down because of a 90% configurable tolerance",
configurableToleranceGate: true,
replicas: 3,
scaleDownRules: &autoscalingv2.HPAScalingRules{
Tolerance: &ninetyPercentQuantity,
},
reportedLevels: []uint64{300, 300, 300},
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
expectedDesiredReplicas: 3,
expectedConditionReason: "ReadyForNewScale",
expectedActionLabel: monitor.ActionLabelNone,
},
{
name: "No scaling because of the large default tolerance",
configurableToleranceGate: true,
replicas: 3,
reportedLevels: []uint64{1010, 1030, 1020},
reportedCPURequests: []resource.Quantity{resource.MustParse("0.9"), resource.MustParse("1.0"), resource.MustParse("1.1")},
expectedDesiredReplicas: 3,
expectedConditionReason: "ReadyForNewScale",
expectedActionLabel: monitor.ActionLabelNone,
},
{
name: "No scaling because the configurable tolerance is ignored as the feature gate is disabled",
configurableToleranceGate: false,
replicas: 3,
scaleUpRules: &autoscalingv2.HPAScalingRules{
Tolerance: &onePercentQuantity,
},
reportedLevels: []uint64{1010, 1030, 1020},
reportedCPURequests: []resource.Quantity{resource.MustParse("0.9"), resource.MustParse("1.0"), resource.MustParse("1.1")},
expectedDesiredReplicas: 3,
expectedConditionReason: "ReadyForNewScale",
expectedActionLabel: monitor.ActionLabelNone,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.HPAConfigurableTolerance, tc.configurableToleranceGate)
tc := testCase{
minReplicas: 1,
maxReplicas: 5,
specReplicas: tc.replicas,
statusReplicas: tc.replicas,
scaleDownRules: tc.scaleDownRules,
scaleUpRules: tc.scaleUpRules,
expectedDesiredReplicas: tc.expectedDesiredReplicas,
CPUTarget: 100,
reportedLevels: tc.reportedLevels,
reportedCPURequests: tc.reportedCPURequests,
useMetricsAPI: true,
expectedConditions: statusOkWithOverrides(autoscalingv2.HorizontalPodAutoscalerCondition{
Type: autoscalingv2.AbleToScale,
Status: v1.ConditionTrue,
Reason: tc.expectedConditionReason,
}),
expectedReportedReconciliationActionLabel: tc.expectedActionLabel,
expectedReportedReconciliationErrorLabel: monitor.ErrorLabelNone,
expectedReportedMetricComputationActionLabels: map[autoscalingv2.MetricSourceType]monitor.ActionLabel{
autoscalingv2.ResourceMetricSourceType: tc.expectedActionLabel,
},
expectedReportedMetricComputationErrorLabels: map[autoscalingv2.MetricSourceType]monitor.ErrorLabel{
autoscalingv2.ResourceMetricSourceType: monitor.ErrorLabelNone,
},
}
tc.runTest(t)
})
}
}
func TestToleranceCM(t *testing.T) {
averageValue := resource.MustParse("20.0")
tc := testCase{

View File

@ -40,21 +40,33 @@ const (
defaultTestingDelayOfInitialReadinessStatus = 10 * time.Second
)
// Tolerances contains metric usage ratio scale-up and scale-down tolerances.
type Tolerances struct {
scaleDown float64
scaleUp float64
}
func (t Tolerances) String() string {
return fmt.Sprintf("[down:%.1f%%, up:%.1f%%]", t.scaleDown*100., t.scaleUp*100.)
}
func (t Tolerances) isWithin(usageRatio float64) bool {
return (1.0-t.scaleDown) <= usageRatio && usageRatio <= (1.0+t.scaleUp)
}
// ReplicaCalculator bundles all needed information to calculate the target amount of replicas
type ReplicaCalculator struct {
metricsClient metricsclient.MetricsClient
podLister corelisters.PodLister
tolerance float64
cpuInitializationPeriod time.Duration
delayOfInitialReadinessStatus time.Duration
}
// NewReplicaCalculator creates a new ReplicaCalculator and passes all necessary information to the new instance
func NewReplicaCalculator(metricsClient metricsclient.MetricsClient, podLister corelisters.PodLister, tolerance float64, cpuInitializationPeriod, delayOfInitialReadinessStatus time.Duration) *ReplicaCalculator {
func NewReplicaCalculator(metricsClient metricsclient.MetricsClient, podLister corelisters.PodLister, cpuInitializationPeriod, delayOfInitialReadinessStatus time.Duration) *ReplicaCalculator {
return &ReplicaCalculator{
metricsClient: metricsClient,
podLister: podLister,
tolerance: tolerance,
cpuInitializationPeriod: cpuInitializationPeriod,
delayOfInitialReadinessStatus: delayOfInitialReadinessStatus,
}
@ -62,7 +74,7 @@ func NewReplicaCalculator(metricsClient metricsclient.MetricsClient, podLister c
// GetResourceReplicas calculates the desired replica count based on a target resource utilization percentage
// of the given resource for pods matching the given selector in the given namespace, and the current replica count
func (c *ReplicaCalculator) GetResourceReplicas(ctx context.Context, currentReplicas int32, targetUtilization int32, resource v1.ResourceName, namespace string, selector labels.Selector, container string) (replicaCount int32, utilization int32, rawUtilization int64, timestamp time.Time, err error) {
func (c *ReplicaCalculator) GetResourceReplicas(ctx context.Context, currentReplicas int32, targetUtilization int32, resource v1.ResourceName, tolerances Tolerances, namespace string, selector labels.Selector, container string) (replicaCount int32, utilization int32, rawUtilization int64, timestamp time.Time, err error) {
metrics, timestamp, err := c.metricsClient.GetResourceMetric(ctx, resource, namespace, selector, container)
if err != nil {
return 0, 0, 0, time.Time{}, fmt.Errorf("unable to get metrics for resource %s: %v", resource, err)
@ -94,7 +106,7 @@ func (c *ReplicaCalculator) GetResourceReplicas(ctx context.Context, currentRepl
scaleUpWithUnready := len(unreadyPods) > 0 && usageRatio > 1.0
if !scaleUpWithUnready && len(missingPods) == 0 {
if math.Abs(1.0-usageRatio) <= c.tolerance {
if tolerances.isWithin(usageRatio) {
// return the current replicas if the change would be too small
return currentReplicas, utilization, rawUtilization, timestamp, nil
}
@ -132,7 +144,7 @@ func (c *ReplicaCalculator) GetResourceReplicas(ctx context.Context, currentRepl
return 0, utilization, rawUtilization, time.Time{}, err
}
if math.Abs(1.0-newUsageRatio) <= c.tolerance || (usageRatio < 1.0 && newUsageRatio > 1.0) || (usageRatio > 1.0 && newUsageRatio < 1.0) {
if tolerances.isWithin(newUsageRatio) || (usageRatio < 1.0 && newUsageRatio > 1.0) || (usageRatio > 1.0 && newUsageRatio < 1.0) {
// return the current replicas if the change would be too small,
// or if the new usage ratio would cause a change in scale direction
return currentReplicas, utilization, rawUtilization, timestamp, nil
@ -151,31 +163,31 @@ func (c *ReplicaCalculator) GetResourceReplicas(ctx context.Context, currentRepl
// GetRawResourceReplicas calculates the desired replica count based on a target resource usage (as a raw milli-value)
// for pods matching the given selector in the given namespace, and the current replica count
func (c *ReplicaCalculator) GetRawResourceReplicas(ctx context.Context, currentReplicas int32, targetUsage int64, resource v1.ResourceName, namespace string, selector labels.Selector, container string) (replicaCount int32, usage int64, timestamp time.Time, err error) {
func (c *ReplicaCalculator) GetRawResourceReplicas(ctx context.Context, currentReplicas int32, targetUsage int64, resource v1.ResourceName, tolerances Tolerances, namespace string, selector labels.Selector, container string) (replicaCount int32, usage int64, timestamp time.Time, err error) {
metrics, timestamp, err := c.metricsClient.GetResourceMetric(ctx, resource, namespace, selector, container)
if err != nil {
return 0, 0, time.Time{}, fmt.Errorf("unable to get metrics for resource %s: %v", resource, err)
}
replicaCount, usage, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUsage, namespace, selector, resource)
replicaCount, usage, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUsage, tolerances, namespace, selector, resource)
return replicaCount, usage, timestamp, err
}
// GetMetricReplicas calculates the desired replica count based on a target metric usage
// (as a milli-value) for pods matching the given selector in the given namespace, and the
// current replica count
func (c *ReplicaCalculator) GetMetricReplicas(currentReplicas int32, targetUsage int64, metricName string, namespace string, selector labels.Selector, metricSelector labels.Selector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
func (c *ReplicaCalculator) GetMetricReplicas(currentReplicas int32, targetUsage int64, metricName string, tolerances Tolerances, namespace string, selector labels.Selector, metricSelector labels.Selector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
metrics, timestamp, err := c.metricsClient.GetRawMetric(metricName, namespace, selector, metricSelector)
if err != nil {
return 0, 0, time.Time{}, fmt.Errorf("unable to get metric %s: %v", metricName, err)
}
replicaCount, usage, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUsage, namespace, selector, v1.ResourceName(""))
replicaCount, usage, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUsage, tolerances, namespace, selector, v1.ResourceName(""))
return replicaCount, usage, timestamp, err
}
// calcPlainMetricReplicas calculates the desired replicas for plain (i.e. non-utilization percentage) metrics.
func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMetricsInfo, currentReplicas int32, targetUsage int64, namespace string, selector labels.Selector, resource v1.ResourceName) (replicaCount int32, usage int64, err error) {
func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMetricsInfo, currentReplicas int32, targetUsage int64, tolerances Tolerances, namespace string, selector labels.Selector, resource v1.ResourceName) (replicaCount int32, usage int64, err error) {
podList, err := c.podLister.Pods(namespace).List(selector)
if err != nil {
@ -199,7 +211,7 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
scaleUpWithUnready := len(unreadyPods) > 0 && usageRatio > 1.0
if !scaleUpWithUnready && len(missingPods) == 0 {
if math.Abs(1.0-usageRatio) <= c.tolerance {
if tolerances.isWithin(usageRatio) {
// return the current replicas if the change would be too small
return currentReplicas, usage, nil
}
@ -232,7 +244,7 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
// re-run the usage calculation with our new numbers
newUsageRatio, _ := metricsclient.GetMetricUsageRatio(metrics, targetUsage)
if math.Abs(1.0-newUsageRatio) <= c.tolerance || (usageRatio < 1.0 && newUsageRatio > 1.0) || (usageRatio > 1.0 && newUsageRatio < 1.0) {
if tolerances.isWithin(newUsageRatio) || (usageRatio < 1.0 && newUsageRatio > 1.0) || (usageRatio > 1.0 && newUsageRatio < 1.0) {
// return the current replicas if the change would be too small,
// or if the new usage ratio would cause a change in scale direction
return currentReplicas, usage, nil
@ -251,22 +263,22 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
// GetObjectMetricReplicas calculates the desired replica count based on a target metric usage (as a milli-value)
// for the given object in the given namespace, and the current replica count.
func (c *ReplicaCalculator) GetObjectMetricReplicas(currentReplicas int32, targetUsage int64, metricName string, namespace string, objectRef *autoscaling.CrossVersionObjectReference, selector labels.Selector, metricSelector labels.Selector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
func (c *ReplicaCalculator) GetObjectMetricReplicas(currentReplicas int32, targetUsage int64, metricName string, tolerances Tolerances, namespace string, objectRef *autoscaling.CrossVersionObjectReference, selector labels.Selector, metricSelector labels.Selector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
usage, _, err = c.metricsClient.GetObjectMetric(metricName, namespace, objectRef, metricSelector)
if err != nil {
return 0, 0, time.Time{}, fmt.Errorf("unable to get metric %s: %v on %s %s/%s", metricName, objectRef.Kind, namespace, objectRef.Name, err)
}
usageRatio := float64(usage) / float64(targetUsage)
replicaCount, timestamp, err = c.getUsageRatioReplicaCount(currentReplicas, usageRatio, namespace, selector)
replicaCount, timestamp, err = c.getUsageRatioReplicaCount(currentReplicas, usageRatio, tolerances, namespace, selector)
return replicaCount, usage, timestamp, err
}
// getUsageRatioReplicaCount calculates the desired replica count based on usageRatio and ready pods count.
// For currentReplicas=0 doesn't take into account ready pods count and tolerance to support scaling to zero pods.
func (c *ReplicaCalculator) getUsageRatioReplicaCount(currentReplicas int32, usageRatio float64, namespace string, selector labels.Selector) (replicaCount int32, timestamp time.Time, err error) {
func (c *ReplicaCalculator) getUsageRatioReplicaCount(currentReplicas int32, usageRatio float64, tolerances Tolerances, namespace string, selector labels.Selector) (replicaCount int32, timestamp time.Time, err error) {
if currentReplicas != 0 {
if math.Abs(1.0-usageRatio) <= c.tolerance {
if tolerances.isWithin(usageRatio) {
// return the current replicas if the change would be too small
return currentReplicas, timestamp, nil
}
@ -286,7 +298,7 @@ func (c *ReplicaCalculator) getUsageRatioReplicaCount(currentReplicas int32, usa
// GetObjectPerPodMetricReplicas calculates the desired replica count based on a target metric usage (as a milli-value)
// for the given object in the given namespace, and the current replica count.
func (c *ReplicaCalculator) GetObjectPerPodMetricReplicas(statusReplicas int32, targetAverageUsage int64, metricName string, namespace string, objectRef *autoscaling.CrossVersionObjectReference, metricSelector labels.Selector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
func (c *ReplicaCalculator) GetObjectPerPodMetricReplicas(statusReplicas int32, targetAverageUsage int64, metricName string, tolerances Tolerances, namespace string, objectRef *autoscaling.CrossVersionObjectReference, metricSelector labels.Selector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
usage, timestamp, err = c.metricsClient.GetObjectMetric(metricName, namespace, objectRef, metricSelector)
if err != nil {
return 0, 0, time.Time{}, fmt.Errorf("unable to get metric %s: %v on %s %s/%s", metricName, objectRef.Kind, namespace, objectRef.Name, err)
@ -294,7 +306,7 @@ func (c *ReplicaCalculator) GetObjectPerPodMetricReplicas(statusReplicas int32,
replicaCount = statusReplicas
usageRatio := float64(usage) / (float64(targetAverageUsage) * float64(replicaCount))
if math.Abs(1.0-usageRatio) > c.tolerance {
if !tolerances.isWithin(usageRatio) {
// update number of replicas if change is large enough
replicaCount = int32(math.Ceil(float64(usage) / float64(targetAverageUsage)))
}
@ -329,7 +341,7 @@ func (c *ReplicaCalculator) getReadyPodsCount(namespace string, selector labels.
// GetExternalMetricReplicas calculates the desired replica count based on a
// target metric value (as a milli-value) for the external metric in the given
// namespace, and the current replica count.
func (c *ReplicaCalculator) GetExternalMetricReplicas(currentReplicas int32, targetUsage int64, metricName, namespace string, metricSelector *metav1.LabelSelector, podSelector labels.Selector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
func (c *ReplicaCalculator) GetExternalMetricReplicas(currentReplicas int32, targetUsage int64, metricName string, tolerances Tolerances, namespace string, metricSelector *metav1.LabelSelector, podSelector labels.Selector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
metricLabelSelector, err := metav1.LabelSelectorAsSelector(metricSelector)
if err != nil {
return 0, 0, time.Time{}, err
@ -344,14 +356,14 @@ func (c *ReplicaCalculator) GetExternalMetricReplicas(currentReplicas int32, tar
}
usageRatio := float64(usage) / float64(targetUsage)
replicaCount, timestamp, err = c.getUsageRatioReplicaCount(currentReplicas, usageRatio, namespace, podSelector)
replicaCount, timestamp, err = c.getUsageRatioReplicaCount(currentReplicas, usageRatio, tolerances, namespace, podSelector)
return replicaCount, usage, timestamp, err
}
// GetExternalPerPodMetricReplicas calculates the desired replica count based on a
// target metric value per pod (as a milli-value) for the external metric in the
// given namespace, and the current replica count.
func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(statusReplicas int32, targetUsagePerPod int64, metricName, namespace string, metricSelector *metav1.LabelSelector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(statusReplicas int32, targetUsagePerPod int64, metricName string, tolerances Tolerances, namespace string, metricSelector *metav1.LabelSelector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
metricLabelSelector, err := metav1.LabelSelectorAsSelector(metricSelector)
if err != nil {
return 0, 0, time.Time{}, err
@ -367,7 +379,7 @@ func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(statusReplicas int32
replicaCount = statusReplicas
usageRatio := float64(usage) / (float64(targetUsagePerPod) * float64(replicaCount))
if math.Abs(1.0-usageRatio) > c.tolerance {
if !tolerances.isWithin(usageRatio) {
// update number of replicas if the change is large enough
replicaCount = int32(math.Ceil(float64(usage) / float64(targetUsagePerPod)))
}

View File

@ -90,9 +90,10 @@ type replicaCalcTestCase struct {
timestamp time.Time
resource *resourceInfo
metric *metricInfo
container string
tolerances *Tolerances
resource *resourceInfo
metric *metricInfo
container string
podReadiness []v1.ConditionStatus
podStartTime []metav1.Time
@ -343,7 +344,7 @@ func (tc *replicaCalcTestCase) runTest(t *testing.T) {
informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc())
informer := informerFactory.Core().V1().Pods()
replicaCalc := NewReplicaCalculator(metricsClient, informer.Lister(), defaultTestingTolerance, defaultTestingCPUInitializationPeriod, defaultTestingDelayOfInitialReadinessStatus)
replicaCalc := NewReplicaCalculator(metricsClient, informer.Lister(), defaultTestingCPUInitializationPeriod, defaultTestingDelayOfInitialReadinessStatus)
stop := make(chan struct{})
defer close(stop)
@ -357,8 +358,14 @@ func (tc *replicaCalcTestCase) runTest(t *testing.T) {
})
require.NoError(t, err, "something went horribly wrong...")
// Use default if tolerances are not specified in the test case.
tolerances := Tolerances{defaultTestingTolerance, defaultTestingTolerance}
if tc.tolerances != nil {
tolerances = *tc.tolerances
}
if tc.resource != nil {
outReplicas, outUtilization, outRawValue, outTimestamp, err := replicaCalc.GetResourceReplicas(context.TODO(), tc.currentReplicas, tc.resource.targetUtilization, tc.resource.name, testNamespace, selector, tc.container)
outReplicas, outUtilization, outRawValue, outTimestamp, err := replicaCalc.GetResourceReplicas(context.TODO(), tc.currentReplicas, tc.resource.targetUtilization, tc.resource.name, tolerances, testNamespace, selector, tc.container)
if tc.expectedError != nil {
require.Error(t, err, "there should be an error calculating the replica count")
@ -381,12 +388,12 @@ func (tc *replicaCalcTestCase) runTest(t *testing.T) {
if tc.metric.singleObject == nil {
t.Fatal("Metric specified as objectMetric but metric.singleObject is nil.")
}
outReplicas, outUsage, outTimestamp, err = replicaCalc.GetObjectMetricReplicas(tc.currentReplicas, tc.metric.targetUsage, tc.metric.name, testNamespace, tc.metric.singleObject, selector, nil)
outReplicas, outUsage, outTimestamp, err = replicaCalc.GetObjectMetricReplicas(tc.currentReplicas, tc.metric.targetUsage, tc.metric.name, tolerances, testNamespace, tc.metric.singleObject, selector, nil)
case objectPerPodMetric:
if tc.metric.singleObject == nil {
t.Fatal("Metric specified as objectMetric but metric.singleObject is nil.")
}
outReplicas, outUsage, outTimestamp, err = replicaCalc.GetObjectPerPodMetricReplicas(tc.currentReplicas, tc.metric.perPodTargetUsage, tc.metric.name, testNamespace, tc.metric.singleObject, nil)
outReplicas, outUsage, outTimestamp, err = replicaCalc.GetObjectPerPodMetricReplicas(tc.currentReplicas, tc.metric.perPodTargetUsage, tc.metric.name, tolerances, testNamespace, tc.metric.singleObject, nil)
case externalMetric:
if tc.metric.selector == nil {
t.Fatal("Metric specified as externalMetric but metric.selector is nil.")
@ -394,7 +401,7 @@ func (tc *replicaCalcTestCase) runTest(t *testing.T) {
if tc.metric.targetUsage <= 0 {
t.Fatalf("Metric specified as externalMetric but metric.targetUsage is %d which is <=0.", tc.metric.targetUsage)
}
outReplicas, outUsage, outTimestamp, err = replicaCalc.GetExternalMetricReplicas(tc.currentReplicas, tc.metric.targetUsage, tc.metric.name, testNamespace, tc.metric.selector, selector)
outReplicas, outUsage, outTimestamp, err = replicaCalc.GetExternalMetricReplicas(tc.currentReplicas, tc.metric.targetUsage, tc.metric.name, tolerances, testNamespace, tc.metric.selector, selector)
case externalPerPodMetric:
if tc.metric.selector == nil {
t.Fatal("Metric specified as externalPerPodMetric but metric.selector is nil.")
@ -403,9 +410,9 @@ func (tc *replicaCalcTestCase) runTest(t *testing.T) {
t.Fatalf("Metric specified as externalPerPodMetric but metric.perPodTargetUsage is %d which is <=0.", tc.metric.perPodTargetUsage)
}
outReplicas, outUsage, outTimestamp, err = replicaCalc.GetExternalPerPodMetricReplicas(tc.currentReplicas, tc.metric.perPodTargetUsage, tc.metric.name, testNamespace, tc.metric.selector)
outReplicas, outUsage, outTimestamp, err = replicaCalc.GetExternalPerPodMetricReplicas(tc.currentReplicas, tc.metric.perPodTargetUsage, tc.metric.name, tolerances, testNamespace, tc.metric.selector)
case podMetric:
outReplicas, outUsage, outTimestamp, err = replicaCalc.GetMetricReplicas(tc.currentReplicas, tc.metric.targetUsage, tc.metric.name, testNamespace, selector, nil)
outReplicas, outUsage, outTimestamp, err = replicaCalc.GetMetricReplicas(tc.currentReplicas, tc.metric.targetUsage, tc.metric.name, tolerances, testNamespace, selector, nil)
default:
t.Fatalf("Unknown metric type: %d", tc.metric.metricType)
}
@ -1263,6 +1270,188 @@ func TestReplicaCalcTolerancePerPodCMExternal(t *testing.T) {
tc.runTest(t)
}
func TestReplicaCalcConfigurableTolerance(t *testing.T) {
testCases := []struct {
name string
replicaCalcTestCase
}{
{
name: "Outside of a 0% tolerance",
replicaCalcTestCase: replicaCalcTestCase{
tolerances: &Tolerances{0., 0.},
currentReplicas: 3,
expectedReplicas: 4,
resource: &resourceInfo{
name: v1.ResourceCPU,
requests: []resource.Quantity{resource.MustParse("0.9"), resource.MustParse("1.0"), resource.MustParse("1.1")},
levels: makePodMetricLevels(909, 1010, 1111),
targetUtilization: 100,
expectedUtilization: 101,
expectedValue: numContainersPerPod * 1010,
},
},
},
{
name: "Within a 200% scale-up tolerance",
replicaCalcTestCase: replicaCalcTestCase{
tolerances: &Tolerances{defaultTestingTolerance, 2.},
currentReplicas: 3,
expectedReplicas: 3,
resource: &resourceInfo{
name: v1.ResourceCPU,
requests: []resource.Quantity{resource.MustParse("0.9"), resource.MustParse("1.0"), resource.MustParse("1.1")},
levels: makePodMetricLevels(1890, 1910, 1900),
targetUtilization: 100,
expectedUtilization: 190,
expectedValue: numContainersPerPod * 1900,
},
},
},
{
name: "Outside 8% scale-up tolerance (and superfuous scale-down tolerance)",
replicaCalcTestCase: replicaCalcTestCase{
tolerances: &Tolerances{2., .08},
currentReplicas: 3,
expectedReplicas: 4,
resource: &resourceInfo{
name: v1.ResourceCPU,
requests: []resource.Quantity{resource.MustParse("0.9"), resource.MustParse("1.0"), resource.MustParse("1.1")},
levels: makePodMetricLevels(1100, 1080, 1090),
targetUtilization: 100,
expectedUtilization: 109,
expectedValue: numContainersPerPod * 1090,
},
},
},
{
name: "Within a 36% scale-down tolerance",
replicaCalcTestCase: replicaCalcTestCase{
tolerances: &Tolerances{.36, defaultTestingTolerance},
currentReplicas: 3,
expectedReplicas: 3,
resource: &resourceInfo{
name: v1.ResourceCPU,
requests: []resource.Quantity{resource.MustParse("0.9"), resource.MustParse("1.0"), resource.MustParse("1.1")},
levels: makePodMetricLevels(660, 640, 650),
targetUtilization: 100,
expectedUtilization: 65,
expectedValue: numContainersPerPod * 650,
},
},
},
{
name: "Outside a 34% scale-down tolerance",
replicaCalcTestCase: replicaCalcTestCase{
tolerances: &Tolerances{.34, defaultTestingTolerance},
currentReplicas: 3,
expectedReplicas: 2,
resource: &resourceInfo{
name: v1.ResourceCPU,
requests: []resource.Quantity{resource.MustParse("0.9"), resource.MustParse("1.0"), resource.MustParse("1.1")},
levels: makePodMetricLevels(660, 640, 650),
targetUtilization: 100,
expectedUtilization: 65,
expectedValue: numContainersPerPod * 650,
},
},
},
}
for _, tc := range testCases {
t.Run(tc.name, tc.runTest)
}
}
func TestReplicaCalcConfigurableToleranceCM(t *testing.T) {
tc := replicaCalcTestCase{
tolerances: &Tolerances{defaultTestingTolerance, .01},
currentReplicas: 3,
expectedReplicas: 4,
metric: &metricInfo{
name: "qps",
levels: []int64{20000, 21000, 21000},
targetUsage: 20000,
expectedUsage: 20666,
metricType: podMetric,
},
}
tc.runTest(t)
}
func TestReplicaCalcConfigurableToleranceCMObject(t *testing.T) {
tc := replicaCalcTestCase{
tolerances: &Tolerances{defaultTestingTolerance, .01},
currentReplicas: 3,
expectedReplicas: 4,
metric: &metricInfo{
name: "qps",
levels: []int64{20666},
targetUsage: 20000,
expectedUsage: 20666,
singleObject: &autoscalingv2.CrossVersionObjectReference{
Kind: "Deployment",
APIVersion: "apps/v1",
Name: "some-deployment",
},
},
}
tc.runTest(t)
}
func TestReplicaCalcConfigurableTolerancePerPodCMObject(t *testing.T) {
tc := replicaCalcTestCase{
tolerances: &Tolerances{defaultTestingTolerance, .01},
currentReplicas: 4,
expectedReplicas: 5,
metric: &metricInfo{
metricType: objectPerPodMetric,
name: "qps",
levels: []int64{20208},
perPodTargetUsage: 5000,
expectedUsage: 5052,
singleObject: &autoscalingv2.CrossVersionObjectReference{
Kind: "Deployment",
APIVersion: "apps/v1",
Name: "some-deployment",
},
},
}
tc.runTest(t)
}
func TestReplicaCalcConfigurableToleranceCMExternal(t *testing.T) {
tc := replicaCalcTestCase{
tolerances: &Tolerances{defaultTestingTolerance, .01},
currentReplicas: 3,
expectedReplicas: 4,
metric: &metricInfo{
name: "qps",
levels: []int64{8900},
targetUsage: 8800,
expectedUsage: 8900,
selector: &metav1.LabelSelector{MatchLabels: map[string]string{"label": "value"}},
metricType: externalMetric,
},
}
tc.runTest(t)
}
func TestReplicaCalcConfigurableTolerancePerPodCMExternal(t *testing.T) {
tc := replicaCalcTestCase{
tolerances: &Tolerances{defaultTestingTolerance, .01},
currentReplicas: 3,
expectedReplicas: 4,
metric: &metricInfo{
name: "qps",
levels: []int64{8600},
perPodTargetUsage: 2800,
expectedUsage: 2867,
selector: &metav1.LabelSelector{MatchLabels: map[string]string{"label": "value"}},
metricType: externalPerPodMetric,
},
}
tc.runTest(t)
}
func TestReplicaCalcSuperfluousMetrics(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 4,