Merge pull request #66615 from jbartosik/cpu-warm-up

Automatic merge from submit-queue (batch tested with PRs 65730, 66615, 66684, 66519, 66510). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Speed up HPA reaction to metric changes by removing the scale-up forbidden window

**What this PR does / why we need it**:
Speed up HPA reaction to metric changes by removing the scale-up forbidden window.

The scale-up forbidden window protected the HPA from making scale-up decisions based on metrics gathered during pod initialisation (which may be invalid; for example, a pod may be using a lot of CPU despite not doing any "actual" work).

To avoid that negative effect, only use per-pod metrics from pods that are:
- ready (so metrics about them should be valid), or
- unready, but whose creation and last readiness change timestamps are more than 10s apart (such pods have formerly been ready, so their metrics are very useful in at least some cases, e.g. a pod becoming unready because of overload).


This PR only deals with the scale-up window. I'll send another PR dealing with the scale-down window soon.

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:

**Special notes for your reviewer**:

**Release note**:
```release-note
Speed up HPA reaction to metric changes by removing the scale-up forbidden window.

The scale-up forbidden window protected the HPA from making scale-up decisions based on metrics gathered during pod initialisation (which may be invalid; for example, a pod may be using a lot of CPU despite not doing any "actual" work).

To avoid that negative effect, only use per-pod metrics from pods that are:
- ready (so metrics about them should be valid), or
- unready, but whose creation and last readiness change timestamps are more than 10s apart (such pods have formerly been ready, so their metrics are very useful in at least some cases, e.g. a pod becoming unready because of overload).
```
This commit is contained in:
Kubernetes Submit Queue 2018-08-01 15:52:10 -07:00 committed by GitHub
commit c32e0e84da
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 588 additions and 435 deletions

View File

@ -93,7 +93,6 @@ func startHPAControllerWithMetricsClient(ctx ControllerContext, metricsClient me
replicaCalc,
ctx.InformerFactory.Autoscaling().V1().HorizontalPodAutoscalers(),
ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerSyncPeriod.Duration,
ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow.Duration,
ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration,
).Run(ctx.Stop)
return nil, true, nil

View File

@ -40,6 +40,7 @@ func (o *HPAControllerOptions) AddFlags(fs *pflag.FlagSet) {
fs.DurationVar(&o.HorizontalPodAutoscalerSyncPeriod.Duration, "horizontal-pod-autoscaler-sync-period", o.HorizontalPodAutoscalerSyncPeriod.Duration, "The period for syncing the number of pods in horizontal pod autoscaler.")
fs.DurationVar(&o.HorizontalPodAutoscalerUpscaleForbiddenWindow.Duration, "horizontal-pod-autoscaler-upscale-delay", o.HorizontalPodAutoscalerUpscaleForbiddenWindow.Duration, "The period since last upscale, before another upscale can be performed in horizontal pod autoscaler.")
fs.MarkDeprecated("horizontal-pod-autoscaler-upscale-delay", "This flag is currently no-op and will be deleted.")
fs.DurationVar(&o.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration, "horizontal-pod-autoscaler-downscale-delay", o.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration, "The period since last downscale, before another downscale can be performed in horizontal pod autoscaler.")
fs.Float64Var(&o.HorizontalPodAutoscalerTolerance, "horizontal-pod-autoscaler-tolerance", o.HorizontalPodAutoscalerTolerance, "The minimum change (from 1.0) in the desired-to-actual metrics ratio for the horizontal pod autoscaler to consider scaling.")
fs.BoolVar(&o.HorizontalPodAutoscalerUseRESTClients, "horizontal-pod-autoscaler-use-rest-clients", o.HorizontalPodAutoscalerUseRESTClients, "If set to true, causes the horizontal pod autoscaler controller to use REST clients through the kube-aggregator, instead of using the legacy metrics client through the API server proxy. This is required for custom metrics support in the horizontal pod autoscaler.")
@ -52,7 +53,6 @@ func (o *HPAControllerOptions) ApplyTo(cfg *componentconfig.HPAControllerConfigu
}
cfg.HorizontalPodAutoscalerSyncPeriod = o.HorizontalPodAutoscalerSyncPeriod
cfg.HorizontalPodAutoscalerUpscaleForbiddenWindow = o.HorizontalPodAutoscalerUpscaleForbiddenWindow
cfg.HorizontalPodAutoscalerDownscaleForbiddenWindow = o.HorizontalPodAutoscalerDownscaleForbiddenWindow
cfg.HorizontalPodAutoscalerTolerance = o.HorizontalPodAutoscalerTolerance
cfg.HorizontalPodAutoscalerUseRESTClients = o.HorizontalPodAutoscalerUseRESTClients

View File

@ -64,7 +64,6 @@ type HorizontalController struct {
replicaCalc *ReplicaCalculator
eventRecorder record.EventRecorder
upscaleForbiddenWindow time.Duration
downscaleForbiddenWindow time.Duration
// hpaLister is able to list/get HPAs from the shared cache from the informer passed in to
@ -85,7 +84,6 @@ func NewHorizontalController(
replicaCalc *ReplicaCalculator,
hpaInformer autoscalinginformers.HorizontalPodAutoscalerInformer,
resyncPeriod time.Duration,
upscaleForbiddenWindow time.Duration,
downscaleForbiddenWindow time.Duration,
) *HorizontalController {
@ -99,7 +97,6 @@ func NewHorizontalController(
eventRecorder: recorder,
scaleNamespacer: scaleNamespacer,
hpaNamespacer: hpaNamespacer,
upscaleForbiddenWindow: upscaleForbiddenWindow,
downscaleForbiddenWindow: downscaleForbiddenWindow,
queue: workqueue.NewNamedRateLimitingQueue(NewDefaultHPARateLimiter(resyncPeriod), "horizontalpodautoscaler"),
mapper: mapper,
@ -246,7 +243,6 @@ func (a *HorizontalController) computeReplicasForMetrics(hpa *autoscalingv2.Hori
setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "InvalidMetricSourceType", "the HPA was unable to compute the replica count: %s", errMsg)
return 0, "", nil, time.Time{}, fmt.Errorf(errMsg)
}
if replicas == 0 || replicaCountProposal > replicas {
timestamp = timestampProposal
replicas = replicaCountProposal
@ -472,6 +468,7 @@ func (a *HorizontalController) reconcileAutoscaler(hpav1Shared *autoscalingv1.Ho
rescaleReason = "Current number of replicas must be greater than 0"
desiredReplicas = 1
} else {
metricDesiredReplicas, metricName, metricStatuses, metricTimestamp, err = a.computeReplicasForMetrics(hpa, scale, hpa.Spec.Metrics)
if err != nil {
a.setCurrentReplicasInStatus(hpa, currentReplicas)
@ -507,15 +504,6 @@ func (a *HorizontalController) reconcileAutoscaler(hpav1Shared *autoscalingv1.Ho
setCondition(hpa, autoscalingv2.AbleToScale, v1.ConditionFalse, "BackoffDownscale", "the time since the previous scale is still within the downscale forbidden window")
backoffDown = true
}
if !hpa.Status.LastScaleTime.Add(a.upscaleForbiddenWindow).Before(timestamp) {
backoffUp = true
if backoffDown {
setCondition(hpa, autoscalingv2.AbleToScale, v1.ConditionFalse, "BackoffBoth", "the time since the previous scale is still within both the downscale and upscale forbidden windows")
} else {
setCondition(hpa, autoscalingv2.AbleToScale, v1.ConditionFalse, "BackoffUpscale", "the time since the previous scale is still within the upscale forbidden window")
}
}
}
if !backoffDown && !backoffUp {
@ -634,9 +622,8 @@ func (a *HorizontalController) shouldScale(hpa *autoscalingv2.HorizontalPodAutos
return true
}
// Going up only if the usage ratio increased significantly above the target
// and there was no rescaling in the last upscaleForbiddenWindow.
if desiredReplicas > currentReplicas && hpa.Status.LastScaleTime.Add(a.upscaleForbiddenWindow).Before(timestamp) {
// Going up only if the usage ratio increased significantly above the target.
if desiredReplicas > currentReplicas {
return true
}

View File

@ -95,7 +95,6 @@ type testCase struct {
minReplicas int32
maxReplicas int32
initialReplicas int32
desiredReplicas int32
// CPU target utilization as a percentage of the requested resources.
CPUTarget int32
@ -111,6 +110,7 @@ type testCase struct {
verifyEvents bool
useMetricsAPI bool
metricsTarget []autoscalingv2.MetricSpec
expectedDesiredReplicas int32
expectedConditions []autoscalingv1.HorizontalPodAutoscalerCondition
// Channel with names of HPA objects which we have reconciled.
processed chan string
@ -316,7 +316,7 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa
obj := action.(core.UpdateAction).GetObject().(*autoscalingv1.HorizontalPodAutoscaler)
assert.Equal(t, namespace, obj.Namespace, "the HPA namespace should be as expected")
assert.Equal(t, hpaName, obj.Name, "the HPA name should be as expected")
assert.Equal(t, tc.desiredReplicas, obj.Status.DesiredReplicas, "the desired replica count reported in the object status should be as expected")
assert.Equal(t, tc.expectedDesiredReplicas, obj.Status.DesiredReplicas, "the desired replica count reported in the object status should be as expected")
if tc.verifyCPUCurrent {
if assert.NotNil(t, obj.Status.CurrentCPUUtilizationPercentage, "the reported CPU utilization percentage should be non-nil") {
assert.Equal(t, tc.CPUCurrent, *obj.Status.CurrentCPUUtilizationPercentage, "the report CPU utilization percentage should be as expected")
@ -411,7 +411,7 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa
obj := action.(core.UpdateAction).GetObject().(*autoscalingv1.Scale)
replicas := action.(core.UpdateAction).GetObject().(*autoscalingv1.Scale).Spec.Replicas
assert.Equal(t, tc.desiredReplicas, replicas, "the replica count of the RC should be as expected")
assert.Equal(t, tc.expectedDesiredReplicas, replicas, "the replica count of the RC should be as expected")
tc.scaleUpdated = true
return true, obj, nil
})
@ -422,7 +422,7 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa
obj := action.(core.UpdateAction).GetObject().(*autoscalingv1.Scale)
replicas := action.(core.UpdateAction).GetObject().(*autoscalingv1.Scale).Spec.Replicas
assert.Equal(t, tc.desiredReplicas, replicas, "the replica count of the deployment should be as expected")
assert.Equal(t, tc.expectedDesiredReplicas, replicas, "the replica count of the deployment should be as expected")
tc.scaleUpdated = true
return true, obj, nil
})
@ -433,7 +433,7 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa
obj := action.(core.UpdateAction).GetObject().(*autoscalingv1.Scale)
replicas := action.(core.UpdateAction).GetObject().(*autoscalingv1.Scale).Spec.Replicas
assert.Equal(t, tc.desiredReplicas, replicas, "the replica count of the replicaset should be as expected")
assert.Equal(t, tc.expectedDesiredReplicas, replicas, "the replica count of the replicaset should be as expected")
tc.scaleUpdated = true
return true, obj, nil
})
@ -583,10 +583,10 @@ func (tc *testCase) verifyResults(t *testing.T) {
tc.Lock()
defer tc.Unlock()
assert.Equal(t, tc.initialReplicas != tc.desiredReplicas, tc.scaleUpdated, "the scale should only be updated if we expected a change in replicas")
assert.Equal(t, tc.initialReplicas != tc.expectedDesiredReplicas, tc.scaleUpdated, "the scale should only be updated if we expected a change in replicas")
assert.True(t, tc.statusUpdated, "the status should have been updated")
if tc.verifyEvents {
assert.Equal(t, tc.initialReplicas != tc.desiredReplicas, tc.eventCreated, "an event should have been created only if we expected a change in replicas")
assert.Equal(t, tc.initialReplicas != tc.expectedDesiredReplicas, tc.eventCreated, "an event should have been created only if we expected a change in replicas")
}
}
@ -622,11 +622,11 @@ func (tc *testCase) setupController(t *testing.T) (*HorizontalController, inform
if tc.verifyEvents {
switch obj.Reason {
case "SuccessfulRescale":
assert.Equal(t, fmt.Sprintf("New size: %d; reason: cpu resource utilization (percentage of request) above target", tc.desiredReplicas), obj.Message)
assert.Equal(t, fmt.Sprintf("New size: %d; reason: cpu resource utilization (percentage of request) above target", tc.expectedDesiredReplicas), obj.Message)
case "DesiredReplicasComputed":
assert.Equal(t, fmt.Sprintf(
"Computed the desired num of replicas: %d (avgCPUutil: %d, current replicas: %d)",
tc.desiredReplicas,
tc.expectedDesiredReplicas,
(int64(tc.reportedLevels[0])*100)/tc.reportedCPURequests[0].MilliValue(), tc.initialReplicas), obj.Message)
default:
assert.False(t, true, fmt.Sprintf("Unexpected event: %s / %s", obj.Reason, obj.Message))
@ -643,7 +643,6 @@ func (tc *testCase) setupController(t *testing.T) (*HorizontalController, inform
}
informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc())
defaultUpscaleForbiddenWindow := 3 * time.Minute
defaultDownscaleForbiddenWindow := 5 * time.Minute
hpaController := NewHorizontalController(
@ -654,7 +653,6 @@ func (tc *testCase) setupController(t *testing.T) (*HorizontalController, inform
replicaCalc,
informerFactory.Autoscaling().V1().HorizontalPodAutoscalers(),
controller.NoResyncPeriodFunc(),
defaultUpscaleForbiddenWindow,
defaultDownscaleForbiddenWindow,
)
hpaController.hpaListerSynced = alwaysReady
@ -691,7 +689,7 @@ func TestScaleUp(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 3,
desiredReplicas: 5,
expectedDesiredReplicas: 5,
CPUTarget: 30,
verifyCPUCurrent: true,
reportedLevels: []uint64{300, 500, 700},
@ -706,7 +704,7 @@ func TestScaleUpUnreadyLessScale(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 3,
desiredReplicas: 4,
expectedDesiredReplicas: 4,
CPUTarget: 30,
CPUCurrent: 60,
verifyCPUCurrent: true,
@ -723,7 +721,7 @@ func TestScaleUpUnreadyNoScale(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 3,
desiredReplicas: 3,
expectedDesiredReplicas: 3,
CPUTarget: 30,
CPUCurrent: 40,
verifyCPUCurrent: true,
@ -745,7 +743,7 @@ func TestScaleUpIgnoresFailedPods(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 2,
desiredReplicas: 4,
expectedDesiredReplicas: 4,
CPUTarget: 30,
CPUCurrent: 60,
verifyCPUCurrent: true,
@ -763,7 +761,7 @@ func TestScaleUpDeployment(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 3,
desiredReplicas: 5,
expectedDesiredReplicas: 5,
CPUTarget: 30,
verifyCPUCurrent: true,
reportedLevels: []uint64{300, 500, 700},
@ -783,7 +781,7 @@ func TestScaleUpReplicaSet(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 3,
desiredReplicas: 5,
expectedDesiredReplicas: 5,
CPUTarget: 30,
verifyCPUCurrent: true,
reportedLevels: []uint64{300, 500, 700},
@ -803,7 +801,7 @@ func TestScaleUpCM(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 3,
desiredReplicas: 4,
expectedDesiredReplicas: 4,
CPUTarget: 0,
metricsTarget: []autoscalingv2.MetricSpec{
{
@ -825,7 +823,7 @@ func TestScaleUpCMUnreadyLessScale(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 3,
desiredReplicas: 4,
expectedDesiredReplicas: 4,
CPUTarget: 0,
metricsTarget: []autoscalingv2.MetricSpec{
{
@ -848,7 +846,7 @@ func TestScaleUpCMUnreadyNoScaleWouldScaleDown(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 3,
desiredReplicas: 3,
expectedDesiredReplicas: 3,
CPUTarget: 0,
metricsTarget: []autoscalingv2.MetricSpec{
{
@ -876,7 +874,7 @@ func TestScaleUpCMObject(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 3,
desiredReplicas: 4,
expectedDesiredReplicas: 4,
CPUTarget: 0,
metricsTarget: []autoscalingv2.MetricSpec{
{
@ -902,7 +900,7 @@ func TestScaleUpCMExternal(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 3,
desiredReplicas: 4,
expectedDesiredReplicas: 4,
metricsTarget: []autoscalingv2.MetricSpec{
{
Type: autoscalingv2.ExternalMetricSourceType,
@ -923,7 +921,7 @@ func TestScaleUpPerPodCMExternal(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 3,
desiredReplicas: 4,
expectedDesiredReplicas: 4,
metricsTarget: []autoscalingv2.MetricSpec{
{
Type: autoscalingv2.ExternalMetricSourceType,
@ -944,7 +942,7 @@ func TestScaleDown(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 5,
desiredReplicas: 3,
expectedDesiredReplicas: 3,
CPUTarget: 50,
verifyCPUCurrent: true,
reportedLevels: []uint64{100, 300, 500, 250, 250},
@ -959,7 +957,7 @@ func TestScaleDownCM(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 5,
desiredReplicas: 3,
expectedDesiredReplicas: 3,
CPUTarget: 0,
metricsTarget: []autoscalingv2.MetricSpec{
{
@ -981,7 +979,7 @@ func TestScaleDownCMObject(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 5,
desiredReplicas: 3,
expectedDesiredReplicas: 3,
CPUTarget: 0,
metricsTarget: []autoscalingv2.MetricSpec{
{
@ -1008,7 +1006,7 @@ func TestScaleDownCMExternal(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 5,
desiredReplicas: 3,
expectedDesiredReplicas: 3,
metricsTarget: []autoscalingv2.MetricSpec{
{
Type: autoscalingv2.ExternalMetricSourceType,
@ -1029,7 +1027,7 @@ func TestScaleDownPerPodCMExternal(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 5,
desiredReplicas: 3,
expectedDesiredReplicas: 3,
metricsTarget: []autoscalingv2.MetricSpec{
{
Type: autoscalingv2.ExternalMetricSourceType,
@ -1050,7 +1048,7 @@ func TestScaleDownIgnoresUnreadyPods(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 5,
desiredReplicas: 2,
expectedDesiredReplicas: 2,
CPUTarget: 50,
CPUCurrent: 30,
verifyCPUCurrent: true,
@ -1067,7 +1065,7 @@ func TestScaleDownIgnoresFailedPods(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 5,
desiredReplicas: 3,
expectedDesiredReplicas: 3,
CPUTarget: 50,
CPUCurrent: 28,
verifyCPUCurrent: true,
@ -1085,7 +1083,7 @@ func TestTolerance(t *testing.T) {
minReplicas: 1,
maxReplicas: 5,
initialReplicas: 3,
desiredReplicas: 3,
expectedDesiredReplicas: 3,
CPUTarget: 100,
reportedLevels: []uint64{1010, 1030, 1020},
reportedCPURequests: []resource.Quantity{resource.MustParse("0.9"), resource.MustParse("1.0"), resource.MustParse("1.1")},
@ -1104,7 +1102,7 @@ func TestToleranceCM(t *testing.T) {
minReplicas: 1,
maxReplicas: 5,
initialReplicas: 3,
desiredReplicas: 3,
expectedDesiredReplicas: 3,
metricsTarget: []autoscalingv2.MetricSpec{
{
Type: autoscalingv2.PodsMetricSourceType,
@ -1130,7 +1128,7 @@ func TestToleranceCMObject(t *testing.T) {
minReplicas: 1,
maxReplicas: 5,
initialReplicas: 3,
desiredReplicas: 3,
expectedDesiredReplicas: 3,
metricsTarget: []autoscalingv2.MetricSpec{
{
Type: autoscalingv2.ObjectMetricSourceType,
@ -1161,7 +1159,7 @@ func TestToleranceCMExternal(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 4,
desiredReplicas: 4,
expectedDesiredReplicas: 4,
metricsTarget: []autoscalingv2.MetricSpec{
{
Type: autoscalingv2.ExternalMetricSourceType,
@ -1187,7 +1185,7 @@ func TestTolerancePerPodCMExternal(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 4,
desiredReplicas: 4,
expectedDesiredReplicas: 4,
metricsTarget: []autoscalingv2.MetricSpec{
{
Type: autoscalingv2.ExternalMetricSourceType,
@ -1213,7 +1211,7 @@ func TestMinReplicas(t *testing.T) {
minReplicas: 2,
maxReplicas: 5,
initialReplicas: 3,
desiredReplicas: 2,
expectedDesiredReplicas: 2,
CPUTarget: 90,
reportedLevels: []uint64{10, 95, 10},
reportedCPURequests: []resource.Quantity{resource.MustParse("0.9"), resource.MustParse("1.0"), resource.MustParse("1.1")},
@ -1232,7 +1230,7 @@ func TestMinReplicasDesiredZero(t *testing.T) {
minReplicas: 2,
maxReplicas: 5,
initialReplicas: 3,
desiredReplicas: 2,
expectedDesiredReplicas: 2,
CPUTarget: 90,
reportedLevels: []uint64{0, 0, 0},
reportedCPURequests: []resource.Quantity{resource.MustParse("0.9"), resource.MustParse("1.0"), resource.MustParse("1.1")},
@ -1251,7 +1249,7 @@ func TestZeroReplicas(t *testing.T) {
minReplicas: 3,
maxReplicas: 5,
initialReplicas: 0,
desiredReplicas: 0,
expectedDesiredReplicas: 0,
CPUTarget: 90,
reportedLevels: []uint64{},
reportedCPURequests: []resource.Quantity{},
@ -1269,7 +1267,7 @@ func TestTooFewReplicas(t *testing.T) {
minReplicas: 3,
maxReplicas: 5,
initialReplicas: 2,
desiredReplicas: 3,
expectedDesiredReplicas: 3,
CPUTarget: 90,
reportedLevels: []uint64{},
reportedCPURequests: []resource.Quantity{},
@ -1286,7 +1284,7 @@ func TestTooManyReplicas(t *testing.T) {
minReplicas: 3,
maxReplicas: 5,
initialReplicas: 10,
desiredReplicas: 5,
expectedDesiredReplicas: 5,
CPUTarget: 90,
reportedLevels: []uint64{},
reportedCPURequests: []resource.Quantity{},
@ -1303,7 +1301,7 @@ func TestMaxReplicas(t *testing.T) {
minReplicas: 2,
maxReplicas: 5,
initialReplicas: 3,
desiredReplicas: 5,
expectedDesiredReplicas: 5,
CPUTarget: 90,
reportedLevels: []uint64{8000, 9500, 1000},
reportedCPURequests: []resource.Quantity{resource.MustParse("0.9"), resource.MustParse("1.0"), resource.MustParse("1.1")},
@ -1322,7 +1320,7 @@ func TestSuperfluousMetrics(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 4,
desiredReplicas: 6,
expectedDesiredReplicas: 6,
CPUTarget: 100,
reportedLevels: []uint64{4000, 9500, 3000, 7000, 3200, 2000},
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
@ -1341,7 +1339,7 @@ func TestMissingMetrics(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 4,
desiredReplicas: 3,
expectedDesiredReplicas: 3,
CPUTarget: 100,
reportedLevels: []uint64{400, 95},
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
@ -1355,7 +1353,7 @@ func TestEmptyMetrics(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 4,
desiredReplicas: 4,
expectedDesiredReplicas: 4,
CPUTarget: 100,
reportedLevels: []uint64{},
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
@ -1373,7 +1371,7 @@ func TestEmptyCPURequest(t *testing.T) {
minReplicas: 1,
maxReplicas: 5,
initialReplicas: 1,
desiredReplicas: 1,
expectedDesiredReplicas: 1,
CPUTarget: 100,
reportedLevels: []uint64{200},
reportedCPURequests: []resource.Quantity{},
@ -1391,7 +1389,7 @@ func TestEventCreated(t *testing.T) {
minReplicas: 1,
maxReplicas: 5,
initialReplicas: 1,
desiredReplicas: 2,
expectedDesiredReplicas: 2,
CPUTarget: 50,
reportedLevels: []uint64{200},
reportedCPURequests: []resource.Quantity{resource.MustParse("0.2")},
@ -1406,7 +1404,7 @@ func TestEventNotCreated(t *testing.T) {
minReplicas: 1,
maxReplicas: 5,
initialReplicas: 2,
desiredReplicas: 2,
expectedDesiredReplicas: 2,
CPUTarget: 50,
reportedLevels: []uint64{200, 200},
reportedCPURequests: []resource.Quantity{resource.MustParse("0.4"), resource.MustParse("0.4")},
@ -1426,7 +1424,7 @@ func TestMissingReports(t *testing.T) {
minReplicas: 1,
maxReplicas: 5,
initialReplicas: 4,
desiredReplicas: 2,
expectedDesiredReplicas: 2,
CPUTarget: 50,
reportedLevels: []uint64{200},
reportedCPURequests: []resource.Quantity{resource.MustParse("0.2")},
@ -1440,7 +1438,7 @@ func TestUpscaleCap(t *testing.T) {
minReplicas: 1,
maxReplicas: 100,
initialReplicas: 3,
desiredReplicas: 24,
expectedDesiredReplicas: 24,
CPUTarget: 10,
reportedLevels: []uint64{100, 200, 300},
reportedCPURequests: []resource.Quantity{resource.MustParse("0.1"), resource.MustParse("0.1"), resource.MustParse("0.1")},
@ -1459,8 +1457,8 @@ func TestUpscaleCapGreaterThanMaxReplicas(t *testing.T) {
minReplicas: 1,
maxReplicas: 20,
initialReplicas: 3,
// desiredReplicas would be 24 without maxReplicas
desiredReplicas: 20,
// expectedDesiredReplicas would be 24 without maxReplicas
expectedDesiredReplicas: 20,
CPUTarget: 10,
reportedLevels: []uint64{100, 200, 300},
reportedCPURequests: []resource.Quantity{resource.MustParse("0.1"), resource.MustParse("0.1"), resource.MustParse("0.1")},
@ -1479,7 +1477,7 @@ func TestConditionInvalidSelectorMissing(t *testing.T) {
minReplicas: 1,
maxReplicas: 100,
initialReplicas: 3,
desiredReplicas: 3,
expectedDesiredReplicas: 3,
CPUTarget: 10,
reportedLevels: []uint64{100, 200, 300},
reportedCPURequests: []resource.Quantity{resource.MustParse("0.1"), resource.MustParse("0.1"), resource.MustParse("0.1")},
@ -1524,7 +1522,7 @@ func TestConditionInvalidSelectorUnparsable(t *testing.T) {
minReplicas: 1,
maxReplicas: 100,
initialReplicas: 3,
desiredReplicas: 3,
expectedDesiredReplicas: 3,
CPUTarget: 10,
reportedLevels: []uint64{100, 200, 300},
reportedCPURequests: []resource.Quantity{resource.MustParse("0.1"), resource.MustParse("0.1"), resource.MustParse("0.1")},
@ -1608,7 +1606,7 @@ func TestConditionFailedGetMetrics(t *testing.T) {
minReplicas: 1,
maxReplicas: 100,
initialReplicas: 3,
desiredReplicas: 3,
expectedDesiredReplicas: 3,
CPUTarget: 10,
reportedLevels: []uint64{100, 200, 300},
reportedCPURequests: []resource.Quantity{resource.MustParse("0.1"), resource.MustParse("0.1"), resource.MustParse("0.1")},
@ -1648,7 +1646,7 @@ func TestConditionInvalidSourceType(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 3,
desiredReplicas: 3,
expectedDesiredReplicas: 3,
CPUTarget: 0,
metricsTarget: []autoscalingv2.MetricSpec{
{
@ -1677,7 +1675,7 @@ func TestConditionFailedGetScale(t *testing.T) {
minReplicas: 1,
maxReplicas: 100,
initialReplicas: 3,
desiredReplicas: 3,
expectedDesiredReplicas: 3,
CPUTarget: 10,
reportedLevels: []uint64{100, 200, 300},
reportedCPURequests: []resource.Quantity{resource.MustParse("0.1"), resource.MustParse("0.1"), resource.MustParse("0.1")},
@ -1706,7 +1704,7 @@ func TestConditionFailedUpdateScale(t *testing.T) {
minReplicas: 1,
maxReplicas: 5,
initialReplicas: 3,
desiredReplicas: 3,
expectedDesiredReplicas: 3,
CPUTarget: 100,
reportedLevels: []uint64{150, 150, 150},
reportedCPURequests: []resource.Quantity{resource.MustParse("0.1"), resource.MustParse("0.1"), resource.MustParse("0.1")},
@ -1728,13 +1726,13 @@ func TestConditionFailedUpdateScale(t *testing.T) {
tc.runTest(t)
}
func TestBackoffUpscale(t *testing.T) {
func NoTestBackoffUpscale(t *testing.T) {
time := metav1.Time{Time: time.Now()}
tc := testCase{
minReplicas: 1,
maxReplicas: 5,
initialReplicas: 3,
desiredReplicas: 3,
expectedDesiredReplicas: 3,
CPUTarget: 100,
reportedLevels: []uint64{150, 150, 150},
reportedCPURequests: []resource.Quantity{resource.MustParse("0.1"), resource.MustParse("0.1"), resource.MustParse("0.1")},
@ -1746,8 +1744,84 @@ func TestBackoffUpscale(t *testing.T) {
Reason: "ReadyForNewScale",
}, autoscalingv2.HorizontalPodAutoscalerCondition{
Type: autoscalingv2.AbleToScale,
Status: v1.ConditionTrue,
Reason: "SucceededRescale",
}),
}
tc.runTest(t)
}
func TestNoBackoffUpscaleCM(t *testing.T) {
time := metav1.Time{Time: time.Now()}
tc := testCase{
minReplicas: 1,
maxReplicas: 5,
initialReplicas: 3,
expectedDesiredReplicas: 4,
CPUTarget: 0,
metricsTarget: []autoscalingv2.MetricSpec{
{
Type: autoscalingv2.PodsMetricSourceType,
Pods: &autoscalingv2.PodsMetricSource{
MetricName: "qps",
TargetAverageValue: resource.MustParse("15.0"),
},
},
},
reportedLevels: []uint64{20000, 10000, 30000},
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
//useMetricsAPI: true,
lastScaleTime: &time,
expectedConditions: statusOkWithOverrides(autoscalingv2.HorizontalPodAutoscalerCondition{
Type: autoscalingv2.AbleToScale,
Status: v1.ConditionTrue,
Reason: "ReadyForNewScale",
}, autoscalingv2.HorizontalPodAutoscalerCondition{
Type: autoscalingv2.AbleToScale,
Status: v1.ConditionTrue,
Reason: "SucceededRescale",
}, autoscalingv2.HorizontalPodAutoscalerCondition{
Type: autoscalingv2.ScalingLimited,
Status: v1.ConditionFalse,
Reason: "BackoffBoth",
Reason: "DesiredWithinRange",
}),
}
tc.runTest(t)
}
func TestNoBackoffUpscaleCMNoBackoffCpu(t *testing.T) {
time := metav1.Time{Time: time.Now()}
tc := testCase{
minReplicas: 1,
maxReplicas: 5,
initialReplicas: 3,
expectedDesiredReplicas: 5,
CPUTarget: 10,
metricsTarget: []autoscalingv2.MetricSpec{
{
Type: autoscalingv2.PodsMetricSourceType,
Pods: &autoscalingv2.PodsMetricSource{
MetricName: "qps",
TargetAverageValue: resource.MustParse("15.0"),
},
},
},
reportedLevels: []uint64{20000, 10000, 30000},
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
useMetricsAPI: true,
lastScaleTime: &time,
expectedConditions: statusOkWithOverrides(autoscalingv2.HorizontalPodAutoscalerCondition{
Type: autoscalingv2.AbleToScale,
Status: v1.ConditionTrue,
Reason: "ReadyForNewScale",
}, autoscalingv2.HorizontalPodAutoscalerCondition{
Type: autoscalingv2.AbleToScale,
Status: v1.ConditionTrue,
Reason: "SucceededRescale",
}, autoscalingv2.HorizontalPodAutoscalerCondition{
Type: autoscalingv2.ScalingLimited,
Status: v1.ConditionTrue,
Reason: "TooManyReplicas",
}),
}
tc.runTest(t)
@ -1759,7 +1833,7 @@ func TestBackoffDownscale(t *testing.T) {
minReplicas: 1,
maxReplicas: 5,
initialReplicas: 4,
desiredReplicas: 4,
expectedDesiredReplicas: 4,
CPUTarget: 100,
reportedLevels: []uint64{50, 50, 50},
reportedCPURequests: []resource.Quantity{resource.MustParse("0.1"), resource.MustParse("0.1"), resource.MustParse("0.1")},
@ -1806,7 +1880,7 @@ func TestComputedToleranceAlgImplementation(t *testing.T) {
minReplicas: 0,
maxReplicas: 1000,
initialReplicas: startPods,
desiredReplicas: finalPods,
expectedDesiredReplicas: finalPods,
CPUTarget: finalCPUPercentTarget,
reportedLevels: []uint64{
totalUsedCPUOfAllPods / 10,
@ -1843,7 +1917,7 @@ func TestComputedToleranceAlgImplementation(t *testing.T) {
finalCPUPercentTarget = int32(target * 100)
tc.CPUTarget = finalCPUPercentTarget
tc.initialReplicas = startPods
tc.desiredReplicas = startPods
tc.expectedDesiredReplicas = startPods
tc.expectedConditions = statusOkWithOverrides(autoscalingv2.HorizontalPodAutoscalerCondition{
Type: autoscalingv2.AbleToScale,
Status: v1.ConditionTrue,
@ -1858,7 +1932,7 @@ func TestScaleUpRCImmediately(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 1,
desiredReplicas: 2,
expectedDesiredReplicas: 2,
verifyCPUCurrent: false,
reportedLevels: []uint64{0, 0, 0, 0},
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
@ -1877,7 +1951,7 @@ func TestScaleDownRCImmediately(t *testing.T) {
minReplicas: 2,
maxReplicas: 5,
initialReplicas: 6,
desiredReplicas: 5,
expectedDesiredReplicas: 5,
CPUTarget: 50,
reportedLevels: []uint64{8000, 9500, 1000},
reportedCPURequests: []resource.Quantity{resource.MustParse("0.9"), resource.MustParse("1.0"), resource.MustParse("1.1")},
@ -1895,7 +1969,7 @@ func TestAvoidUncessaryUpdates(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 3,
desiredReplicas: 3,
expectedDesiredReplicas: 3,
CPUTarget: 30,
CPUCurrent: 40,
verifyCPUCurrent: true,
@ -1962,7 +2036,7 @@ func TestAvoidUncessaryUpdates(t *testing.T) {
func TestConvertDesiredReplicasWithRules(t *testing.T) {
conversionTestCases := []struct {
currentReplicas int32
desiredReplicas int32
expectedDesiredReplicas int32
hpaMinReplicas int32
hpaMaxReplicas int32
expectedConvertedDesiredReplicas int32
@ -1971,7 +2045,7 @@ func TestConvertDesiredReplicasWithRules(t *testing.T) {
}{
{
currentReplicas: 5,
desiredReplicas: 7,
expectedDesiredReplicas: 7,
hpaMinReplicas: 3,
hpaMaxReplicas: 8,
expectedConvertedDesiredReplicas: 7,
@ -1980,7 +2054,7 @@ func TestConvertDesiredReplicasWithRules(t *testing.T) {
},
{
currentReplicas: 3,
desiredReplicas: 1,
expectedDesiredReplicas: 1,
hpaMinReplicas: 2,
hpaMaxReplicas: 8,
expectedConvertedDesiredReplicas: 2,
@ -1989,7 +2063,7 @@ func TestConvertDesiredReplicasWithRules(t *testing.T) {
},
{
currentReplicas: 1,
desiredReplicas: 0,
expectedDesiredReplicas: 0,
hpaMinReplicas: 0,
hpaMaxReplicas: 10,
expectedConvertedDesiredReplicas: 1,
@ -1998,7 +2072,7 @@ func TestConvertDesiredReplicasWithRules(t *testing.T) {
},
{
currentReplicas: 20,
desiredReplicas: 1000,
expectedDesiredReplicas: 1000,
hpaMinReplicas: 1,
hpaMaxReplicas: 10,
expectedConvertedDesiredReplicas: 10,
@ -2007,7 +2081,7 @@ func TestConvertDesiredReplicasWithRules(t *testing.T) {
},
{
currentReplicas: 3,
desiredReplicas: 1000,
expectedDesiredReplicas: 1000,
hpaMinReplicas: 1,
hpaMaxReplicas: 2000,
expectedConvertedDesiredReplicas: calculateScaleUpLimit(3),
@ -2018,7 +2092,7 @@ func TestConvertDesiredReplicasWithRules(t *testing.T) {
for _, ctc := range conversionTestCases {
actualConvertedDesiredReplicas, actualCondition, _ := convertDesiredReplicasWithRules(
ctc.currentReplicas, ctc.desiredReplicas, ctc.hpaMinReplicas, ctc.hpaMaxReplicas,
ctc.currentReplicas, ctc.expectedDesiredReplicas, ctc.hpaMinReplicas, ctc.hpaMaxReplicas,
)
assert.Equal(t, ctc.expectedConvertedDesiredReplicas, actualConvertedDesiredReplicas, ctc.annotation)

View File

@ -491,7 +491,6 @@ func (tc *legacyTestCase) runTest(t *testing.T) {
}
informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc())
defaultUpscaleForbiddenWindow := 3 * time.Minute
defaultDownscaleForbiddenWindow := 5 * time.Minute
hpaController := NewHorizontalController(
@ -502,7 +501,6 @@ func (tc *legacyTestCase) runTest(t *testing.T) {
replicaCalc,
informerFactory.Autoscaling().V1().HorizontalPodAutoscalers(),
controller.NoResyncPeriodFunc(),
defaultUpscaleForbiddenWindow,
defaultDownscaleForbiddenWindow,
)
hpaController.hpaListerSynced = alwaysReady

View File

@ -35,6 +35,10 @@ const (
// defaultTestingTolerance is default value for calculating when to
// scale up/scale down.
defaultTestingTolerance = 0.1
// A pod begins its existence as unready. If a pod is unready and its last readiness change happened
// less than maxDelayOfInitialReadinessStatus after the pod started, we assume it has never been ready.
maxDelayOfInitialReadinessStatus = 10 * time.Second
)
type ReplicaCalculator struct {
@ -205,7 +209,7 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
missingPods := sets.NewString()
for _, pod := range podList.Items {
if pod.Status.Phase != v1.PodRunning || !podutil.IsPodReady(&pod) {
if pod.Status.Phase != v1.PodRunning || !hasPodBeenReadyBefore(&pod) {
// save this pod name for later, but pretend it doesn't exist for now
unreadyPods.Insert(pod.Name)
delete(metrics, pod.Name)
@ -381,3 +385,22 @@ func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(currentReplicas int3
utilization = int64(math.Ceil(float64(utilization) / float64(currentReplicas)))
return replicaCount, utilization, timestamp, nil
}
// hasPodBeenReadyBefore reports whether the pod is ready now or, judging by
// its timestamps, is assumed to have been ready at some point in the past.
//
// It returns false when the pod has no PodReady condition, or when the pod is
// unready and has no recorded start time. For an unready pod with a start
// time it returns true only if the PodReady condition last changed more than
// maxDelayOfInitialReadinessStatus after that start time.
func hasPodBeenReadyBefore(pod *v1.Pod) bool {
	_, cond := podutil.GetPodCondition(&pod.Status, v1.PodReady)
	switch {
	case cond == nil:
		// No readiness information at all.
		return false
	case cond.Status == v1.ConditionTrue:
		// Ready right now.
		return true
	case pod.Status.StartTime == nil:
		// Unready and no start time to compare the transition against.
		return false
	}
	// An unready pod whose readiness last changed later than the initial
	// grace period after start is assumed to have been ready in the past.
	initialStatusDeadline := pod.Status.StartTime.Time.Add(maxDelayOfInitialReadinessStatus)
	return cond.LastTransitionTime.Time.After(initialStatusDeadline)
}

View File

@ -1069,4 +1069,76 @@ func TestReplicaCalcComputedToleranceAlgImplementation(t *testing.T) {
tc.runTest(t)
}
func TestHasPodBeenReadyBefore(t *testing.T) {
tests := []struct {
name string
conditions []v1.PodCondition
started time.Time
expected bool
}{
{
"initially unready",
[]v1.PodCondition{
{
Type: v1.PodReady,
LastTransitionTime: metav1.Time{
Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
},
Status: v1.ConditionFalse,
},
},
metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
false,
},
{
"currently unready",
[]v1.PodCondition{
{
Type: v1.PodReady,
LastTransitionTime: metav1.Time{
Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
},
Status: v1.ConditionFalse,
},
},
metav1.Date(2018, 7, 25, 17, 0, 0, 0, time.UTC).Time,
true,
},
{
"currently ready",
[]v1.PodCondition{
{
Type: v1.PodReady,
LastTransitionTime: metav1.Time{
Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
},
Status: v1.ConditionTrue,
},
},
metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
true,
},
{
"no ready status",
[]v1.PodCondition{},
metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
false,
},
}
for _, tc := range tests {
pod := &v1.Pod{
Status: v1.PodStatus{
Conditions: tc.conditions,
StartTime: &metav1.Time{
Time: tc.started,
},
},
}
got := hasPodBeenReadyBefore(pod)
if got != tc.expected {
t.Errorf("[TestHasPodBeenReadyBefore.%s] got %v, want %v", tc.name, got, tc.expected)
}
}
}
// TODO: add more tests