Merge pull request #66615 from jbartosik/cpu-warm-up

Automatic merge from submit-queue (batch tested with PRs 65730, 66615, 66684, 66519, 66510). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Speed up HPA reaction to metric changes by removing scale up forbidden window

**What this PR does / why we need it**:
Speed up HPA reaction to metric changes by removing scale up forbidden window.

The scale-up forbidden window protected the HPA against deciding to scale up based on metrics gathered during pod initialisation (which may be invalid; for example, a pod may be using a lot of CPU despite not doing any "actual" work).

To avoid that negative effect, only use per-pod metrics from pods that are:
- ready (so metrics about them should be valid), or
- unready, but whose creation and last readiness change timestamps are more than 10s apart (such pods have been ready before, so their metrics are very useful in at least some cases, e.g. a pod becoming unready because of overload).


This PR only deals with the scale-up window. I'll send another PR dealing with the scale-down window soon.

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:

**Special notes for your reviewer**:

**Release note**:
```release-note
Speed up HPA reaction to metric changes by removing scale up forbidden window.

The scale-up forbidden window protected the HPA against deciding to scale up based on metrics gathered during pod initialisation (which may be invalid; for example, a pod may be using a lot of CPU despite not doing any "actual" work).

To avoid that negative effect, only use per-pod metrics from pods that are:
- ready (so metrics about them should be valid), or
- unready, but whose creation and last readiness change timestamps are more than 10s apart (such pods have been ready before, so their metrics are very useful in at least some cases, e.g. a pod becoming unready because of overload).
```
This commit is contained in:
Kubernetes Submit Queue 2018-08-01 15:52:10 -07:00 committed by GitHub
commit c32e0e84da
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 588 additions and 435 deletions

View File

@ -93,7 +93,6 @@ func startHPAControllerWithMetricsClient(ctx ControllerContext, metricsClient me
replicaCalc,
ctx.InformerFactory.Autoscaling().V1().HorizontalPodAutoscalers(),
ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerSyncPeriod.Duration,
ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow.Duration,
ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration,
).Run(ctx.Stop)
return nil, true, nil

View File

@ -40,6 +40,7 @@ func (o *HPAControllerOptions) AddFlags(fs *pflag.FlagSet) {
fs.DurationVar(&o.HorizontalPodAutoscalerSyncPeriod.Duration, "horizontal-pod-autoscaler-sync-period", o.HorizontalPodAutoscalerSyncPeriod.Duration, "The period for syncing the number of pods in horizontal pod autoscaler.")
fs.DurationVar(&o.HorizontalPodAutoscalerUpscaleForbiddenWindow.Duration, "horizontal-pod-autoscaler-upscale-delay", o.HorizontalPodAutoscalerUpscaleForbiddenWindow.Duration, "The period since last upscale, before another upscale can be performed in horizontal pod autoscaler.")
fs.MarkDeprecated("horizontal-pod-autoscaler-upscale-delay", "This flag is currently no-op and will be deleted.")
fs.DurationVar(&o.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration, "horizontal-pod-autoscaler-downscale-delay", o.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration, "The period since last downscale, before another downscale can be performed in horizontal pod autoscaler.")
fs.Float64Var(&o.HorizontalPodAutoscalerTolerance, "horizontal-pod-autoscaler-tolerance", o.HorizontalPodAutoscalerTolerance, "The minimum change (from 1.0) in the desired-to-actual metrics ratio for the horizontal pod autoscaler to consider scaling.")
fs.BoolVar(&o.HorizontalPodAutoscalerUseRESTClients, "horizontal-pod-autoscaler-use-rest-clients", o.HorizontalPodAutoscalerUseRESTClients, "If set to true, causes the horizontal pod autoscaler controller to use REST clients through the kube-aggregator, instead of using the legacy metrics client through the API server proxy. This is required for custom metrics support in the horizontal pod autoscaler.")
@ -52,7 +53,6 @@ func (o *HPAControllerOptions) ApplyTo(cfg *componentconfig.HPAControllerConfigu
}
cfg.HorizontalPodAutoscalerSyncPeriod = o.HorizontalPodAutoscalerSyncPeriod
cfg.HorizontalPodAutoscalerUpscaleForbiddenWindow = o.HorizontalPodAutoscalerUpscaleForbiddenWindow
cfg.HorizontalPodAutoscalerDownscaleForbiddenWindow = o.HorizontalPodAutoscalerDownscaleForbiddenWindow
cfg.HorizontalPodAutoscalerTolerance = o.HorizontalPodAutoscalerTolerance
cfg.HorizontalPodAutoscalerUseRESTClients = o.HorizontalPodAutoscalerUseRESTClients

View File

@ -64,7 +64,6 @@ type HorizontalController struct {
replicaCalc *ReplicaCalculator
eventRecorder record.EventRecorder
upscaleForbiddenWindow time.Duration
downscaleForbiddenWindow time.Duration
// hpaLister is able to list/get HPAs from the shared cache from the informer passed in to
@ -85,7 +84,6 @@ func NewHorizontalController(
replicaCalc *ReplicaCalculator,
hpaInformer autoscalinginformers.HorizontalPodAutoscalerInformer,
resyncPeriod time.Duration,
upscaleForbiddenWindow time.Duration,
downscaleForbiddenWindow time.Duration,
) *HorizontalController {
@ -99,7 +97,6 @@ func NewHorizontalController(
eventRecorder: recorder,
scaleNamespacer: scaleNamespacer,
hpaNamespacer: hpaNamespacer,
upscaleForbiddenWindow: upscaleForbiddenWindow,
downscaleForbiddenWindow: downscaleForbiddenWindow,
queue: workqueue.NewNamedRateLimitingQueue(NewDefaultHPARateLimiter(resyncPeriod), "horizontalpodautoscaler"),
mapper: mapper,
@ -246,7 +243,6 @@ func (a *HorizontalController) computeReplicasForMetrics(hpa *autoscalingv2.Hori
setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "InvalidMetricSourceType", "the HPA was unable to compute the replica count: %s", errMsg)
return 0, "", nil, time.Time{}, fmt.Errorf(errMsg)
}
if replicas == 0 || replicaCountProposal > replicas {
timestamp = timestampProposal
replicas = replicaCountProposal
@ -472,6 +468,7 @@ func (a *HorizontalController) reconcileAutoscaler(hpav1Shared *autoscalingv1.Ho
rescaleReason = "Current number of replicas must be greater than 0"
desiredReplicas = 1
} else {
metricDesiredReplicas, metricName, metricStatuses, metricTimestamp, err = a.computeReplicasForMetrics(hpa, scale, hpa.Spec.Metrics)
if err != nil {
a.setCurrentReplicasInStatus(hpa, currentReplicas)
@ -507,15 +504,6 @@ func (a *HorizontalController) reconcileAutoscaler(hpav1Shared *autoscalingv1.Ho
setCondition(hpa, autoscalingv2.AbleToScale, v1.ConditionFalse, "BackoffDownscale", "the time since the previous scale is still within the downscale forbidden window")
backoffDown = true
}
if !hpa.Status.LastScaleTime.Add(a.upscaleForbiddenWindow).Before(timestamp) {
backoffUp = true
if backoffDown {
setCondition(hpa, autoscalingv2.AbleToScale, v1.ConditionFalse, "BackoffBoth", "the time since the previous scale is still within both the downscale and upscale forbidden windows")
} else {
setCondition(hpa, autoscalingv2.AbleToScale, v1.ConditionFalse, "BackoffUpscale", "the time since the previous scale is still within the upscale forbidden window")
}
}
}
if !backoffDown && !backoffUp {
@ -634,9 +622,8 @@ func (a *HorizontalController) shouldScale(hpa *autoscalingv2.HorizontalPodAutos
return true
}
// Going up only if the usage ratio increased significantly above the target
// and there was no rescaling in the last upscaleForbiddenWindow.
if desiredReplicas > currentReplicas && hpa.Status.LastScaleTime.Add(a.upscaleForbiddenWindow).Before(timestamp) {
// Going up only if the usage ratio increased significantly above the target.
if desiredReplicas > currentReplicas {
return true
}

File diff suppressed because it is too large Load Diff

View File

@ -491,7 +491,6 @@ func (tc *legacyTestCase) runTest(t *testing.T) {
}
informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc())
defaultUpscaleForbiddenWindow := 3 * time.Minute
defaultDownscaleForbiddenWindow := 5 * time.Minute
hpaController := NewHorizontalController(
@ -502,7 +501,6 @@ func (tc *legacyTestCase) runTest(t *testing.T) {
replicaCalc,
informerFactory.Autoscaling().V1().HorizontalPodAutoscalers(),
controller.NoResyncPeriodFunc(),
defaultUpscaleForbiddenWindow,
defaultDownscaleForbiddenWindow,
)
hpaController.hpaListerSynced = alwaysReady

View File

@ -35,6 +35,10 @@ const (
// defaultTestingTolerance is default value for calculating when to
// scale up/scale down.
defaultTestingTolerance = 0.1
// Pod begins existence as unready. If pod is unready and timestamp of last pod readiness change is
// less than maxDelayOfInitialReadinessStatus after pod start we assume it has never been ready.
maxDelayOfInitialReadinessStatus = 10 * time.Second
)
type ReplicaCalculator struct {
@ -205,7 +209,7 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
missingPods := sets.NewString()
for _, pod := range podList.Items {
if pod.Status.Phase != v1.PodRunning || !podutil.IsPodReady(&pod) {
if pod.Status.Phase != v1.PodRunning || !hasPodBeenReadyBefore(&pod) {
// save this pod name for later, but pretend it doesn't exist for now
unreadyPods.Insert(pod.Name)
delete(metrics, pod.Name)
@ -381,3 +385,22 @@ func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(currentReplicas int3
utilization = int64(math.Ceil(float64(utilization) / float64(currentReplicas)))
return replicaCount, utilization, timestamp, nil
}
// hasPodBeenReadyBefore reports whether the pod is ready now or, judging by
// its status timestamps, appears to have been ready at some earlier point.
//
// It returns:
//   - false if the pod has no PodReady condition;
//   - true if the PodReady condition is currently true;
//   - false if the pod is not ready and has no recorded start time;
//   - otherwise, true only when the last transition of the PodReady condition
//     happened more than maxDelayOfInitialReadinessStatus after the pod's
//     start time.
func hasPodBeenReadyBefore(pod *v1.Pod) bool {
	_, cond := podutil.GetPodCondition(&pod.Status, v1.PodReady)
	switch {
	case cond == nil:
		// No readiness information at all.
		return false
	case cond.Status == v1.ConditionTrue:
		// Ready right now.
		return true
	case pod.Status.StartTime == nil:
		// Not ready, and there is no start time to compare against.
		return false
	}

	// A readiness transition recorded later than this cutoff is taken to mean
	// the condition changed after the initial "unready" status was set, i.e.
	// the pod was ready at some point in the past.
	cutoff := pod.Status.StartTime.Time.Add(maxDelayOfInitialReadinessStatus)
	return cond.LastTransitionTime.Time.After(cutoff)
}

View File

@ -1069,4 +1069,76 @@ func TestReplicaCalcComputedToleranceAlgImplementation(t *testing.T) {
tc.runTest(t)
}
func TestHasPodBeenReadyBefore(t *testing.T) {
tests := []struct {
name string
conditions []v1.PodCondition
started time.Time
expected bool
}{
{
"initially unready",
[]v1.PodCondition{
{
Type: v1.PodReady,
LastTransitionTime: metav1.Time{
Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
},
Status: v1.ConditionFalse,
},
},
metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
false,
},
{
"currently unready",
[]v1.PodCondition{
{
Type: v1.PodReady,
LastTransitionTime: metav1.Time{
Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
},
Status: v1.ConditionFalse,
},
},
metav1.Date(2018, 7, 25, 17, 0, 0, 0, time.UTC).Time,
true,
},
{
"currently ready",
[]v1.PodCondition{
{
Type: v1.PodReady,
LastTransitionTime: metav1.Time{
Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
},
Status: v1.ConditionTrue,
},
},
metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
true,
},
{
"no ready status",
[]v1.PodCondition{},
metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
false,
},
}
for _, tc := range tests {
pod := &v1.Pod{
Status: v1.PodStatus{
Conditions: tc.conditions,
StartTime: &metav1.Time{
Time: tc.started,
},
},
}
got := hasPodBeenReadyBefore(pod)
if got != tc.expected {
t.Errorf("[TestHasPodBeenReadyBefore.%s] got %v, want %v", tc.name, got, tc.expected)
}
}
}
// TODO: add more tests