Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-24 12:15:52 +00:00)
Merge pull request #66615 from jbartosik/cpu-warm-up
Automatic merge from submit-queue (batch tested with PRs 65730, 66615, 66684, 66519, 66510). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Speed up HPA reaction to metric changes by removing the scale-up forbidden window

**What this PR does / why we need it**: Speeds up HPA reaction to metric changes by removing the scale-up forbidden window. The scale-up forbidden window protected the HPA against deciding to scale up based on metrics gathered during pod initialisation, which may be invalid (for example, a pod may use a lot of CPU despite not doing any "actual" work). To avoid that negative effect without the window, only per-pod metrics from the following pods are used (see the sketch below):

- pods that are ready (so metrics about them should be valid), or
- pods that are unready but whose creation and last readiness-change timestamps are more than 10s apart (such pods have formerly been ready, and their metrics are in at least some cases very useful, e.g. when a pod becomes unready because of overload).

This PR only deals with the scale-up window; I'll send another PR dealing with the scale-down window soon.

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:

**Special notes for your reviewer**:

**Release note**:

```release-note
Speed up HPA reaction to metric changes by removing the scale-up forbidden window. The scale-up forbidden window protected the HPA against deciding to scale up based on metrics gathered during pod initialisation, which may be invalid (for example, a pod may use a lot of CPU despite not doing any "actual" work). To avoid that negative effect, only per-pod metrics are used from pods that are ready (so metrics about them should be valid), or unready but whose creation and last readiness-change timestamps are more than 10s apart (such pods have formerly been ready, and their metrics are in at least some cases very useful, e.g. when a pod becomes unready because of overload).
```
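To make the readiness rule above concrete, here is a minimal standalone Go sketch. The name `metricsUsable` is ours, purely for illustration; the PR's actual helper is `hasPodBeenReadyBefore`, shown in the diff below.

```go
package hpa

import "time"

// maxDelayOfInitialReadinessStatus mirrors the 10s constant introduced by this PR.
const maxDelayOfInitialReadinessStatus = 10 * time.Second

// metricsUsable restates the rule from the description: use a pod's metrics if
// the pod is ready, or if it is unready but has apparently been ready before.
func metricsUsable(ready bool, started, lastReadinessChange time.Time) bool {
	if ready {
		return true
	}
	// If readiness last changed more than 10s after the pod started, the pod
	// must have been ready at some point, so its metrics are still meaningful.
	return lastReadinessChange.After(started.Add(maxDelayOfInitialReadinessStatus))
}
```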
Commit: c32e0e84da
```diff
@@ -93,7 +93,6 @@ func startHPAControllerWithMetricsClient(ctx ControllerContext, metricsClient me
 		replicaCalc,
 		ctx.InformerFactory.Autoscaling().V1().HorizontalPodAutoscalers(),
 		ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerSyncPeriod.Duration,
-		ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow.Duration,
 		ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration,
 	).Run(ctx.Stop)
 	return nil, true, nil
```
```diff
@@ -40,6 +40,7 @@ func (o *HPAControllerOptions) AddFlags(fs *pflag.FlagSet) {
 
 	fs.DurationVar(&o.HorizontalPodAutoscalerSyncPeriod.Duration, "horizontal-pod-autoscaler-sync-period", o.HorizontalPodAutoscalerSyncPeriod.Duration, "The period for syncing the number of pods in horizontal pod autoscaler.")
 	fs.DurationVar(&o.HorizontalPodAutoscalerUpscaleForbiddenWindow.Duration, "horizontal-pod-autoscaler-upscale-delay", o.HorizontalPodAutoscalerUpscaleForbiddenWindow.Duration, "The period since last upscale, before another upscale can be performed in horizontal pod autoscaler.")
+	fs.MarkDeprecated("horizontal-pod-autoscaler-upscale-delay", "This flag is currently no-op and will be deleted.")
 	fs.DurationVar(&o.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration, "horizontal-pod-autoscaler-downscale-delay", o.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration, "The period since last downscale, before another downscale can be performed in horizontal pod autoscaler.")
 	fs.Float64Var(&o.HorizontalPodAutoscalerTolerance, "horizontal-pod-autoscaler-tolerance", o.HorizontalPodAutoscalerTolerance, "The minimum change (from 1.0) in the desired-to-actual metrics ratio for the horizontal pod autoscaler to consider scaling.")
 	fs.BoolVar(&o.HorizontalPodAutoscalerUseRESTClients, "horizontal-pod-autoscaler-use-rest-clients", o.HorizontalPodAutoscalerUseRESTClients, "If set to true, causes the horizontal pod autoscaler controller to use REST clients through the kube-aggregator, instead of using the legacy metrics client through the API server proxy. This is required for custom metrics support in the horizontal pod autoscaler.")
```
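For context on the `MarkDeprecated` call added above, here is a minimal standalone sketch (not kube-controller-manager code; the flag-set name and parse arguments are illustrative) of how a pflag flag stays parseable after deprecation:

```go
package main

import (
	"fmt"
	"time"

	"github.com/spf13/pflag"
)

func main() {
	fs := pflag.NewFlagSet("example", pflag.ExitOnError)

	var upscaleDelay time.Duration
	// The flag is registered exactly as before, so existing invocations still parse.
	fs.DurationVar(&upscaleDelay, "horizontal-pod-autoscaler-upscale-delay", 3*time.Minute,
		"The period since last upscale, before another upscale can be performed in horizontal pod autoscaler.")
	// MarkDeprecated hides the flag from help output and prints a warning when it is set.
	fs.MarkDeprecated("horizontal-pod-autoscaler-upscale-delay", "This flag is currently no-op and will be deleted.")

	fs.Parse([]string{"--horizontal-pod-autoscaler-upscale-delay=1m"})
	fmt.Println(upscaleDelay) // 1m0s parses fine, but the controller now ignores it
}
```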
```diff
@@ -52,7 +53,6 @@ func (o *HPAControllerOptions) ApplyTo(cfg *componentconfig.HPAControllerConfigu
 	}
 
 	cfg.HorizontalPodAutoscalerSyncPeriod = o.HorizontalPodAutoscalerSyncPeriod
-	cfg.HorizontalPodAutoscalerUpscaleForbiddenWindow = o.HorizontalPodAutoscalerUpscaleForbiddenWindow
 	cfg.HorizontalPodAutoscalerDownscaleForbiddenWindow = o.HorizontalPodAutoscalerDownscaleForbiddenWindow
 	cfg.HorizontalPodAutoscalerTolerance = o.HorizontalPodAutoscalerTolerance
 	cfg.HorizontalPodAutoscalerUseRESTClients = o.HorizontalPodAutoscalerUseRESTClients
```
```diff
@@ -64,7 +64,6 @@ type HorizontalController struct {
 	replicaCalc   *ReplicaCalculator
 	eventRecorder record.EventRecorder
 
-	upscaleForbiddenWindow   time.Duration
 	downscaleForbiddenWindow time.Duration
 
 	// hpaLister is able to list/get HPAs from the shared cache from the informer passed in to
```
```diff
@@ -85,7 +84,6 @@ func NewHorizontalController(
 	replicaCalc *ReplicaCalculator,
 	hpaInformer autoscalinginformers.HorizontalPodAutoscalerInformer,
 	resyncPeriod time.Duration,
-	upscaleForbiddenWindow time.Duration,
 	downscaleForbiddenWindow time.Duration,
 
 ) *HorizontalController {
```
```diff
@@ -99,7 +97,6 @@ func NewHorizontalController(
 		eventRecorder:            recorder,
 		scaleNamespacer:          scaleNamespacer,
 		hpaNamespacer:            hpaNamespacer,
-		upscaleForbiddenWindow:   upscaleForbiddenWindow,
 		downscaleForbiddenWindow: downscaleForbiddenWindow,
 		queue:                    workqueue.NewNamedRateLimitingQueue(NewDefaultHPARateLimiter(resyncPeriod), "horizontalpodautoscaler"),
 		mapper:                   mapper,
```
```diff
@@ -246,7 +243,6 @@ func (a *HorizontalController) computeReplicasForMetrics(hpa *autoscalingv2.Hori
 		setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "InvalidMetricSourceType", "the HPA was unable to compute the replica count: %s", errMsg)
 		return 0, "", nil, time.Time{}, fmt.Errorf(errMsg)
 	}
 
 	if replicas == 0 || replicaCountProposal > replicas {
 		timestamp = timestampProposal
 		replicas = replicaCountProposal
```
```diff
@@ -472,6 +468,7 @@ func (a *HorizontalController) reconcileAutoscaler(hpav1Shared *autoscalingv1.Ho
 		rescaleReason = "Current number of replicas must be greater than 0"
 		desiredReplicas = 1
 	} else {
 
 		metricDesiredReplicas, metricName, metricStatuses, metricTimestamp, err = a.computeReplicasForMetrics(hpa, scale, hpa.Spec.Metrics)
 		if err != nil {
 			a.setCurrentReplicasInStatus(hpa, currentReplicas)
```
```diff
@@ -507,15 +504,6 @@ func (a *HorizontalController) reconcileAutoscaler(hpav1Shared *autoscalingv1.Ho
 			setCondition(hpa, autoscalingv2.AbleToScale, v1.ConditionFalse, "BackoffDownscale", "the time since the previous scale is still within the downscale forbidden window")
 			backoffDown = true
 		}
 
-		if !hpa.Status.LastScaleTime.Add(a.upscaleForbiddenWindow).Before(timestamp) {
-			backoffUp = true
-			if backoffDown {
-				setCondition(hpa, autoscalingv2.AbleToScale, v1.ConditionFalse, "BackoffBoth", "the time since the previous scale is still within both the downscale and upscale forbidden windows")
-			} else {
-				setCondition(hpa, autoscalingv2.AbleToScale, v1.ConditionFalse, "BackoffUpscale", "the time since the previous scale is still within the upscale forbidden window")
-			}
-		}
 	}
 
-	if !backoffDown && !backoffUp {
```
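The `!LastScaleTime.Add(window).Before(timestamp)` idiom in the removed block reads backwards at first glance. Here is a small self-contained sketch of the same arithmetic (the 3m value matches `defaultUpscaleForbiddenWindow` in the tests further down; the other values are illustrative):

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	window := 3 * time.Minute                     // old default upscale forbidden window
	lastScale := time.Now().Add(-2 * time.Minute) // last rescale happened 2m ago
	timestamp := time.Now()                       // timestamp of the current metrics sample

	// The controller backed off while lastScale+window had not yet passed.
	backedOff := !lastScale.Add(window).Before(timestamp)
	fmt.Println(backedOff) // true: only 2m of the 3m window has elapsed
}
```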
```diff
@@ -634,9 +622,8 @@ func (a *HorizontalController) shouldScale(hpa *autoscalingv2.HorizontalPodAutos
 		return true
 	}
 
-	// Going up only if the usage ratio increased significantly above the target
-	// and there was no rescaling in the last upscaleForbiddenWindow.
-	if desiredReplicas > currentReplicas && hpa.Status.LastScaleTime.Add(a.upscaleForbiddenWindow).Before(timestamp) {
+	// Going up only if the usage ratio increased significantly above the target.
+	if desiredReplicas > currentReplicas {
 		return true
 	}
 
```
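With the upscale window gone, the tolerance described by the `--horizontal-pod-autoscaler-tolerance` flag above is the remaining guard against scaling on noise. A hedged restatement of that flag's description (the function name is ours, not the controller's):

```go
package hpa

import "math"

// withinTolerance restates the flag text: the HPA only considers scaling when
// the desired-to-actual metrics ratio differs from 1.0 by more than the
// tolerance (0.1 in these tests, per defaultTestingTolerance below).
func withinTolerance(usageRatio, tolerance float64) bool {
	return math.Abs(1.0-usageRatio) <= tolerance
}
```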
(File diff suppressed because it is too large.)
```diff
@@ -491,7 +491,6 @@ func (tc *legacyTestCase) runTest(t *testing.T) {
 	}
 
 	informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc())
-	defaultUpscaleForbiddenWindow := 3 * time.Minute
 	defaultDownscaleForbiddenWindow := 5 * time.Minute
 
 	hpaController := NewHorizontalController(
```
```diff
@@ -502,7 +501,6 @@ func (tc *legacyTestCase) runTest(t *testing.T) {
 		replicaCalc,
 		informerFactory.Autoscaling().V1().HorizontalPodAutoscalers(),
 		controller.NoResyncPeriodFunc(),
-		defaultUpscaleForbiddenWindow,
 		defaultDownscaleForbiddenWindow,
 	)
 	hpaController.hpaListerSynced = alwaysReady
```
```diff
@@ -35,6 +35,10 @@ const (
 	// defaultTestingTolerance is default value for calculating when to
 	// scale up/scale down.
 	defaultTestingTolerance = 0.1
+
+	// Pod begins existence as unready. If pod is unready and timestamp of last pod readiness change is
+	// less than maxDelayOfInitialReadinessStatus after pod start we assume it has never been ready.
+	maxDelayOfInitialReadinessStatus = 10 * time.Second
 )
 
 type ReplicaCalculator struct {
```
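A quick illustration of how the new constant separates "never ready" from "formerly ready" (the timestamps are made up; the comparison mirrors `hasPodBeenReadyBefore` below):

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	const maxDelayOfInitialReadinessStatus = 10 * time.Second

	started := time.Date(2018, 7, 25, 17, 0, 0, 0, time.UTC)

	// Readiness last changed 5s after start: within the initial 10s window,
	// so the pod is assumed to have never been ready.
	early := started.Add(5 * time.Second)
	fmt.Println(early.After(started.Add(maxDelayOfInitialReadinessStatus))) // false

	// Readiness last changed 40s after start: the pod must have been ready
	// at some point before flipping to unready.
	late := started.Add(40 * time.Second)
	fmt.Println(late.After(started.Add(maxDelayOfInitialReadinessStatus))) // true
}
```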
```diff
@@ -205,7 +209,7 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
 	missingPods := sets.NewString()
 
 	for _, pod := range podList.Items {
-		if pod.Status.Phase != v1.PodRunning || !podutil.IsPodReady(&pod) {
+		if pod.Status.Phase != v1.PodRunning || !hasPodBeenReadyBefore(&pod) {
 			// save this pod name for later, but pretend it doesn't exist for now
 			unreadyPods.Insert(pod.Name)
 			delete(metrics, pod.Name)
```
```diff
@@ -381,3 +385,22 @@ func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(currentReplicas int3
 	utilization = int64(math.Ceil(float64(utilization) / float64(currentReplicas)))
 	return replicaCount, utilization, timestamp, nil
 }
+
+// hasPodBeenReadyBefore returns true if the pod is ready or if it's not ready but appears to have been ready in the past.
+func hasPodBeenReadyBefore(pod *v1.Pod) bool {
+	_, readyCondition := podutil.GetPodCondition(&pod.Status, v1.PodReady)
+	if readyCondition == nil {
+		return false
+	}
+	if readyCondition.Status == v1.ConditionTrue {
+		return true
+	}
+	lastReady := readyCondition.LastTransitionTime.Time
+	if pod.Status.StartTime == nil {
+		return false
+	}
+	started := pod.Status.StartTime.Time
+	// If last status change was longer than maxDelayOfInitialReadinessStatus after the pod was
+	// created assume it was ready in the past.
+	return lastReady.After(started.Add(maxDelayOfInitialReadinessStatus))
+}
```
```diff
@@ -1069,4 +1069,76 @@ func TestReplicaCalcComputedToleranceAlgImplementation(t *testing.T) {
 	tc.runTest(t)
 }
+
+func TestHasPodBeenReadyBefore(t *testing.T) {
+	tests := []struct {
+		name       string
+		conditions []v1.PodCondition
+		started    time.Time
+		expected   bool
+	}{
+		{
+			"initially unready",
+			[]v1.PodCondition{
+				{
+					Type: v1.PodReady,
+					LastTransitionTime: metav1.Time{
+						Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
+					},
+					Status: v1.ConditionFalse,
+				},
+			},
+			metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
+			false,
+		},
+		{
+			"currently unready",
+			[]v1.PodCondition{
+				{
+					Type: v1.PodReady,
+					LastTransitionTime: metav1.Time{
+						Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
+					},
+					Status: v1.ConditionFalse,
+				},
+			},
+			metav1.Date(2018, 7, 25, 17, 0, 0, 0, time.UTC).Time,
+			true,
+		},
+		{
+			"currently ready",
+			[]v1.PodCondition{
+				{
+					Type: v1.PodReady,
+					LastTransitionTime: metav1.Time{
+						Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
+					},
+					Status: v1.ConditionTrue,
+				},
+			},
+			metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
+			true,
+		},
+		{
+			"no ready status",
+			[]v1.PodCondition{},
+			metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
+			false,
+		},
+	}
+	for _, tc := range tests {
+		pod := &v1.Pod{
+			Status: v1.PodStatus{
+				Conditions: tc.conditions,
+				StartTime: &metav1.Time{
+					Time: tc.started,
+				},
+			},
+		}
+		got := hasPodBeenReadyBefore(pod)
+		if got != tc.expected {
+			t.Errorf("[TestHasPodBeenReadyBefore.%s] got %v, want %v", tc.name, got, tc.expected)
+		}
+	}
+}
+
+// TODO: add more tests
```