mirror of
https://github.com/k3s-io/kubernetes.git
synced 2026-01-05 07:27:21 +00:00
Merge pull request #67252 from jbartosik/metric-sanitization
Automatic merge from submit-queue (batch tested with PRs 66916, 67252, 67794, 67619, 67328). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Fix HPA sample sanitization **What this PR does / why we need it**: @mwielgus pointed out a case when HPA fails as a result of my changes to the HPA algorithm: - Have pods that use a lot of CPU during initialization, become ready right after they initialize, - Trigger a scale up, - When new pods become ready we will count their usage (even though it's not related to any work that needs doing), - This triggers another scale up, even though existing pods can handle work, no problem. The fix is: - Use all samples for non-CPU metrics. - Only use CPU samples if: - Pod is ready and was started more than 2 minutes ago, or - Pod is unready and last readiness change happened more than 10s after it was started. The reasoning behind this is in: https://docs.google.com/document/d/1UdtYedhmCxjaJIQi6hwJMY0eHQQKxlVD8lSHZC1BPOA/edit **Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*: **Special notes for your reviewer**: **Release note**: ```release-note Replace scale up forbidden window with disregarding CPU samples collected when pod was initializing. ```
This commit is contained in:
@@ -84,6 +84,8 @@ func startHPAControllerWithMetricsClient(ctx ControllerContext, metricsClient me
|
||||
metricsClient,
|
||||
hpaClient.CoreV1(),
|
||||
ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerTolerance,
|
||||
ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerCPUTaintPeriod.Duration,
|
||||
ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerInitialReadinessDelay.Duration,
|
||||
)
|
||||
go podautoscaler.NewHorizontalController(
|
||||
hpaClient.CoreV1(),
|
||||
|
||||
@@ -30,6 +30,8 @@ type HPAControllerOptions struct {
|
||||
HorizontalPodAutoscalerDownscaleForbiddenWindow metav1.Duration
|
||||
HorizontalPodAutoscalerUpscaleForbiddenWindow metav1.Duration
|
||||
HorizontalPodAutoscalerSyncPeriod metav1.Duration
|
||||
HorizontalPodAutoscalerCPUTaintPeriod metav1.Duration
|
||||
HorizontalPodAutoscalerInitialReadinessDelay metav1.Duration
|
||||
}
|
||||
|
||||
// AddFlags adds flags related to HPAController for controller manager to the specified FlagSet.
|
||||
@@ -44,6 +46,8 @@ func (o *HPAControllerOptions) AddFlags(fs *pflag.FlagSet) {
|
||||
fs.DurationVar(&o.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration, "horizontal-pod-autoscaler-downscale-delay", o.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration, "The period since last downscale, before another downscale can be performed in horizontal pod autoscaler.")
|
||||
fs.Float64Var(&o.HorizontalPodAutoscalerTolerance, "horizontal-pod-autoscaler-tolerance", o.HorizontalPodAutoscalerTolerance, "The minimum change (from 1.0) in the desired-to-actual metrics ratio for the horizontal pod autoscaler to consider scaling.")
|
||||
fs.BoolVar(&o.HorizontalPodAutoscalerUseRESTClients, "horizontal-pod-autoscaler-use-rest-clients", o.HorizontalPodAutoscalerUseRESTClients, "If set to true, causes the horizontal pod autoscaler controller to use REST clients through the kube-aggregator, instead of using the legacy metrics client through the API server proxy. This is required for custom metrics support in the horizontal pod autoscaler.")
|
||||
fs.DurationVar(&o.HorizontalPodAutoscalerCPUTaintPeriod.Duration, "horizontal-pod-autoscaler-cpu-taint-period", o.HorizontalPodAutoscalerCPUTaintPeriod.Duration, "The period after pod start for which CPU samples are considered tainted by initialization.")
|
||||
fs.DurationVar(&o.HorizontalPodAutoscalerInitialReadinessDelay.Duration, "horizontal-pod-autoscaler-initial-readiness-delay", o.HorizontalPodAutoscalerInitialReadinessDelay.Duration, "The period after pod start during which readiness changes will be treated as initial readiness.")
|
||||
}
|
||||
|
||||
// ApplyTo fills up HPAController config with options.
|
||||
|
||||
@@ -134,6 +134,8 @@ func NewKubeControllerManagerOptions() (*KubeControllerManagerOptions, error) {
|
||||
HorizontalPodAutoscalerSyncPeriod: componentConfig.HPAController.HorizontalPodAutoscalerSyncPeriod,
|
||||
HorizontalPodAutoscalerUpscaleForbiddenWindow: componentConfig.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow,
|
||||
HorizontalPodAutoscalerDownscaleForbiddenWindow: componentConfig.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow,
|
||||
HorizontalPodAutoscalerCPUTaintPeriod: componentConfig.HPAController.HorizontalPodAutoscalerCPUTaintPeriod,
|
||||
HorizontalPodAutoscalerInitialReadinessDelay: componentConfig.HPAController.HorizontalPodAutoscalerInitialReadinessDelay,
|
||||
HorizontalPodAutoscalerTolerance: componentConfig.HPAController.HorizontalPodAutoscalerTolerance,
|
||||
HorizontalPodAutoscalerUseRESTClients: componentConfig.HPAController.HorizontalPodAutoscalerUseRESTClients,
|
||||
},
|
||||
|
||||
@@ -73,6 +73,8 @@ func TestAddFlags(t *testing.T) {
|
||||
"--horizontal-pod-autoscaler-downscale-delay=2m",
|
||||
"--horizontal-pod-autoscaler-sync-period=45s",
|
||||
"--horizontal-pod-autoscaler-upscale-delay=1m",
|
||||
"--horizontal-pod-autoscaler-cpu-taint-period=90s",
|
||||
"--horizontal-pod-autoscaler-initial-readiness-delay=50s",
|
||||
"--http2-max-streams-per-connection=47",
|
||||
"--kube-api-burst=100",
|
||||
"--kube-api-content-type=application/json",
|
||||
@@ -185,6 +187,8 @@ func TestAddFlags(t *testing.T) {
|
||||
HorizontalPodAutoscalerSyncPeriod: metav1.Duration{Duration: 45 * time.Second},
|
||||
HorizontalPodAutoscalerUpscaleForbiddenWindow: metav1.Duration{Duration: 1 * time.Minute},
|
||||
HorizontalPodAutoscalerDownscaleForbiddenWindow: metav1.Duration{Duration: 2 * time.Minute},
|
||||
HorizontalPodAutoscalerCPUTaintPeriod: metav1.Duration{Duration: 90 * time.Second},
|
||||
HorizontalPodAutoscalerInitialReadinessDelay: metav1.Duration{Duration: 50 * time.Second},
|
||||
HorizontalPodAutoscalerTolerance: 0.1,
|
||||
HorizontalPodAutoscalerUseRESTClients: true,
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user