Make HPA more configurable

The duration of the initialization taint on CPU samples and the window of
initial readiness setting are now controlled by flags.

Add API rule violation exceptions, following the example of e50340ee23.
This commit is contained in:
Joachim Bartosik 2018-08-21 10:18:31 +02:00
parent 7d6676eab1
commit 4fd6a1684d
16 changed files with 75 additions and 38 deletions

View File

@ -93,6 +93,8 @@ API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alp
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerDownscaleForbiddenWindow
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerTolerance
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerUseRESTClients
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerCPUTaintPeriod
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerInitialReadinessDelay
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,JobControllerConfiguration,ConcurrentJobSyncs
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,KubeCloudSharedConfiguration,Port
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,KubeCloudSharedConfiguration,Address

View File

@ -84,6 +84,8 @@ func startHPAControllerWithMetricsClient(ctx ControllerContext, metricsClient me
metricsClient,
hpaClient.CoreV1(),
ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerTolerance,
ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerCPUTaintPeriod.Duration,
ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerInitialReadinessDelay.Duration,
)
go podautoscaler.NewHorizontalController(
hpaClient.CoreV1(),

View File

@ -30,6 +30,8 @@ type HPAControllerOptions struct {
HorizontalPodAutoscalerDownscaleForbiddenWindow metav1.Duration
HorizontalPodAutoscalerUpscaleForbiddenWindow metav1.Duration
HorizontalPodAutoscalerSyncPeriod metav1.Duration
HorizontalPodAutoscalerCPUTaintPeriod metav1.Duration
HorizontalPodAutoscalerInitialReadinessDelay metav1.Duration
}
// AddFlags adds flags related to HPAController for controller manager to the specified FlagSet.
@ -44,6 +46,8 @@ func (o *HPAControllerOptions) AddFlags(fs *pflag.FlagSet) {
fs.DurationVar(&o.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration, "horizontal-pod-autoscaler-downscale-delay", o.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration, "The period since last downscale, before another downscale can be performed in horizontal pod autoscaler.")
fs.Float64Var(&o.HorizontalPodAutoscalerTolerance, "horizontal-pod-autoscaler-tolerance", o.HorizontalPodAutoscalerTolerance, "The minimum change (from 1.0) in the desired-to-actual metrics ratio for the horizontal pod autoscaler to consider scaling.")
fs.BoolVar(&o.HorizontalPodAutoscalerUseRESTClients, "horizontal-pod-autoscaler-use-rest-clients", o.HorizontalPodAutoscalerUseRESTClients, "If set to true, causes the horizontal pod autoscaler controller to use REST clients through the kube-aggregator, instead of using the legacy metrics client through the API server proxy. This is required for custom metrics support in the horizontal pod autoscaler.")
fs.DurationVar(&o.HorizontalPodAutoscalerCPUTaintPeriod.Duration, "horizontal-pod-autoscaler-cpu-taint-period", o.HorizontalPodAutoscalerCPUTaintPeriod.Duration, "The period after pod start for which CPU samples are considered tainted by initialization.")
fs.DurationVar(&o.HorizontalPodAutoscalerInitialReadinessDelay.Duration, "horizontal-pod-autoscaler-initial-readiness-delay", o.HorizontalPodAutoscalerInitialReadinessDelay.Duration, "The period after pod start during which readiness changes will be treated as initial readiness.")
}
// ApplyTo fills up HPAController config with options.

View File

@ -134,6 +134,8 @@ func NewKubeControllerManagerOptions() (*KubeControllerManagerOptions, error) {
HorizontalPodAutoscalerSyncPeriod: componentConfig.HPAController.HorizontalPodAutoscalerSyncPeriod,
HorizontalPodAutoscalerUpscaleForbiddenWindow: componentConfig.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow,
HorizontalPodAutoscalerDownscaleForbiddenWindow: componentConfig.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow,
HorizontalPodAutoscalerCPUTaintPeriod: componentConfig.HPAController.HorizontalPodAutoscalerCPUTaintPeriod,
HorizontalPodAutoscalerInitialReadinessDelay: componentConfig.HPAController.HorizontalPodAutoscalerInitialReadinessDelay,
HorizontalPodAutoscalerTolerance: componentConfig.HPAController.HorizontalPodAutoscalerTolerance,
HorizontalPodAutoscalerUseRESTClients: componentConfig.HPAController.HorizontalPodAutoscalerUseRESTClients,
},

View File

@ -73,6 +73,8 @@ func TestAddFlags(t *testing.T) {
"--horizontal-pod-autoscaler-downscale-delay=2m",
"--horizontal-pod-autoscaler-sync-period=45s",
"--horizontal-pod-autoscaler-upscale-delay=1m",
"--horizontal-pod-autoscaler-cpu-taint-period=90s",
"--horizontal-pod-autoscaler-initial-readiness-delay=50s",
"--http2-max-streams-per-connection=47",
"--kube-api-burst=100",
"--kube-api-content-type=application/json",
@ -185,6 +187,8 @@ func TestAddFlags(t *testing.T) {
HorizontalPodAutoscalerSyncPeriod: metav1.Duration{Duration: 45 * time.Second},
HorizontalPodAutoscalerUpscaleForbiddenWindow: metav1.Duration{Duration: 1 * time.Minute},
HorizontalPodAutoscalerDownscaleForbiddenWindow: metav1.Duration{Duration: 2 * time.Minute},
HorizontalPodAutoscalerCPUTaintPeriod: metav1.Duration{Duration: 90 * time.Second},
HorizontalPodAutoscalerInitialReadinessDelay: metav1.Duration{Duration: 50 * time.Second},
HorizontalPodAutoscalerTolerance: 0.1,
HorizontalPodAutoscalerUseRESTClients: true,
},

View File

@ -371,6 +371,14 @@ type HPAControllerConfiguration struct {
// through the kube-aggregator when enabled, instead of using the legacy metrics client
// through the API server proxy.
HorizontalPodAutoscalerUseRESTClients bool
// HorizontalPodAutoscalerCPUTaintPeriod is the period after pod start for which HPA will consider
// CPU samples from the pod contaminated by initialization and disregard them.
HorizontalPodAutoscalerCPUTaintPeriod metav1.Duration
// HorizontalPodAutoscalerInitialReadinessDelay is the period after pod start during which readiness
// changes are treated as readiness being set for the first time. The only effect of this is that
// HPA will disregard CPU samples from unready pods whose last readiness change happened during
// that period.
HorizontalPodAutoscalerInitialReadinessDelay metav1.Duration
}
type JobControllerConfiguration struct {

View File

@ -95,6 +95,14 @@ func SetDefaults_KubeControllerManagerConfiguration(obj *KubeControllerManagerCo
if obj.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow == zero {
obj.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow = metav1.Duration{Duration: 3 * time.Minute}
}
if obj.HPAController.HorizontalPodAutoscalerCPUTaintPeriod == zero {
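// Assuming CPU is collected every minute and initialization takes another minute, HPA should
// disregard samples from the first two minutes as contaminated by initialization. Only the
// initialization minute is set here; the one-minute CPU sample window is added on top of this
// period when pods are grouped.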
obj.HPAController.HorizontalPodAutoscalerCPUTaintPeriod = metav1.Duration{Duration: time.Minute}
}
if obj.HPAController.HorizontalPodAutoscalerInitialReadinessDelay == zero {
obj.HPAController.HorizontalPodAutoscalerInitialReadinessDelay = metav1.Duration{Duration: 30 * time.Second}
}
if obj.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow == zero {
obj.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow = metav1.Duration{Duration: 5 * time.Minute}
}

View File

@ -416,6 +416,14 @@ type HPAControllerConfiguration struct {
// through the kube-aggregator when enabled, instead of using the legacy metrics client
// through the API server proxy.
HorizontalPodAutoscalerUseRESTClients *bool
// HorizontalPodAutoscalerCPUTaintPeriod is the period after pod start for which HPA will consider
// CPU samples from the pod contaminated by initialization and disregard them.
HorizontalPodAutoscalerCPUTaintPeriod metav1.Duration
// HorizontalPodAutoscalerInitialReadinessDelay is the period after pod start during which readiness
// changes are treated as readiness being set for the first time. The only effect of this is that
// HPA will disregard CPU samples from unready pods whose last readiness change happened during
// that period.
HorizontalPodAutoscalerInitialReadinessDelay metav1.Duration
}
type JobControllerConfiguration struct {

View File

@ -667,6 +667,8 @@ func autoConvert_v1alpha1_HPAControllerConfiguration_To_componentconfig_HPAContr
if err := v1.Convert_Pointer_bool_To_bool(&in.HorizontalPodAutoscalerUseRESTClients, &out.HorizontalPodAutoscalerUseRESTClients, s); err != nil {
return err
}
out.HorizontalPodAutoscalerCPUTaintPeriod = in.HorizontalPodAutoscalerCPUTaintPeriod
out.HorizontalPodAutoscalerInitialReadinessDelay = in.HorizontalPodAutoscalerInitialReadinessDelay
return nil
}
@ -683,6 +685,8 @@ func autoConvert_componentconfig_HPAControllerConfiguration_To_v1alpha1_HPAContr
if err := v1.Convert_bool_To_Pointer_bool(&in.HorizontalPodAutoscalerUseRESTClients, &out.HorizontalPodAutoscalerUseRESTClients, s); err != nil {
return err
}
out.HorizontalPodAutoscalerCPUTaintPeriod = in.HorizontalPodAutoscalerCPUTaintPeriod
out.HorizontalPodAutoscalerInitialReadinessDelay = in.HorizontalPodAutoscalerInitialReadinessDelay
return nil
}

View File

@ -242,6 +242,8 @@ func (in *HPAControllerConfiguration) DeepCopyInto(out *HPAControllerConfigurati
*out = new(bool)
**out = **in
}
out.HorizontalPodAutoscalerCPUTaintPeriod = in.HorizontalPodAutoscalerCPUTaintPeriod
out.HorizontalPodAutoscalerInitialReadinessDelay = in.HorizontalPodAutoscalerInitialReadinessDelay
return
}

View File

@ -232,6 +232,8 @@ func (in *HPAControllerConfiguration) DeepCopyInto(out *HPAControllerConfigurati
out.HorizontalPodAutoscalerSyncPeriod = in.HorizontalPodAutoscalerSyncPeriod
out.HorizontalPodAutoscalerUpscaleForbiddenWindow = in.HorizontalPodAutoscalerUpscaleForbiddenWindow
out.HorizontalPodAutoscalerDownscaleForbiddenWindow = in.HorizontalPodAutoscalerDownscaleForbiddenWindow
out.HorizontalPodAutoscalerCPUTaintPeriod = in.HorizontalPodAutoscalerCPUTaintPeriod
out.HorizontalPodAutoscalerInitialReadinessDelay = in.HorizontalPodAutoscalerInitialReadinessDelay
return
}

View File

@ -642,11 +642,7 @@ func (tc *testCase) setupController(t *testing.T) (*HorizontalController, inform
return true, obj, nil
})
replicaCalc := &ReplicaCalculator{
metricsClient: metricsClient,
podsGetter: testClient.Core(),
tolerance: defaultTestingTolerance,
}
replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus)
informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc())
defaultDownscaleForbiddenWindow := 5 * time.Minute

View File

@ -485,11 +485,7 @@ func (tc *legacyTestCase) runTest(t *testing.T) {
return true, obj, nil
})
replicaCalc := &ReplicaCalculator{
metricsClient: metricsClient,
podsGetter: testClient.Core(),
tolerance: defaultTestingTolerance,
}
replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus)
informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc())
defaultDownscaleForbiddenWindow := 5 * time.Minute

View File

@ -186,11 +186,7 @@ func (tc *legacyReplicaCalcTestCase) runTest(t *testing.T) {
testClient := tc.prepareTestClient(t)
metricsClient := metrics.NewHeapsterMetricsClient(testClient, metrics.DefaultHeapsterNamespace, metrics.DefaultHeapsterScheme, metrics.DefaultHeapsterService, metrics.DefaultHeapsterPort)
replicaCalc := &ReplicaCalculator{
metricsClient: metricsClient,
podsGetter: testClient.Core(),
tolerance: defaultTestingTolerance,
}
replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus)
selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
MatchLabels: map[string]string{"name": podNamePrefix},

View File

@ -21,6 +21,7 @@ import (
"math"
"time"
"github.com/golang/glog"
autoscaling "k8s.io/api/autoscaling/v2beta1"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@ -29,29 +30,34 @@ import (
v1coreclient "k8s.io/client-go/kubernetes/typed/core/v1"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
metricsclient "k8s.io/kubernetes/pkg/controller/podautoscaler/metrics"
"runtime/debug"
)
const (
// cpuSampleWindow is added to the CPU taint period when groupPods checks whether a pod
// started too recently for its samples to be used.
// TODO(jbartosik): use actual value.
cpuSampleWindow = time.Minute
// defaultTestingTolerance is the default tolerance value used by tests for calculating when to
// scale up/scale down.
defaultTestingTolerance = 0.1
// A pod begins its existence as unready. If a pod is unready and the timestamp of its last
// readiness change is less than maxDelayOfInitialReadinessStatus after pod start, we assume
// it has never been ready.
maxDelayOfInitialReadinessStatus = 10 * time.Second
defaultTestingTolerance = 0.1
// defaultTestingCpuTaintAfterStart is the CPU taint period that tests pass to
// NewReplicaCalculator and groupPods.
defaultTestingCpuTaintAfterStart = 2 * time.Minute
// defaultTestingDelayOfInitialReadinessStatus is the initial-readiness delay that tests pass
// to NewReplicaCalculator and groupPods.
defaultTestingDelayOfInitialReadinessStatus = 10 * time.Second
)
// ReplicaCalculator holds the metrics client, pod getter and tolerance, together with the two
// timing settings (cpuTaintAfterStart and delayOfInitialReadinessStatus) that its methods hand
// to groupPods.
type ReplicaCalculator struct {
metricsClient metricsclient.MetricsClient
podsGetter v1coreclient.PodsGetter
tolerance float64
metricsClient metricsclient.MetricsClient
podsGetter v1coreclient.PodsGetter
tolerance float64
// cpuTaintAfterStart is passed to groupPods, which adds cpuSampleWindow to it and ignores
// pods whose start time plus that sum is still in the future.
cpuTaintAfterStart time.Duration
// delayOfInitialReadinessStatus is passed to groupPods, which compares pod start time plus
// this delay against the time of the pod's last condition transition.
delayOfInitialReadinessStatus time.Duration
}
// NewReplicaCalculator returns a ReplicaCalculator whose fields are set directly from the given
// arguments. cpuTaintAfterStart and delayOfInitialReadinessStatus are stored as provided; no
// defaulting or validation is applied here.
func NewReplicaCalculator(metricsClient metricsclient.MetricsClient, podsGetter v1coreclient.PodsGetter, tolerance float64) *ReplicaCalculator {
func NewReplicaCalculator(metricsClient metricsclient.MetricsClient, podsGetter v1coreclient.PodsGetter, tolerance float64, cpuTaintAfterStart, delayOfInitialReadinessStatus time.Duration) *ReplicaCalculator {
return &ReplicaCalculator{
metricsClient: metricsClient,
podsGetter: podsGetter,
tolerance: tolerance,
metricsClient: metricsClient,
podsGetter: podsGetter,
tolerance: tolerance,
cpuTaintAfterStart: cpuTaintAfterStart,
delayOfInitialReadinessStatus: delayOfInitialReadinessStatus,
}
}
@ -73,7 +79,7 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti
return 0, 0, 0, time.Time{}, fmt.Errorf("no pods returned by selector while calculating replica count")
}
readyPodCount, ignoredPods, missingPods := groupPods(podList.Items, metrics, resource)
readyPodCount, ignoredPods, missingPods := groupPods(podList.Items, metrics, resource, c.cpuTaintAfterStart, c.delayOfInitialReadinessStatus)
removeMetricsForPods(metrics, ignoredPods)
requests, err := calculatePodRequests(podList.Items, resource)
if err != nil {
@ -174,7 +180,7 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
return 0, 0, fmt.Errorf("no pods returned by selector while calculating replica count")
}
readyPodCount, ignoredPods, missingPods := groupPods(podList.Items, metrics, resource)
readyPodCount, ignoredPods, missingPods := groupPods(podList.Items, metrics, resource, c.cpuTaintAfterStart, c.delayOfInitialReadinessStatus)
removeMetricsForPods(metrics, ignoredPods)
if len(metrics) == 0 {
@ -338,9 +344,10 @@ func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(currentReplicas int3
return replicaCount, utilization, timestamp, nil
}
func groupPods(pods []v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1.ResourceName) (readyPodCount int, ignoredPods sets.String, missingPods sets.String) {
func groupPods(pods []v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1.ResourceName, cpuTaintAfterStart, delayOfInitialReadinessStatus time.Duration) (readyPodCount int, ignoredPods sets.String, missingPods sets.String) {
missingPods = sets.NewString()
ignoredPods = sets.NewString()
glog.Errorf("groupPods stack: %v", string(debug.Stack()))
for _, pod := range pods {
if pod.Status.Phase == v1.PodFailed {
continue
@ -356,9 +363,9 @@ func groupPods(pods []v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1.
ignorePod = true
} else {
if condition.Status == v1.ConditionTrue {
ignorePod = pod.Status.StartTime.Add(2 * time.Minute).After(time.Now())
ignorePod = pod.Status.StartTime.Add(cpuTaintAfterStart + cpuSampleWindow).After(time.Now())
} else {
ignorePod = pod.Status.StartTime.Add(maxDelayOfInitialReadinessStatus).After(condition.LastTransitionTime.Time)
ignorePod = pod.Status.StartTime.Add(delayOfInitialReadinessStatus).After(condition.LastTransitionTime.Time)
}
}
if ignorePod {

View File

@ -324,11 +324,7 @@ func (tc *replicaCalcTestCase) runTest(t *testing.T) {
testClient, testMetricsClient, testCMClient, testEMClient := tc.prepareTestClient(t)
metricsClient := metrics.NewRESTMetricsClient(testMetricsClient.MetricsV1beta1(), testCMClient, testEMClient)
replicaCalc := &ReplicaCalculator{
metricsClient: metricsClient,
podsGetter: testClient.Core(),
tolerance: defaultTestingTolerance,
}
replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus)
selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
MatchLabels: map[string]string{"name": podNamePrefix},
@ -1339,7 +1335,7 @@ func TestGroupPods(t *testing.T) {
},
}
for _, tc := range tests {
readyPodCount, unreadyPods, missingPods := groupPods(tc.pods, tc.metrics, tc.resource)
readyPodCount, unreadyPods, missingPods := groupPods(tc.pods, tc.metrics, tc.resource, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus)
if readyPodCount != tc.expectReadyPodCount {
t.Errorf("%s got readyPodCount %d, expected %d", tc.name, readyPodCount, tc.expectReadyPodCount)
}