diff --git a/api/api-rules/violation_exceptions.list b/api/api-rules/violation_exceptions.list index ef4d73a0520..1c6c2b41575 100644 --- a/api/api-rules/violation_exceptions.list +++ b/api/api-rules/violation_exceptions.list @@ -93,6 +93,8 @@ API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alp API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerDownscaleForbiddenWindow API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerTolerance API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerUseRESTClients +API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerCPUTaintPeriod +API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerInitialReadinessDelay API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,JobControllerConfiguration,ConcurrentJobSyncs API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,KubeCloudSharedConfiguration,Port API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,KubeCloudSharedConfiguration,Address diff --git a/cmd/kube-controller-manager/app/autoscaling.go b/cmd/kube-controller-manager/app/autoscaling.go index dccdac49e06..ece5f9b239e 100644 --- a/cmd/kube-controller-manager/app/autoscaling.go +++ b/cmd/kube-controller-manager/app/autoscaling.go @@ -84,6 +84,8 @@ func startHPAControllerWithMetricsClient(ctx ControllerContext, metricsClient me metricsClient, hpaClient.CoreV1(), ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerTolerance, + ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerCPUTaintPeriod.Duration, + ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerInitialReadinessDelay.Duration, ) go podautoscaler.NewHorizontalController( hpaClient.CoreV1(), diff --git a/cmd/kube-controller-manager/app/options/hpacontroller.go b/cmd/kube-controller-manager/app/options/hpacontroller.go index d383bfd0886..f5acf567b15 100644 --- a/cmd/kube-controller-manager/app/options/hpacontroller.go +++ b/cmd/kube-controller-manager/app/options/hpacontroller.go @@ -30,6 +30,8 @@ type HPAControllerOptions struct { HorizontalPodAutoscalerDownscaleForbiddenWindow metav1.Duration HorizontalPodAutoscalerUpscaleForbiddenWindow metav1.Duration HorizontalPodAutoscalerSyncPeriod metav1.Duration + HorizontalPodAutoscalerCPUTaintPeriod metav1.Duration + HorizontalPodAutoscalerInitialReadinessDelay metav1.Duration } // AddFlags adds flags related to HPAController for controller manager to the specified FlagSet. 
@@ -44,6 +46,8 @@ func (o *HPAControllerOptions) AddFlags(fs *pflag.FlagSet) { fs.DurationVar(&o.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration, "horizontal-pod-autoscaler-downscale-delay", o.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration, "The period since last downscale, before another downscale can be performed in horizontal pod autoscaler.") fs.Float64Var(&o.HorizontalPodAutoscalerTolerance, "horizontal-pod-autoscaler-tolerance", o.HorizontalPodAutoscalerTolerance, "The minimum change (from 1.0) in the desired-to-actual metrics ratio for the horizontal pod autoscaler to consider scaling.") fs.BoolVar(&o.HorizontalPodAutoscalerUseRESTClients, "horizontal-pod-autoscaler-use-rest-clients", o.HorizontalPodAutoscalerUseRESTClients, "If set to true, causes the horizontal pod autoscaler controller to use REST clients through the kube-aggregator, instead of using the legacy metrics client through the API server proxy. This is required for custom metrics support in the horizontal pod autoscaler.") + fs.DurationVar(&o.HorizontalPodAutoscalerCPUTaintPeriod.Duration, "horizontal-pod-autoscaler-cpu-taint-period", o.HorizontalPodAutoscalerCPUTaintPeriod.Duration, "The period after pod start for which CPU samples are considered tainted by initialization.") + fs.DurationVar(&o.HorizontalPodAutoscalerInitialReadinessDelay.Duration, "horizontal-pod-autoscaler-initial-readiness-delay", o.HorizontalPodAutoscalerInitialReadinessDelay.Duration, "The period after pod start during which readiness changes will be treated as initial readiness.") } // ApplyTo fills up HPAController config with options. diff --git a/cmd/kube-controller-manager/app/options/options.go b/cmd/kube-controller-manager/app/options/options.go index 96864ef126f..e91b5607939 100644 --- a/cmd/kube-controller-manager/app/options/options.go +++ b/cmd/kube-controller-manager/app/options/options.go @@ -134,6 +134,8 @@ func NewKubeControllerManagerOptions() (*KubeControllerManagerOptions, error) { HorizontalPodAutoscalerSyncPeriod: componentConfig.HPAController.HorizontalPodAutoscalerSyncPeriod, HorizontalPodAutoscalerUpscaleForbiddenWindow: componentConfig.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow, HorizontalPodAutoscalerDownscaleForbiddenWindow: componentConfig.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow, + HorizontalPodAutoscalerCPUTaintPeriod: componentConfig.HPAController.HorizontalPodAutoscalerCPUTaintPeriod, + HorizontalPodAutoscalerInitialReadinessDelay: componentConfig.HPAController.HorizontalPodAutoscalerInitialReadinessDelay, HorizontalPodAutoscalerTolerance: componentConfig.HPAController.HorizontalPodAutoscalerTolerance, HorizontalPodAutoscalerUseRESTClients: componentConfig.HPAController.HorizontalPodAutoscalerUseRESTClients, }, diff --git a/cmd/kube-controller-manager/app/options/options_test.go b/cmd/kube-controller-manager/app/options/options_test.go index f8e31d4fd1a..0f1d302ce9e 100644 --- a/cmd/kube-controller-manager/app/options/options_test.go +++ b/cmd/kube-controller-manager/app/options/options_test.go @@ -73,6 +73,8 @@ func TestAddFlags(t *testing.T) { "--horizontal-pod-autoscaler-downscale-delay=2m", "--horizontal-pod-autoscaler-sync-period=45s", "--horizontal-pod-autoscaler-upscale-delay=1m", + "--horizontal-pod-autoscaler-cpu-taint-period=90s", + "--horizontal-pod-autoscaler-initial-readiness-delay=50s", "--http2-max-streams-per-connection=47", "--kube-api-burst=100", "--kube-api-content-type=application/json", @@ -185,6 +187,8 @@ func TestAddFlags(t *testing.T) { 
HorizontalPodAutoscalerSyncPeriod: metav1.Duration{Duration: 45 * time.Second}, HorizontalPodAutoscalerUpscaleForbiddenWindow: metav1.Duration{Duration: 1 * time.Minute}, HorizontalPodAutoscalerDownscaleForbiddenWindow: metav1.Duration{Duration: 2 * time.Minute}, + HorizontalPodAutoscalerCPUTaintPeriod: metav1.Duration{Duration: 90 * time.Second}, + HorizontalPodAutoscalerInitialReadinessDelay: metav1.Duration{Duration: 50 * time.Second}, HorizontalPodAutoscalerTolerance: 0.1, HorizontalPodAutoscalerUseRESTClients: true, }, diff --git a/pkg/apis/componentconfig/types.go b/pkg/apis/componentconfig/types.go index 0a30cdd1484..4ac9f51b366 100644 --- a/pkg/apis/componentconfig/types.go +++ b/pkg/apis/componentconfig/types.go @@ -271,6 +271,14 @@ type HPAControllerConfiguration struct { // through the kube-aggregator when enabled, instead of using the legacy metrics client // through the API server proxy. HorizontalPodAutoscalerUseRESTClients bool + // HorizontalPodAutoscalerCPUTaintPeriod is period after pod start for which HPA will consider CPU + // samples from the pod contaminated by initialization and disregard them. + HorizontalPodAutoscalerCPUTaintPeriod metav1.Duration + // HorizontalPodAutoscalerInitialReadinessDelay is period after pod start during which readiness + // changes are treated as readiness being set for the first time. The only effect of this is that + // HPA will disregard CPU samples from unready pods that had last readiness change during that + // period. + HorizontalPodAutoscalerInitialReadinessDelay metav1.Duration } type JobControllerConfiguration struct { diff --git a/pkg/apis/componentconfig/v1alpha1/defaults.go b/pkg/apis/componentconfig/v1alpha1/defaults.go index 8edf2c28e14..1f0de2fcc96 100644 --- a/pkg/apis/componentconfig/v1alpha1/defaults.go +++ b/pkg/apis/componentconfig/v1alpha1/defaults.go @@ -89,6 +89,14 @@ func SetDefaults_KubeControllerManagerConfiguration(obj *KubeControllerManagerCo if obj.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow == zero { obj.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow = metav1.Duration{Duration: 3 * time.Minute} } + if obj.HPAController.HorizontalPodAutoscalerCPUTaintPeriod == zero { + // Assuming CPU is collected every minute and initialization takes another minute HPA should + // disregard samples from first two minutes as contaminated by initialization. + obj.HPAController.HorizontalPodAutoscalerCPUTaintPeriod = metav1.Duration{Duration: time.Minute} + } + if obj.HPAController.HorizontalPodAutoscalerInitialReadinessDelay == zero { + obj.HPAController.HorizontalPodAutoscalerInitialReadinessDelay = metav1.Duration{Duration: 30 * time.Second} + } if obj.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow == zero { obj.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow = metav1.Duration{Duration: 5 * time.Minute} } diff --git a/pkg/apis/componentconfig/v1alpha1/types.go b/pkg/apis/componentconfig/v1alpha1/types.go index 67a83933060..331de145c13 100644 --- a/pkg/apis/componentconfig/v1alpha1/types.go +++ b/pkg/apis/componentconfig/v1alpha1/types.go @@ -320,6 +320,14 @@ type HPAControllerConfiguration struct { // through the kube-aggregator when enabled, instead of using the legacy metrics client // through the API server proxy. HorizontalPodAutoscalerUseRESTClients *bool + // HorizontalPodAutoscalerCPUTaintPeriod is period after pod start for which HPA will consider CPU + // samples from the pod contaminated by initialization and disregard them. 
+ HorizontalPodAutoscalerCPUTaintPeriod metav1.Duration + // HorizontalPodAutoscalerInitialReadinessDelay is period after pod start during which readiness + // changes are treated as readiness being set for the first time. The only effect of this is that + // HPA will disregard CPU samples from unready pods that had last readiness change during that + // period. + HorizontalPodAutoscalerInitialReadinessDelay metav1.Duration } type JobControllerConfiguration struct { diff --git a/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go b/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go index f6a42051ba1..068e857e4f1 100644 --- a/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go +++ b/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go @@ -606,6 +606,8 @@ func autoConvert_v1alpha1_HPAControllerConfiguration_To_componentconfig_HPAContr if err := v1.Convert_Pointer_bool_To_bool(&in.HorizontalPodAutoscalerUseRESTClients, &out.HorizontalPodAutoscalerUseRESTClients, s); err != nil { return err } + out.HorizontalPodAutoscalerCPUTaintPeriod = in.HorizontalPodAutoscalerCPUTaintPeriod + out.HorizontalPodAutoscalerInitialReadinessDelay = in.HorizontalPodAutoscalerInitialReadinessDelay return nil } @@ -622,6 +624,8 @@ func autoConvert_componentconfig_HPAControllerConfiguration_To_v1alpha1_HPAContr if err := v1.Convert_bool_To_Pointer_bool(&in.HorizontalPodAutoscalerUseRESTClients, &out.HorizontalPodAutoscalerUseRESTClients, s); err != nil { return err } + out.HorizontalPodAutoscalerCPUTaintPeriod = in.HorizontalPodAutoscalerCPUTaintPeriod + out.HorizontalPodAutoscalerInitialReadinessDelay = in.HorizontalPodAutoscalerInitialReadinessDelay return nil } diff --git a/pkg/apis/componentconfig/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/componentconfig/v1alpha1/zz_generated.deepcopy.go index b548bdf2e26..1e2bedd6d9d 100644 --- a/pkg/apis/componentconfig/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/componentconfig/v1alpha1/zz_generated.deepcopy.go @@ -242,6 +242,8 @@ func (in *HPAControllerConfiguration) DeepCopyInto(out *HPAControllerConfigurati *out = new(bool) **out = **in } + out.HorizontalPodAutoscalerCPUTaintPeriod = in.HorizontalPodAutoscalerCPUTaintPeriod + out.HorizontalPodAutoscalerInitialReadinessDelay = in.HorizontalPodAutoscalerInitialReadinessDelay return } diff --git a/pkg/apis/componentconfig/zz_generated.deepcopy.go b/pkg/apis/componentconfig/zz_generated.deepcopy.go index 9fdb0a2f083..482befef647 100644 --- a/pkg/apis/componentconfig/zz_generated.deepcopy.go +++ b/pkg/apis/componentconfig/zz_generated.deepcopy.go @@ -232,6 +232,8 @@ func (in *HPAControllerConfiguration) DeepCopyInto(out *HPAControllerConfigurati out.HorizontalPodAutoscalerSyncPeriod = in.HorizontalPodAutoscalerSyncPeriod out.HorizontalPodAutoscalerUpscaleForbiddenWindow = in.HorizontalPodAutoscalerUpscaleForbiddenWindow out.HorizontalPodAutoscalerDownscaleForbiddenWindow = in.HorizontalPodAutoscalerDownscaleForbiddenWindow + out.HorizontalPodAutoscalerCPUTaintPeriod = in.HorizontalPodAutoscalerCPUTaintPeriod + out.HorizontalPodAutoscalerInitialReadinessDelay = in.HorizontalPodAutoscalerInitialReadinessDelay return } diff --git a/pkg/controller/podautoscaler/BUILD b/pkg/controller/podautoscaler/BUILD index 8924b2ddb89..80342236639 100644 --- a/pkg/controller/podautoscaler/BUILD +++ b/pkg/controller/podautoscaler/BUILD @@ -73,6 +73,7 @@ go_test( "//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library", 
"//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/watch:go_default_library", "//staging/src/k8s.io/client-go/informers:go_default_library", "//staging/src/k8s.io/client-go/kubernetes/fake:go_default_library", diff --git a/pkg/controller/podautoscaler/horizontal_test.go b/pkg/controller/podautoscaler/horizontal_test.go index dc4015b08b4..5be579861dd 100644 --- a/pkg/controller/podautoscaler/horizontal_test.go +++ b/pkg/controller/podautoscaler/horizontal_test.go @@ -103,6 +103,7 @@ type testCase struct { reportedLevels []uint64 reportedCPURequests []resource.Quantity reportedPodReadiness []v1.ConditionStatus + reportedPodStartTime []metav1.Time reportedPodPhase []v1.PodPhase scaleUpdated bool statusUpdated bool @@ -261,6 +262,10 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa if tc.reportedPodReadiness != nil { podReadiness = tc.reportedPodReadiness[i] } + var podStartTime metav1.Time + if tc.reportedPodStartTime != nil { + podStartTime = tc.reportedPodStartTime[i] + } podPhase := v1.PodRunning if tc.reportedPodPhase != nil { @@ -283,6 +288,7 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa Status: podReadiness, }, }, + StartTime: &podStartTime, }, ObjectMeta: metav1.ObjectMeta{ Name: podName, @@ -636,11 +642,7 @@ func (tc *testCase) setupController(t *testing.T) (*HorizontalController, inform return true, obj, nil }) - replicaCalc := &ReplicaCalculator{ - metricsClient: metricsClient, - podsGetter: testClient.Core(), - tolerance: defaultTestingTolerance, - } + replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus) informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc()) defaultDownscaleForbiddenWindow := 5 * time.Minute @@ -660,6 +662,14 @@ func (tc *testCase) setupController(t *testing.T) (*HorizontalController, inform return hpaController, informerFactory } +func hotCpuCreationTime() metav1.Time { + return metav1.Time{Time: time.Now()} +} + +func coolCpuCreationTime() metav1.Time { + return metav1.Time{Time: time.Now().Add(-3 * time.Minute)} +} + func (tc *testCase) runTestWithController(t *testing.T, hpaController *HorizontalController, informerFactory informers.SharedInformerFactory) { stop := make(chan struct{}) defer close(stop) @@ -716,6 +726,23 @@ func TestScaleUpUnreadyLessScale(t *testing.T) { tc.runTest(t) } +func TestScaleUpHotCpuLessScale(t *testing.T) { + tc := testCase{ + minReplicas: 2, + maxReplicas: 6, + initialReplicas: 3, + expectedDesiredReplicas: 4, + CPUTarget: 30, + CPUCurrent: 60, + verifyCPUCurrent: true, + reportedLevels: []uint64{300, 500, 700}, + reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + reportedPodStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), coolCpuCreationTime()}, + useMetricsAPI: true, + } + tc.runTest(t) +} + func TestScaleUpUnreadyNoScale(t *testing.T) { tc := testCase{ minReplicas: 2, @@ -738,6 +765,29 @@ func TestScaleUpUnreadyNoScale(t *testing.T) { tc.runTest(t) } +func TestScaleUpHotCpuNoScale(t *testing.T) { + tc := testCase{ + minReplicas: 2, + maxReplicas: 6, + initialReplicas: 3, + expectedDesiredReplicas: 3, + CPUTarget: 30, + CPUCurrent: 40, + verifyCPUCurrent: true, + 
reportedLevels: []uint64{400, 500, 700}, + reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse}, + reportedPodStartTime: []metav1.Time{coolCpuCreationTime(), hotCpuCreationTime(), hotCpuCreationTime()}, + useMetricsAPI: true, + expectedConditions: statusOkWithOverrides(autoscalingv2.HorizontalPodAutoscalerCondition{ + Type: autoscalingv2.AbleToScale, + Status: v1.ConditionTrue, + Reason: "ReadyForNewScale", + }), + } + tc.runTest(t) +} + func TestScaleUpIgnoresFailedPods(t *testing.T) { tc := testCase{ minReplicas: 2, @@ -818,12 +868,12 @@ func TestScaleUpCM(t *testing.T) { tc.runTest(t) } -func TestScaleUpCMUnreadyLessScale(t *testing.T) { +func TestScaleUpCMUnreadyAndHotCpuNoLessScale(t *testing.T) { tc := testCase{ minReplicas: 2, maxReplicas: 6, initialReplicas: 3, - expectedDesiredReplicas: 4, + expectedDesiredReplicas: 6, CPUTarget: 0, metricsTarget: []autoscalingv2.MetricSpec{ { @@ -836,17 +886,18 @@ func TestScaleUpCMUnreadyLessScale(t *testing.T) { }, reportedLevels: []uint64{50000, 10000, 30000}, reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse}, + reportedPodStartTime: []metav1.Time{coolCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime()}, reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, } tc.runTest(t) } -func TestScaleUpCMUnreadyNoScaleWouldScaleDown(t *testing.T) { +func TestScaleUpCMUnreadyandCpuHot(t *testing.T) { tc := testCase{ minReplicas: 2, maxReplicas: 6, initialReplicas: 3, - expectedDesiredReplicas: 3, + expectedDesiredReplicas: 6, CPUTarget: 0, metricsTarget: []autoscalingv2.MetricSpec{ { @@ -859,11 +910,48 @@ func TestScaleUpCMUnreadyNoScaleWouldScaleDown(t *testing.T) { }, reportedLevels: []uint64{50000, 15000, 30000}, reportedPodReadiness: []v1.ConditionStatus{v1.ConditionFalse, v1.ConditionTrue, v1.ConditionFalse}, + reportedPodStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime()}, reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, expectedConditions: statusOkWithOverrides(autoscalingv2.HorizontalPodAutoscalerCondition{ Type: autoscalingv2.AbleToScale, Status: v1.ConditionTrue, - Reason: "ReadyForNewScale", + Reason: "SucceededRescale", + }, autoscalingv2.HorizontalPodAutoscalerCondition{ + Type: autoscalingv2.ScalingLimited, + Status: v1.ConditionTrue, + Reason: "TooManyReplicas", + }), + } + tc.runTest(t) +} + +func TestScaleUpHotCpuNoScaleWouldScaleDown(t *testing.T) { + tc := testCase{ + minReplicas: 2, + maxReplicas: 6, + initialReplicas: 3, + expectedDesiredReplicas: 6, + CPUTarget: 0, + metricsTarget: []autoscalingv2.MetricSpec{ + { + Type: autoscalingv2.PodsMetricSourceType, + Pods: &autoscalingv2.PodsMetricSource{ + MetricName: "qps", + TargetAverageValue: resource.MustParse("15.0"), + }, + }, + }, + reportedLevels: []uint64{50000, 15000, 30000}, + reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + reportedPodStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime()}, + expectedConditions: statusOkWithOverrides(autoscalingv2.HorizontalPodAutoscalerCondition{ + Type: autoscalingv2.AbleToScale, + Status: v1.ConditionTrue, + Reason: 
"SucceededRescale", + }, autoscalingv2.HorizontalPodAutoscalerCondition{ + Type: autoscalingv2.ScalingLimited, + Status: v1.ConditionTrue, + Reason: "TooManyReplicas", }), } tc.runTest(t) @@ -1043,7 +1131,7 @@ func TestScaleDownPerPodCMExternal(t *testing.T) { tc.runTest(t) } -func TestScaleDownIgnoresUnreadyPods(t *testing.T) { +func TestScaleDownIncludeUnreadyPods(t *testing.T) { tc := testCase{ minReplicas: 2, maxReplicas: 6, @@ -1060,6 +1148,23 @@ func TestScaleDownIgnoresUnreadyPods(t *testing.T) { tc.runTest(t) } +func TestScaleDownIgnoreHotCpuPods(t *testing.T) { + tc := testCase{ + minReplicas: 2, + maxReplicas: 6, + initialReplicas: 5, + expectedDesiredReplicas: 2, + CPUTarget: 50, + CPUCurrent: 30, + verifyCPUCurrent: true, + reportedLevels: []uint64{100, 300, 500, 250, 250}, + reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + useMetricsAPI: true, + reportedPodStartTime: []metav1.Time{coolCpuCreationTime(), coolCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime(), hotCpuCreationTime()}, + } + tc.runTest(t) +} + func TestScaleDownIgnoresFailedPods(t *testing.T) { tc := testCase{ minReplicas: 2, @@ -1975,7 +2080,7 @@ func TestAvoidUncessaryUpdates(t *testing.T) { verifyCPUCurrent: true, reportedLevels: []uint64{400, 500, 700}, reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, - reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse}, + reportedPodStartTime: []metav1.Time{coolCpuCreationTime(), hotCpuCreationTime(), hotCpuCreationTime()}, useMetricsAPI: true, } testClient, _, _, _, _ := tc.prepareTestClient(t) diff --git a/pkg/controller/podautoscaler/legacy_horizontal_test.go b/pkg/controller/podautoscaler/legacy_horizontal_test.go index 01fc6986234..a327f7f61ef 100644 --- a/pkg/controller/podautoscaler/legacy_horizontal_test.go +++ b/pkg/controller/podautoscaler/legacy_horizontal_test.go @@ -222,7 +222,8 @@ func (tc *legacyTestCase) prepareTestClient(t *testing.T) (*fake.Clientset, *sca podName := fmt.Sprintf("%s-%d", podNamePrefix, i) pod := v1.Pod{ Status: v1.PodStatus{ - Phase: v1.PodRunning, + StartTime: &metav1.Time{Time: time.Now().Add(-3 * time.Minute)}, + Phase: v1.PodRunning, Conditions: []v1.PodCondition{ { Type: v1.PodReady, @@ -484,11 +485,7 @@ func (tc *legacyTestCase) runTest(t *testing.T) { return true, obj, nil }) - replicaCalc := &ReplicaCalculator{ - metricsClient: metricsClient, - podsGetter: testClient.Core(), - tolerance: defaultTestingTolerance, - } + replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus) informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc()) defaultDownscaleForbiddenWindow := 5 * time.Minute @@ -545,8 +542,7 @@ func TestLegacyScaleUpUnreadyLessScale(t *testing.T) { initialReplicas: 3, desiredReplicas: 4, CPUTarget: 30, - CPUCurrent: 60, - verifyCPUCurrent: true, + verifyCPUCurrent: false, reportedLevels: []uint64{300, 500, 700}, reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, reportedPodReadiness: []v1.ConditionStatus{v1.ConditionFalse, v1.ConditionTrue, v1.ConditionTrue}, @@ -634,12 +630,12 @@ func TestLegacyScaleUpCM(t *testing.T) { tc.runTest(t) } -func 
TestLegacyScaleUpCMUnreadyLessScale(t *testing.T) { +func TestLegacyScaleUpCMUnreadyNoLessScale(t *testing.T) { tc := legacyTestCase{ minReplicas: 2, maxReplicas: 6, initialReplicas: 3, - desiredReplicas: 4, + desiredReplicas: 6, CPUTarget: 0, metricsTarget: []autoscalingv2.MetricSpec{ { @@ -662,7 +658,7 @@ func TestLegacyScaleUpCMUnreadyNoScaleWouldScaleDown(t *testing.T) { minReplicas: 2, maxReplicas: 6, initialReplicas: 3, - desiredReplicas: 3, + desiredReplicas: 6, CPUTarget: 0, metricsTarget: []autoscalingv2.MetricSpec{ { diff --git a/pkg/controller/podautoscaler/legacy_replica_calculator_test.go b/pkg/controller/podautoscaler/legacy_replica_calculator_test.go index 7f3e872a502..9fac8a4a000 100644 --- a/pkg/controller/podautoscaler/legacy_replica_calculator_test.go +++ b/pkg/controller/podautoscaler/legacy_replica_calculator_test.go @@ -67,7 +67,8 @@ func (tc *legacyReplicaCalcTestCase) prepareTestClient(t *testing.T) *fake.Clien podName := fmt.Sprintf("%s-%d", podNamePrefix, i) pod := v1.Pod{ Status: v1.PodStatus{ - Phase: v1.PodRunning, + Phase: v1.PodRunning, + StartTime: &metav1.Time{Time: time.Now().Add(-3 * time.Minute)}, Conditions: []v1.PodCondition{ { Type: v1.PodReady, @@ -185,11 +186,7 @@ func (tc *legacyReplicaCalcTestCase) runTest(t *testing.T) { testClient := tc.prepareTestClient(t) metricsClient := metrics.NewHeapsterMetricsClient(testClient, metrics.DefaultHeapsterNamespace, metrics.DefaultHeapsterScheme, metrics.DefaultHeapsterService, metrics.DefaultHeapsterPort) - replicaCalc := &ReplicaCalculator{ - metricsClient: metricsClient, - podsGetter: testClient.Core(), - tolerance: defaultTestingTolerance, - } + replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus) selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ MatchLabels: map[string]string{"name": podNamePrefix}, @@ -310,10 +307,10 @@ func TestLegacyReplicaCalcScaleUpCM(t *testing.T) { tc.runTest(t) } -func TestLegacyReplicaCalcScaleUpCMUnreadyLessScale(t *testing.T) { +func TestLegacyReplicaCalcScaleUpCMUnreadyNoLessScale(t *testing.T) { tc := legacyReplicaCalcTestCase{ currentReplicas: 3, - expectedReplicas: 4, + expectedReplicas: 6, podReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse}, metric: &metricInfo{ name: "qps", @@ -325,16 +322,16 @@ func TestLegacyReplicaCalcScaleUpCMUnreadyLessScale(t *testing.T) { tc.runTest(t) } -func TestLegacyReplicaCalcScaleUpCMUnreadyNoScaleWouldScaleDown(t *testing.T) { +func TestLegacyReplicaCalcScaleUpCMUnreadyScale(t *testing.T) { tc := legacyReplicaCalcTestCase{ currentReplicas: 3, - expectedReplicas: 3, + expectedReplicas: 7, podReadiness: []v1.ConditionStatus{v1.ConditionFalse, v1.ConditionTrue, v1.ConditionFalse}, metric: &metricInfo{ name: "qps", levels: []int64{50000, 15000, 30000}, targetUtilization: 15000, - expectedUtilization: 15000, + expectedUtilization: 31666, }, } tc.runTest(t) diff --git a/pkg/controller/podautoscaler/replica_calculator.go b/pkg/controller/podautoscaler/replica_calculator.go index 304c2afba82..b4faac41096 100644 --- a/pkg/controller/podautoscaler/replica_calculator.go +++ b/pkg/controller/podautoscaler/replica_calculator.go @@ -21,6 +21,7 @@ import ( "math" "time" + "github.com/golang/glog" autoscaling "k8s.io/api/autoscaling/v2beta1" "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -29,29 +30,34 @@ import ( v1coreclient 
"k8s.io/client-go/kubernetes/typed/core/v1" podutil "k8s.io/kubernetes/pkg/api/v1/pod" metricsclient "k8s.io/kubernetes/pkg/controller/podautoscaler/metrics" + "runtime/debug" ) const ( + // TODO(jbartosik): use actual value. + cpuSampleWindow = time.Minute // defaultTestingTolerance is default value for calculating when to // scale up/scale down. - defaultTestingTolerance = 0.1 - - // Pod begins existence as unready. If pod is unready and timestamp of last pod readiness change is - // less than maxDelayOfInitialReadinessStatus after pod start we assume it has never been ready. - maxDelayOfInitialReadinessStatus = 10 * time.Second + defaultTestingTolerance = 0.1 + defaultTestingCpuTaintAfterStart = 2 * time.Minute + defaultTestingDelayOfInitialReadinessStatus = 10 * time.Second ) type ReplicaCalculator struct { - metricsClient metricsclient.MetricsClient - podsGetter v1coreclient.PodsGetter - tolerance float64 + metricsClient metricsclient.MetricsClient + podsGetter v1coreclient.PodsGetter + tolerance float64 + cpuTaintAfterStart time.Duration + delayOfInitialReadinessStatus time.Duration } -func NewReplicaCalculator(metricsClient metricsclient.MetricsClient, podsGetter v1coreclient.PodsGetter, tolerance float64) *ReplicaCalculator { +func NewReplicaCalculator(metricsClient metricsclient.MetricsClient, podsGetter v1coreclient.PodsGetter, tolerance float64, cpuTaintAfterStart, delayOfInitialReadinessStatus time.Duration) *ReplicaCalculator { return &ReplicaCalculator{ - metricsClient: metricsClient, - podsGetter: podsGetter, - tolerance: tolerance, + metricsClient: metricsClient, + podsGetter: podsGetter, + tolerance: tolerance, + cpuTaintAfterStart: cpuTaintAfterStart, + delayOfInitialReadinessStatus: delayOfInitialReadinessStatus, } } @@ -73,41 +79,11 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti return 0, 0, 0, time.Time{}, fmt.Errorf("no pods returned by selector while calculating replica count") } - requests := make(map[string]int64, itemsLen) - readyPodCount := 0 - unreadyPods := sets.NewString() - missingPods := sets.NewString() - - for _, pod := range podList.Items { - podSum := int64(0) - for _, container := range pod.Spec.Containers { - if containerRequest, ok := container.Resources.Requests[resource]; ok { - podSum += containerRequest.MilliValue() - } else { - return 0, 0, 0, time.Time{}, fmt.Errorf("missing request for %s on container %s in pod %s/%s", resource, container.Name, namespace, pod.Name) - } - } - - requests[pod.Name] = podSum - - if pod.Status.Phase != v1.PodRunning || !podutil.IsPodReady(&pod) { - // save this pod name for later, but pretend it doesn't exist for now - if pod.Status.Phase != v1.PodFailed { - // Failed pods should not be counted as unready pods as they will - // not become running anymore. 
- unreadyPods.Insert(pod.Name) - } - delete(metrics, pod.Name) - continue - } - - if _, found := metrics[pod.Name]; !found { - // save this pod name for later, but pretend it doesn't exist for now - missingPods.Insert(pod.Name) - continue - } - - readyPodCount++ + readyPodCount, ignoredPods, missingPods := groupPods(podList.Items, metrics, resource, c.cpuTaintAfterStart, c.delayOfInitialReadinessStatus) + removeMetricsForPods(metrics, ignoredPods) + requests, err := calculatePodRequests(podList.Items, resource) + if err != nil { + return 0, 0, 0, time.Time{}, err } if len(metrics) == 0 { @@ -119,8 +95,8 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti return 0, 0, 0, time.Time{}, err } - rebalanceUnready := len(unreadyPods) > 0 && usageRatio > 1.0 - if !rebalanceUnready && len(missingPods) == 0 { + rebalanceIgnored := len(ignoredPods) > 0 && usageRatio > 1.0 + if !rebalanceIgnored && len(missingPods) == 0 { if math.Abs(1.0-usageRatio) <= c.tolerance { // return the current replicas if the change would be too small return currentReplicas, utilization, rawUtilization, timestamp, nil @@ -144,9 +120,9 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti } } - if rebalanceUnready { + if rebalanceIgnored { // on a scale-up, treat unready pods as using 0% of the resource request - for podName := range unreadyPods { + for podName := range ignoredPods { metrics[podName] = 0 } } @@ -176,7 +152,7 @@ func (c *ReplicaCalculator) GetRawResourceReplicas(currentReplicas int32, target return 0, 0, time.Time{}, fmt.Errorf("unable to get metrics for resource %s: %v", resource, err) } - replicaCount, utilization, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUtilization, namespace, selector) + replicaCount, utilization, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUtilization, namespace, selector, resource) return replicaCount, utilization, timestamp, err } @@ -189,12 +165,12 @@ func (c *ReplicaCalculator) GetMetricReplicas(currentReplicas int32, targetUtili return 0, 0, time.Time{}, fmt.Errorf("unable to get metric %s: %v", metricName, err) } - replicaCount, utilization, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUtilization, namespace, selector) + replicaCount, utilization, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUtilization, namespace, selector, v1.ResourceName("")) return replicaCount, utilization, timestamp, err } // calcPlainMetricReplicas calculates the desired replicas for plain (i.e. non-utilization percentage) metrics. 
-func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMetricsInfo, currentReplicas int32, targetUtilization int64, namespace string, selector labels.Selector) (replicaCount int32, utilization int64, err error) { +func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMetricsInfo, currentReplicas int32, targetUtilization int64, namespace string, selector labels.Selector, resource v1.ResourceName) (replicaCount int32, utilization int64, err error) { podList, err := c.podsGetter.Pods(namespace).List(metav1.ListOptions{LabelSelector: selector.String()}) if err != nil { return 0, 0, fmt.Errorf("unable to get pods while calculating replica count: %v", err) @@ -204,26 +180,8 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet return 0, 0, fmt.Errorf("no pods returned by selector while calculating replica count") } - readyPodCount := 0 - unreadyPods := sets.NewString() - missingPods := sets.NewString() - - for _, pod := range podList.Items { - if pod.Status.Phase != v1.PodRunning || !hasPodBeenReadyBefore(&pod) { - // save this pod name for later, but pretend it doesn't exist for now - unreadyPods.Insert(pod.Name) - delete(metrics, pod.Name) - continue - } - - if _, found := metrics[pod.Name]; !found { - // save this pod name for later, but pretend it doesn't exist for now - missingPods.Insert(pod.Name) - continue - } - - readyPodCount++ - } + readyPodCount, ignoredPods, missingPods := groupPods(podList.Items, metrics, resource, c.cpuTaintAfterStart, c.delayOfInitialReadinessStatus) + removeMetricsForPods(metrics, ignoredPods) if len(metrics) == 0 { return 0, 0, fmt.Errorf("did not receive metrics for any ready pods") @@ -231,9 +189,9 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet usageRatio, utilization := metricsclient.GetMetricUtilizationRatio(metrics, targetUtilization) - rebalanceUnready := len(unreadyPods) > 0 && usageRatio > 1.0 + rebalanceIgnored := len(ignoredPods) > 0 && usageRatio > 1.0 - if !rebalanceUnready && len(missingPods) == 0 { + if !rebalanceIgnored && len(missingPods) == 0 { if math.Abs(1.0-usageRatio) <= c.tolerance { // return the current replicas if the change would be too small return currentReplicas, utilization, nil @@ -257,9 +215,9 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet } } - if rebalanceUnready { + if rebalanceIgnored { // on a scale-up, treat unready pods as using 0% of the resource request - for podName := range unreadyPods { + for podName := range ignoredPods { metrics[podName] = 0 } } @@ -386,21 +344,59 @@ func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(currentReplicas int3 return replicaCount, utilization, timestamp, nil } -// hasPodBeenReadyBefore returns true if the pod is ready or if it's not ready -func hasPodBeenReadyBefore(pod *v1.Pod) bool { - _, readyCondition := podutil.GetPodCondition(&pod.Status, v1.PodReady) - if readyCondition == nil { - return false +func groupPods(pods []v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1.ResourceName, cpuTaintAfterStart, delayOfInitialReadinessStatus time.Duration) (readyPodCount int, ignoredPods sets.String, missingPods sets.String) { + missingPods = sets.NewString() + ignoredPods = sets.NewString() + glog.Errorf("groupPods stack: %v", string(debug.Stack())) + for _, pod := range pods { + if pod.Status.Phase == v1.PodFailed { + continue + } + if _, found := metrics[pod.Name]; !found { + missingPods.Insert(pod.Name) + continue + } + if 
resource == v1.ResourceCPU { + var ignorePod bool + _, condition := podutil.GetPodCondition(&pod.Status, v1.PodReady) + if condition == nil || pod.Status.StartTime == nil { + ignorePod = true + } else { + if condition.Status == v1.ConditionTrue { + ignorePod = pod.Status.StartTime.Add(cpuTaintAfterStart + cpuSampleWindow).After(time.Now()) + } else { + ignorePod = pod.Status.StartTime.Add(delayOfInitialReadinessStatus).After(condition.LastTransitionTime.Time) + } + } + if ignorePod { + ignoredPods.Insert(pod.Name) + continue + } + } + + readyPodCount++ + } + return +} + +func calculatePodRequests(pods []v1.Pod, resource v1.ResourceName) (map[string]int64, error) { + requests := make(map[string]int64, len(pods)) + for _, pod := range pods { + podSum := int64(0) + for _, container := range pod.Spec.Containers { + if containerRequest, ok := container.Resources.Requests[resource]; ok { + podSum += containerRequest.MilliValue() + } else { + return nil, fmt.Errorf("missing request for %s", resource) + } + } + requests[pod.Name] = podSum + } + return requests, nil +} + +func removeMetricsForPods(metrics metricsclient.PodMetricsInfo, pods sets.String) { + for _, pod := range pods.UnsortedList() { + delete(metrics, pod) } - if readyCondition.Status == v1.ConditionTrue { - return true - } - lastReady := readyCondition.LastTransitionTime.Time - if pod.Status.StartTime == nil { - return false - } - started := pod.Status.StartTime.Time - // If last status change was longer than maxDelayOfInitialReadinessStatus after the pod was - // created assume it was ready in the past. - return lastReady.After(started.Add(maxDelayOfInitialReadinessStatus)) } diff --git a/pkg/controller/podautoscaler/replica_calculator_test.go b/pkg/controller/podautoscaler/replica_calculator_test.go index 0dc43b0696a..fa869a38f47 100644 --- a/pkg/controller/podautoscaler/replica_calculator_test.go +++ b/pkg/controller/podautoscaler/replica_calculator_test.go @@ -29,10 +29,12 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/kubernetes/fake" core "k8s.io/client-go/testing" "k8s.io/kubernetes/pkg/api/legacyscheme" "k8s.io/kubernetes/pkg/controller/podautoscaler/metrics" + metricsclient "k8s.io/kubernetes/pkg/controller/podautoscaler/metrics" cmapi "k8s.io/metrics/pkg/apis/custom_metrics/v1beta1" emapi "k8s.io/metrics/pkg/apis/external_metrics/v1beta1" metricsapi "k8s.io/metrics/pkg/apis/metrics/v1beta1" @@ -88,6 +90,7 @@ type replicaCalcTestCase struct { metric *metricInfo podReadiness []v1.ConditionStatus + podStartTime []metav1.Time podPhase []v1.PodPhase } @@ -111,6 +114,10 @@ func (tc *replicaCalcTestCase) prepareTestClientSet() *fake.Clientset { if tc.podReadiness != nil && i < len(tc.podReadiness) { podReadiness = tc.podReadiness[i] } + var podStartTime metav1.Time + if tc.podStartTime != nil { + podStartTime = tc.podStartTime[i] + } podPhase := v1.PodRunning if tc.podPhase != nil { podPhase = tc.podPhase[i] @@ -118,7 +125,8 @@ func (tc *replicaCalcTestCase) prepareTestClientSet() *fake.Clientset { podName := fmt.Sprintf("%s-%d", podNamePrefix, i) pod := v1.Pod{ Status: v1.PodStatus{ - Phase: podPhase, + Phase: podPhase, + StartTime: &podStartTime, Conditions: []v1.PodCondition{ { Type: v1.PodReady, @@ -316,11 +324,7 @@ func (tc *replicaCalcTestCase) runTest(t *testing.T) { testClient, testMetricsClient, testCMClient, testEMClient := tc.prepareTestClient(t) metricsClient := 
metrics.NewRESTMetricsClient(testMetricsClient.MetricsV1beta1(), testCMClient, testEMClient) - replicaCalc := &ReplicaCalculator{ - metricsClient: metricsClient, - podsGetter: testClient.Core(), - tolerance: defaultTestingTolerance, - } + replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus) selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ MatchLabels: map[string]string{"name": podNamePrefix}, @@ -439,6 +443,24 @@ func TestReplicaCalcScaleUpUnreadyLessScale(t *testing.T) { tc.runTest(t) } +func TestReplicaCalcScaleUpHotCpuLessScale(t *testing.T) { + tc := replicaCalcTestCase{ + currentReplicas: 3, + expectedReplicas: 4, + podStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), coolCpuCreationTime()}, + resource: &resourceInfo{ + name: v1.ResourceCPU, + requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + levels: []int64{300, 500, 700}, + + targetUtilization: 30, + expectedUtilization: 60, + expectedValue: numContainersPerPod * 600, + }, + } + tc.runTest(t) +} + func TestReplicaCalcScaleUpUnreadyNoScale(t *testing.T) { tc := replicaCalcTestCase{ currentReplicas: 3, @@ -457,6 +479,25 @@ func TestReplicaCalcScaleUpUnreadyNoScale(t *testing.T) { tc.runTest(t) } +func TestReplicaCalcScaleHotCpuNoScale(t *testing.T) { + tc := replicaCalcTestCase{ + currentReplicas: 3, + expectedReplicas: 3, + podReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse}, + podStartTime: []metav1.Time{coolCpuCreationTime(), hotCpuCreationTime(), hotCpuCreationTime()}, + resource: &resourceInfo{ + name: v1.ResourceCPU, + requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + levels: []int64{400, 500, 700}, + + targetUtilization: 30, + expectedUtilization: 40, + expectedValue: numContainersPerPod * 400, + }, + } + tc.runTest(t) +} + func TestReplicaCalcScaleUpIgnoresFailedPods(t *testing.T) { tc := replicaCalcTestCase{ currentReplicas: 2, @@ -491,11 +532,12 @@ func TestReplicaCalcScaleUpCM(t *testing.T) { tc.runTest(t) } -func TestReplicaCalcScaleUpCMUnreadyLessScale(t *testing.T) { +func TestReplicaCalcScaleUpCMUnreadyHotCpuNoLessScale(t *testing.T) { tc := replicaCalcTestCase{ currentReplicas: 3, - expectedReplicas: 4, + expectedReplicas: 6, podReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse}, + podStartTime: []metav1.Time{coolCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime()}, metric: &metricInfo{ name: "qps", levels: []int64{50000, 10000, 30000}, @@ -507,16 +549,17 @@ func TestReplicaCalcScaleUpCMUnreadyLessScale(t *testing.T) { tc.runTest(t) } -func TestReplicaCalcScaleUpCMUnreadyNoScaleWouldScaleDown(t *testing.T) { +func TestReplicaCalcScaleUpCMUnreadyHotCpuScaleWouldScaleDown(t *testing.T) { tc := replicaCalcTestCase{ currentReplicas: 3, - expectedReplicas: 3, + expectedReplicas: 7, podReadiness: []v1.ConditionStatus{v1.ConditionFalse, v1.ConditionTrue, v1.ConditionFalse}, + podStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime()}, metric: &metricInfo{ name: "qps", levels: []int64{50000, 15000, 30000}, targetUtilization: 15000, - expectedUtilization: 15000, + expectedUtilization: 31666, metricType: podMetric, }, } @@ -709,7 +752,7 @@ func TestReplicaCalcScaleDownPerPodCMExternal(t *testing.T) { tc.runTest(t) } -func 
TestReplicaCalcScaleDownIgnoresUnreadyPods(t *testing.T) { +func TestReplicaCalcScaleDownIncludeUnreadyPods(t *testing.T) { tc := replicaCalcTestCase{ currentReplicas: 5, expectedReplicas: 2, @@ -727,6 +770,24 @@ func TestReplicaCalcScaleDownIgnoresUnreadyPods(t *testing.T) { tc.runTest(t) } +func TestReplicaCalcScaleDownIgnoreHotCpuPods(t *testing.T) { + tc := replicaCalcTestCase{ + currentReplicas: 5, + expectedReplicas: 2, + podStartTime: []metav1.Time{coolCpuCreationTime(), coolCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime(), hotCpuCreationTime()}, + resource: &resourceInfo{ + name: v1.ResourceCPU, + requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + levels: []int64{100, 300, 500, 250, 250}, + + targetUtilization: 50, + expectedUtilization: 30, + expectedValue: numContainersPerPod * 300, + }, + } + tc.runTest(t) +} + func TestReplicaCalcScaleDownIgnoresFailedPods(t *testing.T) { tc := replicaCalcTestCase{ currentReplicas: 5, @@ -943,7 +1004,7 @@ func TestReplicaCalcMissingMetricsNoChangeLt(t *testing.T) { tc.runTest(t) } -func TestReplicaCalcMissingMetricsUnreadyNoChange(t *testing.T) { +func TestReplicaCalcMissingMetricsUnreadyChange(t *testing.T) { tc := replicaCalcTestCase{ currentReplicas: 3, expectedReplicas: 3, @@ -961,6 +1022,24 @@ func TestReplicaCalcMissingMetricsUnreadyNoChange(t *testing.T) { tc.runTest(t) } +func TestReplicaCalcMissingMetricsHotCpuNoChange(t *testing.T) { + tc := replicaCalcTestCase{ + currentReplicas: 3, + expectedReplicas: 3, + podStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), coolCpuCreationTime()}, + resource: &resourceInfo{ + name: v1.ResourceCPU, + requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + levels: []int64{100, 450}, + + targetUtilization: 50, + expectedUtilization: 45, + expectedValue: numContainersPerPod * 450, + }, + } + tc.runTest(t) +} + func TestReplicaCalcMissingMetricsUnreadyScaleUp(t *testing.T) { tc := replicaCalcTestCase{ currentReplicas: 3, @@ -979,6 +1058,25 @@ func TestReplicaCalcMissingMetricsUnreadyScaleUp(t *testing.T) { tc.runTest(t) } +func TestReplicaCalcMissingMetricsHotCpuScaleUp(t *testing.T) { + tc := replicaCalcTestCase{ + currentReplicas: 3, + expectedReplicas: 4, + podReadiness: []v1.ConditionStatus{v1.ConditionFalse, v1.ConditionTrue, v1.ConditionTrue}, + podStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), coolCpuCreationTime()}, + resource: &resourceInfo{ + name: v1.ResourceCPU, + requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + levels: []int64{100, 2000}, + + targetUtilization: 50, + expectedUtilization: 200, + expectedValue: numContainersPerPod * 2000, + }, + } + tc.runTest(t) +} + func TestReplicaCalcMissingMetricsUnreadyScaleDown(t *testing.T) { tc := replicaCalcTestCase{ currentReplicas: 4, @@ -1069,74 +1167,183 @@ func TestReplicaCalcComputedToleranceAlgImplementation(t *testing.T) { tc.runTest(t) } -func TestHasPodBeenReadyBefore(t *testing.T) { +func TestGroupPods(t *testing.T) { tests := []struct { - name string - conditions []v1.PodCondition - started time.Time - expected bool + name string + pods []v1.Pod + metrics metricsclient.PodMetricsInfo + resource v1.ResourceName + expectReadyPodCount int + expectUnreadyPods sets.String + expectMissingPods sets.String }{ { - "initially unready", - []v1.PodCondition{ 
- { - Type: v1.PodReady, - LastTransitionTime: metav1.Time{ - Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time, - }, - Status: v1.ConditionFalse, - }, - }, - metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time, - false, + "void", + []v1.Pod{}, + metricsclient.PodMetricsInfo{}, + v1.ResourceName(""), + 0, + sets.NewString(), + sets.NewString(), }, { - "currently unready", - []v1.PodCondition{ + "a ready pod", + []v1.Pod{ { - Type: v1.PodReady, - LastTransitionTime: metav1.Time{ - Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time, + ObjectMeta: metav1.ObjectMeta{ + Name: "bentham", + }, + Status: v1.PodStatus{ + Phase: v1.PodSucceeded, }, - Status: v1.ConditionFalse, }, }, - metav1.Date(2018, 7, 25, 17, 0, 0, 0, time.UTC).Time, - true, + metricsclient.PodMetricsInfo{ + "bentham": 1, + }, + v1.ResourceName("hedons"), + 1, + sets.NewString(), + sets.NewString(), }, { - "currently ready", - []v1.PodCondition{ + "an unready pod", + []v1.Pod{ { - Type: v1.PodReady, - LastTransitionTime: metav1.Time{ - Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time, + ObjectMeta: metav1.ObjectMeta{ + Name: "lucretius", + }, + Status: v1.PodStatus{ + Phase: v1.PodSucceeded, + StartTime: &metav1.Time{ + Time: time.Now(), + }, }, - Status: v1.ConditionTrue, }, }, - metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time, - true, + metricsclient.PodMetricsInfo{ + "lucretius": 1, + }, + v1.ResourceCPU, + 0, + sets.NewString("lucretius"), + sets.NewString(), }, { - "no ready status", - []v1.PodCondition{}, - metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time, - false, + "a ready cpu pod", + []v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "niccolo", + }, + Status: v1.PodStatus{ + Phase: v1.PodSucceeded, + StartTime: &metav1.Time{ + Time: time.Now().Add(-3 * time.Minute), + }, + Conditions: []v1.PodCondition{ + { + Type: v1.PodReady, + LastTransitionTime: metav1.Time{Time: time.Now().Add(-3 * time.Minute)}, + Status: v1.ConditionTrue, + }, + }, + }, + }, + }, + metricsclient.PodMetricsInfo{ + "niccolo": 1, + }, + v1.ResourceCPU, + 1, + sets.NewString(), + sets.NewString(), + }, + { + "a missing pod", + []v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "epicurus", + }, + Status: v1.PodStatus{ + Phase: v1.PodSucceeded, + StartTime: &metav1.Time{ + Time: time.Now().Add(-3 * time.Minute), + }, + }, + }, + }, + metricsclient.PodMetricsInfo{}, + v1.ResourceCPU, + 0, + sets.NewString(), + sets.NewString("epicurus"), + }, + { + "all together", + []v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "lucretius", + }, + Status: v1.PodStatus{ + Phase: v1.PodSucceeded, + StartTime: &metav1.Time{ + Time: time.Now(), + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "niccolo", + }, + Status: v1.PodStatus{ + Phase: v1.PodSucceeded, + StartTime: &metav1.Time{ + Time: time.Now().Add(-3 * time.Minute), + }, + Conditions: []v1.PodCondition{ + { + Type: v1.PodReady, + LastTransitionTime: metav1.Time{Time: time.Now().Add(-3 * time.Minute)}, + Status: v1.ConditionTrue, + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "epicurus", + }, + Status: v1.PodStatus{ + Phase: v1.PodSucceeded, + StartTime: &metav1.Time{ + Time: time.Now().Add(-3 * time.Minute), + }, + }, + }, + }, + metricsclient.PodMetricsInfo{ + "lucretius": 1, + "niccolo": 1, + }, + v1.ResourceCPU, + 1, + sets.NewString("lucretius"), + sets.NewString("epicurus"), }, } for _, tc := range tests { - pod := &v1.Pod{ - Status: v1.PodStatus{ - Conditions: tc.conditions, - StartTime: &metav1.Time{ - Time: tc.started, 
- }, - }, + readyPodCount, unreadyPods, missingPods := groupPods(tc.pods, tc.metrics, tc.resource, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus) + if readyPodCount != tc.expectReadyPodCount { + t.Errorf("%s got readyPodCount %d, expected %d", tc.name, readyPodCount, tc.expectReadyPodCount) } - got := hasPodBeenReadyBefore(pod) - if got != tc.expected { - t.Errorf("[TestHasPodBeenReadyBefore.%s] got %v, want %v", tc.name, got, tc.expected) + if !unreadyPods.Equal(tc.expectUnreadyPods) { + t.Errorf("%s got unreadyPods %v, expected %v", tc.name, unreadyPods, tc.expectUnreadyPods) + } + if !missingPods.Equal(tc.expectMissingPods) { + t.Errorf("%s got missingPods %v, expected %v", tc.name, missingPods, tc.expectMissingPods) } } }