From 80e279d353c9fa7c77ac6f624f826ed827d76c60 Mon Sep 17 00:00:00 2001 From: Joseph Burnett Date: Tue, 2 Jul 2019 20:15:58 +0200 Subject: [PATCH] Ignore pending pods. This change adds pending pods to the ignored set first before selecting pods missing metrics. Pending pods are always ignored when calculating scale. When the HPA decides which pods and metric values to take into account when scaling, it divides the pods into three disjoint subsets: 1) ready 2) missing metrics and 3) ignored. First the HPA selects pods which are missing metrics. Then it selects pods that should be ignored because they are not ready yet, or are still consuming CPU during initialization. All the remaining pods go into the ready set. After the HPA has decided what direction it wants to scale based on the ready pods, it considers what might have happened if it had the missing metrics. It makes a conservative guess about what the missing metrics might have been, 0% if it wants to scale up--100% if it wants to scale down. This is a good thing when scaling up, because newly added pods will likely help reduce the usage ratio, even though their metrics are missing at the moment. The HPA should wait to see the results of its previous scale decision before it makes another one. However when scaling down, it means that many missing metrics can pin the HPA at high scale, even when load is completely removed. In particular, when there are many unschedulable pods due to insufficient cluster capacity, the many missing metrics (assumed to be 100%) can cause the HPA to avoid scaling down indefinitely. 
--- .../podautoscaler/replica_calculator.go | 9 ++- .../podautoscaler/replica_calculator_test.go | 61 +++++++++++++++---- 2 files changed, 58 insertions(+), 12 deletions(-) diff --git a/pkg/controller/podautoscaler/replica_calculator.go b/pkg/controller/podautoscaler/replica_calculator.go index d479a4edd84..991688de6dd 100644 --- a/pkg/controller/podautoscaler/replica_calculator.go +++ b/pkg/controller/podautoscaler/replica_calculator.go @@ -22,7 +22,7 @@ import ( "time" autoscaling "k8s.io/api/autoscaling/v2beta2" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/sets" @@ -365,11 +365,18 @@ func groupPods(pods []*v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1 if pod.DeletionTimestamp != nil || pod.Status.Phase == v1.PodFailed { continue } + // Pending pods are ignored. + if pod.Status.Phase == v1.PodPending { + ignoredPods.Insert(pod.Name) + continue + } + // Pods missing metrics. metric, found := metrics[pod.Name] if !found { missingPods.Insert(pod.Name) continue } + // Unready pods are ignored. 
if resource == v1.ResourceCPU { var ignorePod bool _, condition := podutil.GetPodCondition(&pod.Status, v1.PodReady) diff --git a/pkg/controller/podautoscaler/replica_calculator_test.go b/pkg/controller/podautoscaler/replica_calculator_test.go index c07d7b2bb5c..22272766038 100644 --- a/pkg/controller/podautoscaler/replica_calculator_test.go +++ b/pkg/controller/podautoscaler/replica_calculator_test.go @@ -23,7 +23,7 @@ import ( "time" autoscalingv2 "k8s.io/api/autoscaling/v2beta2" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/meta/testrestmapper" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -862,6 +862,25 @@ func TestReplicaCalcScaleDownIncludeUnreadyPods(t *testing.T) { tc.runTest(t) } +func TestReplicaCalcScaleDownExcludeUnscheduledPods(t *testing.T) { + tc := replicaCalcTestCase{ + currentReplicas: 5, + expectedReplicas: 1, + podReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse, v1.ConditionFalse, v1.ConditionFalse}, + podPhase: []v1.PodPhase{v1.PodRunning, v1.PodPending, v1.PodPending, v1.PodPending, v1.PodPending}, + resource: &resourceInfo{ + name: v1.ResourceCPU, + requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")}, + levels: []int64{100}, + + targetUtilization: 50, + expectedUtilization: 10, + expectedValue: numContainersPerPod * 100, + }, + } + tc.runTest(t) +} + func TestReplicaCalcScaleDownIgnoreHotCpuPods(t *testing.T) { tc := replicaCalcTestCase{ currentReplicas: 5, @@ -1616,18 +1635,38 @@ func TestGroupPods(t *testing.T) { sets.NewString("lucretius"), sets.NewString("epicurus"), }, + { + name: "pending pods are ignored", + pods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "unscheduled", + }, + Status: v1.PodStatus{ + Phase: v1.PodPending, + }, + }, + }, + metrics: metricsclient.PodMetricsInfo{}, + resource: 
v1.ResourceCPU, + expectReadyPodCount: 0, + expectIgnoredPods: sets.NewString("unscheduled"), + expectMissingPods: sets.NewString(), + }, } for _, tc := range tests { - readyPodCount, ignoredPods, missingPods := groupPods(tc.pods, tc.metrics, tc.resource, defaultTestingCpuInitializationPeriod, defaultTestingDelayOfInitialReadinessStatus) - if readyPodCount != tc.expectReadyPodCount { - t.Errorf("%s got readyPodCount %d, expected %d", tc.name, readyPodCount, tc.expectReadyPodCount) - } - if !ignoredPods.Equal(tc.expectIgnoredPods) { - t.Errorf("%s got unreadyPods %v, expected %v", tc.name, ignoredPods, tc.expectIgnoredPods) - } - if !missingPods.Equal(tc.expectMissingPods) { - t.Errorf("%s got missingPods %v, expected %v", tc.name, missingPods, tc.expectMissingPods) - } + t.Run(tc.name, func(t *testing.T) { + readyPodCount, ignoredPods, missingPods := groupPods(tc.pods, tc.metrics, tc.resource, defaultTestingCpuInitializationPeriod, defaultTestingDelayOfInitialReadinessStatus) + if readyPodCount != tc.expectReadyPodCount { + t.Errorf("%s got readyPodCount %d, expected %d", tc.name, readyPodCount, tc.expectReadyPodCount) + } + if !ignoredPods.Equal(tc.expectIgnoredPods) { + t.Errorf("%s got unreadyPods %v, expected %v", tc.name, ignoredPods, tc.expectIgnoredPods) + } + if !missingPods.Equal(tc.expectMissingPods) { + t.Errorf("%s got missingPods %v, expected %v", tc.name, missingPods, tc.expectMissingPods) + } + }) } }