Ignore pending pods.

This change adds pending pods to the ignored set before selecting
pods that are missing metrics, so pending pods are always ignored when
calculating scale.

When the HPA decides which pods and metric values to take into account
when scaling, it divides the pods into three disjoint subsets:
1) ready, 2) missing metrics, and 3) ignored. First the HPA selects the
pods which are missing metrics. Then it selects the pods that should be
ignored because they are not ready yet, or are still consuming CPU
during initialization. All the remaining pods go into the ready set.
After the HPA has decided which direction it wants to scale based on
the ready pods, it considers what might have happened if it had the
missing metrics. It makes a conservative guess about what the missing
metrics might have been: 0% if it wants to scale up, 100% if it wants
to scale down.

This is a good thing when scaling up, because the newly added pods will
likely help reduce the usage ratio, even though their metrics are
missing at the moment; the HPA should wait to see the results of its
previous scale decision before it makes another one. However, when
scaling down, it means that many missing metrics can pin the HPA at a
high scale even when load is completely removed. In particular, when
there are many unschedulable pods due to insufficient cluster capacity,
the many missing metrics (assumed to be 100%) can cause the HPA to
avoid scaling down indefinitely.
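
To make the effect concrete, the following is a minimal, self-contained
sketch of that two-pass calculation. It is not the upstream replica
calculator; the helper name and numbers are illustrative only, chosen to
match the new test case added below (5 replicas, a 50% CPU target, one
ready pod at 10% utilization, four pending pods with no metrics).

// A rough sketch, NOT the real HPA code, of how missing metrics assumed
// at 100% can pin a proposed scale-down at the current replica count.
// All names and numbers are illustrative.
package main

import (
	"fmt"
	"math"
)

// desired first picks a direction from the ready pods only, then re-checks
// that direction after filling in the missing metrics with a conservative
// guess: 100% when a scale-down was proposed, 0% when a scale-up was
// proposed (adding 0 is implicit in the division below).
func desired(currentReplicas int, readyUtil []float64, missing int, targetUtil float64) int {
	sum := 0.0
	for _, u := range readyUtil {
		sum += u
	}
	usageRatio := (sum / float64(len(readyUtil))) / targetUtil

	if missing == 0 {
		// No missing metrics: scale directly on the ready pods.
		return int(math.Ceil(usageRatio * float64(len(readyUtil))))
	}

	assumed := sum
	if usageRatio < 1.0 {
		assumed += 100.0 * float64(missing)
	}
	newRatio := (assumed / float64(len(readyUtil)+missing)) / targetUtil

	// If the conservative guess flips the direction, hold the current scale.
	if (usageRatio < 1.0) != (newRatio < 1.0) {
		return currentReplicas
	}
	return int(math.Ceil(newRatio * float64(len(readyUtil)+missing)))
}

func main() {
	// Before this change: the four pending pods count as "missing metrics",
	// the 100% assumption flips the direction, and the HPA stays at 5.
	fmt.Println(desired(5, []float64{10}, 4, 50)) // 5

	// After this change: pending pods are ignored outright, so only the
	// ready pod's 10% utilization counts and the HPA can go down to 1.
	fmt.Println(desired(5, []float64{10}, 0, 50)) // 1
}

The real calculator also applies a tolerance check and only rebalances
the ignored (not-yet-ready) pods when scaling up; the sketch skips both,
but the shape of the problem is the same: as long as unschedulable pods
sit in the missing-metrics set, the 100% guess keeps reversing every
proposed scale-down, which is what the new
TestReplicaCalcScaleDownExcludeUnscheduledPods case below pins down.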
Author: Joseph Burnett, 2019-07-02 20:15:58 +02:00
parent 4f29960cb2
commit 80e279d353
2 changed files with 58 additions and 12 deletions

pkg/controller/podautoscaler/replica_calculator.go

@@ -22,7 +22,7 @@ import (
 	"time"
 
 	autoscaling "k8s.io/api/autoscaling/v2beta2"
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/util/sets"
@@ -365,11 +365,18 @@ func groupPods(pods []*v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1
 		if pod.DeletionTimestamp != nil || pod.Status.Phase == v1.PodFailed {
 			continue
 		}
+		// Pending pods are ignored.
+		if pod.Status.Phase == v1.PodPending {
+			ignoredPods.Insert(pod.Name)
+			continue
+		}
+		// Pods missing metrics.
 		metric, found := metrics[pod.Name]
 		if !found {
 			missingPods.Insert(pod.Name)
 			continue
 		}
+		// Unready pods are ignored.
 		if resource == v1.ResourceCPU {
 			var ignorePod bool
 			_, condition := podutil.GetPodCondition(&pod.Status, v1.PodReady)

pkg/controller/podautoscaler/replica_calculator_test.go

@@ -23,7 +23,7 @@ import (
 	"time"
 
 	autoscalingv2 "k8s.io/api/autoscaling/v2beta2"
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/meta/testrestmapper"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -862,6 +862,25 @@ func TestReplicaCalcScaleDownIncludeUnreadyPods(t *testing.T) {
 	tc.runTest(t)
 }
 
+func TestReplicaCalcScaleDownExcludeUnscheduledPods(t *testing.T) {
+	tc := replicaCalcTestCase{
+		currentReplicas:  5,
+		expectedReplicas: 1,
+		podReadiness:     []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse, v1.ConditionFalse, v1.ConditionFalse},
+		podPhase:         []v1.PodPhase{v1.PodRunning, v1.PodPending, v1.PodPending, v1.PodPending, v1.PodPending},
+		resource: &resourceInfo{
+			name:     v1.ResourceCPU,
+			requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
+			levels:   []int64{100},
+
+			targetUtilization:   50,
+			expectedUtilization: 10,
+			expectedValue:       numContainersPerPod * 100,
+		},
+	}
+	tc.runTest(t)
+}
+
 func TestReplicaCalcScaleDownIgnoreHotCpuPods(t *testing.T) {
 	tc := replicaCalcTestCase{
 		currentReplicas: 5,
@@ -1616,18 +1635,38 @@ func TestGroupPods(t *testing.T) {
 			sets.NewString("lucretius"),
 			sets.NewString("epicurus"),
 		},
+		{
+			name: "pending pods are ignored",
+			pods: []*v1.Pod{
+				{
+					ObjectMeta: metav1.ObjectMeta{
+						Name: "unscheduled",
+					},
+					Status: v1.PodStatus{
+						Phase: v1.PodPending,
+					},
+				},
+			},
+			metrics:             metricsclient.PodMetricsInfo{},
+			resource:            v1.ResourceCPU,
+			expectReadyPodCount: 0,
+			expectIgnoredPods:   sets.NewString("unscheduled"),
+			expectMissingPods:   sets.NewString(),
+		},
 	}
 	for _, tc := range tests {
-		readyPodCount, ignoredPods, missingPods := groupPods(tc.pods, tc.metrics, tc.resource, defaultTestingCpuInitializationPeriod, defaultTestingDelayOfInitialReadinessStatus)
-		if readyPodCount != tc.expectReadyPodCount {
-			t.Errorf("%s got readyPodCount %d, expected %d", tc.name, readyPodCount, tc.expectReadyPodCount)
-		}
-		if !ignoredPods.Equal(tc.expectIgnoredPods) {
-			t.Errorf("%s got unreadyPods %v, expected %v", tc.name, ignoredPods, tc.expectIgnoredPods)
-		}
-		if !missingPods.Equal(tc.expectMissingPods) {
-			t.Errorf("%s got missingPods %v, expected %v", tc.name, missingPods, tc.expectMissingPods)
-		}
+		t.Run(tc.name, func(t *testing.T) {
+			readyPodCount, ignoredPods, missingPods := groupPods(tc.pods, tc.metrics, tc.resource, defaultTestingCpuInitializationPeriod, defaultTestingDelayOfInitialReadinessStatus)
+			if readyPodCount != tc.expectReadyPodCount {
+				t.Errorf("%s got readyPodCount %d, expected %d", tc.name, readyPodCount, tc.expectReadyPodCount)
+			}
+			if !ignoredPods.Equal(tc.expectIgnoredPods) {
+				t.Errorf("%s got unreadyPods %v, expected %v", tc.name, ignoredPods, tc.expectIgnoredPods)
+			}
+			if !missingPods.Equal(tc.expectMissingPods) {
+				t.Errorf("%s got missingPods %v, expected %v", tc.name, missingPods, tc.expectMissingPods)
+			}
+		})
 	}
 }