Merge pull request #125527 from sanposhiho/gated-pods-filter-out-bug

fix: skip isPodWorthRequeuing only when SchedulingGates gates the pod
This commit is contained in:
Kubernetes Prow Robot 2024-06-21 12:22:55 -07:00 committed by GitHub
commit 8c508c5480
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 35 additions and 13 deletions

View File

@ -47,6 +47,7 @@ import (
"k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/scheduler/framework" "k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
"k8s.io/kubernetes/pkg/scheduler/internal/heap" "k8s.io/kubernetes/pkg/scheduler/internal/heap"
"k8s.io/kubernetes/pkg/scheduler/metrics" "k8s.io/kubernetes/pkg/scheduler/metrics"
"k8s.io/kubernetes/pkg/scheduler/util" "k8s.io/kubernetes/pkg/scheduler/util"
@ -1193,11 +1194,25 @@ func (p *PriorityQueue) movePodsToActiveOrBackoffQueue(logger klog.Logger, podIn
activated := false activated := false
for _, pInfo := range podInfoList { for _, pInfo := range podInfoList {
// Since there may be many gated pods and they will not move from the // When handling events takes time, a scheduling throughput gets impacted negatively
// unschedulable pool, we skip calling the expensive isPodWorthRequeueing. // because of a shared lock within PriorityQueue, which Pop() also requires.
if pInfo.Gated { //
// Scheduling-gated Pods never get schedulable with any events,
// except the Pods themselves got updated, which isn't handled by movePodsToActiveOrBackoffQueue.
// So, we can skip them early here so that they don't go through isPodWorthRequeuing,
// which isn't fast enough to keep a sufficient scheduling throughput
// when the number of scheduling-gated Pods in unschedulablePods is large.
// https://github.com/kubernetes/kubernetes/issues/124384
// This is a hotfix for this issue, which might be changed
// once we have a better general solution for the shared lock issue.
//
// Note that we cannot skip all pInfo.Gated Pods here
// because PreEnqueue plugins apart from the scheduling gate plugin may change the gating status
// with these events.
if pInfo.Gated && pInfo.UnschedulablePlugins.Has(names.SchedulingGates) {
continue continue
} }
schedulingHint := p.isPodWorthRequeuing(logger, pInfo, event, oldObj, newObj) schedulingHint := p.isPodWorthRequeuing(logger, pInfo, event, oldObj, newObj)
if schedulingHint == queueSkip { if schedulingHint == queueSkip {
// QueueingHintFn determined that this Pod isn't worth putting to activeQ or backoffQ by this event. // QueueingHintFn determined that this Pod isn't worth putting to activeQ or backoffQ by this event.

View File

@ -44,6 +44,7 @@ import (
"k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/scheduler/framework" "k8s.io/kubernetes/pkg/scheduler/framework"
plfeature "k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature" plfeature "k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/queuesort" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/queuesort"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/schedulinggates" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/schedulinggates"
"k8s.io/kubernetes/pkg/scheduler/metrics" "k8s.io/kubernetes/pkg/scheduler/metrics"
@ -1499,13 +1500,19 @@ func TestPriorityQueue_MoveAllToActiveOrBackoffQueueWithQueueingHint(t *testing.
expectedQ: unschedulablePods, expectedQ: unschedulablePods,
}, },
{ {
name: "QueueHintFunction is not called when Pod is gated", name: "QueueHintFunction is not called when Pod is gated by SchedulingGates plugin",
podInfo: setQueuedPodInfoGated(&framework.QueuedPodInfo{PodInfo: mustNewPodInfo(p), UnschedulablePlugins: sets.New("foo")}), podInfo: setQueuedPodInfoGated(&framework.QueuedPodInfo{PodInfo: mustNewPodInfo(p), UnschedulablePlugins: sets.New(names.SchedulingGates, "foo")}),
hint: func(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) { hint: func(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
return framework.Queue, fmt.Errorf("QueueingHintFn should not be called as pod is gated") return framework.Queue, fmt.Errorf("QueueingHintFn should not be called as pod is gated")
}, },
expectedQ: unschedulablePods, expectedQ: unschedulablePods,
}, },
{
name: "QueueHintFunction is called when Pod is gated by a plugin other than SchedulingGates",
podInfo: setQueuedPodInfoGated(&framework.QueuedPodInfo{PodInfo: mustNewPodInfo(p), UnschedulablePlugins: sets.New("foo")}),
hint: queueHintReturnQueue,
expectedQ: activeQ,
},
} }
for _, test := range tests { for _, test := range tests {
@ -1518,14 +1525,13 @@ func TestPriorityQueue_MoveAllToActiveOrBackoffQueueWithQueueingHint(t *testing.
QueueingHintFn: test.hint, QueueingHintFn: test.hint,
}, },
} }
test.podInfo.UnschedulablePlugins = sets.New("foo")
cl := testingclock.NewFakeClock(now) cl := testingclock.NewFakeClock(now)
q := NewTestQueue(ctx, newDefaultQueueSort(), WithQueueingHintMapPerProfile(m), WithClock(cl)) q := NewTestQueue(ctx, newDefaultQueueSort(), WithQueueingHintMapPerProfile(m), WithClock(cl))
// add to unsched pod pool
q.activeQ.Add(q.newQueuedPodInfo(test.podInfo.Pod)) q.activeQ.Add(q.newQueuedPodInfo(test.podInfo.Pod))
if p, err := q.Pop(logger); err != nil || p.Pod != test.podInfo.Pod { if p, err := q.Pop(logger); err != nil || p.Pod != test.podInfo.Pod {
t.Errorf("Expected: %v after Pop, but got: %v", test.podInfo.Pod.Name, p.Pod.Name) t.Errorf("Expected: %v after Pop, but got: %v", test.podInfo.Pod.Name, p.Pod.Name)
} }
// add to unsched pod pool
err := q.AddUnschedulableIfNotPresent(logger, test.podInfo, q.SchedulingCycle()) err := q.AddUnschedulableIfNotPresent(logger, test.podInfo, q.SchedulingCycle())
if err != nil { if err != nil {
t.Fatalf("unexpected error from AddUnschedulableIfNotPresent: %v", err) t.Fatalf("unexpected error from AddUnschedulableIfNotPresent: %v", err)

View File

@ -2651,8 +2651,8 @@ func (pl *SchedulingGatesPluginWOEvents) EventsToRegister() []framework.ClusterE
return nil return nil
} }
// This test helps to verify registering nil events for schedulingGates plugin works as expected. // This test helps to verify registering nil events for PreEnqueue plugin works as expected.
func TestSchedulingGatesPluginEventsToRegister(t *testing.T) { func TestPreEnqueuePluginEventsToRegister(t *testing.T) {
testContext := testutils.InitTestAPIServer(t, "preenqueue-plugin", nil) testContext := testutils.InitTestAPIServer(t, "preenqueue-plugin", nil)
num := func(pl framework.Plugin) int { num := func(pl framework.Plugin) int {
@ -2670,6 +2670,7 @@ func TestSchedulingGatesPluginEventsToRegister(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
withEvents bool withEvents bool
// count is the expected number of calls to PreEnqueue().
count int count int
queueHintEnabled []bool queueHintEnabled []bool
expectedScheduled []bool expectedScheduled []bool
@ -2686,7 +2687,7 @@ func TestSchedulingGatesPluginEventsToRegister(t *testing.T) {
{ {
name: "preEnqueue plugin with event registered", name: "preEnqueue plugin with event registered",
withEvents: true, withEvents: true,
count: 2, count: 3,
queueHintEnabled: []bool{false, true}, queueHintEnabled: []bool{false, true},
expectedScheduled: []bool{true, true}, expectedScheduled: []bool{true, true},
}, },
@ -2700,7 +2701,7 @@ func TestSchedulingGatesPluginEventsToRegister(t *testing.T) {
t.Run(tt.name+fmt.Sprintf(" queueHint(%v)", queueHintEnabled), func(t *testing.T) { t.Run(tt.name+fmt.Sprintf(" queueHint(%v)", queueHintEnabled), func(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerQueueingHints, queueHintEnabled) featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerQueueingHints, queueHintEnabled)
// new plugin every time to clear counts // use new plugin every time to clear counts
var plugin framework.PreEnqueuePlugin var plugin framework.PreEnqueuePlugin
if tt.withEvents { if tt.withEvents {
plugin = &SchedulingGatesPluginWithEvents{SchedulingGates: schedulinggates.SchedulingGates{}} plugin = &SchedulingGatesPluginWithEvents{SchedulingGates: schedulinggates.SchedulingGates{}}