mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-31 07:20:13 +00:00
scheduler: start scheduling attempt with clean UnschedulablePlugins
When some plugin was registered as "unschedulable" in some previous scheduling attempt, it kept that attribute for a pod forever. When that plugin then later failed with an error that requires backoff, the pod was incorrectly moved to the "unschedulable" queue where it got stuck until the periodic flushing because there was no event that the plugin was waiting for. Here's an example where that happened: framework.go:1280: E0831 20:03:47.184243] Reserve/DynamicResources: Plugin failed err="Operation cannot be fulfilled on podschedulingcontexts.resource.k8s.io \"test-dragxd5c\": the object has been modified; please apply your changes to the latest version and try again" node="scheduler-perf-dra-7l2v2" plugin="DynamicResources" pod="test/test-dragxd5c" schedule_one.go:1001: E0831 20:03:47.184345] Error scheduling pod; retrying err="running Reserve plugin \"DynamicResources\": Operation cannot be fulfilled on podschedulingcontexts.resource.k8s.io \"test-dragxd5c\": the object has been modified; please apply your changes to the latest version and try again" pod="test/test-dragxd5c" ... scheduling_queue.go:745: I0831 20:03:47.198968] Pod moved to an internal scheduling queue pod="test/test-dragxd5c" event="ScheduleAttemptFailure" queue="Unschedulable" schedulingCycle=9576 hint="QueueSkip" Pop still needs the information about unschedulable plugins to update the UnschedulableReason metric. It can reset that information before returning the PodInfo for the next scheduling attempt.
This commit is contained in:
parent
04292dd94b
commit
4e73634b53
@ -840,9 +840,11 @@ func (p *PriorityQueue) Pop() (*framework.QueuedPodInfo, error) {
|
||||
p.inFlightPods[pInfo.Pod.UID] = p.inFlightEvents.PushBack(pInfo.Pod)
|
||||
}
|
||||
|
||||
// Update metrics and reset the set of unschedulable plugins for the next attempt.
|
||||
for plugin := range pInfo.UnschedulablePlugins {
|
||||
metrics.UnschedulableReason(plugin, pInfo.Pod.Spec.SchedulerName).Dec()
|
||||
}
|
||||
pInfo.UnschedulablePlugins.Clear()
|
||||
|
||||
return pInfo, nil
|
||||
}
|
||||
|
@ -201,17 +201,6 @@ func Test_InFlightPods(t *testing.T) {
|
||||
callback func(t *testing.T, q *PriorityQueue)
|
||||
}
|
||||
|
||||
popPod := func(t *testing.T, q *PriorityQueue, pod *v1.Pod) *framework.QueuedPodInfo {
|
||||
p, err := q.Pop()
|
||||
if err != nil {
|
||||
t.Fatalf("Pop failed: %v", err)
|
||||
}
|
||||
if p.Pod.UID != pod.UID {
|
||||
t.Errorf("Unexpected popped pod: %v", p)
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
queueingHintMap QueueingHintMapPerProfile
|
||||
@ -542,6 +531,47 @@ func Test_InFlightPods(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "popped pod must have empty UnschedulablePlugins",
|
||||
isSchedulingQueueHintEnabled: true,
|
||||
initialPods: []*v1.Pod{pod},
|
||||
actions: []action{
|
||||
{callback: func(t *testing.T, q *PriorityQueue) { poppedPod = popPod(t, q, pod) }},
|
||||
{callback: func(t *testing.T, q *PriorityQueue) {
|
||||
logger, _ := ktesting.NewTestContext(t)
|
||||
// Unschedulable.
|
||||
poppedPod.UnschedulablePlugins = sets.New("fooPlugin1")
|
||||
if err := q.AddUnschedulableIfNotPresent(logger, poppedPod, q.SchedulingCycle()); err != nil {
|
||||
t.Errorf("Unexpected error from AddUnschedulableIfNotPresent: %v", err)
|
||||
}
|
||||
}},
|
||||
{eventHappens: &PvAdd}, // Active again.
|
||||
{callback: func(t *testing.T, q *PriorityQueue) {
|
||||
poppedPod = popPod(t, q, pod)
|
||||
if len(poppedPod.UnschedulablePlugins) > 0 {
|
||||
t.Errorf("QueuedPodInfo from Pop should have empty UnschedulablePlugins, got instead: %+v", poppedPod)
|
||||
}
|
||||
}},
|
||||
{callback: func(t *testing.T, q *PriorityQueue) {
|
||||
logger, _ := ktesting.NewTestContext(t)
|
||||
// Failed (i.e. no UnschedulablePlugins). Should go to backoff.
|
||||
if err := q.AddUnschedulableIfNotPresent(logger, poppedPod, q.SchedulingCycle()); err != nil {
|
||||
t.Errorf("Unexpected error from AddUnschedulableIfNotPresent: %v", err)
|
||||
}
|
||||
}},
|
||||
},
|
||||
queueingHintMap: QueueingHintMapPerProfile{
|
||||
"": {
|
||||
PvAdd: {
|
||||
{
|
||||
// The hint fn tells that this event makes a Pod scheudlable immediately.
|
||||
PluginName: "fooPlugin1",
|
||||
QueueingHintFn: queueHintReturnQueueImmediately,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
@ -642,6 +672,59 @@ func Test_InFlightPods(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func popPod(t *testing.T, q *PriorityQueue, pod *v1.Pod) *framework.QueuedPodInfo {
|
||||
p, err := q.Pop()
|
||||
if err != nil {
|
||||
t.Fatalf("Pop failed: %v", err)
|
||||
}
|
||||
if p.Pod.UID != pod.UID {
|
||||
t.Errorf("Unexpected popped pod: %v", p)
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
func TestPop(t *testing.T) {
|
||||
pod := st.MakePod().Name("targetpod").UID("pod1").Obj()
|
||||
queueingHintMap := QueueingHintMapPerProfile{
|
||||
"": {
|
||||
PvAdd: {
|
||||
{
|
||||
// The hint fn tells that this event makes a Pod scheudlable immediately.
|
||||
PluginName: "fooPlugin1",
|
||||
QueueingHintFn: queueHintReturnQueueImmediately,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for name, isSchedulingQueueHintEnabled := range map[string]bool{"with-hints": true, "without-hints": false} {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerQueueingHints, isSchedulingQueueHintEnabled)()
|
||||
logger, ctx := ktesting.NewTestContext(t)
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
q := NewTestQueueWithObjects(ctx, newDefaultQueueSort(), []runtime.Object{pod}, WithQueueingHintMapPerProfile(queueingHintMap))
|
||||
q.Add(logger, pod)
|
||||
|
||||
// Simulate failed attempt that makes the pod unschedulable.
|
||||
poppedPod := popPod(t, q, pod)
|
||||
poppedPod.UnschedulablePlugins = sets.New("fooPlugin1")
|
||||
if err := q.AddUnschedulableIfNotPresent(logger, poppedPod, q.SchedulingCycle()); err != nil {
|
||||
t.Errorf("Unexpected error from AddUnschedulableIfNotPresent: %v", err)
|
||||
}
|
||||
|
||||
// Activate it again.
|
||||
q.MoveAllToActiveOrBackoffQueue(logger, PvAdd, nil, nil, nil)
|
||||
|
||||
// Now check result of Pop.
|
||||
poppedPod = popPod(t, q, pod)
|
||||
if len(poppedPod.UnschedulablePlugins) > 0 {
|
||||
t.Errorf("QueuedPodInfo from Pop should have empty UnschedulablePlugins, got instead: %+v", poppedPod)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPriorityQueue_AddUnschedulableIfNotPresent(t *testing.T) {
|
||||
objs := []runtime.Object{highPriNominatedPodInfo.Pod, unschedulablePodInfo.Pod}
|
||||
logger, ctx := ktesting.NewTestContext(t)
|
||||
|
Loading…
Reference in New Issue
Block a user