scheduler: allow PreBind to return "Pending" and "Unschedulable"

Any non-success status returned by PreBind was treated as a pod scheduling
error. This was overlooked when the blocking API calls in the DRA plugin were
moved into a PreBind implementation, leading to:

    E0604 15:45:50.980929  306340 schedule_one.go:1048] "Error scheduling pod; retrying" err="waiting for resource driver" pod="test/test-draqld28"

That's because DRA's PreBind does some updates in the apiserver, then returns
Pending to wait for the outcome.

The fix is to let PreBind return the same special status codes ("Pending",
"Unschedulable") that other extension points may already return.
This commit is contained in:
Patrick Ohly 2024-06-06 15:28:08 +02:00
parent 9e2075b3c8
commit c339eafb76
2 changed files with 12 additions and 1 deletions

View File

@ -208,7 +208,7 @@ type QueuedPodInfo struct {
// latency for a pod.
InitialAttemptTimestamp *time.Time
// UnschedulablePlugins records the plugin names that the Pod failed with Unschedulable or UnschedulableAndUnresolvable status.
// It's registered only when the Pod is rejected in PreFilter, Filter, Reserve, or Permit (WaitOnPermit).
// It's registered only when the Pod is rejected in PreFilter, Filter, Reserve, PreBind, or Permit (WaitOnPermit).
UnschedulablePlugins sets.Set[string]
// PendingPlugins records the plugin names that the Pod failed with Pending status.
PendingPlugins sets.Set[string]

View File

@ -292,6 +292,17 @@ func (sched *Scheduler) bindingCycle(
// Run "prebind" plugins.
if status := fwk.RunPreBindPlugins(ctx, state, assumedPod, scheduleResult.SuggestedHost); !status.IsSuccess() {
if status.IsRejected() {
fitErr := &framework.FitError{
NumAllNodes: 1,
Pod: assumedPodInfo.Pod,
Diagnosis: framework.Diagnosis{
NodeToStatusMap: framework.NodeToStatusMap{scheduleResult.SuggestedHost: status},
UnschedulablePlugins: sets.New(status.Plugin()),
},
}
return framework.NewStatus(status.Code()).WithError(fitErr)
}
return status
}