fix(scheduler_one): call Done() as soon as possible

This commit is contained in:
Kensei Nakada 2023-10-24 07:08:22 +00:00
parent 9682c62148
commit baf69640d3
3 changed files with 20 additions and 13 deletions

View File

@ -642,20 +642,19 @@ func (p *PriorityQueue) SchedulingCycle() int64 {
// determineSchedulingHintForInFlightPod looks at the unschedulable plugins of the given Pod
// and determines the scheduling hint for this Pod by checking the events that happened while the Pod was in flight.
func (p *PriorityQueue) determineSchedulingHintForInFlightPod(logger klog.Logger, pInfo *framework.QueuedPodInfo) queueingStrategy {
events, err := p.activeQ.clusterEventsForPod(logger, pInfo)
if err != nil {
logger.Error(err, "Error getting cluster events for pod", "pod", klog.KObj(pInfo.Pod))
return queueAfterBackoff
}
rejectorPlugins := pInfo.UnschedulablePlugins.Union(pInfo.PendingPlugins)
if len(rejectorPlugins) == 0 {
if len(pInfo.UnschedulablePlugins) == 0 && len(pInfo.PendingPlugins) == 0 {
// No failed plugins are associated with this Pod.
// Meaning something unusual (a temporary failure on kube-apiserver, etc.) happened and this Pod gets moved back to the queue.
// In this case, we should retry scheduling it because this Pod may not be retried until the next flush.
return queueAfterBackoff
}
events, err := p.activeQ.clusterEventsForPod(logger, pInfo)
if err != nil {
logger.Error(err, "Error getting cluster events for pod", "pod", klog.KObj(pInfo.Pod))
return queueAfterBackoff
}
// check if there is an event that makes this Pod schedulable based on pInfo.UnschedulablePlugins.
queueingStrategy := queueSkip
for _, e := range events {

View File

@ -239,8 +239,12 @@ type QueuedPodInfo struct {
// It shouldn't be updated once initialized. It's used to record the e2e scheduling
// latency for a pod.
InitialAttemptTimestamp *time.Time
// UnschedulablePlugins records the plugin names that the Pod failed with Unschedulable or UnschedulableAndUnresolvable status.
// It's registered only when the Pod is rejected in PreFilter, Filter, Reserve, PreBind or Permit (WaitOnPermit).
// UnschedulablePlugins records the plugin names that the Pod failed with Unschedulable or UnschedulableAndUnresolvable status
// at specific extension points: PreFilter, Filter, Reserve, Permit (WaitOnPermit), or PreBind.
// If Pods are rejected at other extension points,
// they're assumed to be unexpected errors (e.g., temporary network issue, plugin implementation issue, etc.)
// and retried soon after a backoff period.
// That is because such failures could be solved regardless of incoming cluster events (registered in EventsToRegister).
UnschedulablePlugins sets.Set[string]
// PendingPlugins records the plugin names that the Pod failed with Pending status.
PendingPlugins sets.Set[string]

View File

@ -126,9 +126,6 @@ func (sched *Scheduler) ScheduleOne(ctx context.Context) {
sched.handleBindingCycleError(bindingCycleCtx, state, fwk, assumedPodInfo, start, scheduleResult, status)
return
}
// Usually, DonePod is called inside the scheduling queue,
// but in this case, we need to call it here because this Pod won't go back to the scheduling queue.
sched.SchedulingQueue.Done(assumedPodInfo.Pod.UID)
}()
}
@ -309,6 +306,13 @@ func (sched *Scheduler) bindingCycle(
return status
}
// Any failures after this point cannot lead to the Pod being considered unschedulable.
// We define the Pod as "unschedulable" only when Pods are rejected at specific extension points, and PreBind is the last one in the scheduling/binding cycle.
//
// We call Done() here so that the cluster events stored in the scheduling queue
// are freed sooner, which reduces memory consumption on busy clusters.
sched.SchedulingQueue.Done(assumedPod.UID)
// Run "bind" plugins.
if status := sched.bind(ctx, fwk, assumedPod, scheduleResult.SuggestedHost, state); !status.IsSuccess() {
return status