mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-01 07:47:56 +00:00
fix(scheduler_one): call Done() as soon as possible
This commit is contained in:
parent
9682c62148
commit
baf69640d3
@ -642,20 +642,19 @@ func (p *PriorityQueue) SchedulingCycle() int64 {
|
||||
// determineSchedulingHintForInFlightPod looks at the unschedulable plugins of the given Pod
|
||||
// and determines the scheduling hint for this Pod while checking the events that happened during in-flight.
|
||||
func (p *PriorityQueue) determineSchedulingHintForInFlightPod(logger klog.Logger, pInfo *framework.QueuedPodInfo) queueingStrategy {
|
||||
events, err := p.activeQ.clusterEventsForPod(logger, pInfo)
|
||||
if err != nil {
|
||||
logger.Error(err, "Error getting cluster events for pod", "pod", klog.KObj(pInfo.Pod))
|
||||
return queueAfterBackoff
|
||||
}
|
||||
|
||||
rejectorPlugins := pInfo.UnschedulablePlugins.Union(pInfo.PendingPlugins)
|
||||
if len(rejectorPlugins) == 0 {
|
||||
if len(pInfo.UnschedulablePlugins) == 0 && len(pInfo.PendingPlugins) == 0 {
|
||||
// No failed plugins are associated with this Pod.
|
||||
// Meaning something unusual (a temporary failure on kube-apiserver, etc.) happened and this Pod gets moved back to the queue.
|
||||
// In this case, we should retry scheduling it because this Pod may not be retried until the next flush.
|
||||
return queueAfterBackoff
|
||||
}
|
||||
|
||||
events, err := p.activeQ.clusterEventsForPod(logger, pInfo)
|
||||
if err != nil {
|
||||
logger.Error(err, "Error getting cluster events for pod", "pod", klog.KObj(pInfo.Pod))
|
||||
return queueAfterBackoff
|
||||
}
|
||||
|
||||
// check if there is an event that makes this Pod schedulable based on pInfo.UnschedulablePlugins.
|
||||
queueingStrategy := queueSkip
|
||||
for _, e := range events {
|
||||
|
@ -239,8 +239,12 @@ type QueuedPodInfo struct {
|
||||
// It shouldn't be updated once initialized. It's used to record the e2e scheduling
|
||||
// latency for a pod.
|
||||
InitialAttemptTimestamp *time.Time
|
||||
// UnschedulablePlugins records the plugin names that the Pod failed with Unschedulable or UnschedulableAndUnresolvable status.
|
||||
// It's registered only when the Pod is rejected in PreFilter, Filter, Reserve, PreBind or Permit (WaitOnPermit).
|
||||
// UnschedulablePlugins records the plugin names that the Pod failed with Unschedulable or UnschedulableAndUnresolvable status
|
||||
// at specific extension points: PreFilter, Filter, Reserve, Permit (WaitOnPermit), or PreBind.
|
||||
// If Pods are rejected at other extension points,
|
||||
// they're assumed to be unexpected errors (e.g., temporary network issue, plugin implementation issue, etc.)
|
||||
// and retried soon after a backoff period.
|
||||
// That is because such failures could be solved regardless of incoming cluster events (registered in EventsToRegister).
|
||||
UnschedulablePlugins sets.Set[string]
|
||||
// PendingPlugins records the plugin names that the Pod failed with Pending status.
|
||||
PendingPlugins sets.Set[string]
|
||||
|
@ -126,9 +126,6 @@ func (sched *Scheduler) ScheduleOne(ctx context.Context) {
|
||||
sched.handleBindingCycleError(bindingCycleCtx, state, fwk, assumedPodInfo, start, scheduleResult, status)
|
||||
return
|
||||
}
|
||||
// Usually, Done() is called inside the scheduling queue,
|
||||
// but in this case, we need to call it here because this Pod won't go back to the scheduling queue.
|
||||
sched.SchedulingQueue.Done(assumedPodInfo.Pod.UID)
|
||||
}()
|
||||
}
|
||||
|
||||
@ -309,6 +306,13 @@ func (sched *Scheduler) bindingCycle(
|
||||
return status
|
||||
}
|
||||
|
||||
// Any failures after this point cannot lead to the Pod being considered unschedulable.
|
||||
// We define the Pod as "unschedulable" only when Pods are rejected at specific extension points, and PreBind is the last one in the scheduling/binding cycle.
|
||||
//
|
||||
// We can call Done() here because
|
||||
// we can free the cluster events stored in the scheduling queue sooner, which is worthwhile for busy clusters in terms of memory consumption.
|
||||
sched.SchedulingQueue.Done(assumedPod.UID)
|
||||
|
||||
// Run "bind" plugins.
|
||||
if status := sched.bind(ctx, fwk, assumedPod, scheduleResult.SuggestedHost, state); !status.IsSuccess() {
|
||||
return status
|
||||
|
Loading…
Reference in New Issue
Block a user