diff --git a/pkg/scheduler/framework/interface.go b/pkg/scheduler/framework/interface.go index 2623f5b988a..1b8af830cb2 100644 --- a/pkg/scheduler/framework/interface.go +++ b/pkg/scheduler/framework/interface.go @@ -278,8 +278,7 @@ func AsStatus(err error) *Status { } } -// PluginToStatus maps plugin name to status. Currently used to identify which Filter plugin -// returned which status. +// PluginToStatus maps a plugin name to the status it returned. type PluginToStatus map[string]*Status // Merge merges the statuses in the map into one. The resulting status code have the following diff --git a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go index 91fc9ef9ed0..c1c5165b5b2 100644 --- a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go +++ b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go @@ -228,15 +228,16 @@ func (pl *DefaultPreemption) SelectVictimsOnNode( // PodEligibleToPreemptOthers returns one bool and one string. The bool // indicates whether this pod should be considered for preempting other pods or // not. The string includes the reason if this pod isn't eligible. -// If this pod has a preemptionPolicy of Never or has already preempted other -// pods and those are in their graceful termination period, it shouldn't be -// considered for preemption. -// We look at the node that is nominated for this pod and as long as there are -// terminating pods on the node, we don't consider this for preempting more pods. +// There are two possible reasons: +// 1. The pod has a preemptionPolicy of Never. +// 2. The pod has already preempted other pods and the victims are in their graceful termination period. +// Currently we check the node that is nominated for this pod, and as long as there are +// terminating pods on this node, we don't attempt to preempt more pods. 
func (pl *DefaultPreemption) PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNodeStatus *framework.Status) (bool, string) { if pod.Spec.PreemptionPolicy != nil && *pod.Spec.PreemptionPolicy == v1.PreemptNever { - return false, fmt.Sprint("not eligible due to preemptionPolicy=Never.") + return false, "not eligible due to preemptionPolicy=Never." } + nodeInfos := pl.fh.SnapshotSharedLister().NodeInfos() nomNodeName := pod.Status.NominatedNodeName if len(nomNodeName) > 0 { @@ -251,7 +252,7 @@ func (pl *DefaultPreemption) PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNo for _, p := range nodeInfo.Pods { if p.Pod.DeletionTimestamp != nil && corev1helpers.PodPriority(p.Pod) < podPriority { // There is a terminating pod on the nominated node. - return false, fmt.Sprint("not eligible due to a terminating pod on the nominated node.") + return false, "not eligible due to a terminating pod on the nominated node." } } } diff --git a/pkg/scheduler/schedule_one.go b/pkg/scheduler/schedule_one.go index 06703c509fb..cc0fbc73725 100644 --- a/pkg/scheduler/schedule_one.go +++ b/pkg/scheduler/schedule_one.go @@ -126,46 +126,45 @@ func (sched *Scheduler) schedulingCycle( start time.Time, podsToActivate *framework.PodsToActivate, ) (ScheduleResult, *framework.QueuedPodInfo, *framework.Status) { - pod := podInfo.Pod scheduleResult, err := sched.SchedulePod(ctx, fwk, state, pod) if err != nil { + if err == ErrNoNodesAvailable { + status := framework.NewStatus(framework.UnschedulableAndUnresolvable).WithError(err) + return ScheduleResult{nominatingInfo: clearNominatedNode}, podInfo, status + } + + fitError, ok := err.(*framework.FitError) + if !ok { + klog.ErrorS(err, "Error selecting node for pod", "pod", klog.KObj(pod)) + return ScheduleResult{nominatingInfo: clearNominatedNode}, podInfo, framework.AsStatus(err) + } + // SchedulePod() may have failed because the pod would not fit on any host, so we try to // preempt, with the expectation that the next time the pod is tried for 
scheduling it // will fit due to the preemption. It is also possible that a different pod will schedule // into the resources that were preempted, but this is harmless. - var ( - nominatingInfo *framework.NominatingInfo - status *framework.Status - ) - if fitError, ok := err.(*framework.FitError); ok { - if !fwk.HasPostFilterPlugins() { - klog.V(3).InfoS("No PostFilter plugins are registered, so no preemption will be performed") - } else { - // Run PostFilter plugins to try to make the pod schedulable in a future scheduling cycle. - result, status := fwk.RunPostFilterPlugins(ctx, state, pod, fitError.Diagnosis.NodeToStatusMap) - if status.Code() == framework.Error { - klog.ErrorS(nil, "Status after running PostFilter plugins for pod", "pod", klog.KObj(pod), "status", status) - } else { - msg := status.Message() - fitError.Diagnosis.PostFilterMsg = msg - klog.V(5).InfoS("Status after running PostFilter plugins for pod", "pod", klog.KObj(pod), "status", msg) - } - if result != nil { - nominatingInfo = result.NominatingInfo - } - } - status = framework.NewStatus(framework.Unschedulable).WithError(err) - } else if err == ErrNoNodesAvailable { - nominatingInfo = clearNominatedNode - status = framework.NewStatus(framework.UnschedulableAndUnresolvable).WithError(err) - } else { - klog.ErrorS(err, "Error selecting node for pod", "pod", klog.KObj(pod)) - nominatingInfo = clearNominatedNode - status = framework.AsStatus(err) + + if !fwk.HasPostFilterPlugins() { + klog.V(3).InfoS("No PostFilter plugins are registered, so no preemption will be performed") + return ScheduleResult{}, podInfo, framework.NewStatus(framework.Unschedulable).WithError(err) } - return ScheduleResult{nominatingInfo: nominatingInfo}, podInfo, status + // Run PostFilter plugins to attempt to make the pod schedulable in a future scheduling cycle. 
+ result, status := fwk.RunPostFilterPlugins(ctx, state, pod, fitError.Diagnosis.NodeToStatusMap) + msg := status.Message() + fitError.Diagnosis.PostFilterMsg = msg + if status.Code() == framework.Error { + klog.ErrorS(nil, "Status after running PostFilter plugins for pod", "pod", klog.KObj(pod), "status", msg) + } else { + klog.V(5).InfoS("Status after running PostFilter plugins for pod", "pod", klog.KObj(pod), "status", msg) + } + + var nominatingInfo *framework.NominatingInfo + if result != nil { + nominatingInfo = result.NominatingInfo + } + return ScheduleResult{nominatingInfo: nominatingInfo}, podInfo, framework.NewStatus(framework.Unschedulable).WithError(err) } metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInSeconds(start))