diff --git a/plugin/pkg/scheduler/core/generic_scheduler.go b/plugin/pkg/scheduler/core/generic_scheduler.go index e306b3afd17..2c2f3e3dd6e 100644 --- a/plugin/pkg/scheduler/core/generic_scheduler.go +++ b/plugin/pkg/scheduler/core/generic_scheduler.go @@ -362,7 +362,14 @@ func addNominatedPods(podPriority int32, meta algorithm.PredicateMetadata, return true, metaOut, nodeInfoOut } -// Checks whether node with a given name and NodeInfo satisfies all predicateFuncs. +// podFitsOnNode checks whether a node given by NodeInfo satisfies the given predicate functions. +// This function is called from two different places: Schedule and Preempt. +// When it is called from Schedule, we want to test whether the pod is schedulable +// on the node with all the existing pods on the node plus higher and equal priority +// pods nominated to run on the node. +// When it is called from Preempt, we should remove the victims of preemption and +// add the nominated pods. Removal of the victims is done by SelectVictimsOnNode(). +// It removes victims from meta and NodeInfo before calling this function. func podFitsOnNode( pod *v1.Pod, meta algorithm.PredicateMetadata, diff --git a/plugin/pkg/scheduler/factory/factory.go b/plugin/pkg/scheduler/factory/factory.go index 5693212a046..e77d4958303 100644 --- a/plugin/pkg/scheduler/factory/factory.go +++ b/plugin/pkg/scheduler/factory/factory.go @@ -1162,7 +1162,7 @@ func (p *podPreemptor) RemoveNominatedNodeAnnotation(pod *v1.Pod) error { if _, exists := podCopy.Annotations[core.NominatedNodeAnnotationKey]; !exists { return nil } - // Note: Deleting the entry from the annotations and passing it Patch() will + // Note: Deleting the entry from the annotations and passing it to Patch() will // not remove the annotation. That's why we set it to empty string. 
podCopy.Annotations[core.NominatedNodeAnnotationKey] = "" ret := &unstructured.Unstructured{} diff --git a/plugin/pkg/scheduler/scheduler.go b/plugin/pkg/scheduler/scheduler.go index 52a25f599b5..d9af3d67c96 100644 --- a/plugin/pkg/scheduler/scheduler.go +++ b/plugin/pkg/scheduler/scheduler.go @@ -226,6 +226,10 @@ func (sched *Scheduler) preempt(preemptor *v1.Pod, scheduleErr error) (string, e sched.config.Recorder.Eventf(victim, v1.EventTypeNormal, "Preempted", "by %v/%v on node %v", preemptor.Namespace, preemptor.Name, nodeName) } } + // Clearing nominated pods should happen outside of "if node != nil". Node could + // be nil when a pod with a nominated node name is eligible to preempt again, + // but the preemption logic does not find any node for it. In that case the Preempt() + // function of generic_scheduler.go returns the pod itself for removal of the annotation. for _, p := range nominatedPodsToClear { rErr := sched.config.PodPreemptor.RemoveNominatedNodeAnnotation(p) if rErr != nil {