diff --git a/pkg/scheduler/framework/preemption/preemption.go b/pkg/scheduler/framework/preemption/preemption.go index 6cdee095978..7c98bb423f2 100644 --- a/pkg/scheduler/framework/preemption/preemption.go +++ b/pkg/scheduler/framework/preemption/preemption.go @@ -441,55 +441,24 @@ func pickOneNodeForPreemption(nodesToVictims map[string]*extenderv1.Victims) str if len(nodesToVictims) == 0 { return "" } - minNumPDBViolatingPods := int64(math.MaxInt32) - var minNodes1 []string - lenNodes1 := 0 - for node, victims := range nodesToVictims { - numPDBViolatingPods := victims.NumPDBViolations - if numPDBViolatingPods < minNumPDBViolatingPods { - minNumPDBViolatingPods = numPDBViolatingPods - minNodes1 = nil - lenNodes1 = 0 - } - if numPDBViolatingPods == minNumPDBViolatingPods { - minNodes1 = append(minNodes1, node) - lenNodes1++ - } - } - if lenNodes1 == 1 { - return minNodes1[0] + + allCandidates := make([]string, 0, len(nodesToVictims)) + for node := range nodesToVictims { + allCandidates = append(allCandidates, node) } - // There are more than one node with minimum number PDB violating pods. Find - // the one with minimum highest priority victim. - minHighestPriority := int32(math.MaxInt32) - var minNodes2 = make([]string, lenNodes1) - lenNodes2 := 0 - for i := 0; i < lenNodes1; i++ { - node := minNodes1[i] - victims := nodesToVictims[node] + minNumPDBViolatingScoreFunc := func(node string) int64 { + // The smaller the NumPDBViolations, the higher the score. + return -nodesToVictims[node].NumPDBViolations + } + minHighestPriorityScoreFunc := func(node string) int64 { // highestPodPriority is the highest priority among the victims on this node. - highestPodPriority := corev1helpers.PodPriority(victims.Pods[0]) - if highestPodPriority < minHighestPriority { - minHighestPriority = highestPodPriority - lenNodes2 = 0 - } - if highestPodPriority == minHighestPriority { - minNodes2[lenNodes2] = node - lenNodes2++ - } + highestPodPriority := corev1helpers.PodPriority(nodesToVictims[node].Pods[0]) + // The smaller the highestPodPriority, the higher the score. + return -int64(highestPodPriority) } - if lenNodes2 == 1 { - return minNodes2[0] - } - - // There are a few nodes with minimum highest priority victim. Find the - // smallest sum of priorities. - minSumPriorities := int64(math.MaxInt64) - lenNodes1 = 0 - for i := 0; i < lenNodes2; i++ { + minSumPrioritiesScoreFunc := func(node string) int64 { var sumPriorities int64 - node := minNodes2[i] for _, pod := range nodesToVictims[node].Pods { // We add MaxInt32+1 to all priorities to make all of them >= 0. This is // needed so that a node with a few pods with negative priority is not @@ -497,64 +466,61 @@ func pickOneNodeForPreemption(nodesToVictims map[string]*extenderv1.Victims) str // priority (and similar scenarios). sumPriorities += int64(corev1helpers.PodPriority(pod)) + int64(math.MaxInt32+1) } - if sumPriorities < minSumPriorities { - minSumPriorities = sumPriorities - lenNodes1 = 0 - } - if sumPriorities == minSumPriorities { - minNodes1[lenNodes1] = node - lenNodes1++ - } + // The smaller the sumPriorities, the higher the score. + return -sumPriorities } - if lenNodes1 == 1 { - return minNodes1[0] + minNumPodsScoreFunc := func(node string) int64 { + // The smaller the length of pods, the higher the score. + return -int64(len(nodesToVictims[node].Pods)) } - - // There are a few nodes with minimum highest priority victim and sum of priorities. - // Find one with the minimum number of pods. - minNumPods := math.MaxInt32 - lenNodes2 = 0 - for i := 0; i < lenNodes1; i++ { - node := minNodes1[i] - numPods := len(nodesToVictims[node].Pods) - if numPods < minNumPods { - minNumPods = numPods - lenNodes2 = 0 - } - if numPods == minNumPods { - minNodes2[lenNodes2] = node - lenNodes2++ - } - } - if lenNodes2 == 1 { - return minNodes2[0] - } - - // There are a few nodes with same number of pods. - // Find the node that satisfies latest(earliestStartTime(all highest-priority pods on node)) - latestStartTime := util.GetEarliestPodStartTime(nodesToVictims[minNodes2[0]]) - if latestStartTime == nil { - // If the earliest start time of all pods on the 1st node is nil, just return it, - // which is not expected to happen. - klog.ErrorS(errors.New("earliestStartTime is nil for node"), "Should not reach here", "node", klog.KRef("", minNodes2[0])) - return minNodes2[0] - } - nodeToReturn := minNodes2[0] - for i := 1; i < lenNodes2; i++ { - node := minNodes2[i] + latestStartTimeScoreFunc := func(node string) int64 { // Get earliest start time of all pods on the current node. earliestStartTimeOnNode := util.GetEarliestPodStartTime(nodesToVictims[node]) if earliestStartTimeOnNode == nil { - klog.ErrorS(errors.New("earliestStartTime is nil for node"), "Should not reach here", "node", klog.KRef("", node)) - continue - } - if earliestStartTimeOnNode.After(latestStartTime.Time) { - latestStartTime = earliestStartTimeOnNode - nodeToReturn = node + klog.ErrorS(errors.New("earliestStartTime is nil for node"), "Should not reach here", "node", node) + return int64(math.MinInt64) } + // The bigger the earliestStartTimeOnNode, the higher the score. + return earliestStartTimeOnNode.UnixNano() } - return nodeToReturn + // Each scoreFunc scores the nodes according to specific rules and keeps the name of the node + // with the highest score. If and only if the scoreFunc has more than one node with the highest + // score, we will execute the other scoreFunc in order of precedence. + scoreFuncs := []func(string) int64{ + // A node with a minimum number of PDB is preferable. + minNumPDBViolatingScoreFunc, + // A node with a minimum highest priority victim is preferable. + minHighestPriorityScoreFunc, + // A node with the smallest sum of priorities is preferable. + minSumPrioritiesScoreFunc, + // A node with the minimum number of pods is preferable. + minNumPodsScoreFunc, + // A node with the latest start time of all highest priority victims is preferable. + latestStartTimeScoreFunc, + // If there are still ties, then the first Node in the list is selected. + } + + for _, f := range scoreFuncs { + selectedNodes := []string{} + maxScore := int64(math.MinInt64) + for _, node := range allCandidates { + score := f(node) + if score > maxScore { + maxScore = score + selectedNodes = []string{} + } + if score == maxScore { + selectedNodes = append(selectedNodes, node) + } + } + if len(selectedNodes) == 1 { + return selectedNodes[0] + } + allCandidates = selectedNodes + } + + return allCandidates[0] } // getLowerPriorityNominatedPods returns pods whose priority is smaller than the