mirror of https://github.com/k3s-io/kubernetes.git
Merge pull request #114623 from Huang-Wei/feat/smart-preemption-identification
Enhanced logic to identify eligible preemption node
This commit is contained in: bd43394467

@@ -49,6 +49,7 @@ const Name = names.DefaultPreemption
 // DefaultPreemption is a PostFilter plugin implements the preemption logic.
 type DefaultPreemption struct {
     fh        framework.Handle
+    fts       feature.Features
     args      config.DefaultPreemptionArgs
     podLister corelisters.PodLister
     pdbLister policylisters.PodDisruptionBudgetLister

@@ -72,6 +73,7 @@ func New(dpArgs runtime.Object, fh framework.Handle, fts feature.Features) (fram
     }
     pl := DefaultPreemption{
         fh:        fh,
+        fts:       fts,
         args:      *args,
         podLister: fh.SharedInformerFactory().Core().V1().Pods().Lister(),
         pdbLister: getPDBLister(fh.SharedInformerFactory()),

@@ -250,7 +252,7 @@ func (pl *DefaultPreemption) PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNo
     if nodeInfo, _ := nodeInfos.Get(nomNodeName); nodeInfo != nil {
         podPriority := corev1helpers.PodPriority(pod)
         for _, p := range nodeInfo.Pods {
-            if p.Pod.DeletionTimestamp != nil && corev1helpers.PodPriority(p.Pod) < podPriority {
+            if corev1helpers.PodPriority(p.Pod) < podPriority && podTerminatingByPreemption(p.Pod, pl.fts.EnablePodDisruptionConditions) {
                 // There is a terminating pod on the nominated node.
                 return false, "not eligible due to a terminating pod on the nominated node."
             }

@@ -260,6 +262,25 @@ func (pl *DefaultPreemption) PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNo
     return true, ""
 }

+// podTerminatingByPreemption returns the pod's terminating state if feature PodDisruptionConditions is not enabled.
+// Otherwise, it additionally checks if the termination state is caused by scheduler preemption.
+func podTerminatingByPreemption(p *v1.Pod, enablePodDisruptionConditions bool) bool {
+    if p.DeletionTimestamp == nil {
+        return false
+    }
+
+    if !enablePodDisruptionConditions {
+        return true
+    }
+
+    for _, condition := range p.Status.Conditions {
+        if condition.Type == v1.DisruptionTarget {
+            return condition.Status == v1.ConditionTrue && condition.Reason == v1.PodReasonPreemptionByScheduler
+        }
+    }
+    return false
+}
+
 // filterPodsWithPDBViolation groups the given "pods" into two groups of "violatingPods"
 // and "nonViolatingPods" based on whether their PDBs will be violated if they are
 // preempted.
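
In behavioral terms, this changes when a nominated node disqualifies the incoming pod: previously any terminating lower-priority pod on the node made it ineligible, whereas now, with the PodDisruptionConditions feature gate enabled (e.g. --feature-gates=PodDisruptionConditions=true on the scheduler), only a pod that is terminating because the scheduler preempted it, as recorded in its DisruptionTarget condition, still counts; with the gate disabled the old behavior is kept. A minimal, hypothetical sketch of the distinction, using only the fields the new helper inspects (isPreemptionVictim and the two pods below are made up for illustration; the real check is the unexported podTerminatingByPreemption above):

package main

import (
    "fmt"

    v1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// isPreemptionVictim mirrors the gate-enabled path of podTerminatingByPreemption:
// the pod must be terminating AND carry a DisruptionTarget condition with
// reason PreemptionByScheduler.
func isPreemptionVictim(p *v1.Pod) bool {
    if p.DeletionTimestamp == nil {
        return false
    }
    for _, c := range p.Status.Conditions {
        if c.Type == v1.DisruptionTarget {
            return c.Status == v1.ConditionTrue && c.Reason == v1.PodReasonPreemptionByScheduler
        }
    }
    return false
}

func main() {
    now := metav1.Now()

    // Terminating because the scheduler preempted it: still blocks the preemptor.
    victim := &v1.Pod{
        ObjectMeta: metav1.ObjectMeta{Name: "victim", DeletionTimestamp: &now},
        Status: v1.PodStatus{Conditions: []v1.PodCondition{{
            Type:   v1.DisruptionTarget,
            Status: v1.ConditionTrue,
            Reason: v1.PodReasonPreemptionByScheduler,
        }}},
    }

    // Terminating for an unrelated reason (e.g. a rolling update): no longer blocks.
    unrelated := &v1.Pod{
        ObjectMeta: metav1.ObjectMeta{Name: "unrelated", DeletionTimestamp: &now},
    }

    fmt.Println(isPreemptionVictim(victim))    // true
    fmt.Println(isPreemptionVictim(unrelated)) // false
}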

@@ -1401,6 +1401,7 @@ func TestSelectBestCandidate(t *testing.T) {
 func TestPodEligibleToPreemptOthers(t *testing.T) {
     tests := []struct {
         name                string
+        fts                 feature.Features
         pod                 *v1.Pod
         pods                []*v1.Pod
         nodes               []string

@@ -1439,6 +1440,40 @@ func TestPodEligibleToPreemptOthers(t *testing.T) {
             nominatedNodeStatus: nil,
             expected:            false,
         },
+        {
+            name: "victim Pods terminating, feature PodDisruptionConditions is enabled",
+            fts:  feature.Features{EnablePodDisruptionConditions: true},
+            pod:  st.MakePod().Name("p_with_nominated_node").UID("p").Priority(highPriority).NominatedNodeName("node1").Obj(),
+            pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Priority(lowPriority).Node("node1").Terminating().
+                Condition(v1.DisruptionTarget, v1.ConditionTrue, v1.PodReasonPreemptionByScheduler).Obj()},
+            nodes:    []string{"node1"},
+            expected: false,
+        },
+        {
+            name: "non-victim Pods terminating, feature PodDisruptionConditions is enabled",
+            fts:  feature.Features{EnablePodDisruptionConditions: true},
+            pod:  st.MakePod().Name("p_with_nominated_node").UID("p").Priority(highPriority).NominatedNodeName("node1").Obj(),
+            pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Priority(lowPriority).Node("node1").Terminating().Obj()},
+            nodes:    []string{"node1"},
+            expected: true,
+        },
+        {
+            name: "victim Pods terminating, feature PodDisruptionConditions is disabled",
+            fts:  feature.Features{EnablePodDisruptionConditions: false},
+            pod:  st.MakePod().Name("p_with_nominated_node").UID("p").Priority(highPriority).NominatedNodeName("node1").Obj(),
+            pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Priority(lowPriority).Node("node1").Terminating().
+                Condition(v1.DisruptionTarget, v1.ConditionTrue, v1.PodReasonPreemptionByScheduler).Obj()},
+            nodes:    []string{"node1"},
+            expected: false,
+        },
+        {
+            name: "non-victim Pods terminating, feature PodDisruptionConditions is disabled",
+            fts:  feature.Features{EnablePodDisruptionConditions: false},
+            pod:  st.MakePod().Name("p_with_nominated_node").UID("p").Priority(highPriority).NominatedNodeName("node1").Obj(),
+            pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Priority(lowPriority).Node("node1").Terminating().Obj()},
+            nodes:    []string{"node1"},
+            expected: false,
+        },
     }

     for _, test := range tests {

@@ -1459,7 +1494,7 @@ func TestPodEligibleToPreemptOthers(t *testing.T) {
             if err != nil {
                 t.Fatal(err)
             }
-            pl := DefaultPreemption{fh: f}
+            pl := DefaultPreemption{fh: f, fts: test.fts}
             if got, _ := pl.PodEligibleToPreemptOthers(test.pod, test.nominatedNodeStatus); got != test.expected {
                 t.Errorf("expected %t, got %t for pod: %s", test.expected, got, test.pod.Name)
             }
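
The new table cases cover the four combinations of gate state and victim status (terminating due to scheduler preemption vs. terminating for another reason). Assuming the usual in-tree package layout for this plugin, they can be run in isolation with:

go test -run TestPodEligibleToPreemptOthers ./pkg/scheduler/framework/plugins/defaultpreemption/...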

@@ -27,4 +27,5 @@ type Features struct {
     EnableNodeInclusionPolicyInPodTopologySpread bool
     EnableMatchLabelKeysInPodTopologySpread      bool
     EnablePodSchedulingReadiness                 bool
+    EnablePodDisruptionConditions                bool
 }

@@ -54,6 +54,7 @@ func NewInTreeRegistry() runtime.Registry {
         EnableNodeInclusionPolicyInPodTopologySpread: feature.DefaultFeatureGate.Enabled(features.NodeInclusionPolicyInPodTopologySpread),
         EnableMatchLabelKeysInPodTopologySpread:      feature.DefaultFeatureGate.Enabled(features.MatchLabelKeysInPodTopologySpread),
         EnablePodSchedulingReadiness:                 feature.DefaultFeatureGate.Enabled(features.PodSchedulingReadiness),
+        EnablePodDisruptionConditions:                feature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions),
     }

     registry := runtime.Registry{

@@ -361,7 +361,7 @@ func (ev *Evaluator) prepareCandidate(ctx context.Context, c Candidate, pod *v1.
                 victimPodApply.Status.WithConditions(corev1apply.PodCondition().
                     WithType(v1.DisruptionTarget).
                     WithStatus(v1.ConditionTrue).
-                    WithReason("PreemptionByKubeScheduler").
+                    WithReason(v1.PodReasonPreemptionByScheduler).
                     WithMessage(fmt.Sprintf("Kube-scheduler: preempting to accommodate a higher priority pod: %s", klog.KObj(pod))).
                     WithLastTransitionTime(metav1.Now()),
                 )

@@ -2721,6 +2721,10 @@ const (
     // TerminationByKubelet reason in DisruptionTarget pod condition indicates that the termination
     // is initiated by kubelet
     PodReasonTerminationByKubelet = "TerminationByKubelet"
+
+    // PodReasonPreemptionByScheduler reason in DisruptionTarget pod condition indicates that the
+    // disruption was initiated by scheduler's preemption.
+    PodReasonPreemptionByScheduler = "PreemptionByScheduler"
 )

 // PodCondition contains details for the current condition of this pod.
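
Promoting the reason to an exported API constant (replacing the ad-hoc "PreemptionByKubeScheduler" string the scheduler used to write) means the component that sets the DisruptionTarget condition and the components that read it agree on the value. A hypothetical reader, sketched with client-go (the program and its kubeconfig handling are illustrative only; the condition fields and constants come from the API types above):

package main

import (
    "context"
    "fmt"

    v1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/kubernetes"
    "k8s.io/client-go/tools/clientcmd"
)

func main() {
    // Build a client from the default kubeconfig location.
    config, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
    if err != nil {
        panic(err)
    }
    cs, err := kubernetes.NewForConfig(config)
    if err != nil {
        panic(err)
    }

    // List pods everywhere and report the ones currently being disrupted
    // by scheduler preemption, identified via the new reason constant.
    pods, err := cs.CoreV1().Pods(metav1.NamespaceAll).List(context.TODO(), metav1.ListOptions{})
    if err != nil {
        panic(err)
    }
    for _, p := range pods.Items {
        for _, c := range p.Status.Conditions {
            if c.Type == v1.DisruptionTarget && c.Status == v1.ConditionTrue && c.Reason == v1.PodReasonPreemptionByScheduler {
                fmt.Printf("%s/%s is being preempted by the scheduler: %s\n", p.Namespace, p.Name, c.Message)
            }
        }
    }
}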