Merge pull request #114623 from Huang-Wei/feat/smart-preemption-identification

Enhanced logic to identify eligible preemption node
This commit is contained in:
Kubernetes Prow Robot 2023-01-06 00:23:59 -08:00 committed by GitHub
commit bd43394467
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 65 additions and 3 deletions

View File

@ -49,6 +49,7 @@ const Name = names.DefaultPreemption
// DefaultPreemption is a PostFilter plugin implements the preemption logic. // DefaultPreemption is a PostFilter plugin implements the preemption logic.
type DefaultPreemption struct { type DefaultPreemption struct {
fh framework.Handle fh framework.Handle
fts feature.Features
args config.DefaultPreemptionArgs args config.DefaultPreemptionArgs
podLister corelisters.PodLister podLister corelisters.PodLister
pdbLister policylisters.PodDisruptionBudgetLister pdbLister policylisters.PodDisruptionBudgetLister
@ -72,6 +73,7 @@ func New(dpArgs runtime.Object, fh framework.Handle, fts feature.Features) (fram
} }
pl := DefaultPreemption{ pl := DefaultPreemption{
fh: fh, fh: fh,
fts: fts,
args: *args, args: *args,
podLister: fh.SharedInformerFactory().Core().V1().Pods().Lister(), podLister: fh.SharedInformerFactory().Core().V1().Pods().Lister(),
pdbLister: getPDBLister(fh.SharedInformerFactory()), pdbLister: getPDBLister(fh.SharedInformerFactory()),
@ -250,7 +252,7 @@ func (pl *DefaultPreemption) PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNo
if nodeInfo, _ := nodeInfos.Get(nomNodeName); nodeInfo != nil { if nodeInfo, _ := nodeInfos.Get(nomNodeName); nodeInfo != nil {
podPriority := corev1helpers.PodPriority(pod) podPriority := corev1helpers.PodPriority(pod)
for _, p := range nodeInfo.Pods { for _, p := range nodeInfo.Pods {
if p.Pod.DeletionTimestamp != nil && corev1helpers.PodPriority(p.Pod) < podPriority { if corev1helpers.PodPriority(p.Pod) < podPriority && podTerminatingByPreemption(p.Pod, pl.fts.EnablePodDisruptionConditions) {
// There is a terminating pod on the nominated node. // There is a terminating pod on the nominated node.
return false, "not eligible due to a terminating pod on the nominated node." return false, "not eligible due to a terminating pod on the nominated node."
} }
@ -260,6 +262,25 @@ func (pl *DefaultPreemption) PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNo
return true, "" return true, ""
} }
// podTerminatingByPreemption returns the pod's terminating state if feature PodDisruptionConditions is not enabled.
// Otherwise, it additionally checks if the termination state is caused by scheduler preemption.
func podTerminatingByPreemption(p *v1.Pod, enablePodDisruptionConditions bool) bool {
if p.DeletionTimestamp == nil {
return false
}
if !enablePodDisruptionConditions {
return true
}
for _, condition := range p.Status.Conditions {
if condition.Type == v1.DisruptionTarget {
return condition.Status == v1.ConditionTrue && condition.Reason == v1.PodReasonPreemptionByScheduler
}
}
return false
}
// filterPodsWithPDBViolation groups the given "pods" into two groups of "violatingPods" // filterPodsWithPDBViolation groups the given "pods" into two groups of "violatingPods"
// and "nonViolatingPods" based on whether their PDBs will be violated if they are // and "nonViolatingPods" based on whether their PDBs will be violated if they are
// preempted. // preempted.

View File

@ -1401,6 +1401,7 @@ func TestSelectBestCandidate(t *testing.T) {
func TestPodEligibleToPreemptOthers(t *testing.T) { func TestPodEligibleToPreemptOthers(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
fts feature.Features
pod *v1.Pod pod *v1.Pod
pods []*v1.Pod pods []*v1.Pod
nodes []string nodes []string
@ -1439,6 +1440,40 @@ func TestPodEligibleToPreemptOthers(t *testing.T) {
nominatedNodeStatus: nil, nominatedNodeStatus: nil,
expected: false, expected: false,
}, },
{
name: "victim Pods terminating, feature PodDisruptionConditions is enabled",
fts: feature.Features{EnablePodDisruptionConditions: true},
pod: st.MakePod().Name("p_with_nominated_node").UID("p").Priority(highPriority).NominatedNodeName("node1").Obj(),
pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Priority(lowPriority).Node("node1").Terminating().
Condition(v1.DisruptionTarget, v1.ConditionTrue, v1.PodReasonPreemptionByScheduler).Obj()},
nodes: []string{"node1"},
expected: false,
},
{
name: "non-victim Pods terminating, feature PodDisruptionConditions is enabled",
fts: feature.Features{EnablePodDisruptionConditions: true},
pod: st.MakePod().Name("p_with_nominated_node").UID("p").Priority(highPriority).NominatedNodeName("node1").Obj(),
pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Priority(lowPriority).Node("node1").Terminating().Obj()},
nodes: []string{"node1"},
expected: true,
},
{
name: "victim Pods terminating, feature PodDisruptionConditions is disabled",
fts: feature.Features{EnablePodDisruptionConditions: false},
pod: st.MakePod().Name("p_with_nominated_node").UID("p").Priority(highPriority).NominatedNodeName("node1").Obj(),
pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Priority(lowPriority).Node("node1").Terminating().
Condition(v1.DisruptionTarget, v1.ConditionTrue, v1.PodReasonPreemptionByScheduler).Obj()},
nodes: []string{"node1"},
expected: false,
},
{
name: "non-victim Pods terminating, feature PodDisruptionConditions is disabled",
fts: feature.Features{EnablePodDisruptionConditions: false},
pod: st.MakePod().Name("p_with_nominated_node").UID("p").Priority(highPriority).NominatedNodeName("node1").Obj(),
pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Priority(lowPriority).Node("node1").Terminating().Obj()},
nodes: []string{"node1"},
expected: false,
},
} }
for _, test := range tests { for _, test := range tests {
@ -1459,7 +1494,7 @@ func TestPodEligibleToPreemptOthers(t *testing.T) {
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
pl := DefaultPreemption{fh: f} pl := DefaultPreemption{fh: f, fts: test.fts}
if got, _ := pl.PodEligibleToPreemptOthers(test.pod, test.nominatedNodeStatus); got != test.expected { if got, _ := pl.PodEligibleToPreemptOthers(test.pod, test.nominatedNodeStatus); got != test.expected {
t.Errorf("expected %t, got %t for pod: %s", test.expected, got, test.pod.Name) t.Errorf("expected %t, got %t for pod: %s", test.expected, got, test.pod.Name)
} }

View File

@ -27,4 +27,5 @@ type Features struct {
EnableNodeInclusionPolicyInPodTopologySpread bool EnableNodeInclusionPolicyInPodTopologySpread bool
EnableMatchLabelKeysInPodTopologySpread bool EnableMatchLabelKeysInPodTopologySpread bool
EnablePodSchedulingReadiness bool EnablePodSchedulingReadiness bool
EnablePodDisruptionConditions bool
} }

View File

@ -54,6 +54,7 @@ func NewInTreeRegistry() runtime.Registry {
EnableNodeInclusionPolicyInPodTopologySpread: feature.DefaultFeatureGate.Enabled(features.NodeInclusionPolicyInPodTopologySpread), EnableNodeInclusionPolicyInPodTopologySpread: feature.DefaultFeatureGate.Enabled(features.NodeInclusionPolicyInPodTopologySpread),
EnableMatchLabelKeysInPodTopologySpread: feature.DefaultFeatureGate.Enabled(features.MatchLabelKeysInPodTopologySpread), EnableMatchLabelKeysInPodTopologySpread: feature.DefaultFeatureGate.Enabled(features.MatchLabelKeysInPodTopologySpread),
EnablePodSchedulingReadiness: feature.DefaultFeatureGate.Enabled(features.PodSchedulingReadiness), EnablePodSchedulingReadiness: feature.DefaultFeatureGate.Enabled(features.PodSchedulingReadiness),
EnablePodDisruptionConditions: feature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions),
} }
registry := runtime.Registry{ registry := runtime.Registry{

View File

@ -361,7 +361,7 @@ func (ev *Evaluator) prepareCandidate(ctx context.Context, c Candidate, pod *v1.
victimPodApply.Status.WithConditions(corev1apply.PodCondition(). victimPodApply.Status.WithConditions(corev1apply.PodCondition().
WithType(v1.DisruptionTarget). WithType(v1.DisruptionTarget).
WithStatus(v1.ConditionTrue). WithStatus(v1.ConditionTrue).
WithReason("PreemptionByKubeScheduler"). WithReason(v1.PodReasonPreemptionByScheduler).
WithMessage(fmt.Sprintf("Kube-scheduler: preempting to accommodate a higher priority pod: %s", klog.KObj(pod))). WithMessage(fmt.Sprintf("Kube-scheduler: preempting to accommodate a higher priority pod: %s", klog.KObj(pod))).
WithLastTransitionTime(metav1.Now()), WithLastTransitionTime(metav1.Now()),
) )

View File

@ -2721,6 +2721,10 @@ const (
// TerminationByKubelet reason in DisruptionTarget pod condition indicates that the termination // TerminationByKubelet reason in DisruptionTarget pod condition indicates that the termination
// is initiated by kubelet // is initiated by kubelet
PodReasonTerminationByKubelet = "TerminationByKubelet" PodReasonTerminationByKubelet = "TerminationByKubelet"
// PodReasonPreemptionByScheduler reason in DisruptionTarget pod condition indicates that the
// disruption was initiated by scheduler's preemption.
PodReasonPreemptionByScheduler = "PreemptionByScheduler"
) )
// PodCondition contains details for the current condition of this pod. // PodCondition contains details for the current condition of this pod.