mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-22 11:21:47 +00:00
Merge pull request #114623 from Huang-Wei/feat/smart-preemption-identification
Enhanced logic to identify eligible preemption node
This commit is contained in:
commit
bd43394467
@ -49,6 +49,7 @@ const Name = names.DefaultPreemption
|
|||||||
// DefaultPreemption is a PostFilter plugin implements the preemption logic.
|
// DefaultPreemption is a PostFilter plugin implements the preemption logic.
|
||||||
type DefaultPreemption struct {
|
type DefaultPreemption struct {
|
||||||
fh framework.Handle
|
fh framework.Handle
|
||||||
|
fts feature.Features
|
||||||
args config.DefaultPreemptionArgs
|
args config.DefaultPreemptionArgs
|
||||||
podLister corelisters.PodLister
|
podLister corelisters.PodLister
|
||||||
pdbLister policylisters.PodDisruptionBudgetLister
|
pdbLister policylisters.PodDisruptionBudgetLister
|
||||||
@ -72,6 +73,7 @@ func New(dpArgs runtime.Object, fh framework.Handle, fts feature.Features) (fram
|
|||||||
}
|
}
|
||||||
pl := DefaultPreemption{
|
pl := DefaultPreemption{
|
||||||
fh: fh,
|
fh: fh,
|
||||||
|
fts: fts,
|
||||||
args: *args,
|
args: *args,
|
||||||
podLister: fh.SharedInformerFactory().Core().V1().Pods().Lister(),
|
podLister: fh.SharedInformerFactory().Core().V1().Pods().Lister(),
|
||||||
pdbLister: getPDBLister(fh.SharedInformerFactory()),
|
pdbLister: getPDBLister(fh.SharedInformerFactory()),
|
||||||
@ -250,7 +252,7 @@ func (pl *DefaultPreemption) PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNo
|
|||||||
if nodeInfo, _ := nodeInfos.Get(nomNodeName); nodeInfo != nil {
|
if nodeInfo, _ := nodeInfos.Get(nomNodeName); nodeInfo != nil {
|
||||||
podPriority := corev1helpers.PodPriority(pod)
|
podPriority := corev1helpers.PodPriority(pod)
|
||||||
for _, p := range nodeInfo.Pods {
|
for _, p := range nodeInfo.Pods {
|
||||||
if p.Pod.DeletionTimestamp != nil && corev1helpers.PodPriority(p.Pod) < podPriority {
|
if corev1helpers.PodPriority(p.Pod) < podPriority && podTerminatingByPreemption(p.Pod, pl.fts.EnablePodDisruptionConditions) {
|
||||||
// There is a terminating pod on the nominated node.
|
// There is a terminating pod on the nominated node.
|
||||||
return false, "not eligible due to a terminating pod on the nominated node."
|
return false, "not eligible due to a terminating pod on the nominated node."
|
||||||
}
|
}
|
||||||
@ -260,6 +262,25 @@ func (pl *DefaultPreemption) PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNo
|
|||||||
return true, ""
|
return true, ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// podTerminatingByPreemption returns the pod's terminating state if feature PodDisruptionConditions is not enabled.
|
||||||
|
// Otherwise, it additionally checks if the termination state is caused by scheduler preemption.
|
||||||
|
func podTerminatingByPreemption(p *v1.Pod, enablePodDisruptionConditions bool) bool {
|
||||||
|
if p.DeletionTimestamp == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
if !enablePodDisruptionConditions {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, condition := range p.Status.Conditions {
|
||||||
|
if condition.Type == v1.DisruptionTarget {
|
||||||
|
return condition.Status == v1.ConditionTrue && condition.Reason == v1.PodReasonPreemptionByScheduler
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// filterPodsWithPDBViolation groups the given "pods" into two groups of "violatingPods"
|
// filterPodsWithPDBViolation groups the given "pods" into two groups of "violatingPods"
|
||||||
// and "nonViolatingPods" based on whether their PDBs will be violated if they are
|
// and "nonViolatingPods" based on whether their PDBs will be violated if they are
|
||||||
// preempted.
|
// preempted.
|
||||||
|
@ -1401,6 +1401,7 @@ func TestSelectBestCandidate(t *testing.T) {
|
|||||||
func TestPodEligibleToPreemptOthers(t *testing.T) {
|
func TestPodEligibleToPreemptOthers(t *testing.T) {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
|
fts feature.Features
|
||||||
pod *v1.Pod
|
pod *v1.Pod
|
||||||
pods []*v1.Pod
|
pods []*v1.Pod
|
||||||
nodes []string
|
nodes []string
|
||||||
@ -1439,6 +1440,40 @@ func TestPodEligibleToPreemptOthers(t *testing.T) {
|
|||||||
nominatedNodeStatus: nil,
|
nominatedNodeStatus: nil,
|
||||||
expected: false,
|
expected: false,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "victim Pods terminating, feature PodDisruptionConditions is enabled",
|
||||||
|
fts: feature.Features{EnablePodDisruptionConditions: true},
|
||||||
|
pod: st.MakePod().Name("p_with_nominated_node").UID("p").Priority(highPriority).NominatedNodeName("node1").Obj(),
|
||||||
|
pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Priority(lowPriority).Node("node1").Terminating().
|
||||||
|
Condition(v1.DisruptionTarget, v1.ConditionTrue, v1.PodReasonPreemptionByScheduler).Obj()},
|
||||||
|
nodes: []string{"node1"},
|
||||||
|
expected: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "non-victim Pods terminating, feature PodDisruptionConditions is enabled",
|
||||||
|
fts: feature.Features{EnablePodDisruptionConditions: true},
|
||||||
|
pod: st.MakePod().Name("p_with_nominated_node").UID("p").Priority(highPriority).NominatedNodeName("node1").Obj(),
|
||||||
|
pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Priority(lowPriority).Node("node1").Terminating().Obj()},
|
||||||
|
nodes: []string{"node1"},
|
||||||
|
expected: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "victim Pods terminating, feature PodDisruptionConditions is disabled",
|
||||||
|
fts: feature.Features{EnablePodDisruptionConditions: false},
|
||||||
|
pod: st.MakePod().Name("p_with_nominated_node").UID("p").Priority(highPriority).NominatedNodeName("node1").Obj(),
|
||||||
|
pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Priority(lowPriority).Node("node1").Terminating().
|
||||||
|
Condition(v1.DisruptionTarget, v1.ConditionTrue, v1.PodReasonPreemptionByScheduler).Obj()},
|
||||||
|
nodes: []string{"node1"},
|
||||||
|
expected: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "non-victim Pods terminating, feature PodDisruptionConditions is disabled",
|
||||||
|
fts: feature.Features{EnablePodDisruptionConditions: false},
|
||||||
|
pod: st.MakePod().Name("p_with_nominated_node").UID("p").Priority(highPriority).NominatedNodeName("node1").Obj(),
|
||||||
|
pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Priority(lowPriority).Node("node1").Terminating().Obj()},
|
||||||
|
nodes: []string{"node1"},
|
||||||
|
expected: false,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
@ -1459,7 +1494,7 @@ func TestPodEligibleToPreemptOthers(t *testing.T) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
pl := DefaultPreemption{fh: f}
|
pl := DefaultPreemption{fh: f, fts: test.fts}
|
||||||
if got, _ := pl.PodEligibleToPreemptOthers(test.pod, test.nominatedNodeStatus); got != test.expected {
|
if got, _ := pl.PodEligibleToPreemptOthers(test.pod, test.nominatedNodeStatus); got != test.expected {
|
||||||
t.Errorf("expected %t, got %t for pod: %s", test.expected, got, test.pod.Name)
|
t.Errorf("expected %t, got %t for pod: %s", test.expected, got, test.pod.Name)
|
||||||
}
|
}
|
||||||
|
@ -27,4 +27,5 @@ type Features struct {
|
|||||||
EnableNodeInclusionPolicyInPodTopologySpread bool
|
EnableNodeInclusionPolicyInPodTopologySpread bool
|
||||||
EnableMatchLabelKeysInPodTopologySpread bool
|
EnableMatchLabelKeysInPodTopologySpread bool
|
||||||
EnablePodSchedulingReadiness bool
|
EnablePodSchedulingReadiness bool
|
||||||
|
EnablePodDisruptionConditions bool
|
||||||
}
|
}
|
||||||
|
@ -54,6 +54,7 @@ func NewInTreeRegistry() runtime.Registry {
|
|||||||
EnableNodeInclusionPolicyInPodTopologySpread: feature.DefaultFeatureGate.Enabled(features.NodeInclusionPolicyInPodTopologySpread),
|
EnableNodeInclusionPolicyInPodTopologySpread: feature.DefaultFeatureGate.Enabled(features.NodeInclusionPolicyInPodTopologySpread),
|
||||||
EnableMatchLabelKeysInPodTopologySpread: feature.DefaultFeatureGate.Enabled(features.MatchLabelKeysInPodTopologySpread),
|
EnableMatchLabelKeysInPodTopologySpread: feature.DefaultFeatureGate.Enabled(features.MatchLabelKeysInPodTopologySpread),
|
||||||
EnablePodSchedulingReadiness: feature.DefaultFeatureGate.Enabled(features.PodSchedulingReadiness),
|
EnablePodSchedulingReadiness: feature.DefaultFeatureGate.Enabled(features.PodSchedulingReadiness),
|
||||||
|
EnablePodDisruptionConditions: feature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions),
|
||||||
}
|
}
|
||||||
|
|
||||||
registry := runtime.Registry{
|
registry := runtime.Registry{
|
||||||
|
@ -361,7 +361,7 @@ func (ev *Evaluator) prepareCandidate(ctx context.Context, c Candidate, pod *v1.
|
|||||||
victimPodApply.Status.WithConditions(corev1apply.PodCondition().
|
victimPodApply.Status.WithConditions(corev1apply.PodCondition().
|
||||||
WithType(v1.DisruptionTarget).
|
WithType(v1.DisruptionTarget).
|
||||||
WithStatus(v1.ConditionTrue).
|
WithStatus(v1.ConditionTrue).
|
||||||
WithReason("PreemptionByKubeScheduler").
|
WithReason(v1.PodReasonPreemptionByScheduler).
|
||||||
WithMessage(fmt.Sprintf("Kube-scheduler: preempting to accommodate a higher priority pod: %s", klog.KObj(pod))).
|
WithMessage(fmt.Sprintf("Kube-scheduler: preempting to accommodate a higher priority pod: %s", klog.KObj(pod))).
|
||||||
WithLastTransitionTime(metav1.Now()),
|
WithLastTransitionTime(metav1.Now()),
|
||||||
)
|
)
|
||||||
|
@ -2721,6 +2721,10 @@ const (
|
|||||||
// TerminationByKubelet reason in DisruptionTarget pod condition indicates that the termination
|
// TerminationByKubelet reason in DisruptionTarget pod condition indicates that the termination
|
||||||
// is initiated by kubelet
|
// is initiated by kubelet
|
||||||
PodReasonTerminationByKubelet = "TerminationByKubelet"
|
PodReasonTerminationByKubelet = "TerminationByKubelet"
|
||||||
|
|
||||||
|
// PodReasonPreemptionByScheduler reason in DisruptionTarget pod condition indicates that the
|
||||||
|
// disruption was initiated by scheduler's preemption.
|
||||||
|
PodReasonPreemptionByScheduler = "PreemptionByScheduler"
|
||||||
)
|
)
|
||||||
|
|
||||||
// PodCondition contains details for the current condition of this pod.
|
// PodCondition contains details for the current condition of this pod.
|
||||||
|
Loading…
Reference in New Issue
Block a user