add preemption info in scheduling failed event

This commit is contained in:
Alex Wang 2022-02-01 00:37:16 +08:00
parent 2a2b294c99
commit 52275fc831
6 changed files with 38 additions and 25 deletions

View File

@ -95,7 +95,12 @@ func (pl *DefaultPreemption) PostFilter(ctx context.Context, state *framework.Cy
State: state,
Interface: pl,
}
return pe.Preempt(ctx, pod, m)
result, status := pe.Preempt(ctx, pod, m)
if status.Message() != "" {
return result, framework.NewStatus(status.Code(), "preemption: "+status.Message())
}
return result, status
}
// calculateNumCandidates returns the number of candidates the FindCandidates
@ -222,16 +227,17 @@ func (pl *DefaultPreemption) SelectVictimsOnNode(
return victims, numViolatingVictim, framework.NewStatus(framework.Success)
}
// PodEligibleToPreemptOthers determines whether this pod should be considered
// for preempting other pods or not. If this pod has already preempted other
// PodEligibleToPreemptOthers returns one bool and one string. The bool
// indicates whether this pod should be considered for preempting other pods or
// not. The string includes the reason if this pod isn't eligible.
// If this pod has a preemptionPolicy of Never or has already preempted other
// pods and those are in their graceful termination period, it shouldn't be
// considered for preemption.
// We look at the node that is nominated for this pod and as long as there are
// terminating pods on the node, we don't consider this for preempting more pods.
func (pl *DefaultPreemption) PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNodeStatus *framework.Status) bool {
func (pl *DefaultPreemption) PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNodeStatus *framework.Status) (bool, string) {
if pod.Spec.PreemptionPolicy != nil && *pod.Spec.PreemptionPolicy == v1.PreemptNever {
klog.V(5).InfoS("Pod is not eligible for preemption because it has a preemptionPolicy of Never", "pod", klog.KObj(pod))
return false
return false, fmt.Sprint("not eligible due to preemptionPolicy=Never.")
}
nodeInfos := pl.fh.SnapshotSharedLister().NodeInfos()
nomNodeName := pod.Status.NominatedNodeName
@ -239,7 +245,7 @@ func (pl *DefaultPreemption) PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNo
// If the pod's nominated node is considered as UnschedulableAndUnresolvable by the filters,
// then the pod should be considered for preempting again.
if nominatedNodeStatus.Code() == framework.UnschedulableAndUnresolvable {
return true
return true, ""
}
if nodeInfo, _ := nodeInfos.Get(nomNodeName); nodeInfo != nil {
@ -247,12 +253,12 @@ func (pl *DefaultPreemption) PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNo
for _, p := range nodeInfo.Pods {
if p.Pod.DeletionTimestamp != nil && corev1helpers.PodPriority(p.Pod) < podPriority {
// There is a terminating pod on the nominated node.
return false
return false, fmt.Sprint("not eligible due to a terminating pod on the nominated node.")
}
}
}
}
return true
return true, ""
}
// filterPodsWithPDBViolation groups the given "pods" into two groups of "violatingPods"

View File

@ -177,7 +177,7 @@ func TestPostFilter(t *testing.T) {
"node1": framework.NewStatus(framework.Unschedulable),
},
wantResult: framework.NewPostFilterResultWithNominatedNode(""),
wantStatus: framework.NewStatus(framework.Unschedulable, "0/1 nodes are available: 1 No victims found on node node1 for preemptor pod p."),
wantStatus: framework.NewStatus(framework.Unschedulable, "preemption: 0/1 nodes are available: 1 No victims found on node node1 for preemptor pod p."),
},
{
name: "preemption should respect filteredNodesStatuses",
@ -192,7 +192,7 @@ func TestPostFilter(t *testing.T) {
"node1": framework.NewStatus(framework.UnschedulableAndUnresolvable),
},
wantResult: framework.NewPostFilterResultWithNominatedNode(""),
wantStatus: framework.NewStatus(framework.Unschedulable, "0/1 nodes are available: 1 Preemption is not helpful for scheduling."),
wantStatus: framework.NewStatus(framework.Unschedulable, "preemption: 0/1 nodes are available: 1 Preemption is not helpful for scheduling."),
},
{
name: "pod can be made schedulable on one node",
@ -247,7 +247,7 @@ func TestPostFilter(t *testing.T) {
"node2": framework.NewStatus(framework.Unschedulable),
},
wantResult: framework.NewPostFilterResultWithNominatedNode(""),
wantStatus: framework.NewStatus(framework.Unschedulable, "0/2 nodes are available: 2 Insufficient cpu."),
wantStatus: framework.NewStatus(framework.Unschedulable, "preemption: 0/2 nodes are available: 2 Insufficient cpu."),
},
{
name: "no candidate nodes found with mixed reasons, no lower priority pod and no enough CPU resource",
@ -265,7 +265,7 @@ func TestPostFilter(t *testing.T) {
"node2": framework.NewStatus(framework.Unschedulable),
},
wantResult: framework.NewPostFilterResultWithNominatedNode(""),
wantStatus: framework.NewStatus(framework.Unschedulable, "0/2 nodes are available: 1 Insufficient cpu, 1 No victims found on node node1 for preemptor pod p."),
wantStatus: framework.NewStatus(framework.Unschedulable, "preemption: 0/2 nodes are available: 1 Insufficient cpu, 1 No victims found on node node1 for preemptor pod p."),
},
{
name: "no candidate nodes found with mixed reason, 2 UnschedulableAndUnresolvable nodes and 2 nodes don't have enough CPU resource",
@ -285,7 +285,7 @@ func TestPostFilter(t *testing.T) {
"node4": framework.NewStatus(framework.UnschedulableAndUnresolvable),
},
wantResult: framework.NewPostFilterResultWithNominatedNode(""),
wantStatus: framework.NewStatus(framework.Unschedulable, "0/4 nodes are available: 2 Insufficient cpu, 2 Preemption is not helpful for scheduling."),
wantStatus: framework.NewStatus(framework.Unschedulable, "preemption: 0/4 nodes are available: 2 Insufficient cpu, 2 Preemption is not helpful for scheduling."),
},
{
name: "only one node but failed with TestPlugin",
@ -297,7 +297,7 @@ func TestPostFilter(t *testing.T) {
nodes: []*v1.Node{st.MakeNode().Name("node1").Capacity(largeRes).Label("error", "true").Obj()},
filteredNodesStatuses: framework.NodeToStatusMap{"node1": framework.NewStatus(framework.Unschedulable)},
wantResult: nil,
wantStatus: framework.AsStatus(errors.New("running RemovePod on PreFilter plugin \"test-plugin\": failed to remove pod: p")),
wantStatus: framework.AsStatus(errors.New("preemption: running RemovePod on PreFilter plugin \"test-plugin\": failed to remove pod: p")),
},
{
name: "one failed with TestPlugin and the other pass",
@ -1447,7 +1447,7 @@ func TestPodEligibleToPreemptOthers(t *testing.T) {
t.Fatal(err)
}
pl := DefaultPreemption{fh: f}
if got := pl.PodEligibleToPreemptOthers(test.pod, test.nominatedNodeStatus); got != test.expected {
if got, _ := pl.PodEligibleToPreemptOthers(test.pod, test.nominatedNodeStatus); got != test.expected {
t.Errorf("expected %t, got %t for pod: %s", test.expected, got, test.pod.Name)
}
})

View File

@ -104,9 +104,9 @@ type Interface interface {
GetOffsetAndNumCandidates(nodes int32) (int32, int32)
// CandidatesToVictimsMap builds a map from the target node to a list of to-be-preempted Pods and the number of PDB violation.
CandidatesToVictimsMap(candidates []Candidate) map[string]*extenderv1.Victims
// PodEligibleToPreemptOthers determines whether this pod should be considered
// for preempting other pods or not.
PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNodeStatus *framework.Status) bool
// PodEligibleToPreemptOthers returns one bool and one string. The bool indicates whether this pod should be considered for
// preempting other pods or not. The string includes the reason if this pod isn't eligible.
PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNodeStatus *framework.Status) (bool, string)
// SelectVictimsOnNode finds minimum set of pods on the given node that should be preempted in order to make enough room
// for "pod" to be scheduled.
// Note that both `state` and `nodeInfo` are deep copied.
@ -148,9 +148,9 @@ func (ev *Evaluator) Preempt(ctx context.Context, pod *v1.Pod, m framework.NodeT
}
// 1) Ensure the preemptor is eligible to preempt other pods.
if !ev.PodEligibleToPreemptOthers(pod, m[pod.Status.NominatedNodeName]) {
klog.V(5).InfoS("Pod is not eligible for more preemption", "pod", klog.KObj(pod))
return nil, framework.NewStatus(framework.Unschedulable)
if ok, msg := ev.PodEligibleToPreemptOthers(pod, m[pod.Status.NominatedNodeName]); !ok {
klog.V(5).InfoS("Pod is not eligible for preemption", "pod", klog.KObj(pod), "reason", msg)
return nil, framework.NewStatus(framework.Unschedulable, msg)
}
// 2) Find all preemption candidates.
@ -182,7 +182,7 @@ func (ev *Evaluator) Preempt(ctx context.Context, pod *v1.Pod, m framework.NodeT
// 4) Find the best candidate.
bestCandidate := ev.SelectCandidate(candidates)
if bestCandidate == nil || len(bestCandidate.Name()) == 0 {
return nil, framework.NewStatus(framework.Unschedulable)
return nil, framework.NewStatus(framework.Unschedulable, "no candidate node for preemption")
}
// 5) Perform preparation work before nominating the selected candidate.

View File

@ -76,8 +76,8 @@ func (pl *FakePostFilterPlugin) CandidatesToVictimsMap(candidates []Candidate) m
return nil
}
func (pl *FakePostFilterPlugin) PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNodeStatus *framework.Status) bool {
return true
func (pl *FakePostFilterPlugin) PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNodeStatus *framework.Status) (bool, string) {
return true, ""
}
func TestNodesWherePreemptionMightHelp(t *testing.T) {

View File

@ -215,6 +215,8 @@ type WeightedAffinityTerm struct {
type Diagnosis struct {
NodeToStatusMap NodeToStatusMap
UnschedulablePlugins sets.String
// PostFilterMsg records the messages returned from PostFilterPlugins.
PostFilterMsg string
}
// FitError describes a fit error of a pod.
@ -247,6 +249,10 @@ func (f *FitError) Error() string {
return reasonStrings
}
reasonMsg := fmt.Sprintf(NoNodeAvailableMsg+": %v.", f.NumAllNodes, strings.Join(sortReasonsHistogram(), ", "))
postFilterMsg := f.Diagnosis.PostFilterMsg
if postFilterMsg != "" {
reasonMsg += " " + postFilterMsg
}
return reasonMsg
}

View File

@ -468,6 +468,7 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
if status.Code() == framework.Error {
klog.ErrorS(nil, "Status after running PostFilter plugins for pod", "pod", klog.KObj(pod), "status", status)
} else {
fitError.Diagnosis.PostFilterMsg = status.Message()
klog.V(5).InfoS("Status after running PostFilter plugins for pod", "pod", klog.KObj(pod), "status", status)
}
if result != nil {