Merge pull request #119778 from sanposhiho/bugfix-unschedulableandunresolvable

fix: when PreFilter returns UnschedulableAndUnresolvable, copy the state in all nodes in statusmap
Kubernetes Prow Robot committed 2023-08-15 23:12:57 -07:00 (merged via GitHub)
commit 719d1a84f7
4 changed files with 85 additions and 27 deletions

pkg/scheduler/framework/types.go

@@ -313,6 +313,21 @@ const (
 // Error returns detailed information of why the pod failed to fit on each node.
 // A message format is "0/X nodes are available: <PreFilterMsg>. <FilterMsg>. <PostFilterMsg>."
 func (f *FitError) Error() string {
+	reasonMsg := fmt.Sprintf(NoNodeAvailableMsg+":", f.NumAllNodes)
+	preFilterMsg := f.Diagnosis.PreFilterMsg
+	if preFilterMsg != "" {
+		// PreFilter plugin returns unschedulable.
+		// Add the messages from PreFilter plugins to reasonMsg.
+		reasonMsg += fmt.Sprintf(SeparatorFormat, preFilterMsg)
+	}
+
+	if preFilterMsg == "" {
+		// The scheduling cycle went through the PreFilter extension point successfully.
+		//
+		// When a PreFilter plugin returns unschedulable,
+		// the scheduling framework inserts the same unschedulable status into all nodes in NodeToStatusMap,
+		// so we shouldn't also add the messages from NodeToStatusMap when PreFilter failed;
+		// otherwise, we would have duplicated reasons in the error message.
 	reasons := make(map[string]int)
 	for _, status := range f.Diagnosis.NodeToStatusMap {
 		for _, reason := range status.Reasons() {
@@ -320,12 +335,6 @@ func (f *FitError) Error() string {
 		}
 	}
-	reasonMsg := fmt.Sprintf(NoNodeAvailableMsg+":", f.NumAllNodes)
-	// Add the messages from PreFilter plugins to reasonMsg.
-	preFilterMsg := f.Diagnosis.PreFilterMsg
-	if preFilterMsg != "" {
-		reasonMsg += fmt.Sprintf(SeparatorFormat, preFilterMsg)
-	}
 	sortReasonsHistogram := func() []string {
 		var reasonStrings []string
 		for k, v := range reasons {
@@ -338,7 +347,11 @@ func (f *FitError) Error() string {
 	if len(sortedFilterMsg) != 0 {
 		reasonMsg += fmt.Sprintf(SeparatorFormat, strings.Join(sortedFilterMsg, ", "))
 	}
+	}
+
 	// Add the messages from PostFilter plugins to reasonMsg.
+	// We can add this message regardless of whether the scheduling cycle fails at PreFilter or Filter,
+	// since we may run PostFilter (if enabled) in both cases.
 	postFilterMsg := f.Diagnosis.PostFilterMsg
 	if postFilterMsg != "" {
 		reasonMsg += fmt.Sprintf(SeparatorFormat, postFilterMsg)
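
Taken together, the new control flow in FitError.Error() is: add the PreFilter message first, add the histogram of per-node Filter reasons only when PreFilter succeeded, and append the PostFilter message in either case. The following is a minimal, self-contained sketch of that assembly logic; the diagnosis type, errorMessage function, and the " %v." separator are illustrative stand-ins for the real framework types and SeparatorFormat, not the scheduler's actual API.

package main

import (
	"fmt"
	"sort"
	"strings"
)

// diagnosis is a simplified stand-in for framework.Diagnosis.
type diagnosis struct {
	preFilterMsg    string
	postFilterMsg   string
	nodeToStatusMap map[string][]string // node name -> unschedulable reasons
}

// errorMessage mirrors the message assembly in FitError.Error() after this fix:
// when PreFilter fails, every node carries the same status, so the per-node
// reasons are skipped to avoid duplicated text.
func errorMessage(numAllNodes int, d diagnosis) string {
	msg := fmt.Sprintf("0/%v nodes are available:", numAllNodes)
	if d.preFilterMsg != "" {
		msg += fmt.Sprintf(" %v.", d.preFilterMsg) // " %v." stands in for SeparatorFormat
	} else {
		// PreFilter succeeded, so NodeToStatusMap holds genuine per-node Filter
		// results: build the "<count> <reason>" histogram from them.
		reasons := make(map[string]int)
		for _, rs := range d.nodeToStatusMap {
			for _, r := range rs {
				reasons[r]++
			}
		}
		var parts []string
		for r, n := range reasons {
			parts = append(parts, fmt.Sprintf("%v %v", n, r))
		}
		sort.Strings(parts)
		if len(parts) != 0 {
			msg += fmt.Sprintf(" %v.", strings.Join(parts, ", "))
		}
	}
	// The PostFilter message is appended whether the cycle failed at PreFilter or Filter.
	if d.postFilterMsg != "" {
		msg += fmt.Sprintf(" %v.", d.postFilterMsg)
	}
	return msg
}

func main() {
	d := diagnosis{
		preFilterMsg: "Node(s) failed PreFilter plugin FalsePreFilter",
		// The framework copied the same status to every node; it must not be repeated.
		nodeToStatusMap: map[string][]string{
			"node1": {"Node(s) failed PreFilter plugin FalsePreFilter"},
			"node2": {"Node(s) failed PreFilter plugin FalsePreFilter"},
			"node3": {"Node(s) failed PreFilter plugin FalsePreFilter"},
		},
	}
	// Prints the reason once, not three times:
	// 0/3 nodes are available: Node(s) failed PreFilter plugin FalsePreFilter.
	fmt.Println(errorMessage(3, d))
}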

pkg/scheduler/framework/types_test.go

@@ -1390,9 +1390,33 @@ func TestFitError_Error(t *testing.T) {
 			numAllNodes: 3,
 			diagnosis: Diagnosis{
 				PreFilterMsg: "Node(s) failed PreFilter plugin FalsePreFilter",
+				NodeToStatusMap: NodeToStatusMap{
+					// They're inserted by the framework.
+					// We don't include them in the reason message because they'd be just duplicates.
+					"node1": NewStatus(Unschedulable, "Node(s) failed PreFilter plugin FalsePreFilter"),
+					"node2": NewStatus(Unschedulable, "Node(s) failed PreFilter plugin FalsePreFilter"),
+					"node3": NewStatus(Unschedulable, "Node(s) failed PreFilter plugin FalsePreFilter"),
+				},
 			},
 			wantReasonMsg: "0/3 nodes are available: Node(s) failed PreFilter plugin FalsePreFilter.",
 		},
+		{
+			name:        "nodes failed Prefilter plugin and the preemption also failed",
+			numAllNodes: 3,
+			diagnosis: Diagnosis{
+				PreFilterMsg: "Node(s) failed PreFilter plugin FalsePreFilter",
+				NodeToStatusMap: NodeToStatusMap{
+					// They're inserted by the framework.
+					// We don't include them in the reason message because they'd be just duplicates.
+					"node1": NewStatus(Unschedulable, "Node(s) failed PreFilter plugin FalsePreFilter"),
+					"node2": NewStatus(Unschedulable, "Node(s) failed PreFilter plugin FalsePreFilter"),
+					"node3": NewStatus(Unschedulable, "Node(s) failed PreFilter plugin FalsePreFilter"),
+				},
+				// PostFilterMsg will be included.
+				PostFilterMsg: "Error running PostFilter plugin FailedPostFilter",
+			},
+			wantReasonMsg: "0/3 nodes are available: Node(s) failed PreFilter plugin FalsePreFilter. Error running PostFilter plugin FailedPostFilter.",
+		},
 		{
 			name:        "nodes failed one Filter plugin with an empty PostFilterMsg",
 			numAllNodes: 3,
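
Assuming a kubernetes/kubernetes checkout, the updated expectations above can be exercised with a targeted run of this test (package path inferred from where FitError lives):

go test ./pkg/scheduler/framework/ -run TestFitError_Error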

pkg/scheduler/schedule_one.go

@@ -438,6 +438,12 @@ func (sched *Scheduler) findNodesThatFitPod(ctx context.Context, fwk framework.F
 	if !s.IsUnschedulable() {
 		return nil, diagnosis, s.AsError()
 	}
+	// All nodes in NodeToStatusMap will have the same status so that they can be handled in the preemption.
+	// Some non-trivial refactoring is needed to avoid this copy.
+	for _, n := range allNodes {
+		diagnosis.NodeToStatusMap[n.Node().Name] = s
+	}
+
 	// Record the messages from PreFilter in Diagnosis.PreFilterMsg.
 	msg := s.Message()
 	diagnosis.PreFilterMsg = msg
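
The effect of the new loop is easiest to see in isolation: one status is fanned out to every node so that downstream consumers, notably preemption, receive a fully populated NodeToStatusMap instead of an empty one. A small self-contained sketch with stand-in types (status here is hypothetical, not the real framework.Status):

package main

import "fmt"

// status is a hypothetical stand-in for framework.Status.
type status struct {
	code    string
	message string
}

func main() {
	allNodes := []string{"node1", "node2", "node3"}
	nodeToStatusMap := make(map[string]*status, len(allNodes))

	// PreFilter rejected the pod up front, before any per-node Filter ran.
	s := &status{code: "UnschedulableAndUnresolvable", message: "injected unschedulable status"}

	// Mirrors the new loop in findNodesThatFitPod: every node gets the same status.
	for _, n := range allNodes {
		nodeToStatusMap[n] = s
	}

	for n, st := range nodeToStatusMap {
		fmt.Printf("%s: %s (%s)\n", n, st.message, st.code)
	}
}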

pkg/scheduler/schedule_one_test.go

@@ -1948,7 +1948,10 @@ func TestSchedulerSchedulePod(t *testing.T) {
 			Pod:         st.MakePod().Name("ignore").UID("ignore").PVC("unknownPVC").Obj(),
 			NumAllNodes: 2,
 			Diagnosis: framework.Diagnosis{
-				NodeToStatusMap: framework.NodeToStatusMap{},
+				NodeToStatusMap: framework.NodeToStatusMap{
+					"node1": framework.NewStatus(framework.UnschedulableAndUnresolvable, `persistentvolumeclaim "unknownPVC" not found`).WithFailedPlugin("VolumeBinding"),
+					"node2": framework.NewStatus(framework.UnschedulableAndUnresolvable, `persistentvolumeclaim "unknownPVC" not found`).WithFailedPlugin("VolumeBinding"),
+				},
 				PreFilterMsg:         `persistentvolumeclaim "unknownPVC" not found`,
 				UnschedulablePlugins: sets.New(volumebinding.Name),
 			},
@@ -1970,7 +1973,10 @@ func TestSchedulerSchedulePod(t *testing.T) {
 			Pod:         st.MakePod().Name("ignore").UID("ignore").Namespace(v1.NamespaceDefault).PVC("existingPVC").Obj(),
 			NumAllNodes: 2,
 			Diagnosis: framework.Diagnosis{
-				NodeToStatusMap: framework.NodeToStatusMap{},
+				NodeToStatusMap: framework.NodeToStatusMap{
+					"node1": framework.NewStatus(framework.UnschedulableAndUnresolvable, `persistentvolumeclaim "existingPVC" is being deleted`).WithFailedPlugin("VolumeBinding"),
+					"node2": framework.NewStatus(framework.UnschedulableAndUnresolvable, `persistentvolumeclaim "existingPVC" is being deleted`).WithFailedPlugin("VolumeBinding"),
+				},
 				PreFilterMsg:         `persistentvolumeclaim "existingPVC" is being deleted`,
 				UnschedulablePlugins: sets.New(volumebinding.Name),
 			},
@@ -2128,7 +2134,10 @@ func TestSchedulerSchedulePod(t *testing.T) {
 			Pod:         st.MakePod().Name("test-prefilter").UID("test-prefilter").Obj(),
 			NumAllNodes: 2,
 			Diagnosis: framework.Diagnosis{
-				NodeToStatusMap: framework.NodeToStatusMap{},
+				NodeToStatusMap: framework.NodeToStatusMap{
+					"1": framework.NewStatus(framework.UnschedulableAndUnresolvable, "injected unschedulable status").WithFailedPlugin("FakePreFilter"),
+					"2": framework.NewStatus(framework.UnschedulableAndUnresolvable, "injected unschedulable status").WithFailedPlugin("FakePreFilter"),
+				},
 				PreFilterMsg:         "injected unschedulable status",
 				UnschedulablePlugins: sets.New("FakePreFilter"),
 			},
@@ -2196,7 +2205,11 @@ func TestSchedulerSchedulePod(t *testing.T) {
 			Pod:         st.MakePod().Name("test-prefilter").UID("test-prefilter").Obj(),
 			NumAllNodes: 3,
 			Diagnosis: framework.Diagnosis{
-				NodeToStatusMap: framework.NodeToStatusMap{},
+				NodeToStatusMap: framework.NodeToStatusMap{
+					"node1": framework.NewStatus(framework.Unschedulable, "node(s) didn't satisfy plugin(s) [FakePreFilter2 FakePreFilter3] simultaneously"),
+					"node2": framework.NewStatus(framework.Unschedulable, "node(s) didn't satisfy plugin(s) [FakePreFilter2 FakePreFilter3] simultaneously"),
+					"node3": framework.NewStatus(framework.Unschedulable, "node(s) didn't satisfy plugin(s) [FakePreFilter2 FakePreFilter3] simultaneously"),
+				},
 				UnschedulablePlugins: sets.Set[string]{},
 				PreFilterMsg:         "node(s) didn't satisfy plugin(s) [FakePreFilter2 FakePreFilter3] simultaneously",
 			},
@@ -2222,7 +2235,9 @@ func TestSchedulerSchedulePod(t *testing.T) {
 			Pod:         st.MakePod().Name("test-prefilter").UID("test-prefilter").Obj(),
 			NumAllNodes: 1,
 			Diagnosis: framework.Diagnosis{
-				NodeToStatusMap: framework.NodeToStatusMap{},
+				NodeToStatusMap: framework.NodeToStatusMap{
+					"node1": framework.NewStatus(framework.Unschedulable, "node(s) didn't satisfy plugin FakePreFilter2"),
+				},
 				UnschedulablePlugins: sets.Set[string]{},
 				PreFilterMsg:         "node(s) didn't satisfy plugin FakePreFilter2",
 			},
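
As with the framework tests, these expectations can be verified from a kubernetes/kubernetes checkout with a targeted run (package path assumed):

go test ./pkg/scheduler/ -run TestSchedulerSchedulePod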