Merge pull request #98041 from Huang-Wei/sched-enqueue-1

Surface info of failed plugins during PreFilter and Filter
This commit is contained in:
Kubernetes Prow Robot 2021-01-28 17:00:07 -08:00 committed by GitHub
commit f402e472a1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 290 additions and 168 deletions

View File

@ -61,6 +61,7 @@ go_test(
"//staging/src/k8s.io/client-go/informers:go_default_library", "//staging/src/k8s.io/client-go/informers:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes/fake:go_default_library", "//staging/src/k8s.io/client-go/kubernetes/fake:go_default_library",
"//staging/src/k8s.io/component-base/featuregate/testing:go_default_library", "//staging/src/k8s.io/component-base/featuregate/testing:go_default_library",
"//vendor/github.com/google/go-cmp/cmp:go_default_library",
], ],
) )

View File

@ -27,6 +27,7 @@ import (
"k8s.io/klog/v2" "k8s.io/klog/v2"
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apiserver/pkg/util/feature" "k8s.io/apiserver/pkg/util/feature"
extenderv1 "k8s.io/kube-scheduler/extender/v1" extenderv1 "k8s.io/kube-scheduler/extender/v1"
"k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/features"
@ -106,7 +107,7 @@ func (g *genericScheduler) Schedule(ctx context.Context, fwk framework.Framework
return result, ErrNoNodesAvailable return result, ErrNoNodesAvailable
} }
feasibleNodes, filteredNodesStatuses, err := g.findNodesThatFitPod(ctx, fwk, state, pod) feasibleNodes, diagnosis, err := g.findNodesThatFitPod(ctx, fwk, state, pod)
if err != nil { if err != nil {
return result, err return result, err
} }
@ -116,7 +117,7 @@ func (g *genericScheduler) Schedule(ctx context.Context, fwk framework.Framework
return result, &framework.FitError{ return result, &framework.FitError{
Pod: pod, Pod: pod,
NumAllNodes: g.nodeInfoSnapshot.NumNodes(), NumAllNodes: g.nodeInfoSnapshot.NumNodes(),
FilteredNodesStatuses: filteredNodesStatuses, Diagnosis: diagnosis,
} }
} }
@ -124,7 +125,7 @@ func (g *genericScheduler) Schedule(ctx context.Context, fwk framework.Framework
if len(feasibleNodes) == 1 { if len(feasibleNodes) == 1 {
return ScheduleResult{ return ScheduleResult{
SuggestedHost: feasibleNodes[0].Name, SuggestedHost: feasibleNodes[0].Name,
EvaluatedNodes: 1 + len(filteredNodesStatuses), EvaluatedNodes: 1 + len(diagnosis.NodeToStatusMap),
FeasibleNodes: 1, FeasibleNodes: 1,
}, nil }, nil
} }
@ -139,7 +140,7 @@ func (g *genericScheduler) Schedule(ctx context.Context, fwk framework.Framework
return ScheduleResult{ return ScheduleResult{
SuggestedHost: host, SuggestedHost: host,
EvaluatedNodes: len(feasibleNodes) + len(filteredNodesStatuses), EvaluatedNodes: len(feasibleNodes) + len(diagnosis.NodeToStatusMap),
FeasibleNodes: len(feasibleNodes), FeasibleNodes: len(feasibleNodes),
}, err }, err
} }
@ -197,19 +198,19 @@ func (g *genericScheduler) numFeasibleNodesToFind(numAllNodes int32) (numNodes i
return numNodes return numNodes
} }
func (g *genericScheduler) evaluateNominatedNode(ctx context.Context, pod *v1.Pod, fwk framework.Framework, state *framework.CycleState, filteredNodesStatuses framework.NodeToStatusMap) ([]*v1.Node, error) { func (g *genericScheduler) evaluateNominatedNode(ctx context.Context, pod *v1.Pod, fwk framework.Framework, state *framework.CycleState, diagnosis framework.Diagnosis) ([]*v1.Node, error) {
nnn := pod.Status.NominatedNodeName nnn := pod.Status.NominatedNodeName
nodeInfo, err := g.nodeInfoSnapshot.Get(nnn) nodeInfo, err := g.nodeInfoSnapshot.Get(nnn)
if err != nil { if err != nil {
return nil, err return nil, err
} }
node := []*framework.NodeInfo{nodeInfo} node := []*framework.NodeInfo{nodeInfo}
feasibleNodes, err := g.findNodesThatPassFilters(ctx, fwk, state, pod, filteredNodesStatuses, node) feasibleNodes, err := g.findNodesThatPassFilters(ctx, fwk, state, pod, diagnosis, node)
if err != nil { if err != nil {
return nil, err return nil, err
} }
feasibleNodes, err = g.findNodesThatPassExtenders(pod, feasibleNodes, filteredNodesStatuses) feasibleNodes, err = g.findNodesThatPassExtenders(pod, feasibleNodes, diagnosis.NodeToStatusMap)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -219,48 +220,53 @@ func (g *genericScheduler) evaluateNominatedNode(ctx context.Context, pod *v1.Po
// Filters the nodes to find the ones that fit the pod based on the framework // Filters the nodes to find the ones that fit the pod based on the framework
// filter plugins and filter extenders. // filter plugins and filter extenders.
func (g *genericScheduler) findNodesThatFitPod(ctx context.Context, fwk framework.Framework, state *framework.CycleState, pod *v1.Pod) ([]*v1.Node, framework.NodeToStatusMap, error) { func (g *genericScheduler) findNodesThatFitPod(ctx context.Context, fwk framework.Framework, state *framework.CycleState, pod *v1.Pod) ([]*v1.Node, framework.Diagnosis, error) {
filteredNodesStatuses := make(framework.NodeToStatusMap) diagnosis := framework.Diagnosis{
NodeToStatusMap: make(framework.NodeToStatusMap),
UnschedulablePlugins: sets.NewString(),
}
// Run "prefilter" plugins. // Run "prefilter" plugins.
s := fwk.RunPreFilterPlugins(ctx, state, pod) s := fwk.RunPreFilterPlugins(ctx, state, pod)
allNodes, err := g.nodeInfoSnapshot.NodeInfos().List() allNodes, err := g.nodeInfoSnapshot.NodeInfos().List()
if err != nil { if err != nil {
return nil, nil, err return nil, diagnosis, err
} }
if !s.IsSuccess() { if !s.IsSuccess() {
if !s.IsUnschedulable() { if !s.IsUnschedulable() {
return nil, nil, s.AsError() return nil, diagnosis, s.AsError()
} }
// All nodes will have the same status. Some non trivial refactoring is // All nodes will have the same status. Some non trivial refactoring is
// needed to avoid this copy. // needed to avoid this copy.
for _, n := range allNodes { for _, n := range allNodes {
filteredNodesStatuses[n.Node().Name] = s diagnosis.NodeToStatusMap[n.Node().Name] = s
} }
return nil, filteredNodesStatuses, nil diagnosis.UnschedulablePlugins.Insert(s.FailedPlugin())
return nil, diagnosis, nil
} }
// "NominatedNodeName" can potentially be set in a previous scheduling cycle as a result of preemption. // "NominatedNodeName" can potentially be set in a previous scheduling cycle as a result of preemption.
// This node is likely the only candidate that will fit the pod, and hence we try it first before iterating over all nodes. // This node is likely the only candidate that will fit the pod, and hence we try it first before iterating over all nodes.
if len(pod.Status.NominatedNodeName) > 0 && feature.DefaultFeatureGate.Enabled(features.PreferNominatedNode) { if len(pod.Status.NominatedNodeName) > 0 && feature.DefaultFeatureGate.Enabled(features.PreferNominatedNode) {
feasibleNodes, err := g.evaluateNominatedNode(ctx, pod, fwk, state, filteredNodesStatuses) feasibleNodes, err := g.evaluateNominatedNode(ctx, pod, fwk, state, diagnosis)
if err != nil { if err != nil {
klog.ErrorS(err, "Evaluation failed on nominated node", "pod", klog.KObj(pod), "node", pod.Status.NominatedNodeName) klog.ErrorS(err, "Evaluation failed on nominated node", "pod", klog.KObj(pod), "node", pod.Status.NominatedNodeName)
} }
// Nominated node passes all the filters, scheduler is good to assign this node to the pod. // Nominated node passes all the filters, scheduler is good to assign this node to the pod.
if len(feasibleNodes) != 0 { if len(feasibleNodes) != 0 {
return feasibleNodes, filteredNodesStatuses, nil return feasibleNodes, diagnosis, nil
} }
} }
feasibleNodes, err := g.findNodesThatPassFilters(ctx, fwk, state, pod, filteredNodesStatuses, allNodes) feasibleNodes, err := g.findNodesThatPassFilters(ctx, fwk, state, pod, diagnosis, allNodes)
if err != nil { if err != nil {
return nil, nil, err return nil, diagnosis, err
} }
feasibleNodes, err = g.findNodesThatPassExtenders(pod, feasibleNodes, filteredNodesStatuses)
feasibleNodes, err = g.findNodesThatPassExtenders(pod, feasibleNodes, diagnosis.NodeToStatusMap)
if err != nil { if err != nil {
return nil, nil, err return nil, diagnosis, err
} }
return feasibleNodes, filteredNodesStatuses, nil return feasibleNodes, diagnosis, nil
} }
// findNodesThatPassFilters finds the nodes that fit the filter plugins. // findNodesThatPassFilters finds the nodes that fit the filter plugins.
@ -269,7 +275,7 @@ func (g *genericScheduler) findNodesThatPassFilters(
fwk framework.Framework, fwk framework.Framework,
state *framework.CycleState, state *framework.CycleState,
pod *v1.Pod, pod *v1.Pod,
statuses framework.NodeToStatusMap, diagnosis framework.Diagnosis,
nodes []*framework.NodeInfo) ([]*v1.Node, error) { nodes []*framework.NodeInfo) ([]*v1.Node, error) {
numNodesToFind := g.numFeasibleNodesToFind(int32(len(nodes))) numNodesToFind := g.numFeasibleNodesToFind(int32(len(nodes)))
@ -309,7 +315,8 @@ func (g *genericScheduler) findNodesThatPassFilters(
} }
} else { } else {
statusesLock.Lock() statusesLock.Lock()
statuses[nodeInfo.Node().Name] = status diagnosis.NodeToStatusMap[nodeInfo.Node().Name] = status
diagnosis.UnschedulablePlugins.Insert(status.FailedPlugin())
statusesLock.Unlock() statusesLock.Unlock()
} }
} }
@ -326,7 +333,7 @@ func (g *genericScheduler) findNodesThatPassFilters(
// Stops searching for more nodes once the configured number of feasible nodes // Stops searching for more nodes once the configured number of feasible nodes
// are found. // are found.
parallelize.Until(ctx, len(nodes), checkNode) parallelize.Until(ctx, len(nodes), checkNode)
processedNodes := int(feasibleNodesLen) + len(statuses) processedNodes := int(feasibleNodesLen) + len(diagnosis.NodeToStatusMap)
g.nextStartNodeIndex = (g.nextStartNodeIndex + processedNodes) % len(nodes) g.nextStartNodeIndex = (g.nextStartNodeIndex + processedNodes) % len(nodes)
feasibleNodes = feasibleNodes[:feasibleNodesLen] feasibleNodes = feasibleNodes[:feasibleNodesLen]

View File

@ -21,11 +21,12 @@ import (
"errors" "errors"
"fmt" "fmt"
"math" "math"
"reflect"
"strconv" "strconv"
"strings"
"testing" "testing"
"time" "time"
"github.com/google/go-cmp/cmp"
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@ -295,9 +296,12 @@ func TestGenericScheduler(t *testing.T) {
wErr: &framework.FitError{ wErr: &framework.FitError{
Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2", UID: types.UID("2")}}, Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2", UID: types.UID("2")}},
NumAllNodes: 2, NumAllNodes: 2,
FilteredNodesStatuses: framework.NodeToStatusMap{ Diagnosis: framework.Diagnosis{
"machine1": framework.NewStatus(framework.Unschedulable, st.ErrReasonFake), NodeToStatusMap: framework.NodeToStatusMap{
"machine2": framework.NewStatus(framework.Unschedulable, st.ErrReasonFake), "machine1": framework.NewStatus(framework.Unschedulable, st.ErrReasonFake).WithFailedPlugin("FalseFilter"),
"machine2": framework.NewStatus(framework.Unschedulable, st.ErrReasonFake).WithFailedPlugin("FalseFilter"),
},
UnschedulablePlugins: sets.NewString("FalseFilter"),
}, },
}, },
}, },
@ -380,10 +384,13 @@ func TestGenericScheduler(t *testing.T) {
wErr: &framework.FitError{ wErr: &framework.FitError{
Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2", UID: types.UID("2")}}, Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2", UID: types.UID("2")}},
NumAllNodes: 3, NumAllNodes: 3,
FilteredNodesStatuses: framework.NodeToStatusMap{ Diagnosis: framework.Diagnosis{
"3": framework.NewStatus(framework.Unschedulable, st.ErrReasonFake), NodeToStatusMap: framework.NodeToStatusMap{
"2": framework.NewStatus(framework.Unschedulable, st.ErrReasonFake), "3": framework.NewStatus(framework.Unschedulable, st.ErrReasonFake).WithFailedPlugin("FalseFilter"),
"1": framework.NewStatus(framework.Unschedulable, st.ErrReasonFake), "2": framework.NewStatus(framework.Unschedulable, st.ErrReasonFake).WithFailedPlugin("FalseFilter"),
"1": framework.NewStatus(framework.Unschedulable, st.ErrReasonFake).WithFailedPlugin("FalseFilter"),
},
UnschedulablePlugins: sets.NewString("FalseFilter"),
}, },
}, },
}, },
@ -412,9 +419,12 @@ func TestGenericScheduler(t *testing.T) {
wErr: &framework.FitError{ wErr: &framework.FitError{
Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2", UID: types.UID("2")}}, Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2", UID: types.UID("2")}},
NumAllNodes: 2, NumAllNodes: 2,
FilteredNodesStatuses: framework.NodeToStatusMap{ Diagnosis: framework.Diagnosis{
"1": framework.NewStatus(framework.Unschedulable, st.ErrReasonFake), NodeToStatusMap: framework.NodeToStatusMap{
"2": framework.NewStatus(framework.Unschedulable, st.ErrReasonFake), "1": framework.NewStatus(framework.Unschedulable, st.ErrReasonFake).WithFailedPlugin("MatchFilter"),
"2": framework.NewStatus(framework.Unschedulable, st.ErrReasonFake).WithFailedPlugin("NoPodsFilter"),
},
UnschedulablePlugins: sets.NewString("MatchFilter", "NoPodsFilter"),
}, },
}, },
}, },
@ -475,7 +485,30 @@ func TestGenericScheduler(t *testing.T) {
}, },
}, },
name: "unknown PVC", name: "unknown PVC",
wErr: fmt.Errorf("persistentvolumeclaim \"unknownPVC\" not found"), wErr: &framework.FitError{
Pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "ignore", UID: types.UID("ignore")},
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "unknownPVC",
},
},
},
},
},
},
NumAllNodes: 2,
Diagnosis: framework.Diagnosis{
NodeToStatusMap: framework.NodeToStatusMap{
"machine1": framework.NewStatus(framework.UnschedulableAndUnresolvable, `persistentvolumeclaim "unknownPVC" not found`).WithFailedPlugin(volumebinding.Name),
"machine2": framework.NewStatus(framework.UnschedulableAndUnresolvable, `persistentvolumeclaim "unknownPVC" not found`).WithFailedPlugin(volumebinding.Name),
},
UnschedulablePlugins: sets.NewString(volumebinding.Name),
},
},
}, },
{ {
// Pod with deleting PVC // Pod with deleting PVC
@ -502,7 +535,30 @@ func TestGenericScheduler(t *testing.T) {
}, },
}, },
name: "deleted PVC", name: "deleted PVC",
wErr: fmt.Errorf("persistentvolumeclaim \"existingPVC\" is being deleted"), wErr: &framework.FitError{
Pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "ignore", UID: types.UID("ignore"), Namespace: v1.NamespaceDefault},
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "existingPVC",
},
},
},
},
},
},
NumAllNodes: 2,
Diagnosis: framework.Diagnosis{
NodeToStatusMap: framework.NodeToStatusMap{
"machine1": framework.NewStatus(framework.UnschedulableAndUnresolvable, `persistentvolumeclaim "existingPVC" is being deleted`).WithFailedPlugin(volumebinding.Name),
"machine2": framework.NewStatus(framework.UnschedulableAndUnresolvable, `persistentvolumeclaim "existingPVC" is being deleted`).WithFailedPlugin(volumebinding.Name),
},
UnschedulablePlugins: sets.NewString(volumebinding.Name),
},
},
}, },
{ {
registerPlugins: []st.RegisterPluginFunc{ registerPlugins: []st.RegisterPluginFunc{
@ -646,8 +702,11 @@ func TestGenericScheduler(t *testing.T) {
wErr: &framework.FitError{ wErr: &framework.FitError{
Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "test-filter", UID: types.UID("test-filter")}}, Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "test-filter", UID: types.UID("test-filter")}},
NumAllNodes: 1, NumAllNodes: 1,
FilteredNodesStatuses: framework.NodeToStatusMap{ Diagnosis: framework.Diagnosis{
"3": framework.NewStatus(framework.Unschedulable, "injecting failure for pod test-filter"), NodeToStatusMap: framework.NodeToStatusMap{
"3": framework.NewStatus(framework.Unschedulable, "injecting failure for pod test-filter").WithFailedPlugin("FakeFilter"),
},
UnschedulablePlugins: sets.NewString("FakeFilter"),
}, },
}, },
}, },
@ -668,8 +727,11 @@ func TestGenericScheduler(t *testing.T) {
wErr: &framework.FitError{ wErr: &framework.FitError{
Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "test-filter", UID: types.UID("test-filter")}}, Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "test-filter", UID: types.UID("test-filter")}},
NumAllNodes: 1, NumAllNodes: 1,
FilteredNodesStatuses: framework.NodeToStatusMap{ Diagnosis: framework.Diagnosis{
"3": framework.NewStatus(framework.UnschedulableAndUnresolvable, "injecting failure for pod test-filter"), NodeToStatusMap: framework.NodeToStatusMap{
"3": framework.NewStatus(framework.UnschedulableAndUnresolvable, "injecting failure for pod test-filter").WithFailedPlugin("FakeFilter"),
},
UnschedulablePlugins: sets.NewString("FakeFilter"),
}, },
}, },
}, },
@ -705,9 +767,12 @@ func TestGenericScheduler(t *testing.T) {
wErr: &framework.FitError{ wErr: &framework.FitError{
Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "test-prefilter", UID: types.UID("test-prefilter")}}, Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "test-prefilter", UID: types.UID("test-prefilter")}},
NumAllNodes: 2, NumAllNodes: 2,
FilteredNodesStatuses: framework.NodeToStatusMap{ Diagnosis: framework.Diagnosis{
"1": framework.NewStatus(framework.UnschedulableAndUnresolvable, "injected unschedulable status"), NodeToStatusMap: framework.NodeToStatusMap{
"2": framework.NewStatus(framework.UnschedulableAndUnresolvable, "injected unschedulable status"), "1": framework.NewStatus(framework.UnschedulableAndUnresolvable, "injected unschedulable status").WithFailedPlugin("FakePreFilter"),
"2": framework.NewStatus(framework.UnschedulableAndUnresolvable, "injected unschedulable status").WithFailedPlugin("FakePreFilter"),
},
UnschedulablePlugins: sets.NewString("FakePreFilter"),
}, },
}, },
}, },
@ -772,8 +837,15 @@ func TestGenericScheduler(t *testing.T) {
informerFactory.WaitForCacheSync(ctx.Done()) informerFactory.WaitForCacheSync(ctx.Done())
result, err := scheduler.Schedule(ctx, fwk, framework.NewCycleState(), test.pod) result, err := scheduler.Schedule(ctx, fwk, framework.NewCycleState(), test.pod)
if err != test.wErr && !strings.Contains(err.Error(), test.wErr.Error()) { // TODO(#94696): replace reflect.DeepEqual with cmp.Diff().
t.Errorf("Unexpected error: %v, expected: %v", err.Error(), test.wErr) if err != test.wErr {
gotFitErr, gotOK := err.(*framework.FitError)
wantFitErr, wantOK := test.wErr.(*framework.FitError)
if gotOK != wantOK {
t.Errorf("Expected err to be FitError: %v, but got %v", wantOK, gotOK)
} else if gotOK && !reflect.DeepEqual(gotFitErr, wantFitErr) {
t.Errorf("Unexpected fitError. Want %v, but got %v", wantFitErr, gotFitErr)
}
} }
if test.expectedHosts != nil && !test.expectedHosts.Has(result.SuggestedHost) { if test.expectedHosts != nil && !test.expectedHosts.Has(result.SuggestedHost) {
t.Errorf("Expected: %s, got: %s", test.expectedHosts, result.SuggestedHost) t.Errorf("Expected: %s, got: %s", test.expectedHosts, result.SuggestedHost)
@ -817,27 +889,19 @@ func TestFindFitAllError(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
_, nodeToStatusMap, err := scheduler.findNodesThatFitPod(context.Background(), fwk, framework.NewCycleState(), &v1.Pod{}) _, diagnosis, err := scheduler.findNodesThatFitPod(context.Background(), fwk, framework.NewCycleState(), &v1.Pod{})
if err != nil { if err != nil {
t.Errorf("unexpected error: %v", err) t.Errorf("unexpected error: %v", err)
} }
if len(nodeToStatusMap) != len(nodes) { // TODO(#94696): use cmp.Diff() to compare `diagnosis`.
t.Errorf("unexpected failed status map: %v", nodeToStatusMap) if len(diagnosis.NodeToStatusMap) != len(nodes) {
t.Errorf("unexpected failed status map: %v", diagnosis.NodeToStatusMap)
} }
for _, node := range nodes { if diff := cmp.Diff(sets.NewString("MatchFilter"), diagnosis.UnschedulablePlugins); diff != "" {
t.Run(node.Name, func(t *testing.T) { t.Errorf("Unexpected unschedulablePlugins: (-want, +got): %s", diagnosis.UnschedulablePlugins)
status, found := nodeToStatusMap[node.Name]
if !found {
t.Errorf("failed to find node %v in %v", node.Name, nodeToStatusMap)
}
reasons := status.Reasons()
if len(reasons) != 1 || reasons[0] != st.ErrReasonFake {
t.Errorf("unexpected failure reasons: %v", reasons)
}
})
} }
} }
@ -858,14 +922,18 @@ func TestFindFitSomeError(t *testing.T) {
} }
pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "1", UID: types.UID("1")}} pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "1", UID: types.UID("1")}}
_, nodeToStatusMap, err := scheduler.findNodesThatFitPod(context.Background(), fwk, framework.NewCycleState(), pod) _, diagnosis, err := scheduler.findNodesThatFitPod(context.Background(), fwk, framework.NewCycleState(), pod)
if err != nil { if err != nil {
t.Errorf("unexpected error: %v", err) t.Errorf("unexpected error: %v", err)
} }
if len(nodeToStatusMap) != len(nodes)-1 { if len(diagnosis.NodeToStatusMap) != len(nodes)-1 {
t.Errorf("unexpected failed status map: %v", nodeToStatusMap) t.Errorf("unexpected failed status map: %v", diagnosis.NodeToStatusMap)
}
if diff := cmp.Diff(sets.NewString("MatchFilter"), diagnosis.UnschedulablePlugins); diff != "" {
t.Errorf("Unexpected unschedulablePlugins: (-want, +got): %s", diagnosis.UnschedulablePlugins)
} }
for _, node := range nodes { for _, node := range nodes {
@ -873,9 +941,9 @@ func TestFindFitSomeError(t *testing.T) {
continue continue
} }
t.Run(node.Name, func(t *testing.T) { t.Run(node.Name, func(t *testing.T) {
status, found := nodeToStatusMap[node.Name] status, found := diagnosis.NodeToStatusMap[node.Name]
if !found { if !found {
t.Errorf("failed to find node %v in %v", node.Name, nodeToStatusMap) t.Errorf("failed to find node %v in %v", node.Name, diagnosis.NodeToStatusMap)
} }
reasons := status.Reasons() reasons := status.Reasons()
if len(reasons) != 1 || reasons[0] != st.ErrReasonFake { if len(reasons) != 1 || reasons[0] != st.ErrReasonFake {

View File

@ -102,13 +102,16 @@ const (
) )
// Status indicates the result of running a plugin. It consists of a code, a // Status indicates the result of running a plugin. It consists of a code, a
// message and (optionally) an error. When the status code is not `Success`, // message, (optionally) an error and a plugin name it failed by. When the status
// the reasons should explain why. // code is not `Success`, the reasons should explain why.
// NOTE: A nil Status is also considered as Success. // NOTE: A nil Status is also considered as Success.
type Status struct { type Status struct {
code Code code Code
reasons []string reasons []string
err error err error
// failedPlugin is an optional field that records the plugin name a Pod failed by.
// It's set by the framework when code is Error, Unschedulable or UnschedulableAndUnresolvable.
failedPlugin string
} }
// Code returns code of the Status. // Code returns code of the Status.
@ -127,6 +130,23 @@ func (s *Status) Message() string {
return strings.Join(s.reasons, ", ") return strings.Join(s.reasons, ", ")
} }
// SetFailedPlugin sets s.failedPlugin to the given plugin name,
// overwriting any previously recorded plugin.
func (s *Status) SetFailedPlugin(plugin string) {
s.failedPlugin = plugin
}
// WithFailedPlugin records the given plugin name in s.failedPlugin and
// returns s, allowing the call to be chained onto a Status constructor.
func (s *Status) WithFailedPlugin(plugin string) *Status {
s.failedPlugin = plugin
return s
}
// FailedPlugin returns the name of the plugin recorded as having caused
// this Status; it is empty when no plugin has been recorded.
func (s *Status) FailedPlugin() string {
return s.failedPlugin
}
// Reasons returns reasons of the Status. // Reasons returns reasons of the Status.
func (s *Status) Reasons() []string { func (s *Status) Reasons() []string {
return s.reasons return s.reasons
@ -199,6 +219,8 @@ func (p PluginToStatus) Merge() *Status {
} }
if statusPrecedence[s.Code()] > statusPrecedence[finalStatus.code] { if statusPrecedence[s.Code()] > statusPrecedence[finalStatus.code] {
finalStatus.code = s.Code() finalStatus.code = s.Code()
// Same as code, we keep the most relevant failedPlugin in the returned Status.
finalStatus.failedPlugin = s.FailedPlugin()
} }
for _, r := range s.reasons { for _, r := range s.reasons {
@ -220,7 +242,7 @@ type WaitingPod interface {
// to unblock the pod. // to unblock the pod.
Allow(pluginName string) Allow(pluginName string)
// Reject declares the waiting pod unschedulable. // Reject declares the waiting pod unschedulable.
Reject(msg string) Reject(pluginName, msg string)
} }
// Plugin is the parent type for all the scheduling framework plugins. // Plugin is the parent type for all the scheduling framework plugins.

View File

@ -156,7 +156,7 @@ func (pl *DefaultPreemption) preempt(ctx context.Context, state *framework.Cycle
} }
// 5) Perform preparation work before nominating the selected candidate. // 5) Perform preparation work before nominating the selected candidate.
if err := PrepareCandidate(bestCandidate, pl.fh, cs, pod); err != nil { if err := PrepareCandidate(bestCandidate, pl.fh, cs, pod, pl.Name()); err != nil {
return "", err return "", err
} }
@ -225,7 +225,10 @@ func (pl *DefaultPreemption) FindCandidates(ctx context.Context, state *framewor
return candidates, &framework.FitError{ return candidates, &framework.FitError{
Pod: pod, Pod: pod,
NumAllNodes: len(potentialNodes), NumAllNodes: len(potentialNodes),
FilteredNodesStatuses: nodeStatuses, Diagnosis: framework.Diagnosis{
NodeToStatusMap: nodeStatuses,
// Leave FailedPlugins as nil as it won't be used on moving Pods.
},
} }
} }
return candidates, nil return candidates, nil
@ -687,7 +690,7 @@ func selectVictimsOnNode(
// - Evict the victim pods // - Evict the victim pods
// - Reject the victim pods if they are in waitingPod map // - Reject the victim pods if they are in waitingPod map
// - Clear the low-priority pods' nominatedNodeName status if needed // - Clear the low-priority pods' nominatedNodeName status if needed
func PrepareCandidate(c Candidate, fh framework.Handle, cs kubernetes.Interface, pod *v1.Pod) error { func PrepareCandidate(c Candidate, fh framework.Handle, cs kubernetes.Interface, pod *v1.Pod, pluginName string) error {
for _, victim := range c.Victims().Pods { for _, victim := range c.Victims().Pods {
if err := util.DeletePod(cs, victim); err != nil { if err := util.DeletePod(cs, victim); err != nil {
klog.Errorf("Error preempting pod %v/%v: %v", victim.Namespace, victim.Name, err) klog.Errorf("Error preempting pod %v/%v: %v", victim.Namespace, victim.Name, err)
@ -695,7 +698,7 @@ func PrepareCandidate(c Candidate, fh framework.Handle, cs kubernetes.Interface,
} }
// If the victim is a WaitingPod, send a reject message to the PermitPlugin // If the victim is a WaitingPod, send a reject message to the PermitPlugin
if waitingPod := fh.GetWaitingPod(victim.UID); waitingPod != nil { if waitingPod := fh.GetWaitingPod(victim.UID); waitingPod != nil {
waitingPod.Reject("preempted") waitingPod.Reject(pluginName, "preempted")
} }
fh.EventRecorder().Eventf(victim, pod, v1.EventTypeNormal, "Preempted", "Preempting", "Preempted by %v/%v on node %v", fh.EventRecorder().Eventf(victim, pod, v1.EventTypeNormal, "Preempted", "Preempting", "Preempted by %v/%v on node %v",
pod.Namespace, pod.Name, c.Name()) pod.Namespace, pod.Name, c.Name())

View File

@ -431,12 +431,13 @@ func (f *frameworkImpl) RunPreFilterPlugins(ctx context.Context, state *framewor
for _, pl := range f.preFilterPlugins { for _, pl := range f.preFilterPlugins {
status = f.runPreFilterPlugin(ctx, pl, state, pod) status = f.runPreFilterPlugin(ctx, pl, state, pod)
if !status.IsSuccess() { if !status.IsSuccess() {
status.SetFailedPlugin(pl.Name())
if status.IsUnschedulable() { if status.IsUnschedulable() {
return status return status
} }
err := status.AsError() err := status.AsError()
klog.ErrorS(err, "Failed running PreFilter plugin", "plugin", pl.Name(), "pod", klog.KObj(pod)) klog.ErrorS(err, "Failed running PreFilter plugin", "plugin", pl.Name(), "pod", klog.KObj(pod))
return framework.AsStatus(fmt.Errorf("running PreFilter plugin %q: %w", pl.Name(), err)) return framework.AsStatus(fmt.Errorf("running PreFilter plugin %q: %w", pl.Name(), err)).WithFailedPlugin(pl.Name())
} }
} }
@ -540,9 +541,10 @@ func (f *frameworkImpl) RunFilterPlugins(
if !pluginStatus.IsUnschedulable() { if !pluginStatus.IsUnschedulable() {
// Filter plugins are not supposed to return any status other than // Filter plugins are not supposed to return any status other than
// Success or Unschedulable. // Success or Unschedulable.
errStatus := framework.AsStatus(fmt.Errorf("running %q filter plugin: %w", pl.Name(), pluginStatus.AsError())) errStatus := framework.AsStatus(fmt.Errorf("running %q filter plugin: %w", pl.Name(), pluginStatus.AsError())).WithFailedPlugin(pl.Name())
return map[string]*framework.Status{pl.Name(): errStatus} return map[string]*framework.Status{pl.Name(): errStatus}
} }
pluginStatus.SetFailedPlugin(pl.Name())
statuses[pl.Name()] = pluginStatus statuses[pl.Name()] = pluginStatus
if !f.runAllFilters { if !f.runAllFilters {
// Exit early if we don't need to run all filters. // Exit early if we don't need to run all filters.
@ -975,7 +977,8 @@ func (f *frameworkImpl) RunPermitPlugins(ctx context.Context, state *framework.C
if status.IsUnschedulable() { if status.IsUnschedulable() {
msg := fmt.Sprintf("rejected pod %q by permit plugin %q: %v", pod.Name, pl.Name(), status.Message()) msg := fmt.Sprintf("rejected pod %q by permit plugin %q: %v", pod.Name, pl.Name(), status.Message())
klog.V(4).Infof(msg) klog.V(4).Infof(msg)
return framework.NewStatus(status.Code(), msg) status.SetFailedPlugin(pl.Name())
return status
} }
if status.Code() == framework.Wait { if status.Code() == framework.Wait {
// Not allowed to be greater than maxTimeout. // Not allowed to be greater than maxTimeout.
@ -987,7 +990,7 @@ func (f *frameworkImpl) RunPermitPlugins(ctx context.Context, state *framework.C
} else { } else {
err := status.AsError() err := status.AsError()
klog.ErrorS(err, "Failed running Permit plugin", "plugin", pl.Name(), "pod", klog.KObj(pod)) klog.ErrorS(err, "Failed running Permit plugin", "plugin", pl.Name(), "pod", klog.KObj(pod))
return framework.AsStatus(fmt.Errorf("running Permit plugin %q: %w", pl.Name(), err)) return framework.AsStatus(fmt.Errorf("running Permit plugin %q: %w", pl.Name(), err)).WithFailedPlugin(pl.Name())
} }
} }
} }
@ -1012,7 +1015,7 @@ func (f *frameworkImpl) runPermitPlugin(ctx context.Context, pl framework.Permit
} }
// WaitOnPermit will block, if the pod is a waiting pod, until the waiting pod is rejected or allowed. // WaitOnPermit will block, if the pod is a waiting pod, until the waiting pod is rejected or allowed.
func (f *frameworkImpl) WaitOnPermit(ctx context.Context, pod *v1.Pod) (status *framework.Status) { func (f *frameworkImpl) WaitOnPermit(ctx context.Context, pod *v1.Pod) *framework.Status {
waitingPod := f.waitingPods.get(pod.UID) waitingPod := f.waitingPods.get(pod.UID)
if waitingPod == nil { if waitingPod == nil {
return nil return nil
@ -1028,11 +1031,12 @@ func (f *frameworkImpl) WaitOnPermit(ctx context.Context, pod *v1.Pod) (status *
if s.IsUnschedulable() { if s.IsUnschedulable() {
msg := fmt.Sprintf("pod %q rejected while waiting on permit: %v", pod.Name, s.Message()) msg := fmt.Sprintf("pod %q rejected while waiting on permit: %v", pod.Name, s.Message())
klog.V(4).Infof(msg) klog.V(4).Infof(msg)
return framework.NewStatus(s.Code(), msg) s.SetFailedPlugin(s.FailedPlugin())
return s
} }
err := s.AsError() err := s.AsError()
klog.ErrorS(err, "Failed waiting on permit for pod", "pod", klog.KObj(pod)) klog.ErrorS(err, "Failed waiting on permit for pod", "pod", klog.KObj(pod))
return framework.AsStatus(fmt.Errorf("waiting on permit for pod: %w", err)) return framework.AsStatus(fmt.Errorf("waiting on permit for pod: %w", err)).WithFailedPlugin(s.FailedPlugin())
} }
return nil return nil
} }
@ -1062,7 +1066,7 @@ func (f *frameworkImpl) GetWaitingPod(uid types.UID) framework.WaitingPod {
func (f *frameworkImpl) RejectWaitingPod(uid types.UID) { func (f *frameworkImpl) RejectWaitingPod(uid types.UID) {
waitingPod := f.waitingPods.get(uid) waitingPod := f.waitingPods.get(uid)
if waitingPod != nil { if waitingPod != nil {
waitingPod.Reject("removed") waitingPod.Reject("", "removed")
} }
} }

View File

@ -28,7 +28,6 @@ import (
"github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go" dto "github.com/prometheus/client_model/go"
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime"
@ -286,7 +285,7 @@ func (pp *TestPermitPlugin) Name() string {
return permitPlugin return permitPlugin
} }
func (pp *TestPermitPlugin) Permit(ctx context.Context, state *framework.CycleState, p *v1.Pod, nodeName string) (*framework.Status, time.Duration) { func (pp *TestPermitPlugin) Permit(ctx context.Context, state *framework.CycleState, p *v1.Pod, nodeName string) (*framework.Status, time.Duration) {
return framework.NewStatus(framework.Wait, ""), time.Duration(10 * time.Second) return framework.NewStatus(framework.Wait), 10 * time.Second
} }
var _ framework.QueueSortPlugin = &TestQueueSortPlugin{} var _ framework.QueueSortPlugin = &TestQueueSortPlugin{}
@ -894,8 +893,10 @@ func TestFilterPlugins(t *testing.T) {
inj: injectedResult{FilterStatus: int(framework.Error)}, inj: injectedResult{FilterStatus: int(framework.Error)},
}, },
}, },
wantStatus: framework.AsStatus(fmt.Errorf(`running "TestPlugin" filter plugin: %w`, errInjectedFilterStatus)), wantStatus: framework.AsStatus(fmt.Errorf(`running "TestPlugin" filter plugin: %w`, errInjectedFilterStatus)).WithFailedPlugin("TestPlugin"),
wantStatusMap: framework.PluginToStatus{"TestPlugin": framework.AsStatus(fmt.Errorf(`running "TestPlugin" filter plugin: %w`, errInjectedFilterStatus))}, wantStatusMap: framework.PluginToStatus{
"TestPlugin": framework.AsStatus(fmt.Errorf(`running "TestPlugin" filter plugin: %w`, errInjectedFilterStatus)).WithFailedPlugin("TestPlugin"),
},
}, },
{ {
name: "UnschedulableFilter", name: "UnschedulableFilter",
@ -905,8 +906,10 @@ func TestFilterPlugins(t *testing.T) {
inj: injectedResult{FilterStatus: int(framework.Unschedulable)}, inj: injectedResult{FilterStatus: int(framework.Unschedulable)},
}, },
}, },
wantStatus: framework.NewStatus(framework.Unschedulable, "injected filter status"), wantStatus: framework.NewStatus(framework.Unschedulable, "injected filter status").WithFailedPlugin("TestPlugin"),
wantStatusMap: framework.PluginToStatus{"TestPlugin": framework.NewStatus(framework.Unschedulable, "injected filter status")}, wantStatusMap: framework.PluginToStatus{
"TestPlugin": framework.NewStatus(framework.Unschedulable, "injected filter status").WithFailedPlugin("TestPlugin"),
},
}, },
{ {
name: "UnschedulableAndUnresolvableFilter", name: "UnschedulableAndUnresolvableFilter",
@ -917,8 +920,10 @@ func TestFilterPlugins(t *testing.T) {
FilterStatus: int(framework.UnschedulableAndUnresolvable)}, FilterStatus: int(framework.UnschedulableAndUnresolvable)},
}, },
}, },
wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, "injected filter status"), wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, "injected filter status").WithFailedPlugin("TestPlugin"),
wantStatusMap: framework.PluginToStatus{"TestPlugin": framework.NewStatus(framework.UnschedulableAndUnresolvable, "injected filter status")}, wantStatusMap: framework.PluginToStatus{
"TestPlugin": framework.NewStatus(framework.UnschedulableAndUnresolvable, "injected filter status").WithFailedPlugin("TestPlugin"),
},
}, },
// followings tests cover multiple-plugins scenarios // followings tests cover multiple-plugins scenarios
{ {
@ -934,8 +939,10 @@ func TestFilterPlugins(t *testing.T) {
inj: injectedResult{FilterStatus: int(framework.Error)}, inj: injectedResult{FilterStatus: int(framework.Error)},
}, },
}, },
wantStatus: framework.AsStatus(fmt.Errorf(`running "TestPlugin1" filter plugin: %w`, errInjectedFilterStatus)), wantStatus: framework.AsStatus(fmt.Errorf(`running "TestPlugin1" filter plugin: %w`, errInjectedFilterStatus)).WithFailedPlugin("TestPlugin1"),
wantStatusMap: framework.PluginToStatus{"TestPlugin1": framework.AsStatus(fmt.Errorf(`running "TestPlugin1" filter plugin: %w`, errInjectedFilterStatus))}, wantStatusMap: framework.PluginToStatus{
"TestPlugin1": framework.AsStatus(fmt.Errorf(`running "TestPlugin1" filter plugin: %w`, errInjectedFilterStatus)).WithFailedPlugin("TestPlugin1"),
},
}, },
{ {
name: "SuccessAndSuccessFilters", name: "SuccessAndSuccessFilters",
@ -965,8 +972,10 @@ func TestFilterPlugins(t *testing.T) {
inj: injectedResult{FilterStatus: int(framework.Success)}, inj: injectedResult{FilterStatus: int(framework.Success)},
}, },
}, },
wantStatus: framework.AsStatus(fmt.Errorf(`running "TestPlugin1" filter plugin: %w`, errInjectedFilterStatus)), wantStatus: framework.AsStatus(fmt.Errorf(`running "TestPlugin1" filter plugin: %w`, errInjectedFilterStatus)).WithFailedPlugin("TestPlugin1"),
wantStatusMap: framework.PluginToStatus{"TestPlugin1": framework.AsStatus(fmt.Errorf(`running "TestPlugin1" filter plugin: %w`, errInjectedFilterStatus))}, wantStatusMap: framework.PluginToStatus{
"TestPlugin1": framework.AsStatus(fmt.Errorf(`running "TestPlugin1" filter plugin: %w`, errInjectedFilterStatus)).WithFailedPlugin("TestPlugin1"),
},
}, },
{ {
name: "SuccessAndErrorFilters", name: "SuccessAndErrorFilters",
@ -981,8 +990,10 @@ func TestFilterPlugins(t *testing.T) {
inj: injectedResult{FilterStatus: int(framework.Error)}, inj: injectedResult{FilterStatus: int(framework.Error)},
}, },
}, },
wantStatus: framework.AsStatus(fmt.Errorf(`running "TestPlugin2" filter plugin: %w`, errInjectedFilterStatus)), wantStatus: framework.AsStatus(fmt.Errorf(`running "TestPlugin2" filter plugin: %w`, errInjectedFilterStatus)).WithFailedPlugin("TestPlugin2"),
wantStatusMap: framework.PluginToStatus{"TestPlugin2": framework.AsStatus(fmt.Errorf(`running "TestPlugin2" filter plugin: %w`, errInjectedFilterStatus))}, wantStatusMap: framework.PluginToStatus{
"TestPlugin2": framework.AsStatus(fmt.Errorf(`running "TestPlugin2" filter plugin: %w`, errInjectedFilterStatus)).WithFailedPlugin("TestPlugin2"),
},
}, },
{ {
name: "SuccessAndUnschedulableFilters", name: "SuccessAndUnschedulableFilters",
@ -997,8 +1008,10 @@ func TestFilterPlugins(t *testing.T) {
inj: injectedResult{FilterStatus: int(framework.Unschedulable)}, inj: injectedResult{FilterStatus: int(framework.Unschedulable)},
}, },
}, },
wantStatus: framework.NewStatus(framework.Unschedulable, "injected filter status"), wantStatus: framework.NewStatus(framework.Unschedulable, "injected filter status").WithFailedPlugin("TestPlugin2"),
wantStatusMap: framework.PluginToStatus{"TestPlugin2": framework.NewStatus(framework.Unschedulable, "injected filter status")}, wantStatusMap: framework.PluginToStatus{
"TestPlugin2": framework.NewStatus(framework.Unschedulable, "injected filter status").WithFailedPlugin("TestPlugin2"),
},
}, },
{ {
name: "SuccessFilterWithRunAllFilters", name: "SuccessFilterWithRunAllFilters",
@ -1026,8 +1039,10 @@ func TestFilterPlugins(t *testing.T) {
}, },
}, },
runAllFilters: true, runAllFilters: true,
wantStatus: framework.AsStatus(fmt.Errorf(`running "TestPlugin1" filter plugin: %w`, errInjectedFilterStatus)), wantStatus: framework.AsStatus(fmt.Errorf(`running "TestPlugin1" filter plugin: %w`, errInjectedFilterStatus)).WithFailedPlugin("TestPlugin1"),
wantStatusMap: framework.PluginToStatus{"TestPlugin1": framework.AsStatus(fmt.Errorf(`running "TestPlugin1" filter plugin: %w`, errInjectedFilterStatus))}, wantStatusMap: framework.PluginToStatus{
"TestPlugin1": framework.AsStatus(fmt.Errorf(`running "TestPlugin1" filter plugin: %w`, errInjectedFilterStatus)).WithFailedPlugin("TestPlugin1"),
},
}, },
{ {
name: "ErrorAndErrorFilters", name: "ErrorAndErrorFilters",
@ -1042,10 +1057,10 @@ func TestFilterPlugins(t *testing.T) {
}, },
}, },
runAllFilters: true, runAllFilters: true,
wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, "injected filter status", "injected filter status"), wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, "injected filter status", "injected filter status").WithFailedPlugin("TestPlugin1"),
wantStatusMap: framework.PluginToStatus{ wantStatusMap: framework.PluginToStatus{
"TestPlugin1": framework.NewStatus(framework.UnschedulableAndUnresolvable, "injected filter status"), "TestPlugin1": framework.NewStatus(framework.UnschedulableAndUnresolvable, "injected filter status").WithFailedPlugin("TestPlugin1"),
"TestPlugin2": framework.NewStatus(framework.Unschedulable, "injected filter status"), "TestPlugin2": framework.NewStatus(framework.Unschedulable, "injected filter status").WithFailedPlugin("TestPlugin2"),
}, },
}, },
} }
@ -1081,7 +1096,6 @@ func TestFilterPlugins(t *testing.T) {
if !reflect.DeepEqual(gotStatusMap, tt.wantStatusMap) { if !reflect.DeepEqual(gotStatusMap, tt.wantStatusMap) {
t.Errorf("wrong status map. got: %+v, want: %+v", gotStatusMap, tt.wantStatusMap) t.Errorf("wrong status map. got: %+v, want: %+v", gotStatusMap, tt.wantStatusMap)
} }
}) })
} }
} }
@ -1238,7 +1252,7 @@ func TestFilterPluginsWithNominatedPods(t *testing.T) {
nominatedPod: highPriorityPod, nominatedPod: highPriorityPod,
node: node, node: node,
nodeInfo: framework.NewNodeInfo(pod), nodeInfo: framework.NewNodeInfo(pod),
wantStatus: framework.AsStatus(fmt.Errorf(`running "TestPlugin2" filter plugin: %w`, errInjectedFilterStatus)), wantStatus: framework.AsStatus(fmt.Errorf(`running "TestPlugin2" filter plugin: %w`, errInjectedFilterStatus)).WithFailedPlugin("TestPlugin2"),
}, },
{ {
name: "node has a low-priority nominated pod and pre filters return unschedulable", name: "node has a low-priority nominated pod and pre filters return unschedulable",
@ -1653,7 +1667,7 @@ func TestPermitPlugins(t *testing.T) {
inj: injectedResult{PermitStatus: int(framework.Unschedulable)}, inj: injectedResult{PermitStatus: int(framework.Unschedulable)},
}, },
}, },
want: framework.NewStatus(framework.Unschedulable, `rejected pod "" by permit plugin "TestPlugin": injected status`), want: framework.NewStatus(framework.Unschedulable, "injected status").WithFailedPlugin("TestPlugin"),
}, },
{ {
name: "ErrorPermitPlugin", name: "ErrorPermitPlugin",
@ -1663,7 +1677,7 @@ func TestPermitPlugins(t *testing.T) {
inj: injectedResult{PermitStatus: int(framework.Error)}, inj: injectedResult{PermitStatus: int(framework.Error)},
}, },
}, },
want: framework.AsStatus(fmt.Errorf(`running Permit plugin "TestPlugin": %w`, errInjectedStatus)), want: framework.AsStatus(fmt.Errorf(`running Permit plugin "TestPlugin": %w`, errInjectedStatus)).WithFailedPlugin("TestPlugin"),
}, },
{ {
name: "UnschedulableAndUnresolvablePermitPlugin", name: "UnschedulableAndUnresolvablePermitPlugin",
@ -1673,7 +1687,7 @@ func TestPermitPlugins(t *testing.T) {
inj: injectedResult{PermitStatus: int(framework.UnschedulableAndUnresolvable)}, inj: injectedResult{PermitStatus: int(framework.UnschedulableAndUnresolvable)},
}, },
}, },
want: framework.NewStatus(framework.UnschedulableAndUnresolvable, `rejected pod "" by permit plugin "TestPlugin": injected status`), want: framework.NewStatus(framework.UnschedulableAndUnresolvable, "injected status").WithFailedPlugin("TestPlugin"),
}, },
{ {
name: "WaitPermitPlugin", name: "WaitPermitPlugin",
@ -1711,11 +1725,12 @@ func TestPermitPlugins(t *testing.T) {
inj: injectedResult{PermitStatus: int(framework.Error)}, inj: injectedResult{PermitStatus: int(framework.Error)},
}, },
}, },
want: framework.AsStatus(fmt.Errorf(`running Permit plugin "TestPlugin": %w`, errInjectedStatus)), want: framework.AsStatus(fmt.Errorf(`running Permit plugin "TestPlugin": %w`, errInjectedStatus)).WithFailedPlugin("TestPlugin"),
}, },
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
registry := Registry{} registry := Registry{}
configPlugins := &config.Plugins{Permit: &config.PluginSet{}} configPlugins := &config.Plugins{Permit: &config.PluginSet{}}
@ -1743,6 +1758,7 @@ func TestPermitPlugins(t *testing.T) {
if !reflect.DeepEqual(status, tt.want) { if !reflect.DeepEqual(status, tt.want) {
t.Errorf("wrong status code. got %v, want %v", status, tt.want) t.Errorf("wrong status code. got %v, want %v", status, tt.want)
} }
})
} }
} }
@ -2077,24 +2093,21 @@ func TestWaitOnPermit(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
action func(f framework.Framework) action func(f framework.Framework)
wantStatus framework.Code want *framework.Status
wantMessage string
}{ }{
{ {
name: "Reject Waiting Pod", name: "Reject Waiting Pod",
action: func(f framework.Framework) { action: func(f framework.Framework) {
f.GetWaitingPod(pod.UID).Reject("reject message") f.GetWaitingPod(pod.UID).Reject(permitPlugin, "reject message")
}, },
wantStatus: framework.Unschedulable, want: framework.NewStatus(framework.Unschedulable, "reject message").WithFailedPlugin(permitPlugin),
wantMessage: "pod \"pod\" rejected while waiting on permit: reject message",
}, },
{ {
name: "Allow Waiting Pod", name: "Allow Waiting Pod",
action: func(f framework.Framework) { action: func(f framework.Framework) {
f.GetWaitingPod(pod.UID).Allow(permitPlugin) f.GetWaitingPod(pod.UID).Allow(permitPlugin)
}, },
wantStatus: framework.Success, want: nil,
wantMessage: "",
}, },
} }
@ -2123,14 +2136,9 @@ func TestWaitOnPermit(t *testing.T) {
go tt.action(f) go tt.action(f)
waitOnPermitStatus := f.WaitOnPermit(context.Background(), pod) got := f.WaitOnPermit(context.Background(), pod)
if waitOnPermitStatus.Code() != tt.wantStatus { if !reflect.DeepEqual(tt.want, got) {
t.Fatalf("Expected WaitOnPermit to return status %v, but got %v", t.Errorf("Unexpected status: want %v, but got %v", tt.want, got)
tt.wantStatus, waitOnPermitStatus.Code())
}
if waitOnPermitStatus.Message() != tt.wantMessage {
t.Fatalf("Expected WaitOnPermit to return status with message %q, but got %q",
tt.wantMessage, waitOnPermitStatus.Message())
} }
}) })
} }

View File

@ -100,7 +100,7 @@ func newWaitingPod(pod *v1.Pod, pluginsMaxWaitTime map[string]time.Duration) *wa
wp.pendingPlugins[plugin] = time.AfterFunc(waitTime, func() { wp.pendingPlugins[plugin] = time.AfterFunc(waitTime, func() {
msg := fmt.Sprintf("rejected due to timeout after waiting %v at plugin %v", msg := fmt.Sprintf("rejected due to timeout after waiting %v at plugin %v",
waitTime, plugin) waitTime, plugin)
wp.Reject(msg) wp.Reject(plugin, msg)
}) })
} }
@ -149,7 +149,7 @@ func (w *waitingPod) Allow(pluginName string) {
} }
// Reject declares the waiting pod unschedulable. // Reject declares the waiting pod unschedulable.
func (w *waitingPod) Reject(msg string) { func (w *waitingPod) Reject(pluginName, msg string) {
w.mu.RLock() w.mu.RLock()
defer w.mu.RUnlock() defer w.mu.RUnlock()
for _, timer := range w.pendingPlugins { for _, timer := range w.pendingPlugins {
@ -159,7 +159,7 @@ func (w *waitingPod) Reject(msg string) {
// The select clause works as a non-blocking send. // The select clause works as a non-blocking send.
// If there is no receiver, it's a no-op (default case). // If there is no receiver, it's a no-op (default case).
select { select {
case w.s <- framework.NewStatus(framework.Unschedulable, msg): case w.s <- framework.NewStatus(framework.Unschedulable, msg).WithFailedPlugin(pluginName):
default: default:
} }
} }

View File

@ -91,11 +91,17 @@ type WeightedAffinityTerm struct {
Weight int32 Weight int32
} }
// Diagnosis records the details to diagnose a scheduling failure.
type Diagnosis struct {
NodeToStatusMap NodeToStatusMap
UnschedulablePlugins sets.String
}
// FitError describes a fit error of a pod. // FitError describes a fit error of a pod.
type FitError struct { type FitError struct {
Pod *v1.Pod Pod *v1.Pod
NumAllNodes int NumAllNodes int
FilteredNodesStatuses NodeToStatusMap Diagnosis Diagnosis
} }
const ( const (
@ -106,7 +112,7 @@ const (
// Error returns detailed information of why the pod failed to fit on each node // Error returns detailed information of why the pod failed to fit on each node
func (f *FitError) Error() string { func (f *FitError) Error() string {
reasons := make(map[string]int) reasons := make(map[string]int)
for _, status := range f.FilteredNodesStatuses { for _, status := range f.Diagnosis.NodeToStatusMap {
for _, reason := range status.Reasons() { for _, reason := range status.Reasons() {
reasons[reason]++ reasons[reason]++
} }

View File

@ -462,7 +462,7 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
klog.V(3).InfoS("No PostFilter plugins are registered, so no preemption will be performed") klog.V(3).InfoS("No PostFilter plugins are registered, so no preemption will be performed")
} else { } else {
// Run PostFilter plugins to try to make the pod schedulable in a future scheduling cycle. // Run PostFilter plugins to try to make the pod schedulable in a future scheduling cycle.
result, status := fwk.RunPostFilterPlugins(ctx, state, pod, fitError.FilteredNodesStatuses) result, status := fwk.RunPostFilterPlugins(ctx, state, pod, fitError.Diagnosis.NodeToStatusMap)
if status.Code() == framework.Error { if status.Code() == framework.Error {
klog.ErrorS(nil, "Status after running PostFilter plugins for pod", klog.KObj(pod), "status", status) klog.ErrorS(nil, "Status after running PostFilter plugins for pod", klog.KObj(pod), "status", status)
} else { } else {

View File

@ -636,11 +636,11 @@ func TestSchedulerNoPhantomPodAfterDelete(t *testing.T) {
expectErr := &framework.FitError{ expectErr := &framework.FitError{
Pod: secondPod, Pod: secondPod,
NumAllNodes: 1, NumAllNodes: 1,
FilteredNodesStatuses: framework.NodeToStatusMap{ Diagnosis: framework.Diagnosis{
node.Name: framework.NewStatus( NodeToStatusMap: framework.NodeToStatusMap{
framework.Unschedulable, node.Name: framework.NewStatus(framework.Unschedulable, nodeports.ErrReason).WithFailedPlugin(nodeports.Name),
nodeports.ErrReason, },
), UnschedulablePlugins: sets.NewString(nodeports.Name),
}, },
} }
if !reflect.DeepEqual(expectErr, err) { if !reflect.DeepEqual(expectErr, err) {
@ -761,7 +761,7 @@ func TestSchedulerFailedSchedulingReasons(t *testing.T) {
framework.Unschedulable, framework.Unschedulable,
fmt.Sprintf("Insufficient %v", v1.ResourceCPU), fmt.Sprintf("Insufficient %v", v1.ResourceCPU),
fmt.Sprintf("Insufficient %v", v1.ResourceMemory), fmt.Sprintf("Insufficient %v", v1.ResourceMemory),
) ).WithFailedPlugin(noderesources.FitName)
} }
fns := []st.RegisterPluginFunc{ fns := []st.RegisterPluginFunc{
st.RegisterQueueSortPlugin(queuesort.Name, queuesort.New), st.RegisterQueueSortPlugin(queuesort.Name, queuesort.New),
@ -780,7 +780,10 @@ func TestSchedulerFailedSchedulingReasons(t *testing.T) {
expectErr := &framework.FitError{ expectErr := &framework.FitError{
Pod: podWithTooBigResourceRequests, Pod: podWithTooBigResourceRequests,
NumAllNodes: len(nodes), NumAllNodes: len(nodes),
FilteredNodesStatuses: failedNodeStatues, Diagnosis: framework.Diagnosis{
NodeToStatusMap: failedNodeStatues,
UnschedulablePlugins: sets.NewString(noderesources.FitName),
},
} }
if len(fmt.Sprint(expectErr)) > 150 { if len(fmt.Sprint(expectErr)) > 150 {
t.Errorf("message is too spammy ! %v ", len(fmt.Sprint(expectErr))) t.Errorf("message is too spammy ! %v ", len(fmt.Sprint(expectErr)))

View File

@ -459,7 +459,7 @@ func (pp *PermitPlugin) Permit(ctx context.Context, state *framework.CycleState,
if pp.waitAndRejectPermit { if pp.waitAndRejectPermit {
pp.rejectingPod = pod.Name pp.rejectingPod = pod.Name
pp.fh.IterateOverWaitingPods(func(wp framework.WaitingPod) { pp.fh.IterateOverWaitingPods(func(wp framework.WaitingPod) {
wp.Reject(fmt.Sprintf("reject pod %v", wp.GetPod().Name)) wp.Reject(pp.name, fmt.Sprintf("reject pod %v", wp.GetPod().Name))
}) })
return framework.NewStatus(framework.Unschedulable, fmt.Sprintf("reject pod %v", pod.Name)), 0 return framework.NewStatus(framework.Unschedulable, fmt.Sprintf("reject pod %v", pod.Name)), 0
} }
@ -479,7 +479,7 @@ func (pp *PermitPlugin) allowAllPods() {
// rejectAllPods rejects all waiting pods. // rejectAllPods rejects all waiting pods.
func (pp *PermitPlugin) rejectAllPods() { func (pp *PermitPlugin) rejectAllPods() {
pp.fh.IterateOverWaitingPods(func(wp framework.WaitingPod) { wp.Reject("rejectAllPods") }) pp.fh.IterateOverWaitingPods(func(wp framework.WaitingPod) { wp.Reject(pp.name, "rejectAllPods") })
} }
// reset used to reset permit plugin. // reset used to reset permit plugin.