mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-06 10:43:56 +00:00
feature(scheduler): simplify QueueingHint by introducing new statuses
This commit is contained in:
parent
de054fbf94
commit
cb5dc46edf
@ -77,18 +77,30 @@ const (
|
||||
// Success means that plugin ran correctly and found pod schedulable.
|
||||
// NOTE: A nil status is also considered as "Success".
|
||||
Success Code = iota
|
||||
// Error is used for internal plugin errors, unexpected input, etc.
|
||||
// Error is one of the failures, used for internal plugin errors, unexpected input, etc.
|
||||
// Plugin shouldn't return this code for expected failures, like Unschedulable.
|
||||
// Since it's the unexpected failure, the scheduling queue registers the pod without unschedulable plugins.
|
||||
// Meaning, the Pod will be requeued to activeQ/backoffQ soon.
|
||||
Error
|
||||
// Unschedulable is used when a plugin finds a pod unschedulable. The scheduler might attempt to
|
||||
// Unschedulable is one of the failures, used when a plugin finds a pod unschedulable.
|
||||
// If it's returned from PreFilter or Filter, the scheduler might attempt to
|
||||
// run other postFilter plugins like preemption to get this pod scheduled.
|
||||
// Use UnschedulableAndUnresolvable to make the scheduler skipping other postFilter plugins.
|
||||
// The accompanying status message should explain why the pod is unschedulable.
|
||||
//
|
||||
// We regard the backoff as a penalty of wasting the scheduling cycle.
|
||||
// When the scheduling queue requeues Pods, which was rejected with Unschedulable in the last scheduling,
|
||||
// the Pod goes through backoff.
|
||||
Unschedulable
|
||||
// UnschedulableAndUnresolvable is used when a plugin finds a pod unschedulable and
|
||||
// other postFilter plugins like preemption would not change anything.
|
||||
// Plugins should return Unschedulable if it is possible that the pod can get scheduled
|
||||
// after running other postFilter plugins.
|
||||
// The accompanying status message should explain why the pod is unschedulable.
|
||||
//
|
||||
// We regard the backoff as a penalty of wasting the scheduling cycle.
|
||||
// When the scheduling queue requeues Pods, which was rejected with Unschedulable in the last scheduling,
|
||||
// the Pod goes through backoff.
|
||||
UnschedulableAndUnresolvable
|
||||
// Wait is used when a Permit plugin finds a pod scheduling should wait.
|
||||
Wait
|
||||
@ -97,10 +109,27 @@ const (
|
||||
// - when a PreFilter plugin returns Skip so that coupled Filter plugin/PreFilterExtensions() will be skipped.
|
||||
// - when a PreScore plugin returns Skip so that coupled Score plugin will be skipped.
|
||||
Skip
|
||||
// Pending means that the scheduling process is finished successfully,
|
||||
// but the plugin wants to abort the scheduling cycle/binding cycle here.
|
||||
//
|
||||
// For example, the DRA plugin sometimes needs to wait for the external device driver
|
||||
// to provision the resource for the Pod.
|
||||
// It's different from when to return Unschedulable/UnschedulableAndUnresolvable,
|
||||
// because in this case, the scheduler decides where the Pod can go successfully,
|
||||
// but we need to wait for the external component to do something based on that scheduling result.
|
||||
//
|
||||
// We regard the backoff as a penalty of wasting the scheduling cycle.
|
||||
// In the case of returning Pending, we cannot say the scheduling cycle is wasted
|
||||
// because the scheduling result is used to proceed the Pod's scheduling forward,
|
||||
// that particular scheduling cycle is failed though.
|
||||
// So, Pods rejected by such reasons don't need to suffer a penalty (backoff).
|
||||
// When the scheduling queue requeues Pods, which was rejected with Pending in the last scheduling,
|
||||
// the Pod goes to activeQ directly ignoring backoff.
|
||||
Pending
|
||||
)
|
||||
|
||||
// This list should be exactly the same as the codes iota defined above in the same order.
|
||||
var codes = []string{"Success", "Error", "Unschedulable", "UnschedulableAndUnresolvable", "Wait", "Skip"}
|
||||
var codes = []string{"Success", "Error", "Unschedulable", "UnschedulableAndUnresolvable", "Wait", "Skip", "Pending"}
|
||||
|
||||
func (c Code) String() string {
|
||||
return codes[c]
|
||||
@ -151,7 +180,7 @@ type Status struct {
|
||||
reasons []string
|
||||
err error
|
||||
// failedPlugin is an optional field that records the plugin name a Pod failed by.
|
||||
// It's set by the framework when code is Error, Unschedulable or UnschedulableAndUnresolvable.
|
||||
// It's set by the framework when code is Unschedulable or UnschedulableAndUnresolvable.
|
||||
failedPlugin string
|
||||
}
|
||||
|
||||
|
@ -131,6 +131,7 @@ func TestStatusCodes(t *testing.T) {
|
||||
assertStatusCode(t, UnschedulableAndUnresolvable, 3)
|
||||
assertStatusCode(t, Wait, 4)
|
||||
assertStatusCode(t, Skip, 5)
|
||||
assertStatusCode(t, Pending, 6)
|
||||
}
|
||||
|
||||
func assertStatusCode(t *testing.T, code Code, value int) {
|
||||
|
@ -315,7 +315,7 @@ func (pl *dynamicResources) isSchedulableAfterClaimChange(logger klog.Logger, po
|
||||
originalClaim, modifiedClaim, err := schedutil.As[*resourcev1alpha2.ResourceClaim](oldObj, newObj)
|
||||
if err != nil {
|
||||
// Shouldn't happen.
|
||||
return framework.QueueAfterBackoff, fmt.Errorf("unexpected object in isSchedulableAfterClaimChange: %w", err)
|
||||
return framework.Queue, fmt.Errorf("unexpected object in isSchedulableAfterClaimChange: %w", err)
|
||||
}
|
||||
|
||||
usesClaim := false
|
||||
@ -339,7 +339,7 @@ func (pl *dynamicResources) isSchedulableAfterClaimChange(logger klog.Logger, po
|
||||
|
||||
if originalClaim == nil {
|
||||
logger.V(4).Info("claim for pod got created", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim))
|
||||
return framework.QueueImmediately, nil
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
// Modifications may or may not be relevant. If the entire
|
||||
@ -357,7 +357,7 @@ func (pl *dynamicResources) isSchedulableAfterClaimChange(logger klog.Logger, po
|
||||
}
|
||||
|
||||
logger.V(4).Info("status of claim for pod got updated", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim))
|
||||
return framework.QueueImmediately, nil
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
// isSchedulableAfterPodSchedulingContextChange is invoked for all
|
||||
@ -376,7 +376,7 @@ func (pl *dynamicResources) isSchedulableAfterPodSchedulingContextChange(logger
|
||||
oldPodScheduling, newPodScheduling, err := schedutil.As[*resourcev1alpha2.PodSchedulingContext](oldObj, newObj)
|
||||
if err != nil {
|
||||
// Shouldn't happen.
|
||||
return framework.QueueAfterBackoff, fmt.Errorf("unexpected object in isSchedulableAfterPodSchedulingContextChange: %w", err)
|
||||
return framework.Queue, fmt.Errorf("unexpected object in isSchedulableAfterPodSchedulingContextChange: %w", err)
|
||||
}
|
||||
podScheduling := newPodScheduling // Never nil because deletes are handled above.
|
||||
|
||||
@ -423,7 +423,7 @@ func (pl *dynamicResources) isSchedulableAfterPodSchedulingContextChange(logger
|
||||
len(oldPodScheduling.Status.ResourceClaims) < len(podScheduling.Status.ResourceClaims) /* new information and not incomplete (checked above) */ {
|
||||
// This definitely is new information for the scheduler. Try again immediately.
|
||||
logger.V(4).Info("PodSchedulingContext for pod has all required information, schedule immediately", "pod", klog.KObj(pod))
|
||||
return framework.QueueImmediately, nil
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
// The other situation where the scheduler needs to do
|
||||
@ -448,7 +448,7 @@ func (pl *dynamicResources) isSchedulableAfterPodSchedulingContextChange(logger
|
||||
for _, claimStatus := range podScheduling.Status.ResourceClaims {
|
||||
if sliceContains(claimStatus.UnsuitableNodes, podScheduling.Spec.SelectedNode) {
|
||||
logger.V(5).Info("PodSchedulingContext has unsuitable selected node, schedule immediately", "pod", klog.KObj(pod), "selectedNode", podScheduling.Spec.SelectedNode, "podResourceName", claimStatus.Name)
|
||||
return framework.QueueImmediately, nil
|
||||
return framework.Queue, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -463,7 +463,7 @@ func (pl *dynamicResources) isSchedulableAfterPodSchedulingContextChange(logger
|
||||
|
||||
// Once we get here, all changes which are known to require special responses
|
||||
// have been checked for. Whatever the change was, we don't know exactly how
|
||||
// to handle it and thus return QueueAfterBackoff. This will cause the
|
||||
// to handle it and thus return Queue. This will cause the
|
||||
// scheduler to treat the event as if no event hint callback had been provided.
|
||||
// Developers who want to investigate this can enable a diff at log level 6.
|
||||
if loggerV := logger.V(6); loggerV.Enabled() {
|
||||
@ -471,7 +471,7 @@ func (pl *dynamicResources) isSchedulableAfterPodSchedulingContextChange(logger
|
||||
} else {
|
||||
logger.V(5).Info("PodSchedulingContext for pod with unknown changes, maybe schedule", "pod", klog.KObj(pod))
|
||||
}
|
||||
return framework.QueueAfterBackoff, nil
|
||||
return framework.Queue, nil
|
||||
|
||||
}
|
||||
|
||||
@ -961,7 +961,7 @@ func (pl *dynamicResources) Reserve(ctx context.Context, cs *framework.CycleStat
|
||||
if err := state.podSchedulingState.publish(ctx, pod, pl.clientset); err != nil {
|
||||
return statusError(logger, err)
|
||||
}
|
||||
return statusUnschedulable(logger, "waiting for resource driver to allocate resource", "pod", klog.KObj(pod), "node", klog.ObjectRef{Name: nodeName})
|
||||
return statusPending(logger, "waiting for resource driver to allocate resource", "pod", klog.KObj(pod), "node", klog.ObjectRef{Name: nodeName})
|
||||
}
|
||||
}
|
||||
|
||||
@ -977,7 +977,7 @@ func (pl *dynamicResources) Reserve(ctx context.Context, cs *framework.CycleStat
|
||||
// provisioning? On the one hand, volume provisioning is currently
|
||||
// irreversible, so it better should come last. On the other hand,
|
||||
// triggering both in parallel might be faster.
|
||||
return statusUnschedulable(logger, "waiting for resource driver to provide information", "pod", klog.KObj(pod))
|
||||
return statusPending(logger, "waiting for resource driver to provide information", "pod", klog.KObj(pod))
|
||||
}
|
||||
|
||||
func containsNode(hay []string, needle string) bool {
|
||||
@ -1073,6 +1073,21 @@ func statusUnschedulable(logger klog.Logger, reason string, kv ...interface{}) *
|
||||
return framework.NewStatus(framework.UnschedulableAndUnresolvable, reason)
|
||||
}
|
||||
|
||||
// statusPending ensures that there is a log message associated with the
|
||||
// line where the status originated.
|
||||
func statusPending(logger klog.Logger, reason string, kv ...interface{}) *framework.Status {
|
||||
if loggerV := logger.V(5); loggerV.Enabled() {
|
||||
helper, loggerV := loggerV.WithCallStackHelper()
|
||||
helper()
|
||||
kv = append(kv, "reason", reason)
|
||||
// nolint: logcheck // warns because it cannot check key/values
|
||||
loggerV.Info("pod waiting for external component", kv...)
|
||||
}
|
||||
|
||||
// When we return Pending, we want to block the Pod at the same time.
|
||||
return framework.NewStatus(framework.Pending, reason)
|
||||
}
|
||||
|
||||
// statusError ensures that there is a log message associated with the
|
||||
// line where the error originated.
|
||||
func statusError(logger klog.Logger, err error, kv ...interface{}) *framework.Status {
|
||||
|
@ -346,7 +346,7 @@ func TestPlugin(t *testing.T) {
|
||||
classes: []*resourcev1alpha2.ResourceClass{resourceClass},
|
||||
want: want{
|
||||
reserve: result{
|
||||
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `waiting for resource driver to allocate resource`),
|
||||
status: framework.NewStatus(framework.Pending, `waiting for resource driver to allocate resource`),
|
||||
added: []metav1.Object{schedulingSelectedPotential},
|
||||
},
|
||||
},
|
||||
@ -360,7 +360,7 @@ func TestPlugin(t *testing.T) {
|
||||
classes: []*resourcev1alpha2.ResourceClass{resourceClass},
|
||||
want: want{
|
||||
reserve: result{
|
||||
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `waiting for resource driver to provide information`),
|
||||
status: framework.NewStatus(framework.Pending, `waiting for resource driver to provide information`),
|
||||
added: []metav1.Object{schedulingPotential},
|
||||
},
|
||||
},
|
||||
@ -374,7 +374,7 @@ func TestPlugin(t *testing.T) {
|
||||
classes: []*resourcev1alpha2.ResourceClass{resourceClass},
|
||||
want: want{
|
||||
reserve: result{
|
||||
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `waiting for resource driver to allocate resource`),
|
||||
status: framework.NewStatus(framework.Pending, `waiting for resource driver to allocate resource`),
|
||||
changes: change{
|
||||
scheduling: func(in *resourcev1alpha2.PodSchedulingContext) *resourcev1alpha2.PodSchedulingContext {
|
||||
return st.FromPodSchedulingContexts(in).
|
||||
@ -925,7 +925,7 @@ func Test_isSchedulableAfterClaimChange(t *testing.T) {
|
||||
"queue-on-add": {
|
||||
pod: podWithClaimName,
|
||||
newObj: pendingImmediateClaim,
|
||||
expectedHint: framework.QueueImmediately,
|
||||
expectedHint: framework.Queue,
|
||||
},
|
||||
"backoff-wrong-old-object": {
|
||||
pod: podWithClaimName,
|
||||
@ -953,7 +953,7 @@ func Test_isSchedulableAfterClaimChange(t *testing.T) {
|
||||
claim.Status.Allocation = &resourcev1alpha2.AllocationResult{}
|
||||
return claim
|
||||
}(),
|
||||
expectedHint: framework.QueueImmediately,
|
||||
expectedHint: framework.Queue,
|
||||
},
|
||||
}
|
||||
|
||||
@ -1051,7 +1051,7 @@ func Test_isSchedulableAfterPodSchedulingContextChange(t *testing.T) {
|
||||
claims: []*resourcev1alpha2.ResourceClaim{pendingDelayedClaim},
|
||||
oldObj: scheduling,
|
||||
newObj: schedulingInfo,
|
||||
expectedHint: framework.QueueImmediately,
|
||||
expectedHint: framework.Queue,
|
||||
},
|
||||
"queue-bad-selected-node": {
|
||||
pod: podWithClaimTemplateInStatus,
|
||||
@ -1067,7 +1067,7 @@ func Test_isSchedulableAfterPodSchedulingContextChange(t *testing.T) {
|
||||
scheduling.Status.ResourceClaims[0].UnsuitableNodes = append(scheduling.Status.ResourceClaims[0].UnsuitableNodes, scheduling.Spec.SelectedNode)
|
||||
return scheduling
|
||||
}(),
|
||||
expectedHint: framework.QueueImmediately,
|
||||
expectedHint: framework.Queue,
|
||||
},
|
||||
"skip-spec-changes": {
|
||||
pod: podWithClaimTemplateInStatus,
|
||||
@ -1089,7 +1089,7 @@ func Test_isSchedulableAfterPodSchedulingContextChange(t *testing.T) {
|
||||
scheduling.Finalizers = append(scheduling.Finalizers, "foo")
|
||||
return scheduling
|
||||
}(),
|
||||
expectedHint: framework.QueueAfterBackoff,
|
||||
expectedHint: framework.Queue,
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -94,7 +94,7 @@ func (pl *NodeAffinity) EventsToRegister() []framework.ClusterEventWithHint {
|
||||
func (pl *NodeAffinity) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
originalNode, modifiedNode, err := util.As[*v1.Node](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.QueueAfterBackoff, err
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
if pl.addedNodeSelector != nil && !pl.addedNodeSelector.Match(modifiedNode) {
|
||||
@ -105,7 +105,7 @@ func (pl *NodeAffinity) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1
|
||||
requiredNodeAffinity := nodeaffinity.GetRequiredNodeAffinity(pod)
|
||||
isMatched, err := requiredNodeAffinity.Match(modifiedNode)
|
||||
if err != nil {
|
||||
return framework.QueueAfterBackoff, err
|
||||
return framework.Queue, err
|
||||
}
|
||||
if !isMatched {
|
||||
logger.V(4).Info("node was created or updated, but doesn't matches with the pod's NodeAffinity", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
@ -116,14 +116,14 @@ func (pl *NodeAffinity) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1
|
||||
if originalNode != nil {
|
||||
wasMatched, err = requiredNodeAffinity.Match(originalNode)
|
||||
if err != nil {
|
||||
return framework.QueueAfterBackoff, err
|
||||
return framework.Queue, err
|
||||
}
|
||||
}
|
||||
|
||||
if !wasMatched {
|
||||
// This modification makes this Node match with Pod's NodeAffinity.
|
||||
logger.V(4).Info("node was created or updated, and matches with the pod's NodeAffinity", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
return framework.QueueAfterBackoff, nil
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
logger.V(4).Info("node was created or updated, but it doesn't make this pod schedulable", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
|
@ -1189,7 +1189,7 @@ func Test_isSchedulableAfterNodeChange(t *testing.T) {
|
||||
args: &config.NodeAffinityArgs{},
|
||||
pod: podWithNodeAffinity.Obj(),
|
||||
newObj: "not-a-node",
|
||||
expectedHint: framework.QueueAfterBackoff,
|
||||
expectedHint: framework.Queue,
|
||||
expectedErr: true,
|
||||
},
|
||||
"backoff-wrong-old-object": {
|
||||
@ -1197,7 +1197,7 @@ func Test_isSchedulableAfterNodeChange(t *testing.T) {
|
||||
pod: podWithNodeAffinity.Obj(),
|
||||
oldObj: "not-a-node",
|
||||
newObj: st.MakeNode().Obj(),
|
||||
expectedHint: framework.QueueAfterBackoff,
|
||||
expectedHint: framework.Queue,
|
||||
expectedErr: true,
|
||||
},
|
||||
"skip-queue-on-add": {
|
||||
@ -1210,7 +1210,7 @@ func Test_isSchedulableAfterNodeChange(t *testing.T) {
|
||||
args: &config.NodeAffinityArgs{},
|
||||
pod: podWithNodeAffinity.Obj(),
|
||||
newObj: st.MakeNode().Label("foo", "bar").Obj(),
|
||||
expectedHint: framework.QueueAfterBackoff,
|
||||
expectedHint: framework.Queue,
|
||||
},
|
||||
"skip-unrelated-changes": {
|
||||
args: &config.NodeAffinityArgs{},
|
||||
@ -1245,7 +1245,7 @@ func Test_isSchedulableAfterNodeChange(t *testing.T) {
|
||||
pod: podWithNodeAffinity.Obj(),
|
||||
oldObj: st.MakeNode().Obj(),
|
||||
newObj: st.MakeNode().Label("foo", "bar").Obj(),
|
||||
expectedHint: framework.QueueAfterBackoff,
|
||||
expectedHint: framework.Queue,
|
||||
},
|
||||
"skip-queue-on-add-scheduler-enforced-node-affinity": {
|
||||
args: &config.NodeAffinityArgs{
|
||||
@ -1289,7 +1289,7 @@ func Test_isSchedulableAfterNodeChange(t *testing.T) {
|
||||
},
|
||||
pod: podWithNodeAffinity.Obj(),
|
||||
newObj: st.MakeNode().Label("foo", "bar").Obj(),
|
||||
expectedHint: framework.QueueAfterBackoff,
|
||||
expectedHint: framework.Queue,
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -120,7 +120,7 @@ func (pl *NodePorts) EventsToRegister() []framework.ClusterEventWithHint {
|
||||
// See: https://github.com/kubernetes/kubernetes/issues/109437
|
||||
// And, we can remove NodeUpdated event once https://github.com/kubernetes/kubernetes/issues/110175 is solved.
|
||||
// We don't need the QueueingHintFn here because the scheduling of Pods will be always retried with backoff when this Event happens.
|
||||
// (the same as QueueAfterBackoff)
|
||||
// (the same as Queue)
|
||||
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.Update}},
|
||||
}
|
||||
}
|
||||
@ -130,7 +130,7 @@ func (pl *NodePorts) EventsToRegister() []framework.ClusterEventWithHint {
|
||||
func (pl *NodePorts) isSchedulableAfterPodDeleted(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
deletedPod, _, err := util.As[*v1.Pod](oldObj, nil)
|
||||
if err != nil {
|
||||
return framework.QueueAfterBackoff, err
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
// If the deleted pod is unscheduled, it doesn't make the target pod schedulable.
|
||||
@ -163,8 +163,8 @@ func (pl *NodePorts) isSchedulableAfterPodDeleted(logger klog.Logger, pod *v1.Po
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
logger.V(4).Info("the deleted pod and the target pod have any common port(s), returning QueueAfterBackoff as deleting this Pod may make the Pod schedulable", "pod", klog.KObj(pod), "deletedPod", klog.KObj(deletedPod))
|
||||
return framework.QueueAfterBackoff, nil
|
||||
logger.V(4).Info("the deleted pod and the target pod have any common port(s), returning Queue as deleting this Pod may make the Pod schedulable", "pod", klog.KObj(pod), "deletedPod", klog.KObj(deletedPod))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
// Filter invoked at the filter extension point.
|
||||
|
@ -313,7 +313,7 @@ func Test_isSchedulableAfterPodDeleted(t *testing.T) {
|
||||
"backoff-wrong-old-object": {
|
||||
pod: podWithHostPort.Obj(),
|
||||
oldObj: "not-a-pod",
|
||||
expectedHint: framework.QueueAfterBackoff,
|
||||
expectedHint: framework.Queue,
|
||||
expectedErr: true,
|
||||
},
|
||||
"skip-queue-on-unscheduled": {
|
||||
@ -334,7 +334,7 @@ func Test_isSchedulableAfterPodDeleted(t *testing.T) {
|
||||
"queue-on-released-hostport": {
|
||||
pod: podWithHostPort.Obj(),
|
||||
oldObj: st.MakePod().Node("fake-node").HostPort(8080).Obj(),
|
||||
expectedHint: framework.QueueAfterBackoff,
|
||||
expectedHint: framework.Queue,
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -61,7 +61,7 @@ func (pl *NodeUnschedulable) isSchedulableAfterNodeChange(logger klog.Logger, po
|
||||
originalNode, modifiedNode, err := util.As[*v1.Node](oldObj, newObj)
|
||||
if err != nil {
|
||||
logger.Error(err, "unexpected objects in isSchedulableAfterNodeChange", "oldObj", oldObj, "newObj", newObj)
|
||||
return framework.QueueAfterBackoff, err
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
originalNodeSchedulable, modifiedNodeSchedulable := false, !modifiedNode.Spec.Unschedulable
|
||||
@ -71,7 +71,7 @@ func (pl *NodeUnschedulable) isSchedulableAfterNodeChange(logger klog.Logger, po
|
||||
|
||||
if !originalNodeSchedulable && modifiedNodeSchedulable {
|
||||
logger.V(4).Info("node was created or updated, pod may be schedulable now", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
return framework.QueueAfterBackoff, nil
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
logger.V(4).Info("node was created or updated, but it doesn't make this pod schedulable", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
|
@ -98,7 +98,7 @@ func TestIsSchedulableAfterNodeChange(t *testing.T) {
|
||||
name: "backoff-wrong-new-object",
|
||||
pod: &v1.Pod{},
|
||||
newObj: "not-a-node",
|
||||
expectedHint: framework.QueueAfterBackoff,
|
||||
expectedHint: framework.Queue,
|
||||
expectedErr: true,
|
||||
},
|
||||
{
|
||||
@ -110,7 +110,7 @@ func TestIsSchedulableAfterNodeChange(t *testing.T) {
|
||||
},
|
||||
},
|
||||
oldObj: "not-a-node",
|
||||
expectedHint: framework.QueueAfterBackoff,
|
||||
expectedHint: framework.Queue,
|
||||
expectedErr: true,
|
||||
},
|
||||
{
|
||||
@ -131,7 +131,7 @@ func TestIsSchedulableAfterNodeChange(t *testing.T) {
|
||||
Unschedulable: false,
|
||||
},
|
||||
},
|
||||
expectedHint: framework.QueueAfterBackoff,
|
||||
expectedHint: framework.Queue,
|
||||
},
|
||||
{
|
||||
name: "skip-unrelated-change",
|
||||
@ -167,7 +167,7 @@ func TestIsSchedulableAfterNodeChange(t *testing.T) {
|
||||
Unschedulable: true,
|
||||
},
|
||||
},
|
||||
expectedHint: framework.QueueAfterBackoff,
|
||||
expectedHint: framework.Queue,
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -85,15 +85,15 @@ type ClusterEventWithHint struct {
|
||||
// and filters out events to reduce useless retry of Pod's scheduling.
|
||||
// It's an optional field. If not set,
|
||||
// the scheduling of Pods will be always retried with backoff when this Event happens.
|
||||
// (the same as QueueAfterBackoff)
|
||||
// (the same as Queue)
|
||||
QueueingHintFn QueueingHintFn
|
||||
}
|
||||
|
||||
// QueueingHintFn returns a hint that signals whether the event can make a Pod,
|
||||
// which was rejected by this plugin in the past scheduling cycle, schedulable or not.
|
||||
// It's called before a Pod gets moved from unschedulableQ to backoffQ or activeQ.
|
||||
// If it returns an error, we'll take the returned QueueingHint as `QueueAfterBackoff` at the caller whatever we returned here so that
|
||||
// we can prevent the Pod from stucking in the unschedulable pod pool.
|
||||
// If it returns an error, we'll take the returned QueueingHint as `Queue` at the caller whatever we returned here so that
|
||||
// we can prevent the Pod from being stuck in the unschedulable pod pool.
|
||||
//
|
||||
// - `pod`: the Pod to be enqueued, which is rejected by this plugin in the past.
|
||||
// - `oldObj` `newObj`: the object involved in that event.
|
||||
@ -109,29 +109,16 @@ const (
|
||||
// scheduling of the pod.
|
||||
QueueSkip QueueingHint = iota
|
||||
|
||||
// QueueAfterBackoff implies that the Pod may be schedulable by the event,
|
||||
// and worth retrying the scheduling again after backoff.
|
||||
QueueAfterBackoff
|
||||
|
||||
// QueueImmediately is returned only when it is highly possible that the Pod gets scheduled in the next scheduling.
|
||||
// You should only return QueueImmediately when there is a high chance that the Pod gets scheduled in the next scheduling.
|
||||
// Otherwise, it's detrimental to scheduling throughput.
|
||||
// For example, when the Pod was rejected as waiting for an external resource to be provisioned, that is directly tied to the Pod,
|
||||
// and the event is that the resource is provisioned, then you can return QueueImmediately.
|
||||
// As a counterexample, when the Pod was rejected due to insufficient memory resource,
|
||||
// and the event is that more memory on Node is available, then you should return QueueAfterBackoff instead of QueueImmediately
|
||||
// because other Pods may be waiting for the same resources and only a few of them would schedule in the next scheduling cycle.
|
||||
QueueImmediately
|
||||
// Queue implies that the Pod may be schedulable by the event.
|
||||
Queue
|
||||
)
|
||||
|
||||
func (s QueueingHint) String() string {
|
||||
switch s {
|
||||
case QueueSkip:
|
||||
return "QueueSkip"
|
||||
case QueueAfterBackoff:
|
||||
return "QueueAfterBackoff"
|
||||
case QueueImmediately:
|
||||
return "QueueImmediately"
|
||||
case Queue:
|
||||
return "Queue"
|
||||
}
|
||||
return ""
|
||||
}
|
||||
@ -179,9 +166,11 @@ type QueuedPodInfo struct {
|
||||
// It shouldn't be updated once initialized. It's used to record the e2e scheduling
|
||||
// latency for a pod.
|
||||
InitialAttemptTimestamp *time.Time
|
||||
// If a Pod failed in a scheduling cycle, record the plugin names it failed by.
|
||||
// UnschedulablePlugins records the plugin names that the Pod failed with Unschedulable or UnschedulableAndUnresolvable status.
|
||||
// It's registered only when the Pod is rejected in PreFilter, Filter, Reserve, or Permit (WaitOnPermit).
|
||||
UnschedulablePlugins sets.Set[string]
|
||||
// PendingPlugins records the plugin names that the Pod failed with Pending status.
|
||||
PendingPlugins sets.Set[string]
|
||||
// Whether the Pod is scheduling gated (by PreEnqueuePlugins) or not.
|
||||
Gated bool
|
||||
}
|
||||
@ -292,8 +281,11 @@ type WeightedAffinityTerm struct {
|
||||
|
||||
// Diagnosis records the details to diagnose a scheduling failure.
|
||||
type Diagnosis struct {
|
||||
NodeToStatusMap NodeToStatusMap
|
||||
NodeToStatusMap NodeToStatusMap
|
||||
// UnschedulablePlugins are plugins that returns Unschedulable or UnschedulableAndUnresolvable.
|
||||
UnschedulablePlugins sets.Set[string]
|
||||
// UnschedulablePlugins are plugins that returns Pending.
|
||||
PendingPlugins sets.Set[string]
|
||||
// PreFilterMsg records the messages returned from PreFilter plugins.
|
||||
PreFilterMsg string
|
||||
// PostFilterMsg records the messages returned from PostFilter plugins.
|
||||
@ -312,6 +304,24 @@ const (
|
||||
NoNodeAvailableMsg = "0/%v nodes are available"
|
||||
)
|
||||
|
||||
func (d *Diagnosis) SetFailedPlugin(sts *Status) {
|
||||
if sts.FailedPlugin() == "" {
|
||||
return
|
||||
}
|
||||
if sts.IsUnschedulable() {
|
||||
if d.UnschedulablePlugins == nil {
|
||||
d.UnschedulablePlugins = sets.New[string]()
|
||||
}
|
||||
d.UnschedulablePlugins.Insert(sts.FailedPlugin())
|
||||
}
|
||||
if sts.Code() == Pending {
|
||||
if d.PendingPlugins == nil {
|
||||
d.PendingPlugins = sets.New[string]()
|
||||
}
|
||||
d.PendingPlugins.Insert(sts.FailedPlugin())
|
||||
}
|
||||
}
|
||||
|
||||
// Error returns detailed information of why the pod failed to fit on each node.
|
||||
// A message format is "0/X nodes are available: <PreFilterMsg>. <FilterMsg>. <PostFilterMsg>."
|
||||
func (f *FitError) Error() string {
|
||||
|
@ -397,72 +397,97 @@ func (p *PriorityQueue) Run(logger klog.Logger) {
|
||||
}, 30*time.Second, p.stop)
|
||||
}
|
||||
|
||||
// isPodWorthRequeuing calls QueueingHintFn of only plugins registered in pInfo.unschedulablePlugins.
|
||||
// If any QueueingHintFn returns QueueImmediately, the scheduling queue is supposed to enqueue this Pod to activeQ.
|
||||
// If no QueueingHintFn returns QueueImmediately, but some return QueueAfterBackoff,
|
||||
// queueingStrategy indicates how the scheduling queue should enqueue the Pod from unschedulable pod pool.
|
||||
type queueingStrategy int
|
||||
|
||||
const (
|
||||
// queueSkip indicates that the scheduling queue should skip requeuing the Pod to activeQ/backoffQ.
|
||||
queueSkip queueingStrategy = iota
|
||||
// queueAfterBackoff indicates that the scheduling queue should requeue the Pod after backoff is completed.
|
||||
queueAfterBackoff
|
||||
// queueImmediately indicates that the scheduling queue should skip backoff and requeue the Pod immediately to activeQ.
|
||||
queueImmediately
|
||||
)
|
||||
|
||||
// isPodWorthRequeuing calls QueueingHintFn of only plugins registered in pInfo.unschedulablePlugins and pInfo.PendingPlugins.
|
||||
//
|
||||
// If any of pInfo.PendingPlugins return Queue,
|
||||
// the scheduling queue is supposed to enqueue this Pod to activeQ, skipping backoffQ.
|
||||
// If any of pInfo.unschedulablePlugins return Queue,
|
||||
// the scheduling queue is supposed to enqueue this Pod to activeQ/backoffQ depending on the remaining backoff time of the Pod.
|
||||
// If all QueueingHintFn returns QueueSkip, the scheduling queue enqueues the Pod back to unschedulable Pod pool
|
||||
// If all QueueingHintFns returns Skip, the scheduling queue enqueues the Pod back to unschedulable Pod pool
|
||||
// because no plugin changes the scheduling result via the event.
|
||||
func (p *PriorityQueue) isPodWorthRequeuing(logger klog.Logger, pInfo *framework.QueuedPodInfo, event framework.ClusterEvent, oldObj, newObj interface{}) framework.QueueingHint {
|
||||
if pInfo.UnschedulablePlugins.Len() == 0 {
|
||||
logger.V(6).Info("Worth requeuing because no unschedulable plugins", "pod", klog.KObj(pInfo.Pod))
|
||||
return framework.QueueAfterBackoff
|
||||
func (p *PriorityQueue) isPodWorthRequeuing(logger klog.Logger, pInfo *framework.QueuedPodInfo, event framework.ClusterEvent, oldObj, newObj interface{}) queueingStrategy {
|
||||
failedPlugins := pInfo.UnschedulablePlugins.Union(pInfo.PendingPlugins)
|
||||
if failedPlugins.Len() == 0 {
|
||||
logger.V(6).Info("Worth requeuing because no failed plugins", "pod", klog.KObj(pInfo.Pod))
|
||||
return queueAfterBackoff
|
||||
}
|
||||
|
||||
if event.IsWildCard() {
|
||||
// If the wildcard event is special one as someone wants to force all Pods to move to activeQ/backoffQ.
|
||||
// We return queueAfterBackoff in this case, while resetting all blocked plugins.
|
||||
logger.V(6).Info("Worth requeuing because the event is wildcard", "pod", klog.KObj(pInfo.Pod))
|
||||
return framework.QueueAfterBackoff
|
||||
return queueAfterBackoff
|
||||
}
|
||||
|
||||
hintMap, ok := p.queueingHintMap[pInfo.Pod.Spec.SchedulerName]
|
||||
if !ok {
|
||||
// shouldn't reach here unless bug.
|
||||
logger.Error(nil, "No QueueingHintMap is registered for this profile", "profile", pInfo.Pod.Spec.SchedulerName, "pod", klog.KObj(pInfo.Pod))
|
||||
return framework.QueueAfterBackoff
|
||||
return queueAfterBackoff
|
||||
}
|
||||
|
||||
pod := pInfo.Pod
|
||||
queueHint := framework.QueueSkip
|
||||
queueStrategy := queueSkip
|
||||
for eventToMatch, hintfns := range hintMap {
|
||||
if eventToMatch.Resource != event.Resource || eventToMatch.ActionType&event.ActionType == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
for _, hintfn := range hintfns {
|
||||
if !pInfo.UnschedulablePlugins.Has(hintfn.PluginName) {
|
||||
if !failedPlugins.Has(hintfn.PluginName) {
|
||||
// skip if it's not hintfn from failedPlugins.
|
||||
continue
|
||||
}
|
||||
|
||||
h, err := hintfn.QueueingHintFn(logger, pod, oldObj, newObj)
|
||||
if err != nil {
|
||||
// If the QueueingHintFn returned an error, we should treat the event as QueueAfterBackoff so that we can prevent
|
||||
// the Pod from stucking in the unschedulable pod pool.
|
||||
// If the QueueingHintFn returned an error, we should treat the event as Queue so that we can prevent
|
||||
// the Pod from being stuck in the unschedulable pod pool.
|
||||
oldObjMeta, newObjMeta, asErr := util.As[klog.KMetadata](oldObj, newObj)
|
||||
if asErr != nil {
|
||||
logger.Error(err, "QueueingHintFn returns error", "event", event, "plugin", hintfn.PluginName, "pod", klog.KObj(pod))
|
||||
} else {
|
||||
logger.Error(err, "QueueingHintFn returns error", "event", event, "plugin", hintfn.PluginName, "pod", klog.KObj(pod), "oldObj", klog.KObj(oldObjMeta), "newObj", klog.KObj(newObjMeta))
|
||||
}
|
||||
h = framework.QueueAfterBackoff
|
||||
h = framework.Queue
|
||||
}
|
||||
|
||||
switch h {
|
||||
case framework.QueueSkip:
|
||||
if h == framework.QueueSkip {
|
||||
continue
|
||||
case framework.QueueImmediately:
|
||||
return h
|
||||
case framework.QueueAfterBackoff:
|
||||
// replace queueHint with the returned value,
|
||||
// but continue to other queueHintFn to check because other plugins may want to return QueueImmediately.
|
||||
queueHint = h
|
||||
}
|
||||
|
||||
if pInfo.PendingPlugins.Has(hintfn.PluginName) {
|
||||
// interprets Queue from the Pending plugin as queueImmediately.
|
||||
// We can return immediately because queueImmediately is the highest priority.
|
||||
return queueImmediately
|
||||
}
|
||||
|
||||
// interprets Queue from the unschedulable plugin as queueAfterBackoff.
|
||||
|
||||
if pInfo.PendingPlugins.Len() == 0 {
|
||||
// We can return immediately because no Pending plugins, which only can make queueImmediately, registered in this Pod,
|
||||
// and queueAfterBackoff is the second highest priority.
|
||||
return queueAfterBackoff
|
||||
}
|
||||
|
||||
// We can't return immediately because there are some Pending plugins registered in this Pod.
|
||||
// We need to check if those plugins return Queue or not and if they do, we return queueImmediately.
|
||||
queueStrategy = queueAfterBackoff
|
||||
}
|
||||
}
|
||||
|
||||
// No queueing hint function is registered for this event
|
||||
// or no queueing hint fn returns the value other than QueueSkip.
|
||||
return queueHint
|
||||
return queueStrategy
|
||||
}
|
||||
|
||||
// runPreEnqueuePlugins iterates PreEnqueue function in each registered PreEnqueuePlugin.
|
||||
@ -626,7 +651,7 @@ func (p *PriorityQueue) SchedulingCycle() int64 {
|
||||
|
||||
// determineSchedulingHintForInFlightPod looks at the unschedulable plugins of the given Pod
|
||||
// and determines the scheduling hint for this Pod while checking the events that happened during in-flight.
|
||||
func (p *PriorityQueue) determineSchedulingHintForInFlightPod(logger klog.Logger, pInfo *framework.QueuedPodInfo) framework.QueueingHint {
|
||||
func (p *PriorityQueue) determineSchedulingHintForInFlightPod(logger klog.Logger, pInfo *framework.QueuedPodInfo) queueingStrategy {
|
||||
logger.V(5).Info("Checking events for in-flight pod", "pod", klog.KObj(pInfo.Pod), "unschedulablePlugins", pInfo.UnschedulablePlugins, "inFlightEventsSize", p.inFlightEvents.Len(), "inFlightPodsSize", len(p.inFlightPods))
|
||||
|
||||
// AddUnschedulableIfNotPresent is called with the Pod at the end of scheduling or binding.
|
||||
@ -638,48 +663,50 @@ func (p *PriorityQueue) determineSchedulingHintForInFlightPod(logger klog.Logger
|
||||
// be empty. If it is not, we may have a problem.
|
||||
if len(pInfo.UnschedulablePlugins) != 0 {
|
||||
logger.Error(nil, "In flight Pod isn't found in the scheduling queue. If you see this error log, it's likely a bug in the scheduler.", "pod", klog.KObj(pInfo.Pod))
|
||||
return framework.QueueAfterBackoff
|
||||
return queueAfterBackoff
|
||||
}
|
||||
if p.inFlightEvents.Len() > len(p.inFlightPods) {
|
||||
return framework.QueueAfterBackoff
|
||||
return queueAfterBackoff
|
||||
}
|
||||
return framework.QueueSkip
|
||||
return queueSkip
|
||||
}
|
||||
|
||||
if len(pInfo.UnschedulablePlugins) == 0 {
|
||||
// No unschedulable plugins are associated with this Pod.
|
||||
failedPlugins := pInfo.UnschedulablePlugins.Union(pInfo.PendingPlugins)
|
||||
if len(failedPlugins) == 0 {
|
||||
// No failed plugins are associated with this Pod.
|
||||
// Meaning something unusual (a temporal failure on kube-apiserver, etc) happened and this Pod gets moved back to the queue.
|
||||
// In this case, we should retry scheduling it because this Pod may not be retried until the next flush.
|
||||
return framework.QueueAfterBackoff
|
||||
return queueAfterBackoff
|
||||
}
|
||||
|
||||
// check if there is an event that makes this Pod schedulable based on pInfo.UnschedulablePlugins.
|
||||
schedulingHint := framework.QueueSkip
|
||||
queueingStrategy := queueSkip
|
||||
for event := inFlightPod.Next(); event != nil; event = event.Next() {
|
||||
e, ok := event.Value.(*clusterEvent)
|
||||
if !ok {
|
||||
// Must be another pod. Can be ignored.
|
||||
// Must be another in-flight Pod (*v1.Pod). Can be ignored.
|
||||
continue
|
||||
}
|
||||
logger.V(5).Info("Checking event for in-flight pod", "pod", klog.KObj(pInfo.Pod), "event", e.event.Label)
|
||||
|
||||
hint := p.isPodWorthRequeuing(logger, pInfo, e.event, e.oldObj, e.newObj)
|
||||
if hint == framework.QueueSkip {
|
||||
switch p.isPodWorthRequeuing(logger, pInfo, e.event, e.oldObj, e.newObj) {
|
||||
case queueSkip:
|
||||
continue
|
||||
}
|
||||
|
||||
if hint == framework.QueueImmediately {
|
||||
// QueueImmediately is the strongest opinion, we don't need to check other events.
|
||||
schedulingHint = framework.QueueImmediately
|
||||
break
|
||||
}
|
||||
if hint == framework.QueueAfterBackoff {
|
||||
// replace schedulingHint with QueueAfterBackoff,
|
||||
// but continue to check other events because we may find it QueueImmediately with other events.
|
||||
schedulingHint = framework.QueueAfterBackoff
|
||||
case queueImmediately:
|
||||
// queueImmediately is the highest priority.
|
||||
// No need to go through the rest of the events.
|
||||
return queueImmediately
|
||||
case queueAfterBackoff:
|
||||
// replace schedulingHint with queueAfterBackoff
|
||||
queueingStrategy = queueAfterBackoff
|
||||
if pInfo.PendingPlugins.Len() == 0 {
|
||||
// We can return immediately because no Pending plugins, which only can make queueImmediately, registered in this Pod,
|
||||
// and queueAfterBackoff is the second highest priority.
|
||||
return queueAfterBackoff
|
||||
}
|
||||
}
|
||||
}
|
||||
return schedulingHint
|
||||
return queueingStrategy
|
||||
}
|
||||
|
||||
// addUnschedulableIfNotPresentWithoutQueueingHint inserts a pod that cannot be scheduled into
|
||||
@ -693,12 +720,16 @@ func (p *PriorityQueue) addUnschedulableWithoutQueueingHint(logger klog.Logger,
|
||||
// Refresh the timestamp since the pod is re-added.
|
||||
pInfo.Timestamp = p.clock.Now()
|
||||
|
||||
// When the queueing hint is enabled, they are used differently.
|
||||
// But, we use all of them as UnschedulablePlugins when the queueing hint isn't enabled so that we don't break the old behaviour.
|
||||
failedPlugins := pInfo.UnschedulablePlugins.Union(pInfo.PendingPlugins)
|
||||
|
||||
// If a move request has been received, move it to the BackoffQ, otherwise move
|
||||
// it to unschedulablePods.
|
||||
for plugin := range pInfo.UnschedulablePlugins {
|
||||
for plugin := range failedPlugins {
|
||||
metrics.UnschedulableReason(plugin, pInfo.Pod.Spec.SchedulerName).Inc()
|
||||
}
|
||||
if p.moveRequestCycle >= podSchedulingCycle || len(pInfo.UnschedulablePlugins) == 0 {
|
||||
if p.moveRequestCycle >= podSchedulingCycle || len(failedPlugins) == 0 {
|
||||
// Two cases to move a Pod to the active/backoff queue:
|
||||
// - The Pod is rejected by some plugins, but a move request is received after this Pod's scheduling cycle is started.
|
||||
// In this case, the received event may be make Pod schedulable and we should retry scheduling it.
|
||||
@ -753,7 +784,8 @@ func (p *PriorityQueue) AddUnschedulableIfNotPresent(logger klog.Logger, pInfo *
|
||||
|
||||
// If a move request has been received, move it to the BackoffQ, otherwise move
|
||||
// it to unschedulablePods.
|
||||
for plugin := range pInfo.UnschedulablePlugins {
|
||||
failedPlugins := pInfo.UnschedulablePlugins.Union(pInfo.PendingPlugins)
|
||||
for plugin := range failedPlugins {
|
||||
metrics.UnschedulableReason(plugin, pInfo.Pod.Spec.SchedulerName).Inc()
|
||||
}
|
||||
|
||||
@ -762,7 +794,7 @@ func (p *PriorityQueue) AddUnschedulableIfNotPresent(logger klog.Logger, pInfo *
|
||||
|
||||
// In this case, we try to requeue this Pod to activeQ/backoffQ.
|
||||
queue := p.requeuePodViaQueueingHint(logger, pInfo, schedulingHint, ScheduleAttemptFailure)
|
||||
logger.V(3).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pod), "event", ScheduleAttemptFailure, "queue", queue, "schedulingCycle", podSchedulingCycle, "hint", schedulingHint)
|
||||
logger.V(3).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pod), "event", ScheduleAttemptFailure, "queue", queue, "schedulingCycle", podSchedulingCycle, "hint", schedulingHint, "unschedulable plugins", failedPlugins)
|
||||
if queue == activeQ {
|
||||
// When the Pod is moved to activeQ, need to let p.cond know so that the Pod will be pop()ed out.
|
||||
p.cond.Broadcast()
|
||||
@ -853,10 +885,11 @@ func (p *PriorityQueue) Pop() (*framework.QueuedPodInfo, error) {
|
||||
}
|
||||
|
||||
// Update metrics and reset the set of unschedulable plugins for the next attempt.
|
||||
for plugin := range pInfo.UnschedulablePlugins {
|
||||
for plugin := range pInfo.UnschedulablePlugins.Union(pInfo.PendingPlugins) {
|
||||
metrics.UnschedulableReason(plugin, pInfo.Pod.Spec.SchedulerName).Dec()
|
||||
}
|
||||
pInfo.UnschedulablePlugins.Clear()
|
||||
pInfo.PendingPlugins.Clear()
|
||||
|
||||
return pInfo, nil
|
||||
}
|
||||
@ -1067,15 +1100,15 @@ func (p *PriorityQueue) MoveAllToActiveOrBackoffQueue(logger klog.Logger, event
|
||||
// It returns the queue name Pod goes.
|
||||
//
|
||||
// NOTE: this function assumes lock has been acquired in caller
|
||||
func (p *PriorityQueue) requeuePodViaQueueingHint(logger klog.Logger, pInfo *framework.QueuedPodInfo, schedulingHint framework.QueueingHint, event string) string {
|
||||
if schedulingHint == framework.QueueSkip {
|
||||
func (p *PriorityQueue) requeuePodViaQueueingHint(logger klog.Logger, pInfo *framework.QueuedPodInfo, strategy queueingStrategy, event string) string {
|
||||
if strategy == queueSkip {
|
||||
p.unschedulablePods.addOrUpdate(pInfo)
|
||||
metrics.SchedulerQueueIncomingPods.WithLabelValues("unschedulable", event).Inc()
|
||||
return unschedulablePods
|
||||
}
|
||||
|
||||
pod := pInfo.Pod
|
||||
if schedulingHint == framework.QueueAfterBackoff && p.isPodBackingoff(pInfo) {
|
||||
if strategy == queueAfterBackoff && p.isPodBackingoff(pInfo) {
|
||||
if err := p.podBackoffQ.Add(pInfo); err != nil {
|
||||
logger.Error(err, "Error adding pod to the backoff queue, queue this Pod to unschedulable pod pool", "pod", klog.KObj(pod))
|
||||
p.unschedulablePods.addOrUpdate(pInfo)
|
||||
@ -1086,7 +1119,7 @@ func (p *PriorityQueue) requeuePodViaQueueingHint(logger klog.Logger, pInfo *fra
|
||||
return backoffQ
|
||||
}
|
||||
|
||||
// Reach here if schedulingHint is QueueImmediately, or schedulingHint is QueueAfterBackoff but the pod is not backing off.
|
||||
// Reach here if schedulingHint is QueueImmediately, or schedulingHint is Queue but the pod is not backing off.
|
||||
|
||||
added, err := p.addToActiveQ(logger, pInfo)
|
||||
if err != nil {
|
||||
@ -1111,7 +1144,7 @@ func (p *PriorityQueue) movePodsToActiveOrBackoffQueue(logger klog.Logger, podIn
|
||||
activated := false
|
||||
for _, pInfo := range podInfoList {
|
||||
schedulingHint := p.isPodWorthRequeuing(logger, pInfo, event, oldObj, newObj)
|
||||
if schedulingHint == framework.QueueSkip {
|
||||
if schedulingHint == queueSkip {
|
||||
// QueueingHintFn determined that this Pod isn't worth putting to activeQ or backoffQ by this event.
|
||||
logger.V(5).Info("Event is not making pod schedulable", "pod", klog.KObj(pInfo.Pod), "event", event.Label)
|
||||
continue
|
||||
|
@ -87,13 +87,10 @@ var (
|
||||
cmpopts.IgnoreFields(nominator{}, "podLister", "lock"),
|
||||
}
|
||||
|
||||
queueHintReturnQueueAfterBackoff = func(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
return framework.QueueAfterBackoff, nil
|
||||
queueHintReturnQueue = func(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
return framework.Queue, nil
|
||||
}
|
||||
queueHintReturnQueueImmediately = func(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
return framework.QueueImmediately, nil
|
||||
}
|
||||
queueHintReturnQueueSkip = func(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
queueHintReturnSkip = func(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
)
|
||||
@ -244,7 +241,7 @@ func Test_InFlightPods(t *testing.T) {
|
||||
AssignedPodAdd: {
|
||||
{
|
||||
PluginName: "fooPlugin1",
|
||||
QueueingHintFn: queueHintReturnQueueSkip,
|
||||
QueueingHintFn: queueHintReturnSkip,
|
||||
},
|
||||
},
|
||||
},
|
||||
@ -340,7 +337,7 @@ func Test_InFlightPods(t *testing.T) {
|
||||
AssignedPodAdd: {
|
||||
{
|
||||
PluginName: "fooPlugin1",
|
||||
QueueingHintFn: queueHintReturnQueueSkip,
|
||||
QueueingHintFn: queueHintReturnSkip,
|
||||
},
|
||||
},
|
||||
},
|
||||
@ -364,7 +361,7 @@ func Test_InFlightPods(t *testing.T) {
|
||||
AssignedPodAdd: {
|
||||
{
|
||||
PluginName: "fooPlugin1",
|
||||
QueueingHintFn: queueHintReturnQueueImmediately,
|
||||
QueueingHintFn: queueHintReturnQueue,
|
||||
},
|
||||
},
|
||||
},
|
||||
@ -389,14 +386,14 @@ func Test_InFlightPods(t *testing.T) {
|
||||
{
|
||||
// It will be ignored because the event is not NodeAdd.
|
||||
PluginName: "fooPlugin1",
|
||||
QueueingHintFn: queueHintReturnQueueImmediately,
|
||||
QueueingHintFn: queueHintReturnQueue,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "pod is enqueued to unschedulable pod pool because the failed plugin has a hint fn but it returns QueueSkip",
|
||||
name: "pod is enqueued to unschedulable pod pool because the failed plugin has a hint fn but it returns Skip",
|
||||
isSchedulingQueueHintEnabled: true,
|
||||
initialPods: []*v1.Pod{pod},
|
||||
actions: []action{
|
||||
@ -414,20 +411,24 @@ func Test_InFlightPods(t *testing.T) {
|
||||
AssignedPodAdd: {
|
||||
{
|
||||
PluginName: "fooPlugin1",
|
||||
QueueingHintFn: queueHintReturnQueueSkip,
|
||||
QueueingHintFn: queueHintReturnSkip,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "pod is enqueued to activeQ because the failed plugin has a hint fn and it returns QueueImmediately",
|
||||
name: "pod is enqueued to activeQ because the Pending plugins has a hint fn and it returns Queue",
|
||||
isSchedulingQueueHintEnabled: true,
|
||||
initialPods: []*v1.Pod{pod},
|
||||
actions: []action{
|
||||
{podPopped: pod},
|
||||
{eventHappens: &AssignedPodAdd},
|
||||
{podEnqueued: newQueuedPodInfoForLookup(pod, "fooPlugin1", "fooPlugin2", "fooPlugin3")},
|
||||
{podEnqueued: &framework.QueuedPodInfo{
|
||||
PodInfo: mustNewPodInfo(pod),
|
||||
UnschedulablePlugins: sets.New("fooPlugin2", "fooPlugin3"),
|
||||
PendingPlugins: sets.New("fooPlugin1"),
|
||||
}},
|
||||
},
|
||||
wantActiveQPodNames: []string{"targetpod"},
|
||||
wantInFlightPods: nil,
|
||||
@ -436,26 +437,24 @@ func Test_InFlightPods(t *testing.T) {
|
||||
"": {
|
||||
AssignedPodAdd: {
|
||||
{
|
||||
// it will be ignored because the hint fn returns QueueSkip that is weaker than queueHintReturnQueueImmediately from fooPlugin1.
|
||||
PluginName: "fooPlugin3",
|
||||
QueueingHintFn: queueHintReturnQueueSkip,
|
||||
QueueingHintFn: queueHintReturnSkip,
|
||||
},
|
||||
{
|
||||
// it will be ignored because the hint fn returns QueueAfterBackoff that is weaker than queueHintReturnQueueImmediately from fooPlugin1.
|
||||
PluginName: "fooPlugin2",
|
||||
QueueingHintFn: queueHintReturnQueueAfterBackoff,
|
||||
QueueingHintFn: queueHintReturnQueue,
|
||||
},
|
||||
{
|
||||
// The hint fn tells that this event makes a Pod scheudlable immediately.
|
||||
PluginName: "fooPlugin1",
|
||||
QueueingHintFn: queueHintReturnQueueImmediately,
|
||||
QueueingHintFn: queueHintReturnQueue,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "pod is enqueued to backoffQ because the failed plugin has a hint fn and it returns QueueAfterBackoff",
|
||||
name: "pod is enqueued to backoffQ because the failed plugin has a hint fn and it returns Queue",
|
||||
isSchedulingQueueHintEnabled: true,
|
||||
initialPods: []*v1.Pod{pod},
|
||||
actions: []action{
|
||||
@ -470,21 +469,21 @@ func Test_InFlightPods(t *testing.T) {
|
||||
"": {
|
||||
AssignedPodAdd: {
|
||||
{
|
||||
// it will be ignored because the hint fn returns QueueSkip that is weaker than queueHintReturnQueueAfterBackoff from fooPlugin1.
|
||||
// it will be ignored because the hint fn returns Skip that is weaker than queueHintReturnQueue from fooPlugin1.
|
||||
PluginName: "fooPlugin2",
|
||||
QueueingHintFn: queueHintReturnQueueSkip,
|
||||
QueueingHintFn: queueHintReturnSkip,
|
||||
},
|
||||
{
|
||||
// The hint fn tells that this event makes a Pod scheudlable.
|
||||
// The hint fn tells that this event makes a Pod schedulable.
|
||||
PluginName: "fooPlugin1",
|
||||
QueueingHintFn: queueHintReturnQueueAfterBackoff,
|
||||
QueueingHintFn: queueHintReturnQueue,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "pod is enqueued to activeQ because the failed plugin has a hint fn and it returns QueueImmediately for a concurrent event that was received while some other pod was in flight",
|
||||
name: "pod is enqueued to activeQ because the failed plugin has a hint fn and it returns Queue for a concurrent event that was received while some other pod was in flight",
|
||||
isSchedulingQueueHintEnabled: true,
|
||||
initialPods: []*v1.Pod{pod, pod2},
|
||||
actions: []action{
|
||||
@ -501,7 +500,8 @@ func Test_InFlightPods(t *testing.T) {
|
||||
}},
|
||||
{callback: func(t *testing.T, q *PriorityQueue) {
|
||||
logger, _ := ktesting.NewTestContext(t)
|
||||
poppedPod2.UnschedulablePlugins = sets.New("fooPlugin1", "fooPlugin2", "fooPlugin3")
|
||||
poppedPod2.UnschedulablePlugins = sets.New("fooPlugin2", "fooPlugin3")
|
||||
poppedPod2.PendingPlugins = sets.New("fooPlugin1")
|
||||
err := q.AddUnschedulableIfNotPresent(logger, poppedPod2, q.SchedulingCycle())
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error from AddUnschedulableIfNotPresent: %v", err)
|
||||
@ -517,32 +517,34 @@ func Test_InFlightPods(t *testing.T) {
|
||||
{
|
||||
// it will be ignored because the hint fn returns QueueSkip that is weaker than queueHintReturnQueueImmediately from fooPlugin1.
|
||||
PluginName: "fooPlugin3",
|
||||
QueueingHintFn: queueHintReturnQueueSkip,
|
||||
QueueingHintFn: queueHintReturnSkip,
|
||||
},
|
||||
{
|
||||
// it will be ignored because the hint fn returns QueueAfterBackoff that is weaker than queueHintReturnQueueImmediately from fooPlugin1.
|
||||
// it will be ignored because the fooPlugin2 is registered in UnschedulablePlugins and it's interpret as Queue that is weaker than QueueImmediately from fooPlugin1.
|
||||
PluginName: "fooPlugin2",
|
||||
QueueingHintFn: queueHintReturnQueueAfterBackoff,
|
||||
QueueingHintFn: queueHintReturnQueue,
|
||||
},
|
||||
{
|
||||
// The hint fn tells that this event makes a Pod scheudlable immediately.
|
||||
// The hint fn tells that this event makes a Pod scheudlable.
|
||||
// Given fooPlugin1 is registered as Pendings, we interpret Queue as queueImmediately.
|
||||
PluginName: "fooPlugin1",
|
||||
QueueingHintFn: queueHintReturnQueueImmediately,
|
||||
QueueingHintFn: queueHintReturnQueue,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "popped pod must have empty UnschedulablePlugins",
|
||||
name: "popped pod must have empty UnschedulablePlugins and PendingPlugins",
|
||||
isSchedulingQueueHintEnabled: true,
|
||||
initialPods: []*v1.Pod{pod},
|
||||
actions: []action{
|
||||
{callback: func(t *testing.T, q *PriorityQueue) { poppedPod = popPod(t, q, pod) }},
|
||||
{callback: func(t *testing.T, q *PriorityQueue) {
|
||||
logger, _ := ktesting.NewTestContext(t)
|
||||
// Unschedulable.
|
||||
poppedPod.UnschedulablePlugins = sets.New("fooPlugin1")
|
||||
// Unschedulable due to PendingPlugins.
|
||||
poppedPod.PendingPlugins = sets.New("fooPlugin1")
|
||||
poppedPod.UnschedulablePlugins = sets.New("fooPlugin2")
|
||||
if err := q.AddUnschedulableIfNotPresent(logger, poppedPod, q.SchedulingCycle()); err != nil {
|
||||
t.Errorf("Unexpected error from AddUnschedulableIfNotPresent: %v", err)
|
||||
}
|
||||
@ -568,7 +570,7 @@ func Test_InFlightPods(t *testing.T) {
|
||||
{
|
||||
// The hint fn tells that this event makes a Pod scheudlable immediately.
|
||||
PluginName: "fooPlugin1",
|
||||
QueueingHintFn: queueHintReturnQueueImmediately,
|
||||
QueueingHintFn: queueHintReturnQueue,
|
||||
},
|
||||
},
|
||||
},
|
||||
@ -694,9 +696,9 @@ func TestPop(t *testing.T) {
|
||||
"": {
|
||||
PvAdd: {
|
||||
{
|
||||
// The hint fn tells that this event makes a Pod scheudlable immediately.
|
||||
// The hint fn tells that this event makes a Pod scheudlable.
|
||||
PluginName: "fooPlugin1",
|
||||
QueueingHintFn: queueHintReturnQueueImmediately,
|
||||
QueueingHintFn: queueHintReturnQueue,
|
||||
},
|
||||
},
|
||||
},
|
||||
@ -713,7 +715,8 @@ func TestPop(t *testing.T) {
|
||||
|
||||
// Simulate failed attempt that makes the pod unschedulable.
|
||||
poppedPod := popPod(t, q, pod)
|
||||
poppedPod.UnschedulablePlugins = sets.New("fooPlugin1")
|
||||
// We put register the plugin to PendingPlugins so that it's interpreted as queueImmediately and skip backoff.
|
||||
poppedPod.PendingPlugins = sets.New("fooPlugin1")
|
||||
if err := q.AddUnschedulableIfNotPresent(logger, poppedPod, q.SchedulingCycle()); err != nil {
|
||||
t.Errorf("Unexpected error from AddUnschedulableIfNotPresent: %v", err)
|
||||
}
|
||||
@ -723,8 +726,8 @@ func TestPop(t *testing.T) {
|
||||
|
||||
// Now check result of Pop.
|
||||
poppedPod = popPod(t, q, pod)
|
||||
if len(poppedPod.UnschedulablePlugins) > 0 {
|
||||
t.Errorf("QueuedPodInfo from Pop should have empty UnschedulablePlugins, got instead: %+v", poppedPod)
|
||||
if len(poppedPod.PendingPlugins) > 0 {
|
||||
t.Errorf("QueuedPodInfo from Pop should have empty PendingPlugins, got instead: %+v", poppedPod)
|
||||
}
|
||||
})
|
||||
}
|
||||
@ -1221,7 +1224,7 @@ func BenchmarkMoveAllToActiveOrBackoffQueue(b *testing.B) {
|
||||
if (k+1)%(j+1) == 0 {
|
||||
m[""][events[j]] = append(m[""][events[j]], &QueueingHintFunction{
|
||||
PluginName: plugins[k],
|
||||
QueueingHintFn: queueHintReturnQueueAfterBackoff,
|
||||
QueueingHintFn: queueHintReturnQueue,
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -1286,28 +1289,28 @@ func TestPriorityQueue_MoveAllToActiveOrBackoffQueueWithQueueingHint(t *testing.
|
||||
expectedQ string
|
||||
}{
|
||||
{
|
||||
name: "QueueImmediately queues pod to activeQ",
|
||||
podInfo: &framework.QueuedPodInfo{PodInfo: mustNewPodInfo(p)},
|
||||
hint: queueHintReturnQueueImmediately,
|
||||
name: "Queue queues pod to activeQ",
|
||||
podInfo: &framework.QueuedPodInfo{PodInfo: mustNewPodInfo(p), PendingPlugins: sets.New("foo")},
|
||||
hint: queueHintReturnQueue,
|
||||
expectedQ: activeQ,
|
||||
},
|
||||
{
|
||||
name: "QueueAfterBackoff queues pod to backoffQ if Pod is backing off",
|
||||
podInfo: &framework.QueuedPodInfo{PodInfo: mustNewPodInfo(p)},
|
||||
hint: queueHintReturnQueueAfterBackoff,
|
||||
name: "Queue queues pod to backoffQ if Pod is backing off",
|
||||
podInfo: &framework.QueuedPodInfo{PodInfo: mustNewPodInfo(p), UnschedulablePlugins: sets.New("foo")},
|
||||
hint: queueHintReturnQueue,
|
||||
expectedQ: backoffQ,
|
||||
},
|
||||
{
|
||||
name: "QueueAfterBackoff queues pod to activeQ if Pod is not backing off",
|
||||
podInfo: &framework.QueuedPodInfo{PodInfo: mustNewPodInfo(p)},
|
||||
hint: queueHintReturnQueueAfterBackoff,
|
||||
name: "Queue queues pod to activeQ if Pod is not backing off",
|
||||
podInfo: &framework.QueuedPodInfo{PodInfo: mustNewPodInfo(p), UnschedulablePlugins: sets.New("foo")},
|
||||
hint: queueHintReturnQueue,
|
||||
duration: DefaultPodInitialBackoffDuration, // backoff is finished
|
||||
expectedQ: activeQ,
|
||||
},
|
||||
{
|
||||
name: "QueueSkip queues pod to unschedulablePods",
|
||||
podInfo: &framework.QueuedPodInfo{PodInfo: mustNewPodInfo(p)},
|
||||
hint: queueHintReturnQueueSkip,
|
||||
name: "Skip queues pod to unschedulablePods",
|
||||
podInfo: &framework.QueuedPodInfo{PodInfo: mustNewPodInfo(p), UnschedulablePlugins: sets.New("foo")},
|
||||
hint: queueHintReturnSkip,
|
||||
expectedQ: unschedulablePods,
|
||||
},
|
||||
}
|
||||
@ -1362,7 +1365,7 @@ func TestPriorityQueue_MoveAllToActiveOrBackoffQueue(t *testing.T) {
|
||||
m[""][NodeAdd] = []*QueueingHintFunction{
|
||||
{
|
||||
PluginName: "fooPlugin",
|
||||
QueueingHintFn: queueHintReturnQueueAfterBackoff,
|
||||
QueueingHintFn: queueHintReturnQueue,
|
||||
},
|
||||
}
|
||||
q := NewTestQueue(ctx, newDefaultQueueSort(), WithClock(c), WithQueueingHintMapPerProfile(m))
|
||||
@ -1544,7 +1547,7 @@ func TestPriorityQueue_AssignedPodAdded(t *testing.T) {
|
||||
m[""][AssignedPodAdd] = []*QueueingHintFunction{
|
||||
{
|
||||
PluginName: "fakePlugin",
|
||||
QueueingHintFn: queueHintReturnQueueAfterBackoff,
|
||||
QueueingHintFn: queueHintReturnQueue,
|
||||
},
|
||||
}
|
||||
q := NewTestQueue(ctx, newDefaultQueueSort(), WithClock(c), WithQueueingHintMapPerProfile(m))
|
||||
@ -2139,7 +2142,7 @@ func TestHighPriorityFlushUnschedulablePodsLeftover(t *testing.T) {
|
||||
m[""][NodeAdd] = []*QueueingHintFunction{
|
||||
{
|
||||
PluginName: "fakePlugin",
|
||||
QueueingHintFn: queueHintReturnQueueAfterBackoff,
|
||||
QueueingHintFn: queueHintReturnQueue,
|
||||
},
|
||||
}
|
||||
logger, ctx := ktesting.NewTestContext(t)
|
||||
@ -3180,18 +3183,14 @@ func mustNewPodInfo(pod *v1.Pod) *framework.PodInfo {
|
||||
// Test_isPodWorthRequeuing tests isPodWorthRequeuing function.
|
||||
func Test_isPodWorthRequeuing(t *testing.T) {
|
||||
count := 0
|
||||
queueHintReturnQueueImmediately := func(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
queueHintReturnQueue := func(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
count++
|
||||
return framework.QueueImmediately, nil
|
||||
return framework.Queue, nil
|
||||
}
|
||||
queueHintReturnQueueSkip := func(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
queueHintReturnSkip := func(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
count++
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
queueHintReturnQueueAfterBackoff := func(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
count++
|
||||
return framework.QueueAfterBackoff, nil
|
||||
}
|
||||
queueHintReturnErr := func(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
count++
|
||||
return framework.QueueSkip, fmt.Errorf("unexpected error")
|
||||
@ -3203,12 +3202,12 @@ func Test_isPodWorthRequeuing(t *testing.T) {
|
||||
event framework.ClusterEvent
|
||||
oldObj interface{}
|
||||
newObj interface{}
|
||||
expected framework.QueueingHint
|
||||
expected queueingStrategy
|
||||
expectedExecutionCount int // expected total execution count of queueing hint function
|
||||
queueingHintMap QueueingHintMapPerProfile
|
||||
}{
|
||||
{
|
||||
name: "return QueueAfterBackoff when no queueing hint function is registered for the event",
|
||||
name: "return Queue when no queueing hint function is registered for the event",
|
||||
podInfo: &framework.QueuedPodInfo{
|
||||
UnschedulablePlugins: sets.New("fooPlugin1"),
|
||||
PodInfo: mustNewPodInfo(st.MakePod().Name("pod1").Namespace("ns1").UID("1").Obj()),
|
||||
@ -3216,7 +3215,7 @@ func Test_isPodWorthRequeuing(t *testing.T) {
|
||||
event: NodeAdd,
|
||||
oldObj: nil,
|
||||
newObj: st.MakeNode().Obj(),
|
||||
expected: framework.QueueSkip,
|
||||
expected: queueSkip,
|
||||
expectedExecutionCount: 0,
|
||||
queueingHintMap: QueueingHintMapPerProfile{
|
||||
"": {
|
||||
@ -3225,14 +3224,14 @@ func Test_isPodWorthRequeuing(t *testing.T) {
|
||||
{
|
||||
// It will be ignored because the event is not NodeAdd.
|
||||
PluginName: "fooPlugin1",
|
||||
QueueingHintFn: queueHintReturnQueueImmediately,
|
||||
QueueingHintFn: queueHintReturnQueue,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Treat the event as QueueAfterBackoff when QueueHintFn returns error",
|
||||
name: "Treat the event as Queue when QueueHintFn returns error",
|
||||
podInfo: &framework.QueuedPodInfo{
|
||||
UnschedulablePlugins: sets.New("fooPlugin1"),
|
||||
PodInfo: mustNewPodInfo(st.MakePod().Name("pod1").Namespace("ns1").UID("1").Obj()),
|
||||
@ -3240,7 +3239,7 @@ func Test_isPodWorthRequeuing(t *testing.T) {
|
||||
event: NodeAdd,
|
||||
oldObj: nil,
|
||||
newObj: st.MakeNode().Obj(),
|
||||
expected: framework.QueueAfterBackoff,
|
||||
expected: queueAfterBackoff,
|
||||
expectedExecutionCount: 1,
|
||||
queueingHintMap: QueueingHintMapPerProfile{
|
||||
"": {
|
||||
@ -3254,7 +3253,7 @@ func Test_isPodWorthRequeuing(t *testing.T) {
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "return QueueAfterBackoff when the event is wildcard",
|
||||
name: "return Queue when the event is wildcard",
|
||||
podInfo: &framework.QueuedPodInfo{
|
||||
UnschedulablePlugins: sets.New("fooPlugin1"),
|
||||
PodInfo: mustNewPodInfo(st.MakePod().Name("pod1").Namespace("ns1").UID("1").Obj()),
|
||||
@ -3262,78 +3261,40 @@ func Test_isPodWorthRequeuing(t *testing.T) {
|
||||
event: WildCardEvent,
|
||||
oldObj: nil,
|
||||
newObj: st.MakeNode().Obj(),
|
||||
expected: framework.QueueAfterBackoff,
|
||||
expected: queueAfterBackoff,
|
||||
expectedExecutionCount: 0,
|
||||
queueingHintMap: QueueingHintMapPerProfile{},
|
||||
},
|
||||
{
|
||||
name: "QueueImmediately is the highest priority",
|
||||
name: "interprets Queue from the Pending plugin as queueImmediately",
|
||||
podInfo: &framework.QueuedPodInfo{
|
||||
UnschedulablePlugins: sets.New("fooPlugin1", "fooPlugin2", "fooPlugin3", "fooPlugin4", "fooPlugin5"),
|
||||
UnschedulablePlugins: sets.New("fooPlugin1", "fooPlugin3"),
|
||||
PendingPlugins: sets.New("fooPlugin2"),
|
||||
PodInfo: mustNewPodInfo(st.MakePod().Name("pod1").Namespace("ns1").UID("1").Obj()),
|
||||
},
|
||||
event: NodeAdd,
|
||||
oldObj: nil,
|
||||
newObj: st.MakeNode().Obj(),
|
||||
expected: framework.QueueImmediately,
|
||||
expectedExecutionCount: 3,
|
||||
newObj: st.MakeNode().Node,
|
||||
expected: queueImmediately,
|
||||
expectedExecutionCount: 2,
|
||||
queueingHintMap: QueueingHintMapPerProfile{
|
||||
"": {
|
||||
NodeAdd: {
|
||||
{
|
||||
// executed
|
||||
PluginName: "fooPlugin1",
|
||||
QueueingHintFn: queueHintReturnQueueAfterBackoff,
|
||||
PluginName: "fooPlugin1",
|
||||
// It returns Queue and it's interpreted as queueAfterBackoff.
|
||||
// But, the function continues to run other hints because the Pod has PendingPlugins, which can result in queueImmediately.
|
||||
QueueingHintFn: queueHintReturnQueue,
|
||||
},
|
||||
{
|
||||
// executed
|
||||
PluginName: "fooPlugin2",
|
||||
QueueingHintFn: queueHintReturnErr,
|
||||
},
|
||||
{
|
||||
// executed
|
||||
// But, no more queueing hint function is executed
|
||||
// because the highest priority is QueueImmediately.
|
||||
PluginName: "fooPlugin3",
|
||||
QueueingHintFn: queueHintReturnQueueImmediately,
|
||||
},
|
||||
{
|
||||
PluginName: "fooPlugin4",
|
||||
QueueingHintFn: queueHintReturnQueueAfterBackoff,
|
||||
},
|
||||
{
|
||||
PluginName: "fooPlugin5",
|
||||
QueueingHintFn: queueHintReturnQueueSkip,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "QueueSkip is the lowest priority",
|
||||
podInfo: &framework.QueuedPodInfo{
|
||||
UnschedulablePlugins: sets.New("fooPlugin1", "fooPlugin2", "fooPlugin3", "fooPlugin4"),
|
||||
PodInfo: mustNewPodInfo(st.MakePod().Name("pod1").Namespace("ns1").UID("1").Obj()),
|
||||
},
|
||||
event: NodeAdd,
|
||||
oldObj: nil,
|
||||
newObj: st.MakeNode().Obj(),
|
||||
expected: framework.QueueAfterBackoff,
|
||||
expectedExecutionCount: 4,
|
||||
queueingHintMap: QueueingHintMapPerProfile{
|
||||
"": {
|
||||
NodeAdd: {
|
||||
{
|
||||
PluginName: "fooPlugin1",
|
||||
QueueingHintFn: queueHintReturnQueueAfterBackoff,
|
||||
},
|
||||
{
|
||||
PluginName: "fooPlugin2",
|
||||
QueueingHintFn: queueHintReturnQueueSkip,
|
||||
PluginName: "fooPlugin2",
|
||||
// It's interpreted as queueImmediately.
|
||||
// The function doesn't run other hints because queueImmediately is the highest priority.
|
||||
QueueingHintFn: queueHintReturnQueue,
|
||||
},
|
||||
{
|
||||
PluginName: "fooPlugin3",
|
||||
QueueingHintFn: queueHintReturnQueueAfterBackoff,
|
||||
QueueingHintFn: queueHintReturnQueue,
|
||||
},
|
||||
{
|
||||
PluginName: "fooPlugin4",
|
||||
@ -3344,7 +3305,7 @@ func Test_isPodWorthRequeuing(t *testing.T) {
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Queueing hint function that isn't from the plugin, that is in the UnschedulablePlugins, is ignored",
|
||||
name: "interprets Queue from the Unschedulable plugin as queueAfterBackoff",
|
||||
podInfo: &framework.QueuedPodInfo{
|
||||
UnschedulablePlugins: sets.New("fooPlugin1", "fooPlugin2"),
|
||||
PodInfo: mustNewPodInfo(st.MakePod().Name("pod1").Namespace("ns1").UID("1").Obj()),
|
||||
@ -3352,29 +3313,57 @@ func Test_isPodWorthRequeuing(t *testing.T) {
|
||||
event: NodeAdd,
|
||||
oldObj: nil,
|
||||
newObj: st.MakeNode().Obj(),
|
||||
expected: framework.QueueAfterBackoff,
|
||||
expected: queueAfterBackoff,
|
||||
expectedExecutionCount: 2,
|
||||
queueingHintMap: QueueingHintMapPerProfile{
|
||||
"": {
|
||||
NodeAdd: {
|
||||
{
|
||||
// Skip will be ignored
|
||||
PluginName: "fooPlugin1",
|
||||
QueueingHintFn: queueHintReturnQueueAfterBackoff,
|
||||
QueueingHintFn: queueHintReturnSkip,
|
||||
},
|
||||
{
|
||||
// Skip will be ignored
|
||||
PluginName: "fooPlugin2",
|
||||
QueueingHintFn: queueHintReturnQueueSkip,
|
||||
},
|
||||
{
|
||||
PluginName: "fooPlugin3",
|
||||
QueueingHintFn: queueHintReturnQueueImmediately, // It'll be ignored.
|
||||
QueueingHintFn: queueHintReturnQueue,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "If event is specific Node update event, queueing hint function for NodeUpdate/UpdateNodeLabel is executed",
|
||||
name: "Queueing hint function that isn't from the plugin in UnschedulablePlugins/PendingPlugins is ignored",
|
||||
podInfo: &framework.QueuedPodInfo{
|
||||
UnschedulablePlugins: sets.New("fooPlugin1", "fooPlugin2"),
|
||||
PodInfo: mustNewPodInfo(st.MakePod().Name("pod1").Namespace("ns1").UID("1").Obj()),
|
||||
},
|
||||
event: NodeAdd,
|
||||
oldObj: nil,
|
||||
newObj: st.MakeNode().Node,
|
||||
expected: queueSkip,
|
||||
expectedExecutionCount: 2,
|
||||
queueingHintMap: QueueingHintMapPerProfile{
|
||||
"": {
|
||||
NodeAdd: {
|
||||
{
|
||||
PluginName: "fooPlugin1",
|
||||
QueueingHintFn: queueHintReturnSkip,
|
||||
},
|
||||
{
|
||||
PluginName: "fooPlugin2",
|
||||
QueueingHintFn: queueHintReturnSkip,
|
||||
},
|
||||
{
|
||||
PluginName: "fooPlugin3",
|
||||
QueueingHintFn: queueHintReturnQueue, // It'll be ignored.
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "If event is specific Node update event, queueing hint function for NodeUpdate/UpdateNodeLabel is also executed",
|
||||
podInfo: &framework.QueuedPodInfo{
|
||||
UnschedulablePlugins: sets.New("fooPlugin1", "fooPlugin2"),
|
||||
PodInfo: mustNewPodInfo(st.MakePod().Name("pod1").Namespace("ns1").UID("1").Obj()),
|
||||
@ -3382,30 +3371,32 @@ func Test_isPodWorthRequeuing(t *testing.T) {
|
||||
event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.UpdateNodeLabel},
|
||||
oldObj: nil,
|
||||
newObj: st.MakeNode().Obj(),
|
||||
expected: framework.QueueAfterBackoff,
|
||||
expectedExecutionCount: 3,
|
||||
expected: queueAfterBackoff,
|
||||
expectedExecutionCount: 1,
|
||||
queueingHintMap: QueueingHintMapPerProfile{
|
||||
"": {
|
||||
framework.ClusterEvent{Resource: framework.Node, ActionType: framework.UpdateNodeLabel}: {
|
||||
{
|
||||
PluginName: "fooPlugin1",
|
||||
QueueingHintFn: queueHintReturnQueueAfterBackoff,
|
||||
PluginName: "fooPlugin1",
|
||||
// It's only executed and interpreted as queueAfterBackoff.
|
||||
// The function doesn't run other hints because this Pod doesn't have PendingPlugins.
|
||||
QueueingHintFn: queueHintReturnQueue,
|
||||
},
|
||||
{
|
||||
PluginName: "fooPlugin2",
|
||||
QueueingHintFn: queueHintReturnQueueAfterBackoff,
|
||||
QueueingHintFn: queueHintReturnQueue,
|
||||
},
|
||||
},
|
||||
framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Update}: {
|
||||
{
|
||||
PluginName: "fooPlugin1",
|
||||
QueueingHintFn: queueHintReturnQueueAfterBackoff,
|
||||
QueueingHintFn: queueHintReturnQueue,
|
||||
},
|
||||
},
|
||||
NodeAdd: { // not executed because NodeAdd is unrelated.
|
||||
{
|
||||
PluginName: "fooPlugin1",
|
||||
QueueingHintFn: queueHintReturnQueueAfterBackoff,
|
||||
QueueingHintFn: queueHintReturnQueue,
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -211,10 +211,10 @@ func (sched *Scheduler) schedulingCycle(
|
||||
NumAllNodes: 1,
|
||||
Pod: pod,
|
||||
Diagnosis: framework.Diagnosis{
|
||||
NodeToStatusMap: framework.NodeToStatusMap{scheduleResult.SuggestedHost: sts},
|
||||
UnschedulablePlugins: sets.New(sts.FailedPlugin()),
|
||||
NodeToStatusMap: framework.NodeToStatusMap{scheduleResult.SuggestedHost: sts},
|
||||
},
|
||||
}
|
||||
fitErr.Diagnosis.SetFailedPlugin(sts)
|
||||
return ScheduleResult{nominatingInfo: clearNominatedNode}, assumedPodInfo, framework.NewStatus(sts.Code()).WithError(fitErr)
|
||||
}
|
||||
return ScheduleResult{nominatingInfo: clearNominatedNode}, assumedPodInfo, sts
|
||||
@ -234,10 +234,10 @@ func (sched *Scheduler) schedulingCycle(
|
||||
NumAllNodes: 1,
|
||||
Pod: pod,
|
||||
Diagnosis: framework.Diagnosis{
|
||||
NodeToStatusMap: framework.NodeToStatusMap{scheduleResult.SuggestedHost: runPermitStatus},
|
||||
UnschedulablePlugins: sets.New(runPermitStatus.FailedPlugin()),
|
||||
NodeToStatusMap: framework.NodeToStatusMap{scheduleResult.SuggestedHost: runPermitStatus},
|
||||
},
|
||||
}
|
||||
fitErr.Diagnosis.SetFailedPlugin(runPermitStatus)
|
||||
return ScheduleResult{nominatingInfo: clearNominatedNode}, assumedPodInfo, framework.NewStatus(runPermitStatus.Code()).WithError(fitErr)
|
||||
}
|
||||
|
||||
@ -435,8 +435,7 @@ func (sched *Scheduler) schedulePod(ctx context.Context, fwk framework.Framework
|
||||
func (sched *Scheduler) findNodesThatFitPod(ctx context.Context, fwk framework.Framework, state *framework.CycleState, pod *v1.Pod) ([]*v1.Node, framework.Diagnosis, error) {
|
||||
logger := klog.FromContext(ctx)
|
||||
diagnosis := framework.Diagnosis{
|
||||
NodeToStatusMap: make(framework.NodeToStatusMap),
|
||||
UnschedulablePlugins: sets.New[string](),
|
||||
NodeToStatusMap: make(framework.NodeToStatusMap),
|
||||
}
|
||||
|
||||
allNodes, err := sched.nodeInfoSnapshot.NodeInfos().List()
|
||||
@ -459,10 +458,7 @@ func (sched *Scheduler) findNodesThatFitPod(ctx context.Context, fwk framework.F
|
||||
msg := s.Message()
|
||||
diagnosis.PreFilterMsg = msg
|
||||
logger.V(5).Info("Status after running PreFilter plugins for pod", "pod", klog.KObj(pod), "status", msg)
|
||||
// Status satisfying IsUnschedulable() gets injected into diagnosis.UnschedulablePlugins.
|
||||
if s.FailedPlugin() != "" {
|
||||
diagnosis.UnschedulablePlugins.Insert(s.FailedPlugin())
|
||||
}
|
||||
diagnosis.SetFailedPlugin(s)
|
||||
return nil, diagnosis, nil
|
||||
}
|
||||
|
||||
@ -490,7 +486,7 @@ func (sched *Scheduler) findNodesThatFitPod(ctx context.Context, fwk framework.F
|
||||
nodes = append(nodes, nInfo)
|
||||
}
|
||||
}
|
||||
feasibleNodes, err := sched.findNodesThatPassFilters(ctx, fwk, state, pod, diagnosis, nodes)
|
||||
feasibleNodes, err := sched.findNodesThatPassFilters(ctx, fwk, state, pod, &diagnosis, nodes)
|
||||
// always try to update the sched.nextStartNodeIndex regardless of whether an error has occurred
|
||||
// this is helpful to make sure that all the nodes have a chance to be searched
|
||||
processedNodes := len(feasibleNodes) + len(diagnosis.NodeToStatusMap)
|
||||
@ -513,7 +509,7 @@ func (sched *Scheduler) evaluateNominatedNode(ctx context.Context, pod *v1.Pod,
|
||||
return nil, err
|
||||
}
|
||||
node := []*framework.NodeInfo{nodeInfo}
|
||||
feasibleNodes, err := sched.findNodesThatPassFilters(ctx, fwk, state, pod, diagnosis, node)
|
||||
feasibleNodes, err := sched.findNodesThatPassFilters(ctx, fwk, state, pod, &diagnosis, node)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -532,7 +528,7 @@ func (sched *Scheduler) findNodesThatPassFilters(
|
||||
fwk framework.Framework,
|
||||
state *framework.CycleState,
|
||||
pod *v1.Pod,
|
||||
diagnosis framework.Diagnosis,
|
||||
diagnosis *framework.Diagnosis,
|
||||
nodes []*framework.NodeInfo) ([]*v1.Node, error) {
|
||||
numAllNodes := len(nodes)
|
||||
numNodesToFind := sched.numFeasibleNodesToFind(fwk.PercentageOfNodesToScore(), int32(numAllNodes))
|
||||
@ -573,7 +569,7 @@ func (sched *Scheduler) findNodesThatPassFilters(
|
||||
} else {
|
||||
statusesLock.Lock()
|
||||
diagnosis.NodeToStatusMap[nodeInfo.Node().Name] = status
|
||||
diagnosis.UnschedulablePlugins.Insert(status.FailedPlugin())
|
||||
diagnosis.SetFailedPlugin(status)
|
||||
statusesLock.Unlock()
|
||||
}
|
||||
}
|
||||
@ -982,6 +978,7 @@ func (sched *Scheduler) handleSchedulingFailure(ctx context.Context, fwk framewo
|
||||
logger.V(2).Info("Unable to schedule pod; no nodes are registered to the cluster; waiting", "pod", klog.KObj(pod))
|
||||
} else if fitError, ok := err.(*framework.FitError); ok { // Inject UnschedulablePlugins to PodInfo, which will be used later for moving Pods between queues efficiently.
|
||||
podInfo.UnschedulablePlugins = fitError.Diagnosis.UnschedulablePlugins
|
||||
podInfo.PendingPlugins = fitError.Diagnosis.PendingPlugins
|
||||
logger.V(2).Info("Unable to schedule pod; no fit; waiting", "pod", klog.KObj(pod), "err", errMsg)
|
||||
} else if apierrors.IsNotFound(err) {
|
||||
logger.V(2).Info("Unable to schedule pod, possibly due to node not found; waiting", "pod", klog.KObj(pod), "err", errMsg)
|
||||
|
@ -363,9 +363,9 @@ func New(ctx context.Context,
|
||||
}
|
||||
|
||||
// defaultQueueingHintFn is the default queueing hint function.
|
||||
// It always returns QueueAfterBackoff as the queueing hint.
|
||||
// It always returns Queue as the queueing hint.
|
||||
var defaultQueueingHintFn = func(_ klog.Logger, _ *v1.Pod, _, _ interface{}) (framework.QueueingHint, error) {
|
||||
return framework.QueueAfterBackoff, nil
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
func buildQueueingHintMap(es []framework.EnqueueExtensions) internalqueue.QueueingHintMap {
|
||||
|
@ -544,7 +544,7 @@ func TestRequeueByPermitRejection(t *testing.T) {
|
||||
frameworkHandler: fh,
|
||||
schedulingHint: func(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
queueingHintCalledCounter++
|
||||
return framework.QueueImmediately, nil
|
||||
return framework.Queue, nil
|
||||
},
|
||||
}
|
||||
return fakePermit, nil
|
||||
|
@ -71,7 +71,7 @@ func (rp *ReservePlugin) EventsToRegister() []framework.ClusterEventWithHint {
|
||||
{
|
||||
Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add},
|
||||
QueueingHintFn: func(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
return framework.QueueImmediately, nil
|
||||
return framework.Queue, nil
|
||||
},
|
||||
},
|
||||
}
|
||||
@ -108,7 +108,7 @@ func (pp *PermitPlugin) EventsToRegister() []framework.ClusterEventWithHint {
|
||||
{
|
||||
Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add},
|
||||
QueueingHintFn: func(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
return framework.QueueImmediately, nil
|
||||
return framework.Queue, nil
|
||||
},
|
||||
},
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user