diff --git a/pkg/scheduler/framework/plugins/podtopologyspread/plugin.go b/pkg/scheduler/framework/plugins/podtopologyspread/plugin.go index 361c3362e9b..f2a5ce2123d 100644 --- a/pkg/scheduler/framework/plugins/podtopologyspread/plugin.go +++ b/pkg/scheduler/framework/plugins/podtopologyspread/plugin.go @@ -69,6 +69,7 @@ type PodTopologySpread struct { statefulSets appslisters.StatefulSetLister enableNodeInclusionPolicyInPodTopologySpread bool enableMatchLabelKeysInPodTopologySpread bool + enableSchedulingQueueHint bool } var _ framework.PreFilterPlugin = &PodTopologySpread{} @@ -103,6 +104,7 @@ func New(_ context.Context, plArgs runtime.Object, h framework.Handle, fts featu defaultConstraints: args.DefaultConstraints, enableNodeInclusionPolicyInPodTopologySpread: fts.EnableNodeInclusionPolicyInPodTopologySpread, enableMatchLabelKeysInPodTopologySpread: fts.EnableMatchLabelKeysInPodTopologySpread, + enableSchedulingQueueHint: fts.EnableSchedulingQueueHint, } if args.DefaultingType == config.SystemDefaulting { pl.defaultConstraints = systemDefaultConstraints @@ -135,6 +137,18 @@ func (pl *PodTopologySpread) setListers(factory informers.SharedInformerFactory) // EventsToRegister returns the possible events that may make a Pod // failed by this plugin schedulable. func (pl *PodTopologySpread) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) { + podActionType := framework.Add | framework.UpdatePodLabel | framework.Delete + if pl.enableSchedulingQueueHint { + // When the QueueingHint feature is enabled, + // the scheduling queue uses Pod/Update Queueing Hint + // to determine whether a Pod's update makes the Pod schedulable or not. + // https://github.com/kubernetes/kubernetes/pull/122234 + // The Pod rejected by this plugin can be schedulable when the Pod has a spread constraint with NodeTaintsPolicy:Honor + // and has got a new toleration. + // So, we add UpdatePodTolerations here only when QHint is enabled. + podActionType = framework.Add | framework.UpdatePodLabel | framework.UpdatePodTolerations | framework.Delete + } + return []framework.ClusterEventWithHint{ // All ActionType includes the following events: // - Add. An unschedulable Pod may fail due to violating topology spread constraints, @@ -143,15 +157,27 @@ func (pl *PodTopologySpread) EventsToRegister(_ context.Context) ([]framework.Cl // an unschedulable Pod schedulable. // - Delete. An unschedulable Pod may fail due to violating an existing Pod's topology spread constraints, // deleting an existing Pod may make it schedulable. - {Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.Add | framework.UpdatePodLabel | framework.Delete}, QueueingHintFn: pl.isSchedulableAfterPodChange}, + {Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: podActionType}, QueueingHintFn: pl.isSchedulableAfterPodChange}, // Node add|delete|update maybe lead an topology key changed, // and make these pod in scheduling schedulable or unschedulable. {Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.Delete | framework.UpdateNodeLabel | framework.UpdateNodeTaint}, QueueingHintFn: pl.isSchedulableAfterNodeChange}, }, nil } +// involvedInTopologySpreading returns true if the incomingPod is involved in the topology spreading of podWithSpreading. func involvedInTopologySpreading(incomingPod, podWithSpreading *v1.Pod) bool { - return incomingPod.Spec.NodeName != "" && incomingPod.Namespace == podWithSpreading.Namespace + return incomingPod.UID == podWithSpreading.UID || + (incomingPod.Spec.NodeName != "" && incomingPod.Namespace == podWithSpreading.Namespace) +} + +// hasConstraintWithNodeTaintsPolicyHonor returns true if any constraint has `NodeTaintsPolicy: Honor`. +func hasConstraintWithNodeTaintsPolicyHonor(constraints []topologySpreadConstraint) bool { + for _, c := range constraints { + if c.NodeTaintsPolicy == v1.NodeInclusionPolicyHonor { + return true + } + } + return false } func (pl *PodTopologySpread) isSchedulableAfterPodChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) { @@ -173,8 +199,15 @@ func (pl *PodTopologySpread) isSchedulableAfterPodChange(logger klog.Logger, pod // Pod is modified. Return Queue when the label(s) matching topologySpread's selector is added, changed, or deleted. if modifiedPod != nil && originalPod != nil { + if pod.UID == modifiedPod.UID && !reflect.DeepEqual(modifiedPod.Spec.Tolerations, originalPod.Spec.Tolerations) && hasConstraintWithNodeTaintsPolicyHonor(constraints) { + // If any constraint has `NodeTaintsPolicy: Honor`, we can return Queue when the target Pod has got a new toleration. + logger.V(5).Info("the unschedulable pod has got a new toleration, which could make it schedulable", + "pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod)) + return framework.Queue, nil + } + if reflect.DeepEqual(modifiedPod.Labels, originalPod.Labels) { - logger.V(5).Info("the updated pod is unscheduled or has no updated labels or has different namespace with target pod, so it doesn't make the target pod schedulable", + logger.V(5).Info("the pod's update doesn't include the label update, which doesn't make the target pod schedulable", "pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod)) return framework.QueueSkip, nil } diff --git a/pkg/scheduler/framework/plugins/podtopologyspread/plugin_test.go b/pkg/scheduler/framework/plugins/podtopologyspread/plugin_test.go index f7a0e35c635..9ebb568b4a9 100644 --- a/pkg/scheduler/framework/plugins/podtopologyspread/plugin_test.go +++ b/pkg/scheduler/framework/plugins/podtopologyspread/plugin_test.go @@ -27,6 +27,7 @@ import ( "k8s.io/kubernetes/pkg/scheduler/framework" plugintesting "k8s.io/kubernetes/pkg/scheduler/framework/plugins/testing" st "k8s.io/kubernetes/pkg/scheduler/testing" + "k8s.io/utils/ptr" ) func Test_isSchedulableAfterNodeChange(t *testing.T) { @@ -150,150 +151,192 @@ func Test_isSchedulableAfterNodeChange(t *testing.T) { func Test_isSchedulableAfterPodChange(t *testing.T) { testcases := []struct { - name string - pod *v1.Pod - oldPod, newPod *v1.Pod - expectedHint framework.QueueingHint - expectedErr bool + name string + pod *v1.Pod + oldPod, newPod *v1.Pod + expectedHint framework.QueueingHint + expectedErr bool + enableNodeInclusionPolicyInPodTopologySpread bool }{ { name: "add pod with labels match topologySpreadConstraints selector", - pod: st.MakePod().Name("p").Label("foo", ""). + pod: st.MakePod().UID("p").Name("p").Label("foo", ""). SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil). Obj(), - newPod: st.MakePod().Node("fake-node").Label("foo", "").Obj(), + newPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Obj(), expectedHint: framework.Queue, }, { name: "add un-scheduled pod", - pod: st.MakePod().Name("p").Label("foo", ""). + pod: st.MakePod().UID("p").Name("p").Label("foo", ""). SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil). Obj(), - newPod: st.MakePod().Label("foo", "").Obj(), + newPod: st.MakePod().UID("p2").Label("foo", "").Obj(), expectedHint: framework.QueueSkip, }, { name: "update un-scheduled pod", - pod: st.MakePod().Name("p").Label("foo", ""). + pod: st.MakePod().UID("p").Name("p").Label("foo", ""). SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil). Obj(), - newPod: st.MakePod().Label("foo", "").Obj(), - oldPod: st.MakePod().Label("bar", "").Obj(), + newPod: st.MakePod().UID("p2").Label("foo", "").Obj(), + oldPod: st.MakePod().UID("p2").Label("bar", "").Obj(), expectedHint: framework.QueueSkip, }, { name: "delete un-scheduled pod", - pod: st.MakePod().Name("p").Label("foo", ""). + pod: st.MakePod().UID("p").Name("p").Label("foo", ""). SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil). Obj(), - oldPod: st.MakePod().Label("foo", "").Obj(), + oldPod: st.MakePod().UID("p2").Label("foo", "").Obj(), expectedHint: framework.QueueSkip, }, { name: "add pod with different namespace", - pod: st.MakePod().Name("p").Label("foo", ""). + pod: st.MakePod().UID("p").Name("p").Label("foo", ""). SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil). Obj(), - newPod: st.MakePod().Node("fake-node").Namespace("fake-namespace").Label("foo", "").Obj(), + newPod: st.MakePod().UID("p2").Node("fake-node").Namespace("fake-namespace").Label("foo", "").Obj(), expectedHint: framework.QueueSkip, }, { name: "add pod with labels don't match topologySpreadConstraints selector", - pod: st.MakePod().Name("p").Label("foo", ""). + pod: st.MakePod().UID("p").Name("p").Label("foo", ""). SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil). Obj(), - newPod: st.MakePod().Node("fake-node").Label("bar", "").Obj(), + newPod: st.MakePod().UID("p2").Node("fake-node").Label("bar", "").Obj(), expectedHint: framework.QueueSkip, }, { name: "delete pod with labels that match topologySpreadConstraints selector", - pod: st.MakePod().Name("p").Label("foo", ""). + pod: st.MakePod().UID("p").Name("p").Label("foo", ""). SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil). Obj(), - oldPod: st.MakePod().Node("fake-node").Label("foo", "").Obj(), + oldPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Obj(), expectedHint: framework.Queue, }, { name: "delete pod with labels that don't match topologySpreadConstraints selector", - pod: st.MakePod().Name("p").Label("foo", ""). + pod: st.MakePod().UID("p").Name("p").Label("foo", ""). SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil). Obj(), - oldPod: st.MakePod().Node("fake-node").Label("bar", "").Obj(), + oldPod: st.MakePod().UID("p2").Node("fake-node").Label("bar", "").Obj(), expectedHint: framework.QueueSkip, }, { name: "update pod's non-related label", - pod: st.MakePod().Name("p").Label("foo", ""). + pod: st.MakePod().UID("p").Name("p").Label("foo", ""). SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil). Obj(), - oldPod: st.MakePod().Node("fake-node").Label("foo", "").Label("bar", "bar1").Obj(), - newPod: st.MakePod().Node("fake-node").Label("foo", "").Label("bar", "bar2").Obj(), + oldPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Label("bar", "bar1").Obj(), + newPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Label("bar", "bar2").Obj(), expectedHint: framework.QueueSkip, }, { name: "add pod's label that matches topologySpreadConstraints selector", - pod: st.MakePod().Name("p").Label("foo", ""). + pod: st.MakePod().UID("p").Name("p").Label("foo", ""). SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil). Obj(), - oldPod: st.MakePod().Node("fake-node").Obj(), - newPod: st.MakePod().Node("fake-node").Label("foo", "").Obj(), + oldPod: st.MakePod().UID("p2").Node("fake-node").Obj(), + newPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Obj(), expectedHint: framework.Queue, }, { name: "delete pod label that matches topologySpreadConstraints selector", - pod: st.MakePod().Name("p").Label("foo", ""). + pod: st.MakePod().UID("p").Name("p").Label("foo", ""). SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil). Obj(), - oldPod: st.MakePod().Node("fake-node").Label("foo", "").Obj(), - newPod: st.MakePod().Node("fake-node").Obj(), + oldPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Obj(), + newPod: st.MakePod().UID("p2").Node("fake-node").Obj(), expectedHint: framework.Queue, }, { name: "change pod's label that matches topologySpreadConstraints selector", - pod: st.MakePod().Name("p").Label("foo", ""). + pod: st.MakePod().UID("p").Name("p").Label("foo", ""). SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil). Obj(), - oldPod: st.MakePod().Node("fake-node").Label("foo", "foo1").Obj(), - newPod: st.MakePod().Node("fake-node").Label("foo", "foo2").Obj(), + oldPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "foo1").Obj(), + newPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "foo2").Obj(), expectedHint: framework.QueueSkip, }, { name: "change pod's label that doesn't match topologySpreadConstraints selector", - pod: st.MakePod().Name("p").Label("foo", ""). + pod: st.MakePod().UID("p").Name("p").Label("foo", ""). SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil). Obj(), - oldPod: st.MakePod().Node("fake-node").Label("foo", "").Label("bar", "bar1").Obj(), - newPod: st.MakePod().Node("fake-node").Label("foo", "").Label("bar", "bar2").Obj(), + oldPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Label("bar", "bar1").Obj(), + newPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Label("bar", "bar2").Obj(), expectedHint: framework.QueueSkip, }, { name: "add pod's label that matches topologySpreadConstraints selector with multi topologySpreadConstraints", - pod: st.MakePod().Name("p").Label("foo", ""). + pod: st.MakePod().UID("p").Name("p").Label("foo", ""). SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil). SpreadConstraint(1, "node", v1.DoNotSchedule, barSelector, nil, nil, nil, nil). Obj(), - oldPod: st.MakePod().Node("fake-node").Label("foo", "").Obj(), - newPod: st.MakePod().Node("fake-node").Label("foo", "").Label("bar", "bar2").Obj(), + oldPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Obj(), + newPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Label("bar", "bar2").Obj(), expectedHint: framework.Queue, }, { name: "change pod's label that doesn't match topologySpreadConstraints selector with multi topologySpreadConstraints", - pod: st.MakePod().Name("p").Label("foo", ""). + pod: st.MakePod().UID("p").Name("p").Label("foo", ""). SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil). SpreadConstraint(1, "node", v1.DoNotSchedule, barSelector, nil, nil, nil, nil). Obj(), - oldPod: st.MakePod().Node("fake-node").Label("foo", "").Obj(), - newPod: st.MakePod().Node("fake-node").Label("foo", "").Label("baz", "").Obj(), + oldPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Obj(), + newPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Label("baz", "").Obj(), expectedHint: framework.QueueSkip, }, { name: "change pod's label that match topologySpreadConstraints selector with multi topologySpreadConstraints", - pod: st.MakePod().Name("p").Label("foo", ""). + pod: st.MakePod().UID("p").Name("p").Label("foo", ""). SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil). SpreadConstraint(1, "node", v1.DoNotSchedule, barSelector, nil, nil, nil, nil). Obj(), - oldPod: st.MakePod().Node("fake-node").Label("foo", "").Label("bar", "").Obj(), - newPod: st.MakePod().Node("fake-node").Label("foo", "").Label("bar", "bar2").Obj(), + oldPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Label("bar", "").Obj(), + newPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Label("bar", "bar2").Obj(), + expectedHint: framework.QueueSkip, + }, + { + name: "the unschedulable Pod has topologySpreadConstraint with NodeTaintsPolicy:Honor and has got a new toleration", + pod: st.MakePod().UID("p").Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, ptr.To(v1.NodeInclusionPolicyHonor), nil). + SpreadConstraint(1, "node", v1.DoNotSchedule, barSelector, nil, nil, nil, nil). + Obj(), + oldPod: st.MakePod().UID("p").Name("p").Label("foo", "").Obj(), + newPod: st.MakePod().UID("p").Name("p").Label("foo", "").Toleration(v1.TaintNodeUnschedulable).Obj(), + expectedHint: framework.Queue, + enableNodeInclusionPolicyInPodTopologySpread: true, + }, + { + name: "the unschedulable Pod has topologySpreadConstraint without NodeTaintsPolicy:Honor and has got a new toleration", + pod: st.MakePod().UID("p").Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, ptr.To(v1.NodeInclusionPolicyIgnore), nil). + SpreadConstraint(1, "node", v1.DoNotSchedule, barSelector, nil, nil, nil, nil). + Obj(), + oldPod: st.MakePod().UID("p").Name("p").Label("foo", "").Obj(), + newPod: st.MakePod().UID("p").Name("p").Label("foo", "").Toleration(v1.TaintNodeUnschedulable).Obj(), + expectedHint: framework.QueueSkip, + }, + { + name: "the unschedulable Pod has topologySpreadConstraint and has got a new label matching the selector of the constraint", + pod: st.MakePod().UID("p").Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, ptr.To(v1.NodeInclusionPolicyIgnore), nil). + SpreadConstraint(1, "node", v1.DoNotSchedule, barSelector, nil, nil, nil, nil). + Obj(), + oldPod: st.MakePod().UID("p").Name("p").Obj(), + newPod: st.MakePod().UID("p").Name("p").Label("foo", "").Obj(), + expectedHint: framework.Queue, + }, + { + name: "the unschedulable Pod has topologySpreadConstraint and has got a new unrelated label", + pod: st.MakePod().UID("p").Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, ptr.To(v1.NodeInclusionPolicyIgnore), nil). + SpreadConstraint(1, "node", v1.DoNotSchedule, barSelector, nil, nil, nil, nil). + Obj(), + oldPod: st.MakePod().UID("p").Name("p").Obj(), + newPod: st.MakePod().UID("p").Name("p").Label("unrelated", "").Obj(), expectedHint: framework.QueueSkip, }, } @@ -303,6 +346,8 @@ func Test_isSchedulableAfterPodChange(t *testing.T) { snapshot := cache.NewSnapshot(nil, nil) pl := plugintesting.SetupPlugin(ctx, t, topologySpreadFunc, &config.PodTopologySpreadArgs{DefaultingType: config.ListDefaulting}, snapshot) p := pl.(*PodTopologySpread) + p.enableNodeInclusionPolicyInPodTopologySpread = tc.enableNodeInclusionPolicyInPodTopologySpread + actualHint, err := p.isSchedulableAfterPodChange(logger, tc.pod, tc.oldPod, tc.newPod) if tc.expectedErr { require.Error(t, err)