fix(topologyspread): register UpdatePodTolerations when QHint is enabled

This commit is contained in:
Kensei Nakada 2024-09-19 00:36:51 +09:00
parent 048c8536d6
commit 9c3d4a6fe4
2 changed files with 127 additions and 49 deletions

View File

@ -69,6 +69,7 @@ type PodTopologySpread struct {
statefulSets appslisters.StatefulSetLister
enableNodeInclusionPolicyInPodTopologySpread bool
enableMatchLabelKeysInPodTopologySpread bool
enableSchedulingQueueHint bool
}
var _ framework.PreFilterPlugin = &PodTopologySpread{}
@ -103,6 +104,7 @@ func New(_ context.Context, plArgs runtime.Object, h framework.Handle, fts featu
defaultConstraints: args.DefaultConstraints,
enableNodeInclusionPolicyInPodTopologySpread: fts.EnableNodeInclusionPolicyInPodTopologySpread,
enableMatchLabelKeysInPodTopologySpread: fts.EnableMatchLabelKeysInPodTopologySpread,
enableSchedulingQueueHint: fts.EnableSchedulingQueueHint,
}
if args.DefaultingType == config.SystemDefaulting {
pl.defaultConstraints = systemDefaultConstraints
@ -135,6 +137,18 @@ func (pl *PodTopologySpread) setListers(factory informers.SharedInformerFactory)
// EventsToRegister returns the possible events that may make a Pod
// failed by this plugin schedulable.
func (pl *PodTopologySpread) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
podActionType := framework.Add | framework.UpdatePodLabel | framework.Delete
if pl.enableSchedulingQueueHint {
// When the QueueingHint feature is enabled,
// the scheduling queue uses Pod/Update Queueing Hint
// to determine whether a Pod's update makes the Pod schedulable or not.
// https://github.com/kubernetes/kubernetes/pull/122234
// The Pod rejected by this plugin can be schedulable when the Pod has a spread constraint with NodeTaintsPolicy:Honor
// and has got a new toleration.
// So, we add UpdatePodTolerations here only when QHint is enabled.
podActionType = framework.Add | framework.UpdatePodLabel | framework.UpdatePodTolerations | framework.Delete
}
return []framework.ClusterEventWithHint{
// All ActionType includes the following events:
// - Add. An unschedulable Pod may fail due to violating topology spread constraints,
@ -143,15 +157,27 @@ func (pl *PodTopologySpread) EventsToRegister(_ context.Context) ([]framework.Cl
// an unschedulable Pod schedulable.
// - Delete. An unschedulable Pod may fail due to violating an existing Pod's topology spread constraints,
// deleting an existing Pod may make it schedulable.
{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.Add | framework.UpdatePodLabel | framework.Delete}, QueueingHintFn: pl.isSchedulableAfterPodChange},
{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: podActionType}, QueueingHintFn: pl.isSchedulableAfterPodChange},
// Node add|delete|update maybe lead an topology key changed,
// and make these pod in scheduling schedulable or unschedulable.
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.Delete | framework.UpdateNodeLabel | framework.UpdateNodeTaint}, QueueingHintFn: pl.isSchedulableAfterNodeChange},
}, nil
}
// involvedInTopologySpreading returns true if the incomingPod is involved in the topology spreading of podWithSpreading.
func involvedInTopologySpreading(incomingPod, podWithSpreading *v1.Pod) bool {
return incomingPod.Spec.NodeName != "" && incomingPod.Namespace == podWithSpreading.Namespace
return incomingPod.UID == podWithSpreading.UID ||
(incomingPod.Spec.NodeName != "" && incomingPod.Namespace == podWithSpreading.Namespace)
}
// hasConstraintWithNodeTaintsPolicyHonor returns true if any constraint has `NodeTaintsPolicy: Honor`.
func hasConstraintWithNodeTaintsPolicyHonor(constraints []topologySpreadConstraint) bool {
for _, c := range constraints {
if c.NodeTaintsPolicy == v1.NodeInclusionPolicyHonor {
return true
}
}
return false
}
func (pl *PodTopologySpread) isSchedulableAfterPodChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
@ -173,8 +199,15 @@ func (pl *PodTopologySpread) isSchedulableAfterPodChange(logger klog.Logger, pod
// Pod is modified. Return Queue when the label(s) matching topologySpread's selector is added, changed, or deleted.
if modifiedPod != nil && originalPod != nil {
if pod.UID == modifiedPod.UID && !reflect.DeepEqual(modifiedPod.Spec.Tolerations, originalPod.Spec.Tolerations) && hasConstraintWithNodeTaintsPolicyHonor(constraints) {
// If any constraint has `NodeTaintsPolicy: Honor`, we can return Queue when the target Pod has got a new toleration.
logger.V(5).Info("the unschedulable pod has got a new toleration, which could make it schedulable",
"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
return framework.Queue, nil
}
if reflect.DeepEqual(modifiedPod.Labels, originalPod.Labels) {
logger.V(5).Info("the updated pod is unscheduled or has no updated labels or has different namespace with target pod, so it doesn't make the target pod schedulable",
logger.V(5).Info("the pod's update doesn't include the label update, which doesn't make the target pod schedulable",
"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
return framework.QueueSkip, nil
}

View File

@ -27,6 +27,7 @@ import (
"k8s.io/kubernetes/pkg/scheduler/framework"
plugintesting "k8s.io/kubernetes/pkg/scheduler/framework/plugins/testing"
st "k8s.io/kubernetes/pkg/scheduler/testing"
"k8s.io/utils/ptr"
)
func Test_isSchedulableAfterNodeChange(t *testing.T) {
@ -155,145 +156,187 @@ func Test_isSchedulableAfterPodChange(t *testing.T) {
oldPod, newPod *v1.Pod
expectedHint framework.QueueingHint
expectedErr bool
enableNodeInclusionPolicyInPodTopologySpread bool
}{
{
name: "add pod with labels match topologySpreadConstraints selector",
pod: st.MakePod().Name("p").Label("foo", "").
pod: st.MakePod().UID("p").Name("p").Label("foo", "").
SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
Obj(),
newPod: st.MakePod().Node("fake-node").Label("foo", "").Obj(),
newPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Obj(),
expectedHint: framework.Queue,
},
{
name: "add un-scheduled pod",
pod: st.MakePod().Name("p").Label("foo", "").
pod: st.MakePod().UID("p").Name("p").Label("foo", "").
SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
Obj(),
newPod: st.MakePod().Label("foo", "").Obj(),
newPod: st.MakePod().UID("p2").Label("foo", "").Obj(),
expectedHint: framework.QueueSkip,
},
{
name: "update un-scheduled pod",
pod: st.MakePod().Name("p").Label("foo", "").
pod: st.MakePod().UID("p").Name("p").Label("foo", "").
SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
Obj(),
newPod: st.MakePod().Label("foo", "").Obj(),
oldPod: st.MakePod().Label("bar", "").Obj(),
newPod: st.MakePod().UID("p2").Label("foo", "").Obj(),
oldPod: st.MakePod().UID("p2").Label("bar", "").Obj(),
expectedHint: framework.QueueSkip,
},
{
name: "delete un-scheduled pod",
pod: st.MakePod().Name("p").Label("foo", "").
pod: st.MakePod().UID("p").Name("p").Label("foo", "").
SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
Obj(),
oldPod: st.MakePod().Label("foo", "").Obj(),
oldPod: st.MakePod().UID("p2").Label("foo", "").Obj(),
expectedHint: framework.QueueSkip,
},
{
name: "add pod with different namespace",
pod: st.MakePod().Name("p").Label("foo", "").
pod: st.MakePod().UID("p").Name("p").Label("foo", "").
SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
Obj(),
newPod: st.MakePod().Node("fake-node").Namespace("fake-namespace").Label("foo", "").Obj(),
newPod: st.MakePod().UID("p2").Node("fake-node").Namespace("fake-namespace").Label("foo", "").Obj(),
expectedHint: framework.QueueSkip,
},
{
name: "add pod with labels don't match topologySpreadConstraints selector",
pod: st.MakePod().Name("p").Label("foo", "").
pod: st.MakePod().UID("p").Name("p").Label("foo", "").
SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
Obj(),
newPod: st.MakePod().Node("fake-node").Label("bar", "").Obj(),
newPod: st.MakePod().UID("p2").Node("fake-node").Label("bar", "").Obj(),
expectedHint: framework.QueueSkip,
},
{
name: "delete pod with labels that match topologySpreadConstraints selector",
pod: st.MakePod().Name("p").Label("foo", "").
pod: st.MakePod().UID("p").Name("p").Label("foo", "").
SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
Obj(),
oldPod: st.MakePod().Node("fake-node").Label("foo", "").Obj(),
oldPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Obj(),
expectedHint: framework.Queue,
},
{
name: "delete pod with labels that don't match topologySpreadConstraints selector",
pod: st.MakePod().Name("p").Label("foo", "").
pod: st.MakePod().UID("p").Name("p").Label("foo", "").
SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
Obj(),
oldPod: st.MakePod().Node("fake-node").Label("bar", "").Obj(),
oldPod: st.MakePod().UID("p2").Node("fake-node").Label("bar", "").Obj(),
expectedHint: framework.QueueSkip,
},
{
name: "update pod's non-related label",
pod: st.MakePod().Name("p").Label("foo", "").
pod: st.MakePod().UID("p").Name("p").Label("foo", "").
SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
Obj(),
oldPod: st.MakePod().Node("fake-node").Label("foo", "").Label("bar", "bar1").Obj(),
newPod: st.MakePod().Node("fake-node").Label("foo", "").Label("bar", "bar2").Obj(),
oldPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Label("bar", "bar1").Obj(),
newPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Label("bar", "bar2").Obj(),
expectedHint: framework.QueueSkip,
},
{
name: "add pod's label that matches topologySpreadConstraints selector",
pod: st.MakePod().Name("p").Label("foo", "").
pod: st.MakePod().UID("p").Name("p").Label("foo", "").
SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
Obj(),
oldPod: st.MakePod().Node("fake-node").Obj(),
newPod: st.MakePod().Node("fake-node").Label("foo", "").Obj(),
oldPod: st.MakePod().UID("p2").Node("fake-node").Obj(),
newPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Obj(),
expectedHint: framework.Queue,
},
{
name: "delete pod label that matches topologySpreadConstraints selector",
pod: st.MakePod().Name("p").Label("foo", "").
pod: st.MakePod().UID("p").Name("p").Label("foo", "").
SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
Obj(),
oldPod: st.MakePod().Node("fake-node").Label("foo", "").Obj(),
newPod: st.MakePod().Node("fake-node").Obj(),
oldPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Obj(),
newPod: st.MakePod().UID("p2").Node("fake-node").Obj(),
expectedHint: framework.Queue,
},
{
name: "change pod's label that matches topologySpreadConstraints selector",
pod: st.MakePod().Name("p").Label("foo", "").
pod: st.MakePod().UID("p").Name("p").Label("foo", "").
SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
Obj(),
oldPod: st.MakePod().Node("fake-node").Label("foo", "foo1").Obj(),
newPod: st.MakePod().Node("fake-node").Label("foo", "foo2").Obj(),
oldPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "foo1").Obj(),
newPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "foo2").Obj(),
expectedHint: framework.QueueSkip,
},
{
name: "change pod's label that doesn't match topologySpreadConstraints selector",
pod: st.MakePod().Name("p").Label("foo", "").
pod: st.MakePod().UID("p").Name("p").Label("foo", "").
SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
Obj(),
oldPod: st.MakePod().Node("fake-node").Label("foo", "").Label("bar", "bar1").Obj(),
newPod: st.MakePod().Node("fake-node").Label("foo", "").Label("bar", "bar2").Obj(),
oldPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Label("bar", "bar1").Obj(),
newPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Label("bar", "bar2").Obj(),
expectedHint: framework.QueueSkip,
},
{
name: "add pod's label that matches topologySpreadConstraints selector with multi topologySpreadConstraints",
pod: st.MakePod().Name("p").Label("foo", "").
pod: st.MakePod().UID("p").Name("p").Label("foo", "").
SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
SpreadConstraint(1, "node", v1.DoNotSchedule, barSelector, nil, nil, nil, nil).
Obj(),
oldPod: st.MakePod().Node("fake-node").Label("foo", "").Obj(),
newPod: st.MakePod().Node("fake-node").Label("foo", "").Label("bar", "bar2").Obj(),
oldPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Obj(),
newPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Label("bar", "bar2").Obj(),
expectedHint: framework.Queue,
},
{
name: "change pod's label that doesn't match topologySpreadConstraints selector with multi topologySpreadConstraints",
pod: st.MakePod().Name("p").Label("foo", "").
pod: st.MakePod().UID("p").Name("p").Label("foo", "").
SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
SpreadConstraint(1, "node", v1.DoNotSchedule, barSelector, nil, nil, nil, nil).
Obj(),
oldPod: st.MakePod().Node("fake-node").Label("foo", "").Obj(),
newPod: st.MakePod().Node("fake-node").Label("foo", "").Label("baz", "").Obj(),
oldPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Obj(),
newPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Label("baz", "").Obj(),
expectedHint: framework.QueueSkip,
},
{
name: "change pod's label that match topologySpreadConstraints selector with multi topologySpreadConstraints",
pod: st.MakePod().Name("p").Label("foo", "").
pod: st.MakePod().UID("p").Name("p").Label("foo", "").
SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
SpreadConstraint(1, "node", v1.DoNotSchedule, barSelector, nil, nil, nil, nil).
Obj(),
oldPod: st.MakePod().Node("fake-node").Label("foo", "").Label("bar", "").Obj(),
newPod: st.MakePod().Node("fake-node").Label("foo", "").Label("bar", "bar2").Obj(),
oldPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Label("bar", "").Obj(),
newPod: st.MakePod().UID("p2").Node("fake-node").Label("foo", "").Label("bar", "bar2").Obj(),
expectedHint: framework.QueueSkip,
},
{
name: "the unschedulable Pod has topologySpreadConstraint with NodeTaintsPolicy:Honor and has got a new toleration",
pod: st.MakePod().UID("p").Name("p").Label("foo", "").
SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, ptr.To(v1.NodeInclusionPolicyHonor), nil).
SpreadConstraint(1, "node", v1.DoNotSchedule, barSelector, nil, nil, nil, nil).
Obj(),
oldPod: st.MakePod().UID("p").Name("p").Label("foo", "").Obj(),
newPod: st.MakePod().UID("p").Name("p").Label("foo", "").Toleration(v1.TaintNodeUnschedulable).Obj(),
expectedHint: framework.Queue,
enableNodeInclusionPolicyInPodTopologySpread: true,
},
{
name: "the unschedulable Pod has topologySpreadConstraint without NodeTaintsPolicy:Honor and has got a new toleration",
pod: st.MakePod().UID("p").Name("p").Label("foo", "").
SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, ptr.To(v1.NodeInclusionPolicyIgnore), nil).
SpreadConstraint(1, "node", v1.DoNotSchedule, barSelector, nil, nil, nil, nil).
Obj(),
oldPod: st.MakePod().UID("p").Name("p").Label("foo", "").Obj(),
newPod: st.MakePod().UID("p").Name("p").Label("foo", "").Toleration(v1.TaintNodeUnschedulable).Obj(),
expectedHint: framework.QueueSkip,
},
{
name: "the unschedulable Pod has topologySpreadConstraint and has got a new label matching the selector of the constraint",
pod: st.MakePod().UID("p").Name("p").Label("foo", "").
SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, ptr.To(v1.NodeInclusionPolicyIgnore), nil).
SpreadConstraint(1, "node", v1.DoNotSchedule, barSelector, nil, nil, nil, nil).
Obj(),
oldPod: st.MakePod().UID("p").Name("p").Obj(),
newPod: st.MakePod().UID("p").Name("p").Label("foo", "").Obj(),
expectedHint: framework.Queue,
},
{
name: "the unschedulable Pod has topologySpreadConstraint and has got a new unrelated label",
pod: st.MakePod().UID("p").Name("p").Label("foo", "").
SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, ptr.To(v1.NodeInclusionPolicyIgnore), nil).
SpreadConstraint(1, "node", v1.DoNotSchedule, barSelector, nil, nil, nil, nil).
Obj(),
oldPod: st.MakePod().UID("p").Name("p").Obj(),
newPod: st.MakePod().UID("p").Name("p").Label("unrelated", "").Obj(),
expectedHint: framework.QueueSkip,
},
}
@ -303,6 +346,8 @@ func Test_isSchedulableAfterPodChange(t *testing.T) {
snapshot := cache.NewSnapshot(nil, nil)
pl := plugintesting.SetupPlugin(ctx, t, topologySpreadFunc, &config.PodTopologySpreadArgs{DefaultingType: config.ListDefaulting}, snapshot)
p := pl.(*PodTopologySpread)
p.enableNodeInclusionPolicyInPodTopologySpread = tc.enableNodeInclusionPolicyInPodTopologySpread
actualHint, err := p.isSchedulableAfterPodChange(logger, tc.pod, tc.oldPod, tc.newPod)
if tc.expectedErr {
require.Error(t, err)