diff --git a/pkg/scheduler/framework/plugins/podtopologyspread/plugin.go b/pkg/scheduler/framework/plugins/podtopologyspread/plugin.go
index b82d589b225..a0406c661c0 100644
--- a/pkg/scheduler/framework/plugins/podtopologyspread/plugin.go
+++ b/pkg/scheduler/framework/plugins/podtopologyspread/plugin.go
@@ -277,9 +277,6 @@ func (pl *PodTopologySpread) getConstraints(pod *v1.Pod) ([]topologySpreadConstr
 }
 
 // isSchedulableAfterNodeChange returns Queue when node has topologyKey in its labels, else return QueueSkip.
-//
-// TODO: we can filter out node update events in a more fine-grained way once preCheck is completely removed.
-// See: https://github.com/kubernetes/kubernetes/issues/110175
 func (pl *PodTopologySpread) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
 	originalNode, modifiedNode, err := util.As[*v1.Node](oldObj, newObj)
 	if err != nil {
@@ -291,23 +288,64 @@ func (pl *PodTopologySpread) isSchedulableAfterNodeChange(logger klog.Logger, po
 		return framework.Queue, err
 	}
 
+	var originalNodeMatching, modifiedNodeMatching bool
+	if originalNode != nil {
+		originalNodeMatching = nodeLabelsMatchSpreadConstraints(originalNode.Labels, constraints)
+	}
 	if modifiedNode != nil {
-		if !nodeLabelsMatchSpreadConstraints(modifiedNode.Labels, constraints) {
-			logger.V(5).Info("the created/updated node doesn't match pod topology spread constraints",
+		modifiedNodeMatching = nodeLabelsMatchSpreadConstraints(modifiedNode.Labels, constraints)
+	}
+
+	// We return Queue in the following cases:
+	// 1. Node/UpdateNodeLabel:
+	//    - The original node matched the pod's topology spread constraints, but the modified node does not.
+	//    - The modified node matches the pod's topology spread constraints, but the original node does not.
+	//    - The modified node matches the pod's topology spread constraints, and the original node and the modified node have different label values for any topologyKey.
+	// 2. Node/UpdateNodeTaint:
+	//    - The modified node matches the pod's topology spread constraints, and the original node and the modified node have different taints.
+	// 3. Node/Add: The created node matches the pod's topology spread constraints.
+	// 4. Node/Delete: The original node matched the pod's topology spread constraints.
+	if originalNode != nil && modifiedNode != nil {
+		if originalNodeMatching != modifiedNodeMatching {
+			logger.V(5).Info("the node is updated and the matching of the pod's topology spread constraints has changed, and the pod may be schedulable now",
+				"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode), "originalMatching", originalNodeMatching, "newMatching", modifiedNodeMatching)
+			return framework.Queue, nil
+		}
+		if modifiedNodeMatching && (checkTopologyKeyLabelsChanged(originalNode.Labels, modifiedNode.Labels, constraints) || !equality.Semantic.DeepEqual(originalNode.Spec.Taints, modifiedNode.Spec.Taints)) {
+			logger.V(5).Info("the node is updated and now has different taints or labels, and the pod may be schedulable now",
+				"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
+			return framework.Queue, nil
+		}
+		return framework.QueueSkip, nil
+	}
+
+	if modifiedNode != nil {
+		if !modifiedNodeMatching {
+			logger.V(5).Info("the created node doesn't match pod topology spread constraints",
 				"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
 			return framework.QueueSkip, nil
 		}
-		logger.V(5).Info("node that match topology spread constraints was created/updated, and the pod may be schedulable now",
+		logger.V(5).Info("the created node matches topology spread constraints, and the pod may be schedulable now",
 			"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
 		return framework.Queue, nil
 	}
-	// framework.Delete: return Queue when node has topologyKey in its labels, else return QueueSkip.
-	if !nodeLabelsMatchSpreadConstraints(originalNode.Labels, constraints) {
+	if !originalNodeMatching {
 		logger.V(5).Info("the deleted node doesn't match pod topology spread constraints",
 			"pod", klog.KObj(pod), "node", klog.KObj(originalNode))
 		return framework.QueueSkip, nil
 	}
-	logger.V(5).Info("node that match topology spread constraints was deleted, and the pod may be schedulable now",
+	logger.V(5).Info("the deleted node matches topology spread constraints, and the pod may be schedulable now",
 		"pod", klog.KObj(pod), "node", klog.KObj(originalNode))
 	return framework.Queue, nil
 }
+
+// checkTopologyKeyLabelsChanged checks if any of the labels specified as topologyKey in the constraints have changed.
+func checkTopologyKeyLabelsChanged(originalLabels, modifiedLabels map[string]string, constraints []topologySpreadConstraint) bool {
+	for _, constraint := range constraints {
+		topologyKey := constraint.TopologyKey
+		if originalLabels[topologyKey] != modifiedLabels[topologyKey] {
+			return true
+		}
+	}
+	return false
+}
diff --git a/pkg/scheduler/framework/plugins/podtopologyspread/plugin_test.go b/pkg/scheduler/framework/plugins/podtopologyspread/plugin_test.go
index 8cd6fa04a42..77ba14e0ca4 100644
--- a/pkg/scheduler/framework/plugins/podtopologyspread/plugin_test.go
+++ b/pkg/scheduler/framework/plugins/podtopologyspread/plugin_test.go
@@ -63,7 +63,7 @@ func Test_isSchedulableAfterNodeChange(t *testing.T) {
 				Obj(),
 			oldNode:      st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node1").Label("foo", "bar").Obj(),
 			newNode:      st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node1").Obj(),
-			expectedHint: framework.Queue,
+			expectedHint: framework.QueueSkip,
 		},
 		{
 			name: "create node with non-related labels",
@@ -131,6 +131,26 @@ func Test_isSchedulableAfterNodeChange(t *testing.T) {
 			newNode:      st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node2").Obj(),
 			expectedHint: framework.Queue,
 		},
+		{
+			name: "update node with different taints that match all topologySpreadConstraints",
+			pod: st.MakePod().Name("p").
+				SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
+				SpreadConstraint(1, "node", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
+				Obj(),
+			oldNode:      st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node1").Taints([]v1.Taint{{Key: "aaa", Value: "bbb", Effect: v1.TaintEffectNoSchedule}}).Obj(),
+			newNode:      st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node1").Taints([]v1.Taint{{Key: "ccc", Value: "bbb", Effect: v1.TaintEffectNoSchedule}}).Obj(),
+			expectedHint: framework.Queue,
+		},
+		{
+			name: "update node with different taints that only match one of topologySpreadConstraints",
+			pod: st.MakePod().Name("p").
+				SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
+				SpreadConstraint(1, "node", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
+				Obj(),
+			oldNode:      st.MakeNode().Name("node-a").Label("node", "node1").Taints([]v1.Taint{{Key: "aaa", Value: "bbb", Effect: v1.TaintEffectNoSchedule}}).Obj(),
+			newNode:      st.MakeNode().Name("node-a").Label("node", "node1").Taints([]v1.Taint{{Key: "ccc", Value: "bbb", Effect: v1.TaintEffectNoSchedule}}).Obj(),
+			expectedHint: framework.QueueSkip,
+		},
 	}
 
 	for _, tc := range testcases {
diff --git a/test/integration/scheduler/queue_test.go b/test/integration/scheduler/queue_test.go
index 74e3dde184c..f874c558b9d 100644
--- a/test/integration/scheduler/queue_test.go
+++ b/test/integration/scheduler/queue_test.go
@@ -851,28 +851,37 @@ func TestCoreResourceEnqueue(t *testing.T) {
 		{
 			name: "Pods with PodTopologySpread should be requeued when a Node is updated to have the topology label",
 			initialNodes: []*v1.Node{
-				st.MakeNode().Name("fake-node1").Label("node", "fake-node").Obj(),
-				st.MakeNode().Name("fake-node2").Label("node", "fake-node").Obj(),
+				st.MakeNode().Name("fake-node1").Label("node", "fake-node").Label("region", "fake-node").Label("service", "service-a").Obj(),
+				st.MakeNode().Name("fake-node2").Label("node", "fake-node").Label("region", "fake-node").Label("service", "service-a").Obj(),
 			},
 			initialPods: []*v1.Pod{
 				st.MakePod().Name("pod1").Label("key1", "val").SpreadConstraint(1, "node", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), nil, nil, nil, nil).Container("image").Node("fake-node1").Obj(),
 				st.MakePod().Name("pod2").Label("key1", "val").SpreadConstraint(1, "zone", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), nil, nil, nil, nil).Container("image").Node("fake-node2").Obj(),
+				st.MakePod().Name("pod3").Label("key1", "val").SpreadConstraint(1, "region", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), nil, nil, nil, nil).Container("image").Node("fake-node2").Obj(),
+				st.MakePod().Name("pod4").Label("key1", "val").SpreadConstraint(1, "service", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), nil, nil, nil, nil).Container("image").Node("fake-node2").Obj(),
 			},
 			pods: []*v1.Pod{
-				// - Pod3 and Pod4 will be rejected by the PodTopologySpread plugin.
-				st.MakePod().Name("pod3").Label("key1", "val").SpreadConstraint(1, "node", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Obj(),
-				st.MakePod().Name("pod4").Label("key1", "val").SpreadConstraint(1, "zone", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Obj(),
+				// - Pod5, Pod6, Pod7, Pod8, Pod9 will be rejected by the PodTopologySpread plugin.
+				st.MakePod().Name("pod5").Label("key1", "val").SpreadConstraint(1, "node", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Obj(),
+				st.MakePod().Name("pod6").Label("key1", "val").SpreadConstraint(1, "zone", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Obj(),
+				st.MakePod().Name("pod7").Label("key1", "val").SpreadConstraint(1, "region", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Obj(),
+				st.MakePod().Name("pod8").Label("key1", "val").SpreadConstraint(1, "other", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Obj(),
+				st.MakePod().Name("pod9").Label("key1", "val").SpreadConstraint(1, "service", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Obj(),
 			},
 			triggerFn: func(testCtx *testutils.TestContext) (map[framework.ClusterEvent]uint64, error) {
 				// Trigger an Node update event.
-				// It should requeue pod4 only because this node only has zone label, and doesn't have node label that pod3 requires.
-				node := st.MakeNode().Name("fake-node2").Label("zone", "fake-node").Obj()
+				// It should requeue pod5 because the update deletes the "node" label from fake-node2.
+				// It should requeue pod6 because the update adds the "zone" label to fake-node2.
+				// It should not requeue pod7 because the update does not change the value of the "region" label.
+				// It should not requeue pod8 because the update does not add the "other" label.
+				// It should requeue pod9 because the update changes the value of the "service" label.
+				node := st.MakeNode().Name("fake-node2").Label("zone", "fake-node").Label("region", "fake-node").Label("service", "service-b").Obj()
 				if _, err := testCtx.ClientSet.CoreV1().Nodes().Update(testCtx.Ctx, node, metav1.UpdateOptions{}); err != nil {
 					return nil, fmt.Errorf("failed to update node: %w", err)
 				}
 				return map[framework.ClusterEvent]uint64{{Resource: framework.Node, ActionType: framework.UpdateNodeLabel}: 1}, nil
 			},
-			wantRequeuedPods:          sets.New("pod4"),
+			wantRequeuedPods:          sets.New("pod5", "pod6", "pod9"),
 			enableSchedulingQueueHint: []bool{true},
 		},
 		{