Merge pull request #127478 from googs1025/scheduler/fine-grained

feature(scheduler): more fine-grained QHints for podtopologyspread plugin
Kubernetes Prow Robot 2024-10-20 13:29:03 +01:00 committed by GitHub
commit e39571591d
3 changed files with 85 additions and 18 deletions
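For context: a QueueingHint function tells the scheduler whether a cluster event can make an unschedulable pod schedulable again, returning Queue to requeue the pod or QueueSkip to leave it parked. The sketch below is illustrative only and is not part of this commit; it shows roughly how the PodTopologySpread plugin attaches isSchedulableAfterNodeChange to node events through the framework's EventsToRegister hook (the exact ActionType mask and method signature vary across Kubernetes versions).

// Illustrative sketch (not from this commit): wiring a QueueingHintFn to node
// events inside the podtopologyspread plugin package. Returning Queue from the
// hint moves the pending pod back into the scheduling queue; QueueSkip keeps it
// in the unschedulable pool.
func (pl *PodTopologySpread) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
	return []framework.ClusterEventWithHint{
		{
			Event:          framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.Delete | framework.UpdateNodeLabel | framework.UpdateNodeTaint},
			QueueingHintFn: pl.isSchedulableAfterNodeChange,
		},
		// Pod events are registered similarly and are omitted here.
	}, nil
}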


@@ -277,9 +277,6 @@ func (pl *PodTopologySpread) getConstraints(pod *v1.Pod) ([]topologySpreadConstr
}
// isSchedulableAfterNodeChange returns Queue when the node has a topologyKey in its labels, else returns QueueSkip.
//
// TODO: we can filter out node update events in a more fine-grained way once preCheck is completely removed.
// See: https://github.com/kubernetes/kubernetes/issues/110175
func (pl *PodTopologySpread) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
originalNode, modifiedNode, err := util.As[*v1.Node](oldObj, newObj)
if err != nil {
@@ -291,23 +288,64 @@ func (pl *PodTopologySpread) isSchedulableAfterNodeChange(logger klog.Logger, po
return framework.Queue, err
}
var originalNodeMatching, modifiedNodeMatching bool
if originalNode != nil {
originalNodeMatching = nodeLabelsMatchSpreadConstraints(originalNode.Labels, constraints)
}
if modifiedNode != nil {
if !nodeLabelsMatchSpreadConstraints(modifiedNode.Labels, constraints) {
logger.V(5).Info("the created/updated node doesn't match pod topology spread constraints",
modifiedNodeMatching = nodeLabelsMatchSpreadConstraints(modifiedNode.Labels, constraints)
}
// We return Queue in the following cases:
// 1. Node/UpdateNodeLabel:
// - The original node matched the pod's topology spread constraints, but the modified node does not.
// - The modified node matches the pod's topology spread constraints, but the original node does not.
// - The modified node matches the pod's topology spread constraints, and the original node and the modified node have different label values for any of the constraints' topologyKeys.
// 2. Node/UpdateNodeTaint:
// - The modified node matches the pod's topology spread constraints, and the original node and the modified node have different taints.
// 3. Node/Add: The created node matches the pod's topology spread constraints.
// 4. Node/Delete: The original node matched the pod's topology spread constraints.
if originalNode != nil && modifiedNode != nil {
if originalNodeMatching != modifiedNodeMatching {
logger.V(5).Info("the node is updated and now pod topology spread constraints has changed, and the pod may be schedulable now",
"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode), "originalMatching", originalNodeMatching, "newMatching", modifiedNodeMatching)
return framework.Queue, nil
}
if modifiedNodeMatching && (checkTopologyKeyLabelsChanged(originalNode.Labels, modifiedNode.Labels, constraints) || !equality.Semantic.DeepEqual(originalNode.Spec.Taints, modifiedNode.Spec.Taints)) {
logger.V(5).Info("the node is updated and now has different taints or labels, and the pod may be schedulable now",
"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
return framework.Queue, nil
}
return framework.QueueSkip, nil
}
if modifiedNode != nil {
if !modifiedNodeMatching {
logger.V(5).Info("the created node doesn't match pod topology spread constraints",
"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
return framework.QueueSkip, nil
}
logger.V(5).Info("node that match topology spread constraints was created/updated, and the pod may be schedulable now",
logger.V(5).Info("the created node matches topology spread constraints, and the pod may be schedulable now",
"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
return framework.Queue, nil
}
// framework.Delete: return Queue when the deleted node had a topologyKey in its labels, else return QueueSkip.
if !nodeLabelsMatchSpreadConstraints(originalNode.Labels, constraints) {
if !originalNodeMatching {
logger.V(5).Info("the deleted node doesn't match pod topology spread constraints", "pod", klog.KObj(pod), "node", klog.KObj(originalNode))
return framework.QueueSkip, nil
}
logger.V(5).Info("node that match topology spread constraints was deleted, and the pod may be schedulable now",
logger.V(5).Info("the deleted node matches topology spread constraints, and the pod may be schedulable now",
"pod", klog.KObj(pod), "node", klog.KObj(originalNode))
return framework.Queue, nil
}
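The nil checks above encode the event type: the scheduler framework calls a QueueingHintFn with only newObj set for an Add, both objects set for an Update, and only oldObj set for a Delete, and util.As type-asserts whichever arguments are non-nil. A quick reference of that convention (inferred from the branches above, not spelled out in this commit):

// oldObj / newObj shapes seen by isSchedulableAfterNodeChange:
//
//   Node/Add:    oldObj == nil,          newObj == created node  -> originalNode == nil
//   Node/Update: oldObj == old node,     newObj == updated node  -> both non-nil
//   Node/Delete: oldObj == deleted node, newObj == nil           -> modifiedNode == nil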
// checkTopologyKeyLabelsChanged checks if any of the labels specified as topologyKey in the constraints have changed.
func checkTopologyKeyLabelsChanged(originalLabels, modifiedLabels map[string]string, constraints []topologySpreadConstraint) bool {
for _, constraint := range constraints {
topologyKey := constraint.TopologyKey
if originalLabels[topologyKey] != modifiedLabels[topologyKey] {
return true
}
}
return false
}
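As a small usage illustration of the new helper (hypothetical function and values, not part of this commit): only a change to a label that one of the constraints names as its topologyKey counts as a change.

// Hypothetical example showing checkTopologyKeyLabelsChanged in isolation.
func exampleTopologyKeyLabelChange() {
	constraints := []topologySpreadConstraint{{TopologyKey: "zone"}}

	oldLabels := map[string]string{"zone": "zone1", "foo": "bar"}

	// The "zone" value changed, so the node's placement in the topology changed: true.
	_ = checkTopologyKeyLabelsChanged(oldLabels, map[string]string{"zone": "zone2", "foo": "bar"}, constraints)

	// Only the unrelated "foo" label changed, so requeueing can safely be skipped: false.
	_ = checkTopologyKeyLabelsChanged(oldLabels, map[string]string{"zone": "zone1", "foo": "baz"}, constraints)
}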


@@ -63,7 +63,7 @@ func Test_isSchedulableAfterNodeChange(t *testing.T) {
Obj(),
oldNode: st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node1").Label("foo", "bar").Obj(),
newNode: st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node1").Obj(),
expectedHint: framework.Queue,
expectedHint: framework.QueueSkip,
},
{
name: "create node with non-related labels",
@@ -131,6 +131,26 @@ func Test_isSchedulableAfterNodeChange(t *testing.T) {
newNode: st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node2").Obj(),
expectedHint: framework.Queue,
},
{
name: "update node with different taints that match all topologySpreadConstraints",
pod: st.MakePod().Name("p").
SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
SpreadConstraint(1, "node", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
Obj(),
oldNode: st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node1").Taints([]v1.Taint{{Key: "aaa", Value: "bbb", Effect: v1.TaintEffectNoSchedule}}).Obj(),
newNode: st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node1").Taints([]v1.Taint{{Key: "ccc", Value: "bbb", Effect: v1.TaintEffectNoSchedule}}).Obj(),
expectedHint: framework.Queue,
},
{
name: "update node with different taints that only match one of topologySpreadConstraints",
pod: st.MakePod().Name("p").
SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
SpreadConstraint(1, "node", v1.DoNotSchedule, fooSelector, nil, nil, nil, nil).
Obj(),
oldNode: st.MakeNode().Name("node-a").Label("node", "node1").Taints([]v1.Taint{{Key: "aaa", Value: "bbb", Effect: v1.TaintEffectNoSchedule}}).Obj(),
newNode: st.MakeNode().Name("node-a").Label("node", "node1").Taints([]v1.Taint{{Key: "ccc", Value: "bbb", Effect: v1.TaintEffectNoSchedule}}).Obj(),
expectedHint: framework.QueueSkip,
},
}
for _, tc := range testcases {


@@ -851,28 +851,37 @@ func TestCoreResourceEnqueue(t *testing.T) {
{
name: "Pods with PodTopologySpread should be requeued when a Node is updated to have the topology label",
initialNodes: []*v1.Node{
st.MakeNode().Name("fake-node1").Label("node", "fake-node").Obj(),
st.MakeNode().Name("fake-node2").Label("node", "fake-node").Obj(),
st.MakeNode().Name("fake-node1").Label("node", "fake-node").Label("region", "fake-node").Label("service", "service-a").Obj(),
st.MakeNode().Name("fake-node2").Label("node", "fake-node").Label("region", "fake-node").Label("service", "service-a").Obj(),
},
initialPods: []*v1.Pod{
st.MakePod().Name("pod1").Label("key1", "val").SpreadConstraint(1, "node", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), nil, nil, nil, nil).Container("image").Node("fake-node1").Obj(),
st.MakePod().Name("pod2").Label("key1", "val").SpreadConstraint(1, "zone", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), nil, nil, nil, nil).Container("image").Node("fake-node2").Obj(),
st.MakePod().Name("pod3").Label("key1", "val").SpreadConstraint(1, "region", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), nil, nil, nil, nil).Container("image").Node("fake-node2").Obj(),
st.MakePod().Name("pod4").Label("key1", "val").SpreadConstraint(1, "service", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), nil, nil, nil, nil).Container("image").Node("fake-node2").Obj(),
},
pods: []*v1.Pod{
// - Pod3 and Pod4 will be rejected by the PodTopologySpread plugin.
st.MakePod().Name("pod3").Label("key1", "val").SpreadConstraint(1, "node", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Obj(),
st.MakePod().Name("pod4").Label("key1", "val").SpreadConstraint(1, "zone", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Obj(),
// - Pod5, Pod6, Pod7, Pod8, Pod9 will be rejected by the PodTopologySpread plugin.
st.MakePod().Name("pod5").Label("key1", "val").SpreadConstraint(1, "node", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Obj(),
st.MakePod().Name("pod6").Label("key1", "val").SpreadConstraint(1, "zone", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Obj(),
st.MakePod().Name("pod7").Label("key1", "val").SpreadConstraint(1, "region", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Obj(),
st.MakePod().Name("pod8").Label("key1", "val").SpreadConstraint(1, "other", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Obj(),
st.MakePod().Name("pod9").Label("key1", "val").SpreadConstraint(1, "service", v1.DoNotSchedule, st.MakeLabelSelector().Exists("key1").Obj(), ptr.To(int32(3)), nil, nil, nil).Container("image").Obj(),
},
triggerFn: func(testCtx *testutils.TestContext) (map[framework.ClusterEvent]uint64, error) {
// Trigger a Node update event.
// It should requeue pod4 only because this node only has zone label, and doesn't have node label that pod3 requires.
node := st.MakeNode().Name("fake-node2").Label("zone", "fake-node").Obj()
// It should requeue pod5 because the update removes the "node" label from fake-node2.
// It should requeue pod6 because the update adds the "zone" label to fake-node2.
// It should not requeue pod7 because the update does not change the value of the "region" label.
// It should not requeue pod8 because the update does not add the "other" label.
// It should requeue pod9 because the update changes the value of the "service" label.
node := st.MakeNode().Name("fake-node2").Label("zone", "fake-node").Label("region", "fake-node").Label("service", "service-b").Obj()
if _, err := testCtx.ClientSet.CoreV1().Nodes().Update(testCtx.Ctx, node, metav1.UpdateOptions{}); err != nil {
return nil, fmt.Errorf("failed to update node: %w", err)
}
return map[framework.ClusterEvent]uint64{{Resource: framework.Node, ActionType: framework.UpdateNodeLabel}: 1}, nil
},
wantRequeuedPods: sets.New("pod4"),
wantRequeuedPods: sets.New("pod5", "pod6", "pod9"),
enableSchedulingQueueHint: []bool{true},
},
{