Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-09-14 21:53:52 +00:00)
Fine-grained QueueHints for the nodeaffinity plugin.

Squashed commits:
- Skip queue on unrelated change that keeps pod schedulable when QueueHints are enabled
- Split add from the QHints-disabled case
- Remove case when QHints are disabled
- Remove two QHint alternatives in unit tests
- More fine-grained Node QHint for NodeResourceFit plugin
- Return early when updated Node causes unmatch
- Revert "more fine-grained Node QHint for NodeResourceFit plugin"
  (reverts commit dfbceb60e0c1c4e47748c12722d9ed6dba1a8366)
- Add integration test for requeue of a pod previously rejected by the NodeAffinity plugin when a suitable Node is added
- Add integration test for a Node update operation that does not trigger requeue in the NodeAffinity plugin
- Remove inaccurate comment
- Apply review comments
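For orientation: the hint changed below only fires because the plugin registers it for Node events. A minimal sketch of that registration, following the EventsToRegister signature visible in the first hunk header (the exact event mask used upstream may differ):

package nodeaffinity

import (
	"context"

	framework "k8s.io/kubernetes/pkg/scheduler/framework"
)

// EventsToRegister (sketch): re-evaluate Pods rejected by this plugin when a
// Node is added or its labels change; isSchedulableAfterNodeChange then
// filters out events that cannot flip the affinity verdict.
func (pl *NodeAffinity) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
	return []framework.ClusterEventWithHint{
		{
			Event:          framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeLabel},
			QueueingHintFn: pl.isSchedulableAfterNodeChange,
		},
	}, nil
}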
@@ -104,7 +104,7 @@ func (pl *NodeAffinity) EventsToRegister(_ context.Context) ([]framework.Cluster
 // isSchedulableAfterNodeChange is invoked whenever a node changed. It checks whether
 // that change made a previously unschedulable pod schedulable.
 func (pl *NodeAffinity) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
-	_, modifiedNode, err := util.As[*v1.Node](oldObj, newObj)
+	originalNode, modifiedNode, err := util.As[*v1.Node](oldObj, newObj)
 	if err != nil {
 		return framework.Queue, err
 	}
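The originalNode == nil check introduced in the next hunk works because util.As tolerates a nil old object: on Node-add events there is no previous Node. A minimal sketch of that helper's behavior, assuming the scheduler util package's semantics (the upstream implementation may differ in detail):

package util

import (
	"fmt"

	"k8s.io/apimachinery/pkg/runtime"
)

// As type-asserts oldObj and newObj to T. oldObj may be nil (e.g. on Add
// events), in which case the zero value of T (nil for pointer types) is
// returned for it; a non-nil object of the wrong type is an error.
func As[T runtime.Object](oldObj, newObj interface{}) (T, T, error) {
	var oldT, newT T
	if newObj != nil {
		t, ok := newObj.(T)
		if !ok {
			return oldT, newT, fmt.Errorf("unexpected object type %T", newObj)
		}
		newT = t
	}
	if oldObj != nil {
		t, ok := oldObj.(T)
		if !ok {
			return oldT, newT, fmt.Errorf("unexpected object type %T", oldObj)
		}
		oldT = t
	}
	return oldT, newT, nil
}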
@@ -119,16 +119,27 @@ func (pl *NodeAffinity) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1
 	if err != nil {
 		return framework.Queue, err
 	}
-	if isMatched {
-		logger.V(4).Info("node was created or updated, and matches with the pod's NodeAffinity", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
-		return framework.Queue, nil
-	}
-
-	// TODO: also check if the original node meets the pod's requestments once preCheck is completely removed.
-	// See: https://github.com/kubernetes/kubernetes/issues/110175
-
-	logger.V(4).Info("node was created or updated, but it doesn't make this pod schedulable", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
-	return framework.QueueSkip, nil
+	if !isMatched {
+		logger.V(5).Info("node was created or updated, but the pod's NodeAffinity doesn't match", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
+		return framework.QueueSkip, nil
+	}
+	// Since the node was added and it matches the pod's affinity criteria, we can unblock it.
+	if originalNode == nil {
+		logger.V(5).Info("node was created, and matches with the pod's NodeAffinity", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
+		return framework.Queue, nil
+	}
+	// At this point we know the operation is update so we can narrow down the criteria to unmatch -> match changes only
+	// (necessary affinity label was added to the node in this case).
+	wasMatched, err := requiredNodeAffinity.Match(originalNode)
+	if err != nil {
+		return framework.Queue, err
+	}
+	if wasMatched {
+		logger.V(5).Info("node updated, but the pod's NodeAffinity hasn't changed", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
+		return framework.QueueSkip, nil
+	}
+	logger.V(5).Info("node was updated and the pod's NodeAffinity changed to matched", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
+	return framework.Queue, nil
 }
 
 // PreFilter builds and writes cycle state used by Filter.
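For context, requiredNodeAffinity in the hunk above is built earlier in the function (outside this diff) from the Pod's spec. A minimal sketch of that matcher, assuming the component-helpers package the scheduler uses for affinity matching:

package nodeaffinity

import (
	v1 "k8s.io/api/core/v1"

	helper "k8s.io/component-helpers/scheduling/corev1/nodeaffinity"
)

// podMatchesNode folds the Pod's NodeSelector and required node-affinity
// terms into one matcher and evaluates it against a Node; the hint above
// runs this same check against both originalNode and modifiedNode.
func podMatchesNode(pod *v1.Pod, node *v1.Node) (bool, error) {
	required := helper.GetRequiredNodeAffinity(pod)
	return required.Match(node)
}

Running the check twice is what enables the narrowing to unmatch -> match: an update whose old Node already matched cannot have been what was blocking the Pod, so it is safe to skip.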
@@ -1297,6 +1297,13 @@ func Test_isSchedulableAfterNodeChange(t *testing.T) {
 			newObj:       st.MakeNode().Label("foo", "bar").Obj(),
 			expectedHint: framework.Queue,
 		},
+		"skip-unrelated-change-that-keeps-pod-schedulable": {
+			args:         &config.NodeAffinityArgs{},
+			pod:          podWithNodeAffinity.Obj(),
+			oldObj:       st.MakeNode().Label("foo", "bar").Obj(),
+			newObj:       st.MakeNode().Capacity(nil).Label("foo", "bar").Obj(),
+			expectedHint: framework.QueueSkip,
+		},
 		"skip-queue-on-add-scheduler-enforced-node-affinity": {
 			args: &config.NodeAffinityArgs{
 				AddedAffinity: &v1.NodeAffinity{
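These entries extend a table-driven test. A minimal sketch of the loop that consumes them, with a hypothetical hintCase struct and helper name standing in for the definitions in the real test file:

package nodeaffinity

import (
	"testing"

	v1 "k8s.io/api/core/v1"
	framework "k8s.io/kubernetes/pkg/scheduler/framework"
	"k8s.io/klog/v2/ktesting"
)

// hintCase mirrors the shape of the table entries above; the real test file
// defines its own case struct, so treat this as illustrative.
type hintCase struct {
	pod            *v1.Pod
	oldObj, newObj interface{}
	expectedHint   framework.QueueingHint
}

// runHintCases invokes the queueing hint for every case and compares the
// returned hint against the expectation.
func runHintCases(t *testing.T, pl *NodeAffinity, cases map[string]hintCase) {
	logger, _ := ktesting.NewTestContext(t)
	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			got, err := pl.isSchedulableAfterNodeChange(logger, tc.pod, tc.oldObj, tc.newObj)
			if err != nil {
				t.Fatalf("unexpected error: %v", err)
			}
			if got != tc.expectedHint {
				t.Errorf("expected hint %v, got %v", tc.expectedHint, got)
			}
		})
	}
}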
@@ -146,7 +146,7 @@ const (
 
 type ClusterEventWithHint struct {
 	Event ClusterEvent
-	// QueueingHintFn is executed for the plugin rejected by this plugin when the above Event happens,
+	// QueueingHintFn is executed for the Pod rejected by this plugin when the above Event happens,
 	// and filters out events to reduce useless retry of Pod's scheduling.
 	// It's an optional field. If not set,
 	// the scheduling of Pods will be always retried with backoff when this Event happens.
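For reference, a sketch of the framework definitions behind that field, consistent with the isSchedulableAfterNodeChange signature in the hunks above (the constant ordering shown here is illustrative):

package framework

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/klog/v2"
)

// QueueingHint is the verdict a hint function returns for a cluster event.
type QueueingHint int

const (
	// QueueSkip: the event cannot make the Pod schedulable; don't requeue.
	QueueSkip QueueingHint = iota
	// Queue: the event may make the Pod schedulable; requeue it with backoff.
	Queue
)

// QueueingHintFn receives the rejected Pod plus the event's old/new objects
// and maps them to a hint; on error the caller falls back to requeueing.
type QueueingHintFn func(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (QueueingHint, error)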
@@ -271,7 +271,48 @@ func TestCoreResourceEnqueue(t *testing.T) {
 				// It causes pod1 to be requeued.
 				// It causes pod2 not to be requeued.
 				if _, err := testCtx.ClientSet.CoreV1().Nodes().Update(testCtx.Ctx, st.MakeNode().Name("fake-node1").Label("group", "b").Obj(), metav1.UpdateOptions{}); err != nil {
-					return fmt.Errorf("failed to remove taints off the node: %w", err)
+					return fmt.Errorf("failed to update the node: %w", err)
 				}
 				return nil
 			},
 			wantRequeuedPods: sets.New("pod1"),
 		},
+		{
+			name: "Pod rejected by the NodeAffinity plugin is not requeued when an updated Node hasn't changed the 'match' verdict",
+			initialNodes: []*v1.Node{
+				st.MakeNode().Name("node1").Label("group", "a").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Obj(),
+				st.MakeNode().Name("node2").Label("group", "b").Obj()},
+			pods: []*v1.Pod{
+				// - The initial pod would be accepted by the NodeAffinity plugin for node1, but will be blocked by the NodeResources plugin.
+				// - The pod will be blocked by the NodeAffinity plugin for node2, therefore we know NodeAffinity will be queried for a QHint for both testing nodes.
+				st.MakePod().Name("pod1").NodeAffinityIn("group", []string{"a"}, st.NodeSelectorTypeMatchExpressions).Req(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Container("image").Obj(),
+			},
+			triggerFn: func(testCtx *testutils.TestContext) error {
+				// Trigger a NodeUpdate event to add a new label.
+				// It won't cause the pod to be requeued, because there was a match already before the update, meaning this plugin wasn't blocking the scheduling.
+				if _, err := testCtx.ClientSet.CoreV1().Nodes().Update(testCtx.Ctx, st.MakeNode().Name("node1").Label("group", "a").Label("node", "fake-node").Obj(), metav1.UpdateOptions{}); err != nil {
+					return fmt.Errorf("failed to update the node: %w", err)
+				}
+				return nil
+			},
+			wantRequeuedPods:          sets.Set[string]{},
+			enableSchedulingQueueHint: []bool{true},
+		},
+		{
+			name:         "Pod rejected by the NodeAffinity plugin is requeued when a Node is added",
+			initialNodes: []*v1.Node{st.MakeNode().Name("fake-node1").Label("group", "a").Obj()},
+			pods: []*v1.Pod{
+				// - Pod1 will be rejected by the NodeAffinity plugin.
+				st.MakePod().Name("pod1").NodeAffinityIn("group", []string{"b"}, st.NodeSelectorTypeMatchExpressions).Container("image").Obj(),
+				// - Pod2 will be rejected by the NodeAffinity plugin.
+				st.MakePod().Name("pod2").NodeAffinityIn("group", []string{"c"}, st.NodeSelectorTypeMatchExpressions).Container("image").Obj(),
+			},
+			triggerFn: func(testCtx *testutils.TestContext) error {
+				// Trigger a NodeAdd event with the awaited label.
+				// It causes pod1 to be requeued.
+				// It causes pod2 not to be requeued.
+				if _, err := testCtx.ClientSet.CoreV1().Nodes().Create(testCtx.Ctx, st.MakeNode().Name("fake-node2").Label("group", "b").Obj(), metav1.CreateOptions{}); err != nil {
+					return fmt.Errorf("failed to create the node: %w", err)
+				}
+				return nil
+			},
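The enableSchedulingQueueHint field in the cases above runs each case with the SchedulerQueueingHints feature gate on and/or off. A minimal sketch of how such a toggle is typically applied in-process (helper and gate names from component-base and pkg/features; the harness's actual wiring may differ):

package queue

import (
	"testing"

	utilfeature "k8s.io/apiserver/pkg/util/feature"
	featuregatetesting "k8s.io/component-base/featuregate/testing"
	"k8s.io/kubernetes/pkg/features"
)

// withQueueingHints flips the SchedulerQueueingHints gate for one test run;
// recent releases restore the previous value via t.Cleanup automatically.
func withQueueingHints(t *testing.T, enabled bool) {
	featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerQueueingHints, enabled)
}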