From 1b3d10aafa31e7d7400d62cdb20a3e03da32836c Mon Sep 17 00:00:00 2001
From: nayihz
Date: Mon, 5 Feb 2024 16:37:04 +0800
Subject: [PATCH] fix: node added with matched pod anti-affinity topologyKey

Co-authored-by: Kensei Nakada
---
 .../plugins/interpodaffinity/plugin.go      | 27 +++++++++++++++----
 .../plugins/interpodaffinity/plugin_test.go | 13 ++++++---
 pkg/scheduler/framework/types.go            |  2 +-
 test/integration/scheduler/queue_test.go    |  6 ++---
 4 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/pkg/scheduler/framework/plugins/interpodaffinity/plugin.go b/pkg/scheduler/framework/plugins/interpodaffinity/plugin.go
index e89afc0f7dc..738ba70aed4 100644
--- a/pkg/scheduler/framework/plugins/interpodaffinity/plugin.go
+++ b/pkg/scheduler/framework/plugins/interpodaffinity/plugin.go
@@ -166,6 +166,8 @@ func (pl *InterPodAffinity) isSchedulableAfterPodChange(logger klog.Logger, pod
 	}
 
 	// Pod is updated. Return Queue when the updated pod matching the target pod's affinity or not matching anti-affinity.
+	// Note that we don't need to check each affinity individually when the Pod has more than one affinity
+	// because the current PodAffinity looks for a **single** existing pod that can satisfy **all** the terms of inter-pod affinity of an incoming pod.
 	if modifiedPod != nil && originalPod != nil {
 		if !podMatchesAllAffinityTerms(terms, originalPod) && podMatchesAllAffinityTerms(terms, modifiedPod) {
 			logger.V(5).Info("a scheduled pod was updated to match the target pod's affinity, and the pod may be schedulable now",
@@ -195,12 +197,12 @@ func (pl *InterPodAffinity) isSchedulableAfterPodChange(logger klog.Logger, pod
 	}
 
 	// Pod is deleted. Return Queue when the deleted pod matching the target pod's anti-affinity.
-	if podMatchesAllAffinityTerms(antiTerms, originalPod) {
-		logger.V(5).Info("a scheduled pod was deteled but it matches the target pod's anti-affinity",
+	if !podMatchesAllAffinityTerms(antiTerms, originalPod) {
+		logger.V(5).Info("a scheduled pod was deleted but it doesn't match the target pod's anti-affinity",
 			"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
 		return framework.QueueSkip, nil
 	}
-	logger.V(5).Info("a scheduled pod was deleted but it doesn't match the target pod's anti-affinity, and the pod may be schedulable now",
+	logger.V(5).Info("a scheduled pod was deleted and it matches the target pod's anti-affinity. The pod may be schedulable now",
 		"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
 	return framework.Queue, nil
 }
@@ -218,10 +220,25 @@ func (pl *InterPodAffinity) isSchedulableAfterNodeChange(logger klog.Logger, pod
 
 	for _, term := range terms {
 		if _, ok := modifiedNode.Labels[term.TopologyKey]; ok {
-			logger.V(5).Info("a node with topologyKey was added/updated and it may make pod schedulable", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
+			logger.V(5).Info("a node with matched pod affinity topologyKey was added/updated and it may make pod schedulable",
+				"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
 			return framework.Queue, err
 		}
 	}
-	logger.V(5).Info("a node is added/updated but doesn't have any topologyKey of pod affinity", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
+
+	antiTerms, err := framework.GetAffinityTerms(pod, framework.GetPodAntiAffinityTerms(pod.Spec.Affinity))
+	if err != nil {
+		return framework.Queue, err
+	}
+
+	for _, term := range antiTerms {
+		if _, ok := modifiedNode.Labels[term.TopologyKey]; ok {
+			logger.V(5).Info("a node with matched pod anti-affinity topologyKey was added/updated and it may make pod schedulable",
+				"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
+			return framework.Queue, err
+		}
+	}
+	logger.V(5).Info("a node is added/updated but doesn't have any topologyKey which matches pod affinity/anti-affinity",
+		"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
 	return framework.QueueSkip, nil
 }
diff --git a/pkg/scheduler/framework/plugins/interpodaffinity/plugin_test.go b/pkg/scheduler/framework/plugins/interpodaffinity/plugin_test.go
index d71fddc0dd2..67184dd8257 100644
--- a/pkg/scheduler/framework/plugins/interpodaffinity/plugin_test.go
+++ b/pkg/scheduler/framework/plugins/interpodaffinity/plugin_test.go
@@ -22,6 +22,7 @@ import (
 
 	"github.com/google/go-cmp/cmp"
 	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/klog/v2/ktesting"
 	"k8s.io/kubernetes/pkg/scheduler/apis/config"
 	"k8s.io/kubernetes/pkg/scheduler/framework"
@@ -116,13 +117,13 @@ func Test_isSchedulableAfterPodChange(t *testing.T) {
 		{
 			name:         "delete a pod which doesn't match pod's anti-affinity",
 			pod:          st.MakePod().Name("p").PodAntiAffinityIn("service", "region", []string{"securityscan", "value2"}, st.PodAntiAffinityWithRequiredReq).Obj(),
-			oldPod:       st.MakePod().Node("fake-node").Label("service", "securityscan").Obj(),
+			oldPod:       st.MakePod().Node("fake-node").Label("aaa", "a").Obj(),
 			expectedHint: framework.QueueSkip,
 		},
 		{
 			name:         "delete a pod which matches pod's anti-affinity",
 			pod:          st.MakePod().Name("p").PodAntiAffinityIn("service", "region", []string{"securityscan", "value2"}, st.PodAntiAffinityWithRequiredReq).Obj(),
-			oldPod:       st.MakePod().Node("fake-node").Label("aaa", "a").Obj(),
+			oldPod:       st.MakePod().Node("fake-node").Label("service", "securityscan").Obj(),
 			expectedHint: framework.Queue,
 		},
 	}
@@ -154,11 +155,17 @@ func Test_isSchedulableAfterNodeChange(t *testing.T) {
 		expectedHint framework.QueueingHint
 	}{
 		{
-			name:         "add a new node with matched topologyKey",
+			name:         "add a new node with matched pod affinity topologyKey",
 			pod:          st.MakePod().Name("p").PodAffinityIn("service", "zone", []string{"securityscan", "value2"}, st.PodAffinityWithRequiredReq).Obj(),
 			newNode:      st.MakeNode().Name("node-a").Label("zone", "zone1").Obj(),
 			expectedHint: framework.Queue,
 		},
+		{
+			name:         "add a new node with matched pod anti-affinity topologyKey",
+			pod:          st.MakePod().Name("p").PodAntiAffinity("zone", &metav1.LabelSelector{MatchLabels: map[string]string{"another": "label"}}, st.PodAntiAffinityWithRequiredReq).Obj(),
+			newNode:      st.MakeNode().Name("node-a").Label("zone", "zone1").Obj(),
+			expectedHint: framework.Queue,
+		},
 		{
 			name:         "add a new node without matched topologyKey",
 			pod:          st.MakePod().Name("p").PodAffinityIn("service", "region", []string{"securityscan", "value2"}, st.PodAffinityWithRequiredReq).Obj(),
diff --git a/pkg/scheduler/framework/types.go b/pkg/scheduler/framework/types.go
index bdedd76f0c9..97b8010e0e5 100644
--- a/pkg/scheduler/framework/types.go
+++ b/pkg/scheduler/framework/types.go
@@ -433,7 +433,7 @@ func newAffinityTerm(pod *v1.Pod, term *v1.PodAffinityTerm) (*AffinityTerm, erro
 	return &AffinityTerm{Namespaces: namespaces, Selector: selector, TopologyKey: term.TopologyKey, NamespaceSelector: nsSelector}, nil
 }
 
-// getAffinityTerms receives a Pod and affinity terms and returns the namespaces and
+// GetAffinityTerms receives a Pod and affinity terms and returns the namespaces and
 // selectors of the terms.
 func GetAffinityTerms(pod *v1.Pod, v1Terms []v1.PodAffinityTerm) ([]AffinityTerm, error) {
 	if v1Terms == nil {
diff --git a/test/integration/scheduler/queue_test.go b/test/integration/scheduler/queue_test.go
index fee71583b3d..7ed347873d4 100644
--- a/test/integration/scheduler/queue_test.go
+++ b/test/integration/scheduler/queue_test.go
@@ -204,9 +204,9 @@ func TestCoreResourceEnqueue(t *testing.T) {
 			// However, due to preCheck, it's not requeueing pod2 to activeQ.
 			// It'll be fixed by the removal of preCheck in the future.
 			// https://github.com/kubernetes/kubernetes/issues/110175
-			node := st.MakeNode().Name("fake-node2").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Taints([]v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoSchedule}}).Obj()
-			if _, err := testCtx.ClientSet.CoreV1().Nodes().Create(testCtx.Ctx, st.MakeNode().Name("fake-node2").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Taints([]v1.Taint{{Key: "foo", Effect: v1.TaintEffectNoSchedule}}).Obj(), metav1.CreateOptions{}); err != nil {
-				return fmt.Errorf("failed to create a newnode: %w", err)
+			node := st.MakeNode().Name("fake-node2").Label("node", "fake-node2").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Taints([]v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoSchedule}}).Obj()
+			if _, err := testCtx.ClientSet.CoreV1().Nodes().Create(testCtx.Ctx, node, metav1.CreateOptions{}); err != nil {
+				return fmt.Errorf("failed to create a new node: %w", err)
 			}
 			// As a mitigation of an issue described above, all plugins subscribing Node/Add event register UpdateNodeTaint too.
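Reviewer note: the standalone sketch below is a simplified model of the queueing-hint rule this patch changes, not the actual k8s.io/kubernetes/pkg/scheduler/framework API; the QueueingHint stand-in, the hintForNodeChange helper, and the plain key/label slices are all invented for illustration. The rule: a node add/update is only worth requeueing a pending pod if the node carries a topologyKey referenced by the pod's affinity or anti-affinity terms. Before this patch only the affinity terms were consulted, so a pod blocked purely by anti-affinity was never retried when a new node opened up a topology domain with no conflicting pods.

package main

import "fmt"

// QueueingHint is a simplified stand-in for the scheduler framework's type.
type QueueingHint string

const (
	queue     QueueingHint = "Queue"
	queueSkip QueueingHint = "QueueSkip"
)

// hintForNodeChange models the decision made in isSchedulableAfterNodeChange:
// an added/updated node can only help the pending pod if it carries a
// topologyKey referenced by the pod's affinity *or* anti-affinity terms.
func hintForNodeChange(nodeLabels map[string]string, affinityKeys, antiAffinityKeys []string) QueueingHint {
	for _, k := range affinityKeys {
		if _, ok := nodeLabels[k]; ok {
			return queue // node has a pod-affinity topologyKey
		}
	}
	for _, k := range antiAffinityKeys {
		if _, ok := nodeLabels[k]; ok {
			return queue // node has a pod-anti-affinity topologyKey (the case this patch adds)
		}
	}
	return queueSkip // the node change cannot affect this pod's placement
}

func main() {
	node := map[string]string{"zone": "zone1"}
	// A pod whose only term is anti-affinity on "zone": previously skipped, now requeued.
	fmt.Println(hintForNodeChange(node, nil, []string{"zone"})) // Queue
	// A pod whose terms reference only "region": still correctly skipped.
	fmt.Println(hintForNodeChange(node, []string{"region"}, nil)) // QueueSkip
}

The first call in main mirrors the new "add a new node with matched pod anti-affinity topologyKey" case in plugin_test.go: with only an anti-affinity term on "zone" and a node labeled zone=zone1, the hint is now Queue instead of QueueSkip.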