diff --git a/pkg/scheduler/algorithm/predicates/metadata.go b/pkg/scheduler/algorithm/predicates/metadata.go index d2f7abf2727..0a6ba5203f1 100644 --- a/pkg/scheduler/algorithm/predicates/metadata.go +++ b/pkg/scheduler/algorithm/predicates/metadata.go @@ -338,13 +338,20 @@ func GetPredicateMetadata(pod *v1.Pod, sharedLister schedulerlisters.SharedListe } var allNodes []*schedulernodeinfo.NodeInfo + var havePodsWithAffinityNodes []*schedulernodeinfo.NodeInfo if sharedLister != nil { - n, err := sharedLister.NodeInfos().List() + var err error + allNodes, err = sharedLister.NodeInfos().List() if err != nil { klog.Errorf("failed to list NodeInfos: %v", err) return nil } - allNodes = n + havePodsWithAffinityNodes, err = sharedLister.NodeInfos().HavePodsWithAffinityList() + if err != nil { + klog.Errorf("failed to list NodeInfos: %v", err) + return nil + } + } // evenPodsSpreadMetadata represents how existing pods match "pod" @@ -355,7 +362,7 @@ func GetPredicateMetadata(pod *v1.Pod, sharedLister schedulerlisters.SharedListe return nil } - podAffinityMetadata, err := getPodAffinityMetadata(pod, allNodes) + podAffinityMetadata, err := getPodAffinityMetadata(pod, allNodes, havePodsWithAffinityNodes) if err != nil { klog.Errorf("Error calculating podAffinityMetadata: %v", err) return nil @@ -387,9 +394,9 @@ func getPodFitsResourcesMetedata(pod *v1.Pod) *podFitsResourcesMetadata { } } -func getPodAffinityMetadata(pod *v1.Pod, allNodes []*schedulernodeinfo.NodeInfo) (*podAffinityMetadata, error) { +func getPodAffinityMetadata(pod *v1.Pod, allNodes []*schedulernodeinfo.NodeInfo, havePodsWithAffinityNodes []*schedulernodeinfo.NodeInfo) (*podAffinityMetadata, error) { // existingPodAntiAffinityMap will be used later for efficient check on existing pods' anti-affinity - existingPodAntiAffinityMap, err := getTPMapMatchingExistingAntiAffinity(pod, allNodes) + existingPodAntiAffinityMap, err := getTPMapMatchingExistingAntiAffinity(pod, havePodsWithAffinityNodes) if err != nil { 
return nil, err } @@ -759,7 +766,9 @@ func getTPMapMatchingExistingAntiAffinity(pod *v1.Pod, allNodes []*schedulernode errCh.SendErrorWithCancel(err, cancel) return } - appendTopologyPairsMaps(existingPodTopologyMaps) + if existingPodTopologyMaps != nil { + appendTopologyPairsMaps(existingPodTopologyMaps) + } } } workqueue.ParallelizeUntil(ctx, 16, len(allNodes), processNode) diff --git a/pkg/scheduler/algorithm/priorities/interpod_affinity.go b/pkg/scheduler/algorithm/priorities/interpod_affinity.go index ea689103b01..75188163fb3 100644 --- a/pkg/scheduler/algorithm/priorities/interpod_affinity.go +++ b/pkg/scheduler/algorithm/priorities/interpod_affinity.go @@ -106,10 +106,17 @@ func (ipa *InterPodAffinity) CalculateInterPodAffinityPriority(pod *v1.Pod, shar // pm stores (1) all nodes that should be considered and (2) the so-far computed score for each node. pm := newPodAffinityPriorityMap(nodes) - allNodes, err := sharedLister.NodeInfos().List() + + allNodes, err := sharedLister.NodeInfos().HavePodsWithAffinityList() if err != nil { return nil, err } + if hasAffinityConstraints || hasAntiAffinityConstraints { + allNodes, err = sharedLister.NodeInfos().List() + if err != nil { + return nil, err + } + } // convert the topology key based weights to the node name based weights var maxCount, minCount int64 diff --git a/pkg/scheduler/core/generic_scheduler.go b/pkg/scheduler/core/generic_scheduler.go index b266e8a1ceb..283b931f33f 100644 --- a/pkg/scheduler/core/generic_scheduler.go +++ b/pkg/scheduler/core/generic_scheduler.go @@ -179,7 +179,6 @@ func (g *genericScheduler) snapshot() error { // for cluster autoscaler integration. func (g *genericScheduler) PredicateMetadataProducer() predicates.PredicateMetadataProducer { return g.predicateMetaProducer - } // Schedule tries to schedule the given pod to one of the nodes in the node list. 
diff --git a/pkg/scheduler/internal/cache/cache.go b/pkg/scheduler/internal/cache/cache.go index 09877d50e85..d740c0625d9 100644 --- a/pkg/scheduler/internal/cache/cache.go +++ b/pkg/scheduler/internal/cache/cache.go @@ -239,10 +239,14 @@ func (cache *schedulerCache) UpdateNodeInfoSnapshot(nodeSnapshot *nodeinfosnapsh // Take a snapshot of the nodes order in the tree nodeSnapshot.NodeInfoList = make([]*schedulernodeinfo.NodeInfo, 0, cache.nodeTree.numNodes) + nodeSnapshot.HavePodsWithAffinityNodeInfoList = make([]*schedulernodeinfo.NodeInfo, 0, cache.nodeTree.numNodes) for i := 0; i < cache.nodeTree.numNodes; i++ { nodeName := cache.nodeTree.next() if n := nodeSnapshot.NodeInfoMap[nodeName]; n != nil { nodeSnapshot.NodeInfoList = append(nodeSnapshot.NodeInfoList, n) + if len(n.PodsWithAffinity()) > 0 { + nodeSnapshot.HavePodsWithAffinityNodeInfoList = append(nodeSnapshot.HavePodsWithAffinityNodeInfoList, n) + } } else { klog.Errorf("node %q exist in nodeTree but not in NodeInfoMap, this should not happen.", nodeName) } diff --git a/pkg/scheduler/listers/fake/listers.go b/pkg/scheduler/listers/fake/listers.go index b09717432b0..172870b3b97 100644 --- a/pkg/scheduler/listers/fake/listers.go +++ b/pkg/scheduler/listers/fake/listers.go @@ -262,6 +262,12 @@ func (nodes NodeInfoLister) List() ([]*schedulernodeinfo.NodeInfo, error) { return nodes, nil } +// HavePodsWithAffinityList is supposed to list nodes with at least one pod with affinity. For the fake lister +// we just return everything. +func (nodes NodeInfoLister) HavePodsWithAffinityList() ([]*schedulernodeinfo.NodeInfo, error) { + return nodes, nil +} + // NewNodeInfoLister create a new fake NodeInfoLister from a slice of v1.Nodes. 
func NewNodeInfoLister(nodes []*v1.Node) schedulerlisters.NodeInfoLister { nodeInfoList := make([]*schedulernodeinfo.NodeInfo, len(nodes)) diff --git a/pkg/scheduler/listers/listers.go b/pkg/scheduler/listers/listers.go index a760402a109..984493f0e17 100644 --- a/pkg/scheduler/listers/listers.go +++ b/pkg/scheduler/listers/listers.go @@ -38,6 +38,8 @@ type PodLister interface { type NodeInfoLister interface { // Returns the list of NodeInfos. List() ([]*schedulernodeinfo.NodeInfo, error) + // Returns the list of NodeInfos of nodes with pods with affinity terms. + HavePodsWithAffinityList() ([]*schedulernodeinfo.NodeInfo, error) // Returns the NodeInfo of the given node name. Get(nodeName string) (*schedulernodeinfo.NodeInfo, error) } diff --git a/pkg/scheduler/nodeinfo/snapshot/snapshot.go b/pkg/scheduler/nodeinfo/snapshot/snapshot.go index 4b027383fc9..68e9cf82f29 100644 --- a/pkg/scheduler/nodeinfo/snapshot/snapshot.go +++ b/pkg/scheduler/nodeinfo/snapshot/snapshot.go @@ -32,7 +32,9 @@ type Snapshot struct { NodeInfoMap map[string]*schedulernodeinfo.NodeInfo // NodeInfoList is the list of nodes as ordered in the cache's nodeTree. NodeInfoList []*schedulernodeinfo.NodeInfo - Generation int64 + // HavePodsWithAffinityNodeInfoList is the list of nodes with at least one pod declaring affinity terms. 
+ HavePodsWithAffinityNodeInfoList []*schedulernodeinfo.NodeInfo + Generation int64 } var _ schedulerlisters.SharedLister = &Snapshot{} @@ -48,13 +50,18 @@ func NewEmptySnapshot() *Snapshot { func NewSnapshot(pods []*v1.Pod, nodes []*v1.Node) *Snapshot { nodeInfoMap := schedulernodeinfo.CreateNodeNameToInfoMap(pods, nodes) nodeInfoList := make([]*schedulernodeinfo.NodeInfo, 0, len(nodes)) + havePodsWithAffinityNodeInfoList := make([]*schedulernodeinfo.NodeInfo, 0, len(nodes)) for _, v := range nodeInfoMap { nodeInfoList = append(nodeInfoList, v) + if len(v.PodsWithAffinity()) > 0 { + havePodsWithAffinityNodeInfoList = append(havePodsWithAffinityNodeInfoList, v) + } } s := NewEmptySnapshot() s.NodeInfoMap = nodeInfoMap s.NodeInfoList = nodeInfoList + s.HavePodsWithAffinityNodeInfoList = havePodsWithAffinityNodeInfoList return s } @@ -119,6 +126,11 @@ func (n *nodeInfoLister) List() ([]*schedulernodeinfo.NodeInfo, error) { return n.snapshot.NodeInfoList, nil } +// HavePodsWithAffinityList returns the list of nodes with at least one pod with inter-pod affinity. +func (n *nodeInfoLister) HavePodsWithAffinityList() ([]*schedulernodeinfo.NodeInfo, error) { + return n.snapshot.HavePodsWithAffinityNodeInfoList, nil +} + // Returns the NodeInfo of the given node name. func (n *nodeInfoLister) Get(nodeName string) (*schedulernodeinfo.NodeInfo, error) { if v, ok := n.snapshot.NodeInfoMap[nodeName]; ok {