Merge pull request #52146 from resouer/eclass-fix

Automatic merge from submit-queue Note equivalence class for dev and other fix **What this PR does / why we need it**: 1. Add a note for predicate developers to respect equivalence class design 2. Add comments and re-ordered the related data structure, ref https://github.com/kubernetes/community/pull/1031 3. Fix some nits (typo, code length etc) **Special notes for your reviewer**: **Release note**: ```release-note Scheduler predicate developer should respect equivalence class cache ```
2025-07-23 19:56:01 +00:00 · 2017-09-12 04:36:10 -07:00 · 2017-09-12 04:36:10 -07:00 · 0ae98b6ffe
commit 0ae98b6ffe
parent 99b2ee1697 71babd1496
4 changed files with 49 additions and 20 deletions
--- a/plugin/pkg/scheduler/algorithm/predicates/predicates.go
+++ b/plugin/pkg/scheduler/algorithm/predicates/predicates.go
@ -49,6 +49,14 @@ const (
 	MatchInterPodAffinity = "MatchInterPodAffinity"
 )

+// IMPORTANT NOTE for predicate developers:
+// We are using cached predicate result for pods belonging to the same equivalence class.
+// So when updating an existing predicate, you should consider whether your change will introduce a new
+// dependency on attributes of any API object like Pod, Node, Service etc.
+// If yes, you are expected to invalidate the cached predicate result for related API object change.
+// For example:
+// https://github.com/kubernetes/kubernetes/blob/36a218e/plugin/pkg/scheduler/factory/factory.go#L422
+
 // NodeInfo: Other types for predicate functions...
 type NodeInfo interface {
 	GetNodeInfo(nodeID string) (*v1.Node, error)
--- a/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go
+++ b/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go
@ -67,6 +67,14 @@ func init() {
 	factory.RegisterAlgorithmProvider(ClusterAutoscalerProvider, defaultPredicates(),
 		copyAndReplace(defaultPriorities(), "LeastRequestedPriority", "MostRequestedPriority"))

+	// IMPORTANT NOTES for predicate developers:
+	// We are using cached predicate result for pods belonging to the same equivalence class.
+	// So when implementing a new predicate, you are expected to check whether the result
+	// of your predicate function can be affected by related API object change (ADD/DELETE/UPDATE).
+	// If yes, you are expected to invalidate the cached predicate result for related API object change.
+	// For example:
+	// https://github.com/kubernetes/kubernetes/blob/36a218e/plugin/pkg/scheduler/factory/factory.go#L422
+
 	// Registers predicates and priorities that are not enabled by default, but user can pick when creating his
 	// own set of priorities/predicates.

--- a/plugin/pkg/scheduler/core/equivalence_cache.go
+++ b/plugin/pkg/scheduler/core/equivalence_cache.go
@ -29,35 +29,39 @@ import (
 	"github.com/golang/groupcache/lru"
 )

-// we use predicate names as cache's key, its count is limited
+// We use predicate names as the cache's keys, so the number of entries is limited.
 const maxCacheEntries = 100

+// EquivalenceCache holds:
+// 1. a map of AlgorithmCache with node name as key
+// 2. function to get equivalence pod
+type EquivalenceCache struct {
+	sync.RWMutex
+	getEquivalencePod algorithm.GetEquivalencePodFunc
+	algorithmCache    map[string]AlgorithmCache
+}
+
+// AlgorithmCache stores PredicateMap with predicate name as key
+type AlgorithmCache struct {
+	// Only consider predicates for now
+	predicatesCache *lru.Cache
+}
+
+// PredicateMap stores HostPredicate with equivalence hash as key
+type PredicateMap map[uint64]HostPredicate
+
+// HostPredicate is the cached predicate result
 type HostPredicate struct {
 	Fit         bool
 	FailReasons []algorithm.PredicateFailureReason
 }

-type AlgorithmCache struct {
-	// Only consider predicates for now, priorities rely on: #31606
-	predicatesCache *lru.Cache
-}
-
-// PredicateMap use equivalence hash as key
-type PredicateMap map[uint64]HostPredicate
-
 func newAlgorithmCache() AlgorithmCache {
 	return AlgorithmCache{
 		predicatesCache: lru.New(maxCacheEntries),
 	}
 }

-// EquivalenceCache stores a map of predicate cache with maxsize
-type EquivalenceCache struct {
-	sync.RWMutex
-	getEquivalencePod algorithm.GetEquivalencePodFunc
-	algorithmCache    map[string]AlgorithmCache
-}
-
 func NewEquivalenceCache(getEquivalencePodFunc algorithm.GetEquivalencePodFunc) *EquivalenceCache {
 	return &EquivalenceCache{
 		getEquivalencePod: getEquivalencePodFunc,
@ -66,7 +70,12 @@ func NewEquivalenceCache(getEquivalencePodFunc algorithm.GetEquivalencePodFunc)
 }

 // UpdateCachedPredicateItem updates pod predicate for equivalence class
-func (ec *EquivalenceCache) UpdateCachedPredicateItem(podName, nodeName, predicateKey string, fit bool, reasons []algorithm.PredicateFailureReason, equivalenceHash uint64) {
+func (ec *EquivalenceCache) UpdateCachedPredicateItem(
+	podName, nodeName, predicateKey string,
+	fit bool,
+	reasons []algorithm.PredicateFailureReason,
+	equivalenceHash uint64,
+) {
 	ec.Lock()
 	defer ec.Unlock()
 	if _, exist := ec.algorithmCache[nodeName]; !exist {
@ -95,10 +104,14 @@ func (ec *EquivalenceCache) UpdateCachedPredicateItem(podName, nodeName, predica
 // 2. reasons if not fit
 // 3. if this cache is invalid
 // based on cached predicate results
-func (ec *EquivalenceCache) PredicateWithECache(podName, nodeName, predicateKey string, equivalenceHash uint64) (bool, []algorithm.PredicateFailureReason, bool) {
+func (ec *EquivalenceCache) PredicateWithECache(
+	podName, nodeName, predicateKey string,
+	equivalenceHash uint64,
+) (bool, []algorithm.PredicateFailureReason, bool) {
 	ec.RLock()
 	defer ec.RUnlock()
-	glog.V(5).Infof("Begin to calculate predicate: %v for pod: %s on node: %s based on equivalence cache", predicateKey, podName, nodeName)
+	glog.V(5).Infof("Begin to calculate predicate: %v for pod: %s on node: %s based on equivalence cache",
+		predicateKey, podName, nodeName)
 	if algorithmCache, exist := ec.algorithmCache[nodeName]; exist {
 		if cachePredicate, exist := algorithmCache.predicatesCache.Get(predicateKey); exist {
 			predicateMap := cachePredicate.(PredicateMap)
--- a/plugin/pkg/scheduler/scheduler.go
+++ b/plugin/pkg/scheduler/scheduler.go
@ -223,7 +223,7 @@ func (sched *Scheduler) preempt(preemptor *v1.Pod, scheduleErr error) (string, e
 	return node.Name, err
 }

-// assume signals to the cache that a pod is already in the cache, so that binding can be asnychronous.
+// assume signals to the cache that a pod is already in the cache, so that binding can be asynchronous.
 // assume modifies `assumed`.
 func (sched *Scheduler) assume(assumed *v1.Pod, host string) error {
 	// Optimistically assume that the binding will succeed and send it to apiserver