From 71babd14964db86d485e8c42dcf96c51bf4878c1 Mon Sep 17 00:00:00 2001
From: Harry Zhang <harryz@hyper.sh>
Date: Wed, 6 Sep 2017 23:04:51 +0800
Subject: [PATCH] Note equivalence class for dev and other fix

---
 .../algorithm/predicates/predicates.go        |  8 +++
 .../algorithmprovider/defaults/defaults.go    |  8 +++
 .../pkg/scheduler/core/equivalence_cache.go   | 51 ++++++++++++-------
 plugin/pkg/scheduler/scheduler.go             |  2 +-
 4 files changed, 49 insertions(+), 20 deletions(-)

diff --git a/plugin/pkg/scheduler/algorithm/predicates/predicates.go b/plugin/pkg/scheduler/algorithm/predicates/predicates.go
index a2fef1bd0c5..5d349376a20 100644
--- a/plugin/pkg/scheduler/algorithm/predicates/predicates.go
+++ b/plugin/pkg/scheduler/algorithm/predicates/predicates.go
@@ -49,6 +49,14 @@ const (
 	MatchInterPodAffinity = "MatchInterPodAffinity"
 )
 
+// IMPORTANT NOTE for predicate developers:
+// We are using cached predicate result for pods belonging to the same equivalence class.
+// So when updating a existing predicate, you should consider whether your change will introduce new
+// dependency to attributes of any API object like Pod, Node, Service etc.
+// If yes, you are expected to invalidate the cached predicate result for related API object change.
+// For example:
+// https://github.com/kubernetes/kubernetes/blob/36a218e/plugin/pkg/scheduler/factory/factory.go#L422
+
 // NodeInfo: Other types for predicate functions...
 type NodeInfo interface {
 	GetNodeInfo(nodeID string) (*v1.Node, error)
diff --git a/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go b/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go
index 041f2a8307c..65998f854a2 100644
--- a/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go
+++ b/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go
@@ -67,6 +67,14 @@ func init() {
 	factory.RegisterAlgorithmProvider(ClusterAutoscalerProvider, defaultPredicates(),
 		copyAndReplace(defaultPriorities(), "LeastRequestedPriority", "MostRequestedPriority"))
 
+	// IMPORTANT NOTES for predicate developers:
+	// We are using cached predicate result for pods belonging to the same equivalence class.
+	// So when implementing a new predicate, you are expected to check whether the result
+	// of your predicate function can be affected by related API object change (ADD/DELETE/UPDATE).
+	// If yes, you are expected to invalidate the cached predicate result for related API object change.
+	// For example:
+	// https://github.com/kubernetes/kubernetes/blob/36a218e/plugin/pkg/scheduler/factory/factory.go#L422
+
 	// Registers predicates and priorities that are not enabled by default, but user can pick when creating his
 	// own set of priorities/predicates.
 
diff --git a/plugin/pkg/scheduler/core/equivalence_cache.go b/plugin/pkg/scheduler/core/equivalence_cache.go
index d48f5a4ef83..9977fe18d30 100644
--- a/plugin/pkg/scheduler/core/equivalence_cache.go
+++ b/plugin/pkg/scheduler/core/equivalence_cache.go
@@ -29,35 +29,39 @@ import (
 	"github.com/golang/groupcache/lru"
 )
 
-// we use predicate names as cache's key, its count is limited
+// We use predicate names as cache's key, its count is limited
 const maxCacheEntries = 100
 
+// EquivalenceCache holds:
+// 1. a map of AlgorithmCache with node name as key
+// 2. function to get equivalence pod
+type EquivalenceCache struct {
+	sync.RWMutex
+	getEquivalencePod algorithm.GetEquivalencePodFunc
+	algorithmCache    map[string]AlgorithmCache
+}
+
+// The AlgorithmCache stores PredicateMap with predicate name as key
+type AlgorithmCache struct {
+	// Only consider predicates for now
+	predicatesCache *lru.Cache
+}
+
+// PredicateMap stores HostPrediacte with equivalence hash as key
+type PredicateMap map[uint64]HostPredicate
+
+// HostPredicate is the cached predicate result
 type HostPredicate struct {
 	Fit         bool
 	FailReasons []algorithm.PredicateFailureReason
 }
 
-type AlgorithmCache struct {
-	// Only consider predicates for now, priorities rely on: #31606
-	predicatesCache *lru.Cache
-}
-
-// PredicateMap use equivalence hash as key
-type PredicateMap map[uint64]HostPredicate
-
 func newAlgorithmCache() AlgorithmCache {
 	return AlgorithmCache{
 		predicatesCache: lru.New(maxCacheEntries),
 	}
 }
 
-// EquivalenceCache stores a map of predicate cache with maxsize
-type EquivalenceCache struct {
-	sync.RWMutex
-	getEquivalencePod algorithm.GetEquivalencePodFunc
-	algorithmCache    map[string]AlgorithmCache
-}
-
 func NewEquivalenceCache(getEquivalencePodFunc algorithm.GetEquivalencePodFunc) *EquivalenceCache {
 	return &EquivalenceCache{
 		getEquivalencePod: getEquivalencePodFunc,
@@ -66,7 +70,12 @@ func NewEquivalenceCache(getEquivalencePodFunc algorithm.GetEquivalencePodFunc)
 }
 
 // UpdateCachedPredicateItem updates pod predicate for equivalence class
-func (ec *EquivalenceCache) UpdateCachedPredicateItem(podName, nodeName, predicateKey string, fit bool, reasons []algorithm.PredicateFailureReason, equivalenceHash uint64) {
+func (ec *EquivalenceCache) UpdateCachedPredicateItem(
+	podName, nodeName, predicateKey string,
+	fit bool,
+	reasons []algorithm.PredicateFailureReason,
+	equivalenceHash uint64,
+) {
 	ec.Lock()
 	defer ec.Unlock()
 	if _, exist := ec.algorithmCache[nodeName]; !exist {
@@ -95,10 +104,14 @@ func (ec *EquivalenceCache) UpdateCachedPredicateItem(podName, nodeName, predica
 // 2. reasons if not fit
 // 3. if this cache is invalid
 // based on cached predicate results
-func (ec *EquivalenceCache) PredicateWithECache(podName, nodeName, predicateKey string, equivalenceHash uint64) (bool, []algorithm.PredicateFailureReason, bool) {
+func (ec *EquivalenceCache) PredicateWithECache(
+	podName, nodeName, predicateKey string,
+	equivalenceHash uint64,
+) (bool, []algorithm.PredicateFailureReason, bool) {
 	ec.RLock()
 	defer ec.RUnlock()
-	glog.V(5).Infof("Begin to calculate predicate: %v for pod: %s on node: %s based on equivalence cache", predicateKey, podName, nodeName)
+	glog.V(5).Infof("Begin to calculate predicate: %v for pod: %s on node: %s based on equivalence cache",
+		predicateKey, podName, nodeName)
 	if algorithmCache, exist := ec.algorithmCache[nodeName]; exist {
 		if cachePredicate, exist := algorithmCache.predicatesCache.Get(predicateKey); exist {
 			predicateMap := cachePredicate.(PredicateMap)
diff --git a/plugin/pkg/scheduler/scheduler.go b/plugin/pkg/scheduler/scheduler.go
index 8f1c8ea38ea..729ac23a6cc 100644
--- a/plugin/pkg/scheduler/scheduler.go
+++ b/plugin/pkg/scheduler/scheduler.go
@@ -223,7 +223,7 @@ func (sched *Scheduler) preempt(preemptor *v1.Pod, scheduleErr error) (string, e
 	return node.Name, err
 }
 
-// assume signals to the cache that a pod is already in the cache, so that binding can be asnychronous.
+// assume signals to the cache that a pod is already in the cache, so that binding can be asynchronous.
 // assume modifies `assumed`.
 func (sched *Scheduler) assume(assumed *v1.Pod, host string) error {
 	// Optimistically assume that the binding will succeed and send it to apiserver