mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-03 17:30:00 +00:00
Merge pull request #52146 from resouer/eclass-fix
Automatic merge from submit-queue

Note equivalence class for dev and other fix

**What this PR does / why we need it**:

1. Add a note for predicate developers to respect equivalence class design
2. Add comments and re-order the related data structures, ref https://github.com/kubernetes/community/pull/1031
3. Fix some nits (typo, code length etc)

**Special notes for your reviewer**:

**Release note**:

```release-note
Scheduler predicate developer should respect equivalence class cache
```
This commit is contained in:
commit
0ae98b6ffe
@ -49,6 +49,14 @@ const (
|
|||||||
MatchInterPodAffinity = "MatchInterPodAffinity"
|
MatchInterPodAffinity = "MatchInterPodAffinity"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// IMPORTANT NOTE for predicate developers:
|
||||||
|
// We are using cached predicate result for pods belonging to the same equivalence class.
|
||||||
|
// So when updating an existing predicate, you should consider whether your change will introduce new
|
||||||
|
// dependency to attributes of any API object like Pod, Node, Service etc.
|
||||||
|
// If yes, you are expected to invalidate the cached predicate result for related API object change.
|
||||||
|
// For example:
|
||||||
|
// https://github.com/kubernetes/kubernetes/blob/36a218e/plugin/pkg/scheduler/factory/factory.go#L422
|
||||||
|
|
||||||
// NodeInfo: Other types for predicate functions...
|
// NodeInfo: Other types for predicate functions...
|
||||||
type NodeInfo interface {
|
type NodeInfo interface {
|
||||||
GetNodeInfo(nodeID string) (*v1.Node, error)
|
GetNodeInfo(nodeID string) (*v1.Node, error)
|
||||||
|
@ -67,6 +67,14 @@ func init() {
|
|||||||
factory.RegisterAlgorithmProvider(ClusterAutoscalerProvider, defaultPredicates(),
|
factory.RegisterAlgorithmProvider(ClusterAutoscalerProvider, defaultPredicates(),
|
||||||
copyAndReplace(defaultPriorities(), "LeastRequestedPriority", "MostRequestedPriority"))
|
copyAndReplace(defaultPriorities(), "LeastRequestedPriority", "MostRequestedPriority"))
|
||||||
|
|
||||||
|
// IMPORTANT NOTES for predicate developers:
|
||||||
|
// We are using cached predicate result for pods belonging to the same equivalence class.
|
||||||
|
// So when implementing a new predicate, you are expected to check whether the result
|
||||||
|
// of your predicate function can be affected by related API object change (ADD/DELETE/UPDATE).
|
||||||
|
// If yes, you are expected to invalidate the cached predicate result for related API object change.
|
||||||
|
// For example:
|
||||||
|
// https://github.com/kubernetes/kubernetes/blob/36a218e/plugin/pkg/scheduler/factory/factory.go#L422
|
||||||
|
|
||||||
// Registers predicates and priorities that are not enabled by default, but user can pick when creating his
|
// Registers predicates and priorities that are not enabled by default, but user can pick when creating his
|
||||||
// own set of priorities/predicates.
|
// own set of priorities/predicates.
|
||||||
|
|
||||||
|
@ -29,35 +29,39 @@ import (
|
|||||||
"github.com/golang/groupcache/lru"
|
"github.com/golang/groupcache/lru"
|
||||||
)
|
)
|
||||||
|
|
||||||
// we use predicate names as cache's key, its count is limited
|
// We use predicate names as the cache's keys; their count is limited
|
||||||
const maxCacheEntries = 100
|
const maxCacheEntries = 100
|
||||||
|
|
||||||
|
// EquivalenceCache holds:
|
||||||
|
// 1. a map of AlgorithmCache with node name as key
|
||||||
|
// 2. function to get equivalence pod
|
||||||
|
type EquivalenceCache struct {
|
||||||
|
sync.RWMutex
|
||||||
|
getEquivalencePod algorithm.GetEquivalencePodFunc
|
||||||
|
algorithmCache map[string]AlgorithmCache
|
||||||
|
}
|
||||||
|
|
||||||
|
// The AlgorithmCache stores PredicateMap with predicate name as key
|
||||||
|
type AlgorithmCache struct {
|
||||||
|
// Only consider predicates for now
|
||||||
|
predicatesCache *lru.Cache
|
||||||
|
}
|
||||||
|
|
||||||
|
// PredicateMap stores HostPredicate with equivalence hash as key
|
||||||
|
type PredicateMap map[uint64]HostPredicate
|
||||||
|
|
||||||
|
// HostPredicate is the cached predicate result
|
||||||
type HostPredicate struct {
|
type HostPredicate struct {
|
||||||
Fit bool
|
Fit bool
|
||||||
FailReasons []algorithm.PredicateFailureReason
|
FailReasons []algorithm.PredicateFailureReason
|
||||||
}
|
}
|
||||||
|
|
||||||
type AlgorithmCache struct {
|
|
||||||
// Only consider predicates for now, priorities rely on: #31606
|
|
||||||
predicatesCache *lru.Cache
|
|
||||||
}
|
|
||||||
|
|
||||||
// PredicateMap use equivalence hash as key
|
|
||||||
type PredicateMap map[uint64]HostPredicate
|
|
||||||
|
|
||||||
func newAlgorithmCache() AlgorithmCache {
|
func newAlgorithmCache() AlgorithmCache {
|
||||||
return AlgorithmCache{
|
return AlgorithmCache{
|
||||||
predicatesCache: lru.New(maxCacheEntries),
|
predicatesCache: lru.New(maxCacheEntries),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// EquivalenceCache stores a map of predicate cache with maxsize
|
|
||||||
type EquivalenceCache struct {
|
|
||||||
sync.RWMutex
|
|
||||||
getEquivalencePod algorithm.GetEquivalencePodFunc
|
|
||||||
algorithmCache map[string]AlgorithmCache
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewEquivalenceCache(getEquivalencePodFunc algorithm.GetEquivalencePodFunc) *EquivalenceCache {
|
func NewEquivalenceCache(getEquivalencePodFunc algorithm.GetEquivalencePodFunc) *EquivalenceCache {
|
||||||
return &EquivalenceCache{
|
return &EquivalenceCache{
|
||||||
getEquivalencePod: getEquivalencePodFunc,
|
getEquivalencePod: getEquivalencePodFunc,
|
||||||
@ -66,7 +70,12 @@ func NewEquivalenceCache(getEquivalencePodFunc algorithm.GetEquivalencePodFunc)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// UpdateCachedPredicateItem updates pod predicate for equivalence class
|
// UpdateCachedPredicateItem updates pod predicate for equivalence class
|
||||||
func (ec *EquivalenceCache) UpdateCachedPredicateItem(podName, nodeName, predicateKey string, fit bool, reasons []algorithm.PredicateFailureReason, equivalenceHash uint64) {
|
func (ec *EquivalenceCache) UpdateCachedPredicateItem(
|
||||||
|
podName, nodeName, predicateKey string,
|
||||||
|
fit bool,
|
||||||
|
reasons []algorithm.PredicateFailureReason,
|
||||||
|
equivalenceHash uint64,
|
||||||
|
) {
|
||||||
ec.Lock()
|
ec.Lock()
|
||||||
defer ec.Unlock()
|
defer ec.Unlock()
|
||||||
if _, exist := ec.algorithmCache[nodeName]; !exist {
|
if _, exist := ec.algorithmCache[nodeName]; !exist {
|
||||||
@ -95,10 +104,14 @@ func (ec *EquivalenceCache) UpdateCachedPredicateItem(podName, nodeName, predica
|
|||||||
// 2. reasons if not fit
|
// 2. reasons if not fit
|
||||||
// 3. if this cache is invalid
|
// 3. if this cache is invalid
|
||||||
// based on cached predicate results
|
// based on cached predicate results
|
||||||
func (ec *EquivalenceCache) PredicateWithECache(podName, nodeName, predicateKey string, equivalenceHash uint64) (bool, []algorithm.PredicateFailureReason, bool) {
|
func (ec *EquivalenceCache) PredicateWithECache(
|
||||||
|
podName, nodeName, predicateKey string,
|
||||||
|
equivalenceHash uint64,
|
||||||
|
) (bool, []algorithm.PredicateFailureReason, bool) {
|
||||||
ec.RLock()
|
ec.RLock()
|
||||||
defer ec.RUnlock()
|
defer ec.RUnlock()
|
||||||
glog.V(5).Infof("Begin to calculate predicate: %v for pod: %s on node: %s based on equivalence cache", predicateKey, podName, nodeName)
|
glog.V(5).Infof("Begin to calculate predicate: %v for pod: %s on node: %s based on equivalence cache",
|
||||||
|
predicateKey, podName, nodeName)
|
||||||
if algorithmCache, exist := ec.algorithmCache[nodeName]; exist {
|
if algorithmCache, exist := ec.algorithmCache[nodeName]; exist {
|
||||||
if cachePredicate, exist := algorithmCache.predicatesCache.Get(predicateKey); exist {
|
if cachePredicate, exist := algorithmCache.predicatesCache.Get(predicateKey); exist {
|
||||||
predicateMap := cachePredicate.(PredicateMap)
|
predicateMap := cachePredicate.(PredicateMap)
|
||||||
|
@ -223,7 +223,7 @@ func (sched *Scheduler) preempt(preemptor *v1.Pod, scheduleErr error) (string, e
|
|||||||
return node.Name, err
|
return node.Name, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// assume signals to the cache that a pod is already in the cache, so that binding can be asnychronous.
|
// assume signals to the cache that a pod is already in the cache, so that binding can be asynchronous.
|
||||||
// assume modifies `assumed`.
|
// assume modifies `assumed`.
|
||||||
func (sched *Scheduler) assume(assumed *v1.Pod, host string) error {
|
func (sched *Scheduler) assume(assumed *v1.Pod, host string) error {
|
||||||
// Optimistically assume that the binding will succeed and send it to apiserver
|
// Optimistically assume that the binding will succeed and send it to apiserver
|
||||||
|
Loading…
Reference in New Issue
Block a user