Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-24 12:15:52 +00:00
Merge pull request #59245 from resouer/equiv-node
Automatic merge from submit-queue (batch tested with PRs 59394, 58769, 59423, 59363, 59245). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Ensure equiv hash calculation is per schedule

**What this PR does / why we need it**:

Currently, the equivalence hash is calculated not just once per schedule, but once per node. This is a potential cause of the slow integration tests, see #58881.

We should ensure the hash is computed only once during the scheduling of a specific pod, no matter how many nodes there are.

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:
Fixes #58989

**Special notes for your reviewer**:

**Release note**:

```release-note
Ensure equiv hash calculation is per schedule
```
This commit is contained in: 7223729d51
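The core of the change is moving the equivalence-hash calculation out of the per-node predicate check and into the per-schedule path. As a rough illustration (not the scheduler's actual code; `computeEquivHash`, `demo-pod`, and the node names are made up), the difference looks like this:

```go
package main

import (
	"fmt"
	"hash/fnv"
)

// computeEquivHash stands in for the scheduler's equivalence-hash
// calculation, which hashes an "equivalence pod" derived from the pod spec.
func computeEquivHash(podKey string) uint64 {
	h := fnv.New32a()
	fmt.Fprint(h, podKey)
	return uint64(h.Sum32())
}

func main() {
	nodes := []string{"node-1", "node-2", "node-3"}

	// Before: the hash was recomputed inside the per-node fit check,
	// i.e. once per node for the same pod.
	for range nodes {
		_ = computeEquivHash("demo-pod")
	}

	// After: compute it once per scheduling attempt and reuse it for every
	// node, which is what passing equivalenceClassInfo into podFitsOnNode does.
	hash := computeEquivHash("demo-pod")
	for _, node := range nodes {
		fmt.Printf("checking %s with precomputed hash %d\n", node, hash)
	}
}
```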
```diff
@@ -208,15 +208,22 @@ func (ec *EquivalenceCache) InvalidateCachedPredicateItemForPodAdd(pod *v1.Pod,
 	ec.InvalidateCachedPredicateItem(nodeName, invalidPredicates)
 }
 
-// getHashEquivalencePod returns the hash of equivalence pod.
-// 1. equivalenceHash
-// 2. if equivalence hash is valid
-func (ec *EquivalenceCache) getHashEquivalencePod(pod *v1.Pod) (uint64, bool) {
+// equivalenceClassInfo holds equivalence hash which is used for checking equivalence cache.
+// We will pass this to podFitsOnNode to ensure equivalence hash is only calculated per schedule.
+type equivalenceClassInfo struct {
+	// Equivalence hash.
+	hash uint64
+}
+
+// getEquivalenceClassInfo returns the equivalence class of given pod.
+func (ec *EquivalenceCache) getEquivalenceClassInfo(pod *v1.Pod) *equivalenceClassInfo {
 	equivalencePod := ec.getEquivalencePod(pod)
 	if equivalencePod != nil {
 		hash := fnv.New32a()
 		hashutil.DeepHashObject(hash, equivalencePod)
-		return uint64(hash.Sum32()), true
+		return &equivalenceClassInfo{
+			hash: uint64(hash.Sum32()),
+		}
 	}
-	return 0, false
+	return nil
 }
```
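For context on what the hash in `equivalenceClassInfo` actually is: `hashutil.DeepHashObject` writes a deep printout of the equivalence pod into an FNV-1a hasher, and the 32-bit sum is widened to `uint64`. Below is a minimal, self-contained sketch of the same shape; `equivalencePodLike`, `equivalenceClass`, and the simplified `deepHashObject` are illustrative stand-ins, not the scheduler's real types.

```go
package main

import (
	"fmt"
	"hash"
	"hash/fnv"
)

// equivalencePodLike stands in for the scheduler's "equivalence pod": the
// subset of pod fields that determine predicate results.
type equivalencePodLike struct {
	ControllerRef string
	Namespace     string
}

// deepHashObject is a simplified stand-in for hashutil.DeepHashObject: it
// writes a deep printout of the object into the hasher.
func deepHashObject(h hash.Hash, obj interface{}) {
	h.Reset()
	fmt.Fprintf(h, "%#v", obj)
}

// equivalenceClass mirrors the shape of equivalenceClassInfo: hash once,
// return a small struct, or nil when there is nothing to hash.
type equivalenceClass struct {
	hash uint64
}

func getEquivalenceClass(ep *equivalencePodLike) *equivalenceClass {
	if ep == nil {
		return nil
	}
	h := fnv.New32a()
	deepHashObject(h, ep)
	return &equivalenceClass{hash: uint64(h.Sum32())}
}

func main() {
	a := &equivalencePodLike{ControllerRef: "rs/web", Namespace: "default"}
	b := &equivalencePodLike{ControllerRef: "rs/web", Namespace: "default"}
	// Pods that reduce to the same equivalence pod land in the same class.
	fmt.Println(getEquivalenceClass(a).hash == getEquivalenceClass(b).hash) // true
}
```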
```diff
@@ -487,19 +487,22 @@ func TestGetHashEquivalencePod(t *testing.T) {
 	for _, test := range tests {
 		for i, podInfo := range test.podInfoList {
 			testPod := podInfo.pod
-			hash, isValid := ecache.getHashEquivalencePod(testPod)
-			if isValid != podInfo.hashIsValid {
+			eclassInfo := ecache.getEquivalenceClassInfo(testPod)
+			if eclassInfo == nil && podInfo.hashIsValid {
 				t.Errorf("Failed: pod %v is expected to have valid hash", testPod)
 			}
-			// NOTE(harry): the first element will be used as target so
-			// this logic can't verify more than two inequivalent pods
-			if i == 0 {
-				targetHash = hash
-				targetPodInfo = podInfo
-			} else {
-				if targetHash != hash {
-					if test.isEquivalent {
-						t.Errorf("Failed: pod: %v is expected to be equivalent to: %v", testPod, targetPodInfo.pod)
+
+			if eclassInfo != nil {
+				// NOTE(harry): the first element will be used as target so
+				// this logic can't verify more than two inequivalent pods
+				if i == 0 {
+					targetHash = eclassInfo.hash
+					targetPodInfo = podInfo
+				} else {
+					if targetHash != eclassInfo.hash {
+						if test.isEquivalent {
+							t.Errorf("Failed: pod: %v is expected to be equivalent to: %v", testPod, targetPodInfo.pod)
+						}
 					}
 				}
 			}
```
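The updated test keeps the same table-driven pattern, just guarded by the nil check. Reduced to a stand-alone sketch (the `classHash` helper below is hypothetical and keys only on an owner string for brevity), the pattern is:

```go
package equivclass

import (
	"fmt"
	"hash/fnv"
	"testing"
)

// classHash is an illustrative stand-in for getEquivalenceClassInfo: it maps
// an owner reference string to an equivalence-class hash.
func classHash(ownerRef string) uint64 {
	h := fnv.New32a()
	fmt.Fprint(h, ownerRef)
	return uint64(h.Sum32())
}

func TestClassHash(t *testing.T) {
	tests := []struct {
		name         string
		ownerA       string
		ownerB       string
		isEquivalent bool
	}{
		{name: "pods from the same controller", ownerA: "rs/web", ownerB: "rs/web", isEquivalent: true},
		{name: "pods from different controllers", ownerA: "rs/web", ownerB: "rs/api", isEquivalent: false},
	}
	for _, tc := range tests {
		if got := classHash(tc.ownerA) == classHash(tc.ownerB); got != tc.isEquivalent {
			t.Errorf("%s: equivalence = %v, want %v", tc.name, got, tc.isEquivalent)
		}
	}
}
```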
```diff
@@ -312,9 +312,25 @@ func findNodesThatFit(
 
 		// We can use the same metadata producer for all nodes.
 		meta := metadataProducer(pod, nodeNameToInfo)
+
+		var equivCacheInfo *equivalenceClassInfo
+		if ecache != nil {
+			// getEquivalenceClassInfo will return immediately if no equivalence pod found
+			equivCacheInfo = ecache.getEquivalenceClassInfo(pod)
+		}
+
 		checkNode := func(i int) {
 			nodeName := nodes[i].Name
-			fits, failedPredicates, err := podFitsOnNode(pod, meta, nodeNameToInfo[nodeName], predicateFuncs, ecache, schedulingQueue, alwaysCheckAllPredicates)
+			fits, failedPredicates, err := podFitsOnNode(
+				pod,
+				meta,
+				nodeNameToInfo[nodeName],
+				predicateFuncs,
+				ecache,
+				schedulingQueue,
+				alwaysCheckAllPredicates,
+				equivCacheInfo,
+			)
 			if err != nil {
 				predicateResultLock.Lock()
 				errs[err.Error()]++
```
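Computing `equivCacheInfo` before `checkNode` matters because `checkNode` is fanned out across all candidate nodes in parallel; each closure now only reads the precomputed value. A rough sketch of that shape, with plain goroutines standing in for the scheduler's parallel fan-out:

```go
package main

import (
	"fmt"
	"sync"
)

// checkAllNodes illustrates the fan-out in findNodesThatFit: the equivalence
// hash is computed once, and every per-node closure just reads it.
func checkAllNodes(nodes []string, equivHash uint64) {
	var wg sync.WaitGroup
	checkNode := func(i int) {
		defer wg.Done()
		// Per-node work only consumes the shared, precomputed hash.
		fmt.Printf("node %s uses equivalence hash %d\n", nodes[i], equivHash)
	}
	for i := range nodes {
		wg.Add(1)
		go checkNode(i)
	}
	wg.Wait()
}

func main() {
	// Computed once per scheduling attempt, outside the per-node fan-out.
	equivHash := uint64(0x5eed)
	checkAllNodes([]string{"node-1", "node-2", "node-3"}, equivHash)
}
```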
```diff
@@ -389,6 +405,8 @@ func addNominatedPods(podPriority int32, meta algorithm.PredicateMetadata,
 }
 
 // podFitsOnNode checks whether a node given by NodeInfo satisfies the given predicate functions.
+// For given pod, podFitsOnNode will check if any equivalent pod exists and try to reuse its cached
+// predicate results as possible.
 // This function is called from two different places: Schedule and Preempt.
 // When it is called from Schedule, we want to test whether the pod is schedulable
 // on the node with all the existing pods on the node plus higher and equal priority
```
```diff
@@ -404,11 +422,11 @@ func podFitsOnNode(
 	ecache *EquivalenceCache,
 	queue SchedulingQueue,
 	alwaysCheckAllPredicates bool,
+	equivCacheInfo *equivalenceClassInfo,
 ) (bool, []algorithm.PredicateFailureReason, error) {
 	var (
-		equivalenceHash  uint64
-		failedPredicates []algorithm.PredicateFailureReason
 		eCacheAvailable  bool
+		failedPredicates []algorithm.PredicateFailureReason
 		invalid          bool
 		fit              bool
 		reasons          []algorithm.PredicateFailureReason
```
```diff
@@ -416,10 +434,6 @@ func podFitsOnNode(
 	)
 	predicateResults := make(map[string]HostPredicate)
 
-	if ecache != nil {
-		// getHashEquivalencePod will return immediately if no equivalence pod found
-		equivalenceHash, eCacheAvailable = ecache.getHashEquivalencePod(pod)
-	}
 	podsAdded := false
 	// We run predicates twice in some cases. If the node has greater or equal priority
 	// nominated pods, we run them when those pods are added to meta and nodeInfo.
```
```diff
@@ -450,13 +464,13 @@ func podFitsOnNode(
 		// Bypass eCache if node has any nominated pods.
 		// TODO(bsalamat): consider using eCache and adding proper eCache invalidations
 		// when pods are nominated or their nominations change.
-		eCacheAvailable = eCacheAvailable && !podsAdded
+		eCacheAvailable = equivCacheInfo != nil && !podsAdded
 		for _, predicateKey := range predicates.PredicatesOrdering() {
-			//TODO (yastij) : compute average predicate restrictiveness to export it as promethus metric
+			//TODO (yastij) : compute average predicate restrictiveness to export it as Prometheus metric
 			if predicate, exist := predicateFuncs[predicateKey]; exist {
 				if eCacheAvailable {
 					// PredicateWithECache will return its cached predicate results.
-					fit, reasons, invalid = ecache.PredicateWithECache(pod.GetName(), info.Node().GetName(), predicateKey, equivalenceHash)
+					fit, reasons, invalid = ecache.PredicateWithECache(pod.GetName(), info.Node().GetName(), predicateKey, equivCacheInfo.hash)
 				}
 
 				if !eCacheAvailable || invalid {
```
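For readers who have not followed the equivalence cache: `PredicateWithECache` and `UpdateCachedPredicateItem` amount to a lookup and a write keyed by node, predicate, and the equivalence-class hash. A toy sketch of that flow follows; the types and keys here are simplified stand-ins, not the scheduler's actual structures.

```go
package main

import "fmt"

// predicateResult is an illustrative cached predicate outcome.
type predicateResult struct {
	fit     bool
	reasons []string
}

// equivCache is a toy stand-in for the scheduler's equivalence cache:
// per node, results are keyed by predicate name and equivalence-class hash.
type equivCache map[string]map[string]map[uint64]predicateResult

func (c equivCache) lookup(node, predicateKey string, hash uint64) (predicateResult, bool) {
	res, ok := c[node][predicateKey][hash]
	return res, ok
}

func (c equivCache) update(node, predicateKey string, hash uint64, res predicateResult) {
	if c[node] == nil {
		c[node] = map[string]map[uint64]predicateResult{}
	}
	if c[node][predicateKey] == nil {
		c[node][predicateKey] = map[uint64]predicateResult{}
	}
	c[node][predicateKey][hash] = res
}

func main() {
	cache := equivCache{}
	hash := uint64(12345) // precomputed once per scheduling attempt

	runPredicate := func() predicateResult { return predicateResult{fit: true} }

	// First pod of the class on this node: cache miss, run the predicate,
	// then write the result back (the UpdateCachedPredicateItem step).
	res, ok := cache.lookup("node-1", "PodFitsResources", hash)
	if !ok {
		res = runPredicate()
		cache.update("node-1", "PodFitsResources", hash, res)
	}

	// Later pods in the same class: cache hit, no predicate run needed
	// (the PredicateWithECache step).
	res, ok = cache.lookup("node-1", "PodFitsResources", hash)
	fmt.Println(res.fit, ok) // true true
}
```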
```diff
@@ -498,7 +512,7 @@ func podFitsOnNode(
 		for predKey, result := range predicateResults {
 			// update equivalence cache with newly computed fit & reasons
 			// TODO(resouer) should we do this in another thread? any race?
-			ecache.UpdateCachedPredicateItem(pod.GetName(), nodeName, predKey, result.Fit, result.FailReasons, equivalenceHash)
+			ecache.UpdateCachedPredicateItem(pod.GetName(), nodeName, predKey, result.Fit, result.FailReasons, equivCacheInfo.hash)
 		}
 	}
 	return len(failedPredicates) == 0, failedPredicates, nil
```
```diff
@@ -922,7 +936,7 @@ func selectVictimsOnNode(
 	// that we should check is if the "pod" is failing to schedule due to pod affinity
 	// failure.
 	// TODO(bsalamat): Consider checking affinity to lower priority pods if feasible with reasonable performance.
-	if fits, _, err := podFitsOnNode(pod, meta, nodeInfoCopy, fitPredicates, nil, queue, false); !fits {
+	if fits, _, err := podFitsOnNode(pod, meta, nodeInfoCopy, fitPredicates, nil, queue, false, nil); !fits {
 		if err != nil {
 			glog.Warningf("Encountered error while selecting victims on node %v: %v", nodeInfo.Node().Name, err)
 		}
```
```diff
@@ -936,7 +950,7 @@ func selectVictimsOnNode(
 	violatingVictims, nonViolatingVictims := filterPodsWithPDBViolation(potentialVictims.Items, pdbs)
 	reprievePod := func(p *v1.Pod) bool {
 		addPod(p)
-		fits, _, _ := podFitsOnNode(pod, meta, nodeInfoCopy, fitPredicates, nil, queue, false)
+		fits, _, _ := podFitsOnNode(pod, meta, nodeInfoCopy, fitPredicates, nil, queue, false, nil)
 		if !fits {
 			removePod(p)
 			victims = append(victims, p)
```