diff --git a/pkg/scheduler/factory/factory.go b/pkg/scheduler/factory/factory.go index 0c7dee77134..fb5cc22fcbe 100644 --- a/pkg/scheduler/factory/factory.go +++ b/pkg/scheduler/factory/factory.go @@ -634,6 +634,9 @@ func (c *configFactory) updatePodInCache(oldObj, newObj interface{}) { return } + // NOTE: Because the scheduler uses snapshots of schedulerCache and the live + // version of equivalencePodCache, updates must be written to schedulerCache + // before invalidating equivalencePodCache. if err := c.schedulerCache.UpdatePod(oldPod, newPod); err != nil { glog.Errorf("scheduler cache UpdatePod failed: %v", err) } @@ -720,6 +723,9 @@ func (c *configFactory) deletePodFromCache(obj interface{}) { glog.Errorf("cannot convert to *v1.Pod: %v", t) return } + // NOTE: Because the scheduler uses snapshots of schedulerCache and the live + // version of equivalencePodCache, updates must be written to schedulerCache + // before invalidating equivalencePodCache. if err := c.schedulerCache.RemovePod(pod); err != nil { glog.Errorf("scheduler cache RemovePod failed: %v", err) } @@ -776,6 +782,9 @@ func (c *configFactory) updateNodeInCache(oldObj, newObj interface{}) { return } + // NOTE: Because the scheduler uses snapshots of schedulerCache and the live + // version of equivalencePodCache, updates must be written to schedulerCache + // before invalidating equivalencePodCache. if err := c.schedulerCache.UpdateNode(oldNode, newNode); err != nil { glog.Errorf("scheduler cache UpdateNode failed: %v", err) } @@ -869,6 +878,9 @@ func (c *configFactory) deleteNodeFromCache(obj interface{}) { glog.Errorf("cannot convert to *v1.Node: %v", t) return } + // NOTE: Because the scheduler uses snapshots of schedulerCache and the live + // version of equivalencePodCache, updates must be written to schedulerCache + // before invalidating equivalencePodCache. if err := c.schedulerCache.RemoveNode(node); err != nil { glog.Errorf("scheduler cache RemoveNode failed: %v", err) } @@ -1297,6 +1309,9 @@ func (c *configFactory) MakeDefaultErrorFunc(backoff *util.PodBackoff, podQueue _, err := c.client.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{}) if err != nil && errors.IsNotFound(err) { node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}} + // NOTE: Because the scheduler uses snapshots of schedulerCache and the live + // version of equivalencePodCache, updates must be written to schedulerCache + // before invalidating equivalencePodCache. c.schedulerCache.RemoveNode(&node) // invalidate cached predicate for the node if c.enableEquivalenceClassCache { diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index a3109599911..98beabe5c57 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -373,6 +373,9 @@ func (sched *Scheduler) assume(assumed *v1.Pod, host string) error { // If the binding fails, scheduler will release resources allocated to assumed pod // immediately. assumed.Spec.NodeName = host + // NOTE: Because the scheduler uses snapshots of SchedulerCache and the live + // version of Ecache, updates must be written to SchedulerCache before + // invalidating Ecache. if err := sched.config.SchedulerCache.AssumePod(assumed); err != nil { glog.Errorf("scheduler cache AssumePod failed: %v", err)