From b3f1e1200b47b1404f64a0743ece9e1a505677d9 Mon Sep 17 00:00:00 2001 From: Yecheng Fu Date: Sat, 22 Sep 2018 11:52:56 +0800 Subject: [PATCH] Update notes to document why invalidation order is important. --- pkg/scheduler/factory/factory.go | 40 ++++++++++++++++++++------------ pkg/scheduler/scheduler.go | 8 ++++--- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/pkg/scheduler/factory/factory.go b/pkg/scheduler/factory/factory.go index d3b9a916a99..7fcb2fce97a 100644 --- a/pkg/scheduler/factory/factory.go +++ b/pkg/scheduler/factory/factory.go @@ -721,9 +721,11 @@ func (c *configFactory) updatePodInCache(oldObj, newObj interface{}) { return } - // NOTE: Because the scheduler uses snapshots of schedulerCache and the live - // version of equivalencePodCache, updates must be written to schedulerCache - // before invalidating equivalencePodCache. + // NOTE: Updates must be written to scheduler cache before invalidating + // equivalence cache, because we could snapshot equivalence cache after the + // invalidation and then snapshot the cache itself. If the cache is + // snapshotted before updates are written, we would update equivalence + // cache with stale information which is based on snapshot of old cache. if err := c.schedulerCache.UpdatePod(oldPod, newPod); err != nil { glog.Errorf("scheduler cache UpdatePod failed: %v", err) } @@ -810,9 +812,11 @@ func (c *configFactory) deletePodFromCache(obj interface{}) { glog.Errorf("cannot convert to *v1.Pod: %v", t) return } - // NOTE: Because the scheduler uses snapshots of schedulerCache and the live - // version of equivalencePodCache, updates must be written to schedulerCache - // before invalidating equivalencePodCache. + // NOTE: Updates must be written to scheduler cache before invalidating + // equivalence cache, because we could snapshot equivalence cache after the + // invalidation and then snapshot the cache itself. If the cache is + // snapshotted before updates are written, we would update equivalence + // cache with stale information which is based on snapshot of old cache. if err := c.schedulerCache.RemovePod(pod); err != nil { glog.Errorf("scheduler cache RemovePod failed: %v", err) } @@ -876,9 +880,11 @@ func (c *configFactory) updateNodeInCache(oldObj, newObj interface{}) { return } - // NOTE: Because the scheduler uses snapshots of schedulerCache and the live - // version of equivalencePodCache, updates must be written to schedulerCache - // before invalidating equivalencePodCache. + // NOTE: Updates must be written to scheduler cache before invalidating + // equivalence cache, because we could snapshot equivalence cache after the + // invalidation and then snapshot the cache itself. If the cache is + // snapshotted before updates are written, we would update equivalence + // cache with stale information which is based on snapshot of old cache. if err := c.schedulerCache.UpdateNode(oldNode, newNode); err != nil { glog.Errorf("scheduler cache UpdateNode failed: %v", err) } @@ -972,9 +978,11 @@ func (c *configFactory) deleteNodeFromCache(obj interface{}) { glog.Errorf("cannot convert to *v1.Node: %v", t) return } - // NOTE: Because the scheduler uses snapshots of schedulerCache and the live - // version of equivalencePodCache, updates must be written to schedulerCache - // before invalidating equivalencePodCache. + // NOTE: Updates must be written to scheduler cache before invalidating + // equivalence cache, because we could snapshot equivalence cache after the + // invalidation and then snapshot the cache itself. If the cache is + // snapshotted before updates are written, we would update equivalence + // cache with stale information which is based on snapshot of old cache. if err := c.schedulerCache.RemoveNode(node); err != nil { glog.Errorf("scheduler cache RemoveNode failed: %v", err) } @@ -1404,9 +1412,11 @@ func (c *configFactory) MakeDefaultErrorFunc(backoff *util.PodBackoff, podQueue _, err := c.client.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{}) if err != nil && errors.IsNotFound(err) { node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}} - // NOTE: Because the scheduler uses snapshots of schedulerCache and the live - // version of equivalencePodCache, updates must be written to schedulerCache - // before invalidating equivalencePodCache. + // NOTE: Updates must be written to scheduler cache before invalidating + // equivalence cache, because we could snapshot equivalence cache after the + // invalidation and then snapshot the cache itself. If the cache is + // snapshotted before updates are written, we would update equivalence + // cache with stale information which is based on snapshot of old cache. c.schedulerCache.RemoveNode(&node) // invalidate cached predicate for the node if c.enableEquivalenceClassCache { diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index 3d836b6ee57..3c6b6a6df82 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -329,9 +329,11 @@ func (sched *Scheduler) assume(assumed *v1.Pod, host string) error { // If the binding fails, scheduler will release resources allocated to assumed pod // immediately. assumed.Spec.NodeName = host - // NOTE: Because the scheduler uses snapshots of SchedulerCache and the live - // version of Ecache, updates must be written to SchedulerCache before - // invalidating Ecache. + // NOTE: Updates must be written to scheduler cache before invalidating + // equivalence cache, because we could snapshot equivalence cache after the + // invalidation and then snapshot the cache itself. If the cache is + // snapshotted before updates are written, we would update equivalence + // cache with stale information which is based on snapshot of old cache. if err := sched.config.SchedulerCache.AssumePod(assumed); err != nil { glog.Errorf("scheduler cache AssumePod failed: %v", err)