From b3f1e1200b47b1404f64a0743ece9e1a505677d9 Mon Sep 17 00:00:00 2001
From: Yecheng Fu <fuyecheng@qiniu.com>
Date: Sat, 22 Sep 2018 11:52:56 +0800
Subject: [PATCH] Update notes to document why invalidation order is important.

---
 pkg/scheduler/factory/factory.go | 40 ++++++++++++++++++++------------
 pkg/scheduler/scheduler.go       |  8 ++++---
 2 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/pkg/scheduler/factory/factory.go b/pkg/scheduler/factory/factory.go
index d3b9a916a99..7fcb2fce97a 100644
--- a/pkg/scheduler/factory/factory.go
+++ b/pkg/scheduler/factory/factory.go
@@ -721,9 +721,11 @@ func (c *configFactory) updatePodInCache(oldObj, newObj interface{}) {
 		return
 	}
 
-	// NOTE: Because the scheduler uses snapshots of schedulerCache and the live
-	// version of equivalencePodCache, updates must be written to schedulerCache
-	// before invalidating equivalencePodCache.
+	// NOTE: Updates must be written to scheduler cache before invalidating
+	// equivalence cache, because we could snapshot equivalence cache after the
+	// invalidation and then snapshot the cache itself. If the cache is
+	// snapshotted before updates are written, we would update equivalence
+	// cache with stale information which is based on snapshot of old cache.
 	if err := c.schedulerCache.UpdatePod(oldPod, newPod); err != nil {
 		glog.Errorf("scheduler cache UpdatePod failed: %v", err)
 	}
@@ -810,9 +812,11 @@ func (c *configFactory) deletePodFromCache(obj interface{}) {
 		glog.Errorf("cannot convert to *v1.Pod: %v", t)
 		return
 	}
-	// NOTE: Because the scheduler uses snapshots of schedulerCache and the live
-	// version of equivalencePodCache, updates must be written to schedulerCache
-	// before invalidating equivalencePodCache.
+	// NOTE: Updates must be written to scheduler cache before invalidating
+	// equivalence cache, because we could snapshot equivalence cache after the
+	// invalidation and then snapshot the cache itself. If the cache is
+	// snapshotted before updates are written, we would update equivalence
+	// cache with stale information which is based on snapshot of old cache.
 	if err := c.schedulerCache.RemovePod(pod); err != nil {
 		glog.Errorf("scheduler cache RemovePod failed: %v", err)
 	}
@@ -876,9 +880,11 @@ func (c *configFactory) updateNodeInCache(oldObj, newObj interface{}) {
 		return
 	}
 
-	// NOTE: Because the scheduler uses snapshots of schedulerCache and the live
-	// version of equivalencePodCache, updates must be written to schedulerCache
-	// before invalidating equivalencePodCache.
+	// NOTE: Updates must be written to scheduler cache before invalidating
+	// equivalence cache, because we could snapshot equivalence cache after the
+	// invalidation and then snapshot the cache itself. If the cache is
+	// snapshotted before updates are written, we would update equivalence
+	// cache with stale information which is based on snapshot of old cache.
 	if err := c.schedulerCache.UpdateNode(oldNode, newNode); err != nil {
 		glog.Errorf("scheduler cache UpdateNode failed: %v", err)
 	}
@@ -972,9 +978,11 @@ func (c *configFactory) deleteNodeFromCache(obj interface{}) {
 		glog.Errorf("cannot convert to *v1.Node: %v", t)
 		return
 	}
-	// NOTE: Because the scheduler uses snapshots of schedulerCache and the live
-	// version of equivalencePodCache, updates must be written to schedulerCache
-	// before invalidating equivalencePodCache.
+	// NOTE: Updates must be written to scheduler cache before invalidating
+	// equivalence cache, because we could snapshot equivalence cache after the
+	// invalidation and then snapshot the cache itself. If the cache is
+	// snapshotted before updates are written, we would update equivalence
+	// cache with stale information which is based on snapshot of old cache.
 	if err := c.schedulerCache.RemoveNode(node); err != nil {
 		glog.Errorf("scheduler cache RemoveNode failed: %v", err)
 	}
@@ -1404,9 +1412,11 @@ func (c *configFactory) MakeDefaultErrorFunc(backoff *util.PodBackoff, podQueue
 					_, err := c.client.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
 					if err != nil && errors.IsNotFound(err) {
 						node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}}
-						// NOTE: Because the scheduler uses snapshots of schedulerCache and the live
-						// version of equivalencePodCache, updates must be written to schedulerCache
-						// before invalidating equivalencePodCache.
+						// NOTE: Updates must be written to scheduler cache before invalidating
+						// equivalence cache, because we could snapshot equivalence cache after the
+						// invalidation and then snapshot the cache itself. If the cache is
+						// snapshotted before updates are written, we would update equivalence
+						// cache with stale information which is based on snapshot of old cache.
 						c.schedulerCache.RemoveNode(&node)
 						// invalidate cached predicate for the node
 						if c.enableEquivalenceClassCache {
diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go
index 3d836b6ee57..3c6b6a6df82 100644
--- a/pkg/scheduler/scheduler.go
+++ b/pkg/scheduler/scheduler.go
@@ -329,9 +329,11 @@ func (sched *Scheduler) assume(assumed *v1.Pod, host string) error {
 	// If the binding fails, scheduler will release resources allocated to assumed pod
 	// immediately.
 	assumed.Spec.NodeName = host
-	// NOTE: Because the scheduler uses snapshots of SchedulerCache and the live
-	// version of Ecache, updates must be written to SchedulerCache before
-	// invalidating Ecache.
+	// NOTE: Updates must be written to scheduler cache before invalidating
+	// equivalence cache, because we could snapshot equivalence cache after the
+	// invalidation and then snapshot the cache itself. If the cache is
+	// snapshotted before updates are written, we would update equivalence
+	// cache with stale information which is based on snapshot of old cache.
 	if err := sched.config.SchedulerCache.AssumePod(assumed); err != nil {
 		glog.Errorf("scheduler cache AssumePod failed: %v", err)