From 859f6b13fa3ccb73a3017106ed105a0be0fb951c Mon Sep 17 00:00:00 2001
From: Prashanth Balasubramanian
Date: Sun, 28 Feb 2016 00:23:47 -0800
Subject: [PATCH] Don't double count graceful deletion.

---
 pkg/controller/replicaset/replica_set.go      | 57 ++++++++-----
 pkg/controller/replicaset/replica_set_test.go | 82 +++++++++++++++++++
 .../replication/replication_controller.go     | 53 +++++++-----
 .../replication_controller_test.go            | 81 ++++++++++++++++++
 4 files changed, 232 insertions(+), 41 deletions(-)

diff --git a/pkg/controller/replicaset/replica_set.go b/pkg/controller/replicaset/replica_set.go
index 6fdc5542465..cee4bd9387c 100644
--- a/pkg/controller/replicaset/replica_set.go
+++ b/pkg/controller/replicaset/replica_set.go
@@ -289,21 +289,24 @@ func isReplicaSetMatch(pod *api.Pod, rs *extensions.ReplicaSet) bool {
 // When a pod is created, enqueue the replica set that manages it and update it's expectations.
 func (rsc *ReplicaSetController) addPod(obj interface{}) {
 	pod := obj.(*api.Pod)
+
+	rs := rsc.getPodReplicaSet(pod)
+	if rs == nil {
+		return
+	}
+	rsKey, err := controller.KeyFunc(rs)
+	if err != nil {
+		glog.Errorf("Couldn't get key for ReplicaSet %#v: %v", rs, err)
+		return
+	}
 	if pod.DeletionTimestamp != nil {
 		// on a restart of the controller manager, it's possible a new pod shows up in a state that
 		// is already pending deletion. Prevent the pod from being a creation observation.
-		rsc.deletePod(pod)
-		return
-	}
-	if rs := rsc.getPodReplicaSet(pod); rs != nil {
-		rsKey, err := controller.KeyFunc(rs)
-		if err != nil {
-			glog.Errorf("Couldn't get key for ReplicaSet %#v: %v", rs, err)
-			return
-		}
+		rsc.expectations.DeletionObserved(rsKey)
+	} else {
 		rsc.expectations.CreationObserved(rsKey)
-		rsc.enqueueReplicaSet(rs)
 	}
+	rsc.enqueueReplicaSet(rs)
 }
 
 // When a pod is updated, figure out what replica set/s manage it and wake them
@@ -314,22 +317,28 @@ func (rsc *ReplicaSetController) updatePod(old, cur interface{}) {
 		// A periodic relist will send update events for all known pods.
 		return
 	}
-	// TODO: Write a unittest for this case
 	curPod := cur.(*api.Pod)
-	if curPod.DeletionTimestamp != nil {
-		// when a pod is deleted gracefully it's deletion timestamp is first modified to reflect a grace period,
-		// and after such time has passed, the kubelet actually deletes it from the store. We receive an update
-		// for modification of the deletion timestamp and expect an ReplicaSet to create more replicas asap, not wait
-		// until the kubelet actually deletes the pod. This is different from the Phase of a pod changing, because
-		// a ReplicaSet never initiates a phase change, and so is never asleep waiting for the same.
-		rsc.deletePod(curPod)
+	rs := rsc.getPodReplicaSet(curPod)
+	if rs == nil {
 		return
 	}
-	if rs := rsc.getPodReplicaSet(curPod); rs != nil {
-		rsc.enqueueReplicaSet(rs)
+	rsKey, err := controller.KeyFunc(rs)
+	if err != nil {
+		glog.Errorf("Couldn't get key for ReplicaSet %#v: %v", rs, err)
+		return
 	}
 	oldPod := old.(*api.Pod)
-	// Only need to get the old replica set if the labels changed.
+
+	if curPod.DeletionTimestamp != nil && oldPod.DeletionTimestamp == nil {
+		// when a pod is deleted gracefully its deletion timestamp is first modified to reflect a grace period,
+		// and after such time has passed, the kubelet actually deletes it from the store. We receive an update
+		// for modification of the deletion timestamp and expect a ReplicaSet to create more replicas asap, not wait
+		// until the kubelet actually deletes the pod. This is different from the Phase of a pod changing, because
+		// a ReplicaSet never initiates a phase change, and so is never asleep waiting for the same.
+		rsc.expectations.DeletionObserved(rsKey)
+	}
+
+	rsc.enqueueReplicaSet(rs)
 	if !reflect.DeepEqual(curPod.Labels, oldPod.Labels) {
 		// If the old and new ReplicaSet are the same, the first one that syncs
 		// will set expectations preventing any damage from the second.
@@ -366,7 +375,11 @@ func (rsc *ReplicaSetController) deletePod(obj interface{}) {
 			glog.Errorf("Couldn't get key for ReplicaSet %#v: %v", rs, err)
 			return
 		}
-		rsc.expectations.DeletionObserved(rsKey)
+		// This method only manages expectations for the case where a pod is
+		// deleted without a grace period.
+		if pod.DeletionTimestamp == nil {
+			rsc.expectations.DeletionObserved(rsKey)
+		}
 		rsc.enqueueReplicaSet(rs)
 	}
 }
diff --git a/pkg/controller/replicaset/replica_set_test.go b/pkg/controller/replicaset/replica_set_test.go
index 0258a39a6e0..74924227596 100644
--- a/pkg/controller/replicaset/replica_set_test.go
+++ b/pkg/controller/replicaset/replica_set_test.go
@@ -910,3 +910,85 @@ func TestOverlappingRSs(t *testing.T) {
 		}
 	}
 }
+
+func TestDeletionTimestamp(t *testing.T) {
+	c := clientset.NewForConfigOrDie(&client.Config{Host: "", ContentConfig: client.ContentConfig{GroupVersion: testapi.Default.GroupVersion()}})
+	labelMap := map[string]string{"foo": "bar"}
+	manager := NewReplicaSetController(c, controller.NoResyncPeriodFunc, 10, 0)
+	manager.podStoreSynced = alwaysReady
+
+	rs := newReplicaSet(1, labelMap)
+	manager.rsStore.Store.Add(rs)
+	rsKey, err := controller.KeyFunc(rs)
+	if err != nil {
+		t.Errorf("Couldn't get key for object %+v: %v", rs, err)
+	}
+	pod := newPodList(nil, 1, api.PodPending, labelMap, rs).Items[0]
+	pod.DeletionTimestamp = &unversioned.Time{Time: time.Now()}
+	manager.expectations.SetExpectations(rsKey, 0, 1)
+
+	// A pod added with a deletion timestamp should decrement deletions, not creations.
+	manager.addPod(&pod)
+
+	queueRC, _ := manager.queue.Get()
+	if queueRC != rsKey {
+		t.Fatalf("Expected to find key %v in queue, found %v", rsKey, queueRC)
+	}
+	manager.queue.Done(rsKey)
+
+	podExp, exists, err := manager.expectations.GetExpectations(rsKey)
+	if !exists || err != nil || !podExp.Fulfilled() {
+		t.Fatalf("Wrong expectations %+v", podExp)
+	}
+
+	// An update from no deletion timestamp to having one should be treated
+	// as a deletion.
+	oldPod := newPodList(nil, 1, api.PodPending, labelMap, rs).Items[0]
+	manager.expectations.SetExpectations(rsKey, 0, 1)
+	manager.updatePod(&oldPod, &pod)
+
+	queueRC, _ = manager.queue.Get()
+	if queueRC != rsKey {
+		t.Fatalf("Expected to find key %v in queue, found %v", rsKey, queueRC)
+	}
+	manager.queue.Done(rsKey)
+
+	podExp, exists, err = manager.expectations.GetExpectations(rsKey)
+	if !exists || err != nil || !podExp.Fulfilled() {
+		t.Fatalf("Wrong expectations %+v", podExp)
+	}
+
+	// An update to the pod (including an update to the deletion timestamp)
+	// should not be counted as a second delete.
+	manager.expectations.SetExpectations(rsKey, 0, 1)
+	oldPod.DeletionTimestamp = &unversioned.Time{Time: time.Now()}
+	manager.updatePod(&oldPod, &pod)
+
+	podExp, exists, err = manager.expectations.GetExpectations(rsKey)
+	if !exists || err != nil || podExp.Fulfilled() {
+		t.Fatalf("Wrong expectations %+v", podExp)
+	}
+
+	// A pod with a non-nil deletion timestamp should also be ignored by the
+	// delete handler, because it's already been counted in the update.
+	manager.deletePod(&pod)
+	podExp, exists, err = manager.expectations.GetExpectations(rsKey)
+	if !exists || err != nil || podExp.Fulfilled() {
+		t.Fatalf("Wrong expectations %+v", podExp)
+	}
+
+	// A pod with a nil timestamp should be counted as a deletion.
+	pod.DeletionTimestamp = nil
+	manager.deletePod(&pod)
+
+	queueRC, _ = manager.queue.Get()
+	if queueRC != rsKey {
+		t.Fatalf("Expected to find key %v in queue, found %v", rsKey, queueRC)
+	}
+	manager.queue.Done(rsKey)
+
+	podExp, exists, err = manager.expectations.GetExpectations(rsKey)
+	if !exists || err != nil || !podExp.Fulfilled() {
+		t.Fatalf("Wrong expectations %+v", podExp)
+	}
+}
diff --git a/pkg/controller/replication/replication_controller.go b/pkg/controller/replication/replication_controller.go
index 92a3e3fc919..56e1bda3cee 100644
--- a/pkg/controller/replication/replication_controller.go
+++ b/pkg/controller/replication/replication_controller.go
@@ -286,21 +286,25 @@ func isControllerMatch(pod *api.Pod, rc *api.ReplicationController) bool {
 // When a pod is created, enqueue the controller that manages it and update it's expectations.
 func (rm *ReplicationManager) addPod(obj interface{}) {
 	pod := obj.(*api.Pod)
+
+	rc := rm.getPodController(pod)
+	if rc == nil {
+		return
+	}
+	rcKey, err := controller.KeyFunc(rc)
+	if err != nil {
+		glog.Errorf("Couldn't get key for replication controller %#v: %v", rc, err)
+		return
+	}
+
 	if pod.DeletionTimestamp != nil {
 		// on a restart of the controller manager, it's possible a new pod shows up in a state that
 		// is already pending deletion. Prevent the pod from being a creation observation.
-		rm.deletePod(pod)
-		return
-	}
-	if rc := rm.getPodController(pod); rc != nil {
-		rcKey, err := controller.KeyFunc(rc)
-		if err != nil {
-			glog.Errorf("Couldn't get key for replication controller %#v: %v", rc, err)
-			return
-		}
+		rm.expectations.DeletionObserved(rcKey)
+	} else {
 		rm.expectations.CreationObserved(rcKey)
-		rm.enqueueController(rc)
 	}
+	rm.enqueueController(rc)
 }
 
 // When a pod is updated, figure out what controller/s manage it and wake them
@@ -311,21 +315,28 @@ func (rm *ReplicationManager) updatePod(old, cur interface{}) {
 		// A periodic relist will send update events for all known pods.
 		return
 	}
-	// TODO: Write a unittest for this case
 	curPod := cur.(*api.Pod)
-	if curPod.DeletionTimestamp != nil {
+	rc := rm.getPodController(curPod)
+	if rc == nil {
+		return
+	}
+	rcKey, err := controller.KeyFunc(rc)
+	if err != nil {
+		glog.Errorf("Couldn't get key for replication controller %#v: %v", rc, err)
+		return
+	}
+	oldPod := old.(*api.Pod)
+
+	if curPod.DeletionTimestamp != nil && oldPod.DeletionTimestamp == nil {
 		// when a pod is deleted gracefully it's deletion timestamp is first modified to reflect a grace period,
 		// and after such time has passed, the kubelet actually deletes it from the store. We receive an update
 		// for modification of the deletion timestamp and expect an rc to create more replicas asap, not wait
 		// until the kubelet actually deletes the pod. This is different from the Phase of a pod changing, because
 		// an rc never initiates a phase change, and so is never asleep waiting for the same.
-		rm.deletePod(curPod)
-		return
+		rm.expectations.DeletionObserved(rcKey)
 	}
-	if rc := rm.getPodController(curPod); rc != nil {
-		rm.enqueueController(rc)
-	}
-	oldPod := old.(*api.Pod)
+
+	rm.enqueueController(rc)
 	// Only need to get the old controller if the labels changed.
 	if !reflect.DeepEqual(curPod.Labels, oldPod.Labels) {
 		// If the old and new rc are the same, the first one that syncs
@@ -363,7 +374,11 @@ func (rm *ReplicationManager) deletePod(obj interface{}) {
 			glog.Errorf("Couldn't get key for replication controller %#v: %v", rc, err)
 			return
 		}
-		rm.expectations.DeletionObserved(rcKey)
+		// This method only manages expectations for the case where a pod is
+		// deleted without a grace period.
+		if pod.DeletionTimestamp == nil {
+			rm.expectations.DeletionObserved(rcKey)
+		}
 		rm.enqueueController(rc)
 	}
 }
diff --git a/pkg/controller/replication/replication_controller_test.go b/pkg/controller/replication/replication_controller_test.go
index 5f54eb543fa..6d576163a3f 100644
--- a/pkg/controller/replication/replication_controller_test.go
+++ b/pkg/controller/replication/replication_controller_test.go
@@ -893,6 +893,87 @@ func TestOverlappingRCs(t *testing.T) {
 	}
 }
 
+func TestDeletionTimestamp(t *testing.T) {
+	c := clientset.NewForConfigOrDie(&client.Config{Host: "", ContentConfig: client.ContentConfig{GroupVersion: testapi.Default.GroupVersion()}})
+	manager := NewReplicationManager(c, controller.NoResyncPeriodFunc, 10, 0)
+	manager.podStoreSynced = alwaysReady
+
+	controllerSpec := newReplicationController(1)
+	manager.rcStore.Store.Add(controllerSpec)
+	rcKey, err := controller.KeyFunc(controllerSpec)
+	if err != nil {
+		t.Errorf("Couldn't get key for object %+v: %v", controllerSpec, err)
+	}
+	pod := newPodList(nil, 1, api.PodPending, controllerSpec).Items[0]
+	pod.DeletionTimestamp = &unversioned.Time{Time: time.Now()}
+	manager.expectations.SetExpectations(rcKey, 0, 1)
+
+	// A pod added with a deletion timestamp should decrement deletions, not creations.
+	manager.addPod(&pod)
+
+	queueRC, _ := manager.queue.Get()
+	if queueRC != rcKey {
+		t.Fatalf("Expected to find key %v in queue, found %v", rcKey, queueRC)
+	}
+	manager.queue.Done(rcKey)
+
+	podExp, exists, err := manager.expectations.GetExpectations(rcKey)
+	if !exists || err != nil || !podExp.Fulfilled() {
+		t.Fatalf("Wrong expectations %+v", podExp)
+	}
+
+	// An update from no deletion timestamp to having one should be treated
+	// as a deletion.
+	oldPod := newPodList(nil, 1, api.PodPending, controllerSpec).Items[0]
+	manager.expectations.SetExpectations(rcKey, 0, 1)
+	manager.updatePod(&oldPod, &pod)
+
+	queueRC, _ = manager.queue.Get()
+	if queueRC != rcKey {
+		t.Fatalf("Expected to find key %v in queue, found %v", rcKey, queueRC)
+	}
+	manager.queue.Done(rcKey)
+
+	podExp, exists, err = manager.expectations.GetExpectations(rcKey)
+	if !exists || err != nil || !podExp.Fulfilled() {
+		t.Fatalf("Wrong expectations %+v", podExp)
+	}
+
+	// An update to the pod (including an update to the deletion timestamp)
+	// should not be counted as a second delete.
+	manager.expectations.SetExpectations(rcKey, 0, 1)
+	oldPod.DeletionTimestamp = &unversioned.Time{Time: time.Now()}
+	manager.updatePod(&oldPod, &pod)
+
+	podExp, exists, err = manager.expectations.GetExpectations(rcKey)
+	if !exists || err != nil || podExp.Fulfilled() {
+		t.Fatalf("Wrong expectations %+v", podExp)
+	}
+
+	// A pod with a non-nil deletion timestamp should also be ignored by the
+	// delete handler, because it's already been counted in the update.
+	manager.deletePod(&pod)
+	podExp, exists, err = manager.expectations.GetExpectations(rcKey)
+	if !exists || err != nil || podExp.Fulfilled() {
+		t.Fatalf("Wrong expectations %+v", podExp)
+	}
+
+	// A pod with a nil timestamp should be counted as a deletion.
+	pod.DeletionTimestamp = nil
+	manager.deletePod(&pod)
+
+	queueRC, _ = manager.queue.Get()
+	if queueRC != rcKey {
+		t.Fatalf("Expected to find key %v in queue, found %v", rcKey, queueRC)
+	}
+	manager.queue.Done(rcKey)
+
+	podExp, exists, err = manager.expectations.GetExpectations(rcKey)
+	if !exists || err != nil || !podExp.Fulfilled() {
+		t.Fatalf("Wrong expectations %+v", podExp)
+	}
+}
+
 func BenchmarkGetPodControllerMultiNS(b *testing.B) {
 	client := clientset.NewForConfigOrDie(&client.Config{Host: "", ContentConfig: client.ContentConfig{GroupVersion: testapi.Default.GroupVersion()}})
 	manager := NewReplicationManager(client, controller.NoResyncPeriodFunc, BurstReplicas, 0)
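Reviewer note (not part of the patch): the counting rule the diff enforces is easy to state in isolation. Below is a minimal, self-contained Go sketch of that rule; Pod, podObserver, and the handler names are hypothetical stand-ins, not the actual controller types or its expectations API. A graceful deletion is observed exactly once, at the moment the deletion timestamp first appears (in the add or update handler), and the delete handler only counts pods that were deleted without a grace period.

package main

import (
	"fmt"
	"time"
)

// Pod is a hypothetical stand-in for api.Pod, reduced to the one field the rule cares about.
type Pod struct {
	Name              string
	DeletionTimestamp *time.Time // nil until a graceful delete is requested
}

// podObserver is a hypothetical stand-in for the expectations bookkeeping of one controller key.
type podObserver struct {
	pendingDeletions int
}

// DeletionObserved records that one expected deletion has been seen.
func (o *podObserver) DeletionObserved() { o.pendingDeletions-- }

// onAdd mirrors addPod: a pod that shows up already marked for deletion (e.g. after a
// controller-manager restart) is a deletion observation, not a creation.
func (o *podObserver) onAdd(p *Pod) {
	if p.DeletionTimestamp != nil {
		o.DeletionObserved()
	}
}

// onUpdate mirrors updatePod: count the deletion only on the nil -> non-nil transition,
// so later updates to an already-terminating pod are not counted again.
func (o *podObserver) onUpdate(oldPod, curPod *Pod) {
	if curPod.DeletionTimestamp != nil && oldPod.DeletionTimestamp == nil {
		o.DeletionObserved()
	}
}

// onDelete mirrors deletePod: only a pod deleted without a grace period (timestamp still
// nil) is counted here; graceful deletions were already counted in onAdd/onUpdate.
func (o *podObserver) onDelete(p *Pod) {
	if p.DeletionTimestamp == nil {
		o.DeletionObserved()
	}
}

func main() {
	o := &podObserver{pendingDeletions: 1} // expect exactly one deletion
	now := time.Now()

	oldPod := &Pod{Name: "p"}
	curPod := &Pod{Name: "p", DeletionTimestamp: &now}

	o.onUpdate(oldPod, curPod) // graceful deletion observed once, here
	o.onDelete(curPod)         // ignored: already counted above

	fmt.Println("pending deletions:", o.pendingDeletions) // prints 0, not -1
}

With the pre-patch behavior, the delete handler would count the same graceful deletion a second time; in terms of this sketch the counter would end at -1 instead of 0, which is the double count the commit title refers to.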