From 64ad3e17ad15cd0f9a4fd86706eec1c572033254 Mon Sep 17 00:00:00 2001
From: Prashanth Balasubramanian
Date: Sat, 20 Jun 2015 21:36:52 -0700
Subject: [PATCH] Fix deadlock

---
 pkg/kubelet/status_manager.go | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/pkg/kubelet/status_manager.go b/pkg/kubelet/status_manager.go
index 4df39d7c086..aaeedd0c04f 100644
--- a/pkg/kubelet/status_manager.go
+++ b/pkg/kubelet/status_manager.go
@@ -96,6 +96,12 @@ func (s *statusManager) SetPodStatus(pod *api.Pod, status api.PodStatus) {
 		}
 	}
 
+	// TODO: Holding a lock during blocking operations is dangerous. Refactor so this isn't necessary.
+	// The intent here is to prevent concurrent updates to a pod's status from
+	// clobbering each other so the phase of a pod progresses monotonically.
+	// Currently this routine is not called for the same pod from multiple
+	// workers and/or the kubelet but dropping the lock before sending the
+	// status down the channel feels like an easy way to get a bullet in foot.
 	if !found || !reflect.DeepEqual(oldStatus, status) {
 		s.podStatuses[podFullName] = status
 		s.podStatusChannel <- podStatusSyncRequest{pod, status}
@@ -148,6 +154,10 @@ func (s *statusManager) syncBatch() error {
 		// We failed to update status. In order to make sure we retry next time
 		// we delete cached value. This may result in an additional update, but
 		// this is ok.
-		s.DeletePodStatus(podFullName)
+		// Doing this synchronously will lead to a deadlock if the podStatusChannel
+		// is full, and the pod worker holding the lock is waiting on this method
+		// to clear the channel. Even if this delete never runs subsequent container
+		// changes on the node should trigger updates.
+		go s.DeletePodStatus(podFullName)
 		return fmt.Errorf("error updating status for pod %q: %v", pod.Name, err)
 	}
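
Below is a minimal, self-contained Go sketch (not the kubelet's actual code; the statusManager fields and method bodies here are simplified stand-ins) of the failure mode this patch works around. SetPodStatus sends on a bounded channel while still holding the mutex, and syncBatch, the channel's only consumer, calls DeletePodStatus, which needs that same mutex. If the channel fills up, a writer can block inside the lock while the consumer blocks on the lock, so nothing ever drains the channel. Offloading the delete to a goroutine, as the patch does with go s.DeletePodStatus(podFullName), keeps the consumer free to keep receiving.

// Illustrative sketch of the deadlock pattern fixed by this patch.
// Names (statusManager, SetPodStatus, DeletePodStatus, syncBatch) mirror the
// real code, but the types and bodies are simplified stand-ins.
package main

import (
	"fmt"
	"sync"
	"time"
)

type statusManager struct {
	mu       sync.Mutex
	statuses map[string]string
	ch       chan string // bounded, like podStatusChannel
}

// SetPodStatus mirrors the shape of the real method: it sends on the channel
// while still holding the lock. The send blocks if the channel is full.
func (s *statusManager) SetPodStatus(name, status string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.statuses[name] = status
	s.ch <- name
}

// DeletePodStatus needs the same lock the writer may be holding.
func (s *statusManager) DeletePodStatus(name string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	delete(s.statuses, name)
}

// syncBatch consumes one request; on a (simulated) API failure it must drop
// the cached status so the update is retried later.
func (s *statusManager) syncBatch() {
	name := <-s.ch
	// Calling DeletePodStatus synchronously here can deadlock: if a writer is
	// blocked on the full channel while holding s.mu, this call never acquires
	// the lock, syncBatch never loops back to receive, and the channel is never
	// drained. Running it in a goroutine, as the patch does, avoids that.
	go s.DeletePodStatus(name)
	fmt.Println("dropped cached status, will retry:", name)
}

func main() {
	s := &statusManager{statuses: map[string]string{}, ch: make(chan string, 1)}
	go func() {
		for i := 0; i < 5; i++ {
			s.SetPodStatus(fmt.Sprintf("pod-%d", i), "Running")
		}
	}()
	for i := 0; i < 5; i++ {
		s.syncBatch()
	}
	time.Sleep(100 * time.Millisecond) // let the async deletes finish
}

The go call trades strict ordering of the delete for liveness; as the patch comment notes, even if a delete never runs, subsequent container changes on the node should trigger fresh status updates.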