From 64ad3e17ad15cd0f9a4fd86706eec1c572033254 Mon Sep 17 00:00:00 2001
From: Prashanth Balasubramanian
Date: Sat, 20 Jun 2015 21:36:52 -0700
Subject: [PATCH] Fix deadlock

---
 pkg/kubelet/status_manager.go | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/pkg/kubelet/status_manager.go b/pkg/kubelet/status_manager.go
index 4df39d7c086..aaeedd0c04f 100644
--- a/pkg/kubelet/status_manager.go
+++ b/pkg/kubelet/status_manager.go
@@ -96,6 +96,12 @@ func (s *statusManager) SetPodStatus(pod *api.Pod, status api.PodStatus) {
 		}
 	}
 
+	// TODO: Holding a lock during blocking operations is dangerous. Refactor so this isn't necessary.
+	// The intent here is to prevent concurrent updates to a pod's status from
+	// clobbering each other so the phase of a pod progresses monotonically.
+	// Currently this routine is not called for the same pod from multiple
+	// workers and/or the kubelet but dropping the lock before sending the
+	// status down the channel feels like an easy way to get a bullet in foot.
 	if !found || !reflect.DeepEqual(oldStatus, status) {
 		s.podStatuses[podFullName] = status
 		s.podStatusChannel <- podStatusSyncRequest{pod, status}
@@ -148,6 +154,10 @@ func (s *statusManager) syncBatch() error {
 		// We failed to update status. In order to make sure we retry next time
 		// we delete cached value. This may result in an additional update, but
 		// this is ok.
-		s.DeletePodStatus(podFullName)
+		// Doing this synchronously will lead to a deadlock if the podStatusChannel
+		// is full, and the pod worker holding the lock is waiting on this method
+		// to clear the channel. Even if this delete never runs subsequent container
+		// changes on the node should trigger updates.
+		go s.DeletePodStatus(podFullName)
 		return fmt.Errorf("error updating status for pod %q: %v", pod.Name, err)
 	}
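
Below is a minimal, self-contained Go sketch (not the kubelet's actual code; the statusManager fields and method bodies here are simplified stand-ins) of the failure mode this patch works around. SetPodStatus sends on a bounded channel while still holding the mutex, and syncBatch, the channel's only consumer, calls DeletePodStatus, which needs that same mutex. If the channel fills up, a writer can block inside the lock while the consumer blocks on the lock, so nothing ever drains the channel. Offloading the delete to a goroutine, as the patch does with go s.DeletePodStatus(podFullName), keeps the consumer free to keep receiving.

// Illustrative sketch of the deadlock pattern fixed by this patch.
// Names (statusManager, SetPodStatus, DeletePodStatus, syncBatch) mirror the
// real code, but the types and bodies are simplified stand-ins.
package main

import (
	"fmt"
	"sync"
	"time"
)

type statusManager struct {
	mu       sync.Mutex
	statuses map[string]string
	ch       chan string // bounded, like podStatusChannel
}

// SetPodStatus mirrors the shape of the real method: it sends on the channel
// while still holding the lock. The send blocks if the channel is full.
func (s *statusManager) SetPodStatus(name, status string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.statuses[name] = status
	s.ch <- name
}

// DeletePodStatus needs the same lock the writer may be holding.
func (s *statusManager) DeletePodStatus(name string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	delete(s.statuses, name)
}

// syncBatch consumes one request; on a (simulated) API failure it must drop
// the cached status so the update is retried later.
func (s *statusManager) syncBatch() {
	name := <-s.ch
	// Calling DeletePodStatus synchronously here can deadlock: if a writer is
	// blocked on the full channel while holding s.mu, this call never acquires
	// the lock, syncBatch never loops back to receive, and the channel is never
	// drained. Running it in a goroutine, as the patch does, avoids that.
	go s.DeletePodStatus(name)
	fmt.Println("dropped cached status, will retry:", name)
}

func main() {
	s := &statusManager{statuses: map[string]string{}, ch: make(chan string, 1)}
	go func() {
		for i := 0; i < 5; i++ {
			s.SetPodStatus(fmt.Sprintf("pod-%d", i), "Running")
		}
	}()
	for i := 0; i < 5; i++ {
		s.syncBatch()
	}
	time.Sleep(100 * time.Millisecond) // let the async deletes finish
}

The go call trades strict ordering of the delete for liveness; as the patch comment notes, even if a delete never runs, subsequent container changes on the node should trigger fresh status updates.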