From b8dca4dd9d5b0e69e5d13ed68415b5b3b4c08d7f Mon Sep 17 00:00:00 2001 From: Andrew Pilloud Date: Mon, 13 Mar 2017 11:30:20 -0700 Subject: [PATCH] Refactor syncPod to fix panics on error --- pkg/kubelet/status/status_manager.go | 71 ++++++++++++----------- pkg/kubelet/status/status_manager_test.go | 10 ++-- 2 files changed, 43 insertions(+), 38 deletions(-) diff --git a/pkg/kubelet/status/status_manager.go b/pkg/kubelet/status/status_manager.go index 4bd1b5771c5..36121115020 100644 --- a/pkg/kubelet/status/status_manager.go +++ b/pkg/kubelet/status/status_manager.go @@ -427,42 +427,45 @@ func (m *manager) syncPod(uid types.UID, status versionedPodStatus) { // RemoveOrphanedStatuses, so we just ignore the update here. return } - if err == nil { - translatedUID := m.podManager.TranslatePodUID(pod.UID) - if len(translatedUID) > 0 && translatedUID != uid { - glog.V(2).Infof("Pod %q was deleted and then recreated, skipping status update; old UID %q, new UID %q", format.Pod(pod), uid, translatedUID) - m.deletePodStatus(uid) - return - } - pod.Status = status.status - if err := podutil.SetInitContainersStatusesAnnotations(pod); err != nil { - glog.Error(err) - } - // TODO: handle conflict as a retry, make that easier too. - pod, err = m.kubeClient.Core().Pods(pod.Namespace).UpdateStatus(pod) - if err == nil { - glog.V(3).Infof("Status for pod %q updated successfully: (%d, %+v)", format.Pod(pod), status.version, status.status) - m.apiStatusVersions[pod.UID] = status.version - if kubepod.IsMirrorPod(pod) { - // We don't handle graceful deletion of mirror pods. - return - } - if !m.podDeletionSafety.OkToDeletePod(pod) { - return - } - deleteOptions := metav1.NewDeleteOptions(0) - // Use the pod UID as the precondition for deletion to prevent deleting a newly created pod with the same name and namespace. - deleteOptions.Preconditions = metav1.NewUIDPreconditions(string(pod.UID)) - if err = m.kubeClient.Core().Pods(pod.Namespace).Delete(pod.Name, deleteOptions); err == nil { - glog.V(3).Infof("Pod %q fully terminated and removed from etcd", format.Pod(pod)) - m.deletePodStatus(uid) - return - } - } + if err != nil { + glog.Warningf("Failed to get status for pod %q: %v", format.PodDesc(status.podName, status.podNamespace, uid), err) + return } - // We failed to update status, wait for periodic sync to retry. - glog.Warningf("Failed to update status for pod %q: %v", format.Pod(pod), err) + translatedUID := m.podManager.TranslatePodUID(pod.UID) + if len(translatedUID) > 0 && translatedUID != uid { + glog.V(2).Infof("Pod %q was deleted and then recreated, skipping status update; old UID %q, new UID %q", format.Pod(pod), uid, translatedUID) + m.deletePodStatus(uid) + return + } + pod.Status = status.status + if err := podutil.SetInitContainersStatusesAnnotations(pod); err != nil { + glog.Error(err) + } + // TODO: handle conflict as a retry, make that easier too. + newPod, err := m.kubeClient.Core().Pods(pod.Namespace).UpdateStatus(pod) + if err != nil { + glog.Warningf("Failed to update status for pod %q: %v", format.Pod(pod), err) + return + } + pod = newPod + + glog.V(3).Infof("Status for pod %q updated successfully: (%d, %+v)", format.Pod(pod), status.version, status.status) + m.apiStatusVersions[pod.UID] = status.version + + // We don't handle graceful deletion of mirror pods. + if !kubepod.IsMirrorPod(pod) && m.podDeletionSafety.OkToDeletePod(pod) { + deleteOptions := metav1.NewDeleteOptions(0) + // Use the pod UID as the precondition for deletion to prevent deleting a newly created pod with the same name and namespace. + deleteOptions.Preconditions = metav1.NewUIDPreconditions(string(pod.UID)) + err = m.kubeClient.Core().Pods(pod.Namespace).Delete(pod.Name, deleteOptions) + if err != nil { + glog.Warningf("Failed to delete status for pod %q: %v", format.Pod(pod), err) + return + } + glog.V(3).Infof("Pod %q fully terminated and removed from etcd", format.Pod(pod)) + m.deletePodStatus(uid) + } } // needsUpdate returns whether the status is stale for the given pod UID. diff --git a/pkg/kubelet/status/status_manager_test.go b/pkg/kubelet/status/status_manager_test.go index ae0c38600fb..b51d1ced28d 100644 --- a/pkg/kubelet/status/status_manager_test.go +++ b/pkg/kubelet/status/status_manager_test.go @@ -324,7 +324,7 @@ func TestSyncPodNoDeadlock(t *testing.T) { pod := getTestPod() // Setup fake client. - var ret v1.Pod + var ret *v1.Pod var err error client.AddReactor("*", "pods", func(action core.Action) (bool, runtime.Object, error) { switch action := action.(type) { @@ -335,25 +335,26 @@ func TestSyncPodNoDeadlock(t *testing.T) { default: assert.Fail(t, "Unexpected Action: %+v", action) } - return true, &ret, err + return true, ret, err }) pod.Status.ContainerStatuses = []v1.ContainerStatus{{State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}}} t.Logf("Pod not found.") - ret = *pod + ret = nil err = errors.NewNotFound(api.Resource("pods"), pod.Name) m.SetPodStatus(pod, getRandomPodStatus()) verifyActions(t, m, []core.Action{getAction()}) t.Logf("Pod was recreated.") + ret = getTestPod() ret.UID = "other_pod" err = nil m.SetPodStatus(pod, getRandomPodStatus()) verifyActions(t, m, []core.Action{getAction()}) t.Logf("Pod not deleted (success case).") - ret = *pod + ret = getTestPod() m.SetPodStatus(pod, getRandomPodStatus()) verifyActions(t, m, []core.Action{getAction(), updateAction()}) @@ -369,6 +370,7 @@ func TestSyncPodNoDeadlock(t *testing.T) { verifyActions(t, m, []core.Action{getAction(), updateAction()}) t.Logf("Error case.") + ret = nil err = fmt.Errorf("intentional test error") m.SetPodStatus(pod, getRandomPodStatus()) verifyActions(t, m, []core.Action{getAction()})