Merge pull request #43006 from apilloud/fix_failure_crash

Automatic merge from submit-queue

Fix crash on Pods().Get() failure

**What this PR does / why we need it**:

Fixes a potential crash in syncPod when Pods().Get() returns an error other than NotFound. This is unlikely to occur with the standard client, but easily shows up with a stub kube client that returns Unimplemented to everything. Updates the unit test as well.

**Release note**:
`NONE`
This commit is contained in:
Kubernetes Submit Queue 2017-05-05 16:25:23 -07:00 committed by GitHub
commit 069a25f378
2 changed files with 43 additions and 38 deletions

View File

@ -427,7 +427,11 @@ func (m *manager) syncPod(uid types.UID, status versionedPodStatus) {
// RemoveOrphanedStatuses, so we just ignore the update here.
return
}
if err == nil {
if err != nil {
glog.Warningf("Failed to get status for pod %q: %v", format.PodDesc(status.podName, status.podNamespace, uid), err)
return
}
translatedUID := m.podManager.TranslatePodUID(pod.UID)
if len(translatedUID) > 0 && translatedUID != uid {
glog.V(2).Infof("Pod %q was deleted and then recreated, skipping status update; old UID %q, new UID %q", format.Pod(pod), uid, translatedUID)
@ -439,32 +443,31 @@ func (m *manager) syncPod(uid types.UID, status versionedPodStatus) {
glog.Error(err)
}
// TODO: handle conflict as a retry, make that easier too.
pod, err = m.kubeClient.Core().Pods(pod.Namespace).UpdateStatus(pod)
if err == nil {
newPod, err := m.kubeClient.Core().Pods(pod.Namespace).UpdateStatus(pod)
if err != nil {
glog.Warningf("Failed to update status for pod %q: %v", format.Pod(pod), err)
return
}
pod = newPod
glog.V(3).Infof("Status for pod %q updated successfully: (%d, %+v)", format.Pod(pod), status.version, status.status)
m.apiStatusVersions[pod.UID] = status.version
if kubepod.IsMirrorPod(pod) {
// We don't handle graceful deletion of mirror pods.
return
}
if !m.podDeletionSafety.OkToDeletePod(pod) {
return
}
if !kubepod.IsMirrorPod(pod) && m.podDeletionSafety.OkToDeletePod(pod) {
deleteOptions := metav1.NewDeleteOptions(0)
// Use the pod UID as the precondition for deletion to prevent deleting a newly created pod with the same name and namespace.
deleteOptions.Preconditions = metav1.NewUIDPreconditions(string(pod.UID))
if err = m.kubeClient.Core().Pods(pod.Namespace).Delete(pod.Name, deleteOptions); err == nil {
glog.V(3).Infof("Pod %q fully terminated and removed from etcd", format.Pod(pod))
m.deletePodStatus(uid)
err = m.kubeClient.Core().Pods(pod.Namespace).Delete(pod.Name, deleteOptions)
if err != nil {
glog.Warningf("Failed to delete status for pod %q: %v", format.Pod(pod), err)
return
}
glog.V(3).Infof("Pod %q fully terminated and removed from etcd", format.Pod(pod))
m.deletePodStatus(uid)
}
}
// We failed to update status, wait for periodic sync to retry.
glog.Warningf("Failed to update status for pod %q: %v", format.Pod(pod), err)
}
// needsUpdate returns whether the status is stale for the given pod UID.
// This method is not thread safe, and most only be accessed by the sync thread.
func (m *manager) needsUpdate(uid types.UID, status versionedPodStatus) bool {

View File

@ -324,7 +324,7 @@ func TestSyncPodNoDeadlock(t *testing.T) {
pod := getTestPod()
// Setup fake client.
var ret v1.Pod
var ret *v1.Pod
var err error
client.AddReactor("*", "pods", func(action core.Action) (bool, runtime.Object, error) {
switch action := action.(type) {
@ -335,25 +335,26 @@ func TestSyncPodNoDeadlock(t *testing.T) {
default:
assert.Fail(t, "Unexpected Action: %+v", action)
}
return true, &ret, err
return true, ret, err
})
pod.Status.ContainerStatuses = []v1.ContainerStatus{{State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}}}
t.Logf("Pod not found.")
ret = *pod
ret = nil
err = errors.NewNotFound(api.Resource("pods"), pod.Name)
m.SetPodStatus(pod, getRandomPodStatus())
verifyActions(t, m, []core.Action{getAction()})
t.Logf("Pod was recreated.")
ret = getTestPod()
ret.UID = "other_pod"
err = nil
m.SetPodStatus(pod, getRandomPodStatus())
verifyActions(t, m, []core.Action{getAction()})
t.Logf("Pod not deleted (success case).")
ret = *pod
ret = getTestPod()
m.SetPodStatus(pod, getRandomPodStatus())
verifyActions(t, m, []core.Action{getAction(), updateAction()})
@ -369,6 +370,7 @@ func TestSyncPodNoDeadlock(t *testing.T) {
verifyActions(t, m, []core.Action{getAction(), updateAction()})
t.Logf("Error case.")
ret = nil
err = fmt.Errorf("intentional test error")
m.SetPodStatus(pod, getRandomPodStatus())
verifyActions(t, m, []core.Action{getAction()})