Merge pull request #41436 from dashpole/status_bug

Automatic merge from submit-queue

Fix bug in status manager TerminatePod

In TerminatePod, we previously passed pod.Status to updateStatusInternal.  This is a bug, since pod.Status is the original status the pod object was handed to us with, not the status the status manager has built.  Not only does this skip the updates TerminatePod just made to the container statuses, but in some cases it reverted the pod's status to an earlier version, because the pod object carried a stale status to begin with.

This was the case in #40239 and #41095.  As shown in #40239, the pod's status was set back to Running after it had already been set to Failed, occasionally causing very long delays in pod deletion, since we have to wait for this to be corrected.

This PR fixes the bug, adds some helpful debugging statements, and adds a unit test for TerminatePod (which, for some reason, didn't exist before?).
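
For illustration, here is a minimal, self-contained sketch of the pattern the fix restores. The types and the `update` helper below are simplified stand-ins, not the real status manager API; the point is only that TerminatePod must push the status it just built from the manager's cached copy, not the possibly stale `pod.Status` carried by the pod object:

```go
// Minimal sketch only: simplified stand-ins for the kubelet status manager types.
package main

import "fmt"

type PodPhase string

const (
	PodRunning PodPhase = "Running"
	PodFailed  PodPhase = "Failed"
)

type PodStatus struct{ Phase PodPhase }

type Pod struct {
	UID    string
	Status PodStatus // may be stale: reflects the pod object, not the latest cached status
}

// manager caches the most recent status generated for each pod.
type manager struct{ podStatuses map[string]PodStatus }

// TerminatePod marks the pod terminated and pushes an update.
func (m *manager) TerminatePod(pod *Pod) {
	// Start from the cached status if one exists; it may already say Failed.
	status, ok := m.podStatuses[pod.UID]
	if !ok {
		status = pod.Status
	}
	// ... container states would be marked Terminated on `status` here ...

	// Buggy version: m.update(pod, pod.Status) would push the stale phase
	// (e.g. Running) and clobber the cached Failed phase.
	m.update(pod, status)
}

func (m *manager) update(pod *Pod, status PodStatus) {
	m.podStatuses[pod.UID] = status
	fmt.Printf("pod %s -> %s\n", pod.UID, status.Phase)
}

func main() {
	m := &manager{podStatuses: map[string]PodStatus{}}
	p := &Pod{UID: "uid-1", Status: PodStatus{Phase: PodRunning}} // stale view of the pod
	m.podStatuses[p.UID] = PodStatus{Phase: PodFailed}            // latest status the manager recorded
	m.TerminatePod(p) // prints "pod uid-1 -> Failed", not "Running"
}
```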

@kubernetes/sig-node-bugs @vish @Random-Liu
Merged by Kubernetes Submit Queue on 2017-02-14 21:03:31 -08:00, committed by GitHub
commit a57967f47b
2 changed files with 34 additions and 1 deletion

pkg/kubelet/status/status_manager.go

@@ -135,6 +135,8 @@ func (m *manager) Start() {
 	go wait.Forever(func() {
 		select {
 		case syncRequest := <-m.podStatusChannel:
+			glog.V(5).Infof("Status Manager: syncing pod: %q, with status: (%d, %v) from podStatusChannel",
+				syncRequest.podUID, syncRequest.status.version, syncRequest.status.status)
 			m.syncPod(syncRequest.podUID, syncRequest.status)
 		case <-syncTicker:
 			m.syncBatch()
@@ -260,7 +262,7 @@ func (m *manager) TerminatePod(pod *v1.Pod) {
 			Terminated: &v1.ContainerStateTerminated{},
 		}
 	}
-	m.updateStatusInternal(pod, pod.Status, true)
+	m.updateStatusInternal(pod, status, true)
 }
 
 // updateStatusInternal updates the internal status cache, and queues an update to the api server if
@@ -326,6 +328,8 @@ func (m *manager) updateStatusInternal(pod *v1.Pod, status v1.PodStatus, forceUp
 	select {
 	case m.podStatusChannel <- podStatusSyncRequest{pod.UID, newStatus}:
+		glog.V(5).Infof("Status Manager: adding pod: %q, with status: (%q, %v) to podStatusChannel",
+			pod.UID, newStatus.version, newStatus.status)
 		return true
 	default:
 		// Let the periodic syncBatch handle the update if the channel is full.
@@ -395,6 +399,7 @@ func (m *manager) syncBatch() {
 	}()
 
 	for _, update := range updatedStatuses {
+		glog.V(5).Infof("Status Manager: syncPod in syncbatch. pod UID: %q", update.podUID)
 		m.syncPod(update.podUID, update.status)
 	}
 }

pkg/kubelet/status/status_manager_test.go

@@ -583,6 +583,34 @@ func TestStaticPod(t *testing.T) {
 	})
 }
 
+func TestTerminatePod(t *testing.T) {
+	syncer := newTestManager(&fake.Clientset{})
+	testPod := getTestPod()
+	// update the pod's status to Failed.  TerminatePod should preserve this status update.
+	firstStatus := getRandomPodStatus()
+	firstStatus.Phase = v1.PodFailed
+	syncer.SetPodStatus(testPod, firstStatus)
+
+	// set the testPod to a pod with Phase running, to simulate a stale pod
+	testPod.Status = getRandomPodStatus()
+	testPod.Status.Phase = v1.PodRunning
+
+	syncer.TerminatePod(testPod)
+
+	// we expect the container statuses to have changed to terminated
+	newStatus := expectPodStatus(t, syncer, testPod)
+	for i := range newStatus.ContainerStatuses {
+		assert.False(t, newStatus.ContainerStatuses[i].State.Terminated == nil, "expected containers to be terminated")
+	}
+	for i := range newStatus.InitContainerStatuses {
+		assert.False(t, newStatus.InitContainerStatuses[i].State.Terminated == nil, "expected init containers to be terminated")
+	}
+
+	// we expect the previous status update to be preserved.
+	assert.Equal(t, newStatus.Phase, firstStatus.Phase)
+	assert.Equal(t, newStatus.Message, firstStatus.Message)
+}
+
 func TestSetContainerReadiness(t *testing.T) {
 	cID1 := kubecontainer.ContainerID{Type: "test", ID: "1"}
 	cID2 := kubecontainer.ContainerID{Type: "test", ID: "2"}