diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go index 4b5e59f48f7..115161003cf 100644 --- a/pkg/kubelet/kubelet_pods.go +++ b/pkg/kubelet/kubelet_pods.go @@ -1447,6 +1447,20 @@ func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.Po allStatus := append(append([]v1.ContainerStatus{}, s.ContainerStatuses...), s.InitContainerStatuses...) s.Phase = getPhase(&pod.Spec, allStatus) klog.V(4).InfoS("Got phase for pod", "pod", klog.KObj(pod), "oldPhase", oldPodStatus.Phase, "phase", s.Phase) + + // Perform a three-way merge between the statuses from the status manager, + // runtime, and generated status to ensure terminal status is correctly set. + if s.Phase != v1.PodFailed && s.Phase != v1.PodSucceeded { + switch { + case oldPodStatus.Phase == v1.PodFailed || oldPodStatus.Phase == v1.PodSucceeded: + klog.V(4).InfoS("Status manager phase was terminal, updating phase to match", "pod", klog.KObj(pod), "phase", oldPodStatus.Phase) + s.Phase = oldPodStatus.Phase + case pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded: + klog.V(4).InfoS("API phase was terminal, updating phase to match", "pod", klog.KObj(pod), "phase", pod.Status.Phase) + s.Phase = pod.Status.Phase + } + } + if s.Phase == oldPodStatus.Phase { // preserve the reason and message which is associated with the phase s.Reason = oldPodStatus.Reason diff --git a/pkg/kubelet/kubelet_pods_test.go b/pkg/kubelet/kubelet_pods_test.go index 655cb04d115..491a0a284b9 100644 --- a/pkg/kubelet/kubelet_pods_test.go +++ b/pkg/kubelet/kubelet_pods_test.go @@ -2595,6 +2595,95 @@ func Test_generateAPIPodStatus(t *testing.T) { }, }, }, + { + name: "terminal phase from previous status must remain terminal, restartAlways", + pod: &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + runningState("containerA"), + runningState("containerB"), + }, + }, + }, + currentStatus: &kubecontainer.PodStatus{}, + previousStatus: v1.PodStatus{ + Phase: v1.PodSucceeded, + ContainerStatuses: []v1.ContainerStatus{ + runningState("containerA"), + runningState("containerB"), + }, + // Reason and message should be preserved + Reason: "Test", + Message: "test", + }, + expected: v1.PodStatus{ + Phase: v1.PodSucceeded, + HostIP: "127.0.0.1", + QOSClass: v1.PodQOSBestEffort, + Conditions: []v1.PodCondition{ + {Type: v1.PodInitialized, Status: v1.ConditionTrue, Reason: "PodCompleted"}, + {Type: v1.PodReady, Status: v1.ConditionFalse, Reason: "PodCompleted"}, + {Type: v1.ContainersReady, Status: v1.ConditionFalse, Reason: "PodCompleted"}, + {Type: v1.PodScheduled, Status: v1.ConditionTrue}, + }, + ContainerStatuses: []v1.ContainerStatus{ + ready(waitingWithLastTerminationUnknown("containerA", 1)), + ready(waitingWithLastTerminationUnknown("containerB", 1)), + }, + Reason: "Test", + Message: "test", + }, + }, + { + name: "terminal phase from previous status must remain terminal, restartNever", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + NodeName: "machine", + Containers: []v1.Container{ + {Name: "containerA"}, + {Name: "containerB"}, + }, + RestartPolicy: v1.RestartPolicyNever, + }, + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + runningState("containerA"), + runningState("containerB"), + }, + }, + }, + currentStatus: &kubecontainer.PodStatus{}, + previousStatus: v1.PodStatus{ + Phase: v1.PodSucceeded, + ContainerStatuses: []v1.ContainerStatus{ + succeededState("containerA"), + succeededState("containerB"), + }, + // Reason and message should be preserved + Reason: "Test", + Message: "test", + }, + expected: v1.PodStatus{ + Phase: v1.PodSucceeded, + HostIP: "127.0.0.1", + QOSClass: v1.PodQOSBestEffort, + Conditions: []v1.PodCondition{ + {Type: v1.PodInitialized, Status: v1.ConditionTrue, Reason: "PodCompleted"}, + {Type: v1.PodReady, Status: v1.ConditionFalse, Reason: "PodCompleted"}, + {Type: v1.ContainersReady, Status: v1.ConditionFalse, Reason: "PodCompleted"}, + {Type: v1.PodScheduled, Status: v1.ConditionTrue}, + }, + ContainerStatuses: []v1.ContainerStatus{ + ready(succeededState("containerA")), + ready(succeededState("containerB")), + }, + Reason: "Test", + Message: "test", + }, + }, { name: "running can revert to pending", pod: &v1.Pod{ diff --git a/pkg/kubelet/status/status_manager.go b/pkg/kubelet/status/status_manager.go index 70381df836a..9c93a746b58 100644 --- a/pkg/kubelet/status/status_manager.go +++ b/pkg/kubelet/status/status_manager.go @@ -334,7 +334,7 @@ func (m *manager) TerminatePod(pod *v1.Pod) { } status := *oldStatus.DeepCopy() for i := range status.ContainerStatuses { - if status.ContainerStatuses[i].State.Terminated != nil || status.ContainerStatuses[i].State.Waiting != nil { + if status.ContainerStatuses[i].State.Terminated != nil { continue } status.ContainerStatuses[i].State = v1.ContainerState{ @@ -346,7 +346,7 @@ func (m *manager) TerminatePod(pod *v1.Pod) { } } for i := range status.InitContainerStatuses { - if status.InitContainerStatuses[i].State.Terminated != nil || status.InitContainerStatuses[i].State.Waiting != nil { + if status.InitContainerStatuses[i].State.Terminated != nil { continue } status.InitContainerStatuses[i].State = v1.ContainerState{ diff --git a/pkg/kubelet/status/status_manager_test.go b/pkg/kubelet/status/status_manager_test.go index 26e8db86717..855fac52ecd 100644 --- a/pkg/kubelet/status/status_manager_test.go +++ b/pkg/kubelet/status/status_manager_test.go @@ -622,6 +622,64 @@ func TestTerminatePod(t *testing.T) { assert.Equal(t, newStatus.Message, firstStatus.Message) } +func TestTerminatePodWaiting(t *testing.T) { + syncer := newTestManager(&fake.Clientset{}) + testPod := getTestPod() + t.Logf("update the pod's status to Failed. TerminatePod should preserve this status update.") + firstStatus := getRandomPodStatus() + firstStatus.Phase = v1.PodFailed + firstStatus.InitContainerStatuses = []v1.ContainerStatus{ + {Name: "init-test-1"}, + {Name: "init-test-2", State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "InitTest", ExitCode: 0}}}, + {Name: "init-test-3", State: v1.ContainerState{Waiting: &v1.ContainerStateWaiting{Reason: "InitTest"}}}, + } + firstStatus.ContainerStatuses = []v1.ContainerStatus{ + {Name: "test-1"}, + {Name: "test-2", State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "Test", ExitCode: 2}}}, + {Name: "test-3", State: v1.ContainerState{Waiting: &v1.ContainerStateWaiting{Reason: "Test"}}}, + } + syncer.SetPodStatus(testPod, firstStatus) + + t.Logf("set the testPod to a pod with Phase running, to simulate a stale pod") + testPod.Status = getRandomPodStatus() + testPod.Status.Phase = v1.PodRunning + + syncer.TerminatePod(testPod) + + t.Logf("we expect the container statuses to have changed to terminated") + newStatus := expectPodStatus(t, syncer, testPod) + for i := range newStatus.ContainerStatuses { + assert.False(t, newStatus.ContainerStatuses[i].State.Terminated == nil, "expected containers to be terminated") + } + for i := range newStatus.InitContainerStatuses { + assert.False(t, newStatus.InitContainerStatuses[i].State.Terminated == nil, "expected init containers to be terminated") + } + + expectUnknownState := v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "ContainerStatusUnknown", Message: "The container could not be located when the pod was terminated", ExitCode: 137}} + if !reflect.DeepEqual(newStatus.InitContainerStatuses[0].State, expectUnknownState) { + t.Errorf("terminated container state not defaulted: %s", diff.ObjectReflectDiff(newStatus.InitContainerStatuses[0].State, expectUnknownState)) + } + if !reflect.DeepEqual(newStatus.InitContainerStatuses[1].State, firstStatus.InitContainerStatuses[1].State) { + t.Errorf("existing terminated container state not preserved: %#v", newStatus.ContainerStatuses) + } + if !reflect.DeepEqual(newStatus.InitContainerStatuses[2].State, expectUnknownState) { + t.Errorf("waiting container state not defaulted: %s", diff.ObjectReflectDiff(newStatus.InitContainerStatuses[2].State, expectUnknownState)) + } + if !reflect.DeepEqual(newStatus.ContainerStatuses[0].State, expectUnknownState) { + t.Errorf("terminated container state not defaulted: %s", diff.ObjectReflectDiff(newStatus.ContainerStatuses[0].State, expectUnknownState)) + } + if !reflect.DeepEqual(newStatus.ContainerStatuses[1].State, firstStatus.ContainerStatuses[1].State) { + t.Errorf("existing terminated container state not preserved: %#v", newStatus.ContainerStatuses) + } + if !reflect.DeepEqual(newStatus.ContainerStatuses[2].State, expectUnknownState) { + t.Errorf("waiting container state not defaulted: %s", diff.ObjectReflectDiff(newStatus.ContainerStatuses[2].State, expectUnknownState)) + } + + t.Logf("we expect the previous status update to be preserved.") + assert.Equal(t, newStatus.Phase, firstStatus.Phase) + assert.Equal(t, newStatus.Message, firstStatus.Message) +} + func TestSetContainerReadiness(t *testing.T) { cID1 := kubecontainer.ContainerID{Type: "test", ID: "1"} cID2 := kubecontainer.ContainerID{Type: "test", ID: "2"} diff --git a/test/e2e/node/pods.go b/test/e2e/node/pods.go index 675dfbf3456..a4cf9f3bf5e 100644 --- a/test/e2e/node/pods.go +++ b/test/e2e/node/pods.go @@ -379,6 +379,8 @@ var _ = SIGDescribe("Pods Extended", func() { switch { case t.ExitCode == 1: // expected + case t.ExitCode == 137 && (t.Reason == "ContainerStatusUnknown" || t.Reason == "Error"): + // expected, pod was force-killed after grace period case t.ExitCode == 128 && (t.Reason == "StartError" || t.Reason == "ContainerCannotRun") && reBug88766.MatchString(t.Message): // pod volume teardown races with container start in CRI, which reports a failure framework.Logf("pod %s on node %s failed with the symptoms of https://github.com/kubernetes/kubernetes/issues/88766", pod.Name, pod.Spec.NodeName) @@ -425,7 +427,7 @@ var _ = SIGDescribe("Pods Extended", func() { if !hasTerminalPhase { var names []string for _, status := range pod.Status.ContainerStatuses { - if status.State.Terminated != nil || status.State.Running != nil { + if status.State.Running != nil { names = append(names, status.Name) } }