diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go index a7485f6c476..f78fde8ee3c 100644 --- a/pkg/kubelet/kubelet_pods.go +++ b/pkg/kubelet/kubelet_pods.go @@ -1529,11 +1529,12 @@ func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.Po spec := &pod.Spec allStatus := append(append([]v1.ContainerStatus{}, s.ContainerStatuses...), s.InitContainerStatuses...) s.Phase = getPhase(spec, allStatus) + klog.V(4).InfoS("Got phase for pod", "pod", klog.KObj(pod), "phase", s.Phase) // Check for illegal phase transition if pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded { // API server shows terminal phase; transitions are not allowed if s.Phase != pod.Status.Phase { - klog.ErrorS(nil, "Pod attempted illegal phase transition", "originalStatusPhase", pod.Status.Phase, "apiStatusPhase", s.Phase, "apiStatus", s) + klog.ErrorS(nil, "Pod attempted illegal phase transition", "pod", klog.KObj(pod), "originalStatusPhase", pod.Status.Phase, "apiStatusPhase", s.Phase, "apiStatus", s) // Force back to phase from the API server s.Phase = pod.Status.Phase } @@ -1736,8 +1737,6 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon oldStatus, found := oldStatuses[container.Name] if found { if oldStatus.State.Terminated != nil { - // Do not update status on terminated init containers as - // they be removed at any time. status = &oldStatus } else { // Apply some values from the old statuses as the default values. @@ -1760,7 +1759,7 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon continue } // if no container is found, then assuming it should be waiting seems plausible, but the status code requires - // that a previous termination be present. If we're offline long enough (or something removed the container?), then + // that a previous termination be present. If we're offline long enough or something removed the container, then // the previous termination may not be present. This next code block ensures that if the container was previously running // then when that container status disappears, we can infer that it terminated even if we don't know the status code. // By setting the lasttermination state we are able to leave the container status waiting and present more accurate @@ -1779,21 +1778,17 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon continue } - if pod.DeletionTimestamp == nil { - continue - } - - // and if the pod itself is being deleted, then the CRI may have removed the container already and for whatever reason the kubelet missed the exit code - // (this seems not awesome). We know at this point that we will not be restarting the container. + // If we're here, we know the pod was previously running, but doesn't have a terminated status. We will check now to + // see if it's in a pending state. status := statuses[container.Name] - // if the status we're about to write indicates the default, the Waiting status will force this pod back into Pending. - // That isn't true, we know the pod is going away. + // If the status we're about to write indicates the default, the Waiting status will force this pod back into Pending. + // That isn't true, we know the pod was previously running. isDefaultWaitingStatus := status.State.Waiting != nil && status.State.Waiting.Reason == ContainerCreating if hasInitContainers { isDefaultWaitingStatus = status.State.Waiting != nil && status.State.Waiting.Reason == PodInitializing } if !isDefaultWaitingStatus { - // we the status was written, don't override + // the status was written, don't override continue } if status.LastTerminationState.Terminated != nil { @@ -1809,6 +1804,12 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon Message: "The container could not be located when the pod was deleted. The container used to be Running", ExitCode: 137, } + + // If the pod was not deleted, then it's been restarted. Increment restart count. + if pod.DeletionTimestamp == nil { + status.RestartCount += 1 + } + statuses[container.Name] = status } diff --git a/pkg/kubelet/kubelet_pods_test.go b/pkg/kubelet/kubelet_pods_test.go index 94844df2101..102761b5768 100644 --- a/pkg/kubelet/kubelet_pods_test.go +++ b/pkg/kubelet/kubelet_pods_test.go @@ -1782,6 +1782,22 @@ func failedState(cName string) v1.ContainerStatus { }, } } +func waitingWithLastTerminationUnknown(cName string, restartCount int32) v1.ContainerStatus { + return v1.ContainerStatus{ + Name: cName, + State: v1.ContainerState{ + Waiting: &v1.ContainerStateWaiting{Reason: "ContainerCreating"}, + }, + LastTerminationState: v1.ContainerState{ + Terminated: &v1.ContainerStateTerminated{ + Reason: "ContainerStatusUnknown", + Message: "The container could not be located when the pod was deleted. The container used to be Running", + ExitCode: 137, + }, + }, + RestartCount: restartCount, + } +} func TestPodPhaseWithRestartAlways(t *testing.T) { desiredState := v1.PodSpec{ @@ -2306,6 +2322,97 @@ func TestPodPhaseWithRestartOnFailure(t *testing.T) { // func TestPodPhaseWithRestartOnFailureInitContainers(t *testing.T) { // } +func TestConvertToAPIContainerStatuses(t *testing.T) { + desiredState := v1.PodSpec{ + NodeName: "machine", + Containers: []v1.Container{ + {Name: "containerA"}, + {Name: "containerB"}, + }, + RestartPolicy: v1.RestartPolicyAlways, + } + now := metav1.Now() + + tests := []struct { + name string + pod *v1.Pod + currentStatus *kubecontainer.PodStatus + previousStatus []v1.ContainerStatus + containers []v1.Container + hasInitContainers bool + isInitContainer bool + expected []v1.ContainerStatus + }{ + { + name: "no current status, with previous statuses and deletion", + pod: &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + runningState("containerA"), + runningState("containerB"), + }, + }, + ObjectMeta: metav1.ObjectMeta{Name: "my-pod", DeletionTimestamp: &now}, + }, + currentStatus: &kubecontainer.PodStatus{}, + previousStatus: []v1.ContainerStatus{ + runningState("containerA"), + runningState("containerB"), + }, + containers: desiredState.Containers, + // no init containers + // is not an init container + expected: []v1.ContainerStatus{ + waitingWithLastTerminationUnknown("containerA", 0), + waitingWithLastTerminationUnknown("containerB", 0), + }, + }, + { + name: "no current status, with previous statuses and no deletion", + pod: &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + runningState("containerA"), + runningState("containerB"), + }, + }, + }, + currentStatus: &kubecontainer.PodStatus{}, + previousStatus: []v1.ContainerStatus{ + runningState("containerA"), + runningState("containerB"), + }, + containers: desiredState.Containers, + // no init containers + // is not an init container + expected: []v1.ContainerStatus{ + waitingWithLastTerminationUnknown("containerA", 1), + waitingWithLastTerminationUnknown("containerB", 1), + }, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */) + defer testKubelet.Cleanup() + kl := testKubelet.kubelet + containerStatuses := kl.convertToAPIContainerStatuses( + test.pod, + test.currentStatus, + test.previousStatus, + test.containers, + test.hasInitContainers, + test.isInitContainer, + ) + for i, status := range containerStatuses { + assert.Equal(t, test.expected[i], status, "[test %s]", test.name) + } + }) + } +} + func TestGetExec(t *testing.T) { const ( podName = "podFoo"