diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index 89086b46cda..04155c65636 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -1730,7 +1730,7 @@ func (kl *Kubelet) syncPod(pod *api.Pod, mirrorPod *api.Pod, podStatus *kubecont
 		}
 	}
 
-	apiPodStatus := kl.generatePodStatus(pod, podStatus)
+	apiPodStatus := kl.generateAPIPodStatus(pod, podStatus)
 	// Record the time it takes for the pod to become running.
 	existingStatus, ok := kl.statusManager.GetPodStatus(pod.UID)
 	if !ok || existingStatus.Phase == api.PodPending && apiPodStatus.Phase == api.PodRunning &&
@@ -1944,7 +1944,7 @@ func (kl *Kubelet) cleanupBandwidthLimits(allPods []*api.Pod) error {
 			if err != nil {
 				return err
 			}
-			status = kl.generatePodStatus(pod, s)
+			status = kl.generateAPIPodStatus(pod, s)
 		}
 		if status.Phase == api.PodRunning {
 			possibleCIDRs.Insert(fmt.Sprintf("%s/32", status.PodIP))
@@ -3280,7 +3280,7 @@ func GetPhase(spec *api.PodSpec, info []api.ContainerStatus) api.PodPhase {
 	}
 }
 
-func (kl *Kubelet) generatePodStatus(pod *api.Pod, podStatus *kubecontainer.PodStatus) api.PodStatus {
+func (kl *Kubelet) generateAPIPodStatus(pod *api.Pod, podStatus *kubecontainer.PodStatus) api.PodStatus {
 	glog.V(3).Infof("Generating status for %q", format.Pod(pod))
 	// TODO: Consider include the container information.
 	if kl.pastActiveDeadline(pod) {
@@ -3315,11 +3315,10 @@ func (kl *Kubelet) generatePodStatus(pod *api.Pod, podStatus *kubecontainer.PodS
 	return *s
 }
 
-// TODO(random-liu): Move this to some better place.
-// TODO(random-liu): Add test for convertStatusToAPIStatus()
 func (kl *Kubelet) convertStatusToAPIStatus(pod *api.Pod, podStatus *kubecontainer.PodStatus) *api.PodStatus {
 	var apiPodStatus api.PodStatus
 	uid := pod.UID
+	apiPodStatus.PodIP = podStatus.IP
 
 	convertContainerStatus := func(cs *kubecontainer.ContainerStatus) *api.ContainerStatus {
 		cid := cs.ID.String()
@@ -3348,104 +3347,83 @@ func (kl *Kubelet) convertStatusToAPIStatus(pod *api.Pod, podStatus *kubecontain
 		return status
 	}
 
-	// Make the latest container status comes first.
-	sort.Sort(sort.Reverse(kubecontainer.SortContainerStatusesByCreationTime(podStatus.ContainerStatuses)))
-
-	statuses := make(map[string]*api.ContainerStatus, len(pod.Spec.Containers))
-	// Create a map of expected containers based on the pod spec.
-	expectedContainers := make(map[string]api.Container)
-	for _, container := range pod.Spec.Containers {
-		expectedContainers[container.Name] = container
-	}
-
-	containerDone := sets.NewString()
-	apiPodStatus.PodIP = podStatus.IP
-	for _, containerStatus := range podStatus.ContainerStatuses {
-		cName := containerStatus.Name
-		if _, ok := expectedContainers[cName]; !ok {
-			// This would also ignore the infra container.
-			continue
-		}
-		if containerDone.Has(cName) {
-			continue
-		}
-		status := convertContainerStatus(containerStatus)
-		if existing, found := statuses[cName]; found {
-			existing.LastTerminationState = status.State
-			containerDone.Insert(cName)
-		} else {
-			statuses[cName] = status
-		}
-	}
-
-	// Handle the containers for which we cannot find any associated active or dead containers or are in restart backoff
 	// Fetch old containers statuses from old pod status.
-	// TODO(random-liu) Maybe it's better to get status from status manager, because it takes the newest status and there is not
-	// status in api.Pod of static pod.
 	oldStatuses := make(map[string]api.ContainerStatus, len(pod.Spec.Containers))
 	for _, status := range pod.Status.ContainerStatuses {
 		oldStatuses[status.Name] = status
 	}
+
+	// Set all container statuses to default waiting state
+	statuses := make(map[string]*api.ContainerStatus, len(pod.Spec.Containers))
+	defaultWaitingState := api.ContainerState{Waiting: &api.ContainerStateWaiting{Reason: "ContainerCreating"}}
 	for _, container := range pod.Spec.Containers {
-		// TODO(random-liu): We should define "Waiting" state better. And cleanup the following code.
-		if containerStatus, found := statuses[container.Name]; found {
-			reason, message, ok := kl.reasonCache.Get(uid, container.Name)
-			if ok && reason == kubecontainer.ErrCrashLoopBackOff {
-				containerStatus.LastTerminationState = containerStatus.State
-				containerStatus.State = api.ContainerState{
-					Waiting: &api.ContainerStateWaiting{
-						Reason:  reason.Error(),
-						Message: message,
-					},
-				}
-			}
+		status := &api.ContainerStatus{
+			Name:  container.Name,
+			Image: container.Image,
+			State: defaultWaitingState,
+		}
+		// Apply some values from the old statuses as the default values.
+		if oldStatus, found := oldStatuses[container.Name]; found {
+			status.RestartCount = oldStatus.RestartCount
+			status.LastTerminationState = oldStatus.LastTerminationState
+		}
+		statuses[container.Name] = status
+	}
+
+	// Make the latest container status comes first.
+	sort.Sort(sort.Reverse(kubecontainer.SortContainerStatusesByCreationTime(podStatus.ContainerStatuses)))
+
+	// Set container statuses according to the statuses seen in pod status
+	containerSeen := map[string]int{}
+	for _, cStatus := range podStatus.ContainerStatuses {
+		cName := cStatus.Name
+		if _, ok := statuses[cName]; !ok {
+			// This would also ignore the infra container.
 			continue
 		}
-		var containerStatus api.ContainerStatus
-		containerStatus.Name = container.Name
-		containerStatus.Image = container.Image
-		if oldStatus, found := oldStatuses[container.Name]; found {
-			// Some states may be lost due to GC; apply the last observed
-			// values if possible.
-			containerStatus.RestartCount = oldStatus.RestartCount
-			containerStatus.LastTerminationState = oldStatus.LastTerminationState
+		if containerSeen[cName] >= 2 {
+			continue
 		}
-		reason, _, ok := kl.reasonCache.Get(uid, container.Name)
+		status := convertContainerStatus(cStatus)
+		if containerSeen[cName] == 0 {
+			statuses[cName] = status
+		} else {
+			statuses[cName].LastTerminationState = status.State
+		}
+		containerSeen[cName] = containerSeen[cName] + 1
+	}
 
-		if !ok {
-			// default position for a container
-			// At this point there are no active or dead containers, the reasonCache is empty (no entry or the entry has expired)
-			// its reasonable to say the container is being created till a more accurate reason is logged
-			containerStatus.State = api.ContainerState{
-				Waiting: &api.ContainerStateWaiting{
-					Reason:  fmt.Sprintf("ContainerCreating"),
-					Message: fmt.Sprintf("Image: %s is ready, container is creating", container.Image),
-				},
-			}
-		} else if reason == kubecontainer.ErrImagePullBackOff ||
-			reason == kubecontainer.ErrImageInspect ||
-			reason == kubecontainer.ErrImagePull ||
-			reason == kubecontainer.ErrImageNeverPull ||
-			reason == kubecontainer.RegistryUnavailable {
-			// mark it as waiting, reason will be filled bellow
-			containerStatus.State = api.ContainerState{Waiting: &api.ContainerStateWaiting{}}
-		} else if reason == kubecontainer.ErrRunContainer {
-			// mark it as waiting, reason will be filled bellow
-			containerStatus.State = api.ContainerState{Waiting: &api.ContainerStateWaiting{}}
+	// Handle the containers failed to be started, which should be in Waiting state.
+	for _, container := range pod.Spec.Containers {
+		// If a container should be restarted in next syncpod, it is *Waiting*.
+		if !kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) {
+			continue
 		}
-		statuses[container.Name] = &containerStatus
+		status := statuses[container.Name]
+		reason, message, ok := kl.reasonCache.Get(uid, container.Name)
+		if !ok {
+			// In fact, we could also apply Waiting state here, but it is less informative,
+			// and the container will be restarted soon, so we prefer the original state here.
+			// Note that with the current implementation of ShouldContainerBeRestarted the original state here
+			// could be:
+			// * Waiting: There is no associated historical container and start failure reason record.
+			// * Terminated: The container is terminated.
+			continue
+		}
+		if status.State.Terminated != nil {
+			status.LastTerminationState = status.State
+		}
+		status.State = api.ContainerState{
+			Waiting: &api.ContainerStateWaiting{
+				Reason:  reason.Error(),
+				Message: message,
+			},
+		}
+		statuses[container.Name] = status
 	}
 
 	apiPodStatus.ContainerStatuses = make([]api.ContainerStatus, 0)
-	for containerName, status := range statuses {
-		if status.State.Waiting != nil {
-			status.State.Running = nil
-			// For containers in the waiting state, fill in a specific reason if it is recorded.
-			if reason, message, ok := kl.reasonCache.Get(uid, containerName); ok {
-				status.State.Waiting.Reason = reason.Error()
-				status.State.Waiting.Message = message
-			}
-		}
+	for _, status := range statuses {
 		apiPodStatus.ContainerStatuses = append(apiPodStatus.ContainerStatuses, *status)
 	}
 
diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go
index 51dcd0a6b1f..21ad3357f50 100644
--- a/pkg/kubelet/kubelet_test.go
+++ b/pkg/kubelet/kubelet_test.go
@@ -4427,4 +4427,228 @@ func TestGetPodsToSync(t *testing.T) {
 	}
 }
 
-// TODO(random-liu): Add unit test for convertStatusToAPIStatus (issue #20478)
+func TestGenerateAPIPodStatusWithSortedContainers(t *testing.T) {
+	testKubelet := newTestKubelet(t)
+	kubelet := testKubelet.kubelet
+	numContainers := 10
+	expectedOrder := []string{}
+	cStatuses := []*kubecontainer.ContainerStatus{}
+	specContainerList := []api.Container{}
+	for i := 0; i < numContainers; i++ {
+		id := fmt.Sprintf("%v", i)
+		containerName := fmt.Sprintf("%vcontainer", id)
+		expectedOrder = append(expectedOrder, containerName)
+		cStatus := &kubecontainer.ContainerStatus{
+			ID:   kubecontainer.BuildContainerID("test", id),
+			Name: containerName,
+		}
+		// Rearrange container statuses
+		if i%2 == 0 {
+			cStatuses = append(cStatuses, cStatus)
+		} else {
+			cStatuses = append([]*kubecontainer.ContainerStatus{cStatus}, cStatuses...)
+		}
+		specContainerList = append(specContainerList, api.Container{Name: containerName})
+	}
+	pod := &api.Pod{
+		ObjectMeta: api.ObjectMeta{
+			UID:       types.UID("uid1"),
+			Name:      "foo",
+			Namespace: "test",
+		},
+		Spec: api.PodSpec{
+			Containers: specContainerList,
+		},
+	}
+	status := &kubecontainer.PodStatus{
+		ID:                pod.UID,
+		Name:              pod.Name,
+		Namespace:         pod.Namespace,
+		ContainerStatuses: cStatuses,
+	}
+	for i := 0; i < 5; i++ {
+		apiStatus := kubelet.generateAPIPodStatus(pod, status)
+		for i, c := range apiStatus.ContainerStatuses {
+			if expectedOrder[i] != c.Name {
+				t.Fatalf("Container status not sorted, expected %v at index %d, but found %v", expectedOrder[i], i, c.Name)
+			}
+		}
+	}
+}
+
+func TestGenerateAPIPodStatusWithReasonCache(t *testing.T) {
+	// The following waiting reason and message are generated in convertStatusToAPIStatus()
+	startWaitingReason := "ContainerCreating"
+	testTimestamp := time.Unix(123456789, 987654321)
+	testErrorReason := fmt.Errorf("test-error")
+	emptyContainerID := (&kubecontainer.ContainerID{}).String()
+	verifyStatus := func(t *testing.T, c int, podStatus api.PodStatus, state, lastTerminationState map[string]api.ContainerState) {
+		statuses := podStatus.ContainerStatuses
+		for _, s := range statuses {
+			if !reflect.DeepEqual(s.State, state[s.Name]) {
+				t.Errorf("case #%d, unexpected state: %s", c, diff.ObjectDiff(state[s.Name], s.State))
+			}
+			if !reflect.DeepEqual(s.LastTerminationState, lastTerminationState[s.Name]) {
+				t.Errorf("case #%d, unexpected last termination state %s", c, diff.ObjectDiff(
+					lastTerminationState[s.Name], s.LastTerminationState))
+			}
+		}
+	}
+	testKubelet := newTestKubelet(t)
+	kubelet := testKubelet.kubelet
+	pod := &api.Pod{
+		ObjectMeta: api.ObjectMeta{
+			UID:       "12345678",
+			Name:      "foo",
+			Namespace: "new",
+		},
+		Spec: api.PodSpec{RestartPolicy: api.RestartPolicyOnFailure},
+	}
+	podStatus := &kubecontainer.PodStatus{
+		ID:        pod.UID,
+		Name:      pod.Name,
+		Namespace: pod.Namespace,
+	}
+	tests := []struct {
+		containers                   []api.Container
+		statuses                     []*kubecontainer.ContainerStatus
+		reasons                      map[string]error
+		oldStatuses                  []api.ContainerStatus
+		expectedState                map[string]api.ContainerState
+		expectedLastTerminationState map[string]api.ContainerState
+	}{
+		// For container with no historical record, State should be Waiting, LastTerminationState should be retrieved from
+		// old status from apiserver.
+		{
+			[]api.Container{{Name: "without-old-record"}, {Name: "with-old-record"}},
+			[]*kubecontainer.ContainerStatus{},
+			map[string]error{},
+			[]api.ContainerStatus{{
+				Name:                 "with-old-record",
+				LastTerminationState: api.ContainerState{Terminated: &api.ContainerStateTerminated{}},
+			}},
+			map[string]api.ContainerState{
+				"without-old-record": {Waiting: &api.ContainerStateWaiting{
+					Reason: startWaitingReason,
+				}},
+				"with-old-record": {Waiting: &api.ContainerStateWaiting{
+					Reason: startWaitingReason,
+				}},
+			},
+			map[string]api.ContainerState{
+				"with-old-record": {Terminated: &api.ContainerStateTerminated{}},
+			},
+		},
+		// For running container, State should be Running, LastTerminationState should be retrieved from latest terminated status.
+		{
+			[]api.Container{{Name: "running"}},
+			[]*kubecontainer.ContainerStatus{
+				{
+					Name:      "running",
+					State:     kubecontainer.ContainerStateRunning,
+					StartedAt: testTimestamp,
+				},
+				{
+					Name:     "running",
+					State:    kubecontainer.ContainerStateExited,
+					ExitCode: 1,
+				},
+			},
+			map[string]error{},
+			[]api.ContainerStatus{},
+			map[string]api.ContainerState{
+				"running": {Running: &api.ContainerStateRunning{
+					StartedAt: unversioned.NewTime(testTimestamp),
+				}},
+			},
+			map[string]api.ContainerState{
+				"running": {Terminated: &api.ContainerStateTerminated{
+					ExitCode:    1,
+					ContainerID: emptyContainerID,
+				}},
+			},
+		},
+		// For terminated container:
+		// * If there is no recent start error record, State should be Terminated, LastTerminationState should be retrieved from
+		// second latest terminated status;
+		// * If there is recent start error record, State should be Waiting, LastTerminationState should be retrieved from latest
+		// terminated status;
+		// * If ExitCode = 0, restart policy is RestartPolicyOnFailure, the container shouldn't be restarted. No matter there is
+		// recent start error or not, State should be Terminated, LastTerminationState should be retrieved from second latest
+		// terminated status.
+		{
+			[]api.Container{{Name: "without-reason"}, {Name: "with-reason"}},
+			[]*kubecontainer.ContainerStatus{
+				{
+					Name:     "without-reason",
+					State:    kubecontainer.ContainerStateExited,
+					ExitCode: 1,
+				},
+				{
+					Name:     "with-reason",
+					State:    kubecontainer.ContainerStateExited,
+					ExitCode: 2,
+				},
+				{
+					Name:     "without-reason",
+					State:    kubecontainer.ContainerStateExited,
+					ExitCode: 3,
+				},
+				{
+					Name:     "with-reason",
+					State:    kubecontainer.ContainerStateExited,
+					ExitCode: 4,
+				},
+				{
+					Name:     "succeed",
+					State:    kubecontainer.ContainerStateExited,
+					ExitCode: 0,
+				},
+				{
+					Name:     "succeed",
+					State:    kubecontainer.ContainerStateExited,
+					ExitCode: 5,
+				},
+			},
+			map[string]error{"with-reason": testErrorReason, "succeed": testErrorReason},
+			[]api.ContainerStatus{},
+			map[string]api.ContainerState{
+				"without-reason": {Terminated: &api.ContainerStateTerminated{
+					ExitCode:    1,
+					ContainerID: emptyContainerID,
+				}},
+				"with-reason": {Waiting: &api.ContainerStateWaiting{Reason: testErrorReason.Error()}},
+				"succeed": {Terminated: &api.ContainerStateTerminated{
+					ExitCode:    0,
+					ContainerID: emptyContainerID,
+				}},
+			},
+			map[string]api.ContainerState{
+				"without-reason": {Terminated: &api.ContainerStateTerminated{
+					ExitCode:    3,
+					ContainerID: emptyContainerID,
+				}},
+				"with-reason": {Terminated: &api.ContainerStateTerminated{
+					ExitCode:    2,
+					ContainerID: emptyContainerID,
+				}},
+				"succeed": {Terminated: &api.ContainerStateTerminated{
+					ExitCode:    5,
+					ContainerID: emptyContainerID,
+				}},
+			},
+		},
+	}
+
+	for i, test := range tests {
+		kubelet.reasonCache = NewReasonCache()
+		for n, e := range test.reasons {
+			kubelet.reasonCache.add(pod.UID, n, e, "")
+		}
+		pod.Spec.Containers = test.containers
+		pod.Status.ContainerStatuses = test.oldStatuses
+		podStatus.ContainerStatuses = test.statuses
+		apiStatus := kubelet.generateAPIPodStatus(pod, podStatus)
+		verifyStatus(t, i, apiStatus, test.expectedState, test.expectedLastTerminationState)
+	}
+}
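Note for reviewers skimming the hunks above: the patched convertStatusToAPIStatus builds the API container statuses in three layers. What follows is a rough, self-contained Go sketch of that layering only, not the kubelet's actual code. The types runtimeStatus, apiState, and apiStatus, and the startFailure map, are simplified stand-ins for kubecontainer.ContainerStatus, api.ContainerState, the reason cache, and the ShouldContainerBeRestarted check; the real code also seeds RestartCount and LastTerminationState from the previous API status, which this sketch skips.

package main

import (
	"fmt"
	"sort"
)

// Simplified stand-ins for the real types: the kubelet uses
// kubecontainer.ContainerStatus (runtime view) and api.ContainerStatus /
// api.ContainerState (API view).
type runtimeStatus struct {
	Name      string
	State     string // "running" or "exited"
	ExitCode  int
	CreatedAt int64
}

type apiState struct {
	Kind     string // "Waiting", "Running" or "Terminated"
	Reason   string
	ExitCode int
}

type apiStatus struct {
	Name                 string
	State                apiState
	LastTerminationState apiState
}

// resolveStatuses mirrors the layering in the patched convertStatusToAPIStatus:
//  1. every spec container starts out Waiting/ContainerCreating,
//  2. observed runtime statuses (newest first) fill State and LastTerminationState,
//  3. a recorded start-failure reason flips a to-be-restarted container back to Waiting.
func resolveStatuses(specNames []string, observed []runtimeStatus, startFailure map[string]string) map[string]*apiStatus {
	statuses := map[string]*apiStatus{}
	for _, name := range specNames {
		statuses[name] = &apiStatus{Name: name, State: apiState{Kind: "Waiting", Reason: "ContainerCreating"}}
	}

	// Newest status first, like sort.Reverse(SortContainerStatusesByCreationTime(...)).
	sort.Slice(observed, func(i, j int) bool { return observed[i].CreatedAt > observed[j].CreatedAt })

	seen := map[string]int{}
	for _, o := range observed {
		s, ok := statuses[o.Name]
		if !ok || seen[o.Name] >= 2 {
			continue // unknown (e.g. infra) container, or State and LastTerminationState already set
		}
		converted := apiState{Kind: "Running"}
		if o.State == "exited" {
			converted = apiState{Kind: "Terminated", ExitCode: o.ExitCode}
		}
		if seen[o.Name] == 0 {
			s.State = converted
		} else {
			s.LastTerminationState = converted
		}
		seen[o.Name]++
	}

	// Containers that failed to start and are due to be restarted are surfaced as Waiting,
	// keeping the most recent termination as LastTerminationState.
	for name, reason := range startFailure {
		s, ok := statuses[name]
		if !ok {
			continue
		}
		if s.State.Kind == "Terminated" {
			s.LastTerminationState = s.State
		}
		s.State = apiState{Kind: "Waiting", Reason: reason}
	}
	return statuses
}

func main() {
	statuses := resolveStatuses(
		[]string{"app", "sidecar"},
		[]runtimeStatus{
			{Name: "app", State: "exited", ExitCode: 1, CreatedAt: 2},
			{Name: "app", State: "exited", ExitCode: 2, CreatedAt: 1},
		},
		map[string]string{"app": "CrashLoopBackOff"},
	)
	for _, s := range statuses {
		fmt.Printf("%s: state=%+v last=%+v\n", s.Name, s.State, s.LastTerminationState)
	}
}

Running the sketch reports "app" as Waiting with the exit-code-1 termination preserved as LastTerminationState and "sidecar" as Waiting/ContainerCreating, which is the same shape of result the "with-reason" case in TestGenerateAPIPodStatusWithReasonCache asserts against the real implementation.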