diff --git a/pkg/kubelet/dockertools/manager.go b/pkg/kubelet/dockertools/manager.go
index 77ca5a4e017..c9479b10ddd 100644
--- a/pkg/kubelet/dockertools/manager.go
+++ b/pkg/kubelet/dockertools/manager.go
@@ -231,8 +231,17 @@ func (self *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
 	uid := pod.UID
 	manifest := pod.Spec
 
+	oldStatuses := make(map[string]api.ContainerStatus, len(pod.Spec.Containers))
+	lastObservedTime := make(map[string]util.Time, len(pod.Spec.Containers))
+	for _, status := range pod.Status.ContainerStatuses {
+		oldStatuses[status.Name] = status
+		if status.LastTerminationState.Termination != nil {
+			lastObservedTime[status.Name] = status.LastTerminationState.Termination.FinishedAt
+		}
+	}
+
 	var podStatus api.PodStatus
-	statuses := make(map[string]api.ContainerStatus)
+	statuses := make(map[string]*api.ContainerStatus, len(pod.Spec.Containers))
 
 	expectedContainers := make(map[string]api.Container)
 	for _, container := range manifest.Containers {
@@ -245,6 +254,10 @@ func (self *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
 		return nil, err
 	}
 
+	containerDone := util.NewStringSet()
+	// Loop through the list of running and exited docker containers to
+	// construct the statuses. We assume docker returns a list of containers
+	// sorted in reverse by time.
 	for _, value := range containers {
 		if len(value.Names) == 0 {
 			continue
@@ -261,30 +274,45 @@ func (self *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
 		}
 		dockerContainerName := dockerName.ContainerName
 		c, found := expectedContainers[dockerContainerName]
-		terminationMessagePath := ""
 		if !found {
-			// TODO(dchen1107): should figure out why not continue here
-			// continue
-		} else {
-			terminationMessagePath = c.TerminationMessagePath
+			continue
 		}
-		// We assume docker return us a list of containers in time order
-		if containerStatus, found := statuses[dockerContainerName]; found {
-			// Populate last termination state
-			if containerStatus.LastTerminationState.Termination == nil {
-				result := self.inspectContainer(value.ID, dockerContainerName, terminationMessagePath)
-				if result.err == nil && result.status.State.Termination != nil {
-					containerStatus.LastTerminationState = result.status.State
-				}
-			}
-			containerStatus.RestartCount += 1
-			statuses[dockerContainerName] = containerStatus
+		terminationMessagePath := c.TerminationMessagePath
+		if containerDone.Has(dockerContainerName) {
 			continue
 		}
 
+		var terminationState *api.ContainerState = nil
+		// Inspect the container.
 		result := self.inspectContainer(value.ID, dockerContainerName, terminationMessagePath)
 		if result.err != nil {
 			return nil, result.err
+		} else if result.status.State.Termination != nil {
+			terminationState = &result.status.State
+		}
+
+		if containerStatus, found := statuses[dockerContainerName]; found {
+			if containerStatus.LastTerminationState.Termination == nil && terminationState != nil {
+				// Populate the last termination state.
+				containerStatus.LastTerminationState = *terminationState
+			}
+			count := true
+			// Only count dead containers that terminated after the last observation.
+			if lastObservedTime, ok := lastObservedTime[dockerContainerName]; ok {
+				if terminationState != nil && terminationState.Termination.FinishedAt.After(lastObservedTime.Time) {
+					count = true
+				} else {
+					// The container finished before the last observation. No
+					// need to examine/count the older containers. Mark the
+					// container name as done.
+					count = false
+					containerDone.Insert(dockerContainerName)
+				}
+			}
+			if count {
+				containerStatus.RestartCount += 1
+			}
+			continue
 		}
 
 		if dockerContainerName == PodInfraContainerName {
@@ -294,44 +322,54 @@ func (self *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
 			}
 		} else {
 			// Add user container information.
-			statuses[dockerContainerName] = result.status
+			if oldStatus, found := oldStatuses[dockerContainerName]; found {
+				// Use the last observed restart count if it's available.
+				result.status.RestartCount = oldStatus.RestartCount
+			}
+			statuses[dockerContainerName] = &result.status
 		}
 	}
 
+	// Handle the containers for which we cannot find any associated active or
+	// dead docker containers.
 	for _, container := range manifest.Containers {
+		if _, found := statuses[container.Name]; found {
+			continue
+		}
 		var containerStatus api.ContainerStatus
-		if status, found := statuses[container.Name]; found {
-			containerStatus = status
-		} else {
-			// The container has not been created yet. Check image is ready on
-			// the node or not.
-			// TODO: If we integrate DockerPuller into DockerManager, we can
-			// record the pull failure and eliminate the image checking below.
-			image := container.Image
-			// TODO(dchen1107): docker/docker/issues/8365 to figure out if the image exists
-			_, err := self.client.InspectImage(image)
-			if err == nil {
-				containerStatus.State.Waiting = &api.ContainerStateWaiting{
-					Reason: fmt.Sprintf("Image: %s is ready, container is creating", image),
-				}
-			} else if err == docker.ErrNoSuchImage {
-				containerStatus.State.Waiting = &api.ContainerStateWaiting{
-					Reason: fmt.Sprintf("Image: %s is not ready on the node", image),
-				}
+		if oldStatus, found := oldStatuses[container.Name]; found {
+			// Some states may be lost due to GC; apply the last observed
+			// values if possible.
+			containerStatus.RestartCount = oldStatus.RestartCount
+			containerStatus.LastTerminationState = oldStatus.LastTerminationState
+		}
+		// Check whether the image is ready on the node.
+		// TODO: If we integrate DockerPuller into DockerManager, we can
+		// record the pull failure and eliminate the image checking below.
+		image := container.Image
+		// TODO(dchen1107): docker/docker/issues/8365 to figure out if the image exists
+		_, err := self.client.InspectImage(image)
+		if err == nil {
+			containerStatus.State.Waiting = &api.ContainerStateWaiting{
+				Reason: fmt.Sprintf("Image: %s is ready, container is creating", image),
+			}
+		} else if err == docker.ErrNoSuchImage {
+			containerStatus.State.Waiting = &api.ContainerStateWaiting{
+				Reason: fmt.Sprintf("Image: %s is not ready on the node", image),
 			}
 		}
-		if containerStatus.State.Waiting != nil {
-			// For containers in the waiting state, fill in a specific reason if it is recorded.
-			if reason, ok := self.reasonCache.Get(uid, container.Name); ok {
-				containerStatus.State.Waiting.Reason = reason
-			}
-		}
-		statuses[container.Name] = containerStatus
+		statuses[container.Name] = &containerStatus
 	}
 
 	podStatus.ContainerStatuses = make([]api.ContainerStatus, 0)
-	for _, status := range statuses {
-		podStatus.ContainerStatuses = append(podStatus.ContainerStatuses, status)
+	for containerName, status := range statuses {
+		if status.State.Waiting != nil {
+			// For containers in the waiting state, fill in a specific reason if it is recorded.
+			if reason, ok := self.reasonCache.Get(uid, containerName); ok {
+				status.State.Waiting.Reason = reason
+			}
+		}
+		podStatus.ContainerStatuses = append(podStatus.ContainerStatuses, *status)
 	}
 
 	return &podStatus, nil
diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go
index e279594bca4..4927feca1af 100644
--- a/pkg/kubelet/kubelet_test.go
+++ b/pkg/kubelet/kubelet_test.go
@@ -3833,3 +3833,90 @@ func TestGetPodCreationFailureReason(t *testing.T) {
 		}
 	}
 }
+
+func TestGetRestartCount(t *testing.T) {
+	testKubelet := newTestKubelet(t)
+	testKubelet.fakeCadvisor.On("MachineInfo").Return(&cadvisorApi.MachineInfo{}, nil)
+	kubelet := testKubelet.kubelet
+	fakeDocker := testKubelet.fakeDocker
+
+	containers := []api.Container{
+		{Name: "bar"},
+	}
+	pod := api.Pod{
+		ObjectMeta: api.ObjectMeta{
+			UID:       "12345678",
+			Name:      "foo",
+			Namespace: "new",
+		},
+		Spec: api.PodSpec{
+			Containers: containers,
+		},
+	}
+
+	// format is // k8s_<container-id>_<pod-fullname>_<pod-uid>
+	names := []string{"/k8s_bar." + strconv.FormatUint(dockertools.HashContainer(&containers[0]), 16) + "_foo_new_12345678_0"}
+	currTime := time.Now()
+	containerMap := map[string]*docker.Container{
+		"1234": {
+			ID:     "1234",
+			Name:   "bar",
+			Config: &docker.Config{},
+			State: docker.State{
+				ExitCode:   42,
+				StartedAt:  currTime.Add(-60 * time.Second),
+				FinishedAt: currTime.Add(-60 * time.Second),
+			},
+		},
+		"5678": {
+			ID:     "5678",
+			Name:   "bar",
+			Config: &docker.Config{},
+			State: docker.State{
+				ExitCode:   42,
+				StartedAt:  currTime.Add(-30 * time.Second),
+				FinishedAt: currTime.Add(-30 * time.Second),
+			},
+		},
+		"9101": {
+			ID:     "9101",
+			Name:   "bar",
+			Config: &docker.Config{},
+			State: docker.State{
+				ExitCode:   42,
+				StartedAt:  currTime.Add(30 * time.Minute),
+				FinishedAt: currTime.Add(30 * time.Minute),
+			},
+		},
+	}
+	fakeDocker.ContainerMap = containerMap
+
+	// Helper function for verifying the restart count.
+	verifyRestartCount := func(pod *api.Pod, expectedCount int) api.PodStatus {
+		status, err := kubelet.generatePodStatus(pod)
+		if err != nil {
+			t.Errorf("unexpected error %v", err)
+		}
+		restartCount := status.ContainerStatuses[0].RestartCount
+		if restartCount != expectedCount {
+			t.Errorf("expected %d restart count, got %d", expectedCount, restartCount)
+		}
+		return status
+	}
+
+	// Container "bar" has failed twice; create two dead docker containers.
+	// TODO: container lists are expected to be sorted in reverse by time.
+	// We should fix FakeDockerClient to sort the list before returning.
+	fakeDocker.ExitedContainerList = []docker.APIContainers{{Names: names, ID: "5678"}, {Names: names, ID: "1234"}}
+	pod.Status = verifyRestartCount(&pod, 1)
+
+	// Found a new dead container. The restart count should be incremented.
+	fakeDocker.ExitedContainerList = []docker.APIContainers{
+		{Names: names, ID: "9101"}, {Names: names, ID: "5678"}, {Names: names, ID: "1234"}}
+	pod.Status = verifyRestartCount(&pod, 2)
+
+	// All dead containers have been GC'd. The restart count should persist
+	// (i.e., remain the same).
+	fakeDocker.ExitedContainerList = []docker.APIContainers{}
+	verifyRestartCount(&pod, 2)
+}
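
Illustration (not part of the patch): the standalone sketch below distills the restart-count bookkeeping that the patched GetPodStatus performs, under the same assumption that docker lists containers newest first. The types deadContainer and observedStatus and the helper restartCount are simplified, hypothetical stand-ins for the api/docker structs used above, not kubelet code.

package main

import (
	"fmt"
	"time"
)

// deadContainer is a simplified stand-in for an exited docker container.
type deadContainer struct {
	finishedAt time.Time
}

// observedStatus is a simplified stand-in for the last reported
// api.ContainerStatus: the restart count plus the FinishedAt time of the
// last observed termination, if any.
type observedStatus struct {
	restartCount     int
	hasTermination   bool
	lastTerminatedAt time.Time
}

// restartCount reconstructs the restart count for a single container name.
// dead must be sorted newest first, mirroring the assumption GetPodStatus
// makes about the list returned by docker.
func restartCount(old observedStatus, dead []deadContainer) int {
	count := old.restartCount
	for i, c := range dead {
		if i == 0 {
			// The newest container seeds the status entry; it is not a
			// restart by itself.
			continue
		}
		if old.hasTermination && !c.finishedAt.After(old.lastTerminatedAt) {
			// This container (and every older one) is already reflected in
			// the previously observed restart count; stop counting.
			break
		}
		count++
	}
	return count
}

func main() {
	now := time.Now()
	dead := []deadContainer{
		{finishedAt: now.Add(-30 * time.Second)},
		{finishedAt: now.Add(-60 * time.Second)},
	}

	// First observation: no prior status and two dead containers -> 1 restart.
	first := restartCount(observedStatus{}, dead)
	fmt.Println(first) // 1

	// A new death shows up after the last observation; only that one is
	// counted, even though the older containers are still listed.
	old := observedStatus{
		restartCount:     first,
		hasTermination:   true,
		lastTerminatedAt: now.Add(-60 * time.Second),
	}
	dead = append([]deadContainer{{finishedAt: now.Add(30 * time.Minute)}}, dead...)
	second := restartCount(old, dead)
	fmt.Println(second) // 2

	// All dead containers have since been garbage-collected; the previously
	// observed count simply persists, as in the last step of the test above.
	old.restartCount = second
	fmt.Println(restartCount(old, nil)) // 2
}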