Kubelet: persist restart count of a container

Currently, restart counts are generated by examining dead docker containers, which
are subject to background garbage collection. Therefore, the restart count is
capped at 5 and can decrease when GC removes the dead containers.

This change leverages the container statuses recorded in the pod status as a
reference point. If a container finished after the last observation, its restart
count is incremented on top of the last observed count. If a container is created
after the last observation but GC'd before the current observation, the kubelet
will not be aware that such a container ever existed and will not increase the
restart count for it. However, the chance of this should be low, given that pod
statuses are reported frequently. Also, the restart count still increases
monotonically (except in the case of a container inspect error).
Yu-Ju Hong 2015-04-10 22:33:45 -07:00
parent 26f8bc1a68
commit fd34441d2d
2 changed files with 169 additions and 45 deletions
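
For reference, the counting rule described in the commit message can be summarized in a short, self-contained Go sketch. This is only an illustration: the names (deadContainer, computeRestartCount) are hypothetical and do not exist in the kubelet; the real logic in the diff below additionally handles the pod infra container and tracks per-name completion with a containerDone set.

package main

import (
	"fmt"
	"time"
)

// deadContainer is a hypothetical stand-in for an exited docker container.
type deadContainer struct {
	finishedAt time.Time
}

// computeRestartCount starts from the restart count recorded at the last
// observation and adds one for every dead predecessor container (i.e. every
// dead container other than the pod's current instance) that finished after
// the termination time recorded in that observation. Older containers were
// already counted, and containers GC'd in between are simply never seen.
func computeRestartCount(lastObservedCount int, lastObservedFinish time.Time, olderDead []deadContainer) int {
	count := lastObservedCount
	for _, c := range olderDead {
		if c.finishedAt.After(lastObservedFinish) {
			count++
		}
	}
	return count
}

func main() {
	now := time.Now()
	// The last observation recorded 1 restart and a predecessor that finished 60s ago.
	lastFinish := now.Add(-60 * time.Second)
	olderDead := []deadContainer{
		{finishedAt: now.Add(-60 * time.Second)}, // already reflected in the old count
		{finishedAt: now.Add(-30 * time.Second)}, // died after the last observation
	}
	fmt.Println(computeRestartCount(1, lastFinish, olderDead)) // prints 2
}

Because the count is seeded from the last observed value and only ever incremented, a dead container that is GC'd between observations can at worst cause an undercount, never a decrease.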


@@ -231,8 +231,17 @@ func (self *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
 	uid := pod.UID
 	manifest := pod.Spec
+	oldStatuses := make(map[string]api.ContainerStatus, len(pod.Spec.Containers))
+	lastObservedTime := make(map[string]util.Time, len(pod.Spec.Containers))
+	for _, status := range pod.Status.ContainerStatuses {
+		oldStatuses[status.Name] = status
+		if status.LastTerminationState.Termination != nil {
+			lastObservedTime[status.Name] = status.LastTerminationState.Termination.FinishedAt
+		}
+	}
+
 
 	var podStatus api.PodStatus
-	statuses := make(map[string]api.ContainerStatus)
+	statuses := make(map[string]*api.ContainerStatus, len(pod.Spec.Containers))
 
 	expectedContainers := make(map[string]api.Container)
 	for _, container := range manifest.Containers {
@@ -245,6 +254,10 @@ func (self *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
 		return nil, err
 	}
+	containerDone := util.NewStringSet()
+	// Loop through list of running and exited docker containers to construct
+	// the statuses. We assume docker returns a list of containers sorted in
+	// reverse by time.
 	for _, value := range containers {
 		if len(value.Names) == 0 {
 			continue
 		}
@@ -261,30 +274,44 @@ func (self *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
 		}
 		dockerContainerName := dockerName.ContainerName
 		c, found := expectedContainers[dockerContainerName]
-		terminationMessagePath := ""
 		if !found {
-			// TODO(dchen1107): should figure out why not continue here
-			// continue
-		} else {
-			terminationMessagePath = c.TerminationMessagePath
+			continue
 		}
-		// We assume docker return us a list of containers in time order
-		if containerStatus, found := statuses[dockerContainerName]; found {
-			// Populate last termination state
-			if containerStatus.LastTerminationState.Termination == nil {
-				result := self.inspectContainer(value.ID, dockerContainerName, terminationMessagePath)
-				if result.err == nil && result.status.State.Termination != nil {
-					containerStatus.LastTerminationState = result.status.State
-				}
-			}
-			containerStatus.RestartCount += 1
-			statuses[dockerContainerName] = containerStatus
+		terminationMessagePath := c.TerminationMessagePath
+		if containerDone.Has(dockerContainerName) {
 			continue
 		}
+		var terminationState *api.ContainerState = nil
+		// Inspect the container.
 		result := self.inspectContainer(value.ID, dockerContainerName, terminationMessagePath)
 		if result.err != nil {
 			return nil, result.err
+		} else if result.status.State.Termination != nil {
+			terminationState = &result.status.State
+		}
+		if containerStatus, found := statuses[dockerContainerName]; found {
+			if containerStatus.LastTerminationState.Termination == nil && terminationState != nil {
+				// Populate the last termination state.
+				containerStatus.LastTerminationState = *terminationState
+			}
+			count := true
+			// Only count dead containers terminated after the last observation.
+			if lastObservedTime, ok := lastObservedTime[dockerContainerName]; ok {
+				if terminationState != nil && terminationState.Termination.FinishedAt.After(lastObservedTime.Time) {
+					count = true
+				} else {
+					// The container finished before the last observation. No
+					// need to examine/count the older containers. Mark the
+					// container name as done.
+					count = false
+					containerDone.Insert(dockerContainerName)
+				}
+			}
+			if count {
+				containerStatus.RestartCount += 1
+			}
+			continue
 		}
 		if dockerContainerName == PodInfraContainerName {
@@ -294,44 +321,54 @@ func (self *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
 			}
 		} else {
 			// Add user container information.
-			statuses[dockerContainerName] = result.status
+			if oldStatus, found := oldStatuses[dockerContainerName]; found {
+				// Use the last observed restart count if it's available.
+				result.status.RestartCount = oldStatus.RestartCount
+			}
+			statuses[dockerContainerName] = &result.status
 		}
 	}
+	// Handle the containers for which we cannot find any associated active or
+	// dead docker containers.
 	for _, container := range manifest.Containers {
+		if _, found := statuses[container.Name]; found {
+			continue
+		}
 		var containerStatus api.ContainerStatus
-		if status, found := statuses[container.Name]; found {
-			containerStatus = status
-		} else {
-			// The container has not been created yet. Check image is ready on
-			// the node or not.
-			// TODO: If we integrate DockerPuller into DockerManager, we can
-			// record the pull failure and eliminate the image checking below.
-			image := container.Image
-			// TODO(dchen1107): docker/docker/issues/8365 to figure out if the image exists
-			_, err := self.client.InspectImage(image)
-			if err == nil {
-				containerStatus.State.Waiting = &api.ContainerStateWaiting{
-					Reason: fmt.Sprintf("Image: %s is ready, container is creating", image),
-				}
-			} else if err == docker.ErrNoSuchImage {
-				containerStatus.State.Waiting = &api.ContainerStateWaiting{
-					Reason: fmt.Sprintf("Image: %s is not ready on the node", image),
-				}
-			}
-		}
-		if containerStatus.State.Waiting != nil {
-			// For containers in the waiting state, fill in a specific reason if it is recorded.
-			if reason, ok := self.reasonCache.Get(uid, container.Name); ok {
-				containerStatus.State.Waiting.Reason = reason
-			}
-		}
-		statuses[container.Name] = containerStatus
+		if oldStatus, found := oldStatuses[container.Name]; found {
+			// Some states may be lost due to GC; apply the last observed
+			// values if possible.
+			containerStatus.RestartCount = oldStatus.RestartCount
+			containerStatus.LastTerminationState = oldStatus.LastTerminationState
+		}
+		//Check image is ready on the node or not.
+		// TODO: If we integrate DockerPuller into DockerManager, we can
+		// record the pull failure and eliminate the image checking below.
+		image := container.Image
+		// TODO(dchen1107): docker/docker/issues/8365 to figure out if the image exists
+		_, err := self.client.InspectImage(image)
+		if err == nil {
+			containerStatus.State.Waiting = &api.ContainerStateWaiting{
+				Reason: fmt.Sprintf("Image: %s is ready, container is creating", image),
+			}
+		} else if err == docker.ErrNoSuchImage {
+			containerStatus.State.Waiting = &api.ContainerStateWaiting{
+				Reason: fmt.Sprintf("Image: %s is not ready on the node", image),
+			}
+		}
+		statuses[container.Name] = &containerStatus
 	}
 	podStatus.ContainerStatuses = make([]api.ContainerStatus, 0)
-	for _, status := range statuses {
-		podStatus.ContainerStatuses = append(podStatus.ContainerStatuses, status)
+	for containerName, status := range statuses {
+		if status.State.Waiting != nil {
+			// For containers in the waiting state, fill in a specific reason if it is recorded.
+			if reason, ok := self.reasonCache.Get(uid, containerName); ok {
+				status.State.Waiting.Reason = reason
+			}
+		}
+		podStatus.ContainerStatuses = append(podStatus.ContainerStatuses, *status)
 	}
 	return &podStatus, nil


@@ -3833,3 +3833,90 @@ func TestGetPodCreationFailureReason(t *testing.T) {
 		}
 	}
 }
+
+func TestGetRestartCount(t *testing.T) {
+	testKubelet := newTestKubelet(t)
+	testKubelet.fakeCadvisor.On("MachineInfo").Return(&cadvisorApi.MachineInfo{}, nil)
+	kubelet := testKubelet.kubelet
+	fakeDocker := testKubelet.fakeDocker
+
+	containers := []api.Container{
+		{Name: "bar"},
+	}
+	pod := api.Pod{
+		ObjectMeta: api.ObjectMeta{
+			UID:       "12345678",
+			Name:      "foo",
+			Namespace: "new",
+		},
+		Spec: api.PodSpec{
+			Containers: containers,
+		},
+	}
+
+	// format is // k8s_<container-id>_<pod-fullname>_<pod-uid>
+	names := []string{"/k8s_bar." + strconv.FormatUint(dockertools.HashContainer(&containers[0]), 16) + "_foo_new_12345678_0"}
+	currTime := time.Now()
+	containerMap := map[string]*docker.Container{
+		"1234": {
+			ID:     "1234",
+			Name:   "bar",
+			Config: &docker.Config{},
+			State: docker.State{
+				ExitCode:   42,
+				StartedAt:  currTime.Add(-60 * time.Second),
+				FinishedAt: currTime.Add(-60 * time.Second),
+			},
+		},
+		"5678": {
+			ID:     "5678",
+			Name:   "bar",
+			Config: &docker.Config{},
+			State: docker.State{
+				ExitCode:   42,
+				StartedAt:  currTime.Add(-30 * time.Second),
+				FinishedAt: currTime.Add(-30 * time.Second),
+			},
+		},
+		"9101": {
+			ID:     "9101",
+			Name:   "bar",
+			Config: &docker.Config{},
+			State: docker.State{
+				ExitCode:   42,
+				StartedAt:  currTime.Add(30 * time.Minute),
+				FinishedAt: currTime.Add(30 * time.Minute),
+			},
+		},
+	}
+	fakeDocker.ContainerMap = containerMap
+
+	// Helper function for verifying the restart count.
+	verifyRestartCount := func(pod *api.Pod, expectedCount int) api.PodStatus {
+		status, err := kubelet.generatePodStatus(pod)
+		if err != nil {
+			t.Errorf("unexpected error %v", err)
+		}
+		restartCount := status.ContainerStatuses[0].RestartCount
+		if restartCount != expectedCount {
+			t.Errorf("expected %d restart count, got %d", expectedCount, restartCount)
+		}
+		return status
+	}
+
+	// Container "bar" has failed twice; create two dead docker containers.
+	// TODO: container lists are expected to be sorted reversely by time.
+	// We should fix FakeDockerClient to sort the list before returning.
+	fakeDocker.ExitedContainerList = []docker.APIContainers{{Names: names, ID: "5678"}, {Names: names, ID: "1234"}}
+	pod.Status = verifyRestartCount(&pod, 1)
+
+	// Found a new dead container. The restart count should be incremented.
+	fakeDocker.ExitedContainerList = []docker.APIContainers{
+		{Names: names, ID: "9101"}, {Names: names, ID: "5678"}, {Names: names, ID: "1234"}}
+	pod.Status = verifyRestartCount(&pod, 2)
+
+	// All dead containers have been GC'd. The restart count should persist
+	// (i.e., remain the same).
+	fakeDocker.ExitedContainerList = []docker.APIContainers{}
+	verifyRestartCount(&pod, 2)
+}