Mirror of https://github.com/k3s-io/kubernetes.git
Merge pull request #6794 from yujuhong/restart_counts

Kubelet: persist restart count of a container

Commit 54406a5b7c
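
In short: GetPodStatus used to recompute a container's restart count by counting its dead docker containers, so the count silently reset to zero once those containers were garbage-collected. After this change, the count is seeded from the last status the kubelet reported (oldStatuses) and only deaths newer than the last observed termination (lastObservedTime) are added. Below is a minimal, self-contained sketch of that rule; the deadContainer type and nextRestartCount helper are simplified stand-ins for illustration, not the kubelet's API.

package main

import (
    "fmt"
    "time"
)

// deadContainer stands in for an exited docker container.
type deadContainer struct{ finishedAt time.Time }

// nextRestartCount models the rule the diff's comments describe: start from
// the previously reported count, then add each dead container (newest first,
// as the kubelet assumes docker orders them) that finished after the last
// termination already recorded in the old status. The newest entry is
// skipped because it becomes the pod's current state, not a restart.
func nextRestartCount(prev int, lastObserved time.Time, deadNewestFirst []deadContainer) int {
    count := prev
    for i, c := range deadNewestFirst {
        if i == 0 {
            continue // current state, not a restart
        }
        if !lastObserved.IsZero() && !c.finishedAt.After(lastObserved) {
            break // this death and all older ones were already counted
        }
        count++
    }
    return count
}

func main() {
    now := time.Now()
    dead := []deadContainer{{now.Add(-30 * time.Second)}, {now.Add(-60 * time.Second)}}
    fmt.Println(nextRestartCount(0, time.Time{}, dead)) // 1

    // A new dead container appears: exactly one more restart is counted.
    dead = append([]deadContainer{{now}}, dead...)
    fmt.Println(nextRestartCount(1, now.Add(-60*time.Second), dead)) // 2

    // All dead containers GC'd: the previously reported count persists.
    fmt.Println(nextRestartCount(2, now, nil)) // 2
}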
@@ -231,8 +231,17 @@ func (self *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
     uid := pod.UID
     manifest := pod.Spec
 
+    oldStatuses := make(map[string]api.ContainerStatus, len(pod.Spec.Containers))
+    lastObservedTime := make(map[string]util.Time, len(pod.Spec.Containers))
+    for _, status := range pod.Status.ContainerStatuses {
+        oldStatuses[status.Name] = status
+        if status.LastTerminationState.Termination != nil {
+            lastObservedTime[status.Name] = status.LastTerminationState.Termination.FinishedAt
+        }
+    }
+
     var podStatus api.PodStatus
-    statuses := make(map[string]api.ContainerStatus)
+    statuses := make(map[string]*api.ContainerStatus, len(pod.Spec.Containers))
 
     expectedContainers := make(map[string]api.Container)
     for _, container := range manifest.Containers {
@@ -245,6 +254,10 @@ func (self *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
         return nil, err
     }
 
+    containerDone := util.NewStringSet()
+    // Loop through list of running and exited docker containers to construct
+    // the statuses. We assume docker returns a list of containers sorted in
+    // reverse by time.
     for _, value := range containers {
         if len(value.Names) == 0 {
             continue
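
The containerDone set added here is a short-circuit: since the docker list is assumed to be sorted newest-first, once a container name's history is fully accounted for, every remaining (older) entry for that name can be skipped. A tiny sketch of the pattern, with a plain map standing in for the kubelet's util.StringSet:

package main

import "fmt"

func main() {
    // Newest first, matching the ordering GetPodStatus assumes.
    ids := []struct{ name, id string }{
        {"bar", "9101"}, {"bar", "5678"}, {"bar", "1234"},
    }
    done := map[string]bool{} // stand-in for util.NewStringSet()
    for _, c := range ids {
        if done[c.name] {
            continue // older entries can no longer affect the status
        }
        fmt.Println("examining", c.id)
        if c.id == "5678" { // say this one finished before the last observation
            done[c.name] = true
        }
    }
    // Prints "examining 9101" and "examining 5678"; "1234" is skipped.
}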
@@ -261,30 +274,44 @@ func (self *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
         }
         dockerContainerName := dockerName.ContainerName
         c, found := expectedContainers[dockerContainerName]
-        terminationMessagePath := ""
         if !found {
-            // TODO(dchen1107): should figure out why not continue here
-            // continue
-        } else {
-            terminationMessagePath = c.TerminationMessagePath
+            continue
         }
-        // We assume docker return us a list of containers in time order
-        if containerStatus, found := statuses[dockerContainerName]; found {
-            // Populate last termination state
-            if containerStatus.LastTerminationState.Termination == nil {
-                result := self.inspectContainer(value.ID, dockerContainerName, terminationMessagePath)
-                if result.err == nil && result.status.State.Termination != nil {
-                    containerStatus.LastTerminationState = result.status.State
-                }
-            }
-            containerStatus.RestartCount += 1
-            statuses[dockerContainerName] = containerStatus
+        terminationMessagePath := c.TerminationMessagePath
+        if containerDone.Has(dockerContainerName) {
             continue
         }
 
+        var terminationState *api.ContainerState = nil
+        // Inspect the container.
         result := self.inspectContainer(value.ID, dockerContainerName, terminationMessagePath)
         if result.err != nil {
             return nil, result.err
+        } else if result.status.State.Termination != nil {
+            terminationState = &result.status.State
         }
 
+        if containerStatus, found := statuses[dockerContainerName]; found {
+            if containerStatus.LastTerminationState.Termination == nil && terminationState != nil {
+                // Populate the last termination state.
+                containerStatus.LastTerminationState = *terminationState
+            }
+            count := true
+            // Only count dead containers that terminated after the last observed time.
+            if lastObservedTime, ok := lastObservedTime[dockerContainerName]; ok {
+                if terminationState != nil && terminationState.Termination.FinishedAt.After(lastObservedTime.Time) {
+                    count = false
+                } else {
+                    // The container finished before the last observation. No
+                    // need to examine/count the older containers. Mark the
+                    // container name as done.
+                    containerDone.Insert(dockerContainerName)
+                }
+            }
+            if count {
+                containerStatus.RestartCount += 1
+            }
+            continue
+        }
+
         if dockerContainerName == PodInfraContainerName {
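
One subtlety in the gate above: util.Time wraps time.Time, and After is a strict comparison, so a termination whose FinishedAt exactly equals the last observed time counts as already seen. A quick demonstration:

package main

import (
    "fmt"
    "time"
)

func main() {
    lastObserved := time.Now()
    sameInstant := lastObserved
    later := lastObserved.Add(time.Nanosecond)

    fmt.Println(sameInstant.After(lastObserved)) // false: equal is not "after"
    fmt.Println(later.After(lastObserved))       // true
}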
@@ -294,44 +321,54 @@ func (self *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
             }
         } else {
             // Add user container information.
-            statuses[dockerContainerName] = result.status
+            if oldStatus, found := oldStatuses[dockerContainerName]; found {
+                // Use the last observed restart count if it's available.
+                result.status.RestartCount = oldStatus.RestartCount
+            }
+            statuses[dockerContainerName] = &result.status
         }
     }
 
+    // Handle the containers for which we cannot find any associated active or
+    // dead docker containers.
     for _, container := range manifest.Containers {
+        if _, found := statuses[container.Name]; found {
+            continue
+        }
         var containerStatus api.ContainerStatus
-        if status, found := statuses[container.Name]; found {
-            containerStatus = status
-        } else {
-            // The container has not been created yet. Check image is ready on
-            // the node or not.
-            // TODO: If we integrate DockerPuller into DockerManager, we can
-            // record the pull failure and eliminate the image checking below.
-            image := container.Image
-            // TODO(dchen1107): docker/docker/issues/8365 to figure out if the image exists
-            _, err := self.client.InspectImage(image)
-            if err == nil {
-                containerStatus.State.Waiting = &api.ContainerStateWaiting{
-                    Reason: fmt.Sprintf("Image: %s is ready, container is creating", image),
-                }
-            } else if err == docker.ErrNoSuchImage {
-                containerStatus.State.Waiting = &api.ContainerStateWaiting{
-                    Reason: fmt.Sprintf("Image: %s is not ready on the node", image),
-                }
-            }
+        if oldStatus, found := oldStatuses[container.Name]; found {
+            // Some states may be lost due to GC; apply the last observed
+            // values if possible.
+            containerStatus.RestartCount = oldStatus.RestartCount
+            containerStatus.LastTerminationState = oldStatus.LastTerminationState
         }
-        if containerStatus.State.Waiting != nil {
-            // For containers in the waiting state, fill in a specific reason if it is recorded.
-            if reason, ok := self.reasonCache.Get(uid, container.Name); ok {
-                containerStatus.State.Waiting.Reason = reason
-            }
-        }
-        statuses[container.Name] = containerStatus
+        // Check image is ready on the node or not.
+        // TODO: If we integrate DockerPuller into DockerManager, we can
+        // record the pull failure and eliminate the image checking below.
+        image := container.Image
+        // TODO(dchen1107): docker/docker/issues/8365 to figure out if the image exists
+        _, err := self.client.InspectImage(image)
+        if err == nil {
+            containerStatus.State.Waiting = &api.ContainerStateWaiting{
+                Reason: fmt.Sprintf("Image: %s is ready, container is creating", image),
+            }
+        } else if err == docker.ErrNoSuchImage {
+            containerStatus.State.Waiting = &api.ContainerStateWaiting{
+                Reason: fmt.Sprintf("Image: %s is not ready on the node", image),
+            }
+        }
+        statuses[container.Name] = &containerStatus
    }
 
     podStatus.ContainerStatuses = make([]api.ContainerStatus, 0)
-    for _, status := range statuses {
-        podStatus.ContainerStatuses = append(podStatus.ContainerStatuses, status)
+    for containerName, status := range statuses {
+        if status.State.Waiting != nil {
+            // For containers in the waiting state, fill in a specific reason if it is recorded.
+            if reason, ok := self.reasonCache.Get(uid, containerName); ok {
+                status.State.Waiting.Reason = reason
+            }
+        }
+        podStatus.ContainerStatuses = append(podStatus.ContainerStatuses, *status)
     }
 
     return &podStatus, nil
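
The second loop is what makes the count survive garbage collection: a container from the pod spec with no remaining docker containers gets its status rebuilt from the last observed one before the image-readiness check fills in the waiting reason. A reduced sketch of that fallback, with a simplified struct standing in for api.ContainerStatus:

package main

import "fmt"

// containerStatus is a reduced stand-in for api.ContainerStatus.
type containerStatus struct {
    restartCount  int
    waitingReason string
}

func main() {
    // Last reported statuses; all docker containers have since been GC'd.
    oldStatuses := map[string]containerStatus{"bar": {restartCount: 2}}
    statuses := map[string]*containerStatus{}

    for _, name := range []string{"bar"} { // the pod spec's containers
        if _, found := statuses[name]; found {
            continue // still backed by a docker container
        }
        var s containerStatus
        if old, found := oldStatuses[name]; found {
            s.restartCount = old.restartCount // carry over what GC erased
        }
        s.waitingReason = "Image: busybox is ready, container is creating" // made-up image name
        statuses[name] = &s
    }
    fmt.Println(statuses["bar"].restartCount) // 2: the count persists
}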
@@ -4117,3 +4117,90 @@ func TestGetPodCreationFailureReason(t *testing.T) {
         }
     }
 }
+
+func TestGetRestartCount(t *testing.T) {
+    testKubelet := newTestKubelet(t)
+    testKubelet.fakeCadvisor.On("MachineInfo").Return(&cadvisorApi.MachineInfo{}, nil)
+    kubelet := testKubelet.kubelet
+    fakeDocker := testKubelet.fakeDocker
+
+    containers := []api.Container{
+        {Name: "bar"},
+    }
+    pod := api.Pod{
+        ObjectMeta: api.ObjectMeta{
+            UID:       "12345678",
+            Name:      "foo",
+            Namespace: "new",
+        },
+        Spec: api.PodSpec{
+            Containers: containers,
+        },
+    }
+
+    // Name format: k8s_<container-id>_<pod-fullname>_<pod-uid>
+    names := []string{"/k8s_bar." + strconv.FormatUint(dockertools.HashContainer(&containers[0]), 16) + "_foo_new_12345678_0"}
+    currTime := time.Now()
+    containerMap := map[string]*docker.Container{
+        "1234": {
+            ID:     "1234",
+            Name:   "bar",
+            Config: &docker.Config{},
+            State: docker.State{
+                ExitCode:   42,
+                StartedAt:  currTime.Add(-60 * time.Second),
+                FinishedAt: currTime.Add(-60 * time.Second),
+            },
+        },
+        "5678": {
+            ID:     "5678",
+            Name:   "bar",
+            Config: &docker.Config{},
+            State: docker.State{
+                ExitCode:   42,
+                StartedAt:  currTime.Add(-30 * time.Second),
+                FinishedAt: currTime.Add(-30 * time.Second),
+            },
+        },
+        "9101": {
+            ID:     "9101",
+            Name:   "bar",
+            Config: &docker.Config{},
+            State: docker.State{
+                ExitCode:   42,
+                StartedAt:  currTime.Add(30 * time.Minute),
+                FinishedAt: currTime.Add(30 * time.Minute),
+            },
+        },
+    }
+    fakeDocker.ContainerMap = containerMap
+
+    // Helper function for verifying the restart count.
+    verifyRestartCount := func(pod *api.Pod, expectedCount int) api.PodStatus {
+        status, err := kubelet.generatePodStatus(pod)
+        if err != nil {
+            t.Errorf("unexpected error %v", err)
+        }
+        restartCount := status.ContainerStatuses[0].RestartCount
+        if restartCount != expectedCount {
+            t.Errorf("expected %d restart count, got %d", expectedCount, restartCount)
+        }
+        return status
+    }
+
+    // Container "bar" has failed twice; create two dead docker containers.
+    // TODO: container lists are expected to be sorted in reverse order by time.
+    // We should fix FakeDockerClient to sort the list before returning.
+    fakeDocker.ExitedContainerList = []docker.APIContainers{{Names: names, ID: "5678"}, {Names: names, ID: "1234"}}
+    pod.Status = verifyRestartCount(&pod, 1)
+
+    // Found a new dead container. The restart count should be incremented.
+    fakeDocker.ExitedContainerList = []docker.APIContainers{
+        {Names: names, ID: "9101"}, {Names: names, ID: "5678"}, {Names: names, ID: "1234"}}
+    pod.Status = verifyRestartCount(&pod, 2)
+
+    // All dead containers have been GC'd. The restart count should persist
+    // (i.e., remain the same).
+    fakeDocker.ExitedContainerList = []docker.APIContainers{}
+    verifyRestartCount(&pod, 2)
+}
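
For context on the fake container names above: they follow the kubelet's docker naming scheme visible in the test string, k8s_<container-name>.<spec-hash>_<pod-name>_<namespace>_<pod-uid>_<attempt>, which is how GetPodStatus maps docker containers back to spec containers. A sketch of assembling such a name; the 0x42 hash is a made-up placeholder for what dockertools.HashContainer would produce:

package main

import "fmt"

func main() {
    // Fields mirror the test: container "bar" in pod "foo", namespace "new",
    // UID "12345678", attempt 0. The hash 0x42 is a placeholder value.
    name := fmt.Sprintf("/k8s_%s.%x_%s_%s_%s_%d", "bar", 0x42, "foo", "new", "12345678", 0)
    fmt.Println(name) // /k8s_bar.42_foo_new_12345678_0
}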