diff --git a/pkg/kubelet/dockertools/manager.go b/pkg/kubelet/dockertools/manager.go
index 6a388f16dea..97564744f17 100644
--- a/pkg/kubelet/dockertools/manager.go
+++ b/pkg/kubelet/dockertools/manager.go
@@ -1979,9 +1979,17 @@ func getUidFromUser(id string) string {
 // backoff deadline. However, because that won't cause error and the chance is really slim, we can just ignore it for now.
 // If a container is still in backoff, the function will return a brief backoff error and a detailed error message.
 func (dm *DockerManager) doBackOff(pod *api.Pod, container *api.Container, podStatus *kubecontainer.PodStatus, backOff *util.Backoff) (bool, error, string) {
-	containerStatus := podStatus.FindContainerStatusByName(container.Name)
-	if containerStatus != nil && containerStatus.State == kubecontainer.ContainerStateExited && !containerStatus.FinishedAt.IsZero() {
-		ts := containerStatus.FinishedAt
+	var cStatus *kubecontainer.ContainerStatus
+	// Use the finished time of the latest exited container as the start point to calculate whether to do back-off.
+	// TODO(random-liu): Better define backoff start point; add unit and e2e test after we finalize this. (See github issue #22240)
+	for _, c := range podStatus.ContainerStatuses {
+		if c.Name == container.Name && c.State == kubecontainer.ContainerStateExited {
+			cStatus = c
+			break
+		}
+	}
+	if cStatus != nil {
+		ts := cStatus.FinishedAt
 		// found a container that requires backoff
 		dockerName := KubeletContainerName{
 			PodFullName: kubecontainer.GetPodFullName(pod),
@@ -1998,7 +2006,6 @@ func (dm *DockerManager) doBackOff(pod *api.Pod, container *api.Container, podSt
 			return true, kubecontainer.ErrCrashLoopBackOff, err.Error()
 		}
 		backOff.Next(stableName, ts)
-
 	}
 	return false, nil, ""
 }
diff --git a/test/e2e/pods.go b/test/e2e/pods.go
index ad22a094d20..928df86f2eb 100644
--- a/test/e2e/pods.go
+++ b/test/e2e/pods.go
@@ -20,7 +20,6 @@ import (
 	"bytes"
 	"fmt"
 	"io"
-	"math"
 	"strconv"
 	"strings"
 	"time"
@@ -199,7 +198,7 @@ func getRestartDelay(c *client.Client, pod *api.Pod, ns string, name string, con
 		if status.State.Waiting == nil && status.State.Running != nil && status.LastTerminationState.Terminated != nil && status.State.Running.StartedAt.Time.After(beginTime) {
 			startedAt := status.State.Running.StartedAt.Time
 			finishedAt := status.LastTerminationState.Terminated.FinishedAt.Time
-			Logf("getRestartDelay: finishedAt=%s restartedAt=%s (%s)", finishedAt, startedAt, startedAt.Sub(finishedAt))
+			Logf("getRestartDelay: restartCount = %d, finishedAt=%s restartedAt=%s (%s)", status.RestartCount, finishedAt, startedAt, startedAt.Sub(finishedAt))
 			return startedAt.Sub(finishedAt), nil
 		}
 	}
@@ -982,46 +981,6 @@ var _ = Describe("Pods", func() {
 		}
 	})
 
-	It("should not back-off restarting a container on LivenessProbe failure [Serial]", func() {
-		podClient := framework.Client.Pods(framework.Namespace.Name)
-		podName := "pod-back-off-liveness"
-		containerName := "back-off-liveness"
-		pod := &api.Pod{
-			ObjectMeta: api.ObjectMeta{
-				Name:   podName,
-				Labels: map[string]string{"test": "liveness"},
-			},
-			Spec: api.PodSpec{
-				Containers: []api.Container{
-					{
-						Name:    containerName,
-						Image:   "gcr.io/google_containers/busybox:1.24",
-						Command: []string{"/bin/sh", "-c", "echo ok >/tmp/health; sleep 5; rm -rf /tmp/health; sleep 600"},
-						LivenessProbe: &api.Probe{
-							Handler: api.Handler{
-								Exec: &api.ExecAction{
-									Command: []string{"cat", "/tmp/health"},
-								},
-							},
-							InitialDelaySeconds: 5,
-						},
-					},
-				},
-			},
-		}
-
-		defer func() {
-			By("deleting the pod")
-			podClient.Delete(pod.Name, api.NewDeleteOptions(0))
-		}()
-
-		delay1, delay2 := startPodAndGetBackOffs(framework, pod, podName, containerName, buildBackOffDuration)
-
-		if math.Abs(float64(delay2-delay1)) > float64(syncLoopFrequency) {
-			Failf("back-off increasing on LivenessProbe failure delay1=%s delay2=%s", delay1, delay2)
-		}
-	})
-
 	// Slow issue #19027 (20 mins)
 	It("should cap back-off at MaxContainerBackOff [Slow]", func() {
 		podClient := framework.Client.Pods(framework.Namespace.Name)