Merge pull request #22241 from Random-Liu/recover-back-off-behaviour

Auto commit by PR queue bot
k8s-merge-robot committed 2016-03-05 00:05:15 -08:00
commit e9d3be83ae
2 changed files with 12 additions and 46 deletions


@@ -1979,9 +1979,17 @@ func getUidFromUser(id string) string {
 // backoff deadline. However, because that won't cause error and the chance is really slim, we can just ignore it for now.
 // If a container is still in backoff, the function will return a brief backoff error and a detailed error message.
 func (dm *DockerManager) doBackOff(pod *api.Pod, container *api.Container, podStatus *kubecontainer.PodStatus, backOff *util.Backoff) (bool, error, string) {
-	containerStatus := podStatus.FindContainerStatusByName(container.Name)
-	if containerStatus != nil && containerStatus.State == kubecontainer.ContainerStateExited && !containerStatus.FinishedAt.IsZero() {
-		ts := containerStatus.FinishedAt
+	var cStatus *kubecontainer.ContainerStatus
+	// Use the finished time of the latest exited container as the start point to calculate whether to do back-off.
+	// TODO(random-liu): Better define backoff start point; add unit and e2e test after we finalize this. (See github issue #22240)
+	for _, c := range podStatus.ContainerStatuses {
+		if c.Name == container.Name && c.State == kubecontainer.ContainerStateExited {
+			cStatus = c
+			break
+		}
+	}
+	if cStatus != nil {
+		ts := cStatus.FinishedAt
 		// found a container that requires backoff
 		dockerName := KubeletContainerName{
 			PodFullName: kubecontainer.GetPodFullName(pod),
@@ -1998,7 +2006,6 @@ func (dm *DockerManager) doBackOff(pod *api.Pod, container *api.Container, podSt
 			return true, kubecontainer.ErrCrashLoopBackOff, err.Error()
 		}
 		backOff.Next(stableName, ts)
 	}
 	return false, nil, ""
 }
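For context on the hunks above: the new loop takes the first exited status whose name matches the container, which appears to rely on podStatus.ContainerStatuses listing the most recent statuses first, and uses that status's FinishedAt as the reference point for the back-off check; backOff.Next is only reached when the container is not already inside its window, which is what lengthens the crash-loop delay between consecutive restarts. A minimal, self-contained sketch of that gate follows, assuming a simplified stand-in for util.Backoff (the simpleBackoff type, its 10-second initial delay, and the key string are invented for this example, not the kubelet's actual API).

package main

import (
	"fmt"
	"time"
)

// simpleBackoff is a stand-in for util.Backoff: it tracks an exponentially
// growing delay per key and answers whether a restart attempted now would
// still fall inside the back-off window that started at eventTime.
type simpleBackoff struct {
	delays map[string]time.Duration
	max    time.Duration
}

func newSimpleBackoff(max time.Duration) *simpleBackoff {
	return &simpleBackoff{delays: map[string]time.Duration{}, max: max}
}

// IsInBackOffSince reports whether key is still backing off, measured from
// eventTime (here: the latest container exit) to now.
func (b *simpleBackoff) IsInBackOffSince(key string, eventTime time.Time) bool {
	return time.Since(eventTime) < b.delays[key]
}

// Next lengthens the delay for key (doubling, capped at max), mirroring how
// the crash-loop back-off grows after each permitted restart.
func (b *simpleBackoff) Next(key string, _ time.Time) {
	d := b.delays[key]
	if d == 0 {
		d = 10 * time.Second
	} else {
		d *= 2
	}
	if d > b.max {
		d = b.max
	}
	b.delays[key] = d
}

func main() {
	back := newSimpleBackoff(5 * time.Minute)
	key := "default_mypod_mycontainer" // stand-in for the stable docker name
	lastExit := time.Now().Add(-5 * time.Second)

	if back.IsInBackOffSince(key, lastExit) {
		fmt.Println("still in back-off, skip restart")
		return
	}
	back.Next(key, lastExit) // allow this restart, lengthen the next window
	fmt.Println("restarting container")
}

In the kubelet path above, the key is the stable container name derived from the pod and container, so repeated restarts of the same logical container share a single back-off entry.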


@@ -20,7 +20,6 @@ import (
 	"bytes"
 	"fmt"
 	"io"
-	"math"
 	"strconv"
 	"strings"
 	"time"
@@ -199,7 +198,7 @@ func getRestartDelay(c *client.Client, pod *api.Pod, ns string, name string, con
 		if status.State.Waiting == nil && status.State.Running != nil && status.LastTerminationState.Terminated != nil && status.State.Running.StartedAt.Time.After(beginTime) {
 			startedAt := status.State.Running.StartedAt.Time
 			finishedAt := status.LastTerminationState.Terminated.FinishedAt.Time
-			Logf("getRestartDelay: finishedAt=%s restartedAt=%s (%s)", finishedAt, startedAt, startedAt.Sub(finishedAt))
+			Logf("getRestartDelay: restartCount = %d, finishedAt=%s restartedAt=%s (%s)", status.RestartCount, finishedAt, startedAt, startedAt.Sub(finishedAt))
 			return startedAt.Sub(finishedAt), nil
 		}
 	}
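The delay logged above is simply the gap between the previous termination (LastTerminationState.Terminated.FinishedAt) and the next successful start (State.Running.StartedAt) of the same container; the added restartCount makes it easier to correlate each measured delay with a specific restart. Below is a rough, self-contained sketch of that arithmetic plus the flat-delay assertion used by the liveness test removed in the next hunk; the containerStatus struct, the sample timestamps, and the 10-second tolerance are stand-ins invented for this sketch, not the e2e framework's real types.

package main

import (
	"fmt"
	"math"
	"time"
)

// Illustrative stand-in for the fields of the container status that
// getRestartDelay reads above.
type containerStatus struct {
	RestartCount int
	FinishedAt   time.Time // previous termination time
	StartedAt    time.Time // subsequent successful start time
}

// restartDelay mirrors the computation in getRestartDelay: the gap between
// the previous exit and the next successful start of the same container.
func restartDelay(s containerStatus) time.Duration {
	return s.StartedAt.Sub(s.FinishedAt)
}

func main() {
	now := time.Now()
	first := containerStatus{RestartCount: 1, FinishedAt: now.Add(-70 * time.Second), StartedAt: now.Add(-60 * time.Second)}
	second := containerStatus{RestartCount: 2, FinishedAt: now.Add(-12 * time.Second), StartedAt: now}

	delay1, delay2 := restartDelay(first), restartDelay(second)
	fmt.Printf("restartCount=%d delay1=%s, restartCount=%d delay2=%s\n",
		first.RestartCount, delay1, second.RestartCount, delay2)

	// The liveness test removed in the next hunk asserted that consecutive
	// delays stay roughly equal, using this same style of comparison.
	syncLoopFrequency := 10 * time.Second // made-up tolerance for this sketch
	if math.Abs(float64(delay2-delay1)) > float64(syncLoopFrequency) {
		fmt.Println("back-off increasing between restarts")
	} else {
		fmt.Println("delays roughly equal within one sync period")
	}
}

The remaining [Slow] test below instead lets the delay keep growing until, per its description, it should stop increasing at the kubelet's maximum container back-off.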
@@ -982,46 +981,6 @@ var _ = Describe("Pods", func() {
 		}
 	})
-	It("should not back-off restarting a container on LivenessProbe failure [Serial]", func() {
-		podClient := framework.Client.Pods(framework.Namespace.Name)
-		podName := "pod-back-off-liveness"
-		containerName := "back-off-liveness"
-		pod := &api.Pod{
-			ObjectMeta: api.ObjectMeta{
-				Name: podName,
-				Labels: map[string]string{"test": "liveness"},
-			},
-			Spec: api.PodSpec{
-				Containers: []api.Container{
-					{
-						Name: containerName,
-						Image: "gcr.io/google_containers/busybox:1.24",
-						Command: []string{"/bin/sh", "-c", "echo ok >/tmp/health; sleep 5; rm -rf /tmp/health; sleep 600"},
-						LivenessProbe: &api.Probe{
-							Handler: api.Handler{
-								Exec: &api.ExecAction{
-									Command: []string{"cat", "/tmp/health"},
-								},
-							},
-							InitialDelaySeconds: 5,
-						},
-					},
-				},
-			},
-		}
-		defer func() {
-			By("deleting the pod")
-			podClient.Delete(pod.Name, api.NewDeleteOptions(0))
-		}()
-		delay1, delay2 := startPodAndGetBackOffs(framework, pod, podName, containerName, buildBackOffDuration)
-		if math.Abs(float64(delay2-delay1)) > float64(syncLoopFrequency) {
-			Failf("back-off increasing on LivenessProbe failure delay1=%s delay2=%s", delay1, delay2)
-		}
-	})
 	// Slow issue #19027 (20 mins)
 	It("should cap back-off at MaxContainerBackOff [Slow]", func() {
 		podClient := framework.Client.Pods(framework.Namespace.Name)