Merge pull request #13103 from yujuhong/restart_count

Kubelet: fix container restart counts
Author: Yu-Ju Hong
Date:   2015-08-26 15:13:10 -07:00
Commit: c7afae75f5
2 changed files with 65 additions and 15 deletions


@@ -2385,6 +2385,18 @@ func (kl *Kubelet) generatePodStatus(pod *api.Pod) (api.PodStatus, error) {
 	podFullName := kubecontainer.GetPodFullName(pod)
 	glog.V(3).Infof("Generating status for %q", podFullName)
+	if existingStatus, ok := kl.statusManager.GetPodStatus(pod.UID); ok {
+		// This is a hacky fix to ensure that container restart counts
+		// increment monotonically. Normally, we should not modify the given
+		// pod. In this case, we check whether there is a cached status for
+		// this pod, and update the pod so that the restart count is carried
+		// over appropriately.
+		// TODO(yujuhong): We will not need to count dead containers every
+		// time once we add the runtime pod cache.
+		// Note that kubelet restarts may still cause temporary setbacks of
+		// the restart counts.
+		pod.Status = existingStatus
+	}
 	// TODO: Consider include the container information.
 	if kl.pastActiveDeadline(pod) {
 		reason := "DeadlineExceeded"


@@ -35,7 +35,7 @@ import (
 	. "github.com/onsi/gomega"
 )

-func runLivenessTest(c *client.Client, ns string, podDescr *api.Pod, expectRestart bool) {
+func runLivenessTest(c *client.Client, ns string, podDescr *api.Pod, expectNumRestarts int) {
 	By(fmt.Sprintf("Creating pod %s in namespace %s", podDescr.Name, ns))
 	_, err := c.Pods(ns).Create(podDescr)
 	expectNoError(err, fmt.Sprintf("creating pod %s", podDescr.Name))
@@ -61,24 +61,35 @@ func runLivenessTest(c *client.Client, ns string, podDescr *api.Pod, expectNumRestarts int) {
 	By(fmt.Sprintf("Initial restart count of pod %s is %d", podDescr.Name, initialRestartCount))

 	// Wait for the restart state to be as desired.
-	restarts, deadline := false, time.Now().Add(2 * time.Minute)
+	deadline := time.Now().Add(2 * time.Minute)
+	lastRestartCount := initialRestartCount
+	observedRestarts := 0
 	for start := time.Now(); time.Now().Before(deadline); time.Sleep(2 * time.Second) {
 		pod, err = c.Pods(ns).Get(podDescr.Name)
 		expectNoError(err, fmt.Sprintf("getting pod %s", podDescr.Name))
 		restartCount := api.GetExistingContainerStatus(pod.Status.ContainerStatuses, "liveness").RestartCount
-		By(fmt.Sprintf("Restart count of pod %s/%s is now %d (%v elapsed)",
-			ns, podDescr.Name, restartCount, time.Since(start)))
-		if restartCount > initialRestartCount {
-			By(fmt.Sprintf("Restart count of pod %s/%s changed from %d to %d",
-				ns, podDescr.Name, initialRestartCount, restartCount))
-			restarts = true
+		if restartCount != lastRestartCount {
+			By(fmt.Sprintf("Restart count of pod %s/%s is now %d (%v elapsed)",
+				ns, podDescr.Name, restartCount, time.Since(start)))
+			if restartCount < lastRestartCount {
+				Failf("Restart count should increment monotonically: restart count of pod %s/%s changed from %d to %d",
+					ns, podDescr.Name, lastRestartCount, restartCount)
+			}
+		}
+		observedRestarts = restartCount - initialRestartCount
+		if expectNumRestarts > 0 && observedRestarts >= expectNumRestarts {
+			// Stop if we have observed at least expectNumRestarts restarts.
 			break
 		}
+		lastRestartCount = restartCount
 	}

-	if restarts != expectRestart {
-		Failf("pod %s/%s - expected restarts: %t, found restarts: %t",
-			ns, podDescr.Name, expectRestart, restarts)
+	// If we expected 0 restarts, fail if any restarts were observed.
+	// If we expected n restarts (n > 0), fail if fewer than n were observed.
+	if (expectNumRestarts == 0 && observedRestarts > 0) || (expectNumRestarts > 0 &&
+		observedRestarts < expectNumRestarts) {
+		Failf("pod %s/%s - expected number of restarts: %d, found restarts: %d",
+			ns, podDescr.Name, expectNumRestarts, observedRestarts)
 	}
 }
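The hunk above changes the test's contract from a boolean ("did it restart at all?") to a count, and additionally asserts that the count never decreases between polls. A hypothetical standalone restatement of the final pass/fail rule (livenessOutcome is illustrative only, not part of the patch):

package main

import "fmt"

// livenessOutcome encodes the new expectNumRestarts semantics:
// 0 means "no restarts allowed"; n > 0 means "at least n restarts required".
func livenessOutcome(expectNumRestarts, observedRestarts int) error {
	if expectNumRestarts == 0 && observedRestarts > 0 {
		return fmt.Errorf("expected no restarts, observed %d", observedRestarts)
	}
	if expectNumRestarts > 0 && observedRestarts < expectNumRestarts {
		return fmt.Errorf("expected at least %d restarts, observed %d",
			expectNumRestarts, observedRestarts)
	}
	return nil
}

func main() {
	fmt.Println(livenessOutcome(0, 0)) // <nil>: probe never fired, as required
	fmt.Println(livenessOutcome(1, 3)) // <nil>: at least one restart observed
	fmt.Println(livenessOutcome(8, 5)) // error: too few restarts before the deadline
}

Requiring "at least n" rather than "exactly n" keeps the test robust to extra restarts that may occur between polls, while the monotonicity check in the loop is what actually exercises this PR's fix.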
@@ -466,7 +477,7 @@ var _ = Describe("Pods", func() {
 					},
 				},
 			},
-		}, true)
+		}, 1)
 	})

 	It("should *not* be restarted with a docker exec \"cat /tmp/health\" liveness probe", func() {
@@ -492,7 +503,7 @@ var _ = Describe("Pods", func() {
 					},
 				},
 			},
-		}, false)
+		}, 0)
 	})

 	It("should be restarted with a /healthz http liveness probe", func() {
@@ -519,7 +530,34 @@ var _ = Describe("Pods", func() {
 					},
 				},
 			},
-		}, true)
+		}, 1)
 	})

+	It("should have monotonically increasing restart count", func() {
+		runLivenessTest(framework.Client, framework.Namespace.Name, &api.Pod{
+			ObjectMeta: api.ObjectMeta{
+				Name:   "liveness-http",
+				Labels: map[string]string{"test": "liveness"},
+			},
+			Spec: api.PodSpec{
+				Containers: []api.Container{
+					{
+						Name:    "liveness",
+						Image:   "gcr.io/google_containers/liveness",
+						Command: []string{"/server"},
+						LivenessProbe: &api.Probe{
+							Handler: api.Handler{
+								HTTPGet: &api.HTTPGetAction{
+									Path: "/healthz",
+									Port: util.NewIntOrStringFromInt(8080),
+								},
+							},
+							InitialDelaySeconds: 5,
+						},
+					},
+				},
+			},
+		}, 8)
+	})
+
 	It("should *not* be restarted with a /healthz http liveness probe", func() {
@@ -552,7 +590,7 @@ var _ = Describe("Pods", func() {
 					},
 				},
 			},
-		}, false)
+		}, 0)
 	})

 	// The following tests for remote command execution and port forwarding are