Merge pull request #13103 from yujuhong/restart_count

Kubelet: fix container restart counts
This commit is contained in:
Yu-Ju Hong 2015-08-26 15:13:10 -07:00
commit c7afae75f5
2 changed files with 65 additions and 15 deletions

View File

@ -2385,6 +2385,18 @@ func (kl *Kubelet) generatePodStatus(pod *api.Pod) (api.PodStatus, error) {
podFullName := kubecontainer.GetPodFullName(pod)
glog.V(3).Infof("Generating status for %q", podFullName)
if existingStatus, ok := kl.statusManager.GetPodStatus(pod.UID); ok {
// This is a hacky fix to ensure container restart counts increment
// monotonically. Normally, we should not modify the given pod. In this
// case, we check whether there is a cached status for this pod, and update
// the pod so that we update the restart count appropriately.
// TODO(yujuhong): We will not need to count dead containers every time
// once we add the runtime pod cache.
// Note that kubelet restarts may still cause temporary setbacks of
// restart counts.
pod.Status = existingStatus
}
// TODO: Consider including the container information.
if kl.pastActiveDeadline(pod) {
reason := "DeadlineExceeded"

View File

@ -35,7 +35,7 @@ import (
. "github.com/onsi/gomega"
)
func runLivenessTest(c *client.Client, ns string, podDescr *api.Pod, expectRestart bool) {
func runLivenessTest(c *client.Client, ns string, podDescr *api.Pod, expectNumRestarts int) {
By(fmt.Sprintf("Creating pod %s in namespace %s", podDescr.Name, ns))
_, err := c.Pods(ns).Create(podDescr)
expectNoError(err, fmt.Sprintf("creating pod %s", podDescr.Name))
@ -61,24 +61,35 @@ func runLivenessTest(c *client.Client, ns string, podDescr *api.Pod, expectResta
By(fmt.Sprintf("Initial restart count of pod %s is %d", podDescr.Name, initialRestartCount))
// Wait for the restart state to be as desired.
restarts, deadline := false, time.Now().Add(2*time.Minute)
deadline := time.Now().Add(2 * time.Minute)
lastRestartCount := initialRestartCount
observedRestarts := 0
for start := time.Now(); time.Now().Before(deadline); time.Sleep(2 * time.Second) {
pod, err = c.Pods(ns).Get(podDescr.Name)
expectNoError(err, fmt.Sprintf("getting pod %s", podDescr.Name))
restartCount := api.GetExistingContainerStatus(pod.Status.ContainerStatuses, "liveness").RestartCount
By(fmt.Sprintf("Restart count of pod %s/%s is now %d (%v elapsed)",
ns, podDescr.Name, restartCount, time.Since(start)))
if restartCount > initialRestartCount {
By(fmt.Sprintf("Restart count of pod %s/%s changed from %d to %d",
ns, podDescr.Name, initialRestartCount, restartCount))
restarts = true
if restartCount != lastRestartCount {
By(fmt.Sprintf("Restart count of pod %s/%s is now %d (%v elapsed)",
ns, podDescr.Name, restartCount, time.Since(start)))
if restartCount < lastRestartCount {
Failf("Restart count should increment monotonically: restart cont of pod %s/%s changed from %d to %d",
ns, podDescr.Name, lastRestartCount, restartCount)
}
}
observedRestarts = restartCount - initialRestartCount
if expectNumRestarts > 0 && observedRestarts >= expectNumRestarts {
// Stop once we have observed at least expectNumRestarts restarts.
break
}
lastRestartCount = restartCount
}
if restarts != expectRestart {
Failf("pod %s/%s - expected restarts: %t, found restarts: %t",
ns, podDescr.Name, expectRestart, restarts)
// If we expected 0 restarts, fail if we observed any restart.
// If we expected n restarts (n > 0), fail if we observed < n restarts.
if (expectNumRestarts == 0 && observedRestarts > 0) || (expectNumRestarts > 0 &&
observedRestarts < expectNumRestarts) {
Failf("pod %s/%s - expected number of restarts: %t, found restarts: %t",
ns, podDescr.Name, expectNumRestarts, observedRestarts)
}
}
@ -466,7 +477,7 @@ var _ = Describe("Pods", func() {
},
},
},
}, true)
}, 1)
})
It("should *not* be restarted with a docker exec \"cat /tmp/health\" liveness probe", func() {
@ -492,7 +503,7 @@ var _ = Describe("Pods", func() {
},
},
},
}, false)
}, 0)
})
It("should be restarted with a /healthz http liveness probe", func() {
@ -519,7 +530,34 @@ var _ = Describe("Pods", func() {
},
},
},
}, true)
}, 1)
})
It("should have monotonically increasing restart count", func() {
runLivenessTest(framework.Client, framework.Namespace.Name, &api.Pod{
ObjectMeta: api.ObjectMeta{
Name: "liveness-http",
Labels: map[string]string{"test": "liveness"},
},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "liveness",
Image: "gcr.io/google_containers/liveness",
Command: []string{"/server"},
LivenessProbe: &api.Probe{
Handler: api.Handler{
HTTPGet: &api.HTTPGetAction{
Path: "/healthz",
Port: util.NewIntOrStringFromInt(8080),
},
},
InitialDelaySeconds: 5,
},
},
},
},
}, 8)
})
It("should *not* be restarted with a /healthz http liveness probe", func() {
@ -552,7 +590,7 @@ var _ = Describe("Pods", func() {
},
},
},
}, false)
}, 0)
})
// The following tests for remote command execution and port forwarding are