diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index d50502e31c0..a35f1400c17 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -1202,36 +1202,17 @@ func (kl *Kubelet) syncPod(pod *api.BoundPod, containersInPod dockertools.Docker // look for changes in the container. containerChanged := hash != 0 && hash != expectedHash if !containerChanged { - // TODO: This should probably be separated out into a separate goroutine. - // If the container's liveness probe is unsuccessful, set readiness to false. If liveness is succesful, do a readiness check and set - // readiness accordingly. If the initalDelay since container creation on liveness probe has not passed the probe will return Success. - // If the initial delay on the readiness probe has not passed the probe will return Failure. - ready := probe.Unknown - live, err := kl.probeContainer(container.LivenessProbe, podFullName, uid, podStatus, container, dockerContainer, probe.Success) - if live == probe.Success { - ready, _ = kl.probeContainer(container.ReadinessProbe, podFullName, uid, podStatus, container, dockerContainer, probe.Failure) - } + result, err := kl.probeContainer(pod, podStatus, container, dockerContainer) if err != nil { glog.V(1).Infof("liveness/readiness probe errored: %v", err) containersInPod.RemoveContainerWithID(containerID) continue } - if ready == probe.Success { - kl.readiness.set(dockerContainer.ID, true) - } else { - kl.readiness.set(dockerContainer.ID, false) - } - if live == probe.Success { + if result == probe.Success { containersInPod.RemoveContainerWithID(containerID) continue } - ref, ok := kl.getRef(containerID) - if !ok { - glog.Warningf("No ref for pod '%v' - '%v'", containerID, container.Name) - } else { - kl.recorder.Eventf(ref, "unhealthy", "Liveness Probe Failed %v - %v", containerID, container.Name) - } - glog.Infof("pod %q container %q is unhealthy (probe result: %v). Container will be killed and re-created.", podFullName, container.Name, live) + glog.Infof("pod %q container %q is unhealthy (probe result: %v). Container will be killed and re-created.", podFullName, container.Name, result) } else { glog.Infof("pod %q container %q hash changed (%d vs %d). Container will be killed and re-created.", podFullName, container.Name, hash, expectedHash) // Also kill associated pod infra container if the container changed. diff --git a/pkg/kubelet/probe.go b/pkg/kubelet/probe.go index 75cab23604a..b22f250d4c7 100644 --- a/pkg/kubelet/probe.go +++ b/pkg/kubelet/probe.go @@ -24,53 +24,102 @@ import ( "time" "github.com/GoogleCloudPlatform/kubernetes/pkg/api" + "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools" "github.com/GoogleCloudPlatform/kubernetes/pkg/probe" execprobe "github.com/GoogleCloudPlatform/kubernetes/pkg/probe/exec" httprobe "github.com/GoogleCloudPlatform/kubernetes/pkg/probe/http" tcprobe "github.com/GoogleCloudPlatform/kubernetes/pkg/probe/tcp" - "github.com/GoogleCloudPlatform/kubernetes/pkg/types" "github.com/GoogleCloudPlatform/kubernetes/pkg/util" "github.com/GoogleCloudPlatform/kubernetes/pkg/util/exec" - "github.com/fsouza/go-dockerclient" + "github.com/golang/glog" ) const maxProbeRetries = 3 -// probeContainer executes the given probe on a container and returns the result. -// If the probe is nil this returns Success. If the probe's initial delay has not passed -// since the creation of the container, this returns the defaultResult. It will then attempt -// to execute the probe repeatedly up to maxProbeRetries times, and return on the first -// successful result, else returning the last unsucessful result and error. -func (kl *Kubelet) probeContainer(p *api.Probe, - podFullName string, - podUID types.UID, - status api.PodStatus, - container api.Container, - dockerContainer *docker.APIContainers, - defaultResult probe.Result) (probe.Result, error) { - var err error - result := probe.Unknown +// probeContainer probes the liveness/readiness of the given container. +// If the container's liveness probe is unsuccessful, set readiness to false. +// If liveness is successful, do a readiness check and set readiness accordingly. +func (kl *Kubelet) probeContainer(pod *api.BoundPod, status api.PodStatus, container api.Container, dockerContainer *docker.APIContainers) (probe.Result, error) { + // Probe liveness. + live, err := kl.probeContainerLiveness(pod, status, container, dockerContainer) + if err != nil { + glog.V(1).Infof("liveness probe errored: %v", err) + kl.readiness.set(dockerContainer.ID, false) + return probe.Unknown, err + } + if live != probe.Success { + glog.V(1).Infof("liveness probe unsuccessful: %v", live) + kl.readiness.set(dockerContainer.ID, false) + return live, nil + } + + // Probe readiness. + ready, err := kl.probeContainerReadiness(pod, status, container, dockerContainer) + if err == nil && ready == probe.Success { + glog.V(1).Infof("readiness probe successful: %v", ready) + kl.readiness.set(dockerContainer.ID, true) + return probe.Success, nil + } + + glog.V(1).Infof("readiness probe failed/errored: %v, %v", ready, err) + kl.readiness.set(dockerContainer.ID, false) + + containerID := dockertools.DockerID(dockerContainer.ID) + ref, ok := kl.getRef(containerID) + if !ok { + glog.Warningf("No ref for pod '%v' - '%v'", containerID, container.Name) + } else { + kl.recorder.Eventf(ref, "unhealthy", "Liveness Probe Failed %v - %v", containerID, container.Name) + } + return ready, err +} + +// probeContainerLiveness probes the liveness of a container. +// If the initalDelay since container creation on liveness probe has not passed the probe will return probe.Success. +func (kl *Kubelet) probeContainerLiveness(pod *api.BoundPod, status api.PodStatus, container api.Container, dockerContainer *docker.APIContainers) (probe.Result, error) { + p := container.LivenessProbe if p == nil { return probe.Success, nil } if time.Now().Unix()-dockerContainer.Created < p.InitialDelaySeconds { - return defaultResult, nil + return probe.Success, nil } - for i := 0; i < maxProbeRetries; i++ { - result, err = kl.runProbe(p, podFullName, podUID, status, container) + return kl.runProbeWithRetries(p, pod, status, container, maxProbeRetries) +} + +// probeContainerLiveness probes the readiness of a container. +// If the initial delay on the readiness probe has not passed the probe will return probe.Failure. +func (kl *Kubelet) probeContainerReadiness(pod *api.BoundPod, status api.PodStatus, container api.Container, dockerContainer *docker.APIContainers) (probe.Result, error) { + p := container.ReadinessProbe + if p == nil { + return probe.Success, nil + } + if time.Now().Unix()-dockerContainer.Created < p.InitialDelaySeconds { + return probe.Failure, nil + } + return kl.runProbeWithRetries(p, pod, status, container, maxProbeRetries) +} + +// runProbeWithRetries tries to probe the container in a finite loop, it returns the last result +// if it never succeeds. +func (kl *Kubelet) runProbeWithRetries(p *api.Probe, pod *api.BoundPod, status api.PodStatus, container api.Container, retires int) (probe.Result, error) { + var err error + var result probe.Result + for i := 0; i < retires; i++ { + result, err = kl.runProbe(p, pod, status, container) if result == probe.Success { - return result, err + return probe.Success, nil } } return result, err } -func (kl *Kubelet) runProbe(p *api.Probe, podFullName string, podUID types.UID, status api.PodStatus, container api.Container) (probe.Result, error) { +func (kl *Kubelet) runProbe(p *api.Probe, pod *api.BoundPod, status api.PodStatus, container api.Container) (probe.Result, error) { timeout := time.Duration(p.TimeoutSeconds) * time.Second if p.Exec != nil { - return kl.prober.exec.Probe(kl.newExecInContainer(podFullName, podUID, container)) + return kl.prober.exec.Probe(kl.newExecInContainer(pod, container)) } if p.HTTPGet != nil { port, err := extractPort(p.HTTPGet.Port, container) @@ -141,9 +190,11 @@ type execInContainer struct { run func() ([]byte, error) } -func (kl *Kubelet) newExecInContainer(podFullName string, podUID types.UID, container api.Container) exec.Cmd { +func (kl *Kubelet) newExecInContainer(pod *api.BoundPod, container api.Container) exec.Cmd { + uid := pod.UID + podFullName := GetPodFullName(pod) return execInContainer{func() ([]byte, error) { - return kl.RunInContainer(podFullName, podUID, container.Name, container.LivenessProbe.Exec.Command) + return kl.RunInContainer(podFullName, uid, container.Name, container.LivenessProbe.Exec.Command) }} } diff --git a/pkg/kubelet/probe_test.go b/pkg/kubelet/probe_test.go index 988ebf55cdf..40e75b14134 100644 --- a/pkg/kubelet/probe_test.go +++ b/pkg/kubelet/probe_test.go @@ -23,7 +23,6 @@ import ( "github.com/GoogleCloudPlatform/kubernetes/pkg/api" "github.com/GoogleCloudPlatform/kubernetes/pkg/probe" - "github.com/GoogleCloudPlatform/kubernetes/pkg/types" "github.com/GoogleCloudPlatform/kubernetes/pkg/util" "github.com/GoogleCloudPlatform/kubernetes/pkg/util/exec" @@ -147,6 +146,7 @@ func (p fakeExecProber) Probe(_ exec.Cmd) (probe.Result, error) { func makeTestKubelet(result probe.Result, err error) *Kubelet { return &Kubelet{ + readiness: newReadinessStates(), prober: probeHolder{ exec: fakeExecProber{ result: result, @@ -156,68 +156,239 @@ func makeTestKubelet(result probe.Result, err error) *Kubelet { } } +// TestProbeContainer tests the functionality of probeContainer. +// Test cases are: +// +// No probe. +// Only LivenessProbe. +// Only ReadinessProbe. +// Both probes. +// +// Also, for each probe, there will be several cases covering whether the initial +// delay has passed, whether the probe handler will return Success, Failure, +// Unknown or error. +// func TestProbeContainer(t *testing.T) { - dc := &docker.APIContainers{Created: time.Now().Unix()} + dc := &docker.APIContainers{ + ID: "foobar", + Created: time.Now().Unix(), + } tests := []struct { - p *api.Probe - defaultResult probe.Result - expectError bool - expectedResult probe.Result + testContainer api.Container + expectError bool + expectedResult probe.Result + expectedReadiness bool }{ + // No probes. { - defaultResult: probe.Success, - expectedResult: probe.Success, + testContainer: api.Container{}, + expectedResult: probe.Success, + expectedReadiness: true, }, + // Only LivenessProbe. { - defaultResult: probe.Failure, - expectedResult: probe.Success, - }, - { - p: &api.Probe{InitialDelaySeconds: 100}, - defaultResult: probe.Failure, - expectError: false, - expectedResult: probe.Failure, - }, - { - p: &api.Probe{ - InitialDelaySeconds: -100, + testContainer: api.Container{ + LivenessProbe: &api.Probe{InitialDelaySeconds: 100}, }, - defaultResult: probe.Failure, - expectError: false, - expectedResult: probe.Unknown, + expectedResult: probe.Success, + expectedReadiness: true, }, { - p: &api.Probe{ - InitialDelaySeconds: -100, - Handler: api.Handler{ - Exec: &api.ExecAction{}, + testContainer: api.Container{ + LivenessProbe: &api.Probe{InitialDelaySeconds: -100}, + }, + expectedResult: probe.Unknown, + expectedReadiness: false, + }, + { + testContainer: api.Container{ + LivenessProbe: &api.Probe{ + InitialDelaySeconds: -100, + Handler: api.Handler{ + Exec: &api.ExecAction{}, + }, }, }, - defaultResult: probe.Failure, - expectError: false, - expectedResult: probe.Success, + expectedResult: probe.Failure, + expectedReadiness: false, }, { - p: &api.Probe{ - InitialDelaySeconds: -100, - Handler: api.Handler{ - Exec: &api.ExecAction{}, + testContainer: api.Container{ + LivenessProbe: &api.Probe{ + InitialDelaySeconds: -100, + Handler: api.Handler{ + Exec: &api.ExecAction{}, + }, }, }, - defaultResult: probe.Failure, - expectError: true, - expectedResult: probe.Unknown, + expectedResult: probe.Success, + expectedReadiness: true, }, { - p: &api.Probe{ - InitialDelaySeconds: -100, - Handler: api.Handler{ - Exec: &api.ExecAction{}, + testContainer: api.Container{ + LivenessProbe: &api.Probe{ + InitialDelaySeconds: -100, + Handler: api.Handler{ + Exec: &api.ExecAction{}, + }, }, }, - defaultResult: probe.Success, - expectError: false, - expectedResult: probe.Failure, + expectedResult: probe.Unknown, + expectedReadiness: false, + }, + { + testContainer: api.Container{ + LivenessProbe: &api.Probe{ + InitialDelaySeconds: -100, + Handler: api.Handler{ + Exec: &api.ExecAction{}, + }, + }, + }, + expectError: true, + expectedResult: probe.Unknown, + expectedReadiness: false, + }, + // Only ReadinessProbe. + { + testContainer: api.Container{ + ReadinessProbe: &api.Probe{InitialDelaySeconds: 100}, + }, + expectedResult: probe.Failure, + expectedReadiness: false, + }, + { + testContainer: api.Container{ + ReadinessProbe: &api.Probe{InitialDelaySeconds: -100}, + }, + expectedResult: probe.Unknown, + expectedReadiness: false, + }, + { + testContainer: api.Container{ + ReadinessProbe: &api.Probe{ + InitialDelaySeconds: -100, + Handler: api.Handler{ + Exec: &api.ExecAction{}, + }, + }, + }, + expectedResult: probe.Failure, + expectedReadiness: false, + }, + { + testContainer: api.Container{ + ReadinessProbe: &api.Probe{ + InitialDelaySeconds: -100, + Handler: api.Handler{ + Exec: &api.ExecAction{}, + }, + }, + }, + expectedResult: probe.Success, + expectedReadiness: true, + }, + { + testContainer: api.Container{ + ReadinessProbe: &api.Probe{ + InitialDelaySeconds: -100, + Handler: api.Handler{ + Exec: &api.ExecAction{}, + }, + }, + }, + expectedResult: probe.Unknown, + expectedReadiness: false, + }, + { + testContainer: api.Container{ + ReadinessProbe: &api.Probe{ + InitialDelaySeconds: -100, + Handler: api.Handler{ + Exec: &api.ExecAction{}, + }, + }, + }, + expectError: true, + expectedResult: probe.Unknown, + expectedReadiness: false, + }, + // Both LivenessProbe and ReadinessProbe. + { + testContainer: api.Container{ + LivenessProbe: &api.Probe{InitialDelaySeconds: 100}, + ReadinessProbe: &api.Probe{InitialDelaySeconds: 100}, + }, + expectedResult: probe.Failure, + expectedReadiness: false, + }, + { + testContainer: api.Container{ + LivenessProbe: &api.Probe{InitialDelaySeconds: 100}, + ReadinessProbe: &api.Probe{InitialDelaySeconds: -100}, + }, + expectedResult: probe.Unknown, + expectedReadiness: false, + }, + { + testContainer: api.Container{ + LivenessProbe: &api.Probe{InitialDelaySeconds: -100}, + ReadinessProbe: &api.Probe{InitialDelaySeconds: 100}, + }, + expectedResult: probe.Unknown, + expectedReadiness: false, + }, + { + testContainer: api.Container{ + LivenessProbe: &api.Probe{InitialDelaySeconds: -100}, + ReadinessProbe: &api.Probe{InitialDelaySeconds: -100}, + }, + expectedResult: probe.Unknown, + expectedReadiness: false, + }, + { + testContainer: api.Container{ + LivenessProbe: &api.Probe{ + InitialDelaySeconds: -100, + Handler: api.Handler{ + Exec: &api.ExecAction{}, + }, + }, + ReadinessProbe: &api.Probe{InitialDelaySeconds: -100}, + }, + expectedResult: probe.Unknown, + expectedReadiness: false, + }, + { + testContainer: api.Container{ + LivenessProbe: &api.Probe{ + InitialDelaySeconds: -100, + Handler: api.Handler{ + Exec: &api.ExecAction{}, + }, + }, + ReadinessProbe: &api.Probe{InitialDelaySeconds: -100}, + }, + expectedResult: probe.Failure, + expectedReadiness: false, + }, + { + testContainer: api.Container{ + LivenessProbe: &api.Probe{ + InitialDelaySeconds: -100, + Handler: api.Handler{ + Exec: &api.ExecAction{}, + }, + }, + ReadinessProbe: &api.Probe{ + InitialDelaySeconds: -100, + Handler: api.Handler{ + Exec: &api.ExecAction{}, + }, + }, + }, + expectedResult: probe.Success, + expectedReadiness: true, }, } @@ -229,8 +400,7 @@ func TestProbeContainer(t *testing.T) { } else { kl = makeTestKubelet(test.expectedResult, nil) } - - result, err := kl.probeContainer(test.p, "", types.UID(""), api.PodStatus{}, api.Container{}, dc, test.defaultResult) + result, err := kl.probeContainer(&api.BoundPod{}, api.PodStatus{}, test.testContainer, dc) if test.expectError && err == nil { t.Error("Expected error but did no error was returned.") } @@ -240,5 +410,8 @@ func TestProbeContainer(t *testing.T) { if test.expectedResult != result { t.Errorf("Expected result was %v but probeContainer() returned %v", test.expectedResult, result) } + if test.expectedReadiness != kl.readiness.get(dc.ID) { + t.Errorf("Expected readiness was %v but probeContainer() set %v", test.expectedReadiness, kl.readiness.get(dc.ID)) + } } }