diff --git a/contrib/exec-healthz/Dockerfile b/contrib/exec-healthz/Dockerfile new file mode 100644 index 00000000000..ca39e5f8671 --- /dev/null +++ b/contrib/exec-healthz/Dockerfile @@ -0,0 +1,19 @@ +# Copyright 2015 The Kubernetes Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM busybox +MAINTAINER Prashanth B +ADD exechealthz exechealthz +ADD README.md README.md +ENTRYPOINT ["/exechealthz"] diff --git a/contrib/exec-healthz/Makefile b/contrib/exec-healthz/Makefile new file mode 100644 index 00000000000..6a200f6ca7f --- /dev/null +++ b/contrib/exec-healthz/Makefile @@ -0,0 +1,17 @@ +all: push + +# 0.0 shouldn't clobber any released builds +TAG = 0.0 +PREFIX = gcr.io/google_containers/exechealthz + +server: exechealthz.go + CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags '-w' -o exechealthz ./exechealthz.go + +container: server + docker build -t $(PREFIX):$(TAG) . + +push: container + gcloud docker push $(PREFIX):$(TAG) + +clean: + rm -f exechealthz diff --git a/contrib/exec-healthz/README.md b/contrib/exec-healthz/README.md new file mode 100644 index 00000000000..f970c09cdf6 --- /dev/null +++ b/contrib/exec-healthz/README.md @@ -0,0 +1,99 @@ +# Exec healthz server + +The exec healthz server is a sidecar container meant to serve as a liveness-exec-over-http bridge. It isolates pods from the idiosyncrasies of container runtime exec implementations.
+ +## Examples: + +### Run the healthz server directly on localhost: + +```shell +$ make server +$ ./exechealthz -cmd "ls /tmp/test" +$ curl http://localhost:8080/healthz +Healthz probe error: Result of last exec: ls: cannot access /tmp/test: No such file or directory +, at 2015-07-08 17:59:45.698036238 -0700 PDT, error exit status 2 +$ touch /tmp/test +$ curl http://localhost:8080/healthz +ok +``` + +### Run the healthz server in a docker container: + +The [docker daemon](https://docs.docker.com/userguide/) needs to be running on your host. +```shell +$ make container PREFIX=mycontainer/test +$ docker run -itP -p 8080:8080 mycontainer/test:0.0 -cmd "ls /tmp/test" +$ curl http://localhost:8080/healthz +Healthz probe error: Result of last exec: ls: cannot access /tmp/test: No such file or directory +, at 2015-07-08 18:00:57.698103532 -0700 PDT, error exit status 2 + +$ docker ps +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +8e86f8accfa6 mycontainer/test:0.0 "/exechealthz -cm" 27 seconds ago Up 26 seconds 0.0.0.0:8080->8080/tcp loving_albattani +$ docker exec -it 8e86f8accfa6 touch /tmp/test +$ curl http://localhost:8080/healthz +ok +``` + +### Run the healthz server in a kubernetes pod: + +You need a running [kubernetes cluster](../../docs/getting-started-guides/README.md). 
+Create a pod.json that looks like: +```json +{ + "kind": "Pod", + "apiVersion": "v1", + "metadata": { + "name": "simple" + }, + "spec": { + "containers": [ + { + "name": "healthz", + "image": "gcr.io/google_containers/exechealthz:1.0", + "args": [ + "-cmd=nslookup localhost" + ], + "ports": [ + { + "containerPort": 8080, + "protocol": "TCP" + } + ] + } + ] + } +} +``` + +And run the pod on your cluster using kubectl: +```shell +$ kubectl create -f pod.json +pods/simple +$ kubectl get pods -o wide +NAME READY STATUS RESTARTS AGE NODE +simple 0/1 Pending 0 3s node +``` + +SSH into the node (note that the recommended way to access a server in a container is through a [service](../../docs/services.md), the example that follows is just to illustrate how the kubelet performs an http liveness probe): +```shell +node$ kubectl get pods simple -o json | grep podIP +"podIP": "10.1.0.2", + +node$ curl http://10.1.0.2:8080/healthz +ok +``` + +### Run the healthz server as a sidecar container for liveness probes of another container: +Create a pod.json with 2 containers, one of which is the healthz probe and the other, the container being health checked. The +pod.json example file in this directory does exactly that. If you create the pod the same way you created the pod in the previous +example, the kubelet on the node will periodically perform a health check similar to what you did manually and restart the container +when it fails. Explore [liveness probes](../../examples/liveness/README.md). + +## Limitations: +* Doesn't handle sigterm, which means docker stop on this container can take longer than it needs to. +* Doesn't sanity check the probe command. You should set the -period and -latency parameters of exechealthz appropriately. +* Only ever returns 503 or 200. 
// TODO:
// 1. Sigterm handler for docker stop
// 2. Meaningful default healthz
// 3. 404 for unknown endpoints

// Command-line flags, plus the single probe worker shared by main and the
// /healthz handler.
var (
	port       = flag.Int("port", 8080, "Port number to serve /healthz.")
	cmd        = flag.String("cmd", "echo healthz", "Command to run in response to a GET on /healthz. If the given command exits with 0, /healthz will respond with a 200.")
	period     = flag.Duration("period", 2*time.Second, "Period to run the given cmd in an async worker.")
	maxLatency = flag.Duration("latency", 30*time.Second, "If the async worker hasn't updated the probe command output in this long, return a 503.")
	// prober is the async worker running the cmd, the output of which is used to service /healthz.
	prober *execWorker
)

// links lists the endpoints advertised on the index page served at "/".
var links = []struct {
	link, desc string
}{
	{"/healthz", "healthz probe. Returns \"ok\" if the command given through -cmd exits with 0."},
	{"/quit", "Cause this container to exit."},
}

// execResult holds the result of the latest exec from the execWorker.
type execResult struct {
	output []byte    // combined stdout and stderr of the probe command
	err    error     // nil when the command exited with status 0
	ts     time.Time // when the command finished (or when the worker was created)
}

// String renders the result as a single human-readable line; a nil error is
// printed as "None".
func (r execResult) String() string {
	errMsg := "None"
	if r.err != nil {
		errMsg = fmt.Sprint(r.err)
	}
	return fmt.Sprintf("Result of last exec: %v, at %v, error %v", string(r.output), r.ts, errMsg)
}

// execWorker provides an async interface to exec. It periodically runs
// probeCmd and remembers the most recent result.
type execWorker struct {
	result   execResult    // latest result; guarded by mutex
	mutex    sync.Mutex    // protects result
	period   time.Duration // interval between probe runs
	probeCmd string        // shell command handed to `sh -c`
	stopCh   chan struct{} // closed to make start return
}

// getResults returns the results of the latest execWorker run.
// The caller should treat returned results as read-only: the output slice is
// shared with the worker, not copied.
func (h *execWorker) getResults() execResult {
	h.mutex.Lock()
	defer h.mutex.Unlock()
	return h.result
}

// probe runs probeCmd once through `sh -c` and records its combined output,
// its error and the completion time as the latest result.
func (h *execWorker) probe() {
	log.Printf("Worker running %v", h.probeCmd)
	// Use the worker's own command rather than the global flag so that the
	// worker honours whatever it was constructed with.
	output, err := exec.Command("sh", "-c", h.probeCmd).CombinedOutput()
	ts := time.Now()

	h.mutex.Lock()
	defer h.mutex.Unlock()
	h.result = execResult{output: output, err: err, ts: ts}
}

// start attempts to run the probeCmd once every `period`, until stopCh is
// closed. Meant to be called as a goroutine.
//
// h.period must be positive; time.NewTicker panics otherwise.
func (h *execWorker) start() {
	ticker := time.NewTicker(h.period)
	defer ticker.Stop()

	for {
		select {
		// If the command takes > period, the command runs continuously.
		case <-ticker.C:
			h.probe()
		case <-h.stopCh:
			return
		}
	}
}

// newExecWorker is a constructor for execWorker. It does not start the worker;
// call start in a goroutine to begin probing.
func newExecWorker(probeCmd string, execPeriod time.Duration) *execWorker {
	return &execWorker{
		// Initializing the result with a timestamp here allows us to
		// wait maxLatency for the worker goroutine to start, and for each
		// iteration of the worker to complete.
		result:   execResult{output: []byte{}, ts: time.Now()},
		period:   execPeriod,
		probeCmd: probeCmd,
		stopCh:   make(chan struct{}),
	}
}

// indexPage renders the HTML index: a heading followed by one anchor per
// entry in links.
func indexPage() string {
	page := "<b> Kubernetes healthz sidecar container </b><br/><br/>"
	for _, v := range links {
		page += fmt.Sprintf(`<a href="%v">%v: %v</a><br/>`, v.link, v.link, v.desc)
	}
	return page
}

// healthzStatus maps a probe result to the HTTP status code and body that
// /healthz should answer with.
//
// It returns 503 if the last command exec returned a non-zero status, or the
// worker hasn't produced a result within maxAge (including when the worker
// goroutine is cpu starved, because the pod is probably unavailable too).
// Otherwise it returns 200 and "ok".
func healthzStatus(result execResult, maxAge time.Duration) (int, string) {
	switch {
	case result.err != nil:
		return http.StatusServiceUnavailable, fmt.Sprintf("Healthz probe error: %v", result)
	case time.Since(result.ts) > maxAge:
		return http.StatusServiceUnavailable, fmt.Sprintf("Latest result too old to be useful: %v.", result)
	default:
		return http.StatusOK, "ok"
	}
}

// main parses the flags, starts the probe worker and serves the index,
// /quit and /healthz endpoints on the configured port until the server fails.
func main() {
	flag.Parse()
	if *period <= 0 {
		// time.NewTicker panics on a non-positive duration; fail with a
		// readable message instead.
		log.Fatalf("-period must be positive, got %v", *period)
	}

	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		fmt.Fprint(w, indexPage())
	})

	http.HandleFunc("/quit", func(w http.ResponseWriter, r *http.Request) {
		log.Printf("Shutdown requested via /quit by %v", r.RemoteAddr)
		os.Exit(0)
	})

	prober = newExecWorker(*cmd, *period)
	go prober.start()

	http.HandleFunc("/healthz", healthzHandler)
	err := http.ListenAndServe(fmt.Sprintf("0.0.0.0:%d", *port), nil)
	// log.Fatal exits without running deferred calls, so stop the worker
	// explicitly before reporting the error.
	close(prober.stopCh)
	log.Fatal(err)
}

// healthzHandler answers /healthz from the prober's latest result: "ok" with
// a 200 when the probe is healthy and fresh, otherwise a 503 carrying the
// reason.
func healthzHandler(w http.ResponseWriter, r *http.Request) {
	log.Printf("Client ip %v requesting /healthz probe servicing cmd %v", r.RemoteAddr, *cmd)

	code, msg := healthzStatus(prober.getResults(), *maxLatency)
	if code != http.StatusOK {
		// msg embeds command output; never use it as a format string.
		log.Print(msg)
		http.Error(w, msg, code)
		return
	}
	fmt.Fprint(w, msg)
}