From edf6d8ee3f6ea50ab08dec5e2ef3bae67d5e9682 Mon Sep 17 00:00:00 2001 From: Vishnu Kannan Date: Thu, 6 Nov 2014 19:35:33 +0000 Subject: [PATCH 1/3] Use Heapster as an in-built monitoring solution for Kubernetes in GCE. Users will have an option to enable it when they setup their cluster (kube-up). --- cluster/gce/util.sh | 29 +++++++++++ cluster/kube-up.sh | 1 + examples/monitoring/README.md | 52 +++++++++++++++++++ examples/monitoring/heapster-pod.json | 18 +++++++ examples/monitoring/influx-grafana-pod.json | 34 ++++++++++++ .../monitoring/influx-grafana-service.json | 10 ++++ 6 files changed, 144 insertions(+) create mode 100644 examples/monitoring/README.md create mode 100644 examples/monitoring/heapster-pod.json create mode 100644 examples/monitoring/influx-grafana-pod.json create mode 100644 examples/monitoring/influx-grafana-service.json diff --git a/cluster/gce/util.sh b/cluster/gce/util.sh index 295eb2c7a85..9210ecbcd85 100755 --- a/cluster/gce/util.sh +++ b/cluster/gce/util.sh @@ -429,6 +429,9 @@ EOF function kube-down { # Detect the project into $PROJECT detect-project + + # Monitoring might have been setup. It doesn't hurt to attempt shutdown even it wasn't setup. + teardown-monitoring echo "Bringing down cluster" gcutil deletefirewall \ @@ -569,3 +572,29 @@ function ssh-to-node { function restart-kube-proxy { ssh-to-node "$1" "sudo /etc/init.d/kube-proxy restart" } + +# Setup monitoring using heapster and InfluxDB +function setup-monitoring { + read -p "Setup monitoring of the cluster using heapster (https://github.com/GoogleCloudPlatform/heapster) [Y|N]? " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]] + then + teardown-monitoring + kubectl.sh create -f "${KUBE_ROOT}/examples/monitoring/influx-grafana-pod.json" && + kubectl.sh create -f "${KUBE_ROOT}/examples/monitoring/influx-grafana-service.json" && + kubectl.sh create -f "${KUBE_ROOT}/examples/monitoring/heapster-pod.json" + if [ $? -ne 0 ]; then + teardown-monitoring + else + dashboardIP="http://`kubectl.sh get -o json pod influx-grafana | grep hostIP | awk '{print $2}' | sed 's/[,|\"]//g'`" + echo "Grafana dashboard is available at $dashboardIP" + echo "username is 'admin' and password is 'admin'" + fi + fi +} + +function teardown-monitoring { + kubectl.sh delete pods heapster || true + kubectl.sh delete pods influx-grafana || true + kubectl.sh delete services influx-master || true +} \ No newline at end of file diff --git a/cluster/kube-up.sh b/cluster/kube-up.sh index 09466f478d3..c2434d4bf14 100755 --- a/cluster/kube-up.sh +++ b/cluster/kube-up.sh @@ -34,5 +34,6 @@ verify-prereqs kube-up "${KUBE_ROOT}/cluster/validate-cluster.sh" +setup-monitoring echo "Done" diff --git a/examples/monitoring/README.md b/examples/monitoring/README.md new file mode 100644 index 00000000000..37149786a88 --- /dev/null +++ b/examples/monitoring/README.md @@ -0,0 +1,52 @@ +Heapster +=========== + +Heapster enables monitoring of Kubernetes Clusters using [cAdvisor](https://github.com/google/cadvisor). It currently works only on GCE. + +#####Run Heapster in a Kubernetes cluster with an Influxdb backend and [Grafana](http://grafana.org/docs/features/influxdb) + +**Step 1: Setup Kube cluster** + +Fork the Kubernetes repository and [turn up a Kubernetes cluster](https://github.com/GoogleCloudPlatform/kubernetes-new#contents), if you haven't already. Make sure kubectl.sh is exported. + +**Step 2: Start a Pod with Influxdb, grafana and elasticsearch** + +```shell +$ kubectl.sh create -f deploy/influx-grafana-pod.json +``` + +**Step 3: Start Influxdb service** + +```shell +$ kubectl.sh create -f deploy/influx-grafana-service.json +``` + +**Step 4: Update firewall rules** + +Open up ports tcp:80,8083,8086,9200. +```shell +$ gcutil addfirewall --allowed=tcp:80,tcp:8083,tcp:8086,tcp:9200 --target_tags=kubernetes-minion heapster +``` + +**Step 5: Start Heapster Pod** + +```shell +$ kubectl.sh create -f deploy/heapster-pod.json +``` + +Verify that all the pods and services are up and running: + +```shell +$ kubectl.sh get pods +``` +```shell +$ kubectl.sh get services +``` + +To start monitoring the cluster using grafana, find out the the external IP of the minion where the 'influx-grafana' Pod is running from the output of `kubectl.sh get pods`, and visit `http://:80`. + +To access the Influxdb UI visit `http://:8083`. + +#####Hints +* Grafana's default username and password is 'admin'. You can change that by modifying the grafana container [here](influx-grafana/deploy/grafana-influxdb-pod.json) +* To enable memory and swap accounting on the minions follow the instructions [here](https://docs.docker.com/installation/ubuntulinux/#memory-and-swap-accounting) diff --git a/examples/monitoring/heapster-pod.json b/examples/monitoring/heapster-pod.json new file mode 100644 index 00000000000..7ecbc505b26 --- /dev/null +++ b/examples/monitoring/heapster-pod.json @@ -0,0 +1,18 @@ +{ + "id": "heapster", + "kind": "Pod", + "apiVersion": "v1beta1", + "desiredState": { + "manifest": { + "version": "v1beta1", + "id": "heapster", + "containers": [{ + "name": "heapster", + "image": "kubernetes/heapster", + }] + } + }, + "labels": { + "name": "heapster", + } +} diff --git a/examples/monitoring/influx-grafana-pod.json b/examples/monitoring/influx-grafana-pod.json new file mode 100644 index 00000000000..001ecdda1bc --- /dev/null +++ b/examples/monitoring/influx-grafana-pod.json @@ -0,0 +1,34 @@ +{ + "id": "influx-grafana", + "kind": "Pod", + "apiVersion": "v1beta1", + "desiredState": { + "manifest": { + "version": "v1beta1", + "id": "influx-grafana", + "containers": [{ + "name": "influxdb", + "image": "kubernetes/heapster_influxdb", + "ports": [ + {"containerPort": 8083, "hostPort": 8083}, + {"containerPort": 8086, "hostPort": 8086}, + {"containerPort": 8090, "hostPort": 8090}, + {"containerPort": 8099, "hostPort": 8099}] + }, { + "name": "grafana", + "image": "kubernetes/heapster_grafana", + "ports": [{"containerPort": 80, "hostPort": 80}], + "env": [{"name": HTTP_USER, "value": admin}, + {"name": HTTP_PASS, "value": admin}], + }, { + "name": "elasticsearch", + "image": "dockerfile/elasticsearch", + "ports": [{"containerPort": 9200, "hostPort": 9200}, + {"containerPort": 9300}], + }] + }, + }, + "labels": { + "name": "influxdb", + } +} diff --git a/examples/monitoring/influx-grafana-service.json b/examples/monitoring/influx-grafana-service.json new file mode 100644 index 00000000000..18e7bca4c8a --- /dev/null +++ b/examples/monitoring/influx-grafana-service.json @@ -0,0 +1,10 @@ +{ + "id": "influx-master", + "kind": "Service", + "apiVersion": "v1beta1", + "port": 8085, + "containerPort": 8086, + "provider": "kubernetes-default", + "component": "influxdb" + "selector": { "name": "influxdb" } +} \ No newline at end of file From ad9cb982e544e4a5a1ac2065f96c9b10178b50ea Mon Sep 17 00:00:00 2001 From: Vishnu Kannan Date: Thu, 6 Nov 2014 22:25:18 +0000 Subject: [PATCH 2/3] Add an option for enabling monitoring in gce/config-default.sh and disable it for tests. Updated heapster readme. --- cluster/gce/config-default.sh | 2 ++ cluster/gce/config-test.sh | 1 + cluster/gce/util.sh | 5 +--- examples/monitoring/README.md | 53 ++--------------------------------- 4 files changed, 6 insertions(+), 55 deletions(-) diff --git a/cluster/gce/config-default.sh b/cluster/gce/config-default.sh index ab317650cac..8c6e66f055c 100755 --- a/cluster/gce/config-default.sh +++ b/cluster/gce/config-default.sh @@ -35,3 +35,5 @@ MINION_SCOPES="compute-rw" # Increase the sleep interval value if concerned about API rate limits. 3, in seconds, is the default. POLL_SLEEP_INTERVAL=3 PORTAL_NET="10.0.0.0/16" +# When set to true, heapster will be setup as part of the cluster bring up. +MONITORING=true \ No newline at end of file diff --git a/cluster/gce/config-test.sh b/cluster/gce/config-test.sh index 496f9f52cdd..92e6e67cf35 100755 --- a/cluster/gce/config-test.sh +++ b/cluster/gce/config-test.sh @@ -35,3 +35,4 @@ MINION_SCOPES="" # Increase the sleep interval value if concerned about API rate limits. 3, in seconds, is the default. POLL_SLEEP_INTERVAL=3 PORTAL_NET="10.0.0.0/16" +MONITORING=false \ No newline at end of file diff --git a/cluster/gce/util.sh b/cluster/gce/util.sh index 9210ecbcd85..638b4b49d17 100755 --- a/cluster/gce/util.sh +++ b/cluster/gce/util.sh @@ -575,10 +575,7 @@ function restart-kube-proxy { # Setup monitoring using heapster and InfluxDB function setup-monitoring { - read -p "Setup monitoring of the cluster using heapster (https://github.com/GoogleCloudPlatform/heapster) [Y|N]? " -n 1 -r - echo - if [[ $REPLY =~ ^[Yy]$ ]] - then + if [ $MONITORING ]; then teardown-monitoring kubectl.sh create -f "${KUBE_ROOT}/examples/monitoring/influx-grafana-pod.json" && kubectl.sh create -f "${KUBE_ROOT}/examples/monitoring/influx-grafana-service.json" && diff --git a/examples/monitoring/README.md b/examples/monitoring/README.md index 37149786a88..e40cb638c26 100644 --- a/examples/monitoring/README.md +++ b/examples/monitoring/README.md @@ -1,52 +1,3 @@ -Heapster -=========== +# Heapster -Heapster enables monitoring of Kubernetes Clusters using [cAdvisor](https://github.com/google/cadvisor). It currently works only on GCE. - -#####Run Heapster in a Kubernetes cluster with an Influxdb backend and [Grafana](http://grafana.org/docs/features/influxdb) - -**Step 1: Setup Kube cluster** - -Fork the Kubernetes repository and [turn up a Kubernetes cluster](https://github.com/GoogleCloudPlatform/kubernetes-new#contents), if you haven't already. Make sure kubectl.sh is exported. - -**Step 2: Start a Pod with Influxdb, grafana and elasticsearch** - -```shell -$ kubectl.sh create -f deploy/influx-grafana-pod.json -``` - -**Step 3: Start Influxdb service** - -```shell -$ kubectl.sh create -f deploy/influx-grafana-service.json -``` - -**Step 4: Update firewall rules** - -Open up ports tcp:80,8083,8086,9200. -```shell -$ gcutil addfirewall --allowed=tcp:80,tcp:8083,tcp:8086,tcp:9200 --target_tags=kubernetes-minion heapster -``` - -**Step 5: Start Heapster Pod** - -```shell -$ kubectl.sh create -f deploy/heapster-pod.json -``` - -Verify that all the pods and services are up and running: - -```shell -$ kubectl.sh get pods -``` -```shell -$ kubectl.sh get services -``` - -To start monitoring the cluster using grafana, find out the the external IP of the minion where the 'influx-grafana' Pod is running from the output of `kubectl.sh get pods`, and visit `http://:80`. - -To access the Influxdb UI visit `http://:8083`. - -#####Hints -* Grafana's default username and password is 'admin'. You can change that by modifying the grafana container [here](influx-grafana/deploy/grafana-influxdb-pod.json) -* To enable memory and swap accounting on the minions follow the instructions [here](https://docs.docker.com/installation/ubuntulinux/#memory-and-swap-accounting) +Heapster enables monitoring of Kubernetes Clusters using [cAdvisor](https://github.com/google/cadvisor). Detailed information about heapster can be found [here](https://github.com/GoogleCloudPlatform/heapster). From 37af9210bafed2a1f1bab14c73bb2aa6f7949412 Mon Sep 17 00:00:00 2001 From: Vishnu Kannan Date: Fri, 7 Nov 2014 01:23:14 +0000 Subject: [PATCH 3/3] Improve the monitoring setup script and fix bugs in the monitoring config. --- cluster/gce/config-default.sh | 2 +- cluster/gce/config-test.sh | 2 +- cluster/gce/util.sh | 43 +++++++++++++------ cluster/rackspace/util.sh | 4 ++ cluster/vagrant/util.sh | 4 ++ cluster/vsphere/util.sh | 4 ++ .../monitoring/influx-grafana-service.json | 4 +- 7 files changed, 46 insertions(+), 17 deletions(-) diff --git a/cluster/gce/config-default.sh b/cluster/gce/config-default.sh index 8c6e66f055c..2b98bc6fd8d 100755 --- a/cluster/gce/config-default.sh +++ b/cluster/gce/config-default.sh @@ -36,4 +36,4 @@ MINION_SCOPES="compute-rw" POLL_SLEEP_INTERVAL=3 PORTAL_NET="10.0.0.0/16" # When set to true, heapster will be setup as part of the cluster bring up. -MONITORING=true \ No newline at end of file +MONITORING=true diff --git a/cluster/gce/config-test.sh b/cluster/gce/config-test.sh index 92e6e67cf35..8f3a7f4d340 100755 --- a/cluster/gce/config-test.sh +++ b/cluster/gce/config-test.sh @@ -35,4 +35,4 @@ MINION_SCOPES="" # Increase the sleep interval value if concerned about API rate limits. 3, in seconds, is the default. POLL_SLEEP_INTERVAL=3 PORTAL_NET="10.0.0.0/16" -MONITORING=false \ No newline at end of file +MONITORING=false diff --git a/cluster/gce/util.sh b/cluster/gce/util.sh index 638b4b49d17..805fd108fc2 100755 --- a/cluster/gce/util.sh +++ b/cluster/gce/util.sh @@ -429,9 +429,6 @@ EOF function kube-down { # Detect the project into $PROJECT detect-project - - # Monitoring might have been setup. It doesn't hurt to attempt shutdown even it wasn't setup. - teardown-monitoring echo "Bringing down cluster" gcutil deletefirewall \ @@ -577,21 +574,41 @@ function restart-kube-proxy { function setup-monitoring { if [ $MONITORING ]; then teardown-monitoring - kubectl.sh create -f "${KUBE_ROOT}/examples/monitoring/influx-grafana-pod.json" && - kubectl.sh create -f "${KUBE_ROOT}/examples/monitoring/influx-grafana-service.json" && - kubectl.sh create -f "${KUBE_ROOT}/examples/monitoring/heapster-pod.json" + if ! gcutil getfirewall monitoring-heapster &> /dev/null; then + gcutil addfirewall monitoring-heapster \ + --project "${PROJECT}" \ + --norespect_terminal_width \ + --sleep_between_polls "${POLL_SLEEP_INTERVAL}" \ + --target_tags="${MINION_TAG}" \ + --allowed "tcp:80,tcp:8083,tcp:8086,tcp:9200"; + if [ $? -ne 0 ]; then + echo "Failed to Setup Firewall for Monitoring" && false + fi + fi + + kubectl.sh create -f "${KUBE_ROOT}/examples/monitoring/influx-grafana-pod.json" > /dev/null && + kubectl.sh create -f "${KUBE_ROOT}/examples/monitoring/influx-grafana-service.json" > /dev/null && + kubectl.sh create -f "${KUBE_ROOT}/examples/monitoring/heapster-pod.json" > /dev/null if [ $? -ne 0 ]; then + echo "Failed to Setup Monitoring" teardown-monitoring else - dashboardIP="http://`kubectl.sh get -o json pod influx-grafana | grep hostIP | awk '{print $2}' | sed 's/[,|\"]//g'`" - echo "Grafana dashboard is available at $dashboardIP" - echo "username is 'admin' and password is 'admin'" + dashboardIP="http://admin:admin@`kubectl.sh get -o json pod influx-grafana | grep hostIP | awk '{print $2}' | sed 's/[,|\"]//g'`" + echo "Grafana dashboard will be available at $dashboardIP. Wait for the monitoring dashboard to be online." fi fi } function teardown-monitoring { - kubectl.sh delete pods heapster || true - kubectl.sh delete pods influx-grafana || true - kubectl.sh delete services influx-master || true -} \ No newline at end of file + if [ $MONITORING ]; then + kubectl.sh delete pods heapster &> /dev/null || true + kubectl.sh delete pods influx-grafana &> /dev/null || true + kubectl.sh delete services influx-master &> /dev/null || true + gcutil deletefirewall \ + --project "${PROJECT}" \ + --norespect_terminal_width \ + --sleep_between_polls "${POLL_SLEEP_INTERVAL}" \ + --force \ + monitoring-heapster || true > /dev/null + fi +} diff --git a/cluster/rackspace/util.sh b/cluster/rackspace/util.sh index 86f898f2738..4e93aa1b63e 100644 --- a/cluster/rackspace/util.sh +++ b/cluster/rackspace/util.sh @@ -322,3 +322,7 @@ kube-up() { echo " subject to \"Man in the middle\" type attacks." echo } + +function setup-monitoring { + echo "TODO" +} diff --git a/cluster/vagrant/util.sh b/cluster/vagrant/util.sh index 0000d0e90e7..c3c7774f0b9 100644 --- a/cluster/vagrant/util.sh +++ b/cluster/vagrant/util.sh @@ -172,3 +172,7 @@ function ssh-to-node { function restart-kube-proxy { ssh-to-node "$1" "sudo systemctl restart kube-proxy" } + +function setup-monitoring { + echo "TODO" +} diff --git a/cluster/vsphere/util.sh b/cluster/vsphere/util.sh index 7a85af1d4aa..c149bff3e2e 100755 --- a/cluster/vsphere/util.sh +++ b/cluster/vsphere/util.sh @@ -471,3 +471,7 @@ function test-setup { function test-teardown { echo "TODO" } + +function setup-monitoring { + echo "TODO" +} diff --git a/examples/monitoring/influx-grafana-service.json b/examples/monitoring/influx-grafana-service.json index 18e7bca4c8a..d83724a2c9f 100644 --- a/examples/monitoring/influx-grafana-service.json +++ b/examples/monitoring/influx-grafana-service.json @@ -5,6 +5,6 @@ "port": 8085, "containerPort": 8086, "provider": "kubernetes-default", - "component": "influxdb" + "component": "influxdb", "selector": { "name": "influxdb" } -} \ No newline at end of file +}