From 8df33bc1a75ca7e81a07b49c033ae94e04dd66f4 Mon Sep 17 00:00:00 2001
From: Robert Bailey
Date: Tue, 4 Aug 2015 11:14:46 -0700
Subject: [PATCH] Register the kubelet on the master node with an apiserver.

This option is separated from the apiserver running locally on the master
node so that it can be optionally enabled or disabled as needed.

Also, fix the healthchecking configuration for the master components,
which previously only worked by coincidence:

If a kubelet doesn't register with a master, it never bothers to figure
out what its local address is, so it ends up constructing a URL like
http://:8080/healthz for the http probe. This happens to work on the
master because all of the pods are using host networking and explicitly
binding to 127.0.0.1. Once the kubelet is registered with the master and
it determines the local node address, it tries to healthcheck on an
address where the pod isn't listening, and the kubelet periodically
restarts each master component when the liveness probe fails.
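
The fix pins the probe host so the health check keeps hitting the loopback
address that the host-networked master pods actually bind to. As a rough
illustration (the exact ports and delays for each component are in the
manifest diffs below):

    "livenessProbe": {
      "httpGet": {
        "host": "127.0.0.1",
        "port": 8080,
        "path": "/healthz"
      },
      "initialDelaySeconds": 15,
      "timeoutSeconds": 15
    }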
---
 cluster/gce/config-default.sh                  |  1 +
 cluster/gce/config-test.sh                     |  1 +
 cluster/gce/configure-vm.sh                    | 53 +++++++++++++++----
 cluster/gce/coreos/helper.sh                   |  3 ++
 cluster/gce/debian/helper.sh                   |  5 +-
 cluster/gce/util.sh                            |  5 ++
 cluster/saltbase/salt/etcd/etcd.manifest       |  5 +-
 .../kube-apiserver/kube-apiserver.manifest     | 10 ++--
 .../kube-controller-manager.manifest           | 10 ++--
 .../kube-scheduler/kube-scheduler.manifest     | 10 ++--
 cluster/saltbase/salt/kubelet/default          | 25 +++++----
 cluster/saltbase/salt/kubelet/init.sls         | 23 ++------
 .../kubelet/{kubernetes_auth => kubeconfig}    |  0
 cluster/validate-cluster.sh                    | 16 ++++--
 .../high-availability/kube-apiserver.manifest  | 14 ++++-
 .../kube-controller-manager.manifest           | 10 ++--
 .../high-availability/kube-scheduler.manifest  | 10 ++--
 17 files changed, 140 insertions(+), 61 deletions(-)
 rename cluster/saltbase/salt/kubelet/{kubernetes_auth => kubeconfig} (100%)

diff --git a/cluster/gce/config-default.sh b/cluster/gce/config-default.sh
index 051b421a24d..084eff975eb 100755
--- a/cluster/gce/config-default.sh
+++ b/cluster/gce/config-default.sh
@@ -25,6 +25,7 @@
 MASTER_DISK_TYPE=pd-ssd
 MASTER_DISK_SIZE=${MASTER_DISK_SIZE:-20GB}
 MINION_DISK_TYPE=pd-standard
 MINION_DISK_SIZE=${MINION_DISK_SIZE:-100GB}
+REGISTER_MASTER_KUBELET=${REGISTER_MASTER:-true}
 OS_DISTRIBUTION=${KUBE_OS_DISTRIBUTION:-debian}
 MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-container-vm-v20150715}
diff --git a/cluster/gce/config-test.sh b/cluster/gce/config-test.sh
index afe52805cae..454f245ece7 100755
--- a/cluster/gce/config-test.sh
+++ b/cluster/gce/config-test.sh
@@ -25,6 +25,7 @@
 MASTER_DISK_TYPE=pd-ssd
 MASTER_DISK_SIZE=${MASTER_DISK_SIZE:-20GB}
 MINION_DISK_TYPE=pd-standard
 MINION_DISK_SIZE=${MINION_DISK_SIZE:-100GB}
+REGISTER_MASTER_KUBELET=${REGISTER_MASTER:-false}
 KUBE_APISERVER_REQUEST_TIMEOUT=300
 OS_DISTRIBUTION=${KUBE_OS_DISTRIBUTION:-debian}
diff --git a/cluster/gce/configure-vm.sh b/cluster/gce/configure-vm.sh
index 3281501543d..07db65ad4e5 100644
--- a/cluster/gce/configure-vm.sh
+++ b/cluster/gce/configure-vm.sh
@@ -358,13 +358,35 @@ function create-salt-master-auth() {
   fi
 }

+# This should happen only on cluster initialization. After the first boot
+# and on upgrade, the kubeconfig file exists on the master-pd and should
+# never be touched again.
+#
+# - Uses KUBELET_CA_CERT (falling back to CA_CERT), KUBELET_CERT, and
+#   KUBELET_KEY to generate a kubeconfig file for the kubelet to securely
+#   connect to the apiserver.
+function create-salt-master-kubelet-auth() {
+  # Only configure the kubelet on the master if the required variables are
+  # set in the environment.
+  if [[ ! -z "${KUBELET_APISERVER:-}" ]] && [[ ! -z "${KUBELET_CERT:-}" ]] && [[ ! -z "${KUBELET_KEY:-}" ]]; then
+    create-salt-kubelet-auth
+  fi
+}
+
 # This should happen both on cluster initialization and node upgrades.
 #
-# - Uses CA_CERT, KUBELET_CERT, and KUBELET_KEY to generate a kubeconfig file
-#   for the kubelet to securely connect to the apiserver.
+# - Uses KUBELET_CA_CERT (falling back to CA_CERT), KUBELET_CERT, and
+#   KUBELET_KEY to generate a kubeconfig file for the kubelet to securely
+#   connect to the apiserver.
+
 function create-salt-kubelet-auth() {
   local -r kubelet_kubeconfig_file="/srv/salt-overlay/salt/kubelet/kubeconfig"
   if [ ! -e "${kubelet_kubeconfig_file}" ]; then
+    # If there isn't a CA certificate set specifically for the kubelet, use
+    # the cluster CA certificate.
+    if [[ -z "${KUBELET_CA_CERT:-}" ]]; then
+      KUBELET_CA_CERT="${CA_CERT}"
+    fi
     mkdir -p /srv/salt-overlay/salt/kubelet
     (umask 077; cat > "${kubelet_kubeconfig_file}" <<EOF
+    cat <<EOF >>/etc/salt/minion.d/grains.conf
+  kubelet_api_servers: '${KUBELET_APISERVER}'
+  cbr-cidr: 10.123.45.0/30
+EOF
+  else
+    # If the kubelet is running disconnected from a master, give it a fixed
+    # CIDR range.
+    cat <<EOF >>/etc/salt/minion.d/grains.conf
+  cbr-cidr: ${MASTER_IP_RANGE}
 EOF
   fi
 }
@@ -519,6 +555,7 @@ grains:
     - kubernetes-pool
   cbr-cidr: 10.123.45.0/30
   cloud: gce
+  api_servers: '${KUBERNETES_MASTER_NAME}'
 EOF
 }
@@ -536,12 +573,6 @@ EOF
   fi
 }

-function salt-set-apiserver() {
-  cat <<EOF >>/etc/salt/minion.d/grains.conf
-  api_servers: '${KUBERNETES_MASTER_NAME}'
-EOF
-}
-
 function configure-salt() {
   fix-apt-sources
   mkdir -p /etc/salt/minion.d
@@ -554,7 +585,6 @@ function configure-salt() {
   else
     salt-node-role
     salt-docker-opts
-    salt-set-apiserver
   fi
   install-salt
   stop-salt-minion
@@ -577,6 +607,7 @@ if [[ -z "${is_push}" ]]; then
   create-salt-pillar
   if [[ "${KUBERNETES_MASTER}" == "true" ]]; then
     create-salt-master-auth
+    create-salt-master-kubelet-auth
   else
     create-salt-kubelet-auth
     create-salt-kubeproxy-auth
diff --git a/cluster/gce/coreos/helper.sh b/cluster/gce/coreos/helper.sh
index d4e9ad60e2b..5497c945450 100755
--- a/cluster/gce/coreos/helper.sh
+++ b/cluster/gce/coreos/helper.sh
@@ -59,8 +59,11 @@
 RKT_VERSION: $(yaml-quote ${RKT_VERSION})
 CA_CERT: $(yaml-quote ${CA_CERT_BASE64})
 MASTER_CERT: $(yaml-quote ${MASTER_CERT_BASE64:-})
 MASTER_KEY: $(yaml-quote ${MASTER_KEY_BASE64:-})
+KUBELET_CERT: $(yaml-quote ${KUBELET_CERT_BASE64:-})
+KUBELET_KEY: $(yaml-quote ${KUBELET_KEY_BASE64:-})
 KUBECFG_CERT: $(yaml-quote ${KUBECFG_CERT_BASE64:-})
 KUBECFG_KEY: $(yaml-quote ${KUBECFG_KEY_BASE64:-})
+KUBELET_APISERVER: $(yaml-quote ${KUBELET_APISERVER:-})
 EOF
   else
     cat >>$file <<EOF
>$file <
>$file <
>$file <
 "${MINIONS_FILE}" || true
+    #
+    # Suppress errors from kubectl output because during cluster bootstrapping
+    # for clusters where the master node is registered, the apiserver will become
+    # available and then get restarted as the kubelet configures the docker bridge.
+    "${KUBE_ROOT}/cluster/kubectl.sh" get nodes > "${MINIONS_FILE}" 2> /dev/null || true
     found=$(cat "${MINIONS_FILE}" | sed '1d' | grep -c .) || true
     ready=$(cat "${MINIONS_FILE}" | sed '1d' | awk '{print $NF}' | grep -c '^Ready') || true
-    if (( ${found} == "${NUM_MINIONS}" )) && (( ${ready} == "${NUM_MINIONS}")); then
+    if (( "${found}" == "${EXPECTED_NUM_NODES}" )) && (( "${ready}" == "${EXPECTED_NUM_NODES}")); then
       break
     else
       # Set the timeout to ~10minutes (40 x 15 second) to avoid timeouts for 100-node clusters.
       if (( attempt > 40 )); then
-        echo -e "${color_red}Detected ${ready} ready nodes, found ${found} nodes out of expected ${NUM_MINIONS}. Your cluster may not be working.${color_norm}"
+        echo -e "${color_red}Detected ${ready} ready nodes, found ${found} nodes out of expected ${EXPECTED_NUM_NODES}. Your cluster may not be working.${color_norm}"
         cat -n "${MINIONS_FILE}"
         exit 2
       else
-        echo -e "${color_yellow}Waiting for ${NUM_MINIONS} ready nodes. ${ready} ready nodes, ${found} registered. Retrying.${color_norm}"
+        echo -e "${color_yellow}Waiting for ${EXPECTED_NUM_NODES} ready nodes. ${ready} ready nodes, ${found} registered. Retrying.${color_norm}"
       fi
       attempt=$((attempt+1))
       sleep 15
diff --git a/examples/high-availability/kube-apiserver.manifest b/examples/high-availability/kube-apiserver.manifest
index 41f29dfe910..99f0f6f7c46 100644
--- a/examples/high-availability/kube-apiserver.manifest
+++ b/examples/high-availability/kube-apiserver.manifest
@@ -1,7 +1,10 @@
 {
 "apiVersion": "v1",
 "kind": "Pod",
-"metadata": {"name":"kube-apiserver"},
+"metadata": {
+  "name":"kube-apiserver",
+  "namespace": "kube-system"
+},
 "spec":{
 "hostNetwork": true,
 "containers":[
@@ -13,6 +16,15 @@
       "-c",
       "/usr/local/bin/kube-apiserver --address=0.0.0.0 --etcd_servers=http://kube0.ha:2379 --service-cluster-ip-range=10.0.0.0/16 --v=4 --allow_privileged=True 1>>/var/log/kube-apiserver.log 2>&1"
     ],
+    "livenessProbe": {
+      "httpGet": {
+        "host": "127.0.0.1",
+        "port": 8080,
+        "path": "/healthz"
+      },
+      "initialDelaySeconds": 15,
+      "timeoutSeconds": 15
+    },
     "ports":[
       { "name": "https",
       "containerPort": 443,
diff --git a/examples/high-availability/kube-controller-manager.manifest b/examples/high-availability/kube-controller-manager.manifest
index 21446977aa9..4d358da5d32 100644
--- a/examples/high-availability/kube-controller-manager.manifest
+++ b/examples/high-availability/kube-controller-manager.manifest
@@ -1,7 +1,10 @@
 {
 "apiVersion": "v1",
 "kind": "Pod",
-"metadata": {"name":"kube-controller-manager"},
+"metadata": {
+  "name":"kube-controller-manager",
+  "namespace": "kube-system"
+},
 "spec":{
 "hostNetwork": true,
 "containers":[
@@ -15,8 +18,9 @@
     ],
     "livenessProbe": {
       "httpGet": {
-        "path": "/healthz",
-        "port": 10252
+        "host": "127.0.0.1",
+        "port": 10252,
+        "path": "/healthz"
       },
       "initialDelaySeconds": 15,
       "timeoutSeconds": 1
diff --git a/examples/high-availability/kube-scheduler.manifest b/examples/high-availability/kube-scheduler.manifest
index 43171c01222..50e89d695fc 100644
--- a/examples/high-availability/kube-scheduler.manifest
+++ b/examples/high-availability/kube-scheduler.manifest
@@ -1,7 +1,10 @@
 {
 "apiVersion": "v1",
 "kind": "Pod",
-"metadata": {"name":"kube-scheduler"},
+"metadata": {
+  "name":"kube-scheduler",
+  "namespace": "kube-system"
+},
 "spec":{
 "hostNetwork": true,
 "containers":[
@@ -15,8 +18,9 @@
     ],
     "livenessProbe": {
       "httpGet": {
-        "path": "/healthz",
-        "port": 10251
+        "host": "127.0.0.1",
+        "port": 10251,
+        "path": "/healthz"
       },
       "initialDelaySeconds": 15,
       "timeoutSeconds": 1