Register the kubelet on the master node with an apiserver. This option is
separated from the apiserver running locally on the master node so that it
can be optionally enabled or disabled as needed.

Also, fix the healthchecking configuration for the master components, which
was previously only working by coincidence:

If a kubelet doesn't register with a master, it never bothers to figure out
what its local address is, in which case it ends up constructing a URL like
http://:8080/healthz for the http probe. This happens to work on the master
because all of the pods are using host networking and explicitly binding to
127.0.0.1. Once the kubelet is registered with the master and determines
the local node address, it tries to healthcheck on an address where the pod
isn't listening, and the kubelet periodically restarts each master component
when the liveness probe fails.
This commit is contained in:
Robert Bailey 2015-08-04 11:14:46 -07:00
parent 1407aee8b0
commit 8df33bc1a7
17 changed files with 140 additions and 61 deletions

View File

@ -25,6 +25,7 @@ MASTER_DISK_TYPE=pd-ssd
MASTER_DISK_SIZE=${MASTER_DISK_SIZE:-20GB}
MINION_DISK_TYPE=pd-standard
MINION_DISK_SIZE=${MINION_DISK_SIZE:-100GB}
REGISTER_MASTER_KUBELET=${REGISTER_MASTER:-true}
OS_DISTRIBUTION=${KUBE_OS_DISTRIBUTION:-debian}
MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-container-vm-v20150715}

View File

@ -25,6 +25,7 @@ MASTER_DISK_TYPE=pd-ssd
MASTER_DISK_SIZE=${MASTER_DISK_SIZE:-20GB}
MINION_DISK_TYPE=pd-standard
MINION_DISK_SIZE=${MINION_DISK_SIZE:-100GB}
REGISTER_MASTER_KUBELET=${REGISTER_MASTER:-false}
KUBE_APISERVER_REQUEST_TIMEOUT=300
OS_DISTRIBUTION=${KUBE_OS_DISTRIBUTION:-debian}

View File

@ -358,13 +358,35 @@ function create-salt-master-auth() {
fi
}
# This should happen only on cluster initialization. After the first boot
# and on upgrade, the kubeconfig file exists on the master-pd and should
# never be touched again.
#
# - Uses KUBELET_CA_CERT (falling back to CA_CERT), KUBELET_CERT, and
# KUBELET_KEY to generate a kubeconfig file for the kubelet to securely
# connect to the apiserver.
function create-salt-master-kubelet-auth() {
# Only configure the kubelet on the master if the required variables are
# set in the environment.
if [[ ! -z "${KUBELET_APISERVER:-}" ]] && [[ ! -z "${KUBELET_CERT:-}" ]] && [[ ! -z "${KUBELET_KEY:-}" ]]; then
create-salt-kubelet-auth
fi
}
# This should happen both on cluster initialization and node upgrades.
#
# - Uses CA_CERT, KUBELET_CERT, and KUBELET_KEY to generate a kubeconfig file
# for the kubelet to securely connect to the apiserver.
# - Uses KUBELET_CA_CERT (falling back to CA_CERT), KUBELET_CERT, and
# KUBELET_KEY to generate a kubeconfig file for the kubelet to securely
# connect to the apiserver.
function create-salt-kubelet-auth() {
local -r kubelet_kubeconfig_file="/srv/salt-overlay/salt/kubelet/kubeconfig"
if [ ! -e "${kubelet_kubeconfig_file}" ]; then
# If there isn't a CA certificate set specifically for the kubelet, use
# the cluster CA certificate.
if [[ -z "${KUBELET_CA_CERT:-}" ]]; then
KUBELET_CA_CERT="${CA_CERT}"
fi
mkdir -p /srv/salt-overlay/salt/kubelet
(umask 077;
cat > "${kubelet_kubeconfig_file}" <<EOF
@ -378,7 +400,7 @@ users:
clusters:
- name: local
cluster:
certificate-authority-data: ${CA_CERT}
certificate-authority-data: ${KUBELET_CA_CERT}
contexts:
- context:
cluster: local
@ -493,7 +515,6 @@ function salt-master-role() {
grains:
roles:
- kubernetes-master
cbr-cidr: ${MASTER_IP_RANGE}
cloud: gce
EOF
if ! [[ -z "${PROJECT_ID:-}" ]] && ! [[ -z "${TOKEN_URL:-}" ]] && ! [[ -z "${NODE_NETWORK:-}" ]] ; then
@ -508,6 +529,21 @@ EOF
cloud_config: /etc/gce.conf
advertise_address: '${EXTERNAL_IP}'
proxy_ssh_user: '${PROXY_SSH_USER}'
EOF
fi
# If the kubelet on the master is enabled, give it the same CIDR range
# as a generic node.
if [[ ! -z "${KUBELET_APISERVER:-}" ]] && [[ ! -z "${KUBELET_CERT:-}" ]] && [[ ! -z "${KUBELET_KEY:-}" ]]; then
cat <<EOF >>/etc/salt/minion.d/grains.conf
kubelet_api_servers: '${KUBELET_APISERVER}'
cbr-cidr: 10.123.45.0/30
EOF
else
# If the kubelet is running disconnected from a master, give it a fixed
# CIDR range.
cat <<EOF >>/etc/salt/minion.d/grains.conf
cbr-cidr: ${MASTER_IP_RANGE}
EOF
fi
}
@ -519,6 +555,7 @@ grains:
- kubernetes-pool
cbr-cidr: 10.123.45.0/30
cloud: gce
api_servers: '${KUBERNETES_MASTER_NAME}'
EOF
}
@ -536,12 +573,6 @@ EOF
fi
}
function salt-set-apiserver() {
cat <<EOF >>/etc/salt/minion.d/grains.conf
api_servers: '${KUBERNETES_MASTER_NAME}'
EOF
}
function configure-salt() {
fix-apt-sources
mkdir -p /etc/salt/minion.d
@ -554,7 +585,6 @@ function configure-salt() {
else
salt-node-role
salt-docker-opts
salt-set-apiserver
fi
install-salt
stop-salt-minion
@ -577,6 +607,7 @@ if [[ -z "${is_push}" ]]; then
create-salt-pillar
if [[ "${KUBERNETES_MASTER}" == "true" ]]; then
create-salt-master-auth
create-salt-master-kubelet-auth
else
create-salt-kubelet-auth
create-salt-kubeproxy-auth

View File

@ -59,8 +59,11 @@ RKT_VERSION: $(yaml-quote ${RKT_VERSION})
CA_CERT: $(yaml-quote ${CA_CERT_BASE64})
MASTER_CERT: $(yaml-quote ${MASTER_CERT_BASE64:-})
MASTER_KEY: $(yaml-quote ${MASTER_KEY_BASE64:-})
KUBELET_CERT: $(yaml-quote ${KUBELET_CERT_BASE64:-})
KUBELET_KEY: $(yaml-quote ${KUBELET_KEY_BASE64:-})
KUBECFG_CERT: $(yaml-quote ${KUBECFG_CERT_BASE64:-})
KUBECFG_KEY: $(yaml-quote ${KUBECFG_KEY_BASE64:-})
KUBELET_APISERVER: $(yaml-quote ${KUBELET_APISERVER:-})
EOF
else
cat >>$file <<EOF

View File

@ -49,6 +49,8 @@ KUBE_PROXY_TOKEN: $(yaml-quote ${KUBE_PROXY_TOKEN:-})
ADMISSION_CONTROL: $(yaml-quote ${ADMISSION_CONTROL:-})
MASTER_IP_RANGE: $(yaml-quote ${MASTER_IP_RANGE})
CA_CERT: $(yaml-quote ${CA_CERT_BASE64:-})
KUBELET_CERT: $(yaml-quote ${KUBELET_CERT_BASE64:-})
KUBELET_KEY: $(yaml-quote ${KUBELET_KEY_BASE64:-})
EOF
if [ -n "${KUBE_APISERVER_REQUEST_TIMEOUT:-}" ]; then
cat >>$file <<EOF
@ -66,6 +68,7 @@ MASTER_CERT: $(yaml-quote ${MASTER_CERT_BASE64:-})
MASTER_KEY: $(yaml-quote ${MASTER_KEY_BASE64:-})
KUBECFG_CERT: $(yaml-quote ${KUBECFG_CERT_BASE64:-})
KUBECFG_KEY: $(yaml-quote ${KUBECFG_KEY_BASE64:-})
KUBELET_APISERVER: $(yaml-quote ${KUBELET_APISERVER:-})
EOF
if [ -n "${APISERVER_TEST_ARGS:-}" ]; then
cat >>$file <<EOF
@ -93,8 +96,6 @@ EOF
KUBERNETES_MASTER: "false"
ZONE: $(yaml-quote ${ZONE})
EXTRA_DOCKER_OPTS: $(yaml-quote ${EXTRA_DOCKER_OPTS:-})
KUBELET_CERT: $(yaml-quote ${KUBELET_CERT_BASE64:-})
KUBELET_KEY: $(yaml-quote ${KUBELET_KEY_BASE64:-})
EOF
if [ -n "${KUBELET_TEST_ARGS:-}" ]; then
cat >>$file <<EOF

View File

@ -487,6 +487,11 @@ function yaml-quote {
}
function write-master-env {
# If the user requested that the master be part of the cluster, set the
# environment variable to program the master kubelet to register itself.
if [[ "${REGISTER_MASTER_KUBELET:-}" == "true" ]]; then
KUBELET_APISERVER="${MASTER_NAME}"
fi
build-kube-env true "${KUBE_TEMP}/master-kube-env.yaml"
}

View File

@ -1,7 +1,10 @@
{
"apiVersion": "v1",
"kind": "Pod",
"metadata": {"name":"etcd-server"},
"metadata": {
"name":"etcd-server",
"namespace": "kube-system"
},
"spec":{
"hostNetwork": true,
"containers":[

View File

@ -99,7 +99,10 @@
{
"apiVersion": "v1",
"kind": "Pod",
"metadata": {"name":"kube-apiserver"},
"metadata": {
"name":"kube-apiserver",
"namespace": "kube-system"
},
"spec":{
"hostNetwork": true,
"containers":[
@ -118,8 +121,9 @@
],
"livenessProbe": {
"httpGet": {
"path": "/healthz",
"port": 8080
"host": "127.0.0.1",
"port": 8080,
"path": "/healthz"
},
"initialDelaySeconds": 15,
"timeoutSeconds": 15

View File

@ -44,7 +44,10 @@
{
"apiVersion": "v1",
"kind": "Pod",
"metadata": {"name":"kube-controller-manager"},
"metadata": {
"name":"kube-controller-manager",
"namespace": "kube-system"
},
"spec":{
"hostNetwork": true,
"containers":[
@ -63,8 +66,9 @@
],
"livenessProbe": {
"httpGet": {
"path": "/healthz",
"port": 10252
"host": "127.0.0.1",
"port": 10252,
"path": "/healthz"
},
"initialDelaySeconds": 15,
"timeoutSeconds": 15

View File

@ -8,7 +8,10 @@
{
"apiVersion": "v1",
"kind": "Pod",
"metadata": {"name":"kube-scheduler"},
"metadata": {
"name":"kube-scheduler",
"namespace": "kube-system"
},
"spec":{
"hostNetwork": true,
"containers":[
@ -27,8 +30,9 @@
],
"livenessProbe": {
"httpGet": {
"path": "/healthz",
"port": 10251
"host": "127.0.0.1",
"port": 10251,
"path": "/healthz"
},
"initialDelaySeconds": 15,
"timeoutSeconds": 15

View File

@ -22,15 +22,22 @@
{% set api_servers_with_port = api_servers + ":6443" -%}
{% endif -%}
# Disable registration for the kubelet running on the master on AWS, GCE, Vagrant. Also disable
# the debugging handlers (/run and /exec) to prevent arbitrary code execution on
# the master.
# TODO(roberthbailey): Make this configurable via an env var in config-default.sh
{% set debugging_handlers = "--enable-debugging-handlers=true" -%}
{% if grains.cloud in ['aws', 'gce', 'vagrant'] -%}
{% if grains['roles'][0] == 'kubernetes-master' -%}
{% set api_servers_with_port = "" -%}
{% if grains['roles'][0] == 'kubernetes-master' -%}
{% if grains.cloud in ['aws', 'gce', 'vagrant'] -%}
# Unless given a specific directive, disable registration for the kubelet
# running on the master.
{% if grains.kubelet_api_servers is defined -%}
{% set api_servers_with_port = "--api_servers=https://" + grains.kubelet_api_servers -%}
{% else -%}
{% set api_servers_with_port = "" -%}
{% endif -%}
# Disable the debugging handlers (/run and /exec) to prevent arbitrary
# code execution on the master.
# TODO(roberthbailey): Relax this constraint once the master is self-hosted.
{% set debugging_handlers = "--enable-debugging-handlers=false" -%}
{% endif -%}
{% endif -%}
@ -88,7 +95,7 @@
{% set pod_cidr = "" %}
{% if grains['roles'][0] == 'kubernetes-master' and grains.get('cbr-cidr') %}
{% set pod_cidr = "--pod-cidr=" + grains['cbr-cidr'] %}
{% endif %}
{% endif %}
{% set test_args = "" -%}
{% if pillar['kubelet_test_args'] is defined -%}

View File

@ -19,9 +19,9 @@
- group: root
- mode: 755
# The default here is that this file is blank. If this is the case, the kubelet
# won't be able to parse it as JSON and will try to use the kubernetes_auth file
# instead. You'll see a single error line in the kubelet start up file
# The default here is that this file is blank. If this is the case, the kubelet
# won't be able to parse it as JSON and it will not be able to publish events
# to the apiserver. You'll see a single error line in the kubelet start up file
# about this.
/var/lib/kubelet/kubeconfig:
file.managed:
@ -31,19 +31,6 @@
- mode: 400
- makedirs: true
#
# --- This file is DEPRECATED ---
# The default here is that this file is blank. If this is the case, the kubelet
# won't be able to parse it as JSON and it'll not be able to publish events to
# the apiserver. You'll see a single error line in the kubelet start up file
# about this.
/var/lib/kubelet/kubernetes_auth:
file.managed:
- source: salt://kubelet/kubernetes_auth
- user: root
- group: root
- mode: 400
- makedirs: true
{% if pillar.get('is_systemd') %}
@ -64,7 +51,7 @@ fix-service-kubelet:
- file: /usr/local/bin/kubelet
- file: {{ pillar.get('systemd_system_path') }}/kubelet.service
- file: {{ environment_file }}
- file: /var/lib/kubelet/kubernetes_auth
- file: /var/lib/kubelet/kubeconfig
{% else %}
@ -91,4 +78,4 @@ kubelet:
- file: /usr/lib/systemd/system/kubelet.service
{% endif %}
- file: {{ environment_file }}
- file: /var/lib/kubelet/kubernetes_auth
- file: /var/lib/kubelet/kubeconfig

View File

@ -27,6 +27,10 @@ source "${KUBE_ROOT}/cluster/kube-util.sh"
MINIONS_FILE=/tmp/minions-$$
trap 'rm -rf "${MINIONS_FILE}"' EXIT
EXPECTED_NUM_NODES="${NUM_MINIONS}"
if [[ "${REGISTER_MASTER_KUBELET:-}" == "true" ]]; then
EXPECTED_NUM_NODES=$((EXPECTED_NUM_NODES+1))
fi
# Make several attempts to deal with slow cluster birth.
attempt=0
while true; do
@ -38,20 +42,24 @@ while true; do
# Echo the output, strip the first line, then gather 2 counts:
# - Total number of nodes.
# - Number of "ready" nodes.
"${KUBE_ROOT}/cluster/kubectl.sh" get nodes > "${MINIONS_FILE}" || true
#
# Suppress errors from kubectl output because during cluster bootstrapping
# for clusters where the master node is registered, the apiserver will become
# available and then get restarted as the kubelet configures the docker bridge.
"${KUBE_ROOT}/cluster/kubectl.sh" get nodes > "${MINIONS_FILE}" 2> /dev/null || true
found=$(cat "${MINIONS_FILE}" | sed '1d' | grep -c .) || true
ready=$(cat "${MINIONS_FILE}" | sed '1d' | awk '{print $NF}' | grep -c '^Ready') || true
if (( ${found} == "${NUM_MINIONS}" )) && (( ${ready} == "${NUM_MINIONS}")); then
if (( "${found}" == "${EXPECTED_NUM_NODES}" )) && (( "${ready}" == "${EXPECTED_NUM_NODES}")); then
break
else
# Set the timeout to ~10minutes (40 x 15 second) to avoid timeouts for 100-node clusters.
if (( attempt > 40 )); then
echo -e "${color_red}Detected ${ready} ready nodes, found ${found} nodes out of expected ${NUM_MINIONS}. Your cluster may not be working.${color_norm}"
echo -e "${color_red}Detected ${ready} ready nodes, found ${found} nodes out of expected ${EXPECTED_NUM_NODES}. Your cluster may not be working.${color_norm}"
cat -n "${MINIONS_FILE}"
exit 2
else
echo -e "${color_yellow}Waiting for ${NUM_MINIONS} ready nodes. ${ready} ready nodes, ${found} registered. Retrying.${color_norm}"
echo -e "${color_yellow}Waiting for ${EXPECTED_NUM_NODES} ready nodes. ${ready} ready nodes, ${found} registered. Retrying.${color_norm}"
fi
attempt=$((attempt+1))
sleep 15

View File

@ -1,7 +1,10 @@
{
"apiVersion": "v1",
"kind": "Pod",
"metadata": {"name":"kube-apiserver"},
"metadata": {
"name":"kube-apiserver",
"namespace": "kube-system"
},
"spec":{
"hostNetwork": true,
"containers":[
@ -13,6 +16,15 @@
"-c",
"/usr/local/bin/kube-apiserver --address=0.0.0.0 --etcd_servers=http://kube0.ha:2379 --service-cluster-ip-range=10.0.0.0/16 --v=4 --allow_privileged=True 1>>/var/log/kube-apiserver.log 2>&1"
],
"livenessProbe": {
"httpGet": {
"host": "127.0.0.1",
"port": 8080,
"path": "/healthz"
},
"initialDelaySeconds": 15,
"timeoutSeconds": 15
},
"ports":[
{ "name": "https",
"containerPort": 443,

View File

@ -1,7 +1,10 @@
{
"apiVersion": "v1",
"kind": "Pod",
"metadata": {"name":"kube-controller-manager"},
"metadata": {
"name":"kube-controller-manager",
"namespace": "kube-system"
},
"spec":{
"hostNetwork": true,
"containers":[
@ -15,8 +18,9 @@
],
"livenessProbe": {
"httpGet": {
"path": "/healthz",
"port": 10252
"host": "127.0.0.1",
"port": 10252,
"path": "/healthz"
},
"initialDelaySeconds": 15,
"timeoutSeconds": 1

View File

@ -1,7 +1,10 @@
{
"apiVersion": "v1",
"kind": "Pod",
"metadata": {"name":"kube-scheduler"},
"metadata": {
"name":"kube-scheduler",
"namespace": "kube-system"
},
"spec":{
"hostNetwork": true,
"containers":[
@ -15,8 +18,9 @@
],
"livenessProbe": {
"httpGet": {
"path": "/healthz",
"port": 10251
"host": "127.0.0.1",
"port": 10251,
"path": "/healthz"
},
"initialDelaySeconds": 15,
"timeoutSeconds": 1