mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 03:41:45 +00:00
Make multiple attempts to sanity check (GCE) and validate (ALL) cluster
This commit is contained in:
parent
68450e0c61
commit
1ce71d1e33
@ -21,6 +21,13 @@
|
|||||||
KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../..
|
KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../..
|
||||||
source "${KUBE_ROOT}/cluster/gce/${KUBE_CONFIG_FILE-"config-default.sh"}"
|
source "${KUBE_ROOT}/cluster/gce/${KUBE_CONFIG_FILE-"config-default.sh"}"
|
||||||
|
|
||||||
|
# Some useful colors.
|
||||||
|
declare -r color_start="\033["
|
||||||
|
declare -r color_red="${color_start}0;31m"
|
||||||
|
declare -r color_yellow="${color_start}0;33m"
|
||||||
|
declare -r color_green="${color_start}0;32m"
|
||||||
|
declare -r color_norm="${color_start}0m"
|
||||||
|
|
||||||
# Verify prereqs
|
# Verify prereqs
|
||||||
function verify-prereqs {
|
function verify-prereqs {
|
||||||
local cmd
|
local cmd
|
||||||
@ -87,6 +94,7 @@ function detect-project () {
|
|||||||
echo "Project: $PROJECT (autodetected from gcloud config)"
|
echo "Project: $PROJECT (autodetected from gcloud config)"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# Take the local tar files and upload them to Google Storage. They will then be
|
# Take the local tar files and upload them to Google Storage. They will then be
|
||||||
# downloaded by the master as part of the start up script for the master.
|
# downloaded by the master as part of the start up script for the master.
|
||||||
#
|
#
|
||||||
@ -140,7 +148,6 @@ function upload-server-tars() {
|
|||||||
# Vars set:
|
# Vars set:
|
||||||
# KUBE_MINION_IP_ADDRESS (array)
|
# KUBE_MINION_IP_ADDRESS (array)
|
||||||
function detect-minions () {
|
function detect-minions () {
|
||||||
detect-project
|
|
||||||
KUBE_MINION_IP_ADDRESSES=()
|
KUBE_MINION_IP_ADDRESSES=()
|
||||||
for (( i=0; i<${#MINION_NAMES[@]}; i++)); do
|
for (( i=0; i<${#MINION_NAMES[@]}; i++)); do
|
||||||
local minion_ip=$(gcloud compute instances describe --project "${PROJECT}" --zone "${ZONE}" \
|
local minion_ip=$(gcloud compute instances describe --project "${PROJECT}" --zone "${ZONE}" \
|
||||||
@ -163,12 +170,15 @@ function detect-minions () {
|
|||||||
#
|
#
|
||||||
# Assumed vars:
|
# Assumed vars:
|
||||||
# MASTER_NAME
|
# MASTER_NAME
|
||||||
|
# PROJECT (if unset, will detect-project)
|
||||||
# ZONE
|
# ZONE
|
||||||
# Vars set:
|
# Vars set:
|
||||||
# KUBE_MASTER
|
# KUBE_MASTER
|
||||||
# KUBE_MASTER_IP
|
# KUBE_MASTER_IP
|
||||||
function detect-master () {
|
function detect-master () {
|
||||||
detect-project
|
if [[ -z "${PROJECT-}" ]]; then
|
||||||
|
detect-project
|
||||||
|
fi
|
||||||
KUBE_MASTER=${MASTER_NAME}
|
KUBE_MASTER=${MASTER_NAME}
|
||||||
if [[ -z "${KUBE_MASTER_IP-}" ]]; then
|
if [[ -z "${KUBE_MASTER_IP-}" ]]; then
|
||||||
KUBE_MASTER_IP=$(gcloud compute instances describe --project "${PROJECT}" --zone "${ZONE}" \
|
KUBE_MASTER_IP=$(gcloud compute instances describe --project "${PROJECT}" --zone "${ZONE}" \
|
||||||
@ -223,6 +233,103 @@ function get-admin-token {
|
|||||||
KUBE_ADMIN_TOKEN=$(python -c 'import string,random; print "".join(random.SystemRandom().choice(string.ascii_letters + string.digits) for _ in range(32))')
|
KUBE_ADMIN_TOKEN=$(python -c 'import string,random; print "".join(random.SystemRandom().choice(string.ascii_letters + string.digits) for _ in range(32))')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Wait for background jobs to finish. Exit with
|
||||||
|
# an error status if any of the jobs failed.
|
||||||
|
function wait-for-jobs {
|
||||||
|
local fail=0
|
||||||
|
local job
|
||||||
|
for job in $(jobs -p); do
|
||||||
|
wait "${job}" || fail=$((fail + 1))
|
||||||
|
done
|
||||||
|
if (( fail != 0 )); then
|
||||||
|
echo -e "${color_red}${fail} commands failed. Exiting.${color_norm}" >&2
|
||||||
|
# Ignore failures for now.
|
||||||
|
# exit 2
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# Robustly try to create a firewall rule.
|
||||||
|
# $1: The name of firewall rule.
|
||||||
|
# $2: IP ranges.
|
||||||
|
function create-firewall-rule {
|
||||||
|
local attempt=0
|
||||||
|
while true; do
|
||||||
|
if ! gcloud compute firewall-rules create "$1" \
|
||||||
|
--project "${PROJECT}" \
|
||||||
|
--network "${NETWORK}" \
|
||||||
|
--source-ranges "$2" \
|
||||||
|
--allow tcp udp icmp esp ah sctp; then
|
||||||
|
if (( attempt > 5 )); then
|
||||||
|
echo -e "${color_red}Failed to create firewall rule $1 ${color_norm}"
|
||||||
|
exit 2
|
||||||
|
fi
|
||||||
|
echo -e "${color_yellow}Attempt $(($attempt+1)) failed to create firewall rule $1. Retrying.${color_norm}"
|
||||||
|
attempt=$(($attempt+1))
|
||||||
|
else
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
# Robustly try to create a route.
|
||||||
|
# $1: The name of the route.
|
||||||
|
# $2: IP range.
|
||||||
|
function create-route {
|
||||||
|
local attempt=0
|
||||||
|
while true; do
|
||||||
|
if ! gcloud compute routes create "$1" \
|
||||||
|
--project "${PROJECT}" \
|
||||||
|
--destination-range "$2" \
|
||||||
|
--network "${NETWORK}" \
|
||||||
|
--next-hop-instance "$1" \
|
||||||
|
--next-hop-instance-zone "${ZONE}"; then
|
||||||
|
if (( attempt > 5 )); then
|
||||||
|
echo -e "${color_red}Failed to create route $1 ${color_norm}"
|
||||||
|
exit 2
|
||||||
|
fi
|
||||||
|
echo -e "${color_yellow}Attempt $(($attempt+1)) failed to create route $1. Retrying.${color_norm}"
|
||||||
|
attempt=$(($attempt+1))
|
||||||
|
else
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
# Robustly try to create an instance.
|
||||||
|
# $1: The name of the instance.
|
||||||
|
# $2: The scopes flag.
|
||||||
|
# $3: The minion start script.
|
||||||
|
function create-instance {
|
||||||
|
local attempt=0
|
||||||
|
while true; do
|
||||||
|
if ! gcloud compute instances create "$1" \
|
||||||
|
--project "${PROJECT}" \
|
||||||
|
--zone "${ZONE}" \
|
||||||
|
--machine-type "${MINION_SIZE}" \
|
||||||
|
--image-project="${IMAGE_PROJECT}" \
|
||||||
|
--image "${IMAGE}" \
|
||||||
|
--tags "${MINION_TAG}" \
|
||||||
|
--network "${NETWORK}" \
|
||||||
|
$2 \
|
||||||
|
--can-ip-forward \
|
||||||
|
--metadata-from-file "$3"; then
|
||||||
|
if (( attempt > 5 )); then
|
||||||
|
echo -e "${color_red}Failed to create instance $1 ${color_norm}"
|
||||||
|
exit 2
|
||||||
|
fi
|
||||||
|
echo -e "${color_yellow}Attempt $(($attempt+1)) failed to create node $1. Retrying.${color_norm}"
|
||||||
|
attempt=$(($attempt+1))
|
||||||
|
# Attempt to delete the disk for this node (the disk may have been created even
|
||||||
|
# if the instance creation failed).
|
||||||
|
gcloud compute disks delete "$1" --project "${PROJECT}" --zone "${ZONE}" --quiet || true
|
||||||
|
else
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
# Instantiate a kubernetes cluster
|
# Instantiate a kubernetes cluster
|
||||||
#
|
#
|
||||||
# Assumed vars
|
# Assumed vars
|
||||||
@ -244,14 +351,14 @@ function kube-up {
|
|||||||
local htpasswd
|
local htpasswd
|
||||||
htpasswd=$(cat "${KUBE_TEMP}/htpasswd")
|
htpasswd=$(cat "${KUBE_TEMP}/htpasswd")
|
||||||
|
|
||||||
if ! gcloud compute networks describe --project ${PROJECT} "${NETWORK}" &>/dev/null; then
|
if ! gcloud compute networks describe "${NETWORK}" &>/dev/null; then
|
||||||
echo "Creating new network: ${NETWORK}"
|
echo "Creating new network: ${NETWORK}"
|
||||||
# The network needs to be created synchronously or we have a race. The
|
# The network needs to be created synchronously or we have a race. The
|
||||||
# firewalls can be added concurrent with instance creation.
|
# firewalls can be added concurrent with instance creation.
|
||||||
gcloud compute networks create --project ${PROJECT} "${NETWORK}" --range "10.240.0.0/16"
|
gcloud compute networks create "${NETWORK}" --range "10.240.0.0/16"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if ! gcloud compute firewall-rules describe --project ${PROJECT} "${NETWORK}-default-internal" &>/dev/null; then
|
if ! gcloud compute firewall-rules describe "${NETWORK}-default-internal" &>/dev/null; then
|
||||||
gcloud compute firewall-rules create "${NETWORK}-default-internal" \
|
gcloud compute firewall-rules create "${NETWORK}-default-internal" \
|
||||||
--project "${PROJECT}" \
|
--project "${PROJECT}" \
|
||||||
--network "${NETWORK}" \
|
--network "${NETWORK}" \
|
||||||
@ -259,7 +366,7 @@ function kube-up {
|
|||||||
--allow "tcp:1-65535" "udp:1-65535" "icmp" &
|
--allow "tcp:1-65535" "udp:1-65535" "icmp" &
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if ! gcloud compute firewall-rules describe --project "${PROJECT}" "${NETWORK}-default-ssh" &>/dev/null; then
|
if ! gcloud compute firewall-rules describe "${NETWORK}-default-ssh" &>/dev/null; then
|
||||||
gcloud compute firewall-rules create "${NETWORK}-default-ssh" \
|
gcloud compute firewall-rules create "${NETWORK}-default-ssh" \
|
||||||
--project "${PROJECT}" \
|
--project "${PROJECT}" \
|
||||||
--network "${NETWORK}" \
|
--network "${NETWORK}" \
|
||||||
@ -313,6 +420,39 @@ function kube-up {
|
|||||||
--scopes "storage-ro" "compute-rw" \
|
--scopes "storage-ro" "compute-rw" \
|
||||||
--metadata-from-file "startup-script=${KUBE_TEMP}/master-start.sh" &
|
--metadata-from-file "startup-script=${KUBE_TEMP}/master-start.sh" &
|
||||||
|
|
||||||
|
# Create the firewall rules, 10 at a time.
|
||||||
|
for (( i=0; i<${#MINION_NAMES[@]}; i++)); do
|
||||||
|
create-firewall-rule "${MINION_NAMES[$i]}-all" "${MINION_IP_RANGES[$i]}" &
|
||||||
|
|
||||||
|
if [ $i -ne 0 ] && [ $((i%10)) -eq 0 ]; then
|
||||||
|
echo Waiting for a batch of firewall rules at $i...
|
||||||
|
wait-for-jobs
|
||||||
|
fi
|
||||||
|
|
||||||
|
done
|
||||||
|
# Wait for last batch of jobs.
|
||||||
|
wait-for-jobs
|
||||||
|
|
||||||
|
# Create the routes, 10 at a time.
|
||||||
|
for (( i=0; i<${#MINION_NAMES[@]}; i++)); do
|
||||||
|
create-route "${MINION_NAMES[$i]}" "${MINION_IP_RANGES[$i]}" &
|
||||||
|
|
||||||
|
if [ $i -ne 0 ] && [ $((i%10)) -eq 0 ]; then
|
||||||
|
echo Waiting for a batch of routes at $i...
|
||||||
|
wait-for-jobs
|
||||||
|
fi
|
||||||
|
|
||||||
|
done
|
||||||
|
# Wait for last batch of jobs.
|
||||||
|
wait-for-jobs
|
||||||
|
|
||||||
|
local -a scope_flags=()
|
||||||
|
if (( "${#MINION_SCOPES[@]}" > 0 )); then
|
||||||
|
scope_flags=("--scopes" "${MINION_SCOPES[@]}")
|
||||||
|
else
|
||||||
|
scope_flags=("--no-scopes")
|
||||||
|
fi
|
||||||
|
# Create the instances, 5 at a time.
|
||||||
for (( i=0; i<${#MINION_NAMES[@]}; i++)); do
|
for (( i=0; i<${#MINION_NAMES[@]}; i++)); do
|
||||||
(
|
(
|
||||||
echo "#! /bin/bash"
|
echo "#! /bin/bash"
|
||||||
@ -325,47 +465,17 @@ function kube-up {
|
|||||||
grep -v "^#" "${KUBE_ROOT}/cluster/gce/templates/salt-minion.sh"
|
grep -v "^#" "${KUBE_ROOT}/cluster/gce/templates/salt-minion.sh"
|
||||||
) > "${KUBE_TEMP}/minion-start-${i}.sh"
|
) > "${KUBE_TEMP}/minion-start-${i}.sh"
|
||||||
|
|
||||||
gcloud compute firewall-rules create "${MINION_NAMES[$i]}-all" \
|
local scopes_flag="${scope_flags[@]}"
|
||||||
--project "${PROJECT}" \
|
create-instance "${MINION_NAMES[$i]}" "${scopes_flag}" "startup-script=${KUBE_TEMP}/minion-start-${i}.sh" &
|
||||||
--network "${NETWORK}" \
|
|
||||||
--source-ranges "${MINION_IP_RANGES[$i]}" \
|
|
||||||
--allow tcp udp icmp esp ah sctp &
|
|
||||||
|
|
||||||
local -a scope_flags=()
|
if [ $i -ne 0 ] && [ $((i%5)) -eq 0 ]; then
|
||||||
if (( "${#MINION_SCOPES[@]}" > 0 )); then
|
echo Waiting for creation of a batch of instances at $i...
|
||||||
scope_flags=("--scopes" "${MINION_SCOPES[@]}")
|
wait-for-jobs
|
||||||
else
|
|
||||||
scope_flags=("--no-scopes")
|
|
||||||
fi
|
fi
|
||||||
gcloud compute instances create ${MINION_NAMES[$i]} \
|
|
||||||
--project "${PROJECT}" \
|
|
||||||
--zone "${ZONE}" \
|
|
||||||
--machine-type "${MINION_SIZE}" \
|
|
||||||
--image-project="${IMAGE_PROJECT}" \
|
|
||||||
--image "${IMAGE}" \
|
|
||||||
--tags "${MINION_TAG}" \
|
|
||||||
--network "${NETWORK}" \
|
|
||||||
"${scope_flags[@]}" \
|
|
||||||
--can-ip-forward \
|
|
||||||
--metadata-from-file "startup-script=${KUBE_TEMP}/minion-start-${i}.sh" &
|
|
||||||
|
|
||||||
gcloud compute routes create "${MINION_NAMES[$i]}" \
|
|
||||||
--project "${PROJECT}" \
|
|
||||||
--destination-range "${MINION_IP_RANGES[$i]}" \
|
|
||||||
--network "${NETWORK}" \
|
|
||||||
--next-hop-instance "${MINION_NAMES[$i]}" \
|
|
||||||
--next-hop-instance-zone "${ZONE}" &
|
|
||||||
done
|
done
|
||||||
|
# Wait for last batch of jobs.
|
||||||
local fail=0
|
wait-for-jobs
|
||||||
local job
|
|
||||||
for job in $(jobs -p); do
|
|
||||||
wait "${job}" || fail=$((fail + 1))
|
|
||||||
done
|
|
||||||
if (( $fail != 0 )); then
|
|
||||||
echo "${fail} commands failed. Exiting." >&2
|
|
||||||
exit 2
|
|
||||||
fi
|
|
||||||
|
|
||||||
detect-master
|
detect-master
|
||||||
|
|
||||||
@ -383,30 +493,6 @@ function kube-up {
|
|||||||
done
|
done
|
||||||
|
|
||||||
echo "Kubernetes cluster created."
|
echo "Kubernetes cluster created."
|
||||||
echo "Sanity checking cluster..."
|
|
||||||
|
|
||||||
sleep 5
|
|
||||||
|
|
||||||
# Basic sanity checking
|
|
||||||
local i
|
|
||||||
local rc # Capture return code without exiting because of errexit bash option
|
|
||||||
for (( i=0; i<${#MINION_NAMES[@]}; i++)); do
|
|
||||||
# Make sure docker is installed
|
|
||||||
gcloud compute ssh --project "${PROJECT}" --zone "$ZONE" "${MINION_NAMES[$i]}" --command "which docker" >/dev/null || {
|
|
||||||
echo "Docker failed to install on ${MINION_NAMES[$i]}. Your cluster is unlikely" >&2
|
|
||||||
echo "to work correctly. Please run ./cluster/kube-down.sh and re-create the" >&2
|
|
||||||
echo "cluster. (sorry!)" >&2
|
|
||||||
exit 1
|
|
||||||
}
|
|
||||||
done
|
|
||||||
|
|
||||||
echo
|
|
||||||
echo "Kubernetes cluster is running. The master is running at:"
|
|
||||||
echo
|
|
||||||
echo " https://${KUBE_MASTER_IP}"
|
|
||||||
echo
|
|
||||||
echo "The user name and password to use is located in ~/.kubernetes_auth."
|
|
||||||
echo
|
|
||||||
|
|
||||||
local kube_cert=".kubecfg.crt"
|
local kube_cert=".kubecfg.crt"
|
||||||
local kube_key=".kubecfg.key"
|
local kube_key=".kubecfg.key"
|
||||||
@ -431,7 +517,57 @@ EOF
|
|||||||
|
|
||||||
chmod 0600 ~/.kubernetes_auth "${HOME}/${kube_cert}" \
|
chmod 0600 ~/.kubernetes_auth "${HOME}/${kube_cert}" \
|
||||||
"${HOME}/${kube_key}" "${HOME}/${ca_cert}"
|
"${HOME}/${kube_key}" "${HOME}/${ca_cert}"
|
||||||
|
echo Wrote ~/.kubernetes_auth
|
||||||
)
|
)
|
||||||
|
|
||||||
|
echo "Sanity checking cluster..."
|
||||||
|
|
||||||
|
# Basic sanity checking
|
||||||
|
local i
|
||||||
|
local rc # Capture return code without exiting because of errexit bash option
|
||||||
|
for (( i=0; i<${#MINION_NAMES[@]}; i++)); do
|
||||||
|
# Make sure docker is installed and working.
|
||||||
|
local attempt=0
|
||||||
|
while true; do
|
||||||
|
echo -n Attempt "$(($attempt+1))" to check Docker on node "${MINION_NAMES[$i]}" ...
|
||||||
|
local output=$(gcloud compute --project "${PROJECT}" ssh --zone "$ZONE" "${MINION_NAMES[$i]}" --command "sudo docker ps -a" 2>/dev/null)
|
||||||
|
if [[ -z "${output}" ]]; then
|
||||||
|
if (( attempt > 9 )); then
|
||||||
|
echo
|
||||||
|
echo -e "${color_red}Docker failed to install on node ${MINION_NAMES[$i]}. Your cluster is unlikely" >&2
|
||||||
|
echo "to work correctly. Please run ./cluster/kube-down.sh and re-create the" >&2
|
||||||
|
echo -e "cluster. (sorry!)${color_norm}" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
elif [[ "${output}" != *"kubernetes/pause"* ]]; then
|
||||||
|
if (( attempt > 9 )); then
|
||||||
|
echo
|
||||||
|
echo -e "${color_red}Failed to observe kubernetes/pause on node ${MINION_NAMES[$i]}. Your cluster is unlikely" >&2
|
||||||
|
echo "to work correctly. Please run ./cluster/kube-down.sh and re-create the" >&2
|
||||||
|
echo -e "cluster. (sorry!)${color_norm}" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo -e " ${color_green}[working]${color_norm}"
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
echo -e " ${color_yellow}[not working yet]${color_norm}"
|
||||||
|
# Start Docker, in case it failed to start.
|
||||||
|
gcloud compute --project "${PROJECT}" ssh --zone "$ZONE" "${MINION_NAMES[$i]}" \
|
||||||
|
--command "sudo service docker start" 2>/dev/null || true
|
||||||
|
attempt=$(($attempt+1))
|
||||||
|
sleep 30
|
||||||
|
done
|
||||||
|
done
|
||||||
|
|
||||||
|
echo
|
||||||
|
echo -e "${color_green}Kubernetes cluster is running. The master is running at:"
|
||||||
|
echo
|
||||||
|
echo -e "${color_yellow} https://${KUBE_MASTER_IP}"
|
||||||
|
echo
|
||||||
|
echo -e "${color_green}The user name and password to use is located in ~/.kubernetes_auth.${color_norm}"
|
||||||
|
echo
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Delete a kubernetes cluster.
|
# Delete a kubernetes cluster.
|
||||||
@ -469,7 +605,7 @@ function kube-down {
|
|||||||
gcloud compute instances delete \
|
gcloud compute instances delete \
|
||||||
--project "${PROJECT}" \
|
--project "${PROJECT}" \
|
||||||
--quiet \
|
--quiet \
|
||||||
--delete-disks all \
|
--delete-disks boot \
|
||||||
--zone "${ZONE}" \
|
--zone "${ZONE}" \
|
||||||
"${minions[@]::10}" || true
|
"${minions[@]::10}" || true
|
||||||
minions=( "${minions[@]:10}" )
|
minions=( "${minions[@]:10}" )
|
||||||
@ -531,7 +667,7 @@ function kube-push {
|
|||||||
echo "echo Executing configuration"
|
echo "echo Executing configuration"
|
||||||
echo "sudo salt '*' mine.update"
|
echo "sudo salt '*' mine.update"
|
||||||
echo "sudo salt --force-color '*' state.highstate"
|
echo "sudo salt --force-color '*' state.highstate"
|
||||||
) | gcloud compute ssh --project "$PROJECT" --zone "$ZONE" "$KUBE_MASTER" --command "sudo bash"
|
) | gcloud compute ssh --project "${PROJECT}" --zone "$ZONE" "$KUBE_MASTER" --command "sudo bash"
|
||||||
|
|
||||||
get-password
|
get-password
|
||||||
|
|
||||||
@ -596,7 +732,7 @@ function test-teardown {
|
|||||||
function ssh-to-node {
|
function ssh-to-node {
|
||||||
local node="$1"
|
local node="$1"
|
||||||
local cmd="$2"
|
local cmd="$2"
|
||||||
gcloud compute ssh --ssh-flag="-o LogLevel=quiet" --project ${PROJECT} --zone="${ZONE}" "${node}" --command "${cmd}"
|
gcloud compute ssh --ssh-flag="-o LogLevel=quiet" --zone="${ZONE}" "${node}" --command "${cmd}"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Restart the kube-proxy on a node ($1)
|
# Restart the kube-proxy on a node ($1)
|
||||||
@ -606,11 +742,10 @@ function restart-kube-proxy {
|
|||||||
|
|
||||||
# Setup monitoring using heapster and InfluxDB
|
# Setup monitoring using heapster and InfluxDB
|
||||||
function setup-monitoring {
|
function setup-monitoring {
|
||||||
detect-project
|
|
||||||
if [[ "${ENABLE_CLUSTER_MONITORING}" == "true" ]]; then
|
if [[ "${ENABLE_CLUSTER_MONITORING}" == "true" ]]; then
|
||||||
echo "Setting up cluster monitoring using Heapster."
|
echo "Setting up cluster monitoring using Heapster."
|
||||||
|
|
||||||
if ! gcloud compute firewall-rules describe --project ${PROJECT} monitoring-heapster &>/dev/null; then
|
if ! gcloud compute firewall-rules describe monitoring-heapster &>/dev/null; then
|
||||||
if ! gcloud compute firewall-rules create monitoring-heapster \
|
if ! gcloud compute firewall-rules create monitoring-heapster \
|
||||||
--project "${PROJECT}" \
|
--project "${PROJECT}" \
|
||||||
--target-tags="${MINION_TAG}" \
|
--target-tags="${MINION_TAG}" \
|
||||||
@ -652,7 +787,7 @@ function teardown-monitoring {
|
|||||||
"${kubectl}" delete pods heapster &> /dev/null || true
|
"${kubectl}" delete pods heapster &> /dev/null || true
|
||||||
"${kubectl}" delete pods influx-grafana &> /dev/null || true
|
"${kubectl}" delete pods influx-grafana &> /dev/null || true
|
||||||
"${kubectl}" delete services influx-master &> /dev/null || true
|
"${kubectl}" delete services influx-master &> /dev/null || true
|
||||||
if gcloud compute firewall-rules describe --project ${PROJECT} monitoring-heapster &> /dev/null; then
|
if gcloud compute firewall-rules describe monitoring-heapster &> /dev/null; then
|
||||||
gcloud compute firewall-rules delete \
|
gcloud compute firewall-rules delete \
|
||||||
--project "${PROJECT}" \
|
--project "${PROJECT}" \
|
||||||
--quiet \
|
--quiet \
|
||||||
|
@ -33,38 +33,78 @@ detect-master > /dev/null
|
|||||||
detect-minions > /dev/null
|
detect-minions > /dev/null
|
||||||
|
|
||||||
MINIONS_FILE=/tmp/minions
|
MINIONS_FILE=/tmp/minions
|
||||||
"${KUBE_ROOT}/cluster/kubecfg.sh" -template $'{{range.items}}{{.id}}\n{{end}}' list minions > ${MINIONS_FILE}
|
attempt=0
|
||||||
|
while true; do
|
||||||
|
"${KUBE_ROOT}/cluster/kubecfg.sh" -template $'{{range.items}}{{.id}}\n{{end}}' list minions > "${MINIONS_FILE}"
|
||||||
|
found=$(grep -c . "${MINIONS_FILE}")
|
||||||
|
if [[ ${found} == "${NUM_MINIONS}" ]]; then
|
||||||
|
break
|
||||||
|
else
|
||||||
|
if (( attempt > 5 )); then
|
||||||
|
echo -e "${color_red}Detected ${found} nodes out of ${NUM_MINIONS}. Your cluster may not be working. ${color_norm}"
|
||||||
|
exit 2
|
||||||
|
fi
|
||||||
|
attempt=$((attempt+1))
|
||||||
|
sleep 30
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
echo "Found ${found} nodes."
|
||||||
|
|
||||||
# On vSphere, use minion IPs as their names
|
# On vSphere, use minion IPs as their names
|
||||||
if [[ "${KUBERNETES_PROVIDER}" == "vsphere" ]]; then
|
if [[ "${KUBERNETES_PROVIDER}" == "vsphere" ]]; then
|
||||||
for (( i=0; i<${#MINION_NAMES[@]}; i++)); do
|
for (( i=0; i<${#MINION_NAMES[@]}; i++)); do
|
||||||
MINION_NAMES[i]=${KUBE_MINION_IP_ADDRESSES[i]}
|
MINION_NAMES[$i]=${KUBE_MINION_IP_ADDRESSES[$i]}
|
||||||
done
|
done
|
||||||
fi
|
fi
|
||||||
|
|
||||||
for (( i=0; i<${#MINION_NAMES[@]}; i++)); do
|
for (( i=0; i<${#MINION_NAMES[@]}; i++)); do
|
||||||
# Grep returns an exit status of 1 when line is not found, so we need the : to always return a 0 exit status
|
# Grep returns an exit status of 1 when line is not found, so we need the : to always return a 0 exit status
|
||||||
count=$(grep -c ${MINION_NAMES[i]} ${MINIONS_FILE}) || :
|
# Make several attempts to deal with slow cluster birth.
|
||||||
if [[ "$count" == "0" ]]; then
|
attempt=0
|
||||||
echo "Failed to find ${MINION_NAMES[$i]}, cluster is probably broken."
|
while true; do
|
||||||
exit 1
|
echo -n "Attempt $((attempt+1)) to find ${MINION_NAMES[$i]} ..."
|
||||||
fi
|
count=$(grep -c "${MINION_NAMES[$i]}" "${MINIONS_FILE}") || :
|
||||||
|
if [[ "${count}" == "0" ]]; then
|
||||||
|
if (( attempt > 5 )); then
|
||||||
|
echo -e "${color_red}Failed to find ${MINION_NAMES[$i]}, cluster is probably broken.${color_norm}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo -e " ${color_green}[working]${color_norm}"
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
echo -e " ${color_yellow}[not working yet]${color_norm}"
|
||||||
|
attempt=$((attempt+1))
|
||||||
|
sleep 20
|
||||||
|
done
|
||||||
|
|
||||||
NAME=${MINION_NAMES[i]}
|
name="${MINION_NAMES[$i]}"
|
||||||
if [ "$KUBERNETES_PROVIDER" != "vsphere" ]; then
|
if [ "$KUBERNETES_PROVIDER" != "vsphere" ]; then
|
||||||
# Grab fully qualified name
|
# Grab fully qualified name
|
||||||
NAME=$(grep "${MINION_NAMES[i]}" ${MINIONS_FILE})
|
name=$(grep "${MINION_NAMES[$i]}\." "${MINIONS_FILE}")
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Make sure the kubelet is healthy
|
# Make sure the kubelet is healthy.
|
||||||
curl_output=$(curl -s --insecure --user "${KUBE_USER}:${KUBE_PASSWORD}" \
|
# Make several attempts to deal with slow cluster birth.
|
||||||
"https://${KUBE_MASTER_IP}/api/v1beta1/proxy/minions/${NAME}/healthz")
|
attempt=0
|
||||||
if [[ "${curl_output}" != "ok" ]]; then
|
while true; do
|
||||||
echo "Kubelet failed to install on ${MINION_NAMES[$i]}. Your cluster is unlikely to work correctly."
|
echo -n "Attempt $((attempt+1)) at checking Kubelet installation on node ${MINION_NAMES[$i]} ..."
|
||||||
echo "Please run ./cluster/kube-down.sh and re-create the cluster. (sorry!)"
|
curl_output=$(curl -s --insecure --user "${KUBE_USER}:${KUBE_PASSWORD}" \
|
||||||
exit 1
|
"https://${KUBE_MASTER_IP}/api/v1beta1/proxy/minions/${name}/healthz")
|
||||||
else
|
if [[ "${curl_output}" != "ok" ]]; then
|
||||||
echo "Kubelet is successfully installed on ${MINION_NAMES[$i]}"
|
if (( attempt > 5 )); then
|
||||||
fi
|
echo
|
||||||
|
echo -e "${color_red}Kubelet failed to install on node ${MINION_NAMES[$i]}. Your cluster is unlikely to work correctly."
|
||||||
|
echo -e "Please run ./cluster/kube-down.sh and re-create the cluster. (sorry!)${color_norm}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo -e " ${color_green}[working]${color_norm}"
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
echo -e " ${color_yellow}[not working yet]${color_norm}"
|
||||||
|
attempt=$((attempt+1))
|
||||||
|
sleep 30
|
||||||
|
done
|
||||||
done
|
done
|
||||||
echo "Cluster validation succeeded"
|
echo -e "${color_green}Cluster validation succeeded${color_norm}"
|
||||||
|
Loading…
Reference in New Issue
Block a user