diff --git a/cluster/gce/util.sh b/cluster/gce/util.sh index bafed834352..2896be6bb74 100755 --- a/cluster/gce/util.sh +++ b/cluster/gce/util.sh @@ -39,6 +39,8 @@ KUBE_SKIP_UPDATE=${KUBE_SKIP_UPDATE-"n"} # multiple versions of the server are being used in the same project # simultaneously (e.g. on Jenkins). KUBE_GCS_STAGING_PATH_SUFFIX=${KUBE_GCS_STAGING_PATH_SUFFIX-""} +# How long (in seconds) to wait for cluster initialization. +KUBE_CLUSTER_INITIALIZATION_TIMEOUT=${KUBE_CLUSTER_INITIALIZATION_TIMEOUT:-300} # VERSION_REGEX matches things like "v0.13.1" readonly KUBE_VERSION_REGEX="^v(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)$" @@ -59,7 +61,7 @@ function verify-prereqs { else # TODO: add checking if RUNTIME_CONFIG contains "experimental/v1alpha1=false" and appending "experimental/v1alpha1=true" if not. if echo "${RUNTIME_CONFIG}" | grep -q -v "experimental/v1alpha1=true"; then - echo "Experimental API should be turned on, but is not turned on in RUNTIME_CONFIG!" + echo "Experimental API should be turned on, but is not turned on in RUNTIME_CONFIG!" >&2 exit 1 fi fi @@ -79,8 +81,8 @@ function verify-prereqs { curl https://sdk.cloud.google.com | bash fi if ! which "${cmd}" >/dev/null; then - echo "Can't find ${cmd} in PATH, please fix and retry. The Google Cloud " - echo "SDK can be downloaded from https://cloud.google.com/sdk/." + echo "Can't find ${cmd} in PATH, please fix and retry. The Google Cloud " >&2 + echo "SDK can be downloaded from https://cloud.google.com/sdk/." >&2 exit 1 fi fi @@ -123,7 +125,7 @@ function find-release-tars { SERVER_BINARY_TAR="${KUBE_ROOT}/_output/release-tars/kubernetes-server-linux-amd64.tar.gz" fi if [[ ! -f "$SERVER_BINARY_TAR" ]]; then - echo "!!! Cannot find kubernetes-server-linux-amd64.tar.gz" + echo "!!! Cannot find kubernetes-server-linux-amd64.tar.gz" >&2 exit 1 fi @@ -132,7 +134,7 @@ function find-release-tars { SALT_TAR="${KUBE_ROOT}/_output/release-tars/kubernetes-salt.tar.gz" fi if [[ ! -f "$SALT_TAR" ]]; then - echo "!!! Cannot find kubernetes-salt.tar.gz" + echo "!!! Cannot find kubernetes-salt.tar.gz" >&2 exit 1 fi } @@ -346,10 +348,10 @@ function create-firewall-rule { --target-tags "$3" \ --allow tcp,udp,icmp,esp,ah,sctp; then if (( attempt > 5 )); then - echo -e "${color_red}Failed to create firewall rule $1 ${color_norm}" + echo -e "${color_red}Failed to create firewall rule $1 ${color_norm}" >&2 exit 2 fi - echo -e "${color_yellow}Attempt $(($attempt+1)) failed to create firewall rule $1. Retrying.${color_norm}" + echo -e "${color_yellow}Attempt $(($attempt+1)) failed to create firewall rule $1. Retrying.${color_norm}" >&2 attempt=$(($attempt+1)) else break @@ -431,10 +433,10 @@ function add-instance-metadata { --zone "${ZONE}" \ --metadata "${kvs[@]}"; then if (( attempt > 5 )); then - echo -e "${color_red}Failed to add instance metadata in ${instance} ${color_norm}" + echo -e "${color_red}Failed to add instance metadata in ${instance} ${color_norm}" >&2 exit 2 fi - echo -e "${color_yellow}Attempt $(($attempt+1)) failed to add metadata in ${instance}. Retrying.${color_norm}" + echo -e "${color_yellow}Attempt $(($attempt+1)) failed to add metadata in ${instance}. Retrying.${color_norm}" >&2 attempt=$(($attempt+1)) else break @@ -458,10 +460,10 @@ function add-instance-metadata-from-file { --zone "${ZONE}" \ --metadata-from-file "$(join_csv ${kvs[@]})"; then if (( attempt > 5 )); then - echo -e "${color_red}Failed to add instance metadata in ${instance} ${color_norm}" + echo -e "${color_red}Failed to add instance metadata in ${instance} ${color_norm}" >&2 exit 2 fi - echo -e "${color_yellow}Attempt $(($attempt+1)) failed to add metadata in ${instance}. Retrying.${color_norm}" + echo -e "${color_yellow}Attempt $(($attempt+1)) failed to add metadata in ${instance}. Retrying.${color_norm}" >&2 attempt=$(($attempt+1)) else break @@ -541,7 +543,7 @@ function create-certs { ./easyrsa build-client-full kubecfg nopass > /dev/null 2>&1) || { # If there was an error in the subshell, just die. # TODO(roberthbailey): add better error handling here - echo "=== Failed to generate certificates: Aborting ===" + echo "=== Failed to generate certificates: Aborting ===" >&2 exit 2 } CERT_DIR="${KUBE_TEMP}/easy-rsa-master/easyrsa3" @@ -721,11 +723,10 @@ function kube-up { --min-num-replicas "${AUTOSCALER_MIN_NODES}" --max-num-replicas "${AUTOSCALER_MAX_NODES}" ${METRICS} || true fi - echo "Waiting for cluster initialization." + echo "Waiting up to ${KUBE_CLUSTER_INITIALIZATION_TIMEOUT} seconds for cluster initialization." echo echo " This will continually check to see if the API for kubernetes is reachable." - echo " This might loop forever if there was some uncaught error during start" - echo " up." + echo " This may time out if there was some uncaught error during start up." echo # curl in mavericks is borked. @@ -736,12 +737,17 @@ function kube-up { fi fi - + local start_time=$(date +%s) until curl --cacert "${CERT_DIR}/pki/ca.crt" \ -H "Authorization: Bearer ${KUBE_BEARER_TOKEN}" \ ${secure} \ --max-time 5 --fail --output /dev/null --silent \ "https://${KUBE_MASTER_IP}/api/v1/pods"; do + local elapsed=$(($(date +%s) - ${start_time})) + if [[ ${elapsed} -gt ${KUBE_CLUSTER_INITIALIZATION_TIMEOUT} ]]; then + echo -e "${color_red}Cluster failed to initialize within ${KUBE_CLUSTER_INITIALIZATION_TIMEOUT} seconds.${color_norm}" >&2 + exit 2 + fi printf "." sleep 2 done