From 35b317c0946bde48360e1e31e79d053a5724fa55 Mon Sep 17 00:00:00 2001
From: gmarek
Date: Fri, 22 Jan 2016 16:50:07 +0100
Subject: [PATCH] Kubemark scripts retry failed gcloud commands

---
 test/kubemark/common.sh         | 20 +++++++
 test/kubemark/start-kubemark.sh | 92 ++++++++++++++++-----------------
 2 files changed, 66 insertions(+), 46 deletions(-)

diff --git a/test/kubemark/common.sh b/test/kubemark/common.sh
index 032dffdaa4e..d76044dc576 100644
--- a/test/kubemark/common.sh
+++ b/test/kubemark/common.sh
@@ -16,6 +16,7 @@
 source "${KUBE_ROOT}/cluster/kubemark/config-default.sh"
 source "${KUBE_ROOT}/cluster/kubemark/util.sh"
+source "${KUBE_ROOT}/cluster/kube-env.sh"
 
 detect-project &> /dev/null
 export PROJECT
 
@@ -23,3 +24,22 @@ export PROJECT
 MASTER_NAME="${INSTANCE_PREFIX}-kubemark-master"
 MASTER_TAG="kubemark-master"
 EVENT_STORE_NAME="${INSTANCE_PREFIX}-event-store"
+
+RETRIES=3
+
+# Runs gcloud compute command with the given parameters. Up to $RETRIES will be made
+# to execute the command.
+# arguments:
+# $@: all stuff that goes after 'gcloud compute '
+function run-gcloud-compute-with-retries {
+  for attempt in $(seq 1 ${RETRIES}); do
+    if ! gcloud compute "$@"; then
+      echo -e "${color_yellow}Attempt ${attempt} failed to $1 $2 $3. Retrying.${color_norm}" >& 2
+      sleep $(($attempt * 5))
+    else
+      return 0
+    fi
+  done
+  echo -e "${color_red} Failed to $1 $2 $3.${color_norm}" >& 2
+  exit 1
+}
diff --git a/test/kubemark/start-kubemark.sh b/test/kubemark/start-kubemark.sh
index 6db0c5f378a..e7b5f6b6d8c 100755
--- a/test/kubemark/start-kubemark.sh
+++ b/test/kubemark/start-kubemark.sh
@@ -46,39 +46,39 @@ cd $CURR_DIR
 
 GCLOUD_COMMON_ARGS="--project ${PROJECT} --zone ${ZONE}"
 
-gcloud compute disks create "${MASTER_NAME}-pd" \
-  ${GCLOUD_COMMON_ARGS} \
-  --type "${MASTER_DISK_TYPE}" \
-  --size "${MASTER_DISK_SIZE}"
+run-gcloud-compute-with-retries disks create "${MASTER_NAME}-pd" \
+    ${GCLOUD_COMMON_ARGS} \
+    --type "${MASTER_DISK_TYPE}" \
+    --size "${MASTER_DISK_SIZE}"
 
-gcloud compute instances create "${MASTER_NAME}" \
-  ${GCLOUD_COMMON_ARGS} \
-  --machine-type "${MASTER_SIZE}" \
-  --image-project="${MASTER_IMAGE_PROJECT}" \
-  --image "${MASTER_IMAGE}" \
-  --tags "${MASTER_TAG}" \
-  --network "${NETWORK}" \
-  --scopes "storage-ro,compute-rw,logging-write" \
-  --disk "name=${MASTER_NAME}-pd,device-name=master-pd,mode=rw,boot=no,auto-delete=no"
+run-gcloud-compute-with-retries instances create "${MASTER_NAME}" \
+    ${GCLOUD_COMMON_ARGS} \
+    --machine-type "${MASTER_SIZE}" \
+    --image-project="${MASTER_IMAGE_PROJECT}" \
+    --image "${MASTER_IMAGE}" \
+    --tags "${MASTER_TAG}" \
+    --network "${NETWORK}" \
+    --scopes "storage-ro,compute-rw,logging-write" \
+    --disk "name=${MASTER_NAME}-pd,device-name=master-pd,mode=rw,boot=no,auto-delete=no"
 
-gcloud compute firewall-rules create "${INSTANCE_PREFIX}-kubemark-master-https" \
-  --project "${PROJECT}" \
-  --network "${NETWORK}" \
-  --source-ranges "0.0.0.0/0" \
-  --target-tags "${MASTER_TAG}" \
-  --allow "tcp:443"
+run-gcloud-compute-with-retries firewall-rules create "${INSTANCE_PREFIX}-kubemark-master-https" \
+    --project "${PROJECT}" \
+    --network "${NETWORK}" \
+    --source-ranges "0.0.0.0/0" \
+    --target-tags "${MASTER_TAG}" \
+    --allow "tcp:443"
 
 MASTER_IP=$(gcloud compute instances describe ${MASTER_NAME} \
   --zone="${ZONE}" --project="${PROJECT}" | grep natIP: | cut -f2 -d":" | sed "s/ //g")
 
 if [ "${SEPARATE_EVENT_MACHINE:-false}" == "true" ]; then
   EVENT_STORE_NAME="${INSTANCE_PREFIX}-event-store"
-  gcloud compute disks create "${EVENT_STORE_NAME}-pd" \
+  run-gcloud-compute-with-retries disks create "${EVENT_STORE_NAME}-pd" \
     ${GCLOUD_COMMON_ARGS} \
     --type "${MASTER_DISK_TYPE}" \
     --size "${MASTER_DISK_SIZE}"
 
-  gcloud compute instances create "${EVENT_STORE_NAME}" \
+  run-gcloud-compute-with-retries instances create "${EVENT_STORE_NAME}" \
     ${GCLOUD_COMMON_ARGS} \
     --machine-type "${MASTER_SIZE}" \
     --image-project="${MASTER_IMAGE_PROJECT}" \
@@ -95,7 +95,7 @@ if [ "${SEPARATE_EVENT_MACHINE:-false}" == "true" ]; then
     sleep 1
   done
 
-  gcloud compute ssh ${EVENT_STORE_NAME} --zone=${ZONE} --project="${PROJECT}" \
+  gcloud compute ssh "${EVENT_STORE_NAME}" --zone="${ZONE}" --project="${PROJECT}" \
     --command="sudo docker run --net=host -d gcr.io/google_containers/etcd:2.0.12 /usr/local/bin/etcd \
     --listen-peer-urls http://127.0.0.1:2380 \
     --addr=127.0.0.1:4002 \
@@ -117,17 +117,17 @@ until gcloud compute ssh --zone="${ZONE}" --project="${PROJECT}" "${MASTER_NAME}
   sleep 1
 done
 
-gcloud compute ssh --zone=${ZONE} --project="${PROJECT}" ${MASTER_NAME} \
+gcloud compute ssh --zone="${ZONE}" --project="${PROJECT}" "${MASTER_NAME}" \
   --command="sudo mkdir /srv/kubernetes -p && \
-  sudo bash -c \"echo ${MASTER_CERT_BASE64} | base64 -d > /srv/kubernetes/server.cert\" && \
-  sudo bash -c \"echo ${MASTER_KEY_BASE64} | base64 -d > /srv/kubernetes/server.key\" && \
-  sudo bash -c \"echo ${CA_CERT_BASE64} | base64 -d > /srv/kubernetes/ca.crt\" && \
-  sudo bash -c \"echo ${KUBECFG_CERT_BASE64} | base64 -d > /srv/kubernetes/kubecfg.crt\" && \
-  sudo bash -c \"echo ${KUBECFG_KEY_BASE64} | base64 -d > /srv/kubernetes/kubecfg.key\" && \
-  sudo bash -c \"echo \"${KUBE_BEARER_TOKEN},admin,admin\" > /srv/kubernetes/known_tokens.csv\" && \
-  sudo bash -c \"echo \"${KUBELET_TOKEN},kubelet,kubelet\" >> /srv/kubernetes/known_tokens.csv\" && \
-  sudo bash -c \"echo \"${KUBE_PROXY_TOKEN},kube_proxy,kube_proxy\" >> /srv/kubernetes/known_tokens.csv\" && \
-  sudo bash -c \"echo admin,admin,admin > /srv/kubernetes/basic_auth.csv\""
+    sudo bash -c \"echo ${MASTER_CERT_BASE64} | base64 -d > /srv/kubernetes/server.cert\" && \
+    sudo bash -c \"echo ${MASTER_KEY_BASE64} | base64 -d > /srv/kubernetes/server.key\" && \
+    sudo bash -c \"echo ${CA_CERT_BASE64} | base64 -d > /srv/kubernetes/ca.crt\" && \
+    sudo bash -c \"echo ${KUBECFG_CERT_BASE64} | base64 -d > /srv/kubernetes/kubecfg.crt\" && \
+    sudo bash -c \"echo ${KUBECFG_KEY_BASE64} | base64 -d > /srv/kubernetes/kubecfg.key\" && \
+    sudo bash -c \"echo \"${KUBE_BEARER_TOKEN},admin,admin\" > /srv/kubernetes/known_tokens.csv\" && \
+    sudo bash -c \"echo \"${KUBELET_TOKEN},kubelet,kubelet\" >> /srv/kubernetes/known_tokens.csv\" && \
+    sudo bash -c \"echo \"${KUBE_PROXY_TOKEN},kube_proxy,kube_proxy\" >> /srv/kubernetes/known_tokens.csv\" && \
+    sudo bash -c \"echo admin,admin,admin > /srv/kubernetes/basic_auth.csv\""
 
 if [ "${RUN_FROM_DISTRO}" == "false" ]; then
   gcloud compute copy-files --zone="${ZONE}" --project="${PROJECT}" \
@@ -143,7 +143,7 @@ else
     "${MASTER_NAME}":~
 fi
 
-gcloud compute ssh ${MASTER_NAME} --zone=${ZONE} --project="${PROJECT}" \
+gcloud compute ssh "${MASTER_NAME}" --zone="${ZONE}" --project="${PROJECT}" \
   --command="chmod a+x configure-kubectl.sh && chmod a+x start-kubemark-master.sh && sudo ./start-kubemark-master.sh ${EVENT_STORE_IP:-127.0.0.1}"
 
 # create kubeconfig for Kubelet:
@@ -152,12 +152,12 @@ kind: Config
 users:
 - name: kubelet
   user:
-    client-certificate-data: ${KUBELET_CERT_BASE64}
-    client-key-data: ${KUBELET_KEY_BASE64}
+    client-certificate-data: "${KUBELET_CERT_BASE64}"
+    client-key-data: "${KUBELET_KEY_BASE64}"
 clusters:
 - name: kubemark
   cluster:
-    certificate-authority-data: ${CA_CERT_BASE64}
+    certificate-authority-data: "${CA_CERT_BASE64}"
     server: https://${MASTER_IP}
 contexts:
 - context:
@@ -188,14 +188,14 @@ kind: Config
 users:
 - name: admin
   user:
-    client-certificate-data: ${KUBECFG_CERT_BASE64}
-    client-key-data: ${KUBECFG_KEY_BASE64}
+    client-certificate-data: "${KUBECFG_CERT_BASE64}"
+    client-key-data: "${KUBECFG_KEY_BASE64}"
     username: admin
     password: admin
 clusters:
 - name: kubemark
   cluster:
-    certificate-authority-data: ${CA_CERT_BASE64}
+    certificate-authority-data: "${CA_CERT_BASE64}"
     server: https://${MASTER_IP}
 contexts:
 - context:
@@ -205,17 +205,17 @@ contexts:
 current-context: kubemark-context
 EOF
 
-sed "s/##numreplicas##/${NUM_NODES:-10}/g" ${KUBE_ROOT}/test/kubemark/hollow-node_template.json > ${KUBE_ROOT}/test/kubemark/hollow-node.json
-sed -i'' -e "s/##project##/${PROJECT}/g" ${KUBE_ROOT}/test/kubemark/hollow-node.json
-kubectl create -f ${KUBE_ROOT}/test/kubemark/kubemark-ns.json
-kubectl create -f ${KUBECONFIG_SECRET} --namespace="kubemark"
-kubectl create -f ${KUBE_ROOT}/test/kubemark/hollow-node.json --namespace="kubemark"
+sed "s/##numreplicas##/${NUM_NODES:-10}/g" "${KUBE_ROOT}"/test/kubemark/hollow-node_template.json > "${KUBE_ROOT}"/test/kubemark/hollow-node.json
+sed -i'' -e "s/##project##/${PROJECT}/g" "${KUBE_ROOT}"/test/kubemark/hollow-node.json
+kubectl create -f "${KUBE_ROOT}"/test/kubemark/kubemark-ns.json
+kubectl create -f "${KUBECONFIG_SECRET}" --namespace="kubemark"
+kubectl create -f "${KUBE_ROOT}"/test/kubemark/hollow-node.json --namespace="kubemark"
 
-rm ${KUBECONFIG_SECRET}
+rm "${KUBECONFIG_SECRET}"
 
 echo "Waiting for all HollowNodes to become Running..."
 echo "This can loop forever if something crashed."
-until [[ "$(kubectl --kubeconfig=${KUBE_ROOT}/test/kubemark/kubeconfig.loc get node | grep Ready | wc -l)" == "${NUM_NODES}" ]]; do
+until [[ "$(kubectl --kubeconfig="${KUBE_ROOT}"/test/kubemark/kubeconfig.loc get node | grep Ready | wc -l)" == "${NUM_NODES}" ]]; do
   echo -n .
   sleep 1
 done