Kubemark scripts retry failed gcloud commands

gmarek 2016-01-22 16:50:07 +01:00
parent 171c2ecbe7
commit 35b317c094
2 changed files with 66 additions and 46 deletions

View File

@@ -16,6 +16,7 @@
 source "${KUBE_ROOT}/cluster/kubemark/config-default.sh"
 source "${KUBE_ROOT}/cluster/kubemark/util.sh"
+source "${KUBE_ROOT}/cluster/kube-env.sh"
 detect-project &> /dev/null
 export PROJECT
@@ -23,3 +24,22 @@ export PROJECT
 MASTER_NAME="${INSTANCE_PREFIX}-kubemark-master"
 MASTER_TAG="kubemark-master"
 EVENT_STORE_NAME="${INSTANCE_PREFIX}-event-store"
+
+RETRIES=3
+
+# Runs gcloud compute command with the given parameters. Up to $RETRIES will be made
+# to execute the command.
+# arguments:
+# $@: all stuff that goes after 'gcloud compute '
+function run-gcloud-compute-with-retries {
+  for attempt in $(seq 1 ${RETRIES}); do
+    if ! gcloud compute $@; then
+      echo -e "${color_yellow}Attempt $(($attempt+1)) failed to $1 $2 $3. Retrying.${color_norm}" >& 2
+      sleep $(($attempt * 5))
+    else
+      return 0
+    fi
+  done
+  echo -e "${color_red} Failed to $1 $2 $3.${color_norm}" >& 2
+  exit 1
+}
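
For context, and not part of the commit: a minimal, self-contained sketch of how this retry wrapper behaves. `fake-gcloud` and `run-with-retries` are hypothetical stand-ins (the real wrapper prefixes `gcloud compute` and uses the `color_*` variables sourced elsewhere in the kubemark scripts); with RETRIES=3 the wrapper sleeps 5s and then 10s between attempts, and exits the whole script non-zero if every attempt fails.

#!/bin/bash
# Sketch only: same retry/linear-backoff shape as run-gcloud-compute-with-retries,
# with the gcloud prefix and colors removed so it can run anywhere.
RETRIES=3
FAILS_LEFT=2    # make the stand-in command fail twice, then succeed

fake-gcloud() {                       # hypothetical stand-in for 'gcloud compute'
  if [ "${FAILS_LEFT}" -gt 0 ]; then
    FAILS_LEFT=$((FAILS_LEFT - 1))
    echo "transient failure" >&2
    return 1
  fi
  echo "ok: $*"
}

run-with-retries() {
  for attempt in $(seq 1 ${RETRIES}); do
    if ! "$@"; then
      echo "Attempt ${attempt} failed to $1 $2 $3. Retrying." >&2
      sleep $((attempt * 5))          # 5s, then 10s, mirroring the wrapper above
    else
      return 0
    fi
  done
  echo "Failed to $1 $2 $3." >&2
  exit 1
}

run-with-retries fake-gcloud disks create kubemark-master-pd   # prints "ok: ..." on the 3rd attempt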

View File

@@ -46,39 +46,39 @@ cd $CURR_DIR

 GCLOUD_COMMON_ARGS="--project ${PROJECT} --zone ${ZONE}"

-gcloud compute disks create "${MASTER_NAME}-pd" \
+run-gcloud-compute-with-retries disks create "${MASTER_NAME}-pd" \
   ${GCLOUD_COMMON_ARGS} \
   --type "${MASTER_DISK_TYPE}" \
   --size "${MASTER_DISK_SIZE}"

-gcloud compute instances create "${MASTER_NAME}" \
+run-gcloud-compute-with-retries instances create "${MASTER_NAME}" \
   ${GCLOUD_COMMON_ARGS} \
   --machine-type "${MASTER_SIZE}" \
   --image-project="${MASTER_IMAGE_PROJECT}" \
   --image "${MASTER_IMAGE}" \
   --tags "${MASTER_TAG}" \
   --network "${NETWORK}" \
   --scopes "storage-ro,compute-rw,logging-write" \
   --disk "name=${MASTER_NAME}-pd,device-name=master-pd,mode=rw,boot=no,auto-delete=no"

-gcloud compute firewall-rules create "${INSTANCE_PREFIX}-kubemark-master-https" \
+run-gcloud-compute-with-retries firewall-rules create "${INSTANCE_PREFIX}-kubemark-master-https" \
   --project "${PROJECT}" \
   --network "${NETWORK}" \
   --source-ranges "0.0.0.0/0" \
   --target-tags "${MASTER_TAG}" \
   --allow "tcp:443"

 MASTER_IP=$(gcloud compute instances describe ${MASTER_NAME} \
   --zone="${ZONE}" --project="${PROJECT}" | grep natIP: | cut -f2 -d":" | sed "s/ //g")

 if [ "${SEPARATE_EVENT_MACHINE:-false}" == "true" ]; then
   EVENT_STORE_NAME="${INSTANCE_PREFIX}-event-store"

-  gcloud compute disks create "${EVENT_STORE_NAME}-pd" \
+  run-gcloud-compute-with-retries disks create "${EVENT_STORE_NAME}-pd" \
     ${GCLOUD_COMMON_ARGS} \
     --type "${MASTER_DISK_TYPE}" \
     --size "${MASTER_DISK_SIZE}"

-  gcloud compute instances create "${EVENT_STORE_NAME}" \
+  run-gcloud-compute-with-retries instances create "${EVENT_STORE_NAME}" \
     ${GCLOUD_COMMON_ARGS} \
     --machine-type "${MASTER_SIZE}" \
     --image-project="${MASTER_IMAGE_PROJECT}" \
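
An aside on the MASTER_IP line in the hunk above, not part of this commit: the grep/cut/sed pipeline scrapes gcloud's human-readable output. A hedged alternative sketch, assuming a gcloud release with `--format` value() projections and an instance with a single external access config:

# Assumption: the natIP field path below matches the describe output for this instance.
MASTER_IP=$(gcloud compute instances describe "${MASTER_NAME}" \
  --zone="${ZONE}" --project="${PROJECT}" \
  --format='value(networkInterfaces[0].accessConfigs[0].natIP)')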
@@ -95,7 +95,7 @@ if [ "${SEPARATE_EVENT_MACHINE:-false}" == "true" ]; then
     sleep 1
   done

-  gcloud compute ssh ${EVENT_STORE_NAME} --zone=${ZONE} --project="${PROJECT}" \
+  gcloud compute ssh "${EVENT_STORE_NAME}" --zone="${ZONE}" --project="${PROJECT}" \
     --command="sudo docker run --net=host -d gcr.io/google_containers/etcd:2.0.12 /usr/local/bin/etcd \
       --listen-peer-urls http://127.0.0.1:2380 \
       --addr=127.0.0.1:4002 \
@@ -117,17 +117,17 @@ until gcloud compute ssh --zone="${ZONE}" --project="${PROJECT}" "${MASTER_NAME}
   sleep 1
 done

-gcloud compute ssh --zone=${ZONE} --project="${PROJECT}" ${MASTER_NAME} \
+gcloud compute ssh --zone="${ZONE}" --project="${PROJECT}" "${MASTER_NAME}" \
   --command="sudo mkdir /srv/kubernetes -p && \
     sudo bash -c \"echo ${MASTER_CERT_BASE64} | base64 -d > /srv/kubernetes/server.cert\" && \
     sudo bash -c \"echo ${MASTER_KEY_BASE64} | base64 -d > /srv/kubernetes/server.key\" && \
     sudo bash -c \"echo ${CA_CERT_BASE64} | base64 -d > /srv/kubernetes/ca.crt\" && \
     sudo bash -c \"echo ${KUBECFG_CERT_BASE64} | base64 -d > /srv/kubernetes/kubecfg.crt\" && \
     sudo bash -c \"echo ${KUBECFG_KEY_BASE64} | base64 -d > /srv/kubernetes/kubecfg.key\" && \
     sudo bash -c \"echo \"${KUBE_BEARER_TOKEN},admin,admin\" > /srv/kubernetes/known_tokens.csv\" && \
     sudo bash -c \"echo \"${KUBELET_TOKEN},kubelet,kubelet\" >> /srv/kubernetes/known_tokens.csv\" && \
     sudo bash -c \"echo \"${KUBE_PROXY_TOKEN},kube_proxy,kube_proxy\" >> /srv/kubernetes/known_tokens.csv\" && \
     sudo bash -c \"echo admin,admin,admin > /srv/kubernetes/basic_auth.csv\""

 if [ "${RUN_FROM_DISTRO}" == "false" ]; then
   gcloud compute copy-files --zone="${ZONE}" --project="${PROJECT}" \
@@ -143,7 +143,7 @@ else
     "${MASTER_NAME}":~
 fi

-gcloud compute ssh ${MASTER_NAME} --zone=${ZONE} --project="${PROJECT}" \
+gcloud compute ssh "${MASTER_NAME}" --zone="${ZONE}" --project="${PROJECT}" \
   --command="chmod a+x configure-kubectl.sh && chmod a+x start-kubemark-master.sh && sudo ./start-kubemark-master.sh ${EVENT_STORE_IP:-127.0.0.1}"

 # create kubeconfig for Kubelet:
@@ -152,12 +152,12 @@ kind: Config
 users:
 - name: kubelet
   user:
-    client-certificate-data: ${KUBELET_CERT_BASE64}
-    client-key-data: ${KUBELET_KEY_BASE64}
+    client-certificate-data: "${KUBELET_CERT_BASE64}"
+    client-key-data: "${KUBELET_KEY_BASE64}"
 clusters:
 - name: kubemark
   cluster:
-    certificate-authority-data: ${CA_CERT_BASE64}
+    certificate-authority-data: "${CA_CERT_BASE64}"
     server: https://${MASTER_IP}
 contexts:
 - context:
@@ -188,14 +188,14 @@ kind: Config
 users:
 - name: admin
   user:
-    client-certificate-data: ${KUBECFG_CERT_BASE64}
-    client-key-data: ${KUBECFG_KEY_BASE64}
+    client-certificate-data: "${KUBECFG_CERT_BASE64}"
+    client-key-data: "${KUBECFG_KEY_BASE64}"
     username: admin
     password: admin
 clusters:
 - name: kubemark
   cluster:
-    certificate-authority-data: ${CA_CERT_BASE64}
+    certificate-authority-data: "${CA_CERT_BASE64}"
     server: https://${MASTER_IP}
 contexts:
 - context:
@@ -205,17 +205,17 @@ contexts:
   current-context: kubemark-context
 EOF

-sed "s/##numreplicas##/${NUM_NODES:-10}/g" ${KUBE_ROOT}/test/kubemark/hollow-node_template.json > ${KUBE_ROOT}/test/kubemark/hollow-node.json
-sed -i'' -e "s/##project##/${PROJECT}/g" ${KUBE_ROOT}/test/kubemark/hollow-node.json
+sed "s/##numreplicas##/${NUM_NODES:-10}/g" "${KUBE_ROOT}"/test/kubemark/hollow-node_template.json > "${KUBE_ROOT}"/test/kubemark/hollow-node.json
+sed -i'' -e "s/##project##/${PROJECT}/g" "${KUBE_ROOT}"/test/kubemark/hollow-node.json

-kubectl create -f ${KUBE_ROOT}/test/kubemark/kubemark-ns.json
-kubectl create -f ${KUBECONFIG_SECRET} --namespace="kubemark"
-kubectl create -f ${KUBE_ROOT}/test/kubemark/hollow-node.json --namespace="kubemark"
-rm ${KUBECONFIG_SECRET}
+kubectl create -f "${KUBE_ROOT}"/test/kubemark/kubemark-ns.json
+kubectl create -f "${KUBECONFIG_SECRET}" --namespace="kubemark"
+kubectl create -f "${KUBE_ROOT}"/test/kubemark/hollow-node.json --namespace="kubemark"
+rm "${KUBECONFIG_SECRET}"

 echo "Waiting for all HollowNodes to become Running..."
 echo "This can loop forever if something crashed."
-until [[ "$(kubectl --kubeconfig=${KUBE_ROOT}/test/kubemark/kubeconfig.loc get node | grep Ready | wc -l)" == "${NUM_NODES}" ]]; do
+until [[ "$(kubectl --kubeconfig="${KUBE_ROOT}"/test/kubemark/kubeconfig.loc get node | grep Ready | wc -l)" == "${NUM_NODES}" ]]; do
   echo -n .
   sleep 1
 done
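
A final aside, not part of the commit: the sed lines in the last hunk fill the ##numreplicas## and ##project## placeholders in hollow-node_template.json before the manifest is passed to kubectl. A minimal sketch of that substitution, using a hypothetical template fragment (the real template file is not shown in this diff):

# Hypothetical stand-in for hollow-node_template.json; only the placeholder mechanics matter.
cat > /tmp/hollow-node_template.json <<'EOF'
{"replicas": ##numreplicas##, "project": "##project##"}
EOF

NUM_NODES=10
PROJECT=my-gce-project

sed "s/##numreplicas##/${NUM_NODES:-10}/g" /tmp/hollow-node_template.json > /tmp/hollow-node.json
sed -i'' -e "s/##project##/${PROJECT}/g" /tmp/hollow-node.json
cat /tmp/hollow-node.json   # -> {"replicas": 10, "project": "my-gce-project"}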