From 6678a3f013f4fab6a7e713679029894ad93ef654 Mon Sep 17 00:00:00 2001
From: Justin Santa Barbara
Date: Thu, 10 Mar 2016 06:37:29 -0500
Subject: [PATCH] AWS kube-up: add retries around delete-security-group

If we deleted an ELB, we often fail to delete the security group,
because deleting the ELB is invisibly asynchronous. Add a retry loop
around delete-security-group to work around this.

Fix #21147
---
 cluster/aws/util.sh | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/cluster/aws/util.sh b/cluster/aws/util.sh
index 8a68e9d8106..bdec8c0b31e 100755
--- a/cluster/aws/util.sh
+++ b/cluster/aws/util.sh
@@ -781,6 +781,24 @@ function release-elastic-ip {
   fi
 }
 
+# Deletes a security group, retrying up to 20 times (3s apart); exits 1 if all attempts fail
+# usage: delete_security_group <sg_id>
+function delete_security_group {
+  local -r sg_id=${1}
+
+  echo "Deleting security group: ${sg_id}"
+
+  # We retry in case there's a dependent resource - typically an ELB
+  local n=0
+  until [[ ${n} -ge 20 ]]; do
+    $AWS_CMD delete-security-group --group-id "${sg_id}" > "${LOG}" && return
+    n=$((n + 1))
+    sleep 3
+  done
+  echo "Unable to delete security group: ${sg_id}"
+  exit 1
+}
+
 function ssh-key-setup {
   if [[ ! -f "$AWS_SSH_KEY" ]]; then
     ssh-keygen -f "$AWS_SSH_KEY" -N ''
@@ -1372,8 +1390,7 @@ function kube-down {
         continue
       fi
 
-      echo "Deleting security group: ${sg_id}"
-      $AWS_CMD delete-security-group --group-id ${sg_id} > $LOG
+      delete_security_group "${sg_id}"
     done
 
     subnet_ids=$($AWS_CMD describe-subnets \