From 60d10e9e2773b21f2a7e06e59c4ff289d442f99c Mon Sep 17 00:00:00 2001
From: "Madhusudan.C.S"
Date: Sun, 4 Jun 2017 13:07:32 -0700
Subject: [PATCH 1/3] Do not delete PVs with --all, instead delete them
 selectively.

PV is a non-namespaced resource. Running `kubectl delete pv --all`,
even with `--namespace`, deletes all the PVs in the cluster. This is a
dangerous operation and should not be performed this way.

Instead, we now retrieve the PVs bound to the PVCs in the namespace we
are deleting and delete only those PVs.

Fixes issue #46380.
---
 federation/cluster/common.sh | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/federation/cluster/common.sh b/federation/cluster/common.sh
index 37fe14a6ec4..40e074f0012 100644
--- a/federation/cluster/common.sh
+++ b/federation/cluster/common.sh
@@ -414,8 +414,15 @@ function cleanup-federation-api-objects {
   echo "Cleaning Federation control plane objects"
   # Delete all resources with the federated-cluster label.
   $host_kubectl delete pods,svc,rc,deployment,secret -lapp=federated-cluster
+
+  # Delete all PVs bound to PVCs in FEDERATION_NAMESPACE
+  pvs=$($host_kubectl get pvc --namespace=${FEDERATION_NAMESPACE} -o jsonpath='{.items[*].spec.volumeName}')
+  while $host_kubectl delete pv ${pvs} >/dev/null 2>&1; do
+    sleep 2
+  done
+
   # Delete all resources in FEDERATION_NAMESPACE.
-  $host_kubectl delete pvc,pv,pods,svc,rc,deployment,secret --namespace=${FEDERATION_NAMESPACE} --all
+  $host_kubectl delete pvc,pods,svc,rc,deployment,secret --namespace=${FEDERATION_NAMESPACE} --all
   $host_kubectl delete ns ${FEDERATION_NAMESPACE}
 
   # Poll until the namespace is completely gone.

From c30afde32ee125c63478ca6613bce273c09c2e41 Mon Sep 17 00:00:00 2001
From: "Madhusudan.C.S"
Date: Sun, 4 Jun 2017 13:19:59 -0700
Subject: [PATCH 2/3] Delete federation system namespace from all the
 federated clusters.

This is a big hammer. `kubefed join` creates the federation-system
namespace in the joining clusters if it doesn't already exist. This
namespace usually exists in the host cluster and hence cannot be
deleted while unjoining. So, to be safe, we don't delete the
federation-system namespace from any federated cluster while unjoining
it. This causes problems in our test environment if resources are left
behind in the namespace. Therefore we delete the federation-system
namespace from all the clusters here.
---
 federation/cluster/common.sh | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/federation/cluster/common.sh b/federation/cluster/common.sh
index 40e074f0012..49223f1a188 100644
--- a/federation/cluster/common.sh
+++ b/federation/cluster/common.sh
@@ -411,6 +411,10 @@ function push-federation-images {
 }
 
 function cleanup-federation-api-objects {
+  # This is a cleanup function. We cannot stop on errors here. So disable
+  # errexit in this function.
+  set +o errexit
+
   echo "Cleaning Federation control plane objects"
   # Delete all resources with the federated-cluster label.
   $host_kubectl delete pods,svc,rc,deployment,secret -lapp=federated-cluster
@@ -423,10 +427,19 @@ function cleanup-federation-api-objects {
 
   # Delete all resources in FEDERATION_NAMESPACE.
   $host_kubectl delete pvc,pods,svc,rc,deployment,secret --namespace=${FEDERATION_NAMESPACE} --all
-  $host_kubectl delete ns ${FEDERATION_NAMESPACE}
 
-  # Poll until the namespace is completely gone.
-  while $host_kubectl get namespace ${FEDERATION_NAMESPACE} >/dev/null 2>&1; do
-    sleep 5
+  # This is a big hammer. We get rid of federation-system namespace from
+  # all the clusters
+  for context in $(federation_cluster_contexts); do
+    kube::log::status "Removing namespace \"${FEDERATION_NAMESPACE}\" from \"${context}\""
+    (
+      # Try deleting until the namespace is completely gone.
+      while $host_kubectl --context="${context}" delete namespace ${FEDERATION_NAMESPACE} >/dev/null 2>&1; do
+        sleep 5
+      done
+      kube::log::status "Removed namespace \"${FEDERATION_NAMESPACE}\" from \"${context}\""
+    ) &
   done
+  wait
+  set -o errexit
 }

From c3d5113365d92fdb8113b9872ad0a8e7551c23c4 Mon Sep 17 00:00:00 2001
From: "Madhusudan.C.S"
Date: Sun, 4 Jun 2017 13:19:59 -0700
Subject: [PATCH 3/3] Delete cluster roles and their bindings from federated
 clusters.

This is part of the namespace deletion big hammer. `kubefed join`
creates not just the federation-system namespace, but also a cluster
role and a cluster role binding in the joining clusters. Sometimes
unjoin fails to delete them, so we use the same big hammer here to
delete them as well.

This smells like a real problem in kubefed and needs investigation.
This is a short-term fix to unblock the submit queue.
---
 federation/cluster/common.sh          | 19 +++++++++++++++++--
 federation/cluster/federation-down.sh |  2 +-
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/federation/cluster/common.sh b/federation/cluster/common.sh
index 49223f1a188..a13c69a9f41 100644
--- a/federation/cluster/common.sh
+++ b/federation/cluster/common.sh
@@ -431,13 +431,28 @@ function cleanup-federation-api-objects {
   # This is a big hammer. We get rid of federation-system namespace from
   # all the clusters
   for context in $(federation_cluster_contexts); do
-    kube::log::status "Removing namespace \"${FEDERATION_NAMESPACE}\" from \"${context}\""
     (
+      local -r role="federation-controller-manager:${FEDERATION_NAME}-${context}-${HOST_CLUSTER_CONTEXT}"
+      kube::log::status "Removing namespace \"${FEDERATION_NAMESPACE}\", cluster role \"${role}\" and cluster role binding \"${role}\" from \"${context}\""
       # Try deleting until the namespace is completely gone.
-      while $host_kubectl --context="${context}" delete namespace ${FEDERATION_NAMESPACE} >/dev/null 2>&1; do
+      while $host_kubectl --context="${context}" delete namespace "${FEDERATION_NAMESPACE}" >/dev/null 2>&1; do
+        # It is usually slower to remove a namespace because it involves
+        # performing a cascading deletion of all the resources in the
+        # namespace. So we sleep a little longer than other resources
+        # before retrying
         sleep 5
       done
       kube::log::status "Removed namespace \"${FEDERATION_NAMESPACE}\" from \"${context}\""
+
+      while $host_kubectl --context="${context}" delete clusterrole "${role}" >/dev/null 2>&1; do
+        sleep 2
+      done
+      kube::log::status "Removed cluster role \"${role}\" from \"${context}\""
+
+      while $host_kubectl --context="${context}" delete clusterrolebinding "${role}" >/dev/null 2>&1; do
+        sleep 2
+      done
+      kube::log::status "Removed cluster role binding \"${role}\" from \"${context}\""
     ) &
   done
   wait

diff --git a/federation/cluster/federation-down.sh b/federation/cluster/federation-down.sh
index be91e974f60..8aca4580399 100755
--- a/federation/cluster/federation-down.sh
+++ b/federation/cluster/federation-down.sh
@@ -27,7 +27,7 @@ source "${KUBE_ROOT}/federation/cluster/common.sh"
 
 # federation_clusters returns a list of all the clusters in
 # federation, if at all the federation control plane exists
-# and there are any clusters registerd.
+# and there are any clusters registered.
 function federation_clusters() {
   if clusters=$("${KUBE_ROOT}/cluster/kubectl.sh" \
       --context="${FEDERATION_KUBE_CONTEXT}" \
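
For reference, the cleanup function in federation/cluster/common.sh should read roughly as
follows once all three patches are applied. This is a sketch reconstructed from the hunks
above, not a separate change; it assumes that $host_kubectl, FEDERATION_NAMESPACE,
FEDERATION_NAME, HOST_CLUSTER_CONTEXT, federation_cluster_contexts and kube::log::status
are defined elsewhere in the federation scripts, as the diffs imply.

function cleanup-federation-api-objects {
  # This is a cleanup function. We cannot stop on errors here. So disable
  # errexit in this function.
  set +o errexit

  echo "Cleaning Federation control plane objects"
  # Delete all resources with the federated-cluster label.
  $host_kubectl delete pods,svc,rc,deployment,secret -lapp=federated-cluster

  # Delete only the PVs bound to PVCs in FEDERATION_NAMESPACE (PVs are
  # cluster-scoped, so --all would delete every PV in the cluster).
  pvs=$($host_kubectl get pvc --namespace=${FEDERATION_NAMESPACE} -o jsonpath='{.items[*].spec.volumeName}')
  while $host_kubectl delete pv ${pvs} >/dev/null 2>&1; do
    sleep 2
  done

  # Delete all resources in FEDERATION_NAMESPACE.
  $host_kubectl delete pvc,pods,svc,rc,deployment,secret --namespace=${FEDERATION_NAMESPACE} --all

  # Big hammer: remove the federation-system namespace, cluster role and
  # cluster role binding from every cluster, retrying until they are gone.
  for context in $(federation_cluster_contexts); do
    (
      local -r role="federation-controller-manager:${FEDERATION_NAME}-${context}-${HOST_CLUSTER_CONTEXT}"
      kube::log::status "Removing namespace \"${FEDERATION_NAMESPACE}\", cluster role \"${role}\" and cluster role binding \"${role}\" from \"${context}\""
      # Namespace deletion cascades to the resources it contains, so sleep
      # a little longer between retries than for other resources.
      while $host_kubectl --context="${context}" delete namespace "${FEDERATION_NAMESPACE}" >/dev/null 2>&1; do
        sleep 5
      done
      kube::log::status "Removed namespace \"${FEDERATION_NAMESPACE}\" from \"${context}\""

      while $host_kubectl --context="${context}" delete clusterrole "${role}" >/dev/null 2>&1; do
        sleep 2
      done
      kube::log::status "Removed cluster role \"${role}\" from \"${context}\""

      while $host_kubectl --context="${context}" delete clusterrolebinding "${role}" >/dev/null 2>&1; do
        sleep 2
      done
      kube::log::status "Removed cluster role binding \"${role}\" from \"${context}\""
    ) &
  done
  # Per-cluster cleanup runs in background subshells; wait for all of them.
  wait
  set -o errexit
}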