From 64ccb1645d9f0b48d89d8af7aea62cb75491aff9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 20 Aug 2024 10:03:10 +0200 Subject: [PATCH] helm: Add a post-delete hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of using a lifecycle.preStop hook, as done when we're using the helm chart, let's add a post-delete hook to take care of properly cleaning up the node when uninstalling kata-deploy. The reason why the lifecycle.preStop hook would never work in our case is simply because each helm chart operation follows the Kubernetes "declarative" approach, meaning that an operation won't wait for its previous operation to successfully finish before being called, leading to us trying to access content that's defined by our RBAC, in an operation that was started before our RBAC was deleted, but having the RBAC being deleted before the operation actually started. Unfortunately this hook brings in some code duplication, mainly related to the RBAC parts, but that's not new as the same happens with our daemonset. 
Signed-off-by: Fabiano Fidêncio --- .../kata-deploy/templates/kata-deploy.yaml | 4 - .../templates/post-delete-job.yaml | 116 ++++++++++++++++++ .../kata-deploy/scripts/kata-deploy.sh | 30 ++++- 3 files changed, 145 insertions(+), 5 deletions(-) create mode 100644 tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/post-delete-job.yaml diff --git a/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy.yaml b/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy.yaml index 5d339e47b..041650144 100644 --- a/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy.yaml +++ b/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy.yaml @@ -22,10 +22,6 @@ spec: - name: kube-kata image: {{ .Values.image.reference }}:{{ default .Chart.AppVersion .Values.image.tag }} imagePullPolicy: {{ .Values.imagePullPolicy }} - lifecycle: - preStop: - exec: - command: ["bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh cleanup"] command: ["bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh install"] env: - name: NODE_NAME diff --git a/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/post-delete-job.yaml b/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/post-delete-job.yaml new file mode 100644 index 000000000..c49b34927 --- /dev/null +++ b/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/post-delete-job.yaml @@ -0,0 +1,116 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ .Chart.Name }}-sa-cleanup + namespace: {{ .Release.Namespace }} + annotations: + "helm.sh/hook": post-delete + "helm.sh/hook-weight": "-3" + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ .Chart.Name }}-role-cleanup + annotations: + "helm.sh/hook": post-delete + "helm.sh/hook-weight": "-2" + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +rules: +- 
apiGroups: [""] + resources: ["nodes"] + verbs: ["get", "patch"] +- apiGroups: ["node.k8s.io"] + resources: ["runtimeclasses"] + verbs: ["create", "delete", "get", "list", "patch", "update", "watch"] +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ .Chart.Name }}-rb-cleanup + annotations: + "helm.sh/hook": post-delete + "helm.sh/hook-weight": "-1" + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ .Chart.Name }}-role-cleanup +subjects: +- kind: ServiceAccount + name: {{ .Chart.Name }}-sa-cleanup + namespace: {{ .Release.Namespace }} +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ .Chart.Name }}-cleanup + namespace: {{ .Release.Namespace }} + annotations: + "helm.sh/hook": post-delete + "helm.sh/hook-weight": "0" + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + template: + metadata: + labels: + role: cleanup + spec: + serviceAccountName: {{ .Chart.Name }}-sa-cleanup + hostPID: true + containers: + - name: kube-kata-cleanup + image: {{ .Values.image.reference }}:{{ default .Chart.AppVersion .Values.image.tag }} + imagePullPolicy: IfNotPresent + command: ["bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh cleanup"] + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: DEBUG + value: {{ .Values.env.debug | quote }} + - name: SHIMS + value: {{ .Values.env.shims | quote }} + - name: DEFAULT_SHIM + value: {{ .Values.env.defaultShim | quote }} + - name: CREATE_RUNTIMECLASSES + value: {{ .Values.env.createRuntimeClasses | quote }} + - name: CREATE_DEFAULT_RUNTIMECLASS + value: {{ .Values.env.createDefaultRuntimeClass | quote }} + - name: ALLOWED_HYPERVISOR_ANNOTATIONS + value: {{ .Values.env.allowedHypervisorAnnotations | quote }} + - name: SNAPSHOTTER_HANDLER_MAPPING + value: {{ .Values.env.snapshotterHandlerMapping | quote }} + - name: 
AGENT_HTTPS_PROXY + value: {{ .Values.env.agentHttpsProxy | quote }} + - name: AGENT_NO_PROXY + value: {{ .Values.env.agentNoProxy | quote }} + - name: PULL_TYPE_MAPPING + value: {{ .Values.env.pullTypeMapping | quote }} + - name: HELM_POST_DELETE_HOOK + value: "true" +{{- with .Values.env.hostOS }} + - name: HOST_OS + value: {{ . | quote }} +{{- end }} + securityContext: + privileged: true + volumeMounts: + - name: crio-conf + mountPath: /etc/crio/ + - name: containerd-conf + mountPath: /etc/containerd/ + - name: host + mountPath: /host/ + volumes: + - name: crio-conf + hostPath: + path: /etc/crio/ + - name: containerd-conf + hostPath: + path: '{{- template "containerdConfPath" .Values }}' + - name: host + hostPath: + path: / + restartPolicy: Never diff --git a/tools/packaging/kata-deploy/scripts/kata-deploy.sh b/tools/packaging/kata-deploy/scripts/kata-deploy.sh index 03e602b1a..0780261d8 100755 --- a/tools/packaging/kata-deploy/scripts/kata-deploy.sh +++ b/tools/packaging/kata-deploy/scripts/kata-deploy.sh @@ -48,6 +48,8 @@ fi # doubled here as well, as: `/host//opt/kata` host_install_dir="/host${dest_dir}" +HELM_POST_DELETE_HOOK="${HELM_POST_DELETE_HOOK:-"false"}" + # If we fail for any reason a message will be displayed die() { msg="$*" @@ -560,6 +562,16 @@ function cleanup_cri_runtime() { ;; esac + [ "${HELM_POST_DELETE_HOOK}" == "false" ] && return + + # Only run this code in the HELM_POST_DELETE_HOOK + if [ "$1" == "k0s-worker" ] || [ "$1" == "k0s-controller" ]; then + # do nothing, k0s will automatically unload the config on the fly + : + else + host_systemctl daemon-reload + host_systemctl restart "$1" + fi } function cleanup_crio() { @@ -659,6 +671,7 @@ function main() { echo "* AGENT_NO_PROXY: ${AGENT_NO_PROXY}" echo "* PULL_TYPE_MAPPING: ${PULL_TYPE_MAPPING}" echo "* INSTALLATION_PREFIX: ${INSTALLATION_PREFIX}" + echo "* HELM_POST_DELETE_HOOK: ${HELM_POST_DELETE_HOOK}" # script requires that user is root euid=$(id -u) @@ -716,9 +729,24 @@ function 
main() { containerd_conf_file="${containerd_conf_tmpl_file}" fi + if [ "${HELM_POST_DELETE_HOOK}" == "true" ]; then + # Remove the label as the first thing, so we ensure no more kata-containers + # pods would be scheduled here. + kubectl label node "$NODE_NAME" katacontainers.io/kata-runtime- + fi + cleanup_cri_runtime "$runtime" - kubectl label node "$NODE_NAME" --overwrite katacontainers.io/kata-runtime=cleanup + if [ "${HELM_POST_DELETE_HOOK}" == "false" ]; then + # The Confidential Containers operator relies on this label + kubectl label node "$NODE_NAME" --overwrite katacontainers.io/kata-runtime=cleanup + fi remove_artifacts + + if [ "${HELM_POST_DELETE_HOOK}" == "true" ]; then + # After everything was cleaned up, there's no reason to continue + # and sleep forever. Let's just return success.. + exit 0 + fi ;; reset) reset_runtime $runtime