mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-07-01 22:50:54 +00:00
kata-deploy: add per-node staged cleanup for job mode
Add the uninstall counterpart to the install dispatcher for deploymentMode: job. On `helm uninstall`, a single pre-delete hook Job runs the kata-deploy-job-dispatcher, which enumerates the targeted nodes live and fans out one node-pinned cleanup Job per node that runs the install pipeline in reverse and exits: unlabel -> revert-cri (initContainers, run sequentially), then remove-artifacts (main container). Running as a pre-delete hook means the dispatcher ServiceAccount/RBAC and the kata-deploy host-mutation RBAC still exist while the Jobs run, so the unlabel stage retains node get/patch access. revert-cri and remove-artifacts are host-only operations (privileged nsenter / host mount) and need no extra cluster RBAC. Ordering mirrors install in reverse: unlabel first so the scheduler stops placing kata workloads here, then revert the CRI config + restart the runtime, then remove the on-host artifacts. Each stage is idempotent and skips when already undone, so partially-installed nodes and re-runs are safe. Uninstall node selection is deliberately SEPARATE from install (a dedicated job.cleanup.* block) and defaults to every node carrying the katacontainers.io/kata-runtime label (set by the install label stage) rather than re-evaluating the install selector. Because the cleanup dispatcher resolves nodes live when it runs, this stays robust to install-time selector drift (relabeled nodes, etc.) while remaining fully overridable via job.cleanup.nodes / job.cleanup.nodeSelector / job.cleanup.nodeSelectorExpressions. The default (daemonset) mode is unaffected. Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com> Assisted-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
committed by
Fabiano Fidêncio
parent
54878fa373
commit
3d732986d2
@@ -0,0 +1,112 @@
|
||||
{{- /*
Cleanup dispatcher (deploymentMode: job, pre-delete hook).

The mirror image of the install dispatcher: a single tiny pre-delete hook Job that
runs the dispatcher (kata-deploy-job-dispatcher) to fan out one node-pinned cleanup Job
per selected node, paced to job.parallelism. Each per-node Job runs the install
pipeline in reverse and exits:

  unlabel -> revert-cri   (initContainers, run sequentially)
  remove-artifacts        (main container)

Unlike the old per-node hook model, node selection here is resolved LIVE when the
hook runs at `helm uninstall` (the dispatcher does the lookup), not frozen at
install/upgrade time. That is why the default cleanup selector can be
"nodes carrying the katacontainers.io/kata-runtime label" (i.e. exactly the
nodes install actually labeled) - see values.yaml job.cleanup.

This runs while the release's kept ServiceAccount/RBAC and the job-templates
ConfigMap still exist; they are torn down only after pre-delete hooks complete.

Node targeting passed to the dispatcher:
  - job.cleanup.nodes non-empty     -> --nodes=<comma-joined list>
  - otherwise, non-empty selector   -> --node-selector=<rendered selector>
    (rendered from job.cleanup.nodeSelector / job.cleanup.nodeSelectorExpressions)
  - otherwise                       -> neither flag is passed
    (NOTE(review): the dispatcher's behavior with no node flag is not visible
    in this template - confirm against the dispatcher and values.yaml defaults.)

Resource names derive from the chart name, suffixed with env.multiInstallSuffix
when that value is set.
*/ -}}
{{- if eq (.Values.deploymentMode | default "daemonset") "job" }}
{{- $root := . }}
{{- $base := .Chart.Name }}
{{- if .Values.env.multiInstallSuffix }}
{{- $base = printf "%s-%s" .Chart.Name .Values.env.multiInstallSuffix }}
{{- end }}
{{- $sa := include "kata-deploy.dispatcherServiceAccountName" . }}
{{- $dispatcherName := printf "%s-cleanup-dispatcher" $base | trunc 63 | trimSuffix "-" }}
{{- $cleanup := .Values.job.cleanup | default dict }}
{{- $cNodes := $cleanup.nodes | default list }}
{{- $cSelector := include "kata-deploy.nodeLabelSelector" (dict "eq" ($cleanup.nodeSelector | default dict) "exprs" ($cleanup.nodeSelectorExpressions | default list)) }}
---
apiVersion: batch/v1
kind: Job
metadata:
  name: {{ $dispatcherName }}
  # Release name/namespace are quoted so that an all-digit or boolean-looking
  # value is not re-typed by the YAML parser (labels and namespace must be strings).
  namespace: {{ $root.Release.Namespace | quote }}
  labels:
    app.kubernetes.io/name: {{ include "kata-deploy.name" $root }}
    app.kubernetes.io/instance: {{ $root.Release.Name | quote }}
    kata-deploy/dispatcher: cleanup
  annotations:
    "helm.sh/hook": pre-delete
    "helm.sh/hook-weight": "5"
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
spec:
  # A failed dispatcher pod is not retried by the Job controller.
  backoffLimit: 0
  ttlSecondsAfterFinished: {{ $root.Values.job.ttlSecondsAfterFinished }}
  template:
    metadata:
      labels:
        app.kubernetes.io/name: {{ include "kata-deploy.name" $root }}
        app.kubernetes.io/instance: {{ $root.Release.Name | quote }}
        kata-deploy/dispatcher: cleanup
    spec:
      {{- with $root.Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ $sa }}
      restartPolicy: Never
      # The dispatcher never touches the host; it is a plain API client. Lock the
      # pod down so a compromise cannot escalate beyond its (minimal) API rights.
      securityContext:
        runAsNonRoot: true
        runAsUser: 65532
        runAsGroup: 65532
        seccompProfile:
          type: RuntimeDefault
      {{- with $root.Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with $root.Values.priorityClassName }}
      priorityClassName: {{ . | quote }}
      {{- end }}
      containers:
        - name: dispatcher
          image: {{ include "kata-deploy.dispatcherImage" $root }}
          imagePullPolicy: {{ $root.Values.imagePullPolicy }}
          securityContext:
            privileged: false
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            capabilities:
              drop:
                - ALL
          command:
            - /usr/bin/kata-deploy-job-dispatcher
            - "--job-template=/etc/kata-job/cleanup-job.yaml"
            - "--name-prefix={{ $base }}-cleanup"
            - "--owner-job-name={{ $dispatcherName }}"
            - "--parallelism={{ $root.Values.job.parallelism }}"
            # An explicit node list takes precedence over the label selector.
            {{- if $cNodes }}
            - "--nodes={{ join "," $cNodes }}"
            {{- else if $cSelector }}
            - "--node-selector={{ $cSelector }}"
            {{- end }}
          env:
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
          volumeMounts:
            # Provides /etc/kata-job/cleanup-job.yaml referenced by --job-template.
            - name: job-templates
              mountPath: /etc/kata-job
              readOnly: true
      volumes:
        - name: job-templates
          configMap:
            name: {{ printf "%s-job-templates" $base | trunc 63 | trimSuffix "-" }}
{{- end }}
|
||||
Reference in New Issue
Block a user