kata-deploy: helm: Add optional post-install verification

Add optional verification that runs after kata-deploy installation.
When a pod spec is provided via --set-file verification.pod=<file>,
a verification job runs after install/upgrade to validate the deployment.

The user is fully responsible for the verification pod content:
- Pod name, runtimeClassName, annotations, and verification logic
- Pod must exit 0 on success, non-zero on failure

The verification job simply:
1. Waits for kata-deploy DaemonSet to be ready
2. Applies the user-provided pod spec
3. Waits for the pod to complete
4. Shows logs and cleans up

Usage:
  helm install kata-deploy ... \
    --set-file verification.pod=/path/to/your-pod.yaml

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
This commit is contained in:
Fabiano Fidêncio
2026-01-14 21:08:51 +01:00
parent c0cca81993
commit a188f04d75
3 changed files with 231 additions and 0 deletions

View File

@@ -3,6 +3,46 @@
# SPDX-License-Identifier: Apache-2.0
#
{{/*
Chart name for use in resource names and labels: .Values.nameOverride when
set, otherwise .Chart.Name. The result is cut to 63 characters and a trailing
"-" is removed.
*/}}
{{- define "kata-deploy.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Fully qualified app name.
Resolution order:
  1. .Values.fullnameOverride, when set.
  2. The release name alone, when it already contains the chart name
     (or .Values.nameOverride).
  3. "<release name>-<chart name>" otherwise.
Whichever candidate is picked is cut to 63 characters and a trailing "-"
is removed.
*/}}
{{- define "kata-deploy.fullname" -}}
{{- $full := "" }}
{{- if .Values.fullnameOverride }}
{{- $full = .Values.fullnameOverride }}
{{- else }}
{{- $chartName := .Values.nameOverride | default .Chart.Name }}
{{- if contains $chartName .Release.Name }}
{{- $full = .Release.Name }}
{{- else }}
{{- $full = printf "%s-%s" .Release.Name $chartName }}
{{- end }}
{{- end }}
{{- $full | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels: the chart identifier ("<name>-<version>", with "+" in the
version replaced by "_"), the selector labels, and the managing service.
*/}}
{{- define "kata-deploy.labels" -}}
helm.sh/chart: {{ printf "%s-%s" .Chart.Name (replace "+" "_" .Chart.Version) }}
{{ include "kata-deploy.selectorLabels" . }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels: the chart name and the release name.
Both values are quoted so that a name which looks like a number or a boolean
(for example a release called "123") is still rendered as a YAML string;
Kubernetes label values must be strings.
*/}}
{{- define "kata-deploy.selectorLabels" -}}
app.kubernetes.io/name: {{ include "kata-deploy.name" . | quote }}
app.kubernetes.io/instance: {{ .Release.Name | quote }}
{{- end }}
{{/*
Set the correct containerd conf path depending on the k8s distribution
*/}}

View File

@@ -0,0 +1,161 @@
{{- /*
Copyright (c) 2026 The Kata Containers Authors
SPDX-License-Identifier: Apache-2.0
Verification Job - runs after kata-deploy installation to validate Kata is working.
Only created when verification.pod is provided.
*/ -}}
{{- if .Values.verification.pod }}
# ConfigMap carrying the user-supplied verification pod manifest.
# The content of .Values.verification.pod is stored verbatim under the
# pod-spec.yaml key. Hook weight -5 places it before the verification Job
# (weight 0) in the post-install/post-upgrade hook order.
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "kata-deploy.fullname" . }}-verification-spec
  namespace: {{ .Release.Namespace }}
  annotations:
    "helm.sh/hook": post-install,post-upgrade
    "helm.sh/hook-weight": "-5"
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
  labels:
    {{- include "kata-deploy.labels" . | nindent 4 }}
data:
  pod-spec.yaml: |
    {{- .Values.verification.pod | nindent 4 }}
---
# Verification Job, run as a post-install/post-upgrade hook (weight 0).
# It waits for the kata-deploy DaemonSet rollout, applies the user-provided
# pod manifest mounted at /config/pod-spec.yaml, waits for that pod to reach a
# terminal phase, prints its logs and deletes it again.
apiVersion: batch/v1
kind: Job
metadata:
  name: {{ include "kata-deploy.fullname" . }}-verify
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "kata-deploy.labels" . | nindent 4 }}
    app.kubernetes.io/component: verification
  annotations:
    "helm.sh/hook": post-install,post-upgrade
    "helm.sh/hook-weight": "0"
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded,hook-failed
spec:
  backoffLimit: 3
  ttlSecondsAfterFinished: 3600
  template:
    metadata:
      labels:
        {{- include "kata-deploy.selectorLabels" . | nindent 8 }}
        app.kubernetes.io/component: verification
    spec:
      restartPolicy: Never
      serviceAccountName: {{ include "kata-deploy.fullname" . }}-verification
      containers:
        - name: verify
          image: quay.io/kata-containers/kubectl:latest
          command:
            - bash
            - -c
            - |
              set -e
              VERIFY_NS="{{ .Values.verification.namespace }}"
              TIMEOUT="{{ .Values.verification.timeout }}"
              echo "=== Kata Deploy Verification ==="
              echo "Namespace: ${VERIFY_NS}"
              echo "Timeout: ${TIMEOUT}s"
              echo ""
              # Wait for kata-deploy DaemonSet to be ready
              echo "Waiting for kata-deploy DaemonSet to be ready..."
              {{- if .Values.env.multiInstallSuffix }}
              kubectl rollout status daemonset/{{ .Chart.Name }}-{{ .Values.env.multiInstallSuffix }} -n {{ .Release.Namespace }} --timeout=600s
              {{- else }}
              kubectl rollout status daemonset/{{ .Chart.Name }} -n {{ .Release.Namespace }} --timeout=600s
              {{- end }}
              echo ""
              # The cleanup below deletes without waiting and the Job may be
              # retried (backoffLimit), so a pod from a previous attempt can
              # still exist or be terminating. Remove it and wait, so the
              # apply below always creates a fresh pod.
              echo "Removing any verification pod left over from a previous attempt..."
              kubectl delete -n "${VERIFY_NS}" -f /config/pod-spec.yaml --ignore-not-found --wait=true --timeout=120s
              echo ""
              echo "Creating verification pod..."
              POD_RESOURCE=$(kubectl apply -n "${VERIFY_NS}" -f /config/pod-spec.yaml -o name)
              POD_NAME="${POD_RESOURCE#pod/}"
              echo "Created: ${POD_NAME}"
              # Ensure cleanup runs on any exit (success, failure, or signal)
              cleanup() {
                echo ""
                echo "Cleaning up verification pod..."
                kubectl delete pod "${POD_NAME}" -n "${VERIFY_NS}" --ignore-not-found --wait=false
              }
              trap cleanup EXIT
              echo ""
              echo "Waiting for verification pod to complete..."
              # Poll for either terminal phase. Waiting only for "Succeeded"
              # would block for the whole timeout when the pod has already
              # failed. SECONDS is the bash built-in elapsed-time counter.
              SECONDS=0
              PHASE=""
              while :; do
                PHASE="$(kubectl get pod "${POD_NAME}" -n "${VERIFY_NS}" -o jsonpath='{.status.phase}' 2>/dev/null || true)"
                case "${PHASE}" in
                  Succeeded|Failed) break ;;
                esac
                if [ "${SECONDS}" -ge "${TIMEOUT}" ]; then
                  echo "Timed out after ${TIMEOUT}s waiting for verification pod (last phase: ${PHASE:-unknown})"
                  break
                fi
                sleep 2
              done
              if [ "${PHASE}" = "Succeeded" ]; then
                echo ""
                echo "=== Verification Pod Logs ==="
                kubectl logs "${POD_NAME}" -n "${VERIFY_NS}" || true
                echo ""
                echo "SUCCESS: Verification passed"
                exit 0
              else
                echo ""
                echo "=== Verification Failed ==="
                echo "Pod status:"
                kubectl describe pod "${POD_NAME}" -n "${VERIFY_NS}" || true
                echo ""
                echo "Pod logs:"
                kubectl logs "${POD_NAME}" -n "${VERIFY_NS}" || true
                exit 1
              fi
          volumeMounts:
            - name: pod-spec
              mountPath: /config
      volumes:
        - name: pod-spec
          configMap:
            name: {{ include "kata-deploy.fullname" . }}-verification-spec
---
# ServiceAccount the verification Job runs as (referenced by the Job's
# serviceAccountName). Hook weight -10 places it before the ConfigMap (-5)
# and the Job (0) in the hook order.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "kata-deploy.fullname" . }}-verification
  namespace: {{ .Release.Namespace }}
  annotations:
    "helm.sh/hook": post-install,post-upgrade
    "helm.sh/hook-weight": "-10"
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded,hook-failed
  labels:
    {{- include "kata-deploy.labels" . | nindent 4 }}
---
# Permissions used by the verification Job's kubectl calls:
#   - pods, pods/log: create, watch, read logs of, and delete the
#     verification pod
#   - daemonsets:     follow the kata-deploy DaemonSet rollout
#   - events:         let "kubectl describe pod" include the pod's events in
#                     the failure report; without read access the Events
#                     section is empty
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "kata-deploy.fullname" . }}-verification
  labels:
    {{- include "kata-deploy.labels" . | nindent 4 }}
  annotations:
    "helm.sh/hook": post-install,post-upgrade
    "helm.sh/hook-weight": "-10"
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded,hook-failed
rules:
  - apiGroups: [""]
    resources: ["pods", "pods/log"]
    verbs: ["get", "list", "watch", "create", "delete"]
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["get", "list"]
  - apiGroups: ["apps"]
    resources: ["daemonsets"]
    verbs: ["get", "list", "watch"]
---
# Binds the verification ClusterRole to the verification ServiceAccount in
# the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "kata-deploy.fullname" . }}-verification
  annotations:
    "helm.sh/hook": post-install,post-upgrade
    "helm.sh/hook-weight": "-10"
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded,hook-failed
  labels:
    {{- include "kata-deploy.labels" . | nindent 4 }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ include "kata-deploy.fullname" . }}-verification
subjects:
  - kind: ServiceAccount
    name: {{ include "kata-deploy.fullname" . }}-verification
    namespace: {{ .Release.Namespace }}
{{- end }}

View File

@@ -273,3 +273,33 @@ env:
# deployment, use nodeSelector manually.
node-feature-discovery:
enabled: false
# Verification
# Optional post-install/post-upgrade check that Kata Containers is working.
# Nothing runs unless a pod spec is supplied through verification.pod; when
# one is supplied, a verification job applies it after install/upgrade.
#
verification:
  # Namespace in which the verification pod is created
  namespace: default

  # How long to wait for the verification pod to complete (seconds)
  timeout: 180

  # Verification pod manifest (optional)
  # Empty: no verification is performed.
  # Non-empty: a verification job runs after install/upgrade.
  #
  # Supply your own pod YAML that proves your deployment works. The pod must:
  #   - Have metadata.name set
  #   - Include the correct runtimeClassName (e.g., kata-qemu, kata-qemu-snp)
  #   - Include any annotations needed (e.g., cc_init_data for CoCo)
  #   - Exit 0 on success, non-zero on failure
  #
  # Usage:
  #   helm install kata-deploy ... \
  #     --set-file verification.pod=/path/to/your-verification-pod.yaml
  #
  pod: ""