kata-deploy: make verification Job aware of job deployment mode

The verification Job assumed the DaemonSet model: it waited for the
DaemonSet to exist, for its pods, and for `rollout status daemonset/...`,
then required every node in the cluster to be labeled. None of that holds
for deploymentMode: job, where install happens via the dispatcher and the
per-node Jobs it fans out, and only the targeted (worker) nodes get
labeled.

Make the hook mode-aware:
  - Hook weight: in job mode the install dispatcher runs as a
    post-install hook at weight 5, so verification now runs at weight 10
    (after it); daemonset mode keeps weight 0 (the DaemonSet is a normal
    resource).
  - Readiness wait: in job mode, wait for the install dispatcher Job to
    complete (it only reports success once every per-node install Job,
    kata-deploy/stage=install, has succeeded), using the same
    CRI-restart retry logic, instead of waiting for a DaemonSet rollout.
  - Label check: in job mode, wait until the number of labeled nodes
    reaches the number of per-node install Jobs the dispatcher created
    (one per targeted node), rather than comparing the labeled count
    against all nodes in the cluster.
  - Grant the verification ClusterRole read access to batch/jobs (used by
    the job-mode waits; harmless in daemonset mode).

The daemonset code path is unchanged and the default render (no
verification.pod) is byte-for-byte identical.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
Assisted-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Fabiano Fidêncio
2026-06-03 22:07:31 +02:00
committed by Fabiano Fidêncio
parent 3d732986d2
commit c23fe11529

View File

@@ -6,6 +6,10 @@ Verification Job - runs after kata-deploy installation to validate Kata is worki
Only created when verification.pod is provided.
*/ -}}
{{- if .Values.verification.pod }}
{{- $isJob := eq (.Values.deploymentMode | default "daemonset") "job" }}
{{- $base := .Chart.Name }}
{{- if .Values.env.multiInstallSuffix }}{{- $base = printf "%s-%s" .Chart.Name .Values.env.multiInstallSuffix }}{{- end }}
{{- $installDispatcher := printf "%s-install-dispatcher" $base | trunc 63 | trimSuffix "-" }}
apiVersion: v1
kind: ConfigMap
metadata:
@@ -27,7 +31,10 @@ metadata:
app.kubernetes.io/component: verification
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "0"
# In job mode the install dispatcher (which fans out the per-node install
# Jobs) is a post-install hook at weight 5; verification must run after it,
# so use a higher weight. In daemonset mode the DaemonSet is a normal
# resource (created before any hook), so 0 is fine.
"helm.sh/hook-weight": {{ if $isJob }}"10"{{ else }}"0"{{ end }}
"helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
spec:
backoffLimit: 3
@@ -57,6 +64,33 @@ spec:
echo "Timeout: ${TIMEOUT}s"
echo ""
{{- if $isJob }}
# job mode: there is no DaemonSet. Helm has already waited for the
# install dispatcher hook (this verification hook runs at a higher
# weight); re-check it defensively here. The dispatcher only reports
# success once every per-node install Job has succeeded.
DISPATCHER="{{ $installDispatcher }}"
# The per-attempt timeout reuses the verification.daemonsetTimeout value;
# no separate job-mode timeout setting is read here.
INSTALL_TIMEOUT="{{ .Values.verification.daemonsetTimeout }}"
echo "Waiting for install dispatcher Job ${DISPATCHER} to complete (timeout ${INSTALL_TIMEOUT}s)..."
# kata-deploy restarts the CRI runtime during the cri stage, which can
# cause transient API server unavailability. Retry the wait to handle this.
# Up to wait_retries attempts are made, each bounded by INSTALL_TIMEOUT,
# with wait_retry_delay seconds of sleep between failed attempts.
wait_retries=5
wait_retry_delay=15
for wait_attempt in $(seq 1 ${wait_retries}); do
# Any non-zero exit from kubectl wait (timeout or API error) counts as a
# failed attempt; stderr is folded into stdout.
if kubectl wait --for=condition=complete "job/${DISPATCHER}" -n {{ .Release.Namespace }} --timeout="${INSTALL_TIMEOUT}s" 2>&1; then
break
fi
# Last attempt failed: print best-effort diagnostics (each command is
# allowed to fail) and abort the verification.
if [[ ${wait_attempt} -eq ${wait_retries} ]]; then
echo "ERROR: install dispatcher ${DISPATCHER} did not complete after ${wait_retries} attempts"
kubectl get job "${DISPATCHER}" -n {{ .Release.Namespace }} || true
kubectl logs -n {{ .Release.Namespace }} "job/${DISPATCHER}" --tail=50 || true
kubectl get jobs -n {{ .Release.Namespace }} -l kata-deploy/stage=install || true
exit 1
fi
echo "API server may be restarting (attempt ${wait_attempt}/${wait_retries}), retrying in ${wait_retry_delay}s..."
sleep ${wait_retry_delay}
done
{{- else }}
# First, wait for kata-deploy DaemonSet to exist (it's created by Helm, not a hook)
echo "Waiting for kata-deploy DaemonSet to be created..."
{{- if .Values.env.multiInstallSuffix }}
@@ -128,6 +162,7 @@ spec:
echo "API server may be restarting (attempt ${rollout_attempt}/${rollout_retries}), retrying in ${rollout_retry_delay}s..."
sleep ${rollout_retry_delay}
done
{{- end }}
# Wait for nodes to be labeled with katacontainers.io/kata-runtime=true
# This label is set by kata-deploy when installation is complete
@@ -137,6 +172,35 @@ spec:
max_wait=60
echo "Node label timeout: ${max_wait}s"
elapsed=0
{{- if $isJob }}
# job mode: only the targeted nodes get labeled. The dispatcher
# created one per-node install Job per targeted node (label
# kata-deploy/stage=install); use that count as the expected
# coverage rather than comparing against all nodes.
#
# The expected count is resolved inside the polling loop rather than once
# up front: stderr of the lookup is discarded, so a failed query is
# indistinguishable from "no Jobs" (wc -l prints 0). Because success
# requires a non-zero expected count, a single failed lookup would
# otherwise make the loop run until the timeout with no way to recover.
# Relies on max_wait and elapsed being initialised before this section.
expected_count=0
expected_reported=""
while true; do
# Re-query only while no per-node install Job has been seen yet; once a
# non-zero count is known it is kept for the rest of the wait.
if [[ ${expected_count} -eq 0 ]]; then
expected_count=$(kubectl get jobs -n {{ .Release.Namespace }} -l kata-deploy/stage=install --no-headers 2>/dev/null | wc -l)
# Announce the expectation the first time and whenever it changes, so
# the log is not flooded while the count is still 0.
if [[ "${expected_reported}" != "${expected_count}" ]]; then
echo "Expected ${expected_count} node(s) to be labeled (one per per-node install Job)"
expected_reported="${expected_count}"
fi
fi
labeled_nodes=$(kubectl get nodes -l katacontainers.io/kata-runtime=true --no-headers 2>/dev/null | wc -l)
# Success needs at least one expected node and at least that many
# labeled nodes; this compares counts, not node identities.
if [[ ${expected_count} -gt 0 ]] && [[ ${labeled_nodes} -ge ${expected_count} ]]; then
echo "All ${expected_count} targeted node(s) labeled with kata-runtime=true"
kubectl get nodes -L katacontainers.io/kata-runtime || true
break
fi
# Timed out: print the current state (best effort) and fail.
if [[ ${elapsed} -ge ${max_wait} ]]; then
echo "ERROR: Timeout waiting for nodes to be labeled after ${max_wait}s"
echo "Labeled nodes: ${labeled_nodes}/${expected_count}"
echo "Node labels:"
kubectl get nodes -L katacontainers.io/kata-runtime || true
exit 1
fi
echo "Labeled nodes: ${labeled_nodes}/${expected_count} (${elapsed}s/${max_wait}s)"
sleep 5
elapsed=$((elapsed + 5))
done
{{- else }}
while true; do
labeled_nodes=$(kubectl get nodes -l katacontainers.io/kata-runtime=true --no-headers 2>/dev/null | wc -l)
total_nodes=$(kubectl get nodes --no-headers 2>/dev/null | wc -l)
@@ -159,6 +223,7 @@ spec:
sleep 5
elapsed=$((elapsed + 5))
done
{{- end }}
# Give kubelet time to pick up the new runtime configuration after containerd restart
echo ""
@@ -315,6 +380,9 @@ rules:
- apiGroups: ["apps"]
resources: ["daemonsets"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs"]
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding