Merge pull request #13222 from fidencio/topic/nvidia-switch-to-kata-deploy-jobs

kata-deploy: nvidia: Default to the Job-based deployment mode
This commit is contained in:
Fabiano Fidêncio
2026-06-22 12:55:10 +02:00
committed by GitHub
3 changed files with 90 additions and 39 deletions

View File

@@ -299,7 +299,9 @@ Includes:
### [`try-kata-nvidia-gpu.values.yaml`](https://github.com/kata-containers/kata-containers/blob/main/tools/packaging/kata-deploy/helm-chart/kata-deploy/try-kata-nvidia-gpu.values.yaml)
This file enables only the NVIDIA GPU-enabled shims:
This file enables only the NVIDIA GPU-enabled shims and installs them using the
[`job` deployment mode](#deployment-modes-daemonset-vs-job) (no always-on
DaemonSet on the node):
```sh
helm install kata-deploy oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy \

View File

@@ -746,6 +746,19 @@ function helm_helper() {
yq -i ".job.dispatcherImage.reference = \"${dispatcher_reference}\"" "${values_yaml}"
yq -i ".job.dispatcherImage.tag = \"${HELM_IMAGE_TAG}\"" "${values_yaml}"
# Resolve the deployment mode coming from the (base) values file so the
# post-install wait below knows whether to expect a DaemonSet or per-node Jobs.
local deployment_mode
deployment_mode="$(yq -r '.deploymentMode // "daemonset"' "${values_yaml}")"
# In "job" mode, the dispatcher's default node selector targets only worker
# (non-control-plane) nodes. Our CI clusters are typically single-node, where
# the only node carries the control-plane label, so clear the role filter to
# target every discovered node (matching the documented single-node/CI setup).
if [[ "${deployment_mode}" == "job" ]]; then
yq -i ".job.nodeSelectorExpressions = []" "${values_yaml}"
fi
[[ -n "${HELM_K8S_DISTRIBUTION}" ]] && yq -i ".k8sDistribution = \"${HELM_K8S_DISTRIBUTION}\"" "${values_yaml}"
if [[ "${HELM_DEFAULT_INSTALLATION}" = "false" ]]; then
@@ -1105,50 +1118,81 @@ VERIFICATION_POD_EOF
return 1
fi
# helm --wait is ineffective for single-node clusters with maxUnavailable=1
# (the DaemonSet is considered ready with 0 ready pods). First wait until at
# least one kata-deploy pod exists, then wait on the pod readiness condition
# instead — the readiness probe (/readyz) returns 200 only after install
# completes (artifacts extracted, CRI restarted, node labeled).
local pod_label_name="kata-deploy"
local multi_install_suffix=""
multi_install_suffix="$(yq -r '.env.multiInstallSuffix // ""' "${values_yaml}")"
if [[ -n "${multi_install_suffix}" ]]; then
pod_label_name="${pod_label_name}-${multi_install_suffix}"
fi
if [[ "${deployment_mode}" == "job" ]]; then
# In "job" mode there is no always-on DaemonSet: the dispatcher runs as a
# blocking post-install hook and fans out one per-node install Job, so by
# the time `helm upgrade --install` returns, the install pipeline has run.
# The final stage labels the node, so wait until at least one node carries
# the kata-runtime label as the "install complete" signal.
echo "deploymentMode=job: waiting for per-node install Jobs to label the node(s)"
local label_wait_deadline=$((SECONDS + KATA_DEPLOY_WAIT_TIMEOUT))
while true; do
if [[ -n "$(kubectl get nodes -l katacontainers.io/kata-runtime=true -o name 2>/dev/null)" ]]; then
break
fi
if (( SECONDS >= label_wait_deadline )); then
echo "ERROR: Timed out waiting for kata-deploy install Jobs to label any node"
echo "::group::kata-deploy job-mode status (no node labeled)"
kubectl -n kube-system get jobs -l app.kubernetes.io/name=kata-deploy -o wide || true
kubectl -n kube-system get pods -l app.kubernetes.io/name=kata-deploy -o wide || true
kubectl -n kube-system describe jobs -l app.kubernetes.io/name=kata-deploy || true
kubectl -n kube-system logs -l app.kubernetes.io/name=kata-deploy --all-containers --tail=-1 --timestamps 2>/dev/null || true
echo "::endgroup::"
return 1
fi
sleep 5
done
local pod_wait_deadline=$((SECONDS + KATA_DEPLOY_WAIT_TIMEOUT))
while true; do
if [[ -n "$(kubectl -n kube-system get pod -l "name=${pod_label_name}" -o name 2>/dev/null)" ]]; then
break
echo "::group::kata-deploy job-mode logs (current)"
kubectl_retry -n kube-system get jobs -l app.kubernetes.io/name=kata-deploy -o wide || true
kubectl_retry -n kube-system logs -l app.kubernetes.io/name=kata-deploy --all-containers --tail=-1 --timestamps 2>/dev/null || true
echo "::endgroup::"
else
# helm --wait is ineffective for single-node clusters with maxUnavailable=1
# (the DaemonSet is considered ready with 0 ready pods). First wait until at
# least one kata-deploy pod exists, then wait on the pod readiness condition
# instead — the readiness probe (/readyz) returns 200 only after install
# completes (artifacts extracted, CRI restarted, node labeled).
local pod_label_name="kata-deploy"
local multi_install_suffix=""
multi_install_suffix="$(yq -r '.env.multiInstallSuffix // ""' "${values_yaml}")"
if [[ -n "${multi_install_suffix}" ]]; then
pod_label_name="${pod_label_name}-${multi_install_suffix}"
fi
if (( SECONDS >= pod_wait_deadline )); then
echo "ERROR: Timed out waiting for kata-deploy pod to be created"
echo "::group::kata-deploy daemonset status (no pod created)"
kubectl -n kube-system get ds -l "name=${pod_label_name}" -o wide || true
kubectl -n kube-system describe ds -l "name=${pod_label_name}" || true
local pod_wait_deadline=$((SECONDS + KATA_DEPLOY_WAIT_TIMEOUT))
while true; do
if [[ -n "$(kubectl -n kube-system get pod -l "name=${pod_label_name}" -o name 2>/dev/null)" ]]; then
break
fi
if (( SECONDS >= pod_wait_deadline )); then
echo "ERROR: Timed out waiting for kata-deploy pod to be created"
echo "::group::kata-deploy daemonset status (no pod created)"
kubectl -n kube-system get ds -l "name=${pod_label_name}" -o wide || true
kubectl -n kube-system describe ds -l "name=${pod_label_name}" || true
echo "::endgroup::"
return 1
fi
sleep 1
done
if ! kubectl -n kube-system wait pod -l "name=${pod_label_name}" --for=condition=Ready --timeout="${KATA_DEPLOY_WAIT_TIMEOUT}s"; then
echo "::group::kata-deploy pod describe (install timed out)"
kubectl -n kube-system describe pod -l "name=${pod_label_name}" || true
echo "::endgroup::"
echo "::group::kata-deploy logs (install timed out)"
kubectl -n kube-system logs -l "name=${pod_label_name}" --all-containers --previous --tail=-1 --timestamps 2>/dev/null || true
kubectl -n kube-system logs -l "name=${pod_label_name}" --all-containers --tail=-1 --timestamps 2>/dev/null || true
echo "::endgroup::"
return 1
fi
sleep 1
done
if ! kubectl -n kube-system wait pod -l "name=${pod_label_name}" --for=condition=Ready --timeout="${KATA_DEPLOY_WAIT_TIMEOUT}s"; then
echo "::group::kata-deploy pod describe (install timed out)"
kubectl -n kube-system describe pod -l "name=${pod_label_name}" || true
echo "::endgroup::"
echo "::group::kata-deploy logs (install timed out)"
kubectl -n kube-system logs -l "name=${pod_label_name}" --all-containers --previous --tail=-1 --timestamps 2>/dev/null || true
kubectl -n kube-system logs -l "name=${pod_label_name}" --all-containers --tail=-1 --timestamps 2>/dev/null || true
echo "::endgroup::"
return 1
fi
echo "::group::kata-deploy logs (current)"
kubectl_retry -n kube-system logs -l "name=${pod_label_name}" --all-containers --tail=-1 --timestamps || true
echo "::endgroup::"
echo "::group::kata-deploy logs (previous)"
kubectl_retry -n kube-system logs -l "name=${pod_label_name}" --all-containers --previous --tail=-1 --timestamps 2>/dev/null || true
echo "::endgroup::"
echo "::group::kata-deploy logs (current)"
kubectl_retry -n kube-system logs -l "name=${pod_label_name}" --all-containers --tail=-1 --timestamps || true
echo "::endgroup::"
echo "::group::kata-deploy logs (previous)"
kubectl_retry -n kube-system logs -l "name=${pod_label_name}" --all-containers --previous --tail=-1 --timestamps 2>/dev/null || true
echo "::endgroup::"
fi
echo "::group::Runtime classes"
kubectl_retry get runtimeclass

View File

@@ -8,6 +8,11 @@
debug: false
# Install Kata via short-lived per-node Jobs instead of an always-on DaemonSet.
# A tiny dispatcher Job fans out one install Job per selected node and exits, so
# nothing keeps running on the node once the install completes.
deploymentMode: job
snapshotter:
setup: ["nydus"]