mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-06-30 22:21:05 +00:00
kata-deploy: nvidia: Default to the Job-based deployment mode
Switch the NVIDIA GPU example values file to install Kata via the
Job-based deployment mode (deploymentMode: job) instead of the
always-on, privileged DaemonSet, so that nothing keeps running on the
node once the install completes.
To exercise this in our CI, make helm_helper aware of the deployment
mode set in the (base) values file:
- In "job" mode, clear job.nodeSelectorExpressions so the dispatcher
targets every discovered node. Our CI clusters are typically
single-node, where the only node carries the control-plane label,
and the default selector excludes control-plane/master nodes.
- There is no always-on DaemonSet to wait on in "job" mode. The
dispatcher runs as a blocking post-install hook and the final
per-node stage labels the node, so wait until at least one node
carries the katacontainers.io/kata-runtime label as the
"install complete" signal (dumping Job/pod logs on timeout).
Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
Assisted-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -299,7 +299,9 @@ Includes:
|
||||
|
||||
### [`try-kata-nvidia-gpu.values.yaml`](https://github.com/kata-containers/kata-containers/blob/main/tools/packaging/kata-deploy/helm-chart/kata-deploy/try-kata-nvidia-gpu.values.yaml)
|
||||
|
||||
This file enables only the NVIDIA GPU-enabled shims:
|
||||
This file enables only the NVIDIA GPU-enabled shims and installs them using the
|
||||
[`job` deployment mode](#deployment-modes-daemonset-vs-job) (no always-on
|
||||
DaemonSet on the node):
|
||||
|
||||
```sh
|
||||
helm install kata-deploy oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy \
|
||||
|
||||
@@ -745,6 +745,19 @@ function helm_helper() {
|
||||
yq -i ".job.dispatcherImage.reference = \"${dispatcher_reference}\"" "${values_yaml}"
|
||||
yq -i ".job.dispatcherImage.tag = \"${HELM_IMAGE_TAG}\"" "${values_yaml}"
|
||||
|
||||
# Resolve the deployment mode coming from the (base) values file so the
|
||||
# post-install wait below knows whether to expect a DaemonSet or per-node Jobs.
|
||||
local deployment_mode
|
||||
deployment_mode="$(yq -r '.deploymentMode // "daemonset"' "${values_yaml}")"
|
||||
|
||||
# In "job" mode, the dispatcher's default node selector targets only worker
|
||||
# (non-control-plane) nodes. Our CI clusters are typically single-node, where
|
||||
# the only node carries the control-plane label, so clear the role filter to
|
||||
# target every discovered node (matching the documented single-node/CI setup).
|
||||
if [[ "${deployment_mode}" == "job" ]]; then
|
||||
yq -i ".job.nodeSelectorExpressions = []" "${values_yaml}"
|
||||
fi
|
||||
|
||||
[[ -n "${HELM_K8S_DISTRIBUTION}" ]] && yq -i ".k8sDistribution = \"${HELM_K8S_DISTRIBUTION}\"" "${values_yaml}"
|
||||
|
||||
if [[ "${HELM_DEFAULT_INSTALLATION}" = "false" ]]; then
|
||||
@@ -1084,50 +1097,81 @@ VERIFICATION_POD_EOF
|
||||
return 1
|
||||
fi
|
||||
|
||||
# helm --wait is ineffective for single-node clusters with maxUnavailable=1
|
||||
# (the DaemonSet is considered ready with 0 ready pods). First wait until at
|
||||
# least one kata-deploy pod exists, then wait on the pod readiness condition
|
||||
# instead — the readiness probe (/readyz) returns 200 only after install
|
||||
# completes (artifacts extracted, CRI restarted, node labeled).
|
||||
local pod_label_name="kata-deploy"
|
||||
local multi_install_suffix=""
|
||||
multi_install_suffix="$(yq -r '.env.multiInstallSuffix // ""' "${values_yaml}")"
|
||||
if [[ -n "${multi_install_suffix}" ]]; then
|
||||
pod_label_name="${pod_label_name}-${multi_install_suffix}"
|
||||
fi
|
||||
if [[ "${deployment_mode}" == "job" ]]; then
|
||||
# In "job" mode there is no always-on DaemonSet: the dispatcher runs as a
|
||||
# blocking post-install hook and fans out one install Job per node, so by
|
||||
# the time `helm upgrade --install` returns the install pipeline has run.
|
||||
# The final stage labels the node, so wait until at least one node carries
|
||||
# the kata-runtime label as the "install complete" signal.
|
||||
echo "deploymentMode=job: waiting for per-node install Jobs to label the node(s)"
|
||||
local label_wait_deadline=$((SECONDS + KATA_DEPLOY_WAIT_TIMEOUT))
|
||||
while true; do
|
||||
if [[ -n "$(kubectl get nodes -l katacontainers.io/kata-runtime=true -o name 2>/dev/null)" ]]; then
|
||||
break
|
||||
fi
|
||||
if (( SECONDS >= label_wait_deadline )); then
|
||||
echo "ERROR: Timed out waiting for kata-deploy install Jobs to label any node"
|
||||
echo "::group::kata-deploy job-mode status (no node labeled)"
|
||||
kubectl -n kube-system get jobs -l app.kubernetes.io/name=kata-deploy -o wide || true
|
||||
kubectl -n kube-system get pods -l app.kubernetes.io/name=kata-deploy -o wide || true
|
||||
kubectl -n kube-system describe jobs -l app.kubernetes.io/name=kata-deploy || true
|
||||
kubectl -n kube-system logs -l app.kubernetes.io/name=kata-deploy --all-containers --tail=-1 --timestamps 2>/dev/null || true
|
||||
echo "::endgroup::"
|
||||
return 1
|
||||
fi
|
||||
sleep 5
|
||||
done
|
||||
|
||||
local pod_wait_deadline=$((SECONDS + KATA_DEPLOY_WAIT_TIMEOUT))
|
||||
while true; do
|
||||
if [[ -n "$(kubectl -n kube-system get pod -l "name=${pod_label_name}" -o name 2>/dev/null)" ]]; then
|
||||
break
|
||||
echo "::group::kata-deploy job-mode logs (current)"
|
||||
kubectl_retry -n kube-system get jobs -l app.kubernetes.io/name=kata-deploy -o wide || true
|
||||
kubectl_retry -n kube-system logs -l app.kubernetes.io/name=kata-deploy --all-containers --tail=-1 --timestamps 2>/dev/null || true
|
||||
echo "::endgroup::"
|
||||
else
|
||||
# helm --wait is ineffective for single-node clusters with maxUnavailable=1
|
||||
# (the DaemonSet is considered ready with 0 ready pods). First wait until at
|
||||
# least one kata-deploy pod exists, then wait on the pod readiness condition
|
||||
# instead — the readiness probe (/readyz) returns 200 only after install
|
||||
# completes (artifacts extracted, CRI restarted, node labeled).
|
||||
local pod_label_name="kata-deploy"
|
||||
local multi_install_suffix=""
|
||||
multi_install_suffix="$(yq -r '.env.multiInstallSuffix // ""' "${values_yaml}")"
|
||||
if [[ -n "${multi_install_suffix}" ]]; then
|
||||
pod_label_name="${pod_label_name}-${multi_install_suffix}"
|
||||
fi
|
||||
if (( SECONDS >= pod_wait_deadline )); then
|
||||
echo "ERROR: Timed out waiting for kata-deploy pod to be created"
|
||||
echo "::group::kata-deploy daemonset status (no pod created)"
|
||||
kubectl -n kube-system get ds -l "name=${pod_label_name}" -o wide || true
|
||||
kubectl -n kube-system describe ds -l "name=${pod_label_name}" || true
|
||||
|
||||
local pod_wait_deadline=$((SECONDS + KATA_DEPLOY_WAIT_TIMEOUT))
|
||||
while true; do
|
||||
if [[ -n "$(kubectl -n kube-system get pod -l "name=${pod_label_name}" -o name 2>/dev/null)" ]]; then
|
||||
break
|
||||
fi
|
||||
if (( SECONDS >= pod_wait_deadline )); then
|
||||
echo "ERROR: Timed out waiting for kata-deploy pod to be created"
|
||||
echo "::group::kata-deploy daemonset status (no pod created)"
|
||||
kubectl -n kube-system get ds -l "name=${pod_label_name}" -o wide || true
|
||||
kubectl -n kube-system describe ds -l "name=${pod_label_name}" || true
|
||||
echo "::endgroup::"
|
||||
return 1
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
if ! kubectl -n kube-system wait pod -l "name=${pod_label_name}" --for=condition=Ready --timeout="${KATA_DEPLOY_WAIT_TIMEOUT}s"; then
|
||||
echo "::group::kata-deploy pod describe (install timed out)"
|
||||
kubectl -n kube-system describe pod -l "name=${pod_label_name}" || true
|
||||
echo "::endgroup::"
|
||||
echo "::group::kata-deploy logs (install timed out)"
|
||||
kubectl -n kube-system logs -l "name=${pod_label_name}" --all-containers --previous --tail=-1 --timestamps 2>/dev/null || true
|
||||
kubectl -n kube-system logs -l "name=${pod_label_name}" --all-containers --tail=-1 --timestamps 2>/dev/null || true
|
||||
echo "::endgroup::"
|
||||
return 1
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
if ! kubectl -n kube-system wait pod -l "name=${pod_label_name}" --for=condition=Ready --timeout="${KATA_DEPLOY_WAIT_TIMEOUT}s"; then
|
||||
echo "::group::kata-deploy pod describe (install timed out)"
|
||||
kubectl -n kube-system describe pod -l "name=${pod_label_name}" || true
|
||||
echo "::endgroup::"
|
||||
echo "::group::kata-deploy logs (install timed out)"
|
||||
kubectl -n kube-system logs -l "name=${pod_label_name}" --all-containers --previous --tail=-1 --timestamps 2>/dev/null || true
|
||||
kubectl -n kube-system logs -l "name=${pod_label_name}" --all-containers --tail=-1 --timestamps 2>/dev/null || true
|
||||
echo "::endgroup::"
|
||||
return 1
|
||||
fi
|
||||
|
||||
echo "::group::kata-deploy logs (current)"
|
||||
kubectl_retry -n kube-system logs -l "name=${pod_label_name}" --all-containers --tail=-1 --timestamps || true
|
||||
echo "::endgroup::"
|
||||
echo "::group::kata-deploy logs (previous)"
|
||||
kubectl_retry -n kube-system logs -l "name=${pod_label_name}" --all-containers --previous --tail=-1 --timestamps 2>/dev/null || true
|
||||
echo "::endgroup::"
|
||||
echo "::group::kata-deploy logs (current)"
|
||||
kubectl_retry -n kube-system logs -l "name=${pod_label_name}" --all-containers --tail=-1 --timestamps || true
|
||||
echo "::endgroup::"
|
||||
echo "::group::kata-deploy logs (previous)"
|
||||
kubectl_retry -n kube-system logs -l "name=${pod_label_name}" --all-containers --previous --tail=-1 --timestamps 2>/dev/null || true
|
||||
echo "::endgroup::"
|
||||
fi
|
||||
|
||||
echo "::group::Runtime classes"
|
||||
kubectl_retry get runtimeclass
|
||||
|
||||
@@ -8,6 +8,11 @@
|
||||
|
||||
debug: false
|
||||
|
||||
# Install Kata via short-lived per-node Jobs instead of an always-on DaemonSet.
|
||||
# A tiny dispatcher Job fans out one install Job per selected node and exits, so
|
||||
# nothing keeps running on the node once the install completes.
|
||||
deploymentMode: job
|
||||
|
||||
snapshotter:
|
||||
setup: ["nydus"]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user