diff --git a/tests/functional/kata-deploy/lib/helm-deploy.bash b/tests/functional/kata-deploy/lib/helm-deploy.bash index 463bd1648b..73d15dae94 100644 --- a/tests/functional/kata-deploy/lib/helm-deploy.bash +++ b/tests/functional/kata-deploy/lib/helm-deploy.bash @@ -31,11 +31,24 @@ generate_base_values() { local output_file="$1" local extra_values_file="${2:-}" + local kata_deploy_image="${DOCKER_REGISTRY}/${DOCKER_REPO}" + local dispatcher_image + if [[ "${kata_deploy_image}" == *-ci ]]; then + dispatcher_image="${kata_deploy_image%-ci}-job-dispatcher-ci" + else + dispatcher_image="${kata_deploy_image}-job-dispatcher" + fi + cat > "${output_file}" <-job-dispatcher", with the "-ci" suffix (if + # any) kept at the very end (e.g. kata-deploy-ci -> kata-deploy-job-dispatcher-ci). + local dispatcher_reference + if [[ "${HELM_IMAGE_REFERENCE}" == *-ci ]]; then + dispatcher_reference="${HELM_IMAGE_REFERENCE%-ci}-job-dispatcher-ci" + else + dispatcher_reference="${HELM_IMAGE_REFERENCE}-job-dispatcher" + fi + yq -i ".job.dispatcherImage.reference = \"${dispatcher_reference}\"" "${values_yaml}" + yq -i ".job.dispatcherImage.tag = \"${HELM_IMAGE_TAG}\"" "${values_yaml}" + [[ -n "${HELM_K8S_DISTRIBUTION}" ]] && yq -i ".k8sDistribution = \"${HELM_K8S_DISTRIBUTION}\"" "${values_yaml}" if [[ "${HELM_DEFAULT_INSTALLATION}" = "false" ]]; then diff --git a/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/_helpers.tpl b/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/_helpers.tpl index dae7c0ca32..457cb00ab6 100644 --- a/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/_helpers.tpl +++ b/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/_helpers.tpl @@ -391,6 +391,21 @@ reference:tag (tag defaults to Chart.AppVersion). {{- end -}} {{- end -}} +{{/* +Dispatcher image reference for the job-mode dispatcher (kata-deploy-job-dispatcher). +Supports tag (reference:tag) and digest (reference@sha256:...) formats; tag +defaults to Chart.AppVersion. +*/}} +{{- define "kata-deploy.dispatcherImage" -}} +{{- $ref := .Values.job.dispatcherImage.reference -}} +{{- $tag := default .Chart.AppVersion .Values.job.dispatcherImage.tag | toString -}} +{{- if contains "@" $ref -}} +{{- $ref -}} +{{- else -}} +{{- printf "%s:%s" $ref $tag -}} +{{- end -}} +{{- end -}} + {{/* Get snapshotter setup list from structured config */}} @@ -592,6 +607,166 @@ e.g. `{{- include "kata-deploy.commonEnv" . | nindent 8 }}`. {{- end }} {{- end -}} +{{/* +Build a Kubernetes label-selector STRING (the form accepted by the apiserver +and `kubectl --selector`) from an equality map plus a list of match-expression +requirements. This is handed to `kata-deploy-job-dispatcher --node-selector`, which +resolves the actual target nodes LIVE at run time (so node membership is never +frozen into the Helm release). + +Arguments (dict): + eq - equality label map -> "k=v" + exprs - list of {key, operator, values}: + Exists -> "key" + DoesNotExist -> "!key" + In -> "key in (v1,v2)" + NotIn -> "key notin (v1,v2)" + +Returns the comma-joined selector string (possibly empty, meaning "all nodes"). +*/}} +{{- define "kata-deploy.nodeLabelSelector" -}} +{{- $parts := list -}} +{{- range $k, $v := (.eq | default dict) -}} +{{- $parts = append $parts (printf "%s=%s" $k $v) -}} +{{- end -}} +{{- range $expr := (.exprs | default list) -}} +{{- $op := $expr.operator -}} +{{- if eq $op "Exists" -}} +{{- $parts = append $parts $expr.key -}} +{{- else if eq $op "DoesNotExist" -}} +{{- $parts = append $parts (printf "!%s" $expr.key) -}} +{{- else if eq $op "In" -}} +{{- $parts = append $parts (printf "%s in (%s)" $expr.key (join "," ($expr.values | default list))) -}} +{{- else if eq $op "NotIn" -}} +{{- $parts = append $parts (printf "%s notin (%s)" $expr.key (join "," ($expr.values | default list))) -}} +{{- else -}} +{{- fail (printf "nodeSelectorExpressions: unsupported operator %q for key %q (use In, NotIn, Exists, DoesNotExist)" $op $expr.key) -}} +{{- end -}} +{{- end -}} +{{- join "," $parts -}} +{{- end -}} + +{{/* +Per-node staged Job manifest (deploymentMode: job), embedded verbatim into the +job-templates ConfigMap. The dispatcher (kata-deploy-job-dispatcher) clones this once per +target node, injecting metadata.name + spec.template.spec.nodeName, so the +template itself carries NO node identity and NO Helm hook annotations. + +Arguments (dict): + root - top-level context (.) + stage - "install" | "cleanup" + +install pipeline: host-check -> artifacts -> cri (initContainers) ; label (main) +cleanup pipeline: unlabel -> revert-cri (initContainers) ; remove-artifacts (main) + +Emitted at column 0 (a standalone Job document); embed with `indent` at the call +site under a ConfigMap data key. +*/}} +{{- define "kata-deploy.perNodeJob" -}} +{{- $root := .root -}} +{{- $stage := .stage -}} +apiVersion: batch/v1 +kind: Job +metadata: + labels: + app.kubernetes.io/name: {{ include "kata-deploy.name" $root }} + app.kubernetes.io/instance: {{ $root.Release.Name }} + kata-deploy/stage: {{ $stage }} +spec: + backoffLimit: {{ $root.Values.job.backoffLimit }} + ttlSecondsAfterFinished: {{ $root.Values.job.ttlSecondsAfterFinished }} + template: + metadata: + labels: + app.kubernetes.io/name: {{ include "kata-deploy.name" $root }} + app.kubernetes.io/instance: {{ $root.Release.Name }} + kata-deploy/stage: {{ $stage }} + spec: +{{- with $root.Values.imagePullSecrets }} + imagePullSecrets: +{{- toYaml . | nindent 8 }} +{{- end }} + serviceAccountName: {{ include "kata-deploy.serviceAccountName" $root }} + restartPolicy: Never + hostPID: true +{{- with $root.Values.tolerations }} + tolerations: +{{- toYaml . | nindent 8 }} +{{- end }} +{{- with $root.Values.priorityClassName }} + priorityClassName: {{ . | quote }} +{{- end }} +{{- if eq $stage "install" }} + initContainers: +{{- include "kata-deploy.stageContainer" (dict "root" $root "name" "host-check" "action" "install-stage-host-check" "privileged" true "mountHost" true) | nindent 8 }} +{{- include "kata-deploy.stageContainer" (dict "root" $root "name" "artifacts" "action" "install-stage-artifacts" "privileged" true "mountHost" true) | nindent 8 }} +{{- include "kata-deploy.stageContainer" (dict "root" $root "name" "cri" "action" "install-stage-cri" "privileged" true "mountHost" true) | nindent 8 }} + containers: +{{- include "kata-deploy.stageContainer" (dict "root" $root "name" "label" "action" "install-stage-label" "privileged" false "mountHost" false) | nindent 8 }} +{{- else }} + initContainers: +{{- include "kata-deploy.stageContainer" (dict "root" $root "name" "unlabel" "action" "cleanup-stage-unlabel" "privileged" false "mountHost" false) | nindent 8 }} +{{- include "kata-deploy.stageContainer" (dict "root" $root "name" "revert-cri" "action" "cleanup-stage-revert-cri" "privileged" true "mountHost" true) | nindent 8 }} + containers: +{{- include "kata-deploy.stageContainer" (dict "root" $root "name" "remove-artifacts" "action" "cleanup-stage-remove-artifacts" "privileged" true "mountHost" true) | nindent 8 }} +{{- end }} + volumes: +{{- include "kata-deploy.commonVolumes" $root | nindent 8 }} +{{- end -}} + +{{/* +Service account name (honoring multiInstallSuffix), shared by all kata-deploy +workloads (DaemonSet and staged Jobs). +*/}} +{{- define "kata-deploy.serviceAccountName" -}} +{{- if .Values.env.multiInstallSuffix -}} +{{ .Chart.Name }}-sa-{{ .Values.env.multiInstallSuffix }} +{{- else -}} +{{ .Chart.Name }}-sa +{{- end -}} +{{- end -}} + +{{/* +ServiceAccount name for the job-mode dispatcher (kata-deploy-job-dispatcher). Separate from +kata-deploy.serviceAccountName: the dispatcher is a pure API client (list nodes, +manage Jobs) and must NOT carry the privileged kata-deploy host-mutation rights. +*/}} +{{- define "kata-deploy.dispatcherServiceAccountName" -}} +{{- if .Values.env.multiInstallSuffix -}} +{{ .Chart.Name }}-dispatcher-sa-{{ .Values.env.multiInstallSuffix }} +{{- else -}} +{{ .Chart.Name }}-dispatcher-sa +{{- end -}} +{{- end -}} + +{{/* +Render a single staged-pipeline container that runs one kata-deploy stage action. +Used by the per-node staged install/cleanup Jobs (deploymentMode: job). + +Arguments (dict): + root - the top-level context (.) + name - container name + action - kata-deploy subcommand (e.g. install-stage-cri) + privileged - bool, whether the container runs privileged (host nsenter/restart) + mountHost - bool, whether to mount the host paths (crio/containerd/host) + +Emitted at column 0; indent with `nindent` at the call site. +*/}} +{{- define "kata-deploy.stageContainer" -}} +- name: {{ .name }} + image: {{ include "kata-deploy.image" .root }} + imagePullPolicy: {{ .root.Values.imagePullPolicy }} + command: ["/usr/bin/kata-deploy", "{{ .action }}"] + env: +{{- include "kata-deploy.commonEnv" .root | nindent 4 }} + securityContext: + privileged: {{ .privileged }} +{{- if .mountHost }} + volumeMounts: +{{- include "kata-deploy.commonVolumeMounts" .root | nindent 4 }} +{{- end }} +{{- end -}} + {{/* Common volumeMounts for any pod that runs the kata-deploy binary against the host. Emitted at column 0; indent with `nindent` at the call site. diff --git a/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy-install-job.yaml b/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy-install-job.yaml new file mode 100644 index 0000000000..ff8e97f3fb --- /dev/null +++ b/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy-install-job.yaml @@ -0,0 +1,113 @@ +{{- /* +Install dispatcher (deploymentMode: job). + +A single, tiny post-install/post-upgrade hook Job that runs the dispatcher +(kata-deploy-job-dispatcher). The dispatcher enumerates the selected nodes LIVE, then +creates one node-pinned install Job per node from the job-templates ConfigMap, +keeping at most job.parallelism in flight and refilling as they finish. This +guarantees one install per node (coverage) with a paced rollout, while the Helm +release stays O(1) regardless of fleet size. + +Each per-node Job runs the staged pipeline as ordered initContainers and exits: + + host-check -> artifacts -> cri (initContainers, run sequentially) + label (main container) + +Helm waits only on THIS dispatcher Job (the verification hook runs at a higher +weight, after it). before-hook-creation lets `helm upgrade` re-run the dispatcher, +which re-enumerates nodes (idempotent stages skip already-installed nodes and +pick up newly added ones). +*/ -}} +{{- if eq (.Values.deploymentMode | default "daemonset") "job" }} +{{- $root := . }} +{{- $base := .Chart.Name }} +{{- if .Values.env.multiInstallSuffix }} +{{- $base = printf "%s-%s" .Chart.Name .Values.env.multiInstallSuffix }} +{{- end }} +{{- $sa := include "kata-deploy.dispatcherServiceAccountName" . }} +{{- $dispatcherName := printf "%s-install-dispatcher" $base | trunc 63 | trimSuffix "-" }} +{{- $nodes := .Values.job.nodes | default list }} +{{- $selector := include "kata-deploy.nodeLabelSelector" (dict "eq" (.Values.job.nodeSelector | default dict) "exprs" (.Values.job.nodeSelectorExpressions | default list)) }} +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ $dispatcherName }} + namespace: {{ $root.Release.Namespace }} + labels: + app.kubernetes.io/name: {{ include "kata-deploy.name" $root }} + app.kubernetes.io/instance: {{ $root.Release.Name }} + kata-deploy/dispatcher: install + annotations: + "helm.sh/hook": post-install,post-upgrade + "helm.sh/hook-weight": "5" + "helm.sh/hook-delete-policy": before-hook-creation +spec: + # The dispatcher does per-node retries (job.backoffLimit) itself; a dispatcher + # failure means "some node failed" and should surface, not be retried blindly. + backoffLimit: 0 + ttlSecondsAfterFinished: {{ $root.Values.job.ttlSecondsAfterFinished }} + template: + metadata: + labels: + app.kubernetes.io/name: {{ include "kata-deploy.name" $root }} + app.kubernetes.io/instance: {{ $root.Release.Name }} + kata-deploy/dispatcher: install + spec: +{{- with $root.Values.imagePullSecrets }} + imagePullSecrets: +{{- toYaml . | nindent 8 }} +{{- end }} + serviceAccountName: {{ $sa }} + restartPolicy: Never + # The dispatcher never touches the host; it is a plain API client. Lock the + # pod down so a compromise cannot escalate beyond its (minimal) API rights. + securityContext: + runAsNonRoot: true + runAsUser: 65532 + runAsGroup: 65532 + seccompProfile: + type: RuntimeDefault +{{- with $root.Values.tolerations }} + tolerations: +{{- toYaml . | nindent 8 }} +{{- end }} +{{- with $root.Values.priorityClassName }} + priorityClassName: {{ . | quote }} +{{- end }} + containers: + - name: dispatcher + image: {{ include "kata-deploy.dispatcherImage" $root }} + imagePullPolicy: {{ $root.Values.imagePullPolicy }} + securityContext: + privileged: false + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + command: + - /usr/bin/kata-deploy-job-dispatcher + - "--job-template=/etc/kata-job/install-job.yaml" + - "--name-prefix={{ $base }}-install" + - "--owner-job-name={{ $dispatcherName }}" + - "--parallelism={{ $root.Values.job.parallelism }}" +{{- if $nodes }} + - "--nodes={{ join "," $nodes }}" +{{- else if $selector }} + - "--node-selector={{ $selector }}" +{{- end }} + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + volumeMounts: + - name: job-templates + mountPath: /etc/kata-job + readOnly: true + volumes: + - name: job-templates + configMap: + name: {{ printf "%s-job-templates" $base | trunc 63 | trimSuffix "-" }} +{{- end }} diff --git a/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy-job-templates.yaml b/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy-job-templates.yaml new file mode 100644 index 0000000000..4d455f0763 --- /dev/null +++ b/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy-job-templates.yaml @@ -0,0 +1,33 @@ +{{- /* +Per-node Job templates for deploymentMode: job. + +This ConfigMap holds the install and cleanup per-node Job manifests, rendered +ONCE (constant size, independent of the number of nodes). The job-mode dispatcher +(kata-deploy-job-dispatcher) mounts it, and for every selected node clones the relevant +template, injects metadata.name + spec.template.spec.nodeName, and creates the +Job. Keeping the rich pod spec (env/volumes/shim config) here means the Helm +chart stays the single source of truth; the dispatcher only does fan-out. + +It is a normal (non-hook) resource: Helm creates it before the post-install +dispatcher hook runs, and it still exists during the pre-delete cleanup hook +(release resources are torn down only after pre-delete hooks complete). +*/ -}} +{{- if eq (.Values.deploymentMode | default "daemonset") "job" }} +{{- $base := .Chart.Name }} +{{- if .Values.env.multiInstallSuffix }} +{{- $base = printf "%s-%s" .Chart.Name .Values.env.multiInstallSuffix }} +{{- end }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ printf "%s-job-templates" $base | trunc 63 | trimSuffix "-" }} + namespace: {{ .Release.Namespace }} + labels: + app.kubernetes.io/name: {{ include "kata-deploy.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} +data: + install-job.yaml: | +{{ include "kata-deploy.perNodeJob" (dict "root" . "stage" "install") | indent 4 }} + cleanup-job.yaml: | +{{ include "kata-deploy.perNodeJob" (dict "root" . "stage" "cleanup") | indent 4 }} +{{- end }} diff --git a/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy.yaml b/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy.yaml index ff02c34de6..17ff5bd183 100644 --- a/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy.yaml +++ b/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy.yaml @@ -1,3 +1,4 @@ +{{- if eq (.Values.deploymentMode | default "daemonset") "daemonset" -}} {{- if index .Values "node-feature-discovery" "enabled" -}} {{- $existingNFDNamespace := include "kata-deploy.detectExistingNFD" . | trim -}} {{- if $existingNFDNamespace -}} @@ -204,3 +205,4 @@ spec: updateStrategy: {{- toYaml . | nindent 4 }} {{- end}} +{{- end -}} diff --git a/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-rbac.yaml b/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-rbac.yaml index 863b037c51..0f66e45a4a 100644 --- a/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-rbac.yaml +++ b/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-rbac.yaml @@ -65,6 +65,68 @@ subjects: name: {{ .Chart.Name }}-sa {{- end }} namespace: {{ .Release.Namespace }} +{{- if eq (.Values.deploymentMode | default "daemonset") "job" }} +--- +# Dedicated, least-privilege identity for the job-mode dispatcher +# (kata-deploy-job-dispatcher). It is a pure control-plane client: it lists nodes +# (cluster-scoped) and manages per-node Jobs in the release namespace +# (namespace-scoped). It deliberately does NOT get the privileged kata-deploy +# host-mutation rights (node patch, runtimeclasses, NFD, etc.); those stay on +# kata-deploy-sa, which only the per-node Jobs use. +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "kata-deploy.dispatcherServiceAccountName" . }} + namespace: {{ .Release.Namespace }} +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ .Chart.Name }}-dispatcher-noderole{{ with .Values.env.multiInstallSuffix }}-{{ . }}{{ end }} +rules: +# Enumerating nodes is inherently cluster-scoped. +- apiGroups: [""] + resources: ["nodes"] + verbs: ["list"] +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ .Chart.Name }}-dispatcher-noderb{{ with .Values.env.multiInstallSuffix }}-{{ . }}{{ end }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ .Chart.Name }}-dispatcher-noderole{{ with .Values.env.multiInstallSuffix }}-{{ . }}{{ end }} +subjects: +- kind: ServiceAccount + name: {{ include "kata-deploy.dispatcherServiceAccountName" . }} + namespace: {{ .Release.Namespace }} +--- +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ .Chart.Name }}-dispatcher-role{{ with .Values.env.multiInstallSuffix }}-{{ . }}{{ end }} + namespace: {{ .Release.Namespace }} +rules: +# The dispatcher only ever creates/watches/GCs per-node Jobs in its own namespace. +- apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["create", "get", "list", "watch", "delete"] +--- +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ .Chart.Name }}-dispatcher-rb{{ with .Values.env.multiInstallSuffix }}-{{ . }}{{ end }} + namespace: {{ .Release.Namespace }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ .Chart.Name }}-dispatcher-role{{ with .Values.env.multiInstallSuffix }}-{{ . }}{{ end }} +subjects: +- kind: ServiceAccount + name: {{ include "kata-deploy.dispatcherServiceAccountName" . }} + namespace: {{ .Release.Namespace }} +{{- end }} --- # ServiceAccount and RBAC for the post-delete Job that removes the kept RBAC above. # Created as post-delete hooks with lower weight than the Job so they exist when the Job runs. diff --git a/tools/packaging/kata-deploy/helm-chart/kata-deploy/values.yaml b/tools/packaging/kata-deploy/helm-chart/kata-deploy/values.yaml index 7b0e9fa74a..120bfd5027 100644 --- a/tools/packaging/kata-deploy/helm-chart/kata-deploy/values.yaml +++ b/tools/packaging/kata-deploy/helm-chart/kata-deploy/values.yaml @@ -1,3 +1,106 @@ +# Deployment model for installing/cleaning up Kata on nodes. +# daemonset: (default) the long-running kata-deploy DaemonSet installs Kata on +# every matching node and reverts it on pod termination (uninstall). +# job: no always-on component. A tiny dispatcher Job (the dispatcher, +# kata-deploy-job-dispatcher) runs as a post-install/upgrade hook, enumerates +# the selected nodes LIVE, and creates one node-pinned install Job +# per node - paced to job.parallelism and guaranteeing one install +# per node. Each per-node Job runs the staged pipeline as ordered +# initContainers and exits. Uninstall works the same way via a +# pre-delete dispatcher (reverse pipeline). +# +# Why a dispatcher instead of rendering per-node Jobs in the chart: Helm stores +# the whole rendered release in one ~1 MiB Secret and runs hook resources +# sequentially, and neither an Indexed Job nor a JobSet can guarantee one pod +# per node once parallelism < node-count (the scheduler ignores completed pods +# when balancing spread). The dispatcher keeps the release O(1), enumerates nodes +# at run time, and paces a guaranteed-coverage rollout with built-in Jobs only. +# +# NOTE on "job" mode and new nodes: +# The dispatcher only runs on `helm install` / `helm upgrade` / `helm uninstall`. +# When you add nodes later, re-run `helm upgrade` so the dispatcher enumerates +# and installs the new nodes (the staged actions are idempotent, so already- +# installed nodes are skipped). This is intentional: it avoids an always-on +# privileged component on every node. +deploymentMode: daemonset # daemonset | job + +# Settings specific to deploymentMode: job +job: + # Dispatcher image: the dispatcher that fans out per-node Jobs. It only talks to + # the Kubernetes API (lists nodes, creates/watches Jobs); it never touches the + # host. Supports reference:tag or reference@sha256:digest; tag defaults to the + # chart appVersion. + dispatcherImage: + reference: quay.io/kata-containers/kata-deploy-job-dispatcher + tag: "" + # Maximum number of nodes processed concurrently (the dispatcher keeps at most + # this many per-node Jobs in flight, refilling as they finish). Lower it to + # pace the rollout (e.g. limit how many CRI runtimes restart at once on a big + # fleet); raise it to install faster. Effectively capped at the node count. + parallelism: 100 + # How to choose which nodes get a per-node INSTALL Job. Precedence: + # 1. job.nodes (explicit list of node names) - if non-empty, used verbatim + # (passed to the dispatcher as --nodes). + # 2. otherwise a label selector built from job.nodeSelector (equality) ANDed + # with job.nodeSelectorExpressions (In/NotIn/Exists/DoesNotExist) is + # passed to the dispatcher, which resolves matching nodes LIVE at run time. + # 3. if both are empty, ALL nodes are targeted. + # + # DEFAULT: target worker (non-control-plane) nodes, so no custom labeling is + # required. Override these freely: + # - Target nodes with a specific label: + # job: + # nodeSelector: { kata-containers: "enabled" } + # - Target every node (including control-plane), e.g. single-node clusters/CI: + # job: + # nodeSelectorExpressions: [] + # - Richer expressions: + # job: + # nodeSelectorExpressions: + # - { key: kubernetes.io/os, operator: In, values: ["linux"] } + # - { key: node-role.kubernetes.io/control-plane, operator: DoesNotExist } + # - Pin to explicit nodes: + # job: + # nodes: ["worker-1", "worker-2"] + nodes: [] + # Equality label selector (ANDed with nodeSelectorExpressions). Ignored when + # job.nodes is set. Empty by default. + nodeSelector: {} + # Kubernetes-style label selector requirements (ANDed with nodeSelector). + # Each entry: { key, operator, values }. operator is one of: + # In | NotIn (values required) | Exists | DoesNotExist (values must be empty). + # Default selects nodes that are NOT control-plane/master (i.e. worker nodes). + # Set to [] to disable role filtering and target all discovered nodes. + nodeSelectorExpressions: + - key: node-role.kubernetes.io/control-plane + operator: DoesNotExist + - key: node-role.kubernetes.io/master + operator: DoesNotExist + # Node selection for the UNINSTALL (pre-delete hook) dispatcher. Same precedence + # and semantics as install (cleanup.nodes, else cleanup.nodeSelector ANDed with + # cleanup.nodeSelectorExpressions, else all nodes). + # + # The cleanup dispatcher resolves nodes LIVE when it runs at `helm uninstall` + # (the dispatcher does the lookup), so - unlike a frozen Helm-rendered hook - + # the DEFAULT below can safely be "nodes carrying katacontainers.io/kata-runtime", + # i.e. exactly the nodes install actually labeled. Override to clean a + # different set, e.g.: + # job: + # cleanup: + # nodes: ["worker-1"] + cleanup: + nodes: [] + nodeSelector: {} + nodeSelectorExpressions: + - key: katacontainers.io/kata-runtime + operator: Exists + # How long finished per-node Jobs are retained before automatic garbage + # collection (seconds). Applies to both install and cleanup per-node Jobs. + ttlSecondsAfterFinished: 600 + # Per-node retry budget: retries for a single node's Job before it is marked + # failed. One node failing never aborts the others. + backoffLimit: 3 + imagePullPolicy: Always imagePullSecrets: [] diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-helm-chart.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-helm-chart.sh index 337a29291b..2897bcdd46 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-helm-chart.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-helm-chart.sh @@ -23,8 +23,21 @@ tmp="$(mktemp -d)" trap '[[ -n "${KEEP_TMPDIR}" ]] && echo "kept: ${tmp}" || rm -rf "${tmp}"' EXIT cp -r "${CHART_SRC}" "${tmp}/" + +# Job-mode dispatcher image. Its repo mirrors the kata-deploy repo with +# "-job-dispatcher" inserted before any "-ci" suffix (so the "-ci" stays last): +# .../kata-deploy -> .../kata-deploy-job-dispatcher +# .../kata-deploy-ci -> .../kata-deploy-job-dispatcher-ci +# It is built and pushed with the same tag by kata-deploy-build-and-upload-payload.sh. +if [[ "${REGISTRY}" == *-ci ]]; then + JOB_DISPATCHER_IMAGE_REFERENCE="${JOB_DISPATCHER_IMAGE_REFERENCE:-"${REGISTRY%-ci}-job-dispatcher-ci"}" +else + JOB_DISPATCHER_IMAGE_REFERENCE="${JOB_DISPATCHER_IMAGE_REFERENCE:-"${REGISTRY}-job-dispatcher"}" +fi + yq eval ".version = \"${CHART_VERSION}\" | .appVersion = \"${CHART_VERSION}\"" -i "${tmp}/kata-deploy/Chart.yaml" yq eval ".image.reference = \"${REGISTRY}\" | .image.tag = \"${TAG}\"" -i "${tmp}/kata-deploy/values.yaml" +yq eval ".job.dispatcherImage.reference = \"${JOB_DISPATCHER_IMAGE_REFERENCE}\" | .job.dispatcherImage.tag = \"${TAG}\"" -i "${tmp}/kata-deploy/values.yaml" helm dependencies update "${tmp}/kata-deploy" helm package "${tmp}/kata-deploy" -d "${tmp}" helm push "${tmp}/kata-deploy-${CHART_VERSION}.tgz" "oci://${CHART_REGISTRY}"