Compare commits

...

94 Commits

Author SHA1 Message Date
Fabiano Fidêncio
622b912369 release: Add kata-lifecycle-manager chart to release process
Update the release workflow and scripts to package and publish
the kata-lifecycle-manager Helm chart alongside kata-deploy.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-02-05 12:00:19 +01:00
Fabiano Fidêncio
62fef5a5e4 fixup! helm: Add kata-lifecycle-manager chart for Argo Workflows-based upgrades 2026-02-05 11:55:44 +01:00
Fabiano Fidêncio
2e9ed9aa4c helm: Add kata-lifecycle-manager chart for Argo Workflows-based upgrades
This chart installs an Argo WorkflowTemplate for orchestrating controlled,
node-by-node upgrades of kata-deploy with verification and automatic
rollback on failure.

The workflow processes nodes sequentially rather than in parallel to
ensure fleet consistency. This design choice prevents ending up with a
mixed-version fleet where some nodes run the new version while others
remain on the old version. If verification fails on any node, the
workflow stops immediately before touching remaining nodes.

Alternative approaches considered:

- withParam loop with semaphore (max-concurrent: 1): Provides cleaner UI
  with all nodes visible at the same level, but Argo's semaphore only
  controls concurrency, not failure propagation. When one node fails and
  releases the lock, other nodes waiting on the semaphore still proceed.

- withParam with failFast: true: Would be ideal, but Argo only supports
  failFast for DAG tasks, not for steps with withParam. Attempting to use
  it results in "unknown field" errors.

- Single monolithic script: Would guarantee sequential execution and
  fail-fast, but loses per-node visibility in the Argo UI and makes
  debugging harder.

The chosen approach uses recursive Argo templates (upgrade-node-chain)
which naturally provides fail-fast behavior because if any step in the
chain fails, the recursion stops. Despite the nesting in the Argo UI,
each node's upgrade steps remain visible for monitoring.
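A minimal sketch of this recursive pattern (template names, parameter plumbing, and the expression syntax are assumptions for illustration, not the chart's actual content):

```yaml
# Sketch only: a recursive chain gives fail-fast behavior because the
# "recurse" step is never reached if processing the head node fails.
- name: upgrade-node-chain
  inputs:
    parameters:
      - name: nodes          # remaining nodes; the first entry is processed next
  steps:
    - - name: upgrade-head   # all per-node steps for the first node
        template: upgrade-node
    - - name: upgrade-rest   # recurse on the tail; skipped when one node is left
        template: upgrade-node-chain
        when: "{{=len(sprig.splitList(' ', inputs.parameters.nodes)) > 1}}"
```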

A verification pod is required to validate that Kata is functioning
correctly on each node after upgrade. The chart will fail to install
without one. Users must provide the verification pod when installing
kata-lifecycle-manager using --set-file
defaults.verificationPod=./pod.yaml. The pod can also be overridden at
workflow submission time using a base64-encoded workflow parameter.

When passing the verification pod as a workflow parameter, base64
encoding is required because multi-line YAML with special characters
does not survive the journey through Argo CLI and shell script parsing.
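As a sketch of why base64 encoding makes the manifest safe to pass around (file and parameter names here are illustrative):

```shell
# Round-trip a multi-line manifest through base64 so it survives
# Argo CLI and shell parsing as a single opaque parameter value.
cat > pod.yaml <<'EOF'
apiVersion: v1
kind: Pod
metadata:
  name: kata-verify-${NODE}
EOF
pod_b64=$(base64 -w0 < pod.yaml)
# e.g. argo submit ... -p verification-pod-b64="$pod_b64"
# Inside the workflow the value is decoded back before use:
printf '%s' "$pod_b64" | base64 -d > decoded.yaml
cmp -s pod.yaml decoded.yaml && echo "round-trip ok"
```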

The workflow validates prerequisites before touching any nodes. If no
verification pod is configured, the workflow fails immediately with a
clear error message. This prevents partial upgrades that would leave
the cluster in an inconsistent state.

During helm upgrade, kata-deploy's verification is explicitly disabled
(--set verification.pod="") because:
- kata-deploy's verification is cluster-wide, designed for initial install
- kata-lifecycle-manager does per-node verification with proper
  placeholder substitution (${NODE}, ${TEST_POD})
- Running kata-deploy's verification on each node would be redundant and
  could fail due to unsubstituted placeholders
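The per-node substitution can be sketched like this (the template contents and file names are made up for illustration):

```shell
# Substitute the ${NODE} and ${TEST_POD} placeholders for one node.
cat > verify-template.yaml <<'EOF'
apiVersion: v1
kind: Pod
metadata:
  name: ${TEST_POD}
spec:
  nodeName: ${NODE}
EOF
NODE="worker-1"
TEST_POD="kata-verify-${NODE}"
sed -e "s|\${NODE}|${NODE}|g" -e "s|\${TEST_POD}|${TEST_POD}|g" \
    verify-template.yaml > verify.yaml
grep "nodeName: ${NODE}" verify.yaml
```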

On verification failure, the workflow triggers an automatic helm
rollback, waits for kata-deploy to stabilize, uncordons the node, and
marks it with a rolled-back status annotation. The workflow then exits
with an error so the failure is clearly visible.

The upgrade flow per node:
1. Prepare: Annotate node with upgrade status
2. Cordon: Mark node unschedulable
3. Drain (optional): Evict pods if enabled
4. Upgrade: Run helm upgrade with --reuse-values
5. Wait: Wait for kata-deploy DaemonSet pod ready
6. Verify: Run verification pod with substituted placeholders
7. Complete: Uncordon and update annotations

Draining is disabled by default because running Kata VMs continue using
their in-memory binaries after upgrade. Only new workloads use the
upgraded binaries. Users who prefer to evict all workloads before
maintenance can enable draining.
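A shell sketch of one node's pass through these steps (command lines, chart references, and the annotation key are illustrative; the real logic lives in the WorkflowTemplate):

```shell
# Hypothetical sketch of steps 1-7 for a single node; not the chart's code.
upgrade_node() {
    local node="$1" version="$2"
    kubectl annotate node "$node" example.io/upgrade-status=in-progress --overwrite
    kubectl cordon "$node"
    # (optional) kubectl drain "$node" --ignore-daemonsets
    helm upgrade kata-deploy kata-deploy/kata-deploy \
        --reuse-values --version "$version" --set verification.pod=""
    # Wait for this node's kata-deploy DaemonSet pod to become Ready.
    kubectl wait --for=condition=Ready pod -l name=kata-deploy \
        --field-selector "spec.nodeName=$node" -n kube-system --timeout=10m
    run_verification_pod "$node" || return 1  # caller rolls back on failure
    kubectl uncordon "$node"
    kubectl annotate node "$node" example.io/upgrade-status=done --overwrite
}
```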

Known limitations:

- Fleet consistency during rollback: Because kata-deploy uses a DaemonSet
  that is updated cluster-wide, nodes that pass verification are
  uncordoned and can accept new workloads before all nodes are verified.
  If a later node fails verification and triggers a rollback, workloads
  that started on already-verified nodes continue running with the new
  version's in-memory binaries while the cluster reverts to the old
  version. This is generally acceptable since running VMs continue
  functioning and new workloads use the rolled-back version. A future
  improvement could implement a two-phase approach that cordons all nodes
  upfront and only uncordons after all verifications pass.

The chart requires Argo Workflows v3.4+ and uses multi-arch container
images supporting amd64, arm64, s390x, and ppc64le.

Usage:

  # Install kata-lifecycle-manager with verification pod (required)
  helm install kata-lifecycle-manager ./kata-lifecycle-manager \
    --set-file defaults.verificationPod=./my-verification-pod.yaml

  # Label nodes for upgrade
  kubectl label node worker-1 katacontainers.io/kata-lifecycle-manager-window=true

  # Trigger upgrade
  argo submit -n argo --from workflowtemplate/kata-lifecycle-manager \
    -p target-version=3.25.0 \
    -p node-selector="katacontainers.io/kata-lifecycle-manager-window=true" \
    -p helm-namespace=kata-system

  # Monitor progress
  argo watch @latest -n argo

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-02-05 11:50:04 +01:00
Fabiano Fidêncio
e8a896aaa2 packaging: Add multi-arch helm container image
Add a Dockerfile and GitHub Actions workflow to build and publish
a multi-arch helm container image to quay.io/kata-containers/helm.

The image is based on quay.io/kata-containers/kubectl and adds:
- helm (latest stable version)

The image supports the following architectures:
- linux/amd64
- linux/arm64
- linux/s390x
- linux/ppc64le

The workflow runs:
- Weekly (every Sunday at 12:00 UTC, 12 hours after kubectl image)
- On manual trigger
- When the Dockerfile or workflow changes

Image tags:
- latest
- Date-based (YYYYMMDD)
- Helm version (e.g., v3.17.0)
- Git SHA

This image is used by the kata-upgrade Helm chart for orchestrating
kata-deploy upgrades via Argo Workflows.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-02-05 11:49:24 +01:00
Greg Kurz
e430b2641c Merge pull request #12435 from bpradipt/crio-annotation
shim: Add CRI-O annotation support for device cold plug
2026-02-05 09:29:19 +01:00
Alex Lyn
e257430976 Merge pull request #12433 from manuelh-dev/mahuber/cfg-sanitize-whitespaces
runtimes: Sanitize trailing whitespaces
2026-02-05 09:31:21 +08:00
Fabiano Fidêncio
dda1b30c34 tests: nvidia-nim: Use sealed secrets for NGC_API_KEY
Convert the NGC_API_KEY from a regular Kubernetes secret to a sealed
secret for the CC GPU tests. This ensures the API key is only accessible
within the confidential enclave after successful attestation.

The sealed secret uses the "vault" type which points to a resource stored
in the Key Broker Service (KBS). The Confidential Data Hub (CDH) inside
the guest will unseal this secret by fetching it from KBS after
attestation.

The initdata file is created AFTER create_tmp_policy_settings_dir()
copies the empty default file, and BEFORE auto_generate_policy() runs.
This allows genpolicy to add the generated policy.rego to our custom
CDH configuration.

The sealed secret format follows the CoCo specification:
sealed.<JWS header>.<JWS payload>.<signature>

Where the payload contains:
- version: "0.1.0"
- type: "vault" (pointer to KBS resource)
- provider: "kbs"
- resource_uri: KBS path to the actual secret
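For illustration, assembling such an envelope by hand (the resource URI and the signature part are made-up placeholders; real sealed secrets follow the CoCo tooling):

```shell
# JWS parts use unpadded base64url encoding.
b64url() { base64 -w0 | tr '+/' '-_' | tr -d '='; }

header=$(printf '%s' '{"alg":"none"}' | b64url)
payload=$(printf '%s' '{"version":"0.1.0","type":"vault","provider":"kbs","resource_uri":"kbs:///default/ngc/api-key"}' | b64url)
sealed_secret="sealed.${header}.${payload}.fakesignature"
echo "$sealed_secret"
```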

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-02-04 12:34:44 +01:00
Fabiano Fidêncio
c9061f9e36 tests: kata-deploy: Increase post-deployment wait time
Increase the sleep time after kata-deploy deployment from 10s to 60s
to give more time for runtimes to be configured. This helps avoid
race conditions on slower K8s distributions like k3s where the
RuntimeClass may not be immediately available after the DaemonSet
rollout completes.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-02-04 12:13:53 +01:00
Fabiano Fidêncio
0fb2c500fd tests: kata-deploy: Merge E2E tests to avoid timing issues
Merge the two E2E tests ("Custom RuntimeClass exists with correct
properties" and "Custom runtime can run a pod") into a single test, as
the two are tightly dependent on each other.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-02-04 12:13:53 +01:00
Fabiano Fidêncio
fef93f1e08 tests: kata-deploy: Use die() instead of fail() for error handling
Replace fail() calls with die() which is already provided by
common.bash. The fail() function doesn't exist in the test
infrastructure, causing "command not found" errors when tests fail.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-02-04 12:13:53 +01:00
Fabiano Fidêncio
f90c12d4df kata-deploy: Avoid text file busy error with nydus-snapshotter
We cannot overwrite a binary that's currently in use. That's why,
elsewhere, we first remove/unlink the binary (the running process keeps
its file descriptor, so this is safe) and only then copy the new one
into place. However, we missed doing this for the nydus-snapshotter
deployment.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-02-04 10:24:49 +01:00
Manuel Huber
30c7325e75 runtimes: Sanitize trailing whitespaces
Clean up trailing whitespace, making life easier for those who
have configured their IDE to remove it.
Going forward, avoid adding new code with trailing whitespace.

Signed-off-by: Manuel Huber <manuelh@nvidia.com>
2026-02-03 11:46:30 -08:00
Steve Horsman
30494abe48 Merge pull request #12426 from kata-containers/dependabot/github_actions/zizmorcore/zizmor-action-0.4.1
build(deps): bump zizmorcore/zizmor-action from 0.2.0 to 0.4.1
2026-02-03 14:38:54 +00:00
Pradipta Banerjee
8a449d358f shim: Add CRI-O annotation support for device cold plug
Add support for CRI-O annotations when fetching pod identifiers for
device cold plug. The code now checks containerd CRI annotations first,
then falls back to CRI-O annotations if they are empty.

This enables device cold plug to work with both containerd and CRI-O
container runtimes.

Annotations supported:
- containerd: io.kubernetes.cri.sandbox-name, io.kubernetes.cri.sandbox-namespace
- CRI-O: io.kubernetes.cri-o.KubeName, io.kubernetes.cri-o.Namespace

Signed-off-by: Pradipta Banerjee <pradipta.banerjee@gmail.com>
2026-02-03 04:51:15 +00:00
Steve Horsman
6bb77a2f13 Merge pull request #12390 from mythi/tdx-updates-2026-2
runtime: tdx QEMU configuration changes
2026-02-02 16:58:44 +00:00
Zvonko Kaiser
6702b48858 Merge pull request #12428 from fidencio/topic/nydus-snapshotter-start-from-a-clean-state
kata-deploy: nydus: Always start from a clean state
2026-02-02 11:21:26 -05:00
Steve Horsman
0530a3494f Merge pull request #12415 from nlle/make-helm-updatestrategy-configurable
kata-deploy: Make update strategy configurable for kata-deploy DaemonSet
2026-02-02 10:29:01 +00:00
Steve Horsman
93dcaee965 Merge pull request #12423 from manuelh-dev/mahuber/pause-build-fix
packaging: Delete pause_bundle dir before unpack
2026-02-02 10:26:30 +00:00
Fabiano Fidêncio
62ad0814c5 kata-deploy: nydus: Always start from a clean state
Clean up existing nydus-snapshotter state to ensure a fresh start with
the new version.

This is safe across all K8s distributions (k3s, rke2, k0s, microk8s,
etc.) because we only touch the nydus data directory, not containerd's
internals.

When containerd tries to use non-existent snapshots, it will
re-pull/re-unpack.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-02-02 11:06:37 +01:00
Mikko Ylinen
870630c421 kata-deploy: drop custom TDX installation steps
As we have moved to use QEMU (and OVMF already earlier) from
kata-deploy, the custom tdx configurations and distro checks
are no longer needed.

Signed-off-by: Mikko Ylinen <mikko.ylinen@intel.com>
2026-02-02 11:11:26 +02:00
Mikko Ylinen
927be7b8ad runtime: tdx: move to use QEMU from kata-deploy
Currently, a working TDX setup expects users to install special
TDX support builds from Canonical/CentOS virt-sig. kata-deploy
configured the TDX runtime handler to use QEMU from the distro's
paths.

With TDX support now being available in upstream Linux and
Ubuntu 24.04 having an install candidate (linux-image-generic-6.17)
for a new enough kernel, move TDX configuration to use QEMU from
kata-deploy.

While this is the new default, going back to the original
setup is possible by making manual changes to TDX runtime handlers.

Note: runtime-rs is already using QEMUPATH for TDX.

Signed-off-by: Mikko Ylinen <mikko.ylinen@intel.com>
2026-02-02 11:10:52 +02:00
Nikolaj Lindberg Lerche
6e98df2bac kata-deploy: Make update strategy configurable for kata-deploy DaemonSet
Allow the updateStrategy to be configured for the kata-deploy Helm
chart, enabling administrators to control the aggressiveness of
updates. For a less aggressive approach, the strategy can be set to
`OnDelete`; alternatively, the update process can be made more
aggressive by adjusting the `maxUnavailable` parameter.
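For illustration, a values snippet (key names assumed to mirror the Kubernetes DaemonSet updateStrategy API; check the chart's values.yaml for the exact layout):

```yaml
# Hypothetical values.yaml fragment.
updateStrategy:
  type: RollingUpdate        # or "OnDelete" for the least aggressive rollout
  rollingUpdate:
    maxUnavailable: 30%      # raise for a more aggressive rollout
```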

Signed-off-by: Nikolaj Lindberg Lerche <nlle@ambu.com>
2026-02-01 20:14:29 +01:00
Dan Mihai
d7ff54769c tests: policy: remove the need for using sudo
Modify a copy of the root user's settings file instead of modifying
the original file.

Signed-off-by: Dan Mihai <dmihai@microsoft.com>
2026-02-01 20:09:50 +01:00
Dan Mihai
4d860dcaf5 tests: policy: avoid redundant debug output
Avoid redundant and confusing teardown_common() debug output for
k8s-policy-pod.bats and k8s-policy-pvc.bats.

The Policy tests skip the Message field when printing information about
their pods because, unfortunately, that field might contain a truncated
Policy log for the test cases that intentionally cause Policy failures.
The non-truncated Policy log is already available from other
"kubectl describe" fields.

So, drop the redundant pod information from teardown_common(), which
also included the confusing Message field.

Signed-off-by: Dan Mihai <dmihai@microsoft.com>
2026-02-01 20:09:50 +01:00
dependabot[bot]
dc8d9e056d build(deps): bump zizmorcore/zizmor-action from 0.2.0 to 0.4.1
Bumps [zizmorcore/zizmor-action](https://github.com/zizmorcore/zizmor-action) from 0.2.0 to 0.4.1.
- [Release notes](https://github.com/zizmorcore/zizmor-action/releases)
- [Commits](e673c3917a...135698455d)

---
updated-dependencies:
- dependency-name: zizmorcore/zizmor-action
  dependency-version: 0.4.1
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-02-01 15:08:10 +00:00
Manuel Huber
8b0c199f43 packaging: Delete pause_bundle dir before unpack
Delete the pause_bundle directory before running the umoci unpack
operation. This makes builds idempotent, avoiding failures like
"create runtime bundle: config.json already exists in
.../build/pause-image/destdir/pause_bundle", and makes life easier
when building locally.

Signed-off-by: Manuel Huber <manuelh@nvidia.com>
2026-01-31 19:43:11 +01:00
Steve Horsman
4d1095e653 Merge pull request #12350 from manuelh-dev/mahuber/term-grace-period
tests: Remove terminationGracePeriod in manifests
2026-01-29 15:17:17 +00:00
Fabiano Fidêncio
b85393e70b release: Bump version to 3.26.0
Bump VERSION and helm-charts versions.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-29 00:23:26 +01:00
Fabiano Fidêncio
500146bfee versions: Bump Go to 1.24.12
Update Go from 1.24.11 to 1.24.12 to address security vulnerabilities
in the standard library:

- GO-2026-4342: Excessive CPU consumption in archive/zip
- GO-2026-4341: Memory exhaustion in net/url query parsing
- GO-2026-4340: TLS handshake encryption level issue in crypto/tls

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-29 00:23:26 +01:00
Dan Mihai
20ca4d2d79 runtime: DEFDISABLEBLOCK := true
1. Add disable_block_device_use to CLH settings file, for parity with
   the already existing QEMU settings.

2. Set DEFDISABLEBLOCK := true by default for both QEMU and CLH. After
   this change, Kata Guests will use by default virtio-fs to access
   container rootfs directories from their Hosts. Hosts that were
   designed to use Host block devices attached to the Guests can
   re-enable these rootfs block devices by changing the value of
   disable_block_device_use back to false in their settings files.

3. Add test using container image without any rootfs layers. Depending
   on the container runtime and image snapshotter being used, the empty
   container rootfs image might get stored on a host block device that
   cannot be safely hotplugged to a guest VM, because the host is using
   the same block device.

4. Add block device hotplug safety warning into the Kata Shim
   configuration files.

Signed-off-by: Dan Mihai <dmihai@microsoft.com>
Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
Signed-off-by: Cameron McDermott <cameron@northflank.com>
2026-01-28 19:47:49 +01:00
Manuel Huber
5e60d384a2 kata-deploy: Update for mariner in all target
Remove the initrd function and add the image function, aligning with
the functions that actually exist in this file.

Signed-off-by: Manuel Huber <manuelh@nvidia.com>
2026-01-28 08:58:45 -08:00
Greg Kurz
ea627166b9 Merge pull request #12389 from ldoktor/ci-helm
ci.ocp: Use 0.0.0-dev tagged helm chart
2026-01-28 17:20:07 +01:00
Manuel Huber
0d8fbdef07 kernel: Readjust kernel version after decrement
Readjust the kata_config_version counter after it was
accidentally decremented in commit c7f5ff4.

Signed-off-by: Manuel Huber <manuelh@nvidia.com>
2026-01-28 10:48:12 +01:00
Joji Mekkattuparamban
1440dd7468 shim: enforce iommufd for confidential guest vfio
Confidential guests cannot use traditional IOMMU-group-based VFIO;
they need to use IOMMUFD instead, mainly because the group abstraction
is incompatible with a confidential device model. If traditional VFIO
is specified for a confidential guest, detect the error and bail out
early.

Fixes #12393

Signed-off-by: Joji Mekkattuparamban <jojim@nvidia.com>
2026-01-28 00:11:38 +01:00
stevenhorsman
c7bc428e59 versions: Bump guest-components
Bump guest-components to 9aae2eae
to pick up the latest security fixes and toolchain bump

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-01-28 00:05:58 +01:00
Aurélien Bombo
932920cb86 Merge pull request #11959 from houstar/main
agent: remove redundant func comment
2026-01-27 12:01:04 -06:00
Lukáš Doktor
5250d4bacd ci.ocp: Use 0.0.0-dev tagged helm chart
In CI we test the latest kata-deploy, which requires the latest Helm
chart. The previous query no longer works, but these days we can rely
on the "0.0.0-dev" tag and on helm printing the to-be-installed
version to the console.

Signed-off-by: Lukáš Doktor <ldoktor@redhat.com>
2026-01-27 14:58:46 +01:00
Steve Horsman
eb3d204ff3 Merge pull request #12274 from ldoktor/pp-images
ci.ocp: Two little fixes regarding the openshift-ci
2026-01-27 11:31:51 +00:00
Lukáš Doktor
971b096a1f ci.ocp: Update cleanup.sh to cope with helm deployment
Replace the old kata-deploy cleanup and use "helm uninstall" instead.

Signed-off-by: Lukáš Doktor <ldoktor@redhat.com>
2026-01-27 07:59:13 +01:00
Lukáš Doktor
272ff9c568 ci.ocp: Add notes about where to get other podvm images
I keep struggling to find the debug images, so let's note them in the
peer-pods-azure.sh script so people can find them more easily.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Signed-off-by: Lukáš Doktor <ldoktor@redhat.com>
2026-01-27 07:59:12 +01:00
Qingyuan Hou
ca43a8cbb8 agent: remove redundant func comment
This comment was first introduced in e111093 with secure_join(),
but we forgot to remove it when we switched to the safe-path lib
in c0ceaf6.

Signed-off-by: Qingyuan Hou <lenohou@gmail.com>
2026-01-27 03:07:57 +00:00
Alex Lyn
6c0ae4eb04 Merge pull request #11585 from Apokleos/enhance-qmp
runtime-rs: Make QMP init robust by retrying handshake with deadline
2026-01-27 09:11:19 +08:00
Zvonko Kaiser
a59f791bf5 gpu: Move CUDA repo selection to versions.yaml
We want to enable local and remote CUDA repository builds.
Moving the cuda and tools repo to versions.yaml with a
unified build for both types.

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
2026-01-26 22:19:40 +01:00
Fabiano Fidêncio
d0fe60e784 tests: Fix empty string handling for helm
Fix empty string handling in format conversion

When HELM_ALLOWED_HYPERVISOR_ANNOTATIONS, HELM_AGENT_HTTPS_PROXY, or
HELM_AGENT_NO_PROXY are empty, the pattern matching condition
`!= *:*` or `!= *=*` evaluates to true, causing the conversion loop
to create invalid entries like "qemu-tdx: qemu-snp:".

Add -n checks to ensure conversion only runs when variables are
non-empty.
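A minimal reproduction of the underlying shell behavior (simplified; the real script differs):

```shell
# For an empty string, "does not contain ':'" is trivially true, so a
# conversion guarded only by the pattern test still runs on empty input.
v=""
case "$v" in
  *:*) : ;;
  *) echo "pattern check alone: empty input would still be converted" ;;
esac
# Adding the -n check skips empty input before any conversion happens.
if [ -n "$v" ]; then
  echo "converting non-empty value"
else
  echo "with the -n check: empty input is skipped"
fi
```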

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-26 20:50:01 +01:00
Fabiano Fidêncio
4b2d4e96ae tests: Add qemu-{tdx,snp}-runtime-rs to the list of tee shims
We missed doing this as part of
b5a986eacf.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-26 20:50:01 +01:00
Fabiano Fidêncio
26c534d610 tests: Use shims.disableAll in test helpers
Update the CI and functional test helpers to use the new
shims.disableAll option instead of iterating over every shim
to disable them individually.

Also adds helm repo for node-feature-discovery before building
dependencies to fix CI failures on some distributions.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-26 20:50:01 +01:00
Fabiano Fidêncio
04f45a379c kata-deploy: docs: Document shims.disableAll option
Update the Helm chart README to document the new shims.disableAll
option and simplify the examples that previously required listing
every shim to disable.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-26 20:50:01 +01:00
Fabiano Fidêncio
c9e9a682ab kata-deploy: Use disableAll in example values files
Simplify the example values files by using the new shims.disableAll
option instead of listing every shim to disable.

Before (try-kata-nvidia-gpu.values.yaml):
  shims:
    clh:
      enabled: false
    cloud-hypervisor:
      enabled: false
    # ... 15 more lines ...

After:
  shims:
    disableAll: true

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-26 20:50:01 +01:00
Fabiano Fidêncio
cfe9bcbaf1 kata-deploy: Add shims.disableAll option to Helm chart
Add a new `shims.disableAll` option that disables all standard shims
at once. This is useful when:
- Enabling only specific shims without listing every other shim
- Using custom runtimes only mode (no standard Kata shims)

Usage:
  shims:
    disableAll: true
    qemu:
      enabled: true  # Only qemu is enabled

All helper templates are updated to check for this flag before
iterating over shims.

One important thing to note here is that Helm recursively merges user
values with chart defaults, making a simple `disableAll` flag
problematic: if the defaults have `enabled: true`, the user's
`disableAll: true` gets merged with those defaults, resulting in all
shims still being enabled.

The workaround found is to use null (`~`) as the default for the
`enabled` field. The template logic interprets null differently based on
disableAll:

| enabled value | disableAll: false | disableAll: true |
|---------------|-------------------|------------------|
| ~ (null)      | Enabled           | Disabled         |
| true          | Enabled           | Enabled          |
| false         | Disabled          | Disabled         |

This is backward compatible:
- Default behavior unchanged: all shims enabled when disableAll: false
- Users can set `disableAll: true` to disable all, then explicitly
  enable specific shims with `enabled: true`
- Explicit `enabled: false` always disables, regardless of disableAll
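The null handling can be sketched in template logic roughly like this (the helper structure and value paths are assumptions; `kindIs "invalid"` is Sprig's nil test):

```yaml
{{- /* Decide whether a shim is enabled, given shims.disableAll. */}}
{{- $shim := .Values.shims.qemu }}
{{- $enabled := $shim.enabled }}
{{- if kindIs "invalid" $enabled }}
  {{- /* unset (~): follow the inverse of disableAll */}}
  {{- $enabled = not .Values.shims.disableAll }}
{{- end }}
{{- if $enabled }}
# ... render the qemu shim resources ...
{{- end }}
```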

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-26 20:50:01 +01:00
Fabiano Fidêncio
d8a3272f85 kata-deploy: Add tests for custom runtimes Helm templates
Add Bats tests to verify the custom runtimes Helm template rendering,
and that we can start a pod with the custom runtime.

Tests were written with Cursor's help.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-26 20:50:01 +01:00
Fabiano Fidêncio
3be57bb501 kata-deploy: Add Helm chart support for custom runtimes
Add Helm chart configuration for defining custom RuntimeClasses with
base configuration and drop-in overrides.

Usage:
  helm install kata-deploy ./kata-deploy \
    -f custom-runtimes.values.yaml

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-26 20:50:01 +01:00
Fabiano Fidêncio
a76cdb5814 kata-deploy: Add custom runtime config installation/removal
Add functions to install and remove custom runtime configuration files.
Each custom runtime gets an isolated directory structure:

  custom-runtimes/{handler}/
    configuration-{baseConfig}.toml  # Copied from base config
    config.d/
      50-overrides.toml              # User's drop-in overrides

The base config is copied AFTER kata-deploy has applied its modifications
(debug settings, proxy configuration, annotations), so custom runtimes
inherit these settings.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-26 20:50:01 +01:00
Fabiano Fidêncio
4c3989c3e4 kata-deploy: Add custom runtime configuration for containerd/CRI-O
Add functions to configure custom runtimes in containerd and CRI-O.
Custom runtimes use an isolated config directory under:
  custom-runtimes/{handler}/

Custom runtimes automatically derive the shim binary path from the
baseConfig field using the existing is_rust_shim() logic.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-26 20:50:01 +01:00
Fabiano Fidêncio
678b560e6d kata-deploy: Add CustomRuntime struct and parsing
Add support for parsing custom runtime configurations from a mounted
ConfigMap. This allows users to define their own RuntimeClasses with
custom Kata configurations.

The ConfigMap format uses a custom-runtimes.list file with entries:
  handler:baseConfig:containerd_snapshotter:crio_pulltype

Drop-in files are read from dropin-{handler}.toml, if present.
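As a hedged illustration of that format (the handler name, snapshotter, pull type, and drop-in contents below are made up):

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: kata-custom-runtimes
data:
  custom-runtimes.list: |
    kata-mycustom:qemu:overlayfs:imagestore
  dropin-kata-mycustom.toml: |
    [hypervisor.qemu]
    default_memory = 4096
```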

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-26 20:50:01 +01:00
Fabiano Fidêncio
609a25e643 kata-deploy: Refactor runtime configuration with helper functions
Let's extract the common logic from configure_containerd_runtime and
configure_crio_runtime into reusable helper functions. This reduces
code duplication and prepares for adding custom runtime support.

For containerd:
- Add ContainerdRuntimeParams struct to encapsulate common parameters
- Add get_containerd_pluginid() to extract version detection logic
- Add get_containerd_output_path() to extract file path resolution
- Add write_containerd_runtime_config() to write common TOML values

For CRI-O:
- Add CrioRuntimeParams struct to encapsulate common parameters
- Add write_crio_runtime_config() to write common configuration

While here, let's also simplify pod_annotations to always use
"[\"io.katacontainers.*\"]" for all runtimes: the NVIDIA-specific
case was removed from the shell script, but we forgot to mirror that
change here.

No functional changes intended.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-26 20:50:01 +01:00
Steve Horsman
aa94038355 Merge pull request #12388 from Apokleos/fix-shimio
runtime-rs: se File instead of UnixStream for FIFO to fix ENOTSOCK
2026-01-26 13:22:57 +00:00
tak-ka3
5471fa133c runtime-rs: Add -info flag support for containerd v2.0+
Add -info flag handling to containerd-shim-kata-v2 (Rust version).
This outputs RuntimeInfo protobuf (name, version, revision) to stdout,
providing compatibility with containerd v2.0+ which queries runtime
information via this flag.

This is the runtime-rs counterpart to the Go implementation.

Fixes #12133

Signed-off-by: tak-ka3 <takumi.hiraoka@acompany-ac.com>
2026-01-26 13:38:07 +01:00
Alex Lyn
68d671af0f runtime-rs: Make QMP init robust by retrying handshake with deadline
This aims to make QMP initialization robust by retrying the QMP
handshake under a global deadline, to handle slow QEMU bring-up.

Qmp::new() used DEFAULT_QMP_READ_TIMEOUT as the effective deadline
for the QMP handshake read. When QEMU initialization is slow (e.g.
heavy host load, large memory/device init, slow storage, confidential
guests, etc.), the QMP greeting may not become readable within a small
per-read timeout (e.g. 250ms).  This caused QMP init to fail with
"Resource temporarily unavailable (os error 11)" and spam
"couldn't initialise QMP", while subsequent retries might eventually
succeed once QEMU became ready.

To address this issue, keep a short per-read timeout to avoid
indefinite blocking, but add a global "wait for QMP ready" deadline
that retries the handshake with a small backoff. This improves startup
reliability under load and avoids unnecessary reconnect failures.

Signed-off-by: Alex Lyn <alex.lyn@antgroup.com>
2026-01-26 16:47:32 +08:00
Bo Liu
c7f5ff45a2 arm64: Update ptp.conf to correct time sync
Given the patch has been merged in linux upstream, it's safe to enable
these two options.

Signed-off-by: Bo Liu <152475812+liubocflt@users.noreply.github.com>
2026-01-24 21:08:21 +01:00
Hui Zhu
37a0c81b6a libs: Change kv of get_agent_kernel_params to BTreeMap
HashMap cannot guarantee ordering, so the generated command line kept
changing between runs. This commit changes the key-value container of
get_agent_kernel_params to a BTreeMap to make sure the command line
stays stable.

Fixes: #10977

Signed-off-by: Hui Zhu <teawater@antgroup.com>
2026-01-24 21:07:41 +01:00
Alex Lyn
e7b8b302ac runtime-rs: Use File instead of UnixStream for FIFO to fix ENOTSOCK
It aims to address the issue:
"run_io_copy[Stdout]: failed to copy stream: Not a socket (os error 88)"

The `Not a socket (os error 88)` error was caused by incorrectly wrapping
a FIFO file descriptor in a `UnixStream`. The following changes:
(1) Refactor `open_fifo_write` to return `tokio::fs::File` (or a generic
  async reader/writer) instead of `AsyncUnixStream`.
(2) Ensure IO copying logic treats stdout/stderr streams as file-like
  objects rather than sockets.

This fix eliminates the "failed to copy stream" errors in the IO loop
and ensures reliable log forwarding for legacy-io.

Fixes: #12387

Signed-off-by: Alex Lyn <alex.lyn@antgroup.com>
2026-01-24 10:41:27 +00:00
Alex Lyn
8a0fad4b95 runtime-rs: Move the set_flag_with_blocking out as a public method
Move the private closure out and make it a public method responsible
for clearing O_NONBLOCK on an fd, turning it into blocking mode.

Signed-off-by: Alex Lyn <alex.lyn@antgroup.com>
2026-01-24 10:41:27 +00:00
Manuel Huber
6438fe7f2d tests: Remove terminationGracePeriod in manifests
Do not kill containers immediately, instead use Kubernetes'
default termination grace period.

Signed-off-by: Manuel Huber <manuelh@nvidia.com>
2026-01-23 16:18:44 -08:00
Manuel Huber
0d35b36652 Revert "ci: Ensure the KBS resources are created"
This reverts commit c0d7222194.

Soon, guest components will switch to using a DB instead of
storing resources in the filesystem. Further, I don't see any
more indicators why kbs-client would struggle to set simple
resources.

Signed-off-by: Manuel Huber <manuelh@nvidia.com>
2026-01-23 16:18:10 -08:00
Fabiano Fidêncio
5b82b160e2 runtime-rs: Add arm64 QEMU support
Add the necessary configuration and code changes to support QEMU
on arm64 architecture in runtime-rs.

Changes:
- Set MACHINETYPE to "virt" for arm64
- Add machine accelerators "usb=off,gic-version=host" required for
  proper arm64 virtualization
- Add arm64-specific kernel parameter "iommu.passthrough=0"
- Guard vIOMMU (Intel IOMMU) to skip on arm64 since it's not supported

These changes align runtime-rs with the Go runtime's arm64 QEMU support.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
Signed-off-by: Kevin Zhao <kevin.zhao@linaro.org>
2026-01-23 19:48:31 +01:00
tak-ka3
29e7dd27f1 runtime: Add -info flag support for containerd v2.0+
Add support for the -info flag that containerd v2.0+ passes to shims.
The flag outputs RuntimeInfo protobuf to stdout containing the shim
name and version information.

Fixes #12133

Signed-off-by: tak-ka3 <takumi.hiraoka@acompany-ac.com>
2026-01-22 19:26:44 +01:00
Steve Horsman
d0bfb27857 Merge pull request #12384 from Apokleos/fix-full-debug
doc: update enabling full debug method
2026-01-22 14:25:11 +00:00
Fabiano Fidêncio
ac8436e326 kata-deploy: Update debian in the container image to 13 (trixie)
Just a bump to the latest version, as requested by Mikko.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-22 12:32:59 +01:00
Steve Horsman
2cd76796bd Merge pull request #12305 from stevenhorsman/fix-stalebot-permissions
ci: Fix stalebot permissions
2026-01-22 10:02:43 +00:00
Alex Lyn
fb7390ce3c doc: update enabling full debug method
The enable_debug parameter was explicitly set to false rather than
being commented out (e.g., # enable_debug = true). The previous
enabling method failed to account for this explicit setting, so it
had no effect. This commit updates the matching logic to correctly
handle and toggle the explicit false value.
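
The updated sed expression can be exercised standalone; a minimal
sketch (the expression matches the one in the docs diff, GNU sed assumed):

```bash
# Toggle an explicitly-set "enable_debug = false" line. The old pattern
# only matched the commented-out form ("# enable_debug = true"), so an
# explicit false was left untouched.
line='enable_debug = false'
toggled=$(printf '%s\n' "$line" | sed -E 's/^(\s*enable_debug\s*=\s*)false/\1true/')
echo "$toggled"   # -> enable_debug = true
```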

Signed-off-by: Alex Lyn <alex.lyn@antgroup.com>
2026-01-22 17:44:57 +08:00
Hyounggyu Choi
bc131a84b9 GHA: Set timeout for kata-deploy and kbs cleanup
It was observed that some kata-deploy cleanup steps could hang,
causing the workflow to never finish properly. In these cases,
a QEMU process was not cleaned up and kept printing debug logs
to the journal. Over time, this maxed out the runner’s disk
usage and caused the runner service to stop.

Set timeouts for the relevant cleanup steps to avoid this.

Signed-off-by: Hyounggyu Choi <Hyounggyu.Choi@ibm.com>
2026-01-22 10:32:24 +01:00
Fabiano Fidêncio
dacb14619d kata-deploy: Make verification ConfigMap a regular resource
The verification job mounts a ConfigMap containing the pod spec for
the Kata runtime test. Previously, both the ConfigMap and the Job were
Helm hooks with different weights (-5 and 0 respectively).

On k3s, a race condition was observed where the Job pod would be
scheduled before the kubelet's informer cache had registered the
ConfigMap, causing a FailedMount error:

  MountVolume.SetUp failed for volume "pod-spec": object
  "kube-system"/"kata-deploy-verification-spec" not registered

This happened because k3s's lightweight architecture schedules pods
very quickly, and the hook weight difference only controls Helm's
ordering, not actual timing between resource creation and cache sync.

By making the ConfigMap a regular chart resource (removing hook
annotations), it is created during the main chart installation phase,
well before any post-install hooks run. This guarantees the ConfigMap
is fully propagated to all kubelets before the verification Job starts.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-21 20:14:33 +01:00
Fabiano Fidêncio
89e287c3b2 kata-deploy: Add more permissions to verification job's RBAC
The verification job needs to list nodes to check for the
katacontainers.io/kata-runtime label and list events to detect
FailedCreatePodSandBox errors during pod creation.

This was discovered when testing with k0s, where the service account
lacked the required cluster-scope permissions to list nodes.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-21 20:14:33 +01:00
Fabiano Fidêncio
869dd5ac65 kata-deploy: Enable dynamic drop-in support for k0s
Remove k0s-worker and k0s-controller from
RUNTIMES_WITHOUT_CONTAINERD_DROP_IN_SUPPORT and always return true for
k0s in is_containerd_capable_of_using_drop_in_files since k0s auto-loads
from containerd.d/ directory regardless of containerd version.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-21 20:14:33 +01:00
Fabiano Fidêncio
d4ea02e339 kata-deploy: Add microk8s support with dynamic version detection
Add microk8s case to get_containerd_paths() method and remove microk8s
from RUNTIMES_WITHOUT_CONTAINERD_DROP_IN_SUPPORT to enable dynamic
containerd version checking.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-21 20:14:33 +01:00
Fabiano Fidêncio
69dd9679c2 kata-deploy: Centralize containerd path management
Introduce ContainerdPaths struct and get_containerd_paths() method to
centralize the complex logic for determining containerd configuration
file paths across different Kubernetes distributions.

The new ContainerdPaths struct includes:
- config_file: File to read containerd version from and write to
- backup_file: Backup file path before modification
- imports_file: File to add/remove drop-in imports from (Option<String>)
- drop_in_file: Path to the drop-in configuration file
- use_drop_in: Whether drop-in files can be used

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-21 20:14:33 +01:00
Fabiano Fidêncio
606c12df6d kata-deploy: fix JSONPath parsing for labels with dots
The JSONPath parser was incorrectly splitting on escaped dots (\.)
causing microk8s detection to fail. Labels like "microk8s.io/cluster"
were being split into ["microk8s\", "io/cluster"] instead of being
treated as a single key.

This adds a split_jsonpath() helper that properly handles escaped dots,
allowing the automatic microk8s detection via the node label to work
correctly.
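
The actual helper is implemented in Rust; the following is an
illustrative shell equivalent of the escaped-dot handling (the
`split_jsonpath` name comes from the commit, the placeholder byte
is my choice, GNU sed assumed):

```bash
# Split a JSONPath on dots while keeping escaped dots (\.) inside a
# single key, e.g. "microk8s\.io/cluster" stays one segment. Escaped
# dots are swapped for a placeholder byte, the path is split on the
# remaining dots, then the placeholder is restored as a literal dot.
split_jsonpath() {
    printf '%s\n' "$1" | sed 's/\\\./\x01/g' | tr '.' '\n' | sed 's/\x01/./g'
}
parts=$(split_jsonpath 'metadata.labels.microk8s\.io/cluster')
echo "$parts"   # three segments: metadata / labels / microk8s.io/cluster
```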

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-21 20:14:33 +01:00
Fabiano Fidêncio
ec18dd79ba tests: Simplify kata-deploy test to use helm directly
The kata-deploy test was using helm_helper which made it hard to debug
failures (die() calls would cause "Executed 0 tests" errors) and added
unnecessary complexity.

The test now calls helm directly like a user would, making it simpler
and more representative of real-world usage. The verification job status
is explicitly checked with proper failure detection instead of relying
on helm --wait.

Timeouts are configurable via environment variables to account for
different network speeds and image sizes:
- KATA_DEPLOY_TIMEOUT (default: 600s)
- KATA_DEPLOY_DAEMONSET_TIMEOUT (default: 300s)
- KATA_DEPLOY_VERIFICATION_TIMEOUT (default: 120s)

Documentation has been added to explain what each timeout controls and
how to customize them.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-21 20:14:33 +01:00
Fabiano Fidêncio
86e0b08b13 kata-deploy: Improve verification job timing and failure detection
The verification job now supports configurable timeouts to accommodate
different environments and network conditions. The daemonset timeout
defaults to 1200 seconds (20 minutes) to allow for large image downloads,
while the verification pod timeout defaults to 180 seconds.

The job now waits for the DaemonSet to exist, pods to be scheduled,
rollout to complete, and nodes to be labeled before creating the
verification pod. A 15-second delay is added after node labeling to
allow kubelet time to refresh runtime information.

Retry logic with 3 attempts and a 10-second delay handles transient
FailedCreatePodSandBox errors that can occur during runtime
initialization. The job only fails on pod errors after a 30-second
grace period to avoid false positives from timing issues.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-21 20:14:33 +01:00
Fabiano Fidêncio
2369cf585d tests: Fix retry loop bugs in helm_helper
The retry loop in helm_helper had two bugs:
1. Counter initialized to 10 instead of 0, causing immediate failure
2. Exit condition used -eq instead of -ge, incorrect for loop logic

These bugs would cause helm_helper to fail immediately on the first
retry attempt instead of properly retrying up to max_tries times.
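
A corrected retry loop, sketched in shell (helper name hypothetical;
it shows the counter starting at 0 and the `-ge` exit condition the
commit describes):

```bash
# Retry "$@" up to max_tries times. The counter starts at 0 (not at
# max_tries) and the exit check uses -ge (not -eq), so the loop
# actually retries instead of failing on the first attempt.
retry() {
    local max_tries=$1; shift
    local tries=0
    until "$@"; do
        tries=$((tries + 1))
        [ "${tries}" -ge "${max_tries}" ] && return 1
        sleep 1
    done
}
```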

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-01-21 20:14:33 +01:00
stevenhorsman
19efeae12e workflow: Fix stalebot permissions
While looking into the stale bot's handling of issues, I realised that our
existing stale job needs additional permissions to work. Unfortunately, without
these permissions the actions merely log the failure but still finish as
successful, which made it hard to spot that we had an issue.

Add the required permissions to get this working again, and improve the
message. Also add a concurrency rule to make zizmor happy.

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-01-21 17:28:59 +00:00
Steve Horsman
70f6543333 Merge pull request #12371 from stevenhorsman/cargo-check
build: Add cargo check
2026-01-21 14:50:07 +00:00
Steve Horsman
4eb50d7b59 Merge pull request #12334 from stevenhorsman/rust-linting-improvements
Rust linting improvements
2026-01-21 14:01:37 +00:00
Steve Horsman
ba47bb6583 Merge pull request #11421 from kata-containers/dependabot/go_modules/src/runtime/github.com/urfave/cli-1.22.17
build(deps): bump github.com/urfave/cli from 1.22.14 to 1.22.17 in /src/runtime
2026-01-21 11:46:02 +00:00
stevenhorsman
62847e1efb kata-ctl: Remove unnecessary unwrap
Switch `is_err()` and then `unwrap_err()` for `if let` which is
"more idiomatic"

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-01-21 08:53:40 +00:00
stevenhorsman
78824e0181 agent: Remove unnecessary unwrap
Switch `is_some()` and then `unwrap()` for `if let` which is
"more idiomatic"

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-01-21 08:53:40 +00:00
stevenhorsman
d135a186e1 libs: Remove unnecessary unwrap
Switch `is_err()` and then `unwrap_err()` for `if let` which is
"more idiomatic"

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-01-21 08:52:48 +00:00
stevenhorsman
949e0c2ca0 libs: Remove unused imports
Tidy up the imports

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-01-21 08:52:48 +00:00
stevenhorsman
83b0c44986 dragonball: Remove unused imports
Clean up the imports

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-01-21 08:52:48 +00:00
stevenhorsman
7a02c54b6c kata-ctl: Allow unused assignment in clap parsing
`command` is never read, but leave it in for now so we don't
disrupt the option parsing.

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-01-21 08:52:48 +00:00
stevenhorsman
bf1539b802 libs: Replace manual default
HugePageType has a manual default that can be derived
more concisely

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-01-21 08:52:47 +00:00
stevenhorsman
0fd9eebf0f kata-ctl: Update Cargo.lock
The cargo check identified that the lock file is out of date,
so bump it to fix the issue.

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-01-20 16:07:34 +00:00
stevenhorsman
3f1533ae8a build: Add cargo check
We've had a couple of occasions where Cargo.lock has been out of sync
with Cargo.toml, so try to extend our Rust checks to pick this up in the CI.

There is probably a more elegant way than running `cargo check` and
checking for changes, but I'll start with this approach.

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-01-20 16:07:34 +00:00
dependabot[bot]
2edb161c53 build(deps): bump github.com/urfave/cli in /src/runtime
Bumps [github.com/urfave/cli](https://github.com/urfave/cli) from 1.22.14 to 1.22.17.
- [Release notes](https://github.com/urfave/cli/releases)
- [Changelog](https://github.com/urfave/cli/blob/main/docs/CHANGELOG.md)
- [Commits](https://github.com/urfave/cli/compare/v1.22.14...v1.22.17)

---
updated-dependencies:
- dependency-name: github.com/urfave/cli
  dependency-version: 1.22.17
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-01-13 09:04:41 +00:00
184 changed files with 4805 additions and 1113 deletions

.github/workflows/build-helm-image.yaml

@@ -0,0 +1,75 @@
name: Build helm multi-arch image
on:
schedule:
# Run every Sunday at 12:00 UTC (12 hours after kubectl image build)
- cron: '0 12 * * 0'
workflow_dispatch:
# Allow manual triggering
push:
branches:
- main
paths:
- 'tools/packaging/helm/Dockerfile'
- '.github/workflows/build-helm-image.yaml'
permissions: {}
env:
REGISTRY: quay.io
IMAGE_NAME: kata-containers/helm
jobs:
build-and-push:
name: Build and push multi-arch image
runs-on: ubuntu-24.04
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false
- name: Set up QEMU
uses: docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 # v3.6.0
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Login to Quay.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
with:
registry: ${{ env.REGISTRY }}
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
- name: Get helm version
id: helm-version
run: |
HELM_VERSION=$(curl -s https://api.github.com/repos/helm/helm/releases/latest | grep '"tag_name"' | sed -E 's/.*"([^"]+)".*/\1/')
echo "version=${HELM_VERSION}" >> "$GITHUB_OUTPUT"
- name: Generate image metadata
id: meta
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
type=raw,value=latest
type=raw,value={{date 'YYYYMMDD'}}
type=raw,value=${{ steps.helm-version.outputs.version }}
type=sha,prefix=
- name: Build and push multi-arch image
uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25 # v5.4.0
with:
context: tools/packaging/helm/
file: tools/packaging/helm/Dockerfile
platforms: linux/amd64,linux/arm64,linux/s390x,linux/ppc64le
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max


@@ -284,11 +284,15 @@ jobs:
echo "${QUAY_DEPLOYER_PASSWORD}" | helm registry login quay.io --username "${QUAY_DEPLOYER_USERNAME}" --password-stdin
echo "${GITHUB_TOKEN}" | helm registry login ghcr.io --username "${GITHUB_ACTOR}" --password-stdin
- name: Push helm chart to the OCI registries
- name: Push helm charts to the OCI registries
run: |
release_version=$(./tools/packaging/release/release.sh release-version)
# Push kata-deploy chart
helm push "kata-deploy-${release_version}.tgz" oci://quay.io/kata-containers/kata-deploy-charts
helm push "kata-deploy-${release_version}.tgz" oci://ghcr.io/kata-containers/kata-deploy-charts
# Push kata-lifecycle-manager chart
helm push "kata-lifecycle-manager-${release_version}.tgz" oci://quay.io/kata-containers/kata-deploy-charts
helm push "kata-lifecycle-manager-${release_version}.tgz" oci://ghcr.io/kata-containers/kata-deploy-charts
publish-release:
name: publish-release


@@ -32,6 +32,7 @@ jobs:
matrix:
vmm:
- qemu
- qemu-runtime-rs
k8s:
- kubeadm
runs-on: arm64-k8s


@@ -126,5 +126,6 @@ jobs:
- name: Delete CoCo KBS
if: always() && matrix.environment.name != 'nvidia-gpu'
timeout-minutes: 10
run: |
bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs


@@ -137,10 +137,12 @@ jobs:
- name: Delete kata-deploy
if: always()
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh cleanup-zvsi
- name: Delete CoCo KBS
if: always()
timeout-minutes: 10
run: |
if [ "${KBS}" == "true" ]; then
bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs


@@ -120,10 +120,12 @@ jobs:
- name: Delete kata-deploy
if: always()
timeout-minutes: 15
run: bash tests/integration/kubernetes/gha-run.sh cleanup
- name: Delete CoCo KBS
if: always()
timeout-minutes: 10
run: |
[[ "${KATA_HYPERVISOR}" == "qemu-tdx" ]] && echo "ITA_KEY=${GH_ITA_KEY}" >> "${GITHUB_ENV}"
bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs


@@ -6,14 +6,21 @@ on:
permissions: {}
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
stale:
name: stale
runs-on: ubuntu-22.04
permissions:
actions: write # Needed to manage caches for state persistence across runs
pull-requests: write # Needed to add/remove labels, post comments, or close PRs
steps:
- uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0
with:
stale-pr-message: 'This PR has been opened without with no activity for 180 days. Comment on the issue otherwise it will be closed in 7 days'
stale-pr-message: 'This PR has been opened without activity for 180 days. Please comment on the issue or it will be closed in 7 days.'
days-before-pr-stale: 180
days-before-pr-close: 7
days-before-issue-stale: -1


@@ -21,7 +21,7 @@ jobs:
persist-credentials: false
- name: Run zizmor
uses: zizmorcore/zizmor-action@e673c3917a1aef3c65c972347ed84ccd013ecda4 # v0.2.0
uses: zizmorcore/zizmor-action@135698455da5c3b3e55f73f4419e481ab68cdd95 # v0.4.1
with:
advanced-security: false
annotations: true

Cargo.lock

@@ -4005,6 +4005,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"common",
"containerd-shim-protos",
"go-flag",
"logging",
"nix 0.26.4",


@@ -1 +1 @@
3.25.0
3.26.0


@@ -46,16 +46,12 @@ fi
[[ ${SELINUX_PERMISSIVE} == "yes" ]] && oc delete -f "${deployments_dir}/machineconfig_selinux.yaml.in"
# Delete kata-containers
pushd "${katacontainers_repo_dir}/tools/packaging/kata-deploy" || { echo "Failed to push to ${katacontainers_repo_dir}/tools/packaging/kata-deploy"; exit 125; }
oc delete -f kata-deploy/base/kata-deploy.yaml
helm uninstall kata-deploy --wait --namespace kube-system
oc -n kube-system wait --timeout=10m --for=delete -l name=kata-deploy pod
oc apply -f kata-cleanup/base/kata-cleanup.yaml
echo "Wait for all related pods to be gone"
( repeats=1; for _ in $(seq 1 600); do
oc get pods -l name="kubelet-kata-cleanup" --no-headers=true -n kube-system 2>&1 | grep "No resources found" -q && ((repeats++)) || repeats=1
[[ "${repeats}" -gt 5 ]] && echo kata-cleanup finished && break
sleep 1
done) || { echo "There are still some kata-cleanup related pods after 600 iterations"; oc get all -n kube-system; exit 1; }
oc delete -f kata-cleanup/base/kata-cleanup.yaml
oc delete -f kata-rbac/base/kata-rbac.yaml
oc delete -f runtimeclasses/kata-runtimeClasses.yaml


@@ -51,13 +51,13 @@ apply_kata_deploy() {
oc label --overwrite ns kube-system pod-security.kubernetes.io/enforce=privileged pod-security.kubernetes.io/warn=baseline pod-security.kubernetes.io/audit=baseline
local version chart
version=$(curl -sSL https://api.github.com/repos/kata-containers/kata-containers/releases/latest | jq .tag_name | tr -d '"')
version='0.0.0-dev'
chart="oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy"
# Ensure any potential leftover is cleaned up ... and this secret usually is not in case of previous failures
oc delete secret sh.helm.release.v1.kata-deploy.v1 -n kube-system || true
echo "Installing kata using helm ${chart} ${version}"
echo "Installing kata using helm ${chart} ${version} (sha printed in helm output)"
helm install kata-deploy --wait --namespace kube-system --set "image.reference=${KATA_DEPLOY_IMAGE%%:*},image.tag=${KATA_DEPLOY_IMAGE##*:}" "${chart}" --version "${version}"
}


@@ -157,6 +157,16 @@ if [[ -z "${CAA_IMAGE}" ]]; then
fi
# Get latest PP image
#
# You can list the CI images by:
# az sig image-version list-community --location "eastus" --public-gallery-name "cocopodvm-d0e4f35f-5530-4b9c-8596-112487cdea85" --gallery-image-definition "podvm_image0" --output table
# or the release images by:
# az sig image-version list-community --location "eastus" --public-gallery-name "cococommunity-42d8482d-92cd-415b-b332-7648bd978eff" --gallery-image-definition "peerpod-podvm-fedora" --output table
# or the release debug images by:
# az sig image-version list-community --location "eastus" --public-gallery-name "cococommunity-42d8482d-92cd-415b-b332-7648bd978eff" --gallery-image-definition "peerpod-podvm-fedora-debug" --output table
#
# Note there are other flavours of the released images, you can list them by:
# az sig image-definition list-community --location "eastus" --public-gallery-name "cococommunity-42d8482d-92cd-415b-b332-7648bd978eff" --output table
if [[ -z "${PP_IMAGE_ID}" ]]; then
SUCCESS_TIME=$(curl -s \
-H "Accept: application/vnd.github+json" \


@@ -125,7 +125,7 @@ If you want to enable SELinux in Permissive mode, add `enforcing=0` to the kerne
Enable full debug as follows:
```bash
$ sudo sed -i -e 's/^# *\(enable_debug\).*=.*$/\1 = true/g' /etc/kata-containers/configuration.toml
$ sudo sed -i -E 's/^(\s*enable_debug\s*=\s*)false/\1true/' /etc/kata-containers/configuration.toml
$ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.log=debug initcall_debug"/g' /etc/kata-containers/configuration.toml
```


@@ -0,0 +1,118 @@
# Kata Containers Lifecycle Management
## Overview
Kata Containers lifecycle management in Kubernetes consists of two operations:
1. **Installation** - Deploy Kata Containers to cluster nodes
2. **Upgrades** - Update Kata Containers to newer versions without disrupting workloads
The Kata Containers project provides two Helm charts to address these needs:
| Chart | Purpose |
|-------|---------|
| `kata-deploy` | Initial installation and configuration |
| `kata-lifecycle-manager` | Orchestrated rolling upgrades with verification |
---
## Installation with kata-deploy
The `kata-deploy` Helm chart installs Kata Containers across all (or selected) nodes using a Kubernetes DaemonSet. When deployed, it:
- Installs Kata runtime binaries on each node
- Configures the container runtime (containerd) to use Kata
- Registers RuntimeClasses (`kata-qemu-nvidia-gpu-snp`, `kata-qemu-nvidia-gpu-tdx`, `kata-qemu-nvidia-gpu`, etc.)
After installation, workloads can use Kata isolation by specifying `runtimeClassName: kata-qemu-nvidia-gpu-snp` (or another Kata RuntimeClass) in their pod spec.
---
## Upgrades with kata-lifecycle-manager
### The Problem
Standard `helm upgrade kata-deploy` updates all nodes simultaneously via the DaemonSet. This approach:
- Provides no per-node verification
- Offers no controlled rollback mechanism
- Can leave the cluster in an inconsistent state if something fails
### The Solution
The `kata-lifecycle-manager` Helm chart uses Argo Workflows to orchestrate upgrades with the following guarantees:
| Guarantee | Description |
|-----------|-------------|
| **Sequential Processing** | Nodes are upgraded one at a time |
| **Per-Node Verification** | A user-provided pod validates Kata functionality after each node upgrade |
| **Fail-Fast** | If verification fails, the workflow stops immediately |
| **Automatic Rollback** | On failure, Helm rollback is executed and the node is restored |
### Upgrade Flow
For each node in the cluster:
1. **Cordon** - Mark node as unschedulable
2. **Drain** (optional) - Evict existing workloads
3. **Upgrade** - Run `helm upgrade kata-deploy` targeting this node
4. **Wait** - Ensure kata-deploy DaemonSet pod is ready
5. **Verify** - Run verification pod to confirm Kata works
6. **Uncordon** - Mark node as schedulable again
If verification fails on any node, the workflow:
- Rolls back the Helm release
- Uncordons the node
- Stops processing (remaining nodes are not upgraded)
### Verification Pod
Users must provide a verification pod that tests Kata functionality. This pod:
- Uses a Kata RuntimeClass
- Is scheduled on the specific node being verified
- Runs whatever validation logic the user requires (smoke tests, attestation checks, etc.)
**GPU Verification Example:**
For clusters with NVIDIA GPUs, the CUDA VectorAdd sample provides a comprehensive verification:
```yaml
apiVersion: v1
kind: Pod
metadata:
name: ${TEST_POD}
spec:
runtimeClassName: kata-qemu-nvidia-gpu-snp # or kata-qemu-nvidia-gpu-tdx
restartPolicy: Never
nodeSelector:
kubernetes.io/hostname: ${NODE}
containers:
- name: cuda-vectoradd
image: nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0-ubuntu22.04
resources:
limits:
nvidia.com/pgpu: "1"
memory: 16Gi
```
This verifies that GPU passthrough works correctly with the upgraded Kata runtime.
The placeholders `${NODE}` and `${TEST_POD}` are substituted at runtime.
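A minimal sketch of such a substitution step, assuming a plain `sed` pass (the chart's actual mechanism may differ):
```bash
# Render the ${NODE} and ${TEST_POD} placeholders in a pod-spec
# template before applying it (illustrative only).
NODE="worker-1"
TEST_POD="kata-verify"
template='metadata:
  name: ${TEST_POD}
nodeSelector:
  kubernetes.io/hostname: ${NODE}'
rendered=$(printf '%s\n' "$template" \
    | sed -e "s|\${NODE}|${NODE}|g" -e "s|\${TEST_POD}|${TEST_POD}|g")
echo "$rendered"
```
The rendered spec would then be piped to `kubectl apply -f -` on the node under verification.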
---
## Demo Recordings
| Demo | Description | Link |
|------|-------------|------|
| Sunny Path | Successful upgrade from 3.24.0 to 3.25.0 | [TODO] |
| Rainy Path | Failed verification triggers rollback | [TODO] |
---
## References
- [kata-deploy Helm Chart](tools/packaging/kata-deploy/helm-chart/README.md)
- [kata-lifecycle-manager Helm Chart](tools/packaging/kata-deploy/helm-chart/kata-lifecycle-manager/README.md)
- [kata-lifecycle-manager Design Document](docs/design/kata-lifecycle-manager-design.md)


@@ -28,13 +28,15 @@ Bug fixes are released as part of `MINOR` or `MAJOR` releases only. `PATCH` is a
## Release Process
### Bump the `VERSION` and `Chart.yaml` file
### Bump the `VERSION` and `Chart.yaml` files
When the `kata-containers/kata-containers` repository is ready for a new release,
first create a PR to set the release in the [`VERSION`](./../VERSION) file and update the
`version` and `appVersion` in the
[`Chart.yaml`](./../tools/packaging/kata-deploy/helm-chart/kata-deploy/Chart.yaml) file and
have it merged.
`version` and `appVersion` in the following `Chart.yaml` files:
- [`kata-deploy/Chart.yaml`](./../tools/packaging/kata-deploy/helm-chart/kata-deploy/Chart.yaml)
- [`kata-lifecycle-manager/Chart.yaml`](./../tools/packaging/kata-deploy/helm-chart/kata-lifecycle-manager/Chart.yaml)
Have the PR merged before proceeding.
### Lock the `main` branch


@@ -19,6 +19,7 @@ Kata Containers design documents:
- [Design for direct-assigned volume](direct-blk-device-assignment.md)
- [Design for core-scheduling](core-scheduling.md)
- [Virtualization Reference Architecture](kata-vra.md)
- [Design for kata-lifecycle-manager Helm chart](kata-lifecycle-manager-design.md)
---
- [Design proposals](proposals)


@@ -51,6 +51,7 @@ containers started after the VM has been launched.
Users can check to see if the container uses the `devicemapper` block
device as its rootfs by calling `mount(8)` within the container. If
the `devicemapper` block device is used, the root filesystem (`/`)
will be mounted from `/dev/vda`. Users can disable direct mounting of
the underlying block device through the runtime
[configuration](README.md#configuration).
will be mounted from `/dev/vda`. Users can enable direct mounting of
the underlying block device by setting the runtime
[configuration](README.md#configuration) flag `disable_block_device_use` to
`false`.


@@ -0,0 +1,502 @@
# Kata Containers Lifecycle Manager Design
## Summary
This document proposes a Helm chart-based orchestration solution for Kata Containers that
enables controlled, node-by-node upgrades with verification and rollback capabilities
using Argo Workflows.
## Motivation
### Problem Statement
Upgrading Kata Containers in a production Kubernetes cluster presents several challenges:
1. **Workload Scheduling Control**: New Kata workloads should not be scheduled on a node
during upgrade until the new runtime is verified.
2. **Verification Gap**: There is no standardized way to verify that Kata is working correctly
after an upgrade before allowing workloads to return to the node. This solution addresses
the gap by running a user-provided verification pod on each upgraded node.
3. **Rollback Complexity**: If an upgrade fails, administrators must manually coordinate
rollback across multiple nodes.
4. **Controlled Rollout**: Operators need the ability to upgrade nodes incrementally
(canary approach) with fail-fast behavior if any node fails verification.
5. **Multi-Architecture Support**: The upgrade tooling must work across all architectures
supported by Kata Containers (amd64, arm64, s390x, ppc64le).
### Current State
The `kata-deploy` Helm chart provides installation and configuration of Kata Containers,
including a post-install verification job. However, there is no built-in mechanism for
orchestrating upgrades across nodes in a controlled manner.
## Goals
1. Provide a standardized, automated way to upgrade Kata Containers node-by-node
2. Ensure each node is verified before returning to service
3. Support user-defined verification logic
4. Automatically roll back if verification fails
5. Work with the existing `kata-deploy` Helm chart
6. Support all Kata-supported architectures
## Non-Goals
1. Initial Kata Containers installation (use the kata-deploy Helm chart for that)
2. Managing Kubernetes cluster upgrades
3. Providing Kata-specific verification logic (this is the user's responsibility)
4. Managing Argo Workflows installation
## Argo Workflows Dependency
### What Works Without Argo
The following components work independently of Argo Workflows:
| Component | Description |
|-----------|-------------|
| **kata-deploy Helm chart** | Full installation, configuration, `RuntimeClasses` |
| **Post-install verification** | Helm hook runs verification pod after install |
| **Label-gated deployment** | Progressive rollout via node labels |
| **Manual upgrades** | User can script: cordon, helm upgrade, verify, uncordon |
Users who do not want Argo can still:
- Install and configure Kata via kata-deploy
- Perform upgrades manually or with custom scripts
- Use the verification pod pattern in their own automation
### What Requires Argo
The kata-lifecycle-manager Helm chart provides orchestration via Argo Workflows:
| Feature | Description |
|---------|-------------|
| **Automated node-by-node upgrades** | Sequential processing with fail-fast |
| **Taint-based node selection** | Select nodes by taint key/value |
| **`WorkflowTemplate`** | Reusable upgrade workflow |
| **Rollback entrypoint** | `argo submit --entrypoint rollback-node` |
| **Status tracking** | Node annotations updated at each phase |
### For Users Already Using Argo
If your cluster already has Argo Workflows installed:
```bash
# Install kata-lifecycle-manager - integrates with your existing Argo installation
helm install kata-lifecycle-manager oci://ghcr.io/kata-containers/kata-deploy-charts/kata-lifecycle-manager \
--set argoNamespace=argo \
--set-file defaults.verificationPod=./verification-pod.yaml
# Trigger upgrades via argo CLI or integrate with existing workflows
argo submit -n argo --from workflowtemplate/kata-lifecycle-manager -p target-version=3.25.0
```
kata-lifecycle-manager can also be triggered by other Argo workflows, CI/CD pipelines, or GitOps
tools that support Argo.
### For Users Not Wanting Argo
If you prefer not to use Argo Workflows:
1. **Use kata-deploy directly** - handles installation and basic verification
2. **Script your own orchestration** - example approach:
```bash
#!/bin/bash
# Manual upgrade script (no Argo required)
set -euo pipefail
VERSION="3.25.0"
# Upgrade each node with Kata runtime
kubectl get nodes -l katacontainers.io/kata-runtime=true -o name | while read -r node_path; do
NODE="${node_path#node/}"
echo "Upgrading $NODE..."
kubectl cordon "$NODE"
helm upgrade kata-deploy oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy \
--namespace kube-system \
--version "$VERSION" \
--reuse-values \
--wait
# Wait for the kata-deploy DaemonSet rollout to complete
kubectl rollout status daemonset/kata-deploy -n kube-system
# Run verification (apply your pod, wait, check exit code)
kubectl apply -f verification-pod.yaml
kubectl wait pod/kata-verify --for=jsonpath='{.status.phase}'=Succeeded --timeout=180s
kubectl delete pod/kata-verify
kubectl uncordon "$NODE"
echo "$NODE upgraded successfully"
done
```
This approach requires more manual effort but avoids the Argo dependency.
## Proposed Design
### Architecture Overview
```text
┌─────────────────────────────────────────────────────────────────┐
│ Argo Workflows Controller │
│ (pre-installed) │
└────────────────────────────┬────────────────────────────────────┘
┌──────────────────────────────────────────────────────────────┐
│ kata-lifecycle-manager Helm Chart │
│ ┌────────────────────────────────────────────────────────┐ │
│ │ WorkflowTemplate │ │
│ │ - upgrade-all-nodes (entrypoint) │ │
│ │ - upgrade-single-node (per-node steps) │ │
│ │ - rollback-node (manual recovery) │ │
│ └────────────────────────────────────────────────────────┘ │
│ ┌────────────────────────────────────────────────────────┐ │
│ │ RBAC Resources │ │
│ │ - ServiceAccount │ │
│ │ - ClusterRole (node, pod, helm operations) │ │
│ │ - ClusterRoleBinding │ │
│ └────────────────────────────────────────────────────────┘ │
└──────────────────────────────────────────────────────────────┘
                             │
                             ▼
┌─────────────────────────────────────────────────────────────────┐
│ kata-deploy Helm Chart │
│ (existing installation) │
└─────────────────────────────────────────────────────────────────┘
```
### Upgrade Flow
For each node selected by the upgrade label:
```text
┌────────────┐    ┌──────────────┐    ┌────────────┐    ┌────────────┐
│  Prepare   │───▶│    Cordon    │───▶│  Upgrade   │───▶│ Wait Ready │
│ (annotate) │    │    (mark     │    │   (helm    │    │(kata-deploy│
│            │    │unschedulable)│    │  upgrade)  │    │ DaemonSet) │
└────────────┘    └──────────────┘    └────────────┘    └──────┬─────┘
                                                               │
                                            ┌──────────────────┘
                                            ▼
┌────────────┐    ┌──────────────┐    ┌────────────┐
│  Complete  │◀───│   Uncordon   │◀───│   Verify   │
│ (annotate  │    │    (mark     │    │ (user pod) │
│  version)  │    │ schedulable) │    │            │
└────────────┘    └──────────────┘    └────────────┘
```
**Note:** Drain is not required for Kata upgrades. Running Kata VMs continue using
the in-memory binaries. Only new workloads use the upgraded binaries. Cordon ensures
the verification pod runs before any new workloads are scheduled with the new runtime.
**Optional Drain:** For users who prefer to evict workloads before any maintenance
operation, an optional drain step can be enabled via `drain-enabled=true`. When
enabled, an additional drain step runs after cordon and before upgrade.
### Node Selection Model
Nodes can be selected for upgrade using **labels**, **taints**, or **both**.
**Label-based selection:**
```bash
# Select nodes by label
argo submit -n argo --from workflowtemplate/kata-lifecycle-manager \
  -p target-version=3.25.0 \
  -p node-selector="katacontainers.io/kata-lifecycle-manager-window=true"
```
**Taint-based selection:**
Some organizations use taints to mark nodes for maintenance. The workflow supports
selecting nodes by taint key and optionally taint value:
```bash
# Select nodes with a specific taint
kubectl taint nodes worker-1 kata-lifecycle-manager=pending:NoSchedule
argo submit -n argo --from workflowtemplate/kata-lifecycle-manager \
  -p target-version=3.25.0 \
  -p node-taint-key=kata-lifecycle-manager \
  -p node-taint-value=pending
```
**Combined selection:**
Labels and taints can be used together for precise targeting:
```bash
argo submit -n argo --from workflowtemplate/kata-lifecycle-manager \
  -p target-version=3.25.0 \
  -p node-selector="node-pool=kata-pool" \
  -p node-taint-key=maintenance
```
This allows operators to:
1. Upgrade a single canary node first
2. Gradually add nodes to the upgrade window
3. Control upgrade timing via `GitOps` or automation
4. Integrate with existing taint-based maintenance workflows
### Node Pool Support
The node selector and taint selector parameters enable basic node pool targeting:
```bash
# Upgrade only nodes matching a specific node pool label
argo submit -n argo --from workflowtemplate/kata-lifecycle-manager \
  -p target-version=3.25.0 \
  -p node-selector="node-pool=kata-pool"
```
**Current Capabilities:**
| Feature | Status | Chart | Notes |
|---------|--------|-------|-------|
| Label-based selection | Supported | kata-lifecycle-manager | Works with any label combination |
| Taint-based selection | Supported | kata-lifecycle-manager | Select by taint key/value |
| Sequential upgrades | Supported | kata-lifecycle-manager | One node at a time with fail-fast |
| Pool-specific verification pods | Not supported | kata-lifecycle-manager | Same verification for all nodes |
| Pool-ordered upgrades | Not supported | kata-lifecycle-manager | Upgrade pool A before pool B |
See the [Potential Enhancements](#potential-enhancements) section for future work.
### Verification Model
**Verification runs on each node that is upgraded.** The node is only `uncordoned` after
its verification pod succeeds. If verification fails, automatic rollback is triggered
to restore the previous version before `uncordoning` the node.
**Common failure modes detected by verification:**
- Pod stuck in Pending/`ContainerCreating` (runtime can't start VM)
- Pod crashes immediately (containerd/CRI-O configuration issues)
- Pod times out (resource issues, image pull failures)
- Pod exits with non-zero code (verification logic failed)
All of these trigger automatic rollback. The workflow logs include pod status, events,
and logs to help diagnose the issue.
The user provides a complete Pod YAML that:
- Uses the Kata runtime class they want to verify
- Contains their verification logic (e.g., attestation checks)
- Exits 0 on success, non-zero on failure
- Includes tolerations for cordoned nodes (verification runs while node is cordoned)
- Includes a `nodeSelector` to ensure it runs on the specific node being upgraded
When upgrading multiple nodes (via label selector), nodes are processed sequentially.
For each node, the following placeholders are substituted with that node's specific values,
ensuring the verification pod runs on the exact node that was just upgraded:
- `${NODE}` - The hostname of the node being upgraded/verified
- `${TEST_POD}` - A generated unique pod name
Example verification pod:
```yaml
apiVersion: v1
kind: Pod
metadata:
  name: ${TEST_POD}
spec:
  runtimeClassName: kata-qemu
  restartPolicy: Never
  nodeSelector:
    kubernetes.io/hostname: ${NODE}
  tolerations:
    - operator: Exists # Required: node is cordoned during verification
  containers:
    - name: verify
      image: quay.io/kata-containers/alpine-bash-curl:latest
      command: ["uname", "-a"]
```
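The substitution step can be sketched with `sed` (a hypothetical helper, not the chart's actual implementation):

```shell
# Render the user's verification pod template for a specific node by
# substituting the ${NODE} and ${TEST_POD} placeholders.
render_verification_pod() {
  local template="$1" node="$2" pod_name="$3"
  sed -e "s|\${NODE}|${node}|g" -e "s|\${TEST_POD}|${pod_name}|g" "$template"
}

# Usage:
#   render_verification_pod verification-pod.yaml worker-1 kata-verify-1234 | kubectl apply -f -
```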
This design keeps verification logic entirely in the user's domain, supporting:
- Different runtime classes (`kata-qemu`, `kata-qemu-snp`, `kata-qemu-tdx`, etc.)
- TEE-specific attestation verification
- GPU/accelerator validation
- Custom application smoke tests
### Sequential Execution with Fail-Fast
Nodes are upgraded strictly sequentially using recursive Argo templates. This design
ensures that if any node fails verification, the workflow stops immediately before
touching remaining nodes, preventing a mixed-version fleet.
Alternative approaches considered:
- **`withParam` + semaphore**: Provides cleaner UI but semaphore only controls concurrency,
not failure propagation. Other nodes would still proceed after one fails.
- **`withParam` + `failFast`**: Would be ideal, but Argo only supports `failFast` for DAG
tasks, not for steps with `withParam`.
The recursive template approach (`upgrade-node-chain`) naturally provides fail-fast
behavior because if any step in the chain fails, the recursion stops.
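The recursion can be sketched as follows. This is an illustrative skeleton, not the chart's actual template: the parameter name, the space-separated node-list encoding, and the `sprig` expressions are all assumptions:
```yaml
# upgrade-node-chain: upgrade the first remaining node, then recurse on
# the tail; if any step fails, the recursion stops, giving fail-fast.
- name: upgrade-node-chain
  inputs:
    parameters:
      - name: remaining-nodes   # e.g. "worker-1 worker-2 worker-3"
  steps:
    - - name: upgrade-first
        template: upgrade-single-node
        arguments:
          parameters:
            - name: node
              value: "{{=sprig.first(sprig.splitList(' ', inputs.parameters['remaining-nodes']))}}"
    - - name: upgrade-rest
        template: upgrade-node-chain    # self-reference drives the recursion
        when: "{{=sprig.contains(' ', inputs.parameters['remaining-nodes'])}}"
        arguments:
          parameters:
            - name: remaining-nodes
              value: "{{=sprig.join(' ', sprig.rest(sprig.splitList(' ', inputs.parameters['remaining-nodes'])))}}"
```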
### Status Tracking
Node upgrade status is tracked via Kubernetes annotations:
| Annotation | Values |
|------------|--------|
| `katacontainers.io/kata-lifecycle-manager-status` | preparing, cordoned, draining, upgrading, verifying, completed, rolling-back, rolled-back |
| `katacontainers.io/kata-current-version` | Version string (e.g., "3.25.0") |
This enables:
- Monitoring upgrade progress via `kubectl get nodes`
- Integration with external monitoring systems
- Recovery from interrupted upgrades
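The per-step status update amounts to a single `kubectl annotate` call (a hypothetical helper; the chart performs the equivalent inside its workflow steps):

```shell
# Record upgrade progress on the node object; --overwrite lets the status
# move through the phases listed in the table above.
set_node_status() {
  local node="$1" status="$2"
  kubectl annotate node "$node" --overwrite \
    "katacontainers.io/kata-lifecycle-manager-status=${status}"
}

# Usage: set_node_status worker-1 upgrading
```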
### Rollback Support
**Automatic rollback on verification failure:** If the verification pod fails (non-zero exit),
kata-lifecycle-manager automatically:
1. Runs `helm rollback` to revert to the previous Helm release
2. Waits for kata-deploy DaemonSet to be ready with the previous version
3. `Uncordons` the node
4. Annotates the node with `rolled-back` status
This ensures nodes are never left in a broken state.
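The rollback path can be sketched as follows, assuming the release name `kata-deploy` in `kube-system` (matching the manual script above); the chart's actual steps may differ:

```shell
# Hypothetical sketch of the automatic rollback sequence.
rollback_node() {
  local node="$1"
  helm rollback kata-deploy --namespace kube-system --wait       # 1. revert release
  kubectl rollout status daemonset/kata-deploy -n kube-system    # 2. wait for previous version
  kubectl uncordon "$node"                                       # 3. reschedule workloads
  kubectl annotate node "$node" --overwrite \
    "katacontainers.io/kata-lifecycle-manager-status=rolled-back" # 4. record outcome
}
```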
**Manual rollback:** For cases where you need to rollback a successfully upgraded node:
```bash
argo submit -n argo --from workflowtemplate/kata-lifecycle-manager \
  --entrypoint rollback-node \
  -p node-name=worker-1
```
## Components
### Container Images
Two multi-architecture container images are built and published:
| Image | Purpose | Architectures |
|-------|---------|---------------|
| `quay.io/kata-containers/kubectl:latest` | Kubernetes operations | amd64, arm64, s390x, ppc64le |
| `quay.io/kata-containers/helm:latest` | Helm operations | amd64, arm64, s390x, ppc64le |
Images are rebuilt weekly to pick up security updates and tool version upgrades.
### Helm Chart Structure
```text
kata-lifecycle-manager/
├── Chart.yaml                  # Chart metadata
├── values.yaml                 # Configurable defaults
├── README.md                   # Usage documentation
└── templates/
    ├── _helpers.tpl            # Template helpers
    ├── rbac.yaml               # ServiceAccount, ClusterRole, ClusterRoleBinding
    └── workflow-template.yaml  # Argo WorkflowTemplate
```
### RBAC Requirements
The workflow requires the following permissions:
| Resource | Verbs | Purpose |
|----------|-------|---------|
| nodes | get, list, watch, patch | `cordon`/`uncordon`, annotations |
| pods | get, list, watch, create, delete | Verification pods |
| pods/log | get | Verification output |
| `daemonsets` | get, list, watch | Wait for `kata-deploy` |
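Expressed as ClusterRole rules, the table translates roughly to the following (a sketch: the rule grouping and API groups are inferred, with `daemonsets` under the `apps` group):
```yaml
rules:
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["get", "list", "watch", "patch"]
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "list", "watch", "create", "delete"]
  - apiGroups: [""]
    resources: ["pods/log"]
    verbs: ["get"]
  - apiGroups: ["apps"]
    resources: ["daemonsets"]
    verbs: ["get", "list", "watch"]
```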
## User Experience
### Installation
```bash
# Install kata-lifecycle-manager with verification config
helm install kata-lifecycle-manager oci://ghcr.io/kata-containers/kata-deploy-charts/kata-lifecycle-manager \
  --set-file defaults.verificationPod=/path/to/verification-pod.yaml
```
### Triggering an Upgrade
```bash
# Label nodes for upgrade
kubectl label node worker-1 katacontainers.io/kata-lifecycle-manager-window=true
# Submit upgrade workflow
argo submit -n argo --from workflowtemplate/kata-lifecycle-manager \
  -p target-version=3.25.0
# Watch progress
argo watch @latest
```
### Monitoring
```bash
# Note: the status and version are annotations (not labels), so "-L" cannot
# display them; use custom-columns instead:
kubectl get nodes -o custom-columns=\
'NAME:.metadata.name,'\
'RUNTIME:.metadata.labels.katacontainers\.io/kata-runtime,'\
'STATUS:.metadata.annotations.katacontainers\.io/kata-lifecycle-manager-status,'\
'VERSION:.metadata.annotations.katacontainers\.io/kata-current-version'
```
## Security Considerations
1. **Namespace-Scoped Templates**: The chart creates a `WorkflowTemplate` (namespace-scoped)
rather than `ClusterWorkflowTemplate` by default, reducing blast radius.
2. **Required Verification**: The chart fails to install if `defaults.verificationPod` is
not provided, ensuring upgrades are always verified.
3. **Minimal RBAC**: The `ServiceAccount` has only the permissions required for upgrade
operations.
4. **User-Controlled Verification**: Verification logic is entirely user-defined, avoiding
any hardcoded assumptions about what "working" means.
## Integration with Release Process
The `kata-lifecycle-manager` chart is:
- Packaged alongside `kata-deploy` during releases
- Published to the same OCI registries (`quay.io`, `ghcr.io`)
- Versioned to match `kata-deploy`
## Potential Enhancements
The following enhancements could be considered if needed:
### kata-lifecycle-manager
1. **Pool-Specific Verification**: Different verification pods for different node pools
(e.g., GPU nodes vs. CPU-only nodes).
2. **Ordered Pool Upgrades**: Upgrade node pool A completely before starting pool B.
## Alternatives Considered
### 1. DaemonSet-Based Upgrades
Using a DaemonSet to coordinate upgrades on each node.
**Rejected because**: DaemonSets don't provide the node-by-node sequencing and
verification workflow needed for controlled upgrades.
### 2. Operator Pattern
Building a Kubernetes Operator to manage upgrades.
**Rejected because**: Adds significant complexity and maintenance burden. Argo Workflows
is already widely adopted and provides the orchestration primitives needed.
### 3. Shell Script Orchestration
Providing a shell script that loops through nodes.
**Rejected because**: Less reliable, harder to monitor, no built-in retry/recovery,
and doesn't integrate with Kubernetes-native tooling.
## References
- [kata-deploy Helm Chart](https://github.com/kata-containers/kata-containers/tree/main/tools/packaging/kata-deploy/helm-chart/kata-deploy)
- [Argo Workflows](https://argoproj.github.io/argo-workflows/)
- [Helm Documentation](https://helm.sh/docs/)

View File

@@ -50,7 +50,7 @@ There are several kinds of Kata configurations and they are listed below.
| `io.katacontainers.config.hypervisor.default_max_vcpus` | uint32| the maximum number of vCPUs allocated for the VM by the hypervisor |
| `io.katacontainers.config.hypervisor.default_memory` | uint32| the memory assigned for a VM by the hypervisor in `MiB` |
| `io.katacontainers.config.hypervisor.default_vcpus` | float32| the default vCPUs assigned for a VM by the hypervisor |
| `io.katacontainers.config.hypervisor.disable_block_device_use` | `boolean` | disallow a block device from being used |
| `io.katacontainers.config.hypervisor.disable_block_device_use` | `boolean` | disable hotplugging host block devices to guest VMs for container rootfs |
| `io.katacontainers.config.hypervisor.disable_image_nvdimm` | `boolean` | specify if a `nvdimm` device should be used as rootfs for the guest (QEMU) |
| `io.katacontainers.config.hypervisor.disable_vhost_net` | `boolean` | specify if `vhost-net` is not available on the host |
| `io.katacontainers.config.hypervisor.enable_hugepages` | `boolean` | if the memory should be `pre-allocated` from huge pages |

View File

@@ -1588,9 +1588,11 @@ async fn join_namespaces(
cm.apply(p.pid)?;
}
if p.init && res.is_some() {
info!(logger, "set properties to cgroups!");
cm.set(res.unwrap(), false)?;
if p.init {
if let Some(resource) = res {
info!(logger, "set properties to cgroups!");
cm.set(resource, false)?;
}
}
info!(logger, "notify child to continue");

View File

@@ -752,15 +752,6 @@ fn parse_mount(m: &Mount) -> (MsFlags, MsFlags, String) {
(flags, pgflags, data.join(","))
}
// This function constructs a canonicalized path by combining the `rootfs` and `unsafe_path` elements.
// The resulting path is guaranteed to be ("below" / "in a directory under") the `rootfs` directory.
//
// Parameters:
//
// - `rootfs` is the absolute path to the root of the containers root filesystem directory.
// - `unsafe_path` is path inside a container. It is unsafe since it may try to "escape" from the containers
// rootfs by using one or more "../" path elements or is its a symlink to path.
fn mount_from(
cfd_log: RawFd,
m: &Mount,

View File

@@ -10,7 +10,7 @@ use std::fs::File;
use std::sync::{Arc, Mutex};
use crossbeam_channel::{Receiver, Sender, TryRecvError};
use log::{debug, error, info, warn};
use log::{debug, info, warn};
use std::sync::mpsc;
use tracing::instrument;

View File

@@ -24,7 +24,6 @@ use dbs_legacy_devices::ConsoleHandler;
use dbs_pci::CAPABILITY_BAR_SIZE;
use dbs_utils::epoll_manager::EpollManager;
use kvm_ioctls::VmFd;
use log::error;
use virtio_queue::QueueSync;
#[cfg(feature = "dbs-virtio-devices")]

View File

@@ -770,10 +770,11 @@ impl MachineInfo {
}
/// Huge page type for VM RAM backend
#[derive(Clone, Debug, Deserialize_enum_str, Serialize_enum_str, PartialEq, Eq)]
#[derive(Clone, Debug, Deserialize_enum_str, Serialize_enum_str, PartialEq, Eq, Default)]
pub enum HugePageType {
/// Memory allocated using hugetlbfs backend
#[serde(rename = "hugetlbfs")]
#[default]
Hugetlbfs,
/// Memory allocated using transparent huge pages
@@ -781,12 +782,6 @@ pub enum HugePageType {
THP,
}
impl Default for HugePageType {
fn default() -> Self {
Self::Hugetlbfs
}
}
/// Virtual machine memory configuration information.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct MemoryInfo {

View File

@@ -4,7 +4,7 @@
// SPDX-License-Identifier: Apache-2.0
//
use std::collections::HashMap;
use std::collections::{BTreeMap, HashMap};
use std::fs;
use std::io::{self, Result};
use std::path::{Path, PathBuf};
@@ -206,8 +206,8 @@ impl TomlConfig {
}
/// Get agent-specfic kernel parameters for further Hypervisor config revision
pub fn get_agent_kernel_params(&self) -> Result<HashMap<String, String>> {
let mut kv = HashMap::new();
pub fn get_agent_kernel_params(&self) -> Result<BTreeMap<String, String>> {
let mut kv = BTreeMap::new();
if let Some(cfg) = self.agent.get(&self.runtime.agent_name) {
if cfg.debug {
kv.insert(LOG_LEVEL_OPTION.to_string(), LOG_LEVEL_DEBUG.to_string());

View File

@@ -366,8 +366,8 @@ key = "value"
let result = add_hypervisor_initdata_overrides(&encoded);
// This might fail depending on whether algorithm is required
if result.is_err() {
assert!(result.unwrap_err().to_string().contains("parse initdata"));
if let Err(error) = result {
assert!(error.to_string().contains("parse initdata"));
}
}
@@ -386,8 +386,8 @@ key = "value"
let result = add_hypervisor_initdata_overrides(&encoded);
// This might fail depending on whether version is required
if result.is_err() {
assert!(result.unwrap_err().to_string().contains("parse initdata"));
if let Err(error) = result {
assert!(error.to_string().contains("parse initdata"));
}
}
@@ -488,7 +488,7 @@ key = "value"
let valid_toml = r#"
version = "0.1.0"
algorithm = "sha384"
[data]
valid_key = "valid_value"
"#;
@@ -497,7 +497,7 @@ key = "value"
// Invalid TOML (missing version)
let invalid_toml = r#"
algorithm = "sha256"
[data]
key = "value"
"#;

View File

@@ -136,8 +136,6 @@ macro_rules! skip_loop_by_user {
#[cfg(test)]
mod tests {
use super::{skip_if_kvm_unaccessable, skip_if_not_root, skip_if_root};
#[test]
fn test_skip_if_not_root() {
skip_if_not_root!();

View File

@@ -22,6 +22,7 @@ cloud-hypervisor = ["runtimes/cloud-hypervisor"]
[dependencies]
anyhow = { workspace = true }
containerd-shim-protos = { workspace = true }
go-flag = { workspace = true }
nix = { workspace = true }
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }

View File

@@ -130,8 +130,23 @@ FCJAILERPATH = $(FCBINDIR)/$(FCJAILERCMD)
FCVALIDJAILERPATHS = [\"$(FCJAILERPATH)\"]
PKGLIBEXECDIR := $(LIBEXECDIR)/$(PROJECT_DIR)
# EDK2 firmware names per architecture
ifeq ($(ARCH), aarch64)
EDK2_NAME := aavmf
endif
# Set firmware paths from QEMUFW/QEMUFWVOL if defined
FIRMWAREPATH :=
FIRMWAREVOLUMEPATH :=
ifneq (,$(QEMUCMD))
ifneq (,$(QEMUFW))
FIRMWAREPATH := $(PREFIXDEPS)/share/$(EDK2_NAME)/$(QEMUFW)
endif
ifneq (,$(QEMUFWVOL))
FIRMWAREVOLUMEPATH := $(PREFIXDEPS)/share/$(EDK2_NAME)/$(QEMUFWVOL)
endif
endif
ROOTMEASURECONFIG ?= ""
KERNELTDXPARAMS += $(ROOTMEASURECONFIG)
@@ -374,6 +389,11 @@ ifneq (,$(QEMUCMD))
ifeq ($(ARCH), s390x)
VMROOTFSDRIVER_QEMU := virtio-blk-ccw
DEFBLOCKSTORAGEDRIVER_QEMU := virtio-blk-ccw
else ifeq ($(ARCH), aarch64)
# NVDIMM/virtio-pmem has issues on arm64 (cache coherency problems with DAX),
# so we use virtio-blk-pci instead.
VMROOTFSDRIVER_QEMU := virtio-blk-pci
DEFBLOCKSTORAGEDRIVER_QEMU := virtio-scsi
else
VMROOTFSDRIVER_QEMU := virtio-pmem
DEFBLOCKSTORAGEDRIVER_QEMU := virtio-scsi

View File

@@ -4,12 +4,16 @@
# SPDX-License-Identifier: Apache-2.0
#
MACHINETYPE :=
# ARM 64 settings
MACHINETYPE := virt
KERNELPARAMS := cgroup_no_v1=all systemd.unified_cgroup_hierarchy=1
MACHINEACCELERATORS :=
MACHINEACCELERATORS := usb=off,gic-version=host
CPUFEATURES := pmu=off
QEMUCMD := qemu-system-aarch64
QEMUFW := AAVMF_CODE.fd
QEMUFWVOL := AAVMF_VARS.fd
# dragonball binary name
DBCMD := dragonball

View File

@@ -19,7 +19,7 @@ image = "@IMAGEPATH@"
# - xfs
# - erofs
rootfs_type = @DEFROOTFSTYPE@
# Block storage driver to be used for the VM rootfs is backed
# by a block device.
vm_rootfs_driver = "@VMROOTFSDRIVER_CLH@"
@@ -41,7 +41,7 @@ valid_hypervisor_paths = @CLHVALIDHYPERVISORPATHS@
# List of valid annotations values for ctlpath
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends:
# Your distribution recommends:
valid_ctlpaths = []
# Optional space-separated list of options to pass to the guest kernel.

View File

@@ -23,7 +23,7 @@ image = "@IMAGEPATH@"
# - erofs
rootfs_type = @DEFROOTFSTYPE@
# Block storage driver to be used for the VM rootfs is backed
# by a block device. This is virtio-blk-pci, virtio-blk-mmio or nvdimm
vm_rootfs_driver = "@VMROOTFSDRIVER_DB@"
@@ -41,7 +41,7 @@ valid_hypervisor_paths = @DBVALIDHYPERVISORPATHS@
# List of valid annotations values for ctlpath
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends:
# Your distribution recommends:
valid_ctlpaths = []
# Optional space-separated list of options to pass to the guest kernel.

View File

@@ -373,16 +373,16 @@ disable_image_nvdimm = false
# Default false
hotplug_vfio_on_root_bus = false
# Enable hot-plugging of VFIO devices to a bridge-port,
# root-port or switch-port.
# Enable hot-plugging of VFIO devices to a bridge-port,
# root-port or switch-port.
# The default setting is "no-port"
hot_plug_vfio = "no-port"
# In a confidential compute environment hot-plugging can compromise
# security.
# Enable cold-plugging of VFIO devices to a bridge-port,
# root-port or switch-port.
# The default setting is "no-port", which means disabled.
# security.
# Enable cold-plugging of VFIO devices to a bridge-port,
# root-port or switch-port.
# The default setting is "no-port", which means disabled.
cold_plug_vfio = "no-port"
# Before hot plugging a PCIe device, you need to add a pcie_root_port device.

View File

@@ -767,4 +767,4 @@ dan_conf = "@DEFDANCONF@"
# to non-k8s cases)
# cold_plug_vfio != no_port AND pod_resource_api_sock != "" => kubelet
# based cold plug.
pod_resource_api_sock = "@DEFPODRESOURCEAPISOCK@"
pod_resource_api_sock = "@DEFPODRESOURCEAPISOCK@"

View File

@@ -39,7 +39,7 @@ vm_rootfs_driver = "virtio-blk-pci"
#
# Known limitations:
# * Does not work by design:
# - CPU Hotplug
# - CPU Hotplug
# - Memory Hotplug
# - NVDIMM devices
#

View File

@@ -304,7 +304,7 @@ debug_console_enabled = false
# Agent connection dialing timeout value in seconds
# (default: 45)
dial_timeout = 45
dial_timeout = 45
# Confidential Data Hub API timeout value in seconds
# (default: 50)

View File

@@ -2296,6 +2296,14 @@ impl<'a> QemuCmdLine<'a> {
}
fn add_iommu(&mut self) {
// vIOMMU (Intel IOMMU) is not supported on the "virt" machine type (arm64)
if self.machine.r#type == "virt" {
self.kernel
.params
.append(&mut KernelParams::from_string("iommu.passthrough=0"));
return;
}
let dev_iommu = DeviceIntelIommu::new();
self.devices.push(Box::new(dev_iommu));

View File

@@ -28,8 +28,13 @@ use std::str::FromStr;
use std::time::Duration;
use qapi_spec::Dictionary;
use std::thread;
use std::time::Instant;
/// default qmp connection read timeout
const DEFAULT_QMP_READ_TIMEOUT: u64 = 250;
const DEFAULT_QMP_CONNECT_DEADLINE_MS: u64 = 5000;
const DEFAULT_QMP_RETRY_SLEEP_MS: u64 = 50;
pub struct Qmp {
qmp: qapi::Qmp<qapi::Stream<BufReader<UnixStream>, UnixStream>>,
@@ -58,29 +63,43 @@ impl Debug for Qmp {
impl Qmp {
pub fn new(qmp_sock_path: &str) -> Result<Self> {
let stream = UnixStream::connect(qmp_sock_path)?;
let try_new_once_fn = || -> Result<Qmp> {
let stream = UnixStream::connect(qmp_sock_path)?;
// Set the read timeout to protect runtime-rs from blocking forever
// trying to set up QMP connection if qemu fails to launch. The exact
// value is a matter of judegement. Setting it too long would risk
// being ineffective since container runtime would timeout first anyway
// (containerd's task creation timeout is 2 s by default). OTOH
// setting it too short would risk interfering with a normal launch,
// perhaps just seeing some delay due to a heavily loaded host.
stream.set_read_timeout(Some(Duration::from_millis(DEFAULT_QMP_READ_TIMEOUT)))?;
stream
.set_read_timeout(Some(Duration::from_millis(DEFAULT_QMP_READ_TIMEOUT)))
.context("set qmp read timeout")?;
let mut qmp = Qmp {
qmp: qapi::Qmp::new(qapi::Stream::new(
BufReader::new(stream.try_clone()?),
stream,
)),
guest_memory_block_size: 0,
let mut qmp = Qmp {
qmp: qapi::Qmp::new(qapi::Stream::new(
BufReader::new(stream.try_clone()?),
stream,
)),
guest_memory_block_size: 0,
};
let info = qmp.qmp.handshake().context("qmp handshake failed")?;
info!(sl!(), "QMP initialized: {:#?}", info);
Ok(qmp)
};
let info = qmp.qmp.handshake()?;
info!(sl!(), "QMP initialized: {:#?}", info);
let deadline = Instant::now() + Duration::from_millis(DEFAULT_QMP_CONNECT_DEADLINE_MS);
let mut last_err: Option<anyhow::Error> = None;
Ok(qmp)
while Instant::now() < deadline {
match try_new_once_fn() {
Ok(qmp) => return Ok(qmp),
Err(e) => {
debug!(sl!(), "QMP not ready yet: {}", e);
last_err = Some(e);
thread::sleep(Duration::from_millis(DEFAULT_QMP_RETRY_SLEEP_MS));
}
}
}
Err(last_err.unwrap_or_else(|| anyhow!("QMP init timed out")))
.with_context(|| format!("timed out waiting for QMP ready: {}", qmp_sock_path))
}
pub fn set_ignore_shared_memory_capability(&mut self) -> Result<()> {

View File

@@ -6,39 +6,54 @@
use std::{
io,
os::{
fd::IntoRawFd,
unix::{
fs::OpenOptionsExt,
io::{FromRawFd, RawFd},
net::UnixStream as StdUnixStream,
prelude::AsRawFd,
},
os::unix::{
fs::{FileTypeExt, OpenOptionsExt},
io::RawFd,
prelude::AsRawFd,
},
pin::Pin,
task::{Context as TaskContext, Poll},
};
use anyhow::{anyhow, Context, Result};
use anyhow::{Context, Result};
use tokio::{
fs::OpenOptions,
fs::{File, OpenOptions},
io::{AsyncRead, AsyncWrite},
net::UnixStream as AsyncUnixStream,
};
use url::Url;
fn open_fifo_write(path: &str) -> Result<AsyncUnixStream> {
/// Clear O_NONBLOCK for an fd (turn it into blocking mode).
fn set_flag_with_blocking(fd: RawFd) {
let flag = unsafe { libc::fcntl(fd, libc::F_GETFL) };
if flag < 0 {
error!(sl!(), "failed to fcntl(F_GETFL) fd {} ret {}", fd, flag);
return;
}
let ret = unsafe { libc::fcntl(fd, libc::F_SETFL, flag & !libc::O_NONBLOCK) };
if ret < 0 {
error!(sl!(), "failed to fcntl(F_SETFL) fd {} ret {}", fd, ret);
}
}
fn open_fifo_write(path: &str) -> Result<File> {
let std_file = std::fs::OpenOptions::new()
.write(true)
// It's not for non-block openning FIFO but for non-block stream which
// will be add into tokio runtime.
.custom_flags(libc::O_NONBLOCK)
.open(path)
.with_context(|| format!("open {path} with write"))?;
let fd = std_file.into_raw_fd();
let std_stream = unsafe { StdUnixStream::from_raw_fd(fd) };
.with_context(|| format!("open fifo for write: {path}"))?;
AsyncUnixStream::from_std(std_stream).map_err(|e| anyhow!(e))
// Debug
let meta = std_file.metadata()?;
if !meta.file_type().is_fifo() {
debug!(sl!(), "[DEBUG]{} is not a fifo (type mismatch)", path);
}
set_flag_with_blocking(std_file.as_raw_fd());
Ok(File::from_std(std_file))
}
pub struct ShimIo {
@@ -58,14 +73,6 @@ impl ShimIo {
"new shim io stdin {:?} stdout {:?} stderr {:?}", stdin, stdout, stderr
);
let set_flag_with_blocking = |fd: RawFd| {
let flag = unsafe { libc::fcntl(fd, libc::F_GETFL) };
let ret = unsafe { libc::fcntl(fd, libc::F_SETFL, flag & !libc::O_NONBLOCK) };
if ret < 0 {
error!(sl!(), "failed to set fcntl for fd {} error {}", fd, ret);
}
};
let stdin_fd: Option<Box<dyn AsyncRead + Send + Unpin>> = if let Some(stdin) = stdin {
info!(sl!(), "open stdin {:?}", &stdin);
@@ -98,9 +105,7 @@ impl ShimIo {
None => None,
Some(out) => match Url::parse(out.as_str()) {
Err(url::ParseError::RelativeUrlWithoutBase) => {
let out = "fifo://".to_owned() + out.as_str();
let u = Url::parse(out.as_str()).unwrap();
Some(u)
Url::parse(&format!("fifo://{}", out)).ok()
}
Err(err) => {
warn!(sl!(), "unable to parse stdout uri: {}", err);
@@ -111,26 +116,25 @@ impl ShimIo {
}
};
let stdout_url = get_url(stdout);
let get_fd = |url: &Option<Url>| -> Option<Box<dyn AsyncWrite + Send + Unpin>> {
info!(sl!(), "get fd for {:?}", &url);
if let Some(url) = url {
if url.scheme() == "fifo" {
let path = url.path();
match open_fifo_write(path) {
Ok(s) => {
return Some(Box::new(ShimIoWrite::Stream(s)));
}
Err(err) => {
error!(sl!(), "failed to open file {} error {:?}", url.path(), err);
}
Ok(f) => return Some(Box::new(ShimIoWrite::File(f))),
Err(err) => error!(sl!(), "failed to open fifo {} error {:?}", path, err),
}
} else {
warn!(sl!(), "unsupported io scheme {}", url.scheme());
}
}
None
};
let stdout_url = get_url(stdout);
let stderr_url = get_url(stderr);
Ok(Self {
stdin: stdin_fd,
stdout: get_fd(&stdout_url),
@@ -141,7 +145,7 @@ impl ShimIo {
#[derive(Debug)]
enum ShimIoWrite {
Stream(AsyncUnixStream),
File(File),
// TODO: support other type
}
@@ -151,20 +155,20 @@ impl AsyncWrite for ShimIoWrite {
cx: &mut TaskContext<'_>,
buf: &[u8],
) -> Poll<io::Result<usize>> {
match *self {
ShimIoWrite::Stream(ref mut s) => Pin::new(s).poll_write(cx, buf),
match &mut *self {
ShimIoWrite::File(f) => Pin::new(f).poll_write(cx, buf),
}
}
fn poll_flush(mut self: Pin<&mut Self>, cx: &mut TaskContext<'_>) -> Poll<io::Result<()>> {
match *self {
ShimIoWrite::Stream(ref mut s) => Pin::new(s).poll_flush(cx),
match &mut *self {
ShimIoWrite::File(f) => Pin::new(f).poll_flush(cx),
}
}
fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut TaskContext<'_>) -> Poll<io::Result<()>> {
match *self {
ShimIoWrite::Stream(ref mut s) => Pin::new(s).poll_shutdown(cx),
match &mut *self {
ShimIoWrite::File(f) => Pin::new(f).poll_shutdown(cx),
}
}
}

View File

@@ -6,10 +6,15 @@
use std::{
ffi::{OsStr, OsString},
io::Write,
path::PathBuf,
};
use anyhow::{anyhow, Context, Result};
use containerd_shim_protos::{
protobuf::Message,
types::introspection::{RuntimeInfo, RuntimeVersion},
};
use nix::{
mount::{mount, MsFlags},
sched::{self, CloneFlags},
@@ -29,11 +34,13 @@ enum Action {
Delete(Args),
Help,
Version,
Info,
}
fn parse_args(args: &[OsString]) -> Result<Action> {
let mut help = false;
let mut version = false;
let mut info = false;
let mut shim_args = Args::default();
// Crate `go_flag` is used to keep compatible with go/flag package.
@@ -46,6 +53,7 @@ fn parse_args(args: &[OsString]) -> Result<Action> {
flags.add_flag("publish-binary", &mut shim_args.publish_binary);
flags.add_flag("help", &mut help);
flags.add_flag("version", &mut version);
flags.add_flag("info", &mut info);
})
.context(Error::ParseArgument(format!("{args:?}")))?;
@@ -53,6 +61,8 @@ fn parse_args(args: &[OsString]) -> Result<Action> {
Ok(Action::Help)
} else if version {
Ok(Action::Version)
} else if info {
Ok(Action::Info)
} else if rest_args.is_empty() {
Ok(Action::Run(shim_args))
} else if rest_args[0] == "start" {
@@ -83,6 +93,8 @@ fn show_help(cmd: &OsStr) {
enable debug output in logs
-id string
id of the task
-info
output the runtime info as protobuf (for containerd v2.0+)
-namespace string
namespace that owns the shim
-publish-binary string
@@ -114,6 +126,25 @@ fn show_version(err: Option<anyhow::Error>) {
}
}
fn show_info() -> Result<()> {
let mut version = RuntimeVersion::new();
version.version = config::RUNTIME_VERSION.to_string();
version.revision = config::RUNTIME_GIT_COMMIT.to_string();
let mut info = RuntimeInfo::new();
info.name = config::CONTAINERD_RUNTIME_NAME.to_string();
info.version = Some(version).into();
let data = info
.write_to_bytes()
.context("failed to marshal RuntimeInfo")?;
std::io::stdout()
.write_all(&data)
.context("failed to write RuntimeInfo to stdout")?;
Ok(())
}
fn get_tokio_runtime() -> Result<tokio::runtime::Runtime> {
let worker_threads = std::env::var(ENV_TOKIO_RUNTIME_WORKER_THREADS)
.unwrap_or_default()
@@ -155,6 +186,7 @@ fn real_main() -> Result<()> {
}
Action::Help => show_help(&args[0]),
Action::Version => show_version(None),
Action::Info => show_info().context("show info")?,
}
Ok(())
}

View File

@@ -174,10 +174,6 @@ HYPERVISORS := $(HYPERVISOR_FC) $(HYPERVISOR_QEMU) $(HYPERVISOR_CLH) $(HYPERVISO
QEMUPATH := $(QEMUBINDIR)/$(QEMUCMD)
QEMUVALIDHYPERVISORPATHS := [\"$(QEMUPATH)\"]
#QEMUTDXPATH := $(QEMUBINDIR)/$(QEMUTDXCMD)
QEMUTDXPATH := PLACEHOLDER_FOR_DISTRO_QEMU_WITH_TDX_SUPPORT
QEMUTDXVALIDHYPERVISORPATHS := [\"$(QEMUTDXPATH)\"]
QEMUTDXEXPERIMENTALPATH := $(QEMUBINDIR)/$(QEMUTDXEXPERIMENTALCMD)
QEMUTDXEXPERIMENTALVALIDHYPERVISORPATHS := [\"$(QEMUTDXEXPERIMENTALPATH)\"]
@@ -250,7 +246,7 @@ DEFSECCOMPSANDBOXPARAM :=
DEFENTROPYSOURCE := /dev/urandom
DEFVALIDENTROPYSOURCES := [\"/dev/urandom\",\"/dev/random\",\"\"]
DEFDISABLEBLOCK := false
DEFDISABLEBLOCK := true
DEFSHAREDFS_CLH_VIRTIOFS := virtio-fs
DEFSHAREDFS_QEMU_VIRTIOFS := virtio-fs
# Please keep DEFSHAREDFS_QEMU_COCO_DEV_VIRTIOFS in sync with TDX/SNP
@@ -702,18 +698,15 @@ USER_VARS += PROJECT_TYPE
USER_VARS += PROJECT_URL
USER_VARS += QEMUBINDIR
USER_VARS += QEMUCMD
USER_VARS += QEMUTDXCMD
USER_VARS += QEMUTDXEXPERIMENTALCMD
USER_VARS += QEMUCCAEXPERIMENTALCMD
USER_VARS += QEMUSNPCMD
USER_VARS += QEMUPATH
USER_VARS += QEMUTDXPATH
USER_VARS += QEMUTDXEXPERIMENTALPATH
USER_VARS += QEMUTDXQUOTEGENERATIONSERVICESOCKETPORT
USER_VARS += QEMUSNPPATH
USER_VARS += QEMUCCAEXPERIMENTALPATH
USER_VARS += QEMUVALIDHYPERVISORPATHS
USER_VARS += QEMUTDXVALIDHYPERVISORPATHS
USER_VARS += QEMUTDXEXPERIMENTALVALIDHYPERVISORPATHS
USER_VARS += QEMUCCAVALIDHYPERVISORPATHS
USER_VARS += QEMUCCAEXPERIMENTALVALIDHYPERVISORPATHS


@@ -9,7 +9,9 @@ import (
"fmt"
"os"
containerdtypes "github.com/containerd/containerd/api/types"
shimapi "github.com/containerd/containerd/runtime/v2/shim"
"google.golang.org/protobuf/proto"
shim "github.com/kata-containers/kata-containers/src/runtime/pkg/containerd-shim-v2"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
@@ -21,6 +23,25 @@ func shimConfig(config *shimapi.Config) {
config.NoSubreaper = true
}
func handleInfoFlag() {
info := &containerdtypes.RuntimeInfo{
Name: types.DefaultKataRuntimeName,
Version: &containerdtypes.RuntimeVersion{
Version: katautils.VERSION,
Revision: katautils.COMMIT,
},
}
data, err := proto.Marshal(info)
if err != nil {
fmt.Fprintf(os.Stderr, "failed to marshal RuntimeInfo: %v\n", err)
os.Exit(1)
}
os.Stdout.Write(data)
os.Exit(0)
}
func main() {
if len(os.Args) == 2 && os.Args[1] == "--version" {
@@ -28,5 +49,9 @@ func main() {
os.Exit(0)
}
if len(os.Args) == 2 && os.Args[1] == "-info" {
handleInfoFlag()
}
shimapi.Run(types.DefaultKataRuntimeName, shim.New, shimConfig)
}
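Both the Rust and Go shims above respond to the info flag by writing a serialized `RuntimeInfo` protobuf message to stdout for containerd (v2.0+) to consume. As a rough, hand-rolled sketch of what that byte stream looks like on the wire — for illustration only; real consumers use the generated protobuf bindings, and the sample bytes below are an assumed encoding of the name field, not captured output:

```go
package main

import (
	"encoding/binary"
	"fmt"
)

// decodeStringField walks a serialized protobuf message and returns the
// length-delimited string stored in the requested field number, skipping
// everything else. Only varint (wire type 0) and length-delimited
// (wire type 2) fields are handled, which is enough for this sketch.
func decodeStringField(data []byte, want uint64) (string, error) {
	i := 0
	for i < len(data) {
		tag, n := binary.Uvarint(data[i:])
		if n <= 0 {
			return "", fmt.Errorf("bad tag varint at offset %d", i)
		}
		i += n
		field, wire := tag>>3, tag&0x7
		switch wire {
		case 0: // varint: skip the value
			_, n := binary.Uvarint(data[i:])
			if n <= 0 {
				return "", fmt.Errorf("bad varint at offset %d", i)
			}
			i += n
		case 2: // length-delimited: string, bytes, or nested message
			l, n := binary.Uvarint(data[i:])
			if n <= 0 || i+n+int(l) > len(data) {
				return "", fmt.Errorf("bad length at offset %d", i)
			}
			i += n
			if field == want {
				return string(data[i : i+int(l)]), nil
			}
			i += int(l)
		default:
			return "", fmt.Errorf("unsupported wire type %d", wire)
		}
	}
	return "", fmt.Errorf("field %d not found", want)
}

func main() {
	// Hypothetical bytes for RuntimeInfo{Name: "io.containerd.kata.v2"}:
	// field 1, wire type 2 -> tag 0x0a, then a length byte, then the name.
	msg := append([]byte{0x0a, 0x15}, []byte("io.containerd.kata.v2")...)
	name, err := decodeStringField(msg, 1)
	fmt.Println(name, err)
}
```

The nested `RuntimeVersion` (version and git revision) rides along as a further length-delimited field inside the same message, which is why both shims only need `write_to_bytes()`/`proto.Marshal` plus a plain stdout write.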


@@ -109,6 +109,20 @@ memory_slots = @DEFMEMSLOTS@
# > amount of physical RAM --> will be set to the actual amount of physical RAM
default_maxmemory = @DEFMAXMEMSZ@
# Disable hotplugging host block devices to guest VMs for container rootfs.
# In case of a storage driver like devicemapper where a container's
# root file system is backed by a block device, the block device is passed
# directly to the hypervisor for performance reasons.
# This flag prevents the block device from being passed to the hypervisor,
# virtio-fs is used instead to pass the rootfs.
# WARNING:
# Don't set this flag to false if you don't understand well the behavior of
# your container runtime and image snapshotter. Some snapshotters might use
# container image storage devices that are not meant to be hotplugged into a
# guest VM - e.g., because they contain files used by the host or by other
# guests.
disable_block_device_use = @DEFDISABLEBLOCK@
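The flag documented above encodes a simple decision rule: when the snapshotter backs a container's rootfs with a block device and the flag is unset, the device is hotplugged into the guest; otherwise the rootfs goes over virtio-fs. A minimal sketch of that rule — names are illustrative, not the runtime's actual types:

```go
package main

import "fmt"

// rootfsTransport mirrors the disable_block_device_use comment: hotplug
// the backing block device only when one exists and the flag allows it;
// in every other case fall back to sharing the rootfs via virtio-fs.
func rootfsTransport(rootfsIsBlockDevice, disableBlockDeviceUse bool) string {
	if rootfsIsBlockDevice && !disableBlockDeviceUse {
		return "block-device-hotplug"
	}
	return "virtio-fs"
}

func main() {
	fmt.Println(rootfsTransport(true, false)) // devicemapper-style snapshotter, flag off
	fmt.Println(rootfsTransport(true, true))  // flag on: rootfs shared via virtio-fs
}
```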
# Shared file system type:
# - virtio-fs (default)
# - virtio-fs-nydus
@@ -237,9 +251,9 @@ guest_hook_path = ""
# and we strongly advise users to refer the Cloud Hypervisor official
# documentation for a better understanding of its internals:
# https://github.com/cloud-hypervisor/cloud-hypervisor/blob/main/docs/io_throttling.md
#
#
# Bandwidth rate limiter options
#
#
# net_rate_limiter_bw_max_rate controls network I/O bandwidth (size in bits/sec
# for SB/VM).
# The same value is used for inbound and outbound bandwidth.
@@ -273,9 +287,9 @@ net_rate_limiter_ops_one_time_burst = 0
# and we strongly advise users to refer the Cloud Hypervisor official
# documentation for a better understanding of its internals:
# https://github.com/cloud-hypervisor/cloud-hypervisor/blob/main/docs/io_throttling.md
#
#
# Bandwidth rate limiter options
#
#
# disk_rate_limiter_bw_max_rate controls disk I/O bandwidth (size in bits/sec
# for SB/VM).
# The same value is used for inbound and outbound bandwidth.
@@ -462,9 +476,9 @@ enable_pprof = false
# Indicates the CreateContainer request timeout needed for the workload(s)
# It using guest_pull this includes the time to pull the image inside the guest
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
create_container_timeout = @DEFCREATECONTAINERTIMEOUT@


@@ -367,9 +367,9 @@ enable_pprof = false
# Indicates the CreateContainer request timeout needed for the workload(s)
# It using guest_pull this includes the time to pull the image inside the guest
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
create_container_timeout = @DEFCREATECONTAINERTIMEOUT@


@@ -159,12 +159,18 @@ memory_offset = 0
# Default false
enable_virtio_mem = false
# Disable block device from being used for a container's rootfs.
# Disable hotplugging host block devices to guest VMs for container rootfs.
# In case of a storage driver like devicemapper where a container's
# root file system is backed by a block device, the block device is passed
# directly to the hypervisor for performance reasons.
# This flag prevents the block device from being passed to the hypervisor,
# virtio-fs is used instead to pass the rootfs.
# WARNING:
# Don't set this flag to false if you don't understand well the behavior of
# your container runtime and image snapshotter. Some snapshotters might use
# container image storage devices that are not meant to be hotplugged into a
# guest VM - e.g., because they contain files used by the host or by other
# guests.
disable_block_device_use = @DEFDISABLEBLOCK@
# Shared file system type:
@@ -630,9 +636,9 @@ enable_pprof = false
# Indicates the CreateContainer request timeout needed for the workload(s)
# It using guest_pull this includes the time to pull the image inside the guest
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
create_container_timeout = @DEFCREATECONTAINERTIMEOUT@


@@ -145,12 +145,18 @@ memory_offset = 0
# Default false
enable_virtio_mem = false
# Disable block device from being used for a container's rootfs.
# Disable hotplugging host block devices to guest VMs for container rootfs.
# In case of a storage driver like devicemapper where a container's
# root file system is backed by a block device, the block device is passed
# directly to the hypervisor for performance reasons.
# This flag prevents the block device from being passed to the hypervisor,
# virtio-fs is used instead to pass the rootfs.
# WARNING:
# Don't set this flag to false if you don't understand well the behavior of
# your container runtime and image snapshotter. Some snapshotters might use
# container image storage devices that are not meant to be hotplugged into a
# guest VM - e.g., because they contain files used by the host or by other
# guests.
disable_block_device_use = @DEFDISABLEBLOCK@
# Shared file system type:
@@ -356,17 +362,17 @@ msize_9p = @DEFMSIZE9P@
# nvdimm is not supported when `confidential_guest = true`.
disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
# Enable hot-plugging of VFIO devices to a bridge-port,
# root-port or switch-port.
# Enable hot-plugging of VFIO devices to a bridge-port,
# root-port or switch-port.
# The default setting is "no-port"
hot_plug_vfio = "no-port"
hot_plug_vfio = "no-port"
# In a confidential compute environment hot-plugging can compromise
# security.
# Enable cold-plugging of VFIO devices to a bridge-port,
# root-port or switch-port.
# The default setting is "no-port", which means disabled.
cold_plug_vfio = "no-port"
# security.
# Enable cold-plugging of VFIO devices to a bridge-port,
# root-port or switch-port.
# The default setting is "no-port", which means disabled.
cold_plug_vfio = "no-port"
# Before hot plugging a PCIe device, you need to add a pcie_root_port device.
# Use this parameter when using some large PCI bar devices, such as Nvidia GPU
@@ -688,9 +694,9 @@ enable_pprof = false
# Indicates the CreateContainer request timeout needed for the workload(s)
# It using guest_pull this includes the time to pull the image inside the guest
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
create_container_timeout = @DEFCREATECONTAINERTIMEOUT@


@@ -34,7 +34,7 @@ rootfs_type = @DEFROOTFSTYPE@
#
# Known limitations:
# * Does not work by design:
# - CPU Hotplug
# - CPU Hotplug
# - Memory Hotplug
# - NVDIMM devices
#
@@ -75,7 +75,7 @@ snp_id_auth = ""
# SNP Guest Policy, the POLICY parameter to the SNP_LAUNCH_START command.
# If unset, the QEMU default policy (0x30000) will be used.
# Notice that the guest policy is enforced at VM launch, and your pod VMs
# Notice that the guest policy is enforced at VM launch, and your pod VMs
# won't start at all if the policy denys it. This will be indicated by a
# 'SNP_LAUNCH_START' error.
snp_guest_policy = 196608
@@ -185,12 +185,18 @@ memory_offset = 0
# Default false
enable_virtio_mem = false
# Disable block device from being used for a container's rootfs.
# Disable hotplugging host block devices to guest VMs for container rootfs.
# In case of a storage driver like devicemapper where a container's
# root file system is backed by a block device, the block device is passed
# directly to the hypervisor for performance reasons.
# This flag prevents the block device from being passed to the hypervisor,
# virtio-fs is used instead to pass the rootfs.
# WARNING:
# Don't set this flag to false if you don't understand well the behavior of
# your container runtime and image snapshotter. Some snapshotters might use
# container image storage devices that are not meant to be hotplugged into a
# guest VM - e.g., because they contain files used by the host or by other
# guests.
disable_block_device_use = @DEFDISABLEBLOCK@
# Shared file system type:
@@ -388,10 +394,10 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM_NV@
pcie_root_port = 0
# In a confidential compute environment hot-plugging can compromise
# security.
# Enable cold-plugging of VFIO devices to a bridge-port,
# root-port or switch-port.
# The default setting is "no-port", which means disabled.
# security.
# Enable cold-plugging of VFIO devices to a bridge-port,
# root-port or switch-port.
# The default setting is "no-port", which means disabled.
cold_plug_vfio = "@DEFAULTVFIOPORT_NV@"
# If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
@@ -704,9 +710,9 @@ enable_pprof = false
# Indicates the CreateContainer request timeout needed for the workload(s)
# It using guest_pull this includes the time to pull the image inside the guest
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
create_container_timeout = @DEFAULTTIMEOUT_NV@


@@ -34,7 +34,7 @@ rootfs_type = @DEFROOTFSTYPE@
#
# Known limitations:
# * Does not work by design:
# - CPU Hotplug
# - CPU Hotplug
# - Memory Hotplug
# - NVDIMM devices
#
@@ -162,12 +162,18 @@ memory_offset = 0
# Default false
enable_virtio_mem = false
# Disable block device from being used for a container's rootfs.
# Disable hotplugging host block devices to guest VMs for container rootfs.
# In case of a storage driver like devicemapper where a container's
# root file system is backed by a block device, the block device is passed
# directly to the hypervisor for performance reasons.
# This flag prevents the block device from being passed to the hypervisor,
# virtio-fs is used instead to pass the rootfs.
# WARNING:
# Don't set this flag to false if you don't understand well the behavior of
# your container runtime and image snapshotter. Some snapshotters might use
# container image storage devices that are not meant to be hotplugged into a
# guest VM - e.g., because they contain files used by the host or by other
# guests.
disable_block_device_use = @DEFDISABLEBLOCK@
# Shared file system type:
@@ -365,10 +371,10 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM_NV@
pcie_root_port = 0
# In a confidential compute environment hot-plugging can compromise
# security.
# Enable cold-plugging of VFIO devices to a bridge-port,
# root-port or switch-port.
# The default setting is "no-port", which means disabled.
# security.
# Enable cold-plugging of VFIO devices to a bridge-port,
# root-port or switch-port.
# The default setting is "no-port", which means disabled.
cold_plug_vfio = "@DEFAULTVFIOPORT_NV@"
# If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
@@ -681,9 +687,9 @@ enable_pprof = false
# Indicates the CreateContainer request timeout needed for the workload(s)
# It using guest_pull this includes the time to pull the image inside the guest
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
create_container_timeout = @DEFAULTTIMEOUT_NV@


@@ -144,12 +144,18 @@ memory_offset = 0
# Default false
enable_virtio_mem = false
# Disable block device from being used for a container's rootfs.
# Disable hotplugging host block devices to guest VMs for container rootfs.
# In case of a storage driver like devicemapper where a container's
# root file system is backed by a block device, the block device is passed
# directly to the hypervisor for performance reasons.
# This flag prevents the block device from being passed to the hypervisor,
# virtio-fs is used instead to pass the rootfs.
# WARNING:
# Don't set this flag to false if you don't understand well the behavior of
# your container runtime and image snapshotter. Some snapshotters might use
# container image storage devices that are not meant to be hotplugged into a
# guest VM - e.g., because they contain files used by the host or by other
# guests.
disable_block_device_use = @DEFDISABLEBLOCK@
# Shared file system type:
@@ -355,16 +361,16 @@ msize_9p = @DEFMSIZE9P@
# nvdimm is not supported when `confidential_guest = true`.
disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM_NV@
# Enable hot-plugging of VFIO devices to a bridge-port,
# root-port or switch-port.
# Enable hot-plugging of VFIO devices to a bridge-port,
# root-port or switch-port.
# The default setting is "no-port"
hot_plug_vfio = "no-port"
# In a confidential compute environment hot-plugging can compromise
# security.
# Enable cold-plugging of VFIO devices to a bridge-port,
# root-port or switch-port.
# The default setting is "no-port", which means disabled.
# security.
# Enable cold-plugging of VFIO devices to a bridge-port,
# root-port or switch-port.
# The default setting is "no-port", which means disabled.
cold_plug_vfio = "@DEFAULTVFIOPORT_NV@"
# Before hot plugging a PCIe device, you need to add a pcie_root_port device.
@@ -683,9 +689,9 @@ enable_pprof = false
# Indicates the CreateContainer request timeout needed for the workload(s)
# It using guest_pull this includes the time to pull the image inside the guest
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
create_container_timeout = @DEFAULTTIMEOUT_NV@


@@ -25,7 +25,7 @@ machine_type = "@MACHINETYPE@"
#
# Known limitations:
# * Does not work by design:
# - CPU Hotplug
# - CPU Hotplug
# - Memory Hotplug
# - NVDIMM devices
#
@@ -153,12 +153,18 @@ memory_offset = 0
# Default false
enable_virtio_mem = false
# Disable block device from being used for a container's rootfs.
# Disable hotplugging host block devices to guest VMs for container rootfs.
# In case of a storage driver like devicemapper where a container's
# root file system is backed by a block device, the block device is passed
# directly to the hypervisor for performance reasons.
# This flag prevents the block device from being passed to the hypervisor,
# virtio-fs is used instead to pass the rootfs.
# WARNING:
# Don't set this flag to false if you don't understand well the behavior of
# your container runtime and image snapshotter. Some snapshotters might use
# container image storage devices that are not meant to be hotplugged into a
# guest VM - e.g., because they contain files used by the host or by other
# guests.
disable_block_device_use = @DEFDISABLEBLOCK@
# Shared file system type:
@@ -343,7 +349,7 @@ msize_9p = @DEFMSIZE9P@
# nvdimm is not supported when `confidential_guest = true`.
disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
# Enable hot-plugging of VFIO devices to a bridge-port,
# Enable hot-plugging of VFIO devices to a bridge-port,
# root-port or switch-port.
# The default setting is "no-port"
hot_plug_vfio = "no-port"
@@ -671,9 +677,9 @@ enable_pprof = false
# Indicates the CreateContainer request timeout needed for the workload(s)
# It using guest_pull this includes the time to pull the image inside the guest
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
create_container_timeout = @DEFCREATECONTAINERTIMEOUT@


@@ -33,7 +33,7 @@ rootfs_type = @DEFROOTFSTYPE@
#
# Known limitations:
# * Does not work by design:
# - CPU Hotplug
# - CPU Hotplug
# - Memory Hotplug
# - NVDIMM devices
#
@@ -74,7 +74,7 @@ snp_id_auth = ""
# SNP Guest Policy, the POLICY parameter to the SNP_LAUNCH_START command.
# If unset, the QEMU default policy (0x30000) will be used.
# Notice that the guest policy is enforced at VM launch, and your pod VMs
# Notice that the guest policy is enforced at VM launch, and your pod VMs
# won't start at all if the policy denys it. This will be indicated by a
# 'SNP_LAUNCH_START' error.
snp_guest_policy = 196608
@@ -184,12 +184,18 @@ memory_offset = 0
# Default false
enable_virtio_mem = false
# Disable block device from being used for a container's rootfs.
# Disable hotplugging host block devices to guest VMs for container rootfs.
# In case of a storage driver like devicemapper where a container's
# root file system is backed by a block device, the block device is passed
# directly to the hypervisor for performance reasons.
# This flag prevents the block device from being passed to the hypervisor,
# virtio-fs is used instead to pass the rootfs.
# WARNING:
# Don't set this flag to false if you don't understand well the behavior of
# your container runtime and image snapshotter. Some snapshotters might use
# container image storage devices that are not meant to be hotplugged into a
# guest VM - e.g., because they contain files used by the host or by other
# guests.
disable_block_device_use = @DEFDISABLEBLOCK@
# Shared file system type:
@@ -696,9 +702,9 @@ enable_pprof = false
# Indicates the CreateContainer request timeout needed for the workload(s)
# It using guest_pull this includes the time to pull the image inside the guest
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
create_container_timeout = @DEFCREATECONTAINERTIMEOUT@


@@ -12,7 +12,7 @@
# XXX: Type: @PROJECT_TYPE@
[hypervisor.qemu]
path = "@QEMUTDXPATH@"
path = "@QEMUPATH@"
kernel = "@KERNELCONFIDENTIALPATH@"
image = "@IMAGECONFIDENTIALPATH@"
machine_type = "@MACHINETYPE@"
@@ -33,7 +33,7 @@ rootfs_type = @DEFROOTFSTYPE@
#
# Known limitations:
# * Does not work by design:
# - CPU Hotplug
# - CPU Hotplug
# - Memory Hotplug
# - NVDIMM devices
#
@@ -54,7 +54,7 @@ enable_annotations = @DEFENABLEANNOTATIONS_COCO@
# Each member of the list is a path pattern as described by glob(3).
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @QEMUVALIDHYPERVISORPATHS@
valid_hypervisor_paths = @QEMUTDXVALIDHYPERVISORPATHS@
valid_hypervisor_paths = @QEMUVALIDHYPERVISORPATHS@
# Optional space-separated list of options to pass to the guest kernel.
# For example, use `kernel_params = "vsyscall=emulate"` if you are having
@@ -161,12 +161,18 @@ memory_offset = 0
# Default false
enable_virtio_mem = false
# Disable block device from being used for a container's rootfs.
# Disable hotplugging host block devices to guest VMs for container rootfs.
# In case of a storage driver like devicemapper where a container's
# root file system is backed by a block device, the block device is passed
# directly to the hypervisor for performance reasons.
# This flag prevents the block device from being passed to the hypervisor,
# virtio-fs is used instead to pass the rootfs.
# WARNING:
# Don't set this flag to false if you don't understand well the behavior of
# your container runtime and image snapshotter. Some snapshotters might use
# container image storage devices that are not meant to be hotplugged into a
# guest VM - e.g., because they contain files used by the host or by other
# guests.
disable_block_device_use = @DEFDISABLEBLOCK@
# Shared file system type:
@@ -673,9 +679,9 @@ enable_pprof = false
# Indicates the CreateContainer request timeout needed for the workload(s)
# It using guest_pull this includes the time to pull the image inside the guest
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
create_container_timeout = @DEFCREATECONTAINERTIMEOUT@


@@ -144,12 +144,18 @@ memory_offset = 0
# Default false
enable_virtio_mem = false
# Disable block device from being used for a container's rootfs.
# Disable hotplugging host block devices to guest VMs for container rootfs.
# In case of a storage driver like devicemapper where a container's
# root file system is backed by a block device, the block device is passed
# directly to the hypervisor for performance reasons.
# This flag prevents the block device from being passed to the hypervisor,
# virtio-fs is used instead to pass the rootfs.
# WARNING:
# Don't set this flag to false if you don't understand well the behavior of
# your container runtime and image snapshotter. Some snapshotters might use
# container image storage devices that are not meant to be hotplugged into a
# guest VM - e.g., because they contain files used by the host or by other
# guests.
disable_block_device_use = @DEFDISABLEBLOCK@
# Shared file system type:
@@ -355,17 +361,17 @@ msize_9p = @DEFMSIZE9P@
# nvdimm is not supported when `confidential_guest = true`.
disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
# Enable hot-plugging of VFIO devices to a bridge-port,
# root-port or switch-port.
# Enable hot-plugging of VFIO devices to a bridge-port,
# root-port or switch-port.
# The default setting is "no-port"
hot_plug_vfio = "no-port"
# In a confidential compute environment hot-plugging can compromise
# security.
# Enable cold-plugging of VFIO devices to a bridge-port,
# root-port or switch-port.
# The default setting is "no-port", which means disabled.
cold_plug_vfio = "no-port"
# security.
# Enable cold-plugging of VFIO devices to a bridge-port,
# root-port or switch-port.
# The default setting is "no-port", which means disabled.
cold_plug_vfio = "no-port"
# Before hot plugging a PCIe device, you need to add a pcie_root_port device.
# Use this parameter when using some large PCI bar devices, such as Nvidia GPU
@@ -687,9 +693,9 @@ enable_pprof = false
# Indicates the CreateContainer request timeout needed for the workload(s)
# It using guest_pull this includes the time to pull the image inside the guest
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
create_container_timeout = @DEFCREATECONTAINERTIMEOUT@


@@ -103,12 +103,18 @@ default_maxmemory = @DEFMAXMEMSZ@
# Default 0
memory_offset = 0
# Disable block device from being used for a container's rootfs.
# Disable hotplugging host block devices to guest VMs for container rootfs.
# In case of a storage driver like devicemapper where a container's
# root file system is backed by a block device, the block device is passed
# directly to the hypervisor for performance reasons.
# This flag prevents the block device from being passed to the hypervisor,
# virtio-fs is used instead to pass the rootfs.
# WARNING:
# Don't set this flag to false if you don't understand well the behavior of
# your container runtime and image snapshotter. Some snapshotters might use
# container image storage devices that are not meant to be hotplugged into a
# guest VM - e.g., because they contain files used by the host or by other
# guests.
disable_block_device_use = @DEFDISABLEBLOCK@
# Shared file system type:
@@ -404,9 +410,9 @@ enable_pprof = false
# Indicates the CreateContainer request timeout needed for the workload(s)
# It using guest_pull this includes the time to pull the image inside the guest
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
# In essence, the timeout used for guest pull = runtime-request-timeout < create_container_timeout ? runtime-request-timeout : create_container_timeout.
create_container_timeout = @DEFCREATECONTAINERTIMEOUT@

View File

@@ -1,7 +1,7 @@
module github.com/kata-containers/kata-containers/src/runtime
// Keep in sync with version in versions.yaml
go 1.24.11
go 1.24.12
// WARNING: Do NOT use `replace` directives as those break dependabot:
// https://github.com/kata-containers/kata-containers/issues/11020
@@ -49,7 +49,7 @@ require (
github.com/safchain/ethtool v0.6.2
github.com/sirupsen/logrus v1.9.3
github.com/stretchr/testify v1.11.1
github.com/urfave/cli v1.22.15
github.com/urfave/cli v1.22.17
github.com/vishvananda/netlink v1.3.1
github.com/vishvananda/netns v0.0.5
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20220601114329-47893b162965
@@ -85,7 +85,7 @@ require (
github.com/containerd/log v0.1.0 // indirect
github.com/containerd/platforms v0.2.1 // indirect
github.com/containernetworking/cni v1.3.0 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.6 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect
github.com/cyphar/filepath-securejoin v0.6.0 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/distribution/reference v0.6.0 // indirect

View File

@@ -8,7 +8,6 @@ github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h
github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20230306123547-8075edf89bb0 h1:59MxjQVfjXsBpLy+dbd2/ELV5ofnUkUZBvWSC85sheA=
github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20230306123547-8075edf89bb0/go.mod h1:OahwfttHWG6eJ0clwcfBAHoDI6X/LV/15hx/wlMZSrU=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/BurntSushi/toml v1.5.0 h1:W5quZX/G/csjUnuI8SUYlsHs9M38FC7znL0lIO+DvMg=
github.com/BurntSushi/toml v1.5.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=
github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
@@ -70,9 +69,8 @@ github.com/containernetworking/plugins v1.9.0 h1:Mg3SXBdRGkdXyFC4lcwr6u2ZB2SDeL6
github.com/containernetworking/plugins v1.9.0/go.mod h1:JG3BxoJifxxHBhG3hFyxyhid7JgRVBu/wtooGEvWf1c=
github.com/coreos/go-systemd/v22 v22.6.0 h1:aGVa/v8B7hpb0TKl0MWoAavPDmHvobFe5R5zn0bCJWo=
github.com/coreos/go-systemd/v22 v22.6.0/go.mod h1:iG+pp635Fo7ZmV/j14KUcmEyWF+0X7Lua8rrTWzYgWU=
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/cpuguy83/go-md2man/v2 v2.0.6 h1:XJtiaUW6dEEqVuZiMTn1ldk455QWwEIsMIJlo5vtkx0=
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo=
github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
github.com/cri-o/cri-o v1.34.0 h1:ux2URwAyENy5e5hD9Z95tshdfy98eqatZk0fxx3rhuk=
github.com/cri-o/cri-o v1.34.0/go.mod h1:kP40HG+1EW5CDNHjqQBFhb6dehT5dCBKcmtO5RZAm6k=
github.com/cyphar/filepath-securejoin v0.6.0 h1:BtGB77njd6SVO6VztOHfPxKitJvd/VPT+OFBFMOi1Is=
@@ -289,13 +287,13 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
github.com/urfave/cli v1.22.15 h1:nuqt+pdC/KqswQKhETJjo7pvn/k4xMUxgW6liI7XpnM=
github.com/urfave/cli v1.22.15/go.mod h1:wSan1hmo5zeyLGBjRJbzRTNk8gwoYa2B9n4q9dmRIc0=
github.com/urfave/cli v1.22.17 h1:SYzXoiPfQjHBbkYxbew5prZHS1TOLT3ierW8SYLqtVQ=
github.com/urfave/cli v1.22.17/go.mod h1:b0ht0aqgH/6pBYzzxURyrM4xXNgsoT/n2ZzwQiEhNVo=
github.com/vishvananda/netlink v1.3.1 h1:3AEMt62VKqz90r0tmNhog0r/PpWKmrEShJU0wJW6bV0=
github.com/vishvananda/netlink v1.3.1/go.mod h1:ARtKouGSTGchR8aMwmkzC0qiNPrrWO5JS/XMVl45+b4=
github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zdEY=

View File

@@ -19,8 +19,13 @@ import (
)
const (
// containerd CRI annotations
nameAnnotation = "io.kubernetes.cri.sandbox-name"
namespaceAnnotation = "io.kubernetes.cri.sandbox-namespace"
// CRI-O annotations
crioNameAnnotation = "io.kubernetes.cri-o.KubeName"
crioNamespaceAnnotation = "io.kubernetes.cri-o.Namespace"
)
// coldPlugDevices handles cold plug of CDI devices into the sandbox
@@ -78,8 +83,7 @@ func coldPlugWithAPI(ctx context.Context, s *service, ociSpec *specs.Spec) error
// the Kubelet does not pass the device information via CRI during
// Sandbox creation.
func getDeviceSpec(ctx context.Context, socket string, ann map[string]string) ([]string, error) {
podName := ann[nameAnnotation]
podNs := ann[namespaceAnnotation]
podName, podNs := getPodIdentifiers(ann)
// create dialer for unix socket
dialer := func(ctx context.Context, target string) (net.Conn, error) {
@@ -111,7 +115,7 @@ func getDeviceSpec(ctx context.Context, socket string, ann map[string]string) ([
}
resp, err := client.Get(ctx, prr)
if err != nil {
return nil, fmt.Errorf("cold plug: GetPodResources failed: %w", err)
return nil, fmt.Errorf("cold plug: GetPodResources failed for pod(%s) in namespace(%s): %w", podName, podNs, err)
}
podRes := resp.PodResources
if podRes == nil {
@@ -141,6 +145,24 @@ func formatCDIDevIDs(specName string, devIDs []string) []string {
return result
}
func debugPodID(ann map[string]string) string {
return fmt.Sprintf("%s/%s", ann[namespaceAnnotation], ann[nameAnnotation])
// getPodIdentifiers returns the pod name and namespace from annotations.
// It first checks containerd CRI annotations, then falls back to CRI-O annotations.
func getPodIdentifiers(ann map[string]string) (podName, podNamespace string) {
podName = ann[nameAnnotation]
podNamespace = ann[namespaceAnnotation]
// Fall back to CRI-O annotations if containerd annotations are empty
if podName == "" {
podName = ann[crioNameAnnotation]
}
if podNamespace == "" {
podNamespace = ann[crioNamespaceAnnotation]
}
return podName, podNamespace
}
func debugPodID(ann map[string]string) string {
podName, podNamespace := getPodIdentifiers(ann)
return fmt.Sprintf("%s/%s", podNamespace, podName)
}
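The fallback order above can be exercised in isolation. A minimal, self-contained sketch reproducing the helper with the annotation keys from the diff (reproduced here for illustration, not imported from the runtime package):

```go
package main

import "fmt"

const (
	// containerd CRI annotations
	nameAnnotation      = "io.kubernetes.cri.sandbox-name"
	namespaceAnnotation = "io.kubernetes.cri.sandbox-namespace"
	// CRI-O annotations
	crioNameAnnotation      = "io.kubernetes.cri-o.KubeName"
	crioNamespaceAnnotation = "io.kubernetes.cri-o.Namespace"
)

// getPodIdentifiers prefers the containerd CRI annotations and falls back
// to the CRI-O keys when the containerd ones are absent or empty.
func getPodIdentifiers(ann map[string]string) (podName, podNamespace string) {
	podName = ann[nameAnnotation]
	podNamespace = ann[namespaceAnnotation]
	if podName == "" {
		podName = ann[crioNameAnnotation]
	}
	if podNamespace == "" {
		podNamespace = ann[crioNamespaceAnnotation]
	}
	return podName, podNamespace
}

func main() {
	// A pod created by CRI-O carries only the CRI-O annotation keys.
	crio := map[string]string{
		crioNameAnnotation:      "mypod",
		crioNamespaceAnnotation: "default",
	}
	name, ns := getPodIdentifiers(crio)
	fmt.Printf("%s/%s\n", ns, name) // default/mypod
}
```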

View File

@@ -1,3 +1,4 @@
// Package md2man aims in converting markdown into roff (man pages).
package md2man
import (

View File

@@ -47,13 +47,13 @@ const (
tableStart = "\n.TS\nallbox;\n"
tableEnd = ".TE\n"
tableCellStart = "T{\n"
tableCellEnd = "\nT}\n"
tableCellEnd = "\nT}"
tablePreprocessor = `'\" t`
)
// NewRoffRenderer creates a new blackfriday Renderer for generating roff documents
// from markdown
func NewRoffRenderer() *roffRenderer { // nolint: golint
func NewRoffRenderer() *roffRenderer {
return &roffRenderer{}
}
@@ -316,9 +316,8 @@ func (r *roffRenderer) handleTableCell(w io.Writer, node *blackfriday.Node, ente
} else if nodeLiteralSize(node) > 30 {
end = tableCellEnd
}
if node.Next == nil && end != tableCellEnd {
// Last cell: need to carriage return if we are at the end of the
// header row and content isn't wrapped in a "tablecell"
if node.Next == nil {
// Last cell: need to carriage return if we are at the end of the header row.
end += crTag
}
out(w, end)
@@ -356,7 +355,7 @@ func countColumns(node *blackfriday.Node) int {
}
func out(w io.Writer, output string) {
io.WriteString(w, output) // nolint: errcheck
io.WriteString(w, output) //nolint:errcheck
}
func escapeSpecialChars(w io.Writer, text []byte) {
@@ -395,7 +394,7 @@ func escapeSpecialCharsLine(w io.Writer, text []byte) {
i++
}
if i > org {
w.Write(text[org:i]) // nolint: errcheck
w.Write(text[org:i]) //nolint:errcheck
}
// escape a character
@@ -403,7 +402,7 @@ func escapeSpecialCharsLine(w io.Writer, text []byte) {
break
}
w.Write([]byte{'\\', text[i]}) // nolint: errcheck
w.Write([]byte{'\\', text[i]}) //nolint:errcheck
}
}

View File

@@ -257,7 +257,7 @@ github.com/containernetworking/plugins/pkg/testutils
# github.com/coreos/go-systemd/v22 v22.6.0
## explicit; go 1.23
github.com/coreos/go-systemd/v22/dbus
# github.com/cpuguy83/go-md2man/v2 v2.0.6
# github.com/cpuguy83/go-md2man/v2 v2.0.7
## explicit; go 1.12
github.com/cpuguy83/go-md2man/v2/md2man
# github.com/cri-o/cri-o v1.34.0
@@ -526,7 +526,7 @@ github.com/stretchr/testify/assert/yaml
# github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635
## explicit
github.com/syndtr/gocapability/capability
# github.com/urfave/cli v1.22.15
# github.com/urfave/cli v1.22.17
## explicit; go 1.11
github.com/urfave/cli
# github.com/vishvananda/netlink v1.3.1

View File

@@ -861,6 +861,10 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
return fmt.Errorf("Cannot get VFIO device from IOMMUFD with device: %v err: %v", dev, err)
}
} else {
if q.config.ConfidentialGuest {
return fmt.Errorf("ConfidentialGuest needs IOMMUFD - cannot use %s", dev.HostPath)
}
vfioDevices, err = drivers.GetAllVFIODevicesFromIOMMUGroup(dev)
if err != nil {
return fmt.Errorf("Cannot get all VFIO devices from IOMMU group with device: %v err: %v", dev, err)

View File

@@ -1,7 +1,7 @@
module kata-containers/csi-kata-directvolume
// Keep in sync with version in versions.yaml
go 1.24.11
go 1.24.12
// WARNING: Do NOT use `replace` directives as those break dependabot:
// https://github.com/kata-containers/kata-containers/issues/11020

View File

@@ -3024,9 +3024,9 @@ dependencies = [
[[package]]
name = "qapi"
version = "0.14.0"
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6412bdd014ebee03ddbbe79ac03a0b622cce4d80ba45254f6357c847f06fa38"
checksum = "7b047adab56acc4948d4b9b58693c1f33fd13efef2d6bb5f0f66a47436ceada8"
dependencies = [
"bytes",
"futures",
@@ -3061,9 +3061,9 @@ dependencies = [
[[package]]
name = "qapi-qmp"
version = "0.14.0"
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8b944db7e544d2fa97595e9a000a6ba5c62c426fa185e7e00aabe4b5640b538"
checksum = "45303cac879d89361cad0287ae15f9ae1e7799b904b474152414aeece39b9875"
dependencies = [
"qapi-codegen",
"qapi-spec",

View File

@@ -81,6 +81,7 @@ pub enum Commands {
#[error("Argument is not valid")]
pub struct CheckArgument {
#[clap(subcommand)]
#[allow(unused_assignments)]
pub command: CheckSubCommand,
}

View File

@@ -486,11 +486,11 @@ mod tests {
let releases = get_kata_all_releases_by_url(KATA_GITHUB_RELEASE_URL);
// Sometimes accessing the github.com API from GitHub Actions may fail;
// skip this test in that case so the whole test run doesn't fail.
if releases.is_err() {
if let Err(error) = releases {
warn!(
sl!(),
"get kata version failed({:?}), this may be a temporary error, just skip the test.",
releases.unwrap_err()
error
);
return;
}

View File

@@ -1,7 +1,7 @@
module github.com/kata-containers/kata-containers/src/tools/log-parser
// Keep in sync with version in versions.yaml
go 1.24.11
go 1.24.12
require (
github.com/BurntSushi/toml v1.1.0

View File

@@ -0,0 +1,366 @@
#!/usr/bin/env bats
# Copyright (c) 2025 NVIDIA Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# End-to-end tests for kata-deploy custom runtimes feature
# These tests deploy kata-deploy with custom runtimes and verify pods can run
#
# Required environment variables:
# DOCKER_REGISTRY - Container registry for kata-deploy image
# DOCKER_REPO - Repository name for kata-deploy image
# DOCKER_TAG - Image tag to test
# KATA_HYPERVISOR - Hypervisor to test (qemu, clh, etc.)
# KUBERNETES - K8s distribution (microk8s, k3s, rke2, etc.)
load "${BATS_TEST_DIRNAME}/../../common.bash"
repo_root_dir="${BATS_TEST_DIRNAME}/../../../"
load "${repo_root_dir}/tests/gha-run-k8s-common.sh"
# Load shared helm deployment helpers
source "${BATS_TEST_DIRNAME}/lib/helm-deploy.bash"
# Test configuration
CUSTOM_RUNTIME_NAME="special-workload"
CUSTOM_RUNTIME_HANDLER="kata-my-custom-handler"
TEST_POD_NAME="kata-deploy-custom-verify"
CHART_PATH="$(get_chart_path)"
# =============================================================================
# Template Rendering Tests (no cluster required)
# =============================================================================
@test "Helm template: ConfigMap is created with custom runtime" {
helm template kata-deploy "${CHART_PATH}" \
-f "${CUSTOM_VALUES_FILE}" \
--set image.reference=quay.io/kata-containers/kata-deploy \
--set image.tag=latest \
> /tmp/rendered.yaml
# Check that ConfigMap exists
grep -q "kind: ConfigMap" /tmp/rendered.yaml
grep -q "kata-deploy-custom-configs" /tmp/rendered.yaml
grep -q "${CUSTOM_RUNTIME_HANDLER}" /tmp/rendered.yaml
}
@test "Helm template: RuntimeClass is created with correct handler" {
helm template kata-deploy "${CHART_PATH}" \
-f "${CUSTOM_VALUES_FILE}" \
--set image.reference=quay.io/kata-containers/kata-deploy \
--set image.tag=latest \
> /tmp/rendered.yaml
grep -q "kind: RuntimeClass" /tmp/rendered.yaml
grep -q "handler: ${CUSTOM_RUNTIME_HANDLER}" /tmp/rendered.yaml
}
@test "Helm template: Drop-in file is included in ConfigMap" {
helm template kata-deploy "${CHART_PATH}" \
-f "${CUSTOM_VALUES_FILE}" \
--set image.reference=quay.io/kata-containers/kata-deploy \
--set image.tag=latest \
> /tmp/rendered.yaml
grep -q "dropin-${CUSTOM_RUNTIME_HANDLER}.toml" /tmp/rendered.yaml
grep -q "dial_timeout = 999" /tmp/rendered.yaml
}
@test "Helm template: CUSTOM_RUNTIMES_ENABLED env var is set" {
helm template kata-deploy "${CHART_PATH}" \
-f "${CUSTOM_VALUES_FILE}" \
--set image.reference=quay.io/kata-containers/kata-deploy \
--set image.tag=latest \
> /tmp/rendered.yaml
grep -q "CUSTOM_RUNTIMES_ENABLED" /tmp/rendered.yaml
grep -A1 "CUSTOM_RUNTIMES_ENABLED" /tmp/rendered.yaml | grep -q '"true"'
}
@test "Helm template: custom-configs volume is mounted" {
helm template kata-deploy "${CHART_PATH}" \
-f "${CUSTOM_VALUES_FILE}" \
--set image.reference=quay.io/kata-containers/kata-deploy \
--set image.tag=latest \
> /tmp/rendered.yaml
grep -q "mountPath: /custom-configs/" /tmp/rendered.yaml
grep -q "name: custom-configs" /tmp/rendered.yaml
}
@test "Helm template: No custom runtime resources when disabled" {
helm template kata-deploy "${CHART_PATH}" \
--set image.reference=quay.io/kata-containers/kata-deploy \
--set image.tag=latest \
--set customRuntimes.enabled=false \
> /tmp/rendered.yaml
! grep -q "kata-deploy-custom-configs" /tmp/rendered.yaml
! grep -q "CUSTOM_RUNTIMES_ENABLED" /tmp/rendered.yaml
}
@test "Helm template: Custom runtimes only mode (no standard shims)" {
# Test that Helm chart renders correctly when all standard shims are disabled
# using shims.disableAll and only custom runtimes are enabled
local values_file
values_file=$(mktemp)
cat > "${values_file}" <<EOF
image:
reference: quay.io/kata-containers/kata-deploy
tag: latest
# Disable all standard shims at once
shims:
disableAll: true
# Enable only custom runtimes
customRuntimes:
enabled: true
runtimes:
my-only-runtime:
baseConfig: "qemu"
dropIn: |
[hypervisor.qemu]
enable_debug = true
runtimeClass: |
kind: RuntimeClass
apiVersion: node.k8s.io/v1
metadata:
name: kata-my-only-runtime
handler: kata-my-only-runtime
scheduling:
nodeSelector:
katacontainers.io/kata-runtime: "true"
containerd:
snapshotter: ""
crio:
pullType: ""
EOF
helm template kata-deploy "${CHART_PATH}" -f "${values_file}" > /tmp/rendered.yaml
rm -f "${values_file}"
# Verify custom runtime resources are created
grep -q "kata-deploy-custom-configs" /tmp/rendered.yaml
grep -q "CUSTOM_RUNTIMES_ENABLED" /tmp/rendered.yaml
grep -q "kata-my-only-runtime" /tmp/rendered.yaml
# Verify SHIMS env var is empty (no standard shims)
local shims_value
shims_value=$(grep -A1 'name: SHIMS$' /tmp/rendered.yaml | grep 'value:' | head -1 || echo "")
echo "# SHIMS env value: ${shims_value}" >&3
}
# =============================================================================
# End-to-End Tests (require cluster with kata-deploy)
# =============================================================================
@test "E2E: Custom RuntimeClass exists and can run a pod" {
# Check RuntimeClass exists
run kubectl get runtimeclass "${CUSTOM_RUNTIME_HANDLER}" -o name
if [[ "${status}" -ne 0 ]]; then
echo "# RuntimeClass not found. kata-deploy logs:" >&3
kubectl -n kube-system logs -l name=kata-deploy --tail=50 2>/dev/null || true
die "Custom RuntimeClass ${CUSTOM_RUNTIME_HANDLER} not found"
fi
echo "# RuntimeClass ${CUSTOM_RUNTIME_HANDLER} exists" >&3
# Verify handler is correct
local handler
handler=$(kubectl get runtimeclass "${CUSTOM_RUNTIME_HANDLER}" -o jsonpath='{.handler}')
echo "# Handler: ${handler}" >&3
[[ "${handler}" == "${CUSTOM_RUNTIME_HANDLER}" ]]
# Verify overhead is set
local overhead_memory
overhead_memory=$(kubectl get runtimeclass "${CUSTOM_RUNTIME_HANDLER}" -o jsonpath='{.overhead.podFixed.memory}')
echo "# Overhead memory: ${overhead_memory}" >&3
[[ "${overhead_memory}" == "640Mi" ]]
local overhead_cpu
overhead_cpu=$(kubectl get runtimeclass "${CUSTOM_RUNTIME_HANDLER}" -o jsonpath='{.overhead.podFixed.cpu}')
echo "# Overhead CPU: ${overhead_cpu}" >&3
[[ "${overhead_cpu}" == "500m" ]]
# Verify nodeSelector is set
local node_selector
node_selector=$(kubectl get runtimeclass "${CUSTOM_RUNTIME_HANDLER}" -o jsonpath='{.scheduling.nodeSelector.katacontainers\.io/kata-runtime}')
echo "# Node selector: ${node_selector}" >&3
[[ "${node_selector}" == "true" ]]
# Verify label is set (Helm sets this to "Helm" when it manages the resource)
local label
label=$(kubectl get runtimeclass "${CUSTOM_RUNTIME_HANDLER}" -o jsonpath='{.metadata.labels.app\.kubernetes\.io/managed-by}')
echo "# Label app.kubernetes.io/managed-by: ${label}" >&3
[[ "${label}" == "Helm" ]]
# Create a test pod using the custom runtime
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
name: ${TEST_POD_NAME}
spec:
runtimeClassName: ${CUSTOM_RUNTIME_HANDLER}
restartPolicy: Never
nodeSelector:
katacontainers.io/kata-runtime: "true"
containers:
- name: test
image: quay.io/kata-containers/alpine-bash-curl:latest
command: ["echo", "OK"]
EOF
# Wait for pod to complete or become ready
echo "# Waiting for pod to be ready..." >&3
local timeout=120
local start_time
start_time=$(date +%s)
while true; do
local phase
phase=$(kubectl get pod "${TEST_POD_NAME}" -o jsonpath='{.status.phase}' 2>/dev/null || echo "Unknown")
case "${phase}" in
Succeeded|Running)
echo "# Pod reached phase: ${phase}" >&3
break
;;
Failed)
echo "# Pod failed" >&3
kubectl describe pod "${TEST_POD_NAME}" >&3
die "Pod failed to run with custom runtime"
;;
*)
local current_time
current_time=$(date +%s)
if (( current_time - start_time > timeout )); then
echo "# Timeout waiting for pod" >&3
kubectl describe pod "${TEST_POD_NAME}" >&3
die "Timeout waiting for pod to be ready"
fi
sleep 5
;;
esac
done
# Verify pod ran successfully
local exit_code
exit_code=$(kubectl get pod "${TEST_POD_NAME}" -o jsonpath='{.status.containerStatuses[0].state.terminated.exitCode}' 2>/dev/null || echo "")
if [[ "${exit_code}" == "0" ]] || [[ "$(kubectl get pod "${TEST_POD_NAME}" -o jsonpath='{.status.phase}')" == "Running" ]]; then
echo "# Pod ran successfully with custom runtime" >&3
BATS_TEST_COMPLETED=1
else
die "Pod did not complete successfully (exit code: ${exit_code})"
fi
}
# =============================================================================
# Setup and Teardown
# =============================================================================
setup_file() {
ensure_helm
echo "# Using base config: ${KATA_HYPERVISOR}" >&3
echo "# Custom runtime handler: ${CUSTOM_RUNTIME_HANDLER}" >&3
echo "# Image: ${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}" >&3
echo "# K8s distribution: ${KUBERNETES}" >&3
# Create values file for custom runtimes
export DEPLOY_VALUES_FILE=$(mktemp)
cat > "${DEPLOY_VALUES_FILE}" <<EOF
customRuntimes:
enabled: true
runtimes:
${CUSTOM_RUNTIME_NAME}:
baseConfig: "${KATA_HYPERVISOR}"
dropIn: |
[agent.kata]
dial_timeout = 999
runtimeClass: |
kind: RuntimeClass
apiVersion: node.k8s.io/v1
metadata:
name: ${CUSTOM_RUNTIME_HANDLER}
labels:
app.kubernetes.io/managed-by: kata-deploy
handler: ${CUSTOM_RUNTIME_HANDLER}
overhead:
podFixed:
memory: "640Mi"
cpu: "500m"
scheduling:
nodeSelector:
katacontainers.io/kata-runtime: "true"
containerd:
snapshotter: ""
crio:
pullType: ""
EOF
echo "# Deploying kata-deploy with custom runtimes..." >&3
deploy_kata "${DEPLOY_VALUES_FILE}"
echo "# kata-deploy deployed successfully" >&3
}
setup() {
# Create temporary values file for template tests
CUSTOM_VALUES_FILE=$(mktemp)
cat > "${CUSTOM_VALUES_FILE}" <<EOF
customRuntimes:
enabled: true
runtimes:
${CUSTOM_RUNTIME_NAME}:
baseConfig: "${KATA_HYPERVISOR:-qemu}"
dropIn: |
[agent.kata]
dial_timeout = 999
runtimeClass: |
kind: RuntimeClass
apiVersion: node.k8s.io/v1
metadata:
name: ${CUSTOM_RUNTIME_HANDLER}
labels:
app.kubernetes.io/managed-by: kata-deploy
handler: ${CUSTOM_RUNTIME_HANDLER}
overhead:
podFixed:
memory: "640Mi"
cpu: "500m"
scheduling:
nodeSelector:
katacontainers.io/kata-runtime: "true"
containerd:
snapshotter: ""
crio:
pullType: ""
EOF
}
teardown() {
# Show pod details for debugging if test failed
if [[ "${BATS_TEST_COMPLETED:-}" != "1" ]]; then
echo "# Test failed, gathering diagnostics..." >&3
kubectl describe pod "${TEST_POD_NAME}" 2>/dev/null || true
echo "# kata-deploy logs:" >&3
kubectl -n kube-system logs -l name=kata-deploy --tail=100 2>/dev/null || true
fi
# Clean up test pod
kubectl delete pod "${TEST_POD_NAME}" --ignore-not-found=true --wait=false 2>/dev/null || true
# Clean up temp file
[[ -f "${CUSTOM_VALUES_FILE:-}" ]] && rm -f "${CUSTOM_VALUES_FILE}"
}
teardown_file() {
echo "# Cleaning up..." >&3
kubectl delete pod "${TEST_POD_NAME}" --ignore-not-found=true --wait=true --timeout=60s 2>/dev/null || true
uninstall_kata
[[ -f "${DEPLOY_VALUES_FILE:-}" ]] && rm -f "${DEPLOY_VALUES_FILE}"
}

View File

@@ -4,15 +4,38 @@
#
# SPDX-License-Identifier: Apache-2.0
#
# Kata Deploy Functional Tests
#
# This test validates that kata-deploy successfully installs and configures
# Kata Containers on a Kubernetes cluster using Helm.
#
# Required environment variables:
# DOCKER_REGISTRY - Container registry for kata-deploy image
# DOCKER_REPO - Repository name for kata-deploy image
# DOCKER_TAG - Image tag to test
# KATA_HYPERVISOR - Hypervisor to test (qemu, clh, etc.)
# KUBERNETES - K8s distribution (microk8s, k3s, rke2, etc.)
#
# Optional timeout configuration (increase for slow networks or large images):
# KATA_DEPLOY_TIMEOUT - Overall helm timeout (default: 30m)
# KATA_DEPLOY_DAEMONSET_TIMEOUT - DaemonSet rollout timeout in seconds (default: 1200 = 20m)
# Includes time to pull kata-deploy image
# KATA_DEPLOY_VERIFICATION_TIMEOUT - Verification pod timeout in seconds (default: 180 = 3m)
# Time for verification pod to run
#
# Example with custom timeouts for slow network:
# KATA_DEPLOY_DAEMONSET_TIMEOUT=3600 bats kata-deploy.bats
#
load "${BATS_TEST_DIRNAME}/../../common.bash"
repo_root_dir="${BATS_TEST_DIRNAME}/../../../"
load "${repo_root_dir}/tests/gha-run-k8s-common.sh"
setup() {
ensure_yq
# Load shared helm deployment helpers
source "${BATS_TEST_DIRNAME}/lib/helm-deploy.bash"
pushd "${repo_root_dir}"
setup() {
ensure_helm
# We expect 2 runtime classes because:
# * `kata` is the default runtimeclass created by Helm, basically an alias for `kata-${KATA_HYPERVISOR}`.
@@ -26,50 +49,80 @@ setup() {
"kata\s+kata-${KATA_HYPERVISOR}" \
"kata-${KATA_HYPERVISOR}\s+kata-${KATA_HYPERVISOR}" \
)
# Set the latest image, the one generated as part of the PR, to be used as part of the tests
export HELM_IMAGE_REFERENCE="${DOCKER_REGISTRY}/${DOCKER_REPO}"
export HELM_IMAGE_TAG="${DOCKER_TAG}"
# Enable debug for Kata Containers
export HELM_DEBUG="true"
# Create the runtime class only for the shim that's being tested
export HELM_SHIMS="${KATA_HYPERVISOR}"
# Set the tested hypervisor as the default `kata` shim
export HELM_DEFAULT_SHIM="${KATA_HYPERVISOR}"
# Let the Helm chart create the default `kata` runtime class
export HELM_CREATE_DEFAULT_RUNTIME_CLASS="true"
HOST_OS=""
if [[ "${KATA_HOST_OS}" = "cbl-mariner" ]]; then
HOST_OS="${KATA_HOST_OS}"
fi
export HELM_HOST_OS="${HOST_OS}"
export HELM_K8S_DISTRIBUTION="${KUBERNETES}"
# Enable deployment verification (verifies Kata Containers
# VM kernel isolation by comparing node vs pod kernel)
export HELM_VERIFY_DEPLOYMENT="true"
helm_helper
echo "::group::kata-deploy logs"
kubectl -n kube-system logs --tail=100 -l name=kata-deploy
echo "::endgroup::"
echo "::group::Runtime classes"
kubectl get runtimeclass
echo "::endgroup::"
popd
}
@test "Test runtimeclasses are being properly created and container runtime is not broken" {
pushd "${repo_root_dir}"
# Create verification pod spec
local verification_yaml
verification_yaml=$(mktemp)
cat > "${verification_yaml}" << EOF
apiVersion: v1
kind: Pod
metadata:
name: kata-deploy-verify
spec:
runtimeClassName: kata-${KATA_HYPERVISOR}
restartPolicy: Never
nodeSelector:
katacontainers.io/kata-runtime: "true"
containers:
- name: verify
image: quay.io/kata-containers/alpine-bash-curl:latest
imagePullPolicy: Always
command:
- sh
- -c
- |
echo "=== Kata Verification ==="
echo "Kernel: \$(uname -r)"
echo "SUCCESS: Pod running with Kata runtime"
EOF
# Install kata-deploy via Helm
echo "Installing kata-deploy with Helm..."
# Timeouts can be customized via environment variables:
# - KATA_DEPLOY_TIMEOUT: Overall helm timeout (includes all hooks)
# Default: 600s (10 minutes)
# - KATA_DEPLOY_DAEMONSET_TIMEOUT: Time to wait for kata-deploy DaemonSet rollout (image pull + pod start)
# Default: 300s (5 minutes) - accounts for large image downloads
# - KATA_DEPLOY_VERIFICATION_TIMEOUT: Time to wait for verification pod to complete
# Default: 120s (2 minutes) - verification pod execution time
local helm_timeout="${KATA_DEPLOY_TIMEOUT:-600s}"
local daemonset_timeout="${KATA_DEPLOY_DAEMONSET_TIMEOUT:-300}"
local verification_timeout="${KATA_DEPLOY_VERIFICATION_TIMEOUT:-120}"
echo "Timeout configuration:"
echo " Helm overall: ${helm_timeout}"
echo " DaemonSet rollout: ${daemonset_timeout}s (includes image pull)"
echo " Verification pod: ${verification_timeout}s (pod execution)"
# Deploy kata-deploy using shared helper with verification options
HELM_TIMEOUT="${helm_timeout}" deploy_kata "" \
--set-file verification.pod="${verification_yaml}" \
--set verification.timeout="${verification_timeout}" \
--set verification.daemonsetTimeout="${daemonset_timeout}"
rm -f "${verification_yaml}"
echo ""
echo "::group::kata-deploy logs"
kubectl -n kube-system logs --tail=200 -l name=kata-deploy
echo "::endgroup::"
echo ""
echo "::group::Runtime classes"
kubectl get runtimeclass
echo "::endgroup::"
# helm --wait already waits for post-install hooks to complete
# If helm returns successfully, the verification job passed
# The job is deleted after success (hook-delete-policy: hook-succeeded)
echo ""
echo "Helm install completed successfully - verification passed"
# We filter `kata-mshv-vm-isolation` out as that's present on AKS clusters, but that's not coming from kata-deploy
current_runtime_classes=$(kubectl get runtimeclasses | grep -v "kata-mshv-vm-isolation" | grep "kata" | wc -l)
[[ ${current_runtime_classes} -eq ${expected_runtime_classes} ]]
@@ -91,10 +144,10 @@ setup() {
# Check that the container runtime version doesn't contain "Unknown", which happens when containerd can't start properly
container_runtime_version=$(kubectl get nodes --no-headers -o custom-columns=CONTAINER_RUNTIME:.status.nodeInfo.containerRuntimeVersion)
[[ ${container_runtime_version} != *"containerd://Unknown"* ]]
popd
}
teardown() {
pushd "${repo_root_dir}"
helm uninstall kata-deploy --ignore-not-found --wait --cascade foreground --timeout 10m --namespace kube-system --debug
popd
uninstall_kata
}

View File

@@ -0,0 +1,127 @@
#!/bin/bash
# Copyright (c) 2025 NVIDIA Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# Shared helm deployment helpers for kata-deploy tests
#
# Required environment variables:
# DOCKER_REGISTRY - Container registry for kata-deploy image
# DOCKER_REPO - Repository name for kata-deploy image
# DOCKER_TAG - Image tag to test
# KATA_HYPERVISOR - Hypervisor to test (qemu, clh, etc.)
# KUBERNETES - K8s distribution (microk8s, k3s, rke2, etc.)
HELM_RELEASE_NAME="${HELM_RELEASE_NAME:-kata-deploy}"
HELM_NAMESPACE="${HELM_NAMESPACE:-kube-system}"
# Get the path to the helm chart
get_chart_path() {
local script_dir
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
echo "${script_dir}/../../../../tools/packaging/kata-deploy/helm-chart/kata-deploy"
}
# Generate base values YAML that disables all shims except the specified one
# Arguments:
# $1 - Output file path
# $2 - (Optional) Additional values file to merge
generate_base_values() {
local output_file="$1"
local extra_values_file="${2:-}"
cat > "${output_file}" <<EOF
image:
reference: ${DOCKER_REGISTRY}/${DOCKER_REPO}
tag: ${DOCKER_TAG}
k8sDistribution: "${KUBERNETES}"
debug: true
# Disable all shims at once, then enable only the one we need
shims:
disableAll: true
${KATA_HYPERVISOR}:
enabled: true
defaultShim:
amd64: ${KATA_HYPERVISOR}
arm64: ${KATA_HYPERVISOR}
runtimeClasses:
enabled: true
createDefault: true
EOF
}
# Deploy kata-deploy using helm
# Arguments:
# $1 - (Optional) Additional values file to merge with base values
# $@ - (Optional) Additional helm arguments (after the first positional arg)
deploy_kata() {
local extra_values_file="${1:-}"
shift || true
local extra_helm_args=("$@")
local chart_path
local values_yaml
chart_path="$(get_chart_path)"
values_yaml=$(mktemp)
# Generate base values
generate_base_values "${values_yaml}"
# Add required helm repos for dependencies
helm repo add node-feature-discovery https://kubernetes-sigs.github.io/node-feature-discovery/charts 2>/dev/null || true
helm repo update
# Build helm dependencies
helm dependency build "${chart_path}"
# Build helm command
local helm_cmd=(
helm upgrade --install "${HELM_RELEASE_NAME}" "${chart_path}"
-f "${values_yaml}"
)
# Add extra values file if provided
if [[ -n "${extra_values_file}" && -f "${extra_values_file}" ]]; then
helm_cmd+=(-f "${extra_values_file}")
fi
# Add any extra helm arguments
if [[ ${#extra_helm_args[@]} -gt 0 ]]; then
helm_cmd+=("${extra_helm_args[@]}")
fi
helm_cmd+=(
--namespace "${HELM_NAMESPACE}"
--wait --timeout "${HELM_TIMEOUT:-10m}"
)
# Run helm install
"${helm_cmd[@]}"
local ret=$?
rm -f "${values_yaml}"
if [[ ${ret} -ne 0 ]]; then
echo "Helm install failed with exit code ${ret}" >&2
return ${ret}
fi
# Wait for daemonset to be ready
kubectl -n "${HELM_NAMESPACE}" rollout status daemonset/kata-deploy --timeout=300s
# Give it a moment to configure runtimes
sleep 60
return 0
}
# Uninstall kata-deploy
uninstall_kata() {
helm uninstall "${HELM_RELEASE_NAME}" -n "${HELM_NAMESPACE}" \
--ignore-not-found --wait --cascade foreground --timeout 10m || true
}

View File

@@ -19,6 +19,7 @@ if [[ -n "${KATA_DEPLOY_TEST_UNION:-}" ]]; then
else
KATA_DEPLOY_TEST_UNION=( \
"kata-deploy.bats" \
"kata-deploy-custom-runtimes.bats" \
)
fi

View File

@@ -566,11 +566,8 @@ function helm_helper() {
[[ -n "${HELM_K8S_DISTRIBUTION}" ]] && yq -i ".k8sDistribution = \"${HELM_K8S_DISTRIBUTION}\"" "${values_yaml}"
if [[ "${HELM_DEFAULT_INSTALLATION}" = "false" ]]; then
# Disable all shims first (in case we started from an example file with shims enabled)
# Then we'll enable only the ones specified in HELM_SHIMS
for shim_key in $(yq '.shims | keys | .[]' "${values_yaml}" 2>/dev/null); do
yq -i ".shims.${shim_key}.enabled = false" "${values_yaml}"
done
# Disable all shims at once, then enable only the ones specified in HELM_SHIMS
yq -i ".shims.disableAll = true" "${values_yaml}"
# Use new structured format
if [[ -n "${HELM_DEBUG}" ]]; then
@@ -586,7 +583,7 @@ function helm_helper() {
# HELM_SHIMS is a space-separated list of shim names
# Enable each shim and set supported architectures
# TEE shims that need defaults unset (will be set based on env vars)
tee_shims="qemu-se qemu-se-runtime-rs qemu-cca qemu-snp qemu-tdx qemu-coco-dev qemu-coco-dev-runtime-rs qemu-nvidia-gpu-snp qemu-nvidia-gpu-tdx"
tee_shims="qemu-se qemu-se-runtime-rs qemu-cca qemu-snp qemu-snp-runtime-rs qemu-tdx qemu-tdx-runtime-rs qemu-coco-dev qemu-coco-dev-runtime-rs qemu-nvidia-gpu-snp qemu-nvidia-gpu-tdx"
for shim in ${HELM_SHIMS}; do
# Determine supported architectures based on shim name
@@ -604,7 +601,11 @@ function helm_helper() {
yq -i ".shims.${shim}.enabled = true" "${values_yaml}"
yq -i ".shims.${shim}.supportedArches = [\"amd64\"]" "${values_yaml}"
;;
qemu-runtime-rs|qemu-coco-dev|qemu-coco-dev-runtime-rs)
qemu-runtime-rs)
yq -i ".shims.${shim}.enabled = true" "${values_yaml}"
yq -i ".shims.${shim}.supportedArches = [\"amd64\", \"arm64\", \"s390x\"]" "${values_yaml}"
;;
qemu-coco-dev|qemu-coco-dev-runtime-rs)
yq -i ".shims.${shim}.enabled = true" "${values_yaml}"
yq -i ".shims.${shim}.supportedArches = [\"amd64\", \"s390x\"]" "${values_yaml}"
;;
@@ -678,7 +679,7 @@ function helm_helper() {
# HELM_ALLOWED_HYPERVISOR_ANNOTATIONS: if not in per-shim format (no colon), convert to per-shim format
# Output format: "qemu:foo,bar clh:foo" (space-separated entries, each with shim:annotations where annotations are comma-separated)
# Example: "foo bar" with shim "qemu-tdx" -> "qemu-tdx:foo,bar"
if [[ "${HELM_ALLOWED_HYPERVISOR_ANNOTATIONS}" != *:* ]]; then
if [[ -n "${HELM_ALLOWED_HYPERVISOR_ANNOTATIONS}" && "${HELM_ALLOWED_HYPERVISOR_ANNOTATIONS}" != *:* ]]; then
# Simple format: convert to per-shim format for all enabled shims
# "default_vcpus" -> "qemu-tdx:default_vcpus" (single shim)
# "image kernel default_vcpus" -> "qemu-tdx:image,kernel,default_vcpus" (single shim)
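The simple-to-per-shim conversion documented in these comments can be sketched as a small helper — `to_per_shim` is a hypothetical name; the real script performs the conversion inline while iterating over the enabled shims:

```shell
#!/usr/bin/env bash
# Hypothetical helper illustrating the documented conversion:
# "foo bar" plus shim "qemu-tdx" -> "qemu-tdx:foo,bar".
set -euo pipefail

to_per_shim() {
    local shims="$1" annotations="$2"
    local out="" shim
    for shim in ${shims}; do
        # Spaces between annotations become commas within each shim entry
        out+="${shim}:${annotations// /,} "
    done
    printf '%s' "${out% }"
}

to_per_shim "qemu-tdx" "image kernel default_vcpus"
# -> qemu-tdx:image,kernel,default_vcpus
```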
@@ -696,7 +697,7 @@ function helm_helper() {
fi
# HELM_AGENT_HTTPS_PROXY: if not in per-shim format (no equals), convert to per-shim format
if [[ "${HELM_AGENT_HTTPS_PROXY}" != *=* ]]; then
if [[ -n "${HELM_AGENT_HTTPS_PROXY}" && "${HELM_AGENT_HTTPS_PROXY}" != *=* ]]; then
# Simple format: convert to per-shim format for all enabled shims
# "http://proxy:8080" -> "qemu-tdx=http://proxy:8080;qemu-snp=http://proxy:8080"
local converted_proxy=""
@@ -710,7 +711,7 @@ function helm_helper() {
fi
# HELM_AGENT_NO_PROXY: if not in per-shim format (no equals), convert to per-shim format
if [[ "${HELM_AGENT_NO_PROXY}" != *=* ]]; then
if [[ -n "${HELM_AGENT_NO_PROXY}" && "${HELM_AGENT_NO_PROXY}" != *=* ]]; then
# Simple format: convert to per-shim format for all enabled shims
# "localhost,127.0.0.1" -> "qemu-tdx=localhost,127.0.0.1;qemu-snp=localhost,127.0.0.1"
local converted_noproxy=""
@@ -876,7 +877,7 @@ VERIFICATION_POD_EOF
max_tries=3
interval=10
i=10
i=0
# Retry loop for helm install to prevent transient failures due to instantly unreachable cluster
set +e # Disable immediate exit on failure
@@ -890,15 +891,16 @@ VERIFICATION_POD_EOF
fi
i=$((i+1))
if [[ ${i} -lt ${max_tries} ]]; then
echo "Retrying after ${interval} seconds (Attempt ${i} of $((max_tries - 1)))"
echo "Retrying after ${interval} seconds (Attempt ${i} of ${max_tries})"
else
break
fi
sleep "${interval}"
done
set -e # Re-enable immediate exit on failure
if [[ ${i} -eq ${max_tries} ]]; then
die "Failed to deploy kata-deploy after ${max_tries} tries"
if [[ ${i} -ge ${max_tries} ]]; then
echo "ERROR: Failed to deploy kata-deploy after ${max_tries} tries"
return 1
fi
# `helm install --wait` does not take effect on single replicas and maxUnavailable=1 DaemonSets
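The counter and message fixes in this hunk are easier to see in a self-contained sketch of the retry pattern: the counter starts at zero, the attempt message counts out of `max_tries`, and the final check uses `-ge`. The `retry` wrapper below is illustrative, not the script's actual function:

```shell
#!/usr/bin/env bash
# Minimal sketch of the fixed retry loop from the hunk above.

retry() {
    local max_tries=3 interval=1 i=0
    while true; do
        "$@" && return 0
        i=$((i+1))
        if [[ ${i} -lt ${max_tries} ]]; then
            echo "Retrying after ${interval} seconds (Attempt ${i} of ${max_tries})"
        else
            break
        fi
        sleep "${interval}"
    done
    # Reached only when every try was consumed
    if [[ ${i} -ge ${max_tries} ]]; then
        echo "ERROR: command failed after ${max_tries} tries" >&2
        return 1
    fi
}

retry false || echo "gave up"
```

With the old `i=10` initialization the loop would have broken out before the first retry message, and `-eq` would have missed the failure whenever the counter overshot `max_tries`.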

View File

@@ -1,7 +1,7 @@
module github.com/kata-containers/tests
// Keep in sync with version in versions.yaml
go 1.24.11
go 1.24.12
// WARNING: Do NOT use `replace` directives as those break dependabot:
// https://github.com/kata-containers/kata-containers/issues/11020

View File

@@ -218,15 +218,6 @@ kbs_set_resource_from_file() {
kbs-client --url "$(kbs_k8s_svc_http_addr)" config \
--auth-private-key "${KBS_PRIVATE_KEY}" set-resource \
--path "${path}" --resource-file "${file}"
kbs_pod=$(kubectl -n "${KBS_NS}" get pods -o NAME)
kbs_repo_path="/opt/confidential-containers/kbs/repository"
# Waiting for the resource to be created on the kbs pod
if ! kubectl -n "${KBS_NS}" exec -it "${kbs_pod}" -- bash -c "for i in {1..30}; do [ -e '${kbs_repo_path}/${path}' ] && exit 0; sleep 0.5; done; exit -1"; then
echo "ERROR: resource '${path}' not created in 15s"
kubectl -n "${KBS_NS}" exec -it "${kbs_pod}" -- bash -c "find ${kbs_repo_path}"
return 1
fi
}
# Build and install the kbs-client binary, unless it is already present.

View File

@@ -0,0 +1,59 @@
#!/usr/bin/env bats
#
# Copyright (c) 2025 NVIDIA Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
load "${BATS_TEST_DIRNAME}/../../common.bash"
load "${BATS_TEST_DIRNAME}/lib.sh"
load "${BATS_TEST_DIRNAME}/tests_common.sh"
setup() {
setup_common || die "setup_common failed"
pod_name="no-layer-image"
get_pod_config_dir
yaml_file="${pod_config_dir}/${pod_name}.yaml"
# genpolicy fails for this unusual container image, so use the allow_all policy.
add_allow_all_policy_to_yaml "${yaml_file}"
}
@test "Test image with no layers cannot run" {
# Error from run-k8s-tests (ubuntu, qemu, small):
#
# failed to create containerd task: failed to create shim task: the file sleep was not found
#
# Error from run-k8s-tests-on-tee (sev-snp, qemu-snp):
#
# failed to create containerd task: failed to create shim task: rpc status:
# Status { code: INTERNAL, message: "[CDH] [ERROR]: Image Pull error: Failed to pull image
# ghcr.io/kata-containers/no-layer-image:latest from all mirror/mapping locations or original location: image:
# ghcr.io/kata-containers/no-layer-image:latest, error: Internal error", details: [], special_fields:
# SpecialFields { unknown_fields: UnknownFields { fields: None }, cached_size: CachedSize { size: 0 } } }
#
# Error from run-k8s-tests-coco-nontee-with-erofs-snapshotter (qemu-coco-dev, erofs, default):
#
# failed to create containerd task: failed to create shim task: failed to mount
# /run/kata-containers/shared/containers/fadd1af7ea2a7bfc6caf26471f70e9a913a2989fd4a1be9d001b59e48c0781aa/rootfs
# to /run/kata-containers/fadd1af7ea2a7bfc6caf26471f70e9a913a2989fd4a1be9d001b59e48c0781aa/rootfs, with error:
# ENOENT: No such file or directory
kubectl create -f "${yaml_file}"
local -r command="kubectl describe "pod/${pod_name}" | grep -E \
'the file sleep was not found|\[CDH\] \[ERROR\]: Image Pull error|ENOENT: No such file or directory'"
info "Waiting ${wait_time} seconds for: ${command}"
waitForProcess "${wait_time}" "${sleep_time}" "${command}" >/dev/null 2>/dev/null
}
teardown() {
# Debugging information
kubectl describe "pod/${pod_name}"
kubectl get "pod/${pod_name}" -o yaml
kubectl delete pod "${pod_name}"
teardown_common "${node}" "${node_start_time:-}"
}

View File

@@ -48,12 +48,59 @@ KBS_AUTH_CONFIG_JSON=$(
)
export KBS_AUTH_CONFIG_JSON
# Base64 encoding for use as Kubernetes Secret in pod manifests
# Base64 encoding for use as Kubernetes Secret in pod manifests (non-TEE)
NGC_API_KEY_BASE64=$(
echo -n "${NGC_API_KEY}" | base64 -w0
)
export NGC_API_KEY_BASE64
# Sealed secret format for TEE pods (vault type pointing to KBS resource)
# Format: sealed.<base64url JWS header>.<base64url payload>.<base64url signature>
# IMPORTANT: JWS uses base64url encoding WITHOUT padding (no trailing '=')
# We use tr to convert standard base64 (+/) to base64url (-_) and remove padding (=)
# For vault type, header and signature can be placeholders since the payload
# contains the KBS resource path where the actual secret is stored.
#
# Vault type sealed secret payload for instruct pod:
# {
# "version": "0.1.0",
# "type": "vault",
# "name": "kbs:///default/ngc-api-key/instruct",
# "provider": "kbs",
# "provider_settings": {},
# "annotations": {}
# }
NGC_API_KEY_SEALED_SECRET_INSTRUCT_PAYLOAD=$(
echo -n '{"version":"0.1.0","type":"vault","name":"kbs:///default/ngc-api-key/instruct","provider":"kbs","provider_settings":{},"annotations":{}}' |
base64 -w0 | tr '+/' '-_' | tr -d '='
)
NGC_API_KEY_SEALED_SECRET_INSTRUCT="sealed.fakejwsheader.${NGC_API_KEY_SEALED_SECRET_INSTRUCT_PAYLOAD}.fakesignature"
export NGC_API_KEY_SEALED_SECRET_INSTRUCT
# Base64 encode the sealed secret for use in Kubernetes Secret data field
# (genpolicy only supports the 'data' field which expects base64 values)
NGC_API_KEY_SEALED_SECRET_INSTRUCT_BASE64=$(echo -n "${NGC_API_KEY_SEALED_SECRET_INSTRUCT}" | base64 -w0)
export NGC_API_KEY_SEALED_SECRET_INSTRUCT_BASE64
# Vault type sealed secret payload for embedqa pod:
# {
# "version": "0.1.0",
# "type": "vault",
# "name": "kbs:///default/ngc-api-key/embedqa",
# "provider": "kbs",
# "provider_settings": {},
# "annotations": {}
# }
NGC_API_KEY_SEALED_SECRET_EMBEDQA_PAYLOAD=$(
echo -n '{"version":"0.1.0","type":"vault","name":"kbs:///default/ngc-api-key/embedqa","provider":"kbs","provider_settings":{},"annotations":{}}' |
base64 -w0 | tr '+/' '-_' | tr -d '='
)
NGC_API_KEY_SEALED_SECRET_EMBEDQA="sealed.fakejwsheader.${NGC_API_KEY_SEALED_SECRET_EMBEDQA_PAYLOAD}.fakesignature"
export NGC_API_KEY_SEALED_SECRET_EMBEDQA
NGC_API_KEY_SEALED_SECRET_EMBEDQA_BASE64=$(echo -n "${NGC_API_KEY_SEALED_SECRET_EMBEDQA}" | base64 -w0)
export NGC_API_KEY_SEALED_SECRET_EMBEDQA_BASE64
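Because JWS segments use base64url without padding, plain `base64 -d` cannot decode them as-is. A sketch of the round trip, assuming GNU coreutils; `b64url_encode` and `b64url_decode` are illustrative helper names, not from this script:

```shell
#!/usr/bin/env bash
# Round-trip sketch for the JWS-style base64url payload built above.
# Decoding must restore the '=' padding that the sealing step strips.
set -euo pipefail

b64url_encode() {
    base64 -w0 | tr '+/' '-_' | tr -d '='
}

b64url_decode() {
    local s
    s=$(tr '_-' '/+')
    # base64 -d requires input padded to a multiple of four characters
    while (( ${#s} % 4 )); do s+='='; done
    printf '%s' "${s}" | base64 -d
}

payload='{"version":"0.1.0","type":"vault","name":"kbs:///default/ngc-api-key/instruct","provider":"kbs","provider_settings":{},"annotations":{}}'
sealed="sealed.fakejwsheader.$(printf '%s' "${payload}" | b64url_encode).fakesignature"

# Field 3 of the dot-separated token is the payload; decoding it
# yields the original JSON back.
printf '%s' "${sealed}" | cut -d. -f3 | b64url_decode
```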
setup_langchain_flow() {
# shellcheck disable=SC1091 # Sourcing virtual environment activation script
source "${HOME}"/.cicd/venv/bin/activate
@@ -66,18 +113,56 @@ setup_langchain_flow() {
[[ "$(pip show beautifulsoup4 2>/dev/null | awk '/^Version:/{print $2}')" = "4.13.4" ]] || pip install beautifulsoup4==4.13.4
}
setup_kbs_credentials() {
# Get KBS address and export it for pod template substitution
export CC_KBS_ADDR="$(kbs_k8s_svc_http_addr)"
# Create initdata TOML file for genpolicy with CDH configuration.
# This file is used by genpolicy via --initdata-path. Genpolicy will add the
# generated policy.rego to it and set it as the cc_init_data annotation.
# We must overwrite the default empty file AFTER create_tmp_policy_settings_dir()
# copies it to the temp directory.
create_nim_initdata_file() {
local output_file="$1"
local cc_kbs_address
cc_kbs_address=$(kbs_k8s_svc_http_addr)
kbs_set_gpu0_resource_policy
cat > "${output_file}" << EOF
version = "0.1.0"
algorithm = "sha256"
[data]
"aa.toml" = '''
[token_configs]
[token_configs.kbs]
url = "${cc_kbs_address}"
'''
"cdh.toml" = '''
[kbc]
name = "cc_kbc"
url = "${cc_kbs_address}"
[image]
authenticated_registry_credentials_uri = "kbs:///default/credentials/nvcr"
'''
EOF
}
setup_kbs_credentials() {
# Export KBS address for use in pod YAML templates (aa_kbc_params)
CC_KBS_ADDR=$(kbs_k8s_svc_http_addr)
export CC_KBS_ADDR
# Set up Kubernetes secret for the containerd metadata pull
kubectl delete secret ngc-secret-instruct --ignore-not-found
kubectl create secret docker-registry ngc-secret-instruct --docker-server="nvcr.io" --docker-username="\$oauthtoken" --docker-password="${NGC_API_KEY}"
kbs_set_gpu0_resource_policy
# KBS_AUTH_CONFIG_JSON is already base64 encoded
kbs_set_resource_base64 "default" "credentials" "nvcr" "${KBS_AUTH_CONFIG_JSON}"
# Store the actual NGC_API_KEY in KBS for sealed secret unsealing.
# The sealed secrets in the pod YAML point to these KBS resource paths.
kbs_set_resource "default" "ngc-api-key" "instruct" "${NGC_API_KEY}"
kbs_set_resource "default" "ngc-api-key" "embedqa" "${NGC_API_KEY}"
}
create_inference_pod() {
@@ -122,10 +207,6 @@ setup_file() {
export POD_EMBEDQA_YAML_IN="${pod_config_dir}/${POD_NAME_EMBEDQA}.yaml.in"
export POD_EMBEDQA_YAML="${pod_config_dir}/${POD_NAME_EMBEDQA}.yaml"
if [ "${TEE}" = "true" ]; then
setup_kbs_credentials
fi
dpkg -s jq >/dev/null 2>&1 || sudo apt -y install jq
export PYENV_ROOT="${HOME}/.pyenv"
@@ -140,6 +221,14 @@ setup_file() {
policy_settings_dir="$(create_tmp_policy_settings_dir "${pod_config_dir}")"
add_requests_to_policy_settings "${policy_settings_dir}" "ReadStreamRequest"
if [ "${TEE}" = "true" ]; then
setup_kbs_credentials
# Overwrite the empty default-initdata.toml with our CDH configuration.
# This must happen AFTER create_tmp_policy_settings_dir() copies the empty
# file and BEFORE auto_generate_policy() runs.
create_nim_initdata_file "${policy_settings_dir}/default-initdata.toml"
fi
create_inference_pod
if [ "${SKIP_MULTI_GPU_TESTS}" != "true" ]; then

View File

@@ -282,7 +282,7 @@ teardown() {
# Debugging information. Don't print the "Message:" line because it contains a truncated policy log.
kubectl describe pod "${pod_name}" | grep -v "Message:"
teardown_common "${node}" "${node_start_time:-}"
# Clean-up
kubectl delete pod "${pod_name}"
kubectl delete configmap "${configmap_name}"
@@ -291,4 +291,6 @@ teardown() {
rm -f "${incorrect_configmap_yaml}"
rm -f "${testcase_pre_generate_pod_yaml}"
rm -f "${testcase_pre_generate_configmap_yaml}"
teardown_common "${node}" "${node_start_time:-}"
}

View File

@@ -62,9 +62,11 @@ teardown() {
# Debugging information. Don't print the "Message:" line because it contains a truncated policy log.
kubectl describe pod "${pod_name}" | grep -v "Message:"
teardown_common "${node}" "${node_start_time:-}"
# Clean-up
kubectl delete -f "${correct_pod_yaml}"
kubectl delete -f "${pvc_yaml}"
rm -f "${incorrect_pod_yaml}"
teardown_common "${node}" "${node_start_time:-}"
}

View File

@@ -42,6 +42,7 @@ else
)
K8S_TEST_SMALL_HOST_UNION=( \
"k8s-empty-image.bats" \
"k8s-guest-pull-image.bats" \
"k8s-confidential.bats" \
"k8s-sealed-secret.bats" \

View File

@@ -8,7 +8,6 @@ kind: Pod
metadata:
  name: busybox
spec:
  terminationGracePeriodSeconds: 0
  shareProcessNamespace: true
  runtimeClassName: kata
  containers:

View File

@@ -8,7 +8,6 @@ kind: Pod
metadata:
  name: POD_NAME
spec:
  terminationGracePeriodSeconds: 0
  runtimeClassName: kata
  shareProcessNamespace: true
  containers:

View File

@@ -8,7 +8,6 @@ kind: Pod
metadata:
  name: initcontainer-shared-volume
spec:
  terminationGracePeriodSeconds: 0
  runtimeClassName: kata
  initContainers:
  - name: first

View File

@@ -8,7 +8,6 @@ kind: Pod
metadata:
  name: busybox
spec:
  terminationGracePeriodSeconds: 0
  shareProcessNamespace: true
  runtimeClassName: kata
  initContainers:

View File

@@ -16,7 +16,6 @@ spec:
      labels:
        jobgroup: jobtest
    spec:
      terminationGracePeriodSeconds: 0
      runtimeClassName: kata
      containers:
      - name: test

View File

@@ -10,7 +10,6 @@ metadata:
spec:
  template:
    spec:
      terminationGracePeriodSeconds: 0
      runtimeClassName: kata
      containers:
      - name: pi

View File

@@ -23,7 +23,6 @@ spec:
        role: master
        tier: backend
    spec:
      terminationGracePeriodSeconds: 0
      runtimeClassName: kata
      securityContext:
        runAsUser: 2000

View File

@@ -23,7 +23,6 @@ spec:
        role: master
        tier: backend
    spec:
      terminationGracePeriodSeconds: 0
      runtimeClassName: kata
      securityContext:
        runAsUser: 2000

View File

@@ -23,7 +23,6 @@ spec:
        role: master
        tier: backend
    spec:
      terminationGracePeriodSeconds: 0
      runtimeClassName: kata
      securityContext:
        runAsUser: 65534

View File

@@ -23,7 +23,6 @@ spec:
        role: master
        tier: backend
    spec:
      terminationGracePeriodSeconds: 0
      runtimeClassName: kata
      securityContext:
        runAsUser: 2000

View File

@@ -23,7 +23,6 @@ spec:
        role: master
        tier: backend
    spec:
      terminationGracePeriodSeconds: 0
      runtimeClassName: kata
      securityContext:
        runAsUser: 1000

View File

@@ -8,7 +8,6 @@ kind: Pod
metadata:
  name: hard-coded-policy-pod
spec:
  terminationGracePeriodSeconds: 0
  shareProcessNamespace: true
  runtimeClassName: kata
  containers:

View File

@@ -10,7 +10,6 @@ metadata:
spec:
  template:
    spec:
      terminationGracePeriodSeconds: 0
      runtimeClassName: kata
      containers:
      - name: hello

View File

@@ -8,7 +8,6 @@ kind: Pod
metadata:
  name: policy-pod-pvc
spec:
  terminationGracePeriodSeconds: 0
  runtimeClassName: kata
  containers:
  - name: busybox

View File

@@ -9,7 +9,6 @@ metadata:
  name: policy-pod
  uid: policy-pod-uid
spec:
  terminationGracePeriodSeconds: 0
  runtimeClassName: kata
  containers:
  - name: prometheus

View File

@@ -17,7 +17,6 @@ spec:
      labels:
        app: policy-nginx-rc
    spec:
      terminationGracePeriodSeconds: 0
      runtimeClassName: kata
      containers:
      - name: nginxtest

View File

@@ -8,7 +8,6 @@ kind: Pod
metadata:
  name: set-keys-test
spec:
  terminationGracePeriodSeconds: 0
  shareProcessNamespace: true
  runtimeClassName: kata
  containers:

View File

@@ -9,7 +9,6 @@ kind: Pod
metadata:
  name: handlers
spec:
  terminationGracePeriodSeconds: 0
  runtimeClassName: kata
  containers:
  - name: handlers-container

View File

@@ -17,7 +17,6 @@ spec:
      labels:
        app: nginx
    spec:
      terminationGracePeriodSeconds: 0
      runtimeClassName: kata
      containers:
      - name: nginx

View File

@@ -0,0 +1,13 @@
apiVersion: v1
kind: Pod
metadata:
  name: no-layer-image
spec:
  runtimeClassName: kata
  containers:
  - name: no-layer-image
    image: ghcr.io/kata-containers/no-layer-image:latest
    resources: {}
    command:
    - sleep
    - infinity

View File

@@ -10,7 +10,11 @@ metadata:
  labels:
    app: ${POD_NAME_INSTRUCT}
  annotations:
    io.katacontainers.config.hypervisor.kernel_params: "agent.image_registry_auth=kbs:///default/credentials/nvcr agent.aa_kbc_params=cc_kbc::${CC_KBS_ADDR}"
    # Start CDH process and configure AA for KBS communication
    # aa_kbc_params tells the Attestation Agent where KBS is located
    io.katacontainers.config.hypervisor.kernel_params: "agent.guest_components_procs=confidential-data-hub agent.aa_kbc_params=cc_kbc::${CC_KBS_ADDR}"
    # cc_init_data annotation will be added by genpolicy with CDH configuration
    # from the custom default-initdata.toml created by create_nim_initdata_file()
spec:
  restartPolicy: Never
  runtimeClassName: kata
@@ -58,7 +62,7 @@ spec:
    - name: NGC_API_KEY
      valueFrom:
        secretKeyRef:
          name: ngc-api-key-instruct
          name: ngc-api-key-sealed-instruct
          key: api-key
    # GPU resource limit (for NVIDIA GPU)
    resources:
@@ -78,7 +82,9 @@ data:
apiVersion: v1
kind: Secret
metadata:
  name: ngc-api-key-instruct
  name: ngc-api-key-sealed-instruct
type: Opaque
data:
  api-key: "${NGC_API_KEY_BASE64}"
  # Sealed secret pointing to kbs:///default/ngc-api-key/instruct
  # CDH will unseal this by fetching the actual key from KBS
  api-key: "${NGC_API_KEY_SEALED_SECRET_INSTRUCT_BASE64}"

Some files were not shown because too many files have changed in this diff.