release: Bump version to 3.24.0

Bump VERSION and helm-chart versions

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
tests: nvidia: Always rely on the "kata" default runtime class
2026-02-28 09:42:21 +00:00 · 2025-12-12 18:15:41 +01:00 · 2025-12-12 16:31:42 +01:00 · 2025-12-12 16:31:42 +01:00 · 2025-12-12 16:31:42 +01:00 · 2025-12-12 16:31:42 +01:00
372 changed files with 18984 additions and 8243 deletions
--- a/.github/actionlint.yaml
+++ b/.github/actionlint.yaml
@@ -10,11 +10,6 @@ self-hosted-runner:
    - amd64-nvidia-a100
    - amd64-nvidia-h100-snp
    - arm64-k8s
-    - containerd-v1.7-overlayfs
-    - containerd-v2.0-overlayfs
-    - containerd-v2.1-overlayfs
-    - containerd-v2.2
-    - containerd-v2.2-overlayfs
    - garm-ubuntu-2004
    - garm-ubuntu-2004-smaller
    - garm-ubuntu-2204
@@ -25,6 +20,7 @@ self-hosted-runner:
    - ppc64le-k8s
    - ppc64le-small
    - ubuntu-24.04-ppc64le
+    - ubuntu-24.04-s390x
    - metrics
    - riscv-builder
    - sev-snp
--- a/.github/workflows/basic-ci-amd64.yaml
+++ b/.github/workflows/basic-ci-amd64.yaml
@@ -71,7 +71,7 @@ jobs:
      fail-fast: false
      matrix:
        containerd_version: ['lts', 'active']
-        vmm: ['clh', 'cloud-hypervisor', 'dragonball', 'qemu']
+        vmm: ['clh', 'cloud-hypervisor', 'dragonball', 'qemu', 'qemu-runtime-rs']
    runs-on: ubuntu-22.04
    env:
      CONTAINERD_VERSION: ${{ matrix.containerd_version }}
@@ -117,7 +117,7 @@ jobs:
      fail-fast: false
      matrix:
        containerd_version: ['lts', 'active']
-        vmm: ['clh', 'qemu', 'dragonball']
+        vmm: ['clh', 'qemu', 'dragonball', 'qemu-runtime-rs']
    runs-on: ubuntu-22.04
    env:
      CONTAINERD_VERSION: ${{ matrix.containerd_version }}
@@ -292,6 +292,7 @@ jobs:
          - dragonball
          - qemu
          - cloud-hypervisor
+          - qemu-runtime-rs
    runs-on: ubuntu-22.04
    env:
      KATA_HYPERVISOR: ${{ matrix.vmm }}
--- a/.github/workflows/build-checks.yaml
+++ b/.github/workflows/build-checks.yaml
@@ -12,7 +12,12 @@ name: Build checks
 jobs:
  check:
    name: check
-    runs-on: ${{ matrix.runner || inputs.instance }}
+    runs-on: >-
+      ${{
+        ( contains(inputs.instance, 's390x') && matrix.component.name == 'runtime' ) && 's390x' ||
+        ( contains(inputs.instance, 'ppc64le') && (matrix.component.name == 'runtime' || matrix.component.name == 'agent') ) && 'ppc64le' ||
+        inputs.instance
+      }}
    strategy:
      fail-fast: false
      matrix:
@@ -70,36 +75,6 @@ jobs:
              - protobuf-compiler
        instance:
          - ${{ inputs.instance }} 
-        include:
-          - component:
-              name: runtime
-              path: src/runtime
-              needs:
-                - golang
-                - XDG_RUNTIME_DIR
-            instance: ubuntu-24.04-s390x
-            runner: s390x
-          - component:
-              name: runtime
-              path: src/runtime
-              needs:
-                - golang
-                - XDG_RUNTIME_DIR
-            instance: ubuntu-24.04-ppc64le
-            runner: ppc64le
-          - component:
-              name: agent
-              path: src/agent
-              needs:
-                - rust
-                - libdevmapper
-                - libseccomp
-                - protobuf-compiler
-                - clang
-            instance: ubuntu-24.04-ppc64le
-            runner: ppc64le
-
-             

    steps:
      - name: Adjust a permission for repo
--- a/.github/workflows/build-kata-static-tarball-amd64.yaml
+++ b/.github/workflows/build-kata-static-tarball-amd64.yaml
@@ -121,7 +121,7 @@ jobs:
          echo "oci-name=${oci_image%@*}" >> "$GITHUB_OUTPUT"
          echo "oci-digest=${oci_image#*@}" >> "$GITHUB_OUTPUT"

-      - uses: oras-project/setup-oras@5c0b487ce3fe0ce3ab0d034e63669e426e294e4d # v1.2.2
+      - uses: oras-project/setup-oras@22ce207df3b08e061f537244349aac6ae1d214f6 # v1.2.4
        if: ${{ env.PERFORM_ATTESTATION == 'yes' }}
        with:
          version: "1.2.0"
@@ -171,6 +171,8 @@ jobs:
          - rootfs-image
          - rootfs-image-confidential
          - rootfs-image-mariner
+          - rootfs-image-nvidia-gpu
+          - rootfs-image-nvidia-gpu-confidential
          - rootfs-initrd
          - rootfs-initrd-confidential
          - rootfs-initrd-nvidia-gpu
--- a/.github/workflows/build-kata-static-tarball-arm64.yaml
+++ b/.github/workflows/build-kata-static-tarball-arm64.yaml
@@ -102,7 +102,7 @@ jobs:
          echo "oci-name=${oci_image%@*}" >> "$GITHUB_OUTPUT"
          echo "oci-digest=${oci_image#*@}" >> "$GITHUB_OUTPUT"

-      - uses: oras-project/setup-oras@5c0b487ce3fe0ce3ab0d034e63669e426e294e4d # v1.2.2
+      - uses: oras-project/setup-oras@22ce207df3b08e061f537244349aac6ae1d214f6 # v1.2.4
        if: ${{ env.PERFORM_ATTESTATION == 'yes' }}
        with:
          version: "1.2.0"
@@ -150,6 +150,7 @@ jobs:
      matrix:
        asset:
          - rootfs-image
+          - rootfs-image-nvidia-gpu
          - rootfs-initrd
          - rootfs-initrd-nvidia-gpu
    steps:
--- a/.github/workflows/build-kata-static-tarball-ppc64le.yaml
+++ b/.github/workflows/build-kata-static-tarball-ppc64le.yaml
@@ -32,7 +32,7 @@ jobs:
    permissions:
      contents: read
      packages: write
-    runs-on: ppc64le-small
+    runs-on: ubuntu-24.04-ppc64le
    strategy:
      matrix:
        asset:
@@ -89,7 +89,7 @@ jobs:

  build-asset-rootfs:
    name: build-asset-rootfs
-    runs-on: ppc64le-small
+    runs-on: ubuntu-24.04-ppc64le
    needs: build-asset
    permissions:
      contents: read
@@ -170,7 +170,7 @@ jobs:

  build-asset-shim-v2:
    name: build-asset-shim-v2
-    runs-on: ppc64le-small
+    runs-on: ubuntu-24.04-ppc64le
    needs: [build-asset, build-asset-rootfs, remove-rootfs-binary-artifacts]
    permissions:
      contents: read
@@ -230,7 +230,7 @@ jobs:

  create-kata-tarball:
    name: create-kata-tarball
-    runs-on: ppc64le-small
+    runs-on: ubuntu-24.04-ppc64le
    needs: [build-asset, build-asset-rootfs, build-asset-shim-v2]
    permissions:
      contents: read
--- a/.github/workflows/build-kata-static-tarball-s390x.yaml
+++ b/.github/workflows/build-kata-static-tarball-s390x.yaml
@@ -32,7 +32,7 @@ permissions: {}
 jobs:
  build-asset:
    name: build-asset
-    runs-on: s390x
+    runs-on: ubuntu-24.04-s390x
    permissions:
      contents: read
      packages: write
@@ -257,7 +257,7 @@ jobs:

  build-asset-shim-v2:
    name: build-asset-shim-v2
-    runs-on: s390x
+    runs-on: ubuntu-24.04-s390x
    needs: [build-asset, build-asset-rootfs, remove-rootfs-binary-artifacts]
    permissions:
      contents: read
@@ -319,7 +319,7 @@ jobs:

  create-kata-tarball:
    name: create-kata-tarball
-    runs-on: s390x
+    runs-on: ubuntu-24.04-s390x
    needs:
      - build-asset
      - build-asset-rootfs
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -147,7 +147,7 @@ jobs:
      tag: ${{ inputs.tag }}-s390x
      commit-hash: ${{ inputs.commit-hash }}
      target-branch: ${{ inputs.target-branch }}
-      runner: s390x
+      runner: ubuntu-24.04-s390x
      arch: s390x
    secrets:
      QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
@@ -165,7 +165,7 @@ jobs:
      tag: ${{ inputs.tag }}-ppc64le
      commit-hash: ${{ inputs.commit-hash }}
      target-branch: ${{ inputs.target-branch }}
-      runner: ppc64le-small
+      runner: ubuntu-24.04-ppc64le
      arch: ppc64le
    secrets:
      QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
@@ -314,6 +314,7 @@ jobs:
    needs: publish-kata-deploy-payload-amd64
    uses: ./.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml
    with:
+      tarball-suffix: -${{ inputs.tag }}
      registry: ghcr.io
      repo: ${{ github.repository_owner }}/kata-deploy-ci
      tag: ${{ inputs.tag }}-amd64
@@ -473,7 +474,7 @@ jobs:
      vmm: ${{ matrix.params.vmm }}

  run-cri-containerd-tests-arm64:
-    if: ${{ inputs.skip-test != 'yes' }}
+    if: false
    needs: build-kata-static-tarball-arm64
    strategy:
      fail-fast: false
--- a/.github/workflows/gatekeeper.yaml
+++ b/.github/workflows/gatekeeper.yaml
@@ -10,7 +10,9 @@ on:
      - opened
      - synchronize
      - reopened
+      - edited
      - labeled
+      - unlabeled

 permissions: {}

--- a/.github/workflows/release-ppc64le.yaml
+++ b/.github/workflows/release-ppc64le.yaml
@@ -31,7 +31,7 @@ jobs:
    permissions:
      contents: read
      packages: write
-    runs-on: ppc64le-small
+    runs-on: ubuntu-24.04-ppc64le
    steps:
      - name: Login to Kata Containers ghcr.io
        uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
--- a/.github/workflows/release-s390x.yaml
+++ b/.github/workflows/release-s390x.yaml
@@ -35,7 +35,7 @@ jobs:
    permissions:
      contents: read
      packages: write
-    runs-on: s390x
+    runs-on: ubuntu-24.04-s390x
    steps:
      - name: Login to Kata Containers ghcr.io
        uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
--- a/.github/workflows/run-containerd-guest-pull-stability-tests.yaml
+++ b/.github/workflows/run-containerd-guest-pull-stability-tests.yaml
@@ -1,167 +0,0 @@
-name: CI | Run containerd guest pull stability tests
-on:
-  schedule:
-    - cron: "0 */1 * * *" #run every hour
-
-permissions: {}
-
-# This job relies on k8s pre-installed using kubeadm
-jobs:
-  run-containerd-guest-pull-stability-tests:
-    name: run-containerd-guest-pull-stability-tests-${{ matrix.environment.test-type }}-${{ matrix.environment.containerd }}
-    strategy:
-      fail-fast: false
-      matrix:
-        environment: [
-          { test-type: multi-snapshotter, containerd: v2.2 },
-          { test-type: force-guest-pull, containerd: v1.7 },
-          { test-type: force-guest-pull, containerd: v2.0 },
-          { test-type: force-guest-pull, containerd: v2.1 },
-          { test-type: force-guest-pull, containerd: v2.2 },
-        ]
-    env:
-      # I don't want those to be inside double quotes, so I'm deliberately ignoring the double quotes here.
-      IMAGES_LIST: quay.io/mongodb/mongodb-community-server@sha256:8b73733842da21b6bbb6df4d7b2449229bb3135d2ec8c6880314d88205772a11 ghcr.io/edgelesssys/redis@sha256:ecb0a964c259a166a1eb62f0eb19621d42bd1cce0bc9bb0c71c828911d4ba93d
-    runs-on: containerd-${{ matrix.environment.test-type }}-${{ matrix.environment.containerd }}
-    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          persist-credentials: false
-
-      - name: Rotate the journal
-        run: sudo journalctl --rotate --vacuum-time 1s
-
-      - name: Pull the kata-deploy image to be used
-        run: sudo ctr -n k8s.io image pull quay.io/kata-containers/kata-deploy-ci:kata-containers-latest
-
-      - name: Deploy Kata Containers
-        run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
-        env:
-          KATA_HYPERVISOR: qemu-coco-dev
-          KUBERNETES: vanilla
-          SNAPSHOTTER: ${{ matrix.environment.test-type == 'multi-snapshotter' && 'nydus' || '' }}
-          USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: ${{ matrix.environment.test-type == 'multi-snapshotter' }}
-          EXPERIMENTAL_FORCE_GUEST_PULL: ${{ matrix.environment.test-type == 'force-guest-pull' && 'qemu-coco-dev' || '' }}
-
-      # This is needed as we may hit the createContainerTimeout
-      - name: Adjust Kata Containers' create_container_timeout
-        run: |
-          sudo sed -i -e 's/^\(create_container_timeout\).*=.*$/\1 = 600/g' /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
-          grep "create_container_timeout.*=" /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
-
-      # This is needed in order to have enough tmpfs space inside the guest to pull the image
-      - name: Adjust Kata Containers' default_memory
-        run: |
-          sudo sed -i -e 's/^\(default_memory\).*=.*$/\1 = 4096/g' /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
-          grep "default_memory.*=" /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
-
-      - name: Run a few containers using overlayfs
-        run: |
-          # I don't want those to be inside double quotes, so I'm deliberately ignoring the double quotes here
-          # shellcheck disable=SC2086
-          for img in ${IMAGES_LIST}; do
-            echo "overlayfs | Using on image: ${img}"
-            pod="$(echo ${img} | tr ':.@/' '-' | awk '{print substr($0,1,56)}')"
-            kubectl run "${pod}" \
-              -it --rm \
-              --restart=Never \
-              --image="${img}" \
-              --image-pull-policy=Always \
-              --pod-running-timeout=10m \
-              -- uname -r
-          done
-          
-      - name: Run a the same few containers using a different snapshotter
-        run: |
-          # I don't want those to be inside double quotes, so I'm deliberately ignoring the double quotes here
-          # shellcheck disable=SC2086
-          for img in ${IMAGES_LIST}; do
-            echo "nydus | Using on image: ${img}"
-            pod="kata-$(echo ${img} | tr ':.@/' '-' | awk '{print substr($0,1,56)}')"
-            kubectl run "${pod}" \
-              -it --rm \
-              --restart=Never \
-              --image="${img}" \
-              --image-pull-policy=Always \
-              --pod-running-timeout=10m \
-              --overrides='{
-                "spec": {
-                  "runtimeClassName": "kata-qemu-coco-dev"
-                }
-              }' \
-              -- uname -r
-          done
-
-      - name: Uninstall Kata Containers
-        run: bash tests/integration/kubernetes/gha-run.sh cleanup
-        env:
-          KATA_HYPERVISOR: qemu-coco-dev
-          KUBERNETES: vanilla
-          SNAPSHOTTER: nydus
-          USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: true
-
-      - name: Run a few containers using overlayfs
-        run: |
-          # I don't want those to be inside double quotes, so I'm deliberately ignoring the double quotes here
-          # shellcheck disable=SC2086
-          for img in ${IMAGES_LIST}; do
-            echo "overlayfs | Using on image: ${img}"
-            pod="$(echo ${img} | tr ':.@/' '-' | awk '{print substr($0,1,56)}')"
-            kubectl run "${pod}" \
-              -it --rm \
-              --restart=Never \
-              --image=${img} \
-              --image-pull-policy=Always \
-              --pod-running-timeout=10m \
-              -- uname -r
-          done
-          
-      - name: Deploy Kata Containers
-        run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
-        env:
-          KATA_HYPERVISOR: qemu-coco-dev
-          KUBERNETES: vanilla
-          SNAPSHOTTER: nydus
-          USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: true
-
-      # This is needed as we may hit the createContainerTimeout
-      - name: Adjust Kata Containers' create_container_timeout
-        run: |
-          sudo sed -i -e 's/^\(create_container_timeout\).*=.*$/\1 = 600/g' /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
-          grep "create_container_timeout.*=" /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
-
-      # This is needed in order to have enough tmpfs space inside the guest to pull the image
-      - name: Adjust Kata Containers' default_memory
-        run: |
-          sudo sed -i -e 's/^\(default_memory\).*=.*$/\1 = 4096/g' /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
-          grep "default_memory.*=" /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
-
-      - name: Run a the same few containers using a different snapshotter
-        run: |
-          # I don't want those to be inside double quotes, so I'm deliberately ignoring the double quotes here
-          # shellcheck disable=SC2086
-          for img in ${IMAGES_LIST}; do
-            echo "nydus | Using on image: ${img}"
-            pod="kata-$(echo ${img} | tr ':.@/' '-' | awk '{print substr($0,1,56)}')"
-            kubectl run "${pod}" \
-              -it --rm \
-              --restart=Never \
-              --image="${img}" \
-              --image-pull-policy=Always \
-              --pod-running-timeout=10m \
-              --overrides='{
-                "spec": {
-                  "runtimeClassName": "kata-qemu-coco-dev"
-                }
-              }' \
-              -- uname -r
-          done
-
-      - name: Uninstall Kata Containers
-        run: bash tests/integration/kubernetes/gha-run.sh cleanup || true
-        if: always()
-        env:
-          KATA_HYPERVISOR: qemu-coco-dev
-          KUBERNETES: vanilla
-          SNAPSHOTTER: nydus
-          USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: true
--- a/.github/workflows/run-k8s-tests-on-aks.yaml
+++ b/.github/workflows/run-k8s-tests-on-aks.yaml
@@ -142,6 +142,10 @@ jobs:
        timeout-minutes: 60
        run: bash tests/integration/kubernetes/gha-run.sh run-tests

+      - name: Report tests
+        if: always()
+        run: bash tests/integration/kubernetes/gha-run.sh report-tests
+
      - name: Refresh OIDC token in case access token expired
        if: always()
        uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
--- a/.github/workflows/run-k8s-tests-on-arm64.yaml
+++ b/.github/workflows/run-k8s-tests-on-arm64.yaml
@@ -68,6 +68,10 @@ jobs:
        timeout-minutes: 30
        run: bash tests/integration/kubernetes/gha-run.sh run-tests

+      - name: Report tests
+        if: always()
+        run: bash tests/integration/kubernetes/gha-run.sh report-tests
+
      - name: Collect artifacts ${{ matrix.vmm }}
        if: always()
        run: bash tests/integration/kubernetes/gha-run.sh collect-artifacts
--- a/.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml
+++ b/.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml
@@ -1,7 +1,10 @@
-name: CI | Run NVIDIA GPU kubernetes tests on arm64
+name: CI | Run NVIDIA GPU kubernetes tests on amd64
 on:
  workflow_call:
    inputs:
+      tarball-suffix:
+        required: true
+        type: string
      registry:
        required: true
        type: string
@@ -45,6 +48,7 @@ jobs:
      GH_PR_NUMBER: ${{ inputs.pr-number }}
      KATA_HYPERVISOR: ${{ matrix.environment.vmm }}
      KUBERNETES: kubeadm
+      KBS: ${{ matrix.environment.name == 'nvidia-gpu-snp' && 'true' || 'false' }}
      K8S_TEST_HOST_TYPE: baremetal
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -59,6 +63,15 @@ jobs:
        env:
          TARGET_BRANCH: ${{ inputs.target-branch }}

+      - name: get-kata-tarball
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
+        with:
+          name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
+          path: kata-artifacts
+
+      - name: Install kata
+        run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-artifacts
+
      - name: Uninstall previous `kbs-client`
        if: matrix.environment.name != 'nvidia-gpu'
        timeout-minutes: 10
@@ -89,6 +102,11 @@ jobs:
        run: bash tests/integration/kubernetes/gha-run.sh run-nv-tests
        env:
          NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
+
+      - name: Report tests
+        if: always()
+        run: bash tests/integration/kubernetes/gha-run.sh report-tests
+
      - name: Collect artifacts ${{ matrix.environment.vmm }}
        if: always()
        run: bash tests/integration/kubernetes/gha-run.sh collect-artifacts
--- a/.github/workflows/run-k8s-tests-on-ppc64le.yaml
+++ b/.github/workflows/run-k8s-tests-on-ppc64le.yaml
@@ -75,3 +75,7 @@ jobs:
      - name: Run tests
        timeout-minutes: 30
        run: bash tests/integration/kubernetes/gha-run.sh run-tests
+
+      - name: Report tests
+        if: always()
+        run: bash tests/integration/kubernetes/gha-run.sh report-tests
--- a/.github/workflows/run-k8s-tests-on-zvsi.yaml
+++ b/.github/workflows/run-k8s-tests-on-zvsi.yaml
@@ -131,6 +131,10 @@ jobs:
        timeout-minutes: 60
        run: bash tests/integration/kubernetes/gha-run.sh run-tests

+      - name: Report tests
+        if: always()
+        run: bash tests/integration/kubernetes/gha-run.sh report-tests
+
      - name: Delete kata-deploy
        if: always()
        run: bash tests/integration/kubernetes/gha-run.sh cleanup-zvsi
--- a/.github/workflows/run-kata-coco-stability-tests.yaml
+++ b/.github/workflows/run-kata-coco-stability-tests.yaml
@@ -46,6 +46,7 @@ jobs:
      matrix:
        vmm:
          - qemu-coco-dev
+          - qemu-coco-dev-runtime-rs
        snapshotter:
          - nydus
        pull-type:
@@ -139,6 +140,10 @@ jobs:
        timeout-minutes: 300
        run: bash tests/stability/gha-stability-run.sh run-tests

+      - name: Report tests
+        if: always()
+        run: bash tests/integration/kubernetes/gha-run.sh report-tests
+
      - name: Refresh OIDC token in case access token expired
        if: always()
        uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
--- a/.github/workflows/run-kata-coco-tests.yaml
+++ b/.github/workflows/run-kata-coco-tests.yaml
@@ -159,6 +159,7 @@ jobs:
      AUTHENTICATED_IMAGE_USER: ${{ vars.AUTHENTICATED_IMAGE_USER }}
      AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
      SNAPSHOTTER: ${{ matrix.snapshotter }}
+      EXPERIMENTAL_FORCE_GUEST_PULL: ${{ matrix.pull-type == 'experimental-force-guest-pull' && matrix.vmm || '' }}
      # Caution: current ingress controller used to expose the KBS service
      # requires much vCPUs, lefting only a few for the tests. Depending on the
      # host type chose it will result on the creation of a cluster with
@@ -217,7 +218,6 @@ jobs:
        timeout-minutes: 20
        run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-aks
        env:
-          EXPERIMENTAL_FORCE_GUEST_PULL: ${{ env.PULL_TYPE == 'experimental-force-guest-pull' && env.KATA_HYPERVISOR || '' }}
          USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: ${{ env.SNAPSHOTTER == 'nydus' }}
          AUTO_GENERATE_POLICY: ${{ env.PULL_TYPE == 'experimental-force-guest-pull' && 'no' || 'yes' }}

--- a/.github/workflows/run-kata-deploy-tests-on-aks.yaml
+++ b/.github/workflows/run-kata-deploy-tests-on-aks.yaml
@@ -102,6 +102,10 @@ jobs:
      - name: Run tests
        run: bash tests/functional/kata-deploy/gha-run.sh run-tests

+      - name: Report tests
+        if: always()
+        run: bash tests/integration/kubernetes/gha-run.sh report-tests
+
      - name: Refresh OIDC token in case access token expired
        if: always()
        uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
--- a/.github/workflows/run-kata-deploy-tests.yaml
+++ b/.github/workflows/run-kata-deploy-tests.yaml
@@ -85,3 +85,7 @@ jobs:

      - name: Run tests
        run: bash tests/functional/kata-deploy/gha-run.sh run-tests
+
+      - name: Report tests
+        if: always()
+        run: bash tests/integration/kubernetes/gha-run.sh report-tests
--- a/.github/workflows/static-checks-self-hosted.yaml
+++ b/.github/workflows/static-checks-self-hosted.yaml
@@ -29,7 +29,7 @@ jobs:
      matrix:
        instance:
          - "ubuntu-24.04-arm"
-          - "s390x"
+          - "ubuntu-24.04-s390x"
          - "ubuntu-24.04-ppc64le"
    uses: ./.github/workflows/build-checks.yaml
    with:
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,4 @@ src/tools/log-parser/kata-log-parser
 tools/packaging/static-build/agent/install_libseccomp.sh
 .envrc
 .direnv
+**/.DS_Store
--- a/src/dragonball/Cargo.lock
+++ b/src/dragonball/Cargo.lock
@@ -4,18 +4,18 @@ version = 4

 [[package]]
 name = "addr2line"
-version = "0.21.0"
+version = "0.25.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
+checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b"
 dependencies = [
 "gimli",
 ]

 [[package]]
-name = "adler"
-version = "1.0.2"
+name = "adler2"
+version = "2.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
+checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"

 [[package]]
 name = "android-tzdata"
@@ -64,17 +64,17 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"

 [[package]]
 name = "backtrace"
-version = "0.3.69"
+version = "0.3.76"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837"
+checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6"
 dependencies = [
 "addr2line",
- "cc",
 "cfg-if",
 "libc",
 "miniz_oxide",
 "object",
 "rustc-demangle",
+ "windows-link",
 ]

 [[package]]
@@ -638,9 +638,9 @@ dependencies = [

 [[package]]
 name = "flate2"
-version = "1.0.27"
+version = "1.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c6c98ee8095e9d1dcbf2fcc6d95acccb90d1c81db1e44725c6a984b1dbdfb010"
+checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb"
 dependencies = [
 "crc32fast",
 "libz-sys",
@@ -780,9 +780,9 @@ dependencies = [

 [[package]]
 name = "gimli"
-version = "0.28.0"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0"
+checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7"

 [[package]]
 name = "h2"
@@ -1250,11 +1250,12 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"

 [[package]]
 name = "miniz_oxide"
-version = "0.7.1"
+version = "0.8.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
+checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
 dependencies = [
- "adler",
+ "adler2",
+ "simd-adler32",
 ]

 [[package]]
@@ -1452,9 +1453,9 @@ dependencies = [

 [[package]]
 name = "object"
-version = "0.32.1"
+version = "0.37.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0"
+checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe"
 dependencies = [
 "memchr",
 ]
@@ -1756,9 +1757,9 @@ dependencies = [

 [[package]]
 name = "rustc-demangle"
-version = "0.1.23"
+version = "0.1.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
+checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace"

 [[package]]
 name = "rustix"
@@ -1926,6 +1927,12 @@ version = "1.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"

+[[package]]
+name = "simd-adler32"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
+
 [[package]]
 name = "slab"
 version = "0.4.11"
@@ -2553,6 +2560,12 @@ dependencies = [
 "windows-targets 0.48.5",
 ]

+[[package]]
+name = "windows-link"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
+
 [[package]]
 name = "windows-sys"
 version = "0.48.0"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -0,0 +1,72 @@
+[workspace.package]
+authors = ["The Kata Containers community <kata-dev@lists.katacontainers.io>"]
+edition = "2018"
+license = "Apache-2.0"
+rust-version = "1.85.1"
+
+[workspace]
+members = [
+    # Dragonball
+    "src/dragonball",
+    "src/dragonball/dbs_acpi",
+    "src/dragonball/dbs_address_space",
+    "src/dragonball/dbs_allocator",
+    "src/dragonball/dbs_arch",
+    "src/dragonball/dbs_boot",
+    "src/dragonball/dbs_device",
+    "src/dragonball/dbs_interrupt",
+    "src/dragonball/dbs_legacy_devices",
+    "src/dragonball/dbs_pci",
+    "src/dragonball/dbs_tdx",
+    "src/dragonball/dbs_upcall",
+    "src/dragonball/dbs_utils",
+    "src/dragonball/dbs_virtio_devices",
+]
+resolver = "2"
+
+# TODO: Add all excluded crates to root workspace
+exclude = [
+    "src/agent",
+    "src/tools",
+    "src/libs",
+    "src/runtime-rs",
+
+    # We are cloning and building rust packages under
+    # "tools/packaging/kata-deploy/local-build/build" folder, which may mislead
+    # those packages to think they are part of the kata root workspace
+    "tools/packaging/kata-deploy/local-build/build",
+]
+
+[workspace.dependencies]
+# Rust-VMM crates
+event-manager = "0.2.1"
+kvm-bindings = "0.6.0"
+kvm-ioctls = "=0.12.1"
+linux-loader = "0.8.0"
+seccompiler = "0.5.0"
+vfio-bindings = "0.3.0"
+vfio-ioctls = "0.1.0"
+virtio-bindings = "0.1.0"
+virtio-queue = "0.7.0"
+vm-fdt = "0.2.0"
+vm-memory = "0.10.0"
+vm-superio = "0.5.0"
+vmm-sys-util = "0.11.0"
+
+# Local dependencies from Dragonball Sandbox crates
+dbs-acpi = { path = "src/dragonball/dbs_acpi" }
+dbs-address-space = { path = "src/dragonball/dbs_address_space" }
+dbs-allocator = { path = "src/dragonball/dbs_allocator" }
+dbs-arch = { path = "src/dragonball/dbs_arch" }
+dbs-boot = { path = "src/dragonball/dbs_boot" }
+dbs-device = { path = "src/dragonball/dbs_device" }
+dbs-interrupt = { path = "src/dragonball/dbs_interrupt" }
+dbs-legacy-devices = { path = "src/dragonball/dbs_legacy_devices" }
+dbs-pci = { path = "src/dragonball/dbs_pci" }
+dbs-tdx = { path = "src/dragonball/dbs_tdx" }
+dbs-upcall = { path = "src/dragonball/dbs_upcall" }
+dbs-utils = { path = "src/dragonball/dbs_utils" }
+dbs-virtio-devices = { path = "src/dragonball/dbs_virtio_devices" }
+
+# Local dependencies from `src/libs`
+test-utils = { path = "src/libs/test-utils" }
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-3.22.0
+3.24.0
--- a/docs/README.md
+++ b/docs/README.md
@@ -83,3 +83,7 @@ Documents that help to understand and contribute to Kata Containers.
 If you have a suggestion for how we can improve the
 [website](https://katacontainers.io), please raise an issue (or a PR) on
 [the repository that holds the source for the website](https://github.com/OpenStackweb/kata-netlify-refresh).
+
+### Toolchain Guidance
+
+* [Toolchain Guidance](./Toochain-Guidance.md)
--- a/docs/Toochain-Guidance.md
+++ b/docs/Toochain-Guidance.md
@@ -0,0 +1,39 @@
+# Toolchains
+
+As a community we want to strike a balance between having up-to-date toolchains, to receive the
+latest security fixes and to be able to benefit from new features and packages, whilst not being
+too bleeding edge and disrupting downstream and other consumers. As a result we have the following
+guidelines (note, not hard rules) for our Go and Rust toolchains that we are trying out:
+
+## Go toolchain
+
+Go is released [every six months](https://go.dev/wiki/Go-Release-Cycle) with support for the
+[last two major release versions](https://go.dev/doc/devel/release#policy). We always want to
+ensure that we are on a supported version so we receive security fixes. To try and make
+things easier for some of our users, we aim to be using the older of the two supported major
+versions, unless there is a compelling reason to adopt the newer version.
+
+In practice this means that we bump our major version of the go toolchain every six months to
+version (1.x-1) in response to a new version (1.x) coming out, which makes our current version
+(1.x-2) no longer supported. We will bump the minor version whenever required to satisfy
+dependency updates, or security fixes.
+
+Our go toolchain version is recorded in [`versions.yaml`](../versions.yaml) under
+`.languages.golang.version` and should match with the version in our `go.mod` files.
+
+## Rust toolchain
+
+Rust has a [six week](https://doc.rust-lang.org/book/appendix-05-editions.html#:~:text=The%20Rust%20language%20and%20compiler,these%20tiny%20changes%20add%20up.)
+release cycle and they only support the latest stable release, so if we wanted to remain on a
+supported release we would only ever build with the latest stable and bump every 6 weeks.
+However, feedback from our community has indicated that this is a challenge, as downstream consumers
+often want to get Rust from their distro or a downstream fork, and these struggle to keep up with
+the six week release schedule. As a result the community has agreed to try out a policy of
+"stable-2", where we aim to build with a rust version that is two versions behind the latest stable
+version.
+
+In practice this should mean that we bump our rust toolchain every six weeks, to version
+1.x-2 when 1.x is released as stable and we should be picking up the latest point release
+of that version, if there were any.
+
+The rust-toolchain that we are using is recorded in [`rust-toolchain.toml`](../rust-toolchain.toml).
--- a/docs/how-to/how-to-set-sandbox-config-kata.md
+++ b/docs/how-to/how-to-set-sandbox-config-kata.md
@@ -97,6 +97,8 @@ There are several kinds of Kata configurations and they are listed below.
 | `io.katacontainers.config.hypervisor.use_legacy_serial` | `boolean` | uses legacy serial device for guest's console (QEMU) |
 | `io.katacontainers.config.hypervisor.default_gpus` | uint32 | the minimum number of GPUs required for the VM. Only used by remote hypervisor to help with instance selection |
 | `io.katacontainers.config.hypervisor.default_gpu_model` | string | the GPU model required for the VM. Only used by remote hypervisor to help with instance selection |
+| `io.katacontainers.config.hypervisor.block_device_num_queues` | `usize` | The number of queues to use for block devices (runtime-rs only) |
+| `io.katacontainers.config.hypervisor.block_device_queue_size` | uint32 | The size of the queue to use for block devices (runtime-rs only) |

 ## Container Options
 | Key | Value Type | Comments |
--- a/src/agent/Cargo.lock
+++ b/src/agent/Cargo.lock
@@ -459,15 +459,9 @@ version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
 dependencies = [
- "bit-vec 0.8.0",
+ "bit-vec",
 ]

-[[package]]
-name = "bit-vec"
-version = "0.6.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
-
 [[package]]
 name = "bit-vec"
 version = "0.8.0"
@@ -1250,7 +1244,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece"
 dependencies = [
 "crc32fast",
- "libz-sys",
 "miniz_oxide",
 ]

@@ -2266,17 +2259,6 @@ dependencies = [
 "uuid 0.8.2",
 ]

-[[package]]
-name = "libz-sys"
-version = "1.1.22"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b70e7a7df205e92a1a4cd9aaae7898dac0aa555503cc0a649494d0d60e7651d"
-dependencies = [
- "cc",
- "pkg-config",
- "vcpkg",
-]
-
 [[package]]
 name = "linux-raw-sys"
 version = "0.3.8"
@@ -3719,7 +3701,7 @@ dependencies = [
 "anyhow",
 "async-trait",
 "awaitgroup",
- "bit-vec 0.6.3",
+ "bit-vec",
 "capctl",
 "caps",
 "cfg-if",
@@ -4821,12 +4803,6 @@ version = "1.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "943ce29a8a743eb10d6082545d861b24f9d1b160b7d741e0f2cdf726bec909c5"

-[[package]]
-name = "vcpkg"
-version = "0.2.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
-
 [[package]]
 name = "version_check"
 version = "0.9.5"
--- a/src/agent/Cargo.toml
+++ b/src/agent/Cargo.toml
@@ -186,7 +186,7 @@ base64 = "0.22"
 sha2 = "0.10.8"
 async-compression = { version = "0.4.22", features = ["tokio", "gzip"] }

-container-device-interface = "0.1.0"
+container-device-interface = "0.1.1"

 [target.'cfg(target_arch = "s390x")'.dependencies]
 pv_core = { git = "https://github.com/ibm-s390-linux/s390-tools", rev = "4942504a9a2977d49989a5e5b7c1c8e07dc0fa41", package = "s390_pv_core" }
@@ -206,6 +206,7 @@ lto = true
 seccomp = ["rustjail/seccomp"]
 standard-oci-runtime = ["rustjail/standard-oci-runtime"]
 agent-policy = ["kata-agent-policy"]
+init-data = []

 [[bin]]
 name = "kata-agent"
--- a/src/agent/Makefile
+++ b/src/agent/Makefile
@@ -41,6 +41,14 @@ ifeq ($(AGENT_POLICY),yes)
    override EXTRA_RUSTFEATURES += agent-policy
 endif

+##VAR INIT_DATA=yes|no define if agent enables the init data feature
+INIT_DATA ?= yes
+
+# Enable the init data feature of the Rust build
+ifeq ($(INIT_DATA),yes)
+    override EXTRA_RUSTFEATURES += init-data
+endif
+
 include ../../utils.mk

 ##VAR STANDARD_OCI_RUNTIME=yes|no define if agent enables standard oci runtime feature
--- a/src/agent/policy/src/policy.rs
+++ b/src/agent/policy/src/policy.rs
@@ -10,7 +10,7 @@ use anyhow::{bail, Result};
 use slog::{debug, error, info, warn};
 use tokio::io::AsyncWriteExt;

-static POLICY_LOG_FILE: &str = "/tmp/policy.txt";
+static POLICY_LOG_FILE: &str = "/tmp/policy.jsonl";
 static POLICY_DEFAULT_FILE: &str = "/etc/kata-opa/default-policy.rego";

 /// Convenience macro to obtain the scope logger
@@ -26,7 +26,7 @@ pub struct AgentPolicy {
    /// When true policy errors are ignored, for debug purposes.
    allow_failures: bool,

-    /// "/tmp/policy.txt" log file for policy activity.
+    /// "/tmp/policy.jsonl" log file for policy activity.
    log_file: Option<tokio::fs::File>,

    /// Regorus engine
@@ -213,7 +213,7 @@ impl AgentPolicy {
                    //   The Policy text can be obtained directly from the pod YAML.
                }
                _ => {
-                    let log_entry = format!("[\"ep\":\"{ep}\",{input}],\n\n");
+                    let log_entry = format!("{{\"kind\":\"{ep}\",\"request\":{input}}}\n");

                    if let Err(e) = log_file.write_all(log_entry.as_bytes()).await {
                        warn!(sl!(), "policy: log_eval_input: write_all failed: {}", e);
--- a/src/agent/rustjail/Cargo.toml
+++ b/src/agent/rustjail/Cargo.toml
@@ -44,7 +44,7 @@ async-trait.workspace = true
 inotify = "0.9.2"
 libseccomp = { version = "0.3.0", optional = true }
 zbus = "3.12.0"
-bit-vec = "0.6.3"
+bit-vec = "0.8.0"
 xattr = "0.2.3"

 # Local dependencies
--- a/src/agent/src/initdata.rs
+++ b/src/agent/src/initdata.rs
@@ -9,6 +9,7 @@
 // SPDX-License-Identifier: Apache-2.0
 //

+#[cfg(feature = "init-data")]
 use std::{os::unix::fs::FileTypeExt, path::Path};

 use anyhow::{bail, Context, Result};
@@ -37,14 +38,24 @@ pub const AA_CONFIG_PATH: &str = concatcp!(INITDATA_PATH, "/aa.toml");
 pub const CDH_CONFIG_PATH: &str = concatcp!(INITDATA_PATH, "/cdh.toml");

 /// Magic number of initdata device
+#[cfg(feature = "init-data")]
 pub const INITDATA_MAGIC_NUMBER: &[u8] = b"initdata";

 /// initdata device with disk type 'vd*'
+#[cfg(feature = "init-data")]
 const INITDATA_PREFIX_DISK_VDX: &str = "vd";

 /// initdata device with disk type 'sd*'
+#[cfg(feature = "init-data")]
 const INITDATA_PREFIX_DISK_SDX: &str = "sd";

+#[cfg(not(feature = "init-data"))]
+async fn detect_initdata_device(logger: &Logger) -> Result<Option<String>> {
+    debug!(logger, "Initdata is disabled");
+    Ok(None)
+}
+
+#[cfg(feature = "init-data")]
 async fn detect_initdata_device(logger: &Logger) -> Result<Option<String>> {
    let dev_dir = Path::new("/dev");
    let mut read_dir = tokio::fs::read_dir(dev_dir).await?;
--- a/src/agent/src/netlink.rs
+++ b/src/agent/src/netlink.rs
@@ -401,11 +401,10 @@ impl Handle {
                }

                if let RouteAttribute::Oif(index) = attribute {
-                    route.device = self
-                        .find_link(LinkFilter::Index(*index))
-                        .await
-                        .context(format!("error looking up device {index}"))?
-                        .name();
+                    route.device = match self.find_link(LinkFilter::Index(*index)).await {
+                        Ok(link) => link.name(),
+                        Err(_) => String::new(),
+                    };
                }
            }

@@ -1005,10 +1004,6 @@ mod tests {
            .expect("Failed to list routes");

        assert_ne!(all.len(), 0);
-
-        for r in &all {
-            assert_ne!(r.device.len(), 0);
-        }
    }

    #[tokio::test]
--- a/src/dragonball/Cargo.toml
+++ b/src/dragonball/Cargo.toml
@@ -9,58 +9,6 @@ repository = "https://github.com/kata-containers/kata-containers.git"
 license = "Apache-2.0"
 edition = "2018"

-[workspace]
-members = [
-  "dbs_acpi",
-  "dbs_address_space",
-  "dbs_allocator",
-  "dbs_arch",
-  "dbs_boot",
-  "dbs_device",
-  "dbs_interrupt",
-  "dbs_legacy_devices",
-  "dbs_pci",
-  "dbs_tdx",
-  "dbs_upcall",
-  "dbs_utils",
-  "dbs_virtio_devices",
-]
-resolver = "2"
-
-[workspace.dependencies]
-# Rust-VMM crates
-event-manager = "0.2.1"
-kvm-bindings = "0.6.0"
-kvm-ioctls = "=0.12.1"
-linux-loader = "0.8.0"
-seccompiler = "0.5.0"
-vfio-bindings = "0.3.0"
-vfio-ioctls = "0.1.0"
-virtio-bindings = "0.1.0"
-virtio-queue = "0.7.0"
-vm-fdt = "0.2.0"
-vm-memory = "0.10.0"
-vm-superio = "0.5.0"
-vmm-sys-util = "0.11.0"
-
-# Local dependencies from Dragonball Sandbox crates
-dbs-acpi = { path = "dbs_acpi" }
-dbs-address-space = { path = "dbs_address_space" }
-dbs-allocator = { path = "dbs_allocator" }
-dbs-arch = { path = "dbs_arch" }
-dbs-boot = { path = "dbs_boot" }
-dbs-device = { path = "dbs_device" }
-dbs-interrupt = { path = "dbs_interrupt" }
-dbs-legacy-devices = { path = "dbs_legacy_devices" }
-dbs-pci = { path = "dbs_pci" }
-dbs-tdx = { path = "dbs_tdx" }
-dbs-upcall = { path = "dbs_upcall" }
-dbs-utils = { path = "dbs_utils" }
-dbs-virtio-devices = { path = "dbs_virtio_devices" }
-
-# Local dependencies from `src/lib`
-test-utils = { path = "../libs/test-utils" }
-
 [dependencies]
 anyhow = "1.0.32"
 arc-swap = "1.5.0"
@@ -83,12 +31,12 @@ kvm-bindings = { workspace = true }
 kvm-ioctls = { workspace = true }
 lazy_static = "1.2"
 libc = "0.2.39"
-linux-loader = {workspace = true}
+linux-loader = { workspace = true }
 log = "0.4.14"
 nix = "0.24.2"
 procfs = "0.12.0"
 prometheus = { version = "0.14.0", features = ["process"] }
-seccompiler = {workspace = true}
+seccompiler = { workspace = true }
 serde = "1.0.27"
 serde_derive = "1.0.27"
 serde_json = "1.0.9"
@@ -96,7 +44,7 @@ slog = "2.5.2"
 slog-scope = "4.4.0"
 thiserror = "1"
 tracing = "0.1.41"
-vmm-sys-util = {workspace = true}
+vmm-sys-util = { workspace = true }
 virtio-queue = { workspace = true, optional = true }
 vm-memory = { workspace = true, features = ["backend-mmap"] }
 crossbeam-channel = "0.5.6"
@@ -118,14 +66,14 @@ virtio-blk = ["dbs-virtio-devices/virtio-blk", "virtio-queue"]
 virtio-net = ["dbs-virtio-devices/virtio-net", "virtio-queue"]
 # virtio-fs only work on atomic-guest-memory
 virtio-fs = [
-    "dbs-virtio-devices/virtio-fs-pro",
-    "virtio-queue",
-    "atomic-guest-memory",
+  "dbs-virtio-devices/virtio-fs-pro",
+  "virtio-queue",
+  "atomic-guest-memory",
 ]
 virtio-mem = [
-    "dbs-virtio-devices/virtio-mem",
-    "virtio-queue",
-    "atomic-guest-memory",
+  "dbs-virtio-devices/virtio-mem",
+  "virtio-queue",
+  "atomic-guest-memory",
 ]
 virtio-balloon = ["dbs-virtio-devices/virtio-balloon", "virtio-queue"]
 vhost-net = ["dbs-virtio-devices/vhost-net"]
@@ -136,5 +84,5 @@ host-device = ["dep:vfio-bindings", "dep:vfio-ioctls", "dep:dbs-pci"]

 [lints.rust]
 unexpected_cfgs = { level = "warn", check-cfg = [
-    'cfg(feature, values("test-mock"))',
+  'cfg(feature, values("test-mock"))',
 ] }
--- a/src/libs/kata-types/src/annotations/mod.rs
+++ b/src/libs/kata-types/src/annotations/mod.rs
@@ -283,6 +283,13 @@ pub const KATA_ANNO_CFG_HYPERVISOR_DEFAULT_GPUS: &str =
 pub const KATA_ANNO_CFG_HYPERVISOR_DEFAULT_GPU_MODEL: &str =
    "io.katacontainers.config.hypervisor.default_gpu_model";

+/// Block device specific annotation for num_queues
+pub const KATA_ANNO_CFG_HYPERVISOR_BLOCK_DEV_NUM_QUEUES: &str =
+    "io.katacontainers.config.hypervisor.block_device_num_queues";
+/// Block device specific annotation for queue_size
+pub const KATA_ANNO_CFG_HYPERVISOR_BLOCK_DEV_QUEUE_SIZE: &str =
+    "io.katacontainers.config.hypervisor.block_device_queue_size";
+
 // Runtime related annotations
 /// Prefix for Runtime configurations.
 pub const KATA_ANNO_CFG_RUNTIME_PREFIX: &str = "io.katacontainers.config.runtime.";
@@ -503,6 +510,7 @@ impl Annotation {
        let u32_err = io::Error::new(io::ErrorKind::InvalidData, "parse u32 error".to_string());
        let u64_err = io::Error::new(io::ErrorKind::InvalidData, "parse u64 error".to_string());
        let i32_err = io::Error::new(io::ErrorKind::InvalidData, "parse i32 error".to_string());
+        let usize_err = io::Error::new(io::ErrorKind::InvalidData, "parse usize error".to_string());
        let hv = config.hypervisor.get_mut(hypervisor_name).ok_or_else(|| {
            io::Error::new(
                io::ErrorKind::InvalidData,
@@ -620,7 +628,7 @@ impl Annotation {
                        hv.boot_info.kernel = value.to_string();
                    }
                    KATA_ANNO_CFG_HYPERVISOR_KERNEL_PARAMS => {
-                        hv.boot_info.kernel_params = value.to_string();
+                        hv.boot_info.replace_kernel_params(value);
                    }
                    KATA_ANNO_CFG_HYPERVISOR_IMAGE_PATH => {
                        hv.boot_info.validate_boot_path(value)?;
@@ -960,7 +968,26 @@ impl Annotation {
                            return Err(u32_err);
                        }
                    },
-
+                    KATA_ANNO_CFG_HYPERVISOR_BLOCK_DEV_NUM_QUEUES => {
+                        match self.get_value::<usize>(key) {
+                            Ok(v) => {
+                                hv.blockdev_info.num_queues = v.unwrap_or_default();
+                            }
+                            Err(_e) => {
+                                return Err(usize_err);
+                            }
+                        }
+                    }
+                    KATA_ANNO_CFG_HYPERVISOR_BLOCK_DEV_QUEUE_SIZE => {
+                        match self.get_value::<u32>(key) {
+                            Ok(v) => {
+                                hv.blockdev_info.queue_size = v.unwrap_or_default();
+                            }
+                            Err(_e) => {
+                                return Err(u32_err);
+                            }
+                        }
+                    }
                    _ => {
                        return Err(io::Error::new(
                            io::ErrorKind::InvalidInput,
--- a/src/libs/kata-types/src/config/default.rs
+++ b/src/libs/kata-types/src/config/default.rs
@@ -41,11 +41,13 @@ pub const DEFAULT_BLOCK_NVDIMM_MEM_OFFSET: u64 = 0;
 pub const DEFAULT_BLOCK_DEVICE_AIO_THREADS: &str = "threads";
 pub const DEFAULT_BLOCK_DEVICE_AIO_NATIVE: &str = "native";
 pub const DEFAULT_BLOCK_DEVICE_AIO: &str = "io_uring";
+pub const DEFAULT_BLOCK_DEVICE_NUM_QUEUES: u32 = 1;
+pub const DEFAULT_BLOCK_DEVICE_QUEUE_SIZE: u32 = 128;

 pub const DEFAULT_SHARED_FS_TYPE: &str = "virtio-fs";
 pub const DEFAULT_VIRTIO_FS_CACHE_MODE: &str = "never";
 pub const DEFAULT_VIRTIO_FS_DAX_SIZE_MB: u32 = 1024;
-pub const DEFAULT_SHARED_9PFS_SIZE_MB: u32 = 128 * 1024;
+pub const DEFAULT_SHARED_9PFS_SIZE_MB: u32 = 8 * 1024;
 pub const MIN_SHARED_9PFS_SIZE_MB: u32 = 4 * 1024;
 pub const MAX_SHARED_9PFS_SIZE_MB: u32 = 8 * 1024 * 1024;

@@ -110,3 +112,6 @@ pub const MAX_REMOTE_VCPUS: u32 = 32;
 pub const MIN_REMOTE_MEMORY_SIZE_MB: u32 = 64;
 pub const DEFAULT_REMOTE_MEMORY_SIZE_MB: u32 = 128;
 pub const DEFAULT_REMOTE_MEMORY_SLOTS: u32 = 128;
+
+// Default configuration for factory/templating
+pub const DEFAULT_TEMPLATE_PATH: &str = "/run/vc/vm/template";
--- a/src/libs/kata-types/src/config/hypervisor/mod.rs
+++ b/src/libs/kata-types/src/config/hypervisor/mod.rs
@@ -189,6 +189,13 @@ pub struct BlockDeviceInfo {
    /// increases the initial max rate
    #[serde(default)]
    pub disk_rate_limiter_ops_one_time_burst: Option<u64>,
+
+    /// virtio queue size. Size: byte
+    #[serde(default)]
+    pub queue_size: u32,
+    /// block device multi-queue
+    #[serde(default)]
+    pub num_queues: usize,
 }

 impl BlockDeviceInfo {
@@ -219,6 +226,15 @@ impl BlockDeviceInfo {
                ));
            }
        }
+
+        if self.num_queues == 0 {
+            self.num_queues = default::DEFAULT_BLOCK_DEVICE_NUM_QUEUES as usize;
+        }
+
+        if self.queue_size == 0 {
+            self.queue_size = default::DEFAULT_BLOCK_DEVICE_QUEUE_SIZE;
+        }
+
        if self.memory_offset == 0 {
            self.memory_offset = default::DEFAULT_BLOCK_NVDIMM_MEM_OFFSET;
        }
@@ -358,6 +374,71 @@ impl BootInfo {
        self.kernel_params = p.join(KERNEL_PARAM_DELIMITER);
    }

+    /// Replace kernel parameters with the same key.
+    ///
+    /// For each parameter in the new_params string, if a parameter with the same key
+    /// already exists in kernel_params, it will be removed before adding the new one.
+    /// This allows selective parameter override from annotations without replacing
+    /// the entire kernel command line.
+    pub fn replace_kernel_params(&mut self, new_params: &str) {
+        if new_params.is_empty() {
+            return;
+        }
+
+        // Parse existing kernel parameters into an ordered list of (key, value) pairs
+        let mut existing_params: Vec<(String, String)> = Vec::new();
+        for param in self.kernel_params.split(KERNEL_PARAM_DELIMITER) {
+            let param = param.trim();
+            if param.is_empty() {
+                continue;
+            }
+            // Split by '=' to get key and value
+            if let Some(eq_pos) = param.find('=') {
+                let key = param[..eq_pos].to_string();
+                let value = param[eq_pos + 1..].to_string();
+                existing_params.push((key, value));
+            } else {
+                // Parameter without value (like "quiet")
+                existing_params.push((param.to_string(), String::new()));
+            }
+        }
+
+        // Parse new parameters and collect keys to replace
+        let mut new_param_keys: Vec<String> = Vec::new();
+        let mut new_param_list: Vec<String> = Vec::new();
+        for param in new_params.split(KERNEL_PARAM_DELIMITER) {
+            let param = param.trim();
+            if param.is_empty() {
+                continue;
+            }
+            if let Some(eq_pos) = param.find('=') {
+                let key = param[..eq_pos].to_string();
+                new_param_keys.push(key);
+            } else {
+                new_param_keys.push(param.to_string());
+            }
+            new_param_list.push(param.to_string());
+        }
+
+        // Remove existing parameters that will be replaced
+        existing_params.retain(|(key, _)| !new_param_keys.contains(key));
+
+        // Reconstruct kernel_params: existing params + new params
+        let mut all_params: Vec<String> = existing_params
+            .iter()
+            .map(|(key, value)| {
+                if value.is_empty() {
+                    key.clone()
+                } else {
+                    format!("{}={}", key, value)
+                }
+            })
+            .collect();
+        all_params.extend(new_param_list);
+
+        self.kernel_params = all_params.join(KERNEL_PARAM_DELIMITER);
+    }
+
    /// Validate guest kernel image annotation.
    pub fn validate_boot_path(&self, path: &str) -> Result<()> {
        validate_path!(path, "path {} is invalid{}")?;
--- a/src/libs/kata-types/src/config/hypervisor/qemu.rs
+++ b/src/libs/kata-types/src/config/hypervisor/qemu.rs
@@ -91,6 +91,10 @@ impl ConfigPlugin for QemuConfig {
            if qemu.memory_info.memory_slots == 0 {
                qemu.memory_info.memory_slots = default::DEFAULT_QEMU_MEMORY_SLOTS;
            }
+
+            if qemu.factory.template_path.is_empty() {
+                qemu.factory.template_path = default::DEFAULT_TEMPLATE_PATH.to_string();
+            }
        }

        Ok(())
--- a/src/libs/kata-types/src/config/hypervisor/remote.rs
+++ b/src/libs/kata-types/src/config/hypervisor/remote.rs
@@ -65,6 +65,11 @@ impl ConfigPlugin for RemoteConfig {
            if remote.memory_info.memory_slots == 0 {
                remote.memory_info.memory_slots = default::DEFAULT_REMOTE_MEMORY_SLOTS
            }
+
+            // Apply factory defaults
+            if remote.factory.template_path.is_empty() {
+                remote.factory.template_path = default::DEFAULT_TEMPLATE_PATH.to_string();
+            }
        }

        Ok(())
--- a/src/libs/kata-types/src/cpu.rs
+++ b/src/libs/kata-types/src/cpu.rs
@@ -25,6 +25,7 @@ pub enum Error {
 }

 /// Assigned CPU resources for a Linux container.
+/// Stores fractional vCPU allocation for more precise resource tracking.
 #[derive(Clone, Default, Debug)]
 pub struct LinuxContainerCpuResources {
    shares: u64,
@@ -32,7 +33,8 @@ pub struct LinuxContainerCpuResources {
    quota: i64,
    cpuset: CpuSet,
    nodeset: NumaNodeSet,
-    calculated_vcpu_time_ms: Option<u64>,
+    /// Calculated fractional vCPU allocation, e.g., 0.25 means 1/4 of a CPU.
+    calculated_vcpu: Option<f64>,
 }

 impl LinuxContainerCpuResources {
@@ -61,10 +63,10 @@ impl LinuxContainerCpuResources {
        &self.nodeset
    }

-    /// Get number of vCPUs to fulfill the CPU resource request, `None` means unconstrained.
-    pub fn get_vcpus(&self) -> Option<u64> {
-        self.calculated_vcpu_time_ms
-            .map(|v| v.saturating_add(999) / 1000)
+    /// Get the number of vCPUs assigned to the container as a fractional value.
+    /// Returns `None` if unconstrained (no limit).
+    pub fn get_vcpus(&self) -> Option<f64> {
+        self.calculated_vcpu
    }
 }

@@ -75,15 +77,18 @@ impl TryFrom<&oci::LinuxCpu> for LinuxContainerCpuResources {
    fn try_from(value: &oci::LinuxCpu) -> Result<Self, Self::Error> {
        let period = value.period().unwrap_or(0);
        let quota = value.quota().unwrap_or(-1);
-        let value_cpus = value.cpus().as_ref().map_or("", |cpus| cpus);
+        let value_cpus = value.cpus().as_deref().unwrap_or("");
        let cpuset = CpuSet::from_str(value_cpus).map_err(Error::InvalidCpuSet)?;
-        let value_mems = value.mems().as_ref().map_or("", |mems| mems);
+        let value_mems = value.mems().as_deref().unwrap_or("");
        let nodeset = NumaNodeSet::from_str(value_mems).map_err(Error::InvalidNodeSet)?;

-        // If quota is -1, it means the CPU resource request is unconstrained. In that case,
-        // we don't currently assign additional CPUs.
-        let milli_sec = if quota >= 0 && period != 0 {
-            Some((quota as u64).saturating_mul(1000) / period)
+        // Calculate fractional vCPUs:
+        // If quota >= 0 and period > 0, vCPUs = quota / period.
+        // Otherwise, if cpuset is non-empty, derive from cpuset length.
+        let vcpu_fraction = if quota >= 0 && period > 0 {
+            Some(quota as f64 / period as f64)
+        } else if !cpuset.is_empty() {
+            Some(cpuset.len() as f64)
        } else {
            None
        };
@@ -94,16 +99,18 @@ impl TryFrom<&oci::LinuxCpu> for LinuxContainerCpuResources {
            quota,
            cpuset,
            nodeset,
-            calculated_vcpu_time_ms: milli_sec,
+            calculated_vcpu: vcpu_fraction,
        })
    }
 }

-/// Assigned CPU resources for a Linux sandbox/pod.
+/// Aggregated CPU resources for a Linux sandbox/pod.
+/// Tracks cumulative fractional vCPU allocation across all containers in the pod.
 #[derive(Default, Debug)]
 pub struct LinuxSandboxCpuResources {
    shares: u64,
-    calculated_vcpu_time_ms: u64,
+    /// Total fractional vCPU allocation for the sandbox.
+    calculated_vcpu: f64,
    cpuset: CpuSet,
    nodeset: NumaNodeSet,
 }
@@ -122,9 +129,9 @@ impl LinuxSandboxCpuResources {
        self.shares
    }

-    /// Get assigned vCPU time in ms.
-    pub fn calculated_vcpu_time_ms(&self) -> u64 {
-        self.calculated_vcpu_time_ms
+    /// Return the cumulative fractional vCPU allocation for the sandbox.
+    pub fn calculated_vcpu(&self) -> f64 {
+        self.calculated_vcpu
    }

    /// Get the CPU set.
@@ -137,19 +144,23 @@ impl LinuxSandboxCpuResources {
        &self.nodeset
    }

-    /// Get number of vCPUs to fulfill the CPU resource request.
-    pub fn get_vcpus(&self) -> u64 {
-        if self.calculated_vcpu_time_ms == 0 && !self.cpuset.is_empty() {
-            self.cpuset.len() as u64
-        } else {
-            self.calculated_vcpu_time_ms.saturating_add(999) / 1000
+    /// Get the number of vCPUs for the sandbox as a fractional value.
+    /// If no quota and cpuset is defined, return cpuset length as float.
+    pub fn get_vcpus(&self) -> f64 {
+        if self.calculated_vcpu == 0.0 {
+            if !self.cpuset.is_empty() {
+                return self.cpuset.len() as f64;
+            }
+            return 0.0;
        }
+        self.calculated_vcpu
    }

-    /// Merge resources assigned to a container into the sandbox/pod resources.
+    /// Merge container CPU resources into this sandbox CPU resource object.
+    /// Aggregates fractional vCPU allocation and extends cpuset/nodeset.
    pub fn merge(&mut self, container_resource: &LinuxContainerCpuResources) -> &mut Self {
-        if let Some(v) = container_resource.calculated_vcpu_time_ms.as_ref() {
-            self.calculated_vcpu_time_ms += v;
+        if let Some(v) = container_resource.calculated_vcpu {
+            self.calculated_vcpu += v;
        }
        self.cpuset.extend(&container_resource.cpuset);
        self.nodeset.extend(&container_resource.nodeset);
@@ -160,16 +171,16 @@ impl LinuxSandboxCpuResources {
 #[cfg(test)]
 mod tests {
    use super::*;
+    const EPSILON: f64 = 0.0001;

    #[test]
    fn test_linux_container_cpu_resources() {
        let resources = LinuxContainerCpuResources::default();

        assert_eq!(resources.shares(), 0);
-        assert_eq!(resources.calculated_vcpu_time_ms, None);
        assert!(resources.cpuset.is_empty());
        assert!(resources.nodeset.is_empty());
-        assert!(resources.calculated_vcpu_time_ms.is_none());
+        assert!(resources.get_vcpus().is_none());

        let mut linux_cpu = oci::LinuxCpu::default();
        linux_cpu.set_shares(Some(2048));
@@ -182,11 +193,20 @@ mod tests {
        assert_eq!(resources.shares(), 2048);
        assert_eq!(resources.period(), 100);
        assert_eq!(resources.quota(), 1001);
-        assert_eq!(resources.calculated_vcpu_time_ms, Some(10010));
-        assert_eq!(resources.get_vcpus().unwrap(), 11);
+
+        // Expected fractional vCPUs = quota / period
+        let expected_vcpus = 1001.0 / 100.0;
+        assert!(
+            (resources.get_vcpus().unwrap() - expected_vcpus).abs() < EPSILON,
+            "got {}, expect {}",
+            resources.get_vcpus().unwrap(),
+            expected_vcpus
+        );
+
        assert_eq!(resources.cpuset().len(), 3);
        assert_eq!(resources.nodeset().len(), 1);

+        // Test cpuset-only path (no quota)
        let mut linux_cpu = oci::LinuxCpu::default();
        linux_cpu.set_shares(Some(2048));
        linux_cpu.set_cpus(Some("1".to_string()));
@@ -196,8 +216,10 @@ mod tests {
        assert_eq!(resources.shares(), 2048);
        assert_eq!(resources.period(), 0);
        assert_eq!(resources.quota(), -1);
-        assert_eq!(resources.calculated_vcpu_time_ms, None);
-        assert!(resources.get_vcpus().is_none());
+        assert!(
+            (resources.get_vcpus().unwrap() - 1.0).abs() < EPSILON,
+            "cpuset size vCPU mismatch"
+        );
        assert_eq!(resources.cpuset().len(), 1);
        assert_eq!(resources.nodeset().len(), 2);
    }
@@ -207,8 +229,7 @@ mod tests {
        let mut sandbox = LinuxSandboxCpuResources::new(1024);

        assert_eq!(sandbox.shares(), 1024);
-        assert_eq!(sandbox.get_vcpus(), 0);
-        assert_eq!(sandbox.calculated_vcpu_time_ms(), 0);
+        assert_eq!(sandbox.get_vcpus(), 0.0);
        assert!(sandbox.cpuset().is_empty());
        assert!(sandbox.nodeset().is_empty());

@@ -222,11 +243,20 @@ mod tests {
        let resources = LinuxContainerCpuResources::try_from(&linux_cpu).unwrap();
        sandbox.merge(&resources);
        assert_eq!(sandbox.shares(), 1024);
-        assert_eq!(sandbox.get_vcpus(), 11);
-        assert_eq!(sandbox.calculated_vcpu_time_ms(), 10010);
+
+        // vCPUs after merge = quota / period
+        let expected_vcpus = 1001.0 / 100.0;
+        assert!(
+            (sandbox.get_vcpus() - expected_vcpus).abs() < EPSILON,
+            "sandbox vCPU mismatch: got {}, expect {}",
+            sandbox.get_vcpus(),
+            expected_vcpus
+        );
+
        assert_eq!(sandbox.cpuset().len(), 3);
        assert_eq!(sandbox.nodeset().len(), 1);

+        // Merge cpuset-only container
        let mut linux_cpu = oci::LinuxCpu::default();
        linux_cpu.set_shares(Some(2048));
        linux_cpu.set_cpus(Some("1,4".to_string()));
@@ -236,8 +266,15 @@ mod tests {
        sandbox.merge(&resources);

        assert_eq!(sandbox.shares(), 1024);
-        assert_eq!(sandbox.get_vcpus(), 11);
-        assert_eq!(sandbox.calculated_vcpu_time_ms(), 10010);
+
+        // Expect quota-based + cpuset len (since cpuset is treated as allocation)
+        let expected_after_merge = expected_vcpus + resources.get_vcpus().unwrap();
+        assert!(
+            (sandbox.get_vcpus() - expected_after_merge).abs() < EPSILON,
+            "sandbox vCPU mismatch after cpuset merge: got {}, expect {}",
+            sandbox.get_vcpus(),
+            expected_after_merge
+        );
        assert_eq!(sandbox.cpuset().len(), 4);
        assert_eq!(sandbox.nodeset().len(), 2);
    }
--- a/src/libs/mem-agent/src/compact.rs
+++ b/src/libs/mem-agent/src/compact.rs
@@ -52,7 +52,8 @@ pub struct Config {
    // the next compact_force_times times, a compaction will be forced
    // regardless of the system's memory situation.
    // If compact_force_times is set to 0, will do force compaction each time.
-    // If compact_force_times is set to std::u64::MAX, will never do force compaction.
+    // If compact_force_times is set to i64::MAX or greater (e.g. u64::MAX), will never do force compaction.
+    // Note: Using i64::MAX (9223372036854775807) instead of u64::MAX to avoid TOML parser issues.
    pub compact_force_times: u64,
 }

@@ -67,7 +68,7 @@ impl Default for Config {
            compact_sec_max: 5 * 60,
            compact_order: PAGE_REPORTING_MIN_ORDER,
            compact_threshold: 2 << PAGE_REPORTING_MIN_ORDER,
-            compact_force_times: u64::MAX,
+            compact_force_times: i64::MAX as u64,
        }
    }
 }
@@ -133,7 +134,7 @@ impl CompactCore {
    }

    fn need_force_compact(&self) -> bool {
-        if self.config.compact_force_times == u64::MAX {
+        if self.config.compact_force_times >= i64::MAX as u64 {
            return false;
        }

--- a/src/runtime-rs/Cargo.lock
+++ b/src/runtime-rs/Cargo.lock
@@ -25,19 +25,13 @@ dependencies = [

 [[package]]
 name = "addr2line"
-version = "0.20.0"
+version = "0.25.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3"
+checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b"
 dependencies = [
 "gimli",
 ]

-[[package]]
-name = "adler"
-version = "1.0.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
-
 [[package]]
 name = "adler2"
 version = "2.0.1"
@@ -344,17 +338,17 @@ checksum = "cc17ab023b4091c10ff099f9deebaeeb59b5189df07e554c4fef042b70745d68"

 [[package]]
 name = "backtrace"
-version = "0.3.68"
+version = "0.3.76"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12"
+checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6"
 dependencies = [
 "addr2line",
- "cc",
 "cfg-if 1.0.0",
 "libc",
- "miniz_oxide 0.7.1",
+ "miniz_oxide",
 "object",
 "rustc-demangle",
+ "windows-link 0.2.1",
 ]

 [[package]]
@@ -582,9 +576,9 @@ dependencies = [

 [[package]]
 name = "cgroups-rs"
-version = "0.4.0"
+version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "879433e90a9bf3c38e4e854ad36bd14507751dbd3a0df15429283ff5c10ff0e4"
+checksum = "efc46cf39fc5922b840030e0e5b378ce5caa9a824a675a95c6dec2c2c9ce9468"
 dependencies = [
 "bit-vec",
 "libc",
@@ -621,7 +615,7 @@ dependencies = [
 "js-sys",
 "num-traits",
 "wasm-bindgen",
- "windows-link",
+ "windows-link 0.1.3",
 ]

 [[package]]
@@ -1448,7 +1442,7 @@ checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb"
 dependencies = [
 "crc32fast",
 "libz-sys",
- "miniz_oxide 0.8.9",
+ "miniz_oxide",
 ]

 [[package]]
@@ -1674,9 +1668,9 @@ dependencies = [

 [[package]]
 name = "gimli"
-version = "0.27.3"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e"
+checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7"

 [[package]]
 name = "glob"
@@ -2510,15 +2504,6 @@ version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"

-[[package]]
-name = "miniz_oxide"
-version = "0.7.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
-dependencies = [
- "adler",
-]
-
 [[package]]
 name = "miniz_oxide"
 version = "0.8.9"
@@ -2605,55 +2590,37 @@ dependencies = [

 [[package]]
 name = "netlink-packet-core"
-version = "0.7.0"
+version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72724faf704479d67b388da142b186f916188505e7e0b26719019c525882eda4"
+checksum = "3463cbb78394cb0141e2c926b93fc2197e473394b761986eca3b9da2c63ae0f4"
 dependencies = [
- "anyhow",
- "byteorder",
- "netlink-packet-utils",
+ "paste",
 ]

 [[package]]
 name = "netlink-packet-route"
-version = "0.22.0"
+version = "0.26.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fc0e7987b28514adf555dc1f9a5c30dfc3e50750bbaffb1aec41ca7b23dcd8e4"
+checksum = "9ea06a7cec15a9df94c58bddc472b1de04ca53bd32e72da7da2c5dd1c3885edc"
 dependencies = [
- "anyhow",
 "bitflags 2.9.0",
- "byteorder",
 "libc",
 "log",
 "netlink-packet-core",
- "netlink-packet-utils",
-]
-
-[[package]]
-name = "netlink-packet-utils"
-version = "0.5.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ede8a08c71ad5a95cdd0e4e52facd37190977039a4704eb82a283f713747d34"
-dependencies = [
- "anyhow",
- "byteorder",
- "paste",
- "thiserror 1.0.69",
 ]

 [[package]]
 name = "netlink-proto"
-version = "0.11.3"
+version = "0.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86b33524dc0968bfad349684447bfce6db937a9ac3332a1fe60c0c5a5ce63f21"
+checksum = "b65d130ee111430e47eed7896ea43ca693c387f097dd97376bffafbf25812128"
 dependencies = [
 "bytes",
 "futures 0.3.28",
 "log",
 "netlink-packet-core",
 "netlink-sys",
- "thiserror 1.0.69",
- "tokio",
+ "thiserror 2.0.11",
 ]

 [[package]]
@@ -2910,9 +2877,9 @@ dependencies = [

 [[package]]
 name = "object"
-version = "0.31.1"
+version = "0.37.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1"
+checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe"
 dependencies = [
 "memchr",
 ]
@@ -3890,7 +3857,7 @@ dependencies = [
 "async-trait",
 "bitflags 2.9.0",
 "byte-unit",
- "cgroups-rs 0.4.0",
+ "cgroups-rs 0.5.0",
 "flate2",
 "futures 0.3.28",
 "hex",
@@ -3963,18 +3930,18 @@ dependencies = [

 [[package]]
 name = "rtnetlink"
-version = "0.16.0"
+version = "0.19.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3cb5850b5aa2c9c0ae44f157694bbe85107a2e13d76eb3178d0e3ee96c410f57"
+checksum = "1f3ee907fdcec9200d13b9cdb64dfc8179cb4ac16ead6ae0ac76333dc41981fc"
 dependencies = [
- "futures 0.3.28",
+ "futures-channel",
+ "futures-util",
 "log",
 "netlink-packet-core",
 "netlink-packet-route",
- "netlink-packet-utils",
 "netlink-proto",
 "netlink-sys",
- "nix 0.29.0",
+ "nix 0.30.1",
 "thiserror 1.0.69",
 "tokio",
 ]
@@ -4055,9 +4022,9 @@ dependencies = [

 [[package]]
 name = "rustc-demangle"
-version = "0.1.23"
+version = "0.1.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
+checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace"

 [[package]]
 name = "rustix"
@@ -5696,6 +5663,12 @@ version = "0.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"

+[[package]]
+name = "windows-link"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
+
 [[package]]
 name = "windows-result"
 version = "0.1.2"
--- a/src/runtime-rs/Makefile
+++ b/src/runtime-rs/Makefile
@@ -150,8 +150,8 @@ DEFMEMSLOTS := 10
 DEFMAXMEMSZ := 0
 ##VAR DEFBRIDGES=<number> Default number of bridges
 DEFBRIDGES := 0
-DEFENABLEANNOTATIONS := [\"kernel_params\"]
-DEFENABLEANNOTATIONS_COCO := [\"kernel_params\",\"cc_init_data\"]
+DEFENABLEANNOTATIONS := [\"enable_iommu\", \"virtio_fs_extra_args\", \"kernel_params\", \"default_vcpus\", \"default_memory\"]
+DEFENABLEANNOTATIONS_COCO := [\"enable_iommu\", \"virtio_fs_extra_args\", \"kernel_params\", \"default_vcpus\", \"default_memory\", \"cc_init_data\"]
 DEFDISABLEGUESTSECCOMP := true
 DEFDISABLEGUESTEMPTYDIR := false
 ##VAR DEFAULTEXPFEATURES=[features] Default experimental features enabled
@@ -347,8 +347,13 @@ endif
    DEFBLOCKDEVICEAIO_QEMU := io_uring
    DEFNETWORKMODEL_QEMU := tcfilter
    DEFDISABLEGUESTSELINUX := true
-    DEFSECCOMPSANDBOXPARAM := on,obsolete=deny,spawn=deny,resourcecontrol=deny
-    DEFGUESTSELINUXLABEL := system_u:system_r:container_t
+    # Default is empty string "" to match Rust default None (when commented out in config).
+    # Most users will want to set this to "on,obsolete=deny,spawn=deny,resourcecontrol=deny"
+    # for better security. Note: "elevateprivileges=deny" doesn't work with daemonize option.
+    DEFSECCOMPSANDBOXPARAM := ""
+    # Default is empty string "" to match Rust default None (when commented out in config).
+    # Most users will want to set this to "system_u:system_r:container_t" for SELinux support.
+    DEFGUESTSELINUXLABEL := ""
 endif

 ifneq (,$(FCCMD))
--- a/src/runtime-rs/config/configuration-cloud-hypervisor.toml.in
+++ b/src/runtime-rs/config/configuration-cloud-hypervisor.toml.in
@@ -18,41 +18,15 @@ image = "@IMAGEPATH@"
 #   - ext4 (default)
 #   - xfs
 #   - erofs
-rootfs_type=@DEFROOTFSTYPE@
+rootfs_type = @DEFROOTFSTYPE@
 
 # Block storage driver to be used for the VM rootfs is backed
 # by a block device.
 vm_rootfs_driver = "@VMROOTFSDRIVER_CLH@"

-# Enable confidential guest support.
-# Toggling that setting may trigger different hardware features, ranging
-# from memory encryption to both memory and CPU-state encryption and integrity.
-# The Kata Containers runtime dynamically detects the available feature set and
-# aims at enabling the largest possible one, returning an error if none is
-# available, or none is supported by the hypervisor.
-#
-# Known limitations:
-# * Does not work by design:
-#   - CPU Hotplug 
-#   - Memory Hotplug
-#   - NVDIMM devices
-#
-# Supported TEEs:
-# * Intel TDX
-#
-# Default false
-# confidential_guest = true
-
 # Path to the firmware.
 # If you want Cloud Hypervisor to use a specific firmware, set its path below.
-# This is option is only used when confidential_guest is enabled.
-#
-# For more information about firmwared that can be used with specific TEEs,
-# please, refer to:
-# * Intel TDX:
-#   - td-shim: https://github.com/confidential-containers/td-shim
-#
-# firmware = "@FIRMWAREPATH@"
+firmware = "@FIRMWAREPATH@"

 # List of valid annotation names for the hypervisor
 # Each member of the list is a regular expression, which is the base name
@@ -68,7 +42,7 @@ valid_hypervisor_paths = @CLHVALIDHYPERVISORPATHS@
 # List of valid annotations values for ctlpath
 # The default if not set is empty (all annotations rejected.)
 # Your distribution recommends: 
-# valid_ctlpaths = 
+valid_ctlpaths = []

 # Optional space-separated list of options to pass to the guest kernel.
 # For example, use `kernel_params = "vsyscall=emulate"` if you are having
@@ -166,7 +140,7 @@ default_bridges = @DEFBRIDGES@
 # the VM.
 #
 # Default false
-#reclaim_guest_freed_memory = true
+reclaim_guest_freed_memory = false

 # Block device driver to be used by the hypervisor when a container's storage
 # is backed by a block device or a file. This driver facilitates attaching the
@@ -176,7 +150,7 @@ block_device_driver = "virtio-blk-pci"
 # Specifies cache-related options for block devices.
 # Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
 # Default false
-#block_device_cache_direct = true
+block_device_cache_direct = false

 # Bandwidth rate limiter options
 #
@@ -184,29 +158,35 @@ block_device_driver = "virtio-blk-pci"
 # for SB/VM).
 # The same value is used for inbound and outbound bandwidth.
 # Default 0-sized value means unlimited rate.
-#disk_rate_limiter_bw_max_rate = 0
-#
+disk_rate_limiter_bw_max_rate = 0
+
 # disk_rate_limiter_bw_one_time_burst increases the initial max rate and this
 # initial extra credit does *NOT* affect the overall limit and can be used for
 # an *initial* burst of data.
 # This is *optional* and only takes effect if disk_rate_limiter_bw_max_rate is
 # set to a non zero value.
-#disk_rate_limiter_bw_one_time_burst = 0
-#
+disk_rate_limiter_bw_one_time_burst = 0
+
 # Operation rate limiter options
 #
 # disk_rate_limiter_ops_max_rate controls disk I/O bandwidth (size in ops/sec
 # for SB/VM).
 # The same value is used for inbound and outbound bandwidth.
 # Default 0-sized value means unlimited rate.
-#disk_rate_limiter_ops_max_rate = 0
-#
+disk_rate_limiter_ops_max_rate = 0
+
 # disk_rate_limiter_ops_one_time_burst increases the initial max rate and this
 # initial extra credit does *NOT* affect the overall limit and can be used for
 # an *initial* burst of data.
 # This is *optional* and only takes effect if disk_rate_limiter_bw_max_rate is
 # set to a non zero value.
-#disk_rate_limiter_ops_one_time_burst = 0
+disk_rate_limiter_ops_one_time_burst = 0
+
+# Virtio queue size. Size: byte. default 128
+queue_size = 128
+
+# Block device multi-queue, default 1
+num_queues = 1

 # Enable pre allocation of VM RAM, default false
 # Enabling this will result in lower container density
@@ -215,7 +195,7 @@ block_device_driver = "virtio-blk-pci"
 # upfront or in the cases where you want memory latencies
 # to be very predictable
 # Default false
-#enable_mem_prealloc = true
+enable_mem_prealloc = false

 # Enable huge pages for VM RAM, default false
 # Enabling this will result in the VM memory
@@ -223,27 +203,27 @@ block_device_driver = "virtio-blk-pci"
 # This is useful when you want to use vhost-user network
 # stacks within the container. This will automatically
 # result in memory pre allocation
-#enable_hugepages = true
+enable_hugepages = false

 # Enable running clh VMM as a non-root user.
 # By default clh VMM run as root. When this is set to true, clh VMM process runs as
 # a non-root random user. See documentation for the limitations of this mode.
-#rootless = true
+rootless = false

 # Disable the 'seccomp' feature from Cloud Hypervisor, firecracker or dragonball, default false
-# disable_seccomp = true
+disable_seccomp = false

 # This option changes the default hypervisor and kernel parameters
 # to enable debug output where available.
 #
 # Default false
-#enable_debug = true
+enable_debug = false

 # Disable the customizations done in the runtime when it detects
 # that it is running on top a VMM. This will result in the runtime
 # behaving as it would when running on bare metal.
 #
-#disable_nesting_checks = true
+disable_nesting_checks = false

 # Path to OCI hook binaries in the *guest rootfs*.
 # This does not affect host-side hooks which must instead be added to
@@ -260,30 +240,31 @@ block_device_driver = "virtio-blk-pci"
 # https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
 # Warnings will be logged if any error is encountered while scanning for hooks,
 # but it will not abort container execution.
-#guest_hook_path = "/usr/share/oci/hooks"
+# Recommended value when enabling: "/usr/share/oci/hooks"
+guest_hook_path = ""

 # Enable swap in the guest. Default false.
 # When enable_guest_swap is enabled, insert a raw file to the guest as the swap device.
-#enable_guest_swap = true
+enable_guest_swap = false

 # If enable_guest_swap is enabled, the swap device will be created in the guest
 # at this path. Default "/run/kata-containers/swap".
-#guest_swap_path = "/run/kata-containers/swap"
+guest_swap_path = "/run/kata-containers/swap"

 # The percentage of the total memory to be used as swap device.
 # Default 100.
-#guest_swap_size_percent = 100
+guest_swap_size_percent = 100

 # The threshold in seconds to create swap device in the guest.
 # Kata will wait guest_swap_create_threshold_secs seconds before creating swap device.
 # Default 60.
-#guest_swap_create_threshold_secs = 60
+guest_swap_create_threshold_secs = 60

 [agent.@PROJECT_TYPE@]
-container_pipe_size=@PIPESIZE@
+container_pipe_size = @PIPESIZE@
 # If enabled, make the agent display debug-level messages.
 # (default: disabled)
-#enable_debug = true
+enable_debug = false

 # Enable agent tracing.
 #
@@ -297,18 +278,18 @@ container_pipe_size=@PIPESIZE@
 #   increasing the container shutdown time slightly.
 #
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Enable debug console.

 # If enabled, user can connect guest OS running inside hypervisor
 # through "kata-runtime exec <sandbox-id>" command

-#debug_console_enabled = true
+debug_console_enabled = false

 # Agent dial timeout in millisecond.
 # (default: 10)
-#dial_timeout_ms = 10
+dial_timeout_ms = 10

 # Agent reconnect timeout in millisecond.
 # Retry times = reconnect_timeout_ms / dial_timeout_ms (default: 300)
@@ -317,28 +298,28 @@ container_pipe_size=@PIPESIZE@
 # You'd better not change the value of dial_timeout_ms, unless you have an
 # idea of what you are doing.
 # (default: 3000)
-#reconnect_timeout_ms = 3000
+reconnect_timeout_ms = 3000

 [agent.@PROJECT_TYPE@.mem_agent]
 # Control the mem-agent function enable or disable.
 # Default to false
-#mem_agent_enable = true
+mem_agent_enable = false

 # Control the mem-agent memcg function disable or enable
 # Default to false
-#memcg_disable = false
+memcg_disable = false

 # Control the mem-agent function swap enable or disable.
 # Default to false
-#memcg_swap = false
+memcg_swap = false

 # Control the mem-agent function swappiness max number.
 # Default to 50
-#memcg_swappiness_max = 50
+memcg_swappiness_max = 50

 # Control the mem-agent memcg function wait period seconds
 # Default to 600
-#memcg_period_secs = 600
+memcg_period_secs = 600

 # Control the mem-agent memcg wait period PSI percent limit.
 # If the percentage of memory and IO PSI stall time within
@@ -346,7 +327,7 @@ container_pipe_size=@PIPESIZE@
 # then the aging and eviction for this cgroup will not be
 # executed after this waiting period.
 # Default to 1
-#memcg_period_psi_percent_limit = 1
+memcg_period_psi_percent_limit = 1

 # Control the mem-agent memcg eviction PSI percent limit.
 # If the percentage of memory and IO PSI stall time for a cgroup
@@ -354,44 +335,44 @@ container_pipe_size=@PIPESIZE@
 # this cgroup will immediately stop and will not resume until
 # the next memcg waiting period.
 # Default to 1
-#memcg_eviction_psi_percent_limit = 1
+memcg_eviction_psi_percent_limit = 1

 # Control the mem-agent memcg eviction run aging count min.
 # A cgroup will only perform eviction when the number of aging cycles
 # in memcg is greater than or equal to memcg_eviction_run_aging_count_min.
 # Default to 3
-#memcg_eviction_run_aging_count_min = 3
+memcg_eviction_run_aging_count_min = 3

 # Control the mem-agent compact function disable or enable
 # Default to false
-#compact_disable = false
+compact_disable = false

 # Control the mem-agent compaction function wait period seconds
 # Default to 600
-#compact_period_secs = 600
+compact_period_secs = 600

 # Control the mem-agent compaction function wait period PSI percent limit.
 # If the percentage of memory and IO PSI stall time within
 # the compaction waiting period exceeds this value,
 # then the compaction will not be executed after this waiting period.
 # Default to 1
-#compact_period_psi_percent_limit = 1
+compact_period_psi_percent_limit = 1

 # Control the mem-agent compaction function compact PSI percent limit.
 # During compaction, the percentage of memory and IO PSI stall time
 # is checked every second. If this percentage exceeds
 # compact_psi_percent_limit, the compaction process will stop.
 # Default to 5
-#compact_psi_percent_limit = 5
+compact_psi_percent_limit = 5

 # Control the maximum number of seconds for each compaction of mem-agent compact function.
-# Default to 180
-#compact_sec_max = 180
+# Default to 300
+compact_sec_max = 300

 # Control the mem-agent compaction function compact order.
 # compact_order is use with compact_threshold.
 # Default to 9
-#compact_order = 9
+compact_order = 9

 # Control the mem-agent compaction function compact threshold.
 # compact_threshold is the pages number.
@@ -404,7 +385,7 @@ container_pipe_size=@PIPESIZE@
 # since the previous compaction.
 # then the system should initiate another round of memory compaction.
 # Default to 1024
-#compact_threshold = 1024
+compact_threshold = 1024

 # Control the mem-agent compaction function force compact times.
 # After one compaction, if there has not been a compaction within
@@ -413,7 +394,9 @@ container_pipe_size=@PIPESIZE@
 # If compact_force_times is set to 0, will do force compaction each time.
 # If compact_force_times is set to 18446744073709551615, will never do force compaction.
 # Default to 18446744073709551615
-#compact_force_times = 18446744073709551615
+# Note: u64::MAX (18446744073709551615) is outside the signed 64-bit integer range that TOML parsers accept,
+# so 9223372036854775807 (i64::MAX) is used instead; any value >= i64::MAX means "never do force compaction".
+compact_force_times = 9223372036854775807

 # Create Container Request Timeout
 # This timeout value is used to set the maximum duration for the agent to process a CreateContainerRequest.
@@ -426,20 +409,20 @@ container_pipe_size=@PIPESIZE@
 # - runtime-request-timeout: The timeout value specified in the Kubelet configuration described as the link below:
 # (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout)
 # Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
-# create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
+create_container_timeout = @DEFCREATECONTAINERTIMEOUT@

 [runtime]
 # If enabled, the runtime will log additional debug messages to the
 # system log
 # (default: disabled)
-#enable_debug = true
+enable_debug = false

 # If enabled, enabled, it means that 1) if the runtime exits abnormally,
 # the cleanup process will be skipped, and 2) the runtime will not exit
 # even if the health check fails.
 # This option is typically used to retain abnormal information for debugging.
 # (default: false)
-#keep_abnormal = true
+keep_abnormal = false

 # Internetworking model
 # Determines how the VM should be connected to the
@@ -464,33 +447,33 @@ container_pipe_size=@PIPESIZE@
 #     Uses tc filter rules to redirect traffic from the network interface
 #     provided by plugin to a tap interface connected to the VM.
 #
-internetworking_model="@DEFNETWORKMODEL_CLH@"
+internetworking_model = "@DEFNETWORKMODEL_CLH@"

-name="@RUNTIMENAME@"
-hypervisor_name="@HYPERVISOR_CLH@"
-agent_name="@PROJECT_TYPE@"
+name = "@RUNTIMENAME@"
+hypervisor_name = "@HYPERVISOR_CLH@"
+agent_name = "@PROJECT_TYPE@"

 # disable guest seccomp
 # Determines whether container seccomp profiles are passed to the virtual
 # machine and applied by the kata agent. If set to true, seccomp is not applied
 # within the guest
 # (default: true)
-disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
+disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@

 # If enabled, the runtime will create opentracing.io traces and spans.
 # (See https://www.jaegertracing.io/docs/getting-started).
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Set the full url to the Jaeger HTTP Thrift collector.
 # The default if not set will be "http://localhost:14268/api/traces"
-#jaeger_endpoint = ""
+jaeger_endpoint = ""

 # Sets the username to be used if basic auth is required for Jaeger.
-#jaeger_user = ""
+jaeger_user = ""

 # Sets the password to be used if basic auth is required for Jaeger.
-#jaeger_password = ""
+jaeger_password = ""

 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
@@ -498,7 +481,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
 # (like OVS) directly.
 # (default: false)
-#disable_new_netns = true
+disable_new_netns = false

 # if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
 # The container cgroups in the host are not created, just one single cgroup per sandbox.
@@ -506,18 +489,18 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
 # The sandbox cgroup is constrained if there is no container type annotation.
 # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
-sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_CLH@
+sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY_CLH@

 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
 # they may break compatibility, and are prepared for a big version bump.
 # Supported experimental features:
 # (default: [])
-experimental=@DEFAULTEXPFEATURES@
+experimental = @DEFAULTEXPFEATURES@

 # If enabled, user can run pprof tools with shim v2 process through kata-monitor.
 # (default: false)
-# enable_pprof = true
+enable_pprof = false

 # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
 # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
@@ -526,7 +509,7 @@ experimental=@DEFAULTEXPFEATURES@
 # - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
 #   does not yet support sandbox sizing annotations.
 # - When running single containers using a tool like ctr, container sizing information will be available.
-static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_CLH@
+static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_CLH@

 # If specified, sandbox_bind_mounts identifieds host paths to be mounted(ro, rw) into the sandboxes shared path.
 # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
@@ -536,7 +519,7 @@ static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_CLH@
 # - "/path/to", default readonly mode.
 # - "/path/to:ro", readonly mode.
 # - "/path/to:rw", readwrite mode.
-sandbox_bind_mounts=@DEFBINDMOUNTS@
+sandbox_bind_mounts = @DEFBINDMOUNTS@

 # Base directory of directly attachable network config.
 # Network devices for VM-based containers are allowed to be placed in the
--- a/src/runtime-rs/config/configuration-dragonball.toml.in
+++ b/src/runtime-rs/config/configuration-dragonball.toml.in
@@ -16,13 +16,12 @@ path = "@DBPATH@"
 ctlpath = "@DBCTLPATH@"
 kernel = "@KERNELPATH_DB@"
 image = "@IMAGEPATH@"
-# initrd = "@INITRDPATH@"

 # rootfs filesystem type:
 #   - ext4 (default)
 #   - xfs
 #   - erofs
-rootfs_type=@DEFROOTFSTYPE@
+rootfs_type = @DEFROOTFSTYPE@

 
 # Block storage driver to be used for the VM rootfs is backed
@@ -43,7 +42,7 @@ valid_hypervisor_paths = @DBVALIDHYPERVISORPATHS@
 # List of valid annotations values for ctlpath
 # The default if not set is empty (all annotations rejected.)
 # Your distribution recommends: 
-# valid_ctlpaths = 
+valid_ctlpaths = []

 # Optional space-separated list of options to pass to the guest kernel.
 # For example, use `kernel_params = "vsyscall=emulate"` if you are having
@@ -106,7 +105,7 @@ default_bridges = @DEFBRIDGES@
 # the VM.
 #
 # Default false
-#reclaim_guest_freed_memory = true
+reclaim_guest_freed_memory = false

 # Default memory size in MiB for SB/VM.
 # If unspecified then it will be set @DEFMEMSZ@ MiB.
@@ -129,7 +128,7 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_DB@"
 # to enable debug output where available.
 #
 # Default false
-#enable_debug = true
+enable_debug = false

 # The log level will be applied to hypervisor.
 # Possible values are:
@@ -140,17 +139,18 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_DB@"
 # - error
 # - critical
 # Default: info
-#log_level = "info"
+log_level = "info"

 # Disable the customizations done in the runtime when it detects
 # that it is running on top a VMM. This will result in the runtime
 # behaving as it would when running on bare metal.
 #
-#disable_nesting_checks = true
+# Default false
+disable_nesting_checks = false

 # If host doesn't support vhost_net, set to true. Thus we won't create vhost fds for nics.
 # Default false
-#disable_vhost_net = true
+disable_vhost_net = false

 # Path to OCI hook binaries in the *guest rootfs*.
 # This does not affect host-side hooks which must instead be added to
@@ -167,7 +167,8 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_DB@"
 # https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
 # Warnings will be logged if any error is encountered while scanning for hooks,
 # but it will not abort container execution.
-#guest_hook_path = "/usr/share/oci/hooks"
+# Recommended value when enabling: "/usr/share/oci/hooks"
+guest_hook_path = ""

 # Shared file system type:
 #   - inline-virtio-fs (default)
@@ -209,7 +210,13 @@ virtio_fs_cache = "@DEFVIRTIOFSCACHE@"
 # Specifies cache-related options for block devices.
 # Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
 # Default false
-#block_device_cache_direct = true
+block_device_cache_direct = false
+
+# Virtio queue size. Size: byte. default 128
+queue_size = 128
+
+# Block device multi-queue, default 1
+num_queues = 1

 # Enable huge pages for VM RAM, default false
 # Enabling this will result in the VM memory
@@ -217,33 +224,33 @@ virtio_fs_cache = "@DEFVIRTIOFSCACHE@"
 # This is useful when you want to use vhost-user network
 # stacks within the container. This will automatically
 # result in memory pre allocation
-#enable_hugepages = true
+enable_hugepages = false

 # Disable the 'seccomp' feature from Cloud Hypervisor, firecracker or dragonball, default false
-# disable_seccomp = true
+disable_seccomp = false

 # Enable swap in the guest. Default false.
 # When enable_guest_swap is enabled, insert a raw file to the guest as the swap device.
-#enable_guest_swap = true
+enable_guest_swap = false

 # If enable_guest_swap is enabled, the swap device will be created in the guest
 # at this path. Default "/run/kata-containers/swap".
-#guest_swap_path = "/run/kata-containers/swap"
+guest_swap_path = "/run/kata-containers/swap"

 # The percentage of the total memory to be used as swap device.
 # Default 100.
-#guest_swap_size_percent = 100
+guest_swap_size_percent = 100

 # The threshold in seconds to create swap device in the guest.
 # Kata will wait guest_swap_create_threshold_secs seconds before creating swap device.
 # Default 60.
-#guest_swap_create_threshold_secs = 60
+guest_swap_create_threshold_secs = 60

 [agent.@PROJECT_TYPE@]
-container_pipe_size=@PIPESIZE@
+container_pipe_size = @PIPESIZE@
 # If enabled, make the agent display debug-level messages.
 # (default: disabled)
-#enable_debug = true
+enable_debug = false

 # The log level will be applied to agent.
 # Possible values are:
@@ -254,7 +261,7 @@ container_pipe_size=@PIPESIZE@
 # - error
 # - critical
 # (default: info)
-#log_level = "info"
+log_level = "info"

 # Enable agent tracing.
 #
@@ -268,18 +275,18 @@ container_pipe_size=@PIPESIZE@
 #   increasing the container shutdown time slightly.
 #
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Enable debug console.

 # If enabled, user can connect guest OS running inside hypervisor
 # through "kata-runtime exec <sandbox-id>" command

-#debug_console_enabled = true
+debug_console_enabled = false

 # Agent dial timeout in millisecond.
 # (default: 10)
-#dial_timeout_ms = 10
+dial_timeout_ms = 10

 # Agent reconnect timeout in millisecond.
 # Retry times = reconnect_timeout_ms / dial_timeout_ms (default: 300)
@@ -288,7 +295,7 @@ container_pipe_size=@PIPESIZE@
 # You'd better not change the value of dial_timeout_ms, unless you have an
 # idea of what you are doing.
 # (default: 3000)
-#reconnect_timeout_ms = 3000
+reconnect_timeout_ms = 3000

 # Create Container Request Timeout
 # This timeout value is used to set the maximum duration for the agent to process a CreateContainerRequest.
@@ -301,28 +308,28 @@ container_pipe_size=@PIPESIZE@
 # - runtime-request-timeout: The timeout value specified in the Kubelet configuration described as the link below:
 # (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout)
 # Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
-# create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
+create_container_timeout = @DEFCREATECONTAINERTIMEOUT@

 [agent.@PROJECT_TYPE@.mem_agent]
 # Control the mem-agent function enable or disable.
 # Default to false
-#mem_agent_enable = true
+mem_agent_enable = false

 # Control the mem-agent memcg function disable or enable
 # Default to false
-#memcg_disable = false
+memcg_disable = false

 # Control the mem-agent function swap enable or disable.
 # Default to false
-#memcg_swap = false
+memcg_swap = false

 # Control the mem-agent function swappiness max number.
 # Default to 50
-#memcg_swappiness_max = 50
+memcg_swappiness_max = 50

 # Control the mem-agent memcg function wait period seconds
 # Default to 600
-#memcg_period_secs = 600
+memcg_period_secs = 600

 # Control the mem-agent memcg wait period PSI percent limit.
 # If the percentage of memory and IO PSI stall time within
@@ -330,7 +337,7 @@ container_pipe_size=@PIPESIZE@
 # then the aging and eviction for this cgroup will not be
 # executed after this waiting period.
 # Default to 1
-#memcg_period_psi_percent_limit = 1
+memcg_period_psi_percent_limit = 1

 # Control the mem-agent memcg eviction PSI percent limit.
 # If the percentage of memory and IO PSI stall time for a cgroup
@@ -338,44 +345,44 @@ container_pipe_size=@PIPESIZE@
 # this cgroup will immediately stop and will not resume until
 # the next memcg waiting period.
 # Default to 1
-#memcg_eviction_psi_percent_limit = 1
+memcg_eviction_psi_percent_limit = 1

 # Control the mem-agent memcg eviction run aging count min.
 # A cgroup will only perform eviction when the number of aging cycles
 # in memcg is greater than or equal to memcg_eviction_run_aging_count_min.
 # Default to 3
-#memcg_eviction_run_aging_count_min = 3
+memcg_eviction_run_aging_count_min = 3

 # Control the mem-agent compact function disable or enable
 # Default to false
-#compact_disable = false
+compact_disable = false

 # Control the mem-agent compaction function wait period seconds
 # Default to 600
-#compact_period_secs = 600
+compact_period_secs = 600

 # Control the mem-agent compaction function wait period PSI percent limit.
 # If the percentage of memory and IO PSI stall time within
 # the compaction waiting period exceeds this value,
 # then the compaction will not be executed after this waiting period.
 # Default to 1
-#compact_period_psi_percent_limit = 1
+compact_period_psi_percent_limit = 1

 # Control the mem-agent compaction function compact PSI percent limit.
 # During compaction, the percentage of memory and IO PSI stall time
 # is checked every second. If this percentage exceeds
 # compact_psi_percent_limit, the compaction process will stop.
 # Default to 5
-#compact_psi_percent_limit = 5
+compact_psi_percent_limit = 5

 # Control the maximum number of seconds for each compaction of mem-agent compact function.
 # Default to 180
-#compact_sec_max = 180
+compact_sec_max = 180

 # Control the mem-agent compaction function compact order.
 # compact_order is used with compact_threshold.
 # Default to 9
-#compact_order = 9
+compact_order = 9

 # Control the mem-agent compaction function compact threshold.
 # compact_threshold is the pages number.
@@ -388,22 +395,22 @@ container_pipe_size=@PIPESIZE@
 # since the previous compaction.
 # then the system should initiate another round of memory compaction.
 # Default to 1024
-#compact_threshold = 1024
+compact_threshold = 1024

 # Control the mem-agent compaction function force compact times.
 # After one compaction, if there has not been a compaction within
 # the next compact_force_times times, a compaction will be forced
 # regardless of the system's memory situation.
 # If compact_force_times is set to 0, will do force compaction each time.
-# If compact_force_times is set to 18446744073709551615, will never do force compaction.
-# Default to 18446744073709551615
-#compact_force_times = 18446744073709551615
+# If compact_force_times is set to 9223372036854775807, will never do force compaction.
+# Default to 9223372036854775807
+compact_force_times = 9223372036854775807

 [runtime]
 # If enabled, the runtime will log additional debug messages to the
 # system log
 # (default: disabled)
-#enable_debug = true
+enable_debug = false

 # The log level will be applied to runtimes.
 # Possible values are:
@@ -414,14 +421,14 @@ container_pipe_size=@PIPESIZE@
 # - error
 # - critical
 # (default: info)
-#log_level = "info"
+log_level = "info"

 # If enabled, it means that 1) if the runtime exits abnormally,
 # the cleanup process will be skipped, and 2) the runtime will not exit
 # even if the health check fails.
 # This option is typically used to retain abnormal information for debugging.
 # (default: false)
-#keep_abnormal = true
+keep_abnormal = false

 # Internetworking model
 # Determines how the VM should be connected to the
@@ -446,33 +453,33 @@ container_pipe_size=@PIPESIZE@
 #     Uses tc filter rules to redirect traffic from the network interface
 #     provided by plugin to a tap interface connected to the VM.
 #
-internetworking_model="@DEFNETWORKMODEL_DB@"
+internetworking_model = "@DEFNETWORKMODEL_DB@"

-name="@RUNTIMENAME@"
-hypervisor_name="@HYPERVISOR_DB@"
-agent_name="@PROJECT_TYPE@"
+name = "@RUNTIMENAME@"
+hypervisor_name = "@HYPERVISOR_DB@"
+agent_name = "@PROJECT_TYPE@"

 # disable guest seccomp
 # Determines whether container seccomp profiles are passed to the virtual
 # machine and applied by the kata agent. If set to true, seccomp is not applied
 # within the guest
 # (default: true)
-disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
+disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@

 # If enabled, the runtime will create opentracing.io traces and spans.
 # (See https://www.jaegertracing.io/docs/getting-started).
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Set the full url to the Jaeger HTTP Thrift collector.
 # The default if not set will be "http://localhost:14268/api/traces"
-#jaeger_endpoint = ""
+jaeger_endpoint = ""

 # Sets the username to be used if basic auth is required for Jaeger.
-#jaeger_user = ""
+jaeger_user = ""

 # Sets the password to be used if basic auth is required for Jaeger.
-#jaeger_password = ""
+jaeger_password = ""

 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
@@ -480,7 +487,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
 # (like OVS) directly.
 # (default: false)
-#disable_new_netns = true
+disable_new_netns = false

 # if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
 # The container cgroups in the host are not created, just one single cgroup per sandbox.
@@ -488,18 +495,18 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
 # The sandbox cgroup is constrained if there is no container type annotation.
 # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
-sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_DB@
+sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY_DB@

 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
 # they may break compatibility, and are prepared for a big version bump.
 # Supported experimental features:
 # (default: [])
-experimental=@DEFAULTEXPFEATURES@
+experimental = @DEFAULTEXPFEATURES@

 # If enabled, user can run pprof tools with shim v2 process through kata-monitor.
 # (default: false)
-# enable_pprof = true
+enable_pprof = false

 # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
 # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
@@ -508,7 +515,7 @@ experimental=@DEFAULTEXPFEATURES@
 # - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
 #   does not yet support sandbox sizing annotations.
 # - When running single containers using a tool like ctr, container sizing information will be available.
-static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_DB@
+static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_DB@

 # If specified, sandbox_bind_mounts identifies host paths to be mounted(ro, rw) into the sandboxes shared path.
 # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
@@ -518,7 +525,7 @@ static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_DB@
 # - "/path/to", default readonly mode.
 # - "/path/to:ro", readonly mode.
 # - "/path/to:rw", readwrite mode.
-sandbox_bind_mounts=@DEFBINDMOUNTS@
+sandbox_bind_mounts = @DEFBINDMOUNTS@

 # Base directory of directly attachable network config.
 # Network devices for VM-based containers are allowed to be placed in the
@@ -534,4 +541,4 @@ dan_conf = "@DEFDANCONF@"
 use_passfd_io = true

 # If fd passthrough io is enabled, the runtime will attempt to use the specified port instead of the default port.
-# passfd_listener_port = 1027
+passfd_listener_port = 1027
--- a/src/runtime-rs/config/configuration-qemu-coco-dev-runtime-rs.toml.in
+++ b/src/runtime-rs/config/configuration-qemu-coco-dev-runtime-rs.toml.in
@@ -16,14 +16,13 @@
 path = "@QEMUPATH@"
 kernel = "@KERNELPATH_COCO@"
 image = "@IMAGECONFIDENTIALPATH@"
-# initrd = "@INITRDCONFIDENTIALPATH@"
 machine_type = "@MACHINETYPE@"

 # rootfs filesystem type:
 #   - ext4 (default)
 #   - xfs
 #   - erofs
-rootfs_type=@DEFROOTFSTYPE@
+rootfs_type = @DEFROOTFSTYPE@

 # Block storage driver to be used for the VM rootfs is backed
 # by a block device. This is virtio-blk-pci, virtio-blk-mmio or nvdimm
@@ -43,18 +42,12 @@ vm_rootfs_driver = "@VMROOTFSDRIVER_QEMU@"
 #   - NVDIMM devices
 #
 # Default false
-# confidential_guest = true
-
-# Choose AMD SEV-SNP confidential guests
-# In case of using confidential guests on AMD hardware that supports both SEV
-# and SEV-SNP, the following enables SEV-SNP guests. SEV guests are default.
-# Default false
-# sev_snp_guest = true
+confidential_guest = false

 # Enable running QEMU VMM as a non-root user.
 # By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as
 # a non-root random user. See documentation for the limitations of this mode.
-# rootless = true
+rootless = false

 # List of valid annotation names for the hypervisor
 # Each member of the list is a regular expression, which is the base name
@@ -92,7 +85,7 @@ firmware_volume = "@FIRMWAREVOLUMEPATH@"
 # Machine accelerators
 # comma-separated list of machine accelerators to pass to the hypervisor.
 # For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"`
-machine_accelerators="@MACHINEACCELERATORS@"
+machine_accelerators = "@MACHINEACCELERATORS@"

 # Qemu seccomp sandbox feature
 # comma-separated list of seccomp sandbox features to control the syscall access.
@@ -100,12 +93,13 @@ machine_accelerators="@MACHINEACCELERATORS@"
 # Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
 # Another note: enabling this feature may reduce performance, you may enable
 # /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html
-#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@"
+# Recommended value when enabling: "on,obsolete=deny,spawn=deny,resourcecontrol=deny"
+seccompsandbox = "@DEFSECCOMPSANDBOXPARAM@"

 # CPU features
 # comma-separated list of cpu features to pass to the cpu
 # For example, `cpu_features = "pmu=off,vmx=off"`
-cpu_features="@CPUFEATURES@"
+cpu_features = "@CPUFEATURES@"

 # Default number of vCPUs per SB/VM:
 # unspecified or 0                --> will be set to @DEFVCPUS@
@@ -151,7 +145,7 @@ default_bridges = @DEFBRIDGES@
 # the VM.
 #
 # Default false
-#reclaim_guest_freed_memory = true
+reclaim_guest_freed_memory = false

 # Default memory size in MiB for SB/VM.
 # If unspecified then it will be set @DEFMEMSZ@ MiB.
@@ -160,7 +154,7 @@ default_memory = @DEFMEMSZ@
 # Default memory slots per SB/VM.
 # If unspecified then it will be set @DEFMEMSLOTS@.
 # This will determine the number of times that memory can be hot-added to the sandbox/VM.
-#memory_slots = @DEFMEMSLOTS@
+memory_slots = @DEFMEMSLOTS@

 # Default maximum memory in MiB per SB / VM
 # unspecified or == 0           --> will be set to the actual amount of physical RAM
@@ -173,13 +167,13 @@ default_maxmemory = @DEFMAXMEMSZ@
 # If set block storage driver (block_device_driver) to "nvdimm",
 # should set memory_offset to the size of block device.
 # Default 0
-#memory_offset = 0
+memory_offset = 0

 # Specifies virtio-mem will be enabled or not.
 # Please note that this option should be used with the command
 # "echo 1 > /proc/sys/vm/overcommit_memory".
 # Default false
-#enable_virtio_mem = true
+enable_virtio_mem = false

 # Disable block device from being used for a container's rootfs.
 # In case of a storage driver like devicemapper where a container's
@@ -256,17 +250,17 @@ block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@"

 # Specifies cache-related options will be set to block devices or not.
 # Default false
-#block_device_cache_set = true
+block_device_cache_set = false

 # Specifies cache-related options for block devices.
 # Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
 # Default false
-#block_device_cache_direct = true
+block_device_cache_direct = false

 # Specifies cache-related options for block devices.
 # Denotes whether flush requests for the device are ignored.
 # Default false
-#block_device_cache_noflush = true
+block_device_cache_noflush = false

 # Enable iothreads (data-plane) to be used. This causes IO to be
 # handled in a separate IO thread. This is currently only implemented
@@ -281,7 +275,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # upfront or in the cases where you want memory latencies
 # to be very predictable
 # Default false
-#enable_mem_prealloc = true
+enable_mem_prealloc = false

 # Enable huge pages for VM RAM, default false
 # Enabling this will result in the VM memory
@@ -289,7 +283,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # This is useful when you want to use vhost-user network
 # stacks within the container. This will automatically
 # result in memory pre allocation
-#enable_hugepages = true
+enable_hugepages = false

 # Enable vhost-user storage device, default false
 # Enabling this will result in some Linux reserved block type
@@ -306,11 +300,11 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
 # Enabling this will result in the VM having a vIOMMU device
 # This will also add the following options to the kernel's
 # command line: intel_iommu=on,iommu=pt
-#enable_iommu = true
+enable_iommu = false

 # Enable IOMMU_PLATFORM, default false
 # Enabling this will result in the VM device having iommu_platform=on set
-#enable_iommu_platform = true
+enable_iommu_platform = false

 # List of valid annotations values for the vhost user store path
 # The default if not set is empty (all annotations rejected.)
@@ -326,7 +320,7 @@ vhost_user_reconnect_timeout_sec = 0
 # will disable this feature. In the case of virtio-fs, this is enabled
 # automatically and '/dev/shm' is used as the backing folder.
 # This option will be ignored if VM templating is enabled.
-#file_mem_backend = "@DEFFILEMEMBACKEND@"
+file_mem_backend = "@DEFFILEMEMBACKEND@"

 # List of valid annotations values for the file_mem_backend annotation
 # The default if not set is empty (all annotations rejected.)
@@ -341,7 +335,7 @@ pflashes = []
 # to enable debug output where available.
 #
 # Default false
-#enable_debug = true
+enable_debug = false

 # This option allows to add an extra HMP or QMP socket when `enable_debug = true`
 #
@@ -356,17 +350,18 @@ pflashes = []
 #
 # If set to the empty string "", no extra monitor socket is added. This is
 # the default.
-#extra_monitor_socket = "hmp"
+extra_monitor_socket = ""

 # Disable the customizations done in the runtime when it detects
 # that it is running on top a VMM. This will result in the runtime
 # behaving as it would when running on bare metal.
 #
-#disable_nesting_checks = true
+# Default false
+disable_nesting_checks = false

 # This is the msize used for 9p shares. It is the number of bytes
 # used for 9p packet payload.
-#msize_9p = @DEFMSIZE9P@
+msize_9p = @DEFMSIZE9P@

 # If false and nvdimm is supported, use nvdimm device to plug guest image.
 # Otherwise virtio-block device is used.
@@ -374,44 +369,44 @@ pflashes = []
 # nvdimm is not supported when `confidential_guest = true`.
 #
 # Default is false
-#disable_image_nvdimm = true
+disable_image_nvdimm = false

 # VFIO devices are hotplugged on a bridge by default.
 # Enable hotplugging on root bus. This may be required for devices with
 # a large PCI bar, as this is a current limitation with hotplugging on
 # a bridge.
 # Default false
-#hotplug_vfio_on_root_bus = true
+hotplug_vfio_on_root_bus = false

 # Enable hot-plugging of VFIO devices to a bridge-port,
 # root-port or switch-port.
 # The default setting is  "no-port"
-#hot_plug_vfio = "root-port"
+hot_plug_vfio = "no-port"

 # In a confidential compute environment hot-plugging can compromise
 # security.
 # Enable cold-plugging of VFIO devices to a bridge-port,
 # root-port or switch-port.
 # The default setting is  "no-port", which means disabled.
-#cold_plug_vfio = "root-port"
+cold_plug_vfio = "no-port"

 # Before hot plugging a PCIe device, you need to add a pcie_root_port device.
 # Use this parameter when using some large PCI bar devices, such as Nvidia GPU
 # The value means the number of pcie_root_port
 # This value is valid when hotplug_vfio_on_root_bus is true and machine_type is "q35"
 # Default 0
-#pcie_root_port = 2
+pcie_root_port = 0

 # Before hot plugging a PCIe device onto a switch port, you need to add a pcie_switch_port device first.
 # Use this parameter when using some large PCI bar devices, such as Nvidia GPU
 # The value means how many devices attached onto pcie_switch_port will be created.
 # This value is valid when hotplug_vfio_on_root_bus is true, and machine_type is "q35"
 # Default 0
-#pcie_switch_port = 2
+pcie_switch_port = 0

 # If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
 # security (vhost-net runs ring0) for network I/O performance.
-#disable_vhost_net = true
+disable_vhost_net = false

 #
 # Default entropy source.
@@ -423,7 +418,7 @@ pflashes = []
 # The source of entropy /dev/urandom is non-blocking and provides a
 # generally acceptable source of entropy. It should work well for pretty much
 # all practical purposes.
-#entropy_source= "@DEFENTROPYSOURCE@"
+entropy_source = "@DEFENTROPYSOURCE@"

 # List of valid annotations values for entropy_source
 # The default if not set is empty (all annotations rejected.)
@@ -445,29 +440,19 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
 # Warnings will be logged if any error is encountered while scanning for hooks,
 # but it will not abort container execution.
-#guest_hook_path = "/usr/share/oci/hooks"
-
-# Enable connection to Quote Generation Service (QGS)
-# The "tdx_quote_generation_service_socket_port" parameter configures how QEMU connects to the TDX Quote Generation Service (QGS).
-# This connection is essential for Trusted Domain (TD) attestation, as QGS signs the TDREPORT sent by QEMU via the GetQuote hypercall.
-# By default QGS runs on vsock port 4050, but can be modified by the host admin. For QEMU's tdx-guest object, this connection needs to
-# be specified in a JSON format, for example:
-# -object '{"qom-type":"tdx-guest","id":"tdx","quote-generation-socket":{"type":"vsock","cid":"2","port":"4050"}}'
-# It's important to note that setting "tdx_quote_generation_service_socket_port" to 0 enables communication via Unix Domain Sockets (UDS).
-# To activate UDS, the QGS service itself must be launched with the "-port=0" parameter and the UDS will always be located at /var/run/tdx-qgs/qgs.socket.
-# -object '{"qom-type":"tdx-guest","id":"tdx","quote-generation-socket":{"type":"unix","path":"/var/run/tdx-qgs/qgs.socket"}}'
-# tdx_quote_generation_service_socket_port = @QEMUTDXQUOTEGENERATIONSERVICESOCKETPORT@
+# Recommended value when enabling: "/usr/share/oci/hooks"
+guest_hook_path = ""

 #
 # Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#rx_rate_limiter_max_rate = 0
+rx_rate_limiter_max_rate = 0
 # Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block)
 # to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#tx_rate_limiter_max_rate = 0
+tx_rate_limiter_max_rate = 0

 # Set where to save the guest memory dump file.
 # If set, when GUEST_PANICKED event occurred,
@@ -477,9 +462,10 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # The dumped file(also called vmcore) can be processed with crash or gdb.
 #
 # WARNING:
-#   Dump guest’s memory can take very long depending on the amount of guest memory
+#   Dump guest's memory can take very long depending on the amount of guest memory
 #   and use much disk space.
-#guest_memory_dump_path="/var/crash/kata"
+# Recommended value when enabling: "/var/crash/kata"
+guest_memory_dump_path = ""

 # If enable paging.
 # Basically, if you want to use "gdb" rather than "crash",
@@ -490,17 +476,17 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 #guest_memory_dump_paging=false

 # use legacy serial for guest console if available and implemented for architecture. Default false
-#use_legacy_serial = true
+use_legacy_serial = false

 # disable applying SELinux on the VMM process (default false)
-disable_selinux=@DEFDISABLESELINUX@
+disable_selinux = @DEFDISABLESELINUX@

 # disable applying SELinux on the container process
 # If set to false, the type `container_t` is applied to the container process by default.
 # Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
 # with `SELINUX=yes`.
 # (default: true)
-disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
+disable_guest_selinux = @DEFDISABLEGUESTSELINUX@


 [factory]
@@ -515,41 +501,17 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 # Note: Requires "initrd=" to be set ("image=" is not supported).
 #
 # Default false
-#enable_template = true
+enable_template = false

 # Specifies the path of template.
 #
 # Default "/run/vc/vm/template"
-#template_path = "/run/vc/vm/template"
-
-# The number of caches of VMCache:
-# unspecified or == 0   --> VMCache is disabled
-# > 0                   --> will be set to the specified number
-#
-# VMCache is a function that creates VMs as caches before using it.
-# It helps speed up new container creation.
-# The function consists of a server and some clients communicating
-# through Unix socket.  The protocol is gRPC in protocols/cache/cache.proto.
-# The VMCache server will create some VMs and cache them by factory cache.
-# It will convert the VM to gRPC format and transport it when gets
-# requestion from clients.
-# Factory grpccache is the VMCache client.  It will request gRPC format
-# VM and convert it back to a VM.  If VMCache function is enabled,
-# kata-runtime will request VM from factory grpccache when it creates
-# a new sandbox.
-#
-# Default 0
-#vm_cache_number = 0
-
-# Specify the address of the Unix socket that is used by VMCache.
-#
-# Default /var/run/kata-containers/cache.sock
-#vm_cache_endpoint = "/var/run/kata-containers/cache.sock"
+template_path = "/run/vc/vm/template"

 [agent.@PROJECT_TYPE@]
 # If enabled, make the agent display debug-level messages.
 # (default: disabled)
-#enable_debug = true
+enable_debug = false

 # Enable agent tracing.
 #
@@ -563,7 +525,7 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 #   increasing the container shutdown time slightly.
 #
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Comma separated list of kernel modules and their parameters.
 # These modules will be loaded in the guest kernel using modprobe(8).
@@ -576,18 +538,18 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 #  * The module is not available in the guest or it doesn't meet the guest kernel
 #    requirements, like architecture and version.
 #
-kernel_modules=[]
+kernel_modules = []

 # Enable debug console.

 # If enabled, user can connect guest OS running inside hypervisor
 # through "kata-runtime exec <sandbox-id>" command

-#debug_console_enabled = true
+debug_console_enabled = false

 # Agent dial timeout in millisecond.
 # (default: 10)
-#dial_timeout_ms = 10
+dial_timeout_ms = 10

 # Agent reconnect timeout in millisecond.
 # Retry times = reconnect_timeout_ms / dial_timeout_ms (default: 300)
@@ -596,7 +558,7 @@ kernel_modules=[]
 # You'd better not change the value of dial_timeout_ms, unless you have an
 # idea of what you are doing.
 # (default: 3000)
-#reconnect_timeout_ms = 3000
+reconnect_timeout_ms = 3000

 # Create Container Request Timeout
 # This timeout value is used to set the maximum duration for the agent to process a CreateContainerRequest.
@@ -609,28 +571,28 @@ kernel_modules=[]
 # - runtime-request-timeout: The timeout value specified in the Kubelet configuration described as the link below:
 # (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout)
 # Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
-# create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
+create_container_timeout = @DEFCREATECONTAINERTIMEOUT@

 [agent.@PROJECT_TYPE@.mem_agent]
 # Control the mem-agent function enable or disable.
 # Default to false
-#mem_agent_enable = true
+mem_agent_enable = false

 # Control the mem-agent memcg function disable or enable
 # Default to false
-#memcg_disable = false
+memcg_disable = false

 # Control the mem-agent function swap enable or disable.
 # Default to false
-#memcg_swap = false
+memcg_swap = false

 # Control the mem-agent function swappiness max number.
 # Default to 50
-#memcg_swappiness_max = 50
+memcg_swappiness_max = 50

 # Control the mem-agent memcg function wait period seconds
 # Default to 600
-#memcg_period_secs = 600
+memcg_period_secs = 600

 # Control the mem-agent memcg wait period PSI percent limit.
 # If the percentage of memory and IO PSI stall time within
@@ -638,7 +600,7 @@ kernel_modules=[]
 # then the aging and eviction for this cgroup will not be
 # executed after this waiting period.
 # Default to 1
-#memcg_period_psi_percent_limit = 1
+memcg_period_psi_percent_limit = 1

 # Control the mem-agent memcg eviction PSI percent limit.
 # If the percentage of memory and IO PSI stall time for a cgroup
@@ -646,44 +608,44 @@ kernel_modules=[]
 # this cgroup will immediately stop and will not resume until
 # the next memcg waiting period.
 # Default to 1
-#memcg_eviction_psi_percent_limit = 1
+memcg_eviction_psi_percent_limit = 1

 # Control the mem-agent memcg eviction run aging count min.
 # A cgroup will only perform eviction when the number of aging cycles
 # in memcg is greater than or equal to memcg_eviction_run_aging_count_min.
 # Default to 3
-#memcg_eviction_run_aging_count_min = 3
+memcg_eviction_run_aging_count_min = 3

 # Control the mem-agent compact function disable or enable
 # Default to false
-#compact_disable = false
+compact_disable = false

 # Control the mem-agent compaction function wait period seconds
 # Default to 600
-#compact_period_secs = 600
+compact_period_secs = 600

 # Control the mem-agent compaction function wait period PSI percent limit.
 # If the percentage of memory and IO PSI stall time within
 # the compaction waiting period exceeds this value,
 # then the compaction will not be executed after this waiting period.
 # Default to 1
-#compact_period_psi_percent_limit = 1
+compact_period_psi_percent_limit = 1

 # Control the mem-agent compaction function compact PSI percent limit.
 # During compaction, the percentage of memory and IO PSI stall time
 # is checked every second. If this percentage exceeds
 # compact_psi_percent_limit, the compaction process will stop.
 # Default to 5
-#compact_psi_percent_limit = 5
+compact_psi_percent_limit = 5

 # Control the maximum number of seconds for each compaction of mem-agent compact function.
 # Default to 180
-#compact_sec_max = 180
+compact_sec_max = 180

 # Control the mem-agent compaction function compact order.
 # compact_order is used with compact_threshold.
 # Default to 9
-#compact_order = 9
+compact_order = 9

 # Control the mem-agent compaction function compact threshold.
 # compact_threshold is the pages number.
@@ -696,16 +658,16 @@ kernel_modules=[]
 # since the previous compaction.
 # then the system should initiate another round of memory compaction.
 # Default to 1024
-#compact_threshold = 1024
+compact_threshold = 1024

 # Control the mem-agent compaction function force compact times.
 # After one compaction, if there has not been a compaction within
 # the next compact_force_times times, a compaction will be forced
 # regardless of the system's memory situation.
 # If compact_force_times is set to 0, will do force compaction each time.
-# If compact_force_times is set to 18446744073709551615, will never do force compaction.
-# Default to 18446744073709551615
-#compact_force_times = 18446744073709551615
+# If compact_force_times is set to 9223372036854775807, will never do force compaction.
+# Default to 9223372036854775807
+compact_force_times = 9223372036854775807

 # Create Container Request Timeout
 # This timeout value is used to set the maximum duration for the agent to process a CreateContainerRequest.
@@ -718,13 +680,14 @@ kernel_modules=[]
 # - runtime-request-timeout: The timeout value specified in the Kubelet configuration described as the link below:
 # (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout)
 # Defaults to @DEFCREATECONTAINERTIMEOUT_COCO@ second(s)
-# create_container_timeout = @DEFCREATECONTAINERTIMEOUT_COCO@
+create_container_timeout = @DEFCREATECONTAINERTIMEOUT_COCO@

 [runtime]
 # If enabled, the runtime will log additional debug messages to the
 # system log
 # (default: disabled)
-#enable_debug = true
+enable_debug = false
+
 #
 # Internetworking model
 # Determines how the VM should be connected to the
@@ -742,23 +705,23 @@ kernel_modules=[]
 #     Uses tc filter rules to redirect traffic from the network interface
 #     provided by plugin to a tap interface connected to the VM.
 #
-internetworking_model="@DEFNETWORKMODEL_QEMU@"
+internetworking_model = "@DEFNETWORKMODEL_QEMU@"

-name="@RUNTIMENAME@"
-hypervisor_name="@HYPERVISOR_QEMU@"
-agent_name="@PROJECT_TYPE@"
+name = "@RUNTIMENAME@"
+hypervisor_name = "@HYPERVISOR_QEMU@"
+agent_name = "@PROJECT_TYPE@"

 # disable guest seccomp
 # Determines whether container seccomp profiles are passed to the virtual
 # machine and applied by the kata agent. If set to true, seccomp is not applied
 # within the guest
 # (default: true)
-disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
+disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@

 # vCPUs pinning settings
 # if enabled, each vCPU thread will be scheduled to a fixed CPU
 # qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
-# enable_vcpus_pinning = false
+enable_vcpus_pinning = false

 # Apply a custom SELinux security policy to the container process inside the VM.
 # This is used when you want to apply a type other than the default `container_t`,
@@ -766,22 +729,23 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # (format: "user:role:type")
 # Note: You cannot specify MCS policy with the label because the sensitivity levels and
 # categories are determined automatically by high-level container runtimes such as containerd.
-#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
+# Example value when enabling: "system_u:system_r:container_t"
+guest_selinux_label = "@DEFGUESTSELINUXLABEL@"

 # If enabled, the runtime will create opentracing.io traces and spans.
 # (See https://www.jaegertracing.io/docs/getting-started).
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Set the full url to the Jaeger HTTP Thrift collector.
 # The default if not set will be "http://localhost:14268/api/traces"
-#jaeger_endpoint = ""
+jaeger_endpoint = ""

 # Sets the username to be used if basic auth is required for Jaeger.
-#jaeger_user = ""
+jaeger_user = ""

 # Sets the password to be used if basic auth is required for Jaeger.
-#jaeger_password = ""
+jaeger_password = ""

 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
@@ -789,7 +753,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
 # (like OVS) directly.
 # (default: false)
-#disable_new_netns = true
+disable_new_netns = false

 # if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
 # The container cgroups in the host are not created, just one single cgroup per sandbox.
@@ -797,7 +761,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
 # The sandbox cgroup is constrained if there is no container type annotation.
 # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
-sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_QEMU@
+sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY_QEMU@

 # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
 # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
@@ -806,13 +770,13 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_QEMU@
 # - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
 #   does not yet support sandbox sizing annotations.
 # - When running single containers using a tool like ctr, container sizing information will be available.
-static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_COCO@
+static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_COCO@

 # If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path.
 # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
 # If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
 # These will not be exposed to the container workloads, and are only provided for potential guest services.
-sandbox_bind_mounts=@DEFBINDMOUNTS@
+sandbox_bind_mounts = @DEFBINDMOUNTS@

 # VFIO Mode
 # Determines how VFIO devices should be be presented to the container.
@@ -833,19 +797,19 @@ sandbox_bind_mounts=@DEFBINDMOUNTS@
 #    Using this mode requires specially built workloads that know how
 #    to locate the relevant device interfaces within the VM.
 #
-vfio_mode="@DEFVFIOMODE@"
+vfio_mode = "@DEFVFIOMODE@"

 # If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
 # be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
-disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@
+disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@

 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
 # they may break compatibility, and are prepared for a big version bump.
 # Supported experimental features:
 # (default: [])
-experimental=@DEFAULTEXPFEATURES@
+experimental = @DEFAULTEXPFEATURES@

 # If enabled, user can run pprof tools with shim v2 process through kata-monitor.
 # (default: false)
-# enable_pprof = true
+enable_pprof = false
--- a/src/runtime-rs/config/configuration-qemu-runtime-rs.toml.in
+++ b/src/runtime-rs/config/configuration-qemu-runtime-rs.toml.in
@@ -16,45 +16,22 @@
 path = "@QEMUPATH@"
 kernel = "@KERNELPATH_QEMU@"
 image = "@IMAGEPATH@"
-# initrd = "@INITRDPATH@"
 machine_type = "@MACHINETYPE@"

 # rootfs filesystem type:
 #   - ext4 (default)
 #   - xfs
 #   - erofs
-rootfs_type=@DEFROOTFSTYPE@
+rootfs_type = @DEFROOTFSTYPE@

 # Block storage driver to be used for the VM rootfs is backed
 # by a block device. This is virtio-blk-pci, virtio-blk-mmio or nvdimm
 vm_rootfs_driver = "@VMROOTFSDRIVER_QEMU@"

-# Enable confidential guest support.
-# Toggling that setting may trigger different hardware features, ranging
-# from memory encryption to both memory and CPU-state encryption and integrity.
-# The Kata Containers runtime dynamically detects the available feature set and
-# aims at enabling the largest possible one, returning an error if none is
-# available, or none is supported by the hypervisor.
-#
-# Known limitations:
-# * Does not work by design:
-#   - CPU Hotplug 
-#   - Memory Hotplug
-#   - NVDIMM devices
-#
-# Default false
-# confidential_guest = true
-
-# Choose AMD SEV-SNP confidential guests
-# In case of using confidential guests on AMD hardware that supports both SEV
-# and SEV-SNP, the following enables SEV-SNP guests. SEV guests are default.
-# Default false
-# sev_snp_guest = true
-
 # Enable running QEMU VMM as a non-root user.
 # By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as
 # a non-root random user. See documentation for the limitations of this mode.
-# rootless = true
+rootless = false

 # List of valid annotation names for the hypervisor
 # Each member of the list is a regular expression, which is the base name
@@ -92,7 +69,7 @@ firmware_volume = "@FIRMWAREVOLUMEPATH@"
 # Machine accelerators
 # comma-separated list of machine accelerators to pass to the hypervisor.
 # For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"`
-machine_accelerators="@MACHINEACCELERATORS@"
+machine_accelerators = "@MACHINEACCELERATORS@"

 # Qemu seccomp sandbox feature
 # comma-separated list of seccomp sandbox features to control the syscall access.
@@ -100,12 +77,13 @@ machine_accelerators="@MACHINEACCELERATORS@"
 # Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
 # Another note: enabling this feature may reduce performance, you may enable
 # /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html
-#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@"
+# Recommended value when enabling: "on,obsolete=deny,spawn=deny,resourcecontrol=deny"
+seccompsandbox = "@DEFSECCOMPSANDBOXPARAM@"

 # CPU features
 # comma-separated list of cpu features to pass to the cpu
 # For example, `cpu_features = "pmu=off,vmx=off"
-cpu_features="@CPUFEATURES@"
+cpu_features = "@CPUFEATURES@"

 # Default number of vCPUs per SB/VM:
 # unspecified or 0                --> will be set to @DEFVCPUS@
@@ -151,7 +129,7 @@ default_bridges = @DEFBRIDGES@
 # the VM.
 #
 # Default false
-#reclaim_guest_freed_memory = true
+reclaim_guest_freed_memory = false

 # Default memory size in MiB for SB/VM.
 # If unspecified then it will be set @DEFMEMSZ@ MiB.
@@ -160,7 +138,7 @@ default_memory = @DEFMEMSZ@
 # Default memory slots per SB/VM.
 # If unspecified then it will be set @DEFMEMSLOTS@.
 # This is will determine the times that memory will be hotadded to sandbox/VM.
-#memory_slots = @DEFMEMSLOTS@
+memory_slots = @DEFMEMSLOTS@

 # Default maximum memory in MiB per SB / VM
 # unspecified or == 0           --> will be set to the actual amount of physical RAM
@@ -173,13 +151,13 @@ default_maxmemory = @DEFMAXMEMSZ@
 # If set block storage driver (block_device_driver) to "nvdimm",
 # should set memory_offset to the size of block device.
 # Default 0
-#memory_offset = 0
+memory_offset = 0

 # Specifies virtio-mem will be enabled or not.
 # Please note that this option should be used with the command
 # "echo 1 > /proc/sys/vm/overcommit_memory".
 # Default false
-#enable_virtio_mem = true
+enable_virtio_mem = false

 # Disable block device from being used for a container's rootfs.
 # In case of a storage driver like devicemapper where a container's
@@ -262,17 +240,17 @@ block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@"

 # Specifies cache-related options will be set to block devices or not.
 # Default false
-#block_device_cache_set = true
+block_device_cache_set = false

 # Specifies cache-related options for block devices.
 # Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
 # Default false
-#block_device_cache_direct = true
+block_device_cache_direct = false

 # Specifies cache-related options for block devices.
 # Denotes whether flush requests for the device are ignored.
 # Default false
-#block_device_cache_noflush = true
+block_device_cache_noflush = false

 # Enable iothreads (data-plane) to be used. This causes IO to be
 # handled in a separate IO thread. This is currently only implemented
@@ -280,6 +258,12 @@ block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@"
 #
 enable_iothreads = @DEFENABLEIOTHREADS@

+# Virtio queue size. Size: byte. default 128
+queue_size = 128
+
+# Block device multi-queue, default 1
+num_queues = 1
+
 # Enable pre allocation of VM RAM, default false
 # Enabling this will result in lower container density
 # as all of the memory will be allocated and locked
@@ -287,7 +271,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # upfront or in the cases where you want memory latencies
 # to be very predictable
 # Default false
-#enable_mem_prealloc = true
+enable_mem_prealloc = false

 # Enable huge pages for VM RAM, default false
 # Enabling this will result in the VM memory
@@ -295,7 +279,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # This is useful when you want to use vhost-user network
 # stacks within the container. This will automatically
 # result in memory pre allocation
-#enable_hugepages = true
+enable_hugepages = false

 # Enable vhost-user storage device, default false
 # Enabling this will result in some Linux reserved block type
@@ -312,11 +296,11 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
 # Enabling this will result in the VM having a vIOMMU device
 # This will also add the following options to the kernel's
 # command line: intel_iommu=on,iommu=pt
-#enable_iommu = true
+enable_iommu = false

 # Enable IOMMU_PLATFORM, default false
 # Enabling this will result in the VM device having iommu_platform=on set
-#enable_iommu_platform = true
+enable_iommu_platform = false

 # List of valid annotations values for the vhost user store path
 # The default if not set is empty (all annotations rejected.)
@@ -332,7 +316,7 @@ vhost_user_reconnect_timeout_sec = 0
 # will disable this feature. In the case of virtio-fs, this is enabled
 # automatically and '/dev/shm' is used as the backing folder.
 # This option will be ignored if VM templating is enabled.
-#file_mem_backend = "@DEFFILEMEMBACKEND@"
+file_mem_backend = "@DEFFILEMEMBACKEND@"

 # List of valid annotations values for the file_mem_backend annotation
 # The default if not set is empty (all annotations rejected.)
@@ -347,7 +331,7 @@ pflashes = []
 # to enable debug output where available.
 #
 # Default false
-#enable_debug = true
+enable_debug = false

 # This option allows to add an extra HMP or QMP socket when `enable_debug = true`
 #
@@ -362,17 +346,17 @@ pflashes = []
 #
 # If set to the empty string "", no extra monitor socket is added. This is
 # the default.
-#extra_monitor_socket = "hmp"
+extra_monitor_socket = ""

 # Disable the customizations done in the runtime when it detects
 # that it is running on top a VMM. This will result in the runtime
 # behaving as it would when running on bare metal.
 #
-#disable_nesting_checks = true
+disable_nesting_checks = false

 # This is the msize used for 9p shares. It is the number of bytes
 # used for 9p packet payload.
-#msize_9p = @DEFMSIZE9P@
+msize_9p = @DEFMSIZE9P@

 # If false and nvdimm is supported, use nvdimm device to plug guest image.
 # Otherwise virtio-block device is used.
@@ -380,44 +364,44 @@ pflashes = []
 # nvdimm is not supported when `confidential_guest = true`.
 #
 # Default is false
-#disable_image_nvdimm = true
+disable_image_nvdimm = false

 # VFIO devices are hotplugged on a bridge by default.
 # Enable hotplugging on root bus. This may be required for devices with
 # a large PCI bar, as this is a current limitation with hotplugging on
 # a bridge.
 # Default false
-#hotplug_vfio_on_root_bus = true
+hotplug_vfio_on_root_bus = false

 # Enable hot-plugging of VFIO devices to a bridge-port, 
 # root-port or switch-port. 
 # The default setting is  "no-port"
-#hot_plug_vfio = "root-port" 
+hot_plug_vfio = "no-port"

 # In a confidential compute environment hot-plugging can compromise
 # security. 
 # Enable cold-plugging of VFIO devices to a bridge-port, 
 # root-port or switch-port. 
 # The default setting is  "no-port", which means disabled. 
-#cold_plug_vfio = "root-port" 
+cold_plug_vfio = "no-port"

 # Before hot plugging a PCIe device, you need to add a pcie_root_port device.
 # Use this parameter when using some large PCI bar devices, such as Nvidia GPU
 # The value means the number of pcie_root_port
 # This value is valid when hotplug_vfio_on_root_bus is true and machine_type is "q35"
 # Default 0
-#pcie_root_port = 2
+pcie_root_port = 0

 # Before hot plugging a PCIe device onto a switch port, you need add a pcie_switch_port device fist.
 # Use this parameter when using some large PCI bar devices, such as Nvidia GPU
 # The value means how many devices attached onto pcie_switch_port will be created.
 # This value is valid when hotplug_vfio_on_root_bus is true, and machine_type is "q35"
 # Default 0
-#pcie_switch_port = 2
+pcie_switch_port = 0

 # If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
 # security (vhost-net runs ring0) for network I/O performance.
-#disable_vhost_net = true
+disable_vhost_net = false

 #
 # Default entropy source.
@@ -429,7 +413,7 @@ pflashes = []
 # The source of entropy /dev/urandom is non-blocking and provides a
 # generally acceptable source of entropy. It should work well for pretty much
 # all practical purposes.
-#entropy_source= "@DEFENTROPYSOURCE@"
+entropy_source = "@DEFENTROPYSOURCE@"

 # List of valid annotations values for entropy_source
 # The default if not set is empty (all annotations rejected.)
@@ -451,7 +435,8 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
 # Warnings will be logged if any error is encountered while scanning for hooks,
 # but it will not abort container execution.
-#guest_hook_path = "/usr/share/oci/hooks"
+# Recommended value when enabling: "/usr/share/oci/hooks"
+guest_hook_path = ""

 # Enable connection to Quote Generation Service (QGS)
 # The "tdx_quote_generation_service_socket_port" parameter configures how QEMU connects to the TDX Quote Generation Service (QGS).
@@ -462,18 +447,18 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # It's important to note that setting "tdx_quote_generation_service_socket_port" to 0 enables communication via Unix Domain Sockets (UDS).
 # To activate UDS, the QGS service itself must be launched with the "-port=0" parameter and the UDS will always be located at /var/run/tdx-qgs/qgs.socket.
 # -object '{"qom-type":"tdx-guest","id":"tdx","quote-generation-socket":{"type":"unix","path":"/var/run/tdx-qgs/qgs.socket"}}'
-# tdx_quote_generation_service_socket_port = @QEMUTDXQUOTEGENERATIONSERVICESOCKETPORT@
+tdx_quote_generation_service_socket_port = @QEMUTDXQUOTEGENERATIONSERVICESOCKETPORT@

 #
 # Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#rx_rate_limiter_max_rate = 0
+rx_rate_limiter_max_rate = 0
 # Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block)
 # to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#tx_rate_limiter_max_rate = 0
+tx_rate_limiter_max_rate = 0

 # Set where to save the guest memory dump file.
 # If set, when GUEST_PANICKED event occurred,
@@ -483,9 +468,10 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # The dumped file(also called vmcore) can be processed with crash or gdb.
 #
 # WARNING:
-#   Dump guest’s memory can take very long depending on the amount of guest memory
+#   Dumping the guest's memory can take a very long time depending on the amount of guest memory
 #   and use much disk space.
-#guest_memory_dump_path="/var/crash/kata"
+# Recommended value when enabling: "/var/crash/kata"
+guest_memory_dump_path = ""

 # If enable paging.
 # Basically, if you want to use "gdb" rather than "crash",
@@ -493,20 +479,20 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # then you should enable paging.
 #
 # See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details
-#guest_memory_dump_paging=false
+guest_memory_dump_paging = false

 # use legacy serial for guest console if available and implemented for architecture. Default false
-#use_legacy_serial = true
+use_legacy_serial = false

 # disable applying SELinux on the VMM process (default false)
-disable_selinux=@DEFDISABLESELINUX@
+disable_selinux = @DEFDISABLESELINUX@

 # disable applying SELinux on the container process
 # If set to false, the type `container_t` is applied to the container process by default.
 # Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
 # with `SELINUX=yes`.
 # (default: true)
-disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
+disable_guest_selinux = @DEFDISABLEGUESTSELINUX@


 [hypervisor.qemu.factory]
@@ -521,41 +507,17 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 # Note: Requires "initrd=" to be set ("image=" is not supported).
 #
 # Default false
-#enable_template = true
+enable_template = false

 # Specifies the path of template.
 #
 # Default "/run/vc/vm/template"
-#template_path = "/run/vc/vm/template"
-
-# The number of caches of VMCache:
-# unspecified or == 0   --> VMCache is disabled
-# > 0                   --> will be set to the specified number
-#
-# VMCache is a function that creates VMs as caches before using it.
-# It helps speed up new container creation.
-# The function consists of a server and some clients communicating
-# through Unix socket.  The protocol is gRPC in protocols/cache/cache.proto.
-# The VMCache server will create some VMs and cache them by factory cache.
-# It will convert the VM to gRPC format and transport it when gets
-# requestion from clients.
-# Factory grpccache is the VMCache client.  It will request gRPC format
-# VM and convert it back to a VM.  If VMCache function is enabled,
-# kata-runtime will request VM from factory grpccache when it creates
-# a new sandbox.
-#
-# Default 0
-#vm_cache_number = 0
-
-# Specify the address of the Unix socket that is used by VMCache.
-#
-# Default /var/run/kata-containers/cache.sock
-#vm_cache_endpoint = "/var/run/kata-containers/cache.sock"
+template_path = "/run/vc/vm/template"

 [agent.@PROJECT_TYPE@]
 # If enabled, make the agent display debug-level messages.
 # (default: disabled)
-#enable_debug = true
+enable_debug = false

 # Enable agent tracing.
 #
@@ -569,7 +531,7 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 #   increasing the container shutdown time slightly.
 #
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Comma separated list of kernel modules and their parameters.
 # These modules will be loaded in the guest kernel using modprobe(8).
@@ -582,18 +544,18 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 #  * The module is not available in the guest or it doesn't met the guest kernel
 #    requirements, like architecture and version.
 #
-kernel_modules=[]
+kernel_modules = []

 # Enable debug console.

 # If enabled, user can connect guest OS running inside hypervisor
 # through "kata-runtime exec <sandbox-id>" command

-#debug_console_enabled = true
+debug_console_enabled = false

 # Agent dial timeout in millisecond.
 # (default: 10)
-#dial_timeout_ms = 10
+dial_timeout_ms = 10

 # Agent reconnect timeout in millisecond.
 # Retry times = reconnect_timeout_ms / dial_timeout_ms (default: 300)
@@ -602,28 +564,28 @@ kernel_modules=[]
 # You'd better not change the value of dial_timeout_ms, unless you have an
 # idea of what you are doing.
 # (default: 3000)
-#reconnect_timeout_ms = 3000
+reconnect_timeout_ms = 3000

 [agent.@PROJECT_TYPE@.mem_agent]
 # Control the mem-agent function enable or disable.
 # Default to false
-#mem_agent_enable = true
+mem_agent_enable = false

 # Control the mem-agent memcg function disable or enable
 # Default to false
-#memcg_disable = false
+memcg_disable = false

 # Control the mem-agent function swap enable or disable.
 # Default to false
-#memcg_swap = false
+memcg_swap = false

 # Control the mem-agent function swappiness max number.
 # Default to 50
-#memcg_swappiness_max = 50
+memcg_swappiness_max = 50

 # Control the mem-agent memcg function wait period seconds
 # Default to 600
-#memcg_period_secs = 600
+memcg_period_secs = 600

 # Control the mem-agent memcg wait period PSI percent limit.
 # If the percentage of memory and IO PSI stall time within
@@ -631,7 +593,7 @@ kernel_modules=[]
 # then the aging and eviction for this cgroup will not be
 # executed after this waiting period.
 # Default to 1
-#memcg_period_psi_percent_limit = 1
+memcg_period_psi_percent_limit = 1

 # Control the mem-agent memcg eviction PSI percent limit.
 # If the percentage of memory and IO PSI stall time for a cgroup
@@ -639,44 +601,44 @@ kernel_modules=[]
 # this cgroup will immediately stop and will not resume until
 # the next memcg waiting period.
 # Default to 1
-#memcg_eviction_psi_percent_limit = 1
+memcg_eviction_psi_percent_limit = 1

 # Control the mem-agent memcg eviction run aging count min.
 # A cgroup will only perform eviction when the number of aging cycles
 # in memcg is greater than or equal to memcg_eviction_run_aging_count_min.
 # Default to 3
-#memcg_eviction_run_aging_count_min = 3
+memcg_eviction_run_aging_count_min = 3

 # Control the mem-agent compact function disable or enable
 # Default to false
-#compact_disable = false
+compact_disable = false

 # Control the mem-agent compaction function wait period seconds
 # Default to 600
-#compact_period_secs = 600
+compact_period_secs = 600

 # Control the mem-agent compaction function wait period PSI percent limit.
 # If the percentage of memory and IO PSI stall time within
 # the compaction waiting period exceeds this value,
 # then the compaction will not be executed after this waiting period.
 # Default to 1
-#compact_period_psi_percent_limit = 1
+compact_period_psi_percent_limit = 1

 # Control the mem-agent compaction function compact PSI percent limit.
 # During compaction, the percentage of memory and IO PSI stall time
 # is checked every second. If this percentage exceeds
 # compact_psi_percent_limit, the compaction process will stop.
 # Default to 5
-#compact_psi_percent_limit = 5
+compact_psi_percent_limit = 5

 # Control the maximum number of seconds for each compaction of mem-agent compact function.
-# Default to 180
-#compact_sec_max = 180
+# Default to 300
+compact_sec_max = 300

 # Control the mem-agent compaction function compact order.
 # compact_order is use with compact_threshold.
 # Default to 9
-#compact_order = 9
+compact_order = 9

 # Control the mem-agent compaction function compact threshold.
 # compact_threshold is the pages number.
@@ -689,7 +651,7 @@ kernel_modules=[]
 # since the previous compaction.
 # then the system should initiate another round of memory compaction.
 # Default to 1024
-#compact_threshold = 1024
+compact_threshold = 1024

 # Control the mem-agent compaction function force compact times.
 # After one compaction, if there has not been a compaction within
@@ -698,7 +660,9 @@ kernel_modules=[]
 # If compact_force_times is set to 0, will do force compaction each time.
 # If compact_force_times is set to 18446744073709551615, will never do force compaction.
 # Default to 18446744073709551615
-#compact_force_times = 18446744073709551615
+# Note: TOML only guarantees signed 64-bit integers, so u64::MAX (18446744073709551615) may be rejected by the parser.
+# 9223372036854775807 (i64::MAX) is used here instead; it is effectively "never" for practical purposes.
+compact_force_times = 9223372036854775807

 # Create Container Request Timeout
 # This timeout value is used to set the maximum duration for the agent to process a CreateContainerRequest.
@@ -711,14 +675,14 @@ kernel_modules=[]
 # - runtime-request-timeout: The timeout value specified in the Kubelet configuration described as the link below:
 # (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout)
 # Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
-# create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
+create_container_timeout = @DEFCREATECONTAINERTIMEOUT@

 [runtime]
 # If enabled, the runtime will log additional debug messages to the
 # system log
 # (default: disabled)
-#enable_debug = true
-#
+enable_debug = false
+
 # Internetworking model
 # Determines how the VM should be connected to the
 # the container network interface
@@ -735,23 +699,23 @@ kernel_modules=[]
 #     Uses tc filter rules to redirect traffic from the network interface
 #     provided by plugin to a tap interface connected to the VM.
 #
-internetworking_model="@DEFNETWORKMODEL_QEMU@"
+internetworking_model = "@DEFNETWORKMODEL_QEMU@"

-name="@RUNTIMENAME@"
-hypervisor_name="@HYPERVISOR_QEMU@"
-agent_name="@PROJECT_TYPE@"
+name = "@RUNTIMENAME@"
+hypervisor_name = "@HYPERVISOR_QEMU@"
+agent_name = "@PROJECT_TYPE@"

 # disable guest seccomp
 # Determines whether container seccomp profiles are passed to the virtual
 # machine and applied by the kata agent. If set to true, seccomp is not applied
 # within the guest
 # (default: true)
-disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
+disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@

 # vCPUs pinning settings
 # if enabled, each vCPU thread will be scheduled to a fixed CPU
 # qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
-# enable_vcpus_pinning = false
+enable_vcpus_pinning = false

 # Apply a custom SELinux security policy to the container process inside the VM.
 # This is used when you want to apply a type other than the default `container_t`,
@@ -759,22 +723,23 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # (format: "user:role:type")
 # Note: You cannot specify MCS policy with the label because the sensitivity levels and
 # categories are determined automatically by high-level container runtimes such as containerd.
-#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
+# Example value when enabling: "system_u:system_r:container_t"
+guest_selinux_label = "@DEFGUESTSELINUXLABEL@"

 # If enabled, the runtime will create opentracing.io traces and spans.
 # (See https://www.jaegertracing.io/docs/getting-started).
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Set the full url to the Jaeger HTTP Thrift collector.
 # The default if not set will be "http://localhost:14268/api/traces"
-#jaeger_endpoint = ""
+jaeger_endpoint = ""

 # Sets the username to be used if basic auth is required for Jaeger.
-#jaeger_user = ""
+jaeger_user = ""

 # Sets the password to be used if basic auth is required for Jaeger.
-#jaeger_password = ""
+jaeger_password = ""

 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
@@ -782,7 +747,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
 # (like OVS) directly.
 # (default: false)
-#disable_new_netns = true
+disable_new_netns = false

 # if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
 # The container cgroups in the host are not created, just one single cgroup per sandbox.
@@ -790,7 +755,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
 # The sandbox cgroup is constrained if there is no container type annotation.
 # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
-sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_QEMU@
+sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY_QEMU@

 # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
 # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
@@ -799,13 +764,13 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_QEMU@
 # - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
 #   does not yet support sandbox sizing annotations.
 # - When running single containers using a tool like ctr, container sizing information will be available.
-static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_QEMU@
+static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_QEMU@

 # If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path.
 # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
 # If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
 # These will not be exposed to the container workloads, and are only provided for potential guest services.
-sandbox_bind_mounts=@DEFBINDMOUNTS@
+sandbox_bind_mounts = @DEFBINDMOUNTS@

 # VFIO Mode
 # Determines how VFIO devices should be be presented to the container.
@@ -826,19 +791,19 @@ sandbox_bind_mounts=@DEFBINDMOUNTS@
 #    Using this mode requires specially built workloads that know how
 #    to locate the relevant device interfaces within the VM.
 #
-vfio_mode="@DEFVFIOMODE@"
+vfio_mode = "@DEFVFIOMODE@"

 # If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
 # be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
-disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@
+disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@

 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
 # they may break compatibility, and are prepared for a big version bump.
 # Supported experimental features:
 # (default: [])
-experimental=@DEFAULTEXPFEATURES@
+experimental = @DEFAULTEXPFEATURES@

 # If enabled, user can run pprof tools with shim v2 process through kata-monitor.
 # (default: false)
-# enable_pprof = true
+enable_pprof = false
--- a/src/runtime-rs/config/configuration-qemu-se-runtime-rs.toml.in
+++ b/src/runtime-rs/config/configuration-qemu-se-runtime-rs.toml.in
@@ -40,7 +40,7 @@ confidential_guest = true
 # Enable running QEMU VMM as a non-root user.
 # By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as
 # a non-root random user. See documentation for the limitations of this mode.
-# rootless = true
+rootless = false

 # List of valid annotation names for the hypervisor
 # Each member of the list is a regular expression, which is the base name
@@ -78,7 +78,7 @@ firmware_volume = "@FIRMWAREVOLUMEPATH@"
 # Machine accelerators
 # comma-separated list of machine accelerators to pass to the hypervisor.
 # For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"`
-machine_accelerators="@MACHINEACCELERATORS@"
+machine_accelerators = "@MACHINEACCELERATORS@"

 # Qemu seccomp sandbox feature
 # comma-separated list of seccomp sandbox features to control the syscall access.
@@ -86,12 +86,13 @@ machine_accelerators="@MACHINEACCELERATORS@"
 # Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
 # Another note: enabling this feature may reduce performance, you may enable
 # /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html
-#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@"
+# Recommended value when enabling: "on,obsolete=deny,spawn=deny,resourcecontrol=deny"
+seccompsandbox = "@DEFSECCOMPSANDBOXPARAM@"

 # CPU features
 # comma-separated list of cpu features to pass to the cpu
 # For example, `cpu_features = "pmu=off,vmx=off"`
-cpu_features="@CPUFEATURES@"
+cpu_features = "@CPUFEATURES@"

 # Default number of vCPUs per SB/VM:
 # unspecified or 0                --> will be set to @DEFVCPUS@
@@ -136,7 +137,7 @@ default_memory = @DEFMEMSZ@
 # Default memory slots per SB/VM.
 # If unspecified then it will be set @DEFMEMSLOTS@.
 # This will determine the number of times that memory can be hot-added to the sandbox/VM.
-#memory_slots = @DEFMEMSLOTS@
+memory_slots = @DEFMEMSLOTS@

 # Default maximum memory in MiB per SB / VM
 # unspecified or == 0           --> will be set to the actual amount of physical RAM
@@ -149,13 +150,13 @@ default_maxmemory = @DEFMAXMEMSZ@
 # If set block storage driver (block_device_driver) to "nvdimm",
 # should set memory_offset to the size of block device.
 # Default 0
-#memory_offset = 0
+memory_offset = 0

 # Specifies virtio-mem will be enabled or not.
 # Please note that this option should be used with the command
 # "echo 1 > /proc/sys/vm/overcommit_memory".
 # Default false
-#enable_virtio_mem = true
+enable_virtio_mem = false

 # Disable block device from being used for a container's rootfs.
 # In case of a storage driver like devicemapper where a container's
@@ -238,17 +239,17 @@ block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@"

 # Specifies cache-related options will be set to block devices or not.
 # Default false
-#block_device_cache_set = true
+block_device_cache_set = false

 # Specifies cache-related options for block devices.
 # Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
 # Default false
-#block_device_cache_direct = true
+block_device_cache_direct = false

 # Specifies cache-related options for block devices.
 # Denotes whether flush requests for the device are ignored.
 # Default false
-#block_device_cache_noflush = true
+block_device_cache_noflush = false

 # Enable iothreads (data-plane) to be used. This causes IO to be
 # handled in a separate IO thread. This is currently only implemented
@@ -256,6 +257,12 @@ block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@"
 #
 enable_iothreads = @DEFENABLEIOTHREADS@

+# Virtio queue size. Size: byte. default 128
+queue_size = 128
+
+# Block device multi-queue, default 1
+num_queues = 1
+
 # Enable pre allocation of VM RAM, default false
 # Enabling this will result in lower container density
 # as all of the memory will be allocated and locked
@@ -263,7 +270,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # upfront or in the cases where you want memory latencies
 # to be very predictable
 # Default false
-#enable_mem_prealloc = true
+enable_mem_prealloc = false

 # Enable huge pages for VM RAM, default false
 # Enabling this will result in the VM memory
@@ -271,7 +278,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # This is useful when you want to use vhost-user network
 # stacks within the container. This will automatically
 # result in memory pre allocation
-#enable_hugepages = true
+enable_hugepages = false

 # Enable vhost-user storage device, default false
 # Enabling this will result in some Linux reserved block type
@@ -288,11 +295,11 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
 # Enabling this will result in the VM having a vIOMMU device
 # This will also add the following options to the kernel's
 # command line: intel_iommu=on,iommu=pt
-#enable_iommu = true
+enable_iommu = false

 # Enable IOMMU_PLATFORM, default false
 # Enabling this will result in the VM device having iommu_platform=on set
-#enable_iommu_platform = true
+enable_iommu_platform = false

 # List of valid annotations values for the vhost user store path
 # The default if not set is empty (all annotations rejected.)
@@ -303,7 +310,7 @@ valid_vhost_user_store_paths = @DEFVALIDVHOSTUSERSTOREPATHS@
 # will disable this feature. In the case of virtio-fs, this is enabled
 # automatically and '/dev/shm' is used as the backing folder.
 # This option will be ignored if VM templating is enabled.
-#file_mem_backend = "@DEFFILEMEMBACKEND@"
+file_mem_backend = "@DEFFILEMEMBACKEND@"

 # List of valid annotations values for the file_mem_backend annotation
 # The default if not set is empty (all annotations rejected.)
@@ -318,17 +325,17 @@ pflashes = []
 # to enable debug output where available.
 #
 # Default false
-#enable_debug = true
+enable_debug = false

 # Disable the customizations done in the runtime when it detects
 # that it is running on top a VMM. This will result in the runtime
 # behaving as it would when running on bare metal.
 #
-#disable_nesting_checks = true
+disable_nesting_checks = false

 # This is the msize used for 9p shares. It is the number of bytes
 # used for 9p packet payload.
-#msize_9p = @DEFMSIZE9P@
+msize_9p = @DEFMSIZE9P@

 # If false and nvdimm is supported, use nvdimm device to plug guest image.
 # Otherwise virtio-block device is used.
@@ -336,33 +343,33 @@ pflashes = []
 # nvdimm is not supported when `confidential_guest = true`.
 #
 # Default is false
-#disable_image_nvdimm = true
+disable_image_nvdimm = false

 # Enable hot-plugging of VFIO devices to a bridge-port,
 # root-port or switch-port.
 # The default setting is "no-port"
-#hot_plug_vfio = "root-port"
+hot_plug_vfio = "no-port"

 # In a confidential compute environment hot-plugging can compromise
 # security.
 # Enable cold-plugging of VFIO devices to a bridge-port,
 # root-port or switch-port.
 # The default setting is "no-port", which means disabled.
-cold_plug_vfio = "root-port"
+cold_plug_vfio = "no-port"

 # VFIO devices are hotplugged on a bridge by default.
 # Enable hotplugging on root bus. This may be required for devices with
 # a large PCI bar, as this is a current limitation with hotplugging on
 # a bridge.
 # Default false
-#hotplug_vfio_on_root_bus = true
+hotplug_vfio_on_root_bus = false

 # Before hot plugging a PCIe device, you need to add a pcie_root_port device.
 # Use this parameter when using some large PCI bar devices, such as Nvidia GPU
 # The value means the number of pcie_root_port
 # This value is valid when hotplug_vfio_on_root_bus is true and machine_type is "q35"
 # Default 0
-#pcie_root_port = 2
+pcie_root_port = 0

 # If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
 # security (vhost-net runs ring0) for network I/O performance.
@@ -378,7 +385,7 @@ disable_vhost_net = true
 # The source of entropy /dev/urandom is non-blocking and provides a
 # generally acceptable source of entropy. It should work well for pretty much
 # all practical purposes.
-#entropy_source= "@DEFENTROPYSOURCE@"
+entropy_source = "@DEFENTROPYSOURCE@"

 # List of valid annotations values for entropy_source
 # The default if not set is empty (all annotations rejected.)
@@ -400,17 +407,18 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
 # Warnings will be logged if any error is encountered while scanning for hooks,
 # but it will not abort container execution.
-#guest_hook_path = "/usr/share/oci/hooks"
-#
+# Recommended value when enabling: "/usr/share/oci/hooks"
+guest_hook_path = ""
+
 # Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#rx_rate_limiter_max_rate = 0
+rx_rate_limiter_max_rate = 0
 # Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block)
 # to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#tx_rate_limiter_max_rate = 0
+tx_rate_limiter_max_rate = 0

 # Set where to save the guest memory dump file.
 # If set, when GUEST_PANICKED event occurred,
@@ -420,9 +428,10 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # The dumped file(also called vmcore) can be processed with crash or gdb.
 #
 # WARNING:
-#   Dump guest’s memory can take very long depending on the amount of guest memory
+#   Dump guest's memory can take very long depending on the amount of guest memory
 #   and use much disk space.
-#guest_memory_dump_path="/var/crash/kata"
+# Recommended value when enabling: "/var/crash/kata"
+guest_memory_dump_path = ""

 # If enable paging.
 # Basically, if you want to use "gdb" rather than "crash",
@@ -430,7 +439,7 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # then you should enable paging.
 #
 # See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details
-#guest_memory_dump_paging=false
+guest_memory_dump_paging = false

 # Enable swap in the guest. Default false.
 # When enable_guest_swap is enabled, insert a raw file to the guest as the swap device
@@ -441,20 +450,20 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # If swap_in_bytes is not set, the size should be memory_limit_in_bytes.
 # If swap_in_bytes and memory_limit_in_bytes is not set, the size should
 # be default_memory.
-#enable_guest_swap = true
+enable_guest_swap = false

 # use legacy serial for guest console if available and implemented for architecture. Default false
-#use_legacy_serial = true
+use_legacy_serial = false

 # disable applying SELinux on the VMM process (default false)
-disable_selinux=@DEFDISABLESELINUX@
+disable_selinux = @DEFDISABLESELINUX@

 # disable applying SELinux on the container process
 # If set to false, the type `container_t` is applied to the container process by default.
 # Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
 # with `SELINUX=yes`.
 # (default: true)
-disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
+disable_guest_selinux = @DEFDISABLEGUESTSELINUX@


 [factory]
@@ -469,41 +478,17 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 # Note: Requires "initrd=" to be set ("image=" is not supported).
 #
 # Default false
-#enable_template = true
+enable_template = false

 # Specifies the path of template.
 #
 # Default "/run/vc/vm/template"
-#template_path = "/run/vc/vm/template"
-
-# The number of caches of VMCache:
-# unspecified or == 0   --> VMCache is disabled
-# > 0                   --> will be set to the specified number
-#
-# VMCache is a function that creates VMs as caches before using it.
-# It helps speed up new container creation.
-# The function consists of a server and some clients communicating
-# through Unix socket.  The protocol is gRPC in protocols/cache/cache.proto.
-# The VMCache server will create some VMs and cache them by factory cache.
-# It will convert the VM to gRPC format and transport it when gets
-# requestion from clients.
-# Factory grpccache is the VMCache client.  It will request gRPC format
-# VM and convert it back to a VM.  If VMCache function is enabled,
-# kata-runtime will request VM from factory grpccache when it creates
-# a new sandbox.
-#
-# Default 0
-#vm_cache_number = 0
-
-# Specify the address of the Unix socket that is used by VMCache.
-#
-# Default /var/run/kata-containers/cache.sock
-#vm_cache_endpoint = "/var/run/kata-containers/cache.sock"
+template_path = "/run/vc/vm/template"

 [agent.@PROJECT_TYPE@]
 # If enabled, make the agent display debug-level messages.
 # (default: disabled)
-#enable_debug = true
+enable_debug = false

 # Enable agent tracing.
 #
@@ -517,7 +502,7 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 #   increasing the container shutdown time slightly.
 #
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Comma separated list of kernel modules and their parameters.
 # These modules will be loaded in the guest kernel using modprobe(8).
@@ -530,14 +515,14 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 #  * The module is not available in the guest or it doesn't meet the guest kernel
 #    requirements, like architecture and version.
 #
-kernel_modules=[]
+kernel_modules = []

 # Enable debug console.

 # If enabled, user can connect guest OS running inside hypervisor
 # through "kata-runtime exec <sandbox-id>" command

-#debug_console_enabled = true
+debug_console_enabled = false

 # Agent dial timeout in millisecond.
 # (default: 10)
@@ -563,14 +548,14 @@ reconnect_timeout_ms = 5000
 # - runtime-request-timeout: The timeout value specified in the Kubelet configuration described as the link below:
 # (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout)
 # Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
-# create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
+create_container_timeout = @DEFCREATECONTAINERTIMEOUT@

 [runtime]
 # If enabled, the runtime will log additional debug messages to the
 # system log
 # (default: disabled)
-#enable_debug = true
-#
+enable_debug = false
+
 # Internetworking model
 # Determines how the VM should be connected to the
 # container network interface
@@ -587,23 +572,23 @@ reconnect_timeout_ms = 5000
 #     Uses tc filter rules to redirect traffic from the network interface
 #     provided by plugin to a tap interface connected to the VM.
 #
-internetworking_model="@DEFNETWORKMODEL_QEMU@"
+internetworking_model = "@DEFNETWORKMODEL_QEMU@"

-name="@RUNTIMENAME@"
-hypervisor_name="@HYPERVISOR_QEMU@"
-agent_name="@PROJECT_TYPE@"
+name = "@RUNTIMENAME@"
+hypervisor_name = "@HYPERVISOR_QEMU@"
+agent_name = "@PROJECT_TYPE@"

 # disable guest seccomp
 # Determines whether container seccomp profiles are passed to the virtual
 # machine and applied by the kata agent. If set to true, seccomp is not applied
 # within the guest
 # (default: true)
-disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
+disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@

 # vCPUs pinning settings
 # if enabled, each vCPU thread will be scheduled to a fixed CPU
 # qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
-# enable_vcpus_pinning = false
+enable_vcpus_pinning = false

 # Apply a custom SELinux security policy to the container process inside the VM.
 # This is used when you want to apply a type other than the default `container_t`,
@@ -611,22 +596,23 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # (format: "user:role:type")
 # Note: You cannot specify MCS policy with the label because the sensitivity levels and
 # categories are determined automatically by high-level container runtimes such as containerd.
-#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
+# Example value when enabling: "system_u:system_r:container_t"
+guest_selinux_label = "@DEFGUESTSELINUXLABEL@"

 # If enabled, the runtime will create opentracing.io traces and spans.
 # (See https://www.jaegertracing.io/docs/getting-started).
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Set the full url to the Jaeger HTTP Thrift collector.
 # The default if not set will be "http://localhost:14268/api/traces"
-#jaeger_endpoint = ""
+jaeger_endpoint = ""

 # Sets the username to be used if basic auth is required for Jaeger.
-#jaeger_user = ""
+jaeger_user = ""

 # Sets the password to be used if basic auth is required for Jaeger.
-#jaeger_password = ""
+jaeger_password = ""

 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
@@ -634,7 +620,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
 # (like OVS) directly.
 # (default: false)
-#disable_new_netns = true
+disable_new_netns = false

 # if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
 # The container cgroups in the host are not created, just one single cgroup per sandbox.
@@ -642,7 +628,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
 # The sandbox cgroup is constrained if there is no container type annotation.
 # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
-sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_QEMU@
+sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY_QEMU@

 # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
 # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
@@ -651,13 +637,13 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_QEMU@
 # - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
 #   does not yet support sandbox sizing annotations.
 # - When running single containers using a tool like ctr, container sizing information will be available.
-static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_QEMU@
+static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_QEMU@

 # If specified, sandbox_bind_mounts identifies host paths to be mounted (ro) into the sandbox's shared path.
 # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
 # If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
 # These will not be exposed to the container workloads, and are only provided for potential guest services.
-sandbox_bind_mounts=@DEFBINDMOUNTS@
+sandbox_bind_mounts = @DEFBINDMOUNTS@

 # VFIO Mode
 # Determines how VFIO devices should be presented to the container.
@@ -678,19 +664,19 @@ sandbox_bind_mounts=@DEFBINDMOUNTS@
 #    Using this mode requires specially built workloads that know how
 #    to locate the relevant device interfaces within the VM.
 #
-vfio_mode="@DEFVFIOMODE_SE@"
+vfio_mode = "@DEFVFIOMODE_SE@"

 # If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
 # be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
-disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@
+disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@

 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
 # they may break compatibility, and are prepared for a big version bump.
 # Supported experimental features:
 # (default: [])
-experimental=@DEFAULTEXPFEATURES@
+experimental = @DEFAULTEXPFEATURES@

 # If enabled, user can run pprof tools with shim v2 process through kata-monitor.
 # (default: false)
-# enable_pprof = true
+enable_pprof = false
--- a/src/runtime-rs/config/configuration-remote.toml.in
+++ b/src/runtime-rs/config/configuration-remote.toml.in
@@ -19,24 +19,6 @@ remote_hypervisor_socket = "/run/peerpod/hypervisor.sock"
 # Timeout in seconds for creating a remote hypervisor, 600s(10min) by default
 remote_hypervisor_timeout = 600

-
-# Enable confidential guest support.
-# Toggling that setting may trigger different hardware features, ranging
-# from memory encryption to both memory and CPU-state encryption and integrity.
-# The Kata Containers runtime dynamically detects the available feature set and
-# aims at enabling the largest possible one, returning an error if none is
-# available, or none is supported by the hypervisor.
-#
-# Known limitations:
-# * Does not work by design:
-#   - CPU Hotplug
-#   - Memory Hotplug
-#   - NVDIMM devices
-#
-# Default false
-# confidential_guest = true
-
-
 # List of valid annotation names for the hypervisor
 # Each member of the list is a regular expression, which is the base name
 # of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
@@ -54,7 +36,7 @@ enable_annotations = ["machine_type", "default_memory", "default_vcpus", "defaul
 # To see the list of default parameters, enable hypervisor debug, create a
 # container and look for 'default-kernel-parameters' log entries.
 # NOTE: kernel_params are not currently passed over in remote hypervisor
-# kernel_params = ""
+kernel_params = ""

 # Path to the firmware.
 # If you want that qemu uses the default firmware leave this option empty
@@ -65,7 +47,7 @@ firmware = "@FIRMWAREPATH@"
 # < 0                             --> will be set to the actual number of physical cores
 # > 0 <= number of physical cores --> will be set to the specified number
 # > number of physical cores      --> will be set to the actual number of physical cores
-# default_vcpus = 1
+default_vcpus = 1

 # Default maximum number of vCPUs per SB/VM:
 # unspecified or == 0             --> will be set to the actual number of physical cores or to the maximum number
@@ -82,7 +64,7 @@ firmware = "@FIRMWAREPATH@"
 # vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable,
 # unless you know what are you doing.
 # NOTICE: on arm platform with gicv2 interrupt controller, set it to 8.
-# default_maxvcpus = @DEFMAXVCPUS@
+default_maxvcpus = @DEFMAXVCPUS@

 # Bridges can be used to hot plug devices.
 # Limitations:
@@ -99,19 +81,19 @@ default_bridges = @DEFBRIDGES@
 # Default memory size in MiB for SB/VM.
 # If unspecified then it will be set @DEFMEMSZ@ MiB.
 # Note: the remote hypervisor uses the peer pod config to determine the memory of the VM
-# default_memory = @DEFMEMSZ@
+default_memory = @DEFMEMSZ@
 #
 # Default memory slots per SB/VM.
 # If unspecified then it will be set @DEFMEMSLOTS@.
 # This will determine the number of times that memory can be hot-added to the sandbox/VM.
 # Note: the remote hypervisor uses the peer pod config to determine the memory of the VM
-#memory_slots = @DEFMEMSLOTS@
+memory_slots = @DEFMEMSLOTS@

 # This option changes the default hypervisor and kernel parameters
 # to enable debug output where available. And Debug also enable the hmp socket.
 #
 # Default false
-# enable_debug = true
+enable_debug = false

 # Path to OCI hook binaries in the *guest rootfs*.
 # This does not affect host-side hooks which must instead be added to
@@ -128,10 +110,11 @@ default_bridges = @DEFBRIDGES@
 # https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
 # Warnings will be logged if any error is encountered while scanning for hooks,
 # but it will not abort container execution.
-#guest_hook_path = "/usr/share/oci/hooks"
+# Recommended value when enabling: "/usr/share/oci/hooks"
+guest_hook_path = ""

 # disable applying SELinux on the VMM process (default false)
-disable_selinux=@DEFDISABLESELINUX@
+disable_selinux = @DEFDISABLESELINUX@

 # disable applying SELinux on the container process
 # If set to false, the type `container_t` is applied to the container process by default.
@@ -144,7 +127,7 @@ disable_guest_selinux = true
 [agent.@PROJECT_TYPE@]
 # If enabled, make the agent display debug-level messages.
 # (default: disabled)
-# enable_debug = true
+enable_debug = false

 # Enable agent tracing.
 #
@@ -158,18 +141,18 @@ disable_guest_selinux = true
 #   increasing the container shutdown time slightly.
 #
 # (default: disabled)
-# enable_tracing = true
+enable_tracing = false

 # Enable debug console.

 # If enabled, user can connect guest OS running inside hypervisor
 # through "kata-runtime exec <sandbox-id>" command

-#debug_console_enabled = true
+debug_console_enabled = false

 # Agent connection dialing timeout value in seconds
 # (default: 30)
-#dial_timeout = 30
+dial_timeout = 30

 # Create Container Request Timeout
 # This timeout value is used to set the maximum duration for the agent to process a CreateContainerRequest.
@@ -182,13 +165,13 @@ disable_guest_selinux = true
 # - runtime-request-timeout: The timeout value specified in the Kubelet configuration described as the link below:
 # (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout)
 # Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
-# create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
+create_container_timeout = @DEFCREATECONTAINERTIMEOUT@

 [runtime]
 # If enabled, the runtime will log additional debug messages to the
 # system log
 # (default: disabled)
-# enable_debug = true
+enable_debug = false
 #
 # Internetworking model
 # Determines how the VM should be connected to the
@@ -207,11 +190,11 @@ disable_guest_selinux = true
 #     provided by plugin to a tap interface connected to the VM.
 #
 # Note: The remote hypervisor uses its own network, so "none" is required
-internetworking_model="none"
+internetworking_model = "none"

-name="virt_container"
-hypervisor_name="remote"
-agent_name="kata"
+name = "virt_container"
+hypervisor_name = "remote"
+agent_name = "kata"

 # disable guest seccomp
 # Determines whether container seccomp profiles are passed to the virtual
@@ -219,7 +202,7 @@ agent_name="kata"
 # within the guest
 # (default: true)
 # Note: The remote hypervisor has a different guest, so currently requires this to be set to true
-disable_guest_seccomp=true
+disable_guest_seccomp = true


 # Apply a custom SELinux security policy to the container process inside the VM.
@@ -228,22 +211,23 @@ disable_guest_seccomp=true
 # (format: "user:role:type")
 # Note: You cannot specify MCS policy with the label because the sensitivity levels and
 # categories are determined automatically by high-level container runtimes such as containerd.
-#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
+# Example value when enabling: "system_u:system_r:container_t"
+guest_selinux_label = "@DEFGUESTSELINUXLABEL@"

 # If enabled, the runtime will create opentracing.io traces and spans.
 # (See https://www.jaegertracing.io/docs/getting-started).
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Set the full url to the Jaeger HTTP Thrift collector.
 # The default if not set will be "http://localhost:14268/api/traces"
-#jaeger_endpoint = ""
+jaeger_endpoint = ""

 # Sets the username to be used if basic auth is required for Jaeger.
-#jaeger_user = ""
+jaeger_user = ""

 # Sets the password to be used if basic auth is required for Jaeger.
-#jaeger_password = ""
+jaeger_password = ""

 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
@@ -260,7 +244,7 @@ disable_new_netns = false
 # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
 # The sandbox cgroup is constrained if there is no container type annotation.
 # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
-sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_REMOTE@
+sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY_REMOTE@

 # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
 # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
@@ -270,7 +254,7 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_REMOTE@
 #   does not yet support sandbox sizing annotations.
 # - When running single containers using a tool like ctr, container sizing information will be available.
 # Note: the remote hypervisor uses the peer pod config to determine the sandbox size, so requires this to be set to true
-static_sandbox_resource_mgmt=true
+static_sandbox_resource_mgmt = true

 # VFIO Mode
 # Determines how VFIO devices should be presented to the container.
@@ -291,20 +275,20 @@ static_sandbox_resource_mgmt=true
 #    Using this mode requires specially built workloads that know how
 #    to locate the relevant device interfaces within the VM.
 #
-vfio_mode="@DEFVFIOMODE@"
+vfio_mode = "@DEFVFIOMODE@"

 # If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
 # be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
 # Note: remote hypervisor has no sharing of emptydir mounts from host to guest
-disable_guest_empty_dir=false
+disable_guest_empty_dir = false

 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
 # they may break compatibility, and are prepared for a big version bump.
 # Supported experimental features:
 # (default: [])
-experimental=@DEFAULTEXPFEATURES@
+experimental = @DEFAULTEXPFEATURES@

 # If enabled, user can run pprof tools with shim v2 process through kata-monitor.
 # (default: false)
-# enable_pprof = true
+enable_pprof = false
--- a/src/runtime-rs/config/configuration-rs-fc.toml.in
+++ b/src/runtime-rs/config/configuration-rs-fc.toml.in
@@ -16,7 +16,7 @@ path = "@FCPATH@"
 kernel = "@KERNELPATH_FC@"
 image = "@IMAGEPATH@"

-rootfs_type=@DEFROOTFSTYPE@
+rootfs_type = @DEFROOTFSTYPE@
 # List of valid annotation names for the hypervisor
 # Each member of the list is a regular expression, which is the base name
 # of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
@@ -32,7 +32,7 @@ valid_hypervisor_paths = @FCVALIDHYPERVISORPATHS@
 # If the jailer path is not set kata will launch firecracker
 # without a jail. If the jailer is set firecracker will be
 # launched in a jailed environment created by the jailer
-#jailer_path = "@FCJAILERPATH@"
+jailer_path = "@FCJAILERPATH@"

 # List of valid jailer path values for the hypervisor
 # Each member of the list can be a regular expression
@@ -104,7 +104,7 @@ memory_slots = @DEFMEMSLOTS@
 # If set block storage driver (block_device_driver) to "nvdimm",
 # should set memory_offset to the size of block device.
 # Default 0
-#memory_offset = 0
+memory_offset = 0

 # Default maximum memory in MiB per SB / VM
 # unspecified or == 0           --> will be set to the actual amount of physical RAM
@@ -121,12 +121,12 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@"

 # Specifies cache-related options will be set to block devices or not.
 # Default false
-#block_device_cache_set = true
+block_device_cache_set = false

 # Specifies cache-related options for block devices.
 # Denotes whether flush requests for the device are ignored.
 # Default false
-#block_device_cache_noflush = true
+block_device_cache_noflush = false

 # Bandwidth rate limiter options
 #
@@ -134,14 +134,14 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@"
 # for SB/VM).
 # The same value is used for inbound and outbound bandwidth.
 # Default 0-sized value means unlimited rate.
-#disk_rate_limiter_bw_max_rate = 0
+disk_rate_limiter_bw_max_rate = 0
 #
 # disk_rate_limiter_bw_one_time_burst increases the initial max rate and this
 # initial extra credit does *NOT* affect the overall limit and can be used for
 # an *initial* burst of data.
 # This is *optional* and only takes effect if disk_rate_limiter_bw_max_rate is
 # set to a non zero value.
-#disk_rate_limiter_bw_one_time_burst = 0
+disk_rate_limiter_bw_one_time_burst = 0
 #
 # Operation rate limiter options
 #
@@ -149,14 +149,20 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@"
 # for SB/VM).
 # The same value is used for inbound and outbound bandwidth.
 # Default 0-sized value means unlimited rate.
-#disk_rate_limiter_ops_max_rate = 0
+disk_rate_limiter_ops_max_rate = 0
 #
 # disk_rate_limiter_ops_one_time_burst increases the initial max rate and this
 # initial extra credit does *NOT* affect the overall limit and can be used for
 # an *initial* burst of data.
 # This is *optional* and only takes effect if disk_rate_limiter_bw_max_rate is
 # set to a non zero value.
-#disk_rate_limiter_ops_one_time_burst = 0
+disk_rate_limiter_ops_one_time_burst = 0
+
+# Virtio queue size, in ring entries (descriptors) per queue, not bytes. Default 128
+queue_size = 128
+
+# Block device multi-queue, default 1
+num_queues = 1

 # Enable pre allocation of VM RAM, default false
 # Enabling this will result in lower container density
@@ -165,7 +171,7 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@"
 # upfront or in the cases where you want memory latencies
 # to be very predictable
 # Default false
-#enable_mem_prealloc = true
+enable_mem_prealloc = false

 # Enable huge pages for VM RAM, default false
 # Enabling this will result in the VM memory
@@ -173,39 +179,40 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@"
 # This is useful when you want to use vhost-user network
 # stacks within the container. This will automatically
 # result in memory pre allocation
-#enable_hugepages = true
+enable_hugepages = false

 # Disable the 'seccomp' feature from Cloud Hypervisor, firecracker or dragonball, default false
-# disable_seccomp = true
+disable_seccomp = false

 # Enable vIOMMU, default false
 # Enabling this will result in the VM having a vIOMMU device
 # This will also add the following options to the kernel's
 # command line: intel_iommu=on,iommu=pt
-#enable_iommu = true
+enable_iommu = false

 # This option changes the default hypervisor and kernel parameters
 # to enable debug output where available.
 #
 # Default false
-#enable_debug = true
+enable_debug = false

 # Disable the customizations done in the runtime when it detects
 # that it is running on top a VMM. This will result in the runtime
 # behaving as it would when running on bare metal.
 #
-#disable_nesting_checks = true
+# Default false
+disable_nesting_checks = false

 # This is the msize used for 9p shares. It is the number of bytes
 # used for 9p packet payload.
-#msize_9p = @DEFMSIZE9P@
+msize_9p = @DEFMSIZE9P@

 # VFIO devices are hotplugged on a bridge by default.
 # Enable hotplugging on root bus. This may be required for devices with
 # a large PCI bar, as this is a current limitation with hotplugging on
 # a bridge.
 # Default false
-#hotplug_vfio_on_root_bus = true
+hotplug_vfio_on_root_bus = false

 #
 # Default entropy source.
@@ -217,7 +224,7 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@"
 # The source of entropy /dev/urandom is non-blocking and provides a
 # generally acceptable source of entropy. It should work well for pretty much
 # all practical purposes.
-#entropy_source= "@DEFENTROPYSOURCE@"
+entropy_source =  "@DEFENTROPYSOURCE@"

 # List of valid annotations values for entropy_source
 # The default if not set is empty (all annotations rejected.)
@@ -239,40 +246,27 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
 # Warnings will be logged if any error is encountered will scanning for hooks,
 # but it will not abort container execution.
-#guest_hook_path = "/usr/share/oci/hooks"
+# Recommended value when enabling: "/usr/share/oci/hooks"
+guest_hook_path = ""
 #
 # Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM).
 # In Firecracker, it provides a built-in rate limiter, which is based on TBF(Token Bucket Filter)
 # queueing discipline.
 # Default 0-sized value means unlimited rate.
-#rx_rate_limiter_max_rate = 0
+rx_rate_limiter_max_rate = 0
 # Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM).
 # In Firecracker, it provides a built-in rate limiter, which is based on TBF(Token Bucket Filter)
 # queueing discipline.
 # Default 0-sized value means unlimited rate.
-#tx_rate_limiter_max_rate = 0
+tx_rate_limiter_max_rate = 0

 # disable applying SELinux on the VMM process (default false)
-disable_selinux=@DEFDISABLESELINUX@
-
-[factory]
-# VM templating support. Once enabled, new VMs are created from template
-# using vm cloning. They will share the same initial kernel, initramfs and
-# agent memory by mapping it readonly. It helps speeding up new container
-# creation and saves a lot of memory if there are many kata containers running
-# on the same host.
-#
-# When disabled, new VMs are created from scratch.
-#
-# Note: Requires "initrd=" to be set ("image=" is not supported).
-#
-# Default false
-#enable_template = true
+disable_selinux = @DEFDISABLESELINUX@

 [agent.@PROJECT_TYPE@]
 # If enabled, make the agent display debug-level messages.
 # (default: disabled)
-#enable_debug = true
+enable_debug = false

 # Enable agent tracing.
 #
@@ -286,7 +280,7 @@ disable_selinux=@DEFDISABLESELINUX@
 #   increasing the container shutdown time slightly.
 #
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Comma separated list of kernel modules and their parameters.
 # These modules will be loaded in the guest kernel using modprobe(8).
@@ -299,14 +293,14 @@ disable_selinux=@DEFDISABLESELINUX@
 #  * The module is not available in the guest or it doesn't met the guest kernel
 #    requirements, like architecture and version.
 #
-kernel_modules=[]
+kernel_modules = []

 # Enable debug console.

 # If enabled, user can connect guest OS running inside hypervisor
 # through "kata-runtime exec <sandbox-id>" command

-#debug_console_enabled = true
+debug_console_enabled = false

 # Agent connection dialing timeout value in seconds
 # (default: 45)
@@ -314,7 +308,7 @@ dial_timeout = 45

 # Confidential Data Hub API timeout value in seconds
 # (default: 50)
-#cdh_api_timeout = 50
+cdh_api_timeout = 50

 # Create Container Request Timeout
 # This timeout value is used to set the maximum duration for the agent to process a CreateContainerRequest.
@@ -327,13 +321,14 @@ dial_timeout = 45
 # - runtime-request-timeout: The timeout value specified in the Kubelet configuration described as the link below:
 # (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout)
 # Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
-# create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
+create_container_timeout = @DEFCREATECONTAINERTIMEOUT@

 [runtime]
 # If enabled, the runtime will log additional debug messages to the
 # system log
 # (default: disabled)
-#enable_debug = true
+enable_debug = false
+
 #
 # Internetworking model
 # Determines how the VM should be connected to the
@@ -351,33 +346,33 @@ dial_timeout = 45
 #     Uses tc filter rules to redirect traffic from the network interface
 #     provided by plugin to a tap interface connected to the VM.
 #
-internetworking_model="@DEFNETWORKMODEL_FC@"
+internetworking_model = "@DEFNETWORKMODEL_FC@"

-name="@RUNTIMENAME@"
-hypervisor_name="@HYPERVISOR_FC@"
-agent_name="@PROJECT_TYPE@"
+name = "@RUNTIMENAME@"
+hypervisor_name = "@HYPERVISOR_FC@"
+agent_name = "@PROJECT_TYPE@"

 # disable guest seccomp
 # Determines whether container seccomp profiles are passed to the virtual
 # machine and applied by the kata agent. If set to true, seccomp is not applied
 # within the guest
 # (default: true)
-disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
+disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@

 # If enabled, the runtime will create opentracing.io traces and spans.
 # (See https://www.jaegertracing.io/docs/getting-started).
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Set the full url to the Jaeger HTTP Thrift collector.
 # The default if not set will be "http://localhost:14268/api/traces"
-#jaeger_endpoint = ""
+jaeger_endpoint = ""

 # Sets the username to be used if basic auth is required for Jaeger.
-#jaeger_user = ""
+jaeger_user = ""

 # Sets the password to be used if basic auth is required for Jaeger.
-#jaeger_password = ""
+jaeger_password = ""

 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
@@ -385,7 +380,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
 # (like OVS) directly.
 # (default: false)
-#disable_new_netns = true
+disable_new_netns = false

 # if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
 # The container cgroups in the host are not created, just one single cgroup per sandbox.
@@ -393,7 +388,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
 # The sandbox cgroup is constrained if there is no container type annotation.
 # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
-sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_FC@
+sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY_FC@

 # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
 # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
@@ -402,19 +397,19 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_FC@
 # - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
 #   does not yet support sandbox sizing annotations.
 # - When running single containers using a tool like ctr, container sizing information will be available.
-static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_FC@
+static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_FC@

 # If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
 # be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
-disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@
+disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@

 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
 # they may break compatibility, and are prepared for a big version bump.
 # Supported experimental features:
 # (default: [])
-experimental=@DEFAULTEXPFEATURES@
+experimental = @DEFAULTEXPFEATURES@

 # If enabled, user can run pprof tools with shim v2 process through kata-monitor.
 # (default: false)
-# enable_pprof = true
+enable_pprof = false
--- a/src/runtime-rs/crates/hypervisor/src/ch/inner_device.rs
+++ b/src/runtime-rs/crates/hypervisor/src/ch/inner_device.rs
@@ -25,7 +25,7 @@ use ch_config::ch_api::{
    cloud_hypervisor_vm_fs_add, cloud_hypervisor_vm_netdev_add_with_fds,
    cloud_hypervisor_vm_vsock_add, PciDeviceInfo, VmRemoveDeviceData,
 };
-use ch_config::convert::{DEFAULT_DISK_QUEUES, DEFAULT_DISK_QUEUE_SIZE, DEFAULT_NUM_PCI_SEGMENTS};
+use ch_config::convert::DEFAULT_NUM_PCI_SEGMENTS;
 use ch_config::DiskConfig;
 use ch_config::{net_util::MacAddr, DeviceConfig, FsConfig, NetConfig, VsockConfig};
 use kata_sys_util::netns::NetnsGuard;
@@ -542,8 +542,8 @@ impl TryFrom<BlockConfig> for DiskConfig {
        let disk_config: DiskConfig = DiskConfig {
            path: Some(blkcfg.path_on_host.as_str().into()),
            readonly: blkcfg.is_readonly,
-            num_queues: DEFAULT_DISK_QUEUES,
-            queue_size: DEFAULT_DISK_QUEUE_SIZE,
+            num_queues: blkcfg.num_queues,
+            queue_size: blkcfg.queue_size as u16,
            ..Default::default()
        };

--- a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs
+++ b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs
@@ -103,6 +103,12 @@ pub struct BlockConfig {

    /// device minor number
    pub minor: i64,
+
+    /// virtio queue size, in ring entries (descriptors) per queue
+    pub queue_size: u32,
+
+    /// block device multi-queue
+    pub num_queues: usize,
 }

 #[derive(Debug, Clone, Default)]
--- a/src/runtime-rs/crates/hypervisor/src/qemu/qmp.rs
+++ b/src/runtime-rs/crates/hypervisor/src/qemu/qmp.rs
@@ -488,7 +488,7 @@ impl Qmp {
        );
        netdev_frontend_args.insert("addr".to_owned(), format!("{:02}", slot).into());
        netdev_frontend_args.insert("mac".to_owned(), virtio_net_device.get_mac_addr().into());
-        netdev_frontend_args.insert("mq".to_owned(), "on".into());
+        netdev_frontend_args.insert("mq".to_owned(), true.into());
        // As the golang runtime documents the vectors computation, it's
        // 2N+2 vectors, N for tx queues, N for rx queues, 1 for config, and one for possible control vq
        netdev_frontend_args.insert(
--- a/src/runtime-rs/crates/resource/Cargo.toml
+++ b/src/runtime-rs/crates/resource/Cargo.toml
@@ -17,7 +17,7 @@ anyhow = { workspace = true }
 async-trait = { workspace = true }
 bitflags = "2.9.0"
 byte-unit = "5.1.6"
-cgroups-rs = { version = "0.4.0", features = ["oci"] }
+cgroups-rs = { version = "0.5.0", features = ["oci"] }
 futures = "0.3.11"
 lazy_static = { workspace = true }
 libc = { workspace = true }
@@ -40,9 +40,9 @@ tempfile = "3.19.1"
 hex = "0.4"

 ## Dependencies from `rust-netlink`
-netlink-packet-route = "0.22"
+netlink-packet-route = "0.26"
 netlink-sys = "0.8"
-rtnetlink = "0.16"
+rtnetlink = "0.19"

 # Local dependencies
 agent = { workspace = true }
--- a/src/runtime-rs/crates/resource/src/cpu_mem/cpu.rs
+++ b/src/runtime-rs/crates/resource/src/cpu_mem/cpu.rs
@@ -10,7 +10,6 @@ use kata_types::{config::TomlConfig, cpu::LinuxContainerCpuResources};
 use oci::LinuxCpu;
 use oci_spec::runtime as oci;
 use std::{
-    cmp,
    collections::{HashMap, HashSet},
    convert::TryFrom,
    sync::Arc,
@@ -22,7 +21,7 @@ use crate::ResourceUpdateOp;
 #[derive(Default, Debug, Clone)]
 pub struct CpuResource {
    /// Current number of vCPUs
-    pub(crate) current_vcpu: Arc<RwLock<u32>>,
+    pub(crate) current_vcpu: Arc<RwLock<f32>>,

    /// Default number of vCPUs
    pub(crate) default_vcpu: f32,
@@ -39,7 +38,7 @@ impl CpuResource {
            .get(&hypervisor_name)
            .context(format!("failed to get hypervisor {}", hypervisor_name))?;
        Ok(Self {
-            current_vcpu: Arc::new(RwLock::new(hypervisor_config.cpu_info.default_vcpus as u32)),
+            current_vcpu: Arc::new(RwLock::new(hypervisor_config.cpu_info.default_vcpus)),
            default_vcpu: hypervisor_config.cpu_info.default_vcpus,
            container_cpu_resources: Arc::new(RwLock::new(HashMap::new())),
        })
@@ -71,14 +70,14 @@ impl CpuResource {
        Ok(())
    }

-    pub(crate) async fn current_vcpu(&self) -> u32 {
+    pub(crate) async fn current_vcpu(&self) -> f32 {
        let current_vcpu = self.current_vcpu.read().await;
        *current_vcpu
    }

    async fn update_current_vcpu(&self, new_vcpus: u32) {
        let mut current_vcpu = self.current_vcpu.write().await;
-        *current_vcpu = new_vcpus;
+        *current_vcpu = new_vcpus as f32;
    }

    // update container_cpu_resources field
@@ -116,10 +115,11 @@ impl CpuResource {
    }

    // calculates the total required vcpus by adding each container's requirements within the pod
-    async fn calc_cpu_resources(&self) -> Result<u32> {
+    async fn calc_cpu_resources(&self) -> Result<f32> {
        let resources = self.container_cpu_resources.read().await;
        if resources.is_empty() {
-            return Ok(self.default_vcpu.ceil() as u32);
+            // No containers, just keep the default vCPU configuration
+            return Ok(self.default_vcpu);
        }

        // If requests of individual containers are expresses with different
@@ -128,6 +128,7 @@ impl CpuResource {
        // to use the largest period as the common denominator since it
        // shifts precision out of the fractional part and into the
        // integral part in case a rewritten quota ends up non-integral.
+        // Determine the largest CPU period among containers, will be used to normalize quotas
        let max_period = resources
            .iter()
            .map(|(_, cpu_resource)| cpu_resource.period())
@@ -155,53 +156,74 @@ impl CpuResource {

            let quota = cpu_resource.quota() as f64;
            let period = cpu_resource.period() as f64;
-            if quota >= 0.0 && period != 0.0 {
+            if quota >= 0.0 && period > 0.0 {
+                // Normalize to max_period before adding quotas
                total_quota += quota * (max_period / period);
            }
        }

-        // contrained only by cpuset
+        // constrained only by cpuset (no quota set)
        if total_quota == 0.0 && !cpuset_vcpu.is_empty() {
            info!(sl!(), "(from cpuset)get vcpus # {:?}", cpuset_vcpu);
-            return Ok(cpuset_vcpu.len() as u32);
+            return Ok(cpuset_vcpu.len() as f32);
        }

-        let total_vcpu = if total_quota > 0.0 && max_period != 0.0 {
-            self.default_vcpu as f64 + total_quota / max_period
-        } else {
-            self.default_vcpu as f64
-        };
+        // When quota is set: calculate vCPUs as quota/period after normalization
+        if total_quota > 0.0 && max_period > 0.0 {
+            let quota_vcpu = total_quota / max_period;
+            info!(
+                sl!(),
+                "(from cfs_quota&cfs_period) target vcpus {} from quota {} max_period {}",
+                quota_vcpu,
+                total_quota,
+                max_period
+            );

-        info!(
-            sl!(),
-            "(from cfs_quota&cfs_period)get vcpus count {}", total_vcpu
-        );
-        Ok(total_vcpu.ceil() as u32)
+            let total_vcpu = quota_vcpu as f32 + self.default_vcpu;
+
+            return Ok(total_vcpu);
+        }
+
+        // Default case: no quota, no cpuset: use default_vcpu, but never less than 1 vCPU
+        Ok(self.default_vcpu.max(1.0))
    }

    // do hotplug and hot-unplug the vcpu
    async fn do_update_cpu_resources(
        &self,
-        new_vcpus: u32,
+        new_vcpus: f32,
        op: ResourceUpdateOp,
        hypervisor: &dyn Hypervisor,
    ) -> Result<u32> {
        let old_vcpus = self.current_vcpu().await;

-        // when adding vcpus, ignore old_vcpus > new_vcpus
-        // when deleting vcpus, ignore old_vcpus < new_vcpus
+        // Prevent decreasing vCPUs on an Add operation or increasing on a Delete
        if (op == ResourceUpdateOp::Add && old_vcpus > new_vcpus)
            || (op == ResourceUpdateOp::Del && old_vcpus < new_vcpus)
        {
-            return Ok(old_vcpus);
+            return Ok(old_vcpus.ceil() as u32);
        }

-        // do not reduce computing power
-        // the number of vcpus would not be lower than the default size
-        let new_vcpus = cmp::max(new_vcpus, self.default_vcpu.ceil() as u32);
+        // Enforce minimum of 1 vCPU for the VM
+        let min_vcpus = 1.0_f32;
+        let target_vcpus = if new_vcpus < min_vcpus {
+            min_vcpus
+        } else {
+            new_vcpus
+        };
+
+        // Hypervisor only supports integer vCPU counts – round up at the last step
+        let target_vcpus_int = target_vcpus.ceil() as u32;
+        info!(
+            sl!(),
+            "(do_update_cpu_resources) old_vcpus {} -> new_vcpus {} (ceil to {})",
+            old_vcpus,
+            new_vcpus,
+            target_vcpus_int
+        );

        let (_, new) = hypervisor
-            .resize_vcpu(old_vcpus, new_vcpus)
+            .resize_vcpu(old_vcpus.ceil() as u32, target_vcpus_int)
            .await
            .context("resize vcpus")?;

@@ -225,6 +247,7 @@ mod tests {
            .entry("qemu".to_owned())
            .and_modify(|hv_config| hv_config.cpu_info.default_vcpus = default_vcpus);
        config.runtime.hypervisor_name = "qemu".to_owned();
+
        CpuResource::new(Arc::new(config)).unwrap()
    }

@@ -251,31 +274,15 @@ mod tests {
        // result of 0.99999999999999989) but it still doesn't guarantee the
        // correct result in general.  For instance, adding 0.1 twenty times
        // in 64 bits results in 2.0000000000000004.
-        add_linux_container_cpu_resources(
-            &mut cpu_resource,
-            vec![
-                (100_000, 1_000_000),
-                (100_000, 1_000_000),
-                (100_000, 1_000_000),
-                (100_000, 1_000_000),
-                (100_000, 1_000_000),
-                (100_000, 1_000_000),
-                (100_000, 1_000_000),
-                (100_000, 1_000_000),
-                (100_000, 1_000_000),
-                (100_000, 1_000_000),
-            ],
-        )
-        .await;
+        add_linux_container_cpu_resources(&mut cpu_resource, vec![(100_000, 1_000_000); 10]).await;

-        assert_eq!(cpu_resource.calc_cpu_resources().await.unwrap(), 1);
+        // 10 * 0.1 = 1.0: matches expected vCPU sum (float-safe in f64)
+        assert_eq!(cpu_resource.calc_cpu_resources().await.unwrap(), 1.0);
    }

    #[tokio::test]
    async fn test_big_allocation_1() {
-        let default_vcpus = 10.0;
-
-        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);
+        let mut cpu_resource = get_cpu_resource_with_default_vcpus(10.0);
        add_linux_container_cpu_resources(
            &mut cpu_resource,
            vec![
@@ -286,16 +293,20 @@ mod tests {
        )
        .await;

-        assert_eq!(
-            cpu_resource.calc_cpu_resources().await.unwrap(),
-            128 + default_vcpus as u32
+        const EPSILON: f32 = 0.0001;
+        let actual = cpu_resource.calc_cpu_resources().await.unwrap();
+        let expected = 138.0;
+        assert!(
+            (actual - expected).abs() < EPSILON,
+            "got {}, expect {}",
+            actual,
+            expected
        );
    }

    #[tokio::test]
    async fn test_big_allocation_2() {
-        let default_vcpus = 10.0;
-        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);
+        let mut cpu_resource = get_cpu_resource_with_default_vcpus(10.0);
        add_linux_container_cpu_resources(
            &mut cpu_resource,
            vec![
@@ -306,98 +317,114 @@ mod tests {
        )
        .await;

-        assert_eq!(
-            cpu_resource.calc_cpu_resources().await.unwrap(),
-            (33 + 31 + 77 + 1) + default_vcpus as u32
+        const EPSILON: f32 = 0.0001;
+        let actual = cpu_resource.calc_cpu_resources().await.unwrap();
+        let expected = 151.0;
+        assert!(
+            (actual - expected).abs() < EPSILON,
+            "got {}, expect {}",
+            actual,
+            expected
        );
    }

    #[tokio::test]
    async fn test_big_allocation_3() {
-        let default_vcpus = 10.0;
-        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);
+        let mut cpu_resource = get_cpu_resource_with_default_vcpus(10.0);
        add_linux_container_cpu_resources(&mut cpu_resource, vec![(141_000_008, 1_000_000)]).await;

-        assert_eq!(
-            cpu_resource.calc_cpu_resources().await.unwrap(),
-            142 + default_vcpus as u32
+        // quota/period = 141.000008, plus 10 default vCPUs ~= 151; calc keeps the float, rounding up (ceil) only happens when resizing the hypervisor
+        const EPSILON: f32 = 0.0001;
+        let actual = cpu_resource.calc_cpu_resources().await.unwrap();
+        let expected = 151.0;
+        assert!(
+            (actual - expected).abs() < EPSILON,
+            "got {}, expect {}",
+            actual,
+            expected
        );
    }

    #[tokio::test]
    async fn test_big_allocation_4() {
-        let default_vcpus = 10.0;
-        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);
-        add_linux_container_cpu_resources(
-            &mut cpu_resource,
-            vec![
-                (17_000_001, 1_000_000),
-                (17_000_001, 1_000_000),
-                (17_000_001, 1_000_000),
-                (17_000_001, 1_000_000),
-            ],
-        )
-        .await;
+        let mut cpu_resource = get_cpu_resource_with_default_vcpus(10.0);
+        add_linux_container_cpu_resources(&mut cpu_resource, vec![(17_000_001, 1_000_000); 4])
+            .await;

-        assert_eq!(
-            cpu_resource.calc_cpu_resources().await.unwrap(),
-            (4 * 17 + 1) + default_vcpus as u32
+        const EPSILON: f32 = 0.0001;
+        let actual = cpu_resource.calc_cpu_resources().await.unwrap();
+        let expected = 78.0;
+        assert!(
+            (actual - expected).abs() < EPSILON,
+            "got {}, expect {}",
+            actual,
+            expected
        );
    }

    #[tokio::test]
    async fn test_divisible_periods() {
-        let default_vcpus = 3.0;
-        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);
+        let mut cpu_resource = get_cpu_resource_with_default_vcpus(3.0);
+
        add_linux_container_cpu_resources(
            &mut cpu_resource,
            vec![(1_000_000, 1_000_000), (1_000_000, 500_000)],
        )
        .await;

-        assert_eq!(
-            cpu_resource.calc_cpu_resources().await.unwrap(),
-            3 + default_vcpus as u32
-        );
+        // periods normalized: second quota is scaled by 2. quota vCPUs = 1 + 2 = 3, plus 3 default vCPUs = 6
+        assert_eq!(cpu_resource.calc_cpu_resources().await.unwrap(), 6.0);

-        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);
+        let mut cpu_resource = get_cpu_resource_with_default_vcpus(3.0);
        add_linux_container_cpu_resources(
            &mut cpu_resource,
            vec![(3_000_000, 1_500_000), (1_000_000, 500_000)],
        )
        .await;

-        assert_eq!(
-            cpu_resource.calc_cpu_resources().await.unwrap(),
-            4 + default_vcpus as u32
-        );
+        // first container needs 2 vCPUs, second needs 2: quota vCPUs = 4, plus 3 default vCPUs = 7
+        assert_eq!(cpu_resource.calc_cpu_resources().await.unwrap(), 7.0);
    }

    #[tokio::test]
    async fn test_indivisible_periods() {
-        let default_vcpus = 1.0;
-        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);
+        const EPSILON: f32 = 0.0001;
+
+        // Case 1
+        let mut cpu_resource = get_cpu_resource_with_default_vcpus(1.0);
        add_linux_container_cpu_resources(
            &mut cpu_resource,
            vec![(1_000_000, 1_000_000), (900_000, 300_000)],
        )
        .await;

-        assert_eq!(
-            cpu_resource.calc_cpu_resources().await.unwrap(),
-            4 + default_vcpus as u32
+        let actual = cpu_resource.calc_cpu_resources().await.unwrap();
+        let expected = 5.0; // quota sum (1 + 3 = 4) plus default_vcpu (1.0)
+        assert!(
+            (actual - expected).abs() < EPSILON,
+            "case1: got {}, expect {}",
+            actual,
+            expected
        );

-        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);
+        // Case 2
+        let mut cpu_resource = get_cpu_resource_with_default_vcpus(1.0);
        add_linux_container_cpu_resources(
            &mut cpu_resource,
            vec![(1_000_000, 1_000_000), (900_000, 299_999)],
        )
        .await;

-        assert_eq!(
-            cpu_resource.calc_cpu_resources().await.unwrap(),
-            5 + default_vcpus as u32
+        let actual = cpu_resource.calc_cpu_resources().await.unwrap();
+        // total_quota = 1_000_000 + (900_000 * (1_000_000 / 299_999))
+        // total_vcpus = total_quota / 1_000_000 + default_vcpu (1.0)
+        let expected = (1_000_000.0 + (900_000.0 * (1_000_000.0 / 299_999.0))) / 1_000_000.0 + 1.0;
+
+        assert!(
+            (actual - expected as f32).abs() < EPSILON,
+            "case2: got {}, expect {}",
+            actual,
+            expected
        );
    }

@@ -407,17 +434,16 @@ mod tests {
        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);
        add_linux_container_cpu_resources(&mut cpu_resource, vec![(250_000, 1_000_000)]).await;

-        assert_eq!(cpu_resource.calc_cpu_resources().await.unwrap(), 1);
+        assert_eq!(cpu_resource.calc_cpu_resources().await.unwrap(), 0.75);

        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);
        add_linux_container_cpu_resources(&mut cpu_resource, vec![(500_000, 1_000_000)]).await;
-
-        assert_eq!(cpu_resource.calc_cpu_resources().await.unwrap(), 1);
+        assert_eq!(cpu_resource.calc_cpu_resources().await.unwrap(), 1.0);

        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);
        add_linux_container_cpu_resources(&mut cpu_resource, vec![(500_001, 1_000_000)]).await;

-        assert_eq!(cpu_resource.calc_cpu_resources().await.unwrap(), 2);
+        assert_eq!(cpu_resource.calc_cpu_resources().await.unwrap(), 1.000001);

        // This test doesn't pass because 0.1 is periodic in binary and thus
        // not exactly representable by a float of any width for fundamental
--- a/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs
+++ b/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs
@@ -17,7 +17,7 @@ use oci_spec::runtime as oci;
 // sandbox/container's workload
 #[derive(Clone, Copy, Debug)]
 struct InitialSize {
-    vcpu: u32,
+    vcpu: f32,
    mem_mb: u32,
    orig_toml_default_mem: u32,
 }
@@ -28,7 +28,7 @@ const MIB: i64 = 1024 * 1024;
 impl TryFrom<&HashMap<String, String>> for InitialSize {
    type Error = anyhow::Error;
    fn try_from(an: &HashMap<String, String>) -> Result<Self> {
-        let mut vcpu: u32 = 0;
+        let mut vcpu: f32 = 0.0;

        let annotation = Annotation::new(an.clone());
        let (period, quota, memory) =
@@ -56,7 +56,7 @@ impl TryFrom<&HashMap<String, String>> for InitialSize {
 impl TryFrom<&oci::Spec> for InitialSize {
    type Error = anyhow::Error;
    fn try_from(spec: &oci::Spec) -> Result<Self> {
-        let mut vcpu: u32 = 0;
+        let mut vcpu: f32 = 0.0;
        let mut mem_mb: u32 = 0;
        match container_type(spec) {
            // podsandbox, from annotation
@@ -140,8 +140,8 @@ impl InitialSizeManager {
            .get_mut(hypervisor_name)
            .context("failed to get hypervisor config")?;

-        if self.resource.vcpu > 0 {
-            hv.cpu_info.default_vcpus = self.resource.vcpu as f32
+        if self.resource.vcpu > 0.0 {
+            info!(sl!(), "resource with vcpu {}", self.resource.vcpu);
        }
        self.resource.orig_toml_default_mem = hv.memory_info.default_memory;
        if self.resource.mem_mb > 0 {
@@ -160,11 +160,11 @@ impl InitialSizeManager {
    }
 }

-fn get_nr_vcpu(resource: &LinuxContainerCpuResources) -> u32 {
+fn get_nr_vcpu(resource: &LinuxContainerCpuResources) -> f32 {
    if let Some(v) = resource.get_vcpus() {
-        v as u32
+        v as f32
    } else {
-        0
+        0.0
    }
 }

@@ -223,7 +223,7 @@ mod tests {
                    memory: None,
                },
                result: InitialSize {
-                    vcpu: 0,
+                    vcpu: 0.0,
                    mem_mb: 0,
                    orig_toml_default_mem: 0,
                },
@@ -237,7 +237,7 @@ mod tests {
                    memory: Some(512 * MIB),
                },
                result: InitialSize {
-                    vcpu: 3,
+                    vcpu: 3.0,
                    mem_mb: 512,
                    orig_toml_default_mem: 0,
                },
@@ -250,7 +250,7 @@ mod tests {
                    memory: Some(513 * MIB),
                },
                result: InitialSize {
-                    vcpu: 0,
+                    vcpu: 0.0,
                    mem_mb: 514,
                    orig_toml_default_mem: 0,
                },
@@ -295,9 +295,12 @@ mod tests {

            let initial_size = initial_size.unwrap();
            assert_eq!(
-                initial_size.vcpu, d.result.vcpu,
+                initial_size.vcpu.ceil(),
+                d.result.vcpu,
                "test[{}]: {:?} vcpu should be {}",
-                i, d.desc, d.result.vcpu,
+                i,
+                d.desc,
+                d.result.vcpu,
            );
            assert_eq!(
                initial_size.mem_mb, d.result.mem_mb,
@@ -349,9 +352,12 @@ mod tests {

            let initial_size = initial_size.unwrap();
            assert_eq!(
-                initial_size.vcpu, d.result.vcpu,
+                initial_size.vcpu.ceil(),
+                d.result.vcpu,
                "test[{}]: {:?} vcpu should be {}",
-                i, d.desc, d.result.vcpu,
+                i,
+                d.desc,
+                d.result.vcpu,
            );
            assert_eq!(
                initial_size.mem_mb, d.result.mem_mb,
--- a/src/runtime-rs/crates/resource/src/manager_inner.rs
+++ b/src/runtime-rs/crates/resource/src/manager_inner.rs
@@ -413,17 +413,14 @@ impl ResourceManagerInner {
        for d in linux_devices.iter() {
            match d.typ() {
                LinuxDeviceType::B => {
-                    let block_driver = get_block_device_info(&self.device_manager)
-                        .await
-                        .block_device_driver;
-                    let aio = get_block_device_info(&self.device_manager)
-                        .await
-                        .block_device_aio;
+                    let blkdev_info = get_block_device_info(&self.device_manager).await;
                    let dev_info = DeviceConfig::BlockCfg(BlockConfig {
                        major: d.major(),
                        minor: d.minor(),
-                        driver_option: block_driver,
-                        blkdev_aio: BlockDeviceAio::new(&aio),
+                        driver_option: blkdev_info.block_device_driver,
+                        blkdev_aio: BlockDeviceAio::new(&blkdev_info.block_device_aio),
+                        num_queues: blkdev_info.num_queues,
+                        queue_size: blkdev_info.queue_size,
                        ..Default::default()
                    });

@@ -595,7 +592,7 @@ impl ResourceManagerInner {
            self.agent
                .online_cpu_mem(OnlineCPUMemRequest {
                    wait: false,
-                    nb_cpus: self.cpu_resource.current_vcpu().await,
+                    nb_cpus: self.cpu_resource.current_vcpu().await.ceil() as u32,
                    cpu_only: false,
                })
                .await
--- a/src/runtime-rs/crates/resource/src/volume/block_volume.rs
+++ b/src/runtime-rs/crates/resource/src/volume/block_volume.rs
@@ -47,6 +47,8 @@ impl BlockVolume {
            minor: stat::minor(fstat.st_rdev) as i64,
            driver_option: blkdev_info.block_device_driver,
            blkdev_aio: BlockDeviceAio::new(&blkdev_info.block_device_aio),
+            num_queues: blkdev_info.num_queues,
+            queue_size: blkdev_info.queue_size,
            ..Default::default()
        };

--- a/src/runtime-rs/crates/resource/src/volume/direct_volumes/rawblock_volume.rs
+++ b/src/runtime-rs/crates/resource/src/volume/direct_volumes/rawblock_volume.rs
@@ -62,6 +62,8 @@ impl RawblockVolume {
            path_on_host: mount_info.device.clone(),
            driver_option: blkdev_info.block_device_driver,
            blkdev_aio: BlockDeviceAio::new(&blkdev_info.block_device_aio),
+            num_queues: blkdev_info.num_queues,
+            queue_size: blkdev_info.queue_size,
            ..Default::default()
        };

--- a/src/runtime-rs/crates/resource/src/volume/share_fs_volume.rs
+++ b/src/runtime-rs/crates/resource/src/volume/share_fs_volume.rs
@@ -330,9 +330,6 @@ impl VolumeManager {
                state.guest_path,
                state.ref_count,
            );
-
-            // Return guest path
-            return Ok(state.guest_path.clone());
        }

        // Create a new volume state
--- a/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/container.rs
+++ b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/container.rs
@@ -22,7 +22,7 @@ use kata_types::{
    container::{update_ocispec_annotations, POD_CONTAINER, POD_SANDBOX},
    k8s::{self, container_type},
 };
-use oci_spec::runtime::{self as oci, LinuxDeviceCgroup};
+use oci_spec::runtime as oci;

 use oci::{LinuxResources, Process as OCIProcess};
 use resource::{
@@ -217,11 +217,10 @@ impl Container {
        if let Some(linux) = &mut spec.linux_mut() {
            linux.set_resources(resources);

-            // In certain scenarios, particularly under CoCo/Agent Policy enforcement, the default initial value of `Linux.Resources.Devices`
-            // is considered non-compliant, leading to container creation failures. To address this issue and ensure consistency with the behavior
-            // in `runtime-go`, the default value of `Linux.Resources.Devices` from the OCI Spec should be removed.
+            // In certain scenarios, particularly under CoCo/Agent Policy enforcement,
+            // the value of `Linux.Resources.Devices` should be empty.
            if let Some(resource) = linux.resources_mut() {
-                clean_linux_resources_devices(resource);
+                resource.set_devices(None);
            }
        }

@@ -688,30 +687,6 @@ fn is_pid_namespace_enabled(spec: &oci::Spec) -> bool {
    false
 }

-/// Cleans or filters specific device cgroup rules within the `devices` field of the `LinuxResources`.
-/// Specifically, it iterates through all `LinuxDeviceCgroup` rules in `resources`
-/// and removes those considered to be "default, all-access (rwm), and non-specific device" rules.
-fn clean_linux_resources_devices(resources: &mut LinuxResources) {
-    if let Some(devices) = resources.devices_mut().take() {
-        let cleaned_devices: Vec<LinuxDeviceCgroup> = devices
-            .into_iter()
-            .filter(|device| {
-                !(!device.allow()
-                    && device.typ().is_none()
-                    && device.major().is_none()
-                    && device.minor().is_none()
-                    && device.access().as_deref() == Some("rwm"))
-            })
-            .collect();
-
-        resources.set_devices(if cleaned_devices.is_empty() {
-            None
-        } else {
-            Some(cleaned_devices)
-        });
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use super::amend_spec;
--- a/src/runtime-rs/crates/shim/Cargo.toml
+++ b/src/runtime-rs/crates/shim/Cargo.toml
@@ -14,9 +14,8 @@ path = "src/bin/main.rs"

 [dependencies]
 anyhow = { workspace = true }
-backtrace = { version = ">=0.3.35", features = [
+backtrace = { version = ">=0.3.76", features = [
    "libunwind",
-    "libbacktrace",
    "std",
 ], default-features = false }
 containerd-shim-protos = { workspace = true }
--- a/src/runtime/Makefile
+++ b/src/runtime/Makefile
@@ -233,13 +233,19 @@ DEFDISABLESELINUX := false

 # Default guest SELinux configuration
 DEFDISABLEGUESTSELINUX := true
-DEFGUESTSELINUXLABEL := system_u:system_r:container_t
+# Default is the empty string "" to match the golang default (i.e. when the option is commented out in config).
+# Most users will want to set this to "system_u:system_r:container_t" for SELinux support.
+DEFGUESTSELINUXLABEL := 

 #Default SeccomSandbox param
 #The same default policy is used by libvirt
-#More explanation on https://lists.gnu.org/archive/html/qemu-devel/2017-02/msg03348.html
+# More explanation on https://lists.gnu.org/archive/html/qemu-devel/2017-02/msg03348.html
+#
+# Default is empty string "" to match the default (when commented out in config).
+# Most users will want to set this to "on,obsolete=deny,spawn=deny,resourcecontrol=deny"
+# for better security.
 # Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
-DEFSECCOMPSANDBOXPARAM := on,obsolete=deny,spawn=deny,resourcecontrol=deny
+DEFSECCOMPSANDBOXPARAM :=

 #Default entropy source
 DEFENTROPYSOURCE := /dev/urandom
@@ -269,6 +275,7 @@ DEFVIRTIOFSQUEUESIZE ?= 1024
 # Make sure you quote args.
 DEFVIRTIOFSEXTRAARGS ?= [\"--thread-pool-size=1\", \"--announce-submounts\"]
 DEFENABLEIOTHREADS := false
+DEFINDEPIOTHREADS := 0
 DEFENABLEVHOSTUSERSTORE := false
 DEFVHOSTUSERSTOREPATH := $(PKGRUNDIR)/vhost-user
 DEFVALIDVHOSTUSERSTOREPATHS := [\"$(DEFVHOSTUSERSTOREPATH)\"]
@@ -295,6 +302,10 @@ DEFDANCONF := /run/kata-containers/dans

 DEFFORCEGUESTPULL := false

+# Device cold plug
+DEFPODRESOURCEAPISOCK := ""
+DEFPODRESOURCEAPISOCK_NV := "/var/lib/kubelet/pod-resources/kubelet.sock"
+
 SED = sed

 CLI_DIR = cmd
@@ -461,7 +472,7 @@ ifneq (,$(QEMUCMD))

    DEFAULTVCPUS_NV = 1
    DEFAULTMEMORY_NV = 2048
-    DEFAULTTIMEOUT_NV = 500
+    DEFAULTTIMEOUT_NV = 1200
    DEFAULTVFIOPORT_NV = root-port
    DEFAULTPCIEROOTPORT_NV = 8

@@ -469,12 +480,9 @@ ifneq (,$(QEMUCMD))
    KERNELPARAMS_NV += "cgroup_no_v1=all"

    KERNELTDXPARAMS_NV = $(KERNELPARAMS_NV)
-    KERNELTDXPARAMS_NV += "clearcpuid=mtrr"
    KERNELTDXPARAMS_NV += "authorize_allow_devs=pci:ALL"

    KERNELSNPPARAMS_NV = $(KERNELPARAMS_NV)
-    #TODO: temporary until the attestation agent activates the device after successful attestation
-    KERNELSNPPARAMS_NV += "nvrc.smi.srs=1"

    # Setting this to false can lead to cgroup leakages in the host
    # Best practice for production is to set this to true
@@ -758,6 +766,7 @@ USER_VARS += DEFVIRTIOFSEXTRAARGS
 USER_VARS += DEFENABLEANNOTATIONS
 USER_VARS += DEFENABLEANNOTATIONS_COCO
 USER_VARS += DEFENABLEIOTHREADS
+USER_VARS += DEFINDEPIOTHREADS
 USER_VARS += DEFSECCOMPSANDBOXPARAM
 USER_VARS += DEFENABLEVHOSTUSERSTORE
 USER_VARS += DEFVHOSTUSERSTOREPATH
@@ -783,7 +792,8 @@ USER_VARS += BUILDFLAGS
 USER_VARS += DEFDISABLEIMAGENVDIMM
 USER_VARS += DEFCCAMEASUREMENTALGO
 USER_VARS += DEFSHAREDFS_QEMU_CCA_VIRTIOFS
-
+USER_VARS += DEFPODRESOURCEAPISOCK
+USER_VARS += DEFPODRESOURCEAPISOCK_NV

 V              = @
 Q              = $(V:1=)
--- a/src/runtime/config/configuration-clh.toml.in
+++ b/src/runtime/config/configuration-clh.toml.in
@@ -20,41 +20,22 @@ image = "@IMAGEPATH@"
 #   - ext4 (default)
 #   - xfs
 #   - erofs
-rootfs_type=@DEFROOTFSTYPE@
-
-# Enable confidential guest support.
-# Toggling that setting may trigger different hardware features, ranging
-# from memory encryption to both memory and CPU-state encryption and integrity.
-# The Kata Containers runtime dynamically detects the available feature set and
-# aims at enabling the largest possible one, returning an error if none is
-# available, or none is supported by the hypervisor.
-#
-# Known limitations:
-# * Does not work by design:
-#   - CPU Hotplug 
-#   - Memory Hotplug
-#   - NVDIMM devices
-#
-# Supported TEEs:
-# * Intel TDX
-#
-# Default false
-# confidential_guest = true
+rootfs_type = @DEFROOTFSTYPE@

 # Enable running clh VMM as a non-root user.
 # By default clh VMM run as root. When this is set to true, clh VMM process runs as
 # a non-root random user. See documentation for the limitations of this mode.
-# rootless = true
+rootless = false

 # disable applying SELinux on the VMM process (default false)
-disable_selinux=@DEFDISABLESELINUX@
+disable_selinux = @DEFDISABLESELINUX@

 # disable applying SELinux on the container process
 # If set to false, the type `container_t` is applied to the container process by default.
 # Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
 # with `SELINUX=yes`.
 # (default: true)
-disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
+disable_guest_selinux = @DEFDISABLEGUESTSELINUX@

 # Path to the firmware.
 # If you want Cloud Hypervisor to use a specific firmware, set its path below.
@@ -120,7 +101,7 @@ default_memory = @DEFMEMSZ@
 # Default memory slots per SB/VM.
 # If unspecified then it will be set @DEFMEMSLOTS@.
 # This is will determine the times that memory will be hotadded to sandbox/VM.
-#memory_slots = @DEFMEMSLOTS@
+memory_slots = @DEFMEMSLOTS@

 # Default maximum memory in MiB per SB / VM
 # unspecified or == 0           --> will be set to the actual amount of physical RAM
@@ -182,12 +163,12 @@ block_device_driver = "virtio-blk"

 # Specifies cache-related options will be set to block devices or not.
 # Default false
-#block_device_cache_set = true
+block_device_cache_set = false

 # Specifies cache-related options for block devices.
 # Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
 # Default false
-#block_device_cache_direct = true
+block_device_cache_direct = false

 # Reclaim guest freed memory.
 # Enabling this will result in the VM balloon device having f_reporting=on set.
@@ -197,32 +178,32 @@ block_device_driver = "virtio-blk"
 # the VM.
 #
 # Default false
-#reclaim_guest_freed_memory = true
+reclaim_guest_freed_memory = false

 # Enable huge pages for VM RAM, default false
 # Enabling this will result in the VM memory
 # being allocated using huge pages.
-#enable_hugepages = true
+enable_hugepages = false

 # Disable the 'seccomp' feature from Cloud Hypervisor, default false
-# disable_seccomp = true
+disable_seccomp = false

 # Enable vIOMMU, default false
 # Enabling this will result in the VM having a vIOMMU device
 # This will also add the following options to the kernel's
 # command line: iommu=pt
-#enable_iommu = true
+enable_iommu = false

 # This option changes the default hypervisor and kernel parameters
 # to enable debug output where available.
 #
 # Default false
-#enable_debug = true
+enable_debug = false

 # This option specifies the loglevel of the hypervisor
 #
 # Default 1
-#hypervisor_loglevel = 1
+hypervisor_loglevel = 1

 # If false and nvdimm is supported, use nvdimm device to plug guest image.
 # Otherwise virtio-block device is used.
@@ -232,7 +213,7 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@

 # Enable hot-plugging of VFIO devices to a root-port.
 # The default setting is  "no-port"
-#hot_plug_vfio = "root-port"
+hot_plug_vfio = "no-port"

 # Path to OCI hook binaries in the *guest rootfs*.
 # This does not affect host-side hooks which must instead be added to
@@ -249,7 +230,7 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
 # https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
 # Warnings will be logged if any error is encountered while scanning for hooks,
 # but it will not abort container execution.
-#guest_hook_path = "/usr/share/oci/hooks"
+guest_hook_path = ""
 #
 # These options are related to network rate limiter at the VMM level, and are
 # based on the Cloud Hypervisor I/O throttling.  Those are disabled by default
@@ -263,14 +244,14 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
 # for SB/VM).
 # The same value is used for inbound and outbound bandwidth.
 # Default 0-sized value means unlimited rate.
-#net_rate_limiter_bw_max_rate = 0
+net_rate_limiter_bw_max_rate = 0
 #
 # net_rate_limiter_bw_one_time_burst increases the initial max rate and this
 # initial extra credit does *NOT* affect the overall limit and can be used for
 # an *initial* burst of data.
 # This is *optional* and only takes effect if net_rate_limiter_bw_max_rate is
 # set to a non zero value.
-#net_rate_limiter_bw_one_time_burst = 0
+net_rate_limiter_bw_one_time_burst = 0
 #
 # Operation rate limiter options
 #
@@ -278,14 +259,14 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
 # for SB/VM).
 # The same value is used for inbound and outbound bandwidth.
 # Default 0-sized value means unlimited rate.
-#net_rate_limiter_ops_max_rate = 0
+net_rate_limiter_ops_max_rate = 0
 #
 # net_rate_limiter_ops_one_time_burst increases the initial max rate and this
 # initial extra credit does *NOT* affect the overall limit and can be used for
 # an *initial* burst of data.
 # This is *optional* and only takes effect if net_rate_limiter_bw_max_rate is
 # set to a non zero value.
-#net_rate_limiter_ops_one_time_burst = 0
+net_rate_limiter_ops_one_time_burst = 0
 #
 # These options are related to disk rate limiter at the VMM level, and are
 # based on the Cloud Hypervisor I/O throttling.  Those are disabled by default
@@ -299,14 +280,14 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
 # for SB/VM).
 # The same value is used for inbound and outbound bandwidth.
 # Default 0-sized value means unlimited rate.
-#disk_rate_limiter_bw_max_rate = 0
+disk_rate_limiter_bw_max_rate = 0
 #
 # disk_rate_limiter_bw_one_time_burst increases the initial max rate and this
 # initial extra credit does *NOT* affect the overall limit and can be used for
 # an *initial* burst of data.
 # This is *optional* and only takes effect if disk_rate_limiter_bw_max_rate is
 # set to a non zero value.
-#disk_rate_limiter_bw_one_time_burst = 0
+disk_rate_limiter_bw_one_time_burst = 0
 #
 # Operation rate limiter options
 #
@@ -314,19 +295,19 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
 # for SB/VM).
 # The same value is used for inbound and outbound bandwidth.
 # Default 0-sized value means unlimited rate.
-#disk_rate_limiter_ops_max_rate = 0
+disk_rate_limiter_ops_max_rate = 0
 #
 # disk_rate_limiter_ops_one_time_burst increases the initial max rate and this
 # initial extra credit does *NOT* affect the overall limit and can be used for
 # an *initial* burst of data.
 # This is *optional* and only takes effect if disk_rate_limiter_bw_max_rate is
 # set to a non zero value.
-#disk_rate_limiter_ops_one_time_burst = 0
+disk_rate_limiter_ops_one_time_burst = 0

 [agent.@PROJECT_TYPE@]
 # If enabled, make the agent display debug-level messages.
 # (default: disabled)
-#enable_debug = true
+enable_debug = false

 # Enable agent tracing.
 #
@@ -340,14 +321,14 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
 #   increasing the container shutdown time slightly.
 #
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Enable debug console.

 # If enabled, user can connect guest OS running inside hypervisor
 # through "kata-runtime exec <sandbox-id>" command

-#debug_console_enabled = true
+debug_console_enabled = false

 # Agent connection dialing timeout value in seconds
 # (default: 45)
@@ -355,13 +336,13 @@ dial_timeout = 45

 # Confidential Data Hub API timeout value in seconds
 # (default: 50)
-#cdh_api_timeout = 50
+cdh_api_timeout = 50

 [runtime]
 # If enabled, the runtime will log additional debug messages to the
 # system log
 # (default: disabled)
-#enable_debug = true
+enable_debug = false
 #
 # Internetworking model
 # Determines how the VM should be connected to the
@@ -379,14 +360,14 @@ dial_timeout = 45
 #     Uses tc filter rules to redirect traffic from the network interface
 #     provided by plugin to a tap interface connected to the VM.
 #
-internetworking_model="@DEFNETWORKMODEL_CLH@"
+internetworking_model = "@DEFNETWORKMODEL_CLH@"

 # disable guest seccomp
 # Determines whether container seccomp profiles are passed to the virtual
 # machine and applied by the kata agent. If set to true, seccomp is not applied
 # within the guest
 # (default: true)
-disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
+disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@

 # Apply a custom SELinux security policy to the container process inside the VM.
 # This is used when you want to apply a type other than the default `container_t`,
@@ -394,22 +375,23 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # (format: "user:role:type")
 # Note: You cannot specify MCS policy with the label because the sensitivity levels and
 # categories are determined automatically by high-level container runtimes such as containerd.
-#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
+# Example value when enabling: "system_u:system_r:container_t"
+guest_selinux_label = "@DEFGUESTSELINUXLABEL@"

 # If enabled, the runtime will create opentracing.io traces and spans.
 # (See https://www.jaegertracing.io/docs/getting-started).
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Set the full url to the Jaeger HTTP Thrift collector.
 # The default if not set will be "http://localhost:14268/api/traces"
-#jaeger_endpoint = ""
+jaeger_endpoint = ""

 # Sets the username to be used if basic auth is required for Jaeger.
-#jaeger_user = ""
+jaeger_user = ""

 # Sets the password to be used if basic auth is required for Jaeger.
-#jaeger_password = ""
+jaeger_password = ""

 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
@@ -417,7 +399,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
 # (like OVS) directly.
 # (default: false)
-#disable_new_netns = true
+disable_new_netns = false

 # if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
 # The container cgroups in the host are not created, just one single cgroup per sandbox.
@@ -425,7 +407,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
 # The sandbox cgroup is constrained if there is no container type annotation.
 # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
-sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
+sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY@

 # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
 # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
@@ -434,13 +416,13 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
 # - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
 #   does not yet support sandbox sizing annotations.
 # - When running single containers using a tool like ctr, container sizing information will be available.
-static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_CLH@
+static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_CLH@

 # If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path.
 # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
 # If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
 # These will not be exposed to the container workloads, and are only provided for potential guest services.
-sandbox_bind_mounts=@DEFBINDMOUNTS@
+sandbox_bind_mounts = @DEFBINDMOUNTS@

 # VFIO Mode
 # Determines how VFIO devices should be be presented to the container.
@@ -461,22 +443,22 @@ sandbox_bind_mounts=@DEFBINDMOUNTS@
 #    Using this mode requires specially built workloads that know how
 #    to locate the relevant device interfaces within the VM.
 #
-vfio_mode="@DEFVFIOMODE@"
+vfio_mode = "@DEFVFIOMODE@"

 # If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
 # be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
-disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@
+disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@

 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
 # they may break compatibility, and are prepared for a big version bump.
 # Supported experimental features:
 # (default: [])
-experimental=@DEFAULTEXPFEATURES@
+experimental = @DEFAULTEXPFEATURES@

 # If enabled, user can run pprof tools with shim v2 process through kata-monitor.
 # (default: false)
-# enable_pprof = true
+enable_pprof = false

 # Indicates the CreateContainer request timeout needed for the workload(s)
 # It using guest_pull this includes the time to pull the image inside the guest
@@ -494,3 +476,26 @@ create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
 # to the hypervisor.
 # (default: /run/kata-containers/dans)
 dan_conf = "@DEFDANCONF@"
+
+# pod_resource_api_sock specifies the unix socket for the Kubelet's
+# PodResource API endpoint. If empty, kubernetes based cold plug
+# will not be attempted. In order for this feature to work, the
+# KubeletPodResourcesGet featureGate must be enabled in Kubelet,
+# if using Kubelet older than 1.34.
+#
+# The pod resource API's socket is relative to the Kubelet's root-dir,
+# which is defined by the cluster admin, and its location is:
+# ${KubeletRootDir}/pod-resources/kubelet.sock
+#
+# For example, with the Kubelet's usual root-dir of /var/lib/kubelet,
+# the socket is found at:
+# /var/lib/kubelet/pod-resources/kubelet.sock
+#
+# cold_plug_vfio (see hypervisor config) acts as a feature gate:
+#      cold_plug_vfio = no-port (default) => no cold plug
+#      cold_plug_vfio != no-port AND pod_resource_api_sock = "" => need
+#              explicit CDI annotation for cold plug (applies mainly
+#              to non-k8s cases)
+#      cold_plug_vfio != no-port AND pod_resource_api_sock != "" => kubelet
+#              based cold plug.
+pod_resource_api_sock = "@DEFPODRESOURCEAPISOCK@"
--- a/src/runtime/config/configuration-fc.toml.in
+++ b/src/runtime/config/configuration-fc.toml.in
@@ -20,7 +20,7 @@ image = "@IMAGEPATH@"
 #   - ext4 (default)
 #   - xfs
 #   - erofs
-rootfs_type=@DEFROOTFSTYPE@
+rootfs_type = @DEFROOTFSTYPE@

 # List of valid annotation names for the hypervisor
 # Each member of the list is a regular expression, which is the base name
@@ -102,14 +102,14 @@ default_memory = @DEFMEMSZ@
 # Default memory slots per SB/VM.
 # If unspecified then it will be set @DEFMEMSLOTS@.
 # This is will determine the times that memory will be hotadded to sandbox/VM.
-#memory_slots = @DEFMEMSLOTS@
+memory_slots = @DEFMEMSLOTS@

 # The size in MiB will be plused to max memory of hypervisor.
 # It is the memory address space for the NVDIMM device.
 # If set block storage driver (block_device_driver) to "nvdimm",
 # should set memory_offset to the size of block device.
 # Default 0
-#memory_offset = 0
+memory_offset = 0

 # Default maximum memory in MiB per SB / VM
 # unspecified or == 0           --> will be set to the actual amount of physical RAM
@@ -124,12 +124,12 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@"

 # Specifies cache-related options will be set to block devices or not.
 # Default false
-#block_device_cache_set = true
+block_device_cache_set = false

 # Specifies cache-related options for block devices.
 # Denotes whether flush requests for the device are ignored.
 # Default false
-#block_device_cache_noflush = true
+block_device_cache_noflush = false

 # Enable pre allocation of VM RAM, default false
 # Enabling this will result in lower container density
@@ -138,7 +138,7 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@"
 # upfront or in the cases where you want memory latencies
 # to be very predictable
 # Default false
-#enable_mem_prealloc = true
+enable_mem_prealloc = false

 # Enable huge pages for VM RAM, default false
 # Enabling this will result in the VM memory
@@ -146,29 +146,29 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@"
 # This is useful when you want to use vhost-user network
 # stacks within the container. This will automatically
 # result in memory pre allocation
-#enable_hugepages = true
+enable_hugepages = false

 # Enable vIOMMU, default false
 # Enabling this will result in the VM having a vIOMMU device
 # This will also add the following options to the kernel's
 # command line: intel_iommu=on,iommu=pt
-#enable_iommu = true
+enable_iommu = false

 # This option changes the default hypervisor and kernel parameters
 # to enable debug output where available.
 #
 # Default false
-#enable_debug = true
+enable_debug = false

 # Disable the customizations done in the runtime when it detects
 # that it is running on top a VMM. This will result in the runtime
 # behaving as it would when running on bare metal.
 #
-#disable_nesting_checks = true
+disable_nesting_checks = false

 # This is the msize used for 9p shares. It is the number of bytes
 # used for 9p packet payload.
-#msize_9p = @DEFMSIZE9P@
+msize_9p = @DEFMSIZE9P@

 #
 # Default entropy source.
@@ -180,7 +180,7 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@"
 # The source of entropy /dev/urandom is non-blocking and provides a
 # generally acceptable source of entropy. It should work well for pretty much
 # all practical purposes.
-#entropy_source= "@DEFENTROPYSOURCE@"
+entropy_source= "@DEFENTROPYSOURCE@"

 # List of valid annotations values for entropy_source
 # The default if not set is empty (all annotations rejected.)
@@ -202,21 +202,21 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
 # Warnings will be logged if any error is encountered will scanning for hooks,
 # but it will not abort container execution.
-#guest_hook_path = "/usr/share/oci/hooks"
+guest_hook_path = ""
 #
 # Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM).
 # In Firecracker, it provides a built-in rate limiter, which is based on TBF(Token Bucket Filter)
 # queueing discipline.
 # Default 0-sized value means unlimited rate.
-#rx_rate_limiter_max_rate = 0
+rx_rate_limiter_max_rate = 0
 # Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM).
 # In Firecracker, it provides a built-in rate limiter, which is based on TBF(Token Bucket Filter)
 # queueing discipline.
 # Default 0-sized value means unlimited rate.
-#tx_rate_limiter_max_rate = 0
+tx_rate_limiter_max_rate = 0

 # disable applying SELinux on the VMM process (default false)
-disable_selinux=@DEFDISABLESELINUX@
+disable_selinux = @DEFDISABLESELINUX@

 [factory]
 # VM templating support. Once enabled, new VMs are created from template
@@ -230,12 +230,12 @@ disable_selinux=@DEFDISABLESELINUX@
 # Note: Requires "initrd=" to be set ("image=" is not supported).
 #
 # Default false
-#enable_template = true
+enable_template = false

 [agent.@PROJECT_TYPE@]
 # If enabled, make the agent display debug-level messages.
 # (default: disabled)
-#enable_debug = true
+enable_debug = false

 # Enable agent tracing.
 #
@@ -249,7 +249,7 @@ disable_selinux=@DEFDISABLESELINUX@
 #   increasing the container shutdown time slightly.
 #
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Comma separated list of kernel modules and their parameters.
 # These modules will be loaded in the guest kernel using modprobe(8).
@@ -262,14 +262,14 @@ disable_selinux=@DEFDISABLESELINUX@
 #  * The module is not available in the guest or it doesn't met the guest kernel
 #    requirements, like architecture and version.
 #
-kernel_modules=[]
+kernel_modules = []

 # Enable debug console.

 # If enabled, user can connect guest OS running inside hypervisor
 # through "kata-runtime exec <sandbox-id>" command

-#debug_console_enabled = true
+debug_console_enabled = false

 # Agent connection dialing timeout value in seconds
 # (default: 45)
@@ -277,13 +277,13 @@ dial_timeout = 45

 # Confidential Data Hub API timeout value in seconds
 # (default: 50)
-#cdh_api_timeout = 50
+cdh_api_timeout = 50

 [runtime]
 # If enabled, the runtime will log additional debug messages to the
 # system log
 # (default: disabled)
-#enable_debug = true
+enable_debug = false
 #
 # Internetworking model
 # Determines how the VM should be connected to the
@@ -301,29 +301,29 @@ dial_timeout = 45
 #     Uses tc filter rules to redirect traffic from the network interface
 #     provided by plugin to a tap interface connected to the VM.
 #
-internetworking_model="@DEFNETWORKMODEL_FC@"
+internetworking_model = "@DEFNETWORKMODEL_FC@"

 # disable guest seccomp
 # Determines whether container seccomp profiles are passed to the virtual
 # machine and applied by the kata agent. If set to true, seccomp is not applied
 # within the guest
 # (default: true)
-disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
+disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@

 # If enabled, the runtime will create opentracing.io traces and spans.
 # (See https://www.jaegertracing.io/docs/getting-started).
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Set the full url to the Jaeger HTTP Thrift collector.
 # The default if not set will be "http://localhost:14268/api/traces"
-#jaeger_endpoint = ""
+jaeger_endpoint = ""

 # Sets the username to be used if basic auth is required for Jaeger.
-#jaeger_user = ""
+jaeger_user = ""

 # Sets the password to be used if basic auth is required for Jaeger.
-#jaeger_password = ""
+jaeger_password = ""

 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
@@ -331,7 +331,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
 # (like OVS) directly.
 # (default: false)
-#disable_new_netns = true
+disable_new_netns = false

 # if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
 # The container cgroups in the host are not created, just one single cgroup per sandbox.
@@ -339,7 +339,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
 # The sandbox cgroup is constrained if there is no container type annotation.
 # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
-sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
+sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY@

 # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
 # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
@@ -348,22 +348,22 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
 # - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
 #   does not yet support sandbox sizing annotations.
 # - When running single containers using a tool like ctr, container sizing information will be available.
-static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_FC@
+static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_FC@

 # If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
 # be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
-disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@
+disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@

 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
 # they may break compatibility, and are prepared for a big version bump.
 # Supported experimental features:
 # (default: [])
-experimental=@DEFAULTEXPFEATURES@
+experimental = @DEFAULTEXPFEATURES@

 # If enabled, user can run pprof tools with shim v2 process through kata-monitor.
 # (default: false)
-# enable_pprof = true
+enable_pprof = false

 # Indicates the CreateContainer request timeout needed for the workload(s)
 # It using guest_pull this includes the time to pull the image inside the guest
@@ -381,3 +381,22 @@ create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
 # to the hypervisor.
 # (default: /run/kata-containers/dans)
 dan_conf = "@DEFDANCONF@"
+
+# pod_resource_api_sock specifies the unix socket for the Kubelet's
+# PodResource API endpoint. If empty, Kubernetes-based cold plug
+# will not be attempted. In order for this feature to work, the
+# KubeletPodResourcesGet featureGate must be enabled in Kubelet,
+# if using Kubelet older than 1.34.
+#
+# The pod resource API's socket is relative to the Kubelet's root-dir,
+# which is defined by the cluster admin, and its location is:
+# ${KubeletRootDir}/pod-resources/kubelet.sock
+#
+# cold_plug_vfio (see hypervisor config) acts as a feature gate:
+#      cold_plug_vfio = no_port (default) => no cold plug
+#      cold_plug_vfio != no_port AND pod_resource_api_sock = "" => need
+#              explicit CDI annotation for cold plug (applies mainly
+#              to non-k8s cases)
+#      cold_plug_vfio != no_port AND pod_resource_api_sock != "" => kubelet
+#              based cold plug.
+pod_resource_api_sock = "@DEFPODRESOURCEAPISOCK@"
--- a/src/runtime/config/configuration-qemu-cca.toml.in
+++ b/src/runtime/config/configuration-qemu-cca.toml.in
@@ -14,14 +14,13 @@
 path = "@QEMUCCAEXPERIMENTALPATH@"
 kernel = "@KERNELCONFIDENTIALPATH@"
 image = "@IMAGECONFIDENTIALPATH@"
-# initrd = "@INITRDCONFIDENTIALPATH@"
 machine_type = "@MACHINETYPE@"

 # rootfs filesystem type:
 #   - ext4 (default)
 #   - xfs
 #   - erofs
-rootfs_type=@DEFROOTFSTYPE@
+rootfs_type = @DEFROOTFSTYPE@

 # Enable confidential guest support.
 # Toggling that setting may trigger different hardware features, ranging
@@ -42,7 +41,7 @@ confidential_guest = true
 # Enable running QEMU VMM as a non-root user.
 # By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as
 # a non-root random user. See documentation for the limitations of this mode.
-# rootless = true
+rootless = false

 # List of valid annotation names for the hypervisor
 # Each member of the list is a regular expression, which is the base name
@@ -80,7 +79,7 @@ firmware_volume = "@FIRMWAREVOLUMEPATH@"
 # Machine accelerators
 # comma-separated list of machine accelerators to pass to the hypervisor.
 # For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"`
-machine_accelerators="@MACHINEACCELERATORS@"
+machine_accelerators = "@MACHINEACCELERATORS@"

 # Qemu seccomp sandbox feature
 # comma-separated list of seccomp sandbox features to control the syscall access.
@@ -88,12 +87,13 @@ machine_accelerators="@MACHINEACCELERATORS@"
 # Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
 # Another note: enabling this feature may reduce performance, you may enable
 # /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html
-#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@"
+# Recommended value when enabling: "on,obsolete=deny,spawn=deny,resourcecontrol=deny"
+seccompsandbox = "@DEFSECCOMPSANDBOXPARAM@"

 # CPU features
 # comma-separated list of cpu features to pass to the cpu
 # For example, `cpu_features = "pmu=off,vmx=off"
-cpu_features="@CPUFEATURES@"
+cpu_features = "@CPUFEATURES@"

 # Default number of vCPUs per SB/VM:
 # unspecified or 0                --> will be set to @DEFVCPUS@
@@ -138,7 +138,7 @@ default_memory = @DEFMEMSZ@
 # Default memory slots per SB/VM.
 # If unspecified then it will be set @DEFMEMSLOTS@.
 # This is will determine the times that memory will be hotadded to sandbox/VM.
-#memory_slots = @DEFMEMSLOTS@
+memory_slots = @DEFMEMSLOTS@

 # Default maximum memory in MiB per SB / VM
 # unspecified or == 0           --> will be set to the actual amount of physical RAM
@@ -151,13 +151,13 @@ default_maxmemory = @DEFMAXMEMSZ@
 # If set block storage driver (block_device_driver) to "nvdimm",
 # should set memory_offset to the size of block device.
 # Default 0
-#memory_offset = 0
+memory_offset = 0

 # Specifies virtio-mem will be enabled or not.
 # Please note that this option should be used with the command
 # "echo 1 > /proc/sys/vm/overcommit_memory".
 # Default false
-#enable_virtio_mem = true
+enable_virtio_mem = false

 # Disable block device from being used for a container's rootfs.
 # In case of a storage driver like devicemapper where a container's
@@ -217,17 +217,17 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_QEMU@"

 # Specifies cache-related options will be set to block devices or not.
 # Default false
-#block_device_cache_set = true
+block_device_cache_set = false

 # Specifies cache-related options for block devices.
 # Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
 # Default false
-#block_device_cache_direct = true
+block_device_cache_direct = false

 # Specifies cache-related options for block devices.
 # Denotes whether flush requests for the device are ignored.
 # Default false
-#block_device_cache_noflush = true
+block_device_cache_noflush = false

 # Enable iothreads (data-plane) to be used. This causes IO to be
 # handled in a separate IO thread. This is currently only implemented
@@ -242,7 +242,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # upfront or in the cases where you want memory latencies
 # to be very predictable
 # Default false
-#enable_mem_prealloc = true
+enable_mem_prealloc = false

 # Enable huge pages for VM RAM, default false
 # Enabling this will result in the VM memory
@@ -250,7 +250,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # This is useful when you want to use vhost-user network
 # stacks within the container. This will automatically
 # result in memory pre allocation
-#enable_hugepages = true
+enable_hugepages = false

 # Enable vhost-user storage device, default false
 # Enabling this will result in some Linux reserved block type
@@ -267,11 +267,11 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
 # Enabling this will result in the VM having a vIOMMU device
 # This will also add the following options to the kernel's
 # command line: intel_iommu=on,iommu=pt
-#enable_iommu = true
+enable_iommu = false

 # Enable IOMMU_PLATFORM, default false
 # Enabling this will result in the VM device having iommu_platform=on set
-#enable_iommu_platform = true
+enable_iommu_platform = false

 # List of valid annotations values for the vhost user store path
 # The default if not set is empty (all annotations rejected.)
@@ -282,7 +282,7 @@ valid_vhost_user_store_paths = @DEFVALIDVHOSTUSERSTOREPATHS@
 # will disable this feature. In the case of virtio-fs, this is enabled
 # automatically and '/dev/shm' is used as the backing folder.
 # This option will be ignored if VM templating is enabled.
-#file_mem_backend = "@DEFFILEMEMBACKEND@"
+file_mem_backend = "@DEFFILEMEMBACKEND@"

 # List of valid annotations values for the file_mem_backend annotation
 # The default if not set is empty (all annotations rejected.)
@@ -297,17 +297,17 @@ pflashes = []
 # to enable debug output where available.
 #
 # Default false
-#enable_debug = true
+enable_debug = false

 # Disable the customizations done in the runtime when it detects
 # that it is running on top a VMM. This will result in the runtime
 # behaving as it would when running on bare metal.
 #
-#disable_nesting_checks = true
+disable_nesting_checks = false

 # This is the msize used for 9p shares. It is the number of bytes
 # used for 9p packet payload.
-#msize_9p = @DEFMSIZE9P@
+msize_9p = @DEFMSIZE9P@

 # If false and nvdimm is supported, use nvdimm device to plug guest image.
 # Otherwise virtio-block device is used.
@@ -319,11 +319,11 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
 # Use this parameter when using some large PCI bar devices, such as Nvidia GPU
 # The value means the number of pcie_root_port
 # Default 0
-#pcie_root_port = 2
+pcie_root_port = 0

 # If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
 # security (vhost-net runs ring0) for network I/O performance.
-#disable_vhost_net = true
+disable_vhost_net = false

 #
 # Default entropy source.
@@ -335,7 +335,7 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
 # The source of entropy /dev/urandom is non-blocking and provides a
 # generally acceptable source of entropy. It should work well for pretty much
 # all practical purposes.
-#entropy_source= "@DEFENTROPYSOURCE@"
+entropy_source = "@DEFENTROPYSOURCE@"

 # List of valid annotations values for entropy_source
 # The default if not set is empty (all annotations rejected.)
@@ -357,17 +357,17 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
 # Warnings will be logged if any error is encountered while scanning for hooks,
 # but it will not abort container execution.
-#guest_hook_path = "/usr/share/oci/hooks"
+guest_hook_path = ""
 #
 # Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#rx_rate_limiter_max_rate = 0
+rx_rate_limiter_max_rate = 0
 # Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block)
 # to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#tx_rate_limiter_max_rate = 0
+tx_rate_limiter_max_rate = 0

 # Set where to save the guest memory dump file.
 # If set, when GUEST_PANICKED event occurred,
@@ -377,9 +377,10 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # The dumped file(also called vmcore) can be processed with crash or gdb.
 #
 # WARNING:
-#   Dump guest’s memory can take very long depending on the amount of guest memory
+#   Dumping the guest's memory can take a very long time depending on the amount of guest memory
 #   and use much disk space.
-#guest_memory_dump_path="/var/crash/kata"
+# Recommended value when enabling: "/var/crash/kata"
+guest_memory_dump_path = ""

 # If enable paging.
 # Basically, if you want to use "gdb" rather than "crash",
@@ -387,7 +388,7 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # then you should enable paging.
 #
 # See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details
-#guest_memory_dump_paging=false
+guest_memory_dump_paging = false

 # Enable swap in the guest. Default false.
 # When enable_guest_swap is enabled, insert a raw file to the guest as the swap device
@@ -398,26 +399,26 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # If swap_in_bytes is not set, the size should be memory_limit_in_bytes.
 # If swap_in_bytes and memory_limit_in_bytes is not set, the size should
 # be default_memory.
-#enable_guest_swap = true
+enable_guest_swap = false

 # use legacy serial for guest console if available and implemented for architecture. Default false
-#use_legacy_serial = true
+use_legacy_serial = false

 # disable applying SELinux on the VMM process (default false)
-disable_selinux=@DEFDISABLESELINUX@
+disable_selinux = @DEFDISABLESELINUX@

 # disable applying SELinux on the container process
 # If set to false, the type `container_t` is applied to the container process by default.
 # Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
 # with `SELINUX=yes`.
 # (default: true)
-disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
+disable_guest_selinux = @DEFDISABLEGUESTSELINUX@

 # In QEMU, the Realm Management Extension (RME) measurement algorithm is used for attestation, and it supports
 # sha256 and sha512 as options. The default is sha512. This algorithm is crucial for verifying the integrity of a
 # Realm, a secure execution environment within the larger system. QEMU supports sha256 and sha512 for CCA RME
 # measurements. sha512 is generally preferred on 64-bit architectures due to potential hardware acceleration.
-measurement_algo="@DEFCCAMEASUREMENTALGO@"
+measurement_algo = "@DEFCCAMEASUREMENTALGO@"

 [factory]
 # VM templating support. Once enabled, new VMs are created from template
@@ -431,12 +432,12 @@ measurement_algo="@DEFCCAMEASUREMENTALGO@"
 # Note: Requires "initrd=" to be set ("image=" is not supported).
 #
 # Default false
-#enable_template = true
+enable_template = false

 # Specifies the path of template.
 #
 # Default "/run/vc/vm/template"
-#template_path = "/run/vc/vm/template"
+template_path = "/run/vc/vm/template"

 # The number of caches of VMCache:
 # unspecified or == 0   --> VMCache is disabled
@@ -455,17 +456,17 @@ measurement_algo="@DEFCCAMEASUREMENTALGO@"
 # a new sandbox.
 #
 # Default 0
-#vm_cache_number = 0
+vm_cache_number = 0

 # Specify the address of the Unix socket that is used by VMCache.
 #
 # Default /var/run/kata-containers/cache.sock
-#vm_cache_endpoint = "/var/run/kata-containers/cache.sock"
+vm_cache_endpoint = "/var/run/kata-containers/cache.sock"

 [agent.@PROJECT_TYPE@]
 # If enabled, make the agent display debug-level messages.
 # (default: disabled)
-#enable_debug = true
+enable_debug = false

 # Enable agent tracing.
 #
@@ -479,7 +480,7 @@ measurement_algo="@DEFCCAMEASUREMENTALGO@"
 #   increasing the container shutdown time slightly.
 #
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Comma separated list of kernel modules and their parameters.
 # These modules will be loaded in the guest kernel using modprobe(8).
@@ -492,14 +493,14 @@ measurement_algo="@DEFCCAMEASUREMENTALGO@"
 #  * The module is not available in the guest or it doesn't met the guest kernel
 #    requirements, like architecture and version.
 #
-kernel_modules=[]
+kernel_modules = []

 # Enable debug console.

 # If enabled, user can connect guest OS running inside hypervisor
 # through "kata-runtime exec <sandbox-id>" command

-#debug_console_enabled = true
+debug_console_enabled = false

 # Agent connection dialing timeout value in seconds
 # (default: 90)
@@ -509,7 +510,7 @@ dial_timeout = 90
 # If enabled, the runtime will log additional debug messages to the
 # system log
 # (default: disabled)
-#enable_debug = true
+enable_debug = false
 #
 # Internetworking model
 # Determines how the VM should be connected to the
@@ -527,14 +528,14 @@ dial_timeout = 90
 #     Uses tc filter rules to redirect traffic from the network interface
 #     provided by plugin to a tap interface connected to the VM.
 #
-internetworking_model="@DEFNETWORKMODEL_QEMU@"
+internetworking_model = "@DEFNETWORKMODEL_QEMU@"

 # disable guest seccomp
 # Determines whether container seccomp profiles are passed to the virtual
 # machine and applied by the kata agent. If set to true, seccomp is not applied
 # within the guest
 # (default: true)
-disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
+disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@

 # Apply a custom SELinux security policy to the container process inside the VM.
 # This is used when you want to apply a type other than the default `container_t`,
@@ -542,22 +543,23 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # (format: "user:role:type")
 # Note: You cannot specify MCS policy with the label because the sensitivity levels and
 # categories are determined automatically by high-level container runtimes such as containerd.
-#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
+# Example value when enabling: "system_u:system_r:container_t"
+guest_selinux_label = "@DEFGUESTSELINUXLABEL@"

 # If enabled, the runtime will create opentracing.io traces and spans.
 # (See https://www.jaegertracing.io/docs/getting-started).
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Set the full url to the Jaeger HTTP Thrift collector.
 # The default if not set will be "http://localhost:14268/api/traces"
-#jaeger_endpoint = ""
+jaeger_endpoint = ""

 # Sets the username to be used if basic auth is required for Jaeger.
-#jaeger_user = ""
+jaeger_user = ""

 # Sets the password to be used if basic auth is required for Jaeger.
-#jaeger_password = ""
+jaeger_password = ""

 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
@@ -565,7 +567,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
 # (like OVS) directly.
 # (default: false)
-#disable_new_netns = true
+disable_new_netns = false

 # if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
 # The container cgroups in the host are not created, just one single cgroup per sandbox.
@@ -573,7 +575,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
 # The sandbox cgroup is constrained if there is no container type annotation.
 # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
-sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
+sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY@

 # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
 # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
@@ -582,13 +584,13 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
 # - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
 #   does not yet support sandbox sizing annotations.
 # - When running single containers using a tool like ctr, container sizing information will be available.
-static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_TEE@
+static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_TEE@

 # If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path.
 # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
 # If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
 # These will not be exposed to the container workloads, and are only provided for potential guest services.
-sandbox_bind_mounts=@DEFBINDMOUNTS@
+sandbox_bind_mounts = @DEFBINDMOUNTS@

 # VFIO Mode
 # Determines how VFIO devices should be be presented to the container.
@@ -609,22 +611,22 @@ sandbox_bind_mounts=@DEFBINDMOUNTS@
 #    Using this mode requires specially built workloads that know how
 #    to locate the relevant device interfaces within the VM.
 #
-vfio_mode="@DEFVFIOMODE@"
+vfio_mode = "@DEFVFIOMODE@"

 # If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
 # be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
-disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@
+disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@

 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
 # they may break compatibility, and are prepared for a big version bump.
 # Supported experimental features:
 # (default: [])
-experimental=@DEFAULTEXPFEATURES@
+experimental = @DEFAULTEXPFEATURES@

 # If enabled, user can run pprof tools with shim v2 process through kata-monitor.
 # (default: false)
-# enable_pprof = true
+enable_pprof = false

 # Indicates the CreateContainer request timeout needed for the workload(s)
 # It using guest_pull this includes the time to pull the image inside the guest
@@ -647,3 +649,22 @@ dan_conf = "@DEFDANCONF@"
 # the container image should be pulled in the guest, without using an external snapshotter.
 # This is an experimental feature and might be removed in the future.
 experimental_force_guest_pull = @DEFFORCEGUESTPULL@
+
+# pod_resource_api_sock specifies the unix socket for the Kubelet's
+# PodResource API endpoint. If empty, Kubernetes-based cold plug
+# will not be attempted. In order for this feature to work, the
+# KubeletPodResourcesGet featureGate must be enabled in Kubelet,
+# if using Kubelet older than 1.34.
+#
+# The pod resource API's socket is relative to the Kubelet's root-dir,
+# which is defined by the cluster admin, and its location is:
+# ${KubeletRootDir}/pod-resources/kubelet.sock
+#
+# cold_plug_vfio (see hypervisor config) acts as a feature gate:
+#      cold_plug_vfio = no_port (default) => no cold plug
+#      cold_plug_vfio != no_port AND pod_resource_api_sock = "" => need
+#              explicit CDI annotation for cold plug (applies mainly
+#              to non-k8s cases)
+#      cold_plug_vfio != no_port AND pod_resource_api_sock != "" => kubelet
+#              based cold plug.
+pod_resource_api_sock = "@DEFPODRESOURCEAPISOCK@"
--- a/src/runtime/config/configuration-qemu-coco-dev.toml.in
+++ b/src/runtime/config/configuration-qemu-coco-dev.toml.in
@@ -16,41 +16,18 @@
 path = "@QEMUPATH@"
 kernel = "@KERNELCONFIDENTIALPATH@"
 image = "@IMAGECONFIDENTIALPATH@"
-# initrd = "@INITRDCONFIDENTIALPATH@"
 machine_type = "@MACHINETYPE@"

 # rootfs filesystem type:
 #   - ext4 (default)
 #   - xfs
 #   - erofs
-rootfs_type=@DEFROOTFSTYPE@
-
-# Enable confidential guest support.
-# Toggling that setting may trigger different hardware features, ranging
-# from memory encryption to both memory and CPU-state encryption and integrity.
-# The Kata Containers runtime dynamically detects the available feature set and
-# aims at enabling the largest possible one, returning an error if none is
-# available, or none is supported by the hypervisor.
-#
-# Known limitations:
-# * Does not work by design:
-#   - CPU Hotplug 
-#   - Memory Hotplug
-#   - NVDIMM devices
-#
-# Default false
-# confidential_guest = true
-
-# Choose AMD SEV-SNP confidential guests
-# In case of using confidential guests on AMD hardware that supports both SEV
-# and SEV-SNP, the following enables SEV-SNP guests. SEV guests are default.
-# Default false
-# sev_snp_guest = true
+rootfs_type = @DEFROOTFSTYPE@

 # Enable running QEMU VMM as a non-root user.
 # By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as
 # a non-root random user. See documentation for the limitations of this mode.
-# rootless = true
+rootless = false

 # List of valid annotation names for the hypervisor
 # Each member of the list is a regular expression, which is the base name
@@ -88,7 +65,7 @@ firmware_volume = "@FIRMWAREVOLUMEPATH@"
 # Machine accelerators
 # comma-separated list of machine accelerators to pass to the hypervisor.
 # For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"`
-machine_accelerators="@MACHINEACCELERATORS@"
+machine_accelerators = "@MACHINEACCELERATORS@"

 # Qemu seccomp sandbox feature
 # comma-separated list of seccomp sandbox features to control the syscall access.
@@ -96,12 +73,13 @@ machine_accelerators="@MACHINEACCELERATORS@"
 # Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
 # Another note: enabling this feature may reduce performance, you may enable
 # /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html
-#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@"
+# Recommended value when enabling: "on,obsolete=deny,spawn=deny,resourcecontrol=deny"
+seccompsandbox = "@DEFSECCOMPSANDBOXPARAM@"

 # CPU features
 # comma-separated list of cpu features to pass to the cpu
 # For example, `cpu_features = "pmu=off,vmx=off"
-cpu_features="@CPUFEATURES@"
+cpu_features = "@CPUFEATURES@"

 # Default number of vCPUs per SB/VM:
 # unspecified or 0                --> will be set to @DEFVCPUS@
@@ -146,7 +124,7 @@ default_memory = @DEFMEMSZ@
 # Default memory slots per SB/VM.
 # If unspecified then it will be set @DEFMEMSLOTS@.
 # This is will determine the times that memory will be hotadded to sandbox/VM.
-#memory_slots = @DEFMEMSLOTS@
+memory_slots = @DEFMEMSLOTS@

 # Default maximum memory in MiB per SB / VM
 # unspecified or == 0           --> will be set to the actual amount of physical RAM
@@ -159,13 +137,13 @@ default_maxmemory = @DEFMAXMEMSZ@
 # If set block storage driver (block_device_driver) to "nvdimm",
 # should set memory_offset to the size of block device.
 # Default 0
-#memory_offset = 0
+memory_offset = 0

 # Specifies virtio-mem will be enabled or not.
 # Please note that this option should be used with the command
 # "echo 1 > /proc/sys/vm/overcommit_memory".
 # Default false
-#enable_virtio_mem = true
+enable_virtio_mem = false

 # Disable block device from being used for a container's rootfs.
 # In case of a storage driver like devicemapper where a container's
@@ -246,24 +224,28 @@ block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@"

 # Specifies cache-related options will be set to block devices or not.
 # Default false
-#block_device_cache_set = true
+block_device_cache_set = false

 # Specifies cache-related options for block devices.
 # Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
 # Default false
-#block_device_cache_direct = true
+block_device_cache_direct = false

 # Specifies cache-related options for block devices.
 # Denotes whether flush requests for the device are ignored.
 # Default false
-#block_device_cache_noflush = true
+block_device_cache_noflush = false

 # Enable iothreads (data-plane) to be used. This causes IO to be
-# handled in a separate IO thread. This is currently only implemented
-# for SCSI.
+# handled in a separate IO thread. This is currently implemented
+# for virtio-scsi and virtio-blk.
 #
 enable_iothreads = @DEFENABLEIOTHREADS@

+# Independent IOThreads enables IO to be processed in a separate thread, it is
+# for QEMU hotplug device attach to iothread, like virtio-blk.
+indep_iothreads = @DEFINDEPIOTHREADS@
+
 # Enable pre allocation of VM RAM, default false
 # Enabling this will result in lower container density
 # as all of the memory will be allocated and locked
@@ -271,7 +253,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # upfront or in the cases where you want memory latencies
 # to be very predictable
 # Default false
-#enable_mem_prealloc = true
+enable_mem_prealloc = false

 # Reclaim guest freed memory.
 # Enabling this will result in the VM balloon device having f_reporting=on set.
@@ -281,7 +263,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # the VM.
 #
 # Default false
-#reclaim_guest_freed_memory = true
+reclaim_guest_freed_memory = false

 # Enable huge pages for VM RAM, default false
 # Enabling this will result in the VM memory
@@ -289,7 +271,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # This is useful when you want to use vhost-user network
 # stacks within the container. This will automatically
 # result in memory pre allocation
-#enable_hugepages = true
+enable_hugepages = false

 # Enable vhost-user storage device, default false
 # Enabling this will result in some Linux reserved block type
@@ -306,11 +288,11 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
 # Enabling this will result in the VM having a vIOMMU device
 # This will also add the following options to the kernel's
 # command line: intel_iommu=on,iommu=pt
-#enable_iommu = true
+enable_iommu = false

 # Enable IOMMU_PLATFORM, default false
 # Enabling this will result in the VM device having iommu_platform=on set
-#enable_iommu_platform = true
+enable_iommu_platform = false

 # List of valid annotations values for the vhost user store path
 # The default if not set is empty (all annotations rejected.)
@@ -326,7 +308,7 @@ vhost_user_reconnect_timeout_sec = 0
 # will disable this feature. In the case of virtio-fs, this is enabled
 # automatically and '/dev/shm' is used as the backing folder.
 # This option will be ignored if VM templating is enabled.
-#file_mem_backend = "@DEFFILEMEMBACKEND@"
+file_mem_backend = "@DEFFILEMEMBACKEND@"

 # List of valid annotations values for the file_mem_backend annotation
 # The default if not set is empty (all annotations rejected.)
@@ -341,7 +323,7 @@ pflashes = []
 # to enable debug output where available.
 #
 # Default false
-#enable_debug = true
+enable_debug = false

 # This option allows to add an extra HMP or QMP socket when `enable_debug = true`
 #
@@ -356,17 +338,17 @@ pflashes = []
 #
 # If set to the empty string "", no extra monitor socket is added. This is
 # the default.
-#extra_monitor_socket = hmp
+extra_monitor_socket = ""

 # Disable the customizations done in the runtime when it detects
 # that it is running on top a VMM. This will result in the runtime
 # behaving as it would when running on bare metal.
 #
-#disable_nesting_checks = true
+disable_nesting_checks = false

 # This is the msize used for 9p shares. It is the number of bytes
 # used for 9p packet payload.
-#msize_9p = @DEFMSIZE9P@
+msize_9p = @DEFMSIZE9P@

 # If false and nvdimm is supported, use nvdimm device to plug guest image.
 # Otherwise virtio-block device is used.
@@ -377,24 +359,24 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
 # Enable hot-plugging of VFIO devices to a bridge-port, 
 # root-port or switch-port. 
 # The default setting is  "no-port"
-#hot_plug_vfio = "root-port" 
+hot_plug_vfio = "no-port"

 # In a confidential compute environment hot-plugging can compromise
 # security. 
 # Enable cold-plugging of VFIO devices to a bridge-port, 
 # root-port or switch-port. 
 # The default setting is  "no-port", which means disabled. 
-#cold_plug_vfio = "root-port" 
+cold_plug_vfio = "no-port"

 # Before hot plugging a PCIe device, you need to add a pcie_root_port device.
 # Use this parameter when using some large PCI bar devices, such as Nvidia GPU
 # The value means the number of pcie_root_port
 # Default 0
-#pcie_root_port = 2
+pcie_root_port = 0

 # If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
 # security (vhost-net runs ring0) for network I/O performance.
-#disable_vhost_net = true
+disable_vhost_net = false

 #
 # Default entropy source.
@@ -406,7 +388,7 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
 # The source of entropy /dev/urandom is non-blocking and provides a
 # generally acceptable source of entropy. It should work well for pretty much
 # all practical purposes.
-#entropy_source= "@DEFENTROPYSOURCE@"
+entropy_source = "@DEFENTROPYSOURCE@"

 # List of valid annotations values for entropy_source
 # The default if not set is empty (all annotations rejected.)
@@ -428,17 +410,18 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
 # Warnings will be logged if any error is encountered while scanning for hooks,
 # but it will not abort container execution.
-#guest_hook_path = "/usr/share/oci/hooks"
+# Recommended value when enabling: "/usr/share/oci/hooks"
+guest_hook_path = ""
 #
 # Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#rx_rate_limiter_max_rate = 0
+rx_rate_limiter_max_rate = 0
 # Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block)
 # to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#tx_rate_limiter_max_rate = 0
+tx_rate_limiter_max_rate = 0

 # Set where to save the guest memory dump file.
 # If set, when GUEST_PANICKED event occurred,
@@ -448,9 +431,10 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # The dumped file(also called vmcore) can be processed with crash or gdb.
 #
 # WARNING:
-#   Dump guest’s memory can take very long depending on the amount of guest memory
+#   Dumping the guest's memory can take very long depending on the amount of guest memory
 #   and use much disk space.
-#guest_memory_dump_path="/var/crash/kata"
+# Recommended value when enabling: "/var/crash/kata"
+guest_memory_dump_path = ""

 # If enable paging.
 # Basically, if you want to use "gdb" rather than "crash",
@@ -458,7 +442,7 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # then you should enable paging.
 #
 # See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details
-#guest_memory_dump_paging=false
+guest_memory_dump_paging = false

 # Enable swap in the guest. Default false.
 # When enable_guest_swap is enabled, insert a raw file to the guest as the swap device
@@ -469,20 +453,20 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # If swap_in_bytes is not set, the size should be memory_limit_in_bytes.
 # If swap_in_bytes and memory_limit_in_bytes is not set, the size should
 # be default_memory.
-#enable_guest_swap = true
+enable_guest_swap = false

 # use legacy serial for guest console if available and implemented for architecture. Default false
-#use_legacy_serial = true
+use_legacy_serial = false

 # disable applying SELinux on the VMM process (default false)
-disable_selinux=@DEFDISABLESELINUX@
+disable_selinux = @DEFDISABLESELINUX@

 # disable applying SELinux on the container process
 # If set to false, the type `container_t` is applied to the container process by default.
 # Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
 # with `SELINUX=yes`.
 # (default: true)
-disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
+disable_guest_selinux = @DEFDISABLEGUESTSELINUX@


 [factory]
@@ -497,12 +481,12 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 # Note: Requires "initrd=" to be set ("image=" is not supported).
 #
 # Default false
-#enable_template = true
+enable_template = false

 # Specifies the path of template.
 #
 # Default "/run/vc/vm/template"
-#template_path = "/run/vc/vm/template"
+template_path = "/run/vc/vm/template"

 # The number of caches of VMCache:
 # unspecified or == 0   --> VMCache is disabled
@@ -521,17 +505,17 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 # a new sandbox.
 #
 # Default 0
-#vm_cache_number = 0
+vm_cache_number = 0

 # Specify the address of the Unix socket that is used by VMCache.
 #
 # Default /var/run/kata-containers/cache.sock
-#vm_cache_endpoint = "/var/run/kata-containers/cache.sock"
+vm_cache_endpoint = "/var/run/kata-containers/cache.sock"

 [agent.@PROJECT_TYPE@]
 # If enabled, make the agent display debug-level messages.
 # (default: disabled)
-#enable_debug = true
+enable_debug = false

 # Enable agent tracing.
 #
@@ -545,7 +529,7 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 #   increasing the container shutdown time slightly.
 #
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Comma separated list of kernel modules and their parameters.
 # These modules will be loaded in the guest kernel using modprobe(8).
@@ -558,14 +542,14 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 #  * The module is not available in the guest or it doesn't met the guest kernel
 #    requirements, like architecture and version.
 #
-kernel_modules=[]
+kernel_modules = []

 # Enable debug console.

 # If enabled, user can connect guest OS running inside hypervisor
 # through "kata-runtime exec <sandbox-id>" command

-#debug_console_enabled = true
+debug_console_enabled = false

 # Agent connection dialing timeout value in seconds
 # (default: 45)
@@ -573,13 +557,13 @@ dial_timeout = 45

 # Confidential Data Hub API timeout value in seconds
 # (default: 50)
-#cdh_api_timeout = 50
+cdh_api_timeout = 50

 [runtime]
 # If enabled, the runtime will log additional debug messages to the
 # system log
 # (default: disabled)
-#enable_debug = true
+enable_debug = false
 #
 # Internetworking model
 # Determines how the VM should be connected to the
@@ -597,19 +581,19 @@ dial_timeout = 45
 #     Uses tc filter rules to redirect traffic from the network interface
 #     provided by plugin to a tap interface connected to the VM.
 #
-internetworking_model="@DEFNETWORKMODEL_QEMU@"
+internetworking_model = "@DEFNETWORKMODEL_QEMU@"

 # disable guest seccomp
 # Determines whether container seccomp profiles are passed to the virtual
 # machine and applied by the kata agent. If set to true, seccomp is not applied
 # within the guest
 # (default: true)
-disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
+disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@

 # vCPUs pinning settings
 # if enabled, each vCPU thread will be scheduled to a fixed CPU
 # qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
-# enable_vcpus_pinning = false
+enable_vcpus_pinning = false

 # Apply a custom SELinux security policy to the container process inside the VM.
 # This is used when you want to apply a type other than the default `container_t`,
@@ -617,22 +601,23 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # (format: "user:role:type")
 # Note: You cannot specify MCS policy with the label because the sensitivity levels and
 # categories are determined automatically by high-level container runtimes such as containerd.
-#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
+# Example value when enabling: "system_u:system_r:container_t"
+guest_selinux_label = "@DEFGUESTSELINUXLABEL@"

 # If enabled, the runtime will create opentracing.io traces and spans.
 # (See https://www.jaegertracing.io/docs/getting-started).
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Set the full url to the Jaeger HTTP Thrift collector.
 # The default if not set will be "http://localhost:14268/api/traces"
-#jaeger_endpoint = ""
+jaeger_endpoint = ""

 # Sets the username to be used if basic auth is required for Jaeger.
-#jaeger_user = ""
+jaeger_user = ""

 # Sets the password to be used if basic auth is required for Jaeger.
-#jaeger_password = ""
+jaeger_password = ""

 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
@@ -640,7 +625,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
 # (like OVS) directly.
 # (default: false)
-#disable_new_netns = true
+disable_new_netns = false

 # if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
 # The container cgroups in the host are not created, just one single cgroup per sandbox.
@@ -648,7 +633,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
 # The sandbox cgroup is constrained if there is no container type annotation.
 # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
-sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
+sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY@

 # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
 # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
@@ -657,13 +642,13 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
 # - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
 #   does not yet support sandbox sizing annotations.
 # - When running single containers using a tool like ctr, container sizing information will be available.
-static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_TEE@
+static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_TEE@

 # If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path.
 # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
 # If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
 # These will not be exposed to the container workloads, and are only provided for potential guest services.
-sandbox_bind_mounts=@DEFBINDMOUNTS@
+sandbox_bind_mounts = @DEFBINDMOUNTS@

 # VFIO Mode
 # Determines how VFIO devices should be be presented to the container.
@@ -684,22 +669,22 @@ sandbox_bind_mounts=@DEFBINDMOUNTS@
 #    Using this mode requires specially built workloads that know how
 #    to locate the relevant device interfaces within the VM.
 #
-vfio_mode="@DEFVFIOMODE@"
+vfio_mode = "@DEFVFIOMODE@"

 # If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
 # be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
-disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@
+disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@

 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
 # they may break compatibility, and are prepared for a big version bump.
 # Supported experimental features:
 # (default: [])
-experimental=@DEFAULTEXPFEATURES@
+experimental = @DEFAULTEXPFEATURES@

 # If enabled, user can run pprof tools with shim v2 process through kata-monitor.
 # (default: false)
-# enable_pprof = true
+enable_pprof = false

 # Indicates the CreateContainer request timeout needed for the workload(s)
 # It using guest_pull this includes the time to pull the image inside the guest
@@ -722,3 +707,22 @@ dan_conf = "@DEFDANCONF@"
 # the container image should be pulled in the guest, without using an external snapshotter.
 # This is an experimental feature and might be removed in the future.
 experimental_force_guest_pull = @DEFFORCEGUESTPULL@
+
+# pod_resource_api_sock specifies the unix socket for the Kubelet's
+# PodResource API endpoint. If empty, kubernetes based cold plug
+# will not be attempted. In order for this feature to work, the
+# KubeletPodResourcesGet featureGate must be enabled in Kubelet,
+# if using Kubelet older than 1.34.
+#
+# The pod resource API's socket is relative to the Kubelet's root-dir,
+# which is defined by the cluster admin, and its location is:
+# ${KubeletRootDir}/pod-resources/kubelet.sock
+#
+# cold_plug_vfio (see hypervisor config) acts as a feature gate:
+#      cold_plug_vfio = "no-port" (default) => no cold plug
+#      cold_plug_vfio != "no-port" AND pod_resource_api_sock = "" => need
+#              explicit CDI annotation for cold plug (applies mainly
+#              to non-k8s cases)
+#      cold_plug_vfio != "no-port" AND pod_resource_api_sock != "" => kubelet
+#              based cold plug.
+pod_resource_api_sock = "@DEFPODRESOURCEAPISOCK@"
--- a/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in
+++ b/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in
@@ -23,7 +23,7 @@ machine_type = "@MACHINETYPE@"
 #   - ext4 (default)
 #   - xfs
 #   - erofs
-rootfs_type=@DEFROOTFSTYPE@
+rootfs_type = @DEFROOTFSTYPE@

 # Enable confidential guest support.
 # Toggling that setting may trigger different hardware features, ranging
@@ -47,7 +47,7 @@ sev_snp_guest = true
 # Enable running QEMU VMM as a non-root user.
 # By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as
 # a non-root random user. See documentation for the limitations of this mode.
-# rootless = true
+rootless = false

 # List of valid annotation names for the hypervisor
 # Each member of the list is a regular expression, which is the base name
@@ -68,17 +68,17 @@ valid_hypervisor_paths = @QEMUSNPVALIDHYPERVISORPATHS@
 #
 # 96-byte, base64-encoded blob to provide the ‘ID Block’ structure for the
 # SNP_LAUNCH_FINISH command defined in the SEV-SNP firmware ABI (QEMU default: all-zero)
-#snp_id_block = ""
+snp_id_block = ""
 # 4096-byte, base64-encoded blob to provide the ‘ID Authentication Information Structure’
 # for the SNP_LAUNCH_FINISH command defined in the SEV-SNP firmware ABI (QEMU default: all-zero)
-#snp_id_auth = ""
+snp_id_auth = ""

 # SNP Guest Policy, the ‘POLICY’ parameter to the SNP_LAUNCH_START command.
 # If unset, the QEMU default policy (0x30000) will be used.
 # Notice that the guest policy is enforced at VM launch, and your pod VMs 
 # won't start at all if the policy denys it. This will be indicated by a
 # 'SNP_LAUNCH_START' error.
-#snp_guest_policy = 196608
+snp_guest_policy = 196608

 # Optional space-separated list of options to pass to the guest kernel.
 # For example, use `kernel_params = "vsyscall=emulate"` if you are having
@@ -105,7 +105,7 @@ firmware_volume = "@FIRMWARETDVFVOLUMEPATH@"
 # Machine accelerators
 # comma-separated list of machine accelerators to pass to the hypervisor.
 # For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"`
-machine_accelerators="@MACHINEACCELERATORS@"
+machine_accelerators = "@MACHINEACCELERATORS@"

 # Qemu seccomp sandbox feature
 # comma-separated list of seccomp sandbox features to control the syscall access.
@@ -113,12 +113,13 @@ machine_accelerators="@MACHINEACCELERATORS@"
 # Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
 # Another note: enabling this feature may reduce performance, you may enable
 # /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html
-#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@"
+# Recommended value when enabling: "on,obsolete=deny,spawn=deny,resourcecontrol=deny"
+seccompsandbox = "@DEFSECCOMPSANDBOXPARAM@"

 # CPU features
 # comma-separated list of cpu features to pass to the cpu
 # For example, `cpu_features = "pmu=off,vmx=off"
-cpu_features="@CPUFEATURES@"
+cpu_features = "@CPUFEATURES@"

 # Default number of vCPUs per SB/VM:
 # unspecified or 0                --> will be set to @DEFVCPUS@
@@ -163,7 +164,7 @@ default_memory = @DEFAULTMEMORY_NV@
 # Default memory slots per SB/VM.
 # If unspecified then it will be set @DEFMEMSLOTS@.
 # This is will determine the times that memory will be hotadded to sandbox/VM.
-#memory_slots = @DEFMEMSLOTS@
+memory_slots = @DEFMEMSLOTS@

 # Default maximum memory in MiB per SB / VM
 # unspecified or == 0           --> will be set to the actual amount of physical RAM
@@ -176,13 +177,13 @@ default_maxmemory = @DEFMAXMEMSZ@
 # If set block storage driver (block_device_driver) to "nvdimm",
 # should set memory_offset to the size of block device.
 # Default 0
-#memory_offset = 0
+memory_offset = 0

 # Specifies virtio-mem will be enabled or not.
 # Please note that this option should be used with the command
 # "echo 1 > /proc/sys/vm/overcommit_memory".
 # Default false
-#enable_virtio_mem = true
+enable_virtio_mem = false

 # Disable block device from being used for a container's rootfs.
 # In case of a storage driver like devicemapper where a container's
@@ -263,24 +264,28 @@ block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@"

 # Specifies cache-related options will be set to block devices or not.
 # Default false
-#block_device_cache_set = true
+block_device_cache_set = false

 # Specifies cache-related options for block devices.
 # Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
 # Default false
-#block_device_cache_direct = true
+block_device_cache_direct = false

 # Specifies cache-related options for block devices.
 # Denotes whether flush requests for the device are ignored.
 # Default false
-#block_device_cache_noflush = true
+block_device_cache_noflush = false

 # Enable iothreads (data-plane) to be used. This causes IO to be
-# handled in a separate IO thread. This is currently only implemented
-# for SCSI.
+# handled in a separate IO thread. This is currently implemented
+# for virtio-scsi and virtio-blk.
 #
 enable_iothreads = @DEFENABLEIOTHREADS@

+# Independent IOThreads enable IO to be processed in a separate thread. They are
+# used to attach QEMU hot-plugged devices, such as virtio-blk, to an iothread.
+indep_iothreads = @DEFINDEPIOTHREADS@
+
 # Enable pre allocation of VM RAM, default false
 # Enabling this will result in lower container density
 # as all of the memory will be allocated and locked
@@ -288,7 +293,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # upfront or in the cases where you want memory latencies
 # to be very predictable
 # Default false
-#enable_mem_prealloc = true
+enable_mem_prealloc = false

 # Reclaim guest freed memory.
 # Enabling this will result in the VM balloon device having f_reporting=on set.
@@ -298,7 +303,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # the VM.
 #
 # Default false
-#reclaim_guest_freed_memory = true
+reclaim_guest_freed_memory = false

 # Enable huge pages for VM RAM, default false
 # Enabling this will result in the VM memory
@@ -306,7 +311,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # This is useful when you want to use vhost-user network
 # stacks within the container. This will automatically
 # result in memory pre allocation
-#enable_hugepages = true
+enable_hugepages = false

 # Enable vhost-user storage device, default false
 # Enabling this will result in some Linux reserved block type
@@ -323,11 +328,11 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
 # Enabling this will result in the VM having a vIOMMU device
 # This will also add the following options to the kernel's
 # command line: intel_iommu=on,iommu=pt
-#enable_iommu = true
+enable_iommu = false

 # Enable IOMMU_PLATFORM, default false
 # Enabling this will result in the VM device having iommu_platform=on set
-#enable_iommu_platform = true
+enable_iommu_platform = false

 # List of valid annotations values for the vhost user store path
 # The default if not set is empty (all annotations rejected.)
@@ -358,17 +363,17 @@ pflashes = []
 # to enable debug output where available. And Debug also enable the hmp socket.
 #
 # Default false
-#enable_debug = true
+enable_debug = false

 # Disable the customizations done in the runtime when it detects
 # that it is running on top a VMM. This will result in the runtime
 # behaving as it would when running on bare metal.
 #
-#disable_nesting_checks = true
+disable_nesting_checks = false

 # This is the msize used for 9p shares. It is the number of bytes
 # used for 9p packet payload.
-#msize_9p = @DEFMSIZE9P@
+msize_9p = @DEFMSIZE9P@

 # If false and nvdimm is supported, use nvdimm device to plug guest image.
 # Otherwise virtio-block device is used.
@@ -380,7 +385,7 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
 # Use this parameter when using some large PCI bar devices, such as Nvidia GPU
 # The value means the number of pcie_root_port
 # Default 0
-#pcie_root_port = 2
+pcie_root_port = 0

 # In a confidential compute environment hot-plugging can compromise
 # security. 
@@ -391,7 +396,7 @@ cold_plug_vfio = "@DEFAULTVFIOPORT_NV@"

 # If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
 # security (vhost-net runs ring0) for network I/O performance.
-#disable_vhost_net = true
+disable_vhost_net = false

 #
 # Default entropy source.
@@ -403,7 +408,7 @@ cold_plug_vfio = "@DEFAULTVFIOPORT_NV@"
 # The source of entropy /dev/urandom is non-blocking and provides a
 # generally acceptable source of entropy. It should work well for pretty much
 # all practical purposes.
-#entropy_source= "@DEFENTROPYSOURCE@"
+entropy_source = "@DEFENTROPYSOURCE@"

 # List of valid annotations values for entropy_source
 # The default if not set is empty (all annotations rejected.)
@@ -425,17 +430,18 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
 # Warnings will be logged if any error is encountered while scanning for hooks,
 # but it will not abort container execution.
-#guest_hook_path = "/usr/share/oci/hooks"
+# Recommended value when enabling: "/usr/share/oci/hooks"
+guest_hook_path = ""
 #
 # Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#rx_rate_limiter_max_rate = 0
+rx_rate_limiter_max_rate = 0
 # Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block)
 # to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#tx_rate_limiter_max_rate = 0
+tx_rate_limiter_max_rate = 0

 # Set where to save the guest memory dump file.
 # If set, when GUEST_PANICKED event occurred,
@@ -445,9 +451,10 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # The dumped file(also called vmcore) can be processed with crash or gdb.
 #
 # WARNING:
-#   Dump guest’s memory can take very long depending on the amount of guest memory
+#   Dumping the guest's memory can take very long depending on the amount of guest memory
 #   and use much disk space.
-#guest_memory_dump_path="/var/crash/kata"
+# Recommended value when enabling: "/var/crash/kata"
+guest_memory_dump_path = ""

 # If enable paging.
 # Basically, if you want to use "gdb" rather than "crash",
@@ -455,7 +462,7 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # then you should enable paging.
 #
 # See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details
-#guest_memory_dump_paging=false
+guest_memory_dump_paging = false

 # Enable swap in the guest. Default false.
 # When enable_guest_swap is enabled, insert a raw file to the guest as the swap device
@@ -466,20 +473,20 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # If swap_in_bytes is not set, the size should be memory_limit_in_bytes.
 # If swap_in_bytes and memory_limit_in_bytes is not set, the size should
 # be default_memory.
-#enable_guest_swap = true
+enable_guest_swap = false

 # use legacy serial for guest console if available and implemented for architecture. Default false
-#use_legacy_serial = true
+use_legacy_serial = false

 # disable applying SELinux on the VMM process (default false)
-disable_selinux=@DEFDISABLESELINUX@
+disable_selinux = @DEFDISABLESELINUX@

 # disable applying SELinux on the container process
 # If set to false, the type `container_t` is applied to the container process by default.
 # Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
 # with `SELINUX=yes`.
 # (default: true)
-disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
+disable_guest_selinux = @DEFDISABLEGUESTSELINUX@


 [factory]
@@ -494,12 +501,12 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 # Note: Requires "initrd=" to be set ("image=" is not supported).
 #
 # Default false
-#enable_template = true
+enable_template = false

 # Specifies the path of template.
 #
 # Default "/run/vc/vm/template"
-#template_path = "/run/vc/vm/template"
+template_path = "/run/vc/vm/template"

 # The number of caches of VMCache:
 # unspecified or == 0   --> VMCache is disabled
@@ -518,17 +525,17 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 # a new sandbox.
 #
 # Default 0
-#vm_cache_number = 0
+vm_cache_number = 0

 # Specify the address of the Unix socket that is used by VMCache.
 #
 # Default /var/run/kata-containers/cache.sock
-#vm_cache_endpoint = "/var/run/kata-containers/cache.sock"
+vm_cache_endpoint = "/var/run/kata-containers/cache.sock"

 [agent.@PROJECT_TYPE@]
 # If enabled, make the agent display debug-level messages.
 # (default: disabled)
-#enable_debug = true
+enable_debug = false

 # Enable agent tracing.
 #
@@ -542,7 +549,7 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 #   increasing the container shutdown time slightly.
 #
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Comma separated list of kernel modules and their parameters.
 # These modules will be loaded in the guest kernel using modprobe(8).
@@ -555,14 +562,14 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 #  * The module is not available in the guest or it doesn't met the guest kernel
 #    requirements, like architecture and version.
 #
-kernel_modules=[]
+kernel_modules = []

 # Enable debug console.

 # If enabled, user can connect guest OS running inside hypervisor
 # through "kata-runtime exec <sandbox-id>" command

-#debug_console_enabled = true
+debug_console_enabled = false

 # Agent connection dialing timeout value in seconds
 # (default: 90)
@@ -572,7 +579,7 @@ dial_timeout = 90
 # If enabled, the runtime will log additional debug messages to the
 # system log
 # (default: disabled)
-#enable_debug = true
+enable_debug = false
 #
 # Internetworking model
 # Determines how the VM should be connected to the
@@ -590,19 +597,19 @@ dial_timeout = 90
 #     Uses tc filter rules to redirect traffic from the network interface
 #     provided by plugin to a tap interface connected to the VM.
 #
-internetworking_model="@DEFNETWORKMODEL_QEMU@"
+internetworking_model = "@DEFNETWORKMODEL_QEMU@"

 # disable guest seccomp
 # Determines whether container seccomp profiles are passed to the virtual
 # machine and applied by the kata agent. If set to true, seccomp is not applied
 # within the guest
 # (default: true)
-disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
+disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@

 # vCPUs pinning settings
 # if enabled, each vCPU thread will be scheduled to a fixed CPU
 # qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
-# enable_vcpus_pinning = false
+enable_vcpus_pinning = false

 # Apply a custom SELinux security policy to the container process inside the VM.
 # This is used when you want to apply a type other than the default `container_t`,
@@ -610,22 +617,23 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # (format: "user:role:type")
 # Note: You cannot specify MCS policy with the label because the sensitivity levels and
 # categories are determined automatically by high-level container runtimes such as containerd.
-#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
+# Example value when enabling: "system_u:system_r:container_t"
+guest_selinux_label = "@DEFGUESTSELINUXLABEL@"

 # If enabled, the runtime will create opentracing.io traces and spans.
 # (See https://www.jaegertracing.io/docs/getting-started).
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Set the full url to the Jaeger HTTP Thrift collector.
 # The default if not set will be "http://localhost:14268/api/traces"
-#jaeger_endpoint = ""
+jaeger_endpoint = ""

 # Sets the username to be used if basic auth is required for Jaeger.
-#jaeger_user = ""
+jaeger_user = ""

 # Sets the password to be used if basic auth is required for Jaeger.
-#jaeger_password = ""
+jaeger_password = ""

 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
@@ -633,7 +641,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
 # (like OVS) directly.
 # (default: false)
-#disable_new_netns = true
+disable_new_netns = false

 # if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
 # The container cgroups in the host are not created, just one single cgroup per sandbox.
@@ -641,7 +649,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
 # The sandbox cgroup is constrained if there is no container type annotation.
 # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
-sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_NV@
+sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY_NV@

 # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
 # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
@@ -650,13 +658,13 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_NV@
 # - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
 #   does not yet support sandbox sizing annotations.
 # - When running single containers using a tool like ctr, container sizing information will be available.
-static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_TEE@
+static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_TEE@

 # If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path.
 # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
 # If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
 # These will not be exposed to the container workloads, and are only provided for potential guest services.
-sandbox_bind_mounts=@DEFBINDMOUNTS@
+sandbox_bind_mounts = @DEFBINDMOUNTS@

 # VFIO Mode
 # Determines how VFIO devices should be be presented to the container.
@@ -677,22 +685,22 @@ sandbox_bind_mounts=@DEFBINDMOUNTS@
 #    Using this mode requires specially built workloads that know how
 #    to locate the relevant device interfaces within the VM.
 #
-vfio_mode="@DEFVFIOMODE@"
+vfio_mode = "@DEFVFIOMODE@"

 # If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
 # be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
-disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@
+disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@

 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
 # they may break compatibility, and are prepared for a big version bump.
 # Supported experimental features:
 # (default: [])
-experimental=@DEFAULTEXPFEATURES@
+experimental = @DEFAULTEXPFEATURES@

 # If enabled, user can run pprof tools with shim v2 process through kata-monitor.
 # (default: false)
-# enable_pprof = true
+enable_pprof = false

 # Indicates the CreateContainer request timeout needed for the workload(s)
 # It using guest_pull this includes the time to pull the image inside the guest
@@ -715,3 +723,22 @@ dan_conf = "@DEFDANCONF@"
 # the container image should be pulled in the guest, without using an external snapshotter.
 # This is an experimental feature and might be removed in the future.
 experimental_force_guest_pull = @DEFFORCEGUESTPULL@
+
+# pod_resource_api_sock specifies the unix socket for the Kubelet's
+# PodResource API endpoint. If empty, kubernetes based cold plug
+# will not be attempted. In order for this feature to work, the
+# KubeletPodResourcesGet featureGate must be enabled in Kubelet,
+# if using Kubelet older than 1.34.
+#
+# The pod resource API's socket is relative to the Kubelet's root-dir,
+# which is defined by the cluster admin, and its location is:
+# ${KubeletRootDir}/pod-resources/kubelet.sock
+#
+# cold_plug_vfio (see hypervisor config) acts as a feature gate:
+#      cold_plug_vfio = "no-port" (default) => no cold plug
+#      cold_plug_vfio != "no-port" AND pod_resource_api_sock = "" => need
+#              explicit CDI annotation for cold plug (applies mainly
+#              to non-k8s cases)
+#      cold_plug_vfio != "no-port" AND pod_resource_api_sock != "" => kubelet
+#              based cold plug.
+pod_resource_api_sock = "@DEFPODRESOURCEAPISOCK_NV@"
--- a/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in
+++ b/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in
@@ -23,7 +23,7 @@ tdx_quote_generation_service_socket_port = @QEMUTDXQUOTEGENERATIONSERVICESOCKETP
 #   - ext4 (default)
 #   - xfs
 #   - erofs
-rootfs_type=@DEFROOTFSTYPE@
+rootfs_type = @DEFROOTFSTYPE@

 # Enable confidential guest support.
 # Toggling that setting may trigger different hardware features, ranging
@@ -44,7 +44,7 @@ confidential_guest = true
 # Enable running QEMU VMM as a non-root user.
 # By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as
 # a non-root random user. See documentation for the limitations of this mode.
-# rootless = true
+rootless = false

 # List of valid annotation names for the hypervisor
 # Each member of the list is a regular expression, which is the base name
@@ -82,7 +82,7 @@ firmware_volume = "@FIRMWARETDVFVOLUMEPATH@"
 # Machine accelerators
 # comma-separated list of machine accelerators to pass to the hypervisor.
 # For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"`
-machine_accelerators="@MACHINEACCELERATORS@"
+machine_accelerators = "@MACHINEACCELERATORS@"

 # Qemu seccomp sandbox feature
 # comma-separated list of seccomp sandbox features to control the syscall access.
@@ -90,12 +90,13 @@ machine_accelerators="@MACHINEACCELERATORS@"
 # Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
 # Another note: enabling this feature may reduce performance, you may enable
 # /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html
-#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@"
+# Recommended value when enabling: "on,obsolete=deny,spawn=deny,resourcecontrol=deny"
+seccompsandbox = "@DEFSECCOMPSANDBOXPARAM@"

 # CPU features
 # comma-separated list of cpu features to pass to the cpu
 # For example, `cpu_features = "pmu=off,vmx=off"
-cpu_features="@TDXCPUFEATURES@"
+cpu_features = "@TDXCPUFEATURES@"

 # Default number of vCPUs per SB/VM:
 # unspecified or 0                --> will be set to @DEFVCPUS@
@@ -140,7 +141,7 @@ default_memory = @DEFAULTMEMORY_NV@
 # Default memory slots per SB/VM.
 # If unspecified then it will be set @DEFMEMSLOTS@.
 # This is will determine the times that memory will be hotadded to sandbox/VM.
-#memory_slots = @DEFMEMSLOTS@
+memory_slots = @DEFMEMSLOTS@

 # Default maximum memory in MiB per SB / VM
 # unspecified or == 0           --> will be set to the actual amount of physical RAM
@@ -153,13 +154,13 @@ default_maxmemory = @DEFMAXMEMSZ@
 # If set block storage driver (block_device_driver) to "nvdimm",
 # should set memory_offset to the size of block device.
 # Default 0
-#memory_offset = 0
+memory_offset = 0

 # Specifies virtio-mem will be enabled or not.
 # Please note that this option should be used with the command
 # "echo 1 > /proc/sys/vm/overcommit_memory".
 # Default false
-#enable_virtio_mem = true
+enable_virtio_mem = false

 # Disable block device from being used for a container's rootfs.
 # In case of a storage driver like devicemapper where a container's
@@ -240,24 +241,28 @@ block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@"

 # Specifies cache-related options will be set to block devices or not.
 # Default false
-#block_device_cache_set = true
+block_device_cache_set = false

 # Specifies cache-related options for block devices.
 # Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
 # Default false
-#block_device_cache_direct = true
+block_device_cache_direct = false

 # Specifies cache-related options for block devices.
 # Denotes whether flush requests for the device are ignored.
 # Default false
-#block_device_cache_noflush = true
+block_device_cache_noflush = false

 # Enable iothreads (data-plane) to be used. This causes IO to be
-# handled in a separate IO thread. This is currently only implemented
-# for SCSI.
+# handled in a separate IO thread. This is currently implemented
+# for virtio-scsi and virtio-blk.
 #
 enable_iothreads = @DEFENABLEIOTHREADS@

+# Independent IOThreads enable IO to be processed in a separate thread. They
+# let QEMU hotplugged devices, such as virtio-blk, attach to an iothread.
+indep_iothreads = @DEFINDEPIOTHREADS@
+
 # Enable pre allocation of VM RAM, default false
 # Enabling this will result in lower container density
 # as all of the memory will be allocated and locked
@@ -265,7 +270,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # upfront or in the cases where you want memory latencies
 # to be very predictable
 # Default false
-#enable_mem_prealloc = true
+enable_mem_prealloc = false

 # Reclaim guest freed memory.
 # Enabling this will result in the VM balloon device having f_reporting=on set.
@@ -275,7 +280,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # the VM.
 #
 # Default false
-#reclaim_guest_freed_memory = true
+reclaim_guest_freed_memory = false

 # Enable huge pages for VM RAM, default false
 # Enabling this will result in the VM memory
@@ -283,7 +288,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # This is useful when you want to use vhost-user network
 # stacks within the container. This will automatically
 # result in memory pre allocation
-#enable_hugepages = true
+enable_hugepages = false

 # Enable vhost-user storage device, default false
 # Enabling this will result in some Linux reserved block type
@@ -300,11 +305,11 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
 # Enabling this will result in the VM having a vIOMMU device
 # This will also add the following options to the kernel's
 # command line: intel_iommu=on,iommu=pt
-#enable_iommu = true
+enable_iommu = false

 # Enable IOMMU_PLATFORM, default false
 # Enabling this will result in the VM device having iommu_platform=on set
-#enable_iommu_platform = true
+enable_iommu_platform = false

 # List of valid annotations values for the vhost user store path
 # The default if not set is empty (all annotations rejected.)
@@ -320,7 +325,7 @@ vhost_user_reconnect_timeout_sec = 0
 # will disable this feature. In the case of virtio-fs, this is enabled
 # automatically and '/dev/shm' is used as the backing folder.
 # This option will be ignored if VM templating is enabled.
-#file_mem_backend = "@DEFFILEMEMBACKEND@"
+file_mem_backend = "@DEFFILEMEMBACKEND@"

 # List of valid annotations values for the file_mem_backend annotation
 # The default if not set is empty (all annotations rejected.)
@@ -335,17 +340,17 @@ pflashes = []
 # to enable debug output where available. And Debug also enable the hmp socket.
 #
 # Default false
-#enable_debug = true
+enable_debug = false

 # Disable the customizations done in the runtime when it detects
 # that it is running on top a VMM. This will result in the runtime
 # behaving as it would when running on bare metal.
 #
-#disable_nesting_checks = true
+disable_nesting_checks = false

 # This is the msize used for 9p shares. It is the number of bytes
 # used for 9p packet payload.
-#msize_9p = @DEFMSIZE9P@
+msize_9p = @DEFMSIZE9P@

 # If false and nvdimm is supported, use nvdimm device to plug guest image.
 # Otherwise virtio-block device is used.
@@ -357,7 +362,7 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
 # Use this parameter when using some large PCI bar devices, such as Nvidia GPU
 # The value means the number of pcie_root_port
 # Default 0
-#pcie_root_port = 2
+pcie_root_port = 0

 # In a confidential compute environment hot-plugging can compromise
 # security. 
@@ -368,7 +373,7 @@ cold_plug_vfio = "@DEFAULTVFIOPORT_NV@"

 # If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
 # security (vhost-net runs ring0) for network I/O performance.
-#disable_vhost_net = true
+disable_vhost_net = false

 #
 # Default entropy source.
@@ -380,7 +385,7 @@ cold_plug_vfio = "@DEFAULTVFIOPORT_NV@"
 # The source of entropy /dev/urandom is non-blocking and provides a
 # generally acceptable source of entropy. It should work well for pretty much
 # all practical purposes.
-#entropy_source= "@DEFENTROPYSOURCE@"
+entropy_source = "@DEFENTROPYSOURCE@"

 # List of valid annotations values for entropy_source
 # The default if not set is empty (all annotations rejected.)
@@ -402,17 +407,18 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
 # Warnings will be logged if any error is encountered while scanning for hooks,
 # but it will not abort container execution.
-#guest_hook_path = "/usr/share/oci/hooks"
+# Recommended value when enabling: "/usr/share/oci/hooks"
+guest_hook_path = ""
 #
 # Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#rx_rate_limiter_max_rate = 0
+rx_rate_limiter_max_rate = 0
 # Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block)
 # to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#tx_rate_limiter_max_rate = 0
+tx_rate_limiter_max_rate = 0

 # Set where to save the guest memory dump file.
 # If set, when GUEST_PANICKED event occurred,
@@ -422,9 +428,10 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # The dumped file(also called vmcore) can be processed with crash or gdb.
 #
 # WARNING:
-#   Dump guest’s memory can take very long depending on the amount of guest memory
+#   Dump guest's memory can take very long depending on the amount of guest memory
 #   and use much disk space.
-#guest_memory_dump_path="/var/crash/kata"
+# Recommended value when enabling: "/var/crash/kata"
+guest_memory_dump_path = ""

 # If enable paging.
 # Basically, if you want to use "gdb" rather than "crash",
@@ -432,7 +439,7 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # then you should enable paging.
 #
 # See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details
-#guest_memory_dump_paging=false
+guest_memory_dump_paging = false

 # Enable swap in the guest. Default false.
 # When enable_guest_swap is enabled, insert a raw file to the guest as the swap device
@@ -443,20 +450,20 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # If swap_in_bytes is not set, the size should be memory_limit_in_bytes.
 # If swap_in_bytes and memory_limit_in_bytes is not set, the size should
 # be default_memory.
-#enable_guest_swap = true
+enable_guest_swap = false

 # use legacy serial for guest console if available and implemented for architecture. Default false
-#use_legacy_serial = true
+use_legacy_serial = false

 # disable applying SELinux on the VMM process (default false)
-disable_selinux=@DEFDISABLESELINUX@
+disable_selinux = @DEFDISABLESELINUX@

 # disable applying SELinux on the container process
 # If set to false, the type `container_t` is applied to the container process by default.
 # Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
 # with `SELINUX=yes`.
 # (default: true)
-disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
+disable_guest_selinux = @DEFDISABLEGUESTSELINUX@


 [factory]
@@ -471,12 +478,12 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 # Note: Requires "initrd=" to be set ("image=" is not supported).
 #
 # Default false
-#enable_template = true
+enable_template = false

 # Specifies the path of template.
 #
 # Default "/run/vc/vm/template"
-#template_path = "/run/vc/vm/template"
+template_path = "/run/vc/vm/template"

 # The number of caches of VMCache:
 # unspecified or == 0   --> VMCache is disabled
@@ -495,17 +502,17 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 # a new sandbox.
 #
 # Default 0
-#vm_cache_number = 0
+vm_cache_number = 0

 # Specify the address of the Unix socket that is used by VMCache.
 #
 # Default /var/run/kata-containers/cache.sock
-#vm_cache_endpoint = "/var/run/kata-containers/cache.sock"
+vm_cache_endpoint = "/var/run/kata-containers/cache.sock"

 [agent.@PROJECT_TYPE@]
 # If enabled, make the agent display debug-level messages.
 # (default: disabled)
-#enable_debug = true
+enable_debug = false

 # Enable agent tracing.
 #
@@ -519,7 +526,7 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 #   increasing the container shutdown time slightly.
 #
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Comma separated list of kernel modules and their parameters.
 # These modules will be loaded in the guest kernel using modprobe(8).
@@ -532,14 +539,14 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 #  * The module is not available in the guest or it doesn't met the guest kernel
 #    requirements, like architecture and version.
 #
-kernel_modules=[]
+kernel_modules = []

 # Enable debug console.

 # If enabled, user can connect guest OS running inside hypervisor
 # through "kata-runtime exec <sandbox-id>" command

-#debug_console_enabled = true
+debug_console_enabled = false

 # Agent connection dialing timeout value in seconds
 # (default: 90)
@@ -549,7 +556,7 @@ dial_timeout = 90
 # If enabled, the runtime will log additional debug messages to the
 # system log
 # (default: disabled)
-#enable_debug = true
+enable_debug = false
 #
 # Internetworking model
 # Determines how the VM should be connected to the
@@ -567,19 +574,19 @@ dial_timeout = 90
 #     Uses tc filter rules to redirect traffic from the network interface
 #     provided by plugin to a tap interface connected to the VM.
 #
-internetworking_model="@DEFNETWORKMODEL_QEMU@"
+internetworking_model = "@DEFNETWORKMODEL_QEMU@"

 # disable guest seccomp
 # Determines whether container seccomp profiles are passed to the virtual
 # machine and applied by the kata agent. If set to true, seccomp is not applied
 # within the guest
 # (default: true)
-disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
+disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@

 # vCPUs pinning settings
 # if enabled, each vCPU thread will be scheduled to a fixed CPU
 # qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
-# enable_vcpus_pinning = false
+enable_vcpus_pinning = false

 # Apply a custom SELinux security policy to the container process inside the VM.
 # This is used when you want to apply a type other than the default `container_t`,
@@ -587,22 +594,23 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # (format: "user:role:type")
 # Note: You cannot specify MCS policy with the label because the sensitivity levels and
 # categories are determined automatically by high-level container runtimes such as containerd.
-#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
+# Example value when enabling: "system_u:system_r:container_t"
+guest_selinux_label = "@DEFGUESTSELINUXLABEL@"

 # If enabled, the runtime will create opentracing.io traces and spans.
 # (See https://www.jaegertracing.io/docs/getting-started).
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Set the full url to the Jaeger HTTP Thrift collector.
 # The default if not set will be "http://localhost:14268/api/traces"
-#jaeger_endpoint = ""
+jaeger_endpoint = ""

 # Sets the username to be used if basic auth is required for Jaeger.
-#jaeger_user = ""
+jaeger_user = ""

 # Sets the password to be used if basic auth is required for Jaeger.
-#jaeger_password = ""
+jaeger_password = ""

 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
@@ -610,7 +618,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
 # (like OVS) directly.
 # (default: false)
-#disable_new_netns = true
+disable_new_netns = false

 # if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
 # The container cgroups in the host are not created, just one single cgroup per sandbox.
@@ -618,7 +626,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
 # The sandbox cgroup is constrained if there is no container type annotation.
 # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
-sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_NV@
+sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY_NV@

 # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
 # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
@@ -627,13 +635,13 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_NV@
 # - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
 #   does not yet support sandbox sizing annotations.
 # - When running single containers using a tool like ctr, container sizing information will be available.
-static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_TEE@
+static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_TEE@

 # If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path.
 # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
 # If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
 # These will not be exposed to the container workloads, and are only provided for potential guest services.
-sandbox_bind_mounts=@DEFBINDMOUNTS@
+sandbox_bind_mounts = @DEFBINDMOUNTS@

 # VFIO Mode
 # Determines how VFIO devices should be be presented to the container.
@@ -654,22 +662,22 @@ sandbox_bind_mounts=@DEFBINDMOUNTS@
 #    Using this mode requires specially built workloads that know how
 #    to locate the relevant device interfaces within the VM.
 #
-vfio_mode="@DEFVFIOMODE@"
+vfio_mode = "@DEFVFIOMODE@"

 # If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
 # be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
-disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@
+disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@

 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
 # they may break compatibility, and are prepared for a big version bump.
 # Supported experimental features:
 # (default: [])
-experimental=@DEFAULTEXPFEATURES@
+experimental = @DEFAULTEXPFEATURES@

 # If enabled, user can run pprof tools with shim v2 process through kata-monitor.
 # (default: false)
-# enable_pprof = true
+enable_pprof = false

 # Indicates the CreateContainer request timeout needed for the workload(s)
 # It using guest_pull this includes the time to pull the image inside the guest
@@ -692,3 +700,22 @@ dan_conf = "@DEFDANCONF@"
 # the container image should be pulled in the guest, without using an external snapshotter.
 # This is an experimental feature and might be removed in the future.
 experimental_force_guest_pull = @DEFFORCEGUESTPULL@
+
+# pod_resource_api_sock specifies the unix socket for the Kubelet's
+# PodResource API endpoint. If empty, kubernetes based cold plug
+# will not be attempted. In order for this feature to work, the
+# KubeletPodResourcesGet featureGate must be enabled in Kubelet,
+# if using Kubelet older than 1.34.
+#
+# The pod resource API's socket is relative to the Kubelet's root-dir,
+# which is defined by the cluster admin, and its location is:
+# ${KubeletRootDir}/pod-resources/kubelet.sock
+#
+# cold_plug_vfio (see hypervisor config) acts as a feature gate:
+#      cold_plug_vfio = "no-port" (default) => no cold plug
+#      cold_plug_vfio != "no-port" AND pod_resource_api_sock = "" => need
+#              explicit CDI annotation for cold plug (applies mainly
+#              to non-k8s cases)
+#      cold_plug_vfio != "no-port" AND pod_resource_api_sock != "" => kubelet
+#              based cold plug.
+pod_resource_api_sock = "@DEFPODRESOURCEAPISOCK_NV@"
--- a/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in
+++ b/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in
@@ -21,34 +21,12 @@ machine_type = "@MACHINETYPE@"
 #   - ext4 (default)
 #   - xfs
 #   - erofs
-rootfs_type=@DEFROOTFSTYPE@
-
-# Enable confidential guest support.
-# Toggling that setting may trigger different hardware features, ranging
-# from memory encryption to both memory and CPU-state encryption and integrity.
-# The Kata Containers runtime dynamically detects the available feature set and
-# aims at enabling the largest possible one, returning an error if none is
-# available, or none is supported by the hypervisor.
-#
-# Known limitations:
-# * Does not work by design:
-#   - CPU Hotplug 
-#   - Memory Hotplug
-#   - NVDIMM devices
-#
-# Default false
-# confidential_guest = true
-
-# Choose AMD SEV-SNP confidential guests
-# In case of using confidential guests on AMD hardware that supports both SEV
-# and SEV-SNP, the following enables SEV-SNP guests. SEV guests are default.
-# Default false
-# sev_snp_guest = true
+rootfs_type = @DEFROOTFSTYPE@

 # Enable running QEMU VMM as a non-root user.
 # By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as
 # a non-root random user. See documentation for the limitations of this mode.
-# rootless = true
+rootless = false

 # List of valid annotation names for the hypervisor
 # Each member of the list is a regular expression, which is the base name
@@ -86,7 +64,7 @@ firmware_volume = "@FIRMWAREVOLUMEPATH@"
 # Machine accelerators
 # comma-separated list of machine accelerators to pass to the hypervisor.
 # For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"`
-machine_accelerators="@MACHINEACCELERATORS@"
+machine_accelerators = "@MACHINEACCELERATORS@"

 # Qemu seccomp sandbox feature
 # comma-separated list of seccomp sandbox features to control the syscall access.
@@ -94,12 +72,13 @@ machine_accelerators="@MACHINEACCELERATORS@"
 # Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
 # Another note: enabling this feature may reduce performance, you may enable
 # /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html
-#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@"
+# Recommended value when enabling: "on,obsolete=deny,spawn=deny,resourcecontrol=deny"
+seccompsandbox = "@DEFSECCOMPSANDBOXPARAM@"

 # CPU features
 # comma-separated list of cpu features to pass to the cpu
 # For example, `cpu_features = "pmu=off,vmx=off"
-cpu_features="@CPUFEATURES@"
+cpu_features = "@CPUFEATURES@"

 # Default number of vCPUs per SB/VM:
 # unspecified or 0                --> will be set to @DEFVCPUS@
@@ -144,7 +123,7 @@ default_memory = @DEFAULTMEMORY_NV@
 # Default memory slots per SB/VM.
 # If unspecified then it will be set @DEFMEMSLOTS@.
 # This is will determine the times that memory will be hotadded to sandbox/VM.
-#memory_slots = @DEFMEMSLOTS@
+memory_slots = @DEFMEMSLOTS@

 # Default maximum memory in MiB per SB / VM
 # unspecified or == 0           --> will be set to the actual amount of physical RAM
@@ -157,13 +136,13 @@ default_maxmemory = @DEFMAXMEMSZ@
 # If set block storage driver (block_device_driver) to "nvdimm",
 # should set memory_offset to the size of block device.
 # Default 0
-#memory_offset = 0
+memory_offset = 0

 # Specifies virtio-mem will be enabled or not.
 # Please note that this option should be used with the command
 # "echo 1 > /proc/sys/vm/overcommit_memory".
 # Default false
-#enable_virtio_mem = true
+enable_virtio_mem = false

 # Disable block device from being used for a container's rootfs.
 # In case of a storage driver like devicemapper where a container's
@@ -244,24 +223,28 @@ block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@"

 # Specifies cache-related options will be set to block devices or not.
 # Default false
-#block_device_cache_set = true
+block_device_cache_set = false

 # Specifies cache-related options for block devices.
 # Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
 # Default false
-#block_device_cache_direct = true
+block_device_cache_direct = false

 # Specifies cache-related options for block devices.
 # Denotes whether flush requests for the device are ignored.
 # Default false
-#block_device_cache_noflush = true
+block_device_cache_noflush = false

 # Enable iothreads (data-plane) to be used. This causes IO to be
-# handled in a separate IO thread. This is currently only implemented
-# for SCSI.
+# handled in a separate IO thread. This is currently implemented
+# for virtio-scsi and virtio-blk.
 #
 enable_iothreads = @DEFENABLEIOTHREADS@

+# Independent IOThreads enable IO to be processed in a separate thread. They
+# let QEMU hotplugged devices, such as virtio-blk, attach to an iothread.
+indep_iothreads = @DEFINDEPIOTHREADS@
+
 # Enable pre allocation of VM RAM, default false
 # Enabling this will result in lower container density
 # as all of the memory will be allocated and locked
@@ -269,7 +252,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # upfront or in the cases where you want memory latencies
 # to be very predictable
 # Default false
-#enable_mem_prealloc = true
+enable_mem_prealloc = false

 # Reclaim guest freed memory.
 # Enabling this will result in the VM balloon device having f_reporting=on set.
@@ -279,7 +262,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # the VM.
 #
 # Default false
-#reclaim_guest_freed_memory = true
+reclaim_guest_freed_memory = false

 # Enable huge pages for VM RAM, default false
 # Enabling this will result in the VM memory
@@ -287,7 +270,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # This is useful when you want to use vhost-user network
 # stacks within the container. This will automatically
 # result in memory pre allocation
-#enable_hugepages = true
+enable_hugepages = false

 # Enable vhost-user storage device, default false
 # Enabling this will result in some Linux reserved block type
@@ -304,11 +287,11 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
 # Enabling this will result in the VM having a vIOMMU device
 # This will also add the following options to the kernel's
 # command line: intel_iommu=on,iommu=pt
-#enable_iommu = true
+enable_iommu = false

 # Enable IOMMU_PLATFORM, default false
 # Enabling this will result in the VM device having iommu_platform=on set
-#enable_iommu_platform = true
+enable_iommu_platform = false

 # List of valid annotations values for the vhost user store path
 # The default if not set is empty (all annotations rejected.)
@@ -324,7 +307,7 @@ vhost_user_reconnect_timeout_sec = 0
 # will disable this feature. In the case of virtio-fs, this is enabled
 # automatically and '/dev/shm' is used as the backing folder.
 # This option will be ignored if VM templating is enabled.
-#file_mem_backend = "@DEFFILEMEMBACKEND@"
+file_mem_backend = "@DEFFILEMEMBACKEND@"

 # List of valid annotations values for the file_mem_backend annotation
 # The default if not set is empty (all annotations rejected.)
@@ -339,7 +322,7 @@ pflashes = []
 # to enable debug output where available.
 #
 # Default false
-#enable_debug = true
+enable_debug = false

 # This option allows to add an extra HMP or QMP socket when `enable_debug = true`
 #
@@ -354,17 +337,17 @@ pflashes = []
 #
 # If set to the empty string "", no extra monitor socket is added. This is
 # the default.
-#extra_monitor_socket = hmp
+extra_monitor_socket = ""

 # Disable the customizations done in the runtime when it detects
 # that it is running on top a VMM. This will result in the runtime
 # behaving as it would when running on bare metal.
 #
-#disable_nesting_checks = true
+disable_nesting_checks = true

 # This is the msize used for 9p shares. It is the number of bytes
 # used for 9p packet payload.
-#msize_9p = @DEFMSIZE9P@
+msize_9p = @DEFMSIZE9P@

 # If false and nvdimm is supported, use nvdimm device to plug guest image.
 # Otherwise virtio-block device is used.
@@ -382,7 +365,7 @@ hot_plug_vfio = "@DEFAULTVFIOPORT_NV@"
 # Enable cold-plugging of VFIO devices to a bridge-port, 
 # root-port or switch-port. 
 # The default setting is  "no-port", which means disabled. 
-#cold_plug_vfio = "@DEFAULTVFIOPORT_NV@"
+cold_plug_vfio = "no-port"

 # Before hot plugging a PCIe device, you need to add a pcie_root_port device.
 # Use this parameter when using some large PCI bar devices, such as Nvidia GPU
@@ -392,7 +375,7 @@ pcie_root_port = @DEFAULTPCIEROOTPORT_NV@

 # If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
 # security (vhost-net runs ring0) for network I/O performance.
-#disable_vhost_net = true
+disable_vhost_net = false

 #
 # Default entropy source.
@@ -404,7 +387,7 @@ pcie_root_port = @DEFAULTPCIEROOTPORT_NV@
 # The source of entropy /dev/urandom is non-blocking and provides a
 # generally acceptable source of entropy. It should work well for pretty much
 # all practical purposes.
-#entropy_source= "@DEFENTROPYSOURCE@"
+entropy_source = "@DEFENTROPYSOURCE@"

 # List of valid annotations values for entropy_source
 # The default if not set is empty (all annotations rejected.)
@@ -426,17 +409,18 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
 # Warnings will be logged if any error is encountered while scanning for hooks,
 # but it will not abort container execution.
-#guest_hook_path = "/usr/share/oci/hooks"
+# Recommended value when enabling: "/usr/share/oci/hooks"
+guest_hook_path = ""
 #
 # Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#rx_rate_limiter_max_rate = 0
+rx_rate_limiter_max_rate = 0
 # Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block)
 # to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#tx_rate_limiter_max_rate = 0
+tx_rate_limiter_max_rate = 0

 # Set where to save the guest memory dump file.
 # If set, when GUEST_PANICKED event occurred,
@@ -446,9 +430,10 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # The dumped file(also called vmcore) can be processed with crash or gdb.
 #
 # WARNING:
-#   Dump guest’s memory can take very long depending on the amount of guest memory
+#   Dump guest's memory can take very long depending on the amount of guest memory
 #   and use much disk space.
-#guest_memory_dump_path="/var/crash/kata"
+# Recommended value when enabling: "/var/crash/kata"
+guest_memory_dump_path = ""

 # If enable paging.
 # Basically, if you want to use "gdb" rather than "crash",
@@ -456,7 +441,7 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # then you should enable paging.
 #
 # See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details
-#guest_memory_dump_paging=false
+guest_memory_dump_paging = false

 # Enable swap in the guest. Default false.
 # When enable_guest_swap is enabled, insert a raw file to the guest as the swap device
@@ -467,20 +452,20 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # If swap_in_bytes is not set, the size should be memory_limit_in_bytes.
 # If swap_in_bytes and memory_limit_in_bytes is not set, the size should
 # be default_memory.
-#enable_guest_swap = true
+enable_guest_swap = false

 # use legacy serial for guest console if available and implemented for architecture. Default false
-#use_legacy_serial = true
+use_legacy_serial = false

 # disable applying SELinux on the VMM process (default false)
-disable_selinux=@DEFDISABLESELINUX@
+disable_selinux = @DEFDISABLESELINUX@

 # disable applying SELinux on the container process
 # If set to false, the type `container_t` is applied to the container process by default.
 # Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
 # with `SELINUX=yes`.
 # (default: true)
-disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
+disable_guest_selinux = @DEFDISABLEGUESTSELINUX@


 [factory]
@@ -495,12 +480,12 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 # Note: Requires "initrd=" to be set ("image=" is not supported).
 #
 # Default false
-#enable_template = true
+enable_template = false

 # Specifies the path of template.
 #
 # Default "/run/vc/vm/template"
-#template_path = "/run/vc/vm/template"
+template_path = "/run/vc/vm/template"

 # The number of caches of VMCache:
 # unspecified or == 0   --> VMCache is disabled
@@ -519,17 +504,17 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 # a new sandbox.
 #
 # Default 0
-#vm_cache_number = 0
+vm_cache_number = 0

 # Specify the address of the Unix socket that is used by VMCache.
 #
 # Default /var/run/kata-containers/cache.sock
-#vm_cache_endpoint = "/var/run/kata-containers/cache.sock"
+vm_cache_endpoint = "/var/run/kata-containers/cache.sock"

 [agent.@PROJECT_TYPE@]
 # If enabled, make the agent display debug-level messages.
 # (default: disabled)
-#enable_debug = true
+enable_debug = false

 # Enable agent tracing.
 #
@@ -543,7 +528,7 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 #   increasing the container shutdown time slightly.
 #
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Comma separated list of kernel modules and their parameters.
 # These modules will be loaded in the guest kernel using modprobe(8).
@@ -556,14 +541,14 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 #  * The module is not available in the guest or it doesn't met the guest kernel
 #    requirements, like architecture and version.
 #
-kernel_modules=[]
+kernel_modules = []

 # Enable debug console.

 # If enabled, user can connect guest OS running inside hypervisor
 # through "kata-runtime exec <sandbox-id>" command

-#debug_console_enabled = true
+debug_console_enabled = false

 # Agent connection dialing timeout value in seconds
 # (default: 90)
@@ -573,7 +558,7 @@ dial_timeout = 90
 # If enabled, the runtime will log additional debug messages to the
 # system log
 # (default: disabled)
-#enable_debug = true
+enable_debug = false
 #
 # Internetworking model
 # Determines how the VM should be connected to the
@@ -591,19 +576,19 @@ dial_timeout = 90
 #     Uses tc filter rules to redirect traffic from the network interface
 #     provided by plugin to a tap interface connected to the VM.
 #
-internetworking_model="@DEFNETWORKMODEL_QEMU@"
+internetworking_model = "@DEFNETWORKMODEL_QEMU@"

 # disable guest seccomp
 # Determines whether container seccomp profiles are passed to the virtual
 # machine and applied by the kata agent. If set to true, seccomp is not applied
 # within the guest
 # (default: true)
-disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
+disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@

 # vCPUs pinning settings
 # if enabled, each vCPU thread will be scheduled to a fixed CPU
 # qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
-# enable_vcpus_pinning = false
+enable_vcpus_pinning = false

 # Apply a custom SELinux security policy to the container process inside the VM.
 # This is used when you want to apply a type other than the default `container_t`,
@@ -611,22 +596,23 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # (format: "user:role:type")
 # Note: You cannot specify MCS policy with the label because the sensitivity levels and
 # categories are determined automatically by high-level container runtimes such as containerd.
-#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
+# Example value when enabling: "system_u:system_r:container_t"
+guest_selinux_label = "@DEFGUESTSELINUXLABEL@"

 # If enabled, the runtime will create opentracing.io traces and spans.
 # (See https://www.jaegertracing.io/docs/getting-started).
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Set the full url to the Jaeger HTTP Thrift collector.
 # The default if not set will be "http://localhost:14268/api/traces"
-#jaeger_endpoint = ""
+jaeger_endpoint = ""

 # Sets the username to be used if basic auth is required for Jaeger.
-#jaeger_user = ""
+jaeger_user = ""

 # Sets the password to be used if basic auth is required for Jaeger.
-#jaeger_password = ""
+jaeger_password = ""

 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
@@ -634,7 +620,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
 # (like OVS) directly.
 # (default: false)
-#disable_new_netns = true
+disable_new_netns = false

 # if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
 # The container cgroups in the host are not created, just one single cgroup per sandbox.
@@ -642,7 +628,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
 # The sandbox cgroup is constrained if there is no container type annotation.
 # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
-sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_NV@
+sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY_NV@

 # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
 # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
@@ -651,13 +637,13 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY_NV@
 # - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
 #   does not yet support sandbox sizing annotations.
 # - When running single containers using a tool like ctr, container sizing information will be available.
-static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT@
+static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT@

 # If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path.
 # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
 # If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
 # These will not be exposed to the container workloads, and are only provided for potential guest services.
-sandbox_bind_mounts=@DEFBINDMOUNTS@
+sandbox_bind_mounts = @DEFBINDMOUNTS@

 # VFIO Mode
 # Determines how VFIO devices should be be presented to the container.
@@ -678,22 +664,22 @@ sandbox_bind_mounts=@DEFBINDMOUNTS@
 #    Using this mode requires specially built workloads that know how
 #    to locate the relevant device interfaces within the VM.
 #
-vfio_mode="@DEFVFIOMODE@"
+vfio_mode = "@DEFVFIOMODE@"

 # If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
 # be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
-disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@
+disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@

 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
 # they may break compatibility, and are prepared for a big version bump.
 # Supported experimental features:
 # (default: [])
-experimental=@DEFAULTEXPFEATURES@
+experimental = @DEFAULTEXPFEATURES@

 # If enabled, user can run pprof tools with shim v2 process through kata-monitor.
 # (default: false)
-# enable_pprof = true
+enable_pprof = false

 # Indicates the CreateContainer request timeout needed for the workload(s)
 # It using guest_pull this includes the time to pull the image inside the guest
@@ -711,3 +697,22 @@ create_container_timeout = @DEFAULTTIMEOUT_NV@
 # to the hypervisor.
 # (default: /run/kata-containers/dans)
 dan_conf = "@DEFDANCONF@"
+
+# pod_resource_api_sock specifies the unix socket for the Kubelet's
+# PodResource API endpoint. If empty, kubernetes based cold plug
+# will not be attempted. In order for this feature to work, the
+# KubeletPodResourcesGet featureGate must be enabled in Kubelet,
+# if using Kubelet older than 1.34.
+#
+# The pod resource API's socket is relative to the Kubelet's root-dir,
+# which is defined by the cluster admin, and its location is:
+# ${KubeletRootDir}/pod-resources/kubelet.sock
+#
+# cold_plug_vfio (see the hypervisor config) acts as a feature gate:
+#      cold_plug_vfio = "no-port" (default) => no cold plug
+#      cold_plug_vfio != "no-port" AND pod_resource_api_sock = "" => need
+#              explicit CDI annotation for cold plug (applies mainly
+#              to non-k8s cases)
+#      cold_plug_vfio != "no-port" AND pod_resource_api_sock != "" => kubelet
+#              based cold plug.
+pod_resource_api_sock = "@DEFPODRESOURCEAPISOCK_NV@"
--- a/src/runtime/config/configuration-qemu-se.toml.in
+++ b/src/runtime/config/configuration-qemu-se.toml.in
@@ -35,7 +35,7 @@ confidential_guest = true
 # Enable running QEMU VMM as a non-root user.
 # By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as
 # a non-root random user. See documentation for the limitations of this mode.
-# rootless = true
+rootless = false

 # List of valid annotation names for the hypervisor
 # Each member of the list is a regular expression, which is the base name
@@ -73,7 +73,7 @@ firmware_volume = "@FIRMWAREVOLUMEPATH@"
 # Machine accelerators
 # comma-separated list of machine accelerators to pass to the hypervisor.
 # For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"`
-machine_accelerators="@MACHINEACCELERATORS@"
+machine_accelerators = "@MACHINEACCELERATORS@"

 # Qemu seccomp sandbox feature
 # comma-separated list of seccomp sandbox features to control the syscall access.
@@ -81,12 +81,13 @@ machine_accelerators="@MACHINEACCELERATORS@"
 # Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
 # Another note: enabling this feature may reduce performance, you may enable
 # /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html
-#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@"
+# Recommended value when enabling: "on,obsolete=deny,spawn=deny,resourcecontrol=deny"
+seccompsandbox = "@DEFSECCOMPSANDBOXPARAM@"

 # CPU features
 # comma-separated list of cpu features to pass to the cpu
 # For example, `cpu_features = "pmu=off,vmx=off"
-cpu_features="@CPUFEATURES@"
+cpu_features = "@CPUFEATURES@"

 # Default number of vCPUs per SB/VM:
 # unspecified or 0                --> will be set to @DEFVCPUS@
@@ -131,7 +132,7 @@ default_memory = @DEFMEMSZ@
 # Default memory slots per SB/VM.
 # If unspecified then it will be set @DEFMEMSLOTS@.
 # This is will determine the times that memory will be hotadded to sandbox/VM.
-#memory_slots = @DEFMEMSLOTS@
+memory_slots = @DEFMEMSLOTS@

 # Default maximum memory in MiB per SB / VM
 # unspecified or == 0           --> will be set to the actual amount of physical RAM
@@ -144,13 +145,13 @@ default_maxmemory = @DEFMAXMEMSZ@
 # If set block storage driver (block_device_driver) to "nvdimm",
 # should set memory_offset to the size of block device.
 # Default 0
-#memory_offset = 0
+memory_offset = 0

 # Specifies virtio-mem will be enabled or not.
 # Please note that this option should be used with the command
 # "echo 1 > /proc/sys/vm/overcommit_memory".
 # Default false
-#enable_virtio_mem = true
+enable_virtio_mem = false

 # Disable block device from being used for a container's rootfs.
 # In case of a storage driver like devicemapper where a container's
@@ -230,24 +231,28 @@ block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@"

 # Specifies cache-related options will be set to block devices or not.
 # Default false
-#block_device_cache_set = true
+block_device_cache_set = false

 # Specifies cache-related options for block devices.
 # Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
 # Default false
-#block_device_cache_direct = true
+block_device_cache_direct = false

 # Specifies cache-related options for block devices.
 # Denotes whether flush requests for the device are ignored.
 # Default false
-#block_device_cache_noflush = true
+block_device_cache_noflush = false

 # Enable iothreads (data-plane) to be used. This causes IO to be
-# handled in a separate IO thread. This is currently only implemented
-# for SCSI.
+# handled in a separate IO thread. This is currently implemented
+# for virtio-scsi and virtio-blk.
 #
 enable_iothreads = @DEFENABLEIOTHREADS@

+# Independent IOThreads enable IO to be processed in a separate thread. They are
+# used by QEMU hot-plugged devices, such as virtio-blk, that attach to an iothread.
+indep_iothreads = @DEFINDEPIOTHREADS@
+
 # Enable pre allocation of VM RAM, default false
 # Enabling this will result in lower container density
 # as all of the memory will be allocated and locked
@@ -255,7 +260,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # upfront or in the cases where you want memory latencies
 # to be very predictable
 # Default false
-#enable_mem_prealloc = true
+enable_mem_prealloc = false

 # Reclaim guest freed memory.
 # Enabling this will result in the VM balloon device having f_reporting=on set.
@@ -265,7 +270,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # the VM.
 #
 # Default false
-#reclaim_guest_freed_memory = true
+reclaim_guest_freed_memory = false

 # Enable huge pages for VM RAM, default false
 # Enabling this will result in the VM memory
@@ -273,7 +278,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # This is useful when you want to use vhost-user network
 # stacks within the container. This will automatically
 # result in memory pre allocation
-#enable_hugepages = true
+enable_hugepages = false

 # Enable vhost-user storage device, default false
 # Enabling this will result in some Linux reserved block type
@@ -290,11 +295,11 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
 # Enabling this will result in the VM having a vIOMMU device
 # This will also add the following options to the kernel's
 # command line: intel_iommu=on,iommu=pt
-#enable_iommu = true
+enable_iommu = false

 # Enable IOMMU_PLATFORM, default false
 # Enabling this will result in the VM device having iommu_platform=on set
-#enable_iommu_platform = true
+enable_iommu_platform = false

 # List of valid annotations values for the vhost user store path
 # The default if not set is empty (all annotations rejected.)
@@ -305,7 +310,7 @@ valid_vhost_user_store_paths = @DEFVALIDVHOSTUSERSTOREPATHS@
 # will disable this feature. In the case of virtio-fs, this is enabled
 # automatically and '/dev/shm' is used as the backing folder.
 # This option will be ignored if VM templating is enabled.
-#file_mem_backend = "@DEFFILEMEMBACKEND@"
+file_mem_backend = "@DEFFILEMEMBACKEND@"

 # List of valid annotations values for the file_mem_backend annotation
 # The default if not set is empty (all annotations rejected.)
@@ -320,17 +325,17 @@ pflashes = []
 # to enable debug output where available.
 #
 # Default false
-#enable_debug = true
+enable_debug = false

 # Disable the customizations done in the runtime when it detects
 # that it is running on top a VMM. This will result in the runtime
 # behaving as it would when running on bare metal.
 #
-#disable_nesting_checks = true
+disable_nesting_checks = false

 # This is the msize used for 9p shares. It is the number of bytes
 # used for 9p packet payload.
-#msize_9p = @DEFMSIZE9P@
+msize_9p = @DEFMSIZE9P@

 # If false and nvdimm is supported, use nvdimm device to plug guest image.
 # Otherwise virtio-block device is used.
@@ -338,10 +343,10 @@ pflashes = []
 # nvdimm is not supported when `confidential_guest = true`.
 disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@

-# Enable hot-plugging of VFIO devices to a bridge-port,
+# Enable hot-plugging of VFIO devices to a bridge-port,
 # root-port or switch-port.
 # The default setting is "no-port"
-#hot_plug_vfio = "bridge-port"
+hot_plug_vfio = "no-port"

 # In a confidential compute environment hot-plugging can compromise
 # security.
@@ -354,11 +359,11 @@ cold_plug_vfio = "bridge-port"
 # Use this parameter when using some large PCI bar devices, such as Nvidia GPU
 # The value means the number of pcie_root_port
 # Default 0
-#pcie_root_port = 2
+pcie_root_port = 0

 # If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
 # security (vhost-net runs ring0) for network I/O performance.
-#disable_vhost_net = true
+disable_vhost_net = false

 #
 # Default entropy source.
@@ -370,7 +375,7 @@ cold_plug_vfio = "bridge-port"
 # The source of entropy /dev/urandom is non-blocking and provides a
 # generally acceptable source of entropy. It should work well for pretty much
 # all practical purposes.
-#entropy_source= "@DEFENTROPYSOURCE@"
+entropy_source = "@DEFENTROPYSOURCE@"

 # List of valid annotations values for entropy_source
 # The default if not set is empty (all annotations rejected.)
@@ -392,17 +397,18 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
 # Warnings will be logged if any error is encountered while scanning for hooks,
 # but it will not abort container execution.
-#guest_hook_path = "/usr/share/oci/hooks"
+# Recommended value when enabling: "/usr/share/oci/hooks"
+guest_hook_path = ""
 #
 # Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#rx_rate_limiter_max_rate = 0
+rx_rate_limiter_max_rate = 0
 # Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block)
 # to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#tx_rate_limiter_max_rate = 0
+tx_rate_limiter_max_rate = 0

 # Set where to save the guest memory dump file.
 # If set, when GUEST_PANICKED event occurred,
@@ -412,9 +418,10 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # The dumped file(also called vmcore) can be processed with crash or gdb.
 #
 # WARNING:
-#   Dump guest’s memory can take very long depending on the amount of guest memory
+#   Dump guest's memory can take very long depending on the amount of guest memory
 #   and use much disk space.
-#guest_memory_dump_path="/var/crash/kata"
+# Recommended value when enabling: "/var/crash/kata"
+guest_memory_dump_path = ""

 # If enable paging.
 # Basically, if you want to use "gdb" rather than "crash",
@@ -422,7 +429,7 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # then you should enable paging.
 #
 # See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details
-#guest_memory_dump_paging=false
+guest_memory_dump_paging = false

 # Enable swap in the guest. Default false.
 # When enable_guest_swap is enabled, insert a raw file to the guest as the swap device
@@ -433,20 +440,20 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # If swap_in_bytes is not set, the size should be memory_limit_in_bytes.
 # If swap_in_bytes and memory_limit_in_bytes is not set, the size should
 # be default_memory.
-#enable_guest_swap = true
+enable_guest_swap = false

 # use legacy serial for guest console if available and implemented for architecture. Default false
-#use_legacy_serial = true
+use_legacy_serial = false

 # disable applying SELinux on the VMM process (default false)
-disable_selinux=@DEFDISABLESELINUX@
+disable_selinux = @DEFDISABLESELINUX@

 # disable applying SELinux on the container process
 # If set to false, the type `container_t` is applied to the container process by default.
 # Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
 # with `SELINUX=yes`.
 # (default: true)
-disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
+disable_guest_selinux = @DEFDISABLEGUESTSELINUX@


 [factory]
@@ -461,12 +468,12 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 # Note: Requires "initrd=" to be set ("image=" is not supported).
 #
 # Default false
-#enable_template = true
+enable_template = false

 # Specifies the path of template.
 #
 # Default "/run/vc/vm/template"
-#template_path = "/run/vc/vm/template"
+template_path = "/run/vc/vm/template"

 # The number of caches of VMCache:
 # unspecified or == 0   --> VMCache is disabled
@@ -485,17 +492,17 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 # a new sandbox.
 #
 # Default 0
-#vm_cache_number = 0
+vm_cache_number = 0

 # Specify the address of the Unix socket that is used by VMCache.
 #
 # Default /var/run/kata-containers/cache.sock
-#vm_cache_endpoint = "/var/run/kata-containers/cache.sock"
+vm_cache_endpoint = "/var/run/kata-containers/cache.sock"

 [agent.@PROJECT_TYPE@]
 # If enabled, make the agent display debug-level messages.
 # (default: disabled)
-#enable_debug = true
+enable_debug = false

 # Enable agent tracing.
 #
@@ -509,7 +516,7 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 #   increasing the container shutdown time slightly.
 #
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Comma separated list of kernel modules and their parameters.
 # These modules will be loaded in the guest kernel using modprobe(8).
@@ -522,14 +529,14 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 #  * The module is not available in the guest or it doesn't met the guest kernel
 #    requirements, like architecture and version.
 #
-kernel_modules=[]
+kernel_modules = []

 # Enable debug console.

 # If enabled, user can connect guest OS running inside hypervisor
 # through "kata-runtime exec <sandbox-id>" command

-#debug_console_enabled = true
+debug_console_enabled = false

 # Agent connection dialing timeout value in seconds
 # (default: 30)
@@ -539,7 +546,7 @@ dial_timeout = 90
 # If enabled, the runtime will log additional debug messages to the
 # system log
 # (default: disabled)
-#enable_debug = true
+enable_debug = false
 #
 # Internetworking model
 # Determines how the VM should be connected to the
@@ -557,19 +564,19 @@ dial_timeout = 90
 #     Uses tc filter rules to redirect traffic from the network interface
 #     provided by plugin to a tap interface connected to the VM.
 #
-internetworking_model="@DEFNETWORKMODEL_QEMU@"
+internetworking_model = "@DEFNETWORKMODEL_QEMU@"

 # disable guest seccomp
 # Determines whether container seccomp profiles are passed to the virtual
 # machine and applied by the kata agent. If set to true, seccomp is not applied
 # within the guest
 # (default: true)
-disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
+disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@

 # vCPUs pinning settings
 # if enabled, each vCPU thread will be scheduled to a fixed CPU
 # qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
-# enable_vcpus_pinning = false
+enable_vcpus_pinning = false

 # Apply a custom SELinux security policy to the container process inside the VM.
 # This is used when you want to apply a type other than the default `container_t`,
@@ -577,22 +584,23 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # (format: "user:role:type")
 # Note: You cannot specify MCS policy with the label because the sensitivity levels and
 # categories are determined automatically by high-level container runtimes such as containerd.
-#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
+# Example value when enabling: "system_u:system_r:container_t"
+guest_selinux_label = "@DEFGUESTSELINUXLABEL@"

 # If enabled, the runtime will create opentracing.io traces and spans.
 # (See https://www.jaegertracing.io/docs/getting-started).
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Set the full url to the Jaeger HTTP Thrift collector.
 # The default if not set will be "http://localhost:14268/api/traces"
-#jaeger_endpoint = ""
+jaeger_endpoint = ""

 # Sets the username to be used if basic auth is required for Jaeger.
-#jaeger_user = ""
+jaeger_user = ""

 # Sets the password to be used if basic auth is required for Jaeger.
-#jaeger_password = ""
+jaeger_password = ""

 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
@@ -600,7 +608,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
 # (like OVS) directly.
 # (default: false)
-#disable_new_netns = true
+disable_new_netns = false

 # if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
 # The container cgroups in the host are not created, just one single cgroup per sandbox.
@@ -608,7 +616,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
 # The sandbox cgroup is constrained if there is no container type annotation.
 # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
-sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
+sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY@

 # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
 # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
@@ -617,13 +625,13 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
 # - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
 #   does not yet support sandbox sizing annotations.
 # - When running single containers using a tool like ctr, container sizing information will be available.
-static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT@
+static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT@

 # If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path.
 # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
 # If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
 # These will not be exposed to the container workloads, and are only provided for potential guest services.
-sandbox_bind_mounts=@DEFBINDMOUNTS@
+sandbox_bind_mounts = @DEFBINDMOUNTS@

 # VFIO Mode
 # Determines how VFIO devices should be be presented to the container.
@@ -644,22 +652,22 @@ sandbox_bind_mounts=@DEFBINDMOUNTS@
 #    Using this mode requires specially built workloads that know how
 #    to locate the relevant device interfaces within the VM.
 #
-vfio_mode="@DEFVFIOMODE_SE@"
+vfio_mode = "@DEFVFIOMODE_SE@"

 # If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
 # be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
-disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@
+disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@

 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
 # they may break compatibility, and are prepared for a big version bump.
 # Supported experimental features:
 # (default: [])
-experimental=@DEFAULTEXPFEATURES@
+experimental = @DEFAULTEXPFEATURES@

 # If enabled, user can run pprof tools with shim v2 process through kata-monitor.
 # (default: false)
-# enable_pprof = true
+enable_pprof = false

 # Indicates the CreateContainer request timeout needed for the workload(s)
 # It using guest_pull this includes the time to pull the image inside the guest
@@ -682,3 +690,22 @@ dan_conf = "@DEFDANCONF@"
 # the container image should be pulled in the guest, without using an external snapshotter.
 # This is an experimental feature and might be removed in the future.
 experimental_force_guest_pull = @DEFFORCEGUESTPULL@
+
+# pod_resource_api_sock specifies the unix socket for the Kubelet's
+# PodResource API endpoint. If empty, Kubernetes-based cold plug
+# will not be attempted. In order for this feature to work, the
+# KubeletPodResourcesGet featureGate must be enabled in Kubelet,
+# if using Kubelet older than 1.34.
+#
+# The pod resource API's socket is relative to the Kubelet's root-dir,
+# which is defined by the cluster admin, and its location is:
+# ${KubeletRootDir}/pod-resources/kubelet.sock
+#
+# cold_plug_vfio (see hypervisor config) acts as a feature gate:
+#      cold_plug_vfio = no_port (default) => no cold plug
+#      cold_plug_vfio != no_port AND pod_resource_api_sock = "" => need
+#              explicit CDI annotation for cold plug (applies mainly
+#              to non-k8s cases)
+#      cold_plug_vfio != no_port AND pod_resource_api_sock != "" => kubelet
+#              based cold plug.
+pod_resource_api_sock = "@DEFPODRESOURCEAPISOCK@"
--- a/src/runtime/config/configuration-qemu-snp.toml.in
+++ b/src/runtime/config/configuration-qemu-snp.toml.in
@@ -15,7 +15,6 @@
 [hypervisor.qemu]
 path = "@QEMUPATH@"
 kernel = "@KERNELCONFIDENTIALPATH@"
-#image = "@IMAGEPATH@"
 initrd = "@INITRDCONFIDENTIALPATH@"
 machine_type = "@MACHINETYPE@"

@@ -23,7 +22,7 @@ machine_type = "@MACHINETYPE@"
 #   - ext4 (default)
 #   - xfs
 #   - erofs
-rootfs_type=@DEFROOTFSTYPE@
+rootfs_type = @DEFROOTFSTYPE@

 # Enable confidential guest support.
 # Toggling that setting may trigger different hardware features, ranging
@@ -47,7 +46,7 @@ sev_snp_guest = true
 # Enable running QEMU VMM as a non-root user.
 # By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as
 # a non-root random user. See documentation for the limitations of this mode.
-# rootless = true
+rootless = false

 # List of valid annotation names for the hypervisor
 # Each member of the list is a regular expression, which is the base name
@@ -68,17 +67,17 @@ valid_hypervisor_paths = @QEMUVALIDHYPERVISORPATHS@
 #
 # 96-byte, base64-encoded blob to provide the ‘ID Block’ structure for the
 # SNP_LAUNCH_FINISH command defined in the SEV-SNP firmware ABI (QEMU default: all-zero)
-#snp_id_block = ""
+snp_id_block = ""
 # 4096-byte, base64-encoded blob to provide the ‘ID Authentication Information Structure’
 # for the SNP_LAUNCH_FINISH command defined in the SEV-SNP firmware ABI (QEMU default: all-zero)
-#snp_id_auth = ""
+snp_id_auth = ""

 # SNP Guest Policy, the ‘POLICY’ parameter to the SNP_LAUNCH_START command.
 # If unset, the QEMU default policy (0x30000) will be used.
 # Notice that the guest policy is enforced at VM launch, and your pod VMs 
 # won't start at all if the policy denys it. This will be indicated by a
 # 'SNP_LAUNCH_START' error.
-#snp_guest_policy = 196608
+snp_guest_policy = 196608

 # Optional space-separated list of options to pass to the guest kernel.
 # For example, use `kernel_params = "vsyscall=emulate"` if you are having
@@ -105,7 +104,7 @@ firmware_volume = "@FIRMWARETDVFVOLUMEPATH@"
 # Machine accelerators
 # comma-separated list of machine accelerators to pass to the hypervisor.
 # For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"`
-machine_accelerators="@MACHINEACCELERATORS@"
+machine_accelerators = "@MACHINEACCELERATORS@"

 # Qemu seccomp sandbox feature
 # comma-separated list of seccomp sandbox features to control the syscall access.
@@ -113,12 +112,13 @@ machine_accelerators="@MACHINEACCELERATORS@"
 # Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
 # Another note: enabling this feature may reduce performance, you may enable
 # /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html
-#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@"
+# Recommended value when enabling: "on,obsolete=deny,spawn=deny,resourcecontrol=deny"
+seccompsandbox = "@DEFSECCOMPSANDBOXPARAM@"

 # CPU features
 # comma-separated list of cpu features to pass to the cpu
 # For example, `cpu_features = "pmu=off,vmx=off"
-cpu_features="@CPUFEATURES@"
+cpu_features = "@CPUFEATURES@"

 # Default number of vCPUs per SB/VM:
 # unspecified or 0                --> will be set to @DEFVCPUS@
@@ -163,7 +163,7 @@ default_memory = @DEFMEMSZ@
 # Default memory slots per SB/VM.
 # If unspecified then it will be set @DEFMEMSLOTS@.
 # This is will determine the times that memory will be hotadded to sandbox/VM.
-#memory_slots = @DEFMEMSLOTS@
+memory_slots = @DEFMEMSLOTS@

 # Default maximum memory in MiB per SB / VM
 # unspecified or == 0           --> will be set to the actual amount of physical RAM
@@ -176,13 +176,13 @@ default_maxmemory = @DEFMAXMEMSZ@
 # If set block storage driver (block_device_driver) to "nvdimm",
 # should set memory_offset to the size of block device.
 # Default 0
-#memory_offset = 0
+memory_offset = 0

 # Specifies virtio-mem will be enabled or not.
 # Please note that this option should be used with the command
 # "echo 1 > /proc/sys/vm/overcommit_memory".
 # Default false
-#enable_virtio_mem = true
+enable_virtio_mem = false

 # Disable block device from being used for a container's rootfs.
 # In case of a storage driver like devicemapper where a container's
@@ -263,24 +263,28 @@ block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@"

 # Specifies cache-related options will be set to block devices or not.
 # Default false
-#block_device_cache_set = true
+block_device_cache_set = false

 # Specifies cache-related options for block devices.
 # Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
 # Default false
-#block_device_cache_direct = true
+block_device_cache_direct = false

 # Specifies cache-related options for block devices.
 # Denotes whether flush requests for the device are ignored.
 # Default false
-#block_device_cache_noflush = true
+block_device_cache_noflush = false

 # Enable iothreads (data-plane) to be used. This causes IO to be
-# handled in a separate IO thread. This is currently only implemented
-# for SCSI.
+# handled in a separate IO thread. This is currently implemented
+# for virtio-scsi and virtio-blk.
 #
 enable_iothreads = @DEFENABLEIOTHREADS@

+# Independent IOThreads enable IO to be processed in a separate thread. This is
+# intended for attaching hotplugged QEMU devices, such as virtio-blk, to an iothread.
+indep_iothreads = @DEFINDEPIOTHREADS@
+
 # Enable pre allocation of VM RAM, default false
 # Enabling this will result in lower container density
 # as all of the memory will be allocated and locked
@@ -288,7 +292,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # upfront or in the cases where you want memory latencies
 # to be very predictable
 # Default false
-#enable_mem_prealloc = true
+enable_mem_prealloc = false

 # Reclaim guest freed memory.
 # Enabling this will result in the VM balloon device having f_reporting=on set.
@@ -298,7 +302,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # the VM.
 #
 # Default false
-#reclaim_guest_freed_memory = true
+reclaim_guest_freed_memory = false

 # Enable huge pages for VM RAM, default false
 # Enabling this will result in the VM memory
@@ -306,7 +310,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # This is useful when you want to use vhost-user network
 # stacks within the container. This will automatically
 # result in memory pre allocation
-#enable_hugepages = true
+enable_hugepages = false

 # Enable vhost-user storage device, default false
 # Enabling this will result in some Linux reserved block type
@@ -323,11 +327,11 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
 # Enabling this will result in the VM having a vIOMMU device
 # This will also add the following options to the kernel's
 # command line: intel_iommu=on,iommu=pt
-#enable_iommu = true
+enable_iommu = false

 # Enable IOMMU_PLATFORM, default false
 # Enabling this will result in the VM device having iommu_platform=on set
-#enable_iommu_platform = true
+enable_iommu_platform = false

 # List of valid annotations values for the vhost user store path
 # The default if not set is empty (all annotations rejected.)
@@ -358,7 +362,7 @@ pflashes = []
 # to enable debug output where available. And Debug also enable the hmp socket.
 #
 # Default false
-#enable_debug = true
+enable_debug = false

 # Disable the customizations done in the runtime when it detects
 # that it is running on top a VMM. This will result in the runtime
@@ -368,7 +372,7 @@ disable_nesting_checks = true

 # This is the msize used for 9p shares. It is the number of bytes
 # used for 9p packet payload.
-#msize_9p = @DEFMSIZE9P@
+msize_9p = @DEFMSIZE9P@

 # If false and nvdimm is supported, use nvdimm device to plug guest image.
 # Otherwise virtio-block device is used.
@@ -380,11 +384,11 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
 # Use this parameter when using some large PCI bar devices, such as Nvidia GPU
 # The value means the number of pcie_root_port
 # Default 0
-#pcie_root_port = 2
+pcie_root_port = 0

 # If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
 # security (vhost-net runs ring0) for network I/O performance.
-#disable_vhost_net = true
+disable_vhost_net = false

 #
 # Default entropy source.
@@ -396,7 +400,7 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
 # The source of entropy /dev/urandom is non-blocking and provides a
 # generally acceptable source of entropy. It should work well for pretty much
 # all practical purposes.
-#entropy_source= "@DEFENTROPYSOURCE@"
+entropy_source= "@DEFENTROPYSOURCE@"

 # List of valid annotations values for entropy_source
 # The default if not set is empty (all annotations rejected.)
@@ -418,17 +422,18 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
 # Warnings will be logged if any error is encountered while scanning for hooks,
 # but it will not abort container execution.
-#guest_hook_path = "/usr/share/oci/hooks"
+# Recommended value when enabling: "/usr/share/oci/hooks"
+guest_hook_path = ""
 #
 # Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#rx_rate_limiter_max_rate = 0
+rx_rate_limiter_max_rate = 0
 # Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block)
 # to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#tx_rate_limiter_max_rate = 0
+tx_rate_limiter_max_rate = 0

 # Set where to save the guest memory dump file.
 # If set, when GUEST_PANICKED event occurred,
@@ -438,9 +443,10 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # The dumped file(also called vmcore) can be processed with crash or gdb.
 #
 # WARNING:
-#   Dump guest’s memory can take very long depending on the amount of guest memory
+#   Dump guest's memory can take very long depending on the amount of guest memory
 #   and use much disk space.
-#guest_memory_dump_path="/var/crash/kata"
+# Recommended value when enabling: "/var/crash/kata"
+guest_memory_dump_path = ""

 # If enable paging.
 # Basically, if you want to use "gdb" rather than "crash",
@@ -448,7 +454,7 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # then you should enable paging.
 #
 # See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details
-#guest_memory_dump_paging=false
+guest_memory_dump_paging = false

 # Enable swap in the guest. Default false.
 # When enable_guest_swap is enabled, insert a raw file to the guest as the swap device
@@ -459,20 +465,20 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # If swap_in_bytes is not set, the size should be memory_limit_in_bytes.
 # If swap_in_bytes and memory_limit_in_bytes is not set, the size should
 # be default_memory.
-#enable_guest_swap = true
+enable_guest_swap = false

 # use legacy serial for guest console if available and implemented for architecture. Default false
-#use_legacy_serial = true
+use_legacy_serial = false

 # disable applying SELinux on the VMM process (default false)
-disable_selinux=@DEFDISABLESELINUX@
+disable_selinux = @DEFDISABLESELINUX@

 # disable applying SELinux on the container process
 # If set to false, the type `container_t` is applied to the container process by default.
 # Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
 # with `SELINUX=yes`.
 # (default: true)
-disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
+disable_guest_selinux = @DEFDISABLEGUESTSELINUX@


 [factory]
@@ -487,12 +493,12 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 # Note: Requires "initrd=" to be set ("image=" is not supported).
 #
 # Default false
-#enable_template = true
+enable_template = false

 # Specifies the path of template.
 #
 # Default "/run/vc/vm/template"
-#template_path = "/run/vc/vm/template"
+template_path = "/run/vc/vm/template"

 # The number of caches of VMCache:
 # unspecified or == 0   --> VMCache is disabled
@@ -511,17 +517,17 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 # a new sandbox.
 #
 # Default 0
-#vm_cache_number = 0
+vm_cache_number = 0

 # Specify the address of the Unix socket that is used by VMCache.
 #
 # Default /var/run/kata-containers/cache.sock
-#vm_cache_endpoint = "/var/run/kata-containers/cache.sock"
+vm_cache_endpoint = "/var/run/kata-containers/cache.sock"

 [agent.@PROJECT_TYPE@]
 # If enabled, make the agent display debug-level messages.
 # (default: disabled)
-#enable_debug = true
+enable_debug = false

 # Enable agent tracing.
 #
@@ -535,7 +541,7 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 #   increasing the container shutdown time slightly.
 #
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Comma separated list of kernel modules and their parameters.
 # These modules will be loaded in the guest kernel using modprobe(8).
@@ -548,14 +554,14 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 #  * The module is not available in the guest or it doesn't met the guest kernel
 #    requirements, like architecture and version.
 #
-kernel_modules=[]
+kernel_modules = []

 # Enable debug console.

 # If enabled, user can connect guest OS running inside hypervisor
 # through "kata-runtime exec <sandbox-id>" command

-#debug_console_enabled = true
+debug_console_enabled = false

 # Agent connection dialing timeout value in seconds
 # (default: 90)
@@ -565,7 +571,7 @@ dial_timeout = 90
 # If enabled, the runtime will log additional debug messages to the
 # system log
 # (default: disabled)
-#enable_debug = true
+enable_debug = false
 #
 # Internetworking model
 # Determines how the VM should be connected to the
@@ -583,19 +589,19 @@ dial_timeout = 90
 #     Uses tc filter rules to redirect traffic from the network interface
 #     provided by plugin to a tap interface connected to the VM.
 #
-internetworking_model="@DEFNETWORKMODEL_QEMU@"
+internetworking_model = "@DEFNETWORKMODEL_QEMU@"

 # disable guest seccomp
 # Determines whether container seccomp profiles are passed to the virtual
 # machine and applied by the kata agent. If set to true, seccomp is not applied
 # within the guest
 # (default: true)
-disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
+disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@

 # vCPUs pinning settings
 # if enabled, each vCPU thread will be scheduled to a fixed CPU
 # qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
-# enable_vcpus_pinning = false
+enable_vcpus_pinning = false

 # Apply a custom SELinux security policy to the container process inside the VM.
 # This is used when you want to apply a type other than the default `container_t`,
@@ -603,22 +609,23 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # (format: "user:role:type")
 # Note: You cannot specify MCS policy with the label because the sensitivity levels and
 # categories are determined automatically by high-level container runtimes such as containerd.
-#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
+# Example value when enabling: "system_u:system_r:container_t"
+guest_selinux_label = "@DEFGUESTSELINUXLABEL@"

 # If enabled, the runtime will create opentracing.io traces and spans.
 # (See https://www.jaegertracing.io/docs/getting-started).
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Set the full url to the Jaeger HTTP Thrift collector.
 # The default if not set will be "http://localhost:14268/api/traces"
-#jaeger_endpoint = ""
+jaeger_endpoint = ""

 # Sets the username to be used if basic auth is required for Jaeger.
-#jaeger_user = ""
+jaeger_user = ""

 # Sets the password to be used if basic auth is required for Jaeger.
-#jaeger_password = ""
+jaeger_password = ""

 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
@@ -626,7 +633,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
 # (like OVS) directly.
 # (default: false)
-#disable_new_netns = true
+disable_new_netns = false

 # if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
 # The container cgroups in the host are not created, just one single cgroup per sandbox.
@@ -634,7 +641,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
 # The sandbox cgroup is constrained if there is no container type annotation.
 # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
-sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
+sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY@

 # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
 # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
@@ -643,13 +650,13 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
 # - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
 #   does not yet support sandbox sizing annotations.
 # - When running single containers using a tool like ctr, container sizing information will be available.
-static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_TEE@
+static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_TEE@

 # If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path.
 # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
 # If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
 # These will not be exposed to the container workloads, and are only provided for potential guest services.
-sandbox_bind_mounts=@DEFBINDMOUNTS@
+sandbox_bind_mounts = @DEFBINDMOUNTS@

 # VFIO Mode
 # Determines how VFIO devices should be be presented to the container.
@@ -670,22 +677,22 @@ sandbox_bind_mounts=@DEFBINDMOUNTS@
 #    Using this mode requires specially built workloads that know how
 #    to locate the relevant device interfaces within the VM.
 #
-vfio_mode="@DEFVFIOMODE@"
+vfio_mode = "@DEFVFIOMODE@"

 # If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
 # be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
-disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@
+disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@

 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
 # they may break compatibility, and are prepared for a big version bump.
 # Supported experimental features:
 # (default: [])
-experimental=@DEFAULTEXPFEATURES@
+experimental = @DEFAULTEXPFEATURES@

 # If enabled, user can run pprof tools with shim v2 process through kata-monitor.
 # (default: false)
-# enable_pprof = true
+enable_pprof = false

 # Indicates the CreateContainer request timeout needed for the workload(s)
 # It using guest_pull this includes the time to pull the image inside the guest
@@ -708,3 +715,22 @@ dan_conf = "@DEFDANCONF@"
 # the container image should be pulled in the guest, without using an external snapshotter.
 # This is an experimental feature and might be removed in the future.
 experimental_force_guest_pull = @DEFFORCEGUESTPULL@
+
+# pod_resource_api_sock specifies the unix socket for the Kubelet's
+# PodResource API endpoint. If empty, Kubernetes-based cold plug
+# will not be attempted. In order for this feature to work, the
+# KubeletPodResourcesGet featureGate must be enabled in Kubelet,
+# if using Kubelet older than 1.34.
+#
+# The pod resource API's socket is relative to the Kubelet's root-dir,
+# which is defined by the cluster admin, and its location is:
+# ${KubeletRootDir}/pod-resources/kubelet.sock
+#
+# cold_plug_vfio (see hypervisor config) acts as a feature gate:
+#      cold_plug_vfio = no_port (default) => no cold plug
+#      cold_plug_vfio != no_port AND pod_resource_api_sock = "" => need
+#              explicit CDI annotation for cold plug (applies mainly
+#              to non-k8s cases)
+#      cold_plug_vfio != no_port AND pod_resource_api_sock != "" => kubelet
+#              based cold plug.
+pod_resource_api_sock = "@DEFPODRESOURCEAPISOCK@"
--- a/src/runtime/config/configuration-qemu-tdx.toml.in
+++ b/src/runtime/config/configuration-qemu-tdx.toml.in
@@ -15,7 +15,6 @@
 path = "@QEMUTDXPATH@"
 kernel = "@KERNELCONFIDENTIALPATH@"
 image = "@IMAGECONFIDENTIALPATH@"
-# initrd = "@INITRDPATH@"
 machine_type = "@MACHINETYPE@"
 tdx_quote_generation_service_socket_port = @QEMUTDXQUOTEGENERATIONSERVICESOCKETPORT@

@@ -23,7 +22,7 @@ tdx_quote_generation_service_socket_port = @QEMUTDXQUOTEGENERATIONSERVICESOCKETP
 #   - ext4 (default)
 #   - xfs
 #   - erofs
-rootfs_type=@DEFROOTFSTYPE@
+rootfs_type = @DEFROOTFSTYPE@

 # Enable confidential guest support.
 # Toggling that setting may trigger different hardware features, ranging
@@ -44,7 +43,7 @@ confidential_guest = true
 # Enable running QEMU VMM as a non-root user.
 # By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as
 # a non-root random user. See documentation for the limitations of this mode.
-# rootless = true
+rootless = false

 # List of valid annotation names for the hypervisor
 # Each member of the list is a regular expression, which is the base name
@@ -82,7 +81,7 @@ firmware_volume = "@FIRMWARETDVFVOLUMEPATH@"
 # Machine accelerators
 # comma-separated list of machine accelerators to pass to the hypervisor.
 # For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"`
-machine_accelerators="@MACHINEACCELERATORS@"
+machine_accelerators = "@MACHINEACCELERATORS@"

 # Qemu seccomp sandbox feature
 # comma-separated list of seccomp sandbox features to control the syscall access.
@@ -90,12 +89,13 @@ machine_accelerators="@MACHINEACCELERATORS@"
 # Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
 # Another note: enabling this feature may reduce performance, you may enable
 # /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html
-#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@"
+# Recommended value when enabling: "on,obsolete=deny,spawn=deny,resourcecontrol=deny"
+seccompsandbox = "@DEFSECCOMPSANDBOXPARAM@"

 # CPU features
 # comma-separated list of cpu features to pass to the cpu
 # For example, `cpu_features = "pmu=off,vmx=off"
-cpu_features="@TDXCPUFEATURES@"
+cpu_features = "@TDXCPUFEATURES@"

 # Default number of vCPUs per SB/VM:
 # unspecified or 0                --> will be set to @DEFVCPUS@
@@ -140,7 +140,7 @@ default_memory = @DEFMEMSZ@
 # Default memory slots per SB/VM.
 # If unspecified then it will be set @DEFMEMSLOTS@.
 # This is will determine the times that memory will be hotadded to sandbox/VM.
-#memory_slots = @DEFMEMSLOTS@
+memory_slots = @DEFMEMSLOTS@

 # Default maximum memory in MiB per SB / VM
 # unspecified or == 0           --> will be set to the actual amount of physical RAM
@@ -153,13 +153,13 @@ default_maxmemory = @DEFMAXMEMSZ@
 # If set block storage driver (block_device_driver) to "nvdimm",
 # should set memory_offset to the size of block device.
 # Default 0
-#memory_offset = 0
+memory_offset = 0

 # Specifies virtio-mem will be enabled or not.
 # Please note that this option should be used with the command
 # "echo 1 > /proc/sys/vm/overcommit_memory".
 # Default false
-#enable_virtio_mem = true
+enable_virtio_mem = false

 # Disable block device from being used for a container's rootfs.
 # In case of a storage driver like devicemapper where a container's
@@ -240,24 +240,28 @@ block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@"

 # Specifies cache-related options will be set to block devices or not.
 # Default false
-#block_device_cache_set = true
+block_device_cache_set = false

 # Specifies cache-related options for block devices.
 # Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
 # Default false
-#block_device_cache_direct = true
+block_device_cache_direct = false

 # Specifies cache-related options for block devices.
 # Denotes whether flush requests for the device are ignored.
 # Default false
-#block_device_cache_noflush = true
+block_device_cache_noflush = false

 # Enable iothreads (data-plane) to be used. This causes IO to be
-# handled in a separate IO thread. This is currently only implemented
-# for SCSI.
+# handled in a separate IO thread. This is currently implemented
+# for virtio-scsi and virtio-blk.
 #
 enable_iothreads = @DEFENABLEIOTHREADS@

+# Independent IOThreads enable IO to be processed in a separate thread. This is
+# intended for attaching hotplugged QEMU devices, such as virtio-blk, to an iothread.
+indep_iothreads = @DEFINDEPIOTHREADS@
+
 # Enable pre allocation of VM RAM, default false
 # Enabling this will result in lower container density
 # as all of the memory will be allocated and locked
@@ -265,7 +269,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # upfront or in the cases where you want memory latencies
 # to be very predictable
 # Default false
-#enable_mem_prealloc = true
+enable_mem_prealloc = false

 # Reclaim guest freed memory.
 # Enabling this will result in the VM balloon device having f_reporting=on set.
@@ -275,7 +279,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # the VM.
 #
 # Default false
-#reclaim_guest_freed_memory = true
+reclaim_guest_freed_memory = false

 # Enable huge pages for VM RAM, default false
 # Enabling this will result in the VM memory
@@ -283,7 +287,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # This is useful when you want to use vhost-user network
 # stacks within the container. This will automatically
 # result in memory pre allocation
-#enable_hugepages = true
+enable_hugepages = false

 # Enable vhost-user storage device, default false
 # Enabling this will result in some Linux reserved block type
@@ -300,11 +304,11 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
 # Enabling this will result in the VM having a vIOMMU device
 # This will also add the following options to the kernel's
 # command line: intel_iommu=on,iommu=pt
-#enable_iommu = true
+enable_iommu = false

 # Enable IOMMU_PLATFORM, default false
 # Enabling this will result in the VM device having iommu_platform=on set
-#enable_iommu_platform = true
+enable_iommu_platform = false

 # List of valid annotations values for the vhost user store path
 # The default if not set is empty (all annotations rejected.)
@@ -320,7 +324,7 @@ vhost_user_reconnect_timeout_sec = 0
 # will disable this feature. In the case of virtio-fs, this is enabled
 # automatically and '/dev/shm' is used as the backing folder.
 # This option will be ignored if VM templating is enabled.
-#file_mem_backend = "@DEFFILEMEMBACKEND@"
+file_mem_backend = "@DEFFILEMEMBACKEND@"

 # List of valid annotations values for the file_mem_backend annotation
 # The default if not set is empty (all annotations rejected.)
@@ -335,17 +339,17 @@ pflashes = []
 # to enable debug output where available. And Debug also enable the hmp socket.
 #
 # Default false
-#enable_debug = true
+enable_debug = false

 # Disable the customizations done in the runtime when it detects
 # that it is running on top a VMM. This will result in the runtime
 # behaving as it would when running on bare metal.
 #
-#disable_nesting_checks = true
+disable_nesting_checks = false

 # This is the msize used for 9p shares. It is the number of bytes
 # used for 9p packet payload.
-#msize_9p = @DEFMSIZE9P@
+msize_9p = @DEFMSIZE9P@

 # If false and nvdimm is supported, use nvdimm device to plug guest image.
 # Otherwise virtio-block device is used.
@@ -357,11 +361,11 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
 # Use this parameter when using some large PCI bar devices, such as Nvidia GPU
 # The value means the number of pcie_root_port
 # Default 0
-#pcie_root_port = 2
+pcie_root_port = 0

 # If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
 # security (vhost-net runs ring0) for network I/O performance.
-#disable_vhost_net = true
+disable_vhost_net = false

 #
 # Default entropy source.
@@ -373,7 +377,7 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
 # The source of entropy /dev/urandom is non-blocking and provides a
 # generally acceptable source of entropy. It should work well for pretty much
 # all practical purposes.
-#entropy_source= "@DEFENTROPYSOURCE@"
+entropy_source= "@DEFENTROPYSOURCE@"

 # List of valid annotations values for entropy_source
 # The default if not set is empty (all annotations rejected.)
@@ -395,17 +399,18 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
 # Warnings will be logged if any error is encountered while scanning for hooks,
 # but it will not abort container execution.
-#guest_hook_path = "/usr/share/oci/hooks"
+# Recommended value when enabling: "/usr/share/oci/hooks"
+guest_hook_path = ""
 #
 # Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#rx_rate_limiter_max_rate = 0
+rx_rate_limiter_max_rate = 0
 # Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block)
 # to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#tx_rate_limiter_max_rate = 0
+tx_rate_limiter_max_rate = 0

 # Set where to save the guest memory dump file.
 # If set, when GUEST_PANICKED event occurred,
@@ -415,9 +420,10 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # The dumped file(also called vmcore) can be processed with crash or gdb.
 #
 # WARNING:
-#   Dump guest’s memory can take very long depending on the amount of guest memory
+#   Dump guest's memory can take very long depending on the amount of guest memory
 #   and use much disk space.
-#guest_memory_dump_path="/var/crash/kata"
+# Recommended value when enabling: "/var/crash/kata"
+guest_memory_dump_path = ""

 # If enable paging.
 # Basically, if you want to use "gdb" rather than "crash",
@@ -425,7 +431,7 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # then you should enable paging.
 #
 # See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details
-#guest_memory_dump_paging=false
+guest_memory_dump_paging = false

 # Enable swap in the guest. Default false.
 # When enable_guest_swap is enabled, insert a raw file to the guest as the swap device
@@ -436,20 +442,20 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # If swap_in_bytes is not set, the size should be memory_limit_in_bytes.
 # If swap_in_bytes and memory_limit_in_bytes is not set, the size should
 # be default_memory.
-#enable_guest_swap = true
+enable_guest_swap = false

 # use legacy serial for guest console if available and implemented for architecture. Default false
-#use_legacy_serial = true
+use_legacy_serial = false

 # disable applying SELinux on the VMM process (default false)
-disable_selinux=@DEFDISABLESELINUX@
+disable_selinux = @DEFDISABLESELINUX@

 # disable applying SELinux on the container process
 # If set to false, the type `container_t` is applied to the container process by default.
 # Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
 # with `SELINUX=yes`.
 # (default: true)
-disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
+disable_guest_selinux = @DEFDISABLEGUESTSELINUX@


 [factory]
@@ -464,12 +470,12 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 # Note: Requires "initrd=" to be set ("image=" is not supported).
 #
 # Default false
-#enable_template = true
+enable_template = false

 # Specifies the path of template.
 #
 # Default "/run/vc/vm/template"
-#template_path = "/run/vc/vm/template"
+template_path = "/run/vc/vm/template"

 # The number of caches of VMCache:
 # unspecified or == 0   --> VMCache is disabled
@@ -488,17 +494,17 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 # a new sandbox.
 #
 # Default 0
-#vm_cache_number = 0
+vm_cache_number = 0

 # Specify the address of the Unix socket that is used by VMCache.
 #
 # Default /var/run/kata-containers/cache.sock
-#vm_cache_endpoint = "/var/run/kata-containers/cache.sock"
+vm_cache_endpoint = "/var/run/kata-containers/cache.sock"

 [agent.@PROJECT_TYPE@]
 # If enabled, make the agent display debug-level messages.
 # (default: disabled)
-#enable_debug = true
+enable_debug = false

 # Enable agent tracing.
 #
@@ -512,7 +518,7 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 #   increasing the container shutdown time slightly.
 #
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Comma separated list of kernel modules and their parameters.
 # These modules will be loaded in the guest kernel using modprobe(8).
@@ -525,14 +531,14 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 #  * The module is not available in the guest or it doesn't met the guest kernel
 #    requirements, like architecture and version.
 #
-kernel_modules=[]
+kernel_modules = []

 # Enable debug console.

 # If enabled, user can connect guest OS running inside hypervisor
 # through "kata-runtime exec <sandbox-id>" command

-#debug_console_enabled = true
+debug_console_enabled = false

 # Agent connection dialing timeout value in seconds
 # (default: 60)
@@ -542,7 +548,7 @@ dial_timeout = 60
 # If enabled, the runtime will log additional debug messages to the
 # system log
 # (default: disabled)
-#enable_debug = true
+enable_debug = false
 #
 # Internetworking model
 # Determines how the VM should be connected to the
@@ -560,19 +566,19 @@ dial_timeout = 60
 #     Uses tc filter rules to redirect traffic from the network interface
 #     provided by plugin to a tap interface connected to the VM.
 #
-internetworking_model="@DEFNETWORKMODEL_QEMU@"
+internetworking_model = "@DEFNETWORKMODEL_QEMU@"

 # disable guest seccomp
 # Determines whether container seccomp profiles are passed to the virtual
 # machine and applied by the kata agent. If set to true, seccomp is not applied
 # within the guest
 # (default: true)
-disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
+disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@

 # vCPUs pinning settings
 # if enabled, each vCPU thread will be scheduled to a fixed CPU
 # qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
-# enable_vcpus_pinning = false
+enable_vcpus_pinning = false

 # Apply a custom SELinux security policy to the container process inside the VM.
 # This is used when you want to apply a type other than the default `container_t`,
@@ -580,22 +586,23 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # (format: "user:role:type")
 # Note: You cannot specify MCS policy with the label because the sensitivity levels and
 # categories are determined automatically by high-level container runtimes such as containerd.
-#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
+# Example value when enabling: "system_u:system_r:container_t"
+guest_selinux_label = "@DEFGUESTSELINUXLABEL@"

 # If enabled, the runtime will create opentracing.io traces and spans.
 # (See https://www.jaegertracing.io/docs/getting-started).
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Set the full url to the Jaeger HTTP Thrift collector.
 # The default if not set will be "http://localhost:14268/api/traces"
-#jaeger_endpoint = ""
+jaeger_endpoint = ""

 # Sets the username to be used if basic auth is required for Jaeger.
-#jaeger_user = ""
+jaeger_user = ""

 # Sets the password to be used if basic auth is required for Jaeger.
-#jaeger_password = ""
+jaeger_password = ""

 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
@@ -603,7 +610,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
 # (like OVS) directly.
 # (default: false)
-#disable_new_netns = true
+disable_new_netns = false

 # if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
 # The container cgroups in the host are not created, just one single cgroup per sandbox.
@@ -611,7 +618,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
 # The sandbox cgroup is constrained if there is no container type annotation.
 # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
-sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
+sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY@

 # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
 # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
@@ -620,13 +627,13 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
 # - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
 #   does not yet support sandbox sizing annotations.
 # - When running single containers using a tool like ctr, container sizing information will be available.
-static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_TEE@
+static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_TEE@

 # If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path.
 # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
 # If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
 # These will not be exposed to the container workloads, and are only provided for potential guest services.
-sandbox_bind_mounts=@DEFBINDMOUNTS@
+sandbox_bind_mounts = @DEFBINDMOUNTS@

 # VFIO Mode
 # Determines how VFIO devices should be be presented to the container.
@@ -647,22 +654,22 @@ sandbox_bind_mounts=@DEFBINDMOUNTS@
 #    Using this mode requires specially built workloads that know how
 #    to locate the relevant device interfaces within the VM.
 #
-vfio_mode="@DEFVFIOMODE@"
+vfio_mode = "@DEFVFIOMODE@"

 # If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
 # be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
-disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@
+disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@

 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
 # they may break compatibility, and are prepared for a big version bump.
 # Supported experimental features:
 # (default: [])
-experimental=@DEFAULTEXPFEATURES@
+experimental = @DEFAULTEXPFEATURES@

 # If enabled, user can run pprof tools with shim v2 process through kata-monitor.
 # (default: false)
-# enable_pprof = true
+enable_pprof = false

 # Indicates the CreateContainer request timeout needed for the workload(s)
 # It using guest_pull this includes the time to pull the image inside the guest
@@ -685,3 +692,22 @@ dan_conf = "@DEFDANCONF@"
 # the container image should be pulled in the guest, without using an external snapshotter.
 # This is an experimental feature and might be removed in the future.
 experimental_force_guest_pull = @DEFFORCEGUESTPULL@
+
+# pod_resource_api_sock specifies the unix socket for the Kubelet's
+# PodResource API endpoint. If empty, kubernetes based cold plug
+# will not be attempted. In order for this feature to work, the
+# KubeletPodResourcesGet featureGate must be enabled in Kubelet,
+# if using Kubelet older than 1.34.
+#
+# The pod resource API's socket is relative to the Kubelet's root-dir,
+# which is defined by the cluster admin, and its location is:
+# ${KubeletRootDir}/pod-resources/kubelet.sock
+#
+# cold_plug_vfio (see hypervisor config) acts as a feature gate:
+#      cold_plug_vfio = "no-port" (default) => no cold plug
+#      cold_plug_vfio != "no-port" AND pod_resource_api_sock = "" => need
+#              explicit CDI annotation for cold plug (applies mainly
+#              to non-k8s cases)
+#      cold_plug_vfio != "no-port" AND pod_resource_api_sock != "" => kubelet
+#              based cold plug.
+pod_resource_api_sock = "@DEFPODRESOURCEAPISOCK@"
--- a/src/runtime/config/configuration-qemu.toml.in
+++ b/src/runtime/config/configuration-qemu.toml.in
@@ -15,41 +15,18 @@
 path = "@QEMUPATH@"
 kernel = "@KERNELPATH@"
 image = "@IMAGEPATH@"
-# initrd = "@INITRDPATH@"
 machine_type = "@MACHINETYPE@"

 # rootfs filesystem type:
 #   - ext4 (default)
 #   - xfs
 #   - erofs
-rootfs_type=@DEFROOTFSTYPE@
-
-# Enable confidential guest support.
-# Toggling that setting may trigger different hardware features, ranging
-# from memory encryption to both memory and CPU-state encryption and integrity.
-# The Kata Containers runtime dynamically detects the available feature set and
-# aims at enabling the largest possible one, returning an error if none is
-# available, or none is supported by the hypervisor.
-#
-# Known limitations:
-# * Does not work by design:
-#   - CPU Hotplug 
-#   - Memory Hotplug
-#   - NVDIMM devices
-#
-# Default false
-# confidential_guest = true
-
-# Choose AMD SEV-SNP confidential guests
-# In case of using confidential guests on AMD hardware that supports both SEV
-# and SEV-SNP, the following enables SEV-SNP guests. SEV guests are default.
-# Default false
-# sev_snp_guest = true
+rootfs_type = @DEFROOTFSTYPE@

 # Enable running QEMU VMM as a non-root user.
 # By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as
 # a non-root random user. See documentation for the limitations of this mode.
-# rootless = true
+rootless = false

 # List of valid annotation names for the hypervisor
 # Each member of the list is a regular expression, which is the base name
@@ -87,7 +64,7 @@ firmware_volume = "@FIRMWAREVOLUMEPATH@"
 # Machine accelerators
 # comma-separated list of machine accelerators to pass to the hypervisor.
 # For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"`
-machine_accelerators="@MACHINEACCELERATORS@"
+machine_accelerators = "@MACHINEACCELERATORS@"

 # Qemu seccomp sandbox feature
 # comma-separated list of seccomp sandbox features to control the syscall access.
@@ -95,12 +72,13 @@ machine_accelerators="@MACHINEACCELERATORS@"
 # Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
 # Another note: enabling this feature may reduce performance, you may enable
 # /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html
-#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@"
+# Recommended value when enabling: "on,obsolete=deny,spawn=deny,resourcecontrol=deny"
+seccompsandbox = "@DEFSECCOMPSANDBOXPARAM@"

 # CPU features
 # comma-separated list of cpu features to pass to the cpu
 # For example, `cpu_features = "pmu=off,vmx=off"
-cpu_features="@CPUFEATURES@"
+cpu_features = "@CPUFEATURES@"

 # Default number of vCPUs per SB/VM:
 # unspecified or 0                --> will be set to @DEFVCPUS@
@@ -145,7 +123,7 @@ default_memory = @DEFMEMSZ@
 # Default memory slots per SB/VM.
 # If unspecified then it will be set @DEFMEMSLOTS@.
 # This is will determine the times that memory will be hotadded to sandbox/VM.
-#memory_slots = @DEFMEMSLOTS@
+memory_slots = @DEFMEMSLOTS@

 # Default maximum memory in MiB per SB / VM
 # unspecified or == 0           --> will be set to the actual amount of physical RAM
@@ -158,13 +136,13 @@ default_maxmemory = @DEFMAXMEMSZ@
 # If set block storage driver (block_device_driver) to "nvdimm",
 # should set memory_offset to the size of block device.
 # Default 0
-#memory_offset = 0
+memory_offset = 0

 # Specifies virtio-mem will be enabled or not.
 # Please note that this option should be used with the command
 # "echo 1 > /proc/sys/vm/overcommit_memory".
 # Default false
-#enable_virtio_mem = true
+enable_virtio_mem = false

 # Disable block device from being used for a container's rootfs.
 # In case of a storage driver like devicemapper where a container's
@@ -245,24 +223,28 @@ block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@"

 # Specifies cache-related options will be set to block devices or not.
 # Default false
-#block_device_cache_set = true
+block_device_cache_set = false

 # Specifies cache-related options for block devices.
 # Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
 # Default false
-#block_device_cache_direct = true
+block_device_cache_direct = false

 # Specifies cache-related options for block devices.
 # Denotes whether flush requests for the device are ignored.
 # Default false
-#block_device_cache_noflush = true
+block_device_cache_noflush = false

 # Enable iothreads (data-plane) to be used. This causes IO to be
-# handled in a separate IO thread. This is currently only implemented
-# for SCSI.
+# handled in a separate IO thread. This is currently implemented
+# for virtio-scsi and virtio-blk.
 #
 enable_iothreads = @DEFENABLEIOTHREADS@

+# Independent IOThreads enable IO to be processed in a separate thread. They are
+# used when QEMU attaches hotplugged devices, such as virtio-blk, to an iothread.
+indep_iothreads = @DEFINDEPIOTHREADS@
+
 # Enable pre allocation of VM RAM, default false
 # Enabling this will result in lower container density
 # as all of the memory will be allocated and locked
@@ -270,7 +252,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # upfront or in the cases where you want memory latencies
 # to be very predictable
 # Default false
-#enable_mem_prealloc = true
+enable_mem_prealloc = false

 # Reclaim guest freed memory.
 # Enabling this will result in the VM balloon device having f_reporting=on set.
@@ -280,7 +262,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # the VM.
 #
 # Default false
-#reclaim_guest_freed_memory = true
+reclaim_guest_freed_memory = false

 # Enable huge pages for VM RAM, default false
 # Enabling this will result in the VM memory
@@ -288,7 +270,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
 # This is useful when you want to use vhost-user network
 # stacks within the container. This will automatically
 # result in memory pre allocation
-#enable_hugepages = true
+enable_hugepages = false

 # Enable vhost-user storage device, default false
 # Enabling this will result in some Linux reserved block type
@@ -305,11 +287,11 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
 # Enabling this will result in the VM having a vIOMMU device
 # This will also add the following options to the kernel's
 # command line: intel_iommu=on,iommu=pt
-#enable_iommu = true
+enable_iommu = false

 # Enable IOMMU_PLATFORM, default false
 # Enabling this will result in the VM device having iommu_platform=on set
-#enable_iommu_platform = true
+enable_iommu_platform = false

 # List of valid annotations values for the vhost user store path
 # The default if not set is empty (all annotations rejected.)
@@ -325,7 +307,7 @@ vhost_user_reconnect_timeout_sec = 0
 # will disable this feature. In the case of virtio-fs, this is enabled
 # automatically and '/dev/shm' is used as the backing folder.
 # This option will be ignored if VM templating is enabled.
-#file_mem_backend = "@DEFFILEMEMBACKEND@"
+file_mem_backend = "@DEFFILEMEMBACKEND@"

 # List of valid annotations values for the file_mem_backend annotation
 # The default if not set is empty (all annotations rejected.)
@@ -340,7 +322,7 @@ pflashes = []
 # to enable debug output where available.
 #
 # Default false
-#enable_debug = true
+enable_debug = false

 # This option allows to add an extra HMP or QMP socket when `enable_debug = true`
 #
@@ -355,17 +337,17 @@ pflashes = []
 #
 # If set to the empty string "", no extra monitor socket is added. This is
 # the default.
-#extra_monitor_socket = hmp
+extra_monitor_socket = ""

 # Disable the customizations done in the runtime when it detects
 # that it is running on top a VMM. This will result in the runtime
 # behaving as it would when running on bare metal.
 #
-#disable_nesting_checks = true
+disable_nesting_checks = false

 # This is the msize used for 9p shares. It is the number of bytes
 # used for 9p packet payload.
-#msize_9p = @DEFMSIZE9P@
+msize_9p = @DEFMSIZE9P@

 # If false and nvdimm is supported, use nvdimm device to plug guest image.
 # Otherwise virtio-block device is used.
@@ -376,24 +358,24 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
 # Enable hot-plugging of VFIO devices to a bridge-port, 
 # root-port or switch-port. 
 # The default setting is  "no-port"
-#hot_plug_vfio = "root-port" 
+hot_plug_vfio = "no-port"

 # In a confidential compute environment hot-plugging can compromise
 # security. 
 # Enable cold-plugging of VFIO devices to a bridge-port, 
 # root-port or switch-port. 
 # The default setting is  "no-port", which means disabled. 
-#cold_plug_vfio = "root-port" 
+cold_plug_vfio = "no-port" 

 # Before hot plugging a PCIe device, you need to add a pcie_root_port device.
 # Use this parameter when using some large PCI bar devices, such as Nvidia GPU
 # The value means the number of pcie_root_port
 # Default 0
-#pcie_root_port = 2
+pcie_root_port = 0

 # If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
 # security (vhost-net runs ring0) for network I/O performance.
-#disable_vhost_net = true
+disable_vhost_net = false

 #
 # Default entropy source.
@@ -405,7 +387,7 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
 # The source of entropy /dev/urandom is non-blocking and provides a
 # generally acceptable source of entropy. It should work well for pretty much
 # all practical purposes.
-#entropy_source= "@DEFENTROPYSOURCE@"
+entropy_source= "@DEFENTROPYSOURCE@"

 # List of valid annotations values for entropy_source
 # The default if not set is empty (all annotations rejected.)
@@ -427,17 +409,18 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
 # Warnings will be logged if any error is encountered while scanning for hooks,
 # but it will not abort container execution.
-#guest_hook_path = "/usr/share/oci/hooks"
+# Recommended value when enabling: "/usr/share/oci/hooks"
+guest_hook_path = ""
 #
 # Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#rx_rate_limiter_max_rate = 0
+rx_rate_limiter_max_rate = 0
 # Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM).
 # In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block)
 # to discipline traffic.
 # Default 0-sized value means unlimited rate.
-#tx_rate_limiter_max_rate = 0
+tx_rate_limiter_max_rate = 0

 # Set where to save the guest memory dump file.
 # If set, when GUEST_PANICKED event occurred,
@@ -447,9 +430,10 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # The dumped file(also called vmcore) can be processed with crash or gdb.
 #
 # WARNING:
-#   Dump guest’s memory can take very long depending on the amount of guest memory
+#   Dump guest's memory can take very long depending on the amount of guest memory
 #   and use much disk space.
-#guest_memory_dump_path="/var/crash/kata"
+# Recommended value when enabling: "/var/crash/kata"
+guest_memory_dump_path = ""

 # If enable paging.
 # Basically, if you want to use "gdb" rather than "crash",
@@ -457,7 +441,7 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # then you should enable paging.
 #
 # See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details
-#guest_memory_dump_paging=false
+guest_memory_dump_paging = false

 # Enable swap in the guest. Default false.
 # When enable_guest_swap is enabled, insert a raw file to the guest as the swap device
@@ -468,20 +452,20 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
 # If swap_in_bytes is not set, the size should be memory_limit_in_bytes.
 # If swap_in_bytes and memory_limit_in_bytes is not set, the size should
 # be default_memory.
-#enable_guest_swap = true
+enable_guest_swap = false

 # use legacy serial for guest console if available and implemented for architecture. Default false
-#use_legacy_serial = true
+use_legacy_serial = false

 # disable applying SELinux on the VMM process (default false)
-disable_selinux=@DEFDISABLESELINUX@
+disable_selinux = @DEFDISABLESELINUX@

 # disable applying SELinux on the container process
 # If set to false, the type `container_t` is applied to the container process by default.
 # Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
 # with `SELINUX=yes`.
 # (default: true)
-disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
+disable_guest_selinux = @DEFDISABLEGUESTSELINUX@


 [factory]
@@ -496,12 +480,12 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 # Note: Requires "initrd=" to be set ("image=" is not supported).
 #
 # Default false
-#enable_template = true
+enable_template = false

 # Specifies the path of template.
 #
 # Default "/run/vc/vm/template"
-#template_path = "/run/vc/vm/template"
+template_path = "/run/vc/vm/template"

 # The number of caches of VMCache:
 # unspecified or == 0   --> VMCache is disabled
@@ -520,17 +504,17 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 # a new sandbox.
 #
 # Default 0
-#vm_cache_number = 0
+vm_cache_number = 0

 # Specify the address of the Unix socket that is used by VMCache.
 #
 # Default /var/run/kata-containers/cache.sock
-#vm_cache_endpoint = "/var/run/kata-containers/cache.sock"
+vm_cache_endpoint = "/var/run/kata-containers/cache.sock"

 [agent.@PROJECT_TYPE@]
 # If enabled, make the agent display debug-level messages.
 # (default: disabled)
-#enable_debug = true
+enable_debug = false

 # Enable agent tracing.
 #
@@ -544,7 +528,7 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 #   increasing the container shutdown time slightly.
 #
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Comma separated list of kernel modules and their parameters.
 # These modules will be loaded in the guest kernel using modprobe(8).
@@ -557,14 +541,14 @@ disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
 #  * The module is not available in the guest or it doesn't met the guest kernel
 #    requirements, like architecture and version.
 #
-kernel_modules=[]
+kernel_modules = []

 # Enable debug console.

 # If enabled, user can connect guest OS running inside hypervisor
 # through "kata-runtime exec <sandbox-id>" command

-#debug_console_enabled = true
+debug_console_enabled = false

 # Agent connection dialing timeout value in seconds
 # (default: 45)
@@ -572,13 +556,13 @@ dial_timeout = 45

 # Confidential Data Hub API timeout value in seconds
 # (default: 50)
-#cdh_api_timeout = 50
+cdh_api_timeout = 50

 [runtime]
 # If enabled, the runtime will log additional debug messages to the
 # system log
 # (default: disabled)
-#enable_debug = true
+enable_debug = false
 #
 # Internetworking model
 # Determines how the VM should be connected to the
@@ -596,19 +580,19 @@ dial_timeout = 45
 #     Uses tc filter rules to redirect traffic from the network interface
 #     provided by plugin to a tap interface connected to the VM.
 #
-internetworking_model="@DEFNETWORKMODEL_QEMU@"
+internetworking_model = "@DEFNETWORKMODEL_QEMU@"

 # disable guest seccomp
 # Determines whether container seccomp profiles are passed to the virtual
 # machine and applied by the kata agent. If set to true, seccomp is not applied
 # within the guest
 # (default: true)
-disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
+disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@

 # vCPUs pinning settings
 # if enabled, each vCPU thread will be scheduled to a fixed CPU
 # qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
-# enable_vcpus_pinning = false
+enable_vcpus_pinning = false

 # Apply a custom SELinux security policy to the container process inside the VM.
 # This is used when you want to apply a type other than the default `container_t`,
@@ -616,22 +600,23 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # (format: "user:role:type")
 # Note: You cannot specify MCS policy with the label because the sensitivity levels and
 # categories are determined automatically by high-level container runtimes such as containerd.
-#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
+# Example value when enabling: "system_u:system_r:container_t"
+guest_selinux_label = "@DEFGUESTSELINUXLABEL@"

 # If enabled, the runtime will create opentracing.io traces and spans.
 # (See https://www.jaegertracing.io/docs/getting-started).
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Set the full url to the Jaeger HTTP Thrift collector.
 # The default if not set will be "http://localhost:14268/api/traces"
-#jaeger_endpoint = ""
+jaeger_endpoint = ""

 # Sets the username to be used if basic auth is required for Jaeger.
-#jaeger_user = ""
+jaeger_user = ""

 # Sets the password to be used if basic auth is required for Jaeger.
-#jaeger_password = ""
+jaeger_password = ""

 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
@@ -639,7 +624,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
 # (like OVS) directly.
 # (default: false)
-#disable_new_netns = true
+disable_new_netns = false

 # if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
 # The container cgroups in the host are not created, just one single cgroup per sandbox.
@@ -647,7 +632,7 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
 # The sandbox cgroup is constrained if there is no container type annotation.
 # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
-sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
+sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY@

 # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
 # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
@@ -656,13 +641,13 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
 # - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
 #   does not yet support sandbox sizing annotations.
 # - When running single containers using a tool like ctr, container sizing information will be available.
-static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT@
+static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT@

 # If specified, sandbox_bind_mounts identifies host paths to be mounted (ro) into the sandbox's shared path.
 # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
 # If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
 # These will not be exposed to the container workloads, and are only provided for potential guest services.
-sandbox_bind_mounts=@DEFBINDMOUNTS@
+sandbox_bind_mounts = @DEFBINDMOUNTS@

 # VFIO Mode
 # Determines how VFIO devices should be presented to the container.
@@ -683,22 +668,22 @@ sandbox_bind_mounts=@DEFBINDMOUNTS@
 #    Using this mode requires specially built workloads that know how
 #    to locate the relevant device interfaces within the VM.
 #
-vfio_mode="@DEFVFIOMODE@"
+vfio_mode = "@DEFVFIOMODE@"

 # If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
 # be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
-disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@
+disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@

 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
 # they may break compatibility, and are prepared for a big version bump.
 # Supported experimental features:
 # (default: [])
-experimental=@DEFAULTEXPFEATURES@
+experimental = @DEFAULTEXPFEATURES@

 # If enabled, user can run pprof tools with shim v2 process through kata-monitor.
 # (default: false)
-# enable_pprof = true
+enable_pprof = false

 # Indicates the CreateContainer request timeout needed for the workload(s)
 # If using guest_pull, this includes the time to pull the image inside the guest
@@ -716,3 +701,22 @@ create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
 # to the hypervisor.
 # (default: /run/kata-containers/dans)
 dan_conf = "@DEFDANCONF@"
+
+# pod_resource_api_sock specifies the unix socket for the Kubelet's
+# PodResource API endpoint. If empty, kubernetes based cold plug
+# will not be attempted. In order for this feature to work, the
+# KubeletPodResourcesGet featureGate must be enabled in Kubelet,
+# if using Kubelet older than 1.34.
+#
+# The pod resource API's socket is relative to the Kubelet's root-dir,
+# which is defined by the cluster admin, and its location is:
+# ${KubeletRootDir}/pod-resources/kubelet.sock
+#
+# cold_plug_vfio (see hypervisor config) acts as a feature gate:
+#      cold_plug_vfio = no_port (default) => no cold plug
+#      cold_plug_vfio != no_port AND pod_resource_api_sock = "" => need
+#              explicit CDI annotation for cold plug (applies mainly
+#              to non-k8s cases)
+#      cold_plug_vfio != no_port AND pod_resource_api_sock != "" => kubelet
+#              based cold plug.
+pod_resource_api_sock = "@DEFPODRESOURCEAPISOCK@"
--- a/src/runtime/config/configuration-remote.toml.in
+++ b/src/runtime/config/configuration-remote.toml.in
@@ -16,24 +16,6 @@
 remote_hypervisor_socket = "/run/peerpod/hypervisor.sock"
 remote_hypervisor_timeout = 600

-
-# Enable confidential guest support.
-# Toggling that setting may trigger different hardware features, ranging
-# from memory encryption to both memory and CPU-state encryption and integrity.
-# The Kata Containers runtime dynamically detects the available feature set and
-# aims at enabling the largest possible one, returning an error if none is
-# available, or none is supported by the hypervisor.
-#
-# Known limitations:
-# * Does not work by design:
-#   - CPU Hotplug
-#   - Memory Hotplug
-#   - NVDIMM devices
-#
-# Default false
-# confidential_guest = true
-
-
 # List of valid annotation names for the hypervisor
 # Each member of the list is a regular expression, which is the base name
 # of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
@@ -102,13 +84,13 @@ default_bridges = @DEFBRIDGES@
 # If unspecified then it will be set @DEFMEMSLOTS@.
 # This will determine the number of times that memory can be hot-added to the sandbox/VM.
 # Note: the remote hypervisor uses the peer pod config to determine the memory of the VM
-#memory_slots = @DEFMEMSLOTS@
+memory_slots = @DEFMEMSLOTS@

 # This option changes the default hypervisor and kernel parameters
 # to enable debug output where available. Debug also enables the hmp socket.
 #
 # Default false
-#enable_debug = true
+enable_debug = false

 # Path to OCI hook binaries in the *guest rootfs*.
 # This does not affect host-side hooks which must instead be added to
@@ -125,10 +107,11 @@ default_bridges = @DEFBRIDGES@
 # https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
 # Warnings will be logged if any error is encountered while scanning for hooks,
 # but it will not abort container execution.
-#guest_hook_path = "/usr/share/oci/hooks"
+# Recommended value when enabling: "/usr/share/oci/hooks"
+guest_hook_path = ""

 # disable applying SELinux on the VMM process (default false)
-disable_selinux=@DEFDISABLESELINUX@
+disable_selinux = @DEFDISABLESELINUX@

 # disable applying SELinux on the container process
 # If set to false, the type `container_t` is applied to the container process by default.
@@ -141,7 +124,7 @@ disable_guest_selinux = true
 [agent.@PROJECT_TYPE@]
 # If enabled, make the agent display debug-level messages.
 # (default: disabled)
-#enable_debug = true
+enable_debug = false

 # Enable agent tracing.
 #
@@ -155,24 +138,24 @@ disable_guest_selinux = true
 #   increasing the container shutdown time slightly.
 #
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Enable debug console.

 # If enabled, user can connect guest OS running inside hypervisor
 # through "kata-runtime exec <sandbox-id>" command

-#debug_console_enabled = true
+debug_console_enabled = false

 # Agent connection dialing timeout value in seconds
 # (default: 30)
-#dial_timeout = 30
+dial_timeout = 30

 [runtime]
 # If enabled, the runtime will log additional debug messages to the
 # system log
 # (default: disabled)
-#enable_debug = true
+enable_debug = false
 #
 # Internetworking model
 # Determines how the VM should be connected to the
@@ -191,7 +174,7 @@ disable_guest_selinux = true
 #     provided by plugin to a tap interface connected to the VM.
 #
 # Note: The remote hypervisor uses its own network, so "none" is required
-internetworking_model="none"
+internetworking_model = "none"

 # disable guest seccomp
 # Determines whether container seccomp profiles are passed to the virtual
@@ -199,7 +182,7 @@ internetworking_model="none"
 # within the guest
 # (default: true)
 # Note: The remote hypervisor has a different guest, so currently requires this to be set to true
-disable_guest_seccomp=true
+disable_guest_seccomp = true


 # Apply a custom SELinux security policy to the container process inside the VM.
@@ -208,22 +191,23 @@ disable_guest_seccomp=true
 # (format: "user:role:type")
 # Note: You cannot specify MCS policy with the label because the sensitivity levels and
 # categories are determined automatically by high-level container runtimes such as containerd.
-#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
+# Example value when enabling: "system_u:system_r:container_t"
+guest_selinux_label = "@DEFGUESTSELINUXLABEL@"

 # If enabled, the runtime will create opentracing.io traces and spans.
 # (See https://www.jaegertracing.io/docs/getting-started).
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Set the full url to the Jaeger HTTP Thrift collector.
 # The default if not set will be "http://localhost:14268/api/traces"
-#jaeger_endpoint = ""
+jaeger_endpoint = ""

 # Sets the username to be used if basic auth is required for Jaeger.
-#jaeger_user = ""
+jaeger_user = ""

 # Sets the password to be used if basic auth is required for Jaeger.
-#jaeger_password = ""
+jaeger_password = ""

 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
@@ -240,7 +224,7 @@ disable_new_netns = true
 # The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
 # The sandbox cgroup is constrained if there is no container type annotation.
 # See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
-sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
+sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY@

 # If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
 # this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
@@ -250,7 +234,7 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
 #   does not yet support sandbox sizing annotations.
 # - When running single containers using a tool like ctr, container sizing information will be available.
 # Note: the remote hypervisor uses the peer pod config to determine the sandbox size, so requires this to be set to true
-static_sandbox_resource_mgmt=true
+static_sandbox_resource_mgmt = true

 # VFIO Mode
 # Determines how VFIO devices should be presented to the container.
@@ -271,23 +255,23 @@ static_sandbox_resource_mgmt=true
 #    Using this mode requires specially built workloads that know how
 #    to locate the relevant device interfaces within the VM.
 #
-vfio_mode="@DEFVFIOMODE@"
+vfio_mode = "@DEFVFIOMODE@"

 # If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
 # be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
 # Note: remote hypervisor has no sharing of emptydir mounts from host to guest
-disable_guest_empty_dir=false
+disable_guest_empty_dir = false

 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
 # they may break compatibility, and are prepared for a big version bump.
 # Supported experimental features:
 # (default: [])
-experimental=@DEFAULTEXPFEATURES@
+experimental = @DEFAULTEXPFEATURES@

 # If enabled, user can run pprof tools with shim v2 process through kata-monitor.
 # (default: false)
-# enable_pprof = true
+enable_pprof = false

 # Indicates the CreateContainer request timeout needed for the workload(s)
 # If using guest_pull, this includes the time to pull the image inside the guest
@@ -305,3 +289,22 @@ create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
 # to the hypervisor.
 # (default: /run/kata-containers/dans)
 dan_conf = "@DEFDANCONF@"
+
+# pod_resource_api_sock specifies the unix socket for the Kubelet's
+# PodResource API endpoint. If empty, kubernetes based cold plug
+# will not be attempted. In order for this feature to work, the
+# KubeletPodResourcesGet featureGate must be enabled in Kubelet,
+# if using Kubelet older than 1.34.
+#
+# The pod resource API's socket is relative to the Kubelet's root-dir,
+# which is defined by the cluster admin, and its location is:
+# ${KubeletRootDir}/pod-resources/kubelet.sock
+#
+# cold_plug_vfio (see hypervisor config) acts as a feature gate:
+#      cold_plug_vfio = no_port (default) => no cold plug
+#      cold_plug_vfio != no_port AND pod_resource_api_sock = "" => need
+#              explicit CDI annotation for cold plug (applies mainly
+#              to non-k8s cases)
+#      cold_plug_vfio != no_port AND pod_resource_api_sock != "" => kubelet
+#              based cold plug.
+pod_resource_api_sock = "@DEFPODRESOURCEAPISOCK@"
--- a/src/runtime/config/configuration-stratovirt.toml.in
+++ b/src/runtime/config/configuration-stratovirt.toml.in
@@ -13,8 +13,7 @@
 [hypervisor.stratovirt]
 path = "@STRATOVIRTPATH@"
 kernel = "@KERNELPATH_STRATOVIRT@"
-#image = "@IMAGEPATH@"
-initrd = "@INITRDPATH@"
+image = "@IMAGEPATH@"
 machine_type = "@DEFMACHINETYPE_STRATOVIRT@"

 # rootfs filesystem type:
@@ -89,7 +88,7 @@ default_memory = @DEFMEMSZ@
 # Default memory slots per SB/VM.
 # If unspecified then it will be set @DEFMEMSLOTS@.
 # This will determine the number of times that memory can be hot-added to the sandbox/VM.
-#memory_slots = @DEFMEMSLOTS@
+memory_slots = @DEFMEMSLOTS@

 # Default maximum memory in MiB per SB / VM
 # unspecified or == 0           --> will be set to the actual amount of physical RAM
@@ -102,7 +101,7 @@ default_maxmemory = @DEFMAXMEMSZ@
 # If set block storage driver (block_device_driver) to "nvdimm",
 # should set memory_offset to the size of block device.
 # Default 0
-#memory_offset = 0
+memory_offset = 0

 # Disable block device from being used for a container's rootfs.
 # In case of a storage driver like devicemapper where a container's
@@ -164,17 +163,17 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_STRATOVIRT@"

 # Specifies cache-related options will be set to block devices or not.
 # Default false
-#block_device_cache_set = true
+block_device_cache_set = false

 # Specifies cache-related options for block devices.
 # Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
 # Default false
-#block_device_cache_direct = true
+block_device_cache_direct = false

 # Specifies cache-related options for block devices.
 # Denotes whether flush requests for the device are ignored.
 # Default false
-#block_device_cache_noflush = true
+block_device_cache_noflush = false

 # Enable huge pages for VM RAM, default false
 # Enabling this will result in the VM memory
@@ -182,25 +181,25 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_STRATOVIRT@"
 # This is useful when you want to use vhost-user network
 # stacks within the container. This will automatically
 # result in memory pre allocation
-#enable_hugepages = true
+enable_hugepages = false

 # Enable vIOMMU, default false
 # Enabling this will result in the VM having a vIOMMU device
 # This will also add the following options to the kernel's
 # command line: intel_iommu=on,iommu=pt
-#enable_iommu = true
+enable_iommu = false

 # This option changes the default hypervisor and kernel parameters
 # to enable debug output where available.
 #
 # Default false
-#enable_debug = true
+enable_debug = false

 # Disable the customizations done in the runtime when it detects
 # that it is running on top of a VMM. This will result in the runtime
 # behaving as it would when running on bare metal.
 #
-#disable_nesting_checks = true
+disable_nesting_checks = false

 #
 # Default entropy source.
@@ -229,7 +228,8 @@ entropy_source = "@DEFENTROPYSOURCE@"
 # https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
 # Warnings will be logged if any error is encountered while scanning for hooks,
 # but it will not abort container execution.
-#guest_hook_path = "/usr/share/oci/hooks"
+# Recommended value when enabling: "/usr/share/oci/hooks"
+guest_hook_path = ""

 # disable applying SELinux on the VMM process (default false)
 disable_selinux = @DEFDISABLESELINUX@
@@ -253,12 +253,12 @@ disable_guest_selinux = @DEFDISABLEGUESTSELINUX@
 # Note: Requires "initrd=" to be set ("image=" is not supported).
 #
 # Default false
-#enable_template = true
+enable_template = false

 [agent.@PROJECT_TYPE@]
 # If enabled, make the agent display debug-level messages.
 # (default: disabled)
-#enable_debug = true
+enable_debug = false

 # Enable agent tracing.
 #
@@ -272,7 +272,7 @@ disable_guest_selinux = @DEFDISABLEGUESTSELINUX@
 #   increasing the container shutdown time slightly.
 #
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Comma separated list of kernel modules and their parameters.
 # These modules will be loaded in the guest kernel using modprobe(8).
@@ -292,7 +292,7 @@ kernel_modules = []
 # If enabled, user can connect guest OS running inside hypervisor
 # through "kata-runtime exec <sandbox-id>" command

-#debug_console_enabled = true
+debug_console_enabled = false

 # Agent connection dialing timeout value in seconds
 # (default: 45)
@@ -300,13 +300,13 @@ dial_timeout = 45

 # Confidential Data Hub API timeout value in seconds
 # (default: 50)
-#cdh_api_timeout = 50
+cdh_api_timeout = 50

 [runtime]
 # If enabled, the runtime will log additional debug messages to the
 # system log
 # (default: disabled)
-#enable_debug = true
+enable_debug = false
 #
 # Internetworking model
 # Determines how the VM should be connected to the
@@ -336,7 +336,7 @@ disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@
 # vCPUs pinning settings
 # if enabled, each vCPU thread will be scheduled to a fixed CPU
 # qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
-#enable_vcpus_pinning = false
+enable_vcpus_pinning = false

 # Apply a custom SELinux security policy to the container process inside the VM.
 # This is used when you want to apply a type other than the default `container_t`,
@@ -344,22 +344,23 @@ disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@
 # (format: "user:role:type")
 # Note: You cannot specify MCS policy with the label because the sensitivity levels and
 # categories are determined automatically by high-level container runtimes such as containerd.
-#guest_selinux_label = "@DEFGUESTSELINUXLABEL@"
+# Example value when enabling: "system_u:system_r:container_t"
+guest_selinux_label = "@DEFGUESTSELINUXLABEL@"

 # If enabled, the runtime will create opentracing.io traces and spans.
 # (See https://www.jaegertracing.io/docs/getting-started).
 # (default: disabled)
-#enable_tracing = true
+enable_tracing = false

 # Set the full url to the Jaeger HTTP Thrift collector.
 # The default if not set will be "http://localhost:14268/api/traces"
-#jaeger_endpoint = ""
+jaeger_endpoint = ""

 # Sets the username to be used if basic auth is required for Jaeger.
-#jaeger_user = ""
+jaeger_user = ""

 # Sets the password to be used if basic auth is required for Jaeger.
-#jaeger_password = ""
+jaeger_password = ""

 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
@@ -367,7 +368,7 @@ disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@
 # with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
 # (like OVS) directly.
 # (default: false)
-#disable_new_netns = true
+disable_new_netns = false

 # if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
 # The container cgroups in the host are not created, just one single cgroup per sandbox.
@@ -399,7 +400,7 @@ experimental = @DEFAULTEXPFEATURES@

 # If enabled, user can run pprof tools with shim v2 process through kata-monitor.
 # (default: false)
-#enable_pprof = true
+enable_pprof = false

 # Indicates the CreateContainer request timeout needed for the workload(s)
 # If using guest_pull, this includes the time to pull the image inside the guest
@@ -417,3 +418,22 @@ create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
 # to the hypervisor.
 # (default: /run/kata-containers/dans)
 dan_conf = "@DEFDANCONF@"
+
+# pod_resource_api_sock specifies the unix socket for the Kubelet's
+# PodResource API endpoint. If empty, kubernetes based cold plug
+# will not be attempted. In order for this feature to work, the
+# KubeletPodResourcesGet featureGate must be enabled in Kubelet,
+# if using Kubelet older than 1.34.
+#
+# The pod resource API's socket is relative to the Kubelet's root-dir,
+# which is defined by the cluster admin, and its location is:
+# ${KubeletRootDir}/pod-resources/kubelet.sock
+#
+# cold_plug_vfio (see hypervisor config) acts as a feature gate:
+#      cold_plug_vfio = no_port (default) => no cold plug
+#      cold_plug_vfio != no_port AND pod_resource_api_sock = "" => need
+#              explicit CDI annotation for cold plug (applies mainly
+#              to non-k8s cases)
+#      cold_plug_vfio != no_port AND pod_resource_api_sock != "" => kubelet
+#              based cold plug.
+pod_resource_api_sock = "@DEFPODRESOURCEAPISOCK@"
--- a/src/runtime/go.mod
+++ b/src/runtime/go.mod
@@ -1,7 +1,7 @@
 module github.com/kata-containers/kata-containers/src/runtime

 // Keep in sync with version in versions.yaml
-go 1.24.9
+go 1.24.11

 // WARNING: Do NOT use `replace` directives as those break dependabot:
 // https://github.com/kata-containers/kata-containers/issues/11020
@@ -20,8 +20,8 @@ require (
 	github.com/containerd/fifo v1.1.0
 	github.com/containerd/ttrpc v1.2.7
 	github.com/containerd/typeurl/v2 v2.2.3
-	github.com/containernetworking/plugins v1.7.1
-	github.com/coreos/go-systemd/v22 v22.5.1-0.20231103132048-7d375ecc2b09
+	github.com/containernetworking/plugins v1.9.0
+	github.com/coreos/go-systemd/v22 v22.6.0
 	github.com/cri-o/cri-o v1.34.0
 	github.com/docker/go-units v0.5.0
 	github.com/fsnotify/fsnotify v1.9.0
@@ -46,11 +46,11 @@ require (
 	github.com/prometheus/client_model v0.6.1
 	github.com/prometheus/common v0.62.0
 	github.com/prometheus/procfs v0.15.1
-	github.com/safchain/ethtool v0.5.10
+	github.com/safchain/ethtool v0.6.2
 	github.com/sirupsen/logrus v1.9.3
 	github.com/stretchr/testify v1.11.1
 	github.com/urfave/cli v1.22.15
-	github.com/vishvananda/netlink v1.3.1-0.20250303224720-0e7078ed04c8
+	github.com/vishvananda/netlink v1.3.1
 	github.com/vishvananda/netns v0.0.5
 	gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20220601114329-47893b162965
 	go.opentelemetry.io/otel v1.35.0
@@ -58,11 +58,12 @@ require (
 	go.opentelemetry.io/otel/sdk v1.35.0
 	go.opentelemetry.io/otel/trace v1.35.0
 	golang.org/x/oauth2 v0.30.0
-	golang.org/x/sys v0.34.0
+	golang.org/x/sys v0.35.0
 	google.golang.org/grpc v1.72.0
-	google.golang.org/protobuf v1.36.6
+	google.golang.org/protobuf v1.36.7
 	k8s.io/apimachinery v0.33.0
 	k8s.io/cri-api v0.33.0
+	k8s.io/kubelet v0.33.0
 	tags.cncf.io/container-device-interface v1.0.1
 )

@@ -71,7 +72,7 @@ require (
 	github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect
 	github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20230306123547-8075edf89bb0 // indirect
 	github.com/Microsoft/go-winio v0.6.2 // indirect
-	github.com/Microsoft/hcsshim v0.12.9 // indirect
+	github.com/Microsoft/hcsshim v0.13.0 // indirect
 	github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
 	github.com/beorn7/perks v1.0.1 // indirect
 	github.com/cespare/xxhash/v2 v2.3.0 // indirect
@@ -91,7 +92,7 @@ require (
 	github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect
 	github.com/felixge/httpsnoop v1.0.4 // indirect
 	github.com/fxamacker/cbor/v2 v2.7.0 // indirect
-	github.com/go-logr/logr v1.4.2 // indirect
+	github.com/go-logr/logr v1.4.3 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
 	github.com/go-openapi/analysis v0.23.0 // indirect
 	github.com/go-openapi/jsonpointer v0.21.0 // indirect
@@ -131,9 +132,9 @@ require (
 	go.opentelemetry.io/otel/metric v1.35.0 // indirect
 	golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f // indirect
 	golang.org/x/mod v0.26.0 // indirect
-	golang.org/x/net v0.42.0 // indirect
+	golang.org/x/net v0.43.0 // indirect
 	golang.org/x/sync v0.16.0 // indirect
-	golang.org/x/text v0.27.0 // indirect
+	golang.org/x/text v0.28.0 // indirect
 	google.golang.org/genproto v0.0.0-20250303144028-a0af3efb3deb // indirect
 	google.golang.org/genproto/googleapis/rpc v0.0.0-20250313205543-e70fdf4c4cb4 // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect
--- a/src/runtime/go.sum
+++ b/src/runtime/go.sum
@@ -11,10 +11,12 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03
 github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
 github.com/BurntSushi/toml v1.5.0 h1:W5quZX/G/csjUnuI8SUYlsHs9M38FC7znL0lIO+DvMg=
 github.com/BurntSushi/toml v1.5.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=
+github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
+github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
 github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
 github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
-github.com/Microsoft/hcsshim v0.12.9 h1:2zJy5KA+l0loz1HzEGqyNnjd3fyZA31ZBCGKacp6lLg=
-github.com/Microsoft/hcsshim v0.12.9/go.mod h1:fJ0gkFAna6ukt0bLdKB8djt4XIJhF/vEPuoIWYVvZ8Y=
+github.com/Microsoft/hcsshim v0.13.0 h1:/BcXOiS6Qi7N9XqUcv27vkIuVOkBEcWstd2pMlWSeaA=
+github.com/Microsoft/hcsshim v0.13.0/go.mod h1:9KWJ/8DgU+QzYGupX4tzMhRQE8h6w90lH6HAaclpEok=
 github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so=
 github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw=
 github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
@@ -64,10 +66,10 @@ github.com/containerd/typeurl/v2 v2.2.3 h1:yNA/94zxWdvYACdYO8zofhrTVuQY73fFU1y++
 github.com/containerd/typeurl/v2 v2.2.3/go.mod h1:95ljDnPfD3bAbDJRugOiShd/DlAAsxGtUBhJxIn7SCk=
 github.com/containernetworking/cni v1.3.0 h1:v6EpN8RznAZj9765HhXQrtXgX+ECGebEYEmnuFjskwo=
 github.com/containernetworking/cni v1.3.0/go.mod h1:Bs8glZjjFfGPHMw6hQu82RUgEPNGEaBb9KS5KtNMnJ4=
-github.com/containernetworking/plugins v1.7.1 h1:CNAR0jviDj6FS5Vg85NTgKWLDzZPfi/lj+VJfhMDTIs=
-github.com/containernetworking/plugins v1.7.1/go.mod h1:xuMdjuio+a1oVQsHKjr/mgzuZ24leAsqUYRnzGoXHy0=
-github.com/coreos/go-systemd/v22 v22.5.1-0.20231103132048-7d375ecc2b09 h1:OoRAFlvDGCUqDLampLQjk0yeeSGdF9zzst/3G9IkBbc=
-github.com/coreos/go-systemd/v22 v22.5.1-0.20231103132048-7d375ecc2b09/go.mod h1:m2r/smMKsKwgMSAoFKHaa68ImdCSNuKE1MxvQ64xuCQ=
+github.com/containernetworking/plugins v1.9.0 h1:Mg3SXBdRGkdXyFC4lcwr6u2ZB2SDeL6LC3U+QrEANuQ=
+github.com/containernetworking/plugins v1.9.0/go.mod h1:JG3BxoJifxxHBhG3hFyxyhid7JgRVBu/wtooGEvWf1c=
+github.com/coreos/go-systemd/v22 v22.6.0 h1:aGVa/v8B7hpb0TKl0MWoAavPDmHvobFe5R5zn0bCJWo=
+github.com/coreos/go-systemd/v22 v22.6.0/go.mod h1:iG+pp635Fo7ZmV/j14KUcmEyWF+0X7Lua8rrTWzYgWU=
 github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
 github.com/cpuguy83/go-md2man/v2 v2.0.6 h1:XJtiaUW6dEEqVuZiMTn1ldk455QWwEIsMIJlo5vtkx0=
 github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
@@ -100,8 +102,8 @@ github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXE
 github.com/go-ini/ini v1.67.0 h1:z6ZrTEZqSWOTyH2FlglNbNgARyHG8oLW9gMELqKr06A=
 github.com/go-ini/ini v1.67.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8=
 github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
-github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
-github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
+github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
 github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
 github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
 github.com/go-openapi/analysis v0.23.0 h1:aGday7OWupfMs+LbmLZG4k0MYXIANxcuBTYUC03zFCU=
@@ -130,7 +132,6 @@ github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0 h1:p104kn46Q8Wd
 github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE=
 github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
 github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
-github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
 github.com/godbus/dbus/v5 v5.1.1-0.20230522191255-76236955d466 h1:sQspH8M4niEijh3PFscJRLDnkL547IeP7kpPe3uUhEg=
 github.com/godbus/dbus/v5 v5.1.1-0.20230522191255-76236955d466/go.mod h1:ZiQxhyQ+bbbfxUKVvjfO498oPYvtYhZzycal3G/NHmU=
 github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
@@ -165,8 +166,8 @@ github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
 github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
 github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
-github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8=
-github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
+github.com/google/pprof v0.0.0-20250820193118-f64d9cf942d6 h1:EEHtgt9IwisQ2AZ4pIsMjahcegHh6rmhqxzIRQIyepY=
+github.com/google/pprof v0.0.0-20250820193118-f64d9cf942d6/go.mod h1:I6V7YzU0XDpsHqbsyrghnFZLO1gwK6NPTNvmetQIk9U=
 github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
@@ -229,13 +230,13 @@ github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+W
 github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk=
 github.com/onsi/ginkgo v1.16.4 h1:29JGrr5oVBm5ulCWet69zQkzWipVXIol6ygQUe/EzNc=
 github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0=
-github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus=
-github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8=
+github.com/onsi/ginkgo/v2 v2.25.1 h1:Fwp6crTREKM+oA6Cz4MsO8RhKQzs2/gOIVOUscMAfZY=
+github.com/onsi/ginkgo/v2 v2.25.1/go.mod h1:ppTWQ1dh9KM/F1XgpeRqelR+zHVwV81DGRSDnFxK7Sk=
 github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
 github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
 github.com/onsi/gomega v1.16.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY=
-github.com/onsi/gomega v1.37.0 h1:CdEG8g0S133B4OswTDC/5XPSzE1OeP29QOioj2PID2Y=
-github.com/onsi/gomega v1.37.0/go.mod h1:8D9+Txp43QWKhM24yyOBEdpkzN8FvJyAwecBgsU4KU0=
+github.com/onsi/gomega v1.38.1 h1:FaLA8GlcpXDwsb7m0h2A9ew2aTk3vnZMlzFgg5tz/pk=
+github.com/onsi/gomega v1.38.1/go.mod h1:LfcV8wZLvwcYRwPiJysphKAEsmcFnLMK/9c+PjvlX8g=
 github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
 github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
@@ -270,8 +271,8 @@ github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR
 github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
 github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
-github.com/safchain/ethtool v0.5.10 h1:Im294gZtuf4pSGJRAOGKaASNi3wMeFaGaWuSaomedpc=
-github.com/safchain/ethtool v0.5.10/go.mod h1:w9jh2Lx7YBR4UwzLkzCmWl85UY0W2uZdd7/DckVE5+c=
+github.com/safchain/ethtool v0.6.2 h1:O3ZPFAKEUEfbtE6J/feEe2Ft7dIJ2Sy8t4SdMRiIMHY=
+github.com/safchain/ethtool v0.6.2/go.mod h1:VS7cn+bP3Px3rIq55xImBiZGHVLNyBh5dqG6dDQy8+I=
 github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
 github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
 github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o=
@@ -295,9 +296,8 @@ github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtse
 github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
 github.com/urfave/cli v1.22.15 h1:nuqt+pdC/KqswQKhETJjo7pvn/k4xMUxgW6liI7XpnM=
 github.com/urfave/cli v1.22.15/go.mod h1:wSan1hmo5zeyLGBjRJbzRTNk8gwoYa2B9n4q9dmRIc0=
-github.com/vishvananda/netlink v1.3.1-0.20250303224720-0e7078ed04c8 h1:Y4egeTrP7sccowz2GWTJVtHlwkZippgBTpUmMteFUWQ=
-github.com/vishvananda/netlink v1.3.1-0.20250303224720-0e7078ed04c8/go.mod h1:i6NetklAujEcC6fK0JPjT8qSwWyO0HLn4UKG+hGqeJs=
-github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
+github.com/vishvananda/netlink v1.3.1 h1:3AEMt62VKqz90r0tmNhog0r/PpWKmrEShJU0wJW6bV0=
+github.com/vishvananda/netlink v1.3.1/go.mod h1:ARtKouGSTGchR8aMwmkzC0qiNPrrWO5JS/XMVl45+b4=
 github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zdEY=
 github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
 github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
@@ -339,6 +339,8 @@ go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs=
 go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8=
 go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
 go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
+go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
+go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
@@ -364,8 +366,8 @@ golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/
 golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
 golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
 golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk=
-golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs=
-golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8=
+golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
+golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI=
 golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU=
@@ -395,15 +397,14 @@ golang.org/x/sys v0.0.0-20220817070843-5a390386f1f2/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA=
-golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
+golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
+golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4=
-golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU=
+golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
+golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
@@ -413,8 +414,8 @@ golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtn
 golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
 golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
 golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
-golang.org/x/tools v0.34.0 h1:qIpSLOxeCYGg9TrcJokLBG4KFA6d795g0xkBkiESGlo=
-golang.org/x/tools v0.34.0/go.mod h1:pAP9OwEaY1CAW3HOmg3hLZC5Z0CCmzjAF2UQMSqNARg=
+golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg=
+golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -446,8 +447,8 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD
 google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
 google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
-google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
-google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
+google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A=
+google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
@@ -469,6 +470,8 @@ k8s.io/apimachinery v0.33.0 h1:1a6kHrJxb2hs4t8EE5wuR/WxKDwGN1FKH3JvDtA0CIQ=
 k8s.io/apimachinery v0.33.0/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM=
 k8s.io/cri-api v0.33.0 h1:YyGNgWmuSREqFPlP3XCstlHLilYdW898KwtKoaTYwBs=
 k8s.io/cri-api v0.33.0/go.mod h1:OLQvT45OpIA+tv91ZrpuFIGY+Y2Ho23poS7n115Aocs=
+k8s.io/kubelet v0.33.0 h1:4pJA2Ge6Rp0kDNV76KH7pTBiaV2T1a1874QHMcubuSU=
+k8s.io/kubelet v0.33.0/go.mod h1:iDnxbJQMy9DUNaML5L/WUlt3uJtNLWh7ZAe0JSp4Yi0=
 sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=
 sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
 sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
--- a/src/runtime/pkg/containerd-shim-v2/create.go
+++ b/src/runtime/pkg/containerd-shim-v2/create.go
@@ -23,7 +23,6 @@ import (
 	containerd_types "github.com/containerd/containerd/api/types"
 	"github.com/containerd/containerd/mount"
 	"github.com/containerd/typeurl/v2"
-	"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
 	"github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations"
@@ -113,19 +112,12 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con
 		if s.sandbox != nil {
 			return nil, fmt.Errorf("cannot create another sandbox in sandbox: %s", s.sandbox.ID())
 		}
-		// Here we deal with CDI devices that are cold-plugged (k8s) and
-		// for the single_container (nerdctl, podman, ...) use-case.
-		// We can provide additional directories where to search for
-		// CDI specs if needed. immutable OS's only have specific
-		// directories where applications can write too. For instance /opt/cdi
-		//
-		// _, err = withCDI(ociSpec.Annotations, []string{"/opt/cdi"}, ociSpec)
-		_, err = config.WithCDI(ociSpec.Annotations, []string{}, ociSpec)
-		if err != nil {
-			return nil, fmt.Errorf("adding CDI devices failed: %w", err)
-		}

 		s.config = runtimeConfig
+		err = coldPlugDevices(ctx, s, ociSpec)
+		if err != nil {
+			return nil, fmt.Errorf("device cold plug failed: %w", err)
+		}

 		// create tracer
 		// This is the earliest location we can create the tracer because we must wait
--- a/src/runtime/pkg/containerd-shim-v2/device_cold_plug.go
+++ b/src/runtime/pkg/containerd-shim-v2/device_cold_plug.go
@@ -0,0 +1,146 @@
+// Copyright (c) 2025 NVIDIA CORPORATION.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+package containerdshim
+
+import (
+	"context"
+	"fmt"
+	"net"
+	"strings"
+
+	"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
+	"github.com/opencontainers/runtime-spec/specs-go"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials/insecure"
+	podresourcesv1 "k8s.io/kubelet/pkg/apis/podresources/v1"
+)
+
+const (
+	nameAnnotation      = "io.kubernetes.cri.sandbox-name"
+	namespaceAnnotation = "io.kubernetes.cri.sandbox-namespace"
+)
+
+// coldPlugDevices handles cold plug of CDI devices into the sandbox.
+// The kubelet's PodResources API is used for determining the devices to be
+// cold plugged, if so configured. Otherwise, CDI annotations can be used for
+// covering standalone scenarios.
+func coldPlugDevices(ctx context.Context, s *service, ociSpec *specs.Spec) error {
+	if s.config.HypervisorConfig.ColdPlugVFIO == config.NoPort {
+		// device cold plug is not enabled
+		shimLog.Debug("cold_plug_vfio not enabled, skip device cold plug")
+		return nil
+	}
+
+	kubeletSock := s.config.PodResourceAPISock
+	if kubeletSock != "" {
+		return coldPlugWithAPI(ctx, s, ociSpec)
+	}
+
+	shimLog.Debug("config.PodResourceAPISock not set, skip k8s based device cold plug")
+
+	// Here we deal with CDI devices that are cold-plugged
+	// for the single_container (nerdctl, podman, ...) use-case.
+	// We can provide additional directories in which to search for
+	// CDI specs if needed. Immutable OSes only have specific
+	// directories that applications can write to, for instance /opt/cdi.
+	_, err := config.WithCDI(ociSpec.Annotations, []string{}, ociSpec)
+	if err != nil {
+		return fmt.Errorf("CDI device injection failed: %w", err)
+	}
+	return nil
+}
+
+func coldPlugWithAPI(ctx context.Context, s *service, ociSpec *specs.Spec) error {
+	ann := ociSpec.Annotations
+	devices, err := getDeviceSpec(ctx, s.config.PodResourceAPISock, ann)
+	if err != nil {
+		return err
+	}
+
+	if len(devices) == 0 {
+		shimLog.WithField("pod", debugPodID(ann)).Debug("No devices found in Pod Resources, skip cold plug")
+		return nil
+	}
+
+	err = config.InjectCDIDevices(ociSpec, devices)
+	if err != nil {
+		return fmt.Errorf("cold plug: CDI device injection failed: %w", err)
+	}
+
+	return nil
+}
+
+// getDeviceSpec fetches the device information for the pod sandbox using
+// kubelet's pod resource api. This is necessary for cold plug because
+// the Kubelet does not pass the device information via CRI during
+// Sandbox creation.
+func getDeviceSpec(ctx context.Context, socket string, ann map[string]string) ([]string, error) {
+	podName := ann[nameAnnotation]
+	podNs := ann[namespaceAnnotation]
+
+	// create dialer for unix socket
+	dialer := func(ctx context.Context, target string) (net.Conn, error) {
+		// need this workaround to avoid duplicate prefix
+		addr := strings.TrimPrefix(target, "unix://")
+		return (&net.Dialer{}).DialContext(ctx, "unix", addr)
+	}
+
+	target := fmt.Sprintf("unix://%s", socket)
+	conn, err := grpc.NewClient(
+		target,
+		grpc.WithTransportCredentials(insecure.NewCredentials()),
+		grpc.WithContextDialer(dialer),
+		grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(16*1024*1024)),
+	)
+
+	if err != nil {
+		return nil, fmt.Errorf("cold plug: failed to connect to kubelet: %w", err)
+	}
+	defer conn.Close()
+
+	// create client
+	client := podresourcesv1.NewPodResourcesListerClient(conn)
+
+	// get all pod resources
+	prr := &podresourcesv1.GetPodResourcesRequest{
+		PodName:      podName,
+		PodNamespace: podNs,
+	}
+	resp, err := client.Get(ctx, prr)
+	if err != nil {
+		return nil, fmt.Errorf("cold plug: GetPodResources failed: %w", err)
+	}
+	podRes := resp.PodResources
+	if podRes == nil {
+		return nil, fmt.Errorf("cold plug: PodResources is nil")
+	}
+
+	// Process results
+	var devices []string
+	for _, container := range podRes.Containers {
+		for _, d := range container.Devices {
+			shimLog.WithField("container", container.Name).Debugf("Pod Resources Device: %s = %v\n",
+				d.ResourceName, d.DeviceIds)
+			cdiDevs := formatCDIDevIDs(d.ResourceName, d.DeviceIds)
+			devices = append(devices, cdiDevs...)
+		}
+	}
+
+	return devices, nil
+}
+
+// formatCDIDevIDs renders each device ID as "<specName>=<id>", the form the CDI package expects.
+func formatCDIDevIDs(specName string, devIDs []string) []string {
+	var result []string
+	for _, id := range devIDs {
+		result = append(result, fmt.Sprintf("%s=%s", specName, id))
+	}
+	return result
+}
+
+func debugPodID(ann map[string]string) string {
+	return fmt.Sprintf("%s/%s", ann[namespaceAnnotation], ann[nameAnnotation])
+}
--- a/src/runtime/pkg/device/config/config.go
+++ b/src/runtime/pkg/device/config/config.go
@@ -15,6 +15,7 @@ import (

 	"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
 	"github.com/go-ini/ini"
+	"github.com/kata-containers/kata-containers/src/runtime/pkg/device"
 	vcTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
 	"github.com/opencontainers/runtime-spec/specs-go"
 	"golang.org/x/sys/unix"
@@ -431,6 +432,16 @@ type VFIODev struct {
 	HostPath string
 }

+// IOMMUFDID returns the IOMMUFD ID if the VFIO device is backed by IOMMUFD;
+// otherwise it returns an empty string.
+func (t VFIODev) IOMMUFDID() string {
+	if !strings.HasPrefix(t.DevfsDev, device.IommufdDevPath) {
+		return ""
+	}
+	basename := filepath.Base(t.DevfsDev)
+	return strings.TrimPrefix(basename, "vfio")
+}
+
 // RNGDev represents a random number generator device
 type RNGDev struct {
 	// ID is used to identify the device in the hypervisor options.
@@ -684,6 +695,25 @@ func WithCDI(annotations map[string]string, cdiSpecDirs []string, spec *specs.Sp
 		return spec, nil
 	}

+	if err = injectDevices(cdiSpecDirs, spec, devsFromAnnotations); err != nil {
+		return nil, err
+	}
+
+	// One crucial thing to keep in mind is that CDI device injection
+	// might add OCI Spec environment variables, hooks, and mounts as
+	// well. Therefore it is important that none of the corresponding
+	// OCI Spec fields are reset up in the call stack once we return.
+	return spec, nil
+}
+
+// InjectCDIDevices injects the specified devices into the OCI spec.
+// devices must be a slice of strings of the form
+// vendor.com/class=unique_name.
+func InjectCDIDevices(spec *specs.Spec, devices []string) error {
+	return injectDevices(nil, spec, devices)
+}
+
+func injectDevices(cdiSpecDirs []string, spec *specs.Spec, devices []string) error {
 	var registry cdi.Registry
 	if len(cdiSpecDirs) > 0 {
 		// We can override the directories where to search for CDI specs
@@ -693,22 +723,13 @@ func WithCDI(annotations map[string]string, cdiSpecDirs []string, spec *specs.Sp
 		registry = cdi.GetRegistry()
 	}

-	if err = registry.Refresh(); err != nil {
-		// We don't consider registry refresh failure a fatal error.
-		// For instance, a dynamically generated invalid CDI Spec file for
-		// any particular vendor shouldn't prevent injection of devices of
-		// different vendors. CDI itself knows better and it will fail the
-		// injection if necessary.
-		return nil, fmt.Errorf("CDI registry refresh failed: %w", err)
+	if err := registry.Refresh(); err != nil {
+		return fmt.Errorf("CDI registry refresh failed: %w", err)
 	}

-	if _, err := registry.InjectDevices(spec, devsFromAnnotations...); err != nil {
-		return nil, fmt.Errorf("CDI device injection failed: %w", err)
+	if _, err := registry.InjectDevices(spec, devices...); err != nil {
+		return fmt.Errorf("CDI device injection failed: %w", err)
 	}

-	// One crucial thing to keep in mind is that CDI device injection
-	// might add OCI Spec environment variables, hooks, and mounts as
-	// well. Therefore it is important that none of the corresponding
-	// OCI Spec fields are reset up in the call stack once we return.
-	return spec, nil
+	return nil
 }
--- a/src/runtime/pkg/device/config/config_test.go
+++ b/src/runtime/pkg/device/config/config_test.go
@@ -68,3 +68,24 @@ func TestGetSysDevPathImpl(t *testing.T) {
 	assert.Contains(path, expectedFormat)
 	assert.Contains(path, "block")
 }
+
+// TestIOMMUFDID checks that IOMMUFDID yields the suffix after "vfio" for
+// devices under the IOMMUFD path and an empty string for any other path.
+func TestIOMMUFDID(t *testing.T) {
+	for _, tc := range []struct {
+		devfsDev string
+		expected string
+	}{
+		{"/dev/vfio/42", ""},
+		{"/dev/vfio/devices/vfio99", "99"},
+		{"/dev/vfio/invalid", ""},
+		{"/dev/other/42", ""},
+	} {
+		t.Run(tc.devfsDev, func(t *testing.T) {
+			assert := assert.New(t)
+			// Exercise the table entry itself rather than a fixed path, so
+			// every case (including the empty-string ones) is verified.
+			info := VFIODev{DevfsDev: tc.devfsDev}
+			assert.Equal(tc.expected, info.IOMMUFDID())
+		})
+	}
+}
--- a/src/runtime/pkg/device/const.go
+++ b/src/runtime/pkg/device/const.go
@@ -0,0 +1,11 @@
+// Copyright (c) 2017-2018 Intel Corporation
+// Copyright (c) 2018 Huawei Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+package device
+
+const (
+	IommufdDevPath = "/dev/vfio/devices"
+)
--- a/src/runtime/pkg/device/drivers/utils.go
+++ b/src/runtime/pkg/device/drivers/utils.go
@@ -160,7 +160,7 @@ func checkIgnorePCIClass(pciClass string, deviceBDF string, bitmask uint64) (boo
 	return false, nil
 }

-func getMajorMinorFromDevPath(devPath string) (uint32, uint32, error) {
+func GetMajorMinorFromDevPath(devPath string) (uint32, uint32, error) {
 	fi, err := os.Stat(devPath)
 	if err != nil {
 		return 0, 0, err
@@ -181,7 +181,7 @@ func extractIndex(devicePath string) (string, error) {
 	return strings.TrimPrefix(base, prefix), nil
 }

-func getBdfFromVFIODev(major uint32, minor uint32) (string, error) {
+func GetBDFFromVFIODev(major uint32, minor uint32) (string, error) {
 	devPath := fmt.Sprintf("/sys/dev/char/%d:%d", major, minor)
 	realPath, err := filepath.EvalSymlinks(devPath)
 	if err != nil {
@@ -203,13 +203,13 @@ func GetDeviceFromVFIODev(device config.DeviceInfo) ([]*config.VFIODev, error) {
 	// device major:minor entries in /sys/chart/major:minor
 	// $ ls -l /dev/vfio/devices/vfio0
 	// crw------- 1 root root 237, 0 Jan 15 16:53 /dev/vfio/devices/vfio0
-	major, minor, err := getMajorMinorFromDevPath(device.HostPath)
+	major, minor, err := GetMajorMinorFromDevPath(device.HostPath)
 	if err != nil {
 		return nil, fmt.Errorf("Failed to get major:minor from %s: %v", device.HostPath, err)
 	}
 	// $ ls -l /sys/dev/char/237:0
 	// /sys/dev/char/237:0 -> ../../devices/pci0000:64/0000:64:00.0/0000:65:00.0/vfio-dev/vfio0
-	deviceBDF, err := getBdfFromVFIODev(major, minor)
+	deviceBDF, err := GetBDFFromVFIODev(major, minor)
 	if err != nil {
 		return nil, err
 	}
--- a/src/runtime/pkg/device/drivers/vfio.go
+++ b/src/runtime/pkg/device/drivers/vfio.go
@@ -15,6 +15,7 @@ import (

 	"github.com/sirupsen/logrus"

+	pkgDevice "github.com/kata-containers/kata-containers/src/runtime/pkg/device"
 	"github.com/kata-containers/kata-containers/src/runtime/pkg/device/api"
 	"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
@@ -28,7 +29,6 @@ const (
 	iommuGroupPath        = "/sys/bus/pci/devices/%s/iommu_group"
 	vfioDevPath           = "/dev/vfio/%s"
 	vfioAPSysfsDir        = "/sys/devices/vfio_ap"
-	IommufdDevPath        = "/dev/vfio/devices"
 )

 // VFIODevice is a vfio device meant to be passed to the hypervisor
@@ -69,7 +69,7 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece
 	// In the case of IOMMUFD the device.HostPath will look like
 	// /dev/vfio/devices/vfio0
 	// (1) Check if we have the new IOMMUFD or old container based VFIO
-	if strings.HasPrefix(device.DeviceInfo.HostPath, IommufdDevPath) {
+	if strings.HasPrefix(device.DeviceInfo.HostPath, pkgDevice.IommufdDevPath) {
 		device.VfioDevs, err = GetDeviceFromVFIODev(*device.DeviceInfo)
 		if err != nil {
 			return err
--- a/src/runtime/pkg/govmm/qemu/qemu.go
+++ b/src/runtime/pkg/govmm/qemu/qemu.go
@@ -27,7 +27,7 @@ import (
 	"strings"
 	"syscall"

-	"github.com/kata-containers/kata-containers/src/runtime/pkg/device/drivers"
+	pkgDevice "github.com/kata-containers/kata-containers/src/runtime/pkg/device"
 )

 // Machine describes the machine type qemu will emulate.
@@ -2023,7 +2023,7 @@ func (vfioDev VFIODevice) QemuParams(config *Config) []string {
 		deviceParams = append(deviceParams, fmt.Sprintf("devno=%s", vfioDev.DevNo))
 	}

-	if strings.HasPrefix(vfioDev.DevfsDev, drivers.IommufdDevPath) {
+	if strings.HasPrefix(vfioDev.DevfsDev, pkgDevice.IommufdDevPath) {
 		qemuParams = append(qemuParams, "-object")
 		qemuParams = append(qemuParams, fmt.Sprintf("iommufd,id=iommufd%s", vfioDev.ID))
 		deviceParams = append(deviceParams, fmt.Sprintf("iommufd=iommufd%s", vfioDev.ID))
--- a/src/runtime/pkg/govmm/qemu/qmp.go
+++ b/src/runtime/pkg/govmm/qemu/qmp.go
@@ -43,6 +43,10 @@ type QMPLog interface {
 	// Errorf writes error output to the log.  A newline will be
 	// added to the output if one is not provided.
 	Errorf(string, ...interface{})
+
+	// Debugf writes debug output to the log.  A newline will be
+	// added to the output if one is not provided.
+	Debugf(string, ...interface{})
 }

 type qmpNullLogger struct{}
@@ -60,6 +64,9 @@ func (l qmpNullLogger) Warningf(format string, v ...interface{}) {
 func (l qmpNullLogger) Errorf(format string, v ...interface{}) {
 }

+func (l qmpNullLogger) Debugf(format string, v ...interface{}) {
+}
+
 // QMPConfig is a configuration structure that can be used to specify a
 // logger and a channel to which logs and  QMP events are to be sent.  If
 // neither of these fields are specified, or are set to nil, no logs will be
@@ -653,6 +660,7 @@ func (q *QMP) executeCommandWithResponse(ctx context.Context, name string, args

 func (q *QMP) executeCommand(ctx context.Context, name string, args map[string]interface{},
 	filter *qmpEventFilter) error {
+	q.cfg.Logger.Debugf("Executing QMP command: %s: %v", name, args)

 	_, err := q.executeCommandWithResponse(ctx, name, args, nil, filter)
 	return err
@@ -1101,7 +1109,7 @@ func (q *QMP) ExecuteDeviceDel(ctx context.Context, devID string) error {
 // disableModern indicates if virtio version 1.0 should be replaced by the
 // former version 0.9, as there is a KVM bug that occurs when using virtio
 // 1.0 in nested environments.
-func (q *QMP) ExecutePCIDeviceAdd(ctx context.Context, blockdevID, devID, driver, addr, bus, romfile string, queues int, shared, disableModern bool) error {
+func (q *QMP) ExecutePCIDeviceAdd(ctx context.Context, blockdevID, devID, driver, addr, bus, romfile string, queues int, shared, disableModern bool, iothreadID string) error {
 	args := map[string]interface{}{
 		"id":     devID,
 		"driver": driver,
@@ -1128,6 +1136,10 @@ func (q *QMP) ExecutePCIDeviceAdd(ctx context.Context, blockdevID, devID, driver
 		}
 	}

+	if iothreadID != "" {
+		args["iothread"] = iothreadID
+	}
+
 	return q.executeCommand(ctx, "device_add", args, nil)
 }

@@ -1156,7 +1168,8 @@ func (q *QMP) ExecutePCIVhostUserDevAdd(ctx context.Context, driver, devID, char
 // devID is the id of the device to add. Must be valid QMP identifier.
 // bdf is the PCI bus-device-function of the pci device.
 // bus is optional. When hot plugging a PCIe device, the bus can be the ID of the pcie-root-port.
-func (q *QMP) ExecuteVFIODeviceAdd(ctx context.Context, devID, bdf, bus, romfile string) error {
+// iommufdID is the ID of the iommufd object to be created for this device. If empty, no iommufd object will be created.
+func (q *QMP) ExecuteVFIODeviceAdd(ctx context.Context, devID, bdf, bus, romfile string, iommufdID string) error {
 	var driver string
 	var transport VirtioTransport

@@ -1175,6 +1188,17 @@ func (q *QMP) ExecuteVFIODeviceAdd(ctx context.Context, devID, bdf, bus, romfile
 	if bus != "" {
 		args["bus"] = bus
 	}
+	if iommufdID != "" {
+		iommufdIDFull := "iommufd" + iommufdID
+		objectAddArgs := map[string]interface{}{
+			"qom-type": "iommufd",
+			"id":       iommufdIDFull,
+		}
+		if err := q.executeCommand(ctx, "object-add", objectAddArgs, nil); err != nil {
+			return err
+		}
+		args["iommufd"] = iommufdIDFull
+	}
 	return q.executeCommand(ctx, "device_add", args, nil)
 }

--- a/src/runtime/pkg/govmm/qemu/qmp_test.go
+++ b/src/runtime/pkg/govmm/qemu/qmp_test.go
@@ -50,6 +50,10 @@ func (l qmpTestLogger) Errorf(format string, v ...interface{}) {
 	l.Infof(format, v...)
 }

+func (l qmpTestLogger) Debugf(format string, v ...interface{}) {
+	l.Infof(format, v...)
+}
+
 // nolint: govet
 type qmpTestCommand struct {
 	name string
@@ -1066,7 +1070,7 @@ func TestQMPPCIDeviceAdd(t *testing.T) {
 	blockdevID := fmt.Sprintf("drive_%s", volumeUUID)
 	devID := fmt.Sprintf("device_%s", volumeUUID)
 	err := q.ExecutePCIDeviceAdd(context.Background(), blockdevID, devID,
-		"virtio-blk-pci", "0x1", "", "", 1, true, false)
+		"virtio-blk-pci", "0x1", "", "", 1, true, false, "")
 	if err != nil {
 		t.Fatalf("Unexpected error %v", err)
 	}
@@ -1136,6 +1140,51 @@ func TestQMPAPVFIOMediatedDeviceAdd(t *testing.T) {
 	<-disconnectedCh
 }

+func TestExecuteVFIODeviceAdd(t *testing.T) {
+	bdf := "04:00.0"
+	romfile := ""
+
+	for _, tc := range []struct {
+		name      string
+		iommufdID string
+	}{
+		{
+			name:      "with IOMMUFD",
+			iommufdID: "0",
+		},
+		{
+			name:      "without IOMMUFD",
+			iommufdID: "",
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			connectedCh := make(chan *QMPVersion)
+			disconnectedCh := make(chan struct{})
+
+			buf := newQMPTestCommandBuffer(t)
+
+			// Note: At the time of writing, the QMPTestCommandBuffer does not
+			// support verifying parameters passed to object-add and device_add.
+			// So we just verify that the commands are sent in the correct order.
+			if tc.iommufdID != "" {
+				buf.AddCommand("object-add", nil, "return", nil)
+			}
+			buf.AddCommand("device_add", nil, "return", nil)
+
+			cfg := QMPConfig{Logger: qmpTestLogger{}}
+			q := startQMPLoop(buf, cfg, connectedCh, disconnectedCh)
+			checkVersion(t, connectedCh)
+
+			err := q.ExecuteVFIODeviceAdd(context.Background(), "devID", bdf, "rp1", romfile, tc.iommufdID)
+			if err != nil {
+				t.Fatalf("Unexpected error %v", err)
+			}
+			q.Shutdown()
+			<-disconnectedCh
+		})
+	}
+}
+
 // Checks that CPU are correctly added using device_add
 func TestQMPCPUDeviceAdd(t *testing.T) {
 	drivers := []string{"host-x86_64-cpu", "host-s390x-cpu", "host-powerpc64-cpu"}
--- a/src/runtime/pkg/katatestutils/utils.go
+++ b/src/runtime/pkg/katatestutils/utils.go
@@ -207,41 +207,42 @@ const (
 )

 type RuntimeConfigOptions struct {
-	Hypervisor           string
-	HypervisorPath       string
-	DefaultGuestHookPath string
-	KernelPath           string
-	ImagePath            string
-	RootfsType           string
-	KernelParams         string
-	MachineType          string
-	LogPath              string
-	BlockDeviceDriver    string
-	BlockDeviceAIO       string
-	SharedFS             string
-	VirtioFSDaemon       string
-	JaegerEndpoint       string
-	JaegerUser           string
-	JaegerPassword       string
-	PFlash               []string
-	HotPlugVFIO          config.PCIePort
-	ColdPlugVFIO         config.PCIePort
-	PCIeRootPort         uint32
-	PCIeSwitchPort       uint32
-	DefaultVCPUCount     uint32
-	DefaultMaxVCPUCount  uint32
-	DefaultMemSize       uint32
-	DefaultMaxMemorySize uint64
-	DefaultMsize9p       uint32
-	DisableBlock         bool
-	EnableIOThreads      bool
-	DisableNewNetNs      bool
-	HypervisorDebug      bool
-	RuntimeDebug         bool
-	RuntimeTrace         bool
-	AgentDebug           bool
-	AgentTrace           bool
-	EnablePprof          bool
+	Hypervisor            string
+	HypervisorPath        string
+	DefaultGuestHookPath  string
+	KernelPath            string
+	ImagePath             string
+	RootfsType            string
+	KernelParams          string
+	MachineType           string
+	LogPath               string
+	BlockDeviceDriver     string
+	BlockDeviceAIO        string
+	SharedFS              string
+	VirtioFSDaemon        string
+	JaegerEndpoint        string
+	JaegerUser            string
+	JaegerPassword        string
+	PFlash                []string
+	HotPlugVFIO           config.PCIePort
+	ColdPlugVFIO          config.PCIePort
+	PCIeRootPort          uint32
+	PCIeSwitchPort        uint32
+	DefaultVCPUCount      uint32
+	DefaultMaxVCPUCount   uint32
+	DefaultMemSize        uint32
+	DefaultMaxMemorySize  uint64
+	DefaultMsize9p        uint32
+	DefaultIndepIOThreads uint32
+	DisableBlock          bool
+	EnableIOThreads       bool
+	DisableNewNetNs       bool
+	HypervisorDebug       bool
+	RuntimeDebug          bool
+	RuntimeTrace          bool
+	AgentDebug            bool
+	AgentTrace            bool
+	EnablePprof           bool
 }

 // ContainerIDTestDataType is a type used to test Container and Sandbox ID's.
@@ -318,6 +319,7 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string {
 	default_memory = ` + strconv.FormatUint(uint64(config.DefaultMemSize), 10) + `
 	disable_block_device_use =  ` + strconv.FormatBool(config.DisableBlock) + `
 	enable_iothreads =  ` + strconv.FormatBool(config.EnableIOThreads) + `
+	indep_iothreads = ` + strconv.FormatUint(uint64(config.DefaultIndepIOThreads), 10) + `
 	cold_plug_vfio =  "` + config.ColdPlugVFIO.String() + `"
 	hot_plug_vfio =   "` + config.HotPlugVFIO.String() + `"
 	pcie_root_port = ` + strconv.FormatUint(uint64(config.PCIeRootPort), 10) + `
--- a/src/runtime/pkg/katautils/config-settings.go.in
+++ b/src/runtime/pkg/katautils/config-settings.go.in
@@ -74,6 +74,7 @@ const defaultBlockDeviceCacheSet bool = false
 const defaultBlockDeviceCacheDirect bool = false
 const defaultBlockDeviceCacheNoflush bool = false
 const defaultEnableIOThreads bool = false
+const defaultIndepIOThreads uint32 = 0
 const defaultEnableMemPrealloc bool = false
 const defaultEnableReclaimGuestFreedMemory bool = false
 const defaultEnableHugePages bool = false
--- a/src/runtime/pkg/katautils/config.go
+++ b/src/runtime/pkg/katautils/config.go
@@ -157,6 +157,7 @@ type hypervisor struct {
 	Debug                          bool                      `toml:"enable_debug"`
 	DisableNestingChecks           bool                      `toml:"disable_nesting_checks"`
 	EnableIOThreads                bool                      `toml:"enable_iothreads"`
+	IndepIOThreads                 uint32                    `toml:"indep_iothreads"`
 	DisableImageNvdimm             bool                      `toml:"disable_image_nvdimm"`
 	HotPlugVFIO                    config.PCIePort           `toml:"hot_plug_vfio"`
 	ColdPlugVFIO                   config.PCIePort           `toml:"cold_plug_vfio"`
@@ -196,6 +197,7 @@ type runtime struct {
 	CreateContainerTimeout    uint64   `toml:"create_container_timeout"`
 	DanConf                   string   `toml:"dan_conf"`
 	ForceGuestPull            bool     `toml:"experimental_force_guest_pull"`
+	PodResourceAPISock        string   `toml:"pod_resource_api_sock"`
 }

 type agent struct {
@@ -622,6 +624,14 @@ func (h hypervisor) msize9p() uint32 {
 	return h.Msize9p
 }

+// indepiothreads returns h.IndepIOThreads, or defaultIndepIOThreads when the field is zero.
+func (h hypervisor) indepiothreads() uint32 {
+	if h.IndepIOThreads != 0 {
+		return h.IndepIOThreads
+	}
+	return defaultIndepIOThreads
+}
+
 func (h hypervisor) guestHookPath() string {
 	if h.GuestHookPath == "" {
 		return defaultGuestHookPath
@@ -818,6 +828,7 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
 		DisableNestingChecks:  h.DisableNestingChecks,
 		BlockDeviceDriver:     blockDriver,
 		EnableIOThreads:       h.EnableIOThreads,
+		IndepIOThreads:        h.indepiothreads(),
 		DisableVhostNet:       true, // vhost-net backend is not supported in Firecracker
 		GuestHookPath:         h.guestHookPath(),
 		RxRateLimiterMaxRate:  rxRateLimiterMaxRate,
@@ -973,6 +984,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
 		BlockDeviceCacheDirect:   h.BlockDeviceCacheDirect,
 		BlockDeviceCacheNoflush:  h.BlockDeviceCacheNoflush,
 		EnableIOThreads:          h.EnableIOThreads,
+		IndepIOThreads:           h.indepiothreads(),
 		Msize9p:                  h.msize9p(),
 		DisableImageNvdimm:       h.DisableImageNvdimm,
 		HotPlugVFIO:              h.hotPlugVFIO(),
@@ -1105,6 +1117,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
 		BlockDeviceCacheSet:            h.BlockDeviceCacheSet,
 		BlockDeviceCacheDirect:         h.BlockDeviceCacheDirect,
 		EnableIOThreads:                h.EnableIOThreads,
+		IndepIOThreads:                 h.indepiothreads(),
 		Msize9p:                        h.msize9p(),
 		DisableImageNvdimm:             h.DisableImageNvdimm,
 		ColdPlugVFIO:                   h.coldPlugVFIO(),
@@ -1464,6 +1477,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
 		BlockDeviceCacheDirect:   defaultBlockDeviceCacheDirect,
 		BlockDeviceCacheNoflush:  defaultBlockDeviceCacheNoflush,
 		EnableIOThreads:          defaultEnableIOThreads,
+		IndepIOThreads:           defaultIndepIOThreads,
 		Msize9p:                  defaultMsize9p,
 		ColdPlugVFIO:             defaultColdPlugVFIO,
 		HotPlugVFIO:              defaultHotPlugVFIO,
@@ -1602,6 +1616,7 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat
 	}

 	config.ForceGuestPull = tomlConf.Runtime.ForceGuestPull
+	config.PodResourceAPISock = tomlConf.Runtime.PodResourceAPISock

 	return resolved, config, nil
 }
--- a/src/runtime/pkg/oci/utils.go
+++ b/src/runtime/pkg/oci/utils.go
@@ -174,6 +174,25 @@ type RuntimeConfig struct {

 	// ForceGuestPull enforces guest pull independent of snapshotter annotations.
 	ForceGuestPull bool
+
+	// PodResourceAPISock specifies the unix socket for the Kubelet's
+	// PodResource API endpoint. If empty, kubernetes based cold plug
+	// will not be attempted. In order for this feature to work, the
+	// KubeletPodResourcesGet featureGate must be enabled in Kubelet,
+	// if using Kubelet older than 1.34.
+	//
+	// The pod resource API's socket is relative to the Kubelet's root-dir,
+	// which is defined by the cluster admin, and its location is:
+	// ${KubeletRootDir}/pod-resources/kubelet.sock
+	//
+	// HypervisorConfig.ColdPlugVFIO acts as a feature gate:
+	// 	ColdPlugVFIO = NoPort => no cold plug
+	//	ColdPlugVFIO != NoPort AND PodResourceAPISock = "" => need
+	//		explicit CDI annotation for cold plug (applies mainly
+	//		to non-k8s cases)
+	//	ColdPlugVFIO != NoPort AND PodResourceAPISock != "" => kubelet
+	//		based cold plug.
+	PodResourceAPISock string
 }

 // AddKernelParam allows the addition of new kernel parameters to an existing
@@ -596,7 +615,20 @@ func addHypervisorPathOverrides(ocispec specs.Spec, config *vc.SandboxConfig, ru
 	if value, ok := ocispec.Annotations[vcAnnotations.KernelParams]; ok {
 		if value != "" {
 			params := vc.DeserializeParams(strings.Fields(value))
+
+			// Annotation parameters should replace existing parameters with the same key
+			// rather than append, to allow overriding default values
 			for _, param := range params {
+				// Remove any existing parameter with the same key
+				var newParams []vc.Param
+				for _, existingParam := range config.HypervisorConfig.KernelParams {
+					if existingParam.Key != param.Key {
+						newParams = append(newParams, existingParam)
+					}
+				}
+				config.HypervisorConfig.KernelParams = newParams
+
+				// Now add the annotation parameter
 				if err := config.HypervisorConfig.AddKernelParam(param); err != nil {
 					return fmt.Errorf("Error adding kernel parameters in annotation kernel_params : %v", err)
 				}
@@ -840,6 +872,17 @@ func addHypervisorBlockOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig)
 		return err
 	}

+	if err := newAnnotationConfiguration(ocispec, vcAnnotations.IndepIOThreads).setUintWithCheck(func(indepiothreads uint64) error {
+		// Only 1..50 independent IO threads are accepted: zero and anything above 50 are rejected.
+		if indepiothreads == 0 || indepiothreads > 50 {
+			return fmt.Errorf("Error parsing annotation for indepiothreads, please specify numeric value between 1 and 50")
+		}
+		sbConfig.HypervisorConfig.IndepIOThreads = uint32(indepiothreads)
+		return nil
+	}); err != nil {
+		return err
+	}
+
 	if err := newAnnotationConfiguration(ocispec, vcAnnotations.BlockDeviceCacheSet).setBool(func(blockDeviceCacheSet bool) {
 		sbConfig.HypervisorConfig.BlockDeviceCacheSet = blockDeviceCacheSet
 	}); err != nil {
--- a/src/runtime/vendor/github.com/Microsoft/hcsshim/.clang-format
+++ b/src/runtime/vendor/github.com/Microsoft/hcsshim/.clang-format
@@ -0,0 +1,12 @@
+Language: Cpp
+BasedOnStyle: Microsoft
+BreakBeforeBraces: Attach
+PointerAlignment: Left
+AllowShortFunctionsOnASingleLine: All
+# match Go style
+IndentCaseLabels: false
+# don't break comments over line limit (needed for CodeQL exceptions)
+ReflowComments: false
+InsertNewlineAtEOF: true
+KeepEmptyLines:
+  AtEndOfFile: true
--- a/src/runtime/vendor/github.com/Microsoft/hcsshim/.golangci.yml
+++ b/src/runtime/vendor/github.com/Microsoft/hcsshim/.golangci.yml
@@ -5,9 +5,6 @@ run:
    - admin
    - functional
    - integration
-  skip-dirs:
-    # paths are relative to module root
-    - cri-containerd/test-images

 linters:
  enable:
@@ -34,13 +31,15 @@ linters-settings:
      # struct order is often for Win32 compat
      # also, ignore pointer bytes/GC issues for now until performance becomes an issue
      - fieldalignment
-    check-shadowing: true

  stylecheck:
    # https://staticcheck.io/docs/checks
    checks: ["all"]

 issues:
+  exclude-dirs:
+    # paths are relative to module root
+    - cri-containerd/test-images
  exclude-rules:
    # err is very often shadowed in nested scopes
    - linters:
@@ -70,22 +69,22 @@ issues:
    - path: layer.go
      linters:
        - stylecheck
-      Text: "ST1003:"
+      text: "ST1003:"

    - path: hcsshim.go
      linters:
        - stylecheck
-      Text: "ST1003:"
+      text: "ST1003:"

    - path: cmd\\ncproxy\\nodenetsvc\\
      linters:
        - stylecheck
-      Text: "ST1003:"
+      text: "ST1003:"

    - path: cmd\\ncproxy_mock\\
      linters:
        - stylecheck
-      Text: "ST1003:"
+      text: "ST1003:"

    - path: internal\\hcs\\schema2\\
      linters:
@@ -95,67 +94,67 @@ issues:
    - path: internal\\wclayer\\
      linters:
        - stylecheck
-      Text: "ST1003:"
+      text: "ST1003:"

    - path: hcn\\
      linters:
        - stylecheck
-      Text: "ST1003:"
+      text: "ST1003:"

    - path: internal\\hcs\\schema1\\
      linters:
        - stylecheck
-      Text: "ST1003:"
+      text: "ST1003:"

    - path: internal\\hns\\
      linters:
        - stylecheck
-      Text: "ST1003:"
+      text: "ST1003:"

    - path: ext4\\internal\\compactext4\\
      linters:
        - stylecheck
-      Text: "ST1003:"
+      text: "ST1003:"

    - path: ext4\\internal\\format\\
      linters:
        - stylecheck
-      Text: "ST1003:"
+      text: "ST1003:"

    - path: internal\\guestrequest\\
      linters:
        - stylecheck
-      Text: "ST1003:"
+      text: "ST1003:"

    - path: internal\\guest\\prot\\
      linters:
        - stylecheck
-      Text: "ST1003:"
+      text: "ST1003:"

    - path: internal\\windevice\\
      linters:
        - stylecheck
-      Text: "ST1003:"
+      text: "ST1003:"

    - path: internal\\winapi\\
      linters:
        - stylecheck
-      Text: "ST1003:"
+      text: "ST1003:"

    - path: internal\\vmcompute\\
      linters:
        - stylecheck
-      Text: "ST1003:"
+      text: "ST1003:"

    - path: internal\\regstate\\
      linters:
        - stylecheck
-      Text: "ST1003:"
+      text: "ST1003:"

    - path: internal\\hcserror\\
      linters:
        - stylecheck
-      Text: "ST1003:"
+      text: "ST1003:"

    # v0 APIs are deprecated, but still retained for backwards compatability
    - path: cmd\\ncproxy\\
@@ -171,4 +170,4 @@ issues:
    - path: internal\\vhdx\\info
      linters:
        - stylecheck
-      Text: "ST1003:"
+      text: "ST1003:"
--- a/src/runtime/vendor/github.com/Microsoft/hcsshim/Makefile
+++ b/src/runtime/vendor/github.com/Microsoft/hcsshim/Makefile
@@ -1,13 +1,20 @@
-BASE:=base.tar.gz
-DEV_BUILD:=0
+include Makefile.bootfiles

 GO:=go
 GO_FLAGS:=-ldflags "-s -w" # strip Go binaries
 CGO_ENABLED:=0
 GOMODVENDOR:=
+KMOD:=0

 CFLAGS:=-O2 -Wall
-LDFLAGS:=-static -s # strip C binaries
+LDFLAGS:=-static -s #strip C binaries
+LDLIBS:=
+PREPROCESSORFLAGS:=
+ifeq "$(KMOD)" "1"
+LDFLAGS:= -s
+LDLIBS:= -lkmod
+PREPROCESSORFLAGS:=-DMODULES=1
+endif

 GO_FLAGS_EXTRA:=
 ifeq "$(GOMODVENDOR)" "1"
@@ -23,108 +30,14 @@ SRCROOT=$(dir $(abspath $(firstword $(MAKEFILE_LIST))))
 # additional directories to search for rule prerequisites and targets
 VPATH=$(SRCROOT)

-DELTA_TARGET=out/delta.tar.gz
-
-ifeq "$(DEV_BUILD)" "1"
-DELTA_TARGET=out/delta-dev.tar.gz
-endif
-
-ifeq "$(SNP_BUILD)" "1"
-DELTA_TARGET=out/delta-snp.tar.gz
-endif
-
 # The link aliases for gcstools
 GCS_TOOLS=\
 	generichook \
 	install-drivers

-# Common path prefix.
-PATH_PREFIX:=
-# These have PATH_PREFIX prepended to obtain the full path in recipies e.g. $(PATH_PREFIX)/$(VMGS_TOOL)
-VMGS_TOOL:=
-IGVM_TOOL:=
-KERNEL_PATH:=
-
-.PHONY: all always rootfs test snp simple
-
-.DEFAULT_GOAL := all
-
-all: out/initrd.img out/rootfs.tar.gz
-
-clean:
-	find -name '*.o' -print0 | xargs -0 -r rm
-	rm -rf bin deps rootfs out
-
 test:
 	cd $(SRCROOT) && $(GO) test -v ./internal/guest/...

-rootfs: out/rootfs.vhd
-
-snp: out/kernelinitrd.vmgs out/rootfs.hash.vhd out/rootfs.vhd out/v2056.vmgs
-
-simple: out/simple.vmgs snp
-
-%.vmgs: %.bin
-	rm -f $@
-	# du -BM returns the size of the bin file in M, eg 7M. The sed command replaces the M with *1024*1024 and then bc does the math to convert to bytes
-	$(PATH_PREFIX)/$(VMGS_TOOL) create --filepath $@ --filesize `du -BM $< | sed  "s/M.*/*1024*1024/" | bc`
-	$(PATH_PREFIX)/$(VMGS_TOOL) write --filepath $@ --datapath $< -i=8
-
-# Simplest debug UVM used to test changes to the linux kernel. No dmverity protection. Boots an initramdisk rather than directly booting a vhd disk.
-out/simple.bin: out/initrd.img $(PATH_PREFIX)/$(KERNEL_PATH) boot/startup_simple.sh
-	rm -f $@
-	python3 $(PATH_PREFIX)/$(IGVM_TOOL) -o $@ -kernel $(PATH_PREFIX)/$(KERNEL_PATH) -append "8250_core.nr_uarts=0 panic=-1 debug loglevel=7 rdinit=/startup_simple.sh" -rdinit out/initrd.img -vtl 0
-
-ROOTFS_DEVICE:=/dev/sda
-VERITY_DEVICE:=/dev/sdb
-# Debug build for use with uvmtester. UVM with dm-verity protected vhd disk mounted directly via the kernel command line. Ignores corruption in dm-verity protected disk. (Use dmesg to see if dm-verity is ignoring data corruption.)
-out/v2056.bin: out/rootfs.vhd out/rootfs.hash.vhd $(PATH_PREFIX)/$(KERNEL_PATH) out/rootfs.hash.datasectors out/rootfs.hash.datablocksize out/rootfs.hash.hashblocksize out/rootfs.hash.datablocks out/rootfs.hash.rootdigest out/rootfs.hash.salt boot/startup_v2056.sh
-	rm -f $@
-	python3 $(PATH_PREFIX)/$(IGVM_TOOL) -o $@ -kernel $(PATH_PREFIX)/$(KERNEL_PATH) -append "8250_core.nr_uarts=0 panic=-1 debug loglevel=7 root=/dev/dm-0 dm-mod.create=\"dmverity,,,ro,0 $(shell cat out/rootfs.hash.datasectors) verity 1 $(ROOTFS_DEVICE) $(VERITY_DEVICE) $(shell cat out/rootfs.hash.datablocksize) $(shell cat out/rootfs.hash.hashblocksize) $(shell cat out/rootfs.hash.datablocks) 0 sha256 $(shell cat out/rootfs.hash.rootdigest) $(shell cat out/rootfs.hash.salt) 1 ignore_corruption\" init=/startup_v2056.sh"  -vtl 0
-
-# Full UVM with dm-verity protected vhd disk mounted directly via the kernel command line.
-out/kernelinitrd.bin: out/rootfs.vhd out/rootfs.hash.vhd out/rootfs.hash.datasectors out/rootfs.hash.datablocksize out/rootfs.hash.hashblocksize out/rootfs.hash.datablocks out/rootfs.hash.rootdigest out/rootfs.hash.salt $(PATH_PREFIX)/$(KERNEL_PATH) boot/startup.sh
-	rm -f $@
-	python3 $(PATH_PREFIX)/$(IGVM_TOOL) -o $@ -kernel $(PATH_PREFIX)/$(KERNEL_PATH) -append "8250_core.nr_uarts=0 panic=-1 debug loglevel=7 root=/dev/dm-0 dm-mod.create=\"dmverity,,,ro,0 $(shell cat out/rootfs.hash.datasectors) verity 1 $(ROOTFS_DEVICE) $(VERITY_DEVICE) $(shell cat out/rootfs.hash.datablocksize) $(shell cat out/rootfs.hash.hashblocksize) $(shell cat out/rootfs.hash.datablocks) 0 sha256 $(shell cat out/rootfs.hash.rootdigest) $(shell cat out/rootfs.hash.salt)\" init=/startup.sh"  -vtl 0
-
-# Rule to make a vhd from a file. This is used to create the rootfs.hash.vhd from rootfs.hash.
-%.vhd: % bin/cmd/tar2ext4
-	./bin/cmd/tar2ext4 -only-vhd -i $< -o $@
-
-# Rule to make a vhd from an ext4 file. This is used to create the rootfs.vhd from rootfs.ext4.
-%.vhd: %.ext4 bin/cmd/tar2ext4
-	./bin/cmd/tar2ext4 -only-vhd -i $< -o $@
-
-%.hash %.hash.info %.hash.datablocks %.hash.rootdigest %hash.datablocksize %.hash.datasectors %.hash.hashblocksize: %.ext4 %.hash.salt
-	veritysetup format --no-superblock --salt $(shell cat out/rootfs.hash.salt) $< $*.hash > $*.hash.info
-    # Retrieve info required by dm-verity at boot time
-    # Get the blocksize of rootfs
-	cat $*.hash.info | awk '/^Root hash:/{ print $$3 }' > $*.hash.rootdigest
-	cat $*.hash.info | awk '/^Salt:/{ print $$2 }' > $*.hash.salt
-	cat $*.hash.info | awk '/^Data block size:/{ print $$4 }' > $*.hash.datablocksize
-	cat $*.hash.info | awk '/^Hash block size:/{ print $$4 }' > $*.hash.hashblocksize
-	cat $*.hash.info | awk '/^Data blocks:/{ print $$3 }' > $*.hash.datablocks
-	echo $$(( $$(cat $*.hash.datablocks) * $$(cat $*.hash.datablocksize) / 512 )) > $*.hash.datasectors
-
-out/rootfs.hash.salt:
-	hexdump -vn32 -e'8/4 "%08X" 1 "\n"' /dev/random > $@
-
-out/rootfs.ext4: out/rootfs.tar.gz bin/cmd/tar2ext4
-	gzip -f -d ./out/rootfs.tar.gz
-	./bin/cmd/tar2ext4 -i ./out/rootfs.tar -o $@
-
-out/rootfs.tar.gz: out/initrd.img
-	rm -rf rootfs-conv
-	mkdir rootfs-conv
-	gunzip -c out/initrd.img | (cd rootfs-conv && cpio -imd)
-	tar -zcf $@ -C rootfs-conv .
-	rm -rf rootfs-conv
-
-out/initrd.img: $(BASE) $(DELTA_TARGET) $(SRCROOT)/hack/catcpio.sh
-	$(SRCROOT)/hack/catcpio.sh "$(BASE)" $(DELTA_TARGET) > out/initrd.img.uncompressed
-	gzip -c out/initrd.img.uncompressed > $@
-	rm out/initrd.img.uncompressed
-
 # This target includes utilities which may be useful for testing purposes.
 out/delta-dev.tar.gz: out/delta.tar.gz bin/internal/tools/snp-report
 	rm -rf rootfs-dev
@@ -168,10 +81,7 @@ out/delta.tar.gz: bin/init bin/vsockexec bin/cmd/gcs bin/cmd/gcstools bin/cmd/ho
 	tar -zcf $@ -C rootfs .
 	rm -rf rootfs

-out/containerd-shim-runhcs-v1.exe:
-	GOOS=windows $(GO_BUILD) -o $@ $(SRCROOT)/cmd/containerd-shim-runhcs-v1
-
-bin/cmd/gcs bin/cmd/gcstools bin/cmd/hooks/wait-paths bin/cmd/tar2ext4 bin/internal/tools/snp-report bin/cmd/dmverity-vhd:
+bin/cmd/gcs bin/cmd/gcstools bin/cmd/hooks/wait-paths bin/cmd/tar2ext4 bin/internal/tools/snp-report:
 	@mkdir -p $(dir $@)
 	GOOS=linux $(GO_BUILD) -o $@ $(SRCROOT)/$(@:bin/%=%)

@@ -181,8 +91,8 @@ bin/vsockexec: vsockexec/vsockexec.o vsockexec/vsock.o

 bin/init: init/init.o vsockexec/vsock.o
 	@mkdir -p bin
-	$(CC) $(LDFLAGS) -o $@ $^
+	$(CC) $(LDFLAGS) -o $@ $^ $(LDLIBS)

 %.o: %.c
 	@mkdir -p $(dir $@)
-	$(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<
+	$(CC) $(PREPROCESSORFLAGS) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<
--- a/src/runtime/vendor/github.com/Microsoft/hcsshim/Makefile.bootfiles
+++ b/src/runtime/vendor/github.com/Microsoft/hcsshim/Makefile.bootfiles
@@ -0,0 +1,197 @@
+BASE:=base.tar.gz
+DEV_BUILD:=0
+
+DELTA_TARGET=out/delta.tar.gz
+
+ifeq "$(DEV_BUILD)" "1"
+DELTA_TARGET=out/delta-dev.tar.gz
+endif
+
+ifeq "$(SNP_BUILD)" "1"
+DELTA_TARGET=out/delta-snp.tar.gz
+endif
+
+SRCROOT=$(dir $(abspath $(firstword $(MAKEFILE_LIST))))
+
+PATH_PREFIX:=
+# These have PATH_PREFIX prepended to obtain the full path in recipes e.g. $(PATH_PREFIX)/$(VMGS_TOOL)
+VMGS_TOOL:=
+IGVM_TOOL:=
+KERNEL_PATH:=
+TAR2EXT4_TOOL:=bin/cmd/tar2ext4
+
+ROOTFS_DEVICE:=/dev/sda
+HASH_DEVICE:=/dev/sdb
+
+.PHONY: all always clean rootfs test snp simple
+
+.DEFAULT_GOAL := all
+
+all: out/initrd.img out/rootfs.tar.gz
+
+clean:
+	find -name '*.o' -print0 | xargs -0 -r rm
+	rm -rf bin rootfs out
+
+rootfs: out/rootfs.vhd
+
+snp: out/kernel.vmgs out/rootfs-verity.vhd out/v2056.vmgs out/v2056combined.vmgs
+
+simple: out/simple.vmgs snp
+
+%.vmgs: %.bin
+	rm -f $@
+	# du -BM returns the size of the bin file in M, eg 7M. The sed command replaces the M with *1024*1024 and then bc does the math to convert to bytes
+	$(PATH_PREFIX)/$(VMGS_TOOL) create --filepath $@ --filesize `du -BM $< | sed  "s/M.*/*1024*1024/" | bc`
+	$(PATH_PREFIX)/$(VMGS_TOOL) write --filepath $@ --datapath $< -i=8
+
+# Simplest debug UVM used to test changes to the linux kernel. No dmverity protection. Boots an initramdisk rather than directly booting a vhd disk.
+out/simple.bin: out/initrd.img $(PATH_PREFIX)/$(KERNEL_PATH) boot/startup_simple.sh
+	rm -f $@
+	python3 $(PATH_PREFIX)/$(IGVM_TOOL) \
+		-o $@ \
+		-kernel $(PATH_PREFIX)/$(KERNEL_PATH) \
+		-append "8250_core.nr_uarts=0 panic=-1 debug loglevel=7 rdinit=/startup_simple.sh" \
+		-rdinit out/initrd.img \
+		-vtl 0
+
+# The boot performance is optimized by supplying rootfs as a SCSI attachment. In this case the kernel boots with
+# dm-verity to ensure the integrity. Similar to layer VHDs the verity Merkle tree is appended to ext4 filesystem.
+# It transpires that the /dev/sd* order is not deterministic wrt the scsi device order. Thus build a single userland
+# fs + merkle tree device and boot that.
+#
+# From https://www.kernel.org/doc/html/latest/admin-guide/device-mapper/dm-init.html
+#
+# dm-mod.create=<name>,<uuid>,<minor>,<flags>,<table>[,<table>+][;<name>,<uuid>,<minor>,<flags>,<table>[,<table>+]+]
+#
+# where:
+# <name>          ::= The device name.
+# <uuid>          ::= xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx | ""
+# <minor>         ::= The device minor number | ""
+# <flags>         ::= "ro" | "rw"
+# <table>         ::= <start_sector> <num_sectors> <target_type> <target_args>
+# <target_type>   ::= "verity" | "linear" | ... (see list below)
+#
+# From https://docs.kernel.org/admin-guide/device-mapper/verity.html
+# <version> <dev> <hash_dev>
+# <data_block_size> <hash_block_size>
+# <num_data_blocks> <hash_start_block>
+# <algorithm> <digest> <salt>
+# [<#opt_params> <opt_params>]
+#
+# typical igvm tool line once all the macros are expanded
+# python3 /home/user/igvmfile.py -o out/v2056.bin -kernel /home/user/bzImage -append "8250_core.nr_uarts=0 panic=-1 debug loglevel=9 ignore_loglevel dev.scsi.logging_level=9411 root=/dev/dm-0 dm-mod.create=\"dmverity,,,ro,0 196744 verity 1 /dev/sda /dev/sdb 4096 4096 24593 0 sha256 6d625a306aafdf73125a84388b7bfdd2c3a154bd8d698955f4adffc736bdfd66 b9065c23231f0d8901cc3a68e1d3b8d624213e76d6f9f6d3ccbcb829f9c710ba 1 ignore_corruption\" init=/startup_v2056.sh"  -vtl 0
+#
+# so a kernel command line of:
+# 8250_core.nr_uarts=0 panic=-1 debug loglevel=9 ignore_loglevel dev.scsi.logging_level=9411 root=/dev/dm-0 dm-mod.create=\"dmverity,,,ro,0 196744 verity 1 /dev/sda /dev/sdb 4096 4096 24593 0 sha256 6d625a306aafdf73125a84388b7bfdd2c3a154bd8d698955f4adffc736bdfd66 b9065c23231f0d8901cc3a68e1d3b8d624213e76d6f9f6d3ccbcb829f9c710ba 1 ignore_corruption\" init=/startup_v2056.sh
+#
+# and a dm-mod.create of:
+# dmverity,,,ro,0 196744 verity 1 /dev/sda /dev/sdb 4096 4096 24593 0 sha256 6d625a306aafdf73125a84388b7bfdd2c3a154bd8d698955f4adffc736bdfd66 b9065c23231f0d8901cc3a68e1d3b8d624213e76d6f9f6d3ccbcb829f9c710ba 1 ignore_corruption
+#
+# which breaks down to:
+#
+# name = "dmverity"
+# uuid = ""
+# minor = ""
+# flags = "ro"
+# table = 0 196744 verity "args"
+#     start_sector = 0
+#     num_sectors = 196744
+#     target_type = verity
+#     target_args = 1 /dev/sda /dev/sdb 4096 4096 24593 0 sha256 6d625a306aafdf73125a84388b7bfdd2c3a154bd8d698955f4adffc736bdfd66 b9065c23231f0d8901cc3a68e1d3b8d624213e76d6f9f6d3ccbcb829f9c710ba 1 ignore_corruption
+# args:
+#     version               1
+#     dev                   /dev/sda
+#     hash_dev              /dev/sdb
+#     data_block_size       4096
+#     hash_block_size       4096
+#     num_data_blocks       24593
+#     hash_start_block      0
+#     algorithm             sha256
+#     digest                6d625a306aafdf73125a84388b7bfdd2c3a154bd8d698955f4adffc736bdfd66
+#     salt                  b9065c23231f0d8901cc3a68e1d3b8d624213e76d6f9f6d3ccbcb829f9c710ba
+#     opt_params
+#         count = 1
+#         ignore_corruption
+#
+# combined typical (note the bigger count of sectors for the whole device)
+# dmverity,,,ro,0 199672 verity 1 /dev/sda /dev/sda 4096 4096 24959 24959 sha256 4aa6e79866ee946ddbd9cddd6554bc6449272942fcc65934326817785a3bd374 adc4956274489c936395bab046a2d476f21ef436e571ba53da2fdf3aee59bf0a
+#
+# A few notes:
+#  - num_sectors is the size of the final (aka target) verity device, i.e. the size of our rootfs excluding the Merkle
+#    tree.
+#  - We don't add verity superblock, so the <hash_start_block> will be exactly at the end of ext4 filesystem and equal
+#    to its size. In the case when verity superblock is present an extra block should be added to the offset value,
+#    i.e. 24959 becomes 24960.
+
+
+# Debug build for use with uvmtester. UVM with dm-verity protected vhd disk mounted directly via the kernel command line.
+# Ignores corruption in dm-verity protected disk. (Use dmesg to see if dm-verity is ignoring data corruption.) The hash tree is on its own device (HASH_DEVICE), so <hash_start_block> is 0.
+out/v2056.bin: out/rootfs.vhd out/rootfs.hash.vhd $(PATH_PREFIX)/$(KERNEL_PATH) out/rootfs.hash.datasectors out/rootfs.hash.datablocksize out/rootfs.hash.hashblocksize out/rootfs.hash.datablocks out/rootfs.hash.rootdigest out/rootfs.hash.salt boot/startup_v2056.sh
+	rm -f $@
+	python3 $(PATH_PREFIX)/$(IGVM_TOOL) \
+		-o $@ \
+		-kernel $(PATH_PREFIX)/$(KERNEL_PATH) \
+		-append "8250_core.nr_uarts=0 panic=-1 debug loglevel=9 root=/dev/dm-0 dm-mod.create=\"dmverity,,,ro,0 $(shell cat out/rootfs.hash.datasectors) verity 1 $(ROOTFS_DEVICE) $(HASH_DEVICE) $(shell cat out/rootfs.hash.datablocksize) $(shell cat out/rootfs.hash.hashblocksize) $(shell cat out/rootfs.hash.datablocks) 0 sha256 $(shell cat out/rootfs.hash.rootdigest) $(shell cat out/rootfs.hash.salt) 1 ignore_corruption\" init=/startup_v2056.sh" \
+		-vtl 0
+
+# Debug build like out/v2056.bin, but for the combined rootfs + Merkle tree image: the data and hash device are both ROOTFS_DEVICE.
+out/v2056combined.bin: out/rootfs-verity.vhd $(PATH_PREFIX)/$(KERNEL_PATH) out/rootfs.hash.datasectors out/rootfs.hash.datablocksize out/rootfs.hash.hashblocksize out/rootfs.hash.datablocks out/rootfs.hash.rootdigest out/rootfs.hash.salt boot/startup_v2056.sh
+	rm -f $@
+	echo root=/dev/dm-0 dm-mod.create=\"dmverity,,,ro,0 $(shell cat out/rootfs.hash.datasectors) verity 1 $(ROOTFS_DEVICE) $(ROOTFS_DEVICE) $(shell cat out/rootfs.hash.datablocksize) $(shell cat out/rootfs.hash.hashblocksize) $(shell cat out/rootfs.hash.datablocks) $(shell cat out/rootfs.hash.datablocks) sha256 $(shell cat out/rootfs.hash.rootdigest) $(shell cat out/rootfs.hash.salt) 1 ignore_corruption\"
+	python3 $(PATH_PREFIX)/$(IGVM_TOOL) \
+		-o $@ \
+		-kernel $(PATH_PREFIX)/$(KERNEL_PATH) \
+		-append "8250_core.nr_uarts=0 panic=-1 debug loglevel=9 ignore_loglevel dev.scsi.logging_level=9411 root=/dev/dm-0 dm-mod.create=\"dmverity,,,ro,0 $(shell cat out/rootfs.hash.datasectors) verity 1 $(ROOTFS_DEVICE) $(ROOTFS_DEVICE) $(shell cat out/rootfs.hash.datablocksize) $(shell cat out/rootfs.hash.hashblocksize) $(shell cat out/rootfs.hash.datablocks) $(shell cat out/rootfs.hash.datablocks) sha256 $(shell cat out/rootfs.hash.rootdigest) $(shell cat out/rootfs.hash.salt) 1 ignore_corruption\" init=/startup_v2056.sh" \
+		-vtl 0
+
+# Full UVM with dm-verity protected vhd disk mounted directly via the kernel command line.
+out/kernel.bin: out/rootfs-verity.vhd $(PATH_PREFIX)/$(KERNEL_PATH) out/rootfs.hash.datasectors out/rootfs.hash.datablocksize out/rootfs.hash.hashblocksize out/rootfs.hash.datablocks out/rootfs.hash.rootdigest out/rootfs.hash.salt boot/startup.sh
+	rm -f $@
+	echo root=/dev/dm-0 dm-mod.create=\"dmverity,,,ro,0 $(shell cat out/rootfs.hash.datasectors) verity 1 $(ROOTFS_DEVICE) $(ROOTFS_DEVICE) $(shell cat out/rootfs.hash.datablocksize) $(shell cat out/rootfs.hash.hashblocksize) $(shell cat out/rootfs.hash.datablocks) $(shell cat out/rootfs.hash.datablocks) sha256 $(shell cat out/rootfs.hash.rootdigest) $(shell cat out/rootfs.hash.salt)\"
+	python3 $(PATH_PREFIX)/$(IGVM_TOOL) \
+		-o $@ \
+		-kernel $(PATH_PREFIX)/$(KERNEL_PATH) \
+		-append "8250_core.nr_uarts=0 panic=-1 debug loglevel=7 root=/dev/dm-0 dm-mod.create=\"dmverity,,,ro,0 $(shell cat out/rootfs.hash.datasectors) verity 1 $(ROOTFS_DEVICE) $(ROOTFS_DEVICE) $(shell cat out/rootfs.hash.datablocksize) $(shell cat out/rootfs.hash.hashblocksize) $(shell cat out/rootfs.hash.datablocks) $(shell cat out/rootfs.hash.datablocks) sha256 $(shell cat out/rootfs.hash.rootdigest) $(shell cat out/rootfs.hash.salt)\" init=/startup.sh" \
+		-vtl 0
+
+# Rule to make a vhd from a file. This is used to create the rootfs.hash.vhd from rootfs.hash.
+%.vhd: % $(TAR2EXT4_TOOL)
+	$(TAR2EXT4_TOOL) -only-vhd -i $< -o $@
+
+# Rule to make a vhd from an ext4 file. This is used to create the rootfs.vhd from rootfs.ext4.
+%.vhd: %.ext4 $(TAR2EXT4_TOOL)
+	$(TAR2EXT4_TOOL) -only-vhd -i $< -o $@
+
+# Build the dm-verity hash tree for an ext4 image and split the veritysetup report into one file per dm-verity table parameter.
+%.hash %.hash.info %.hash.datablocks %.hash.rootdigest %.hash.datablocksize %.hash.datasectors %.hash.hashblocksize: %.ext4 %.hash.salt
+	veritysetup format --no-superblock --salt $(shell cat out/rootfs.hash.salt) $< $*.hash > $*.hash.info
+    # $*.hash.info holds the veritysetup report; the files below are read back when the kernel command line is assembled.
+	cat $*.hash.info | awk '/^Root hash:/{ print $$3 }' > $*.hash.rootdigest
+	cat $*.hash.info | awk '/^Salt:/{ print $$2 }' > $*.hash.salt
+	cat $*.hash.info | awk '/^Data block size:/{ print $$4 }' > $*.hash.datablocksize
+	cat $*.hash.info | awk '/^Hash block size:/{ print $$4 }' > $*.hash.hashblocksize
+	cat $*.hash.info | awk '/^Data blocks:/{ print $$3 }' > $*.hash.datablocks
+	echo $$(( $$(cat $*.hash.datablocks) * $$(cat $*.hash.datablocksize) / 512 )) > $*.hash.datasectors
+
+out/rootfs.hash.salt:
+	hexdump -vn32 -e'8/4 "%08X" 1 "\n"' /dev/random > $@
+
+out/rootfs.ext4: out/rootfs.tar.gz $(TAR2EXT4_TOOL)
+	gzip -f -d ./out/rootfs.tar.gz
+	$(TAR2EXT4_TOOL) -i ./out/rootfs.tar -o $@
+
+out/rootfs-verity.ext4: out/rootfs.ext4 out/rootfs.hash
+	cp out/rootfs.ext4 $@
+	cat out/rootfs.hash >> $@
+
+out/rootfs.tar.gz: out/initrd.img
+	rm -rf rootfs-conv
+	mkdir rootfs-conv
+	gunzip -c out/initrd.img | (cd rootfs-conv && cpio -imd)
+	tar -zcf $@ -C rootfs-conv .
+	rm -rf rootfs-conv
+
+out/initrd.img: $(BASE) $(DELTA_TARGET) $(SRCROOT)/hack/catcpio.sh
+	$(SRCROOT)/hack/catcpio.sh "$(BASE)" $(DELTA_TARGET) > out/initrd.img.uncompressed
+	gzip -c out/initrd.img.uncompressed > $@
+	rm out/initrd.img.uncompressed
--- a/src/runtime/vendor/github.com/Microsoft/hcsshim/README.md
+++ b/src/runtime/vendor/github.com/Microsoft/hcsshim/README.md
@@ -44,7 +44,7 @@ delta.tar.gz  initrd.img  rootfs.tar.gz

 ### Containerd Shim

-For info on the [Runtime V2 API](https://github.com/containerd/containerd/blob/master/runtime/v2/README.md).
+For info on the [Runtime V2 API](https://github.com/containerd/containerd/blob/main/core/runtime/v2/README.md).

 Contrary to the typical Linux architecture of shim -> runc, the runhcs shim is used both to launch and manage the lifetime of containers.

--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .22.0
 .24.0