Add 'foo' to README.md

Merge pull request #11904 from manuelh-dev/mahuber/conf-rootfs-nv-guest-pull
gpu: nvidia rootfs build with guest pull support
2026-03-04 20:02:24 +00:00 · 2025-10-17 14:23:59 -05:00 · 2025-10-17 16:08:05 +01:00 · 2025-10-17 14:38:34 +01:00 · 2025-10-16 09:20:49 -07:00 · 2025-10-16 11:04:57 -05:00
1445 changed files with 94512 additions and 51520 deletions
--- a/.github/actionlint.yaml
+++ b/.github/actionlint.yaml
@@ -7,8 +7,12 @@
 self-hosted-runner:
  # Labels of self-hosted runner that linter should ignore
  labels:
+    - amd64-nvidia-a100
    - arm64-k8s
-    - ubuntu-22.04-arm
+    - containerd-v1.7
+    - containerd-v2.0
+    - containerd-v2.1
+    - containerd-v2.2
    - garm-ubuntu-2004
    - garm-ubuntu-2004-smaller
    - garm-ubuntu-2204
@@ -16,6 +20,7 @@ self-hosted-runner:
    - garm-ubuntu-2304-smaller
    - garm-ubuntu-2204-smaller
    - k8s-ppc64le
+    - ubuntu-24.04-ppc64le
    - metrics
    - ppc64le
    - riscv-builder
@@ -23,4 +28,5 @@ self-hosted-runner:
    - s390x
    - s390x-large
    - tdx
-    - amd64-nvidia-a100
+    - ubuntu-22.04-arm
+    - ubuntu-24.04-s390x
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -66,6 +66,9 @@ updates:
      rustix:
        patterns:
          - rustix
+      slab:
+        patterns:
+          - slab
      time:
        patterns:
          - time
--- a/.github/workflows/actionlint.yaml
+++ b/.github/workflows/actionlint.yaml
@@ -2,24 +2,17 @@ name: Lint GHA workflows

 on:
  workflow_dispatch:
-  pull_request_target:
-    types:
-      - opened
-      - edited
-      - reopened
-      - synchronize
-    paths:
-      - '.github/workflows/**'
+  pull_request:

 permissions: {}

-
 concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

 jobs:
  run-actionlint:
+    name: run-actionlint
    env:
      GH_TOKEN: ${{ github.token }}
    runs-on: ubuntu-24.04
--- a/.github/workflows/basic-ci-amd64.yaml
+++ b/.github/workflows/basic-ci-amd64.yaml
@@ -17,6 +17,7 @@ permissions: {}

 jobs:
  run-containerd-sandboxapi:
+    name: run-containerd-sandboxapi
    strategy:
      # We can set this to true whenever we're 100% sure that
      # all the tests are not flaky, otherwise we'll fail
@@ -65,6 +66,7 @@ jobs:
        run: bash tests/integration/cri-containerd/gha-run.sh run

  run-containerd-stability:
+    name: run-containerd-stability
    strategy:
      fail-fast: false
      matrix:
@@ -107,6 +109,7 @@ jobs:
        run: bash tests/stability/gha-run.sh run

  run-nydus:
+    name: run-nydus
    strategy:
      # We can set this to true whenever we're 100% sure that
      # all the tests are not flaky, otherwise we'll fail
@@ -152,6 +155,7 @@ jobs:
        run: bash tests/integration/nydus/gha-run.sh run

  run-runk:
+    name: run-runk
    # Skip runk tests as we have no maintainers. TODO: Decide when to remove altogether
    if: false
    runs-on: ubuntu-22.04
@@ -187,6 +191,7 @@ jobs:
        run: bash tests/integration/runk/gha-run.sh run

  run-tracing:
+    name: run-tracing
    strategy:
      fail-fast: false
      matrix:
@@ -231,6 +236,7 @@ jobs:
        run: bash tests/functional/tracing/gha-run.sh run

  run-vfio:
+    name: run-vfio
    strategy:
      fail-fast: false
      matrix:
@@ -274,6 +280,7 @@ jobs:
        run: bash tests/functional/vfio/gha-run.sh run

  run-docker-tests:
+    name: run-docker-tests
    strategy:
      # We can set this to true whenever we're 100% sure that
      # all the tests are not flaky, otherwise we'll fail them
@@ -281,10 +288,7 @@ jobs:
      fail-fast: false
      matrix:
        vmm:
-          - clh
          - qemu
-          - dragonball
-          - cloud-hypervisor
    runs-on: ubuntu-22.04
    env:
      KATA_HYPERVISOR: ${{ matrix.vmm }}
@@ -320,6 +324,7 @@ jobs:
        run: bash tests/integration/docker/gha-run.sh run

  run-nerdctl-tests:
+    name: run-nerdctl-tests
    strategy:
      # We can set this to true whenever we're 100% sure that
      # all the tests are not flaky, otherwise we'll fail them
@@ -379,6 +384,7 @@ jobs:
          retention-days: 1

  run-kata-agent-apis:
+    name: run-kata-agent-apis
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
--- a/.github/workflows/basic-ci-s390x.yaml
+++ b/.github/workflows/basic-ci-s390x.yaml
@@ -17,6 +17,7 @@ permissions: {}

 jobs:
  run-containerd-sandboxapi:
+    name: run-containerd-sandboxapi
    strategy:
      # We can set this to true whenever we're 100% sure that
      # all the tests are not flaky, otherwise we'll fail
@@ -65,6 +66,7 @@ jobs:
        run: bash tests/integration/cri-containerd/gha-run.sh run

  run-containerd-stability:
+    name: run-containerd-stability
    strategy:
      fail-fast: false
      matrix:
@@ -106,6 +108,7 @@ jobs:
        run: bash tests/stability/gha-run.sh run

  run-docker-tests:
+    name: run-docker-tests
    strategy:
      # We can set this to true whenever we're 100% sure that
      # all the tests are not flaky, otherwise we'll fail them
--- a/.github/workflows/build-checks-preview-riscv64.yaml
+++ b/.github/workflows/build-checks-preview-riscv64.yaml
@@ -17,6 +17,7 @@ permissions: {}
 name: Build checks preview riscv64
 jobs:
  check:
+    name: check
    runs-on: ${{ inputs.instance }}
    strategy:
      fail-fast: false
@@ -123,9 +124,11 @@ jobs:
          echo "GITHUB_RUNNER_CI_NON_VIRT=true" >> "$GITHUB_ENV"
      - name: Running `${{ matrix.command }}` for ${{ matrix.component.name }}
        run: |
-          cd ${{ matrix.component.path }}
-          ${{ matrix.command }}
+          cd "${COMPONENT_PATH}"
+          ${COMMAND}
        env:
+          COMMAND: ${{ matrix.command }}
+          COMPONENT_PATH: ${{ matrix.component.path }}
          RUST_BACKTRACE: "1"
          RUST_LIB_BACKTRACE: "0"
          SKIP_GO_VERSION_CHECK: "1"
--- a/.github/workflows/build-checks.yaml
+++ b/.github/workflows/build-checks.yaml
@@ -11,6 +11,7 @@ permissions: {}
 name: Build checks
 jobs:
  check:
+    name: check
    runs-on: ${{ inputs.instance }}
    strategy:
      fail-fast: false
@@ -46,6 +47,7 @@ jobs:
            path: src/libs
            needs:
              - rust
+              - protobuf-compiler
          - name: agent-ctl
            path: src/tools/agent-ctl
            needs:
@@ -56,6 +58,7 @@ jobs:
            path: src/tools/kata-ctl
            needs:
              - rust
+              - protobuf-compiler
          - name: trace-forwarder
            path: src/tools/trace-forwarder
            needs:
@@ -126,9 +129,11 @@ jobs:
          echo "GITHUB_RUNNER_CI_NON_VIRT=true" >> "$GITHUB_ENV"
      - name: Running `${{ matrix.command }}` for ${{ matrix.component.name }}
        run: |
-          cd ${{ matrix.component.path }}
-          ${{ matrix.command }}
+          cd "${COMPONENT_PATH}"
+          eval "${COMMAND}"
        env:
+          COMMAND: ${{ matrix.command }}
+          COMPONENT_PATH: ${{ matrix.component.path }}
          RUST_BACKTRACE: "1"
          RUST_LIB_BACKTRACE: "0"
          SKIP_GO_VERSION_CHECK: "1"
--- a/.github/workflows/build-kata-static-tarball-amd64.yaml
+++ b/.github/workflows/build-kata-static-tarball-amd64.yaml
@@ -30,6 +30,7 @@ permissions: {}

 jobs:
  build-asset:
+    name: build-asset
    runs-on: ubuntu-22.04
    permissions:
      contents: read
@@ -96,7 +97,6 @@ jobs:
      - name: Build ${{ matrix.asset }}
        id: build
        run: |
-          [[ "${KATA_ASSET}" == *"nvidia"* ]] && echo "KBUILD_SIGN_PIN=${{ secrets.KBUILD_SIGN_PIN }}" >> "${GITHUB_ENV}"
          make "${KATA_ASSET}-tarball"
          build_dir=$(readlink -f build)
          # store-artifact does not work with symlink
@@ -110,12 +110,15 @@ jobs:
          ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
          TARGET_BRANCH: ${{ inputs.target-branch }}
          RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
+          KBUILD_SIGN_PIN: ${{ contains(matrix.asset, 'nvidia') && secrets.KBUILD_SIGN_PIN || '' }}

      - name: Parse OCI image name and digest
        id: parse-oci-segments
        if: ${{ env.PERFORM_ATTESTATION == 'yes' }}
+        env:
+          KATA_ASSET: ${{ matrix.asset }}
        run: |
-          oci_image="$(<"build/${{ matrix.asset }}-oci-image")"
+          oci_image="$(<"build/${KATA_ASSET}-oci-image")"
          echo "oci-name=${oci_image%@*}" >> "$GITHUB_OUTPUT"
          echo "oci-digest=${oci_image#*@}" >> "$GITHUB_OUTPUT"

@@ -157,6 +160,7 @@ jobs:
          if-no-files-found: error

  build-asset-rootfs:
+    name: build-asset-rootfs
    runs-on: ubuntu-22.04
    needs: build-asset
    permissions:
@@ -203,7 +207,6 @@ jobs:
      - name: Build ${{ matrix.asset }}
        id: build
        run: |
-          [[ "${KATA_ASSET}" == *"nvidia"* ]] && echo "KBUILD_SIGN_PIN=${{ secrets.KBUILD_SIGN_PIN }}" >> "${GITHUB_ENV}"
          ./tests/gha-adjust-to-use-prebuilt-components.sh kata-artifacts "${KATA_ASSET}"
          make "${KATA_ASSET}-tarball"
          build_dir=$(readlink -f build)
@@ -218,6 +221,7 @@ jobs:
          ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
          TARGET_BRANCH: ${{ inputs.target-branch }}
          RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
+          KBUILD_SIGN_PIN: ${{ contains(matrix.asset, 'nvidia') && secrets.KBUILD_SIGN_PIN || '' }}

      - name: store-artifact ${{ matrix.asset }}
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
@@ -229,6 +233,7 @@ jobs:

  # We don't need the binaries installed in the rootfs as part of the release tarball, so can delete them now we've built the rootfs
  remove-rootfs-binary-artifacts:
+    name: remove-rootfs-binary-artifacts
    runs-on: ubuntu-22.04
    needs: build-asset-rootfs
    strategy:
@@ -246,6 +251,7 @@ jobs:

  # We don't need the binaries installed in the rootfs as part of the release tarball, so can delete them now we've built the rootfs
  remove-rootfs-binary-artifacts-for-release:
+    name: remove-rootfs-binary-artifacts-for-release
    runs-on: ubuntu-22.04
    needs: build-asset-rootfs
    strategy:
@@ -259,6 +265,7 @@ jobs:
          name: kata-artifacts-amd64-${{ matrix.asset}}${{ inputs.tarball-suffix }}

  build-asset-shim-v2:
+    name: build-asset-shim-v2
    runs-on: ubuntu-22.04
    needs: [build-asset, build-asset-rootfs, remove-rootfs-binary-artifacts, remove-rootfs-binary-artifacts-for-release]
    permissions:
@@ -320,6 +327,7 @@ jobs:
          if-no-files-found: error

  create-kata-tarball:
+    name: create-kata-tarball
    runs-on: ubuntu-22.04
    needs: [build-asset, build-asset-rootfs, build-asset-shim-v2]
    permissions:
--- a/.github/workflows/build-kata-static-tarball-arm64.yaml
+++ b/.github/workflows/build-kata-static-tarball-arm64.yaml
@@ -28,6 +28,7 @@ permissions: {}

 jobs:
  build-asset:
+    name: build-asset
    runs-on: ubuntu-22.04-arm
    permissions:
      contents: read
@@ -44,6 +45,7 @@ jobs:
          - kernel
          - kernel-dragonball-experimental
          - kernel-nvidia-gpu
+          - kernel-cca-confidential
          - nydus
          - ovmf
          - qemu
@@ -91,8 +93,10 @@ jobs:
      - name: Parse OCI image name and digest
        id: parse-oci-segments
        if: ${{ env.PERFORM_ATTESTATION == 'yes' }}
+        env:
+          KATA_ASSET: ${{ matrix.asset }}
        run: |
-          oci_image="$(<"build/${{ matrix.asset }}-oci-image")"
+          oci_image="$(<"build/${KATA_ASSET}-oci-image")"
          echo "oci-name=${oci_image%@*}" >> "$GITHUB_OUTPUT"
          echo "oci-digest=${oci_image#*@}" >> "$GITHUB_OUTPUT"

@@ -134,6 +138,7 @@ jobs:
          if-no-files-found: error

  build-asset-rootfs:
+    name: build-asset-rootfs
    runs-on: ubuntu-22.04-arm
    needs: build-asset
    permissions:
@@ -200,6 +205,7 @@ jobs:

  # We don't need the binaries installed in the rootfs as part of the release tarball, so can delete them now we've built the rootfs
  remove-rootfs-binary-artifacts:
+    name: remove-rootfs-binary-artifacts
    runs-on: ubuntu-22.04-arm
    needs: build-asset-rootfs
    strategy:
@@ -214,6 +220,7 @@ jobs:

  # We don't need the binaries installed in the rootfs as part of the release tarball, so can delete them now we've built the rootfs
  remove-rootfs-binary-artifacts-for-release:
+    name: remove-rootfs-binary-artifacts-for-release
    runs-on: ubuntu-22.04-arm
    needs: build-asset-rootfs
    strategy:
@@ -227,6 +234,7 @@ jobs:
          name: kata-artifacts-arm64-${{ matrix.asset}}${{ inputs.tarball-suffix }}

  build-asset-shim-v2:
+    name: build-asset-shim-v2
    runs-on: ubuntu-22.04-arm
    needs: [build-asset, build-asset-rootfs, remove-rootfs-binary-artifacts, remove-rootfs-binary-artifacts-for-release]
    permissions:
@@ -286,6 +294,7 @@ jobs:
          if-no-files-found: error

  create-kata-tarball:
+    name: create-kata-tarball
    runs-on: ubuntu-22.04-arm
    needs: [build-asset, build-asset-rootfs, build-asset-shim-v2]
    permissions:
--- a/.github/workflows/build-kata-static-tarball-ppc64le.yaml
+++ b/.github/workflows/build-kata-static-tarball-ppc64le.yaml
@@ -28,10 +28,11 @@ permissions: {}

 jobs:
  build-asset:
+    name: build-asset
    permissions:
      contents: read
      packages: write
-    runs-on: ppc64le
+    runs-on: ubuntu-24.04-ppc64le
    strategy:
      matrix:
        asset:
@@ -87,7 +88,8 @@ jobs:
          if-no-files-found: error

  build-asset-rootfs:
-    runs-on: ppc64le
+    name: build-asset-rootfs
+    runs-on: ubuntu-24.04-ppc64le
    needs: build-asset
    permissions:
      contents: read
@@ -153,6 +155,7 @@ jobs:

  # We don't need the binaries installed in the rootfs as part of the release tarball, so can delete them now we've built the rootfs
  remove-rootfs-binary-artifacts:
+    name: remove-rootfs-binary-artifacts
    runs-on: ubuntu-22.04
    needs: build-asset-rootfs
    strategy:
@@ -166,7 +169,8 @@ jobs:
          name: kata-artifacts-ppc64le-${{ matrix.asset}}${{ inputs.tarball-suffix }}

  build-asset-shim-v2:
-    runs-on: ppc64le
+    name: build-asset-shim-v2
+    runs-on: ubuntu-24.04-ppc64le
    needs: [build-asset, build-asset-rootfs, remove-rootfs-binary-artifacts]
    permissions:
      contents: read
@@ -225,7 +229,8 @@ jobs:
          if-no-files-found: error

  create-kata-tarball:
-    runs-on: ppc64le
+    name: create-kata-tarball
+    runs-on: ubuntu-24.04-ppc64le
    needs: [build-asset, build-asset-rootfs, build-asset-shim-v2]
    permissions:
      contents: read
--- a/.github/workflows/build-kata-static-tarball-riscv64.yaml
+++ b/.github/workflows/build-kata-static-tarball-riscv64.yaml
@@ -28,6 +28,7 @@ permissions: {}

 jobs:
  build-asset:
+    name: build-asset
    runs-on: riscv-builder
    permissions:
      contents: read
--- a/.github/workflows/build-kata-static-tarball-s390x.yaml
+++ b/.github/workflows/build-kata-static-tarball-s390x.yaml
@@ -31,7 +31,8 @@ permissions: {}

 jobs:
  build-asset:
-    runs-on: s390x
+    name: build-asset
+    runs-on: ubuntu-24.04-s390x
    permissions:
      contents: read
      packages: write
@@ -90,8 +91,10 @@ jobs:
      - name: Parse OCI image name and digest
        id: parse-oci-segments
        if: ${{ env.PERFORM_ATTESTATION == 'yes' }}
+        env:
+          ASSET: ${{ matrix.asset }}
        run: |
-          oci_image="$(<"build/${{ matrix.asset }}-oci-image")"
+          oci_image="$(<"build/${ASSET}-oci-image")"
          echo "oci-name=${oci_image%@*}" >> "$GITHUB_OUTPUT"
          echo "oci-digest=${oci_image#*@}" >> "$GITHUB_OUTPUT"

@@ -119,6 +122,7 @@ jobs:
          if-no-files-found: error

  build-asset-rootfs:
+    name: build-asset-rootfs
    runs-on: s390x
    needs: build-asset
    permissions:
@@ -186,6 +190,7 @@ jobs:
          if-no-files-found: error

  build-asset-boot-image-se:
+    name: build-asset-boot-image-se
    runs-on: s390x
    needs: [build-asset, build-asset-rootfs]
    permissions:
@@ -235,6 +240,7 @@ jobs:

  # We don't need the binaries installed in the rootfs as part of the release tarball, so can delete them now we've built the rootfs
  remove-rootfs-binary-artifacts:
+    name: remove-rootfs-binary-artifacts
    runs-on: ubuntu-22.04
    needs: [build-asset-rootfs, build-asset-boot-image-se]
    strategy:
@@ -250,7 +256,8 @@ jobs:
          name: kata-artifacts-s390x-${{ matrix.asset}}${{ inputs.tarball-suffix }}

  build-asset-shim-v2:
-    runs-on: s390x
+    name: build-asset-shim-v2
+    runs-on: ubuntu-24.04-s390x
    needs: [build-asset, build-asset-rootfs, remove-rootfs-binary-artifacts]
    permissions:
      contents: read
@@ -311,7 +318,8 @@ jobs:
          if-no-files-found: error

  create-kata-tarball:
-    runs-on: s390x
+    name: create-kata-tarball
+    runs-on: ubuntu-24.04-s390x
    needs:
      - build-asset
      - build-asset-rootfs
--- a/.github/workflows/cargo-deny-runner.yaml
+++ b/.github/workflows/cargo-deny-runner.yaml
@@ -15,6 +15,7 @@ permissions: {}

 jobs:
  cargo-deny-runner:
+    name: cargo-deny-runner
    runs-on: ubuntu-22.04

    steps:
--- a/.github/workflows/ci-nightly-s390x.yaml
+++ b/.github/workflows/ci-nightly-s390x.yaml
@@ -8,6 +8,7 @@ permissions: {}

 jobs:
  check-internal-test-result:
+    name: check-internal-test-result
    runs-on: s390x
    strategy:
      fail-fast: false
--- a/.github/workflows/ci-on-push.yaml
+++ b/.github/workflows/ci-on-push.yaml
@@ -1,6 +1,6 @@
 name: Kata Containers CI
 on:
-  pull_request_target:
+  pull_request_target: # zizmor: ignore[dangerous-triggers] See #11332.
    branches:
      - 'main'
    types:
--- a/.github/workflows/ci-weekly.yaml
+++ b/.github/workflows/ci-weekly.yaml
@@ -66,6 +66,7 @@ jobs:
      QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}

  build-and-publish-tee-confidential-unencrypted-image:
+    name: build-and-publish-tee-confidential-unencrypted-image
    permissions:
      contents: read
      packages: write
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -159,7 +159,7 @@ jobs:
      tag: ${{ inputs.tag }}-s390x
      commit-hash: ${{ inputs.commit-hash }}
      target-branch: ${{ inputs.target-branch }}
-      runner: s390x
+      runner: ubuntu-24.04-s390x
      arch: s390x
    secrets:
      QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
@@ -183,6 +183,7 @@ jobs:
      QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}

  build-and-publish-tee-confidential-unencrypted-image:
+    name: build-and-publish-tee-confidential-unencrypted-image
    permissions:
      contents: read
      packages: write
@@ -224,6 +225,7 @@ jobs:
          file: tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/Dockerfile

  publish-csi-driver-amd64:
+    name: publish-csi-driver-amd64
    needs: build-kata-static-tarball-amd64
    permissions:
      contents: read
--- a/.github/workflows/cleanup-resources.yaml
+++ b/.github/workflows/cleanup-resources.yaml
@@ -8,6 +8,7 @@ permissions: {}

 jobs:
  cleanup-resources:
+    name: cleanup-resources
    runs-on: ubuntu-22.04
    permissions:
      id-token: write # Used for OIDC access to log into Azure
--- a/.github/workflows/darwin-tests.yaml
+++ b/.github/workflows/darwin-tests.yaml
@@ -15,8 +15,17 @@ concurrency:
 name: Darwin tests
 jobs:
  test:
+    name: test
    runs-on: macos-latest
    steps:
+    - name: Install Protoc
+      run: |
+        f=$(mktemp)
+        curl -sSLo "$f" https://github.com/protocolbuffers/protobuf/releases/download/v28.2/protoc-28.2-osx-aarch_64.zip
+        mkdir -p "$HOME/.local"
+        unzip -d "$HOME/.local" "$f"
+        echo "$HOME/.local/bin" >> "${GITHUB_PATH}"
+
    - name: Checkout code
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      with:
@@ -27,5 +36,8 @@ jobs:
        ./tests/install_go.sh -f -p
        echo "/usr/local/go/bin" >> "${GITHUB_PATH}"

+    - name: Install Rust
+      run: ./tests/install_rust.sh
+
    - name: Build utils
      run: ./ci/darwin-test.sh
--- a/.github/workflows/docs-url-alive-check.yaml
+++ b/.github/workflows/docs-url-alive-check.yaml
@@ -1,12 +1,14 @@
 on:
  schedule:
    - cron:  '0 23 * * 0'
+  workflow_dispatch:

 permissions: {}

 name: Docs URL Alive Check
 jobs:
  test:
+    name: test
    runs-on: ubuntu-22.04
    # don't run this action on forks
    if: github.repository_owner == 'kata-containers'
@@ -15,13 +17,12 @@ jobs:
    steps:
    - name: Set env
      run: |
-        echo "GOPATH=${{ github.workspace }}" >> "$GITHUB_ENV"
+        echo "GOPATH=${GITHUB_WORKSPACE}" >> "$GITHUB_ENV"
    - name: Checkout code
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      with:
        fetch-depth: 0
        persist-credentials: false
-        path: ./src/github.com/${{ github.repository }}

    - name: Install golang
      run: |
@@ -30,4 +31,4 @@ jobs:

    - name: Docs URL Alive Check
      run: |
-        cd "${GOPATH}/src/github.com/${{ github.repository }}" && make docs-url-alive-check
+        make docs-url-alive-check
--- a/.github/workflows/gatekeeper-skipper.yaml
+++ b/.github/workflows/gatekeeper-skipper.yaml
@@ -35,6 +35,7 @@ permissions: {}

 jobs:
  skipper:
+    name: skipper
    runs-on: ubuntu-22.04
    outputs:
      skip_build: ${{ steps.skipper.outputs.skip_build }}
--- a/.github/workflows/gatekeeper.yaml
+++ b/.github/workflows/gatekeeper.yaml
@@ -5,7 +5,7 @@ name: Gatekeeper
 # reporting the status.

 on:
-  pull_request_target:
+  pull_request_target: # zizmor: ignore[dangerous-triggers] See #11332.
    types:
      - opened
      - synchronize
@@ -20,6 +20,7 @@ concurrency:

 jobs:
  gatekeeper:
+    name: gatekeeper
    runs-on: ubuntu-22.04
    permissions:
      actions: read
--- a/.github/workflows/govulncheck.yaml
+++ b/.github/workflows/govulncheck.yaml
@@ -7,6 +7,7 @@ permissions: {}

 jobs:
  govulncheck:
+    name: govulncheck
    runs-on: ubuntu-22.04
    strategy:
      matrix:
@@ -39,11 +40,14 @@ jobs:
      - name: Build runtime binaries
        run: |
          cd src/runtime
-          make ${{ matrix.make_target }}
+          make "${MAKE_TARGET}"
        env:
+          MAKE_TARGET: ${{ matrix.make_target }}
          SKIP_GO_VERSION_CHECK: "1"

      - name: Run govulncheck on ${{ matrix.binary }}
+        env:
+          BINARY: ${{ matrix.binary }}
        run: |
          cd src/runtime
-          bash ../../tests/govulncheck-runner.sh "./${{ matrix.binary }}"
+          bash ../../tests/govulncheck-runner.sh "./${BINARY}"
--- a/.github/workflows/kata-runtime-classes-sync.yaml
+++ b/.github/workflows/kata-runtime-classes-sync.yaml
@@ -1,3 +1,5 @@
+name: kata-runtime-classes-sync
+
 on:
  pull_request:
    types:
@@ -14,6 +16,7 @@ concurrency:

 jobs:
  kata-deploy-runtime-classes-check:
+    name: kata-deploy-runtime-classes-check
    runs-on: ubuntu-22.04
    steps:
    - name: Checkout code
--- a/.github/workflows/nydus-snapshotter-version-in-sync.yaml
+++ b/.github/workflows/nydus-snapshotter-version-in-sync.yaml
@@ -0,0 +1,35 @@
+name: nydus-snapshotter-version-sync
+
+on:
+  pull_request:
+    types:
+      - opened
+      - edited
+      - reopened
+      - synchronize
+
+permissions: {}
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  nydus-snapshotter-version-check:
+    name: nydus-snapshotter-version-check
+    runs-on: ubuntu-22.04
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      with:
+        persist-credentials: false
+    - name: Ensure nydus-snapshotter-version is in sync inside our repo
+      run: |
+        dockerfile_version=$(grep "ARG NYDUS_SNAPSHOTTER_VERSION" tools/packaging/kata-deploy/Dockerfile | cut -f2 -d'=')
+        versions_version=$(yq ".externals.nydus-snapshotter.version | explode(.)" versions.yaml)
+        if [[ "${dockerfile_version}" != "${versions_version}" ]]; then
+          echo "nydus-snapshotter version must be the same in the following places: "
+          echo "- versions.yaml: ${versions_version}"
+          echo "- tools/packaging/kata-deploy/Dockerfile: ${dockerfile_version}"
+          exit 1
+        fi
--- a/.github/workflows/payload-after-push.yaml
+++ b/.github/workflows/payload-after-push.yaml
@@ -136,6 +136,7 @@ jobs:
      QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}

  publish-manifest:
+    name: publish-manifest
    runs-on: ubuntu-22.04
    permissions:
      contents: read
--- a/.github/workflows/publish-kata-deploy-payload.yaml
+++ b/.github/workflows/publish-kata-deploy-payload.yaml
@@ -38,6 +38,7 @@ permissions: {}

 jobs:
  kata-payload:
+    name: kata-payload
    permissions:
      contents: read
      packages: write
--- a/.github/workflows/release-amd64.yaml
+++ b/.github/workflows/release-amd64.yaml
@@ -29,6 +29,7 @@ jobs:
      attestations: write

  kata-deploy:
+    name: kata-deploy
    needs: build-kata-static-tarball-amd64
    permissions:
      contents: read
--- a/.github/workflows/release-arm64.yaml
+++ b/.github/workflows/release-arm64.yaml
@@ -26,6 +26,7 @@ jobs:
      attestations: write

  kata-deploy:
+    name: kata-deploy
    needs: build-kata-static-tarball-arm64
    permissions:
      contents: read
--- a/.github/workflows/release-ppc64le.yaml
+++ b/.github/workflows/release-ppc64le.yaml
@@ -26,6 +26,7 @@ jobs:
      attestations: write

  kata-deploy:
+    name: kata-deploy
    needs: build-kata-static-tarball-ppc64le
    permissions:
      contents: read
--- a/.github/workflows/release-s390x.yaml
+++ b/.github/workflows/release-s390x.yaml
@@ -30,6 +30,7 @@ jobs:


  kata-deploy:
+    name: kata-deploy
    needs: build-kata-static-tarball-s390x
    permissions:
      contents: read
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -6,6 +6,7 @@ permissions: {}

 jobs:
  release:
+    name: release
    runs-on: ubuntu-22.04
    permissions:
      contents: write # needed for the `gh release create` command
@@ -77,6 +78,7 @@ jobs:
      QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}

  publish-multi-arch-images:
+    name: publish-multi-arch-images
    runs-on: ubuntu-22.04
    needs: [build-and-push-assets-amd64, build-and-push-assets-arm64, build-and-push-assets-s390x, build-and-push-assets-ppc64le]
    permissions:
@@ -114,6 +116,7 @@ jobs:
          KATA_DEPLOY_REGISTRIES: "quay.io/kata-containers/kata-deploy ghcr.io/kata-containers/kata-deploy"

  upload-multi-arch-static-tarball:
+    name: upload-multi-arch-static-tarball
    needs: [build-and-push-assets-amd64, build-and-push-assets-arm64, build-and-push-assets-s390x, build-and-push-assets-ppc64le]
    permissions:
      contents: write # needed for the `gh release` commands
@@ -178,6 +181,7 @@ jobs:
          ARCHITECTURE: ppc64le

  upload-versions-yaml:
+    name: upload-versions-yaml
    needs: release
    runs-on: ubuntu-22.04
    permissions:
@@ -195,6 +199,7 @@ jobs:
          GH_TOKEN: ${{ github.token }}

  upload-cargo-vendored-tarball:
+    name: upload-cargo-vendored-tarball
    needs: release
    runs-on: ubuntu-22.04
    permissions:
@@ -212,6 +217,7 @@ jobs:
          GH_TOKEN: ${{ github.token }}

  upload-libseccomp-tarball:
+    name: upload-libseccomp-tarball
    needs: release
    runs-on: ubuntu-22.04
    permissions:
@@ -229,6 +235,7 @@ jobs:
          GH_TOKEN: ${{ github.token }}

  upload-helm-chart-tarball:
+    name: upload-helm-chart-tarball
    needs: release
    runs-on: ubuntu-22.04
    permissions:
@@ -253,10 +260,11 @@ jobs:
      - name: Login to the OCI registries
        env:
          QUAY_DEPLOYER_USERNAME: ${{ vars.QUAY_DEPLOYER_USERNAME }}
-          GITHUB_ACTOR: ${{ github.actor }}
+          QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
+          GITHUB_TOKEN: ${{ github.token }}
        run: |
-          echo "${{ secrets.QUAY_DEPLOYER_PASSWORD }}" | helm registry login quay.io --username "${QUAY_DEPLOYER_USERNAME}" --password-stdin
-          echo "${{ github.token }}" | helm registry login ghcr.io --username "${GITHUB_ACTOR}" --password-stdin
+          echo "${QUAY_DEPLOYER_PASSWORD}" | helm registry login quay.io --username "${QUAY_DEPLOYER_USERNAME}" --password-stdin
+          echo "${GITHUB_TOKEN}" | helm registry login ghcr.io --username "${GITHUB_ACTOR}" --password-stdin

      - name: Push helm chart to the OCI registries
        run: |
@@ -265,6 +273,7 @@ jobs:
          helm push "kata-deploy-${release_version}.tgz" oci://ghcr.io/kata-containers/kata-deploy-charts

  publish-release:
+    name: publish-release
    needs: [ build-and-push-assets-amd64, build-and-push-assets-arm64, build-and-push-assets-s390x, build-and-push-assets-ppc64le, publish-multi-arch-images, upload-multi-arch-static-tarball, upload-versions-yaml, upload-cargo-vendored-tarball, upload-libseccomp-tarball ]
    runs-on: ubuntu-22.04
    permissions:
--- a/.github/workflows/run-containerd-multi-snapshotter-stability-test.yaml
+++ b/.github/workflows/run-containerd-multi-snapshotter-stability-test.yaml
@@ -0,0 +1,164 @@
+name: CI | Run containerd multi-snapshotter stability test
+on:
+  schedule:
+    - cron: "0 */1 * * *"  # run every hour
+
+permissions: {}
+
+# This job relies on k8s pre-installed using kubeadm
+jobs:
+  run-containerd-multi-snapshotter-stability-tests:
+    name: run-containerd-multi-snapshotter-stability-tests
+    strategy:
+      fail-fast: false
+      matrix:
+        containerd:
+          - v1.7
+          - v2.0
+          - v2.1
+          - v2.2
+    env:
+      # Space-separated list of images to run; the steps below iterate over it by relying on shell word splitting.
+      IMAGES_LIST: quay.io/mongodb/mongodb-community-server@sha256:8b73733842da21b6bbb6df4d7b2449229bb3135d2ec8c6880314d88205772a11 ghcr.io/edgelesssys/redis@sha256:ecb0a964c259a166a1eb62f0eb19621d42bd1cce0bc9bb0c71c828911d4ba93d
+    runs-on: containerd-${{ matrix.containerd }}
+    steps:
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          persist-credentials: false
+
+      - name: Rotate the journal
+        run: sudo journalctl --rotate --vacuum-time 1s
+
+      - name: Pull the kata-deploy image to be used
+        run: sudo ctr -n k8s.io image pull quay.io/kata-containers/kata-deploy-ci:kata-containers-latest
+
+      - name: Deploy Kata Containers
+        run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
+        env:
+          KATA_HYPERVISOR: qemu-coco-dev
+          KUBERNETES: vanilla
+          SNAPSHOTTER: nydus
+          USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: true
+
+      # This is needed as we may hit the createContainerTimeout
+      - name: Adjust Kata Containers' create_container_timeout
+        run: |
+          sudo sed -i -e 's/^\(create_container_timeout\).*=.*$/\1 = 600/g' /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
+          grep "create_container_timeout.*=" /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
+
+      # This is needed in order to have enough tmpfs space inside the guest to pull the image
+      - name: Adjust Kata Containers' default_memory
+        run: |
+          sudo sed -i -e 's/^\(default_memory\).*=.*$/\1 = 4096/g' /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
+          grep "default_memory.*=" /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
+
+      - name: Run a few containers using overlayfs
+        run: |
+          # IMAGES_LIST is a space-separated list; it is expanded unquoted on purpose so the shell splits it per image
+          # shellcheck disable=SC2086
+          for img in ${IMAGES_LIST}; do
+            echo "overlayfs | Using on image: ${img}"
+            pod="$(echo ${img} | tr ':.@/' '-' | awk '{print substr($0,1,56)}')"
+            kubectl run "${pod}" \
+              -it --rm \
+              --restart=Never \
+              --image="${img}" \
+              --image-pull-policy=Always \
+              --pod-running-timeout=10m \
+              -- uname -r
+          done
+
+      - name: Run the same few containers using a different snapshotter
+        run: |
+          # IMAGES_LIST is a space-separated list; it is expanded unquoted on purpose so the shell splits it per image
+          # shellcheck disable=SC2086
+          for img in ${IMAGES_LIST}; do
+            echo "nydus | Using on image: ${img}"
+            pod="kata-$(echo ${img} | tr ':.@/' '-' | awk '{print substr($0,1,56)}')"
+            kubectl run "${pod}" \
+              -it --rm \
+              --restart=Never \
+              --image="${img}" \
+              --image-pull-policy=Always \
+              --pod-running-timeout=10m \
+              --overrides='{
+                "spec": {
+                  "runtimeClassName": "kata-qemu-coco-dev"
+                }
+              }' \
+              -- uname -r
+          done
+
+      - name: Uninstall Kata Containers
+        run: bash tests/integration/kubernetes/gha-run.sh cleanup
+        env:
+          KATA_HYPERVISOR: qemu-coco-dev
+          KUBERNETES: vanilla
+          SNAPSHOTTER: nydus
+          USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: true
+
+      - name: Run a few containers using overlayfs
+        run: |
+          # IMAGES_LIST is a space-separated list; it is expanded unquoted on purpose so the shell splits it per image
+          # shellcheck disable=SC2086
+          for img in ${IMAGES_LIST}; do
+            echo "overlayfs | Using on image: ${img}"
+            pod="$(echo ${img} | tr ':.@/' '-' | awk '{print substr($0,1,56)}')"
+            kubectl run "${pod}" \
+              -it --rm \
+              --restart=Never \
+              --image="${img}" \
+              --image-pull-policy=Always \
+              --pod-running-timeout=10m \
+              -- uname -r
+          done
+
+      - name: Deploy Kata Containers
+        run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
+        env:
+          KATA_HYPERVISOR: qemu-coco-dev
+          KUBERNETES: vanilla
+          SNAPSHOTTER: nydus
+          USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: true
+
+      # This is needed as we may hit the createContainerTimeout
+      - name: Adjust Kata Containers' create_container_timeout
+        run: |
+          sudo sed -i -e 's/^\(create_container_timeout\).*=.*$/\1 = 600/g' /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
+          grep "create_container_timeout.*=" /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
+
+      # This is needed in order to have enough tmpfs space inside the guest to pull the image
+      - name: Adjust Kata Containers' default_memory
+        run: |
+          sudo sed -i -e 's/^\(default_memory\).*=.*$/\1 = 4096/g' /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
+          grep "default_memory.*=" /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
+
+      - name: Run the same few containers using a different snapshotter
+        run: |
+          # IMAGES_LIST is a space-separated list; it is expanded unquoted on purpose so the shell splits it per image
+          # shellcheck disable=SC2086
+          for img in ${IMAGES_LIST}; do
+            echo "nydus | Using on image: ${img}"
+            pod="kata-$(echo ${img} | tr ':.@/' '-' | awk '{print substr($0,1,56)}')"
+            kubectl run "${pod}" \
+              -it --rm \
+              --restart=Never \
+              --image="${img}" \
+              --image-pull-policy=Always \
+              --pod-running-timeout=10m \
+              --overrides='{
+                "spec": {
+                  "runtimeClassName": "kata-qemu-coco-dev"
+                }
+              }' \
+              -- uname -r
+          done
+
+      - name: Uninstall Kata Containers
+        run: bash tests/integration/kubernetes/gha-run.sh cleanup || true
+        if: always()
+        env:
+          KATA_HYPERVISOR: qemu-coco-dev
+          KUBERNETES: vanilla
+          SNAPSHOTTER: nydus
+          USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: true
--- a/.github/workflows/run-k8s-tests-on-aks.yaml
+++ b/.github/workflows/run-k8s-tests-on-aks.yaml
@@ -38,6 +38,7 @@ permissions: {}

 jobs:
  run-k8s-tests:
+    name: run-k8s-tests
    strategy:
      fail-fast: false
      matrix:
--- a/.github/workflows/run-k8s-tests-on-amd64.yaml
+++ b/.github/workflows/run-k8s-tests-on-amd64.yaml
@@ -26,15 +26,12 @@ permissions: {}

 jobs:
  run-k8s-tests-amd64:
+    name: run-k8s-tests-amd64
    strategy:
      fail-fast: false
      matrix:
        vmm:
-          - clh #cloud-hypervisor
-          - dragonball
-          - fc #firecracker
          - qemu
-          - cloud-hypervisor
        container_runtime:
          - containerd
        snapshotter:
@@ -72,6 +69,25 @@ jobs:
        env:
          TARGET_BRANCH: ${{ inputs.target-branch }}

+      - name: Remove unnecessary directories to free up space
+        run: |
+          sudo rm -rf /usr/local/.ghcup
+          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /usr/local/share/boost
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+          sudo rm -rf /usr/lib/jvm
+          sudo rm -rf /usr/share/swift
+          sudo rm -rf /usr/local/share/powershell
+          sudo rm -rf /usr/local/julia*
+          sudo rm -rf /opt/az
+          sudo rm -rf /usr/local/share/chromium
+          sudo rm -rf /opt/microsoft
+          sudo rm -rf /opt/google
+          sudo rm -rf /usr/lib/firefox
+
      - name: Configure CRI-O
        if: matrix.container_runtime == 'crio'
        run: bash tests/integration/kubernetes/gha-run.sh setup-crio
--- a/.github/workflows/run-k8s-tests-on-arm64.yaml
+++ b/.github/workflows/run-k8s-tests-on-arm64.yaml
@@ -26,6 +26,7 @@ permissions: {}

 jobs:
  run-k8s-tests-on-arm64:
+    name: run-k8s-tests-on-arm64
    strategy:
      fail-fast: false
      matrix:
--- a/.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml
+++ b/.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml
@@ -29,6 +29,7 @@ permissions: {}

 jobs:
  run-nvidia-gpu-tests-on-amd64:
+    name: run-nvidia-gpu-tests-on-amd64
    strategy:
      fail-fast: false
      matrix:
--- a/.github/workflows/run-k8s-tests-on-ppc64le.yaml
+++ b/.github/workflows/run-k8s-tests-on-ppc64le.yaml
@@ -26,6 +26,7 @@ permissions: {}

 jobs:
  run-k8s-tests:
+    name: run-k8s-tests
    strategy:
      fail-fast: false
      matrix:
--- a/.github/workflows/run-k8s-tests-on-zvsi.yaml
+++ b/.github/workflows/run-k8s-tests-on-zvsi.yaml
@@ -29,6 +29,7 @@ permissions: {}

 jobs:
  run-k8s-tests:
+    name: run-k8s-tests
    strategy:
      fail-fast: false
      matrix:
@@ -105,7 +106,9 @@ jobs:
      # qemu-runtime-rs only works with overlayfs
      # See: https://github.com/kata-containers/kata-containers/issues/10066
      - name: Configure the ${{ matrix.snapshotter }} snapshotter
-        run: bash tests/integration/kubernetes/gha-run.sh ${{ matrix.deploy-cmd }}
+        env:
+          DEPLOY_CMD: ${{ matrix.deploy-cmd }}
+        run: bash tests/integration/kubernetes/gha-run.sh "${DEPLOY_CMD}"
        if: ${{ matrix.snapshotter != 'overlayfs' }}

      - name: Deploy Kata
--- a/.github/workflows/run-kata-coco-stability-tests.yaml
+++ b/.github/workflows/run-kata-coco-stability-tests.yaml
@@ -40,6 +40,7 @@ permissions: {}
 jobs:
  # Generate jobs for testing CoCo on non-TEE environments
  run-stability-k8s-tests-coco-nontee:
+    name: run-stability-k8s-tests-coco-nontee
    strategy:
      fail-fast: false
      matrix:
--- a/.github/workflows/run-kata-coco-tests.yaml
+++ b/.github/workflows/run-kata-coco-tests.yaml
@@ -39,17 +39,17 @@ on:
 permissions: {}

 jobs:
-  run-k8s-tests-on-tdx:
+  run-k8s-tests-on-tee:
+    name: run-k8s-tests-on-tee
    strategy:
      fail-fast: false
      matrix:
-        vmm:
-          - qemu-tdx
-        snapshotter:
-          - nydus
-        pull-type:
-          - guest-pull
-    runs-on: tdx
+        include:
+          - runner: tdx
+            vmm: qemu-tdx
+          - runner: sev-snp
+            vmm: qemu-snp
+    runs-on: ${{ matrix.runner }}
    env:
      DOCKER_REGISTRY: ${{ inputs.registry }}
      DOCKER_REPO: ${{ inputs.repo }}
@@ -57,15 +57,15 @@ jobs:
      GH_PR_NUMBER: ${{ inputs.pr-number }}
      KATA_HYPERVISOR: ${{ matrix.vmm }}
      KUBERNETES: "vanilla"
-      USING_NFD: "true"
+      USING_NFD: "false"
      KBS: "true"
      K8S_TEST_HOST_TYPE: "baremetal"
      KBS_INGRESS: "nodeport"
-      SNAPSHOTTER: ${{ matrix.snapshotter }}
-      PULL_TYPE: ${{ matrix.pull-type }}
+      SNAPSHOTTER: "nydus"
+      PULL_TYPE: "guest-pull"
      AUTHENTICATED_IMAGE_USER: ${{ vars.AUTHENTICATED_IMAGE_USER }}
      AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
-      ITA_KEY: ${{ secrets.ITA_KEY }}
+      GH_ITA_KEY: ${{ secrets.ITA_KEY }}
      AUTO_GENERATE_POLICY: "yes"
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -80,13 +80,9 @@ jobs:
        env:
          TARGET_BRANCH: ${{ inputs.target-branch }}

-      - name: Deploy Snapshotter
-        timeout-minutes: 5
-        run: bash tests/integration/kubernetes/gha-run.sh deploy-snapshotter
-
      - name: Deploy Kata
        timeout-minutes: 10
-        run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-tdx
+        run: bash tests/integration/kubernetes/gha-run.sh deploy-kata

      - name: Uninstall previous `kbs-client`
        timeout-minutes: 10
@@ -95,6 +91,8 @@ jobs:
      - name: Deploy CoCo KBS
        timeout-minutes: 10
        run: bash tests/integration/kubernetes/gha-run.sh deploy-coco-kbs
+        env:
+          ITA_KEY: ${{ env.KATA_HYPERVISOR == 'qemu-tdx' && env.GH_ITA_KEY || '' }}

      - name: Install `kbs-client`
        timeout-minutes: 10
@@ -108,102 +106,19 @@ jobs:
        timeout-minutes: 100
        run: bash tests/integration/kubernetes/gha-run.sh run-tests

+      - name: Report tests
+        if: always()
+        run: bash tests/integration/kubernetes/gha-run.sh report-tests
+
      - name: Delete kata-deploy
        if: always()
-        run: bash tests/integration/kubernetes/gha-run.sh cleanup-tdx
-
-      - name: Delete Snapshotter
-        if: always()
-        run: bash tests/integration/kubernetes/gha-run.sh cleanup-snapshotter
+        run: bash tests/integration/kubernetes/gha-run.sh cleanup

      - name: Delete CoCo KBS
        if: always()
-        run: bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs
-
-      - name: Delete CSI driver
-        timeout-minutes: 5
-        run: bash tests/integration/kubernetes/gha-run.sh delete-csi-driver
-
-  run-k8s-tests-sev-snp:
-    strategy:
-      fail-fast: false
-      matrix:
-        vmm:
-          - qemu-snp
-        snapshotter:
-          - nydus
-        pull-type:
-          - guest-pull
-    runs-on: sev-snp
-    env:
-      DOCKER_REGISTRY: ${{ inputs.registry }}
-      DOCKER_REPO: ${{ inputs.repo }}
-      DOCKER_TAG: ${{ inputs.tag }}
-      GH_PR_NUMBER: ${{ inputs.pr-number }}
-      KATA_HYPERVISOR: ${{ matrix.vmm }}
-      KUBECONFIG: /home/kata/.kube/config
-      KUBERNETES: "vanilla"
-      USING_NFD: "false"
-      KBS: "true"
-      KBS_INGRESS: "nodeport"
-      K8S_TEST_HOST_TYPE: "baremetal"
-      SNAPSHOTTER: ${{ matrix.snapshotter }}
-      PULL_TYPE: ${{ matrix.pull-type }}
-      AUTHENTICATED_IMAGE_USER: ${{ vars.AUTHENTICATED_IMAGE_USER }}
-      AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
-      AUTO_GENERATE_POLICY: "yes"
-    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          ref: ${{ inputs.commit-hash }}
-          fetch-depth: 0
-          persist-credentials: false
-
-      - name: Rebase atop of the latest target branch
        run: |
-          ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
-        env:
-          TARGET_BRANCH: ${{ inputs.target-branch }}
-
-      - name: Deploy Snapshotter
-        timeout-minutes: 5
-        run: bash tests/integration/kubernetes/gha-run.sh deploy-snapshotter
-
-      - name: Deploy Kata
-        timeout-minutes: 10
-        run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-snp
-
-      - name: Uninstall previous `kbs-client`
-        timeout-minutes: 10
-        run: bash tests/integration/kubernetes/gha-run.sh uninstall-kbs-client
-
-      - name: Deploy CoCo KBS
-        timeout-minutes: 10
-        run: bash tests/integration/kubernetes/gha-run.sh deploy-coco-kbs
-
-      - name: Install `kbs-client`
-        timeout-minutes: 10
-        run: bash tests/integration/kubernetes/gha-run.sh install-kbs-client
-
-      - name: Deploy CSI driver
-        timeout-minutes: 5
-        run: bash tests/integration/kubernetes/gha-run.sh deploy-csi-driver
-
-      - name: Run tests
-        timeout-minutes: 50
-        run: bash tests/integration/kubernetes/gha-run.sh run-tests
-
-      - name: Delete kata-deploy
-        if: always()
-        run: bash tests/integration/kubernetes/gha-run.sh cleanup-snp
-
-      - name: Delete Snapshotter
-        if: always()
-        run: bash tests/integration/kubernetes/gha-run.sh cleanup-snapshotter
-
-      - name: Delete CoCo KBS
-        if: always()
-        run: bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs
+          [[ "${KATA_HYPERVISOR}" == "qemu-tdx" ]] && export ITA_KEY="${GH_ITA_KEY}" # GITHUB_ENV would only reach later steps
+          bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs

      - name: Delete CSI driver
        timeout-minutes: 5
@@ -211,6 +126,7 @@ jobs:

  # Generate jobs for testing CoCo on non-TEE environments
  run-k8s-tests-coco-nontee:
+    name: run-k8s-tests-coco-nontee
    strategy:
      fail-fast: false
      matrix:
@@ -220,6 +136,9 @@ jobs:
          - nydus
        pull-type:
          - guest-pull
+        include:
+          - pull-type: experimental-force-guest-pull
+            snapshotter: ""
    runs-on: ubuntu-22.04
    permissions:
      id-token: write # Used for OIDC access to log into Azure
@@ -245,7 +164,6 @@ jobs:
      # insufficient resources.
      K8S_TEST_HOST_TYPE: "all"
      USING_NFD: "false"
-      AUTO_GENERATE_POLICY: "yes"
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
@@ -295,13 +213,13 @@ jobs:
      - name: Download credentials for the Kubernetes CLI to use them
        run: bash tests/integration/kubernetes/gha-run.sh get-cluster-credentials

-      - name: Deploy Snapshotter
-        timeout-minutes: 5
-        run: bash tests/integration/kubernetes/gha-run.sh deploy-snapshotter
-
      - name: Deploy Kata
        timeout-minutes: 10
        run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-aks
+        env:
+          EXPERIMENTAL_FORCE_GUEST_PULL: ${{ env.PULL_TYPE == 'experimental-force-guest-pull' && env.KATA_HYPERVISOR || '' }}
+          USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: ${{ env.SNAPSHOTTER == 'nydus' }}
+          AUTO_GENERATE_POLICY: ${{ env.PULL_TYPE == 'experimental-force-guest-pull' && 'no' || 'yes' }}

      - name: Deploy CoCo KBS
        timeout-minutes: 10
@@ -333,3 +251,95 @@ jobs:
      - name: Delete AKS cluster
        if: always()
        run: bash tests/integration/kubernetes/gha-run.sh delete-cluster
+
+  # Generate jobs for testing CoCo on non-TEE environments with erofs-snapshotter
+  run-k8s-tests-coco-nontee-with-erofs-snapshotter:
+    name: run-k8s-tests-coco-nontee-with-erofs-snapshotter
+    strategy:
+      fail-fast: false
+      matrix:
+        vmm:
+          - qemu-coco-dev
+        snapshotter:
+          - erofs
+        pull-type:
+          - default
+    runs-on: ubuntu-24.04
+    environment: ci
+    env:
+      DOCKER_REGISTRY: ${{ inputs.registry }}
+      DOCKER_REPO: ${{ inputs.repo }}
+      DOCKER_TAG: ${{ inputs.tag }}
+      GH_PR_NUMBER: ${{ inputs.pr-number }}
+      KATA_HYPERVISOR: ${{ matrix.vmm }}
+      # Some tests rely on that variable to run (or not)
+      KBS: "false"
+      # Set the KBS ingress handler (empty string disables handling)
+      KBS_INGRESS: ""
+      KUBERNETES: "vanilla"
+      CONTAINER_ENGINE: "containerd"
+      CONTAINER_ENGINE_VERSION: "v2.2"
+      PULL_TYPE: ${{ matrix.pull-type }}
+      SNAPSHOTTER: ${{ matrix.snapshotter }}
+      USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: "true"
+      K8S_TEST_HOST_TYPE: "all"
+      USING_NFD: "false"
+      # We are skipping the auto generated policy tests for now,
+      # but those should be enabled as soon as we work on that.
+      AUTO_GENERATE_POLICY: "no"
+    steps:
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          ref: ${{ inputs.commit-hash }}
+          fetch-depth: 0
+          persist-credentials: false
+
+      - name: Rebase atop of the latest target branch
+        run: |
+          ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
+        env:
+          TARGET_BRANCH: ${{ inputs.target-branch }}
+
+      - name: Remove unnecessary directories to free up space
+        run: |
+          sudo rm -rf /usr/local/.ghcup
+          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /usr/local/share/boost
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+          sudo rm -rf /usr/lib/jvm
+          sudo rm -rf /usr/share/swift
+          sudo rm -rf /usr/local/share/powershell
+          sudo rm -rf /usr/local/julia*
+          sudo rm -rf /opt/az
+          sudo rm -rf /usr/local/share/chromium
+          sudo rm -rf /opt/microsoft
+          sudo rm -rf /opt/google
+          sudo rm -rf /usr/lib/firefox
+
+      - name: Deploy kubernetes
+        timeout-minutes: 15
+        run: bash tests/integration/kubernetes/gha-run.sh deploy-k8s
+        env:
+          GH_TOKEN: ${{ github.token }}
+
+      - name: Install `bats`
+        run: bash tests/integration/kubernetes/gha-run.sh install-bats
+
+      - name: Deploy Kata
+        timeout-minutes: 10
+        run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
+
+      - name: Deploy CSI driver
+        timeout-minutes: 5
+        run: bash tests/integration/kubernetes/gha-run.sh deploy-csi-driver
+
+      - name: Run tests
+        timeout-minutes: 80
+        run: bash tests/integration/kubernetes/gha-run.sh run-tests
+
+      - name: Report tests
+        if: always()
+        run: bash tests/integration/kubernetes/gha-run.sh report-tests
--- a/.github/workflows/run-kata-deploy-tests-on-aks.yaml
+++ b/.github/workflows/run-kata-deploy-tests-on-aks.yaml
@@ -33,6 +33,7 @@ permissions: {}

 jobs:
  run-kata-deploy-tests:
+    name: run-kata-deploy-tests
    strategy:
      fail-fast: false
      matrix:
--- a/.github/workflows/run-kata-deploy-tests.yaml
+++ b/.github/workflows/run-kata-deploy-tests.yaml
@@ -26,6 +26,7 @@ permissions: {}

 jobs:
  run-kata-deploy-tests:
+    name: run-kata-deploy-tests
    strategy:
      fail-fast: false
      matrix:
@@ -58,6 +59,25 @@ jobs:
        env:
          TARGET_BRANCH: ${{ inputs.target-branch }}

+      - name: Remove unnecessary directories to free up space
+        run: |
+          sudo rm -rf /usr/local/.ghcup
+          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /usr/local/share/boost
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+          sudo rm -rf /usr/lib/jvm
+          sudo rm -rf /usr/share/swift
+          sudo rm -rf /usr/local/share/powershell
+          sudo rm -rf /usr/local/julia*
+          sudo rm -rf /opt/az
+          sudo rm -rf /usr/local/share/chromium
+          sudo rm -rf /opt/microsoft
+          sudo rm -rf /opt/google
+          sudo rm -rf /usr/lib/firefox
+
      - name: Deploy ${{ matrix.k8s }}
        run:  bash tests/functional/kata-deploy/gha-run.sh deploy-k8s

--- a/.github/workflows/run-kata-monitor-tests.yaml
+++ b/.github/workflows/run-kata-monitor-tests.yaml
@@ -17,6 +17,7 @@ permissions: {}

 jobs:
  run-monitor:
+    name: run-monitor
    strategy:
      fail-fast: false
      matrix:
--- a/.github/workflows/run-metrics.yaml
+++ b/.github/workflows/run-metrics.yaml
@@ -26,6 +26,7 @@ permissions: {}

 jobs:
  run-metrics:
+    name: run-metrics
    strategy:
      # We can set this to true whenever we're 100% sure that
      # the all the tests are not flaky, otherwise we'll fail
--- a/.github/workflows/run-runk-tests.yaml
+++ b/.github/workflows/run-runk-tests.yaml
@@ -17,6 +17,7 @@ permissions: {}

 jobs:
  run-runk:
+    name: run-runk
    # Skip runk tests as we have no maintainers. TODO: Decide when to remove altogether
    if: false
    runs-on: ubuntu-22.04
--- a/.github/workflows/shellcheck.yaml
+++ b/.github/workflows/shellcheck.yaml
@@ -18,6 +18,7 @@ concurrency:

 jobs:
  shellcheck:
+    name: shellcheck
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout the code
--- a/.github/workflows/shellcheck_required.yaml
+++ b/.github/workflows/shellcheck_required.yaml
@@ -19,6 +19,7 @@ concurrency:

 jobs:
  shellcheck-required:
+    name: shellcheck-required
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout the code
--- a/.github/workflows/stale.yaml
+++ b/.github/workflows/stale.yaml
@@ -8,6 +8,7 @@ permissions: {}

 jobs:
  stale:
+    name: stale
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0
--- a/.github/workflows/static-checks-self-hosted.yaml
+++ b/.github/workflows/static-checks-self-hosted.yaml
@@ -29,8 +29,8 @@ jobs:
      matrix:
        instance:
          - "ubuntu-22.04-arm"
-          - "s390x"
-          - "ppc64le"
+          - "ubuntu-24.04-s390x"
+          - "ubuntu-24.04-ppc64le"
    uses: ./.github/workflows/build-checks.yaml
    with:
      instance: ${{ matrix.instance }}
--- a/.github/workflows/static-checks.yaml
+++ b/.github/workflows/static-checks.yaml
@@ -22,6 +22,7 @@ jobs:
      target-branch: ${{ github.event.pull_request.base.ref }}

  check-kernel-config-version:
+    name: check-kernel-config-version
    needs: skipper
    if: ${{ needs.skipper.outputs.skip_static != 'yes' }}
    runs-on: ubuntu-22.04
@@ -54,6 +55,7 @@ jobs:
      instance: ubuntu-22.04

  build-checks-depending-on-kvm:
+    name: build-checks-depending-on-kvm
    runs-on: ubuntu-22.04
    needs: skipper
    if: ${{ needs.skipper.outputs.skip_static != 'yes' }}
@@ -88,13 +90,16 @@ jobs:
      - name: Running `${{ matrix.command }}` for ${{ matrix.component }}
        run: |
          export PATH="$PATH:${HOME}/.cargo/bin"
-          cd ${{ matrix.component-path }}
-          ${{ matrix.command }}
+          cd "${COMPONENT_PATH}"
+          eval "${COMMAND}"
        env:
+          COMMAND: ${{ matrix.command }}
+          COMPONENT_PATH: ${{ matrix.component-path }}
          RUST_BACKTRACE: "1"
          RUST_LIB_BACKTRACE: "0"

  static-checks:
+    name: static-checks
    runs-on: ubuntu-22.04
    needs: skipper
    if: ${{ needs.skipper.outputs.skip_static != 'yes' }}
@@ -117,13 +122,13 @@ jobs:
          path: ./src/github.com/${{ github.repository }}
      - name: Install yq
        run: |
-          cd "${GOPATH}/src/github.com/${{ github.repository }}"
+          cd "${GOPATH}/src/github.com/${GITHUB_REPOSITORY}"
          ./ci/install_yq.sh
        env:
          INSTALL_IN_GOPATH: false
      - name: Install golang
        run: |
-          cd "${GOPATH}/src/github.com/${{ github.repository }}"
+          cd "${GOPATH}/src/github.com/${GITHUB_REPOSITORY}"
          ./tests/install_go.sh -f -p
          echo "/usr/local/go/bin" >> "$GITHUB_PATH"
      - name: Install system dependencies
@@ -131,7 +136,7 @@ jobs:
          sudo apt-get update && sudo apt-get -y install moreutils hunspell hunspell-en-gb hunspell-en-us pandoc
      - name: Install open-policy-agent
        run: |
-          cd "${GOPATH}/src/github.com/${{ github.repository }}"
+          cd "${GOPATH}/src/github.com/${GITHUB_REPOSITORY}"
          ./tests/install_opa.sh
      - name: Install regorus
        env:
@@ -139,11 +144,13 @@ jobs:
          ARTEFACT_REGISTRY_USERNAME: "${{ github.actor }}"
          ARTEFACT_REGISTRY_PASSWORD: "${{ secrets.GITHUB_TOKEN }}"
        run: |
-          "${GOPATH}/src/github.com/${{ github.repository }}/tests/install_regorus.sh"
+          "${GOPATH}/src/github.com/${GITHUB_REPOSITORY}/tests/install_regorus.sh"
      - name: Run check
+        env:
+          CMD: ${{ matrix.cmd }}
        run: |
          export PATH="${PATH}:${GOPATH}/bin"
-          cd "${GOPATH}/src/github.com/${{ github.repository }}" && ${{ matrix.cmd }}
+          cd "${GOPATH}/src/github.com/${GITHUB_REPOSITORY}" && ${CMD}

  govulncheck:
    needs: skipper
@@ -151,6 +158,7 @@ jobs:
    uses: ./.github/workflows/govulncheck.yaml

  codegen:
+    name: codegen
    runs-on: ubuntu-22.04
    needs: skipper
    if: ${{ needs.skipper.outputs.skip_static != 'yes' }}
--- a/.github/workflows/zizmor.yaml
+++ b/.github/workflows/zizmor.yaml
@@ -1,8 +1,6 @@
 name: GHA security analysis

 on:
-  push:
-    branches: ["main"]
  pull_request:

 permissions: {}
@@ -13,10 +11,8 @@ concurrency:

 jobs:
  zizmor:
+    name: zizmor
    runs-on: ubuntu-22.04
-    permissions:
-      contents: read
-      security-events: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -25,6 +21,9 @@ jobs:
          persist-credentials: false

      - name: Run zizmor
-        uses: zizmorcore/zizmor-action@f52a838cfabf134edcbaa7c8b3677dde20045018 # v0.1.1
+        uses: zizmorcore/zizmor-action@e673c3917a1aef3c65c972347ed84ccd013ecda4 # v0.2.0
        with:
+          advanced-security: false
+          annotations: true
          persona: auditor
+          version: v1.13.0
--- a/.github/zizmor.yml
+++ b/.github/zizmor.yml
@@ -0,0 +1,3 @@
+rules:
+  undocumented-permissions:
+    disable: true
--- a/README.md
+++ b/README.md
@@ -1,3 +1,5 @@
+foo
+
 <img src="https://object-storage-ca-ymq-1.vexxhost.net/swift/v1/6e4619c416ff4bd19e1c087f27a43eea/www-images-prod/openstack-logo/kata/SVG/kata-1.svg" width="900">

 [![CI | Publish Kata Containers payload](https://github.com/kata-containers/kata-containers/actions/workflows/payload-after-push.yaml/badge.svg)](https://github.com/kata-containers/kata-containers/actions/workflows/payload-after-push.yaml) [![Kata Containers Nightly CI](https://github.com/kata-containers/kata-containers/actions/workflows/ci-nightly.yaml/badge.svg)](https://github.com/kata-containers/kata-containers/actions/workflows/ci-nightly.yaml)
--- a/2
+++ b/2
@@ -1 +1 @@
-3.20.0
+3.21.0
--- a/ci/darwin-test.sh
+++ b/ci/darwin-test.sh
@@ -8,6 +8,7 @@ set -e

 cidir=$(dirname "$0")
 runtimedir=${cidir}/../src/runtime
+genpolicydir=${cidir}/../src/tools/genpolicy

 build_working_packages() {
 	# working packages:
@@ -40,3 +41,11 @@ build_working_packages() {
 }

 build_working_packages
+
+build_genpolicy() {
+	echo "building genpolicy"
+	pushd "${genpolicydir}" &>/dev/null
+	make TRIPLE=aarch64-apple-darwin build
+}
+
+build_genpolicy
--- a/ci/openshift-ci/cluster/install_kata.sh
+++ b/ci/openshift-ci/cluster/install_kata.sh
@@ -43,19 +43,18 @@ WORKAROUND_9206_CRIO=${WORKAROUND_9206_CRIO:-no}
 # Leverage kata-deploy to install Kata Containers in the cluster.
 #
 apply_kata_deploy() {
-	local deploy_file="tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
-	pushd "${katacontainers_repo_dir}" || die
-	sed -ri "s#(\s+image:) .*#\1 ${KATA_DEPLOY_IMAGE}#" "${deploy_file}"
+	if ! command -v helm &>/dev/null; then
+		echo "Helm not installed, installing..."
+		curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash # -f: never pipe an HTTP error page into bash
+	fi

-	info "Applying kata-deploy"
-	oc apply -f tools/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml
 	oc label --overwrite ns kube-system pod-security.kubernetes.io/enforce=privileged pod-security.kubernetes.io/warn=baseline pod-security.kubernetes.io/audit=baseline
-	oc apply -f "${deploy_file}"
-	oc -n kube-system wait --timeout=10m --for=condition=Ready -l name=kata-deploy pod
+	local version chart
+	version=$(curl -sSL https://api.github.com/repos/kata-containers/kata-containers/releases/latest | jq -r .tag_name) # -r: raw string, no quotes to strip
+	chart="oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy"

-	info "Adding the kata runtime classes"
-	oc apply -f tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml
-	popd || die
+	echo "Installing kata using helm ${chart} ${version}"
+	helm install kata-deploy --wait --namespace kube-system --set "image.reference=${KATA_DEPLOY_IMAGE%%:*},image.tag=${KATA_DEPLOY_IMAGE##*:}" "${chart}" --version "${version}"
 }


@@ -174,13 +173,13 @@ wait_for_app_pods_message() {
 	local namespace="$5"
 	[[ -z "${pod_count}" ]] && pod_count=1
 	[[ -z "${timeout}" ]] && timeout=60
-	[[ -n "${namespace}" ]] && namespace=" -n ${namespace} "
+	[[ -n "${namespace}" ]] && namespace=("-n" "${namespace}")
 	local pod
 	local pods
 	local i
 	SECONDS=0
 	while :; do
-		mapfile -t pods < <(oc get pods -l app="${app}" --no-headers=true "${namespace}" | awk '{print $1}')
+		mapfile -t pods < <(oc get pods -l app="${app}" --no-headers=true "${namespace[@]}" | awk '{print $1}')
 		[[ "${#pods}" -ge "${pod_count}" ]] && break
 		if [[ "${SECONDS}" -gt "${timeout}" ]]; then
 			printf "Unable to find ${pod_count} pods for '-l app=\"${app}\"' in ${SECONDS}s (%s)" "${pods[@]}"
@@ -190,7 +189,7 @@ wait_for_app_pods_message() {
 	local log
 	for pod in "${pods[@]}"; do
 		while :; do
-			log=$(oc logs "${namespace}" "${pod}")
+			log=$(oc logs "${namespace[@]}" "${pod}")
 			echo "${log}" | grep "${message}" -q && echo "Found $(echo "${log}" | grep "${message}") in ${pod}'s log (${SECONDS})" && break;
 			if [[ "${SECONDS}" -gt "${timeout}" ]]; then
 				echo -n "Message '${message}' not present in '${pod}' pod of the '-l app=\"${app}\"' "
--- a/ci/openshift-ci/peer-pods-azure.sh
+++ b/ci/openshift-ci/peer-pods-azure.sh
@@ -12,6 +12,33 @@

 SCRIPT_DIR=$(dirname "$0")

+##################
+# Helper functions
+##################
+
+# Sparse "git clone" supporting old git version
+# $1  - origin
+# $2  - revision
+# $3- - sparse checkout paths
+# Note: uses pushd to change into the cloned directory!
+git_sparse_clone() {
+  # Positional arguments: remote URL, revision, then the sparse checkout paths.
+  local remote_url="$1" rev="$2"
+  shift 2
+  local -a checkout_paths=("$@")
+  local workdir
+
+  # The work tree is named after the repository, minus any ".git" suffix.
+  workdir="$(basename -s .git "${remote_url}")"
+  git init "${workdir}"
+  pushd "${workdir}" || exit 1
+  git remote add origin "${remote_url}"
+  git fetch --depth 1 origin "${rev}"
+  git sparse-checkout init --cone
+  git sparse-checkout set "${checkout_paths[@]}"
+  git checkout FETCH_HEAD
+}
+
 ###############################
 # Disable security to allow e2e
 ###############################
@@ -116,33 +143,40 @@ az network vnet subnet update \
 for NODE_NAME in $(kubectl get nodes -o jsonpath='{.items[*].metadata.name}'); do [[ "${NODE_NAME}" =~ 'worker' ]] && kubectl label node "${NODE_NAME}" node.kubernetes.io/worker=; done

 # CAA artifacts
-CAA_IMAGE="quay.io/confidential-containers/cloud-api-adaptor"
-TAGS="$(curl https://quay.io/api/v1/repository/confidential-containers/cloud-api-adaptor/tag/?onlyActiveTags=true)"
-DIGEST=$(echo "${TAGS}" | jq -r '.tags[] | select(.name | contains("latest-amd64")) | .manifest_digest')
-CAA_TAG="$(echo "${TAGS}" | jq -r '.tags[] | select(.manifest_digest | contains("'"${DIGEST}"'")) | .name' | grep -v "latest")"
+if [[ -z "${CAA_TAG}" ]]; then  # no tag supplied: look one up on quay.io
+	if [[ -n "${CAA_IMAGE}" ]]; then  # an image given without a tag is rejected rather than guessed
+		echo "CAA_IMAGE (${CAA_IMAGE}) is set but CAA_TAG isn't, which is not supported. Please specify both or none"
+		exit 1
+	fi
+	TAGS="$(curl https://quay.io/api/v1/repository/confidential-containers/cloud-api-adaptor/tag/?onlyActiveTags=true)"  # active tags of the upstream repository
+	DIGEST=$(echo "${TAGS}" | jq -r '.tags[] | select(.name | contains("latest-amd64")) | .manifest_digest')  # digest of the tag whose name contains "latest-amd64"
+	CAA_TAG="$(echo "${TAGS}" | jq -r '.tags[] | select(.manifest_digest | contains("'"${DIGEST}"'")) | .name' | grep -v "latest")"  # tag names sharing that digest, minus those containing "latest"
+fi
+if [[ -z "${CAA_IMAGE}" ]]; then  # default to the upstream image when none was supplied
+	CAA_IMAGE="quay.io/confidential-containers/cloud-api-adaptor"
+fi

 # Get latest PP image
-SUCCESS_TIME=$(curl -s \
-  -H "Accept: application/vnd.github+json" \
-  "https://api.github.com/repos/confidential-containers/cloud-api-adaptor/actions/workflows/azure-nightly-build.yml/runs?status=success" \
-  | jq -r '.workflow_runs[0].updated_at')
-PP_IMAGE_ID="/CommunityGalleries/cocopodvm-d0e4f35f-5530-4b9c-8596-112487cdea85/Images/podvm_image0/Versions/$(date -u -jf "%Y-%m-%dT%H:%M:%SZ" "${SUCCESS_TIME}" "+%Y.%m.%d" 2>/dev/null || date -d "${SUCCESS_TIME}" +%Y.%m.%d)"
+if [[ -z "${PP_IMAGE_ID}" ]]; then  # no pod VM image supplied: derive one from the nightly build
+	SUCCESS_TIME=$(curl -s \
+	  -H "Accept: application/vnd.github+json" \
+	  "https://api.github.com/repos/confidential-containers/cloud-api-adaptor/actions/workflows/azure-nightly-build.yml/runs?status=success" \
+	  | jq -r '.workflow_runs[0].updated_at')  # updated_at of the first successful run returned by the API
+	PP_IMAGE_ID="/CommunityGalleries/cocopodvm-d0e4f35f-5530-4b9c-8596-112487cdea85/Images/podvm_image0/Versions/$(date -u -jf "%Y-%m-%dT%H:%M:%SZ" "${SUCCESS_TIME}" "+%Y.%m.%d" 2>/dev/null || date -d "${SUCCESS_TIME}" +%Y.%m.%d)"  # version is that timestamp as YYYY.MM.DD; presumably BSD date syntax first, GNU `date -d` as fallback
+fi

-echo "AZURE_REGION: \"${AZURE_REGION}\""
-echo "PP_REGION: \"${PP_REGION}\""
-echo "AZURE_RESOURCE_GROUP: \"${AZURE_RESOURCE_GROUP}\""
-echo "PP_RESOURCE_GROUP: \"${PP_RESOURCE_GROUP}\""
-echo "PP_SUBNET_ID: \"${PP_SUBNET_ID}\""
-echo "CAA_TAG: \"${CAA_TAG}\""
-echo "PP_IMAGE_ID: \"${PP_IMAGE_ID}\""
+echo "AZURE_REGION=\"${AZURE_REGION}\""
+echo "PP_REGION=\"${PP_REGION}\""
+echo "AZURE_RESOURCE_GROUP=\"${AZURE_RESOURCE_GROUP}\""
+echo "PP_RESOURCE_GROUP=\"${PP_RESOURCE_GROUP}\""
+echo "PP_SUBNET_ID=\"${PP_SUBNET_ID}\""
+echo "CAA_IMAGE=\"${CAA_IMAGE}\""
+echo "CAA_TAG=\"${CAA_TAG}\""
+echo "PP_IMAGE_ID=\"${PP_IMAGE_ID}\""

 # Clone and configure caa
-git clone --depth 1 --no-checkout https://github.com/confidential-containers/cloud-api-adaptor.git
-pushd cloud-api-adaptor
-git sparse-checkout init --cone
-git sparse-checkout set src/cloud-api-adaptor/install/
-git checkout
-echo "CAA_GIT_SHA: \"$(git rev-parse HEAD)\""
+git_sparse_clone "https://github.com/confidential-containers/cloud-api-adaptor.git" "${CAA_GIT_SHA:-main}" "src/cloud-api-adaptor/install/"
+echo "CAA_GIT_SHA=\"$(git rev-parse HEAD)\""
 pushd src/cloud-api-adaptor
 cat <<EOF > install/overlays/azure/workload-identity.yaml
 apiVersion: apps/v1
@@ -208,12 +242,8 @@ echo "AZURE_CLIENT_SECRET=${AZURE_CLIENT_SECRET}" >> install/overlays/azure/serv
 echo "AZURE_TENANT_ID=${AZURE_TENANT_ID}" >> install/overlays/azure/service-principal.env

 # Deploy Operator
-git clone --depth 1 --no-checkout https://github.com/confidential-containers/operator
-pushd operator
-git sparse-checkout init --cone
-git sparse-checkout set "config/"
-git checkout
-echo "OPERATOR_SHA: \"$(git rev-parse HEAD)\""
+git_sparse_clone "https://github.com/confidential-containers/operator" "${OPERATOR_SHA:-main}" "config/"
+echo "OPERATOR_SHA=\"$(git rev-parse HEAD)\""
 oc apply -k "config/release"
 oc apply -k "config/samples/ccruntime/peer-pods"
 popd
@@ -227,7 +257,7 @@ popd
 SECONDS=0
 ( while [[ "${SECONDS}" -lt 360 ]]; do
    kubectl get runtimeclass | grep -q kata-remote && exit 0
-done; exit 1 ) || { echo "kata-remote runtimeclass not initialized in 60s"; kubectl -n confidential-containers-system get all; echo; echo CAA; kubectl -n confidential-containers-system logs daemonset.apps/cloud-api-adaptor-daemonset; echo pre-install; kubectl -n confidential-containers-system logs daemonset.apps/cc-operator-pre-install-daemon; echo install; kubectl -n confidential-containers-system logs daemonset.apps/cc-operator-daemon-install; exit 1; }
+done; exit 1 ) || { echo "kata-remote runtimeclass not initialized in 360s"; kubectl -n confidential-containers-system get all; echo; echo "kubectl -n confidential-containers-system describe all"; kubectl -n confidential-containers-system describe all; echo; echo CAA; kubectl -n confidential-containers-system logs daemonset.apps/cloud-api-adaptor-daemonset; echo pre-install; kubectl -n confidential-containers-system logs daemonset.apps/cc-operator-pre-install-daemon; echo install; kubectl -n confidential-containers-system logs daemonset.apps/cc-operator-daemon-install; exit 1; }  # on timeout: report the 360 s limit used by the loop, dump namespace state and daemonset logs, then fail


 ################
--- a/docs/Developer-Guide.md
+++ b/docs/Developer-Guide.md
@@ -450,7 +450,7 @@ You can build and install the guest kernel image as shown [here](../tools/packag
 # Install a hypervisor

 When setting up Kata using a [packaged installation method](install/README.md#installing-on-a-linux-system), the
-`QEMU` VMM is installed automatically. Cloud-Hypervisor, Firecracker and StratoVirt VMMs are available from the [release tarballs](https://github.com/kata-containers/kata-containers/releases), as well as through [`kata-deploy`](../tools/packaging/kata-deploy/README.md).
+`QEMU` VMM is installed automatically. Cloud-Hypervisor, Firecracker and StratoVirt VMMs are available from the [release tarballs](https://github.com/kata-containers/kata-containers/releases), as well as through [`kata-deploy`](../tools/packaging/kata-deploy/helm-chart/README.md).
 You may choose to manually build your VMM/hypervisor.

 ## Build a custom QEMU
--- a/docs/Limitations.md
+++ b/docs/Limitations.md
@@ -166,19 +166,26 @@ moment.
 See [this issue](https://github.com/kata-containers/runtime/issues/2812) for more details.
 [Another issue](https://github.com/kata-containers/kata-containers/issues/1728) focuses on the case of `emptyDir`.

+### Kubernetes [hostPath][k8s-hostpath] volumes
+
+When the source path of a hostPath volume is under `/dev`, and the path
+either corresponds to a host device or is not accessible by the Kata
+shim, the Kata agent bind mounts the source path directly from the
+*guest* filesystem into the container.
+
+[k8s-hostpath]: https://kubernetes.io/docs/concepts/storage/volumes/#hostpath
+
 ## Host resource sharing

 ### Privileged containers

 Privileged support in Kata is essentially different from `runc` containers.
-The container runs with elevated capabilities within the guest and is granted
-access to guest devices instead of the host devices.
+The container runs with elevated capabilities within the guest.
 This is also true with using `securityContext privileged=true` with Kubernetes.

-The container may also be granted full access to a subset of host devices
-(https://github.com/kata-containers/runtime/issues/1568).
-
-See [Privileged Kata Containers](how-to/privileged.md) for how to configure some of this behavior.
+Importantly, the default behavior to pass the host devices to a
+privileged container is not supported in Kata Containers and needs to be
+disabled, see [Privileged Kata Containers](how-to/privileged.md).

 # Appendices

--- a/docs/how-to/README.md
+++ b/docs/how-to/README.md
@@ -48,3 +48,4 @@
 - [How to use the Kata Agent Policy](how-to-use-the-kata-agent-policy.md)
 - [How to pull images in the guest](how-to-pull-images-in-guest-with-kata.md)
 - [How to use mem-agent to decrease the memory usage of Kata container](how-to-use-memory-agent.md)
+- [How to use seccomp with runtime-rs](how-to-use-seccomp-with-runtime-rs.md)
--- a/docs/how-to/how-to-run-kata-containers-with-SE-VMs.md
+++ b/docs/how-to/how-to-run-kata-containers-with-SE-VMs.md
@@ -318,7 +318,7 @@ Finally, an operational kata container with IBM Secure Execution is now running.

 It is reasonable to expect that the manual steps mentioned above can be easily executed.
 Typically, you can use
-[kata-deploy](https://github.com/kata-containers/kata-containers/blob/main/tools/packaging/kata-deploy/README.md)
+[kata-deploy](https://github.com/kata-containers/kata-containers/blob/main/tools/packaging/kata-deploy/helm-chart/README.md)
 to install Kata Containers on a Kubernetes cluster. However, when leveraging IBM Secure Execution,
 you need to employ the confidential container's
 [operator](https://github.com/confidential-containers/operator).
--- a/docs/how-to/how-to-use-seccomp-with-runtime-rs.md
+++ b/docs/how-to/how-to-use-seccomp-with-runtime-rs.md
@@ -0,0 +1,44 @@
+## Introduction
+
+To enhance security, Kata Containers supports using seccomp to restrict the hypervisor's system calls. Previously, this was only supported for a subset of hypervisors in runtime-go. Now, the runtime-rs also supports seccomp. This document describes how to enable/disable the seccomp feature for the corresponding hypervisor in runtime-rs.
+
+## Pre-requisites
+
+1. Ensure your system's kernel supports **seccomp**.
+2. Confirm that each hypervisor you plan to use (`qemu`, `cloud hypervisor`, `firecracker` or `dragonball`) runs correctly on your system.
+
+## Configure seccomp
+
+With the exception of `qemu`, seccomp is enabled by default for all other supported hypervisors. Their corresponding built-in functionalities are also enabled by default.
+
+### QEMU
+
+As with runtime-go, you need to modify the following in your **configuration file**. These parameters will be passed directly to the `qemu` startup command line. For more details on the parameters, you can refer to: [https://www.qemu.org/docs/master/system/qemu-manpage.html](https://www.qemu.org/docs/master/system/qemu-manpage.html)
+
+``` toml
+# Qemu seccomp sandbox feature
+# comma-separated list of seccomp sandbox features to control the syscall access.
+# For example, `seccompsandbox= "on,obsolete=deny,spawn=deny,resourcecontrol=deny"`
+# Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
+# Another note: enabling this feature may reduce performance, you may enable
+# /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html
+seccompsandbox="on,obsolete=deny,spawn=deny,resourcecontrol=deny"
+```
+### Cloud Hypervisor, Firecracker and Dragonball
+
+The **seccomp** functionality is enabled by default for the following three hypervisors: `cloud hypervisor`, `firecracker`, and `dragonball`.
+
+The seccomp rules for `cloud hypervisor` and `firecracker` are built directly into their executable files. For `dragonball`, the relevant configuration is currently located at `src/runtime-rs/crates/hypervisor/src/dragonball/seccomp.rs`.
+
+To disable this functionality for these hypervisors, you can modify the following configuration options in your **configuration file**.
+
+``` toml
+# Disable the 'seccomp' feature from Cloud Hypervisor, firecracker or dragonball, default false
+disable_seccomp = true
+```
+
+## Implementation details
+
+For `qemu`, `cloud hypervisor`, and `firecracker`, their **seccomp** functionality is built into the respective executable files you are using. **runtime-rs** simply provides command-line arguments for their launch based on the configuration file.
+
+For `dragonball`, a set of allowed system calls is currently provided for the entire **runtime-rs** process, and the process is prevented from using any system calls outside of this whitelist. As mentioned above, this set is located at `src/runtime-rs/crates/hypervisor/src/dragonball/seccomp.rs`.
--- a/docs/how-to/how-to-use-the-kata-agent-policy.md
+++ b/docs/how-to/how-to-use-the-kata-agent-policy.md
@@ -32,11 +32,24 @@ Kubernetes users can encode in `base64` format their Policy documents, and add t

 ### Encode a Policy file

-For example, the [`allow-all-except-exec-process.rego`](../../src/kata-opa/allow-all-except-exec-process.rego) sample policy file is different from the [default Policy](../../src/kata-opa/allow-all.rego) because it rejects any `ExecProcess` requests. You can encode this policy file:
+For example, the [`allow-all-except-exec-process.rego`](../../src/kata-opa/allow-all-except-exec-process.rego) sample policy file is different from the [default Policy](../../src/kata-opa/allow-all.rego) because it rejects any `ExecProcess` requests. To encode this policy file, you need to:
+- Embed the policy inside an init data struct
+- Compress the init data with gzip
+- Base64-encode the compressed output
+For example:

 ```bash
-$ base64 -w 0 allow-all-except-exec-process.rego
-cGFja2FnZSBhZ2VudF9wb2xpY3kKCmRlZmF1bHQgQWRkQVJQTmVpZ2hib3JzUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgQWRkU3dhcFJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IENsb3NlU3RkaW5SZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBDb3B5RmlsZVJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IENyZWF0ZUNvbnRhaW5lclJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IENyZWF0ZVNhbmRib3hSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBEZXN0cm95U2FuZGJveFJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IEdldE1ldHJpY3NSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBHZXRPT01FdmVudFJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IEd1ZXN0RGV0YWlsc1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IExpc3RJbnRlcmZhY2VzUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgTGlzdFJvdXRlc1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IE1lbUhvdHBsdWdCeVByb2JlUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgT25saW5lQ1BVTWVtUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUGF1c2VDb250YWluZXJSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBQdWxsSW1hZ2VSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBSZWFkU3RyZWFtUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVtb3ZlQ29udGFpbmVyUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVtb3ZlU3RhbGVWaXJ0aW9mc1NoYXJlTW91bnRzUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVzZWVkUmFuZG9tRGV2UmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVzdW1lQ29udGFpbmVyUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgU2V0R3Vlc3REYXRlVGltZVJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFNldFBvbGljeVJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFNpZ25hbFByb2Nlc3NSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBTdGFydENvbnRhaW5lclJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFN0YXJ0VHJhY2luZ1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFN0YXRzQ29udGFpbmVyUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgU3RvcFRyYWNpbmdSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBUdHlXaW5SZXNpemVSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVDb250YWluZXJSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVFcGhlbWVyYWxNb3VudHNSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVJbnRlcmZhY2VSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVSb3V0ZXNSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBXYWl0UHJvY2Vzc1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFdyaXRlU3RyZWFtUmVxdWVzdCA6PSB0cnVlCgpkZWZhdWx0IEV4ZWNQcm9jZXNzUmVxdWVzdCA6PSBmYWxzZQo=
+$ STRING="$(< allow-all-except-exec-process.rego)"
+$ cat <<EOF | gzip -c | base64 -w0
+version = "0.1.0"
+algorithm = "sha256"
+
+[data]
+"policy.rego" = '''
+$STRING
+'''
+EOF
+H4sIAAAAAAAAA42UTW/TQBCG7/4Vq/QQOCQKQXCo1ENIAkRqiGWnpBJCaGKP7RXrXTM7DnV/PRMiVUh07R582J3H8/XO7AnJa2fVjRrNpm+ms1EEpnSkuarPd76C+bv3oyj6lgPD92jUOKOzbkpYupEA4/E4ulJL13Sky4rVq+y1ms/mb9VWZ+S8K1iM1DgClijRlcBpvLqf3OoMrcfJJkfLutBI12rRQFbhZD6dCRfJ4SeUqOSz/OMSNopyLKA1rBZ5vkjiLyhBj458gr9a9KyubxRTi/9i6W9oQualcR5TzrUNElLZR20waCcExqWzDNoi9WMp2PzoHkLQSi7JdQPUJ+QtMuksWLQQu912fZK+BZHz7QolaRN0c6s9bywjFZBhL5W4lsPEFuvPjhvTlh+6mNwx2MudNdLDZXwnf4SYGFo/3O64NWZTy+SEgAQhT1lECQZKsHan4UgXLGUw+FWTzHjh0woIt661HGxJgh4xT0RoV6/w1IO19XAOKfJFTxmxva6DRQsX/12jIKBLC0Y0Er2DuUutxMM5nak9QaZt2cOwf4En1ww42nN3OK+w14/B4u+a/CWLesHWTYU1Eph+GS/w0470Y/1LcgDNA40/yKOMzw/tE7N+wOx/NwUYj9H5qf4DsX93tO4FAAA=
 ```

 ### Attach the Policy to a pod
@@ -49,7 +62,7 @@ kind: Pod
 metadata:
  name: policy-exec-rejected
  annotations:
-    io.katacontainers.config.agent.policy: cGFja2FnZSBhZ2VudF9wb2xpY3kKCmRlZmF1bHQgQWRkQVJQTmVpZ2hib3JzUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgQWRkU3dhcFJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IENsb3NlU3RkaW5SZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBDb3B5RmlsZVJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IENyZWF0ZUNvbnRhaW5lclJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IENyZWF0ZVNhbmRib3hSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBEZXN0cm95U2FuZGJveFJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IEdldE1ldHJpY3NSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBHZXRPT01FdmVudFJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IEd1ZXN0RGV0YWlsc1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IExpc3RJbnRlcmZhY2VzUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgTGlzdFJvdXRlc1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IE1lbUhvdHBsdWdCeVByb2JlUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgT25saW5lQ1BVTWVtUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUGF1c2VDb250YWluZXJSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBQdWxsSW1hZ2VSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBSZWFkU3RyZWFtUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVtb3ZlQ29udGFpbmVyUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVtb3ZlU3RhbGVWaXJ0aW9mc1NoYXJlTW91bnRzUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVzZWVkUmFuZG9tRGV2UmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVzdW1lQ29udGFpbmVyUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgU2V0R3Vlc3REYXRlVGltZVJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFNldFBvbGljeVJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFNpZ25hbFByb2Nlc3NSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBTdGFydENvbnRhaW5lclJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFN0YXJ0VHJhY2luZ1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFN0YXRzQ29udGFpbmVyUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgU3RvcFRyYWNpbmdSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBUdHlXaW5SZXNpemVSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVDb250YWluZXJSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVFcGhlbWVyYWxNb3VudHNSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVJbnRlcmZhY2VSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVSb3V0ZXNSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBXYWl0UHJvY2Vzc1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFdyaXRlU3RyZWFtUmVxdWVzdCA6PSB0cnVlCgpkZWZhdWx0IEV4ZWNQcm9jZXNzUmVxdWVzdCA6PSBmYWxzZQo=
+    io.katacontainers.config.hypervisor.cc_init_data: H4sIAAAAAAAAA42UTW/TQBCG7/4Vq/QQOCQKQXCo1ENIAkRqiGWnpBJCaGKP7RXrXTM7DnV/PRMiVUh07R582J3H8/XO7AnJa2fVjRrNpm+ms1EEpnSkuarPd76C+bv3oyj6lgPD92jUOKOzbkpYupEA4/E4ulJL13Sky4rVq+y1ms/mb9VWZ+S8K1iM1DgClijRlcBpvLqf3OoMrcfJJkfLutBI12rRQFbhZD6dCRfJ4SeUqOSz/OMSNopyLKA1rBZ5vkjiLyhBj458gr9a9KyubxRTi/9i6W9oQualcR5TzrUNElLZR20waCcExqWzDNoi9WMp2PzoHkLQSi7JdQPUJ+QtMuksWLQQu912fZK+BZHz7QolaRN0c6s9bywjFZBhL5W4lsPEFuvPjhvTlh+6mNwx2MudNdLDZXwnf4SYGFo/3O64NWZTy+SEgAQhT1lECQZKsHan4UgXLGUw+FWTzHjh0woIt661HGxJgh4xT0RoV6/w1IO19XAOKfJFTxmxva6DRQsX/12jIKBLC0Y0Er2DuUutxMM5nak9QaZt2cOwf4En1ww42nN3OK+w14/B4u+a/CWLesHWTYU1Eph+GS/w0470Y/1LcgDNA40/yKOMzw/tE7N+wOx/NwUYj9H5qf4DsX93tO4FAAA=
 spec:
  runtimeClassName: kata
  containers:
@@ -66,7 +79,7 @@ Create the pod:
 $ kubectl apply -f pod1.yaml
 ```

-While creating the Pod sandbox, the Kata Shim will notice the `io.katacontainers.config.agent.policy` annotation and will send the Policy document to the Kata Agent - by sending a `SetPolicy` request. Note that this request will fail if the default Policy, included in the Guest image, doesn't allow this `SetPolicy` request. If the `SetPolicy` request is rejected by the Guest, the Kata Shim will fail to start the Pod sandbox.
+While creating the Pod sandbox, the Kata Shim will notice the `io.katacontainers.config.hypervisor.cc_init_data` annotation and will create the init data device on the host and mount it on the guest as a block device. The agent then reads the init data struct from this device and sets the policy if present.

 # How is the Policy being enforced?

--- a/docs/how-to/how-to-use-virtio-fs-with-kata.md
+++ b/docs/how-to/how-to-use-virtio-fs-with-kata.md
@@ -6,4 +6,4 @@ Container deployments utilize explicit or implicit file sharing between host fil

 As of the 2.0 release of Kata Containers, [virtio-fs](https://virtio-fs.gitlab.io/) is the default filesystem sharing mechanism.

-virtio-fs support works out of the box for `cloud-hypervisor` and `qemu`, when Kata Containers is deployed using `kata-deploy`. Learn more about `kata-deploy` and how to use `kata-deploy` in Kubernetes [here](../../tools/packaging/kata-deploy/README.md#kubernetes-quick-start).
+virtio-fs support works out of the box for `cloud-hypervisor` and `qemu`, when Kata Containers is deployed using `kata-deploy`. Learn more about `kata-deploy` and how to use `kata-deploy` in Kubernetes [here](../../tools/packaging/kata-deploy/helm-chart/README.md).
--- a/docs/how-to/privileged.md
+++ b/docs/how-to/privileged.md
@@ -1,22 +1,25 @@
 # Privileged Kata Containers

+> [!WARNING]
+> Whilst this functionality is supported, it can decrease the security of Kata Containers if not configured correctly.
+
 Kata Containers supports creation of containers that are "privileged" (i.e. have additional capabilities and access
 that is not normally granted).

-## Warnings
+## Enabling privileged containers without host devices

-**Warning:** Whilst this functionality is supported, it can decrease the security of Kata Containers if not configured 
-correctly.
+> [!TIP]
+> When Kata Containers is installed through
+> [kata-deploy](/tools/packaging/kata-deploy/helm-chart/README.md#kata-deploy-helm-chart), this mitigation is configured
+> out of the box, hence there is no action required in that case.

-### Host Devices
+By default, a privileged container attempts to expose all devices from the host. This is generally not supported in Kata
+Containers as the container is running a different kernel than the host.

-By default, when privileged is enabled for a container, all the `/dev/*` block devices from the host are mounted
-into the guest. This will allow the privileged container inside the Kata guest to gain access to mount any block device 
-from the host, a potentially undesirable side-effect that decreases the security of Kata.
+Instead, the following sections document how to disable this behavior in different container runtimes. Note that this
+mitigation does not affect a container's ability to mount *guest* devices.

-The following sections document how to configure this behavior in different container runtimes.
-
-#### Containerd
+## Containerd

 The Containerd allows configuring the privileged host devices behavior for each runtime in the containerd config. This is
 done with the `privileged_without_host_devices` option. Setting this to `true` will disable hot plugging of the host 
@@ -43,7 +46,7 @@ See below example config:
 - [How to use Kata Containers and containerd with Kubernetes](how-to-use-k8s-with-containerd-and-kata.md)
 - [Containerd CRI config documentation](https://github.com/containerd/containerd/blob/main/docs/cri/config.md)

-#### CRI-O
+## CRI-O

 Similar to containerd, CRI-O allows configuring the privileged host devices
 behavior for each runtime in the CRI config. This is done with the 
--- a/docs/install/kata-containers-3.0-rust-runtime-installation-guide.md
+++ b/docs/install/kata-containers-3.0-rust-runtime-installation-guide.md
@@ -32,7 +32,7 @@ architectures:

 ### Kata Deploy Installation

-Follow the [`kata-deploy`](../../tools/packaging/kata-deploy/README.md).
+Follow the [`kata-deploy`](../../tools/packaging/kata-deploy/helm-chart/README.md).
 ### Official packages
 `ToDo`
 ### Automatic Installation
--- a/docs/use-cases/using-Intel-QAT-and-kata.md
+++ b/docs/use-cases/using-Intel-QAT-and-kata.md
@@ -419,7 +419,7 @@ You might need to disable Docker before initializing Kubernetes. Be aware
 that the OpenSSL container image built above will need to be exported from
 Docker and imported into containerd.

-If Kata is installed through [`kata-deploy`](../../tools/packaging/kata-deploy/README.md)
+If Kata is installed through [`kata-deploy`](../../tools/packaging/kata-deploy/helm-chart/README.md)
 there will be multiple `configuration.toml` files associated with different
 hypervisors. Rather than add in the custom Kata kernel, Kata rootfs, and
 kernel modules to each `configuration.toml` as the default, instead use
--- a/src/agent/Cargo.lock
+++ b/src/agent/Cargo.lock
@@ -665,30 +665,6 @@ dependencies = [
 "shlex",
 ]

-[[package]]
-name = "cdi"
-version = "0.1.0"
-source = "git+https://github.com/cncf-tags/container-device-interface-rs?rev=3b1e83dda5efcc83c7a4f134466ec006b37109c9#3b1e83dda5efcc83c7a4f134466ec006b37109c9"
-dependencies = [
- "anyhow",
- "clap",
- "const_format",
- "jsonschema",
- "lazy_static",
- "libc",
- "nix 0.24.3",
- "notify",
- "oci-spec",
- "once_cell",
- "path-clean",
- "regex",
- "semver",
- "serde",
- "serde_derive",
- "serde_json",
- "serde_yaml",
-]
-
 [[package]]
 name = "cfg-if"
 version = "1.0.0"
@@ -808,6 +784,31 @@ dependencies = [
 "unicode-xid",
 ]

+[[package]]
+name = "container-device-interface"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "653849f0c250f73d9afab4b2a9a6b07adaee1f34c44ffa6f2d2c3f9392002c1a"
+dependencies = [
+ "anyhow",
+ "clap",
+ "const_format",
+ "jsonschema",
+ "lazy_static",
+ "libc",
+ "nix 0.24.3",
+ "notify",
+ "oci-spec",
+ "once_cell",
+ "path-clean",
+ "regex",
+ "semver",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "serde_yaml",
+]
+
 [[package]]
 name = "core-foundation-sys"
 version = "0.8.7"
@@ -2049,11 +2050,11 @@ dependencies = [
 "async-trait",
 "base64 0.22.1",
 "capctl",
- "cdi",
 "cfg-if",
 "cgroups-rs",
 "clap",
 "const_format",
+ "container-device-interface",
 "derivative",
 "futures",
 "ipnetwork",
@@ -4038,12 +4039,9 @@ checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"

 [[package]]
 name = "slab"
-version = "0.4.9"
+version = "0.4.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67"
-dependencies = [
- "autocfg",
-]
+checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589"

 [[package]]
 name = "slash-formatter"
--- a/src/agent/Cargo.toml
+++ b/src/agent/Cargo.toml
@@ -13,8 +13,12 @@ lazy_static = "1.3.0"
 ttrpc = { version = "0.8.4", features = ["async"], default-features = false }
 protobuf = "3.7.2"
 libc = "0.2.94"
-# Notes: nix needs to stay in sync with libs
+
+# Notes:
+# - Needs to stay in sync with libs
+# - Upgrading to 0.27+ will require code changes (see #11842)
 nix = "0.26.4"
+
 capctl = "0.2.0"
 scan_fmt = "0.2.6"
 scopeguard = "1.0.0"
@@ -84,7 +88,7 @@ vsock-exporter = { path = "vsock-exporter" }
 mem-agent = { path = "../mem-agent", package = "mem-agent-lib" }

 kata-sys-util = { path = "../libs/kata-sys-util" }
-kata-types = { path = "../libs/kata-types" }
+kata-types = { path = "../libs/kata-types", features = ["safe-path"] }
 # Note: this crate sets the slog 'max_*' features which allows the log level
 # to be modified at runtime.
 logging = { path = "../libs/logging" }
@@ -163,9 +167,6 @@ clap.workspace = true
 strum.workspace = true
 strum_macros.workspace = true

-# Agent Policy
-cdi = { git = "https://github.com/cncf-tags/container-device-interface-rs", rev = "3b1e83dda5efcc83c7a4f134466ec006b37109c9" }
-
 # Local dependencies
 kata-agent-policy = { workspace = true, optional = true }
 mem-agent.workspace = true
@@ -185,6 +186,8 @@ base64 = "0.22"
 sha2 = "0.10.8"
 async-compression = { version = "0.4.22", features = ["tokio", "gzip"] }

+container-device-interface = "0.1.0"
+
 [target.'cfg(target_arch = "s390x")'.dependencies]
 pv_core = { git = "https://github.com/ibm-s390-linux/s390-tools", rev = "4942504a9a2977d49989a5e5b7c1c8e07dc0fa41", package = "s390_pv_core" }

--- a/src/agent/rustjail/src/container.rs
+++ b/src/agent/rustjail/src/container.rs
@@ -1037,6 +1037,12 @@ impl BaseContainer for LinuxContainer {
        let child_stderr: std::process::Stdio;

        if tty {
+            // NOTE(#11842): This code will require changes if we upgrade to nix 0.27+:
+            // - `pseudo` will contain OwnedFds instead of RawFds.
+            // - We'll have to use `OwnedFd::into_raw_fd()` which will
+            //   transfer the ownership to the caller.
+            // - The duplication strategy will not change.
+
            let pseudo = pty::openpty(None, None)?;
            p.term_master = Some(pseudo.master);
            let _ = fcntl::fcntl(pseudo.master, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC))
@@ -1045,8 +1051,8 @@ impl BaseContainer for LinuxContainer {
                .map_err(|e| warn!(logger, "fcntl pseudo.slave {:?}", e));

            child_stdin = unsafe { std::process::Stdio::from_raw_fd(pseudo.slave) };
-            child_stdout = unsafe { std::process::Stdio::from_raw_fd(pseudo.slave) };
-            child_stderr = unsafe { std::process::Stdio::from_raw_fd(pseudo.slave) };
+            child_stdout = unsafe { std::process::Stdio::from_raw_fd(unistd::dup(pseudo.slave)?) };
+            child_stderr = unsafe { std::process::Stdio::from_raw_fd(unistd::dup(pseudo.slave)?) };

            if let Some(proc_io) = &mut p.proc_io {
                // A reference count used to clean up the term master fd.
@@ -1914,7 +1920,7 @@ mod tests {
        let cgroups_path = format!(
            "/{}/dummycontainer{}",
            CGROUP_PARENT,
-            since_the_epoch.as_millis()
+            since_the_epoch.as_micros()
        );

        let mut spec = SpecBuilder::default()
--- a/src/agent/rustjail/src/mount.rs
+++ b/src/agent/rustjail/src/mount.rs
@@ -5,6 +5,7 @@

 use anyhow::{anyhow, Context, Result};
 use libc::uid_t;
+use nix::errno::Errno;
 use nix::fcntl::{self, OFlag};
 #[cfg(not(test))]
 use nix::mount;
@@ -336,25 +337,19 @@ fn check_proc_mount(m: &Mount) -> Result<()> {

    if mount_dest == PROC_PATH {
        // only allow a mount on-top of proc if it's source is "proc"
-        unsafe {
-            let mut stats = MaybeUninit::<libc::statfs>::uninit();
-            let mount_source = m.source().as_ref().unwrap().display().to_string();
-            if mount_source
-                .with_nix_path(|path| libc::statfs(path.as_ptr(), stats.as_mut_ptr()))
-                .is_ok()
-            {
-                if stats.assume_init().f_type == PROC_SUPER_MAGIC {
-                    return Ok(());
-                }
-            } else {
-                return Ok(());
-            }
+        let mount_source = m.source().as_ref().unwrap().display().to_string();

-            return Err(anyhow!(format!(
+        let mut stats = MaybeUninit::<libc::statfs>::uninit();
+        let statfs_ret = mount_source
+            .with_nix_path(|path| unsafe { libc::statfs(path.as_ptr(), stats.as_mut_ptr()) })?;
+
+        return match Errno::result(statfs_ret) {
+            Ok(_) if unsafe { stats.assume_init().f_type } == PROC_SUPER_MAGIC => Ok(()),
+            Ok(_) | Err(_) => Err(anyhow!(format!(
                "{} cannot be mounted to {} because it is not of type proc",
                &mount_source, &mount_dest
-            )));
-        }
+            ))),
+        };
    }

    if mount_dest.starts_with(PROC_PATH) {
--- a/src/agent/src/config.rs
+++ b/src/agent/src/config.rs
@@ -202,7 +202,7 @@ macro_rules! config_override {
        }
    };

-    ($builder:ident, $config:ident, $field:ident, $func: ident) => {
+    ($builder:ident, $config:ident, $field:ident, $func:ident) => {
        if let Some(v) = $builder.$field {
            $config.$field = $func(&v)?;
        }
@@ -661,8 +661,8 @@ impl AgentConfig {
            self.server_addr = addr;
        }

-        if let Ok(addr) = env::var(LOG_LEVEL_ENV_VAR) {
-            if let Ok(level) = logrus_to_slog_level(&addr) {
+        if let Ok(level) = env::var(LOG_LEVEL_ENV_VAR) {
+            if let Ok(level) = logrus_to_slog_level(&level) {
                self.log_level = level;
            }
        }
--- a/src/agent/src/device/mod.rs
+++ b/src/agent/src/device/mod.rs
@@ -15,6 +15,7 @@ use anyhow::{anyhow, Context, Result};
 use cdi::annotations::parse_annotations;
 use cdi::cache::{new_cache, with_auto_refresh, CdiOption};
 use cdi::spec_dirs::with_spec_dirs;
+use container_device_interface as cdi;
 use kata_types::device::DeviceHandlerManager;
 use nix::sys::stat;
 use oci::{LinuxDeviceCgroup, Spec};
--- a/src/agent/src/main.rs
+++ b/src/agent/src/main.rs
@@ -30,6 +30,7 @@ use nix::unistd::{self, dup, sync, Pid};
 use std::env;
 use std::ffi::OsStr;
 use std::fs::{self, File};
+use std::io::ErrorKind;
 use std::os::unix::fs::{self as unixfs, FileTypeExt};
 use std::os::unix::io::AsRawFd;
 use std::path::Path;
@@ -465,8 +466,17 @@ fn attestation_binaries_available(logger: &Logger, procs: &GuestComponentsProcs)
        _ => vec![],
    };
    for binary in binaries.iter() {
-        if !Path::new(binary).exists() {
-            warn!(logger, "{} not found", binary);
+        let exists = Path::new(binary)
+            .try_exists()
+            .unwrap_or_else(|error| match error.kind() {
+                ErrorKind::NotFound => {
+                    warn!(logger, "{} not found", binary);
+                    false
+                }
+                _ => panic!("Path existence check failed for '{}': {}", binary, error),
+            });
+
+        if !exists {
            return false;
        }
    }
--- a/src/agent/src/netlink.rs
+++ b/src/agent/src/netlink.rs
@@ -401,7 +401,11 @@ impl Handle {
                }

                if let RouteAttribute::Oif(index) = attribute {
-                    route.device = self.find_link(LinkFilter::Index(*index)).await?.name();
+                    route.device = self
+                        .find_link(LinkFilter::Index(*index))
+                        .await
+                        .context(format!("error looking up device {index}"))?
+                        .name();
                }
            }

@@ -909,10 +913,15 @@ mod tests {
    use super::*;
    use netlink_packet_route::address::AddressHeader;
    use netlink_packet_route::link::LinkHeader;
+    use serial_test::serial;
    use std::iter;
    use std::process::Command;
    use test_utils::skip_if_not_root;

+    // Constants for ARP neighbor tests
+    const TEST_DUMMY_INTERFACE: &str = "dummy_for_arp";
+    const TEST_ARP_IP: &str = "192.0.2.127";
+
    #[tokio::test]
    async fn find_link_by_name() {
        let message = Handle::new()
@@ -972,11 +981,15 @@ mod tests {
    }

    #[tokio::test]
+    #[serial(arp_neighbor_tests)]
    async fn list_routes() {
+        clean_env_for_test_add_one_arp_neighbor(TEST_DUMMY_INTERFACE, TEST_ARP_IP);
+        let devices: Vec<Interface> = Handle::new().unwrap().list_interfaces().await.unwrap();
        let all = Handle::new()
            .unwrap()
            .list_routes()
            .await
+            .context(format!("available devices: {:?}", devices))
            .expect("Failed to list routes");

        assert_ne!(all.len(), 0);
@@ -1088,7 +1101,7 @@ mod tests {
            .expect("prepare: failed to delete neigh");
    }

-    fn prepare_env_for_test_add_one_arp_neighbor(dummy_name: &str, ip: &str) {
+    async fn prepare_env_for_test_add_one_arp_neighbor(dummy_name: &str, ip: &str) {
        clean_env_for_test_add_one_arp_neighbor(dummy_name, ip);
        // modprobe dummy
        Command::new("modprobe")
@@ -1102,9 +1115,9 @@ mod tests {
            .output()
            .expect("failed to add dummy interface");

-        // ip addr add 192.168.0.2/16 dev dummy
+        // ip addr add 192.0.2.2/24 dev dummy
        Command::new("ip")
-            .args(["addr", "add", "192.168.0.2/16", "dev", dummy_name])
+            .args(["addr", "add", "192.0.2.2/24", "dev", dummy_name])
            .output()
            .expect("failed to add ip for dummy");

@@ -1113,24 +1126,26 @@ mod tests {
            .args(["link", "set", dummy_name, "up"])
            .output()
            .expect("failed to up dummy");
+
+        // Wait briefly to ensure the IP address addition is fully complete
+        tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
    }

    #[tokio::test]
+    #[serial(arp_neighbor_tests)]
    async fn test_add_one_arp_neighbor() {
        skip_if_not_root!();

        let mac = "6a:92:3a:59:70:aa";
-        let to_ip = "169.254.1.1";
-        let dummy_name = "dummy_for_arp";

-        prepare_env_for_test_add_one_arp_neighbor(dummy_name, to_ip);
+        prepare_env_for_test_add_one_arp_neighbor(TEST_DUMMY_INTERFACE, TEST_ARP_IP).await;

        let mut ip_address = IPAddress::new();
-        ip_address.set_address(to_ip.to_string());
+        ip_address.set_address(TEST_ARP_IP.to_string());

        let mut neigh = ARPNeighbor::new();
        neigh.set_toIPAddress(ip_address);
-        neigh.set_device(dummy_name.to_string());
+        neigh.set_device(TEST_DUMMY_INTERFACE.to_string());
        neigh.set_lladdr(mac.to_string());
        neigh.set_state(0x80);

@@ -1141,15 +1156,24 @@ mod tests {
            .expect("Failed to add ARP neighbor");

        // ip neigh show dev dummy ip
-        let stdout = Command::new("ip")
-            .args(["neigh", "show", "dev", dummy_name, to_ip])
+        let output = Command::new("ip")
+            .args(["neigh", "show", "dev", TEST_DUMMY_INTERFACE, TEST_ARP_IP])
            .output()
-            .expect("failed to show neigh")
-            .stdout;
+            .expect("failed to show neigh");

-        let stdout = std::str::from_utf8(&stdout).expect("failed to convert stdout");
-        assert_eq!(stdout.trim(), format!("{} lladdr {} PERMANENT", to_ip, mac));
+        let stdout = std::str::from_utf8(&output.stdout).expect("failed to convert stdout");
+        let stderr = std::str::from_utf8(&output.stderr).expect("failed to convert stderr");
+        assert!(
+            output.status.success(),
+            "`ip neigh show` returned exit code {:?}. stderr: {:?}",
+            output.status.code(),
+            stderr
+        );
+        assert_eq!(
+            stdout.trim(),
+            format!("{} lladdr {} PERMANENT", TEST_ARP_IP, mac)
+        );

-        clean_env_for_test_add_one_arp_neighbor(dummy_name, to_ip);
+        clean_env_for_test_add_one_arp_neighbor(TEST_DUMMY_INTERFACE, TEST_ARP_IP);
    }
 }
--- a/src/agent/src/rpc.rs
+++ b/src/agent/src/rpc.rs
@@ -2417,7 +2417,7 @@ mod tests {
        let cgroups_path = format!(
            "/{}/dummycontainer{}",
            CGROUP_PARENT,
-            since_the_epoch.as_millis()
+            since_the_epoch.as_micros()
        );

        let spec = SpecBuilder::default()
--- a/src/agent/src/sandbox.rs
+++ b/src/agent/src/sandbox.rs
@@ -858,7 +858,7 @@ mod tests {
        let cgroups_path = format!(
            "/{}/dummycontainer{}",
            CGROUP_PARENT,
-            since_the_epoch.as_millis()
+            since_the_epoch.as_micros()
        );

        let spec = SpecBuilder::default()
--- a/src/dragonball/Cargo.lock
+++ b/src/dragonball/Cargo.lock
@@ -344,20 +344,26 @@ name = "dbs-pci"
 version = "0.1.0"
 dependencies = [
 "byteorder",
+ "dbs-address-space",
 "dbs-allocator",
 "dbs-arch",
 "dbs-boot",
 "dbs-device",
 "dbs-interrupt",
+ "dbs-utils",
+ "dbs-virtio-devices",
 "downcast-rs",
 "kvm-bindings",
 "kvm-ioctls",
 "libc",
 "log",
+ "serde",
 "thiserror 1.0.48",
 "vfio-bindings",
 "vfio-ioctls",
+ "virtio-queue",
 "vm-memory",
+ "vmm-sys-util",
 ]

 [[package]]
@@ -1810,9 +1816,9 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"

 [[package]]
 name = "seccompiler"
-version = "0.2.0"
+version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e01d1292a1131b22ccea49f30bd106f1238b5ddeec1a98d39268dcc31d540e68"
+checksum = "a4ae55de56877481d112a559bbc12667635fdaf5e005712fd4e2b2fa50ffc884"
 dependencies = [
 "libc",
 ]
@@ -1922,12 +1928,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"

 [[package]]
 name = "slab"
-version = "0.4.9"
+version = "0.4.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67"
-dependencies = [
- "autocfg",
-]
+checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589"

 [[package]]
 name = "slog"
--- a/src/dragonball/Cargo.toml
+++ b/src/dragonball/Cargo.toml
@@ -33,7 +33,7 @@ event-manager = "0.2.1"
 kvm-bindings = "0.6.0"
 kvm-ioctls = "0.12.0"
 linux-loader = "0.8.0"
-seccompiler = "0.2.0"
+seccompiler = "0.5.0"
 vfio-bindings = "0.3.0"
 vfio-ioctls = "0.1.0"
 virtio-bindings = "0.1.0"
--- a/src/dragonball/dbs_device/src/device_manager.rs
+++ b/src/dragonball/dbs_device/src/device_manager.rs
@@ -18,7 +18,7 @@
 //!
 //! # Examples
 //!
-//! Creating a dummy deivce which implement DeviceIo trait, and register it to [IoManager] with
+//! Create a dummy device which implements the DeviceIo trait, and register it to [IoManager] with
 //! trapped MMIO/PIO address ranges:
 //!
 //! ```
@@ -102,7 +102,7 @@ use crate::resources::Resource;
 use crate::{DeviceIo, IoAddress, IoSize, PioAddress};

 /// Error types for `IoManager` related operations.
-#[derive(Error, Debug)]
+#[derive(Error, Debug, PartialEq)]
 pub enum Error {
    /// The inserting device overlaps with a current device.
    #[error("device address conflicts with existing devices")]
--- a/src/dragonball/dbs_interrupt/src/notifier.rs
+++ b/src/dragonball/dbs_interrupt/src/notifier.rs
@@ -5,7 +5,9 @@

 use std::any::Any;
 use std::io::Error;
+use std::sync::atomic::{AtomicU16, Ordering};
 use std::sync::Arc;
+use std::sync::Mutex;

 use vmm_sys_util::eventfd::EventFd;

@@ -121,6 +123,77 @@ mod msi {
    }
 }

+/// Sentinel vector value meaning "no MSI vector assigned"; a notifier that resolves to this
+/// value does not trigger any interrupt.
+pub const VIRTQ_MSI_NO_VECTOR: u16 = 0xffff;
+
+/// Selects which MSI-X vector a [`VirtioNotifierMsix`] resolves when it is used.
+#[derive(Clone, PartialEq, Debug, Copy)]
+pub enum VirtioInterruptType {
+    /// Use the configuration vector (`config_vector`).
+    Config,
+    /// Use the entry at this index of the per-queue vector table (`queues_vectors`).
+    Queue(u16),
+}
+
+/// Interrupt notifier that resolves an MSI-X vector at notification time and forwards the
+/// request to an interrupt source group.
+///
+/// The vector storage is shared through `Arc`s, so the vector is looked up on every call
+/// rather than captured when the notifier is created.
+#[derive(Clone)]
+pub struct VirtioNotifierMsix {
+    /// Vector used when `interrupt_type` is `VirtioInterruptType::Config`.
+    pub(crate) config_vector: Arc<AtomicU16>,
+    /// Per-queue vectors, indexed by the queue index carried in `VirtioInterruptType::Queue`.
+    pub(crate) queues_vectors: Arc<Mutex<Vec<u16>>>,
+    /// Interrupt source group that `notify()` and `notifier()` delegate to.
+    pub(crate) interrupt_source_group: Arc<Box<dyn InterruptSourceGroup>>,
+    /// Which vector (configuration or a specific queue) this notifier resolves.
+    pub(crate) interrupt_type: VirtioInterruptType,
+}
+
+impl VirtioNotifierMsix {
+    /// Build a notifier for `interrupt_type`.
+    ///
+    /// * `config_vector`: shared storage of the configuration vector.
+    /// * `queues_vectors`: shared table of per-queue vectors.
+    /// * `interrupt_source_group`: group the resolved vector is triggered on.
+    /// * `interrupt_type`: whether this notifier serves the configuration interrupt or a queue.
+    pub fn new(
+        config_vector: Arc<AtomicU16>,
+        queues_vectors: Arc<Mutex<Vec<u16>>>,
+        interrupt_source_group: Arc<Box<dyn InterruptSourceGroup>>,
+        interrupt_type: VirtioInterruptType,
+    ) -> Self {
+        Self {
+            interrupt_type,
+            interrupt_source_group,
+            queues_vectors,
+            config_vector,
+        }
+    }
+}
+
+impl VirtioNotifierMsix {
+    /// Resolve the vector currently selected by `interrupt_type`.
+    ///
+    /// Returns `None` when the notifier refers to a queue index that has no entry in
+    /// `queues_vectors`, so callers can handle that case instead of panicking on an
+    /// out-of-bounds index.
+    fn current_vector(&self) -> Option<u16> {
+        match self.interrupt_type {
+            VirtioInterruptType::Config => Some(self.config_vector.load(Ordering::Acquire)),
+            VirtioInterruptType::Queue(queue_index) => {
+                let vectors = self.queues_vectors.lock().unwrap();
+                let vector = vectors.get(usize::from(queue_index)).copied();
+                vector
+            }
+        }
+    }
+}
+
+impl InterruptNotifier for VirtioNotifierMsix {
+    /// Trigger the interrupt for the resolved vector.
+    ///
+    /// Does nothing and returns `Ok(())` when the vector is `VIRTQ_MSI_NO_VECTOR`. Returns an
+    /// `InvalidInput` error when the queue index has no entry in the vector table.
+    fn notify(&self) -> std::result::Result<(), std::io::Error> {
+        let vector = self.current_vector().ok_or_else(|| {
+            std::io::Error::new(
+                std::io::ErrorKind::InvalidInput,
+                "virtio queue index has no MSI-X vector entry",
+            )
+        })?;
+        if vector == VIRTQ_MSI_NO_VECTOR {
+            return Ok(());
+        }
+
+        self.interrupt_source_group
+            .trigger(vector as InterruptIndex)
+    }
+
+    /// Return the `EventFd` of the resolved vector, or `None` when no usable vector is
+    /// configured (`VIRTQ_MSI_NO_VECTOR`, or a queue index without a table entry).
+    fn notifier(&self) -> Option<&EventFd> {
+        match self.current_vector() {
+            Some(vector) if vector != VIRTQ_MSI_NO_VECTOR => self
+                .interrupt_source_group
+                .notifier(vector as InterruptIndex),
+            _ => None,
+        }
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn clone_boxed(&self) -> Box<dyn InterruptNotifier> {
+        Box::new(self.clone())
+    }
+}
+
 /// Struct to discard interrupts.
 #[derive(Copy, Clone, Debug, Default)]
 pub struct NoopNotifier {}
--- a/src/dragonball/dbs_pci/Cargo.toml
+++ b/src/dragonball/dbs_pci/Cargo.toml
@@ -16,6 +16,8 @@ thiserror = "1"
 dbs-allocator = { workspace = true }
 dbs-boot = { workspace = true }
 dbs-device = { workspace = true }
+dbs-address-space = { workspace = true }
+dbs-virtio-devices = { workspace = true }
 dbs-interrupt = { workspace = true, features = [
    "kvm-irq",
    "kvm-legacy-irq",
@@ -23,12 +25,18 @@ dbs-interrupt = { workspace = true, features = [
 ] }
 downcast-rs = "1.2.0"
 byteorder = "1.4.3"
+serde = "1.0.27"
+
 vm-memory = {workspace = true}
 kvm-ioctls = {workspace = true}
 kvm-bindings = {workspace = true}
 vfio-ioctls = {workspace = true}
 vfio-bindings = {workspace = true}
 libc = "0.2.39"
+vmm-sys-util = {workspace = true}
+virtio-queue = {workspace = true}
+dbs-utils = {workspace = true}
+

 [dev-dependencies]
 dbs-arch = { workspace = true }
--- a/src/dragonball/dbs_pci/src/lib.rs
+++ b/src/dragonball/dbs_pci/src/lib.rs
@@ -21,7 +21,7 @@
 //! - PCI configuration: a common framework to emulator PCI configuration space header.
 //! - PCI MSI/MSIx: structs to emulate PCI MSI/MSIx capabilities.

-use std::sync::Arc;
+use std::sync::{Arc, Mutex};

 use dbs_device::device_manager::IoManagerContext;
 use dbs_interrupt::KvmIrqManager;
@@ -58,8 +58,17 @@ pub use msix::{MsixCap, MsixState, MSIX_TABLE_ENTRY_SIZE};
 mod vfio;
 pub use vfio::{VfioPciDevice, VfioPciError, VENDOR_NVIDIA};

+mod virtio_pci;
+pub use virtio_pci::{VirtioPciDevice, VirtioPciDeviceError, CAPABILITY_BAR_SIZE};
+
+mod pci_address;
+use dbs_virtio_devices::VirtioDevice;
+pub use pci_address::PciAddress;
+
+mod pci_common_config;
+
 /// Error codes related to PCI root/bus/device operations.
-#[derive(Debug, thiserror::Error)]
+#[derive(Debug, thiserror::Error, PartialEq)]
 pub enum Error {
    /// Failed to activate the PCI root/bus/device.
    #[error("failed to activate PCI device, {0:?}")]
@@ -110,6 +119,9 @@ pub enum Error {
    /// PCI ROM BAR address is invalid.
    #[error("address {0} size {1} too big")]
    RomBarAddressInvalid(u64, u64),
+    /// Invalid parameter
+    #[error("invalid pci device address")]
+    InvalidParameter,
 }

 /// Specialized `Result` for PCI related operations.
@@ -130,3 +142,8 @@ pub fn fill_config_data(data: &mut [u8]) {
        *pos = 0xff;
    }
 }
+
+/// Default PCI bus number. Only a single PCI bus (bus 0) is supported.
+pub const PCI_BUS_DEFAULT: u8 = 0;
+
+type ArcMutexBoxDynVirtioDevice<AS, Q, R> = Arc<Mutex<Box<dyn VirtioDevice<AS, Q, R>>>>;
--- a/src/dragonball/dbs_pci/src/pci_address.rs
+++ b/src/dragonball/dbs_pci/src/pci_address.rs
@@ -0,0 +1,100 @@
+// Copyright (C) 2024 Alibaba Cloud. All rights reserved.
+//
+// Copyright (C) 2025 Ant Group. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
+
+use std::cmp::Ordering;
+use std::fmt;
+
+use crate::{Error, Result};
+
+const PCI_MAX_DEV_ID: u8 = 0x1f;
+const PCI_MAX_FUNC_ID: u8 = 0x7;
+
+/// Address of a PCI function, made of a bus number, a device id and a function id.
+///
+/// Values are created through [`PciAddress::new`], which rejects device and function ids
+/// outside the ranges documented on the fields.
+#[derive(Copy, Clone, PartialEq, Eq, Hash)]
+pub struct PciAddress {
+    /// Bus number, in the range [0, 0xff].
+    bus: u8,
+    /// Device id, in the range [0x0, 0x1f].
+    dev: u8,
+    /// Function id, in the range [0x0, 0x7].
+    func: u8,
+}
+
+impl PartialOrd for PciAddress {
+    /// Always returns `Some`: the comparison is delegated to the total order of `Ord`.
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(Ord::cmp(self, other))
+    }
+}
+
+impl Ord for PciAddress {
+    /// Order addresses lexicographically: bus first, then device id, then function id.
+    fn cmp(&self, other: &Self) -> Ordering {
+        let lhs = (self.bus, self.dev, self.func);
+        let rhs = (other.bus, other.dev, other.func);
+        lhs.cmp(&rhs)
+    }
+}
+
+impl PciAddress {
+    /// Build a PCI address from its bus number, device id and function id.
+    ///
+    /// * `bus`: PCI bus number, in the range \[0x0, 0xff\].
+    /// * `dev`: PCI device id, in the range \[0x0, 0x1f\].
+    /// * `func`: PCI function id, in the range \[0x0, 0x7\].
+    ///
+    /// Returns `Error::InvalidParameter` when `dev` or `func` is out of range.
+    pub fn new(bus: u8, dev: u8, func: u8) -> Result<Self> {
+        let dev_in_range = dev <= PCI_MAX_DEV_ID;
+        let func_in_range = func <= PCI_MAX_FUNC_ID;
+
+        if dev_in_range && func_in_range {
+            Ok(Self { bus, dev, func })
+        } else {
+            Err(Error::InvalidParameter)
+        }
+    }
+
+    /// Device id on the PCI bus, in the range \[0x0, 0x1f\].
+    pub fn dev_id(&self) -> u8 {
+        self.dev
+    }
+
+    /// Function id of the device, in the range \[0x0, 0x7\].
+    pub fn func_id(&self) -> u8 {
+        self.func
+    }
+
+    /// Bus number of the device, in the range \[0x0, 0xff\].
+    pub fn bus_id(&self) -> u8 {
+        self.bus
+    }
+}
+
+impl fmt::Debug for PciAddress {
+    /// Format as `PciAddress: bb:dd.ff`, each component as two lowercase hex digits.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let Self { bus, dev, func } = *self;
+        write!(f, "PciAddress: {:02x}:{:02x}.{:02x}", bus, dev, func)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Validation, accessors and Debug formatting of `PciAddress`.
+    #[test]
+    fn test_pci_address() {
+        // test invalid device id
+        assert_eq!(PciAddress::new(0, 32, 0), Err(Error::InvalidParameter));
+
+        // test invalid function id
+        assert_eq!(PciAddress::new(0, 0, 8), Err(Error::InvalidParameter));
+
+        // the largest bus/device/function ids are still valid
+        assert!(PciAddress::new(0xff, PCI_MAX_DEV_ID, PCI_MAX_FUNC_ID).is_ok());
+
+        // test pci address
+        let (bus, dev, func) = (3, 5, 4);
+        let address = PciAddress::new(bus, dev, func).unwrap();
+        assert_eq!(address.bus_id(), bus);
+        assert_eq!(address.dev_id(), dev);
+        assert_eq!(address.func_id(), func);
+
+        // test debug formatting
+        assert_eq!(format!("{:?}", address), "PciAddress: 03:05.04");
+    }
+
+    /// Addresses compare by bus first, then device id, then function id.
+    #[test]
+    fn test_pci_address_ordering() {
+        let low = PciAddress::new(0, 1, 7).unwrap();
+        let mid = PciAddress::new(0, 2, 0).unwrap();
+        let high = PciAddress::new(1, 0, 0).unwrap();
+
+        assert!(low < mid);
+        assert!(mid < high);
+        assert_eq!(low.cmp(&low), Ordering::Equal);
+        assert_eq!(low.partial_cmp(&mid), Some(Ordering::Less));
+    }
+}
--- a/src/dragonball/dbs_pci/src/pci_common_config.rs
+++ b/src/dragonball/dbs_pci/src/pci_common_config.rs
@@ -0,0 +1,507 @@
+// Copyright 2018 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE-BSD-3-Clause file.
+//
+// Copyright © 2019 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
+//
+// Copyright (C) 2024 Alibaba Cloud. All rights reserved.
+//
+// Copyright (C) 2025 Ant Group. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
+
+use std::sync::atomic::{AtomicU16, Ordering};
+use std::sync::{Arc, Mutex};
+
+use byteorder::{ByteOrder, LittleEndian};
+use log::{error, trace, warn};
+use serde::{Deserialize, Serialize};
+use virtio_queue::QueueT;
+use vm_memory::{GuestAddressSpace, GuestMemoryRegion};
+
+use crate::ArcMutexBoxDynVirtioDevice;
+use dbs_virtio_devices::VirtioQueueConfig;
+
+/// Plain, serializable snapshot of a `VirtioPciCommonConfig`.
+///
+/// Holds the same values as `VirtioPciCommonConfig`, with the shared MSI-X vector storage
+/// flattened into plain values.
+#[derive(Clone, Serialize, Deserialize)]
+pub struct VirtioPciCommonConfigState {
+    /// Value of the status register at offset 0x14.
+    pub driver_status: u8,
+    /// Value of the `config_generation` register at offset 0x15.
+    pub config_generation: u8,
+    /// Feature page selected for reads of `device_feature`.
+    pub device_feature_select: u32,
+    /// Feature page selected for writes of `driver_feature`.
+    pub driver_feature_select: u32,
+    /// Index of the virtqueue addressed by the per-queue registers.
+    pub queue_select: u16,
+    /// MSI-X vector of the configuration interrupt.
+    pub msix_config: u16,
+    /// MSI-X vector of each virtqueue, indexed by queue index.
+    pub msix_queues: Vec<u16>,
+}
+
+/* The standard layout for the ring is a continuous chunk of memory which looks
+ * like this.  We assume num is a power of 2.
+ *
+ * struct vring
+ * {
+ *	// The actual descriptors (16 bytes each)
+ *	struct vring_desc desc[num];
+ *
+ *	// A ring of available descriptor heads with free-running index.
+ *	__virtio16 avail_flags;
+ *	__virtio16 avail_idx;
+ *	__virtio16 available[num];
+ *	__virtio16 used_event_idx;
+ *
+ *	// Padding to the next align boundary.
+ *	char pad[];
+ *
+ *	// A ring of used descriptor heads with free-running index.
+ *	__virtio16 used_flags;
+ *	__virtio16 used_idx;
+ *	struct vring_used_elem used[num];
+ *	__virtio16 avail_event_idx;
+ * };
+ * struct vring_desc {
+ *	__virtio64 addr;
+ *	__virtio32 len;
+ *	__virtio16 flags;
+ *	__virtio16 next;
+ * };
+ *
+ * struct vring_avail {
+ *	__virtio16 flags;
+ *	__virtio16 idx;
+ *	__virtio16 ring[];
+ * };
+ *
+ * // u32 is used here for ids for padding reasons.
+ * struct vring_used_elem {
+ *	// Index of start of used descriptor chain.
+ *	__virtio32 id;
+ *	// Total length of the descriptor chain which was used (written to)
+ *	__virtio32 len;
+ * };
+ *
+ * Kernel header used for this reference: include/uapi/linux/virtio_ring.h
+ * Virtio Spec: https://docs.oasis-open.org/virtio/virtio/v1.2/csd01/virtio-v1.2-csd01.html
+ *
+ */
+
+/// Contains the data for reading and writing the common configuration structure of a virtio PCI
+/// device.
+///
+/// Register layout (offsets are relative to the start of the common configuration structure):
+///
+/// ```text
+/// About the whole device.
+///    le32 device_feature_select;     // 0x00 // read-write
+///    le32 device_feature;            // 0x04 // read-only for driver
+///    le32 driver_feature_select;     // 0x08 // read-write
+///    le32 driver_feature;            // 0x0C // read-write
+///    le16 msix_config;               // 0x10 // read-write
+///    le16 num_queues;                // 0x12 // read-only for driver
+///    u8 device_status;               // 0x14 // read-write (driver_status)
+///    u8 config_generation;           // 0x15 // read-only for driver
+///
+/// About a specific virtqueue.
+///    le16 queue_select;              // 0x16 // read-write
+///    le16 queue_size;                // 0x18 // read-write, power of 2, or 0.
+///    le16 queue_msix_vector;         // 0x1A // read-write
+///    le16 queue_enable;              // 0x1C // read-write (Ready)
+///    le16 queue_notify_off;          // 0x1E // read-only for driver
+///    le64 queue_desc;                // 0x20 // read-write
+///    le64 queue_avail;               // 0x28 // read-write
+///    le64 queue_used;                // 0x30 // read-write
+/// ```
+pub struct VirtioPciCommonConfig {
+    /// Value of the `device_status` register (offset 0x14).
+    pub driver_status: u8,
+    /// Value of the `config_generation` register (offset 0x15).
+    pub config_generation: u8,
+    /// Feature page selected for reads of `device_feature` (offset 0x00).
+    pub device_feature_select: u32,
+    /// Feature page selected for writes of `driver_feature` (offset 0x08).
+    pub driver_feature_select: u32,
+    /// Index of the virtqueue addressed by the per-queue registers (offset 0x16).
+    pub queue_select: u16,
+    /// MSI-X vector of the configuration interrupt (offset 0x10), held in shared atomic storage.
+    pub msix_config: Arc<AtomicU16>,
+    /// MSI-X vector of each virtqueue (offset 0x1A), indexed by queue index, behind a shared
+    /// mutex.
+    pub msix_queues: Arc<Mutex<Vec<u16>>>,
+}
+
+impl VirtioPciCommonConfig {
+    /// Build the common configuration from `state`, wrapping the MSI-X vector values in fresh
+    /// shared containers.
+    pub fn new(state: VirtioPciCommonConfigState) -> Self {
+        let VirtioPciCommonConfigState {
+            driver_status,
+            config_generation,
+            device_feature_select,
+            driver_feature_select,
+            queue_select,
+            msix_config,
+            msix_queues,
+        } = state;
+
+        Self {
+            driver_status,
+            config_generation,
+            device_feature_select,
+            driver_feature_select,
+            queue_select,
+            msix_config: Arc::new(AtomicU16::new(msix_config)),
+            msix_queues: Arc::new(Mutex::new(msix_queues)),
+        }
+    }
+
+    // TODO(fupan): use for live upgrade later
+    /// Capture the current register values as a plain `VirtioPciCommonConfigState`, copying
+    /// the MSI-X vectors out of their shared containers.
+    #[allow(dead_code)]
+    fn state(&self) -> VirtioPciCommonConfigState {
+        let msix_config = self.msix_config.load(Ordering::Acquire);
+        let msix_queues = self.msix_queues.lock().unwrap().clone();
+
+        VirtioPciCommonConfigState {
+            driver_status: self.driver_status,
+            config_generation: self.config_generation,
+            device_feature_select: self.device_feature_select,
+            driver_feature_select: self.driver_feature_select,
+            queue_select: self.queue_select,
+            msix_config,
+            msix_queues,
+        }
+    }
+
+    /// Read an 8-bit register: the status register (0x14) or `config_generation` (0x15).
+    ///
+    /// Any other offset is logged and reads as 0.
+    fn read_common_config_byte(&self, offset: u64) -> u8 {
+        trace!("read_common_config_byte: offset 0x{:x}", offset);
+        // The driver is only allowed to do aligned, properly sized access.
+        if offset == 0x14 {
+            return self.driver_status;
+        }
+        if offset == 0x15 {
+            return self.config_generation;
+        }
+
+        warn!("invalid virtio config byte read: 0x{:x}", offset);
+        0
+    }
+
+    /// Write an 8-bit register. Only the status register (0x14) is writable; any other
+    /// offset is logged and ignored.
+    fn write_common_config_byte(&mut self, offset: u64, value: u8) {
+        trace!(
+            "write_common_config_byte: offset 0x{:x} value 0x{:x}",
+            offset,
+            value
+        );
+        if offset == 0x14 {
+            self.driver_status = value;
+        } else {
+            warn!("invalid virtio config byte write: 0x{:x}", offset);
+        }
+    }
+
+    /// Read a 16-bit register of the common configuration structure.
+    ///
+    /// Per-queue registers refer to the queue chosen by `queue_select`. When `queue_select`
+    /// does not name an existing queue, `queue_size` and `queue_enable` read as 0 and
+    /// `queue_msix_vector` reads as `VIRTIO_MSI_NO_VECTOR` (0xffff). Unknown offsets are
+    /// logged and read as 0.
+    fn read_common_config_word<Q: QueueT + 'static>(
+        &self,
+        offset: u64,
+        queues: &[VirtioQueueConfig<Q>],
+    ) -> u16 {
+        // "No vector" value reported for a queue that has no entry in the vector table.
+        const VIRTIO_MSI_NO_VECTOR: u16 = 0xffff;
+
+        trace!("read_common_config_word: offset 0x{:x}", offset);
+        match offset {
+            0x10 => self.msix_config.load(Ordering::Acquire),
+            0x12 => queues.len() as u16, // num_queues
+            0x16 => self.queue_select,
+            0x18 => self.with_queue(queues, |q| q.max_size()).unwrap_or(0),
+            0x1a => {
+                // `queue_select` is stored by the 0x16 write without any range check, so use
+                // `get()` here: plain indexing would panic on an out-of-range selection.
+                let vector = self
+                    .msix_queues
+                    .lock()
+                    .unwrap()
+                    .get(usize::from(self.queue_select))
+                    .copied();
+                vector.unwrap_or(VIRTIO_MSI_NO_VECTOR)
+            }
+            0x1c => u16::from(self.with_queue(queues, |q| q.ready()).unwrap_or(false)),
+            0x1e => self.queue_select, // notify_off
+            _ => {
+                warn!("invalid virtio register word read: 0x{:x}", offset);
+                0
+            }
+        }
+    }
+
+    /// Write a 16-bit register of the common configuration structure.
+    ///
+    /// Per-queue registers refer to the queue chosen by `queue_select`; writes aimed at a
+    /// queue that does not exist are ignored (the MSI-X vector write also logs a warning).
+    /// Unknown offsets are logged and ignored.
+    fn write_common_config_word<Q: QueueT + 'static>(
+        &mut self,
+        offset: u64,
+        value: u16,
+        queues: &mut [VirtioQueueConfig<Q>],
+    ) {
+        trace!(
+            "write_common_config_word: offset 0x{:x} value 0x{:x}",
+            offset,
+            value
+        );
+        match offset {
+            0x10 => self.msix_config.store(value, Ordering::Release),
+            0x16 => self.queue_select = value,
+            0x18 => self.with_queue_mut(queues, |q| q.set_size(value)),
+            0x1a => {
+                // `queue_select` is stored by the 0x16 write without any range check, so use
+                // `get_mut()` here: plain indexing would panic on an out-of-range selection.
+                let mut vectors = self.msix_queues.lock().unwrap();
+                match vectors.get_mut(usize::from(self.queue_select)) {
+                    Some(vector) => *vector = value,
+                    None => warn!(
+                        "invalid queue_select {} for queue_msix_vector write",
+                        self.queue_select
+                    ),
+                }
+            }
+            0x1c => self.with_queue_mut(queues, |q| {
+                let ready = value == 1;
+                q.set_ready(ready);
+            }),
+            _ => {
+                warn!("invalid virtio register word write: 0x{:x}", offset);
+            }
+        }
+    }
+
+    /// Read a 32-bit register of the common configuration structure.
+    ///
+    /// * 0x00: `device_feature_select`.
+    /// * 0x04: the device feature page chosen by `device_feature_select`, or 0 when the
+    ///   selected page is not one of the two defined pages.
+    /// * 0x08: `driver_feature_select`.
+    ///
+    /// Any other offset is logged and reads as 0.
+    fn read_common_config_dword<
+        AS: GuestAddressSpace + 'static,
+        Q: QueueT + 'static,
+        R: 'static + GuestMemoryRegion,
+    >(
+        &self,
+        offset: u64,
+        device: ArcMutexBoxDynVirtioDevice<AS, Q, R>,
+    ) -> u32 {
+        trace!("read_common_config_dword: offset 0x{:x}", offset);
+        match offset {
+            0x00 => self.device_feature_select,
+            0x04 => {
+                // Only 64 bits of features (2 pages) are defined for now, so limit
+                // device_feature_select to avoid shifting by 64 or more bits.
+                // The check comes first so the device lock is only taken when it is needed,
+                // matching the driver_feature write path.
+                if self.device_feature_select < 2 {
+                    let locked_device = device.lock().unwrap();
+                    locked_device.get_avail_features(self.device_feature_select)
+                } else {
+                    0
+                }
+            }
+            0x08 => self.driver_feature_select,
+            _ => {
+                warn!("invalid virtio register dword read: 0x{:x}", offset);
+                0
+            }
+        }
+    }
+
+    /// Write a 32-bit register of the common configuration structure.
+    ///
+    /// * 0x00 / 0x08: store the device / driver feature page selector.
+    /// * 0x0c: acknowledge features on the page chosen by `driver_feature_select`; pages
+    ///   other than 0 and 1 are logged and ignored.
+    /// * 0x20..=0x34: set the low (offset bit 2 clear) or high (offset bit 2 set) half of the
+    ///   descriptor table, available ring or used ring address of the selected queue.
+    ///
+    /// Any other offset is logged and ignored.
+    fn write_common_config_dword<
+        AS: GuestAddressSpace + 'static,
+        Q: QueueT + 'static,
+        R: 'static + GuestMemoryRegion,
+    >(
+        &mut self,
+        offset: u64,
+        value: u32,
+        queues: &mut [VirtioQueueConfig<Q>],
+        device: ArcMutexBoxDynVirtioDevice<AS, Q, R>,
+    ) {
+        trace!(
+            "write_common_config_dword: offset 0x{:x} value 0x{:x}",
+            offset,
+            value
+        );
+
+        match offset {
+            0x00 => self.device_feature_select = value,
+            0x08 => self.driver_feature_select = value,
+            0x0c if self.driver_feature_select < 2 => {
+                let mut locked_device = device.lock().unwrap();
+                locked_device.set_acked_features(self.driver_feature_select, value);
+            }
+            0x0c => {
+                warn!(
+                    "invalid ack_features (page {}, value 0x{:x})",
+                    self.driver_feature_select, value
+                );
+            }
+            0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 => {
+                // Bit 2 of the offset tells whether the low or the high half is written.
+                let (low, high) = if offset & 0x4 == 0 {
+                    (Some(value), None)
+                } else {
+                    (None, Some(value))
+                };
+                // Clearing bit 2 yields the base offset of the 64-bit register.
+                self.with_queue_mut(queues, |q| match offset & !0x4 {
+                    0x20 => q.set_desc_table_address(low, high),
+                    0x28 => q.set_avail_ring_address(low, high),
+                    _ => q.set_used_ring_address(low, high),
+                });
+            }
+            _ => {
+                warn!("invalid virtio register dword write: 0x{:x}", offset);
+            }
+        }
+    }
+
+    /// Read a 64-bit register. No 64-bit register is readable, so this only traces the
+    /// access and returns 0.
+    fn read_common_config_qword(&self, offset: u64) -> u64 {
+        trace!("read_common_config_qword: offset 0x{:x}", offset);
+        // Assume the guest has no reason to read write-only registers.
+        0
+    }
+
+    /// Write a 64-bit register: the descriptor table (0x20), available ring (0x28) or used
+    /// ring (0x30) address of the selected queue, passed on as low and high 32-bit halves.
+    ///
+    /// Any other offset is logged and ignored.
+    fn write_common_config_qword<Q: QueueT + 'static>(
+        &mut self,
+        offset: u64,
+        value: u64,
+        queues: &mut [VirtioQueueConfig<Q>],
+    ) {
+        trace!(
+            "write_common_config_qword: offset 0x{:x}, value 0x{:x}",
+            offset,
+            value
+        );
+
+        if !matches!(offset, 0x20 | 0x28 | 0x30) {
+            warn!("invalid virtio register qword write: 0x{:x}", offset);
+            return;
+        }
+
+        // `as u32` keeps the low 32 bits, so no explicit mask is needed for the low half.
+        let lower_half = Some(value as u32);
+        let upper_half = Some((value >> 32) as u32);
+
+        self.with_queue_mut(queues, |q| match offset {
+            0x20 => q.set_desc_table_address(lower_half, upper_half),
+            0x28 => q.set_avail_ring_address(lower_half, upper_half),
+            _ => q.set_used_ring_address(lower_half, upper_half),
+        });
+    }
+
+    /// Apply `f` to the queue chosen by `queue_select` and return its result, or `None` when
+    /// `queue_select` does not name a queue in `queues`.
+    fn with_queue<U, F, Q>(&self, queues: &[VirtioQueueConfig<Q>], f: F) -> Option<U>
+    where
+        F: FnOnce(&Q) -> U,
+        Q: QueueT + 'static,
+    {
+        let selected = queues.get(usize::from(self.queue_select))?;
+        Some(f(&selected.queue))
+    }
+
+    /// Apply `f` to the queue chosen by `queue_select`; does nothing when `queue_select`
+    /// does not name a queue in `queues`.
+    fn with_queue_mut<F: FnOnce(&mut Q), Q: QueueT + 'static>(
+        &self,
+        queues: &mut [VirtioQueueConfig<Q>],
+        f: F,
+    ) {
+        let selected = match queues.get_mut(usize::from(self.queue_select)) {
+            Some(config) => config,
+            None => return,
+        };
+        f(&mut selected.queue);
+    }
+
+    /// Reads a common-config register into `data`, little-endian.
+    ///
+    /// The access width is taken from `data.len()`: 1, 2, 4 and 8 bytes are dispatched
+    /// to the byte, word, dword and qword readers respectively. Other lengths up to 8
+    /// are logged as an error and leave `data` untouched.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `data` is longer than 8 bytes.
+    pub fn read<
+        AS: GuestAddressSpace + 'static,
+        Q: QueueT + 'static,
+        R: 'static + GuestMemoryRegion,
+    >(
+        &self,
+        offset: u64,
+        data: &mut [u8],
+        queues: &[VirtioQueueConfig<Q>],
+        device: ArcMutexBoxDynVirtioDevice<AS, Q, R>,
+    ) {
+        assert!(data.len() <= 8);
+
+        let width = data.len();
+        match width {
+            1 => data[0] = self.read_common_config_byte(offset),
+            2 => LittleEndian::write_u16(data, self.read_common_config_word(offset, queues)),
+            4 => LittleEndian::write_u32(data, self.read_common_config_dword(offset, device)),
+            8 => LittleEndian::write_u64(data, self.read_common_config_qword(offset)),
+            _ => error!("invalid data length for virtio read: len {}", data.len()),
+        }
+    }
+
+    /// Writes `data`, interpreted as little-endian, to a common-config register.
+    ///
+    /// The access width is taken from `data.len()`: 1, 2, 4 and 8 bytes are dispatched
+    /// to the byte, word, dword and qword writers respectively. Other lengths up to 8
+    /// are logged as an error and ignored.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `data` is longer than 8 bytes.
+    pub fn write<
+        AS: GuestAddressSpace + 'static,
+        Q: QueueT + 'static,
+        R: 'static + GuestMemoryRegion,
+    >(
+        &mut self,
+        offset: u64,
+        data: &[u8],
+        queues: &mut [VirtioQueueConfig<Q>],
+        device: ArcMutexBoxDynVirtioDevice<AS, Q, R>,
+    ) {
+        assert!(data.len() <= 8);
+
+        match data.len() {
+            1 => {
+                let byte = data[0];
+                self.write_common_config_byte(offset, byte);
+            }
+            2 => {
+                let word = LittleEndian::read_u16(data);
+                self.write_common_config_word(offset, word, queues);
+            }
+            4 => {
+                let dword = LittleEndian::read_u32(data);
+                self.write_common_config_dword(offset, dword, queues, device);
+            }
+            8 => {
+                let qword = LittleEndian::read_u64(data);
+                self.write_common_config_qword(offset, qword, queues);
+            }
+            _ => error!("invalid data length for virtio write: len {}", data.len()),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::super::virtio_pci::tests::{DummyDevice, DUMMY_FEATURES};
+    use super::*;
+    use dbs_virtio_devices::VirtioDevice;
+    use virtio_queue::QueueSync;
+    use vm_memory::{GuestMemoryMmap, GuestRegionMmap};
+
+    /// Exercises the common-config registers through the public `read`/`write` entry
+    /// points using byte, word and dword accesses.
+    ///
+    /// All checks share one `regs` instance and are order-dependent: later assertions
+    /// rely on the `queue_select` and feature-select values written by earlier steps.
+    #[test]
+    fn write_base_regs() {
+        // Start from recognisable, non-default register contents so that the read-only
+        // checks below can tell "unchanged" apart from "zeroed".
+        let regs_state = VirtioPciCommonConfigState {
+            driver_status: 0xaa,
+            config_generation: 0x55,
+            device_feature_select: 0x0,
+            driver_feature_select: 0x0,
+            queue_select: 0xff,
+            msix_config: 0,
+            msix_queues: vec![0; 3],
+        };
+        let mut regs = VirtioPciCommonConfig::new(regs_state);
+
+        let dev: Arc<
+            Mutex<Box<dyn VirtioDevice<Arc<GuestMemoryMmap>, QueueSync, GuestRegionMmap>>>,
+        > = Arc::new(Mutex::new(Box::new(DummyDevice::new())));
+        // Only two queues exist (indices 0 and 1); index 2 is used later as an
+        // out-of-range selection.
+        let mut queues = Vec::new();
+        queues.push(VirtioQueueConfig::create(2, 0).unwrap());
+        queues.push(VirtioQueueConfig::create(2, 1).unwrap());
+
+        // Can set all bits of driver_status.
+        regs.write(0x14, &[0x55], &mut queues, Arc::clone(&dev));
+        let mut read_back = vec![0x00];
+        regs.read(0x14, &mut read_back, &queues, Arc::clone(&dev));
+        assert_eq!(read_back[0], 0x55);
+
+        // The config generation register is read only.
+        // The write of 0xaa must be ignored, leaving the initial 0x55 in place.
+        regs.write(0x15, &[0xaa], &mut queues, Arc::clone(&dev));
+        let mut read_back = vec![0x00];
+        regs.read(0x15, &mut read_back, &queues, Arc::clone(&dev));
+        assert_eq!(read_back[0], 0x55);
+
+        // Device features is read-only and passed through from the device.
+        regs.write(0x04, &[0, 0, 0, 0], &mut queues, Arc::clone(&dev));
+        let mut read_back = vec![0, 0, 0, 0];
+        regs.read(0x04, &mut read_back, &queues, Arc::clone(&dev));
+        assert_eq!(LittleEndian::read_u32(&read_back), DUMMY_FEATURES as u32);
+
+        // Read device features with device_feature_select as 0
+        regs.write(0x00, &[0, 0, 0, 0], &mut queues, Arc::clone(&dev));
+        let mut read_back = vec![0, 0, 0, 0];
+        regs.read(0x04, &mut read_back, &queues, Arc::clone(&dev));
+        assert_eq!(LittleEndian::read_u32(&read_back), DUMMY_FEATURES as u32);
+
+        // Read device features with device_feature_select as 1
+        // Selecting 1 exposes the upper 32 bits of the device feature word.
+        regs.write(0x00, &[1, 0, 0, 0], &mut queues, Arc::clone(&dev));
+        let mut read_back = vec![0, 0, 0, 0];
+        regs.read(0x04, &mut read_back, &queues, Arc::clone(&dev));
+        assert_eq!(
+            LittleEndian::read_u32(&read_back),
+            (DUMMY_FEATURES >> 32) as u32
+        );
+
+        // Feature select registers are read/write.
+        // Offset 0x00 and offset 0x08 are each written and read back as a
+        // little-endian dword.
+        regs.write(0x00, &[1, 2, 3, 4], &mut queues, Arc::clone(&dev));
+        let mut read_back = vec![0, 0, 0, 0];
+        regs.read(0x00, &mut read_back, &queues, Arc::clone(&dev));
+        assert_eq!(LittleEndian::read_u32(&read_back), 0x0403_0201);
+        regs.write(0x08, &[1, 2, 3, 4], &mut queues, Arc::clone(&dev));
+        let mut read_back = vec![0, 0, 0, 0];
+        regs.read(0x08, &mut read_back, &queues, Arc::clone(&dev));
+        assert_eq!(LittleEndian::read_u32(&read_back), 0x0403_0201);
+
+        // 'queue_select' can be read and written.
+        regs.write(0x16, &[0xaa, 0x55], &mut queues, Arc::clone(&dev));
+        let mut read_back = vec![0x00, 0x00];
+        regs.read(0x16, &mut read_back, &queues, Arc::clone(&dev));
+        assert_eq!(read_back[0], 0xaa);
+        assert_eq!(read_back[1], 0x55);
+
+        // write msix_queues by queue_select 2
+        // `msix_queues` was created with three entries, so index 2 is valid here even
+        // though only two queues exist.
+        regs.write(0x16, &[0x02, 0x00], &mut queues, Arc::clone(&dev));
+        regs.write(0x1a, &[0xbb, 0xcc], &mut queues, Arc::clone(&dev));
+        let mut read_back = vec![0x00, 0x00];
+        regs.read(0x1a, &mut read_back, &queues, Arc::clone(&dev));
+        assert_eq!(read_back[0], 0xbb);
+        assert_eq!(read_back[1], 0xcc);
+
+        // 'msix_config' can be read and written.
+        regs.write(0x10, &[0xdd, 0xee], &mut queues, Arc::clone(&dev));
+        let mut read_back = vec![0x00, 0x00];
+        regs.read(0x10, &mut read_back, &queues, Arc::clone(&dev));
+        assert_eq!(read_back[0], 0xdd);
+        assert_eq!(read_back[1], 0xee);
+
+        // 'queue_size' can be read and set.
+        let mut read_back = vec![0x00, 0x00];
+        // queue_select is 2 and queues[2] is None, so queue_size is 0
+        regs.read(0x18, &mut read_back, &queues, Arc::clone(&dev));
+        assert_eq!(read_back[0], 0x00);
+        assert_eq!(read_back[1], 0x00);
+        // queue_select is 1, so queue_size is 2
+        regs.write(0x16, &[0x01, 0x00], &mut queues, Arc::clone(&dev));
+        regs.read(0x18, &mut read_back, &queues, Arc::clone(&dev));
+        assert_eq!(read_back[0], 0x02);
+        assert_eq!(read_back[1], 0x00);
+    }
+}
--- a/src/dragonball/dbs_pci/src/vfio.rs
+++ b/src/dragonball/dbs_pci/src/vfio.rs
@@ -888,7 +888,7 @@ pub struct VfioPciDeviceState<C: PciSystemContext> {
    vfio_path: String,
    interrupt: Interrupt,
    vfio_dev: Arc<VfioDevice>,
-    context: Weak<C>,
+    context: Arc<Mutex<C>>,
    configuration: PciConfiguration,
    device: Option<Weak<dyn DeviceIo>>,
    regions: Vec<Region>,
@@ -904,7 +904,7 @@ impl<C: PciSystemContext> VfioPciDeviceState<C> {
        vfio_path: String,
        vfio_dev: Arc<VfioDevice>,
        bus: Weak<PciBus>,
-        context: Weak<C>,
+        context: Arc<Mutex<C>>,
        vendor_device_id: u32,
        clique_id: Option<u8>,
        vfio_container: Arc<VfioContainer>,
@@ -1277,11 +1277,7 @@ impl<C: PciSystemContext> VfioPciDeviceState<C> {
    }

    fn register_regions(&mut self, vm: &Arc<VmFd>) -> Result<()> {
-        let ctx = self
-            .context
-            .upgrade()
-            .ok_or(VfioPciError::BusIsDropped)?
-            .get_device_manager_context();
+        let ctx = self.context.lock().unwrap().get_device_manager_context();
        let mut tx = ctx.begin_tx();

        for region in self.regions.iter_mut() {
@@ -1336,22 +1332,7 @@ impl<C: PciSystemContext> VfioPciDeviceState<C> {
    }

    fn unregister_regions(&mut self, vm: &Arc<VmFd>) -> Result<()> {
-        // This routine handle VfioPciDevice dropped but not unmap memory
-        if self.context.upgrade().is_none() {
-            for region in self.regions.iter_mut() {
-                if region.mappable() {
-                    region.unmap(vm, &self.vfio_container)?;
-                }
-            }
-
-            return Ok(());
-        }
-
-        let ctx = self
-            .context
-            .upgrade()
-            .ok_or(VfioPciError::BusIsDropped)?
-            .get_device_manager_context();
+        let ctx = self.context.lock().unwrap().get_device_manager_context();
        let mut tx = ctx.begin_tx();

        for region in self.regions.iter_mut() {
@@ -1380,11 +1361,8 @@ impl<C: PciSystemContext> VfioPciDeviceState<C> {
                } else {
                    // Safe to unwrap because activate() has set self.device to a valid value.
                    let device = self.device.as_ref().unwrap().clone();
-                    let ctx: <C as PciSystemContext>::D = self
-                        .context
-                        .upgrade()
-                        .ok_or(VfioPciError::BusIsDropped)?
-                        .get_device_manager_context();
+                    let ctx: <C as PciSystemContext>::D =
+                        self.context.lock().unwrap().get_device_manager_context();
                    let mut tx = ctx.begin_tx();

                    if let Err(e) = region.retrap(
@@ -1561,7 +1539,7 @@ impl<C: PciSystemContext> VfioPciDevice<C> {
        path: String,
        bus: Weak<PciBus>,
        device: VfioDevice,
-        context: Weak<C>,
+        context: Arc<Mutex<C>>,
        vm_fd: Arc<VmFd>,
        vendor_device_id: u32,
        clique_id: Option<u8>,
@@ -1649,11 +1627,7 @@ impl<C: PciSystemContext> VfioPciDevice<C> {
            state.interrupt.add_msi_irq_resource(base, size);
        }

-        let irq_manager = state
-            .context
-            .upgrade()
-            .ok_or(VfioPciError::BusIsDropped)?
-            .get_interrupt_manager();
+        let irq_manager = state.context.lock().unwrap().get_interrupt_manager();
        state.interrupt.initialize(irq_manager)?;
        #[cfg(target_arch = "aarch64")]
        self.set_device_id(&mut state);
--- a/src/dragonball/dbs_pci/src/virtio_pci.rs
+++ b/src/dragonball/dbs_pci/src/virtio_pci.rs
--- a/src/dragonball/dbs_virtio_devices/src/lib.rs
+++ b/src/dragonball/dbs_virtio_devices/src/lib.rs
@@ -145,6 +145,8 @@ pub enum ActivateError {
    #[cfg(feature = "vhost")]
    #[error("Vhost activate error")]
    VhostActivate(vhost_rs::Error),
+    #[error("VirtioPci error")]
+    VirtioPci,
 }

 impl std::convert::From<Error> for ActivateError {
--- a/src/dragonball/dbs_virtio_devices/src/mmio/mmio_state.rs
+++ b/src/dragonball/dbs_virtio_devices/src/mmio/mmio_state.rs
@@ -371,6 +371,7 @@ where
            }

            let _ = self.intr_mgr.reset();
+            self.unregister_ioevent_doorbell();
            self.unregister_ioevent();
            self.features_select = 0;
            self.acked_features_select = 0;
--- a/src/dragonball/src/api/v1/vmm_action.rs
+++ b/src/dragonball/src/api/v1/vmm_action.rs
@@ -205,6 +205,10 @@ pub enum VmmAction {
    /// input. This action can only be called before the microVM has booted.
    InsertBlockDevice(BlockDeviceConfigInfo),

+    #[cfg(any(feature = "virtio-blk", feature = "vhost-user-blk"))]
+    /// Prepare to remove a block device that already exists
+    PrepareRemoveBlockDevice(String),
+
    #[cfg(any(feature = "virtio-blk", feature = "vhost-user-blk"))]
    /// Remove a new block device for according to given drive_id
    RemoveBlockDevice(String),
@@ -356,6 +360,10 @@ impl VmmService {
                self.add_block_device(vmm, event_mgr, block_device_config)
            }
            #[cfg(any(feature = "virtio-blk", feature = "vhost-user-blk"))]
+            VmmAction::PrepareRemoveBlockDevice(blkdev_id) => {
+                self.prepare_remove_block_device(vmm, &blkdev_id)
+            }
+            #[cfg(any(feature = "virtio-blk", feature = "vhost-user-blk"))]
            VmmAction::UpdateBlockDevice(blk_update) => {
                self.update_blk_rate_limiters(vmm, blk_update)
            }
@@ -471,14 +479,13 @@ impl VmmService {
        use self::StartMicroVmError::MicroVMAlreadyRunning;
        use self::VmmActionError::StartMicroVm;

-        let vmm_seccomp_filter = vmm.vmm_seccomp_filter();
-        let vcpu_seccomp_filter = vmm.vcpu_seccomp_filter();
+        let seccomp_filters = vmm.seccomp_filters();
        let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?;
        if vm.is_vm_initialized() {
            return Err(StartMicroVm(MicroVMAlreadyRunning));
        }

-        vm.start_microvm(event_mgr, vmm_seccomp_filter, vcpu_seccomp_filter)
+        vm.start_microvm(event_mgr, seccomp_filters)
            .map(|_| VmmData::Empty)
            .map_err(StartMicroVm)
    }
@@ -664,10 +671,17 @@ impl VmmService {
                VmmActionError::Block(BlockDeviceError::UpdateNotAllowedPostBoot)
            })?;

+        let (sender, receiver) = mpsc::channel();
+        let vmm_data = if ctx.is_hotplug() {
+            VmmData::SyncHotplug((sender.clone(), receiver))
+        } else {
+            VmmData::Empty
+        };
+
        vm.device_manager_mut()
            .block_manager
-            .insert_device(ctx, config)
-            .map(|_| VmmData::Empty)
+            .insert_device(ctx, config, sender.clone())
+            .map(|_| vmm_data)
            .map_err(VmmActionError::Block)
    }

@@ -688,6 +702,38 @@ impl VmmService {
            .map_err(VmmActionError::Block)
    }

+    /// Asks the guest, via upcall, to unplug a block device before it is removed.
+    ///
+    /// On success the returned `VmmData::SyncHotplug` carries the channel pair on which
+    /// the outcome of the upcall is delivered.
+    ///
+    /// Errors are reported with the same variants the other block-device actions use:
+    /// `InvalidVMID` when no VM exists, `Block(UpdateNotAllowedPostBoot)` when the device
+    /// operation context reports `MicroVMAlreadyRunning`, `UpcallServerNotReady` when the
+    /// upcall server is not ready, and `Block(..)` for failures from the block manager.
+    #[cfg(any(feature = "virtio-blk", feature = "vhost-user-blk"))]
+    fn prepare_remove_block_device(
+        &mut self,
+        vmm: &mut Vmm,
+        blockdev_id: &str,
+    ) -> VmmRequestResult {
+        let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?;
+
+        info!("prepare_remove_block_device: {:?}", blockdev_id);
+        let ctx = vm.create_device_op_context(None).map_err(|e| {
+            info!("create device op context error: {:?}", e);
+            match e {
+                // This is a block-device action, so report through the block error
+                // variants instead of the VFIO host-device ones.
+                StartMicroVmError::MicroVMAlreadyRunning => {
+                    VmmActionError::Block(BlockDeviceError::UpdateNotAllowedPostBoot)
+                }
+                StartMicroVmError::UpcallServerNotReady => VmmActionError::UpcallServerNotReady,
+                other => VmmActionError::StartMicroVm(other),
+            }
+        })?;
+
+        // One sender clone goes to the upcall callback; the original is handed back
+        // together with the receiver so the caller can wait for the result.
+        let (sender, receiver) = mpsc::channel();
+
+        vm.device_manager_mut()
+            .block_manager
+            .prepare_remove_device(&ctx, blockdev_id, sender.clone())
+            .map(|_| VmmData::SyncHotplug((sender, receiver)))
+            .map_err(VmmActionError::Block)
+    }
+
    #[cfg(any(feature = "virtio-blk", feature = "vhost-user-blk"))]
    // Remove the device
    #[instrument(skip(self, event_mgr))]
@@ -1526,6 +1572,7 @@ mod tests {
                    queue_size: 256,
                    use_shared_irq: None,
                    use_generic_irq: None,
+                    use_pci_bus: Some(true),
                }),
                InstanceState::Uninitialized,
                &|result| {
--- a/src/dragonball/src/device_manager/blk_dev_mgr.rs
+++ b/src/dragonball/src/device_manager/blk_dev_mgr.rs
@@ -7,19 +7,28 @@
 // found in the THIRD-PARTY file.

 //! Device manager for virtio-blk and vhost-user-blk devices.
-use std::collections::{vec_deque, VecDeque};
 use std::convert::TryInto;
 use std::fs::OpenOptions;
 use std::os::unix::fs::OpenOptionsExt;
 use std::os::unix::io::AsRawFd;
 use std::path::{Path, PathBuf};
+use std::sync::mpsc::Sender;
 use std::sync::Arc;
+use std::{
+    collections::{vec_deque, VecDeque},
+    sync::mpsc,
+};

+use dbs_device::DeviceIo;
+use dbs_pci::VirtioPciDevice;
+use dbs_upcall::{DevMgrResponse, UpcallClientResponse};
 use dbs_virtio_devices as virtio;
 use dbs_virtio_devices::block::{aio::Aio, io_uring::IoUring, Block, LocalFile, Ufile};
 #[cfg(feature = "vhost-user-blk")]
 use dbs_virtio_devices::vhost::vhost_user::block::VhostUserBlock;
 use serde_derive::{Deserialize, Serialize};
+use virtio_queue::QueueSync;
+use vm_memory::GuestRegionMmap;

 use crate::address_space_manager::GuestAddressSpaceImpl;
 use crate::config_manager::{ConfigItem, DeviceConfigInfo, RateLimiterConfigInfo};
@@ -190,6 +199,8 @@ pub struct BlockDeviceConfigInfo {
    pub use_shared_irq: Option<bool>,
    /// Use generic irq
    pub use_generic_irq: Option<bool>,
+    /// Use pci bus
+    pub use_pci_bus: Option<bool>,
 }

 impl std::default::Default for BlockDeviceConfigInfo {
@@ -208,6 +219,7 @@ impl std::default::Default for BlockDeviceConfigInfo {
            rate_limiter: None,
            use_shared_irq: None,
            use_generic_irq: None,
+            use_pci_bus: None,
        }
    }
 }
@@ -349,6 +361,7 @@ impl BlockDeviceMgr {
        &mut self,
        mut ctx: DeviceOpContext,
        config: BlockDeviceConfigInfo,
+        sender: mpsc::Sender<Option<i32>>,
    ) -> std::result::Result<(), BlockDeviceError> {
        if !cfg!(feature = "hotplug") && ctx.is_hotplug {
            return Err(BlockDeviceError::UpdateNotAllowedPostBoot);
@@ -380,31 +393,68 @@ impl BlockDeviceMgr {
                    return Ok(());
                }

+                let mut slot = 0;
+
+                let use_generic_irq = config.use_generic_irq.unwrap_or(USE_GENERIC_IRQ);
+
                match config.device_type {
                    BlockDeviceType::RawBlock => {
                        let device = Self::create_blk_device(&config, &mut ctx)
                            .map_err(BlockDeviceError::Virtio)?;
-                        let dev = DeviceManager::create_mmio_virtio_device(
-                            device,
-                            &mut ctx,
-                            config.use_shared_irq.unwrap_or(self.use_shared_irq),
-                            config.use_generic_irq.unwrap_or(USE_GENERIC_IRQ),
-                        )
-                        .map_err(BlockDeviceError::DeviceManager)?;
-                        self.update_device_by_index(index, Arc::clone(&dev))?;
+
+                        let dev = if let Some(true) = config.use_pci_bus {
+                            let pci_dev = DeviceManager::create_virtio_pci_device(
+                                device,
+                                &mut ctx,
+                                use_generic_irq,
+                            )
+                            .map_err(BlockDeviceError::DeviceManager)?;
+
+                            let (_, devfn) = DeviceManager::get_pci_device_info(&pci_dev)?;
+                            slot = devfn >> 3;
+
+                            pci_dev
+                        } else {
+                            DeviceManager::create_mmio_virtio_device(
+                                device,
+                                &mut ctx,
+                                config.use_shared_irq.unwrap_or(self.use_shared_irq),
+                                use_generic_irq,
+                            )
+                            .map_err(BlockDeviceError::DeviceManager)?
+                        };
+
+                        let callback: Option<Box<dyn Fn(UpcallClientResponse) + Send>> =
+                            Some(Box::new(move |_| {
+                                // send the pci device slot to caller.
+                                let _ = sender.send(Some(slot as i32));
+                            }));
+
+                        self.update_device_by_index(index, dev.clone())?;
                        // live-upgrade need save/restore device from info.device.
                        self.info_list[index].set_device(dev.clone());
-                        ctx.insert_hotplug_mmio_device(&dev, None).map_err(|e| {
+
+                        let mut cleanup = |e, ctx: DeviceOpContext| -> BlockDeviceError {
                            let logger = ctx.logger().new(slog::o!());
                            self.remove_device(ctx, &config.drive_id).unwrap();
                            error!(
                                logger,
-                                "failed to hot-add virtio block device {}, {:?}",
+                                "failed to hot-add pci virtio block device {}, {:?}",
                                &config.drive_id,
                                e
                            );
                            BlockDeviceError::DeviceManager(e)
-                        })
+                        };
+
+                        if let Some(true) = config.use_pci_bus {
+                            let _ = ctx
+                                .insert_hotplug_pci_device(&dev, callback)
+                                .map_err(|e| cleanup(e, ctx))?;
+                            Ok(())
+                        } else {
+                            ctx.insert_hotplug_mmio_device(&dev, callback)
+                                .map_err(|e| cleanup(e, ctx))
+                        }
                    }
                    #[cfg(feature = "vhost-user-blk")]
                    BlockDeviceType::Spool | BlockDeviceType::Spdk => {
@@ -417,8 +467,13 @@ impl BlockDeviceMgr {
                            config.use_generic_irq.unwrap_or(USE_GENERIC_IRQ),
                        )
                        .map_err(BlockDeviceError::DeviceManager)?;
+                        let callback: Option<Box<dyn Fn(UpcallClientResponse) + Send>> =
+                            Some(Box::new(move |_| {
+                                let _ = sender.send(None);
+                            }));
+
                        self.update_device_by_index(index, Arc::clone(&dev))?;
-                        ctx.insert_hotplug_mmio_device(&dev, None).map_err(|e| {
+                        ctx.insert_hotplug_mmio_device(&dev, callback).map_err(|e| {
                            let logger = ctx.logger().new(slog::o!());
                            self.remove_device(ctx, &config.drive_id).unwrap();
                            error!(
@@ -450,15 +505,25 @@ impl BlockDeviceMgr {
                        info.config.drive_id,
                        info.config.path_on_host.to_str().unwrap_or("<unknown>")
                    );
+
+                    let use_shared_irq = info.config.use_shared_irq.unwrap_or(self.use_shared_irq);
+                    let use_generic_irq = info.config.use_generic_irq.unwrap_or(USE_GENERIC_IRQ);
                    let device = Self::create_blk_device(&info.config, ctx)
                        .map_err(BlockDeviceError::Virtio)?;
-                    let device = DeviceManager::create_mmio_virtio_device(
-                        device,
-                        ctx,
-                        info.config.use_shared_irq.unwrap_or(self.use_shared_irq),
-                        info.config.use_generic_irq.unwrap_or(USE_GENERIC_IRQ),
-                    )
-                    .map_err(BlockDeviceError::RegisterBlockDevice)?;
+
+                    let device = if let Some(true) = info.config.use_pci_bus {
+                        DeviceManager::create_virtio_pci_device(device, ctx, use_generic_irq)
+                            .map_err(BlockDeviceError::RegisterBlockDevice)?
+                    } else {
+                        DeviceManager::create_mmio_virtio_device(
+                            device,
+                            ctx,
+                            use_shared_irq,
+                            use_generic_irq,
+                        )
+                        .map_err(BlockDeviceError::RegisterBlockDevice)?
+                    };
+
                    info.device = Some(device);
                }
                #[cfg(feature = "vhost-user-blk")]
@@ -496,7 +561,7 @@ impl BlockDeviceMgr {
        while let Some(mut info) = self.info_list.pop_back() {
            info!(ctx.logger(), "remove drive {}", info.config.drive_id);
            if let Some(device) = info.device.take() {
-                DeviceManager::destroy_mmio_virtio_device(device, ctx)?;
+                DeviceManager::destroy_virtio_device(device, ctx)?;
            }
        }

@@ -510,6 +575,62 @@ impl BlockDeviceMgr {
        }
    }

+    /// Asks the guest to release a block device ahead of its removal.
+    ///
+    /// Looks up `blockdev_id` and, if a device is attached to that entry, issues a
+    /// hotplug-remove request through `ctx`: the MMIO variant when the device is a
+    /// `DbsMmioV2Device`, the PCI variant when it is a `VirtioPciDevice`.
+    ///
+    /// The upcall outcome is forwarded on `result_sender`: `Some(result)` for a
+    /// `DevMgrResponse::Other` reply and `None` for `UpcallClientResponse::UpcallReset`.
+    ///
+    /// # Errors
+    ///
+    /// * `UpdateNotAllowedPostBoot` when built without the `hotplug` feature.
+    /// * `InvalidDeviceId` when no drive with `blockdev_id` is known.
+    /// * Whatever the hotplug-remove request itself returns.
+    pub fn prepare_remove_device(
+        &self,
+        ctx: &DeviceOpContext,
+        blockdev_id: &str,
+        result_sender: Sender<Option<i32>>,
+    ) -> Result<(), BlockDeviceError> {
+        if !cfg!(feature = "hotplug") {
+            return Err(BlockDeviceError::UpdateNotAllowedPostBoot);
+        }
+
+        info!(ctx.logger(), "prepare remove block device");
+
+        // Relays the guest's answer to whoever holds the receiving end of the channel.
+        let callback: Option<Box<dyn Fn(UpcallClientResponse) + Send>> =
+            Some(Box::new(move |result| match result {
+                UpcallClientResponse::DevMgr(response) => {
+                    if let DevMgrResponse::Other(resp) = response {
+                        if let Err(e) = result_sender.send(Some(resp.result)) {
+                            log::error!("send upcall result failed, due to {:?}!", e);
+                        }
+                    }
+                }
+                UpcallClientResponse::UpcallReset => {
+                    if let Err(e) = result_sender.send(None) {
+                        log::error!("send upcall result failed, due to {:?}!", e);
+                    }
+                }
+                #[allow(unreachable_patterns)]
+                _ => {
+                    log::debug!("this arm should only be triggered under test");
+                }
+            }));
+
+        // Build the error lazily so the id is only copied when the lookup fails.
+        let device_index = self
+            .get_index_of_drive_id(blockdev_id)
+            .ok_or_else(|| BlockDeviceError::InvalidDeviceId(blockdev_id.to_string()))?;
+
+        let info = &self.info_list[device_index];
+        // NOTE(review): when no device is attached, or it is neither MMIO nor PCI, this
+        // returns Ok(()) without issuing an upcall, so nothing is ever sent on
+        // `result_sender` — confirm callers do not block waiting for a result.
+        if let Some(device) = info.device.as_ref() {
+            if let Some(_mmio_dev) = device.as_any().downcast_ref::<DbsMmioV2Device>() {
+                ctx.remove_hotplug_mmio_device(device, callback)?;
+            } else if let Some(_pci_dev) = device.as_any().downcast_ref::<VirtioPciDevice<
+                GuestAddressSpaceImpl,
+                QueueSync,
+                GuestRegionMmap,
+            >>() {
+                ctx.remove_hotplug_pci_device(device, callback)?;
+            }
+        }
+        Ok(())
+    }
+
    /// remove a block device, it basically is the inverse operation of `insert_device``
    pub fn remove_device(
        &mut self,
@@ -524,7 +645,7 @@ impl BlockDeviceMgr {
            Some(mut info) => {
                info!(ctx.logger(), "remove drive {}", info.config.drive_id);
                if let Some(device) = info.device.take() {
-                    DeviceManager::destroy_mmio_virtio_device(device, &mut ctx)
+                    DeviceManager::destroy_virtio_device(device, &mut ctx)
                        .map_err(BlockDeviceError::DeviceManager)?;
                }
            }
@@ -783,7 +904,7 @@ impl BlockDeviceMgr {
    pub fn update_device_by_index(
        &mut self,
        index: usize,
-        device: Arc<DbsMmioV2Device>,
+        device: Arc<dyn DeviceIo>,
    ) -> Result<(), BlockDeviceError> {
        if let Some(info) = self.info_list.get_mut(index) {
            info.device = Some(device);
@@ -818,6 +939,21 @@ impl BlockDeviceMgr {
                            .map(|_p| ())
                            .map_err(|_e| BlockDeviceError::BlockEpollHanderSendFail);
                    }
+                } else if let Some(pci_dev) = device.as_any().downcast_ref::<VirtioPciDevice<
+                    GuestAddressSpaceImpl,
+                    QueueSync,
+                    GuestRegionMmap,
+                >>() {
+                    let inner_dev = pci_dev.device();
+                    if let Some(blk_dev) = inner_dev
+                        .as_any()
+                        .downcast_ref::<virtio::block::Block<GuestAddressSpaceImpl>>()
+                    {
+                        return blk_dev
+                            .set_patch_rate_limiters(new_cfg.bytes(), new_cfg.ops())
+                            .map(|_p| ())
+                            .map_err(|_e| BlockDeviceError::BlockEpollHanderSendFail);
+                    }
                }
                Ok(())
            }
@@ -848,6 +984,7 @@ mod tests {
    use super::*;
    use crate::device_manager::tests::create_address_space;
    use crate::test_utils::tests::create_vm_for_test;
+    use std::sync::mpsc::channel;

    #[test]
    fn test_block_device_type() {
@@ -887,14 +1024,16 @@ mod tests {
            queue_size: 128,
            use_shared_irq: None,
            use_generic_irq: None,
+            use_pci_bus: Some(true),
        };

        let mut vm = crate::vm::tests::create_vm_instance();
        let ctx = DeviceOpContext::create_boot_ctx(&vm, None);
+        let (sender, _receiver) = channel();
        assert!(vm
            .device_manager_mut()
            .block_manager
-            .insert_device(ctx, dummy_block_device.clone(),)
+            .insert_device(ctx, dummy_block_device.clone(), sender)
            .is_ok());

        assert_eq!(vm.device_manager().block_manager.info_list.len(), 1);
@@ -961,10 +1100,12 @@ mod tests {
            queue_size: 128,
            use_shared_irq: None,
            use_generic_irq: None,
+            use_pci_bus: Some(true),
        };
+        let (sender, _receiver) = channel();
        vm.device_manager_mut()
            .block_manager
-            .insert_device(device_op_ctx, dummy_block_device)
+            .insert_device(device_op_ctx, dummy_block_device, sender)
            .unwrap();

        let cfg = BlockDeviceConfigUpdateInfo {
@@ -1037,14 +1178,16 @@ mod tests {
            queue_size: 128,
            use_shared_irq: None,
            use_generic_irq: None,
+            use_pci_bus: Some(true),
        };

        let mut vm = crate::vm::tests::create_vm_instance();
        let ctx = DeviceOpContext::create_boot_ctx(&vm, None);
+        let (sender, _receiver) = channel();
        assert!(vm
            .device_manager_mut()
            .block_manager
-            .insert_device(ctx, dummy_block_device.clone(),)
+            .insert_device(ctx, dummy_block_device.clone(), sender)
            .is_ok());

        assert_eq!(vm.device_manager().block_manager.info_list.len(), 1);
@@ -1077,6 +1220,7 @@ mod tests {
            queue_size: 128,
            use_shared_irq: None,
            use_generic_irq: None,
+            use_pci_bus: Some(true),
        };

        let dummy_file_2 = TempFile::new().unwrap();
@@ -1095,19 +1239,21 @@ mod tests {
            queue_size: 128,
            use_shared_irq: None,
            use_generic_irq: None,
+            use_pci_bus: Some(true),
        };

        let mut vm = crate::vm::tests::create_vm_instance();
        let ctx = DeviceOpContext::create_boot_ctx(&vm, None);
+        let (sender, _receiver) = channel();
        vm.device_manager_mut()
            .block_manager
-            .insert_device(ctx, root_block_device_1)
+            .insert_device(ctx, root_block_device_1, sender.clone())
            .unwrap();
        let ctx = DeviceOpContext::create_boot_ctx(&vm, None);
        assert!(vm
            .device_manager_mut()
            .block_manager
-            .insert_device(ctx, root_block_device_2)
+            .insert_device(ctx, root_block_device_2, sender)
            .is_err());
    }

@@ -1131,6 +1277,7 @@ mod tests {
            queue_size: 128,
            use_shared_irq: None,
            use_generic_irq: None,
+            use_pci_bus: Some(true),
        };

        let dummy_file_2 = TempFile::new().unwrap();
@@ -1149,6 +1296,7 @@ mod tests {
            queue_size: 128,
            use_shared_irq: None,
            use_generic_irq: None,
+            use_pci_bus: Some(true),
        };

        let dummy_file_3 = TempFile::new().unwrap();
@@ -1167,6 +1315,7 @@ mod tests {
            queue_size: 128,
            use_shared_irq: None,
            use_generic_irq: None,
+            use_pci_bus: Some(true),
        };

        let mut vm = crate::vm::tests::create_vm_instance();
@@ -1186,23 +1335,24 @@ mod tests {
        assert!(vm.device_manager().block_manager.has_root_block_device(),);
        assert!(!vm.device_manager().block_manager.has_part_uuid_root());
        assert_eq!(vm.device_manager().block_manager.info_list.len(), 3);
+        let (sender, _receiver) = channel();

        let ctx = DeviceOpContext::create_boot_ctx(&vm, None);
        vm.device_manager_mut()
            .block_manager
-            .insert_device(ctx, root_block_device)
+            .insert_device(ctx, root_block_device, sender.clone())
            .unwrap();

        let ctx = DeviceOpContext::create_boot_ctx(&vm, None);
        vm.device_manager_mut()
            .block_manager
-            .insert_device(ctx, dummy_block_device_2)
+            .insert_device(ctx, dummy_block_device_2, sender.clone())
            .unwrap();

        let ctx = DeviceOpContext::create_boot_ctx(&vm, None);
        vm.device_manager_mut()
            .block_manager
-            .insert_device(ctx, dummy_block_device_3)
+            .insert_device(ctx, dummy_block_device_3, sender.clone())
            .unwrap();
    }

@@ -1226,6 +1376,7 @@ mod tests {
            queue_size: 128,
            use_shared_irq: None,
            use_generic_irq: None,
+            use_pci_bus: Some(true),
        };

        let dummy_file_2 = TempFile::new().unwrap();
@@ -1244,6 +1395,7 @@ mod tests {
            queue_size: 128,
            use_shared_irq: None,
            use_generic_irq: None,
+            use_pci_bus: Some(true),
        };

        let dummy_file_3 = TempFile::new().unwrap();
@@ -1262,24 +1414,26 @@ mod tests {
            queue_size: 128,
            use_shared_irq: None,
            use_generic_irq: None,
+            use_pci_bus: Some(true),
        };

        let mut vm = crate::vm::tests::create_vm_instance();

        let ctx = DeviceOpContext::create_boot_ctx(&vm, None);
+        let (sender, _receiver) = channel();
        vm.device_manager_mut()
            .block_manager
-            .insert_device(ctx, dummy_block_device_2.clone())
+            .insert_device(ctx, dummy_block_device_2.clone(), sender.clone())
            .unwrap();
        let ctx = DeviceOpContext::create_boot_ctx(&vm, None);
        vm.device_manager_mut()
            .block_manager
-            .insert_device(ctx, dummy_block_device_3.clone())
+            .insert_device(ctx, dummy_block_device_3.clone(), sender.clone())
            .unwrap();
        let ctx = DeviceOpContext::create_boot_ctx(&vm, None);
        vm.device_manager_mut()
            .block_manager
-            .insert_device(ctx, root_block_device.clone())
+            .insert_device(ctx, root_block_device.clone(), sender.clone())
            .unwrap();

        assert!(vm.device_manager().block_manager.has_root_block_device(),);
@@ -1322,6 +1476,7 @@ mod tests {
            queue_size: 128,
            use_shared_irq: None,
            use_generic_irq: None,
+            use_pci_bus: Some(true),
        };

        let dummy_file_2 = TempFile::new().unwrap();
@@ -1340,20 +1495,22 @@ mod tests {
            queue_size: 128,
            use_shared_irq: None,
            use_generic_irq: None,
+            use_pci_bus: Some(true),
        };

        let mut vm = crate::vm::tests::create_vm_instance();
+        let (sender, _receiver) = channel();

        // Add 2 block devices.
        let ctx = DeviceOpContext::create_boot_ctx(&vm, None);
        vm.device_manager_mut()
            .block_manager
-            .insert_device(ctx, root_block_device)
+            .insert_device(ctx, root_block_device, sender.clone())
            .unwrap();
        let ctx = DeviceOpContext::create_boot_ctx(&vm, None);
        vm.device_manager_mut()
            .block_manager
-            .insert_device(ctx, dummy_block_device_2.clone())
+            .insert_device(ctx, dummy_block_device_2.clone(), sender.clone())
            .unwrap();

        // Get index zero.
@@ -1384,7 +1541,7 @@ mod tests {
        let ctx = DeviceOpContext::create_boot_ctx(&vm, None);
        vm.device_manager_mut()
            .block_manager
-            .insert_device(ctx, dummy_block_device_2.clone())
+            .insert_device(ctx, dummy_block_device_2.clone(), sender.clone())
            .unwrap();

        let index = vm
@@ -1407,7 +1564,7 @@ mod tests {
        assert!(vm
            .device_manager_mut()
            .block_manager
-            .insert_device(ctx, dummy_block_device_2.clone(),)
+            .insert_device(ctx, dummy_block_device_2.clone(), sender.clone())
            .is_err());

        // Update with 2 root block devices.
@@ -1417,7 +1574,7 @@ mod tests {
        assert!(vm
            .device_manager_mut()
            .block_manager
-            .insert_device(ctx, dummy_block_device_2,)
+            .insert_device(ctx, dummy_block_device_2, sender.clone())
            .is_err(),);

        // Switch roots and add a PARTUUID for the new one.
@@ -1435,6 +1592,7 @@ mod tests {
            queue_size: 128,
            use_shared_irq: None,
            use_generic_irq: None,
+            use_pci_bus: Some(true),
        };
        let root_block_device_new = BlockDeviceConfigInfo {
            path_on_host: dummy_path_2,
@@ -1450,16 +1608,17 @@ mod tests {
            queue_size: 128,
            use_shared_irq: None,
            use_generic_irq: None,
+            use_pci_bus: Some(true),
        };
        let ctx = DeviceOpContext::create_boot_ctx(&vm, None);
        vm.device_manager_mut()
            .block_manager
-            .insert_device(ctx, root_block_device_old)
+            .insert_device(ctx, root_block_device_old, sender.clone())
            .unwrap();
        let ctx = DeviceOpContext::create_boot_ctx(&vm, None);
        vm.device_manager_mut()
            .block_manager
-            .insert_device(ctx, root_block_device_new)
+            .insert_device(ctx, root_block_device_new, sender.clone())
            .unwrap();
        assert!(vm.device_manager().block_manager.has_part_uuid_root);
    }
--- a/src/dragonball/src/device_manager/mem_dev_mgr.rs
+++ b/src/dragonball/src/device_manager/mem_dev_mgr.rs
@@ -310,7 +310,7 @@ impl MemDeviceMgr {
    pub fn remove_devices(&self, ctx: &mut DeviceOpContext) -> Result<(), DeviceMgrError> {
        for info in self.info_list.iter() {
            if let Some(device) = &info.device {
-                DeviceManager::destroy_mmio_virtio_device(device.clone(), ctx)?;
+                DeviceManager::destroy_mmio_device(device.clone(), ctx)?;
            }
        }

--- a/src/dragonball/src/device_manager/mod.rs
+++ b/src/dragonball/src/device_manager/mod.rs
@@ -13,17 +13,19 @@ use arc_swap::ArcSwap;
 use dbs_address_space::AddressSpace;
 #[cfg(target_arch = "aarch64")]
 use dbs_arch::{DeviceType, MMIODeviceInfo};
+use dbs_boot::layout::MMIO_LOW_END;
 use dbs_device::device_manager::{Error as IoManagerError, IoManager, IoManagerContext};
-#[cfg(target_arch = "aarch64")]
 use dbs_device::resources::DeviceResources;
 use dbs_device::resources::Resource;
 use dbs_device::DeviceIo;
 use dbs_interrupt::KvmIrqManager;
 use dbs_legacy_devices::ConsoleHandler;
-#[cfg(all(feature = "host-device", target_arch = "aarch64"))]
-use dbs_pci::PciBusResources;
+#[cfg(feature = "dbs-virtio-devices")]
+use dbs_pci::CAPABILITY_BAR_SIZE;
 use dbs_utils::epoll_manager::EpollManager;
 use kvm_ioctls::VmFd;
+use log::error;
+use virtio_queue::QueueSync;

 #[cfg(feature = "dbs-virtio-devices")]
 use dbs_device::resources::ResourceConstraint;
@@ -40,6 +42,7 @@ use dbs_virtio_devices::{

 #[cfg(feature = "host-device")]
 use dbs_pci::VfioPciDevice;
+use dbs_pci::VirtioPciDevice;
 #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))]
 use dbs_upcall::{
    DevMgrRequest, DevMgrService, MmioDevRequest, PciDevRequest, UpcallClient, UpcallClientError,
@@ -57,6 +60,8 @@ use crate::resource_manager::ResourceManager;
 use crate::vm::{KernelConfigInfo, Vm, VmConfigInfo};
 use crate::IoManagerCached;

+use vm_memory::GuestRegionMmap;
+
 /// Virtual machine console device manager.
 pub mod console_manager;
 /// Console Manager for virtual machines console device.
@@ -131,6 +136,9 @@ macro_rules! info(
    };
 );

+// The flag of whether to use the shared irq.
+const USE_SHARED_IRQ: bool = true;
+
 /// Errors related to device manager operations.
 #[derive(Debug, thiserror::Error)]
 pub enum DeviceMgrError {
@@ -173,13 +181,28 @@ pub enum DeviceMgrError {
    HotplugDevice(#[source] UpcallClientError),

    /// Failed to free device resource.
-    #[error("failed to free device resources: {0}")]
+    #[error("failed to allocate/free device resources: {0}")]
    ResourceError(#[source] crate::resource_manager::ResourceError),

    #[cfg(feature = "host-device")]
    /// Error from Vfio Pci
    #[error("failed to do vfio pci operation: {0:?}")]
    VfioPci(#[source] dbs_pci::VfioPciError),
+    /// Error from Virtio Pci
+    #[error("failed to do virtio pci operation")]
+    VirtioPci,
+    /// PCI system manager error
+    #[error("Pci system manager error")]
+    PciSystemManager,
+    /// Dragonball pci system error
+    #[error("pci error: {0:?}")]
+    PciError(#[source] dbs_pci::Error),
+    /// Virtio Pci system error
+    #[error("virtio pci error: {0:?}")]
+    VirtioPciError(#[source] dbs_pci::VirtioPciDeviceError),
+    /// Unsupported pci device type
+    #[error("unsupported pci device type")]
+    InvalidPciDeviceType,
 }

 /// Specialized version of `std::result::Result` for device manager operations.
@@ -290,9 +313,10 @@ pub struct DeviceOpContext {
    #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))]
    upcall_client: Option<Arc<UpcallClient<DevMgrService>>>,
    #[cfg(feature = "dbs-virtio-devices")]
-    virtio_devices: Vec<Arc<DbsMmioV2Device>>,
+    virtio_devices: Vec<Arc<dyn DeviceIo>>,
    #[cfg(feature = "host-device")]
    vfio_manager: Option<Arc<Mutex<VfioDeviceMgr>>>,
+    pci_system_manager: Arc<Mutex<PciSystemManager>>,
    vm_config: Option<VmConfigInfo>,
    shared_info: Arc<RwLock<InstanceInfo>>,
 }
@@ -343,6 +367,7 @@ impl DeviceOpContext {
            shared_info,
            #[cfg(feature = "host-device")]
            vfio_manager: None,
+            pci_system_manager: device_mgr.pci_system_manager.clone(),
        }
    }

@@ -444,6 +469,11 @@ impl DeviceOpContext {
        }
        Ok(dev_info)
    }
+
+    /// Return the `is_hotplug` flag of this device operation context.
+    pub fn is_hotplug(&self) -> bool {
+        self.is_hotplug
+    }
 }

 #[cfg(all(feature = "hotplug", not(feature = "dbs-upcall")))]
@@ -513,7 +543,7 @@ impl DeviceOpContext {

    pub(crate) fn insert_hotplug_mmio_device(
        &self,
-        dev: &Arc<DbsMmioV2Device>,
+        dev: &Arc<dyn DeviceIo>,
        callback: Option<Box<dyn Fn(UpcallClientResponse) + Send>>,
    ) -> Result<()> {
        if !self.is_hotplug {
@@ -532,7 +562,7 @@ impl DeviceOpContext {

    pub(crate) fn remove_hotplug_mmio_device(
        &self,
-        dev: &Arc<DbsMmioV2Device>,
+        dev: &Arc<dyn DeviceIo>,
        callback: Option<Box<dyn Fn(UpcallClientResponse) + Send>>,
    ) -> Result<()> {
        if !self.is_hotplug {
@@ -630,6 +660,7 @@ pub struct DeviceManager {
    vhost_user_net_manager: VhostUserNetDeviceMgr,
    #[cfg(feature = "host-device")]
    pub(crate) vfio_manager: Arc<Mutex<VfioDeviceMgr>>,
+    pub(crate) pci_system_manager: Arc<Mutex<PciSystemManager>>,
 }

 impl DeviceManager {
@@ -640,11 +671,25 @@ impl DeviceManager {
        epoll_manager: EpollManager,
        logger: &slog::Logger,
        shared_info: Arc<RwLock<InstanceInfo>>,
-    ) -> Self {
-        DeviceManager {
-            io_manager: Arc::new(ArcSwap::new(Arc::new(IoManager::new()))),
-            io_lock: Arc::new(Mutex::new(())),
-            irq_manager: Arc::new(KvmIrqManager::new(vm_fd.clone())),
+    ) -> Result<Self> {
+        let irq_manager = Arc::new(KvmIrqManager::new(vm_fd.clone()));
+        let io_manager = Arc::new(ArcSwap::new(Arc::new(IoManager::new())));
+        let io_lock = Arc::new(Mutex::new(()));
+        let io_context = DeviceManagerContext::new(io_manager.clone(), io_lock.clone());
+        let mut mgr = PciSystemManager::new(irq_manager.clone(), io_context, res_manager.clone())?;
+
+        let requirements = mgr.resource_requirements();
+        let resources = res_manager
+            .allocate_device_resources(&requirements, USE_SHARED_IRQ)
+            .map_err(DeviceMgrError::ResourceError)?;
+        mgr.activate(resources)?;
+
+        let pci_system_manager = Arc::new(Mutex::new(mgr));
+
+        Ok(DeviceManager {
+            io_manager,
+            io_lock,
+            irq_manager,
            res_manager,
            vm_fd: vm_fd.clone(),
            logger: logger.new(slog::o!()),
@@ -671,8 +716,13 @@ impl DeviceManager {
            #[cfg(feature = "vhost-user-net")]
            vhost_user_net_manager: VhostUserNetDeviceMgr::default(),
            #[cfg(feature = "host-device")]
-            vfio_manager: Arc::new(Mutex::new(VfioDeviceMgr::new(vm_fd, logger))),
-        }
+            vfio_manager: Arc::new(Mutex::new(VfioDeviceMgr::new(
+                vm_fd,
+                pci_system_manager.clone(),
+                logger,
+            ))),
+            pci_system_manager,
+        })
    }

    /// Get the underlying IoManager to dispatch IO read/write requests.
@@ -1024,7 +1074,7 @@ impl DeviceManager {
    }

    #[cfg(feature = "dbs-virtio-devices")]
-    fn get_virtio_mmio_device_info(device: &Arc<DbsMmioV2Device>) -> Result<(u64, u64, u32)> {
+    fn get_virtio_mmio_device_info(device: &Arc<dyn DeviceIo>) -> Result<(u64, u64, u32)> {
        let resources = device.get_assigned_resources();
        let irq = resources
            .get_legacy_irq()
@@ -1038,26 +1088,11 @@ impl DeviceManager {

        Err(DeviceMgrError::GetDeviceResource)
    }
-
-    /// Get pci bus resources for creating fdt.
-    #[cfg(feature = "host-device")]
-    pub fn get_pci_bus_resources(&self) -> Option<PciBusResources> {
-        let mut vfio_dev_mgr = self.vfio_manager.lock().unwrap();
-        let vfio_pci_mgr = vfio_dev_mgr.get_pci_manager();
-        vfio_pci_mgr.as_ref()?;
-        let pci_manager = vfio_pci_mgr.unwrap();
-        let ecam_space = pci_manager.get_ecam_space();
-        let bar_space = pci_manager.get_bar_space();
-        Some(PciBusResources {
-            ecam_space,
-            bar_space,
-        })
-    }
 }

 #[cfg(feature = "dbs-virtio-devices")]
 impl DeviceManager {
-    fn get_virtio_device_info(device: &Arc<DbsMmioV2Device>) -> Result<(u64, u64, u32)> {
+    fn get_virtio_device_info(device: &Arc<dyn DeviceIo>) -> Result<(u64, u64, u32)> {
        let resources = device.get_assigned_resources();
        let irq = resources
            .get_legacy_irq()
@@ -1079,7 +1114,7 @@ impl DeviceManager {
        ctx: &mut DeviceOpContext,
        use_shared_irq: bool,
        use_generic_irq: bool,
-    ) -> std::result::Result<Arc<DbsMmioV2Device>, DeviceMgrError> {
+    ) -> std::result::Result<Arc<dyn DeviceIo>, DeviceMgrError> {
        let features = DRAGONBALL_FEATURE_INTR_USED | DRAGONBALL_FEATURE_PER_QUEUE_NOTIFY;
        DeviceManager::create_mmio_virtio_device_with_features(
            device,
@@ -1097,7 +1132,7 @@ impl DeviceManager {
        ctx: &mut DeviceOpContext,
        use_shared_irq: bool,
        use_generic_irq: bool,
-    ) -> std::result::Result<Arc<DbsMmioV2Device>, DeviceMgrError> {
+    ) -> std::result::Result<Arc<dyn DeviceIo>, DeviceMgrError> {
        let features = DRAGONBALL_FEATURE_PER_QUEUE_NOTIFY;
        DeviceManager::create_mmio_virtio_device_with_features(
            device,
@@ -1116,7 +1151,7 @@ impl DeviceManager {
        features: Option<u32>,
        use_shared_irq: bool,
        use_generic_irq: bool,
-    ) -> std::result::Result<Arc<DbsMmioV2Device>, DeviceMgrError> {
+    ) -> std::result::Result<Arc<dyn DeviceIo>, DeviceMgrError> {
        // Every emulated Virtio MMIO device needs a 4K configuration space,
        // and another 4K space for per queue notification.
        const MMIO_ADDRESS_DEFAULT: ResourceConstraint = ResourceConstraint::MmioAddress {
@@ -1147,12 +1182,18 @@ impl DeviceManager {
        Self::register_mmio_virtio_device(Arc::new(virtio_dev), ctx)
    }

-    /// Teardown the Virtio MMIO transport layer device associated with the virtio backend device.
-    pub fn destroy_mmio_virtio_device(
+    fn destroy_mmio_device(
        device: Arc<dyn DeviceIo>,
        ctx: &mut DeviceOpContext,
    ) -> std::result::Result<(), DeviceMgrError> {
-        Self::destroy_mmio_device(device.clone(), ctx)?;
+        // unregister IoManager
+        Self::deregister_virtio_device(&device, ctx)?;
+
+        // unregister Resource manager
+        let resources = device.get_assigned_resources();
+        ctx.res_manager
+            .free_device_resources(&resources)
+            .map_err(DeviceMgrError::ResourceError)?;

        let mmio_dev = device
            .as_any()
@@ -1164,27 +1205,11 @@ impl DeviceManager {
        Ok(())
    }

-    fn destroy_mmio_device(
-        device: Arc<dyn DeviceIo>,
-        ctx: &mut DeviceOpContext,
-    ) -> std::result::Result<(), DeviceMgrError> {
-        // unregister IoManager
-        Self::deregister_mmio_virtio_device(&device, ctx)?;
-
-        // unregister Resource manager
-        let resources = device.get_assigned_resources();
-        ctx.res_manager
-            .free_device_resources(&resources)
-            .map_err(DeviceMgrError::ResourceError)?;
-
-        Ok(())
-    }
-
    /// Create an Virtio MMIO transport layer device for the virtio backend device.
    pub fn register_mmio_virtio_device(
-        device: Arc<DbsMmioV2Device>,
+        device: Arc<dyn DeviceIo>,
        ctx: &mut DeviceOpContext,
-    ) -> std::result::Result<Arc<DbsMmioV2Device>, DeviceMgrError> {
+    ) -> std::result::Result<Arc<dyn DeviceIo>, DeviceMgrError> {
        let (mmio_base, mmio_size, irq) = Self::get_virtio_device_info(&device)?;
        info!(
            ctx.logger(),
@@ -1226,8 +1251,210 @@ impl DeviceManager {
        }
    }

+    /// Create a Virtio PCI transport layer device for the virtio backend device.
+    pub fn create_virtio_pci_device(
+        mut device: DbsVirtioDevice,
+        ctx: &mut DeviceOpContext,
+        use_generic_irq: bool,
+    ) -> std::result::Result<Arc<dyn DeviceIo>, DeviceMgrError> {
+        let pci_system_manager = ctx.pci_system_manager.lock().unwrap();
+
+        // We always use 64-bit BARs; 32-bit BARs are not supported for now.
+        // Each BAR is aligned to its own size, following cloud-hypervisor:
+        // https://github.com/cloud-hypervisor/cloud-hypervisor/commit/bfc65bff2a5bdb9aca7dcd3284a0ced0e5cc7db8
+        //
+        // Allocate virtio-pci config bar below MMIO_LOW_END.
+        // Each PCI bridge only has two bridge windows:
+        // - One is non-prefetchable and located below `MMIO_LOW_END`.
+        // - The other is prefetchable and located above `MMIO_LOW_END`.
+        // In reference to `clh`, the config BAR is set as non-prefetchable.
+        // Therefore, it must be allocated below `MMIO_LOW_END`.
+        const DEFAULE_VIRTIO_PCI_CONFIG_BAR: ResourceConstraint = ResourceConstraint::MmioAddress {
+            range: Some((0, MMIO_LOW_END)),
+            align: CAPABILITY_BAR_SIZE,
+            size: CAPABILITY_BAR_SIZE,
+        };
+
+        // Virtio pci device always use msi-x, extend irq resources to other_requests
+        let mut other_requests = vec![];
+        VirtioPciDevice::get_interrupt_requirements(device.as_ref(), &mut other_requests);
+
+        // allocate device resources by pci_bus, MmioAddress + KvmSlot?
+        let mut device_requests = vec![];
+        device.get_resource_requirements(&mut device_requests, use_generic_irq);
+
+        // Extend KvmSlot resources to other_requests
+        for req in device_requests.iter() {
+            if !matches!(
+                req,
+                ResourceConstraint::PioAddress { .. } | ResourceConstraint::MmioAddress { .. }
+            ) {
+                other_requests.push(*req);
+            }
+        }
+
+        // allocate PciMsixIrq and KvmSlot by res_manager
+        let other_resources = ctx
+            .res_manager
+            .allocate_device_resources(&other_requests, false)
+            .map_err(DeviceMgrError::ResourceError)?;
+
+        let pci_bus = pci_system_manager.pci_root_bus();
+        let dev_id = pci_system_manager
+            .new_device_id(None)
+            .ok_or(DeviceMgrError::VirtioPci)?;
+
+        // Allocate config bar resources by pci_bus
+        let default_config_req = vec![DEFAULE_VIRTIO_PCI_CONFIG_BAR];
+        let default_config_res = pci_bus
+            .allocate_resources(&default_config_req)
+            .map_err(DeviceMgrError::PciError)?;
+        assert!(default_config_res.get_all_resources().len() == 1);
+
+        // Allocate MmioAddress and PioAddress resources from the pci bus; other resource types are skipped
+        let mut device_resource = pci_bus
+            .allocate_resources(&device_requests)
+            .map_err(DeviceMgrError::PciError)?;
+
+        // Extend PciMsixIrq and KvmSlot resources to device_resource
+        other_resources.get_all_resources().iter().for_each(|res| {
+            device_resource.append(res.clone());
+        });
+
+        // Map the virtio shared memory regions via set_resource; this uses the KvmSlot and MmioAddress resources, which must be allocated beforehand
+        let _virito_shared_mem_list = device
+            .as_mut()
+            .set_resource(ctx.vm_fd.clone(), device_resource.clone())
+            .map_err(DeviceMgrError::Virtio)?;
+
+        // Extend config bar resources to device_resource
+        // Now device_resource contains all resources
+        default_config_res
+            .get_all_resources()
+            .iter()
+            .for_each(|res| {
+                device_resource.append(res.clone());
+            });
+
+        drop(pci_system_manager);
+
+        // new a virtio pci device
+        let mut virtio_dev = VirtioPciDevice::new(
+            ctx.vm_fd.clone(),
+            ctx.get_vm_as()?,
+            ctx.get_address_space()?,
+            ctx.irq_manager.clone(),
+            device_resource,
+            dev_id,
+            device,
+            true,
+            Arc::downgrade(&pci_bus),
+            default_config_res.get_all_resources()[0].clone(),
+        )
+        .map_err(DeviceMgrError::VirtioPciError)?;
+
+        virtio_dev
+            .alloc_bars()
+            .map_err(DeviceMgrError::VirtioPciError)?;
+
+        let arc_dev = Arc::new(virtio_dev);
+
+        pci_bus
+            .register_device(arc_dev.clone())
+            .map_err(DeviceMgrError::PciError)?;
+
+        Self::register_virtio_pci_device(arc_dev, ctx)
+    }
+
+    /// Register the trapped IO resources of a Virtio PCI transport layer device with the IO context.
+    pub fn register_virtio_pci_device(
+        device: Arc<dyn DeviceIo>,
+        ctx: &DeviceOpContext,
+    ) -> std::result::Result<Arc<dyn DeviceIo>, DeviceMgrError> {
+        let resources = device.get_trapped_io_resources();
+        let mut tx = ctx.io_context.begin_tx();
+        if let Err(e) = ctx
+            .io_context
+            .register_device_io(&mut tx, device.clone(), &resources)
+        {
+            ctx.io_context.cancel_tx(tx);
+            Err(DeviceMgrError::IoManager(e))
+        } else {
+            ctx.io_context.commit_tx(tx);
+            Ok(device)
+        }
+    }
+
+    /// Deregister Virtio device from IoManager
+    pub fn deregister_virtio_device(
+        device: &Arc<dyn DeviceIo>,
+        ctx: &mut DeviceOpContext,
+    ) -> std::result::Result<(), DeviceMgrError> {
+        let resources = device.get_trapped_io_resources();
+        info!(
+            ctx.logger(),
+            "unregister pci virtio device: {:?}", resources
+        );
+        let mut tx = ctx.io_context.begin_tx();
+        if let Err(e) = ctx.io_context.unregister_device_io(&mut tx, &resources) {
+            ctx.io_context.cancel_tx(tx);
+            Err(DeviceMgrError::IoManager(e))
+        } else {
+            ctx.io_context.commit_tx(tx);
+            Ok(())
+        }
+    }
+
+    /// Destroy/Deregister resources for a Virtio PCI device
+    fn destroy_pci_device(
+        device: Arc<dyn DeviceIo>,
+        ctx: &mut DeviceOpContext,
+        dev_id: u8,
+    ) -> std::result::Result<(), DeviceMgrError> {
+        // unregister IoManager
+        Self::deregister_virtio_device(&device, ctx)?;
+        // unregister Resource manager
+        let resources = device.get_assigned_resources();
+        let mut system_resources = DeviceResources::new();
+        resources.iter().for_each(|res| {
+            if !matches!(
+                res,
+                Resource::PioAddressRange { .. } | Resource::MmioAddressRange { .. }
+            ) {
+                system_resources.append(res.clone());
+            }
+        });
+        info!(
+            ctx.logger(),
+            "unregister resource {:?} from system resource manager for pci device",
+            system_resources
+        );
+        ctx.res_manager
+            .free_device_resources(&system_resources)
+            .map_err(DeviceMgrError::ResourceError)?;
+        let pci_system_manager = ctx.pci_system_manager.lock().unwrap();
+        let pci_bus = pci_system_manager.pci_root_bus();
+        info!(
+            ctx.logger(),
+            "unregister resource {:?} from pci bus resource manager for pci device", resources
+        );
+        pci_bus.free_resources(resources);
+        let _ = pci_system_manager.free_device_id(dev_id as u32);
+
+        let pci_dev = device
+            .as_any()
+            .downcast_ref::<VirtioPciDevice<GuestAddressSpaceImpl, QueueSync, GuestRegionMmap>>()
+            .ok_or(DeviceMgrError::InvalidOperation)?;
+
+        pci_dev.remove();
+
+        Ok(())
+    }
+
    #[cfg(feature = "host-device")]
    fn get_pci_device_info(device: &Arc<dyn DeviceIo>) -> Result<(u8, u8)> {
+        use virtio_queue::QueueSync;
+
        if let Some(pci_dev) = device
            .as_any()
            .downcast_ref::<VfioPciDevice<PciSystemManager>>()
@@ -1244,10 +1471,41 @@ impl DeviceManager {
            // together those 8 bits combined as devfn value
            let devfn = (((slot) & 0x1f) << 3) | ((func) & 0x07);

+            return Ok((busno, devfn));
+        } else if let Some(pci_dev) = device.as_any().downcast_ref::<VirtioPciDevice<
+            GuestAddressSpaceImpl,
+            QueueSync,
+            GuestRegionMmap,
+        >>() {
+            // reference from kernel: include/uapi/linux/pci.h
+            let busno = pci_dev.bus_id().map_err(DeviceMgrError::VirtioPciError)?;
+            let slot = pci_dev.device_id();
+            let func = 0;
+            let devfn = (((slot) & 0x1f) << 3) | ((func) & 0x07);
+
            return Ok((busno, devfn));
        }

-        Err(DeviceMgrError::GetDeviceResource)
+        Err(DeviceMgrError::InvalidPciDeviceType)
+    }
+
+    /// Teardown the Virtio PCI or MMIO transport layer device associated with the virtio backend device.
+    pub fn destroy_virtio_device(
+        device: Arc<dyn DeviceIo>,
+        ctx: &mut DeviceOpContext,
+    ) -> std::result::Result<(), DeviceMgrError> {
+        if let Some(mmio_dev) = device.as_any().downcast_ref::<DbsMmioV2Device>() {
+            Self::destroy_mmio_device(device.clone(), ctx)?;
+            mmio_dev.remove();
+        } else if let Some(pci_dev) = device.as_any().downcast_ref::<VirtioPciDevice<
+            GuestAddressSpaceImpl,
+            QueueSync,
+            GuestRegionMmap,
+        >>() {
+            Self::destroy_pci_device(device.clone(), ctx, pci_dev.device_id())?;
+        }
+
+        Ok(())
    }
 }

@@ -1312,12 +1570,29 @@ mod tests {
                String::from("1"),
            )));

+            let irq_manager = Arc::new(KvmIrqManager::new(vm_fd.clone()));
+            let io_manager = Arc::new(ArcSwap::new(Arc::new(IoManager::new())));
+            let io_lock = Arc::new(Mutex::new(()));
+            let io_context = DeviceManagerContext::new(io_manager.clone(), io_lock.clone());
+            let mut mgr =
+                PciSystemManager::new(irq_manager.clone(), io_context, res_manager.clone())
+                    .unwrap();
+
+            let requirements = mgr.resource_requirements();
+            let resources = res_manager
+                .allocate_device_resources(&requirements, USE_SHARED_IRQ)
+                .map_err(DeviceMgrError::ResourceError)
+                .unwrap();
+            mgr.activate(resources).unwrap();
+
+            let pci_system_manager = Arc::new(Mutex::new(mgr));
+
            DeviceManager {
                vm_fd: Arc::clone(&vm_fd),
                con_manager: ConsoleManager::new(epoll_manager, &logger),
-                io_manager: Arc::new(ArcSwap::new(Arc::new(IoManager::new()))),
-                io_lock: Arc::new(Mutex::new(())),
-                irq_manager: Arc::new(KvmIrqManager::new(vm_fd.clone())),
+                io_manager,
+                io_lock,
+                irq_manager,
                res_manager,

                legacy_manager: None,
@@ -1340,7 +1615,12 @@ mod tests {
                #[cfg(feature = "vhost-user-net")]
                vhost_user_net_manager: VhostUserNetDeviceMgr::default(),
                #[cfg(feature = "host-device")]
-                vfio_manager: Arc::new(Mutex::new(VfioDeviceMgr::new(vm_fd, &logger))),
+                vfio_manager: Arc::new(Mutex::new(VfioDeviceMgr::new(
+                    vm_fd,
+                    pci_system_manager.clone(),
+                    &logger,
+                ))),
+                pci_system_manager,

                logger,
                shared_info,
--- a/src/dragonball/src/device_manager/vfio_dev_mgr/mod.rs
+++ b/src/dragonball/src/device_manager/vfio_dev_mgr/mod.rs
@@ -17,12 +17,11 @@ use std::ops::Deref;
 use std::os::fd::RawFd;
 use std::path::Path;
 use std::sync::mpsc::Sender;
-use std::sync::{Arc, Weak};
+use std::sync::{Arc, Mutex, Weak};

 use dbs_device::resources::Resource::LegacyIrq;
 use dbs_device::resources::{DeviceResources, Resource, ResourceConstraint};
 use dbs_device::DeviceIo;
-use dbs_interrupt::KvmIrqManager;
 use dbs_pci::{VfioPciDevice, VENDOR_NVIDIA};
 use dbs_upcall::{DevMgrResponse, UpcallClientResponse};
 use kvm_ioctls::{DeviceFd, VmFd};
@@ -37,8 +36,8 @@ use vm_memory::{
 use super::StartMicroVmError;
 use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl};
 use crate::config_manager::{ConfigItem, DeviceConfigInfo, DeviceConfigInfos};
-use crate::device_manager::{DeviceManagerContext, DeviceMgrError, DeviceOpContext};
-use crate::resource_manager::{ResourceError, ResourceManager};
+use crate::device_manager::{DeviceMgrError, DeviceOpContext};
+use crate::resource_manager::ResourceError;

 // The flag of whether to use the shared irq.
 const USE_SHARED_IRQ: bool = true;
@@ -230,7 +229,7 @@ pub struct VfioDeviceMgr {
    info_list: DeviceConfigInfos<HostDeviceConfig>,
    locked_vm_size: u64,
    vfio_container: Option<Arc<VfioContainer>>,
-    pci_vfio_manager: Option<Arc<PciSystemManager>>,
+    pci_system_manager: Arc<Mutex<PciSystemManager>>,
    pci_legacy_irqs: Option<HashMap<u8, u8>>,
    nvidia_shared_irq: Option<u32>,
    logger: slog::Logger,
@@ -238,13 +237,17 @@ pub struct VfioDeviceMgr {

 impl VfioDeviceMgr {
    /// Create a new VFIO device manager.
-    pub fn new(vm_fd: Arc<VmFd>, logger: &slog::Logger) -> Self {
+    pub fn new(
+        vm_fd: Arc<VmFd>,
+        pci_system_manager: Arc<Mutex<PciSystemManager>>,
+        logger: &slog::Logger,
+    ) -> Self {
        VfioDeviceMgr {
            vm_fd,
            info_list: DeviceConfigInfos::new(),
            locked_vm_size: 0,
            vfio_container: None,
-            pci_vfio_manager: None,
+            pci_system_manager,
            pci_legacy_irqs: Some(HashMap::new()),
            nvidia_shared_irq: None,
            logger: logger.new(slog::o!()),
@@ -288,17 +291,6 @@ impl VfioDeviceMgr {
        &mut self,
        ctx: &mut DeviceOpContext,
    ) -> std::result::Result<(), StartMicroVmError> {
-        // create and attach pci root bus
-        #[cfg(all(feature = "hotplug", feature = "host-device"))]
-        if ctx.pci_hotplug_enabled {
-            let _ = self
-                .create_pci_manager(
-                    ctx.irq_manager.clone(),
-                    ctx.io_context.clone(),
-                    ctx.res_manager.clone(),
-                )
-                .map_err(StartMicroVmError::CreateVfioDevice)?;
-        }
        for (idx, info) in self.info_list.clone().iter().enumerate() {
            self.create_device(&info.config, ctx, idx)
                .map_err(StartMicroVmError::CreateVfioDevice)?;
@@ -574,12 +566,9 @@ impl VfioDeviceMgr {
            "subsystem" => "vfio_dev_mgr",
             "host_bdf" => &cfg.bus_slot_func,
        );
-        // safe to get pci_manager
-        let pci_manager = self.create_pci_manager(
-            ctx.irq_manager.clone(),
-            ctx.io_context.clone(),
-            ctx.res_manager.clone(),
-        )?;
+
+        let pci_manager = self.get_pci_manager();
+        let pci_manager = pci_manager.lock().unwrap();
        let pci_bus = pci_manager.pci_root_bus();
        let id = pci_manager
            .new_device_id(cfg.guest_dev_id)
@@ -607,7 +596,7 @@ impl VfioDeviceMgr {
                sysfs_path,
                Arc::downgrade(&pci_bus),
                vfio_dev,
-                Arc::downgrade(self.get_pci_manager().unwrap()),
+                self.get_pci_manager(),
                ctx.vm_fd.clone(),
                cfg.vendor_device_id,
                cfg.clique_id,
@@ -665,8 +654,8 @@ impl VfioDeviceMgr {

-        // safe to unwrap because pci vfio manager is already created
+        // unwrap() only panics if the pci_system_manager mutex is poisoned
        let _ = self
-            .pci_vfio_manager
-            .as_mut()
+            .pci_system_manager
+            .lock()
            .unwrap()
            .free_device_id(device_id)
            .ok_or(VfioDeviceError::InvalidDeviceID(device_id))?;
@@ -698,27 +687,9 @@ impl VfioDeviceMgr {
        Ok(())
    }

-    pub(crate) fn create_pci_manager(
-        &mut self,
-        irq_manager: Arc<KvmIrqManager>,
-        io_context: DeviceManagerContext,
-        res_manager: Arc<ResourceManager>,
-    ) -> Result<&mut Arc<PciSystemManager>> {
-        if self.pci_vfio_manager.is_none() {
-            let mut mgr = PciSystemManager::new(irq_manager, io_context, res_manager.clone())?;
-            let requirements = mgr.resource_requirements();
-            let resources = res_manager
-                .allocate_device_resources(&requirements, USE_SHARED_IRQ)
-                .or(Err(VfioDeviceError::NoResource))?;
-            mgr.activate(resources)?;
-            self.pci_vfio_manager = Some(Arc::new(mgr));
-        }
-        Ok(self.pci_vfio_manager.as_mut().unwrap())
-    }
-
    /// Get the PCI manager to support PCI device passthrough
-    pub fn get_pci_manager(&mut self) -> Option<&mut Arc<PciSystemManager>> {
-        self.pci_vfio_manager.as_mut()
+    pub fn get_pci_manager(&mut self) -> Arc<Mutex<PciSystemManager>> {
+        self.pci_system_manager.clone()
    }
 }

--- a/src/dragonball/src/device_manager/vfio_dev_mgr/pci_vfio.rs
+++ b/src/dragonball/src/device_manager/vfio_dev_mgr/pci_vfio.rs
@@ -12,14 +12,13 @@ use dbs_interrupt::KvmIrqManager;
 use dbs_pci::ECAM_SPACE_LENGTH;
 use dbs_pci::{create_pci_root_bus, PciBus, PciDevice, PciRootDevice, PciSystemContext};

-use super::{Result, VfioDeviceError};
+use super::DeviceMgrError;
 #[cfg(target_arch = "aarch64")]
 use crate::device_manager::vfio_dev_mgr::USE_SHARED_IRQ;
 use crate::device_manager::DeviceManagerContext;
 use crate::resource_manager::ResourceManager;
+use dbs_pci::PCI_BUS_DEFAULT;

-/// we only support one pci bus
-pub const PCI_BUS_DEFAULT: u8 = 0;
 /// The default mmio size for pci root bus.
 const PCI_MMIO_DEFAULT_SIZE: u64 = 2048u64 << 30;

@@ -38,13 +37,13 @@ impl PciSystemManager {
        irq_manager: Arc<KvmIrqManager>,
        io_context: DeviceManagerContext,
        res_manager: Arc<ResourceManager>,
-    ) -> std::result::Result<Self, VfioDeviceError> {
+    ) -> std::result::Result<Self, DeviceMgrError> {
        let resources = PciSystemManager::allocate_root_device_resources(res_manager)?;
        let pci_root = Arc::new(
-            PciRootDevice::create(PCI_BUS_DEFAULT, resources).map_err(VfioDeviceError::PciError)?,
+            PciRootDevice::create(PCI_BUS_DEFAULT, resources).map_err(DeviceMgrError::PciError)?,
        );
        let pci_root_bus =
-            create_pci_root_bus(PCI_BUS_DEFAULT).map_err(VfioDeviceError::PciError)?;
+            create_pci_root_bus(PCI_BUS_DEFAULT).map_err(DeviceMgrError::PciError)?;

        Ok(PciSystemManager {
            irq_manager,
@@ -58,7 +57,7 @@ impl PciSystemManager {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn allocate_root_device_resources(
        _res_manager: Arc<ResourceManager>,
-    ) -> Result<DeviceResources> {
+    ) -> std::result::Result<DeviceResources, DeviceMgrError> {
        let mut resources = DeviceResources::new();
        resources.append(Resource::PioAddressRange {
            // PCI CONFIG_ADDRESS port address 0xcf8 and uses 32 bits
@@ -76,7 +75,7 @@ impl PciSystemManager {
    #[cfg(target_arch = "aarch64")]
    fn allocate_root_device_resources(
        res_manager: Arc<ResourceManager>,
-    ) -> Result<DeviceResources> {
+    ) -> std::result::Result<DeviceResources, DeviceMgrError> {
        let requests = vec![ResourceConstraint::MmioAddress {
            range: Some((0x0, 0xffff_ffff)),
            align: 4096,
@@ -84,23 +83,26 @@ impl PciSystemManager {
        }];
        let resources = res_manager
            .allocate_device_resources(&requests, USE_SHARED_IRQ)
-            .map_err(VfioDeviceError::AllocateDeviceResource)?;
+            .map_err(DeviceMgrError::ResourceError)?;
        Ok(resources)
    }

    /// Activate the PCI subsystem.
-    pub fn activate(&mut self, resources: DeviceResources) -> Result<()> {
+    pub fn activate(
+        &mut self,
+        resources: DeviceResources,
+    ) -> std::result::Result<(), DeviceMgrError> {
        let bus_id = self.pci_root_bus.bus_id();

        self.pci_root
            .add_bus(self.pci_root_bus.clone(), bus_id)
-            .map_err(VfioDeviceError::PciError)?;
+            .map_err(DeviceMgrError::PciError)?;
        PciRootDevice::activate(self.pci_root.clone(), &mut self.io_context)
-            .map_err(VfioDeviceError::PciError)?;
+            .map_err(DeviceMgrError::PciError)?;

        self.pci_root_bus
            .assign_resources(resources)
-            .map_err(VfioDeviceError::PciError)?;
+            .map_err(DeviceMgrError::PciError)?;

        Ok(())
    }
--- a/src/dragonball/src/device_manager/vhost_net_dev_mgr.rs
+++ b/src/dragonball/src/device_manager/vhost_net_dev_mgr.rs
@@ -256,7 +256,7 @@ impl VhostNetDeviceMgr {
                info.config.iface_id
            );
            if let Some(device) = info.device.take() {
-                DeviceManager::destroy_mmio_virtio_device(device, ctx)?;
+                DeviceManager::destroy_mmio_device(device, ctx)?;
            }
        }

--- a/src/dragonball/src/device_manager/virtio_net_dev_mgr.rs
+++ b/src/dragonball/src/device_manager/virtio_net_dev_mgr.rs
@@ -382,7 +382,7 @@ impl VirtioNetDeviceMgr {
                info.config.iface_id
            );
            if let Some(device) = info.device.take() {
-                DeviceManager::destroy_mmio_virtio_device(device, ctx)?;
+                DeviceManager::destroy_mmio_device(device, ctx)?;
            }
        }
        Ok(())
--- a/src/dragonball/src/device_manager/vsock_dev_mgr.rs
+++ b/src/dragonball/src/device_manager/vsock_dev_mgr.rs
@@ -294,7 +294,7 @@ impl VsockDeviceMgr {
                info.config.id
            );
            if let Some(device) = info.device.take() {
-                DeviceManager::destroy_mmio_virtio_device(device, ctx)?;
+                DeviceManager::destroy_mmio_device(device, ctx)?;
            }
        }
        Ok(())
--- a/src/dragonball/src/error.rs
+++ b/src/dragonball/src/error.rs
@@ -77,6 +77,10 @@ pub enum Error {
    /// Cannot open the VM file descriptor.
    #[error(transparent)]
    Vm(vm::VmError),
+
+    /// Failed to create the device manager system.
+    #[error("failed to create device manager system: {0}")]
+    DeviceMgrError(#[source] device_manager::DeviceMgrError),
 }

 /// Errors associated with starting the instance.
@@ -215,6 +219,10 @@ pub enum StartMicroVmError {
    /// Failed to register DMA memory address range.
    #[error("failure while registering DMA address range: {0:?}")]
    RegisterDMAAddress(#[source] VfioDeviceError),
+
+    /// Cannot build seccomp filters.
+    #[error("failure while configuring seccomp filters: {0}")]
+    SeccompFilters(#[source] seccompiler::Error),
 }

 /// Errors associated with starting the instance.
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .20.0
 .21.0