Compare commits

..

3 Commits

Author SHA1 Message Date
Fabiano Fidêncio
1eabd6c729 tests: k8s: coco: Use a var for AUTHENTICATED_IMAGE_PASSWORD
It's a bot password that only has read permissions for that image, no
need to use a secret here.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-02-15 23:12:02 +01:00
Fabiano Fidêncio
b1ec7d0c02 build: ci: remove KBUILD_SIGN_PIN entirely
Drop kernel build signing (KBUILD_SIGN_PIN) from CI and from all
scripts that referenced it.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-15 18:52:18 +01:00
Fabiano Fidêncio
83dce477d0 build: ci: remove CI_HKD_PATH and s390x boot-image-se build
Drop the CI_HKD_PATH secret and the build-asset-boot-image-se job from
the s390x tarball workflow; the artefact that depended on the host key
was never ever released anyways.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-15 16:24:30 +01:00
1135 changed files with 101998 additions and 93294 deletions

View File

@@ -1,37 +0,0 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/streetsidesoftware/cspell/main/cspell.schema.json
version: "0.2"
language: en,en-GB
dictionaryDefinitions:
- name: kata-terms
path: ./tests/spellcheck/kata-dictionary.txt
addWords: true
dictionaries:
- en-GB
- en_US
- bash
- git
- golang
- k8s
- python
- rust
- companies
- mnemonics
- peopleNames
- softwareTerms
- networking-terms
- kata-terms
ignoreRegExpList:
- /@[a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38}/gi # Ignores github handles
# Ignore code blocks
- /^\s*`{3,}[\s\S]*?^\s*`{3,}/gm
- /`[^`\n]+`/g
ignorePaths:
- "**/vendor/**" # vendor files aren't owned by us
- "**/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/**" # Generated files
- "**/requirements.txt"
useGitignore: true

View File

@@ -28,9 +28,3 @@ self-hosted-runner:
- s390x-large
- tdx
- ubuntu-24.04-arm
paths:
.github/workflows/**/*.{yml,yaml}:
ignore:
# We use if: false to "temporarily" skip jobs with issues
- 'constant expression "false" in condition'

View File

@@ -15,8 +15,6 @@ updates:
- "/src/tools/trace-forwarder"
schedule:
interval: "daily"
cooldown:
default-days: 7
ignore:
# rust-vmm repos might cause incompatibilities on patch versions, so
# lets handle them manually for now.
@@ -37,9 +35,9 @@ updates:
# create groups for common dependencies, so they can all go in a single PR
# We can extend this as we see more frequent groups
groups:
aws-libcrypto:
bit-vec:
patterns:
- aws-lc-*
- bit-vec
bumpalo:
patterns:
- bumpalo
@@ -67,9 +65,6 @@ updates:
rustix:
patterns:
- rustix
rustls-webpki:
patterns:
- rustls-webpki
slab:
patterns:
- slab
@@ -90,12 +85,8 @@ updates:
- "src/tools/csi-kata-directvolume"
schedule:
interval: "daily"
cooldown:
default-days: 7
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "monthly"
cooldown:
default-days: 7

View File

@@ -13,13 +13,18 @@ concurrency:
jobs:
run-actionlint:
name: run-actionlint
env:
GH_TOKEN: ${{ github.token }}
runs-on: ubuntu-24.04
steps:
- name: Checkout the code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
persist-credentials: false
- name: Install actionlint gh extension
run: gh extension install https://github.com/cschleiden/gh-actionlint
- name: Run actionlint
uses: raven-actions/actionlint@e01d1ea33dd6a5ed517d95b4c0c357560ac6f518 # v2.1.1
run: gh actionlint

View File

@@ -47,23 +47,6 @@ jobs:
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: Install yq
run: |
./ci/install_yq.sh
env:
INSTALL_IN_GOPATH: false
- name: Read properties from versions.yaml
run: |
go_version="$(yq '.languages.golang.version' versions.yaml)"
[ -n "$go_version" ]
echo "GO_VERSION=${go_version}" >> "$GITHUB_ENV"
- name: Setup Golang version ${{ env.GO_VERSION }}
uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
with:
go-version: ${{ env.GO_VERSION }}
- name: Install dependencies
run: bash tests/integration/cri-containerd/gha-run.sh install-dependencies
env:

View File

@@ -47,25 +47,8 @@ jobs:
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: Install yq
run: |
./ci/install_yq.sh
env:
INSTALL_IN_GOPATH: false
- name: Read properties from versions.yaml
run: |
go_version="$(yq '.languages.golang.version' versions.yaml)"
[ -n "$go_version" ]
echo "GO_VERSION=${go_version}" >> "$GITHUB_ENV"
- name: Setup Golang version ${{ env.GO_VERSION }}
uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
with:
go-version: ${{ env.GO_VERSION }}
- name: Install dependencies
run: bash tests/integration/cri-containerd/gha-run.sh install-dependencies
run: bash tests/integration/cri-containerd/gha-run.sh
env:
GH_TOKEN: ${{ github.token }}

View File

@@ -82,17 +82,11 @@ jobs:
./ci/install_yq.sh
env:
INSTALL_IN_GOPATH: false
- name: Read properties from versions.yaml
- name: Install golang
if: contains(matrix.component.needs, 'golang')
run: |
go_version="$(yq '.languages.golang.version' versions.yaml)"
[ -n "$go_version" ]
echo "GO_VERSION=${go_version}" >> "$GITHUB_ENV"
- name: Setup Golang version ${{ env.GO_VERSION }}
if: contains(matrix.component.needs, 'golang')
uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
with:
go-version: ${{ env.GO_VERSION }}
./tests/install_go.sh -f -p
echo "/usr/local/go/bin" >> "$GITHUB_PATH"
- name: Setup rust
if: contains(matrix.component.needs, 'rust')
run: |

View File

@@ -94,19 +94,11 @@ jobs:
./ci/install_yq.sh
env:
INSTALL_IN_GOPATH: false
- name: Read properties from versions.yaml
- name: Install golang
if: contains(matrix.component.needs, 'golang')
run: |
go_version="$(yq '.languages.golang.version' versions.yaml)"
[ -n "$go_version" ]
echo "GO_VERSION=${go_version}" >> "$GITHUB_ENV"
- name: Setup Golang version ${{ env.GO_VERSION }}
if: contains(matrix.component.needs, 'golang')
uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
with:
go-version: ${{ env.GO_VERSION }}
# Setup-go doesn't work properly with ppc64le: https://github.com/actions/setup-go/issues/648
architecture: ${{ contains(inputs.instance, 'ppc64le') && 'ppc64le' || '' }}
./tests/install_go.sh -f -p
echo "/usr/local/go/bin" >> "$GITHUB_PATH"
- name: Setup rust
if: contains(matrix.component.needs, 'rust')
run: |

View File

@@ -23,8 +23,6 @@ on:
secrets:
QUAY_DEPLOYER_PASSWORD:
required: false
KBUILD_SIGN_PIN:
required: true
permissions: {}
@@ -47,7 +45,6 @@ jobs:
- coco-guest-components
- firecracker
- kernel
- kernel-debug
- kernel-dragonball-experimental
- kernel-nvidia-gpu
- nydus
@@ -103,7 +100,6 @@ jobs:
ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
TARGET_BRANCH: ${{ inputs.target-branch }}
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
KBUILD_SIGN_PIN: ${{ contains(matrix.asset, 'nvidia') && secrets.KBUILD_SIGN_PIN || '' }}
- name: Parse OCI image name and digest
id: parse-oci-segments
@@ -144,7 +140,7 @@ jobs:
if-no-files-found: error
- name: store-extratarballs-artifact ${{ matrix.asset }}
if: ${{ startsWith(matrix.asset, 'kernel-nvidia-gpu') }}
if: ${{ matrix.asset == 'kernel' || startsWith(matrix.asset, 'kernel-nvidia-gpu') }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: kata-artifacts-amd64-${{ matrix.asset }}-modules${{ inputs.tarball-suffix }}
@@ -169,6 +165,8 @@ jobs:
- rootfs-image-nvidia-gpu-confidential
- rootfs-initrd
- rootfs-initrd-confidential
- rootfs-initrd-nvidia-gpu
- rootfs-initrd-nvidia-gpu-confidential
steps:
- name: Login to Kata Containers quay.io
if: ${{ inputs.push-to-registry == 'yes' }}
@@ -214,7 +212,6 @@ jobs:
ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
TARGET_BRANCH: ${{ inputs.target-branch }}
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
KBUILD_SIGN_PIN: ${{ contains(matrix.asset, 'nvidia') && secrets.KBUILD_SIGN_PIN || '' }}
- name: store-artifact ${{ matrix.asset }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
@@ -234,6 +231,7 @@ jobs:
asset:
- busybox
- coco-guest-components
- kernel-modules
- kernel-nvidia-gpu-modules
- pause-image
steps:
@@ -348,16 +346,6 @@ jobs:
./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts versions.yaml
env:
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
- name: Check kata tarball size (GitHub release asset limit)
run: |
# https://docs.github.com/en/repositories/releasing-projects-on-github/about-releases#storage-and-bandwidth-quotas
GITHUB_ASSET_MAX_BYTES=2147483648
tarball_size=$(stat -c "%s" kata-static.tar.zst)
if [[ "${tarball_size}" -ge "${GITHUB_ASSET_MAX_BYTES}" ]]; then
echo "::error::tarball size (${tarball_size} bytes) >= GitHub release asset limit (${GITHUB_ASSET_MAX_BYTES} bytes)"
exit 1
fi
echo "tarball size: ${tarball_size} bytes"
- name: store-artifacts
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
@@ -376,6 +364,7 @@ jobs:
matrix:
asset:
- agent-ctl
- csi-kata-directvolume
- genpolicy
- kata-ctl
- kata-manager
@@ -458,16 +447,6 @@ jobs:
./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-tools-artifacts versions.yaml kata-tools-static.tar.zst
env:
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
- name: Check kata-tools tarball size (GitHub release asset limit)
run: |
# https://docs.github.com/en/repositories/releasing-projects-on-github/about-releases#storage-and-bandwidth-quotas
GITHUB_ASSET_MAX_BYTES=2147483648
tarball_size=$(stat -c "%s" kata-tools-static.tar.zst)
if [[ "${tarball_size}" -ge "${GITHUB_ASSET_MAX_BYTES}" ]]; then
echo "::error::tarball size (${tarball_size} bytes) >= GitHub release asset limit (${GITHUB_ASSET_MAX_BYTES} bytes)"
exit 1
fi
echo "tarball size: ${tarball_size} bytes"
- name: store-artifacts
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:

View File

@@ -23,8 +23,6 @@ on:
secrets:
QUAY_DEPLOYER_PASSWORD:
required: false
KBUILD_SIGN_PIN:
required: true
permissions: {}
@@ -45,7 +43,6 @@ jobs:
- cloud-hypervisor
- firecracker
- kernel
- kernel-debug
- kernel-dragonball-experimental
- kernel-nvidia-gpu
- kernel-cca-confidential
@@ -91,7 +88,6 @@ jobs:
ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
TARGET_BRANCH: ${{ inputs.target-branch }}
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
KBUILD_SIGN_PIN: ${{ contains(matrix.asset, 'nvidia') && secrets.KBUILD_SIGN_PIN || '' }}
- name: Parse OCI image name and digest
id: parse-oci-segments
@@ -153,6 +149,7 @@ jobs:
- rootfs-image
- rootfs-image-nvidia-gpu
- rootfs-initrd
- rootfs-initrd-nvidia-gpu
steps:
- name: Login to Kata Containers quay.io
if: ${{ inputs.push-to-registry == 'yes' }}
@@ -197,7 +194,6 @@ jobs:
ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
TARGET_BRANCH: ${{ inputs.target-branch }}
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
KBUILD_SIGN_PIN: ${{ contains(matrix.asset, 'nvidia') && secrets.KBUILD_SIGN_PIN || '' }}
- name: store-artifact ${{ matrix.asset }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
@@ -327,16 +323,6 @@ jobs:
./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts versions.yaml
env:
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
- name: Check kata tarball size (GitHub release asset limit)
run: |
# https://docs.github.com/en/repositories/releasing-projects-on-github/about-releases#storage-and-bandwidth-quotas
GITHUB_ASSET_MAX_BYTES=2147483648
tarball_size=$(stat -c "%s" kata-static.tar.zst)
if [[ "${tarball_size}" -ge "${GITHUB_ASSET_MAX_BYTES}" ]]; then
echo "::error::tarball size (${tarball_size} bytes) >= GitHub release asset limit (${GITHUB_ASSET_MAX_BYTES} bytes)"
exit 1
fi
echo "tarball size: ${tarball_size} bytes"
- name: store-artifacts
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:

View File

@@ -262,16 +262,6 @@ jobs:
./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts versions.yaml
env:
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
- name: Check kata tarball size (GitHub release asset limit)
run: |
# https://docs.github.com/en/repositories/releasing-projects-on-github/about-releases#storage-and-bandwidth-quotas
GITHUB_ASSET_MAX_BYTES=2147483648
tarball_size=$(stat -c "%s" kata-static.tar.zst)
if [[ "${tarball_size}" -ge "${GITHUB_ASSET_MAX_BYTES}" ]]; then
echo "::error::tarball size (${tarball_size} bytes) >= GitHub release asset limit (${GITHUB_ASSET_MAX_BYTES} bytes)"
exit 1
fi
echo "tarball size: ${tarball_size} bytes"
- name: store-artifacts
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:

View File

@@ -21,8 +21,6 @@ on:
type: string
default: ""
secrets:
CI_HKD_PATH:
required: true
QUAY_DEPLOYER_PASSWORD:
required: true
@@ -120,6 +118,15 @@ jobs:
retention-days: 15
if-no-files-found: error
- name: store-extratarballs-artifact ${{ matrix.asset }}
if: ${{ matrix.asset == 'kernel' }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: kata-artifacts-s390x-${{ matrix.asset }}-modules${{ inputs.tarball-suffix }}
path: kata-build/kata-static-${{ matrix.asset }}-modules.tar.zst
retention-days: 15
if-no-files-found: error
build-asset-rootfs:
name: build-asset-rootfs
runs-on: s390x
@@ -188,60 +195,11 @@ jobs:
retention-days: 15
if-no-files-found: error
build-asset-boot-image-se:
name: build-asset-boot-image-se
runs-on: s390x
needs: [build-asset, build-asset-rootfs]
permissions:
contents: read
packages: write
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false
- name: Rebase atop of the latest target branch
run: |
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: get-artifacts
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
pattern: kata-artifacts-s390x-*${{ inputs.tarball-suffix }}
path: kata-artifacts
merge-multiple: true
- name: Place a host key document
run: |
mkdir -p "host-key-document"
cp "${CI_HKD_PATH}" "host-key-document"
env:
CI_HKD_PATH: ${{ secrets.CI_HKD_PATH }}
- name: Build boot-image-se
run: |
./tests/gha-adjust-to-use-prebuilt-components.sh kata-artifacts "boot-image-se"
make boot-image-se-tarball
build_dir=$(readlink -f build)
sudo cp -r "${build_dir}" "kata-build"
sudo chown -R "$(id -u)":"$(id -g)" "kata-build"
env:
HKD_PATH: "host-key-document"
- name: store-artifact boot-image-se
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: kata-artifacts-s390x${{ inputs.tarball-suffix }}
path: kata-build/kata-static-boot-image-se.tar.zst
retention-days: 1
if-no-files-found: error
# We don't need the binaries installed in the rootfs as part of the release tarball, so can delete them now we've built the rootfs
remove-rootfs-binary-artifacts:
name: remove-rootfs-binary-artifacts
runs-on: ubuntu-22.04
needs: [build-asset-rootfs, build-asset-boot-image-se]
needs: [build-asset-rootfs]
strategy:
matrix:
asset:
@@ -322,7 +280,6 @@ jobs:
needs:
- build-asset
- build-asset-rootfs
- build-asset-boot-image-se
- build-asset-shim-v2
permissions:
contents: read
@@ -350,16 +307,6 @@ jobs:
./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts versions.yaml
env:
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
- name: Check kata tarball size (GitHub release asset limit)
run: |
# https://docs.github.com/en/repositories/releasing-projects-on-github/about-releases#storage-and-bandwidth-quotas
GITHUB_ASSET_MAX_BYTES=2147483648
tarball_size=$(stat -c "%s" kata-static.tar.zst)
if [[ "${tarball_size}" -ge "${GITHUB_ASSET_MAX_BYTES}" ]]; then
echo "::error::tarball size (${tarball_size} bytes) >= GitHub release asset limit (${GITHUB_ASSET_MAX_BYTES} bytes)"
exit 1
fi
echo "tarball size: ${tarball_size} bytes"
- name: store-artifacts
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:

View File

@@ -25,9 +25,8 @@ jobs:
tag: ${{ github.sha }}-weekly
target-branch: ${{ github.ref_name }}
secrets:
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ vars.AUTHENTICATED_IMAGE_PASSWORD }}
AZ_APPID: ${{ secrets.AZ_APPID }}
AZ_TENANT_ID: ${{ secrets.AZ_TENANT_ID }}
AZ_SUBSCRIPTION_ID: ${{ secrets.AZ_SUBSCRIPTION_ID }}
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}

View File

@@ -17,18 +17,15 @@ jobs:
pr-number: "dev"
tag: ${{ github.sha }}-dev
target-branch: ${{ github.ref_name }}
extensive-matrix-autogenerated-policy: "yes"
secrets:
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ vars.AUTHENTICATED_IMAGE_PASSWORD }}
AZ_APPID: ${{ secrets.AZ_APPID }}
AZ_TENANT_ID: ${{ secrets.AZ_TENANT_ID }}
AZ_SUBSCRIPTION_ID: ${{ secrets.AZ_SUBSCRIPTION_ID }}
CI_HKD_PATH: ${{ secrets.CI_HKD_PATH }}
ITA_KEY: ${{ secrets.ITA_KEY }}
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
build-checks:
uses: ./.github/workflows/build-checks.yaml

View File

@@ -22,14 +22,11 @@ jobs:
pr-number: "nightly"
tag: ${{ github.sha }}-nightly
target-branch: ${{ github.ref_name }}
extensive-matrix-autogenerated-policy: "yes"
secrets:
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ vars.AUTHENTICATED_IMAGE_PASSWORD }}
AZ_APPID: ${{ secrets.AZ_APPID }}
AZ_TENANT_ID: ${{ secrets.AZ_TENANT_ID }}
AZ_SUBSCRIPTION_ID: ${{ secrets.AZ_SUBSCRIPTION_ID }}
CI_HKD_PATH: ${{ secrets.CI_HKD_PATH }}
ITA_KEY: ${{ secrets.ITA_KEY }}
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}

View File

@@ -43,12 +43,10 @@ jobs:
target-branch: ${{ github.event.pull_request.base.ref }}
skip-test: ${{ needs.skipper.outputs.skip_test }}
secrets:
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ vars.AUTHENTICATED_IMAGE_PASSWORD }}
AZ_APPID: ${{ secrets.AZ_APPID }}
AZ_TENANT_ID: ${{ secrets.AZ_TENANT_ID }}
AZ_SUBSCRIPTION_ID: ${{ secrets.AZ_SUBSCRIPTION_ID }}
CI_HKD_PATH: ${{ secrets.CI_HKD_PATH }}
ITA_KEY: ${{ secrets.ITA_KEY }}
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}

View File

@@ -27,8 +27,6 @@ on:
required: true
QUAY_DEPLOYER_PASSWORD:
required: true
KBUILD_SIGN_PIN:
required: true
permissions: {}
@@ -44,8 +42,6 @@ jobs:
tarball-suffix: -${{ inputs.tag }}
commit-hash: ${{ inputs.commit-hash }}
target-branch: ${{ inputs.target-branch }}
secrets:
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
publish-kata-deploy-payload-amd64:
needs: build-kata-static-tarball-amd64
@@ -119,7 +115,7 @@ jobs:
target-branch: ${{ inputs.target-branch }}
tarball-suffix: -${{ inputs.tag }}
secrets:
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ vars.AUTHENTICATED_IMAGE_PASSWORD }}
AZ_APPID: ${{ secrets.AZ_APPID }}
AZ_TENANT_ID: ${{ secrets.AZ_TENANT_ID }}
AZ_SUBSCRIPTION_ID: ${{ secrets.AZ_SUBSCRIPTION_ID }}

View File

@@ -19,10 +19,6 @@ on:
required: false
type: string
default: no
extensive-matrix-autogenerated-policy:
required: false
type: string
default: no
secrets:
AUTHENTICATED_IMAGE_PASSWORD:
required: true
@@ -33,16 +29,12 @@ on:
required: true
AZ_SUBSCRIPTION_ID:
required: true
CI_HKD_PATH:
required: true
ITA_KEY:
required: true
QUAY_DEPLOYER_PASSWORD:
required: true
NGC_API_KEY:
required: true
KBUILD_SIGN_PIN:
required: true
permissions: {}
@@ -58,8 +50,6 @@ jobs:
tarball-suffix: -${{ inputs.tag }}
commit-hash: ${{ inputs.commit-hash }}
target-branch: ${{ inputs.target-branch }}
secrets:
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
publish-kata-deploy-payload-amd64:
needs: build-kata-static-tarball-amd64
@@ -90,8 +80,6 @@ jobs:
tarball-suffix: -${{ inputs.tag }}
commit-hash: ${{ inputs.commit-hash }}
target-branch: ${{ inputs.target-branch }}
secrets:
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
publish-kata-deploy-payload-arm64:
needs: build-kata-static-tarball-arm64
@@ -123,7 +111,6 @@ jobs:
commit-hash: ${{ inputs.commit-hash }}
target-branch: ${{ inputs.target-branch }}
secrets:
CI_HKD_PATH: ${{ secrets.ci_hkd_path }}
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
build-kata-static-tarball-ppc64le:
@@ -216,6 +203,61 @@ jobs:
platforms: linux/amd64, linux/s390x
file: tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/Dockerfile
publish-csi-driver-amd64:
name: publish-csi-driver-amd64
needs: build-kata-static-tarball-amd64
permissions:
contents: read
packages: write
runs-on: ubuntu-22.04
steps:
- name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.commit-hash }}
fetch-depth: 0
persist-credentials: false
- name: Rebase atop of the latest target branch
run: |
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: get-kata-tools-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64-${{ inputs.tag }}
path: kata-tools-artifacts
- name: Install kata-tools
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
- name: Copy binary into Docker context
run: |
# Copy to the location where the Dockerfile expects the binary.
mkdir -p src/tools/csi-kata-directvolume/bin/
cp /opt/kata/bin/csi-kata-directvolume src/tools/csi-kata-directvolume/bin/directvolplugin
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Login to Kata Containers ghcr.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Docker build and push
uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25 # v5.4.0
with:
tags: ghcr.io/kata-containers/csi-kata-directvolume:${{ inputs.pr-number }}
push: true
context: src/tools/csi-kata-directvolume/
platforms: linux/amd64
file: src/tools/csi-kata-directvolume/Dockerfile
run-kata-monitor-tests:
if: ${{ inputs.skip-test != 'yes' }}
needs: build-kata-static-tarball-amd64
@@ -246,21 +288,6 @@ jobs:
AZ_TENANT_ID: ${{ secrets.AZ_TENANT_ID }}
AZ_SUBSCRIPTION_ID: ${{ secrets.AZ_SUBSCRIPTION_ID }}
run-k8s-tests-on-free-runner:
if: ${{ inputs.skip-test != 'yes' }}
needs: publish-kata-deploy-payload-amd64
permissions:
contents: read
uses: ./.github/workflows/run-k8s-tests-on-free-runner.yaml
with:
tarball-suffix: -${{ inputs.tag }}
registry: ghcr.io
repo: ${{ github.repository_owner }}/kata-deploy-ci
tag: ${{ inputs.tag }}-amd64
commit-hash: ${{ inputs.commit-hash }}
pr-number: ${{ inputs.pr-number }}
target-branch: ${{ inputs.target-branch }}
run-k8s-tests-on-arm64:
if: ${{ inputs.skip-test != 'yes' }}
needs: publish-kata-deploy-payload-arm64
@@ -294,6 +321,7 @@ jobs:
needs:
- publish-kata-deploy-payload-amd64
- build-and-publish-tee-confidential-unencrypted-image
- publish-csi-driver-amd64
uses: ./.github/workflows/run-kata-coco-tests.yaml
permissions:
contents: read
@@ -306,9 +334,8 @@ jobs:
commit-hash: ${{ inputs.commit-hash }}
pr-number: ${{ inputs.pr-number }}
target-branch: ${{ inputs.target-branch }}
extensive-matrix-autogenerated-policy: ${{ inputs.extensive-matrix-autogenerated-policy }}
secrets:
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ vars.AUTHENTICATED_IMAGE_PASSWORD }}
AZ_APPID: ${{ secrets.AZ_APPID }}
AZ_TENANT_ID: ${{ secrets.AZ_TENANT_ID }}
AZ_SUBSCRIPTION_ID: ${{ secrets.AZ_SUBSCRIPTION_ID }}
@@ -326,7 +353,7 @@ jobs:
pr-number: ${{ inputs.pr-number }}
target-branch: ${{ inputs.target-branch }}
secrets:
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ vars.AUTHENTICATED_IMAGE_PASSWORD }}
run-k8s-tests-on-ppc64le:
if: ${{ inputs.skip-test != 'yes' }}

View File

@@ -72,7 +72,7 @@ jobs:
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@4bdb89f48054571735e3792627da6195c57459e2 # v3.31.10
uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}
build-mode: ${{ matrix.build-mode }}
@@ -95,6 +95,6 @@ jobs:
make -C src/runtime
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@4bdb89f48054571735e3792627da6195c57459e2 # v3.31.10
uses: github/codeql-action/analyze@v3
with:
category: "/language:${{matrix.language}}"

View File

@@ -31,22 +31,10 @@ jobs:
with:
persist-credentials: false
- name: Install yq
- name: Install golang
run: |
./ci/install_yq.sh
env:
INSTALL_IN_GOPATH: false
- name: Read properties from versions.yaml
run: |
go_version="$(yq '.languages.golang.version' versions.yaml)"
[ -n "$go_version" ]
echo "GO_VERSION=${go_version}" >> "$GITHUB_ENV"
- name: Setup Golang version ${{ env.GO_VERSION }}
uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
with:
go-version: ${{ env.GO_VERSION }}
./tests/install_go.sh -f -p
echo "/usr/local/go/bin" >> "${GITHUB_PATH}"
- name: Install Rust
run: ./tests/install_rust.sh

View File

@@ -24,22 +24,10 @@ jobs:
fetch-depth: 0
persist-credentials: false
- name: Install yq
- name: Install golang
run: |
./ci/install_yq.sh
env:
INSTALL_IN_GOPATH: false
- name: Read properties from versions.yaml
run: |
go_version="$(yq '.languages.golang.version' versions.yaml)"
[ -n "$go_version" ]
echo "GO_VERSION=${go_version}" >> "$GITHUB_ENV"
- name: Setup Golang version ${{ env.GO_VERSION }}
uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
with:
go-version: ${{ env.GO_VERSION }}
./tests/install_go.sh -f -p
echo "/usr/local/go/bin" >> "${GITHUB_PATH}"
- name: Docs URL Alive Check
run: |

View File

@@ -4,50 +4,29 @@ on:
branches:
- main
permissions: {}
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
build:
runs-on: ubuntu-24.04
name: Build docs
deploy-docs:
name: deploy-docs
permissions:
contents: read
pages: write
id-token: write
steps:
- uses: actions/configure-pages@983d7736d9b0ae728b81ab479565c72886d7745b # v5.0.0
- uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1
with:
persist-credentials: false
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: 3.x
- run: pip install -r docs/requirements.txt
- run: python3 -m mkdocs build --config-file ./mkdocs.yaml --site-dir site/
id: build
- uses: actions/upload-pages-artifact@7b1f4a764d45c48632c6b24a0339c27f5614fb0b # v4.0.0
id: deployment
with:
path: site/
name: github-pages
deploy:
needs: build
runs-on: ubuntu-24.04
name: Deploy docs
permissions:
pages: write
id-token: write
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
runs-on: ubuntu-latest
steps:
- name: Deploy to GitHub Pages
uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # v4.0.5
id: deployment
- uses: actions/configure-pages@v5
- uses: actions/checkout@v5
with:
artifact_name: github-pages
persist-credentials: false
- uses: actions/setup-python@v5
with:
python-version: 3.x
- run: pip install zensical
- run: zensical build --clean
- uses: actions/upload-pages-artifact@v4
with:
path: site
- uses: actions/deploy-pages@v4
id: deployment

View File

@@ -27,22 +27,10 @@ jobs:
fetch-depth: 0
persist-credentials: false
- name: Install yq
- name: Install golang
run: |
./ci/install_yq.sh
env:
INSTALL_IN_GOPATH: false
- name: Read properties from versions.yaml
run: |
go_version="$(yq '.languages.golang.version' versions.yaml)"
[ -n "$go_version" ]
echo "GO_VERSION=${go_version}" >> "$GITHUB_ENV"
- name: Setup Golang version ${{ env.GO_VERSION }}
uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
with:
go-version: ${{ env.GO_VERSION }}
./tests/install_go.sh -f -p
echo "/usr/local/go/bin" >> "${GITHUB_PATH}"
- name: Install govulncheck
run: |

View File

@@ -19,25 +19,23 @@ permissions: {}
jobs:
scan-scheduled:
name: Scan of whole repo
permissions:
actions: read # # Required to upload SARIF file to CodeQL
contents: read # Read commit contents
security-events: write # Require writing security events to upload SARIF file to security tab
if: ${{ github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
uses: "google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@8ae4be80636b94886b3c271caad730985ce0611c" # v2.3.3
uses: "google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@b00f71e051ddddc6e46a193c31c8c0bf283bf9e6" # v2.1.0
with:
scan-args: |-
-r
./
scan-pr:
name: Scan of just PR code
permissions:
actions: read # Required to upload SARIF file to CodeQL
contents: read # Read commit contents
security-events: write # Require writing security events to upload SARIF file to security tab
if: ${{ github.event_name == 'pull_request' }}
uses: "google/osv-scanner-action/.github/workflows/osv-scanner-reusable-pr.yml@8ae4be80636b94886b3c271caad730985ce0611c" # v2.3.3
uses: "google/osv-scanner-action/.github/workflows/osv-scanner-reusable-pr.yml@b00f71e051ddddc6e46a193c31c8c0bf283bf9e6" # v2.1.0
with:
# Example of specifying custom arguments
scan-args: |-

View File

@@ -24,7 +24,6 @@ jobs:
target-branch: ${{ github.ref_name }}
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
build-assets-arm64:
permissions:
@@ -39,7 +38,6 @@ jobs:
target-branch: ${{ github.ref_name }}
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
build-assets-s390x:
permissions:
@@ -53,7 +51,6 @@ jobs:
push-to-registry: yes
target-branch: ${{ github.ref_name }}
secrets:
CI_HKD_PATH: ${{ secrets.CI_HKD_PATH }}
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
build-assets-ppc64le:

View File

@@ -8,8 +8,6 @@ on:
secrets:
QUAY_DEPLOYER_PASSWORD:
required: true
KBUILD_SIGN_PIN:
required: true
permissions: {}
@@ -21,7 +19,6 @@ jobs:
stage: release
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
permissions:
contents: read
packages: write

View File

@@ -8,8 +8,6 @@ on:
secrets:
QUAY_DEPLOYER_PASSWORD:
required: true
KBUILD_SIGN_PIN:
required: true
permissions: {}
@@ -21,7 +19,6 @@ jobs:
stage: release
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
permissions:
contents: read
packages: write

View File

@@ -6,8 +6,6 @@ on:
required: true
type: string
secrets:
CI_HKD_PATH:
required: true
QUAY_DEPLOYER_PASSWORD:
required: true
@@ -20,7 +18,6 @@ jobs:
push-to-registry: yes
stage: release
secrets:
CI_HKD_PATH: ${{ secrets.CI_HKD_PATH }}
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
permissions:
contents: read

View File

@@ -35,7 +35,6 @@ jobs:
target-arch: amd64
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
build-and-push-assets-arm64:
needs: release
@@ -49,7 +48,6 @@ jobs:
target-arch: arm64
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
build-and-push-assets-s390x:
needs: release
@@ -62,7 +60,6 @@ jobs:
with:
target-arch: s390x
secrets:
CI_HKD_PATH: ${{ secrets.CI_HKD_PATH }}
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
build-and-push-assets-ppc64le:

View File

@@ -35,6 +35,8 @@ on:
jobs:
run-cri-containerd:
name: run-cri-containerd-${{ inputs.arch }} (${{ inputs.containerd_version }}, ${{ inputs.vmm }})
strategy:
fail-fast: false
runs-on: ${{ inputs.runner }}
env:
CONTAINERD_VERSION: ${{ inputs.containerd_version }}
@@ -53,25 +55,6 @@ jobs:
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: Install yq
run: |
./ci/install_yq.sh
env:
INSTALL_IN_GOPATH: false
- name: Read properties from versions.yaml
run: |
go_version="$(yq '.languages.golang.version' versions.yaml)"
[ -n "$go_version" ]
echo "GO_VERSION=${go_version}" >> "$GITHUB_ENV"
- name: Setup Golang version ${{ env.GO_VERSION }}
uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
with:
go-version: ${{ env.GO_VERSION }}
# Setup-go doesn't work properly with ppc64le: https://github.com/actions/setup-go/issues/648
architecture: ${{ inputs.arch == 'ppc64le' && 'ppc64le' || '' }}
- name: Install dependencies
timeout-minutes: 15
run: bash tests/integration/cri-containerd/gha-run.sh install-dependencies

View File

@@ -42,6 +42,17 @@ jobs:
strategy:
fail-fast: false
matrix:
host_os:
- ubuntu
vmm:
- clh
- dragonball
- qemu
- qemu-runtime-rs
- cloud-hypervisor
instance-type:
- small
- normal
include:
- host_os: cbl-mariner
vmm: clh
@@ -69,7 +80,6 @@ jobs:
KUBERNETES: "vanilla"
K8S_TEST_HOST_TYPE: ${{ matrix.instance-type }}
GENPOLICY_PULL_METHOD: ${{ matrix.genpolicy-pull-method }}
RUNS_ON_AKS: "true"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:

View File

@@ -1,127 +0,0 @@
# Run Kubernetes integration tests on free GitHub runners with a locally
# deployed cluster (kubeadm).
name: CI | Run kubernetes tests on free runner
on:
workflow_call:
inputs:
tarball-suffix:
required: false
type: string
registry:
required: true
type: string
repo:
required: true
type: string
tag:
required: true
type: string
pr-number:
required: true
type: string
commit-hash:
required: false
type: string
target-branch:
required: false
type: string
default: ""
permissions: {}
jobs:
run-k8s-tests:
name: run-k8s-tests
strategy:
fail-fast: false
matrix:
environment: [
{ vmm: clh, containerd_version: lts },
{ vmm: clh, containerd_version: active },
{ vmm: dragonball, containerd_version: lts },
{ vmm: dragonball, containerd_version: active },
{ vmm: qemu, containerd_version: lts },
{ vmm: qemu, containerd_version: active },
{ vmm: qemu-runtime-rs, containerd_version: lts },
{ vmm: qemu-runtime-rs, containerd_version: active },
{ vmm: cloud-hypervisor, containerd_version: lts },
{ vmm: cloud-hypervisor, containerd_version: active },
]
runs-on: ubuntu-24.04
permissions:
contents: read
env:
DOCKER_REGISTRY: ${{ inputs.registry }}
DOCKER_REPO: ${{ inputs.repo }}
DOCKER_TAG: ${{ inputs.tag }}
GH_PR_NUMBER: ${{ inputs.pr-number }}
KATA_HOST_OS: ubuntu
KATA_HYPERVISOR: ${{ matrix.environment.vmm }}
KUBERNETES: vanilla
K8S_TEST_HOST_TYPE: baremetal-no-attestation
CONTAINER_ENGINE: containerd
CONTAINER_ENGINE_VERSION: ${{ matrix.environment.containerd_version }}
GH_TOKEN: ${{ github.token }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.commit-hash }}
fetch-depth: 0
persist-credentials: false
- name: Rebase atop of the latest target branch
run: |
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: get-kata-tools-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-tools-artifacts
- name: Install kata-tools
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
- name: Remove unnecessary directories to free up space
run: |
sudo rm -rf /usr/local/.ghcup
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo rm -rf /usr/local/lib/android
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf /usr/local/share/boost
sudo rm -rf /usr/lib/jvm
sudo rm -rf /usr/share/swift
sudo rm -rf /usr/local/share/powershell
sudo rm -rf /usr/local/julia*
sudo rm -rf /opt/az
sudo rm -rf /usr/local/share/chromium
sudo rm -rf /opt/microsoft
sudo rm -rf /opt/google
sudo rm -rf /usr/lib/firefox
- name: Deploy k8s (kubeadm)
run: bash tests/integration/kubernetes/gha-run.sh deploy-k8s
- name: Install `bats`
run: bash tests/integration/kubernetes/gha-run.sh install-bats
- name: Deploy Kata
timeout-minutes: 20
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
- name: Run tests
timeout-minutes: 60
run: bash tests/integration/kubernetes/gha-run.sh run-tests
- name: Report tests
if: always()
run: bash tests/integration/kubernetes/gha-run.sh report-tests
- name: Delete kata-deploy
if: always()
timeout-minutes: 15
run: bash tests/integration/kubernetes/gha-run.sh cleanup

View File

@@ -49,8 +49,6 @@ jobs:
KATA_HYPERVISOR: ${{ matrix.environment.vmm }}
KUBERNETES: kubeadm
KBS: ${{ matrix.environment.name == 'nvidia-gpu-snp' && 'true' || 'false' }}
SNAPSHOTTER: ${{ matrix.environment.name == 'nvidia-gpu-snp' && 'nydus' || '' }}
USE_EXPERIMENTAL_SNAPSHOTTER_SETUP: ${{ matrix.environment.name == 'nvidia-gpu-snp' && 'true' || 'false' }}
K8S_TEST_HOST_TYPE: baremetal
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -100,7 +98,7 @@ jobs:
run: bash tests/integration/kubernetes/gha-run.sh install-bats
- name: Run tests ${{ matrix.environment.vmm }}
timeout-minutes: 60
timeout-minutes: 30
run: bash tests/integration/kubernetes/gha-run.sh run-nv-tests
env:
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}

View File

@@ -57,24 +57,10 @@ jobs:
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: Install yq
- name: Install golang
run: |
./ci/install_yq.sh
env:
INSTALL_IN_GOPATH: false
- name: Read properties from versions.yaml
run: |
go_version="$(yq '.languages.golang.version' versions.yaml)"
[ -n "$go_version" ]
echo "GO_VERSION=${go_version}" >> "$GITHUB_ENV"
- name: Setup Golang version ${{ env.GO_VERSION }}
uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
with:
go-version: ${{ env.GO_VERSION }}
# Setup-go doesn't work properly with ppc64le: https://github.com/actions/setup-go/issues/648
architecture: 'ppc64le'
./tests/install_go.sh -f -p
echo "/usr/local/go/bin" >> "$GITHUB_PATH"
- name: Prepare the runner for k8s test suite
run: bash "${HOME}/scripts/k8s_cluster_prepare.sh"

View File

@@ -76,7 +76,7 @@ jobs:
SNAPSHOTTER: ${{ matrix.snapshotter }}
TARGET_ARCH: "s390x"
AUTHENTICATED_IMAGE_USER: ${{ vars.AUTHENTICATED_IMAGE_USER }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ vars.AUTHENTICATED_IMAGE_PASSWORD }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:

View File

@@ -69,7 +69,7 @@ jobs:
KUBERNETES: "vanilla"
PULL_TYPE: ${{ matrix.pull-type }}
AUTHENTICATED_IMAGE_USER: ${{ vars.AUTHENTICATED_IMAGE_USER }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ vars.AUTHENTICATED_IMAGE_PASSWORD }}
SNAPSHOTTER: ${{ matrix.snapshotter }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

View File

@@ -24,10 +24,6 @@ on:
required: false
type: string
default: ""
extensive-matrix-autogenerated-policy:
required: false
type: string
default: no
secrets:
AUTHENTICATED_IMAGE_PASSWORD:
required: true
@@ -67,7 +63,7 @@ jobs:
SNAPSHOTTER: "nydus"
PULL_TYPE: "guest-pull"
AUTHENTICATED_IMAGE_USER: ${{ vars.AUTHENTICATED_IMAGE_USER }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ vars.AUTHENTICATED_IMAGE_PASSWORD }}
GH_ITA_KEY: ${{ secrets.ITA_KEY }}
AUTO_GENERATE_POLICY: "yes"
steps:
@@ -110,6 +106,10 @@ jobs:
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh install-kbs-client
- name: Deploy CSI driver
timeout-minutes: 5
run: bash tests/integration/kubernetes/gha-run.sh deploy-csi-driver
- name: Run tests
timeout-minutes: 100
run: bash tests/integration/kubernetes/gha-run.sh run-tests
@@ -130,42 +130,52 @@ jobs:
[[ "${KATA_HYPERVISOR}" == "qemu-tdx" ]] && echo "ITA_KEY=${GH_ITA_KEY}" >> "${GITHUB_ENV}"
bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs
- name: Delete CSI driver
timeout-minutes: 5
run: bash tests/integration/kubernetes/gha-run.sh delete-csi-driver
# Generate jobs for testing CoCo on non-TEE environments
run-k8s-tests-coco-nontee:
name: run-k8s-tests-coco-nontee
strategy:
fail-fast: false
matrix:
environment: [
{ vmm: qemu-coco-dev, snapshotter: nydus, pull_type: guest-pull },
{ vmm: qemu-coco-dev-runtime-rs, snapshotter: nydus, pull_type: guest-pull },
{ vmm: qemu-coco-dev, snapshotter: "", pull_type: experimental-force-guest-pull },
]
runs-on: ubuntu-24.04
vmm:
- qemu-coco-dev
- qemu-coco-dev-runtime-rs
snapshotter:
- nydus
pull-type:
- guest-pull
include:
- pull-type: experimental-force-guest-pull
vmm: qemu-coco-dev
snapshotter: ""
runs-on: ubuntu-22.04
permissions:
contents: read
id-token: write # Used for OIDC access to log into Azure
environment: ci
env:
DOCKER_REGISTRY: ${{ inputs.registry }}
DOCKER_REPO: ${{ inputs.repo }}
DOCKER_TAG: ${{ inputs.tag }}
GH_PR_NUMBER: ${{ inputs.pr-number }}
KATA_HYPERVISOR: ${{ matrix.environment.vmm }}
KATA_HYPERVISOR: ${{ matrix.vmm }}
# Some tests rely on that variable to run (or not)
KBS: "true"
# Set the KBS ingress handler (empty string disables handling)
KBS_INGRESS: "nodeport"
KBS_INGRESS: "aks"
KUBERNETES: "vanilla"
PULL_TYPE: ${{ matrix.environment.pull_type }}
PULL_TYPE: ${{ matrix.pull-type }}
AUTHENTICATED_IMAGE_USER: ${{ vars.AUTHENTICATED_IMAGE_USER }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
SNAPSHOTTER: ${{ matrix.environment.snapshotter }}
EXPERIMENTAL_FORCE_GUEST_PULL: ${{ matrix.environment.pull_type == 'experimental-force-guest-pull' && matrix.environment.vmm || '' }}
AUTO_GENERATE_POLICY: "yes"
AUTHENTICATED_IMAGE_PASSWORD: ${{ vars.AUTHENTICATED_IMAGE_PASSWORD }}
SNAPSHOTTER: ${{ matrix.snapshotter }}
EXPERIMENTAL_FORCE_GUEST_PULL: ${{ matrix.pull-type == 'experimental-force-guest-pull' && matrix.vmm || '' }}
# Caution: current ingress controller used to expose the KBS service
# requires much vCPUs, lefting only a few for the tests. Depending on the
# host type chose it will result on the creation of a cluster with
# insufficient resources.
K8S_TEST_HOST_TYPE: "all"
CONTAINER_ENGINE: "containerd"
CONTAINER_ENGINE_VERSION: "active"
GH_TOKEN: ${{ github.token }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
@@ -188,36 +198,39 @@ jobs:
- name: Install kata-tools
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
- name: Remove unnecessary directories to free up space
run: |
sudo rm -rf /usr/local/.ghcup
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo rm -rf /usr/local/lib/android
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf /usr/local/share/boost
sudo rm -rf /usr/lib/jvm
sudo rm -rf /usr/share/swift
sudo rm -rf /usr/local/share/powershell
sudo rm -rf /usr/local/julia*
sudo rm -rf /opt/az
sudo rm -rf /usr/local/share/chromium
sudo rm -rf /opt/microsoft
sudo rm -rf /opt/google
sudo rm -rf /usr/lib/firefox
- name: Log into the Azure account
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
with:
client-id: ${{ secrets.AZ_APPID }}
tenant-id: ${{ secrets.AZ_TENANT_ID }}
subscription-id: ${{ secrets.AZ_SUBSCRIPTION_ID }}
- name: Deploy kubernetes
timeout-minutes: 15
run: bash tests/integration/kubernetes/gha-run.sh deploy-k8s
- name: Create AKS cluster
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # v3.0.2
with:
timeout_minutes: 15
max_attempts: 20
retry_on: error
retry_wait_seconds: 10
command: bash tests/integration/kubernetes/gha-run.sh create-cluster
- name: Install `bats`
run: bash tests/integration/kubernetes/gha-run.sh install-bats
- name: Install `kubectl`
uses: azure/setup-kubectl@776406bce94f63e41d621b960d78ee25c8b76ede # v4.0.1
with:
version: 'latest'
- name: Download credentials for the Kubernetes CLI to use them
run: bash tests/integration/kubernetes/gha-run.sh get-cluster-credentials
- name: Deploy Kata
timeout-minutes: 20
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-aks
env:
USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: ${{ matrix.environment.snapshotter == 'nydus' }}
USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: ${{ env.SNAPSHOTTER == 'nydus' }}
AUTO_GENERATE_POLICY: ${{ env.PULL_TYPE == 'experimental-force-guest-pull' && 'no' || 'yes' }}
- name: Deploy CoCo KBS
timeout-minutes: 10
@@ -227,126 +240,9 @@ jobs:
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh install-kbs-client
- name: Run tests
timeout-minutes: 80
run: bash tests/integration/kubernetes/gha-run.sh run-tests
- name: Report tests
if: always()
run: bash tests/integration/kubernetes/gha-run.sh report-tests
- name: Delete kata-deploy
if: always()
timeout-minutes: 15
run: bash tests/integration/kubernetes/gha-run.sh cleanup
- name: Delete CoCo KBS
if: always()
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs
# Extensive matrix: autogenerated policy tests (nydus + experimental-force-guest-pull) on k0s, k3s, rke2, microk8s with qemu-coco-dev / qemu-coco-dev-runtime-rs
run-k8s-tests-coco-nontee-extensive-matrix:
if: ${{ inputs.extensive-matrix-autogenerated-policy == 'yes' }}
name: run-k8s-tests-coco-nontee-extensive-matrix
strategy:
fail-fast: false
matrix:
environment: [
{ k8s: k0s, vmm: qemu-coco-dev, snapshotter: nydus, pull_type: guest-pull },
{ k8s: k0s, vmm: qemu-coco-dev, snapshotter: "", pull_type: experimental-force-guest-pull },
{ k8s: k0s, vmm: qemu-coco-dev-runtime-rs, snapshotter: nydus, pull_type: guest-pull },
{ k8s: k3s, vmm: qemu-coco-dev, snapshotter: nydus, pull_type: guest-pull },
{ k8s: k3s, vmm: qemu-coco-dev, snapshotter: "", pull_type: experimental-force-guest-pull },
{ k8s: k3s, vmm: qemu-coco-dev-runtime-rs, snapshotter: nydus, pull_type: guest-pull },
{ k8s: rke2, vmm: qemu-coco-dev, snapshotter: nydus, pull_type: guest-pull },
{ k8s: rke2, vmm: qemu-coco-dev, snapshotter: "", pull_type: experimental-force-guest-pull },
{ k8s: rke2, vmm: qemu-coco-dev-runtime-rs, snapshotter: nydus, pull_type: guest-pull },
{ k8s: microk8s, vmm: qemu-coco-dev, snapshotter: nydus, pull_type: guest-pull },
{ k8s: microk8s, vmm: qemu-coco-dev, snapshotter: "", pull_type: experimental-force-guest-pull },
{ k8s: microk8s, vmm: qemu-coco-dev-runtime-rs, snapshotter: nydus, pull_type: guest-pull },
]
runs-on: ubuntu-24.04
permissions:
contents: read
environment: ci
env:
DOCKER_REGISTRY: ${{ inputs.registry }}
DOCKER_REPO: ${{ inputs.repo }}
DOCKER_TAG: ${{ inputs.tag }}
GH_PR_NUMBER: ${{ inputs.pr-number }}
KATA_HYPERVISOR: ${{ matrix.environment.vmm }}
KBS: "true"
KBS_INGRESS: "nodeport"
KUBERNETES: ${{ matrix.environment.k8s }}
SNAPSHOTTER: ${{ matrix.environment.snapshotter }}
PULL_TYPE: ${{ matrix.environment.pull_type }}
EXPERIMENTAL_FORCE_GUEST_PULL: ${{ matrix.environment.pull_type == 'experimental-force-guest-pull' && matrix.environment.vmm || '' }}
AUTHENTICATED_IMAGE_USER: ${{ vars.AUTHENTICATED_IMAGE_USER }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
AUTO_GENERATE_POLICY: "yes"
K8S_TEST_HOST_TYPE: "all"
GH_TOKEN: ${{ github.token }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.commit-hash }}
fetch-depth: 0
persist-credentials: false
- name: Rebase atop of the latest target branch
run: |
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: get-kata-tools-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-tools-artifacts
- name: Install kata-tools
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
- name: Remove unnecessary directories to free up space
run: |
sudo rm -rf /usr/local/.ghcup
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo rm -rf /usr/local/lib/android
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf /usr/local/share/boost
sudo rm -rf /usr/lib/jvm
sudo rm -rf /usr/share/swift
sudo rm -rf /usr/local/share/powershell
sudo rm -rf /usr/local/julia*
sudo rm -rf /opt/az
sudo rm -rf /usr/local/share/chromium
sudo rm -rf /opt/microsoft
sudo rm -rf /opt/google
sudo rm -rf /usr/lib/firefox
- name: Deploy ${{ matrix.environment.k8s }}
timeout-minutes: 15
run: bash tests/integration/kubernetes/gha-run.sh deploy-k8s
- name: Install `bats`
run: bash tests/integration/kubernetes/gha-run.sh install-bats
- name: Deploy Kata
timeout-minutes: 20
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
env:
USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: ${{ matrix.environment.snapshotter == 'nydus' }}
- name: Deploy CoCo KBS
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-coco-kbs
- name: Install `kbs-client`
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh install-kbs-client
- name: Deploy CSI driver
timeout-minutes: 5
run: bash tests/integration/kubernetes/gha-run.sh deploy-csi-driver
- name: Run tests
timeout-minutes: 80
@@ -356,15 +252,18 @@ jobs:
if: always()
run: bash tests/integration/kubernetes/gha-run.sh report-tests
- name: Delete kata-deploy
- name: Refresh OIDC token in case access token expired
if: always()
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
with:
client-id: ${{ secrets.AZ_APPID }}
tenant-id: ${{ secrets.AZ_TENANT_ID }}
subscription-id: ${{ secrets.AZ_SUBSCRIPTION_ID }}
- name: Delete AKS cluster
if: always()
timeout-minutes: 15
run: bash tests/integration/kubernetes/gha-run.sh cleanup
- name: Delete CoCo KBS
if: always()
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs
run: bash tests/integration/kubernetes/gha-run.sh delete-cluster
# Generate jobs for testing CoCo on non-TEE environments with erofs-snapshotter
run-k8s-tests-coco-nontee-with-erofs-snapshotter:
@@ -392,7 +291,7 @@ jobs:
KBS_INGRESS: ""
KUBERNETES: "vanilla"
CONTAINER_ENGINE: "containerd"
CONTAINER_ENGINE_VERSION: "active"
CONTAINER_ENGINE_VERSION: "v2.2"
PULL_TYPE: ${{ matrix.pull-type }}
SNAPSHOTTER: ${{ matrix.snapshotter }}
USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: "true"
@@ -400,7 +299,6 @@ jobs:
# We are skipping the auto generated policy tests for now,
# but those should be enabled as soon as we work on that.
AUTO_GENERATE_POLICY: "no"
GH_TOKEN: ${{ github.token }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
@@ -444,6 +342,8 @@ jobs:
- name: Deploy kubernetes
timeout-minutes: 15
run: bash tests/integration/kubernetes/gha-run.sh deploy-k8s
env:
GH_TOKEN: ${{ github.token }}
- name: Install `bats`
run: bash tests/integration/kubernetes/gha-run.sh install-bats
@@ -452,6 +352,10 @@ jobs:
timeout-minutes: 20
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
- name: Deploy CSI driver
timeout-minutes: 5
run: bash tests/integration/kubernetes/gha-run.sh deploy-csi-driver
- name: Run tests
timeout-minutes: 80
run: bash tests/integration/kubernetes/gha-run.sh run-tests
@@ -459,8 +363,3 @@ jobs:
- name: Report tests
if: always()
run: bash tests/integration/kubernetes/gha-run.sh report-tests
- name: Delete kata-deploy
if: always()
timeout-minutes: 15
run: bash tests/integration/kubernetes/gha-run.sh cleanup

View File

@@ -55,6 +55,6 @@ jobs:
# Upload the results to GitHub's code scanning dashboard (optional).
# Commenting out will disable upload of results to your repo's Code Scanning dashboard
- name: "Upload to code-scanning"
uses: github/codeql-action/upload-sarif@4bdb89f48054571735e3792627da6195c57459e2 # v3.31.10
uses: github/codeql-action/upload-sarif@v3
with:
sarif_file: results.sarif

View File

@@ -1,30 +0,0 @@
name: Spelling check
on: ["pull_request"]
permissions: {}
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
check-spelling:
name: check-spelling
runs-on: ubuntu-24.04
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
fetch-depth: 0
persist-credentials: false
- name: Check Spelling
uses: streetsidesoftware/cspell-action@9cd41bb518a24fefdafd9880cbab8f0ceba04d28 # 8.3.0
with:
files: |
**/*.md
**/*.rst
**/*.txt
incremental_files_only: true
config: ".cspell.yaml"

View File

@@ -126,19 +126,14 @@ jobs:
./ci/install_yq.sh
env:
INSTALL_IN_GOPATH: false
- name: Read properties from versions.yaml
- name: Install golang
run: |
cd "${GOPATH}/src/github.com/${GITHUB_REPOSITORY}"
go_version="$(yq '.languages.golang.version' versions.yaml)"
[ -n "$go_version" ]
echo "GO_VERSION=${go_version}" >> "$GITHUB_ENV"
- name: Setup Golang version ${{ env.GO_VERSION }}
uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
with:
go-version: ${{ env.GO_VERSION }}
./tests/install_go.sh -f -p
echo "/usr/local/go/bin" >> "$GITHUB_PATH"
- name: Install system dependencies
run: |
sudo apt-get update && sudo apt-get -y install moreutils
sudo apt-get update && sudo apt-get -y install moreutils hunspell hunspell-en-gb hunspell-en-us pandoc
- name: Install open-policy-agent
run: |
cd "${GOPATH}/src/github.com/${GITHUB_REPOSITORY}"

3
.gitignore vendored
View File

@@ -20,6 +20,3 @@ tools/packaging/static-build/agent/install_libseccomp.sh
.direnv
**/.DS_Store
site/
opt/
tools/packaging/kernel/configs/**/.config
root_hash.txt

3494
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -22,9 +22,6 @@ members = [
"src/dragonball/dbs_utils",
"src/dragonball/dbs_virtio_devices",
# genpolicy
"src/tools/genpolicy",
# runtime-rs
"src/runtime-rs",
"src/runtime-rs/crates/agent",
@@ -56,19 +53,19 @@ exclude = [
[workspace.dependencies]
# Rust-VMM crates
event-manager = "0.4.0"
kvm-bindings = "0.14.0"
kvm-ioctls = "0.24.0"
linux-loader = "0.13.0"
event-manager = "0.2.1"
kvm-bindings = "0.6.0"
kvm-ioctls = "=0.12.1"
linux-loader = "0.8.0"
seccompiler = "0.5.0"
vfio-bindings = "0.6.1"
vfio-ioctls = "0.5.0"
virtio-bindings = "0.2.0"
virtio-queue = "0.17.0"
vm-fdt = "0.3.0"
vm-memory = "=0.17.1"
vm-superio = "0.8.0"
vmm-sys-util = "0.15.0"
vfio-bindings = "0.3.0"
vfio-ioctls = "0.1.0"
virtio-bindings = "0.1.0"
virtio-queue = "0.7.0"
vm-fdt = "0.2.0"
vm-memory = "0.10.0"
vm-superio = "0.5.0"
vmm-sys-util = "0.11.0"
# Local dependencies from Dragonball Sandbox crates
dragonball = { path = "src/dragonball" }
@@ -110,9 +107,6 @@ safe-path = { path = "src/libs/safe-path" }
shim-interface = { path = "src/libs/shim-interface" }
test-utils = { path = "src/libs/test-utils" }
# Local dependencies from `src/agent`
kata-agent-policy = { path = "src/agent/policy" }
# Outside dependencies
actix-rt = "2.7.0"
anyhow = "1.0"

View File

@@ -49,11 +49,8 @@ docs-url-alive-check:
build-and-publish-kata-debug:
bash tools/packaging/kata-debug/kata-debug-build-and-upload-payload.sh ${KATA_DEBUG_REGISTRY} ${KATA_DEBUG_TAG}
docs-build:
docker build -t kata-docs:latest -f ./docs/Dockerfile ./docs
docs-serve: docs-build
docker run --rm -p 8000:8000 -v ${PWD}:/docs:ro kata-docs:latest serve --config-file /docs/mkdocs.yaml -a 0.0.0.0:8000
docs-serve:
docker run --rm -p 8000:8000 -v ./docs:/docs:ro -v ${PWD}/zensical.toml:/zensical.toml:ro zensical/zensical serve --config-file /zensical.toml -a 0.0.0.0:8000
.PHONY: \
all \
@@ -62,5 +59,4 @@ docs-serve: docs-build
default \
static-checks \
docs-url-alive-check \
docs-build \
docs-serve

View File

@@ -1 +1 @@
3.28.0
3.26.0

View File

@@ -378,7 +378,7 @@ that is used in the test" section. From there you can see exactly what you'll
have to use when deploying kata-deploy in your local cluster.
> [!NOTE]
> TODO: @wainersm TO FINISH THIS PART BASED ON HIS PR TO RUN A LOCAL CI
> TODO: WAINER TO FINISH THIS PART BASED ON HIS PR TO RUN A LOCAL CI
## Adding new runners

View File

@@ -98,7 +98,7 @@ Let's say the OCP pipeline passed running with
but failed running with
``quay.io/kata-containers/kata-deploy-ci:kata-containers-9f512c016e75599a4a921bd84ea47559fe610057-amd64``
and you'd like to know which PR caused the regression. You can either run with
all the 60 tags between or you can utilize the [`bisecter`](https://github.com/ldoktor/bisecter)
all the 60 tags between or you can utilize the [bisecter](https://github.com/ldoktor/bisecter)
to optimize the number of steps in between.
Before running the bisection you need a reproducer script. Sample one called

View File

@@ -1,18 +0,0 @@
# https://lukasgeiter.github.io/mkdocs-awesome-nav/
nav:
- Home: index.md
- Getting Started:
- prerequisites.md
- installation.md
- Configuration:
- helm-configuration.md
- runtime-configuration.md
- Platform Support:
- hypervisors.md
- Guides:
- Use Cases:
- NVIDIA GPU Passthrough: use-cases/NVIDIA-GPU-passthrough-and-Kata-QEMU.md
- NVIDIA vGPU: use-cases/NVIDIA-GPU-passthrough-and-Kata.md
- Intel Discrete GPU: use-cases/Intel-Discrete-GPU-passthrough-and-Kata.md
- Misc:
- Architecture: design/architecture/

View File

@@ -730,7 +730,7 @@ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.debug_cons
##### Connecting to the debug console
Next, connect to the debug console. The VSOCK paths vary slightly between each
Next, connect to the debug console. The VSOCKS paths vary slightly between each
VMM solution.
In case of cloud-hypervisor, connect to the `vsock` as shown:

View File

@@ -1,11 +0,0 @@
# Copyright 2026 Kata Contributors
#
# SPDX-License-Identifier: Apache-2.0
#
FROM python:3.12-slim
WORKDIR /
COPY ./requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
ENTRYPOINT ["python3", "-m", "mkdocs"]

View File

@@ -188,14 +188,15 @@ and compare them with standard tools (e.g. `diff(1)`).
# Spelling
Since this project uses a number of terms not found in conventional
dictionaries, we have a [kata-dictionary](../tests/spellcheck/kata-dictionary.txt)
that contains some project specific terms we use.
dictionaries, we have a
[spell checking tool](https://github.com/kata-containers/kata-containers/tree/main/tests/cmd/check-spelling)
that checks both dictionary words and the additional terms we use.
You can run the `cspell` checking tool on your document before raising a PR to ensure it
Run the spell checking tool on your document before raising a PR to ensure it
is free of mistakes.
If your document introduces new terms, you need to update the custom
dictionary to incorporate the new words.
dictionary used by the spell checking tool to incorporate the new words.
# Names

View File

@@ -187,10 +187,9 @@ different compared to `runc` containers:
into the guest and exposes it directly to the container.
**Mounting guest devices**: When the source path of a hostPath volume is
under `/dev` (or `/dev` itself), and the path corresponds to a
non-regular file (i.e., a device, directory, or any other special file)
or is not accessible by the Kata shim, the Kata agent bind mounts the
source path directly from the *guest* filesystem into the container.
under `/dev`, and the path either corresponds to a host device or is not
accessible by the Kata shim, the Kata agent bind mounts the source path
directly from the *guest* filesystem into the container.
[runtime-config]: /src/runtime/README.md#configuration
[k8s-hostpath]: https://kubernetes.io/docs/concepts/storage/volumes/#hostpath
@@ -227,35 +226,6 @@ Importantly, the default behavior to pass the host devices to a
privileged container is not supported in Kata Containers and needs to be
disabled, see [Privileged Kata Containers](how-to/privileged.md).
## Guest pulled container images
When using features like **nydus guest-pull**, set user/group IDs explicitly in the pod spec.
If the ID values are omitted:
- Your workload might be executed with unexpected user/group ID values, because image layers
may be unavailable to containerd, so image config (including user/group) is not applied.
- If using policy or genpolicy, the generated policy may detect these unexpected values and
reject the creation of workload containers.
Set `securityContext` explicitly. Use **pod-level** `spec.securityContext` (for Pods) or
`spec.template.spec.securityContext` (for controllers like Deployments) and/or **container-level**
`spec.containers[].securityContext`. Include at least:
- `runAsUser` — primary user ID
- `runAsGroup` — primary group ID
- `fsGroup` — volume group ownership (often reflected as a supplemental group)
- `supplementalGroups` — list of additional group IDs (if needed)
Example:
```yaml
# Explicit user/group/supplementary groups to support nydus guest-pull
securityContext:
runAsUser: 0
runAsGroup: 0
fsGroup: 0
supplementalGroups: [1, 2, 3, 4, 6, 10, 11, 20, 26, 27]
```
# Appendices
## The constraints challenge

View File

@@ -1,69 +1,59 @@
# How to do a Kata Containers Release
This document lists the tasks required to create a Kata Release.
## Requirements
- GitHub permissions to run workflows.
## Release Model
## Versioning
Kata Containers follows a rolling release model with monthly snapshots.
New features, bug fixes, and improvements are continuously integrated into
`main`. Each month, a snapshot is tagged as a new `MINOR` release.
The Kata Containers project uses [semantic versioning](http://semver.org/) for all releases.
Semantic versions are comprised of three fields in the form:
### Versioning
```
MAJOR.MINOR.PATCH
```
Releases use the `MAJOR.MINOR.PATCH` scheme. Monthly snapshots increment
`MINOR`; `PATCH` is typically `0`. Major releases are rare (years apart) and
signal significant architectural changes that may require updates to container
managers (Containerd, CRI-O) or other infrastructure. Breaking changes in
`MINOR` releases are avoided where possible, but may occasionally occur as
features are deprecated or removed.
When `MINOR` increases, the new release adds **new features** but *without changing the existing behavior*.
### No Stable Branches
When `MAJOR` increases, the new release adds **new features, bug fixes, or
both** and which **changes the behavior from the previous release** (incompatible with previous releases).
The Kata Containers project does not maintain stable branches (see
[#9064](https://github.com/kata-containers/kata-containers/issues/9064)).
Bug fixes land on `main` and ship in the next monthly snapshot rather than
being backported. Downstream projects that need extended support or compliance
certifications should select a monthly snapshot as their stable base and manage
their own validation and patch backporting from there.
A major release will also likely require a change of the container manager version used,
-for example Containerd or CRI-O. Please refer to the release notes for further details.
**Important** : the Kata Containers project doesn't have stable branches (see
[this issue](https://github.com/kata-containers/kata-containers/issues/9064) for details).
Bug fixes are released as part of `MINOR` or `MAJOR` releases only. `PATCH` is always `0`.
## Release Process
### Lock the `main` branch and announce release process
In order to prevent any PRs getting merged during the release process, and
slowing the release process down, by impacting the payload caches, we have
recently trialed setting the `main` branch to read-only.
Once the `kata-containers/kata-containers` repository is ready for a new
release, lock the main branch until the release action has completed.
Notify the #kata-dev Slack channel about the ongoing release process.
Ideally, CI usage by others should be reduced to a minimum during the
ongoing release process.
> [!NOTE]
> Admin permission is needed to lock/unlock the `main` branch.
### Bump the `VERSION` and `Chart.yaml` file
Create a PR to set the release in the [`VERSION`](./../VERSION) file and to
update the `version` and `appVersion` fields in the
[`Chart.yaml`](./../tools/packaging/kata-deploy/helm-chart/kata-deploy/Chart.yaml)
file. Temporarily unlock the main branch to merge the PR.
When the `kata-containers/kata-containers` repository is ready for a new release,
first create a PR to set the release in the [`VERSION`](./../VERSION) file and update the
`version` and `appVersion` in the
[`Chart.yaml`](./../tools/packaging/kata-deploy/helm-chart/kata-deploy/Chart.yaml) file and
have it merged.
### Lock the `main` branch
In order to prevent any PRs getting merged during the release process, and slowing the release
process down, by impacting the payload caches, we have recently trailed setting the `main`
branch to read only whilst the release action runs.
> [!NOTE]
> Admin permission is needed to complete this task.
### Wait for the `VERSION` bump PR payload publish to complete
To reduce the chance of need to re-run the release workflow, check the [CI |
Publish Kata Containers
payload](https://github.com/kata-containers/kata-containers/actions/workflows/payload-after-push.yaml)
To reduce the chance of need to re-run the release workflow, check the
[CI | Publish Kata Containers payload](https://github.com/kata-containers/kata-containers/actions/workflows/payload-after-push.yaml)
once the `VERSION` PR bump has merged to check that the assets build correctly
and are cached, so that the release process can just download these artifacts
rather than needing to build them all, which takes time and can reveal errors in
infra.
rather than needing to build them all, which takes time and can reveal errors in infra.
### Trigger the `Release Kata Containers` GitHub Action
### Check GitHub Actions
We make use of [GitHub actions](https://github.com/features/actions) in the
[release](https://github.com/kata-containers/kata-containers/actions/workflows/release.yaml)
@@ -73,10 +63,11 @@ release artifacts.
> [!NOTE]
> Write permissions to trigger the action.
The action is manually triggered and is responsible for generating a new release
(including a new tag), pushing those to the `kata-containers/kata-containers`
repository. The new release is initially created as a draft. It is promoted to
an official release when the whole workflow has completed successfully.
The action is manually triggered and is responsible for generating a new
release (including a new tag), pushing those to the
`kata-containers/kata-containers` repository. The new release is initially
created as a draft. It is promoted to an official release when the whole
workflow has completed successfully.
Check the [actions status
page](https://github.com/kata-containers/kata-containers/actions) to verify all
@@ -84,13 +75,12 @@ steps in the actions workflow have completed successfully. On success, a static
tarball containing Kata release artifacts will be uploaded to the [Release
page](https://github.com/kata-containers/kata-containers/releases).
If the workflow fails because of some external environmental causes, e.g.
network timeout, simply re-run the failed jobs until they eventually succeed.
If the workflow fails because of some external environmental causes, e.g. network
timeout, simply re-run the failed jobs until they eventually succeed.
If for some reason you need to cancel the workflow or re-run it entirely, go
first to the [Release
page](https://github.com/kata-containers/kata-containers/releases) and delete
the draft release from the previous run.
If for some reason you need to cancel the workflow or re-run it entirely, go first
to the [Release page](https://github.com/kata-containers/kata-containers/releases) and
delete the draft release from the previous run.
### Unlock the `main` branch
@@ -100,8 +90,9 @@ an admin to do it.
### Improve the release notes
Release notes are auto-generated by the GitHub CLI tool used as part of our
release workflow. However, some manual tweaking may still be necessary in order
to highlight the most important features and bug fixes in a specific release.
release workflow. However, some manual tweaking may still be necessary in
order to highlight the most important features and bug fixes in a specific
release.
With this in mind, please, poke @channel on #kata-dev and people who worked on
the release will be able to contribute to that.

View File

Before

Width:  |  Height:  |  Size: 710 B

After

Width:  |  Height:  |  Size: 710 B

View File

@@ -231,6 +231,12 @@ Run the
[markdown checker](https://github.com/kata-containers/kata-containers/tree/main/tests/cmd/check-markdown)
on your documentation changes.
### Spell check
Run the
[spell checker](https://github.com/kata-containers/kata-containers/tree/main/tests/cmd/check-spelling)
on your documentation changes.
## Finally
You may wish to read the documentation that the

View File

@@ -43,7 +43,7 @@ To fulfill the [Kata design requirements](kata-design-requirements.md), and base
|`sandbox.AddInterface(inf)`| Add new NIC to the sandbox.|
|`sandbox.RemoveInterface(inf)`| Remove a NIC from the sandbox.|
|`sandbox.ListInterfaces()`| List all NICs and their configurations in the sandbox, return a `pbTypes.Interface` list.|
|`sandbox.UpdateRoutes(routes)`| Update the sandbox route table (e.g. for port mapping support), return a `pbTypes.Route` list.|
|`sandbox.UpdateRoutes(routes)`| Update the sandbox route table (e.g. for portmapping support), return a `pbTypes.Route` list.|
|`sandbox.ListRoutes()`| List the sandbox route table, return a `pbTypes.Route` list.|
### Sandbox Relay API

View File

@@ -8,7 +8,7 @@ The following benchmarking result shows the performance improvement compared wit
## Proposal - Bring `lazyload` ability to Kata Containers
`Nydusd` is a fuse/`virtiofs` daemon which is provided by `nydus` project and it supports `PassthroughFS` and [`rafs`](https://github.com/dragonflyoss/image-service/blob/master/docs/nydus-design.md) (Registry Acceleration File System) natively, so in Kata Containers, we can use `nydusd` in place of `virtiofsd` and mount `nydus` image to guest in the meanwhile.
`Nydusd` is a fuse/`virtiofs` daemon which is provided by `nydus` project and it supports `PassthroughFS` and [RAFS](https://github.com/dragonflyoss/image-service/blob/master/docs/nydus-design.md) (Registry Acceleration File System) natively, so in Kata Containers, we can use `nydusd` in place of `virtiofsd` and mount `nydus` image to guest in the meanwhile.
The process of creating/starting Kata Containers with `virtiofsd`,

View File

@@ -1,264 +0,0 @@
# Helm Configuration
## Parameters
The helm chart provides a comprehensive set of configuration options. You may view the parameters and their descriptions by going to the [GitHub source](https://github.com/kata-containers/kata-containers/blob/main/tools/packaging/kata-deploy/helm-chart/kata-deploy/values.yaml) or by using helm:
```sh
# List available kata-deploy chart versions:
# helm search repo kata-deploy-charts/kata-deploy --versions
#
# Then replace X.Y.Z below with the desired chart version:
helm show values --version X.Y.Z oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy
```
### shims
Kata ships with a number of pre-built artifacts and runtimes. You may selectively enable or disable specific shims. For example:
```yaml title="values.yaml"
shims:
disableAll: true
qemu:
enabled: true
qemu-nvidia-gpu:
enabled: true
qemu-nvidia-gpu-snp:
enabled: false
```
Shims can also have configuration options specific to them:
```yaml
qemu-nvidia-gpu:
enabled: ~
supportedArches:
- amd64
- arm64
allowedHypervisorAnnotations: []
containerd:
snapshotter: ""
runtimeClass:
# This label is automatically added by gpu-operator. Override it
# if you want to use a different label.
# Uncomment once GPU Operator v26.3 is out
# nodeSelector:
# nvidia.com/cc.ready.state: "false"
```
It's best to reference the default `values.yaml` file above for more details.
### Custom Runtimes
Kata allows you to create custom runtime configurations. This is done by overlaying one of the pre-existing runtime configs with user-provided configs. For example, we can use the `qemu-nvidia-gpu` as a base config and overlay our own parameters to it:
```yaml
customRuntimes:
enabled: false
runtimes:
my-gpu-runtime:
baseConfig: "qemu-nvidia-gpu" # Required: existing config to use as base
dropIn: | # Optional: overrides via config.d mechanism
[hypervisor.qemu]
default_memory = 1024
default_vcpus = 4
runtimeClass: |
kind: RuntimeClass
apiVersion: node.k8s.io/v1
metadata:
name: kata-my-gpu-runtime
labels:
app.kubernetes.io/managed-by: kata-deploy
handler: kata-my-gpu-runtime
overhead:
podFixed:
memory: "640Mi"
cpu: "500m"
scheduling:
nodeSelector:
katacontainers.io/kata-runtime: "true"
# Optional: CRI-specific configuration
containerd:
snapshotter: "nydus" # Configure containerd snapshotter (nydus, erofs, etc.)
crio:
pullType: "guest-pull" # Configure CRI-O runtime_pull_image = true
```
Again, view the default [`values.yaml`](#parameters) file for more details.
## Examples
We provide a few examples that you can pass to helm via the `-f`/`--values` flag.
### [`try-kata-tee.values.yaml`](https://github.com/kata-containers/kata-containers/blob/main/tools/packaging/kata-deploy/helm-chart/kata-deploy/try-kata-tee.values.yaml)
This file enables only the TEE (Trusted Execution Environment) shims for confidential computing:
```sh
helm install kata-deploy oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy \
--version VERSION \
-f try-kata-tee.values.yaml
```
Includes:
- `qemu-snp` - AMD SEV-SNP (amd64)
- `qemu-tdx` - Intel TDX (amd64)
- `qemu-se` - IBM Secure Execution for Linux (SEL) (s390x)
- `qemu-se-runtime-rs` - IBM Secure Execution for Linux (SEL) Rust runtime (s390x)
- `qemu-cca` - Arm Confidential Compute Architecture (arm64)
- `qemu-coco-dev` - Confidential Containers development (amd64, s390x)
- `qemu-coco-dev-runtime-rs` - Confidential Containers development Rust runtime (amd64, s390x)
### [`try-kata-nvidia-gpu.values.yaml`](https://github.com/kata-containers/kata-containers/blob/main/tools/packaging/kata-deploy/helm-chart/kata-deploy/try-kata-nvidia-gpu.values.yaml)
This file enables only the NVIDIA GPU-enabled shims:
```sh
helm install kata-deploy oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy \
--version VERSION \
-f try-kata-nvidia-gpu.values.yaml
```
Includes:
- `qemu-nvidia-gpu` - Standard NVIDIA GPU support (amd64, arm64)
- `qemu-nvidia-gpu-snp` - NVIDIA GPU with AMD SEV-SNP (amd64)
- `qemu-nvidia-gpu-tdx` - NVIDIA GPU with Intel TDX (amd64)
### `nodeSelector`
We can deploy Kata only to specific nodes using `nodeSelector`
```sh
# First, label the nodes where you want kata-containers to be installed
$ kubectl label nodes worker-node-1 kata-containers=enabled
$ kubectl label nodes worker-node-2 kata-containers=enabled
# Then install the chart with `nodeSelector`
$ helm install kata-deploy \
--set nodeSelector.kata-containers="enabled" \
"${CHART}" --version "${VERSION}"
```
You can also use a values file:
```yaml title="values.yaml"
nodeSelector:
kata-containers: "enabled"
node-type: "worker"
```
```sh
$ helm install kata-deploy -f values.yaml "${CHART}" --version "${VERSION}"
```
### Multiple Kata installations on the Same Node
For debugging, testing and other use-case it is possible to deploy multiple
versions of Kata on the very same node. All the needed artifacts are getting the
`multiInstallSuffix` appended to distinguish each installation. **BEWARE** that one
needs at least **containerd-2.0** since this version has drop-in conf support
which is a prerequisite for the `multiInstallSuffix` to work properly.
```sh
$ helm install kata-deploy-cicd \
-n kata-deploy-cicd \
--set env.multiInstallSuffix=cicd \
--set env.debug=true \
"${CHART}" --version "${VERSION}"
```
Note: `runtimeClasses` are automatically created by Helm (via
`runtimeClasses.enabled=true`, which is the default).
Now verify the installation by examining the `runtimeClasses`:
```sh
$ kubectl get runtimeClasses
NAME HANDLER AGE
kata-clh-cicd kata-clh-cicd 77s
kata-cloud-hypervisor-cicd kata-cloud-hypervisor-cicd 77s
kata-dragonball-cicd kata-dragonball-cicd 77s
kata-fc-cicd kata-fc-cicd 77s
kata-qemu-cicd kata-qemu-cicd 77s
kata-qemu-coco-dev-cicd kata-qemu-coco-dev-cicd 77s
kata-qemu-nvidia-gpu-cicd kata-qemu-nvidia-gpu-cicd 77s
kata-qemu-nvidia-gpu-snp-cicd kata-qemu-nvidia-gpu-snp-cicd 77s
kata-qemu-nvidia-gpu-tdx-cicd kata-qemu-nvidia-gpu-tdx-cicd 76s
kata-qemu-runtime-rs-cicd kata-qemu-runtime-rs-cicd 77s
kata-qemu-se-runtime-rs-cicd kata-qemu-se-runtime-rs-cicd 77s
kata-qemu-snp-cicd kata-qemu-snp-cicd 77s
kata-qemu-tdx-cicd kata-qemu-tdx-cicd 77s
kata-stratovirt-cicd kata-stratovirt-cicd 77s
```
## RuntimeClass Node Selectors for TEE Shims
**Manual configuration:** Any `nodeSelector` you set under `shims.<shim>.runtimeClass.nodeSelector`
is **always applied** to that shim's RuntimeClass, whether or not NFD is present. Use this when
you want to pin TEE workloads to specific nodes (e.g. without NFD, or with custom labels).
**Auto-inject when NFD is present:** If you do *not* set a `runtimeClass.nodeSelector` for a
TEE shim, the chart can **automatically inject** NFD-based labels when NFD is detected in the
cluster (deployed by this chart with `node-feature-discovery.enabled=true` or found externally):
- AMD SEV-SNP shims: `amd.feature.node.kubernetes.io/snp: "true"`
- Intel TDX shims: `intel.feature.node.kubernetes.io/tdx: "true"`
- IBM Secure Execution for Linux (SEL) shims (s390x): `feature.node.kubernetes.io/cpu-security.se.enabled: "true"`
The chart uses Helm's `lookup` function to detect NFD (by looking for the
`node-feature-discovery-worker` DaemonSet). Auto-inject only runs when NFD is detected and
no manual `runtimeClass.nodeSelector` is set for that shim.
**Note**: NFD detection requires cluster access. During `helm template` (dry-run without a
cluster), external NFD is not seen, so auto-injected labels are not added. Manual
`runtimeClass.nodeSelector` values are still applied in all cases.
## Customizing Configuration with Drop-in Files
When kata-deploy installs Kata Containers, the base configuration files should not
be modified directly. Instead, use drop-in configuration files to customize
settings. This approach ensures your customizations survive kata-deploy upgrades.
### How Drop-in Files Work
The Kata runtime reads the base configuration file and then applies any `.toml`
files found in the `config.d/` directory alongside it. Files are processed in
alphabetical order, with later files overriding earlier settings.
### Creating Custom Drop-in Files
To add custom settings, create a `.toml` file in the appropriate `config.d/`
directory. Use a numeric prefix to control the order of application.
**Reserved prefixes** (used by kata-deploy):
- `10-*`: Core kata-deploy settings
- `20-*`: Debug settings
- `30-*`: Kernel parameters
**Recommended prefixes for custom settings**: `50-89`
### Drop-In Config Examples
#### Adding Custom Kernel Parameters
```bash
# SSH into the node or use kubectl exec
sudo mkdir -p /opt/kata/share/defaults/kata-containers/runtimes/qemu/config.d/
sudo cat > /opt/kata/share/defaults/kata-containers/runtimes/qemu/config.d/50-custom.toml << 'EOF'
[hypervisor.qemu]
kernel_params = "my_param=value"
EOF
```
#### Changing Default Memory Size
```bash
sudo cat > /opt/kata/share/defaults/kata-containers/runtimes/qemu/config.d/50-memory.toml << 'EOF'
[hypervisor.qemu]
default_memory = 4096
EOF
```

View File

@@ -23,7 +23,7 @@ workloads with isolated sandboxes (i.e. Kata Containers).
As a result, the CRI implementations extended their semantics for the requirements:
- At the beginning, [`Frakti`](https://github.com/kubernetes/frakti) checks the network configuration of a Pod, and
- At the beginning, [Frakti](https://github.com/kubernetes/frakti) checks the network configuration of a Pod, and
treat Pod with `host` network as trusted, while others are treated as untrusted.
- The containerd introduced an annotation for untrusted Pods since [v1.0](https://github.com/containerd/cri/blob/v1.0.0-rc.0/docs/config.md):
```yaml

View File

@@ -18,7 +18,7 @@ The host kernel must be equal to or later than upstream version [6.11](https://c
[`sev-utils`](https://github.com/amd/sev-utils/blob/coco-202501150000/docs/snp.md) is an easy way to install the required host kernel with the `setup-host` command. However, it will also build compatible guest kernel, OVMF, and QEMU components which are not necessary as these components are packaged with kata. The `sev-utils` script utility can be used with these additional components to test the memory encrypted launch and attestation of a base QEMU SNP guest.
For a simplified way to build just the upstream compatible host kernel, use the Confidential Containers fork of [`amdese-amdsev`](https://github.com/confidential-containers/amdese-amdsev/tree/amd-snp-202501150000). Individual components can be built by running the following command:
For a simplified way to build just the upstream compatible host kernel, use the Confidential Containers fork of [AMDESE AMDSEV](https://github.com/confidential-containers/amdese-amdsev/tree/amd-snp-202501150000). Individual components can be built by running the following command:
```
./build.sh kernel host --install
@@ -65,7 +65,7 @@ $ ./configure --enable-virtfs --target-list=x86_64-softmmu --enable-debug
$ make -j "$(nproc)"
$ popd
```
- Create cert-chain for SNP attestation ( using [`snphost`](https://github.com/virtee/snphost/blob/main/docs/snphost.1.adoc) )
- Create cert-chain for SNP attestation ( using [snphost](https://github.com/virtee/snphost/blob/main/docs/snphost.1.adoc) )
```bash
$ git clone https://github.com/virtee/snphost.git && cd snphost/
$ cargo build
@@ -178,3 +178,4 @@ sudo reboot
```bash
sudo rmmod kvm_amd && sudo modprobe kvm_amd sev_snp=0
```

View File

@@ -49,8 +49,6 @@ In order to allow Kubelet to use containerd (using the CRI interface), configure
EOF
```
For Kata Containers (and especially CoCo / Confidential Containers tests), use at least `--runtime-request-timeout=600s` (10m) so CRI CreateContainerRequest does not time out.
- Inform systemd about the new configuration
```bash

View File

@@ -315,7 +315,7 @@ $ kata-agent-ctl connect --server-address "unix:///var/run/kata/$PODID/root/kata
### compact_threshold
Control the mem-agent compaction function compact threshold.<br>
compact_threshold is the pages number.<br>
When examining the `/proc/pagetypeinfo`, if there's an increase in the number of movable pages of orders smaller than the compact_order compared to the amount following the previous compaction period, and this increase surpasses a certain threshold specifically, more than compact_threshold number of pages, or the number of free pages has decreased by compact_threshold since the previous compaction. Current compact run period will not do compaction because there is no enough fragmented pages to be compaction.<br>
When examining the /proc/pagetypeinfo, if there's an increase in the number of movable pages of orders smaller than the compact_order compared to the amount following the previous compaction period, and this increase surpasses a certain threshold specifically, more than compact_threshold number of pages, or the number of free pages has decreased by compact_threshold since the previous compaction. Current compact run period will not do compaction because there is no enough fragmented pages to be compaction.<br>
This design aims to minimize the impact of unnecessary compaction calls on system performance.<br>
Default to 1024.

View File

@@ -99,9 +99,6 @@ The [`genpolicy`](../../src/tools/genpolicy/) application can be used to generat
**Warning** Users should review carefully the automatically-generated Policy, and modify the Policy file if needed to match better their use case, before using this Policy.
**Important — User / Group / Supplemental groups for Policy and genpolicy**
When using features like **nydus guest-pull**, set user/group IDs explicitly in the pod spec, as described in [Limitations](../Limitations.md#guest-pulled-container-images).
See the [`genpolicy` documentation](../../src/tools/genpolicy/README.md) and the [Policy contents examples](#policy-contents) for additional information.
## Policy contents

View File

@@ -8,7 +8,7 @@
> **Note:** `cri-tools` is only used for debugging and validation purpose, and don't use it to run production workloads.
> **Note:** For how to install and configure `cri-tools` with CRI runtimes like `containerd` or CRI-O, please also refer to other [how-tos](./README.md).
> **Note:** For how to install and configure `cri-tools` with CRI runtimes like `containerd` or CRI-O, please also refer to other [howtos](./README.md).
## Use `crictl` run Pods in Kata containers

View File

@@ -16,38 +16,83 @@ which hypervisors you may wish to investigate further.
## Types
| Hypervisor | Written in | Architectures | GPU Support | Intel TDX | AMD SEV-SNP |
|-|-|-|-|-|-|
|[Cloud Hypervisor](#cloud-hypervisor) | rust | `aarch64`, `x86_64` | :x: | :x: | :x: |
|[Firecracker](#firecracker) | rust | `aarch64`, `x86_64` | :x: | :x: | :x: |
|[QEMU](#qemu) | C | all | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|[Dragonball](#dragonball) | rust | `aarch64`, `x86_64` | :x: | :x: | :x: |
|StratoVirt | rust | `aarch64`, `x86_64` | :x: | :x: | :x: |
| Hypervisor | Written in | Architectures | Type |
|-|-|-|-|
|[Cloud Hypervisor] | rust | `aarch64`, `x86_64` | Type 2 ([KVM]) |
|[Firecracker] | rust | `aarch64`, `x86_64` | Type 2 ([KVM]) |
|[QEMU] | C | all | Type 2 ([KVM]) | `configuration-qemu.toml` |
|[`Dragonball`] | rust | `aarch64`, `x86_64` | Type 2 ([KVM]) |
|[StratoVirt] | rust | `aarch64`, `x86_64` | Type 2 ([KVM]) |
Each Kata runtime is configured for a specific hypervisor through the runtime's configuration file. For example:
## Determine currently configured hypervisor
```toml title="/opt/kata/share/defaults/kata-containers/configuration.toml"
[hypervisor.qemu]
path = "/opt/kata/bin/qemu-system-x86_64"
```bash
$ kata-runtime kata-env | awk -v RS= '/\[Hypervisor\]/' | grep Path
```
```toml title="/opt/kata/share/defaults/kata-containers/configuration-clh.toml"
[hypervisor.clh]
path = "/opt/kata/bin/cloud-hypervisor"
```
## Choose a Hypervisor
## Cloud Hypervisor
The table below provides a brief summary of some of the differences between
the hypervisors:
[Cloud Hypervisor](https://www.cloudhypervisor.org/) is a more modern hypervisor written in Rust.
| Hypervisor | Summary | Features | Limitations | Container Creation speed | Memory density | Use cases | Comment |
|-|-|-|-|-|-|-|-|
|[Cloud Hypervisor] | Low latency, small memory footprint, small attack surface | Minimal | | excellent | excellent | High performance modern cloud workloads | |
|[Firecracker] | Very slimline | Extremely minimal | Doesn't support all device types | excellent | excellent | Serverless / FaaS | |
|[QEMU] | Lots of features | Lots | | good | good | Good option for most users | |
|[`Dragonball`] | Built-in VMM, low CPU and memory overhead| Minimal | | excellent | excellent | Optimized for most container workloads | `out-of-the-box` Kata Containers experience |
|[StratoVirt] | Unified architecture supporting three scenarios: VM, container, and serverless | Extremely minimal(`MicroVM`) to Lots(`StandardVM`) | | excellent | excellent | Common container workloads | `StandardVM` type of StratoVirt for Kata is under development |
## Firecracker
For further details, see the [Virtualization in Kata Containers](design/virtualization.md) document and the official documentation for each hypervisor.
[Firecracker](https://firecracker-microvm.github.io/) is a minimal and lightweight hypervisor created for the AWS Lambda product.
## Hypervisor configuration files
## QEMU
Since each hypervisor offers different features and options, Kata Containers
provides a separate
[configuration file](../src/runtime/README.md#configuration)
for each. The configuration files contain comments explaining which options
are available, their default values and how each setting can be used.
QEMU is the best supported hypervisor for NVIDIA-based GPUs and for confidential computing use-cases (such as Intel TDX and AMD SEV-SNP). Runtimes that use this are normally named `kata-qemu-nvidia-gpu-*`. The Kata project focuses primarily on QEMU runtimes for GPU support.
| Hypervisor | Golang runtime config file | golang runtime short name | golang runtime default | rust runtime config file | rust runtime short name | rust runtime default |
|-|-|-|-|-|-|-|
| [Cloud Hypervisor] | [`configuration-clh.toml`](../src/runtime/config/configuration-clh.toml.in) | `clh` | | [`configuration-cloud-hypervisor.toml`](../src/runtime-rs/config/configuration-cloud-hypervisor.toml.in) | `cloud-hypervisor` | |
| [Firecracker] | [`configuration-fc.toml`](../src/runtime/config/configuration-fc.toml.in) | `fc` | | | | |
| [QEMU] | [`configuration-qemu.toml`](../src/runtime/config/configuration-qemu.toml.in) | `qemu` | yes | [`configuration-qemu.toml`](../src/runtime-rs/config/configuration-qemu-runtime-rs.toml.in) | `qemu` | |
| [`Dragonball`] | | | | [`configuration-dragonball.toml`](../src/runtime-rs/config/configuration-dragonball.toml.in) | `dragonball` | yes |
| [StratoVirt] | [`configuration-stratovirt.toml`](../src/runtime/config/configuration-stratovirt.toml.in) | `stratovirt` | | | | |
## Dragonball
> **Notes:**
>
> - The short names specified are used by the [`kata-manager`](../utils/README.md) tool.
> - As shown by the default columns, each runtime type has its own default hypervisor.
> - The [golang runtime](../src/runtime) is the current default runtime.
> - The [rust runtime](../src/runtime-rs), also known as `runtime-rs`,
> is the newer runtime written in the rust language.
> - See the [Configuration](../README.md#configuration) for further details.
> - The configuration file links in the table link to the "source"
> versions: these are not usable configuration files as they contain
> variables that need to be expanded:
> - The links are provided for reference only.
> - The final (installed) versions, where all variables have been
> expanded, are built from these source configuration files.
> - The pristine configuration files are usually installed in the
> `/opt/kata/share/defaults/kata-containers/` or
> `/usr/share/defaults/kata-containers/` directories.
> - Some hypervisors may have the same name for both golang and rust
> runtimes, but the file contents may differ.
> - If there is no configuration file listed for the golang or
> rust runtimes, this either means the hypervisor cannot be run with
> a particular runtime, or that a driver has not yet been made
> available for that runtime.
Dragonball is a special hypervisor created by the Ant Group that runs in the same process as the Rust-based containerd shim.
## Switch configured hypervisor
To switch the configured hypervisor, you only need to run a single command.
See [the `kata-manager` documentation](../utils/README.md#choose-a-hypervisor) for further details.
[Cloud Hypervisor]: https://github.com/cloud-hypervisor/cloud-hypervisor
[Firecracker]: https://github.com/firecracker-microvm/firecracker
[KVM]: https://en.wikipedia.org/wiki/Kernel-based_Virtual_Machine
[QEMU]: http://www.qemu.org
[`Dragonball`]: https://github.com/kata-containers/kata-containers/blob/main/src/dragonball
[StratoVirt]: https://gitee.com/openeuler/stratovirt

View File

@@ -1,94 +0,0 @@
# Kata Containers
Kata Containers is an open source community working to build a secure container runtime with lightweight virtual machines (VM's) that feel and perform like standard Linux containers, but provide stronger workload isolation using hardware virtualization technology as a second layer of defense.
## How it Works
Kata implements the [Open Containers Runtime Specification](https://github.com/opencontainers/runtime-spec). More specifically, it implements a containerd shim that implements the expected interface for managing container lifecycles. The default containerd runtime of `runc` spawns a container like this:
```mermaid
flowchart TD
subgraph Host
containerd
runc
process[Container Process]
containerd --> runc --> process
end
```
When containerd receives a request to spawn a container, it will pull the container image down and then call out to the runc shim (usually located at `/usr/local/bin/containerd-shim-runc-v2`). runc will then create various process isolation resources like Linux namespaces (networking, PIDs, mounts etc), seccomp filters, Linux capability reductions, and then spawn the process inside of those resources. This process runs in the host kernel.
Kata spawns containers like this:
```mermaid
flowchart TD
subgraph Host
containerdOuter[containerd]
kata
containerdOuter --> kata
kata --> kataAgent
subgraph VM
kataAgent[Kata Agent]
process[Container Process]
kataAgent --> process
end
end
```
The container process spawned inside of the VM allows us to isolate the guest kernel from the host system. This is the fundamental principle of how Kata achieves its isolation boundaries.
## Example
When Kata is installed in a system, a number of artifacts are laid down. containerd's config will be modified as such:
```toml title="/etc/containerd/config.toml"
imports = ["/opt/kata/containerd/config.d/kata-deploy.toml"]
```
This file will contain configuration for various flavors of Kata runtimes. We can see the vanilla CPU runtime config here:
```toml title="/opt/kata/containerd/config.d/kata-deploy.toml"
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.kata-qemu]
runtime_type = "io.containerd.kata-qemu.v2"
runtime_path = "/opt/kata/bin/containerd-shim-kata-v2"
privileged_without_host_devices = true
pod_annotations = ["io.katacontainers.*"]
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.kata-qemu.options]
ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration-qemu.toml"
```
Because containerd's CRI is aware of the Kata runtimes, we can spawn Kubernetes pods:
```yaml
apiVersion: v1
kind: Pod
metadata:
name: test
spec:
runtimeClassName: kata-qemu
containers:
- name: test
image: "quay.io/libpod/ubuntu:latest"
command: ["/bin/bash", "-c"]
args: ["echo hello"]
```
We can also spawn a Kata container by submitting a request to containerd like so:
<div class="annotate" markdown>
```sh
$ ctr image pull quay.io/libpod/ubuntu:latest
$ ctr run --runtime "io.containerd.kata.v2" --runtime-config-path /opt/kata/share/defaults/kata-containers/configuration-qemu.toml --rm -t "quay.io/libpod/ubuntu:latest" foo sh
# echo hello
hello
```
</div>
!!! tip
`ctr` is not aware of the CRI config in `/etc/containerd/config.toml`. This is why you must specify the `--runtime-config-path`. Additionally, the `--runtime` value is converted into a specific binary name which containerd then searches for in its `PATH`. See the [containerd docs](https://github.com/containerd/containerd/blob/release/2.2/core/runtime/v2/README.md#usage) for more details.

View File

@@ -1,64 +0,0 @@
# Installation
## Helm Chart
[helm](https://helm.sh/docs/intro/install/) can be used to install templated kubernetes manifests.
### Prerequisites
- **Kubernetes ≥ v1.22** v1.22 is the first release where the CRI v1 API
became the default and `RuntimeClass` left alpha. The chart depends on those
stable interfaces; earlier clusters need `featuregates` or CRI shims that are
out of scope.
- **Kata Release 3.12** - v3.12.0 introduced publishing the helm-chart on the
release page for easier consumption, since v3.8.0 we shipped the helm-chart
via source code in the kata-containers `GitHub` repository.
- CRIcompatible runtime (containerd or CRIO). If one wants to use the
`multiInstallSuffix` feature one needs at least **containerd-2.0** which
supports drop-in config files
- Nodes must allow loading kernel modules and installing Kata artifacts (the
chart runs privileged containers to do so)
### `helm install`
```sh
# Install directly from the official ghcr.io OCI registry
# update the VERSION X.YY.Z to your needs or just use the latest
export VERSION=$(curl -sSL https://api.github.com/repos/kata-containers/kata-containers/releases/latest | jq .tag_name | tr -d '"')
export CHART="oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy"
$ helm install kata-deploy "${CHART}" --version "${VERSION}"
# See everything you can configure
$ helm show values "${CHART}" --version "${VERSION}"
```
This installs the `kata-deploy` DaemonSet and the default Kata `RuntimeClass`
resources on your cluster.
To see what versions of the chart are available:
```sh
$ helm show chart oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy
```
### `helm uninstall`
```sh
$ helm uninstall kata-deploy -n kube-system
```
During uninstall, Helm will report that some resources were kept due to the
resource policy (`ServiceAccount`, `ClusterRole`, `ClusterRoleBinding`). This
is **normal**. A post-delete hook Job runs after uninstall and removes those
resources so no cluster-wide `RBAC` is left behind.
## Pre-Built Release
Kata can also be installed using the pre-built releases: https://github.com/kata-containers/kata-containers/releases
This method does not have any facilities for artifact lifecycle management.

View File

@@ -1,116 +0,0 @@
# Prerequisites
## Kubernetes
If using Kubernetes, at least version `v1.22` is recommended. This is the first release that the CRI v1 API and the `RuntimeClass` left alpha.
## containerd
Kata requires a [CRI](https://kubernetes.io/docs/concepts/containers/cri/)-compatible container runtime. containerd is commonly used for Kata. We recommend installing containerd using your platform's package distribution mechanism. We recommend at least the latest version of containerd v2.1.x.[^1]
### Debian/Ubuntu
To install on Debian-based systems:
```sh
$ apt update
$ apt install containerd
$ systemctl status containerd
● containerd.service - containerd container runtime
Loaded: loaded (/etc/systemd/system/containerd.service; enabled; preset: enabled)
Drop-In: /etc/systemd/system/containerd.service.d
└─http-proxy.conf
Active: active (running) since Wed 2026-02-25 22:58:13 UTC; 5 days ago
Docs: https://containerd.io
Main PID: 3767885 (containerd)
Tasks: 540
Memory: 70.7G (peak: 70.8G)
CPU: 4h 9min 26.153s
CGroup: /runtime.slice/containerd.service
├─ 12694 /usr/local/bin/container
```
### Fedora/RedHat
To install on Fedora-based systems:
```
$ yum install containerd
```
??? help
Documentation assistance is requested for more specific instructions on Fedora systems.
### Pre-Built Releases
Many Linux distributions will not package the latest versions of containerd. If you find that your distribution provides very old versions of containerd, it's recommended to upgrade with the [pre-built releases](https://github.com/containerd/containerd/releases).
#### Executable
Download the latest release of containerd:
```sh
$ wget https://github.com/containerd/containerd/releases/download/v${VERSION}/containerd-${VERSION}-linux-${PLATFORM}.tar.gz
# Extract to the current directory
$ tar -xf ./containerd*.tar.gz
# Extract to root if you want it installed to its final location.
$ tar -C / -xf ./*.tar.gz
```
### Containerd Config
Containerd requires a config file at `/etc/containerd/config.toml`. This needs to be populated with a simple default config:
```sh
$ /usr/local/bin/containerd config default > /etc/containerd/config.toml
```
### Systemd Unit File
Install the systemd unit file:
```sh
$ wget -O /etc/systemd/system/containerd.service https://raw.githubusercontent.com/containerd/containerd/main/containerd.service
```
!!! info
- You must modify the `ExecStart` line to the location of the installed containerd executable.
- containerd's `PATH` variable must allow it to find `containerd-shim-kata-v2`. You can do this by either creating a symlink from `/usr/local/bin/containerd-shim-kata-v2` to `/opt/kata/bin/containerd-shim-kata-v2` or by modifying containerd's `PATH` variable to search in `/opt/kata/bin/`. See the Environment= command in systemd.exec(5) for further details.
Reload systemd and start containerd:
```sh
$ systemctl daemon-reload
$ systemctl enable --now containerd
$ systemctl start containerd
$ systemctl status containerd
```
More details can be found on the [containerd installation docs](https://github.com/containerd/containerd/blob/main/docs/getting-started.md).
### Enable CRI
If you're using Kubernetes, you must enable the containerd Container Runtime Interface (CRI) plugin:
```sh
$ ctr plugins ls | grep cri
io.containerd.cri.v1 images - ok
io.containerd.cri.v1 runtime linux/amd64 ok
io.containerd.grpc.v1 cri - ok
```
If these are not enabled, you'll need to remove it from the `disabled_plugins` section of the containerd config.
[^1]: Kata makes use of containerd's drop-in config merging in `/etc/containerd/config.d/` which is only available starting from containerd v2. containerd v1 may work, but some Kata features will not work as expected.
## runc
The default `runc` runtime needs to be installed for non-kata containers. More details can be found at the [containerd docs](https://github.com/containerd/containerd/blob/979c80d8a5d7fc7be34102a1ada53ae5a0ff09e8/docs/RUNC.md).

View File

@@ -1,9 +0,0 @@
mkdocs-materialx==10.0.9
mkdocs-glightbox==0.4.0
mkdocs-macros-plugin==1.5.0
mkdocs-awesome-nav==3.3.0
mkdocs-open-in-new-tab==1.0.8
mkdocs-redirects==1.2.2
CairoSVG==2.9.0
pillow==12.1.1
click==8.2.1

View File

@@ -1,56 +0,0 @@
# Runtime Configuration
The containerd shims (both the Rust and Go implementations) take configuration files to control their behavior. These files are in `/opt/kata/share/defaults/kata-containers/`. An example excerpt:
```toml title="/opt/kata/share/defaults/kata-containers/configuration.toml"
[hypervisor.qemu]
path = "/opt/kata/bin/qemu-system-x86_64"
kernel = "/opt/kata/share/kata-containers/vmlinux.container"
image = "/opt/kata/share/kata-containers/kata-containers.img"
machine_type = "q35"
# rootfs filesystem type:
# - ext4 (default)
# - xfs
# - erofs
rootfs_type = "ext4"
# Enable running QEMU VMM as a non-root user.
# By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as
# a non-root random user. See documentation for the limitations of this mode.
rootless = false
# List of valid annotation names for the hypervisor
# Each member of the list is a regular expression, which is the base name
# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
enable_annotations = ["enable_iommu", "virtio_fs_extra_args", "kernel_params"]
```
These files should never be modified directly. If you wish to create a modified version of these files, you may create your own [custom runtime](helm-configuration.md#custom-runtimes). For example, to modify the image path, we provide these values to helm:
```yaml title="values.yaml"
customRuntimes:
enabled: true
runtimes:
my-gpu-runtime:
baseConfig: "qemu-nvidia-gpu"
dropIn: |
[hypervisor.qemu]
image = "/path/to/custom-image.img"
runtimeClass: |
kind: RuntimeClass
apiVersion: node.k8s.io/v1
metadata:
name: kata-my-gpu-runtime
labels:
app.kubernetes.io/managed-by: kata-deploy
handler: kata-my-gpu-runtime
overhead:
podFixed:
memory: "640Mi"
cpu: "500m"
scheduling:
nodeSelector:
katacontainers.io/kata-runtime: "true"
```

View File

@@ -175,7 +175,7 @@ specific).
##### Dragonball networking
For Dragonball, the `virtio-net` backend default is within Dragonball's VMM.
For Dragonball, the `virtio-net` backend default is within Dragonbasll's VMM.
#### virtio-vsock

View File

@@ -1,12 +1,11 @@
# Enabling NVIDIA GPU workloads using GPU passthrough with Kata Containers
This page provides:
1. A description of the components involved when running GPU workloads with
Kata Containers using the NVIDIA TEE and non-TEE GPU runtime classes.
1. An explanation of the orchestration flow on a Kubernetes node for this
scenario.
1. A deployment guide to utilize these runtime classes.
1. A deployment guide enabling to utilize these runtime classes.
The goal is to educate readers familiar with Kubernetes and Kata Containers
on NVIDIA's reference implementation which is reflected in Kata CI's build
@@ -19,56 +18,58 @@ Confidential Containers.
> **Note:**
>
> The currently supported modes for enabling GPU workloads in the TEE
> scenario are: (1) singleGPU passthrough (one physical GPU per pod) and
> (2) multi-GPU passthrough on NVSwitch (NVLink) based HGX systems
> (for example, HGX Hopper (SXM) and HGX Blackwell / HGX B200).
> The current supported mode for enabling GPU workloads in the TEE scenario
> is single GPU passthrough (one GPU per pod) on AMD64 platforms (AMD SEV-SNP
> being the only supported TEE scenario so far with support for Intel TDX being
> on the way).
## Component Overview
Before providing deployment guidance, we describe the components involved to
support running GPU workloads. We start from a top to bottom perspective
from the NVIDIA GPU Operator via the Kata runtime to the components within
from the NVIDIA GPU operator via the Kata runtime to the components within
the NVIDIA GPU Utility Virtual Machine (UVM) root filesystem.
### NVIDIA GPU Operator
A central component is the
[NVIDIA GPU Operator](https://github.com/NVIDIA/gpu-operator) which can be
deployed onto your cluster as a helm chart. Installing the GPU Operator
[NVIDIA GPU operator](https://github.com/NVIDIA/gpu-operator) which can be
deployed onto your cluster as a helm chart. Installing the GPU operator
delivers various operands on your nodes in the form of Kubernetes DaemonSets.
These operands are vital to support the flow of orchestrating pod manifests
using NVIDIA GPU runtime classes with GPU passthrough on your nodes. Without
getting into the details, the most important operands and their
responsibilities are:
- **nvidia-vfio-manager:** Binding discovered NVIDIA GPUs and nvswitches to
the `vfio-pci` driver for VFIO passthrough.
- **nvidia-vfio-manager:** Binding discovered NVIDIA GPUs to the `vfio-pci`
driver for VFIO passthrough.
- **nvidia-cc-manager:** Transitioning GPUs into confidential computing (CC)
and non-CC mode (see the
[NVIDIA/k8s-cc-manager](https://github.com/NVIDIA/k8s-cc-manager)
repository).
- **nvidia-kata-manager:** Creating host-side CDI specifications for GPU
passthrough, resulting in the file `/var/run/cdi/nvidia.yaml`, containing
`kind: nvidia.com/pgpu` (see the
[NVIDIA/k8s-kata-manager](https://github.com/NVIDIA/k8s-kata-manager)
repository).
- **nvidia-sandbox-device-plugin** (see the
[NVIDIA/sandbox-device-plugin](https://github.com/NVIDIA/sandbox-device-plugin)
repository):
- Creating host-side CDI specifications for GPU passthrough,
resulting in the file `/var/run/cdi/nvidia.yaml`, containing
`kind: nvidia.com/pgpu`
- Allocating GPUs during pod deployment.
- Discovering NVIDIA GPUs, their capabilities, and advertising these to
the Kubernetes control plane (allocatable resources as type
`nvidia.com/pgpu` resources will appear for the node and GPU Device IDs
will be registered with Kubelet). These GPUs can thus be allocated as
container resources in your pod manifests. See below GPU Operator
container resources in your pod manifests. See below GPU operator
deployment instructions for the use of the key `pgpu`, controlled via a
variable.
To summarize, the GPU Operator manages the GPUs on each node, allowing for
To summarize, the GPU operator manages the GPUs on each node, allowing for
simple orchestration of pod manifests using Kata Containers. Once the cluster
with GPU Operator and Kata bits is up and running, the end user can schedule
with GPU operator and Kata bits is up and running, the end user can schedule
Kata NVIDIA GPU workloads, using resource limits and the
`kata-qemu-nvidia-gpu`, `kata-qemu-nvidia-gpu-tdx` or
`kata-qemu-nvidia-gpu-snp` runtime classes, for example:
`kata-qemu-nvidia-gpu` or `kata-qemu-nvidia-gpu-snp` runtime classes, for
example:
```yaml
apiVersion: v1
@@ -212,7 +213,7 @@ API and kernel drivers, interacting with the pass-through GPU device.
An additional step is exercised in our CI samples: when using images from an
authenticated registry, the guest-pull mechanism triggers attestation using
Trustee's Key Broker Service (KBS) for secure release of the NGC API
trustee's Key Broker Service (KBS) for secure release of the NGC API
authentication key used to access the NVCR container registry. As part of
this, the attestation agent exercises composite attestation and transitions
the GPU into `Ready` state (without this, the GPU has to explicitly be
@@ -231,40 +232,24 @@ NVIDIA GPU CI validation jobs. Note that, this setup:
- uses the genpolicy tool to attach Kata agent security policies to the pod
manifest
- has dedicated (composite) attestation tests, a CUDA vectorAdd test, and a
NIM/RA test sample with secure API key release using sealed secrets.
NIM/RA test sample with secure API key release
A similar deployment guide and scenario description can be found in NVIDIA resources
under
[NVIDIA Confidential Containers Overview (Early Access)](https://docs.nvidia.com/datacenter/cloud-native/confidential-containers/latest/overview.html).
### Feature Set
The NVIDIA stack for Kata Containers leverages features for the confidential
computing scenario from both the confidential containers open source project
and from the Kata Containers source tree, such as:
- composite attestation using Trustee and the NVIDIA Remote Attestation
Service NRAS
- generating kata agent security policies using the genpolicy tool
- use of signed sealed secrets
- access to authenticated registries for container image guest-pull
- container image signature verification and encrypted container images
- ephemeral container data and image layer storage
[Early Access: NVIDIA GPU Operator with Confidential Containers based on Kata](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/confidential-containers.html).
### Requirements
The requirements for the TEE scenario are:
- Ubuntu 25.10 as host OS
- CPU with AMD SEV-SNP or Intel TDX support with proper BIOS/UEFI version
and settings
- CPU with AMD SEV-SNP support with proper BIOS/UEFI version and settings
- CC-capable Hopper/Blackwell GPU with proper VBIOS version.
BIOS and VBIOS configuration is out of scope for this guide. Other resources,
such as the documentation found on the
[NVIDIA Trusted Computing Solutions](https://docs.nvidia.com/nvtrust/index.html)
page, on the
[Secure AI Compatibility Matrix](https://www.nvidia.com/en-us/data-center/solutions/confidential-computing/secure-ai-compatibility-matrix/)
page, and on the above linked NVIDIA documentation, provide guidance on
page and the above linked NVIDIA documentation, provide guidance on
selecting proper hardware and on properly configuring its firmware and OS.
### Installation
@@ -272,16 +257,12 @@ selecting proper hardware and on properly configuring its firmware and OS.
#### Containerd and Kubernetes
First, set up your Kubernetes cluster. For instance, in Kata CI, our NVIDIA
jobs use a single-node vanilla Kubernetes cluster with a 2.1 containerd
version and Kata's current supported Kubernetes version. This cluster is
being set up using the `deploy_k8s` function from the script file
`tests/integration/kubernetes/gha-run.sh`. If you intend to run this script,
follow these steps, and make sure you have `yq` and `helm` installed. Note
that, these scripts query the GitHub API, so creating and declaring a
personal access token prevents rate limiting issues.
You can execute the function as follows:
jobs use a single-node vanilla Kubernetes cluster with a 2.x containerd
version and Kata's current supported Kubernetes version. We set this cluster
up using the `deploy_k8s` function from `tests/integration/kubernetes/gha-run.sh`
as follows:
```bash
$ export GH_TOKEN="<your-gh-pat>"
$ export KUBERNETES="vanilla"
$ export CONTAINER_ENGINE="containerd"
$ export CONTAINER_ENGINE_VERSION="v2.1"
@@ -295,11 +276,8 @@ $ deploy_k8s
> `runtimeRequestTimeout` timeout value than the two minute default timeout.
> Using the guest-pull mechanism, pulling large images may take a significant
> amount of time and may delay container start, possibly leading your Kubelet
> to de-allocate your pod before it transitions from the *container creating*
> to the *container running* state. The NVIDIA shim configurations use a
> `create_container_timeout` of 1200s, which is the equivalent value on shim
> side, controlling the time the shim allows for a container to remain in
> *container creating* state.
> to de-allocate your pod before it transitions from the *container created*
> to the *container running* state.
> **Note:**
>
@@ -313,7 +291,7 @@ $ deploy_k8s
#### GPU Operator
Assuming you have the helm tools installed, deploy the latest version of the
GPU Operator as a helm chart (minimum version: `v26.3.0`):
GPU Operator as a helm chart (minimum version: `v25.10.0`):
```bash
$ helm repo add nvidia https://helm.ngc.nvidia.com/nvidia && helm repo update
@@ -322,27 +300,33 @@ $ helm install --wait --generate-name \
nvidia/gpu-operator \
--set sandboxWorkloads.enabled=true \
--set sandboxWorkloads.defaultWorkload=vm-passthrough \
--set sandboxWorkloads.mode=kata \
--set kataManager.enabled=true \
--set kataManager.config.runtimeClasses=null \
--set kataManager.repository=nvcr.io/nvidia/cloud-native \
--set kataManager.image=k8s-kata-manager \
--set kataManager.version=v0.2.4 \
--set ccManager.enabled=true \
--set ccManager.defaultMode=on \
--set ccManager.repository=nvcr.io/nvidia/cloud-native \
--set ccManager.image=k8s-cc-manager \
--set ccManager.version=v0.2.0 \
--set sandboxDevicePlugin.repository=nvcr.io/nvidia/cloud-native \
--set sandboxDevicePlugin.image=nvidia-sandbox-device-plugin \
--set sandboxDevicePlugin.version=v0.0.1 \
--set 'sandboxDevicePlugin.env[0].name=P_GPU_ALIAS' \
--set 'sandboxDevicePlugin.env[0].value=pgpu' \
--set nfd.enabled=true \
--set nfd.nodefeaturerules=true
```
> **Note:**
>
> For heterogeneous clusters with different GPU types, you can specify an
> empty `P_GPU_ALIAS` environment variable for the sandbox device plugin:
> `- --set 'sandboxDevicePlugin.env[0].name=P_GPU_ALIAS' \`
> `- --set 'sandboxDevicePlugin.env[0].value=""' \`
> This will cause the sandbox device plugin to create GPU model-specific
> resource types (e.g., `nvidia.com/GH100_H100L_94GB`) instead of the
> default `pgpu` type, which usually results in advertising a resource of
> type `nvidia.com/pgpu`
> The exposed device resource types can be used for pods by specifying
> respective resource limits.
> Your node's nvswitches are exposed as resources of type
> `nvidia.com/nvswitch` by default. Using the variable `NVSWITCH_ALIAS`
> allows to control the advertising behavior similar to the `P_GPU_ALIAS`
> variable.
> For heterogeneous clusters with different GPU types, you can omit
> the `P_GPU_ALIAS` environment variable lines. This will cause the sandbox
> device plugin to create GPU model-specific resource types (e.g.,
> `nvidia.com/GH100_H100L_94GB`) instead of the generic `nvidia.com/pgpu`,
> which in turn can be used by pods through respective resource limits.
> For simplicity, this guide uses the generic alias.
> **Note:**
>
@@ -367,7 +351,8 @@ $ helm install kata-deploy \
--create-namespace \
-f "https://raw.githubusercontent.com/kata-containers/kata-containers/refs/tags/${VERSION}/tools/packaging/kata-deploy/helm-chart/kata-deploy/try-kata-nvidia-gpu.values.yaml" \
--set nfd.enabled=false \
--wait --timeout 10m \
--set shims.qemu-nvidia-gpu-tdx.enabled=false \
--wait --timeout 10m --atomic \
"${CHART}" --version "${VERSION}"
```
@@ -397,22 +382,31 @@ mode which requires entering a licensing agreement with NVIDIA, see the
### Cluster validation and preparation
If you did not use the `sandboxWorkloads.defaultWorkload=vm-passthrough`
parameter during GPU Operator deployment, label your nodes for GPU VM
parameter during GPU operator deployment, label your nodes for GPU VM
passthrough, for the example of using all nodes for GPU passthrough, run:
```bash
$ kubectl label nodes --all nvidia.com/gpu.workload.config=vm-passthrough --overwrite
```
With the suggested parameters for GPU Operator deployment, the
`nvidia-cc-manager` operand will automatically transition the GPU into CC
mode.
Check if the `nvidia-cc-manager` pod is running if you intend to run GPU TEE
scenarios. If not, you need to manually label the node as CC capable. Current
GPU Operator node feature rules do not yet recognize all CC capable GPU PCI
IDs. Run the following command:
```bash
$ kubectl label nodes --all nvidia.com/cc.capable=true
```
After this, assure the `nvidia-cc-manager` pod is running. With the suggested
parameters for GPU Operator deployment, the `nvidia-cc-manager` will
automatically transition the GPU into CC mode.
After deployment, you can transition your node(s) to the desired CC state,
using either the `on`, `ppcie`, or `off` value, depending on your scenario.
For the non-CC scenario, transition to the `off` state via:
using either the `on` or `off` value, depending on your scenario. For the
non-CC scenario, transition to the `off` state via:
`kubectl label nodes --all nvidia.com/cc.mode=off` and wait until all pods
are back running. When an actual change is exercised, various GPU Operator
are back running. When an actual change is exercised, various GPU operator
operands will be restarted.
Ensure all pods are running:
@@ -431,10 +425,9 @@ $ lspci -nnk -d 10de:
### Run the CUDA vectorAdd sample
Create the pod manifest with:
Create the following file:
```bash
$ cat > cuda-vectoradd-kata.yaml.in << 'EOF'
```yaml
apiVersion: v1
kind: Pod
metadata:
@@ -452,7 +445,6 @@ spec:
limits:
nvidia.com/pgpu: "1"
memory: 16Gi
EOF
```
Depending on your scenario and on the CC state, export your desired runtime
@@ -485,17 +477,6 @@ To stop the pod, run: `kubectl delete pod cuda-vectoradd-kata`.
### Next steps
#### Use multi-GPU passthrough
If you have machines supporting multi-GPU passthrough, use a pod deployment
manifest which uses 8 pgpu and 4 nvswitch resources.
On the NVIDIA Hopper architecture multi-GPU passthrough uses protected PCIe
(PPCIE) which claims exclusive use of the nvswitches for a single CVM. In
this case, transition your relevant node(s) GPU mode to `ppcie` mode.
The NVIDIA Blackwell architecture uses NVLink encryption which places the
switches outside of the Trusted Computing Base (TCB) and so does not
require a separate switch setting.
#### Transition between CC and non-CC mode
Use the previously described node labeling approach to transition between
@@ -511,7 +492,7 @@ and a basic NIM/RAG deployment. Running CI tests for the TEE GPU scenario
requires KBS to be deployed (except for the CUDA vectorAdd test). The best
place to get started running these tests locally is to look into our
[NVIDIA CI workflow manifest](https://github.com/kata-containers/kata-containers/blob/main/.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml)
and into the underlying
and into the underling
[run_kubernetes_nv_tests.sh](https://github.com/kata-containers/kata-containers/blob/main/tests/integration/kubernetes/run_kubernetes_nv_tests.sh)
script. For example, to run the CUDA vectorAdd scenario against the TEE GPU
runtime class use the following commands:
@@ -566,22 +547,6 @@ With GPU passthrough being supported by the
you can use the tool to create a Kata agent security policy. Our CI deploys
all sample pod manifests with a Kata agent security policy.
Note that, using containerd 2.1 in upstream's CI, we use the following
modification to the genpolicy default settings:
```bash
[
{
"op": "replace",
"path": "/kata_config/oci_version",
"value": "1.2.1"
}
]
```
This modification is applied via the genpolicy drop-in configuration file
`src\tools\genpolicy\drop-in-examples\20-oci-1.2.1-drop-in.json`.
When using a newer containerd version, such as containerd 2.2, the OCI
version field needs to be adjusted to "1.3.0", for instance.
#### Deploy pods using your own containers and manifests
You can author pod manifests leveraging your own containers, for instance,
@@ -599,3 +564,6 @@ following annotation in the manifest:
>
> - musl-based container images (e.g., using Alpine), or distro-less
> containers are not supported.
> - for the TEE scenario, only single-GPU passthrough per pod is supported,
> so your pod resource limit must be: `nvidia.com/pgpu: "1"` (on a system
> with multiple GPUs, you can thus pass through one GPU per pod).

View File

@@ -1,91 +0,0 @@
site_name: "Kata Containers Docs"
site_description: "Developer and user documentation for the Kata Containers project."
site_author: "Kata Containers Community"
repo_url: "https://github.com/kata-containers/kata-containers"
site_url: "https://kata-containers.github.io/kata-containers"
edit_uri: "edit/main/docs/"
repo_name: kata-containers
theme:
name: materialx
favicon: "assets/images/favicon.svg"
logo: "assets/images/favicon.svg"
topbar_style: glass
palette:
- media: "(prefers-color-scheme)"
toggle:
icon: material/brightness-auto
name: Switch to light mode
- media: "(prefers-color-scheme: light)"
scheme: default
primary: blue
accent: light blue
toggle:
icon: material/weather-sunny
name: Switch to dark mode
- media: "(prefers-color-scheme: dark)"
scheme: slate
primary: cyan
accent: cyan
toggle:
icon: material/brightness-4
name: Switch to system preference
features:
- content.action.edit
- content.action.view
- content.code.annotate
- content.code.copy
- content.code.select
- content.footnote.tooltips
- content.tabs.link
- content.tooltips
- navigation.expand
- navigation.indexes
- navigation.path
- navigation.sections
- navigation.tabs
- navigation.tracking
- navigation.top
- navigation.instant
- navigation.instant.prefetch
- navigation.instant.progress
- toc.follow
markdown_extensions:
- abbr
- admonition
- attr_list
- def_list
- footnotes
- md_in_html
- pymdownx.arithmatex:
generic: true
- pymdownx.emoji:
emoji_index: !!python/name:material.extensions.emoji.twemoji
emoji_generator: !!python/name:material.extensions.emoji.to_svg
- pymdownx.details
- pymdownx.highlight:
anchor_linenums: true
line_spans: __span
pygments_lang_class: true
auto_title: true
- pymdownx.keys
- pymdownx.magiclink
- pymdownx.superfences:
custom_fences:
- name: mermaid
class: mermaid
format: !!python/name:pymdownx.superfences.fence_code_format
- pymdownx.inlinehilite
- pymdownx.tabbed:
alternate_style: true
- pymdownx.tilde
- pymdownx.caret
- pymdownx.mark
- toc:
permalink: true
plugins:
- search
- awesome-nav

View File

@@ -1,8 +0,0 @@
[[IgnoredVulns]]
# yaml-rust is unmaintained.
# We tried the most promising alternative in https://github.com/kata-containers/kata-containers/pull/12509,
# but its literal quoting is not conformant.
id = "RUSTSEC-2024-0320"
ignoreUntil = 2026-10-01 # TODO(burgerdev): revisit yml library ecosystem
reason = "No alternative currently supports 'yes' strings correctly; genpolicy processes only trusted input."

View File

@@ -1,3 +1,3 @@
[toolchain]
# Keep in sync with versions.yaml
channel = "1.92"
channel = "1.91"

1860
src/agent/Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -63,9 +63,9 @@ cgroups = { package = "cgroups-rs", git = "https://github.com/kata-containers/cg
# Tracing
tracing = "0.1.41"
tracing-subscriber = "0.3.20"
tracing-opentelemetry = "0.17.0"
opentelemetry = { version = "0.17.0", features = ["rt-tokio"] }
tracing-subscriber = "0.2.18"
tracing-opentelemetry = "0.13.0"
opentelemetry = { version = "0.14.0", features = ["rt-tokio-current-thread"] }
# Configuration
serde = { version = "1.0.129", features = ["derive"] }
@@ -78,6 +78,7 @@ strum_macros = "0.26.2"
tempfile = "3.19.1"
which = "4.3.0"
rstest = "0.18.0"
async-std = { version = "1.12.0", features = ["attributes"] }
# Local dependencies
kata-agent-policy = { path = "policy" }
@@ -194,6 +195,7 @@ pv_core = { git = "https://github.com/ibm-s390-linux/s390-tools", rev = "4942504
tempfile.workspace = true
which.workspace = true
rstest.workspace = true
async-std.workspace = true
test-utils.workspace = true

View File

@@ -18,8 +18,6 @@ serde_json.workspace = true
# Agent Policy
regorus = { version = "0.2.8", default-features = false, features = [
"arc",
"base64",
"base64url",
"regex",
"std",
] }

View File

@@ -857,7 +857,7 @@ fn mount_from(
dest.as_str(),
Some(mount_typ.as_str()),
flags,
Some(d.as_str()).filter(|s| !s.is_empty()),
Some(d.as_str()),
)
.inspect_err(|e| log_child!(cfd_log, "mount error: {:?}", e))?;

View File

@@ -89,7 +89,7 @@ pub fn baremount(
let destination_str = destination.to_string_lossy();
if let Ok(m) = get_linux_mount_info(destination_str.deref()) {
if m.fs_type == fs_type && !flags.contains(MsFlags::MS_REMOUNT) {
slog::info!(logger, "{source:?} is already mounted at {destination:?}");
slog_info!(logger, "{source:?} is already mounted at {destination:?}");
return Ok(());
}
}

View File

@@ -110,10 +110,8 @@ impl Namespace {
unshare(cf)?;
if ns_type == NamespaceType::Uts {
if let Some(host) = hostname {
nix::unistd::sethostname(host)?;
}
if ns_type == NamespaceType::Uts && hostname.is_some() {
nix::unistd::sethostname(hostname.unwrap())?;
}
// Bind mount the new namespace from the current thread onto the mount point to persist it.

View File

@@ -2308,6 +2308,9 @@ fn is_sealed_secret_path(source_path: &str) -> bool {
}
async fn cdh_handler_trusted_storage(oci: &mut Spec) -> Result<()> {
if !confidential_data_hub::is_cdh_client_initialized() {
return Ok(());
}
let linux = oci
.linux()
.as_ref()
@@ -2317,12 +2320,24 @@ async fn cdh_handler_trusted_storage(oci: &mut Spec) -> Result<()> {
for specdev in devices.iter() {
if specdev.path().as_path().to_str() == Some(TRUSTED_IMAGE_STORAGE_DEVICE) {
let dev_major_minor = format!("{}:{}", specdev.major(), specdev.minor());
cdh_secure_mount(
"block-device",
&dev_major_minor,
"luks2",
let secure_storage_integrity = AGENT_CONFIG.secure_storage_integrity.to_string();
info!(
sl(),
"trusted_store device major:min {}, enable data integrity {}",
dev_major_minor,
secure_storage_integrity
);
let options = std::collections::HashMap::from([
("deviceId".to_string(), dev_major_minor),
("encryptType".to_string(), "LUKS".to_string()),
("dataIntegrity".to_string(), secure_storage_integrity),
]);
confidential_data_hub::secure_mount(
"BlockDevice",
&options,
vec![],
KATA_IMAGE_WORK_DIR,
"-E lazy_journal_init",
)
.await?;
break;
@@ -2332,51 +2347,6 @@ async fn cdh_handler_trusted_storage(oci: &mut Spec) -> Result<()> {
Ok(())
}
pub(crate) async fn cdh_secure_mount(
device_type: &str,
device_id: &str,
encrypt_type: &str,
mount_point: &str,
mkfs_opts: &str,
) -> Result<()> {
if !confidential_data_hub::is_cdh_client_initialized() {
return Ok(());
}
let integrity = AGENT_CONFIG.secure_storage_integrity.to_string();
info!(
sl(),
"cdh_secure_mount: device_type {}, device_id {}, encrypt_type {}, integrity {}, mkfs_opts {}",
device_type,
device_id,
encrypt_type,
integrity,
mkfs_opts
);
let options = std::collections::HashMap::from([
("deviceId".to_string(), device_id.to_string()),
("sourceType".to_string(), "empty".to_string()),
("targetType".to_string(), "fileSystem".to_string()),
("filesystemType".to_string(), "ext4".to_string()),
("mkfsOpts".to_string(), mkfs_opts.to_string()),
("encryptionType".to_string(), encrypt_type.to_string()),
("dataIntegrity".to_string(), integrity),
]);
std::fs::create_dir_all(mount_point).inspect_err(|e| {
error!(
sl(),
"Failed to create mount point directory {}: {:?}", mount_point, e
);
})?;
confidential_data_hub::secure_mount(device_type, &options, vec![], mount_point).await?;
Ok(())
}
async fn cdh_handler_sealed_secrets(oci: &mut Spec) -> Result<()> {
if !confidential_data_hub::is_cdh_client_initialized() {
return Ok(());

View File

@@ -65,12 +65,6 @@ type UeventWatcher = (Box<dyn UeventMatcher>, oneshot::Sender<Uevent>);
pub struct StorageState {
count: Arc<AtomicU32>,
device: Arc<dyn StorageDevice>,
/// Whether the storage is shared across multiple containers (e.g.
/// block-based emptyDirs). Shared storages should not be cleaned up
/// when a container exits; cleanup happens only when the sandbox is
/// destroyed.
shared: bool,
}
impl Debug for StorageState {
@@ -80,11 +74,17 @@ impl Debug for StorageState {
}
impl StorageState {
fn new(shared: bool) -> Self {
fn new() -> Self {
StorageState {
count: Arc::new(AtomicU32::new(1)),
device: Arc::new(StorageDeviceGeneric::default()),
shared,
}
}
pub fn from_device(device: Arc<dyn StorageDevice>) -> Self {
Self {
count: Arc::new(AtomicU32::new(1)),
device,
}
}
@@ -92,10 +92,6 @@ impl StorageState {
self.device.path()
}
pub fn is_shared(&self) -> bool {
self.shared
}
pub async fn ref_count(&self) -> u32 {
self.count.load(Ordering::Relaxed)
}
@@ -175,10 +171,8 @@ impl Sandbox {
/// Add a new storage object or increase reference count of existing one.
/// The caller may detect new storage object by checking `StorageState.refcount == 1`.
/// The `shared` flag indicates if this storage is shared across multiple containers;
/// if true, cleanup will be skipped when containers exit.
#[instrument]
pub async fn add_sandbox_storage(&mut self, path: &str, shared: bool) -> StorageState {
pub async fn add_sandbox_storage(&mut self, path: &str) -> StorageState {
match self.storages.entry(path.to_string()) {
Entry::Occupied(e) => {
let state = e.get().clone();
@@ -186,7 +180,7 @@ impl Sandbox {
state
}
Entry::Vacant(e) => {
let state = StorageState::new(shared);
let state = StorageState::new();
e.insert(state.clone());
state
}
@@ -194,32 +188,22 @@ impl Sandbox {
}
/// Update the storage device associated with a path.
/// Preserves the existing shared flag and reference count.
pub fn update_sandbox_storage(
&mut self,
path: &str,
device: Arc<dyn StorageDevice>,
) -> std::result::Result<Arc<dyn StorageDevice>, Arc<dyn StorageDevice>> {
match self.storages.get(path) {
None => Err(device),
Some(existing) => {
let state = StorageState {
device,
..existing.clone()
};
// Safe to unwrap() because we have just ensured existence of entry via get().
let state = self.storages.insert(path.to_string(), state).unwrap();
Ok(state.device)
}
if !self.storages.contains_key(path) {
return Err(device);
}
let state = StorageState::from_device(device);
// Safe to unwrap() because we have just ensured existence of entry.
let state = self.storages.insert(path.to_string(), state).unwrap();
Ok(state.device)
}
/// Decrease reference count and destroy the storage object if reference count reaches zero.
///
/// For shared storages (e.g., emptyDir volumes), cleanup is skipped even when refcount
/// reaches zero. The storage entry is kept in the map so subsequent containers can reuse
/// the already-mounted storage. Actual cleanup happens when the sandbox is destroyed.
///
/// Returns `Ok(true)` if the reference count has reached zero and the storage object has been
/// removed.
#[instrument]
@@ -228,10 +212,6 @@ impl Sandbox {
None => Err(anyhow!("Sandbox storage with path {} not found", path)),
Some(state) => {
if state.dec_and_test_ref_count().await {
if state.is_shared() {
state.count.store(1, Ordering::Release);
return Ok(false);
}
if let Some(storage) = self.storages.remove(path) {
storage.device.cleanup()?;
}
@@ -740,7 +720,7 @@ mod tests {
let tmpdir_path = tmpdir.path().to_str().unwrap();
// Add a new sandbox storage
let new_storage = s.add_sandbox_storage(tmpdir_path, false).await;
let new_storage = s.add_sandbox_storage(tmpdir_path).await;
// Check the reference counter
let ref_count = new_storage.ref_count().await;
@@ -750,7 +730,7 @@ mod tests {
);
// Use the existing sandbox storage
let new_storage = s.add_sandbox_storage(tmpdir_path, false).await;
let new_storage = s.add_sandbox_storage(tmpdir_path).await;
// Since we are using existing storage, the reference counter
// should be 2 by now.
@@ -791,7 +771,7 @@ mod tests {
assert!(bind_mount(srcdir_path, destdir_path, &logger).is_ok());
s.add_sandbox_storage(destdir_path, false).await;
s.add_sandbox_storage(destdir_path).await;
let storage = StorageDeviceGeneric::new(destdir_path.to_string());
assert!(s
.update_sandbox_storage(destdir_path, Arc::new(storage))
@@ -809,7 +789,7 @@ mod tests {
let other_dir_path = other_dir.path().to_str().unwrap();
other_dir_str = other_dir_path.to_string();
s.add_sandbox_storage(other_dir_path, false).await;
s.add_sandbox_storage(other_dir_path).await;
let storage = StorageDeviceGeneric::new(other_dir_path.to_string());
assert!(s
.update_sandbox_storage(other_dir_path, Arc::new(storage))
@@ -828,9 +808,9 @@ mod tests {
let storage_path = "/tmp/testEphe";
// Add a new sandbox storage
s.add_sandbox_storage(storage_path, false).await;
s.add_sandbox_storage(storage_path).await;
// Use the existing sandbox storage
let state = s.add_sandbox_storage(storage_path, false).await;
let state = s.add_sandbox_storage(storage_path).await;
assert!(
state.ref_count().await > 1,
"Expects false as the storage is not new."

View File

@@ -6,7 +6,7 @@
use crate::linux_abi::pcipath_from_dev_tree_path;
use std::fs;
use std::os::unix::fs::{MetadataExt, PermissionsExt};
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::sync::Arc;
@@ -17,7 +17,6 @@ use kata_types::device::{
DRIVER_BLK_MMIO_TYPE, DRIVER_BLK_PCI_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_SCSI_TYPE,
};
use kata_types::mount::StorageDevice;
use nix::sys::stat::{major, minor};
use protocols::agent::Storage;
use tracing::instrument;
@@ -30,51 +29,10 @@ use crate::device::block_device_handler::{
};
use crate::device::nvdimm_device_handler::wait_for_pmem_device;
use crate::device::scsi_device_handler::get_scsi_device_name;
use crate::storage::{
common_storage_handler, new_device, set_ownership, StorageContext, StorageHandler,
};
use slog::Logger;
use crate::storage::{common_storage_handler, new_device, StorageContext, StorageHandler};
#[cfg(target_arch = "s390x")]
use std::str::FromStr;
fn get_device_number(dev_path: &str, metadata: Option<&fs::Metadata>) -> Result<String> {
let dev_id = match metadata {
Some(m) => m.rdev(),
None => {
let m =
fs::metadata(dev_path).context(format!("get metadata on file {:?}", dev_path))?;
m.rdev()
}
};
Ok(format!("{}:{}", major(dev_id), minor(dev_id)))
}
async fn handle_block_storage(
logger: &Logger,
storage: &Storage,
dev_num: &str,
) -> Result<Arc<dyn StorageDevice>> {
let has_ephemeral_encryption = storage
.driver_options
.contains(&"encryption_key=ephemeral".to_string());
if has_ephemeral_encryption {
crate::rpc::cdh_secure_mount(
"block-device",
dev_num,
"luks2",
&storage.mount_point,
"-O ^has_journal -m 0 -i 163840 -I 128",
)
.await?;
set_ownership(logger, storage)?;
new_device(storage.mount_point.clone())
} else {
let path = common_storage_handler(logger, storage)?;
new_device(path)
}
}
#[derive(Debug)]
pub struct VirtioBlkMmioHandler {}
@@ -117,8 +75,6 @@ impl StorageHandler for VirtioBlkPciHandler {
mut storage: Storage,
ctx: &mut StorageContext,
) -> Result<Arc<dyn StorageDevice>> {
let dev_num: String;
// If hot-plugged, get the device node path based on the PCI path
// otherwise use the virt path provided in Storage Source
if storage.source.starts_with("/dev") {
@@ -128,16 +84,15 @@ impl StorageHandler for VirtioBlkPciHandler {
if mode & libc::S_IFBLK == 0 {
return Err(anyhow!("Invalid device {}", &storage.source));
}
dev_num = get_device_number(&storage.source, Some(&metadata))?;
} else {
let (root_complex, pcipath) = pcipath_from_dev_tree_path(&storage.source)?;
let dev_path =
get_virtio_blk_pci_device_name(ctx.sandbox, root_complex, &pcipath).await?;
storage.source = dev_path;
dev_num = get_device_number(&storage.source, None)?;
}
handle_block_storage(ctx.logger, &storage, &dev_num).await
let path = common_storage_handler(ctx.logger, &storage)?;
new_device(path)
}
}
@@ -196,10 +151,10 @@ impl StorageHandler for ScsiHandler {
) -> Result<Arc<dyn StorageDevice>> {
// Retrieve the device path from SCSI address.
let dev_path = get_scsi_device_name(ctx.sandbox, &storage.source).await?;
storage.source = dev_path.clone();
storage.source = dev_path;
let dev_num = get_device_number(&dev_path, None)?;
handle_block_storage(ctx.logger, &storage, &dev_num).await
let path = common_storage_handler(ctx.logger, &storage)?;
new_device(path)
}
}

View File

@@ -172,11 +172,7 @@ pub async fn add_storages(
for storage in storages {
let path = storage.mount_point.clone();
let state = sandbox
.lock()
.await
.add_sandbox_storage(&path, storage.shared)
.await;
let state = sandbox.lock().await.add_sandbox_storage(&path).await;
if state.ref_count().await > 1 {
if let Some(path) = state.path() {
if !path.is_empty() {

View File

@@ -5,8 +5,7 @@
use anyhow::Result;
use opentelemetry::sdk::propagation::TraceContextPropagator;
use opentelemetry::trace::TracerProvider;
use opentelemetry::{global, sdk::trace::Config};
use opentelemetry::{global, sdk::trace::Config, trace::TracerProvider};
use slog::{info, o, Logger};
use std::collections::HashMap;
use tracing_opentelemetry::OpenTelemetryLayer;
@@ -24,12 +23,15 @@ pub fn setup_tracing(name: &'static str, logger: &Logger) -> Result<()> {
let config = Config::default();
let builder = opentelemetry::sdk::trace::TracerProvider::builder()
.with_batch_exporter(exporter, opentelemetry::runtime::Tokio)
.with_batch_exporter(exporter, opentelemetry::runtime::TokioCurrentThread)
.with_config(config);
let provider = builder.build();
let tracer = provider.tracer(name);
// We don't need a versioned tracer.
let version = None;
let tracer = provider.get_tracer(name, version);
let _global_provider = global::set_tracer_provider(provider);

View File

@@ -10,7 +10,7 @@ libc.workspace = true
thiserror.workspace = true
opentelemetry = { workspace = true, features = ["serialize"] }
tokio-vsock.workspace = true
serde_json = "1.0"
bincode = "1.3.3"
byteorder = "1.4.3"
slog = { workspace = true, features = [
"dynamic-keys",

View File

@@ -58,7 +58,7 @@ pub enum Error {
#[error("connection error: {0}")]
ConnectionError(String),
#[error("serialisation error: {0}")]
SerialisationError(#[from] serde_json::Error),
SerialisationError(#[from] bincode::Error),
#[error("I/O error: {0}")]
IOError(#[from] std::io::Error),
}
@@ -81,7 +81,8 @@ async fn write_span(
let mut writer = writer.lock().await;
let encoded_payload: Vec<u8> =
serde_json::to_vec(span).map_err(|e| make_io_error(e.to_string()))?;
bincode::serialize(&span).map_err(|e| make_io_error(e.to_string()))?;
let payload_len: u64 = encoded_payload.len() as u64;
let mut payload_len_as_bytes: [u8; HEADER_SIZE_BYTES as usize] =

View File

@@ -48,9 +48,9 @@ vmm-sys-util = { workspace = true }
virtio-queue = { workspace = true, optional = true }
vm-memory = { workspace = true, features = ["backend-mmap"] }
crossbeam-channel = "0.5.6"
fuse-backend-rs = "0.10.5"
vfio-bindings = { workspace = true, optional = true }
vfio-ioctls = { workspace = true, optional = true }
kata-sys-util = { path = "../libs/kata-sys-util" }
[dev-dependencies]
slog-async = "2.7.0"
@@ -86,6 +86,3 @@ host-device = ["dep:vfio-bindings", "dep:vfio-ioctls", "dep:dbs-pci"]
unexpected_cfgs = { level = "warn", check-cfg = [
'cfg(feature, values("test-mock"))',
] }
[package.metadata.cargo-machete]
ignored = ["vfio-bindings"]

View File

@@ -1,15 +1,16 @@
// Copyright (C) 2022 Alibaba Cloud. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use std::io::{Read, Write};
use std::sync::atomic::Ordering;
use std::sync::Arc;
use vm_memory::bitmap::{Bitmap, BS};
use vm_memory::mmap::NewBitmap;
use vm_memory::guest_memory::GuestMemoryIterator;
use vm_memory::mmap::{Error, NewBitmap};
use vm_memory::{
guest_memory, AtomicAccess, Bytes, FileOffset, GuestAddress, GuestMemory, GuestMemoryRegion,
GuestRegionCollectionError, GuestRegionMmap, GuestUsize, MemoryRegionAddress, ReadVolatile,
VolatileSlice, WriteVolatile,
GuestRegionMmap, GuestUsize, MemoryRegionAddress, VolatileSlice,
};
use crate::GuestRegionRaw;
@@ -66,63 +67,63 @@ impl<B: Bitmap> Bytes<MemoryRegionAddress> for GuestRegionHybrid<B> {
}
}
fn read_volatile_from<F>(
fn read_from<F>(
&self,
addr: MemoryRegionAddress,
src: &mut F,
count: usize,
) -> guest_memory::Result<usize>
where
F: ReadVolatile,
F: Read,
{
match self {
GuestRegionHybrid::Mmap(region) => region.read_volatile_from(addr, src, count),
GuestRegionHybrid::Raw(region) => region.read_volatile_from(addr, src, count),
GuestRegionHybrid::Mmap(region) => region.read_from(addr, src, count),
GuestRegionHybrid::Raw(region) => region.read_from(addr, src, count),
}
}
fn read_exact_volatile_from<F>(
fn read_exact_from<F>(
&self,
addr: MemoryRegionAddress,
src: &mut F,
count: usize,
) -> guest_memory::Result<()>
where
F: ReadVolatile,
F: Read,
{
match self {
GuestRegionHybrid::Mmap(region) => region.read_exact_volatile_from(addr, src, count),
GuestRegionHybrid::Raw(region) => region.read_exact_volatile_from(addr, src, count),
GuestRegionHybrid::Mmap(region) => region.read_exact_from(addr, src, count),
GuestRegionHybrid::Raw(region) => region.read_exact_from(addr, src, count),
}
}
fn write_volatile_to<F>(
fn write_to<F>(
&self,
addr: MemoryRegionAddress,
dst: &mut F,
count: usize,
) -> guest_memory::Result<usize>
where
F: WriteVolatile,
F: Write,
{
match self {
GuestRegionHybrid::Mmap(region) => region.write_volatile_to(addr, dst, count),
GuestRegionHybrid::Raw(region) => region.write_volatile_to(addr, dst, count),
GuestRegionHybrid::Mmap(region) => region.write_to(addr, dst, count),
GuestRegionHybrid::Raw(region) => region.write_to(addr, dst, count),
}
}
fn write_all_volatile_to<F>(
fn write_all_to<F>(
&self,
addr: MemoryRegionAddress,
dst: &mut F,
count: usize,
) -> guest_memory::Result<()>
where
F: WriteVolatile,
F: Write,
{
match self {
GuestRegionHybrid::Mmap(region) => region.write_all_volatile_to(addr, dst, count),
GuestRegionHybrid::Raw(region) => region.write_all_volatile_to(addr, dst, count),
GuestRegionHybrid::Mmap(region) => region.write_all_to(addr, dst, count),
GuestRegionHybrid::Raw(region) => region.write_all_to(addr, dst, count),
}
}
@@ -167,7 +168,7 @@ impl<B: Bitmap> GuestMemoryRegion for GuestRegionHybrid<B> {
}
}
fn bitmap(&self) -> BS<'_, Self::B> {
fn bitmap(&self) -> &Self::B {
match self {
GuestRegionHybrid::Mmap(region) => region.bitmap(),
GuestRegionHybrid::Raw(region) => region.bitmap(),
@@ -188,6 +189,20 @@ impl<B: Bitmap> GuestMemoryRegion for GuestRegionHybrid<B> {
}
}
unsafe fn as_slice(&self) -> Option<&[u8]> {
match self {
GuestRegionHybrid::Mmap(region) => region.as_slice(),
GuestRegionHybrid::Raw(region) => region.as_slice(),
}
}
unsafe fn as_mut_slice(&self) -> Option<&mut [u8]> {
match self {
GuestRegionHybrid::Mmap(region) => region.as_mut_slice(),
GuestRegionHybrid::Raw(region) => region.as_mut_slice(),
}
}
fn get_slice(
&self,
offset: MemoryRegionAddress,
@@ -208,39 +223,6 @@ impl<B: Bitmap> GuestMemoryRegion for GuestRegionHybrid<B> {
}
}
impl<B: Bitmap> GuestRegionHybrid<B> {
/// Returns a slice corresponding to the region.
///
/// # Safety
/// This is safe because we mapped the area at addr ourselves, so this slice will not
/// overflow. However, it is possible to alias.
pub unsafe fn as_slice(&self) -> Option<&[u8]> {
match self {
GuestRegionHybrid::Mmap(region) => {
let addr = region.get_host_address(MemoryRegionAddress(0)).ok()?;
Some(std::slice::from_raw_parts(addr, region.len() as usize))
}
GuestRegionHybrid::Raw(region) => region.as_slice(),
}
}
/// Returns a mutable slice corresponding to the region.
///
/// # Safety
/// This is safe because we mapped the area at addr ourselves, so this slice will not
/// overflow. However, it is possible to alias.
#[allow(clippy::mut_from_ref)]
pub unsafe fn as_mut_slice(&self) -> Option<&mut [u8]> {
match self {
GuestRegionHybrid::Mmap(region) => {
let addr = region.get_host_address(MemoryRegionAddress(0)).ok()?;
Some(std::slice::from_raw_parts_mut(addr, region.len() as usize))
}
GuestRegionHybrid::Raw(region) => region.as_mut_slice(),
}
}
}
/// [`GuestMemory`](trait.GuestMemory.html) implementation that manage hybrid types of guest memory
/// regions.
///
@@ -266,9 +248,7 @@ impl<B: Bitmap> GuestMemoryHybrid<B> {
/// * `regions` - The vector of regions.
/// The regions shouldn't overlap and they should be sorted
/// by the starting address.
pub fn from_regions(
mut regions: Vec<GuestRegionHybrid<B>>,
) -> Result<Self, GuestRegionCollectionError> {
pub fn from_regions(mut regions: Vec<GuestRegionHybrid<B>>) -> Result<Self, Error> {
Self::from_arc_regions(regions.drain(..).map(Arc::new).collect())
}
@@ -284,11 +264,9 @@ impl<B: Bitmap> GuestMemoryHybrid<B> {
/// * `regions` - The vector of `Arc` regions.
/// The regions shouldn't overlap and they should be sorted
/// by the starting address.
pub fn from_arc_regions(
regions: Vec<Arc<GuestRegionHybrid<B>>>,
) -> Result<Self, GuestRegionCollectionError> {
pub fn from_arc_regions(regions: Vec<Arc<GuestRegionHybrid<B>>>) -> Result<Self, Error> {
if regions.is_empty() {
return Err(GuestRegionCollectionError::NoMemoryRegion);
return Err(Error::NoMemoryRegion);
}
for window in regions.windows(2) {
@@ -296,11 +274,11 @@ impl<B: Bitmap> GuestMemoryHybrid<B> {
let next = &window[1];
if prev.start_addr() > next.start_addr() {
return Err(GuestRegionCollectionError::UnsortedMemoryRegions);
return Err(Error::UnsortedMemoryRegions);
}
if prev.last_addr() >= next.start_addr() {
return Err(GuestRegionCollectionError::MemoryRegionOverlap);
return Err(Error::MemoryRegionOverlap);
}
}
@@ -314,7 +292,7 @@ impl<B: Bitmap> GuestMemoryHybrid<B> {
pub fn insert_region(
&self,
region: Arc<GuestRegionHybrid<B>>,
) -> Result<GuestMemoryHybrid<B>, GuestRegionCollectionError> {
) -> Result<GuestMemoryHybrid<B>, Error> {
let mut regions = self.regions.clone();
regions.push(region);
regions.sort_by_key(|x| x.start_addr());
@@ -332,7 +310,7 @@ impl<B: Bitmap> GuestMemoryHybrid<B> {
&self,
base: GuestAddress,
size: GuestUsize,
) -> Result<(GuestMemoryHybrid<B>, Arc<GuestRegionHybrid<B>>), GuestRegionCollectionError> {
) -> Result<(GuestMemoryHybrid<B>, Arc<GuestRegionHybrid<B>>), Error> {
if let Ok(region_index) = self.regions.binary_search_by_key(&base, |x| x.start_addr()) {
if self.regions.get(region_index).unwrap().len() as GuestUsize == size {
let mut regions = self.regions.clone();
@@ -341,13 +319,32 @@ impl<B: Bitmap> GuestMemoryHybrid<B> {
}
}
Err(GuestRegionCollectionError::NoMemoryRegion)
Err(Error::InvalidGuestRegion)
}
}
/// An iterator over the elements of `GuestMemoryHybrid`.
///
/// This struct is created by `GuestMemory::iter()`. See its documentation for more.
pub struct Iter<'a, B>(std::slice::Iter<'a, Arc<GuestRegionHybrid<B>>>);
impl<'a, B> Iterator for Iter<'a, B> {
type Item = &'a GuestRegionHybrid<B>;
fn next(&mut self) -> Option<Self::Item> {
self.0.next().map(AsRef::as_ref)
}
}
impl<'a, B: 'a> GuestMemoryIterator<'a, GuestRegionHybrid<B>> for GuestMemoryHybrid<B> {
type Iter = Iter<'a, B>;
}
impl<B: Bitmap + 'static> GuestMemory for GuestMemoryHybrid<B> {
type R = GuestRegionHybrid<B>;
type I = Self;
fn num_regions(&self) -> usize {
self.regions.len()
}
@@ -362,15 +359,15 @@ impl<B: Bitmap + 'static> GuestMemory for GuestMemoryHybrid<B> {
index.map(|x| self.regions[x].as_ref())
}
fn iter(&self) -> impl Iterator<Item = &GuestRegionHybrid<B>> {
self.regions.iter().map(AsRef::as_ref)
fn iter(&self) -> Iter<'_, B> {
Iter(self.regions.iter())
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::{Read, Seek, Write};
use std::io::Seek;
use vm_memory::{GuestMemoryError, MmapRegion};
use vmm_sys_util::tempfile::TempFile;
@@ -657,14 +654,14 @@ mod tests {
// Rewind file pointer after write operation.
file_to_write_mmap_region.rewind().unwrap();
guest_region
.read_volatile_from(write_addr, &mut file_to_write_mmap_region, size_of_file)
.read_from(write_addr, &mut file_to_write_mmap_region, size_of_file)
.unwrap();
let mut file_read_from_mmap_region = TempFile::new().unwrap().into_file();
file_read_from_mmap_region
.set_len(size_of_file as u64)
.unwrap();
guest_region
.write_all_volatile_to(write_addr, &mut file_read_from_mmap_region, size_of_file)
.write_all_to(write_addr, &mut file_read_from_mmap_region, size_of_file)
.unwrap();
// Rewind file pointer after write operation.
file_read_from_mmap_region.rewind().unwrap();
@@ -682,7 +679,7 @@ mod tests {
let invalid_addr = MemoryRegionAddress(0x900);
assert!(matches!(
guest_region
.read_volatile_from(invalid_addr, &mut file_to_write_mmap_region, size_of_file)
.read_from(invalid_addr, &mut file_to_write_mmap_region, size_of_file)
.err()
.unwrap(),
GuestMemoryError::InvalidBackendAddress
@@ -692,7 +689,7 @@ mod tests {
let invalid_addr = MemoryRegionAddress(0x900);
assert!(matches!(
guest_region
.write_volatile_to(invalid_addr, &mut file_read_from_mmap_region, size_of_file)
.write_to(invalid_addr, &mut file_read_from_mmap_region, size_of_file)
.err()
.unwrap(),
GuestMemoryError::InvalidBackendAddress
@@ -722,14 +719,14 @@ mod tests {
// Rewind file pointer after write operation.
file_to_write_mmap_region.rewind().unwrap();
guest_region
.read_volatile_from(write_addr, &mut file_to_write_mmap_region, size_of_file)
.read_from(write_addr, &mut file_to_write_mmap_region, size_of_file)
.unwrap();
let mut file_read_from_mmap_region = TempFile::new().unwrap().into_file();
file_read_from_mmap_region
.set_len(size_of_file as u64)
.unwrap();
guest_region
.write_all_volatile_to(write_addr, &mut file_read_from_mmap_region, size_of_file)
.write_all_to(write_addr, &mut file_read_from_mmap_region, size_of_file)
.unwrap();
// Rewind file pointer after write operation.
file_read_from_mmap_region.rewind().unwrap();
@@ -747,7 +744,7 @@ mod tests {
let invalid_addr = MemoryRegionAddress(0x900);
assert!(matches!(
guest_region
.read_volatile_from(invalid_addr, &mut file_to_write_mmap_region, size_of_file)
.read_from(invalid_addr, &mut file_to_write_mmap_region, size_of_file)
.err()
.unwrap(),
GuestMemoryError::InvalidBackendAddress
@@ -757,7 +754,7 @@ mod tests {
let invalid_addr = MemoryRegionAddress(0x900);
assert!(matches!(
guest_region
.write_volatile_to(invalid_addr, &mut file_read_from_mmap_region, size_of_file)
.write_to(invalid_addr, &mut file_read_from_mmap_region, size_of_file)
.err()
.unwrap(),
GuestMemoryError::InvalidBackendAddress
@@ -791,14 +788,14 @@ mod tests {
.unwrap();
file_to_write_mmap_region.rewind().unwrap();
guest_mmap_region
.read_exact_volatile_from(write_addr, &mut file_to_write_mmap_region, size_of_file)
.read_exact_from(write_addr, &mut file_to_write_mmap_region, size_of_file)
.unwrap();
let mut file_read_from_mmap_region = TempFile::new().unwrap().into_file();
file_read_from_mmap_region
.set_len(size_of_file as u64)
.unwrap();
guest_mmap_region
.write_all_volatile_to(write_addr, &mut file_read_from_mmap_region, size_of_file)
.write_all_to(write_addr, &mut file_read_from_mmap_region, size_of_file)
.unwrap();
file_read_from_mmap_region.rewind().unwrap();
let mut content = String::new();
@@ -821,14 +818,14 @@ mod tests {
.unwrap();
file_to_write_raw_region.rewind().unwrap();
guest_raw_region
.read_exact_volatile_from(write_addr, &mut file_to_write_raw_region, size_of_file)
.read_exact_from(write_addr, &mut file_to_write_raw_region, size_of_file)
.unwrap();
let mut file_read_from_raw_region = TempFile::new().unwrap().into_file();
file_read_from_raw_region
.set_len(size_of_file as u64)
.unwrap();
guest_raw_region
.write_all_volatile_to(write_addr, &mut file_read_from_raw_region, size_of_file)
.write_all_to(write_addr, &mut file_read_from_raw_region, size_of_file)
.unwrap();
file_read_from_raw_region.rewind().unwrap();
let mut content = String::new();
@@ -845,11 +842,7 @@ mod tests {
let invalid_addr = MemoryRegionAddress(0x900);
assert!(matches!(
guest_mmap_region
.read_exact_volatile_from(
invalid_addr,
&mut file_to_write_mmap_region,
size_of_file
)
.read_exact_from(invalid_addr, &mut file_to_write_mmap_region, size_of_file)
.err()
.unwrap(),
GuestMemoryError::InvalidBackendAddress
@@ -859,7 +852,7 @@ mod tests {
let invalid_addr = MemoryRegionAddress(0x900);
assert!(matches!(
guest_mmap_region
.write_all_volatile_to(invalid_addr, &mut file_read_from_mmap_region, size_of_file)
.write_all_to(invalid_addr, &mut file_read_from_mmap_region, size_of_file)
.err()
.unwrap(),
GuestMemoryError::InvalidBackendAddress
@@ -869,7 +862,7 @@ mod tests {
let invalid_addr = MemoryRegionAddress(0x900);
assert!(matches!(
guest_raw_region
.read_exact_volatile_from(invalid_addr, &mut file_to_write_raw_region, size_of_file)
.read_exact_from(invalid_addr, &mut file_to_write_raw_region, size_of_file)
.err()
.unwrap(),
GuestMemoryError::InvalidBackendAddress
@@ -879,7 +872,7 @@ mod tests {
let invalid_addr = MemoryRegionAddress(0x900);
assert!(matches!(
guest_raw_region
.write_all_volatile_to(invalid_addr, &mut file_read_from_raw_region, size_of_file)
.write_all_to(invalid_addr, &mut file_read_from_raw_region, size_of_file)
.err()
.unwrap(),
GuestMemoryError::InvalidBackendAddress
@@ -1083,16 +1076,13 @@ mod tests {
let guest_region = GuestMemoryHybrid::<()>::from_regions(regions);
assert!(matches!(
guest_region.err().unwrap(),
GuestRegionCollectionError::UnsortedMemoryRegions
Error::UnsortedMemoryRegions
));
// Error no memory region case.
let regions = Vec::<GuestRegionHybrid<()>>::new();
let guest_region = GuestMemoryHybrid::<()>::from_regions(regions);
assert!(matches!(
guest_region.err().unwrap(),
GuestRegionCollectionError::NoMemoryRegion
));
assert!(matches!(guest_region.err().unwrap(), Error::NoMemoryRegion));
}
#[test]

View File

@@ -1,6 +1,7 @@
// Copyright (C) 2022 Alibaba Cloud. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use std::io::{Read, Write};
use std::sync::atomic::Ordering;
use vm_memory::bitmap::{Bitmap, BS};
@@ -8,7 +9,7 @@ use vm_memory::mmap::NewBitmap;
use vm_memory::volatile_memory::compute_offset;
use vm_memory::{
guest_memory, volatile_memory, Address, AtomicAccess, Bytes, FileOffset, GuestAddress,
GuestMemoryRegion, GuestUsize, MemoryRegionAddress, ReadVolatile, VolatileSlice, WriteVolatile,
GuestMemoryRegion, GuestUsize, MemoryRegionAddress, VolatileSlice,
};
/// Guest memory region for virtio-fs DAX window.
@@ -72,67 +73,67 @@ impl<B: Bitmap> Bytes<MemoryRegionAddress> for GuestRegionRaw<B> {
.map_err(Into::into)
}
fn read_volatile_from<F>(
fn read_from<F>(
&self,
addr: MemoryRegionAddress,
src: &mut F,
count: usize,
) -> guest_memory::Result<usize>
where
F: ReadVolatile,
F: Read,
{
let maddr = addr.raw_value() as usize;
self.as_volatile_slice()
.unwrap()
.read_volatile_from::<F>(maddr, src, count)
.read_from::<F>(maddr, src, count)
.map_err(Into::into)
}
fn read_exact_volatile_from<F>(
fn read_exact_from<F>(
&self,
addr: MemoryRegionAddress,
src: &mut F,
count: usize,
) -> guest_memory::Result<()>
where
F: ReadVolatile,
F: Read,
{
let maddr = addr.raw_value() as usize;
self.as_volatile_slice()
.unwrap()
.read_exact_volatile_from::<F>(maddr, src, count)
.read_exact_from::<F>(maddr, src, count)
.map_err(Into::into)
}
fn write_volatile_to<F>(
fn write_to<F>(
&self,
addr: MemoryRegionAddress,
dst: &mut F,
count: usize,
) -> guest_memory::Result<usize>
where
F: WriteVolatile,
F: Write,
{
let maddr = addr.raw_value() as usize;
self.as_volatile_slice()
.unwrap()
.write_volatile_to::<F>(maddr, dst, count)
.write_to::<F>(maddr, dst, count)
.map_err(Into::into)
}
fn write_all_volatile_to<F>(
fn write_all_to<F>(
&self,
addr: MemoryRegionAddress,
dst: &mut F,
count: usize,
) -> guest_memory::Result<()>
where
F: WriteVolatile,
F: Write,
{
let maddr = addr.raw_value() as usize;
self.as_volatile_slice()
.unwrap()
.write_all_volatile_to::<F>(maddr, dst, count)
.write_all_to::<F>(maddr, dst, count)
.map_err(Into::into)
}
@@ -169,8 +170,8 @@ impl<B: Bitmap> GuestMemoryRegion for GuestRegionRaw<B> {
self.guest_base
}
fn bitmap(&self) -> BS<'_, Self::B> {
self.bitmap.slice_at(0)
fn bitmap(&self) -> &Self::B {
&self.bitmap
}
fn get_host_address(&self, addr: MemoryRegionAddress) -> guest_memory::Result<*mut u8> {
@@ -185,6 +186,18 @@ impl<B: Bitmap> GuestMemoryRegion for GuestRegionRaw<B> {
None
}
unsafe fn as_slice(&self) -> Option<&[u8]> {
// This is safe because we mapped the area at addr ourselves, so this slice will not
// overflow. However, it is possible to alias.
Some(std::slice::from_raw_parts(self.addr, self.size))
}
unsafe fn as_mut_slice(&self) -> Option<&mut [u8]> {
// This is safe because we mapped the area at addr ourselves, so this slice will not
// overflow. However, it is possible to alias.
Some(std::slice::from_raw_parts_mut(self.addr, self.size))
}
fn get_slice(
&self,
offset: MemoryRegionAddress,
@@ -203,7 +216,6 @@ impl<B: Bitmap> GuestMemoryRegion for GuestRegionRaw<B> {
(self.addr as usize + offset) as *mut _,
count,
self.bitmap.slice_at(offset),
None,
)
})
}
@@ -214,27 +226,6 @@ impl<B: Bitmap> GuestMemoryRegion for GuestRegionRaw<B> {
}
}
impl<B: Bitmap> GuestRegionRaw<B> {
/// Returns a slice corresponding to the region.
///
/// # Safety
/// This is safe because we mapped the area at addr ourselves, so this slice will not
/// overflow. However, it is possible to alias.
pub unsafe fn as_slice(&self) -> Option<&[u8]> {
Some(std::slice::from_raw_parts(self.addr, self.size))
}
/// Returns a mutable slice corresponding to the region.
///
/// # Safety
/// This is safe because we mapped the area at addr ourselves, so this slice will not
/// overflow. However, it is possible to alias.
#[allow(clippy::mut_from_ref)]
pub unsafe fn as_mut_slice(&self) -> Option<&mut [u8]> {
Some(std::slice::from_raw_parts_mut(self.addr, self.size))
}
}
#[cfg(test)]
mod tests {
extern crate vmm_sys_util;
@@ -357,7 +348,7 @@ mod tests {
unsafe { GuestRegionRaw::<()>::new(GuestAddress(0x10_0000), &mut buf as *mut _, 1024) };
let s = m.get_slice(MemoryRegionAddress(2), 3).unwrap();
assert_eq!(s.ptr_guard().as_ptr(), &buf[2] as *const _);
assert_eq!(s.as_ptr(), &mut buf[2] as *mut _);
}
/*
@@ -609,7 +600,7 @@ mod tests {
File::open(Path::new("c:\\Windows\\system32\\ntoskrnl.exe")).unwrap()
};
gm.write_obj(!0u32, addr).unwrap();
gm.read_exact_volatile_from(addr, &mut file, mem::size_of::<u32>())
gm.read_exact_from(addr, &mut file, mem::size_of::<u32>())
.unwrap();
let value: u32 = gm.read_obj(addr).unwrap();
if cfg!(unix) {
@@ -619,7 +610,7 @@ mod tests {
}
let mut sink = Vec::new();
gm.write_all_volatile_to(addr, &mut sink, mem::size_of::<u32>())
gm.write_all_to(addr, &mut sink, mem::size_of::<u32>())
.unwrap();
if cfg!(unix) {
assert_eq!(sink, vec![0; mem::size_of::<u32>()]);

View File

@@ -113,23 +113,20 @@ arm64_sys_reg!(MPIDR_EL1, 3, 0, 0, 0, 5);
/// * `mem` - Reserved DRAM for current VM.
pub fn setup_regs(vcpu: &VcpuFd, cpu_id: u8, boot_ip: u64, fdt_address: u64) -> Result<()> {
// Get the register index of the PSTATE (Processor State) register.
vcpu.set_one_reg(
arm64_core_reg!(pstate),
&(PSTATE_FAULT_BITS_64 as u128).to_le_bytes(),
)
.map_err(Error::SetCoreRegister)?;
vcpu.set_one_reg(arm64_core_reg!(pstate), PSTATE_FAULT_BITS_64 as u128)
.map_err(Error::SetCoreRegister)?;
// Other vCPUs are powered off initially awaiting PSCI wakeup.
if cpu_id == 0 {
// Setting the PC (Processor Counter) to the current program address (kernel address).
vcpu.set_one_reg(arm64_core_reg!(pc), &(boot_ip as u128).to_le_bytes())
vcpu.set_one_reg(arm64_core_reg!(pc), boot_ip as u128)
.map_err(Error::SetCoreRegister)?;
// Last mandatory thing to set -> the address pointing to the FDT (also called DTB).
// "The device tree blob (dtb) must be placed on an 8-byte boundary and must
// not exceed 2 megabytes in size." -> https://www.kernel.org/doc/Documentation/arm64/booting.txt.
// We are choosing to place it the end of DRAM. See `get_fdt_addr`.
vcpu.set_one_reg(arm64_core_reg!(regs), &(fdt_address as u128).to_le_bytes())
vcpu.set_one_reg(arm64_core_reg!(regs), fdt_address as u128)
.map_err(Error::SetCoreRegister)?;
}
Ok(())
@@ -160,10 +157,9 @@ pub fn is_system_register(regid: u64) -> bool {
///
/// * `vcpu` - Structure for the VCPU that holds the VCPU's fd.
pub fn read_mpidr(vcpu: &VcpuFd) -> Result<u64> {
let mut reg_data = 0u128.to_le_bytes();
vcpu.get_one_reg(MPIDR_EL1, &mut reg_data)
.map_err(Error::GetSysRegister)?;
Ok(u128::from_le_bytes(reg_data) as u64)
vcpu.get_one_reg(MPIDR_EL1)
.map(|value| value as u64)
.map_err(Error::GetSysRegister)
}
#[cfg(test)]

View File

@@ -10,10 +10,10 @@ This repository contains the following submodules:
| Name | Arch| Description |
| --- | --- | --- |
| [`bootparam`](src/x86_64/bootparam.rs) | x86_64 | Magic addresses externally used to lay out x86_64 VMs |
| [`fdt`](src/aarch64/fdt.rs) | aarch64| Create FDT for Aarch64 systems |
| [`layout`](src/x86_64/layout.rs) | x86_64 | x86_64 layout constants |
| [`layout`](src/aarch64/layout.rs/) | aarch64 | aarch64 layout constants |
| [`mptable`](src/x86_64/mptable.rs) | x86_64 | MP Table configurations used for defining VM boot status |
| [fdt](src/aarch64/fdt.rs) | aarch64| Create FDT for Aarch64 systems |
| [layout](src/x86_64/layout.rs) | x86_64 | x86_64 layout constants |
| [layout](src/aarch64/layout.rs/) | aarch64 | aarch64 layout constants |
| [mptable](src/x86_64/mptable.rs) | x86_64 | MP Table configurations used for defining VM boot status |
## Acknowledgement

View File

@@ -10,6 +10,7 @@
use libc::c_char;
use std::collections::HashMap;
use std::io;
use std::mem;
use std::result;
use std::slice;
@@ -204,7 +205,7 @@ pub fn setup_mptable<M: GuestMemory>(
return Err(Error::AddressOverflow);
}
mem.write_slice(&vec![0u8; mp_size], base_mp)
mem.read_from(base_mp, &mut io::repeat(0), mp_size)
.map_err(|_| Error::Clear)?;
{
@@ -451,11 +452,23 @@ mod tests {
let mpc_offset = GuestAddress(u64::from(mpf_intel.0.physptr));
let mpc_table: MpcTableWrapper = mem.read_obj(mpc_offset).unwrap();
let mut buf = Vec::new();
mem.write_volatile_to(mpc_offset, &mut buf, mpc_table.0.length as usize)
struct Sum(u8);
impl io::Write for Sum {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
for v in buf.iter() {
self.0 = self.0.wrapping_add(*v);
}
Ok(buf.len())
}
fn flush(&mut self) -> io::Result<()> {
Ok(())
}
}
let mut sum = Sum(0);
mem.write_to(mpc_offset, &mut sum, mpc_table.0.length as usize)
.unwrap();
let sum: u8 = buf.iter().fold(0u8, |acc, &v| acc.wrapping_add(v));
assert_eq!(sum, 0);
assert_eq!(sum.0, 0);
}
#[test]

View File

@@ -25,7 +25,7 @@ use std::collections::HashMap;
use std::io::{Error, ErrorKind};
use std::sync::{Arc, Mutex};
use kvm_bindings::{kvm_irq_routing_entry, KvmIrqRouting as KvmIrqRoutingWrapper};
use kvm_bindings::{kvm_irq_routing, kvm_irq_routing_entry};
use kvm_ioctls::VmFd;
use super::*;
@@ -196,18 +196,26 @@ impl KvmIrqRouting {
}
fn set_routing(&self, routes: &HashMap<u64, kvm_irq_routing_entry>) -> Result<()> {
let mut irq_routing = KvmIrqRoutingWrapper::new(routes.len())
.map_err(|_| Error::other("Failed to create KvmIrqRouting"))?;
// Allocate enough buffer memory.
let elem_sz = std::mem::size_of::<kvm_irq_routing>();
let total_sz = std::mem::size_of::<kvm_irq_routing_entry>() * routes.len() + elem_sz;
let elem_cnt = total_sz.div_ceil(elem_sz);
let mut irq_routings = Vec::<kvm_irq_routing>::with_capacity(elem_cnt);
irq_routings.resize_with(elem_cnt, Default::default);
{
let irq_routing_entries = irq_routing.as_mut_slice();
for (idx, entry) in routes.values().enumerate() {
irq_routing_entries[idx] = *entry;
}
// Prepare the irq_routing header.
let irq_routing = &mut irq_routings[0];
irq_routing.nr = routes.len() as u32;
irq_routing.flags = 0;
// Safe because we have just allocated enough memory above.
let irq_routing_entries = unsafe { irq_routing.entries.as_mut_slice(routes.len()) };
for (idx, entry) in routes.values().enumerate() {
irq_routing_entries[idx] = *entry;
}
self.vm_fd
.set_gsi_routing(&irq_routing)
.set_gsi_routing(irq_routing)
.map_err(from_sys_util_errno)?;
Ok(())

View File

@@ -242,7 +242,7 @@ mod tests {
let metrics = Arc::new(SerialDeviceMetrics::default());
let out: Arc<Mutex<Option<Box<dyn std::io::Write + Send + 'static>>>> =
let out: Arc<Mutex<Option<Box<(dyn std::io::Write + Send + 'static)>>>> =
Arc::new(Mutex::new(Some(Box::new(std::io::sink()))));
let mut serial = SerialDevice {
serial: Serial::with_events(

View File

@@ -23,22 +23,24 @@ dbs-interrupt = { workspace = true, features = [
"kvm-legacy-irq",
"kvm-msi-irq",
] }
downcast-rs = "1.2.0"
byteorder = "1.4.3"
serde = "1.0.27"
vm-memory = { workspace = true }
kvm-ioctls = { workspace = true }
kvm-bindings = { workspace = true }
vfio-ioctls = { workspace = true }
vfio-bindings = { workspace = true }
vm-memory = {workspace = true}
kvm-ioctls = {workspace = true}
kvm-bindings = {workspace = true}
vfio-ioctls = {workspace = true}
vfio-bindings = {workspace = true}
libc = "0.2.39"
virtio-queue = { workspace = true }
dbs-utils = { workspace = true }
vmm-sys-util = {workspace = true}
virtio-queue = {workspace = true}
dbs-utils = {workspace = true}
[dev-dependencies]
dbs-arch = { workspace = true }
kvm-ioctls = { workspace = true }
kvm-ioctls = {workspace = true}
test-utils = { workspace = true }
nix = { workspace = true }

View File

@@ -1174,6 +1174,7 @@ pub(crate) mod tests {
use dbs_virtio_devices::Result as VirtIoResult;
use dbs_virtio_devices::{
ActivateResult, VirtioDeviceConfig, VirtioDeviceInfo, VirtioSharedMemory,
DEVICE_ACKNOWLEDGE, DEVICE_DRIVER, DEVICE_DRIVER_OK, DEVICE_FEATURES_OK, DEVICE_INIT,
};
use dbs_address_space::{AddressSpaceLayout, AddressSpaceRegion, AddressSpaceRegionType};

View File

@@ -3,7 +3,7 @@
This crate is a collection of modules that provides helpers and utilities to create a TDX Dragonball VM.
Currently this crate involves:
- `tdx-ioctls`
- tdx-ioctls
## Acknowledgement

View File

@@ -11,7 +11,7 @@ use kvm_bindings::{CpuId, __IncompleteArrayField, KVMIO};
use thiserror::Error;
use vmm_sys_util::fam::{FamStruct, FamStructWrapper};
use vmm_sys_util::ioctl::ioctl_with_val;
use vmm_sys_util::{generate_fam_struct_impl, ioctl_iowr_nr};
use vmm_sys_util::{generate_fam_struct_impl, ioctl_ioc_nr, ioctl_iowr_nr};
/// Tdx capability list.
pub type TdxCaps = FamStructWrapper<TdxCapabilities>;

Some files were not shown because too many files have changed in this diff Show More