Compare commits


4 Commits

Author           SHA1        Message                                                      Date
Aurélien Bombo   29e74892f4  test3                                                        2025-09-11 12:26:53 -05:00
Aurélien Bombo   5c5e2c68bc  test2                                                        2025-09-11 12:23:18 -05:00
Aurélien Bombo   fd4f7638e5  test                                                         2025-09-11 12:15:31 -05:00
Aurélien Bombo   a5641e27db  test (Signed-off-by: Aurélien Bombo <abombo@microsoft.com>)  2025-09-11 09:08:29 -05:00
2172 changed files with 77222 additions and 144701 deletions

View File

@@ -7,24 +7,20 @@
self-hosted-runner:
# Labels of self-hosted runner that linter should ignore
labels:
- amd64-nvidia-a100
- amd64-nvidia-h100-snp
- arm64-k8s
- ubuntu-22.04-arm
- garm-ubuntu-2004
- garm-ubuntu-2004-smaller
- garm-ubuntu-2204
- garm-ubuntu-2304
- garm-ubuntu-2304-smaller
- garm-ubuntu-2204-smaller
- ppc64le
- ppc64le-k8s
- ppc64le-small
- ubuntu-24.04-ppc64le
- ubuntu-24.04-s390x
- k8s-ppc64le
- metrics
- ppc64le
- riscv-builder
- sev-snp
- s390x
- s390x-large
- tdx
- ubuntu-24.04-arm
- amd64-nvidia-a100

View File

@@ -12,6 +12,7 @@ updates:
- "/src/tools/agent-ctl"
- "/src/tools/genpolicy"
- "/src/tools/kata-ctl"
- "/src/tools/runk"
- "/src/tools/trace-forwarder"
schedule:
interval: "daily"
@@ -65,9 +66,6 @@ updates:
rustix:
patterns:
- rustix
slab:
patterns:
- slab
time:
patterns:
- time

View File

@@ -2,17 +2,24 @@ name: Lint GHA workflows
on:
workflow_dispatch:
pull_request:
pull_request_target:
types:
- opened
- edited
- reopened
- synchronize
paths:
- '.github/workflows/**'
permissions: {}
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
run-actionlint:
name: run-actionlint
env:
GH_TOKEN: ${{ github.token }}
runs-on: ubuntu-24.04
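
A note on the concurrency block in this hunk: `github.event.pull_request.number || github.ref` keys the group on the PR number for pull-request events and falls back to the ref for other triggers, so re-pushing a PR cancels its previous in-flight run without colliding with push-triggered runs. A minimal self-contained sketch of the same pattern (the workflow name and job are illustrative, not from this repo):

name: example-lint   # hypothetical workflow, for illustration only
on: [pull_request, push]
concurrency:
  # PR events key on the PR number; other events fall back to the ref.
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
permissions: {}
jobs:
  noop:
    runs-on: ubuntu-24.04
    steps:
      - run: echo "ok"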

View File

@@ -17,7 +17,6 @@ permissions: {}
jobs:
run-containerd-sandboxapi:
name: run-containerd-sandboxapi
strategy:
# We can set this to true whenever we're 100% sure that
# all the tests are not flaky, otherwise we'll fail
@@ -66,12 +65,11 @@ jobs:
run: bash tests/integration/cri-containerd/gha-run.sh run
run-containerd-stability:
name: run-containerd-stability
strategy:
fail-fast: false
matrix:
containerd_version: ['lts', 'active']
vmm: ['clh', 'cloud-hypervisor', 'dragonball', 'qemu', 'qemu-runtime-rs']
vmm: ['clh', 'cloud-hypervisor', 'dragonball', 'qemu', 'stratovirt']
runs-on: ubuntu-22.04
env:
CONTAINERD_VERSION: ${{ matrix.containerd_version }}
@@ -109,7 +107,6 @@ jobs:
run: bash tests/stability/gha-run.sh run
run-nydus:
name: run-nydus
strategy:
# We can set this to true whenever we're 100% sure that
# all the tests are not flaky, otherwise we'll fail
@@ -117,7 +114,7 @@ jobs:
fail-fast: false
matrix:
containerd_version: ['lts', 'active']
vmm: ['clh', 'qemu', 'dragonball', 'qemu-runtime-rs']
vmm: ['clh', 'qemu', 'dragonball', 'stratovirt']
runs-on: ubuntu-22.04
env:
CONTAINERD_VERSION: ${{ matrix.containerd_version }}
@@ -147,24 +144,49 @@ jobs:
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-artifacts
- name: get-kata-tools-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-tools-artifacts
- name: Install kata
run: bash tests/integration/nydus/gha-run.sh install-kata kata-artifacts
- name: Install kata-tools
run: bash tests/integration/nydus/gha-run.sh install-kata-tools kata-tools-artifacts
- name: Run nydus tests
timeout-minutes: 10
run: bash tests/integration/nydus/gha-run.sh run
run-runk:
# Skip runk tests as we have no maintainers. TODO: Decide when to remove altogether
if: false
runs-on: ubuntu-22.04
env:
CONTAINERD_VERSION: lts
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.commit-hash }}
fetch-depth: 0
persist-credentials: false
- name: Rebase atop of the latest target branch
run: |
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: Install dependencies
run: bash tests/integration/runk/gha-run.sh install-dependencies
- name: get-kata-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-artifacts
- name: Install kata
run: bash tests/integration/runk/gha-run.sh install-kata kata-artifacts
- name: Run runk tests
timeout-minutes: 10
run: bash tests/integration/runk/gha-run.sh run
run-tracing:
name: run-tracing
strategy:
fail-fast: false
matrix:
@@ -209,7 +231,6 @@ jobs:
run: bash tests/functional/tracing/gha-run.sh run
run-vfio:
name: run-vfio
strategy:
fail-fast: false
matrix:
@@ -252,8 +273,53 @@ jobs:
timeout-minutes: 15
run: bash tests/functional/vfio/gha-run.sh run
run-docker-tests:
strategy:
# We can set this to true whenever we're 100% sure that
# all the tests are not flaky, otherwise we'll fail them
# all due to a single flaky instance.
fail-fast: false
matrix:
vmm:
- clh
- qemu
- dragonball
- cloud-hypervisor
runs-on: ubuntu-22.04
env:
KATA_HYPERVISOR: ${{ matrix.vmm }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.commit-hash }}
fetch-depth: 0
persist-credentials: false
- name: Rebase atop of the latest target branch
run: |
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: Install dependencies
run: bash tests/integration/docker/gha-run.sh install-dependencies
env:
GH_TOKEN: ${{ github.token }}
- name: get-kata-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-artifacts
- name: Install kata
run: bash tests/integration/docker/gha-run.sh install-kata kata-artifacts
- name: Run docker smoke test
timeout-minutes: 5
run: bash tests/integration/docker/gha-run.sh run
run-nerdctl-tests:
name: run-nerdctl-tests
strategy:
# We can set this to true whenever we're 100% sure that
# all the tests are not flaky, otherwise we'll fail them
@@ -265,7 +331,6 @@ jobs:
- dragonball
- qemu
- cloud-hypervisor
- qemu-runtime-rs
runs-on: ubuntu-22.04
env:
KATA_HYPERVISOR: ${{ matrix.vmm }}
@@ -314,7 +379,6 @@ jobs:
retention-days: 1
run-kata-agent-apis:
name: run-kata-agent-apis
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -340,16 +404,8 @@ jobs:
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-artifacts
- name: get-kata-tools-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-tools-artifacts
- name: Install kata & kata-tools
run: |
bash tests/functional/kata-agent-apis/gha-run.sh install-kata kata-artifacts
bash tests/functional/kata-agent-apis/gha-run.sh install-kata-tools kata-tools-artifacts
- name: Install kata
run: bash tests/functional/kata-agent-apis/gha-run.sh install-kata kata-artifacts
- name: Run kata agent api tests with agent-ctl
run: bash tests/functional/kata-agent-apis/gha-run.sh run

View File

@@ -17,7 +17,6 @@ permissions: {}
jobs:
run-containerd-sandboxapi:
name: run-containerd-sandboxapi
strategy:
# We can set this to true whenever we're 100% sure that
# all the tests are not flaky, otherwise we'll fail
@@ -66,7 +65,6 @@ jobs:
run: bash tests/integration/cri-containerd/gha-run.sh run
run-containerd-stability:
name: run-containerd-stability
strategy:
fail-fast: false
matrix:
@@ -106,3 +104,43 @@ jobs:
- name: Run containerd-stability tests
timeout-minutes: 15
run: bash tests/stability/gha-run.sh run
run-docker-tests:
strategy:
# We can set this to true whenever we're 100% sure that
# all the tests are not flaky, otherwise we'll fail them
# all due to a single flaky instance.
fail-fast: false
matrix:
vmm: ['qemu']
runs-on: s390x-large
env:
KATA_HYPERVISOR: ${{ matrix.vmm }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.commit-hash }}
fetch-depth: 0
persist-credentials: false
- name: Rebase atop of the latest target branch
run: |
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: Install dependencies
run: bash tests/integration/docker/gha-run.sh install-dependencies
- name: get-kata-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-static-tarball-s390x${{ inputs.tarball-suffix }}
path: kata-artifacts
- name: Install kata
run: bash tests/integration/docker/gha-run.sh install-kata kata-artifacts
- name: Run docker smoke test
timeout-minutes: 5
run: bash tests/integration/docker/gha-run.sh run

View File

@@ -17,7 +17,6 @@ permissions: {}
name: Build checks preview riscv64
jobs:
check:
name: check
runs-on: ${{ inputs.instance }}
strategy:
fail-fast: false
@@ -124,11 +123,9 @@ jobs:
echo "GITHUB_RUNNER_CI_NON_VIRT=true" >> "$GITHUB_ENV"
- name: Running `${{ matrix.command }}` for ${{ matrix.component.name }}
run: |
cd "${COMPONENT_PATH}"
${COMMAND}
cd ${{ matrix.component.path }}
${{ matrix.command }}
env:
COMMAND: ${{ matrix.command }}
COMPONENT_PATH: ${{ matrix.component.path }}
RUST_BACKTRACE: "1"
RUST_LIB_BACKTRACE: "0"
SKIP_GO_VERSION_CHECK: "1"
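
The hunk above trades inline `${{ }}` interpolation inside `run:` for environment-variable indirection: the matrix values arrive via `env:` and are quoted when the shell expands them, which avoids script injection if a matrix value ever carries shell metacharacters. A minimal sketch of the hardened form (the job, component, and command are illustrative):

jobs:
  check:
    runs-on: ubuntu-24.04
    strategy:
      matrix:
        component:
          - { name: demo, path: src/demo }   # hypothetical component
    steps:
      - name: Run `make check` for ${{ matrix.component.name }}
        run: |
          # Untrusted values come in through env and are quoted here,
          # instead of being spliced into the script text by ${{ }}.
          cd "${COMPONENT_PATH}"
          make check
        env:
          COMPONENT_PATH: ${{ matrix.component.path }}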

View File

@@ -11,13 +11,7 @@ permissions: {}
name: Build checks
jobs:
check:
name: check
runs-on: >-
${{
( contains(inputs.instance, 's390x') && matrix.component.name == 'runtime' ) && 's390x' ||
( contains(inputs.instance, 'ppc64le') && (matrix.component.name == 'runtime' || matrix.component.name == 'agent') ) && 'ppc64le' ||
inputs.instance
}}
runs-on: ${{ inputs.instance }}
strategy:
fail-fast: false
matrix:
@@ -52,7 +46,6 @@ jobs:
path: src/libs
needs:
- rust
- protobuf-compiler
- name: agent-ctl
path: src/tools/agent-ctl
needs:
@@ -63,7 +56,6 @@ jobs:
path: src/tools/kata-ctl
needs:
- rust
- protobuf-compiler
- name: trace-forwarder
path: src/tools/trace-forwarder
needs:
@@ -73,8 +65,6 @@ jobs:
needs:
- rust
- protobuf-compiler
instance:
- ${{ inputs.instance }}
steps:
- name: Adjust a permission for repo
@@ -136,11 +126,9 @@ jobs:
echo "GITHUB_RUNNER_CI_NON_VIRT=true" >> "$GITHUB_ENV"
- name: Running `${{ matrix.command }}` for ${{ matrix.component.name }}
run: |
cd "${COMPONENT_PATH}"
eval "${COMMAND}"
cd ${{ matrix.component.path }}
${{ matrix.command }}
env:
COMMAND: ${{ matrix.command }}
COMPONENT_PATH: ${{ matrix.component.path }}
RUST_BACKTRACE: "1"
RUST_LIB_BACKTRACE: "0"
SKIP_GO_VERSION_CHECK: "1"

View File

@@ -30,7 +30,6 @@ permissions: {}
jobs:
build-asset:
name: build-asset
runs-on: ubuntu-22.04
permissions:
contents: read
@@ -41,11 +40,16 @@ jobs:
matrix:
asset:
- agent
- agent-ctl
- busybox
- cloud-hypervisor
- cloud-hypervisor-glibc
- coco-guest-components
- csi-kata-directvolume
- firecracker
- genpolicy
- kata-ctl
- kata-manager
- kernel
- kernel-confidential
- kernel-dragonball-experimental
@@ -54,11 +58,12 @@ jobs:
- nydus
- ovmf
- ovmf-sev
- ovmf-tdx
- pause-image
- qemu
- qemu-snp-experimental
- qemu-tdx-experimental
- stratovirt
- trace-forwarder
- virtiofsd
stage:
- ${{ inputs.stage }}
@@ -91,6 +96,7 @@ jobs:
- name: Build ${{ matrix.asset }}
id: build
run: |
[[ "${KATA_ASSET}" == *"nvidia"* ]] && echo "KBUILD_SIGN_PIN=${{ secrets.KBUILD_SIGN_PIN }}" >> "${GITHUB_ENV}"
make "${KATA_ASSET}-tarball"
build_dir=$(readlink -f build)
# store-artifact does not work with symlink
@@ -104,19 +110,16 @@ jobs:
ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
TARGET_BRANCH: ${{ inputs.target-branch }}
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
KBUILD_SIGN_PIN: ${{ contains(matrix.asset, 'nvidia') && secrets.KBUILD_SIGN_PIN || '' }}
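
Two ways of scoping the NVIDIA signing PIN appear in this hunk: appending `KEY=value` to `$GITHUB_ENV` exports the variable to every later step of the job, while the `contains(matrix.asset, 'nvidia') && secrets.KBUILD_SIGN_PIN || ''` entry scopes it to a single step. A minimal sketch of the `$GITHUB_ENV` mechanism (step names and the `SIGN_PIN` indirection are illustrative):

- name: Conditionally export a variable to later steps
  run: |
    # Appending KEY=value to "$GITHUB_ENV" makes the variable visible to
    # every subsequent step of this job, unlike a plain shell export.
    if [[ "${KATA_ASSET}" == *"nvidia"* ]]; then
      echo "KBUILD_SIGN_PIN=${SIGN_PIN}" >> "${GITHUB_ENV}"
    fi
  env:
    KATA_ASSET: ${{ matrix.asset }}
    SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}   # illustrative indirection
- name: A later step in the same job can read it
  run: echo "KBUILD_SIGN_PIN is ${KBUILD_SIGN_PIN:+set}"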
- name: Parse OCI image name and digest
id: parse-oci-segments
if: ${{ env.PERFORM_ATTESTATION == 'yes' }}
env:
KATA_ASSET: ${{ matrix.asset }}
run: |
oci_image="$(<"build/${KATA_ASSET}-oci-image")"
oci_image="$(<"build/${{ matrix.asset }}-oci-image")"
echo "oci-name=${oci_image%@*}" >> "$GITHUB_OUTPUT"
echo "oci-digest=${oci_image#*@}" >> "$GITHUB_OUTPUT"
- uses: oras-project/setup-oras@22ce207df3b08e061f537244349aac6ae1d214f6 # v1.2.4
- uses: oras-project/setup-oras@5c0b487ce3fe0ce3ab0d034e63669e426e294e4d # v1.2.2
if: ${{ env.PERFORM_ATTESTATION == 'yes' }}
with:
version: "1.2.0"
@@ -148,13 +151,12 @@ jobs:
if: ${{ startsWith(matrix.asset, 'kernel-nvidia-gpu') }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: kata-artifacts-amd64-${{ matrix.asset }}-modules${{ inputs.tarball-suffix }}
path: kata-build/kata-static-${{ matrix.asset }}-modules.tar.zst
name: kata-artifacts-amd64-${{ matrix.asset }}-headers${{ inputs.tarball-suffix }}
path: kata-build/kata-static-${{ matrix.asset }}-headers.tar.zst
retention-days: 15
if-no-files-found: error
build-asset-rootfs:
name: build-asset-rootfs
runs-on: ubuntu-22.04
needs: build-asset
permissions:
@@ -166,8 +168,6 @@ jobs:
- rootfs-image
- rootfs-image-confidential
- rootfs-image-mariner
- rootfs-image-nvidia-gpu
- rootfs-image-nvidia-gpu-confidential
- rootfs-initrd
- rootfs-initrd-confidential
- rootfs-initrd-nvidia-gpu
@@ -203,6 +203,7 @@ jobs:
- name: Build ${{ matrix.asset }}
id: build
run: |
[[ "${KATA_ASSET}" == *"nvidia"* ]] && echo "KBUILD_SIGN_PIN=${{ secrets.KBUILD_SIGN_PIN }}" >> "${GITHUB_ENV}"
./tests/gha-adjust-to-use-prebuilt-components.sh kata-artifacts "${KATA_ASSET}"
make "${KATA_ASSET}-tarball"
build_dir=$(readlink -f build)
@@ -217,7 +218,6 @@ jobs:
ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
TARGET_BRANCH: ${{ inputs.target-branch }}
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
KBUILD_SIGN_PIN: ${{ contains(matrix.asset, 'nvidia') && secrets.KBUILD_SIGN_PIN || '' }}
- name: store-artifact ${{ matrix.asset }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
@@ -229,7 +229,6 @@ jobs:
# We don't need the binaries installed in the rootfs as part of the release tarball, so we can delete them now that we've built the rootfs
remove-rootfs-binary-artifacts:
name: remove-rootfs-binary-artifacts
runs-on: ubuntu-22.04
needs: build-asset-rootfs
strategy:
@@ -237,8 +236,8 @@ jobs:
asset:
- busybox
- coco-guest-components
- kernel-nvidia-gpu-modules
- kernel-nvidia-gpu-confidential-modules
- kernel-nvidia-gpu-headers
- kernel-nvidia-gpu-confidential-headers
- pause-image
steps:
- uses: geekyeggo/delete-artifact@f275313e70c08f6120db482d7a6b98377786765b # v5.1.0
@@ -247,7 +246,6 @@ jobs:
# We don't need the binaries installed in the rootfs as part of the release tarball, so we can delete them now that we've built the rootfs
remove-rootfs-binary-artifacts-for-release:
name: remove-rootfs-binary-artifacts-for-release
runs-on: ubuntu-22.04
needs: build-asset-rootfs
strategy:
@@ -261,7 +259,6 @@ jobs:
name: kata-artifacts-amd64-${{ matrix.asset}}${{ inputs.tarball-suffix }}
build-asset-shim-v2:
name: build-asset-shim-v2
runs-on: ubuntu-22.04
needs: [build-asset, build-asset-rootfs, remove-rootfs-binary-artifacts, remove-rootfs-binary-artifacts-for-release]
permissions:
@@ -323,7 +320,6 @@ jobs:
if-no-files-found: error
create-kata-tarball:
name: create-kata-tarball
runs-on: ubuntu-22.04
needs: [build-asset, build-asset-rootfs, build-asset-shim-v2]
permissions:
@@ -359,104 +355,3 @@ jobs:
path: kata-static.tar.zst
retention-days: 15
if-no-files-found: error
build-tools-asset:
name: build-tools-asset
runs-on: ubuntu-22.04
permissions:
contents: read
packages: write
strategy:
matrix:
asset:
- agent-ctl
- csi-kata-directvolume
- genpolicy
- kata-ctl
- kata-manager
- trace-forwarder
stage:
- ${{ inputs.stage }}
steps:
- name: Login to Kata Containers quay.io
if: ${{ inputs.push-to-registry == 'yes' }}
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.commit-hash }}
fetch-depth: 0 # This is needed in order to keep the commit ids history
persist-credentials: false
- name: Rebase atop of the latest target branch
run: |
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: Build ${{ matrix.asset }}
id: build
run: |
make "${KATA_ASSET}-tarball"
build_dir=$(readlink -f build)
# store-artifact does not work with symlink
mkdir -p kata-tools-build && cp "${build_dir}"/kata-static-"${KATA_ASSET}"*.tar.* kata-tools-build/.
env:
KATA_ASSET: ${{ matrix.asset }}
TAR_OUTPUT: ${{ matrix.asset }}.tar.gz
PUSH_TO_REGISTRY: ${{ inputs.push-to-registry }}
ARTEFACT_REGISTRY: ghcr.io
ARTEFACT_REGISTRY_USERNAME: ${{ github.actor }}
ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
TARGET_BRANCH: ${{ inputs.target-branch }}
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
- name: store-artifact ${{ matrix.asset }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: kata-tools-artifacts-amd64-${{ matrix.asset }}${{ inputs.tarball-suffix }}
path: kata-tools-build/kata-static-${{ matrix.asset }}.tar.zst
retention-days: 15
if-no-files-found: error
create-kata-tools-tarball:
name: create-kata-tools-tarball
runs-on: ubuntu-22.04
needs: [build-tools-asset]
permissions:
contents: read
packages: write
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.commit-hash }}
fetch-depth: 0
fetch-tags: true
persist-credentials: false
- name: Rebase atop of the latest target branch
run: |
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: get-artifacts
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
pattern: kata-tools-artifacts-amd64-*${{ inputs.tarball-suffix }}
path: kata-tools-artifacts
merge-multiple: true
- name: merge-artifacts
run: |
./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-tools-artifacts versions.yaml kata-tools-static.tar.zst
env:
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
- name: store-artifacts
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-tools-static.tar.zst
retention-days: 15
if-no-files-found: error

View File

@@ -23,15 +23,12 @@ on:
secrets:
QUAY_DEPLOYER_PASSWORD:
required: false
KBUILD_SIGN_PIN:
required: true
permissions: {}
jobs:
build-asset:
name: build-asset
runs-on: ubuntu-24.04-arm
runs-on: ubuntu-22.04-arm
permissions:
contents: read
packages: write
@@ -47,10 +44,10 @@ jobs:
- kernel
- kernel-dragonball-experimental
- kernel-nvidia-gpu
- kernel-cca-confidential
- nydus
- ovmf
- qemu
- stratovirt
- virtiofsd
env:
PERFORM_ATTESTATION: ${{ matrix.asset == 'agent' && inputs.push-to-registry == 'yes' && 'yes' || 'no' }}
@@ -90,19 +87,16 @@ jobs:
ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
TARGET_BRANCH: ${{ inputs.target-branch }}
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
KBUILD_SIGN_PIN: ${{ contains(matrix.asset, 'nvidia') && secrets.KBUILD_SIGN_PIN || '' }}
- name: Parse OCI image name and digest
id: parse-oci-segments
if: ${{ env.PERFORM_ATTESTATION == 'yes' }}
env:
KATA_ASSET: ${{ matrix.asset }}
run: |
oci_image="$(<"build/${KATA_ASSET}-oci-image")"
oci_image="$(<"build/${{ matrix.asset }}-oci-image")"
echo "oci-name=${oci_image%@*}" >> "$GITHUB_OUTPUT"
echo "oci-digest=${oci_image#*@}" >> "$GITHUB_OUTPUT"
- uses: oras-project/setup-oras@22ce207df3b08e061f537244349aac6ae1d214f6 # v1.2.4
- uses: oras-project/setup-oras@5c0b487ce3fe0ce3ab0d034e63669e426e294e4d # v1.2.2
if: ${{ env.PERFORM_ATTESTATION == 'yes' }}
with:
version: "1.2.0"
@@ -134,14 +128,13 @@ jobs:
if: ${{ startsWith(matrix.asset, 'kernel-nvidia-gpu') }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: kata-artifacts-arm64-${{ matrix.asset }}-modules${{ inputs.tarball-suffix }}
path: kata-build/kata-static-${{ matrix.asset }}-modules.tar.zst
name: kata-artifacts-arm64-${{ matrix.asset }}-headers${{ inputs.tarball-suffix }}
path: kata-build/kata-static-${{ matrix.asset }}-headers.tar.zst
retention-days: 15
if-no-files-found: error
build-asset-rootfs:
name: build-asset-rootfs
runs-on: ubuntu-24.04-arm
runs-on: ubuntu-22.04-arm
needs: build-asset
permissions:
contents: read
@@ -150,7 +143,6 @@ jobs:
matrix:
asset:
- rootfs-image
- rootfs-image-nvidia-gpu
- rootfs-initrd
- rootfs-initrd-nvidia-gpu
steps:
@@ -197,7 +189,6 @@ jobs:
ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
TARGET_BRANCH: ${{ inputs.target-branch }}
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
KBUILD_SIGN_PIN: ${{ contains(matrix.asset, 'nvidia') && secrets.KBUILD_SIGN_PIN || '' }}
- name: store-artifact ${{ matrix.asset }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
@@ -209,14 +200,13 @@ jobs:
# We don't need the binaries installed in the rootfs as part of the release tarball, so we can delete them now that we've built the rootfs
remove-rootfs-binary-artifacts:
name: remove-rootfs-binary-artifacts
runs-on: ubuntu-24.04-arm
runs-on: ubuntu-22.04-arm
needs: build-asset-rootfs
strategy:
matrix:
asset:
- busybox
- kernel-nvidia-gpu-modules
- kernel-nvidia-gpu-headers
steps:
- uses: geekyeggo/delete-artifact@f275313e70c08f6120db482d7a6b98377786765b # v5.1.0
with:
@@ -224,8 +214,7 @@ jobs:
# We don't need the binaries installed in the rootfs as part of the release tarball, so we can delete them now that we've built the rootfs
remove-rootfs-binary-artifacts-for-release:
name: remove-rootfs-binary-artifacts-for-release
runs-on: ubuntu-24.04-arm
runs-on: ubuntu-22.04-arm
needs: build-asset-rootfs
strategy:
matrix:
@@ -238,8 +227,7 @@ jobs:
name: kata-artifacts-arm64-${{ matrix.asset}}${{ inputs.tarball-suffix }}
build-asset-shim-v2:
name: build-asset-shim-v2
runs-on: ubuntu-24.04-arm
runs-on: ubuntu-22.04-arm
needs: [build-asset, build-asset-rootfs, remove-rootfs-binary-artifacts, remove-rootfs-binary-artifacts-for-release]
permissions:
contents: read
@@ -298,8 +286,7 @@ jobs:
if-no-files-found: error
create-kata-tarball:
name: create-kata-tarball
runs-on: ubuntu-24.04-arm
runs-on: ubuntu-22.04-arm
needs: [build-asset, build-asset-rootfs, build-asset-shim-v2]
permissions:
contents: read

View File

@@ -28,11 +28,10 @@ permissions: {}
jobs:
build-asset:
name: build-asset
permissions:
contents: read
packages: write
runs-on: ubuntu-24.04-ppc64le
runs-on: ppc64le
strategy:
matrix:
asset:
@@ -88,8 +87,7 @@ jobs:
if-no-files-found: error
build-asset-rootfs:
name: build-asset-rootfs
runs-on: ubuntu-24.04-ppc64le
runs-on: ppc64le
needs: build-asset
permissions:
contents: read
@@ -155,7 +153,6 @@ jobs:
# We don't need the binaries installed in the rootfs as part of the release tarball, so we can delete them now that we've built the rootfs
remove-rootfs-binary-artifacts:
name: remove-rootfs-binary-artifacts
runs-on: ubuntu-22.04
needs: build-asset-rootfs
strategy:
@@ -169,8 +166,7 @@ jobs:
name: kata-artifacts-ppc64le-${{ matrix.asset}}${{ inputs.tarball-suffix }}
build-asset-shim-v2:
name: build-asset-shim-v2
runs-on: ubuntu-24.04-ppc64le
runs-on: ppc64le
needs: [build-asset, build-asset-rootfs, remove-rootfs-binary-artifacts]
permissions:
contents: read
@@ -229,8 +225,7 @@ jobs:
if-no-files-found: error
create-kata-tarball:
name: create-kata-tarball
runs-on: ubuntu-24.04-ppc64le
runs-on: ppc64le
needs: [build-asset, build-asset-rootfs, build-asset-shim-v2]
permissions:
contents: read

View File

@@ -20,12 +20,14 @@ on:
required: false
type: string
default: ""
secrets:
QUAY_DEPLOYER_PASSWORD:
required: true
permissions: {}
jobs:
build-asset:
name: build-asset
runs-on: riscv-builder
permissions:
contents: read
@@ -38,6 +40,14 @@ jobs:
- kernel
- virtiofsd
steps:
- name: Login to Kata Containers quay.io
if: ${{ inputs.push-to-registry == 'yes' }}
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.commit-hash }}
@@ -71,5 +81,5 @@ jobs:
with:
name: kata-artifacts-riscv64-${{ matrix.asset }}${{ inputs.tarball-suffix }}
path: kata-build/kata-static-${{ matrix.asset }}.tar.zst
retention-days: 3
retention-days: 15
if-no-files-found: error

View File

@@ -31,8 +31,7 @@ permissions: {}
jobs:
build-asset:
name: build-asset
runs-on: ubuntu-24.04-s390x
runs-on: s390x
permissions:
contents: read
packages: write
@@ -91,10 +90,8 @@ jobs:
- name: Parse OCI image name and digest
id: parse-oci-segments
if: ${{ env.PERFORM_ATTESTATION == 'yes' }}
env:
ASSET: ${{ matrix.asset }}
run: |
oci_image="$(<"build/${ASSET}-oci-image")"
oci_image="$(<"build/${{ matrix.asset }}-oci-image")"
echo "oci-name=${oci_image%@*}" >> "$GITHUB_OUTPUT"
echo "oci-digest=${oci_image#*@}" >> "$GITHUB_OUTPUT"
@@ -122,7 +119,6 @@ jobs:
if-no-files-found: error
build-asset-rootfs:
name: build-asset-rootfs
runs-on: s390x
needs: build-asset
permissions:
@@ -190,7 +186,6 @@ jobs:
if-no-files-found: error
build-asset-boot-image-se:
name: build-asset-boot-image-se
runs-on: s390x
needs: [build-asset, build-asset-rootfs]
permissions:
@@ -240,7 +235,6 @@ jobs:
# We don't need the binaries installed in the rootfs as part of the release tarball, so we can delete them now that we've built the rootfs
remove-rootfs-binary-artifacts:
name: remove-rootfs-binary-artifacts
runs-on: ubuntu-22.04
needs: [build-asset-rootfs, build-asset-boot-image-se]
strategy:
@@ -256,8 +250,7 @@ jobs:
name: kata-artifacts-s390x-${{ matrix.asset}}${{ inputs.tarball-suffix }}
build-asset-shim-v2:
name: build-asset-shim-v2
runs-on: ubuntu-24.04-s390x
runs-on: s390x
needs: [build-asset, build-asset-rootfs, remove-rootfs-binary-artifacts]
permissions:
contents: read
@@ -318,8 +311,7 @@ jobs:
if-no-files-found: error
create-kata-tarball:
name: create-kata-tarball
runs-on: ubuntu-24.04-s390x
runs-on: s390x
needs:
- build-asset
- build-asset-rootfs

View File

@@ -1,75 +0,0 @@
name: Build kubectl multi-arch image
on:
schedule:
# Run every Sunday at 00:00 UTC
- cron: '0 0 * * 0'
workflow_dispatch:
# Allow manual triggering
push:
branches:
- main
paths:
- 'tools/packaging/kubectl/Dockerfile'
- '.github/workflows/build-kubectl-image.yaml'
permissions: {}
env:
REGISTRY: quay.io
IMAGE_NAME: kata-containers/kubectl
jobs:
build-and-push:
name: Build and push multi-arch image
runs-on: ubuntu-24.04
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false
- name: Set up QEMU
uses: docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 # v3.6.0
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Login to Quay.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
with:
registry: ${{ env.REGISTRY }}
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
- name: Get kubectl version
id: kubectl-version
run: |
KUBECTL_VERSION=$(curl -L -s https://dl.k8s.io/release/stable.txt)
echo "version=${KUBECTL_VERSION}" >> "$GITHUB_OUTPUT"
- name: Generate image metadata
id: meta
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
type=raw,value=latest
type=raw,value={{date 'YYYYMMDD'}}
type=raw,value=${{ steps.kubectl-version.outputs.version }}
type=sha,prefix=
- name: Build and push multi-arch image
uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25 # v5.4.0
with:
context: tools/packaging/kubectl/
file: tools/packaging/kubectl/Dockerfile
platforms: linux/amd64,linux/arm64,linux/s390x,linux/ppc64le
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max

View File

@@ -15,7 +15,6 @@ permissions: {}
jobs:
cargo-deny-runner:
name: cargo-deny-runner
runs-on: ubuntu-22.04
steps:

View File

@@ -1,34 +0,0 @@
on:
schedule:
- cron: '0 5 * * *'
name: Nightly CI for RISC-V
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
permissions: {}
jobs:
build-kata-static-tarball-riscv:
permissions:
contents: read
packages: write
id-token: write
attestations: write
uses: ./.github/workflows/build-kata-static-tarball-riscv64.yaml
with:
tarball-suffix: -${{ github.sha }}
commit-hash: ${{ github.sha }}
target-branch: ${{ github.ref_name }}
build-checks-preview:
strategy:
fail-fast: false
matrix:
instance:
- "riscv-builder"
uses: ./.github/workflows/build-checks-preview-riscv64.yaml
with:
instance: ${{ matrix.instance }}

View File

@@ -8,7 +8,6 @@ permissions: {}
jobs:
check-internal-test-result:
name: check-internal-test-result
runs-on: s390x
strategy:
fail-fast: false

View File

@@ -1,6 +1,6 @@
name: Kata Containers CI
on:
pull_request_target: # zizmor: ignore[dangerous-triggers] See #11332.
pull_request_target:
branches:
- 'main'
types:

View File

@@ -66,7 +66,6 @@ jobs:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
build-and-publish-tee-confidential-unencrypted-image:
name: build-and-publish-tee-confidential-unencrypted-image
permissions:
contents: read
packages: write

View File

@@ -86,8 +86,6 @@ jobs:
tarball-suffix: -${{ inputs.tag }}
commit-hash: ${{ inputs.commit-hash }}
target-branch: ${{ inputs.target-branch }}
secrets:
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
publish-kata-deploy-payload-arm64:
needs: build-kata-static-tarball-arm64
@@ -102,7 +100,7 @@ jobs:
tag: ${{ inputs.tag }}-arm64
commit-hash: ${{ inputs.commit-hash }}
target-branch: ${{ inputs.target-branch }}
runner: ubuntu-24.04-arm
runner: ubuntu-22.04-arm
arch: arm64
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
@@ -134,6 +132,20 @@ jobs:
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
build-kata-static-tarball-riscv64:
permissions:
contents: read
packages: write
id-token: write
attestations: write
uses: ./.github/workflows/build-kata-static-tarball-riscv64.yaml
with:
tarball-suffix: -${{ inputs.tag }}
commit-hash: ${{ inputs.commit-hash }}
target-branch: ${{ inputs.target-branch }}
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
publish-kata-deploy-payload-s390x:
needs: build-kata-static-tarball-s390x
permissions:
@@ -147,7 +159,7 @@ jobs:
tag: ${{ inputs.tag }}-s390x
commit-hash: ${{ inputs.commit-hash }}
target-branch: ${{ inputs.target-branch }}
runner: ubuntu-24.04-s390x
runner: s390x
arch: s390x
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
@@ -165,13 +177,12 @@ jobs:
tag: ${{ inputs.tag }}-ppc64le
commit-hash: ${{ inputs.commit-hash }}
target-branch: ${{ inputs.target-branch }}
runner: ubuntu-24.04-ppc64le
runner: ppc64le
arch: ppc64le
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
build-and-publish-tee-confidential-unencrypted-image:
name: build-and-publish-tee-confidential-unencrypted-image
permissions:
contents: read
packages: write
@@ -213,7 +224,6 @@ jobs:
file: tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/Dockerfile
publish-csi-driver-amd64:
name: publish-csi-driver-amd64
needs: build-kata-static-tarball-amd64
permissions:
contents: read
@@ -233,14 +243,14 @@ jobs:
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: get-kata-tools-tarball
- name: get-kata-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64-${{ inputs.tag }}
path: kata-tools-artifacts
name: kata-static-tarball-amd64-${{ inputs.tag }}
path: kata-artifacts
- name: Install kata-tools
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
- name: Install tools
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-artifacts
- name: Copy binary into Docker context
run: |
@@ -297,6 +307,18 @@ jobs:
AZ_TENANT_ID: ${{ secrets.AZ_TENANT_ID }}
AZ_SUBSCRIPTION_ID: ${{ secrets.AZ_SUBSCRIPTION_ID }}
run-k8s-tests-on-amd64:
if: ${{ inputs.skip-test != 'yes' }}
needs: publish-kata-deploy-payload-amd64
uses: ./.github/workflows/run-k8s-tests-on-amd64.yaml
with:
registry: ghcr.io
repo: ${{ github.repository_owner }}/kata-deploy-ci
tag: ${{ inputs.tag }}-amd64
commit-hash: ${{ inputs.commit-hash }}
pr-number: ${{ inputs.pr-number }}
target-branch: ${{ inputs.target-branch }}
run-k8s-tests-on-arm64:
if: ${{ inputs.skip-test != 'yes' }}
needs: publish-kata-deploy-payload-arm64
@@ -314,7 +336,6 @@ jobs:
needs: publish-kata-deploy-payload-amd64
uses: ./.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml
with:
tarball-suffix: -${{ inputs.tag }}
registry: ghcr.io
repo: ${{ github.repository_owner }}/kata-deploy-ci
tag: ${{ inputs.tag }}-amd64
@@ -416,11 +437,13 @@ jobs:
{ containerd_version: lts, vmm: clh },
{ containerd_version: lts, vmm: dragonball },
{ containerd_version: lts, vmm: qemu },
{ containerd_version: lts, vmm: stratovirt },
{ containerd_version: lts, vmm: cloud-hypervisor },
{ containerd_version: lts, vmm: qemu-runtime-rs },
{ containerd_version: active, vmm: clh },
{ containerd_version: active, vmm: dragonball },
{ containerd_version: active, vmm: qemu },
{ containerd_version: active, vmm: stratovirt },
{ containerd_version: active, vmm: cloud-hypervisor },
{ containerd_version: active, vmm: qemu-runtime-rs },
]
@@ -468,13 +491,13 @@ jobs:
tarball-suffix: -${{ inputs.tag }}
commit-hash: ${{ inputs.commit-hash }}
target-branch: ${{ inputs.target-branch }}
runner: ppc64le-small
runner: ppc64le
arch: ppc64le
containerd_version: ${{ matrix.params.containerd_version }}
vmm: ${{ matrix.params.vmm }}
run-cri-containerd-tests-arm64:
if: false
if: ${{ inputs.skip-test != 'yes' }}
needs: build-kata-static-tarball-arm64
strategy:
fail-fast: false

View File

@@ -8,7 +8,6 @@ permissions: {}
jobs:
cleanup-resources:
name: cleanup-resources
runs-on: ubuntu-22.04
permissions:
id-token: write # Used for OIDC access to log into Azure

View File

@@ -15,17 +15,8 @@ concurrency:
name: Darwin tests
jobs:
test:
name: test
runs-on: macos-latest
steps:
- name: Install Protoc
run: |
f=$(mktemp)
curl -sSLo "$f" https://github.com/protocolbuffers/protobuf/releases/download/v28.2/protoc-28.2-osx-aarch_64.zip
mkdir -p "$HOME/.local"
unzip -d "$HOME/.local" "$f"
echo "$HOME/.local/bin" >> "${GITHUB_PATH}"
- name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
@@ -36,8 +27,5 @@ jobs:
./tests/install_go.sh -f -p
echo "/usr/local/go/bin" >> "${GITHUB_PATH}"
- name: Install Rust
run: ./tests/install_rust.sh
- name: Build utils
run: ./ci/darwin-test.sh

View File

@@ -1,14 +1,12 @@
on:
schedule:
- cron: '0 23 * * 0'
workflow_dispatch:
permissions: {}
name: Docs URL Alive Check
jobs:
test:
name: test
runs-on: ubuntu-22.04
# don't run this action on forks
if: github.repository_owner == 'kata-containers'
@@ -17,12 +15,13 @@ jobs:
steps:
- name: Set env
run: |
echo "GOPATH=${GITHUB_WORKSPACE}" >> "$GITHUB_ENV"
echo "GOPATH=${{ github.workspace }}" >> "$GITHUB_ENV"
- name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
persist-credentials: false
path: ./src/github.com/${{ github.repository }}
- name: Install golang
run: |
@@ -31,4 +30,4 @@ jobs:
- name: Docs URL Alive Check
run: |
make docs-url-alive-check
cd "${GOPATH}/src/github.com/${{ github.repository }}" && make docs-url-alive-check

View File

@@ -1,32 +0,0 @@
name: Documentation
on:
push:
branches:
- main
permissions: {}
jobs:
deploy-docs:
name: deploy-docs
permissions:
contents: read
pages: write
id-token: write
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
runs-on: ubuntu-latest
steps:
- uses: actions/configure-pages@v5
- uses: actions/checkout@v5
with:
persist-credentials: false
- uses: actions/setup-python@v5
with:
python-version: 3.x
- run: pip install zensical
- run: zensical build --clean
- uses: actions/upload-pages-artifact@v4
with:
path: site
- uses: actions/deploy-pages@v4
id: deployment

View File

@@ -35,7 +35,6 @@ permissions: {}
jobs:
skipper:
name: skipper
runs-on: ubuntu-22.04
outputs:
skip_build: ${{ steps.skipper.outputs.skip_build }}

View File

@@ -5,14 +5,12 @@ name: Gatekeeper
# reporting the status.
on:
pull_request_target: # zizmor: ignore[dangerous-triggers] See #11332.
pull_request_target:
types:
- opened
- synchronize
- reopened
- edited
- labeled
- unlabeled
permissions: {}
@@ -22,7 +20,6 @@ concurrency:
jobs:
gatekeeper:
name: gatekeeper
runs-on: ubuntu-22.04
permissions:
actions: read

View File

@@ -7,7 +7,6 @@ permissions: {}
jobs:
govulncheck:
name: govulncheck
runs-on: ubuntu-22.04
strategy:
matrix:
@@ -40,14 +39,11 @@ jobs:
- name: Build runtime binaries
run: |
cd src/runtime
make "${MAKE_TARGET}"
make ${{ matrix.make_target }}
env:
MAKE_TARGET: ${{ matrix.make_target }}
SKIP_GO_VERSION_CHECK: "1"
- name: Run govulncheck on ${{ matrix.binary }}
env:
BINARY: ${{ matrix.binary }}
run: |
cd src/runtime
bash ../../tests/govulncheck-runner.sh "./${BINARY}"
bash ../../tests/govulncheck-runner.sh "./${{ matrix.binary }}"

View File

@@ -0,0 +1,40 @@
on:
pull_request:
types:
- opened
- edited
- reopened
- synchronize
permissions: {}
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
kata-deploy-runtime-classes-check:
runs-on: ubuntu-22.04
steps:
- name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false
- name: Ensure the split-out runtime classes match the all-in-one file
run: |
pushd tools/packaging/kata-deploy/runtimeclasses/
echo "::group::Combine runtime classes"
for runtimeClass in $(find . -type f \( -name "*.yaml" -and -not -name "kata-runtimeClasses.yaml" \) | sort); do
echo "Adding ${runtimeClass} to the resultingRuntimeClasses.yaml"
cat "${runtimeClass}" >> resultingRuntimeClasses.yaml;
done
echo "::endgroup::"
echo "::group::Displaying the content of resultingRuntimeClasses.yaml"
cat resultingRuntimeClasses.yaml
echo "::endgroup::"
echo ""
echo "::group::Displaying the content of kata-runtimeClasses.yaml"
cat kata-runtimeClasses.yaml
echo "::endgroup::"
echo ""
diff resultingRuntimeClasses.yaml kata-runtimeClasses.yaml
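
When this check fails, the combined file can presumably be regenerated with the same concatenation the check performs; a minimal sketch assuming the directory layout above:

- name: Regenerate kata-runtimeClasses.yaml (illustrative)
  run: |
    cd tools/packaging/kata-deploy/runtimeclasses/
    # Concatenate every per-class YAML, in sorted order, into the all-in-one file.
    find . -type f \( -name "*.yaml" -and -not -name "kata-runtimeClasses.yaml" \) \
      | sort | xargs cat > kata-runtimeClasses.yaml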

View File

@@ -1,35 +0,0 @@
name: nydus-snapshotter-version-sync
on:
pull_request:
types:
- opened
- edited
- reopened
- synchronize
permissions: {}
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
nydus-snapshotter-version-check:
name: nydus-snapshotter-version-check
runs-on: ubuntu-22.04
steps:
- name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false
- name: Ensure the nydus-snapshotter version is in sync inside our repo
run: |
dockerfile_version=$(grep "ARG NYDUS_SNAPSHOTTER_VERSION" tools/packaging/kata-deploy/Dockerfile | cut -f2 -d'=')
versions_version=$(yq ".externals.nydus-snapshotter.version | explode(.)" versions.yaml)
if [[ "${dockerfile_version}" != "${versions_version}" ]]; then
echo "nydus-snapshotter version must be the same in the following places: "
echo "- versions.yaml: ${versions_version}"
echo "- tools/packaging/kata-deploy/Dockerfile: ${dockerfile_version}"
exit 1
fi

View File

@@ -39,7 +39,6 @@ jobs:
target-branch: ${{ github.ref_name }}
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
build-assets-s390x:
permissions:
@@ -97,7 +96,7 @@ jobs:
repo: kata-containers/kata-deploy-ci
tag: kata-containers-latest-arm64
target-branch: ${{ github.ref_name }}
runner: ubuntu-24.04-arm
runner: ubuntu-22.04-arm
arch: arm64
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
@@ -131,13 +130,12 @@ jobs:
repo: kata-containers/kata-deploy-ci
tag: kata-containers-latest-ppc64le
target-branch: ${{ github.ref_name }}
runner: ubuntu-24.04-ppc64le
runner: ppc64le
arch: ppc64le
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
publish-manifest:
name: publish-manifest
runs-on: ubuntu-22.04
permissions:
contents: read
@@ -162,42 +160,3 @@ jobs:
env:
KATA_DEPLOY_IMAGE_TAGS: "kata-containers-latest"
KATA_DEPLOY_REGISTRIES: "quay.io/kata-containers/kata-deploy-ci"
upload-helm-chart-tarball:
name: upload-helm-chart-tarball
needs: publish-manifest
runs-on: ubuntu-22.04
permissions:
packages: write # needed to push the helm chart to ghcr.io
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false
- name: Install helm
uses: azure/setup-helm@fe7b79cd5ee1e45176fcad797de68ecaf3ca4814 # v4.2.0
id: install
- name: Login to the OCI registries
env:
QUAY_DEPLOYER_USERNAME: ${{ vars.QUAY_DEPLOYER_USERNAME }}
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
GITHUB_TOKEN: ${{ github.token }}
run: |
echo "${QUAY_DEPLOYER_PASSWORD}" | helm registry login quay.io --username "${QUAY_DEPLOYER_USERNAME}" --password-stdin
echo "${GITHUB_TOKEN}" | helm registry login ghcr.io --username "${GITHUB_ACTOR}" --password-stdin
- name: Push helm chart to the OCI registries
run: |
echo "Adjusting the Chart.yaml and values.yaml"
yq eval '.version = "0.0.0-dev" | .appVersion = "0.0.0-dev"' -i tools/packaging/kata-deploy/helm-chart/kata-deploy/Chart.yaml
yq eval '.image.reference = "quay.io/kata-containers/kata-deploy-ci" | .image.tag = "kata-containers-latest"' -i tools/packaging/kata-deploy/helm-chart/kata-deploy/values.yaml
echo "Generating the chart package"
helm dependencies update tools/packaging/kata-deploy/helm-chart/kata-deploy
helm package tools/packaging/kata-deploy/helm-chart/kata-deploy
echo "Pushing the chart to the OCI registries"
helm push "kata-deploy-0.0.0-dev.tgz" oci://quay.io/kata-containers/kata-deploy-charts
helm push "kata-deploy-0.0.0-dev.tgz" oci://ghcr.io/kata-containers/kata-deploy-charts

View File

@@ -38,7 +38,6 @@ permissions: {}
jobs:
kata-payload:
name: kata-payload
permissions:
contents: read
packages: write
@@ -50,24 +49,6 @@ jobs:
fetch-depth: 0
persist-credentials: false
- name: Remove unnecessary directories to free up space
run: |
sudo rm -rf /usr/local/.ghcup
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo rm -rf /usr/local/lib/android
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf /usr/local/share/boost
sudo rm -rf /usr/lib/jvm
sudo rm -rf /usr/share/swift
sudo rm -rf /usr/local/share/powershell
sudo rm -rf /usr/local/julia*
sudo rm -rf /opt/az
sudo rm -rf /usr/local/share/chromium
sudo rm -rf /opt/microsoft
sudo rm -rf /opt/google
sudo rm -rf /usr/lib/firefox
- name: Rebase atop of the latest target branch
run: |
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"

View File

@@ -29,7 +29,6 @@ jobs:
attestations: write
kata-deploy:
name: kata-deploy
needs: build-kata-static-tarball-amd64
permissions:
contents: read

View File

@@ -8,8 +8,6 @@ on:
secrets:
QUAY_DEPLOYER_PASSWORD:
required: true
KBUILD_SIGN_PIN:
required: true
permissions: {}
@@ -21,7 +19,6 @@ jobs:
stage: release
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
permissions:
contents: read
packages: write
@@ -29,12 +26,11 @@ jobs:
attestations: write
kata-deploy:
name: kata-deploy
needs: build-kata-static-tarball-arm64
permissions:
contents: read
packages: write
runs-on: ubuntu-24.04-arm
runs-on: ubuntu-22.04-arm
steps:
- name: Login to Kata Containers ghcr.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0

View File

@@ -26,12 +26,11 @@ jobs:
attestations: write
kata-deploy:
name: kata-deploy
needs: build-kata-static-tarball-ppc64le
permissions:
contents: read
packages: write
runs-on: ubuntu-24.04-ppc64le
runs-on: ppc64le
steps:
- name: Login to Kata Containers ghcr.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0

View File

@@ -30,12 +30,11 @@ jobs:
kata-deploy:
name: kata-deploy
needs: build-kata-static-tarball-s390x
permissions:
contents: read
packages: write
runs-on: ubuntu-24.04-s390x
runs-on: s390x
steps:
- name: Login to Kata Containers ghcr.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0

View File

@@ -6,7 +6,6 @@ permissions: {}
jobs:
release:
name: release
runs-on: ubuntu-22.04
permissions:
contents: write # needed for the `gh release create` command
@@ -49,7 +48,6 @@ jobs:
target-arch: arm64
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
build-and-push-assets-s390x:
needs: release
@@ -79,7 +77,6 @@ jobs:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
publish-multi-arch-images:
name: publish-multi-arch-images
runs-on: ubuntu-22.04
needs: [build-and-push-assets-amd64, build-and-push-assets-arm64, build-and-push-assets-s390x, build-and-push-assets-ppc64le]
permissions:
@@ -117,7 +114,6 @@ jobs:
KATA_DEPLOY_REGISTRIES: "quay.io/kata-containers/kata-deploy ghcr.io/kata-containers/kata-deploy"
upload-multi-arch-static-tarball:
name: upload-multi-arch-static-tarball
needs: [build-and-push-assets-amd64, build-and-push-assets-arm64, build-and-push-assets-s390x, build-and-push-assets-ppc64le]
permissions:
contents: write # needed for the `gh release` commands
@@ -181,25 +177,7 @@ jobs:
GH_TOKEN: ${{ github.token }}
ARCHITECTURE: ppc64le
- name: Set KATA_TOOLS_STATIC_TARBALL env var
run: |
tarball=$(pwd)/kata-tools-static.tar.zst
echo "KATA_TOOLS_STATIC_TARBALL=${tarball}" >> "$GITHUB_ENV"
- name: Download amd64 tools artifacts
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64
- name: Upload amd64 static tarball tools to GitHub
run: |
./tools/packaging/release/release.sh upload-kata-tools-static-tarball
env:
GH_TOKEN: ${{ github.token }}
ARCHITECTURE: amd64
upload-versions-yaml:
name: upload-versions-yaml
needs: release
runs-on: ubuntu-22.04
permissions:
@@ -217,7 +195,6 @@ jobs:
GH_TOKEN: ${{ github.token }}
upload-cargo-vendored-tarball:
name: upload-cargo-vendored-tarball
needs: release
runs-on: ubuntu-22.04
permissions:
@@ -235,7 +212,6 @@ jobs:
GH_TOKEN: ${{ github.token }}
upload-libseccomp-tarball:
name: upload-libseccomp-tarball
needs: release
runs-on: ubuntu-22.04
permissions:
@@ -253,7 +229,6 @@ jobs:
GH_TOKEN: ${{ github.token }}
upload-helm-chart-tarball:
name: upload-helm-chart-tarball
needs: release
runs-on: ubuntu-22.04
permissions:
@@ -278,11 +253,10 @@ jobs:
- name: Login to the OCI registries
env:
QUAY_DEPLOYER_USERNAME: ${{ vars.QUAY_DEPLOYER_USERNAME }}
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
GITHUB_TOKEN: ${{ github.token }}
GITHUB_ACTOR: ${{ github.actor }}
run: |
echo "${QUAY_DEPLOYER_PASSWORD}" | helm registry login quay.io --username "${QUAY_DEPLOYER_USERNAME}" --password-stdin
echo "${GITHUB_TOKEN}" | helm registry login ghcr.io --username "${GITHUB_ACTOR}" --password-stdin
echo "${{ secrets.QUAY_DEPLOYER_PASSWORD }}" | helm registry login quay.io --username "${QUAY_DEPLOYER_USERNAME}" --password-stdin
echo "${{ github.token }}" | helm registry login ghcr.io --username "${GITHUB_ACTOR}" --password-stdin
- name: Push helm chart to the OCI registries
run: |
@@ -291,7 +265,6 @@ jobs:
helm push "kata-deploy-${release_version}.tgz" oci://ghcr.io/kata-containers/kata-deploy-charts
publish-release:
name: publish-release
needs: [ build-and-push-assets-amd64, build-and-push-assets-arm64, build-and-push-assets-s390x, build-and-push-assets-ppc64le, publish-multi-arch-images, upload-multi-arch-static-tarball, upload-versions-yaml, upload-cargo-vendored-tarball, upload-libseccomp-tarball ]
runs-on: ubuntu-22.04
permissions:

View File

@@ -38,7 +38,6 @@ permissions: {}
jobs:
run-k8s-tests:
name: run-k8s-tests
strategy:
fail-fast: false
matrix:
@@ -49,6 +48,7 @@ jobs:
- dragonball
- qemu
- qemu-runtime-rs
- stratovirt
- cloud-hypervisor
instance-type:
- small
@@ -58,13 +58,16 @@ jobs:
vmm: clh
instance-type: small
genpolicy-pull-method: oci-distribution
auto-generate-policy: yes
- host_os: cbl-mariner
vmm: clh
instance-type: small
genpolicy-pull-method: containerd
auto-generate-policy: yes
- host_os: cbl-mariner
vmm: clh
instance-type: normal
auto-generate-policy: yes
runs-on: ubuntu-22.04
permissions:
contents: read
@@ -78,8 +81,10 @@ jobs:
KATA_HOST_OS: ${{ matrix.host_os }}
KATA_HYPERVISOR: ${{ matrix.vmm }}
KUBERNETES: "vanilla"
USING_NFD: "false"
K8S_TEST_HOST_TYPE: ${{ matrix.instance-type }}
GENPOLICY_PULL_METHOD: ${{ matrix.genpolicy-pull-method }}
AUTO_GENERATE_POLICY: ${{ matrix.auto-generate-policy }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
@@ -93,14 +98,14 @@ jobs:
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: get-kata-tools-tarball
- name: get-kata-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-tools-artifacts
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-artifacts
- name: Install kata-tools
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
- name: Install kata
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-artifacts
- name: Download Azure CLI
uses: azure/setup-kubectl@776406bce94f63e41d621b960d78ee25c8b76ede # v4.0.1
@@ -135,19 +140,14 @@ jobs:
run: bash tests/integration/kubernetes/gha-run.sh get-cluster-credentials
- name: Deploy Kata
timeout-minutes: 20
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-aks
- name: Run tests
timeout-minutes: 60
run: bash tests/integration/kubernetes/gha-run.sh run-tests
- name: Report tests
if: always()
run: bash tests/integration/kubernetes/gha-run.sh report-tests
- name: Refresh OIDC token in case access token expired
if: always()
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
with:
client-id: ${{ secrets.AZ_APPID }}

View File

@@ -0,0 +1,114 @@
name: CI | Run kubernetes tests on amd64
on:
workflow_call:
inputs:
registry:
required: true
type: string
repo:
required: true
type: string
tag:
required: true
type: string
pr-number:
required: true
type: string
commit-hash:
required: false
type: string
target-branch:
required: false
type: string
default: ""
permissions: {}
jobs:
run-k8s-tests-amd64:
strategy:
fail-fast: false
matrix:
vmm:
- clh #cloud-hypervisor
- dragonball
- fc #firecracker
- qemu
- cloud-hypervisor
container_runtime:
- containerd
snapshotter:
- devmapper
k8s:
- k3s
include:
- vmm: qemu
container_runtime: crio
snapshotter: ""
k8s: k0s
runs-on: ubuntu-22.04
env:
DOCKER_REGISTRY: ${{ inputs.registry }}
DOCKER_REPO: ${{ inputs.repo }}
DOCKER_TAG: ${{ inputs.tag }}
GH_PR_NUMBER: ${{ inputs.pr-number }}
KATA_HYPERVISOR: ${{ matrix.vmm }}
KUBERNETES: ${{ matrix.k8s }}
KUBERNETES_EXTRA_PARAMS: ${{ matrix.container_runtime != 'crio' && '' || '--cri-socket remote:unix:///var/run/crio/crio.sock --kubelet-extra-args --cgroup-driver="systemd"' }}
SNAPSHOTTER: ${{ matrix.snapshotter }}
USING_NFD: "false"
K8S_TEST_HOST_TYPE: all
CONTAINER_RUNTIME: ${{ matrix.container_runtime }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.commit-hash }}
fetch-depth: 0
persist-credentials: false
- name: Rebase atop of the latest target branch
run: |
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: Configure CRI-O
if: matrix.container_runtime == 'crio'
run: bash tests/integration/kubernetes/gha-run.sh setup-crio
- name: Deploy ${{ matrix.k8s }}
run: bash tests/integration/kubernetes/gha-run.sh deploy-k8s
env:
CONTAINER_RUNTIME: ${{ matrix.container_runtime }}
- name: Configure the ${{ matrix.snapshotter }} snapshotter
if: matrix.snapshotter != ''
run: bash tests/integration/kubernetes/gha-run.sh configure-snapshotter
- name: Deploy Kata
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
- name: Install `bats`
run: bash tests/integration/kubernetes/gha-run.sh install-bats
- name: Run tests
timeout-minutes: 30
run: bash tests/integration/kubernetes/gha-run.sh run-tests
- name: Collect artifacts ${{ matrix.vmm }}
if: always()
run: bash tests/integration/kubernetes/gha-run.sh collect-artifacts
continue-on-error: true
- name: Archive artifacts ${{ matrix.vmm }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: k8s-tests-${{ matrix.vmm }}-${{ matrix.snapshotter }}-${{ matrix.k8s }}-${{ inputs.tag }}
path: /tmp/artifacts
retention-days: 1
- name: Delete kata-deploy
if: always()
timeout-minutes: 5
run: bash tests/integration/kubernetes/gha-run.sh cleanup

View File

@@ -26,13 +26,11 @@ permissions: {}
jobs:
run-k8s-tests-on-arm64:
name: run-k8s-tests-on-arm64
strategy:
fail-fast: false
matrix:
vmm:
- qemu
- qemu-runtime-rs
k8s:
- kubeadm
runs-on: arm64-k8s
@@ -43,6 +41,7 @@ jobs:
GH_PR_NUMBER: ${{ inputs.pr-number }}
KATA_HYPERVISOR: ${{ matrix.vmm }}
KUBERNETES: ${{ matrix.k8s }}
USING_NFD: "false"
K8S_TEST_HOST_TYPE: all
TARGET_ARCH: "aarch64"
steps:
@@ -59,7 +58,7 @@ jobs:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: Deploy Kata
timeout-minutes: 20
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
- name: Install `bats`
@@ -69,10 +68,6 @@ jobs:
timeout-minutes: 30
run: bash tests/integration/kubernetes/gha-run.sh run-tests
- name: Report tests
if: always()
run: bash tests/integration/kubernetes/gha-run.sh report-tests
- name: Collect artifacts ${{ matrix.vmm }}
if: always()
run: bash tests/integration/kubernetes/gha-run.sh collect-artifacts
@@ -87,5 +82,5 @@ jobs:
- name: Delete kata-deploy
if: always()
timeout-minutes: 15
timeout-minutes: 5
run: bash tests/integration/kubernetes/gha-run.sh cleanup

View File

@@ -1,10 +1,7 @@
name: CI | Run NVIDIA GPU kubernetes tests on amd64
name: CI | Run NVIDIA GPU kubernetes tests on arm64
on:
workflow_call:
inputs:
tarball-suffix:
required: true
type: string
registry:
required: true
type: string
@@ -32,24 +29,23 @@ permissions: {}
jobs:
run-nvidia-gpu-tests-on-amd64:
name: run-${{ matrix.environment.name }}-tests-on-amd64
strategy:
fail-fast: false
matrix:
environment: [
{ name: nvidia-gpu, vmm: qemu-nvidia-gpu, runner: amd64-nvidia-a100 },
{ name: nvidia-gpu-snp, vmm: qemu-nvidia-gpu-snp, runner: amd64-nvidia-h100-snp },
]
runs-on: ${{ matrix.environment.runner }}
vmm:
- qemu-nvidia-gpu
k8s:
- kubeadm
runs-on: amd64-nvidia-a100
env:
DOCKER_REGISTRY: ${{ inputs.registry }}
DOCKER_REPO: ${{ inputs.repo }}
DOCKER_TAG: ${{ inputs.tag }}
GH_PR_NUMBER: ${{ inputs.pr-number }}
KATA_HYPERVISOR: ${{ matrix.environment.vmm }}
KUBERNETES: kubeadm
KBS: ${{ matrix.environment.name == 'nvidia-gpu-snp' && 'true' || 'false' }}
K8S_TEST_HOST_TYPE: baremetal
KATA_HYPERVISOR: ${{ matrix.vmm }}
KUBERNETES: ${{ matrix.k8s }}
USING_NFD: "false"
K8S_TEST_HOST_TYPE: all
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
@@ -63,69 +59,31 @@ jobs:
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: get-kata-tools-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-tools-artifacts
- name: Install kata-tools
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
- name: Uninstall previous `kbs-client`
if: matrix.environment.name != 'nvidia-gpu'
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh uninstall-kbs-client
- name: Deploy CoCo KBS
if: matrix.environment.name != 'nvidia-gpu'
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-coco-kbs
env:
NVIDIA_VERIFIER_MODE: remote
KBS_INGRESS: nodeport
- name: Install `kbs-client`
if: matrix.environment.name != 'nvidia-gpu'
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh install-kbs-client
- name: Deploy Kata
timeout-minutes: 20
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
- name: Install `bats`
run: bash tests/integration/kubernetes/gha-run.sh install-bats
- name: Run tests ${{ matrix.environment.vmm }}
- name: Run tests
timeout-minutes: 30
run: bash tests/integration/kubernetes/gha-run.sh run-nv-tests
env:
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
- name: Report tests
if: always()
run: bash tests/integration/kubernetes/gha-run.sh report-tests
- name: Collect artifacts ${{ matrix.environment.vmm }}
- name: Collect artifacts ${{ matrix.vmm }}
if: always()
run: bash tests/integration/kubernetes/gha-run.sh collect-artifacts
continue-on-error: true
- name: Archive artifacts ${{ matrix.environment.vmm }}
- name: Archive artifacts ${{ matrix.vmm }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: k8s-tests-${{ matrix.environment.vmm }}-kubeadm-${{ inputs.tag }}
name: k8s-tests-${{ matrix.vmm }}-${{ matrix.k8s }}-${{ inputs.tag }}
path: /tmp/artifacts
retention-days: 1
- name: Delete kata-deploy
if: always()
timeout-minutes: 15
timeout-minutes: 5
run: bash tests/integration/kubernetes/gha-run.sh cleanup
- name: Delete CoCo KBS
if: always() && matrix.environment.name != 'nvidia-gpu'
timeout-minutes: 10
run: |
bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs

View File

@@ -26,7 +26,6 @@ permissions: {}
jobs:
run-k8s-tests:
name: run-k8s-tests
strategy:
fail-fast: false
matrix:
@@ -34,7 +33,7 @@ jobs:
- qemu
k8s:
- kubeadm
runs-on: ppc64le-k8s
runs-on: k8s-ppc64le
env:
DOCKER_REGISTRY: ${{ inputs.registry }}
DOCKER_REPO: ${{ inputs.repo }}
@@ -43,6 +42,7 @@ jobs:
GOPATH: ${{ github.workspace }}
KATA_HYPERVISOR: ${{ matrix.vmm }}
KUBERNETES: ${{ matrix.k8s }}
USING_NFD: "false"
TARGET_ARCH: "ppc64le"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -62,20 +62,19 @@ jobs:
./tests/install_go.sh -f -p
echo "/usr/local/go/bin" >> "$GITHUB_PATH"
- name: Prepare the runner for k8s test suite
run: bash "${HOME}/scripts/k8s_cluster_prepare.sh"
- name: Prepare the runner for k8s cluster creation
run: bash "${HOME}/scripts/k8s_cluster_cleanup.sh"
- name: Check if cluster is healthy to run the tests
run: bash "${HOME}/scripts/k8s_cluster_check.sh"
- name: Create k8s cluster using kubeadm
run: bash "${HOME}/scripts/k8s_cluster_create.sh"
- name: Deploy Kata
timeout-minutes: 20
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-kubeadm
- name: Run tests
timeout-minutes: 30
run: bash tests/integration/kubernetes/gha-run.sh run-tests
- name: Report tests
if: always()
run: bash tests/integration/kubernetes/gha-run.sh report-tests
- name: Delete cluster and post cleanup actions
run: bash "${HOME}/scripts/k8s_cluster_cleanup.sh"

View File

@@ -29,7 +29,6 @@ permissions: {}
jobs:
run-k8s-tests:
name: run-k8s-tests
strategy:
fail-fast: false
matrix:
@@ -46,9 +45,11 @@ jobs:
include:
- snapshotter: devmapper
pull-type: default
using-nfd: true
deploy-cmd: configure-snapshotter
- snapshotter: nydus
pull-type: guest-pull
using-nfd: false
deploy-cmd: deploy-snapshotter
exclude:
- snapshotter: overlayfs
@@ -74,6 +75,7 @@ jobs:
KUBERNETES: ${{ matrix.k8s }}
PULL_TYPE: ${{ matrix.pull-type }}
SNAPSHOTTER: ${{ matrix.snapshotter }}
USING_NFD: ${{ matrix.using-nfd }}
TARGET_ARCH: "s390x"
AUTHENTICATED_IMAGE_USER: ${{ vars.AUTHENTICATED_IMAGE_USER }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
@@ -103,13 +105,11 @@ jobs:
# qemu-runtime-rs only works with overlayfs
# See: https://github.com/kata-containers/kata-containers/issues/10066
- name: Configure the ${{ matrix.snapshotter }} snapshotter
env:
DEPLOY_CMD: ${{ matrix.deploy-cmd }}
run: bash tests/integration/kubernetes/gha-run.sh "${DEPLOY_CMD}"
run: bash tests/integration/kubernetes/gha-run.sh ${{ matrix.deploy-cmd }}
if: ${{ matrix.snapshotter != 'overlayfs' }}
- name: Deploy Kata
timeout-minutes: 20
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-zvsi
- name: Uninstall previous `kbs-client`
@@ -131,18 +131,12 @@ jobs:
timeout-minutes: 60
run: bash tests/integration/kubernetes/gha-run.sh run-tests
- name: Report tests
if: always()
run: bash tests/integration/kubernetes/gha-run.sh report-tests
- name: Delete kata-deploy
if: always()
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh cleanup-zvsi
- name: Delete CoCo KBS
if: always()
timeout-minutes: 10
run: |
if [ "${KBS}" == "true" ]; then
bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs

View File

@@ -40,13 +40,11 @@ permissions: {}
jobs:
# Generate jobs for testing CoCo on non-TEE environments
run-stability-k8s-tests-coco-nontee:
name: run-stability-k8s-tests-coco-nontee
strategy:
fail-fast: false
matrix:
vmm:
- qemu-coco-dev
- qemu-coco-dev-runtime-rs
snapshotter:
- nydus
pull-type:
@@ -71,6 +69,7 @@ jobs:
AUTHENTICATED_IMAGE_USER: ${{ vars.AUTHENTICATED_IMAGE_USER }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
SNAPSHOTTER: ${{ matrix.snapshotter }}
USING_NFD: "false"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
@@ -84,14 +83,14 @@ jobs:
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: get-kata-tools-tarball
- name: get-kata-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-tools-artifacts
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-artifacts
- name: Install kata-tools
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
- name: Install kata
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-artifacts
- name: Log into the Azure account
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
@@ -140,12 +139,7 @@ jobs:
timeout-minutes: 300
run: bash tests/stability/gha-stability-run.sh run-tests
- name: Report tests
if: always()
run: bash tests/integration/kubernetes/gha-run.sh report-tests
- name: Refresh OIDC token in case access token expired
if: always()
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
with:
client-id: ${{ secrets.AZ_APPID }}

View File

@@ -39,17 +39,17 @@ on:
permissions: {}
jobs:
run-k8s-tests-on-tee:
name: run-k8s-tests-on-tee
run-k8s-tests-on-tdx:
strategy:
fail-fast: false
matrix:
include:
- runner: tdx
vmm: qemu-tdx
- runner: sev-snp
vmm: qemu-snp
runs-on: ${{ matrix.runner }}
vmm:
- qemu-tdx
snapshotter:
- nydus
pull-type:
- guest-pull
runs-on: tdx
env:
DOCKER_REGISTRY: ${{ inputs.registry }}
DOCKER_REPO: ${{ inputs.repo }}
@@ -57,14 +57,15 @@ jobs:
GH_PR_NUMBER: ${{ inputs.pr-number }}
KATA_HYPERVISOR: ${{ matrix.vmm }}
KUBERNETES: "vanilla"
USING_NFD: "true"
KBS: "true"
K8S_TEST_HOST_TYPE: "baremetal"
KBS_INGRESS: "nodeport"
SNAPSHOTTER: "nydus"
PULL_TYPE: "guest-pull"
SNAPSHOTTER: ${{ matrix.snapshotter }}
PULL_TYPE: ${{ matrix.pull-type }}
AUTHENTICATED_IMAGE_USER: ${{ vars.AUTHENTICATED_IMAGE_USER }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
GH_ITA_KEY: ${{ secrets.ITA_KEY }}
ITA_KEY: ${{ secrets.ITA_KEY }}
AUTO_GENERATE_POLICY: "yes"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -79,18 +80,13 @@ jobs:
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: get-kata-tools-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-tools-artifacts
- name: Install kata-tools
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
- name: Deploy Snapshotter
timeout-minutes: 5
run: bash tests/integration/kubernetes/gha-run.sh deploy-snapshotter
- name: Deploy Kata
timeout-minutes: 20
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-tdx
- name: Uninstall previous `kbs-client`
timeout-minutes: 10
@@ -99,8 +95,6 @@ jobs:
- name: Deploy CoCo KBS
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-coco-kbs
env:
ITA_KEY: ${{ env.KATA_HYPERVISOR == 'qemu-tdx' && env.GH_ITA_KEY || '' }}
- name: Install `kbs-client`
timeout-minutes: 10
@@ -114,21 +108,102 @@ jobs:
timeout-minutes: 100
run: bash tests/integration/kubernetes/gha-run.sh run-tests
- name: Report tests
if: always()
run: bash tests/integration/kubernetes/gha-run.sh report-tests
- name: Delete kata-deploy
if: always()
timeout-minutes: 15
run: bash tests/integration/kubernetes/gha-run.sh cleanup
run: bash tests/integration/kubernetes/gha-run.sh cleanup-tdx
- name: Delete Snapshotter
if: always()
run: bash tests/integration/kubernetes/gha-run.sh cleanup-snapshotter
- name: Delete CoCo KBS
if: always()
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs
- name: Delete CSI driver
timeout-minutes: 5
run: bash tests/integration/kubernetes/gha-run.sh delete-csi-driver
run-k8s-tests-sev-snp:
strategy:
fail-fast: false
matrix:
vmm:
- qemu-snp
snapshotter:
- nydus
pull-type:
- guest-pull
runs-on: sev-snp
env:
DOCKER_REGISTRY: ${{ inputs.registry }}
DOCKER_REPO: ${{ inputs.repo }}
DOCKER_TAG: ${{ inputs.tag }}
GH_PR_NUMBER: ${{ inputs.pr-number }}
KATA_HYPERVISOR: ${{ matrix.vmm }}
KUBECONFIG: /home/kata/.kube/config
KUBERNETES: "vanilla"
USING_NFD: "false"
KBS: "true"
KBS_INGRESS: "nodeport"
K8S_TEST_HOST_TYPE: "baremetal"
SNAPSHOTTER: ${{ matrix.snapshotter }}
PULL_TYPE: ${{ matrix.pull-type }}
AUTHENTICATED_IMAGE_USER: ${{ vars.AUTHENTICATED_IMAGE_USER }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
AUTO_GENERATE_POLICY: "yes"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.commit-hash }}
fetch-depth: 0
persist-credentials: false
- name: Rebase atop of the latest target branch
run: |
[[ "${KATA_HYPERVISOR}" == "qemu-tdx" ]] && echo "ITA_KEY=${GH_ITA_KEY}" >> "${GITHUB_ENV}"
bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: Deploy Snapshotter
timeout-minutes: 5
run: bash tests/integration/kubernetes/gha-run.sh deploy-snapshotter
- name: Deploy Kata
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-snp
- name: Uninstall previous `kbs-client`
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh uninstall-kbs-client
- name: Deploy CoCo KBS
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-coco-kbs
- name: Install `kbs-client`
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh install-kbs-client
- name: Deploy CSI driver
timeout-minutes: 5
run: bash tests/integration/kubernetes/gha-run.sh deploy-csi-driver
- name: Run tests
timeout-minutes: 50
run: bash tests/integration/kubernetes/gha-run.sh run-tests
- name: Delete kata-deploy
if: always()
run: bash tests/integration/kubernetes/gha-run.sh cleanup-snp
- name: Delete Snapshotter
if: always()
run: bash tests/integration/kubernetes/gha-run.sh cleanup-snapshotter
- name: Delete CoCo KBS
if: always()
run: bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs
- name: Delete CSI driver
timeout-minutes: 5
@@ -136,21 +211,15 @@ jobs:
# Generate jobs for testing CoCo on non-TEE environments
run-k8s-tests-coco-nontee:
name: run-k8s-tests-coco-nontee
strategy:
fail-fast: false
matrix:
vmm:
- qemu-coco-dev
- qemu-coco-dev-runtime-rs
snapshotter:
- nydus
pull-type:
- guest-pull
include:
- pull-type: experimental-force-guest-pull
vmm: qemu-coco-dev
snapshotter: ""
runs-on: ubuntu-22.04
permissions:
id-token: write # Used for OIDC access to log into Azure
@@ -170,12 +239,13 @@ jobs:
AUTHENTICATED_IMAGE_USER: ${{ vars.AUTHENTICATED_IMAGE_USER }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
SNAPSHOTTER: ${{ matrix.snapshotter }}
EXPERIMENTAL_FORCE_GUEST_PULL: ${{ matrix.pull-type == 'experimental-force-guest-pull' && matrix.vmm || '' }}
# Caution: the current ingress controller used to expose the KBS service
# requires many vCPUs, leaving only a few for the tests. Depending on the
# host type chosen, it may result in the creation of a cluster with
# insufficient resources.
K8S_TEST_HOST_TYPE: "all"
USING_NFD: "false"
AUTO_GENERATE_POLICY: "yes"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
@@ -189,14 +259,14 @@ jobs:
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: get-kata-tools-tarball
- name: get-kata-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-tools-artifacts
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-artifacts
- name: Install kata-tools
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
- name: Install kata
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-artifacts
- name: Log into the Azure account
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
@@ -225,12 +295,13 @@ jobs:
- name: Download credentials for the Kubernetes CLI to use them
run: bash tests/integration/kubernetes/gha-run.sh get-cluster-credentials
- name: Deploy Snapshotter
timeout-minutes: 5
run: bash tests/integration/kubernetes/gha-run.sh deploy-snapshotter
- name: Deploy Kata
timeout-minutes: 20
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-aks
env:
USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: ${{ env.SNAPSHOTTER == 'nydus' }}
AUTO_GENERATE_POLICY: ${{ env.PULL_TYPE == 'experimental-force-guest-pull' && 'no' || 'yes' }}
- name: Deploy CoCo KBS
timeout-minutes: 10
@@ -253,7 +324,6 @@ jobs:
run: bash tests/integration/kubernetes/gha-run.sh report-tests
- name: Refresh OIDC token in case access token expired
if: always()
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
with:
client-id: ${{ secrets.AZ_APPID }}
@@ -262,104 +332,4 @@ jobs:
- name: Delete AKS cluster
if: always()
timeout-minutes: 15
run: bash tests/integration/kubernetes/gha-run.sh delete-cluster
# Generate jobs for testing CoCo on non-TEE environments with erofs-snapshotter
run-k8s-tests-coco-nontee-with-erofs-snapshotter:
name: run-k8s-tests-coco-nontee-with-erofs-snapshotter
strategy:
fail-fast: false
matrix:
vmm:
- qemu-coco-dev
snapshotter:
- erofs
pull-type:
- default
runs-on: ubuntu-24.04
environment: ci
env:
DOCKER_REGISTRY: ${{ inputs.registry }}
DOCKER_REPO: ${{ inputs.repo }}
DOCKER_TAG: ${{ inputs.tag }}
GH_PR_NUMBER: ${{ inputs.pr-number }}
KATA_HYPERVISOR: ${{ matrix.vmm }}
# Some tests rely on that variable to run (or not)
KBS: "false"
# Set the KBS ingress handler (empty string disables handling)
KBS_INGRESS: ""
KUBERNETES: "vanilla"
CONTAINER_ENGINE: "containerd"
CONTAINER_ENGINE_VERSION: "v2.2"
PULL_TYPE: ${{ matrix.pull-type }}
SNAPSHOTTER: ${{ matrix.snapshotter }}
USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: "true"
K8S_TEST_HOST_TYPE: "all"
# We are skipping the auto-generated policy tests for now,
# but those should be enabled as soon as we work on that.
AUTO_GENERATE_POLICY: "no"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.commit-hash }}
fetch-depth: 0
persist-credentials: false
- name: Rebase atop of the latest target branch
run: |
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: get-kata-tools-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-tools-artifacts
- name: Install kata-tools
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
- name: Remove unnecessary directories to free up space
run: |
sudo rm -rf /usr/local/.ghcup
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo rm -rf /usr/local/lib/android
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf /usr/local/share/boost
sudo rm -rf /usr/lib/jvm
sudo rm -rf /usr/share/swift
sudo rm -rf /usr/local/share/powershell
sudo rm -rf /usr/local/julia*
sudo rm -rf /opt/az
sudo rm -rf /usr/local/share/chromium
sudo rm -rf /opt/microsoft
sudo rm -rf /opt/google
sudo rm -rf /usr/lib/firefox
- name: Deploy kubernetes
timeout-minutes: 15
run: bash tests/integration/kubernetes/gha-run.sh deploy-k8s
env:
GH_TOKEN: ${{ github.token }}
- name: Install `bats`
run: bash tests/integration/kubernetes/gha-run.sh install-bats
- name: Deploy Kata
timeout-minutes: 20
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
- name: Deploy CSI driver
timeout-minutes: 5
run: bash tests/integration/kubernetes/gha-run.sh deploy-csi-driver
- name: Run tests
timeout-minutes: 80
run: bash tests/integration/kubernetes/gha-run.sh run-tests
- name: Report tests
if: always()
run: bash tests/integration/kubernetes/gha-run.sh report-tests

View File

@@ -33,7 +33,6 @@ permissions: {}
jobs:
run-kata-deploy-tests:
name: run-kata-deploy-tests
strategy:
fail-fast: false
matrix:
@@ -59,6 +58,7 @@ jobs:
KATA_HOST_OS: ${{ matrix.host_os }}
KATA_HYPERVISOR: ${{ matrix.vmm }}
KUBERNETES: "vanilla"
USING_NFD: "false"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
@@ -102,12 +102,7 @@ jobs:
- name: Run tests
run: bash tests/functional/kata-deploy/gha-run.sh run-tests
- name: Report tests
if: always()
run: bash tests/integration/kubernetes/gha-run.sh report-tests
- name: Refresh OIDC token in case access token expired
if: always()
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
with:
client-id: ${{ secrets.AZ_APPID }}

View File

@@ -26,7 +26,6 @@ permissions: {}
jobs:
run-kata-deploy-tests:
name: run-kata-deploy-tests
strategy:
fail-fast: false
matrix:
@@ -45,6 +44,7 @@ jobs:
GH_PR_NUMBER: ${{ inputs.pr-number }}
KATA_HYPERVISOR: ${{ matrix.vmm }}
KUBERNETES: ${{ matrix.k8s }}
USING_NFD: "false"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
@@ -58,24 +58,6 @@ jobs:
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: Remove unnecessary directories to free up space
run: |
sudo rm -rf /usr/local/.ghcup
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo rm -rf /usr/local/lib/android
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf /usr/local/share/boost
sudo rm -rf /usr/lib/jvm
sudo rm -rf /usr/share/swift
sudo rm -rf /usr/local/share/powershell
sudo rm -rf /usr/local/julia*
sudo rm -rf /opt/az
sudo rm -rf /usr/local/share/chromium
sudo rm -rf /opt/microsoft
sudo rm -rf /opt/google
sudo rm -rf /usr/lib/firefox
- name: Deploy ${{ matrix.k8s }}
run: bash tests/functional/kata-deploy/gha-run.sh deploy-k8s
@@ -84,7 +66,3 @@ jobs:
- name: Run tests
run: bash tests/functional/kata-deploy/gha-run.sh run-tests
- name: Report tests
if: always()
run: bash tests/functional/kata-deploy/gha-run.sh report-tests

View File

@@ -17,7 +17,6 @@ permissions: {}
jobs:
run-monitor:
name: run-monitor
strategy:
fail-fast: false
matrix:

View File

@@ -26,7 +26,6 @@ permissions: {}
jobs:
run-metrics:
name: run-metrics
strategy:
# We can set this to true whenever we're 100% sure that
# the all the tests are not flaky, otherwise we'll fail
@@ -44,6 +43,7 @@ jobs:
DOCKER_TAG: ${{ inputs.tag }}
GH_PR_NUMBER: ${{ inputs.pr-number }}
K8S_TEST_HOST_TYPE: "baremetal"
USING_NFD: "false"
KUBERNETES: kubeadm
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

.github/workflows/run-runk-tests.yaml vendored Normal file
View File

@@ -0,0 +1,53 @@
name: CI | Run runk tests
on:
workflow_call:
inputs:
tarball-suffix:
required: false
type: string
commit-hash:
required: false
type: string
target-branch:
required: false
type: string
default: ""
permissions: {}
jobs:
run-runk:
# Skip runk tests as we have no maintainers. TODO: Decide when to remove altogether
if: false
runs-on: ubuntu-22.04
env:
CONTAINERD_VERSION: lts
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.commit-hash }}
fetch-depth: 0
persist-credentials: false
- name: Rebase atop of the latest target branch
run: |
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: Install dependencies
run: bash tests/integration/runk/gha-run.sh install-dependencies
env:
GH_TOKEN: ${{ github.token }}
- name: get-kata-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-artifacts
- name: Install kata
run: bash tests/integration/runk/gha-run.sh install-kata kata-artifacts
- name: Run runk tests
run: bash tests/integration/runk/gha-run.sh run

View File

@@ -18,7 +18,6 @@ concurrency:
jobs:
shellcheck:
name: shellcheck
runs-on: ubuntu-24.04
steps:
- name: Checkout the code

View File

@@ -19,7 +19,6 @@ concurrency:
jobs:
shellcheck-required:
name: shellcheck-required
runs-on: ubuntu-24.04
steps:
- name: Checkout the code

View File

@@ -6,21 +6,13 @@ on:
permissions: {}
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
stale:
name: stale
runs-on: ubuntu-22.04
permissions:
actions: write # Needed to manage caches for state persistence across runs
pull-requests: write # Needed to add/remove labels, post comments, or close PRs
steps:
- uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0
with:
stale-pr-message: 'This PR has been opened without activity for 180 days. Please comment on the issue or it will be closed in 7 days.'
stale-pr-message: 'This PR has been open with no activity for 180 days. Comment on the issue, otherwise it will be closed in 7 days'
days-before-pr-stale: 180
days-before-pr-close: 7
days-before-issue-stale: -1

View File

@@ -28,9 +28,21 @@ jobs:
fail-fast: false
matrix:
instance:
- "ubuntu-24.04-arm"
- "ubuntu-24.04-s390x"
- "ubuntu-24.04-ppc64le"
- "ubuntu-22.04-arm"
- "s390x"
- "ppc64le"
uses: ./.github/workflows/build-checks.yaml
with:
instance: ${{ matrix.instance }}
build-checks-preview:
needs: skipper
if: ${{ needs.skipper.outputs.skip_static != 'yes' }}
strategy:
fail-fast: false
matrix:
instance:
- "riscv-builder"
uses: ./.github/workflows/build-checks-preview-riscv64.yaml
with:
instance: ${{ matrix.instance }}

View File

@@ -22,7 +22,6 @@ jobs:
target-branch: ${{ github.event.pull_request.base.ref }}
check-kernel-config-version:
name: check-kernel-config-version
needs: skipper
if: ${{ needs.skipper.outputs.skip_static != 'yes' }}
runs-on: ubuntu-22.04
@@ -55,7 +54,6 @@ jobs:
instance: ubuntu-22.04
build-checks-depending-on-kvm:
name: build-checks-depending-on-kvm
runs-on: ubuntu-22.04
needs: skipper
if: ${{ needs.skipper.outputs.skip_static != 'yes' }}
@@ -90,16 +88,13 @@ jobs:
- name: Running `${{ matrix.command }}` for ${{ matrix.component }}
run: |
export PATH="$PATH:${HOME}/.cargo/bin"
cd "${COMPONENT_PATH}"
eval "${COMMAND}"
cd ${{ matrix.component-path }}
${{ matrix.command }}
env:
COMMAND: ${{ matrix.command }}
COMPONENT_PATH: ${{ matrix.component-path }}
RUST_BACKTRACE: "1"
RUST_LIB_BACKTRACE: "0"
static-checks:
name: static-checks
runs-on: ubuntu-22.04
needs: skipper
if: ${{ needs.skipper.outputs.skip_static != 'yes' }}
@@ -122,13 +117,13 @@ jobs:
path: ./src/github.com/${{ github.repository }}
- name: Install yq
run: |
cd "${GOPATH}/src/github.com/${GITHUB_REPOSITORY}"
cd "${GOPATH}/src/github.com/${{ github.repository }}"
./ci/install_yq.sh
env:
INSTALL_IN_GOPATH: false
- name: Install golang
run: |
cd "${GOPATH}/src/github.com/${GITHUB_REPOSITORY}"
cd "${GOPATH}/src/github.com/${{ github.repository }}"
./tests/install_go.sh -f -p
echo "/usr/local/go/bin" >> "$GITHUB_PATH"
- name: Install system dependencies
@@ -136,7 +131,7 @@ jobs:
sudo apt-get update && sudo apt-get -y install moreutils hunspell hunspell-en-gb hunspell-en-us pandoc
- name: Install open-policy-agent
run: |
cd "${GOPATH}/src/github.com/${GITHUB_REPOSITORY}"
cd "${GOPATH}/src/github.com/${{ github.repository }}"
./tests/install_opa.sh
- name: Install regorus
env:
@@ -144,13 +139,11 @@ jobs:
ARTEFACT_REGISTRY_USERNAME: "${{ github.actor }}"
ARTEFACT_REGISTRY_PASSWORD: "${{ secrets.GITHUB_TOKEN }}"
run: |
"${GOPATH}/src/github.com/${GITHUB_REPOSITORY}/tests/install_regorus.sh"
"${GOPATH}/src/github.com/${{ github.repository }}/tests/install_regorus.sh"
- name: Run check
env:
CMD: ${{ matrix.cmd }}
run: |
export PATH="${PATH}:${GOPATH}/bin"
cd "${GOPATH}/src/github.com/${GITHUB_REPOSITORY}" && ${CMD}
cd "${GOPATH}/src/github.com/${{ github.repository }}" && ${{ matrix.cmd }}
govulncheck:
needs: skipper
@@ -158,7 +151,6 @@ jobs:
uses: ./.github/workflows/govulncheck.yaml
codegen:
name: codegen
runs-on: ubuntu-22.04
needs: skipper
if: ${{ needs.skipper.outputs.skip_static != 'yes' }}

View File

@@ -1,6 +1,8 @@
name: GHA security analysis
on:
push:
branches: ["main"]
pull_request:
permissions: {}
@@ -11,8 +13,10 @@ concurrency:
jobs:
zizmor:
name: zizmor
runs-on: ubuntu-22.04
permissions:
contents: read
security-events: write
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -21,9 +25,6 @@ jobs:
persist-credentials: false
- name: Run zizmor
uses: zizmorcore/zizmor-action@e673c3917a1aef3c65c972347ed84ccd013ecda4 # v0.2.0
uses: zizmorcore/zizmor-action@f52a838cfabf134edcbaa7c8b3677dde20045018 # v0.1.1
with:
advanced-security: false
annotations: true
persona: auditor
version: v1.13.0

.github/zizmor.yml vendored
View File

@@ -1,3 +0,0 @@
rules:
undocumented-permissions:
disable: true

.gitignore vendored
View File

@@ -18,5 +18,3 @@ src/tools/log-parser/kata-log-parser
tools/packaging/static-build/agent/install_libseccomp.sh
.envrc
.direnv
**/.DS_Store
site/

View File

@@ -1,140 +0,0 @@
[workspace.package]
authors = ["The Kata Containers community <kata-dev@lists.katacontainers.io>"]
edition = "2018"
license = "Apache-2.0"
rust-version = "1.88"
[workspace]
members = [
# Dragonball
"src/dragonball",
"src/dragonball/dbs_acpi",
"src/dragonball/dbs_address_space",
"src/dragonball/dbs_allocator",
"src/dragonball/dbs_arch",
"src/dragonball/dbs_boot",
"src/dragonball/dbs_device",
"src/dragonball/dbs_interrupt",
"src/dragonball/dbs_legacy_devices",
"src/dragonball/dbs_pci",
"src/dragonball/dbs_tdx",
"src/dragonball/dbs_upcall",
"src/dragonball/dbs_utils",
"src/dragonball/dbs_virtio_devices",
# runtime-rs
"src/runtime-rs",
"src/runtime-rs/crates/agent",
"src/runtime-rs/crates/hypervisor",
"src/runtime-rs/crates/persist",
"src/runtime-rs/crates/resource",
"src/runtime-rs/crates/runtimes",
"src/runtime-rs/crates/service",
"src/runtime-rs/crates/shim",
"src/runtime-rs/crates/shim-ctl",
"src/runtime-rs/tests/utils",
]
resolver = "2"
# TODO: Add all excluded crates to root workspace
exclude = [
"src/agent",
"src/tools",
"src/libs",
# kata-deploy binary is standalone and has its own Cargo.toml for now
"tools/packaging/kata-deploy/binary",
# We are cloning and building rust packages under
# "tools/packaging/kata-deploy/local-build/build" folder, which may mislead
# those packages to think they are part of the kata root workspace
"tools/packaging/kata-deploy/local-build/build",
]
[workspace.dependencies]
# Rust-VMM crates
event-manager = "0.2.1"
kvm-bindings = "0.6.0"
kvm-ioctls = "=0.12.1"
linux-loader = "0.8.0"
seccompiler = "0.5.0"
vfio-bindings = "0.3.0"
vfio-ioctls = "0.1.0"
virtio-bindings = "0.1.0"
virtio-queue = "0.7.0"
vm-fdt = "0.2.0"
vm-memory = "0.10.0"
vm-superio = "0.5.0"
vmm-sys-util = "0.11.0"
# Local dependencies from Dragonball Sandbox crates
dragonball = { path = "src/dragonball" }
dbs-acpi = { path = "src/dragonball/dbs_acpi" }
dbs-address-space = { path = "src/dragonball/dbs_address_space" }
dbs-allocator = { path = "src/dragonball/dbs_allocator" }
dbs-arch = { path = "src/dragonball/dbs_arch" }
dbs-boot = { path = "src/dragonball/dbs_boot" }
dbs-device = { path = "src/dragonball/dbs_device" }
dbs-interrupt = { path = "src/dragonball/dbs_interrupt" }
dbs-legacy-devices = { path = "src/dragonball/dbs_legacy_devices" }
dbs-pci = { path = "src/dragonball/dbs_pci" }
dbs-tdx = { path = "src/dragonball/dbs_tdx" }
dbs-upcall = { path = "src/dragonball/dbs_upcall" }
dbs-utils = { path = "src/dragonball/dbs_utils" }
dbs-virtio-devices = { path = "src/dragonball/dbs_virtio_devices" }
# Local dependencies from runtime-rs
agent = { path = "src/runtime-rs/crates/agent" }
hypervisor = { path = "src/runtime-rs/crates/hypervisor" }
persist = { path = "src/runtime-rs/crates/persist" }
resource = { path = "src/runtime-rs/crates/resource" }
runtimes = { path = "src/runtime-rs/crates/runtimes" }
service = { path = "src/runtime-rs/crates/service" }
tests_utils = { path = "src/runtime-rs/tests/utils" }
ch-config = { path = "src/runtime-rs/crates/hypervisor/ch-config" }
common = { path = "src/runtime-rs/crates/runtimes/common" }
linux_container = { path = "src/runtime-rs/crates/runtimes/linux_container" }
virt_container = { path = "src/runtime-rs/crates/runtimes/virt_container" }
wasm_container = { path = "src/runtime-rs/crates/runtimes/wasm_container" }
# Local dependencies from `src/lib`
kata-sys-util = { path = "src/libs/kata-sys-util" }
kata-types = { path = "src/libs/kata-types", features = ["safe-path"] }
logging = { path = "src/libs/logging" }
protocols = { path = "src/libs/protocols", features = ["async"] }
runtime-spec = { path = "src/libs/runtime-spec" }
safe-path = { path = "src/libs/safe-path" }
shim-interface = { path = "src/libs/shim-interface" }
test-utils = { path = "src/libs/test-utils" }
# Outside dependencies
actix-rt = "2.7.0"
anyhow = "1.0"
async-trait = "0.1.48"
containerd-shim = { version = "0.10.0", features = ["async"] }
containerd-shim-protos = { version = "0.10.0", features = ["async"] }
go-flag = "0.1.0"
hyper = "0.14.20"
hyperlocal = "0.8.0"
lazy_static = "1.4"
libc = "0.2"
log = "0.4.14"
netns-rs = "0.1.0"
# Note: nix needs to stay sync'd with libs versions
nix = "0.26.4"
oci-spec = { version = "0.8.1", features = ["runtime"] }
protobuf = "3.7.2"
rand = "0.8.4"
serde = { version = "1.0.145", features = ["derive"] }
serde_json = "1.0.91"
sha2 = "0.10.9"
slog = "2.5.2"
slog-scope = "4.4.0"
strum = { version = "0.24.0", features = ["derive"] }
tempfile = "3.19.1"
thiserror = "1.0"
tokio = "1.46.1"
tracing = "0.1.41"
tracing-opentelemetry = "0.18.0"
ttrpc = "0.8.4"
url = "2.5.4"

View File

@@ -18,6 +18,7 @@ TOOLS =
TOOLS += agent-ctl
TOOLS += kata-ctl
TOOLS += log-parser
TOOLS += runk
TOOLS += trace-forwarder
STANDARD_TARGETS = build check clean install static-checks-build test vendor
@@ -49,14 +50,10 @@ docs-url-alive-check:
build-and-publish-kata-debug:
bash tools/packaging/kata-debug/kata-debug-build-and-upload-payload.sh ${KATA_DEBUG_REGISTRY} ${KATA_DEBUG_TAG}
docs-serve:
docker run --rm -p 8000:8000 -v ./docs:/docs:ro -v ${PWD}/zensical.toml:/zensical.toml:ro zensical/zensical serve --config-file /zensical.toml -a 0.0.0.0:8000
.PHONY: \
all \
kata-tarball \
install-tarball \
default \
static-checks \
docs-url-alive-check \
docs-serve
docs-url-alive-check
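Assuming the `docs-serve` target shown in the hunk above lands, the docs can be previewed locally; a minimal invocation would be:

```bash
# Serve the documentation via the zensical container (target above),
# then browse http://localhost:8000
make docs-serve
```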

View File

@@ -139,6 +139,7 @@ The table below lists the remaining parts of the project:
| [`agent-ctl`](src/tools/agent-ctl) | utility | Tool that provides low-level access for testing the agent. |
| [`kata-ctl`](src/tools/kata-ctl) | utility | Tool that provides advanced commands and debug facilities. |
| [`trace-forwarder`](src/tools/trace-forwarder) | utility | Agent tracing helper. |
| [`runk`](src/tools/runk) | utility | Standard OCI container runtime based on the agent. |
| [`ci`](.github/workflows) | CI | Continuous Integration configuration files and scripts. |
| [`ocp-ci`](ci/openshift-ci/README.md) | CI | Continuous Integration configuration for the OpenShift pipelines. |
| [`katacontainers.io`](https://github.com/kata-containers/www.katacontainers.io) | website | Source for the [`katacontainers.io`](https://www.katacontainers.io) site. |

View File

@@ -1 +1 @@
3.26.0
3.20.0

View File

@@ -8,7 +8,6 @@ set -e
cidir=$(dirname "$0")
runtimedir=${cidir}/../src/runtime
genpolicydir=${cidir}/../src/tools/genpolicy
build_working_packages() {
# working packages:
@@ -41,11 +40,3 @@ build_working_packages() {
}
build_working_packages
build_genpolicy() {
echo "building genpolicy"
pushd "${genpolicydir}" &>/dev/null
make TRIPLE=aarch64-apple-darwin build
}
build_genpolicy

View File

@@ -11,10 +11,6 @@ script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${script_dir}/../tests/common.bash"
# Path to the ORAS cache helper for downloading tarballs (sourced when needed)
# Use the ORAS_CACHE_HELPER env var (set by build.sh in Docker) or fall back to the repo path
oras_cache_helper="${ORAS_CACHE_HELPER:-${script_dir}/../tools/packaging/scripts/download-with-oras-cache.sh}"
# The following variables, if set in the environment, will change the behavior
# of the gperf and libseccomp configure scripts, which may lead this script to
# fail. So let's ensure they are unset here.
@@ -48,9 +44,6 @@ fi
gperf_tarball="gperf-${gperf_version}.tar.gz"
gperf_tarball_url="${gperf_url}/${gperf_tarball}"
# Use ORAS cache for gperf downloads (gperf upstream can be unreliable)
USE_ORAS_CACHE="${USE_ORAS_CACHE:-yes}"
# We need to build the libseccomp library from sources to create a static
# library for the musl libc.
# However, ppc64le, riscv64 and s390x have no musl targets in Rust. Hence, we do
@@ -75,23 +68,7 @@ trap finish EXIT
build_and_install_gperf() {
echo "Build and install gperf version ${gperf_version}"
mkdir -p "${gperf_install_dir}"
# Use ORAS cache if available and enabled
if [[ "${USE_ORAS_CACHE}" == "yes" ]] && [[ -f "${oras_cache_helper}" ]]; then
echo "Using ORAS cache for gperf download"
source "${oras_cache_helper}"
local cached_tarball
cached_tarball=$(download_component gperf "$(pwd)")
if [[ -f "${cached_tarball}" ]]; then
gperf_tarball="${cached_tarball}"
else
echo "ORAS cache download failed, falling back to direct download"
curl -sLO "${gperf_tarball_url}"
fi
else
curl -sLO "${gperf_tarball_url}"
fi
curl -sLO "${gperf_tarball_url}"
tar -xf "${gperf_tarball}"
pushd "gperf-${gperf_version}"
# Unset $CC for configure, we will always use native for gperf

View File

@@ -46,12 +46,16 @@ fi
[[ ${SELINUX_PERMISSIVE} == "yes" ]] && oc delete -f "${deployments_dir}/machineconfig_selinux.yaml.in"
# Delete kata-containers
helm uninstall kata-deploy --wait --namespace kube-system
pushd "${katacontainers_repo_dir}/tools/packaging/kata-deploy" || { echo "Failed to push to ${katacontainers_repo_dir}/tools/packaging/kata-deploy"; exit 125; }
oc delete -f kata-deploy/base/kata-deploy.yaml
oc -n kube-system wait --timeout=10m --for=delete -l name=kata-deploy pod
oc apply -f kata-cleanup/base/kata-cleanup.yaml
echo "Wait for all related pods to be gone"
( repeats=1; for _ in $(seq 1 600); do
oc get pods -l name="kubelet-kata-cleanup" --no-headers=true -n kube-system 2>&1 | grep "No resources found" -q && ((repeats++)) || repeats=1
[[ "${repeats}" -gt 5 ]] && echo kata-cleanup finished && break
sleep 1
done) || { echo "There are still some kata-cleanup related pods after 600 iterations"; oc get all -n kube-system; exit 1; }
oc delete -f kata-cleanup/base/kata-cleanup.yaml
oc delete -f kata-rbac/base/kata-rbac.yaml
oc delete -f runtimeclasses/kata-runtimeClasses.yaml

View File

@@ -43,22 +43,19 @@ WORKAROUND_9206_CRIO=${WORKAROUND_9206_CRIO:-no}
# Leverage kata-deploy to install Kata Containers in the cluster.
#
apply_kata_deploy() {
if ! command -v helm &>/dev/null; then
echo "Helm not installed, installing in current location..."
PATH=".:${PATH}"
curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | HELM_INSTALL_DIR='.' bash -s -- --no-sudo
fi
local deploy_file="tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
pushd "${katacontainers_repo_dir}" || die
sed -ri "s#(\s+image:) .*#\1 ${KATA_DEPLOY_IMAGE}#" "${deploy_file}"
info "Applying kata-deploy"
oc apply -f tools/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml
oc label --overwrite ns kube-system pod-security.kubernetes.io/enforce=privileged pod-security.kubernetes.io/warn=baseline pod-security.kubernetes.io/audit=baseline
local version chart
version='0.0.0-dev'
chart="oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy"
oc apply -f "${deploy_file}"
oc -n kube-system wait --timeout=10m --for=condition=Ready -l name=kata-deploy pod
# Ensure any potential leftover is cleaned up ... and this secret usually is not in case of previous failures
oc delete secret sh.helm.release.v1.kata-deploy.v1 -n kube-system || true
echo "Installing kata using helm ${chart} ${version} (sha printed in helm output)"
helm install kata-deploy --wait --namespace kube-system --set "image.reference=${KATA_DEPLOY_IMAGE%%:*},image.tag=${KATA_DEPLOY_IMAGE##*:}" "${chart}" --version "${version}"
info "Adding the kata runtime classes"
oc apply -f tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml
popd || die
}
@@ -177,13 +174,13 @@ wait_for_app_pods_message() {
local namespace="$5"
[[ -z "${pod_count}" ]] && pod_count=1
[[ -z "${timeout}" ]] && timeout=60
[[ -n "${namespace}" ]] && namespace=("-n" "${namespace}")
[[ -n "${namespace}" ]] && namespace=" -n ${namespace} "
local pod
local pods
local i
SECONDS=0
while :; do
mapfile -t pods < <(oc get pods -l app="${app}" --no-headers=true "${namespace[@]}" | awk '{print $1}')
mapfile -t pods < <(oc get pods -l app="${app}" --no-headers=true "${namespace}" | awk '{print $1}')
[[ "${#pods}" -ge "${pod_count}" ]] && break
if [[ "${SECONDS}" -gt "${timeout}" ]]; then
printf "Unable to find ${pod_count} pods for '-l app=\"${app}\"' in ${SECONDS}s (%s)" "${pods[@]}"
@@ -193,7 +190,7 @@ wait_for_app_pods_message() {
local log
for pod in "${pods[@]}"; do
while :; do
log=$(oc logs "${namespace[@]}" "${pod}")
log=$(oc logs "${namespace}" "${pod}")
echo "${log}" | grep "${message}" -q && echo "Found $(echo "${log}" | grep "${message}") in ${pod}'s log (${SECONDS})" && break;
if [[ "${SECONDS}" -gt "${timeout}" ]]; then
echo -n "Message '${message}' not present in '${pod}' pod of the '-l app=\"${app}\"' "

View File

@@ -12,33 +12,6 @@
SCRIPT_DIR=$(dirname "$0")
##################
# Helper functions
##################
# Sparse "git clone" supporting old git version
# $1 - origin
# $2 - revision
# $3- - sparse checkout paths
# Note: uses pushd to change into the cloned directory!
git_sparse_clone() {
local origin="$1"
local revision="$2"
shift 2
local sparse_paths=("$@")
local repo
repo=$(basename -s .git "${origin}")
git init "${repo}"
pushd "${repo}" || exit 1
git remote add origin "${origin}"
git fetch --depth 1 origin "${revision}"
git sparse-checkout init --cone
git sparse-checkout set "${sparse_paths[@]}"
git checkout FETCH_HEAD
}
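Since the helper leaves you inside the cloned repo via `pushd`, a typical invocation (mirroring how this script uses it further down; the `main` revision is illustrative) looks like:

```bash
# Sparse-clone only the config/ tree at a given revision; the helper
# pushd's into the repo, so pop back out when done.
git_sparse_clone "https://github.com/confidential-containers/operator" \
    "main" "config/"
oc apply -k "config/release"
popd
```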
###############################
# Disable security to allow e2e
###############################
@@ -143,50 +116,33 @@ az network vnet subnet update \
for NODE_NAME in $(kubectl get nodes -o jsonpath='{.items[*].metadata.name}'); do [[ "${NODE_NAME}" =~ 'worker' ]] && kubectl label node "${NODE_NAME}" node.kubernetes.io/worker=; done
# CAA artifacts
if [[ -z "${CAA_TAG}" ]]; then
if [[ -n "${CAA_IMAGE}" ]]; then
echo "CAA_IMAGE (${CAA_IMAGE}) is set but CAA_TAG isn't, which is not supported. Please specify both or none"
exit 1
fi
TAGS="$(curl https://quay.io/api/v1/repository/confidential-containers/cloud-api-adaptor/tag/?onlyActiveTags=true)"
DIGEST=$(echo "${TAGS}" | jq -r '.tags[] | select(.name | contains("latest-amd64")) | .manifest_digest')
CAA_TAG="$(echo "${TAGS}" | jq -r '.tags[] | select(.manifest_digest | contains("'"${DIGEST}"'")) | .name' | grep -v "latest")"
fi
if [[ -z "${CAA_IMAGE}" ]]; then
CAA_IMAGE="quay.io/confidential-containers/cloud-api-adaptor"
fi
CAA_IMAGE="quay.io/confidential-containers/cloud-api-adaptor"
TAGS="$(curl https://quay.io/api/v1/repository/confidential-containers/cloud-api-adaptor/tag/?onlyActiveTags=true)"
DIGEST=$(echo "${TAGS}" | jq -r '.tags[] | select(.name | contains("latest-amd64")) | .manifest_digest')
CAA_TAG="$(echo "${TAGS}" | jq -r '.tags[] | select(.manifest_digest | contains("'"${DIGEST}"'")) | .name' | grep -v "latest")"
# Get latest PP image
#
# You can list the CI images by:
# az sig image-version list-community --location "eastus" --public-gallery-name "cocopodvm-d0e4f35f-5530-4b9c-8596-112487cdea85" --gallery-image-definition "podvm_image0" --output table
# or the release images by:
# az sig image-version list-community --location "eastus" --public-gallery-name "cococommunity-42d8482d-92cd-415b-b332-7648bd978eff" --gallery-image-definition "peerpod-podvm-fedora" --output table
# or the release debug images by:
# az sig image-version list-community --location "eastus" --public-gallery-name "cococommunity-42d8482d-92cd-415b-b332-7648bd978eff" --gallery-image-definition "peerpod-podvm-fedora-debug" --output table
#
# Note there are other flavours of the released images, you can list them by:
# az sig image-definition list-community --location "eastus" --public-gallery-name "cococommunity-42d8482d-92cd-415b-b332-7648bd978eff" --output table
if [[ -z "${PP_IMAGE_ID}" ]]; then
SUCCESS_TIME=$(curl -s \
-H "Accept: application/vnd.github+json" \
"https://api.github.com/repos/confidential-containers/cloud-api-adaptor/actions/workflows/azure-nightly-build.yml/runs?status=success" \
| jq -r '.workflow_runs[0].updated_at')
PP_IMAGE_ID="/CommunityGalleries/cocopodvm-d0e4f35f-5530-4b9c-8596-112487cdea85/Images/podvm_image0/Versions/$(date -u -jf "%Y-%m-%dT%H:%M:%SZ" "${SUCCESS_TIME}" "+%Y.%m.%d" 2>/dev/null || date -d "${SUCCESS_TIME}" +%Y.%m.%d)"
fi
SUCCESS_TIME=$(curl -s \
-H "Accept: application/vnd.github+json" \
"https://api.github.com/repos/confidential-containers/cloud-api-adaptor/actions/workflows/azure-nightly-build.yml/runs?status=success" \
| jq -r '.workflow_runs[0].updated_at')
PP_IMAGE_ID="/CommunityGalleries/cocopodvm-d0e4f35f-5530-4b9c-8596-112487cdea85/Images/podvm_image0/Versions/$(date -u -jf "%Y-%m-%dT%H:%M:%SZ" "${SUCCESS_TIME}" "+%Y.%m.%d" 2>/dev/null || date -d "${SUCCESS_TIME}" +%Y.%m.%d)"
echo "AZURE_REGION=\"${AZURE_REGION}\""
echo "PP_REGION=\"${PP_REGION}\""
echo "AZURE_RESOURCE_GROUP=\"${AZURE_RESOURCE_GROUP}\""
echo "PP_RESOURCE_GROUP=\"${PP_RESOURCE_GROUP}\""
echo "PP_SUBNET_ID=\"${PP_SUBNET_ID}\""
echo "CAA_IMAGE=\"${CAA_IMAGE}\""
echo "CAA_TAG=\"${CAA_TAG}\""
echo "PP_IMAGE_ID=\"${PP_IMAGE_ID}\""
echo "AZURE_REGION: \"${AZURE_REGION}\""
echo "PP_REGION: \"${PP_REGION}\""
echo "AZURE_RESOURCE_GROUP: \"${AZURE_RESOURCE_GROUP}\""
echo "PP_RESOURCE_GROUP: \"${PP_RESOURCE_GROUP}\""
echo "PP_SUBNET_ID: \"${PP_SUBNET_ID}\""
echo "CAA_TAG: \"${CAA_TAG}\""
echo "PP_IMAGE_ID: \"${PP_IMAGE_ID}\""
# Clone and configure caa
git_sparse_clone "https://github.com/confidential-containers/cloud-api-adaptor.git" "${CAA_GIT_SHA:-main}" "src/cloud-api-adaptor/install/"
echo "CAA_GIT_SHA=\"$(git rev-parse HEAD)\""
git clone --depth 1 --no-checkout https://github.com/confidential-containers/cloud-api-adaptor.git
pushd cloud-api-adaptor
git sparse-checkout init --cone
git sparse-checkout set src/cloud-api-adaptor/install/
git checkout
echo "CAA_GIT_SHA: \"$(git rev-parse HEAD)\""
pushd src/cloud-api-adaptor
cat <<EOF > install/overlays/azure/workload-identity.yaml
apiVersion: apps/v1
@@ -252,8 +208,12 @@ echo "AZURE_CLIENT_SECRET=${AZURE_CLIENT_SECRET}" >> install/overlays/azure/serv
echo "AZURE_TENANT_ID=${AZURE_TENANT_ID}" >> install/overlays/azure/service-principal.env
# Deploy Operator
git_sparse_clone "https://github.com/confidential-containers/operator" "${OPERATOR_SHA:-main}" "config/"
echo "OPERATOR_SHA=\"$(git rev-parse HEAD)\""
git clone --depth 1 --no-checkout https://github.com/confidential-containers/operator
pushd operator
git sparse-checkout init --cone
git sparse-checkout set "config/"
git checkout
echo "OPERATOR_SHA: \"$(git rev-parse HEAD)\""
oc apply -k "config/release"
oc apply -k "config/samples/ccruntime/peer-pods"
popd
@@ -267,7 +227,7 @@ popd
SECONDS=0
( while [[ "${SECONDS}" -lt 360 ]]; do
kubectl get runtimeclass | grep -q kata-remote && exit 0
done; exit 1 ) || { echo "kata-remote runtimeclass not initialized in 360s"; kubectl -n confidential-containers-system get all; echo; echo "kubectl -n confidential-containers-system describe all"; kubectl -n confidential-containers-system describe all; echo; echo CAA; kubectl -n confidential-containers-system logs daemonset.apps/cloud-api-adaptor-daemonset; echo pre-install; kubectl -n confidential-containers-system logs daemonset.apps/cc-operator-pre-install-daemon; echo install; kubectl -n confidential-containers-system logs daemonset.apps/cc-operator-daemon-install; exit 1; }
done; exit 1 ) || { echo "kata-remote runtimeclass not initialized in 360s"; kubectl -n confidential-containers-system get all; echo; echo CAA; kubectl -n confidential-containers-system logs daemonset.apps/cloud-api-adaptor-daemonset; echo pre-install; kubectl -n confidential-containers-system logs daemonset.apps/cc-operator-pre-install-daemon; echo install; kubectl -n confidential-containers-system logs daemonset.apps/cc-operator-daemon-install; exit 1; }
################

View File

@@ -125,7 +125,7 @@ If you want to enable SELinux in Permissive mode, add `enforcing=0` to the kerne
Enable full debug as follows:
```bash
$ sudo sed -i -E 's/^(\s*enable_debug\s*=\s*)false/\1true/' /etc/kata-containers/configuration.toml
$ sudo sed -i -e 's/^# *\(enable_debug\).*=.*$/\1 = true/g' /etc/kata-containers/configuration.toml
$ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.log=debug initcall_debug"/g' /etc/kata-containers/configuration.toml
```
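A quick sanity check after running the commands above (assuming the same configuration path) could be:

```bash
# Verify the debug settings took effect; the matching lines should now
# show enable_debug = true and the extended kernel_params.
grep -E '^\s*(enable_debug|kernel_params)' /etc/kata-containers/configuration.toml
```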
@@ -450,7 +450,7 @@ You can build and install the guest kernel image as shown [here](../tools/packag
# Install a hypervisor
When setting up Kata using a [packaged installation method](install/README.md#installing-on-a-linux-system), the
`QEMU` VMM is installed automatically. Cloud-Hypervisor, Firecracker and StratoVirt VMMs are available from the [release tarballs](https://github.com/kata-containers/kata-containers/releases), as well as through [`kata-deploy`](../tools/packaging/kata-deploy/helm-chart/README.md).
`QEMU` VMM is installed automatically. Cloud-Hypervisor, Firecracker and StratoVirt VMMs are available from the [release tarballs](https://github.com/kata-containers/kata-containers/releases), as well as through [`kata-deploy`](../tools/packaging/kata-deploy/README.md).
You may choose to manually build your VMM/hypervisor.
## Build a custom QEMU

View File

@@ -166,65 +166,19 @@ moment.
See [this issue](https://github.com/kata-containers/runtime/issues/2812) for more details.
[Another issue](https://github.com/kata-containers/kata-containers/issues/1728) focuses on the case of `emptyDir`.
### Kubernetes [hostPath][k8s-hostpath] volumes
## Host resource sharing
In Kata, Kubernetes hostPath volumes can mount host directories and
regular files into the guest VM via filesystem sharing, if it is enabled
through the `shared_fs` [configuration][runtime-config] flag.
By default:
- Non-TEE environment: Filesystem sharing is used to mount host files.
- TEE environment: Filesystem sharing is disabled. Instead, host files
are copied into the guest VM when the container starts, and file
changes are *not* synchronized between the host and the guest.
In some cases, the behavior of hostPath volumes in Kata differs further
from that of `runc` containers:
**Mounting host block devices**: When a hostPath volume is of type
[`BlockDevice`][k8s-blockdevice], Kata hotplugs the host block device
into the guest and exposes it directly to the container.
**Mounting guest devices**: When the source path of a hostPath volume is
under `/dev`, and the path either corresponds to a host device or is not
accessible by the Kata shim, the Kata agent bind mounts the source path
directly from the *guest* filesystem into the container.
[runtime-config]: /src/runtime/README.md#configuration
[k8s-hostpath]: https://kubernetes.io/docs/concepts/storage/volumes/#hostpath
[k8s-blockdevice]: https://kubernetes.io/docs/concepts/storage/volumes/#hostpath-volume-types
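For illustration, a hostPath volume of type `BlockDevice` that Kata would hotplug into the guest per the behavior described above; the device path and runtime class name are assumptions:

```bash
# Sketch: hostPath BlockDevice volume; Kata hotplugs the host block
# device into the guest and exposes it to the container directly.
kubectl apply -f - <<'EOF'
apiVersion: v1
kind: Pod
metadata:
  name: blockdev-demo
spec:
  runtimeClassName: kata
  containers:
  - name: app
    image: busybox
    command: ["sleep", "infinity"]
    volumeMounts:
    - name: disk
      mountPath: /mnt/disk
  volumes:
  - name: disk
    hostPath:
      path: /dev/sdb       # assumed host block device
      type: BlockDevice
EOF
```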
### Mounting `procfs` and `sysfs`
For security reasons, the following mounts are disallowed:
| Type | Source | Destination | Rationale |
|-------------------|-----------|----------------------------------|----------------|
| `bind` | `!= proc` | `/proc` | CVE-2019-16884 |
| `bind` | `*` | `/proc/*` (see exceptions below) | CVE-2019-16884 |
| `proc \|\| sysfs` | `*` | not a directory (e.g. symlink) | CVE-2019-19921 |
For bind mounts under `/proc`, these destinations are allowed:
* `/proc/cpuinfo`
* `/proc/diskstats`
* `/proc/meminfo`
* `/proc/stat`
* `/proc/swaps`
* `/proc/uptime`
* `/proc/loadavg`
* `/proc/net/dev`
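As an illustration only (not taken from the table above), a mount that trips the first rule would be:

```bash
# Inside a Kata container: a bind mount whose destination is /proc
# with a non-proc source matches the CVE-2019-16884 rule and is
# rejected by the agent.
mount --bind /tmp /proc    # expected to fail
```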
## Privileged containers
### Privileged containers
Privileged support in Kata is fundamentally different from that of `runc` containers.
The container runs with elevated capabilities within the guest.
The container runs with elevated capabilities within the guest and is granted
access to guest devices instead of the host devices.
This is also true with using `securityContext privileged=true` with Kubernetes.
Importantly, the default behavior of passing host devices to a
privileged container is not supported in Kata Containers and needs to be
disabled; see [Privileged Kata Containers](how-to/privileged.md).
The container may also be granted full access to a subset of host devices
(https://github.com/kata-containers/runtime/issues/1568).
See [Privileged Kata Containers](how-to/privileged.md) for how to configure some of this behavior.
# Appendices

View File

@@ -83,7 +83,3 @@ Documents that help to understand and contribute to Kata Containers.
If you have a suggestion for how we can improve the
[website](https://katacontainers.io), please raise an issue (or a PR) on
[the repository that holds the source for the website](https://github.com/OpenStackweb/kata-netlify-refresh).
### Toolchain Guidance
* [Toolchain Guidance](./Toochain-Guidance.md)

View File

@@ -1,39 +0,0 @@
# Toolchains
As a community we want to strike a balance between having up-to-date toolchains, so that we receive
the latest security fixes and can benefit from new features and packages, whilst not being so
bleeding edge that we disrupt downstream and other consumers. As a result we are trying out the
following guidelines (note: guidelines, not hard rules) for our Go and Rust toolchains:
## Go toolchain
Go is released [every six months](https://go.dev/wiki/Go-Release-Cycle) with support for the
[last two major release versions](https://go.dev/doc/devel/release#policy). We always want to
ensure that we are on a supported version so that we receive security fixes. To make
things easier for some of our users, we aim to use the older of the two supported major
versions, unless there is a compelling reason to adopt the newer version.
In practice this means that we bump our major version of the go toolchain every six months to
version (1.x-1) in response to a new version (1.x) coming out, which makes our current version
(1.x-2) no longer supported. We will bump the minor version whenever required to satisfy
dependency updates, or security fixes.
Our go toolchain version is recorded in [`versions.yaml`](../versions.yaml) under
`.languages.golang.version` and should match with the version in our `go.mod` files.
## Rust toolchain
Rust has a [six week](https://doc.rust-lang.org/book/appendix-05-editions.html#:~:text=The%20Rust%20language%20and%20compiler,these%20tiny%20changes%20add%20up.)
release cycle, and only the latest stable release is supported, so remaining on a supported
release would mean always building with the latest stable and bumping every six weeks.
However, feedback from our community has indicated that this is a challenge: downstream consumers
often want to get Rust from their distro, or a downstream fork, and these struggle to keep up with
the six-week release schedule. As a result the community has agreed to try out a policy of
"stable-2", where we aim to build with a Rust version that is two versions behind the latest stable
version.
In practice this should mean that we bump our Rust toolchain every six weeks, to version
1.x-2 when 1.x is released as stable, and we should pick up the latest point release
of that version, if there is one.
The rust-toolchain that we are using is recorded in [`rust-toolchain.toml`](../rust-toolchain.toml).

View File

@@ -198,7 +198,7 @@ fn join_params_with_dash(str: &str, num: i32) -> Result<String> {
return Err("number must be positive");
}
let result = format!("{str}-{num}");
let result = format!("{}-{}", str, num);
Ok(result)
}
@@ -253,13 +253,13 @@ mod tests {
// Run the tests
for (i, d) in tests.iter().enumerate() {
// Create a string containing details of the test
let msg = format!("test[{i}]: {d:?}");
let msg = format!("test[{}]: {:?}", i, d);
// Call the function under test
let result = join_params_with_dash(d.str, d.num);
// Update the test details string with the results of the call
let msg = format!("{msg}, result: {result:?}");
let msg = format!("{}, result: {:?}", msg, result);
// Perform the checks
if d.result.is_ok() {
@@ -267,8 +267,8 @@ mod tests {
continue;
}
let expected_error = format!("{d.result.as_ref().unwrap_err()}");
let actual_error = format!("{result.unwrap_err()}");
let expected_error = format!("{}", d.result.as_ref().unwrap_err());
let actual_error = format!("{}", result.unwrap_err());
assert!(actual_error == expected_error, msg);
}
}

View File

@@ -1,9 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 32">
<!-- Dark background matching the site -->
<rect width="32" height="32" rx="4" fill="#1a1a2e"/>
<!-- Kata logo scaled and centered -->
<g transform="translate(-27, -2) scale(0.75)">
<path d="M70.925 25.22L58.572 37.523 46.27 25.22l2.192-2.192 10.11 10.11 10.11-10.11zm-6.575-.2l-3.188-3.188 3.188-3.188 3.188 3.188zm-4.93-2.54l3.736 3.736-3.736 3.736zm-1.694 7.422l-8.07-8.07 8.07-8.07zm1.694-16.14l3.686 3.686-3.686 3.686zm-13.15 4.682L58.572 6.143l12.353 12.303-2.192 2.192-10.16-10.11-10.11 10.11zm26.997 0L58.572 3.752 43.878 18.446l3.387 3.387-3.387 3.387 14.694 14.694L73.266 25.22l-3.337-3.387z" fill="#f15b3e"/>
</g>
</svg>


View File

@@ -51,7 +51,6 @@ containers started after the VM has been launched.
Users can check to see if the container uses the `devicemapper` block
device as its rootfs by calling `mount(8)` within the container. If
the `devicemapper` block device is used, the root filesystem (`/`)
will be mounted from `/dev/vda`. Users can enable direct mounting of
the underlying block device by setting the runtime
[configuration](README.md#configuration) flag `disable_block_device_use` to
`false`.
will be mounted from `/dev/vda`. Users can disable direct mounting of
the underlying block device through the runtime
[configuration](README.md#configuration).

View File

@@ -31,7 +31,6 @@
- [Setting Sysctls with Kata](how-to-use-sysctls-with-kata.md)
- [What Is VMCache and How To Enable It](what-is-vm-cache-and-how-do-I-use-it.md)
- [What Is VM Templating and How To Enable It](what-is-vm-templating-and-how-do-I-use-it.md)
- [How to Use Template in runtime-rs](how-to-use-template-in-runtime-rs.md)
- [Privileged Kata Containers](privileged.md)
- [How to load kernel modules in Kata Containers](how-to-load-kernel-modules-with-kata.md)
- [How to use Kata Containers with `virtio-mem`](how-to-use-virtio-mem-with-kata.md)
@@ -49,4 +48,3 @@
- [How to use the Kata Agent Policy](how-to-use-the-kata-agent-policy.md)
- [How to pull images in the guest](how-to-pull-images-in-guest-with-kata.md)
- [How to use mem-agent to decrease the memory usage of Kata container](how-to-use-memory-agent.md)
- [How to use seccomp with runtime-rs](how-to-use-seccomp-with-runtime-rs.md)

View File

@@ -256,7 +256,7 @@ spec:
values:
- NODE_NAME
volumes:
- name: trusted-image-storage
- name: trusted-storage
persistentVolumeClaim:
claimName: trusted-pvc
containers:

View File

@@ -318,7 +318,7 @@ Finally, an operational kata container with IBM Secure Execution is now running.
It is reasonable to expect that the manual steps mentioned above can be easily executed.
Typically, you can use
[kata-deploy](https://github.com/kata-containers/kata-containers/blob/main/tools/packaging/kata-deploy/helm-chart/README.md)
[kata-deploy](https://github.com/kata-containers/kata-containers/blob/main/tools/packaging/kata-deploy/README.md)
to install Kata Containers on a Kubernetes cluster. However, when leveraging IBM Secure Execution,
you need to employ the confidential container's
[operator](https://github.com/confidential-containers/operator).

View File

@@ -50,7 +50,7 @@ There are several kinds of Kata configurations and they are listed below.
| `io.katacontainers.config.hypervisor.default_max_vcpus` | uint32| the maximum number of vCPUs allocated for the VM by the hypervisor |
| `io.katacontainers.config.hypervisor.default_memory` | uint32| the memory assigned for a VM by the hypervisor in `MiB` |
| `io.katacontainers.config.hypervisor.default_vcpus` | float32| the default vCPUs assigned for a VM by the hypervisor |
| `io.katacontainers.config.hypervisor.disable_block_device_use` | `boolean` | disable hotplugging host block devices to guest VMs for container rootfs |
| `io.katacontainers.config.hypervisor.disable_block_device_use` | `boolean` | disallow a block device from being used |
| `io.katacontainers.config.hypervisor.disable_image_nvdimm` | `boolean` | specify if a `nvdimm` device should be used as rootfs for the guest (QEMU) |
| `io.katacontainers.config.hypervisor.disable_vhost_net` | `boolean` | specify if `vhost-net` is not available on the host |
| `io.katacontainers.config.hypervisor.enable_hugepages` | `boolean` | if the memory should be `pre-allocated` from huge pages |
@@ -97,8 +97,6 @@ There are several kinds of Kata configurations and they are listed below.
| `io.katacontainers.config.hypervisor.use_legacy_serial` | `boolean` | uses legacy serial device for guest's console (QEMU) |
| `io.katacontainers.config.hypervisor.default_gpus` | uint32 | the minimum number of GPUs required for the VM. Only used by remote hypervisor to help with instance selection |
| `io.katacontainers.config.hypervisor.default_gpu_model` | string | the GPU model required for the VM. Only used by remote hypervisor to help with instance selection |
| `io.katacontainers.config.hypervisor.block_device_num_queues` | `usize` | The number of queues to use for block devices (runtime-rs only) |
| `io.katacontainers.config.hypervisor.block_device_queue_size` | uint32 | The size of the queue to use for block devices (runtime-rs only) |
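As an illustration of how these keys are consumed, they can be set as pod annotations. A minimal, hypothetical manifest (the values are examples only, and the keys must be allowed by the runtime's `enable_annotations` setting):
```yaml
apiVersion: v1
kind: Pod
metadata:
  name: annotated-kata-pod
  annotations:
    # Both keys are listed in the table above; values are illustrative
    io.katacontainers.config.hypervisor.default_vcpus: "2"
    io.katacontainers.config.hypervisor.default_memory: "2048"
spec:
  runtimeClassName: kata
  containers:
  - name: app
    image: busybox
    command: ["sleep", "infinity"]
```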
## Container Options
| Key | Value Type | Comments |

View File

@@ -104,20 +104,12 @@ LOW_WATER_MARK=32768
sudo dmsetup create "${POOL_NAME}" \
--table "0 ${LENGTH_IN_SECTORS} thin-pool ${META_DEV} ${DATA_DEV} ${DATA_BLOCK_SIZE} ${LOW_WATER_MARK}"
# Determine plugin name based on containerd config version
CONFIG_VERSION=$(containerd config dump | awk '/^version/ {print $3}')
if [ "$CONFIG_VERSION" -ge 2 ]; then
PLUGIN="io.containerd.snapshotter.v1.devmapper"
else
PLUGIN="devmapper"
fi
cat << EOF
#
# Add this to your config.toml configuration file and restart containerd daemon
#
[plugins]
[plugins."${PLUGIN}"]
[plugins.devmapper]
pool_name = "${POOL_NAME}"
root_path = "${DATA_DIR}"
base_image_size = "10GB"

View File

@@ -1,44 +0,0 @@
## Introduction
To enhance security, Kata Containers supports using seccomp to restrict the hypervisor's system calls. Previously, this was only supported for a subset of hypervisors in runtime-go. Now, the runtime-rs also supports seccomp. This document describes how to enable/disable the seccomp feature for the corresponding hypervisor in runtime-rs.
## Pre-requisites
1. Ensure your system's kernel supports **seccomp**.
2. Confirm that each of the hypervisors discussed below can run correctly on your system.
## Configure seccomp
With the exception of `qemu`, seccomp is enabled by default for all supported hypervisors: their corresponding built-in seccomp functionality is active out of the box.
### QEMU
As with runtime-go, you need to modify the following in your **configuration file**. These parameters will be passed directly to the `qemu` startup command line. For more details on the parameters, you can refer to: [https://www.qemu.org/docs/master/system/qemu-manpage.html](https://www.qemu.org/docs/master/system/qemu-manpage.html)
``` toml
# Qemu seccomp sandbox feature
# comma-separated list of seccomp sandbox features to control the syscall access.
# For example, `seccompsandbox= "on,obsolete=deny,spawn=deny,resourcecontrol=deny"`
# Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
# Another note: enabling this feature may reduce performance, you may enable
# /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html
seccompsandbox="on,obsolete=deny,spawn=deny,resourcecontrol=deny"
```
### Cloud Hypervisor, Firecracker and Dragonball
The **seccomp** functionality is enabled by default for the following three hypervisors: `cloud hypervisor`, `firecracker`, and `dragonball`.
The seccomp rules for `cloud hypervisor` and `firecracker` are built directly into their executable files. For `dragonball`, the relevant configuration is currently located at `src/runtime-rs/crates/hypervisor/src/dragonball/seccomp.rs`.
To disable this functionality for these hypervisors, you can modify the following configuration options in your **configuration file**.
``` toml
# Disable the 'seccomp' feature from Cloud Hypervisor, firecracker or dragonball, default false
disable_seccomp = true
```
## Implementation details
For `qemu`, `cloud hypervisor`, and `firecracker`, the **seccomp** functionality is built into the respective executable files you are using; **runtime-rs** simply provides command-line arguments for their launch based on the configuration file.
For `dragonball`, a set of allowed system calls is currently provided for the entire **runtime-rs** process, and the process is prevented from using any system calls outside of this whitelist. As mentioned above, this set is located at `src/runtime-rs/crates/hypervisor/src/dragonball/seccomp.rs`.

View File

@@ -1,119 +0,0 @@
# How to Use Template in runtime-rs
## What is VM Templating
VM templating is a Kata Containers feature that enables new VM creation using a cloning technique. When enabled, new VMs are created by cloning from a pre-created template VM, and they will share the same initramfs, kernel and agent memory in readonly mode. It is very much like a process fork done by the kernel but here we *fork* VMs.
For more details on VM templating, refer to the [What is VM templating and how do I use it](./what-is-vm-templating-and-how-do-I-use-it.md) article.
## How to Enable VM Templating
VM templating can be enabled by changing your Kata Containers config file (`/opt/kata/share/defaults/kata-containers/runtime-rs/configuration.toml`, overridden by `/etc/kata-containers/configuration.toml` if provided) such that:
- `qemu` version `v4.1.0` or above is specified in `hypervisor.qemu`->`path` section
- `enable_template = true`
- `template_path = "/run/vc/vm/template"` (default value, can be customized as needed)
- `initrd =` is set
- `image =` option is commented out or removed
- `shared_fs =` option is commented out or removed
- `default_memory =` should be set to more than 256MB
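Taken together, a minimal sketch of the resulting `[hypervisor.qemu]` section (paths are illustrative and depend on your installation):
```toml
[hypervisor.qemu]
path = "/usr/bin/qemu-system-x86_64"  # qemu v4.1.0 or above
initrd = "/opt/kata/share/kata-containers/kata-containers-initrd.img"
enable_template = true
template_path = "/run/vc/vm/template"
default_memory = 2048  # more than 256MB
# image = ...      commented out or removed
# shared_fs = ...  commented out or removed
```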
Then you can create a VM template for later usage by calling:
### Initialize and create the VM template
The `factory init` command creates a VM template by launching a new VM, initializing the Kata Agent, then pausing and saving its state (memory and device snapshots) to the template directory. This saved template is used to rapidly clone new VMs using QEMU's memory sharing capabilities.
```bash
sudo kata-ctl factory init
```
### Check the status of the VM template
The `factory status` command checks whether a VM template currently exists by verifying the presence of template files (memory snapshot and device state). It will output "VM factory is on" if the template exists, or "VM factory is off" otherwise.
```bash
sudo kata-ctl factory status
```
### Destroy and clean up the VM template
The `factory destroy` command removes the VM template by removing the `tmpfs` filesystem and deleting the template directory along with all its contents.
```bash
sudo kata-ctl factory destroy
```
## How to Create a New VM from VM Template
In the Go version of Kata Containers, the VM templating mechanism is implemented using virtio-9p (9pfs). However, 9pfs is not supported in runtime-rs due to its poor performance, limited cache coherence, and security risks. Instead, runtime-rs adopts `VirtioFS` as the default mechanism to provide rootfs for containers and VMs.
Yet, when enabling the VM template mechanism, `VirtioFS` introduces conflicts in memory sharing because its DAX-based shared memory mapping overlaps with the template's page-sharing design. To resolve these conflicts and ensure strict isolation between cloned VMs, runtime-rs replaces `VirtioFS` with the snapshotter approach — specifically, the `blockfile` snapshotter.
The `blockfile` snapshotter is used in runtime-rs because it provides each VM with an independent block-based root filesystem, ensuring strong isolation and full compatibility with the VM templating mechanism.
### Configure Snapshotter
#### Check if `Blockfile` Snapshotter is Available
```bash
ctr plugins ls | grep blockfile
```
If not available, continue with the following steps:
#### Create Scratch File
```bash
dd if=/dev/zero of=/opt/containerd/blockfile bs=1M count=500
sudo mkfs.ext4 /opt/containerd/blockfile
```
#### Configure containerd
Edit the containerd configuration file:
```bash
sudo vim /etc/containerd/config.toml
```
Add or modify the following configuration for the `blockfile` snapshotter:
```toml
[plugins."io.containerd.snapshotter.v1.blockfile"]
scratch_file = "/opt/containerd/blockfile"
root_path = ""
fs_type = "ext4"
mount_options = []
recreate_scratch = true
```
#### Restart containerd
After modifying the configuration, restart containerd to apply changes:
```bash
sudo systemctl restart containerd
```
### Run Container with `blockfile` Snapshotter
After the VM template is created, you can pull an image and run a container using the `blockfile` snapshotter:
```bash
ctr run --rm -t --snapshotter blockfile docker.io/library/busybox:latest template sh
```
We can verify whether a VM was launched from a template or started normally by checking the launch parameters: if the parameters contain `incoming`, the VM was started from a template rather than created directly.
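For instance, assuming `qemu` is the hypervisor, a quick illustrative check on the host is:
```bash
# A VM cloned from a template carries an "-incoming" option on its command line
$ ps aux | grep qemu | grep -- "-incoming"
```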
## Performance Test
The comparative experiment between **template-based VM** creation and **direct VM** creation showed that the template-based approach achieved a ≈ **26.8%** reduction in startup latency (average launch time of **0.6s** vs. **0.82s**) and a ≈ **20.2%** reduction in memory usage (average memory usage of **178.2 MiB** vs. **223.2 MiB**), demonstrating significant improvements in VM startup efficiency and resource utilization.
The test script is as follows:
```bash
# Clear the page cache, dentries, and inodes to free up memory
echo 3 | sudo tee /proc/sys/vm/drop_caches
# Display the current memory usage
free -h
# Create 100 normal VMs and template-based VMs, and track the time
time for I in $(seq 100); do
echo -n " ${I}th" # Display the iteration number
ctr run -d --runtime io.containerd.kata.v2 --snapshotter blockfile docker.io/library/busybox:latest normal/template${I}
done
# Display the memory usage again after running the test
free -h
```

View File

@@ -32,24 +32,11 @@ Kubernetes users can encode in `base64` format their Policy documents, and add t
### Encode a Policy file
For example, the [`allow-all-except-exec-process.rego`](../../src/kata-opa/allow-all-except-exec-process.rego) sample policy file is different from the [default Policy](../../src/kata-opa/allow-all.rego) because it rejects any `ExecProcess` requests. To encode this policy file, you need to:
- Embed the policy inside an init data struct
- Compress
- Base64 encode
For example:
For example, the [`allow-all-except-exec-process.rego`](../../src/kata-opa/allow-all-except-exec-process.rego) sample policy file is different from the [default Policy](../../src/kata-opa/allow-all.rego) because it rejects any `ExecProcess` requests. You can encode this policy file:
```bash
$ STRING="$(< allow-all-except-exec-process.rego)"
$ cat <<EOF | gzip -c | base64 -w0
version = "0.1.0"
algorithm = "sha256"
[data]
"policy.rego" = '''
$STRING
'''
EOF
H4sIAAAAAAAAA42UTW/TQBCG7/4Vq/QQOCQKQXCo1ENIAkRqiGWnpBJCaGKP7RXrXTM7DnV/PRMiVUh07R582J3H8/XO7AnJa2fVjRrNpm+ms1EEpnSkuarPd76C+bv3oyj6lgPD92jUOKOzbkpYupEA4/E4ulJL13Sky4rVq+y1ms/mb9VWZ+S8K1iM1DgClijRlcBpvLqf3OoMrcfJJkfLutBI12rRQFbhZD6dCRfJ4SeUqOSz/OMSNopyLKA1rBZ5vkjiLyhBj458gr9a9KyubxRTi/9i6W9oQualcR5TzrUNElLZR20waCcExqWzDNoi9WMp2PzoHkLQSi7JdQPUJ+QtMuksWLQQu912fZK+BZHz7QolaRN0c6s9bywjFZBhL5W4lsPEFuvPjhvTlh+6mNwx2MudNdLDZXwnf4SYGFo/3O64NWZTy+SEgAQhT1lECQZKsHan4UgXLGUw+FWTzHjh0woIt661HGxJgh4xT0RoV6/w1IO19XAOKfJFTxmxva6DRQsX/12jIKBLC0Y0Er2DuUutxMM5nak9QaZt2cOwf4En1ww42nN3OK+w14/B4u+a/CWLesHWTYU1Eph+GS/w0470Y/1LcgDNA40/yKOMzw/tE7N+wOx/NwUYj9H5qf4DsX93tO4FAAA=
$ base64 -w 0 allow-all-except-exec-process.rego
cGFja2FnZSBhZ2VudF9wb2xpY3kKCmRlZmF1bHQgQWRkQVJQTmVpZ2hib3JzUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgQWRkU3dhcFJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IENsb3NlU3RkaW5SZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBDb3B5RmlsZVJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IENyZWF0ZUNvbnRhaW5lclJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IENyZWF0ZVNhbmRib3hSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBEZXN0cm95U2FuZGJveFJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IEdldE1ldHJpY3NSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBHZXRPT01FdmVudFJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IEd1ZXN0RGV0YWlsc1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IExpc3RJbnRlcmZhY2VzUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgTGlzdFJvdXRlc1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IE1lbUhvdHBsdWdCeVByb2JlUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgT25saW5lQ1BVTWVtUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUGF1c2VDb250YWluZXJSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBQdWxsSW1hZ2VSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBSZWFkU3RyZWFtUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVtb3ZlQ29udGFpbmVyUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVtb3ZlU3RhbGVWaXJ0aW9mc1NoYXJlTW91bnRzUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVzZWVkUmFuZG9tRGV2UmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVzdW1lQ29udGFpbmVyUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgU2V0R3Vlc3REYXRlVGltZVJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFNldFBvbGljeVJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFNpZ25hbFByb2Nlc3NSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBTdGFydENvbnRhaW5lclJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFN0YXJ0VHJhY2luZ1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFN0YXRzQ29udGFpbmVyUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgU3RvcFRyYWNpbmdSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBUdHlXaW5SZXNpemVSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVDb250YWluZXJSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVFcGhlbWVyYWxNb3VudHNSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVJbnRlcmZhY2VSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVSb3V0ZXNSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBXYWl0UHJvY2Vzc1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFdyaXRlU3RyZWFtUmVxdWVzdCA6PSB0cnVlCgpkZWZhdWx0IEV4ZWNQcm9jZXNzUmVxdWVzdCA6PSBmYWxzZQo=
```
### Attach the Policy to a pod
@@ -62,7 +49,7 @@ kind: Pod
metadata:
name: policy-exec-rejected
annotations:
io.katacontainers.config.hypervisor.cc_init_data: H4sIAAAAAAAAA42UTW/TQBCG7/4Vq/QQOCQKQXCo1ENIAkRqiGWnpBJCaGKP7RXrXTM7DnV/PRMiVUh07R582J3H8/XO7AnJa2fVjRrNpm+ms1EEpnSkuarPd76C+bv3oyj6lgPD92jUOKOzbkpYupEA4/E4ulJL13Sky4rVq+y1ms/mb9VWZ+S8K1iM1DgClijRlcBpvLqf3OoMrcfJJkfLutBI12rRQFbhZD6dCRfJ4SeUqOSz/OMSNopyLKA1rBZ5vkjiLyhBj458gr9a9KyubxRTi/9i6W9oQualcR5TzrUNElLZR20waCcExqWzDNoi9WMp2PzoHkLQSi7JdQPUJ+QtMuksWLQQu912fZK+BZHz7QolaRN0c6s9bywjFZBhL5W4lsPEFuvPjhvTlh+6mNwx2MudNdLDZXwnf4SYGFo/3O64NWZTy+SEgAQhT1lECQZKsHan4UgXLGUw+FWTzHjh0woIt661HGxJgh4xT0RoV6/w1IO19XAOKfJFTxmxva6DRQsX/12jIKBLC0Y0Er2DuUutxMM5nak9QaZt2cOwf4En1ww42nN3OK+w14/B4u+a/CWLesHWTYU1Eph+GS/w0470Y/1LcgDNA40/yKOMzw/tE7N+wOx/NwUYj9H5qf4DsX93tO4FAAA=
io.katacontainers.config.agent.policy: cGFja2FnZSBhZ2VudF9wb2xpY3kKCmRlZmF1bHQgQWRkQVJQTmVpZ2hib3JzUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgQWRkU3dhcFJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IENsb3NlU3RkaW5SZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBDb3B5RmlsZVJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IENyZWF0ZUNvbnRhaW5lclJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IENyZWF0ZVNhbmRib3hSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBEZXN0cm95U2FuZGJveFJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IEdldE1ldHJpY3NSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBHZXRPT01FdmVudFJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IEd1ZXN0RGV0YWlsc1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IExpc3RJbnRlcmZhY2VzUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgTGlzdFJvdXRlc1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IE1lbUhvdHBsdWdCeVByb2JlUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgT25saW5lQ1BVTWVtUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUGF1c2VDb250YWluZXJSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBQdWxsSW1hZ2VSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBSZWFkU3RyZWFtUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVtb3ZlQ29udGFpbmVyUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVtb3ZlU3RhbGVWaXJ0aW9mc1NoYXJlTW91bnRzUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVzZWVkUmFuZG9tRGV2UmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVzdW1lQ29udGFpbmVyUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgU2V0R3Vlc3REYXRlVGltZVJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFNldFBvbGljeVJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFNpZ25hbFByb2Nlc3NSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBTdGFydENvbnRhaW5lclJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFN0YXJ0VHJhY2luZ1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFN0YXRzQ29udGFpbmVyUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgU3RvcFRyYWNpbmdSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBUdHlXaW5SZXNpemVSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVDb250YWluZXJSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVFcGhlbWVyYWxNb3VudHNSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVJbnRlcmZhY2VSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVSb3V0ZXNSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBXYWl0UHJvY2Vzc1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFdyaXRlU3RyZWFtUmVxdWVzdCA6PSB0cnVlCgpkZWZhdWx0IEV4ZWNQcm9jZXNzUmVxdWVzdCA6PSBmYWxzZQo=
spec:
runtimeClassName: kata
containers:
@@ -79,7 +66,7 @@ Create the pod:
$ kubectl apply -f pod1.yaml
```
While creating the Pod sandbox, the Kata Shim will notice the `io.katacontainers.config.hypervisor.cc_init_data` annotation and will create the init data device on the host and mount it on the guest as a block device. The agent then reads the init data struct from this device and sets the policy if present.
While creating the Pod sandbox, the Kata Shim will notice the `io.katacontainers.config.agent.policy` annotation and will send the Policy document to the Kata Agent - by sending a `SetPolicy` request. Note that this request will fail if the default Policy, included in the Guest image, doesn't allow this `SetPolicy` request. If the `SetPolicy` request is rejected by the Guest, the Kata Shim will fail to start the Pod sandbox.
# How is the Policy being enforced?

View File

@@ -6,4 +6,4 @@ Container deployments utilize explicit or implicit file sharing between host fil
As of the 2.0 release of Kata Containers, [virtio-fs](https://virtio-fs.gitlab.io/) is the default filesystem sharing mechanism.
virtio-fs support works out of the box for `cloud-hypervisor` and `qemu`, when Kata Containers is deployed using `kata-deploy`. Learn more about `kata-deploy` and how to use `kata-deploy` in Kubernetes [here](../../tools/packaging/kata-deploy/helm-chart/README.md).
virtio-fs support works out of the box for `cloud-hypervisor` and `qemu`, when Kata Containers is deployed using `kata-deploy`. Learn more about `kata-deploy` and how to use `kata-deploy` in Kubernetes [here](../../tools/packaging/kata-deploy/README.md#kubernetes-quick-start).

View File

@@ -1,25 +1,22 @@
# Privileged Kata Containers
> [!WARNING]
> Whilst this functionality is supported, it can decrease the security of Kata Containers if not configured correctly.
Kata Containers supports creation of containers that are "privileged" (i.e. have additional capabilities and access
that is not normally granted).
## Enabling privileged containers without host devices
## Warnings
> [!TIP]
> When Kata Containers is installed through
> [kata-deploy](/tools/packaging/kata-deploy/helm-chart/README.md#kata-deploy-helm-chart), this mitigation is configured
> out of the box, hence there is no action required in that case.
**Warning:** Whilst this functionality is supported, it can decrease the security of Kata Containers if not configured
correctly.
By default, a privileged container attempts to expose all devices from the host. This is generally not supported in Kata
Containers as the container is running a different kernel than the host.
### Host Devices
Instead, the following sections document how to disable this behavior in different container runtimes. Note that this
mitigation does not affect a container's ability to mount *guest* devices.
By default, when privileged is enabled for a container, all the `/dev/*` block devices from the host are mounted
into the guest. This will allow the privileged container inside the Kata guest to gain access to mount any block device
from the host, a potentially undesirable side-effect that decreases the security of Kata.
## Containerd
The following sections document how to configure this behavior in different container runtimes.
#### Containerd
Containerd allows configuring the privileged host devices behavior for each runtime in the containerd config. This is
done with the `privileged_without_host_devices` option. Setting this to `true` will disable hot plugging of the host
@@ -46,7 +43,7 @@ See below example config:
- [How to use Kata Containers and containerd with Kubernetes](how-to-use-k8s-with-containerd-and-kata.md)
- [Containerd CRI config documentation](https://github.com/containerd/containerd/blob/main/docs/cri/config.md)
## CRI-O
#### CRI-O
Similar to containerd, CRI-O allows configuring the privileged host devices
behavior for each runtime in the CRI config. This is done with the

View File

@@ -8,11 +8,50 @@ Kata Containers requires nested virtualization or bare metal. Check
[hardware requirements](./../../README.md#hardware-requirements) to see if your system is capable of running Kata
Containers.
The Kata Deploy Helm chart is the preferred way to install all of the binaries and
## Packaged installation methods
The packaged installation method uses your distribution's native package format (such as RPM or DEB).
> **Note:**
>
> We encourage you to select an installation method that provides
> automatic updates, to ensure you get the latest security updates and
> bug fixes.
| Installation method | Description | Automatic updates | Use case |
|------------------------------------------------------|----------------------------------------------------------------------------------------------|-------------------|-----------------------------------------------------------------------------------------------|
| [Using official distro packages](#official-packages) | Kata packages provided by Linux distributions official repositories | yes | Recommended for most users. |
| [Automatic](#automatic-installation) | Run a single command to install a full system | **No!** | For those wanting the latest release quickly. |
| [Using kata-deploy Helm chart](#kata-deploy-helm-chart) | The preferred way to deploy the Kata Containers distributed binaries on a Kubernetes cluster | **No!** | The best way to try Kata Containers on an already running Kubernetes cluster. |
### Kata Deploy Helm Chart
The Kata Deploy Helm chart is a convenient way to install all of the binaries and
artifacts required to run Kata Containers on Kubernetes.
[Use Kata Deploy Helm Chart](/tools/packaging/kata-deploy/helm-chart/README.md) to install Kata Containers on a Kubernetes Cluster.
### Official packages
Kata packages are provided by official distribution repositories for:
| Distribution (link to installation guide) | Minimum versions |
|----------------------------------------------------------|--------------------------------------------------------------------------------|
| [CentOS](centos-installation-guide.md) | 8 |
| [Fedora](fedora-installation-guide.md) | 34 |
### Automatic Installation
[Use `kata-manager`](/utils/README.md) to automatically install a working Kata Containers system.
## Installing on a Cloud Service Platform
* [Amazon Web Services (AWS)](aws-installation-guide.md)
* [Google Compute Engine (GCE)](gce-installation-guide.md)
* [Microsoft Azure](azure-installation-guide.md)
* [Minikube](minikube-installation-guide.md)
* [VEXXHOST OpenStack Cloud](vexxhost-installation-guide.md)
## Further information
* [upgrading document](../Upgrading.md)

View File

@@ -0,0 +1,135 @@
# Install Kata Containers on Amazon Web Services
Kata Containers on Amazon Web Services (AWS) makes use of [i3.metal](https://aws.amazon.com/ec2/instance-types/i3/) instances. Most of the installation procedure is identical to that for Kata on your preferred distribution, except that you have to run it on bare metal instances since AWS doesn't support nested virtualization yet. This guide walks you through creating an i3.metal instance.
## Install and Configure AWS CLI
### Requirements
* Python:
* Python 2 version 2.6.5+
* Python 3 version 3.3+
### Install
Install with this command:
```bash
$ pip install awscli --upgrade --user
```
### Configure
First, verify it:
```bash
$ aws --version
```
Then configure it:
```bash
$ aws configure
```
Specify the required parameters:
```
AWS Access Key ID []: <your-key-id-from-iam>
AWS Secret Access Key []: <your-secret-access-key-from-iam>
Default region name []: <your-aws-region-for-your-i3-metal-instance>
Default output format [None]: <yaml-or-json-or-empty>
```
Alternatively, you can create the files: `~/.aws/credentials` and `~/.aws/config`:
```bash
$ cat <<EOF > ~/.aws/credentials
[default]
aws_access_key_id = <your-key-id-from-iam>
aws_secret_access_key = <your-secret-access-key-from-iam>
EOF
$ cat <<EOF > ~/.aws/config
[default]
region = <your-aws-region-for-your-i3-metal-instance>
EOF
```
For more information on how to get AWS credentials please refer to [this guide](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html). Alternatively, you can ask the administrator of your AWS account to issue one with the AWS CLI:
```sh
$ aws_username="myusername"
$ aws iam create-access-key --user-name="$aws_username"
```
More general AWS CLI guidelines can be found [here](https://docs.aws.amazon.com/cli/latest/userguide/installing.html).
## Create or Import an EC2 SSH key pair
You will need this to access your instance.
To create:
```bash
$ aws ec2 create-key-pair --key-name MyKeyPair | grep KeyMaterial | cut -d: -f2- | tr -d ' \n\"\,' > MyKeyPair.pem
$ chmod 400 MyKeyPair.pem
```
Alternatively, to import your public SSH key:
```bash
$ aws ec2 import-key-pair --key-name "MyKeyPair" --public-key-material file://MyKeyPair.pub
```
## Launch i3.metal instance
Get the latest Bionic Ubuntu AMI (Amazon Machine Image) or the latest AMI for the Linux distribution you would like to use. For example:
```bash
$ aws ec2 describe-images --owners 099720109477 --filters "Name=name,Values=ubuntu/images/hvm-ssd/ubuntu-bionic-18.04-amd64-server*" --query 'sort_by(Images, &CreationDate)[].ImageId '
```
This command will produce output similar to the following:
```
[
...
"ami-063aa838bd7631e0b",
"ami-03d5270fcb641f79b"
]
```
Launch the EC2 instance and pick up the `INSTANCEID`:
```bash
$ aws ec2 run-instances --image-id ami-03d5270fcb641f79b --count 1 --instance-type i3.metal --key-name MyKeyPair --associate-public-ip-address > /tmp/aws.json
$ export INSTANCEID=$(grep InstanceId /tmp/aws.json | cut -d: -f2- | tr -d ' \n\"\,')
```
Wait for the instance to come up; the output of the following command should be `running`:
```bash
$ aws ec2 describe-instances --instance-id=${INSTANCEID} | grep running | cut -d: -f2- | tr -d ' \"\,'
```
Get the public IP address for the instances:
```bash
$ export IP=$(aws ec2 describe-instances --instance-id=${INSTANCEID} | grep PublicIpAddress | cut -d: -f2- | tr -d ' \n\"\,')
```
Refer to [this guide](https://docs.aws.amazon.com/cli/latest/userguide/cli-ec2-launch.html) for more details on how to launch instances with the AWS CLI.
SSH into the machine:
```bash
$ ssh -i MyKeyPair.pem ubuntu@${IP}
```
Go on to the next step.
## Install Kata
The process for installing Kata itself on bare metal is identical to that of a virtualization-enabled VM.
For detailed information to install Kata on your distribution of choice, see the [Kata Containers installation user guides](../install/README.md).

View File

@@ -0,0 +1,18 @@
# Install Kata Containers on Microsoft Azure
Kata Containers on Azure use nested virtualization to provide an identical installation
experience to Kata on your preferred Linux distribution.
This guide assumes you have an Azure account set up and tools to remotely login to your virtual
machine (SSH). Instructions will use the Azure Portal to avoid
local dependencies and setup.
## Create a new virtual machine with nesting support
Create a new virtual machine with:
* Nesting support (v3 series)
* your distro of choice
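If you prefer the Azure CLI over the Portal, a sketch of an equivalent command (resource group, VM name, size, and image are placeholders to adapt):
```bash
$ az vm create \
    --resource-group myResourceGroup \
    --name kata-nested \
    --size Standard_D4s_v3 \
    --image Ubuntu2204 \
    --generate-ssh-keys
```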
## Set up with distribution specific quick start
Follow distribution specific [install guides](../install/README.md#packaged-installation-methods).

View File

@@ -0,0 +1,21 @@
# Install Kata Containers on CentOS
1. Install the Kata Containers components with the following commands:
```bash
$ sudo -E dnf install -y centos-release-advanced-virtualization
$ sudo -E dnf module disable -y virt:rhel
$ source /etc/os-release
$ cat <<EOF | sudo -E tee /etc/yum.repos.d/kata-containers.repo
[kata-containers]
name=Kata Containers
baseurl=http://mirror.centos.org/\$contentdir/\$releasever/virt/\$basearch/kata-containers
enabled=1
gpgcheck=1
skip_if_unavailable=1
EOF
$ sudo -E dnf install -y kata-containers
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@@ -0,0 +1,10 @@
# Install Kata Containers on Fedora
1. Install the Kata Containers components with the following commands:
```bash
$ sudo -E dnf -y install kata-containers
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@@ -0,0 +1,127 @@
# Install Kata Containers on Google Compute Engine
Kata Containers on Google Compute Engine (GCE) makes use of [nested virtualization](https://cloud.google.com/compute/docs/instances/enable-nested-virtualization-vm-instances). Most of the installation procedure is identical to that for Kata on your preferred distribution, but enabling nested virtualization currently requires extra steps on GCE. This guide walks you through creating an image and instance with nested virtualization enabled. Note that `kata-runtime check` checks for nested virtualization, but does not fail if support is not found.
As a prerequisite, this guide assumes an installed and configured instance of the [Google Cloud SDK](https://cloud.google.com/sdk/downloads). For a zero-configuration option, all of the commands below were tested under [Google Cloud Shell](https://cloud.google.com/shell/) (as of Jun 2018). Verify your `gcloud` installation and configuration:
```bash
$ gcloud info || { echo "ERROR: no Google Cloud SDK"; exit 1; }
```
## Create an Image with Nested Virtualization Enabled
VM images on GCE are grouped into families under projects. Officially supported images are automatically discoverable with `gcloud compute images list`. That command produces a list similar to the following (likely with different image names):
```bash
$ gcloud compute images list
NAME PROJECT FAMILY DEPRECATED STATUS
centos-7-v20180523 centos-cloud centos-7 READY
coreos-stable-1745-5-0-v20180531 coreos-cloud coreos-stable READY
cos-beta-67-10575-45-0 cos-cloud cos-beta READY
cos-stable-66-10452-89-0 cos-cloud cos-stable READY
debian-9-stretch-v20180510 debian-cloud debian-9 READY
rhel-7-v20180522 rhel-cloud rhel-7 READY
sles-11-sp4-v20180523 suse-cloud sles-11 READY
ubuntu-1604-xenial-v20180522 ubuntu-os-cloud ubuntu-1604-lts READY
ubuntu-1804-bionic-v20180522 ubuntu-os-cloud ubuntu-1804-lts READY
```
Each distribution has its own project, and each project can host images for multiple versions of the distribution, typically grouped into families. We recommend you select images by project and family, rather than by name. This ensures any scripts or other automation always works with a non-deprecated image, including security updates, updates to GCE-specific scripts, etc.
### Create the Image
The following example (substitute your preferred distribution project and image family) produces an image with nested virtualization enabled in your currently active GCE project:
```bash
$ SOURCE_IMAGE_PROJECT=ubuntu-os-cloud
$ SOURCE_IMAGE_FAMILY=ubuntu-1804-lts
$ IMAGE_NAME=${SOURCE_IMAGE_FAMILY}-nested
$ gcloud compute images create \
--source-image-project $SOURCE_IMAGE_PROJECT \
--source-image-family $SOURCE_IMAGE_FAMILY \
--licenses=https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx \
$IMAGE_NAME
```
If successful, `gcloud` reports that the image was created. Verify that the image has the nested virtualization license with `gcloud compute images describe $IMAGE_NAME`. This produces output like the following (some fields have been removed for clarity and to redact personal info):
```yaml
diskSizeGb: '10'
kind: compute#image
licenseCodes:
- '1002001'
- '5926592092274602096'
licenses:
- https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx
- https://www.googleapis.com/compute/v1/projects/ubuntu-os-cloud/global/licenses/ubuntu-1804-lts
name: ubuntu-1804-lts-nested
sourceImage: https://www.googleapis.com/compute/v1/projects/ubuntu-os-cloud/global/images/ubuntu-1804-bionic-v20180522
sourceImageId: '3280575157699667619'
sourceType: RAW
status: READY
```
The primary criterion of interest here is the presence of the `enable-vmx` license. Without that license, Kata does not work. The presence of that license instructs the Google Compute Engine hypervisor to enable Intel's VT-x instructions in virtual machines created from the image. Note that nested virtualization is only available in VMs running on Intel Haswell or later CPU micro-architectures.
### Verify VMX is Available
Assuming you created a nested-enabled image using the previous instructions, verify that VMs created from this image are VMX-enabled with the following:
1. Create a VM from the image created previously:
```bash
$ gcloud compute instances create \
--image $IMAGE_NAME \
--machine-type n1-standard-2 \
--min-cpu-platform "Intel Broadwell" \
kata-testing
```
> **NOTE**: In most zones the `--min-cpu-platform` argument can be omitted. It is only necessary in GCE Zones that include hosts based on Intel's Ivybridge platform.
2. Verify that the VMX CPUID flag is set:
```bash
$ gcloud compute ssh kata-testing
# While ssh'd into the VM:
$ [ -z "$(lscpu|grep GenuineIntel)" ] && { echo "ERROR: Need an Intel CPU"; exit 1; }
```
If this fails, ensure you created your instance from the correct image and that the previously listed `enable-vmx` license is included.
## Install Kata
The process for installing Kata itself on a virtualization-enabled VM is identical to that for bare metal.
For detailed information to install Kata on your distribution of choice, see the [Kata Containers installation user guides](../install/README.md).
## Create a Kata-enabled Image
Optionally, after installing Kata, create an image to preserve the fruits of your labor:
```bash
$ gcloud compute instances stop kata-testing
$ gcloud compute images create \
--source-disk kata-testing \
kata-base
```
The result is an image that includes any changes made to the `kata-testing` instance as well as the `enable-vmx` flag. Verify this with `gcloud compute images describe kata-base`. The result, which omits some fields for clarity, should be similar to the following:
```yaml
diskSizeGb: '10'
kind: compute#image
licenseCodes:
- '1002001'
- '5926592092274602096'
licenses:
- https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx
- https://www.googleapis.com/compute/v1/projects/ubuntu-os-cloud/global/licenses/ubuntu-1804-lts
name: kata-base
selfLink: https://www.googleapis.com/compute/v1/projects/my-kata-project/global/images/kata-base
sourceDisk: https://www.googleapis.com/compute/v1/projects/my-kata-project/zones/us-west1-a/disks/kata-testing
sourceType: RAW
status: READY
```

View File

@@ -32,7 +32,7 @@ architectures:
### Kata Deploy Installation
Follow the [`kata-deploy`](../../tools/packaging/kata-deploy/helm-chart/README.md).
Follow the [`kata-deploy`](../../tools/packaging/kata-deploy/README.md).
### Official packages
`ToDo`
### Automatic Installation

View File

@@ -103,8 +103,48 @@ $ minikube ssh "grep -c -E 'vmx|svm' /proc/cpuinfo"
## Installing Kata Containers
You can now install the Kata Containers runtime components
[following the official instructions](../../tools/packaging/kata-deploy/helm-chart).
You can now install the Kata Containers runtime components. You will need a local copy of some Kata
Containers components to help with this, and then use `kubectl` on the host (that Minikube has already
configured for you) to deploy them:
```sh
$ git clone https://github.com/kata-containers/kata-containers.git
$ cd kata-containers/tools/packaging/kata-deploy
$ kubectl apply -f kata-rbac/base/kata-rbac.yaml
$ kubectl apply -f kata-deploy/base/kata-deploy.yaml
```
This installs the Kata Containers components into `/opt/kata` inside the Minikube node. It can take
a few minutes for the operation to complete. You can check the installation has worked by checking
the status of the `kata-deploy` pod, which will be executing
[this script](../../tools/packaging/kata-deploy/scripts/kata-deploy.sh),
and will be executing a `sleep infinity` once it has successfully completed its work.
You can accomplish this by running the following:
```sh
$ podname=$(kubectl -n kube-system get pods -o=name | grep -F kata-deploy | sed 's?pod/??')
$ kubectl -n kube-system exec ${podname} -- ps -ef | grep -F infinity
```
> *NOTE:* This check only works for single node clusters, which is the default for Minikube.
> For multi-node clusters, the check would need to be adapted to check `kata-deploy` had
> completed on all nodes.
## Enabling Kata Containers
Now you have installed the Kata Containers components in the Minikube node. Next, you need to configure
Kubernetes `RuntimeClass` to know when to use Kata Containers to run a pod.
### Register the runtime
Now register the `kata qemu` runtime with that class. This should result in no errors:
```sh
$ cd kata-containers/tools/packaging/kata-deploy/runtimeclasses
$ kubectl apply -f kata-runtimeClasses.yaml
```
The Kata Containers installation process should be complete and enabled in the Minikube cluster.
## Testing Kata Containers

View File

@@ -0,0 +1,16 @@
# Install Kata Containers on VEXXHOST
Kata Containers on VEXXHOST use nested virtualization to provide an identical
installation experience to Kata on your preferred Linux distribution.
This guide assumes you have an OpenStack public cloud account set up and tools
to remotely connect to your virtual machine (SSH).
## Create a new virtual machine with nesting support
All regions support nested virtualization using the V2 flavors (those prefixed
with v2). The recommended machine types for container workloads are in the `v2-highcpu` range.
## Set up with distribution specific quick start
Follow distribution specific [install guides](../install/README.md#packaged-installation-methods).

View File

@@ -48,7 +48,7 @@ $ make test
- Run a test in the current package in verbose mode:
```bash
# Example
# Example
$ test="config::tests::test_get_log_level"
$ cargo test "$test" -vv -- --exact --nocapture
@@ -223,7 +223,7 @@ What's wrong with this function?
```rust
fn foo(config: &Config, path_prefix: String, container_id: String, pid: String) -> Result<()> {
let mut full_path = format!("{path_prefix}/{container_id}");
let mut full_path = format!("{}/{}", path_prefix, container_id);
let _ = remove_recursively(&mut full_path);

View File

@@ -3,4 +3,4 @@
Kata Containers supports passing certain GPUs from the host into the container. Select the GPU vendor for detailed information:
- [Intel Discrete GPUs](Intel-Discrete-GPU-passthrough-and-Kata.md)/[Intel Integrated GPUs](Intel-GPU-passthrough-and-Kata.md)
- [NVIDIA GPUs](NVIDIA-GPU-passthrough-and-Kata.md) and [Enabling NVIDIA GPU workloads using GPU passthrough with Kata Containers](NVIDIA-GPU-passthrough-and-Kata-QEMU.md)
- [NVIDIA](NVIDIA-GPU-passthrough-and-Kata.md)

View File

@@ -1,569 +0,0 @@
# Enabling NVIDIA GPU workloads using GPU passthrough with Kata Containers
This page provides:
1. A description of the components involved when running GPU workloads with
Kata Containers using the NVIDIA TEE and non-TEE GPU runtime classes.
1. An explanation of the orchestration flow on a Kubernetes node for this
scenario.
1. A deployment guide enabling you to utilize these runtime classes.
The goal is to educate readers familiar with Kubernetes and Kata Containers
on NVIDIA's reference implementation which is reflected in Kata CI's build
and test framework. With this, we aim to enable readers to leverage this
stack, or to use the principles behind this stack in order to run GPU
workloads on their variant of the Kata Containers stack.
We assume the reader is familiar with Kubernetes, Kata Containers, and
Confidential Containers.
> **Note:**
>
> The current supported mode for enabling GPU workloads in the TEE scenario
> is single GPU passthrough (one GPU per pod) on AMD64 platforms (AMD SEV-SNP
> being the only supported TEE scenario so far with support for Intel TDX being
> on the way).
## Component Overview
Before providing deployment guidance, we describe the components involved to
support running GPU workloads. We start from a top to bottom perspective
from the NVIDIA GPU operator via the Kata runtime to the components within
the NVIDIA GPU Utility Virtual Machine (UVM) root filesystem.
### NVIDIA GPU Operator
A central component is the
[NVIDIA GPU operator](https://github.com/NVIDIA/gpu-operator) which can be
deployed onto your cluster as a helm chart. Installing the GPU operator
delivers various operands on your nodes in the form of Kubernetes DaemonSets.
These operands are vital to support the flow of orchestrating pod manifests
using NVIDIA GPU runtime classes with GPU passthrough on your nodes. Without
getting into the details, the most important operands and their
responsibilities are:
- **nvidia-vfio-manager:** Binding discovered NVIDIA GPUs to the `vfio-pci`
driver for VFIO passthrough.
- **nvidia-cc-manager:** Transitioning GPUs into confidential computing (CC)
and non-CC mode (see the
[NVIDIA/k8s-cc-manager](https://github.com/NVIDIA/k8s-cc-manager)
repository).
- **nvidia-kata-manager:** Creating host-side CDI specifications for GPU
passthrough, resulting in the file `/var/run/cdi/nvidia.yaml`, containing
`kind: nvidia.com/pgpu` (see the
[NVIDIA/k8s-kata-manager](https://github.com/NVIDIA/k8s-kata-manager)
repository).
- **nvidia-sandbox-device-plugin** (see the
[NVIDIA/sandbox-device-plugin](https://github.com/NVIDIA/sandbox-device-plugin)
repository):
- Allocating GPUs during pod deployment.
- Discovering NVIDIA GPUs, their capabilities, and advertising these to
the Kubernetes control plane (allocatable resources as type
`nvidia.com/pgpu` resources will appear for the node and GPU Device IDs
will be registered with Kubelet). These GPUs can thus be allocated as
container resources in your pod manifests. See below GPU operator
deployment instructions for the use of the key `pgpu`, controlled via a
variable.
To summarize, the GPU operator manages the GPUs on each node, allowing for
simple orchestration of pod manifests using Kata Containers. Once the cluster
with GPU operator and Kata bits is up and running, the end user can schedule
Kata NVIDIA GPU workloads, using resource limits and the
`kata-qemu-nvidia-gpu` or `kata-qemu-nvidia-gpu-snp` runtime classes, for
example:
```yaml
apiVersion: v1
kind: Pod
...
spec:
...
runtimeClassName: kata-qemu-nvidia-gpu-snp
...
resources:
limits:
"nvidia.com/pgpu": 1
...
```
When this happens, the Kubelet calls into the sandbox device plugin to
allocate a GPU. The sandbox device plugin returns `DeviceSpec` entries to the
Kubelet for the allocated GPU. The Kubelet uses internal device IDs for
tracking of allocated GPUs and includes the device specifications in the CRI
request when scheduling the pod through containerd. Containerd processes the
device specifications and includes the device configuration in the OCI
runtime spec used to invoke the Kata runtime during the create container
request.
### Kata runtime
The Kata runtime for the NVIDIA GPU handlers is configured to cold-plug VFIO
devices (`cold_plug_vfio` is set to `root-port` while
`hot_plug_vfio` is set to `no-port`). Cold-plug is by design the only
supported mode for NVIDIA GPU passthrough of the NVIDIA reference stack.
With cold-plug, the Kata runtime attaches the GPU at VM launch time, when
creating the pod sandbox. This happens *before* the create container request,
i.e., before the Kata runtime receives the OCI spec including device
configurations from containerd. Thus, a mechanism to acquire the device
information is required. This is done by the runtime calling the
`coldPlugDevices()` function during sandbox creation. In this function,
the runtime queries Kubelet's Pod Resources API to discover allocated GPU
device IDs (e.g., `nvidia.com/pgpu = [vfio0]`). The runtime formats these as
CDI device identifiers and injects them into the OCI spec using
`config.InjectCDIDevices()`. The runtime then consults the host CDI
specifications and determines the device path the GPU is backed by
(e.g., `/dev/vfio/devices/vfio0`). Finally, the runtime resolves the device's
PCI BDF (e.g., `0000:21:00`) and cold-plugs the GPU by launching QEMU with
relevant parameters for device passthrough (e.g.,
`-device vfio-pci,host=0000:21:00.0,x-pci-vendor-id=0x10de,x-pci-device-id=0x2321,bus=rp0,iommufd=iommufdvfio-faf829f2ea7aec330`).
The runtime also creates *inner runtime* CDI annotations
which map host VFIO devices to guest GPU devices. These are annotations
intended for the kata-agent, here referred to as the inner runtime (inside the
UVM), to properly handle GPU passthrough into containers. These annotations
serve as metadata providing the kata-agent with the information needed to
attach the passthrough devices to the correct container.
The annotations are key-value pairs consisting of `cdi.k8s.io/vfio<num>` keys
(derived from the host VFIO device path, e.g., `/dev/vfio/devices/vfio1`) and
`nvidia.com/gpu=<index>` values (referencing the corresponding device in the
guest CDI spec). These annotations are injected by the runtime during container
creation via the `annotateContainerWithVFIOMetadata` function (see
`container.go`).
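Put together, the injected metadata takes a shape like the following (hypothetical values for a pod with two passthrough GPUs):
```yaml
annotations:
  cdi.k8s.io/vfio0: nvidia.com/gpu=0
  cdi.k8s.io/vfio1: nvidia.com/gpu=1
```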
We continue describing the orchestration flow inside the UVM in the next
section.
### Kata NVIDIA GPU UVM
#### UVM composition
To better understand the orchestration flow inside the NVIDIA GPU UVM, we
first look at the components its root filesystem contains. Should you decide
to use your own root filesystem to enable NVIDIA GPU scenarios, this should
give you a good idea on what ingredients you need.
From a file system perspective, the UVM is composed of two files: a standard
Kata kernel image and the NVIDIA GPU rootfs in initrd or disk image format.
These two files are being utilized for the QEMU launch command when the UVM
is created.
The two most important pieces in Kata Container's build recipes for the
NVIDIA GPU root filesystem are the `nvidia_chroot.sh` and `nvidia_rootfs.sh`
files. The build follows a two-stage process. In the first stage, a
full-fledged Ubuntu-based root filesystem is composed within a chroot
environment. In this stage, NVIDIA kernel modules are built and signed
against the current Kata kernel and relevant NVIDIA packages are installed.
In the second stage, a chiseled build is performed: Only relevant contents
from the first stage are copied and compressed into a new distro-less root
filesystem folder. Kata's build infrastructure then turns this root
filesystem into the NVIDIA initrd and image files.
The resulting root filesystem contains the following software components:
- NVRC - the
[NVIDIA Runtime Container init system](https://github.com/NVIDIA/nvrc/tree/main)
- NVIDIA drivers (kernel modules)
- NVIDIA user space driver libraries
- NVIDIA user space tools
- kata-agent
- confidential computing guest components: the attestation agent,
confidential data hub and api-server-rest binaries
- CRI-O pause container (for the guest image-pull method)
- BusyBox utilities (provides a base set of libraries and binaries, and a
linker)
- some supporting files, such as file containing a list of supported GPU
device IDs which NVRC reads
#### UVM orchestration flow
When the Kata runtime asks QEMU to launch the VM, the UVM's Linux kernel
boots and mounts the root filesystem. After this, NVRC starts as the initial
process.
NVRC scans for NVIDIA GPUs on the PCI bus, loads the
NVIDIA kernel modules, waits for driver initialization, creates the device nodes,
and initializes the GPU hardware (using the `nvidia-smi` binary). NVRC also
creates the guest-side CDI specification file (using the
`nvidia-ctk cdi generate` command). This file specifies devices of
`kind: nvidia.com/gpu`, i.e., GPUs appearing to be physical GPUs on regular
bare metal systems. The guest CDI specification also contains `containerEdits`
for each device, specifying device nodes (e.g., `/dev/nvidia0`,
`/dev/nvidiactl`), library mounts, and environment variables to be mounted
into the container which receives the passthrough GPU.
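An abbreviated, hypothetical excerpt of such a guest CDI specification might look like:
```yaml
cdiVersion: "0.6.0"
kind: nvidia.com/gpu
devices:
  - name: "0"
    containerEdits:
      deviceNodes:
        - path: /dev/nvidia0
        - path: /dev/nvidiactl
      env:
        - NVIDIA_VISIBLE_DEVICES=0  # example environment variable
```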
Then, NVRC forks the Kata agent while continuing to run as the
init system. This allows NVRC to handle ongoing GPU management tasks
while kata-agent focuses on container lifecycle management. See the
[NVRC sources](https://github.com/NVIDIA/nvrc/blob/main/src/main.rs) for an
overview on the steps carried out by NVRC.
When the Kata runtime sends the create container request, the Kata agent
parses the inner runtime CDI annotation. For example, for the inner runtime
annotation `"cdi.k8s.io/vfio1": "nvidia.com/gpu=0"`, the agent looks up device
`0` in the guest CDI specification with `kind: nvidia.com/gpu`.
The Kata agent also reads the guest CDI specification's `containerEdits`
section and injects relevant contents into the OCI spec of the respective
container. The kata agent then creates and starts a `rustjail` container
based on the final OCI spec. The container now has relevant device nodes,
binaries and low-level libraries available, and can start a user application
linked against the CUDA runtime API (e.g., `libcudart.so` and other
libraries). When used, the CUDA runtime API in turn calls the CUDA driver
API and kernel drivers, interacting with the pass-through GPU device.
An additional step is exercised in our CI samples: when using images from an
authenticated registry, the guest-pull mechanism triggers attestation using
trustee's Key Broker Service (KBS) for secure release of the NGC API
authentication key used to access the NVCR container registry. As part of
this, the attestation agent exercises composite attestation and transitions
the GPU into `Ready` state (without this, the GPU has to explicitly be
transitioned into `Ready` state by passing the `nvrc.smi.srs=1` kernel
parameter via the shim config, causing NVRC to transition the GPU into the
`Ready` state).
## Deployment Guidance
This guidance assumes you use bare-metal machines with proper support for
Kata's non-TEE and TEE GPU workload deployment scenarios for your Kubernetes
nodes. We provide guidance based on the upstream Kata CI procedures for the
NVIDIA GPU CI validation jobs. Note that this setup:
- uses the guest image pull method to pull container image layers
- uses the genpolicy tool to attach Kata agent security policies to the pod
manifest
- has dedicated (composite) attestation tests, a CUDA vectorAdd test, and a
NIM/RA test sample with secure API key release
A similar deployment guide and scenario description can be found in NVIDIA resources
under
[Early Access: NVIDIA GPU Operator with Confidential Containers based on Kata](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/confidential-containers.html).
### Requirements
The requirements for the TEE scenario are:
- Ubuntu 25.10 as host OS
- CPU with AMD SEV-SNP support with proper BIOS/UEFI version and settings
- CC-capable Hopper/Blackwell GPU with proper VBIOS version.
BIOS and VBIOS configuration is out of scope for this guide. Other resources,
such as the documentation found on the
[NVIDIA Trusted Computing Solutions](https://docs.nvidia.com/nvtrust/index.html)
page and the above linked NVIDIA documentation, provide guidance on
selecting proper hardware and on properly configuring its firmware and OS.
### Installation
#### Containerd and Kubernetes
First, set up your Kubernetes cluster. For instance, in Kata CI, our NVIDIA
jobs use a single-node vanilla Kubernetes cluster with a 2.x containerd
version and Kata's current supported Kubernetes version. We set this cluster
up using the `deploy_k8s` function from `tests/integration/kubernetes/gha-run.sh`
as follows:
```bash
$ export KUBERNETES="vanilla"
$ export CONTAINER_ENGINE="containerd"
$ export CONTAINER_ENGINE_VERSION="v2.1"
$ source tests/gha-run-k8s-common.sh
$ deploy_k8s
```
> **Note:**
>
> We recommend configuring your Kubelet with a higher
> `runtimeRequestTimeout` value than the default of two minutes.
> Using the guest-pull mechanism, pulling large images may take a significant
> amount of time and may delay container start, possibly leading your Kubelet
> to de-allocate your pod before it transitions from the *container created*
> to the *container running* state.
> **Note:**
>
> The NVIDIA GPU runtime classes use VFIO cold-plug which, as
> described above, requires the Kata runtime to query Kubelet's Pod Resources
> API to discover allocated GPU devices during sandbox creation. For
> Kubernetes versions **older than 1.34**, you must explicitly enable the
> `KubeletPodResourcesGet` feature gate in your Kubelet configuration. For
> Kubernetes 1.34 and later, this feature is enabled by default.
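Both notes above translate into a small `KubeletConfiguration` fragment; a sketch (the timeout value is illustrative):
```yaml
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
runtimeRequestTimeout: "10m"
featureGates:
  KubeletPodResourcesGet: true  # only required for Kubernetes < 1.34
```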
#### GPU Operator
Assuming you have the helm tools installed, deploy the latest version of the
GPU Operator as a helm chart (minimum version: `v25.10.0`):
```bash
$ helm repo add nvidia https://helm.ngc.nvidia.com/nvidia && helm repo update
$ helm install --wait --generate-name \
-n gpu-operator --create-namespace \
nvidia/gpu-operator \
--set sandboxWorkloads.enabled=true \
--set sandboxWorkloads.defaultWorkload=vm-passthrough \
--set kataManager.enabled=true \
--set kataManager.config.runtimeClasses=null \
--set kataManager.repository=nvcr.io/nvidia/cloud-native \
--set kataManager.image=k8s-kata-manager \
--set kataManager.version=v0.2.4 \
--set ccManager.enabled=true \
--set ccManager.defaultMode=on \
--set ccManager.repository=nvcr.io/nvidia/cloud-native \
--set ccManager.image=k8s-cc-manager \
--set ccManager.version=v0.2.0 \
--set sandboxDevicePlugin.repository=nvcr.io/nvidia/cloud-native \
--set sandboxDevicePlugin.image=nvidia-sandbox-device-plugin \
--set sandboxDevicePlugin.version=v0.0.1 \
--set 'sandboxDevicePlugin.env[0].name=P_GPU_ALIAS' \
--set 'sandboxDevicePlugin.env[0].value=pgpu' \
--set nfd.enabled=true \
--set nfd.nodefeaturerules=true
```
> **Note:**
>
> For heterogeneous clusters with different GPU types, you can omit
> the `P_GPU_ALIAS` environment variable lines. This will cause the sandbox
> device plugin to create GPU model-specific resource types (e.g.,
> `nvidia.com/GH100_H100L_94GB`) instead of the generic `nvidia.com/pgpu`,
> which in turn can be used by pods through respective resource limits.
> For simplicity, this guide uses the generic alias.
> **Note:**
>
> Using `--set sandboxWorkloads.defaultWorkload=vm-passthrough` causes all
> your nodes to be labeled for GPU VM passthrough. Remove this parameter if
> you intend to only use selected nodes for this scenario, and label these
> nodes by hand, using:
> `kubectl label node <node-name> nvidia.com/gpu.workload.config=vm-passthrough`.
#### Kata Containers
Install the latest Kata Containers helm chart, similar to
[existing documentation](https://github.com/kata-containers/kata-containers/blob/main/tools/packaging/kata-deploy/helm-chart/README.md)
(minimum version: `3.24.0`).
```bash
$ export VERSION=$(curl -sSL https://api.github.com/repos/kata-containers/kata-containers/releases/latest | jq .tag_name | tr -d '"')
$ export CHART="oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy"
$ helm install kata-deploy \
--namespace kata-system \
--create-namespace \
-f "https://raw.githubusercontent.com/kata-containers/kata-containers/refs/tags/${VERSION}/tools/packaging/kata-deploy/helm-chart/kata-deploy/try-kata-nvidia-gpu.values.yaml" \
--set nfd.enabled=false \
--set shims.qemu-nvidia-gpu-tdx.enabled=false \
--wait --timeout 10m --atomic \
"${CHART}" --version "${VERSION}"
```
#### Trustee's KBS for remote attestation
For our Kata CI runners we use Trustee's KBS for composite attestation for
secure key release, for instance, for test scenarios which use authenticated
container images. In such scenarios, the credentials to access the
authenticated container registry are only released to the confidential guest
after successful attestation. Please see the section below for more
information about this.
```bash
$ export NVIDIA_VERIFIER_MODE="remote"
$ export KBS_INGRESS="nodeport"
$ bash tests/integration/kubernetes/gha-run.sh deploy-coco-kbs
$ bash tests/integration/kubernetes/gha-run.sh install-kbs-client
```
Please note that Trustee can also be deployed via any other upstream
mechanism as documented by the
[confidential-containers repository](https://github.com/confidential-containers/trustee).
For our architecture, it is important to set up KBS in the remote verifier
mode, which requires entering a licensing agreement with NVIDIA; see the
[notes in the confidential-containers repository](https://github.com/confidential-containers/trustee/blob/main/deps/verifier/src/nvidia/README.md).
### Cluster validation and preparation
If you did not use the `sandboxWorkloads.defaultWorkload=vm-passthrough`
parameter during GPU Operator deployment, label your nodes for GPU VM
passthrough. For example, to use all nodes for GPU passthrough, run:
```bash
$ kubectl label nodes --all nvidia.com/gpu.workload.config=vm-passthrough --overwrite
```
If you intend to run GPU TEE scenarios, check whether the `nvidia-cc-manager`
pod is running. If not, you need to manually label the node as CC-capable,
since current GPU Operator node feature rules do not yet recognize all
CC-capable GPU PCI IDs. Run the following command:
```bash
$ kubectl label nodes --all nvidia.com/cc.capable=true
```
After this, ensure the `nvidia-cc-manager` pod is running. With the suggested
parameters for GPU Operator deployment, the `nvidia-cc-manager` will
automatically transition the GPU into CC mode.
After deployment, you can transition your node(s) to the desired CC state,
using either the `on` or `off` value, depending on your scenario. For the
non-CC scenario, transition to the `off` state via
`kubectl label nodes --all nvidia.com/cc.mode=off` and wait until all pods
are running again. When an actual mode change takes place, various GPU
Operator operands are restarted.
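As a minimal sketch, transitioning all nodes and waiting for the operands to
settle could look like this (the `gpu-operator` namespace matches the
deployment above):
```bash
$ kubectl label nodes --all nvidia.com/cc.mode=off --overwrite
$ kubectl -n gpu-operator wait --for=condition=Ready pods --all --timeout=10m
```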
Ensure all pods are running:
```bash
$ kubectl get pods -A
```
On your node(s), verify correct driver binding. Your GPU device should be
bound to the VFIO driver, i.e., show `Kernel driver in use: vfio-pci`
when running:
```bash
$ lspci -nnk -d 10de:
```
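The output should look similar to the following; the PCI address and device
IDs are illustrative and will differ on your system:
```
d0:00.0 3D controller [0302]: NVIDIA Corporation Device [10de:2331] (rev a1)
        Subsystem: NVIDIA Corporation Device [10de:16c1]
        Kernel driver in use: vfio-pci
        Kernel modules: nvidia
```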
### Run the CUDA vectorAdd sample
Create the following file as `cuda-vectoradd-kata.yaml.in`:
```yaml
apiVersion: v1
kind: Pod
metadata:
name: cuda-vectoradd-kata
namespace: default
annotations:
io.katacontainers.config.hypervisor.kernel_params: "nvrc.smi.srs=1"
spec:
runtimeClassName: ${GPU_RUNTIME_CLASS_NAME}
restartPolicy: Never
containers:
- name: cuda-vectoradd
image: "nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0-ubuntu22.04"
resources:
limits:
nvidia.com/pgpu: "1"
memory: 16Gi
```
Depending on your scenario and the CC state, define the environment variable
with your desired runtime class name:
```bash
$ export GPU_RUNTIME_CLASS_NAME="kata-qemu-nvidia-gpu-snp"
```
Then, deploy the sample Kubernetes pod manifest and observe the pod logs:
```bash
$ envsubst < ./cuda-vectoradd-kata.yaml.in | kubectl apply -f -
$ kubectl wait --for=condition=Ready pod/cuda-vectoradd-kata --timeout=60s
$ kubectl logs -n default cuda-vectoradd-kata
```
Expect the following output:
```
[Vector addition of 50000 elements]
Copy input data from the host memory to the CUDA device
CUDA kernel launch with 196 blocks of 256 threads
Copy output data from the CUDA device to the host memory
Test PASSED
Done
```
To stop the pod, run: `kubectl delete pod cuda-vectoradd-kata`.
### Next steps
#### Transition between CC and non-CC mode
Use the previously described node-labeling approach to transition between
CC and non-CC mode. In non-CC mode, you can use the
`kata-qemu-nvidia-gpu` value for the `GPU_RUNTIME_CLASS_NAME` runtime class
variable in the CUDA vectorAdd sample above. The `kata-qemu-nvidia-gpu-snp`
runtime class will **NOT** work in this mode, and vice versa.
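For example, to rerun the sample in non-CC mode, swap the runtime class
variable and re-render the manifest:
```bash
$ export GPU_RUNTIME_CLASS_NAME="kata-qemu-nvidia-gpu"
$ envsubst < ./cuda-vectoradd-kata.yaml.in | kubectl apply -f -
```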
#### Run Kata CI tests locally
Upstream Kata CI runs the CUDA vectorAdd test, a composite attestation test,
and a basic NIM/RAG deployment. Running CI tests for the TEE GPU scenario
requires KBS to be deployed (except for the CUDA vectorAdd test). The best
place to get started running these tests locally is to look into our
[NVIDIA CI workflow manifest](https://github.com/kata-containers/kata-containers/blob/main/.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml)
and into the underlying
[run_kubernetes_nv_tests.sh](https://github.com/kata-containers/kata-containers/blob/main/tests/integration/kubernetes/run_kubernetes_nv_tests.sh)
script. For example, to run the CUDA vectorAdd scenario against the TEE GPU
runtime class, use the following commands:
```bash
# create the kata runtime class the test framework uses
$ export KATA_HYPERVISOR=qemu-nvidia-gpu-snp
$ kubectl delete runtimeclass kata --ignore-not-found
$ kubectl get runtimeclass "kata-${KATA_HYPERVISOR}" -o json | \
jq '.metadata.name = "kata" | del(.metadata.uid, .metadata.resourceVersion, .metadata.creationTimestamp)' | \
kubectl apply -f -
$ cd tests/integration/kubernetes
$ K8S_TEST_NV="k8s-nvidia-cuda.bats" ./gha-run.sh run-nv-tests
```
> **Note:**
>
> The other scenarios require an NGC API key, i.e., you must export the
> `NGC_API_KEY` variable with a valid NGC API key.
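For instance, running the NIM scenario locally could look like the following
sketch; the key value is a placeholder:
```bash
$ export NGC_API_KEY="<your-ngc-api-key>"
$ K8S_TEST_NV="k8s-nvidia-nim.bats" ./gha-run.sh run-nv-tests
```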
#### Deploy pods using attestation
Attestation is a fundamental piece of the confidential containers solution.
Our upstream CI exercises attestation through the authenticated container
image pull mechanism, where container images reside in the authenticated
NVCR registry (`k8s-nvidia-nim.bats`), and through requesting secrets from
KBS (`k8s-confidential-attestation.bats`). KBS will
release the image pull secret to a confidential guest. To get the
authentication credentials from inside the guest, KBS must already be
deployed and configured. In our CI samples, we configure KBS with the guest
image pull secret, a resource policy, and launch the pod with certain kernel
command line parameters:
`"agent.image_registry_auth=kbs:///default/credentials/nvcr agent.aa_kbc_params=cc_kbc::${CC_KBS_ADDR}"`.
The `agent.aa_kbc_params` option is a general configuration for attestation.
For your use case, you need to set the IP address and port under which KBS
is reachable through the `CC_KBS_ADDR` variable (see our CI sample). This
tells the guest how to reach KBS. Something like this must be set whenever
attestation is used, but on its own this parameter does not trigger
attestation. The `agent.image_registry_auth` option tells the guest to ask
for a resource from KBS and use it as the authentication configuration. When
this is set, the guest will request this resource at boot (and trigger
attestation) regardless of which image is being pulled.
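Putting this together, a pod manifest pulling from the authenticated registry
would carry an annotation along these lines (a sketch based on our CI
configuration; render `${CC_KBS_ADDR}` with `envsubst` as in the vectorAdd
example):
```yaml
metadata:
  annotations:
    io.katacontainers.config.hypervisor.kernel_params: "agent.image_registry_auth=kbs:///default/credentials/nvcr agent.aa_kbc_params=cc_kbc::${CC_KBS_ADDR}"
```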
To deploy your own pods using authenticated container images, or secure key
release for attestation, follow steps similar to our mentioned CI samples.
#### Deploy pods with Kata agent security policies
With GPU passthrough being supported by the
[genpolicy tool](https://github.com/kata-containers/kata-containers/tree/main/src/tools/genpolicy),
you can use the tool to create a Kata agent security policy. Our CI deploys
all sample pod manifests with a Kata agent security policy.
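As a sketch, generating and attaching a policy to the vectorAdd manifest could
look like this (assuming a `genpolicy` binary built from the linked tool;
flags may differ between versions):
```bash
# Annotates the manifest in place with an encoded Kata agent policy
$ genpolicy -y cuda-vectoradd-kata.yaml
```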
#### Deploy pods using your own containers and manifests
You can author pod manifests leveraging your own containers, for instance,
containers built using the CUDA container toolkit. We recommend starting
with a CUDA base container.
The GPU is transitioned into the `Ready` state via attestation, for instance,
when pulling authenticated images. If your deployment scenario does not use
attestation, please refer back to the CUDA vectorAdd pod manifest. In this
manifest, we ensure that NVRC sets the GPU to `Ready` state by adding the
following annotation in the manifest:
`io.katacontainers.config.hypervisor.kernel_params: "nvrc.smi.srs=1"`
> **Notes:**
>
> - musl-based container images (e.g., Alpine-based) and distro-less
> containers are not supported.
> - For the TEE scenario, only single-GPU passthrough per pod is supported,
> so your pod resource limit must be `nvidia.com/pgpu: "1"` (on a system
> with multiple GPUs, you can thus pass through one GPU per pod).


@@ -1,25 +1,10 @@
# Using NVIDIA GPU device with Kata Containers
This page gives an overview of the different modes in which GPUs can be passed
to a Kata Containers container, provides host system requirements, explains how
Kata Containers guest components can be built to support the NVIDIA GPU
scenario, and gives practical usage examples using `ctr`.
Please see the guide
[Enabling NVIDIA GPU workloads using GPU passthrough with Kata Containers](NVIDIA-GPU-passthrough-and-Kata-QEMU.md)
for documentation of an end-to-end reference implementation of a Kata
Containers stack for GPU passthrough using QEMU, the go-based Kata Runtime,
and an NVIDIA-specific root filesystem. This reference implementation is built
and validated in Kata's CI, and it can be used to test GPU workloads with Kata
components and Kubernetes out of the box.
## Comparison between Passthrough and vGPU Modes
An NVIDIA GPU device can be passed to a Kata Containers container using GPU
passthrough (NVIDIA GPU passthrough mode) as well as GPU mediated passthrough
(NVIDIA `vGPU` mode).
In NVIDIA GPU passthrough mode, an entire physical GPU is directly assigned to one
VM, bypassing the NVIDIA Virtual GPU Manager. In this mode of operation, the GPU
is accessed exclusively by the NVIDIA driver running in the VM to which it is
assigned. The GPU is not shared among VMs.
@@ -35,20 +20,18 @@ with [MIG-slices](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/).
| Technology | Description | Behavior | Detail |
| --- | --- | --- | --- |
| NVIDIA GPU passthrough mode | GPU passthrough | Physical GPU assigned to a single VM | Direct GPU assignment to VM without limitation |
| NVIDIA vGPU time-sliced | GPU time-sliced | Physical GPU time-sliced for multiple VMs | Mediated passthrough |
| NVIDIA vGPU MIG-backed | GPU with MIG-slices | Physical GPU MIG-sliced for multiple VMs | Mediated passthrough |
## Host Requirements
### Hardware
NVIDIA GPUs recommended for virtualization:
- NVIDIA Tesla (T4, M10, P6, V100 or newer)
- NVIDIA Quadro RTX 6000/8000
### Firmware
Some hardware requires a larger PCI BARs window, for example, NVIDIA Tesla P100,
K40m
@@ -72,7 +55,9 @@ Some hardware vendors use a different name in BIOS, such as:
If you are using a GPU based on the Ampere architecture or later, SR-IOV
additionally needs to be enabled for the `vGPU` use-case.
### Kernel
The following configurations need to be enabled on your host kernel:
@@ -85,13 +70,7 @@ The following configurations need to be enabled on your host kernel:
Your host kernel needs to be booted with `intel_iommu=on` on the kernel command
line.
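On a GRUB-based distribution, a minimal sketch for adding the parameter
persistently looks like this (adjust for your bootloader):
```bash
$ sudo sed -i 's/GRUB_CMDLINE_LINUX_DEFAULT="/&intel_iommu=on /' /etc/default/grub
$ sudo update-grub
$ sudo reboot
```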
## Build the Kata Components
This section explains how to build an environment with Kata Containers bits
supporting the GPU scenario. We first deploy and configure the regular Kata
components, then describe how to build the guest kernel and root filesystem.
### Install and configure Kata Containers
To use non-large BARs devices (for example, NVIDIA Tesla T4), you need Kata
version 1.3.0 or above. Follow the [Kata Containers setup
@@ -122,7 +101,7 @@ hotplug_vfio_on_root_bus = true
pcie_root_port = 1
```
### Build guest kernel with GPU support
The default guest kernel installed with Kata Containers does not provide GPU
support. To use an NVIDIA GPU with Kata Containers, you need to build a kernel
@@ -181,11 +160,11 @@ code, using `Dragonball VMM` for NVIDIA GPU `hot-plug/hot-unplug` requires apply
addition to the above kernel configuration items. Follow these steps to build for NVIDIA GPU `hot-[un]plug`
for `Dragonball`:
```sh
# Prepare .config to support both upcall and nvidia gpu
$ ./build-kernel.sh -v 5.10.25 -e -t dragonball -g nvidia -f setup
# Build guest kernel to support both upcall and nvidia gpu
$ ./build-kernel.sh -v 5.10.25 -e -t dragonball -g nvidia build
# Install guest kernel to support both upcall and nvidia gpu
@@ -217,7 +196,303 @@ Before using the new guest kernel, please update the `kernel` parameters in
kernel = "/usr/share/kata-containers/vmlinuz-nvidia-gpu.container"
```
### Build Guest OS with NVIDIA Driver and Toolkit
## NVIDIA GPU pass-through mode with Kata Containers
Use the following steps to pass an NVIDIA GPU device in pass-through mode with Kata:
1. Find the Bus-Device-Function (BDF) for the GPU device on the host:
```sh
$ sudo lspci -nn -D | grep -i nvidia
0000:d0:00.0 3D controller [0302]: NVIDIA Corporation Device [10de:20b9] (rev a1)
```
> PCI address `0000:d0:00.0` is assigned to the hardware GPU device.
> `10de:20b9` is the device ID of the hardware GPU device.
2. Find the IOMMU group for the GPU device:
```sh
$ BDF="0000:d0:00.0"
$ readlink -e /sys/bus/pci/devices/$BDF/iommu_group
```
The previous output shows that the GPU belongs to IOMMU group 192. The next
step is to bind the GPU to the VFIO-PCI driver.
```sh
$ BDF="0000:d0:00.0"
$ DEV="/sys/bus/pci/devices/$BDF"
$ echo "vfio-pci" > $DEV/driver_override
$ echo $BDF > $DEV/driver/unbind
$ echo $BDF > /sys/bus/pci/drivers_probe
# To return the device to the standard driver, we simply clear the
# driver_override and reprobe the device, ex:
$ echo > $DEV/driver_override
$ echo $BDF > $DEV/driver/unbind
$ echo $BDF > /sys/bus/pci/drivers_probe
```
3. Check the IOMMU group number under `/dev/vfio`:
```sh
$ ls -l /dev/vfio
total 0
crw------- 1 zvonkok zvonkok 243, 0 Mar 18 03:06 192
crw-rw-rw- 1 root root 10, 196 Mar 18 02:27 vfio
```
4. Start a Kata container with the GPU device:
```sh
# You may need to `modprobe vhost-vsock` if you get
# host system doesn't support vsock: stat /dev/vhost-vsock
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch uname -r
```
5. Run `lspci` within the container to verify the GPU device is seen in the list
of the PCI devices. Note the vendor-device id of the GPU (`10de:20b9`) in the `lspci` output.
```sh
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch sh -c "lspci -nn | grep '10de:20b9'"
```
6. Additionally, you can check the PCI BARs space of the NVIDIA GPU device in the container:
```sh
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch sh -c "lspci -s 02:00.0 -vv | grep Region"
```
> **Note**: If the `Region` entries show up in the output, the BAR space of the
> NVIDIA GPU has been successfully allocated.
## NVIDIA vGPU mode with Kata Containers
NVIDIA vGPU is a licensed product on all supported GPU boards. A software license
is required to enable all vGPU features within the guest VM. NVIDIA vGPU manager
needs to be installed on the host to configure GPUs in vGPU mode. See [NVIDIA Virtual GPU Software Documentation v14.0 through 14.1](https://docs.nvidia.com/grid/14.0/) for more details.
### NVIDIA vGPU time-sliced
In the time-sliced mode, the GPU is not partitioned and the workload uses the
whole GPU and shares access to the GPU engines. Processes are scheduled in
series. The best-effort scheduler is the default and can be exchanged for
other scheduling policies; see the documentation above for how to do that.
Beware: if you had `MIG` enabled before, disable `MIG` on the GPU if you want
to use `time-sliced` `vGPU`.
```sh
$ sudo nvidia-smi -mig 0
```
Enable the virtual functions for the physical GPU in the `sysfs` file system.
```sh
$ sudo /usr/lib/nvidia/sriov-manage -e 0000:41:00.0
```
Get the `BDF` of the available virtual function on the GPU, and choose one for the
following steps.
```sh
$ cd /sys/bus/pci/devices/0000:41:00.0/
$ ls -l | grep virtfn
```
#### List all available vGPU instances
The following shell snippet walks the `sysfs` and prints only the instances
that are available, i.e., that can be created.
```sh
# The 00.0 is often the PF of the device. The VFs will have the function in the
# BDF incremented by some values so e.g. the very first VF is 0000:41:00.4
cd /sys/bus/pci/devices/0000:41:00.0/
for vf in $(ls -d virtfn*)
do
BDF=$(basename $(readlink -f $vf))
for md in $(ls -d $vf/mdev_supported_types/*)
do
AVAIL=$(cat $md/available_instances)
NAME=$(cat $md/name)
DIR=$(basename $md)
if [ $AVAIL -gt 0 ]; then
echo "| BDF | INSTANCES | NAME | DIR |"
echo "+--------------+-----------+----------------+------------+"
printf "| %12s |%10d |%15s | %10s |\n\n" "$BDF" "$AVAIL" "$NAME" "$DIR"
fi
done
done
```
If there are available instances, you get something like this (for the first VF).
Beware that the output is highly dependent on the GPU you have; if there is no
output, check again whether `MIG` is really disabled.
```sh
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-4C | nvidia-692 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-8C | nvidia-693 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-10C | nvidia-694 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-16C | nvidia-695 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-20C | nvidia-696 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-40C | nvidia-697 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-80C | nvidia-698 |
```
Change to the `mdev_supported_types` directory for the virtual function on which
you want to create the `vGPU`. Taking the first output as an example:
```sh
$ cd virtfn0/mdev_supported_types/nvidia-692
$ UUIDGEN=$(uuidgen)
$ sudo bash -c "echo $UUIDGEN > create"
```
Confirm that the `vGPU` was created. You should see the `UUID` pointing to a
subdirectory of the `sysfs` space.
```sh
$ ls -l /sys/bus/mdev/devices/
```
Get the `IOMMU` group number and verify there is a `VFIO` device created to use
with Kata.
```sh
$ ls -l /sys/bus/mdev/devices/*/
$ ls -l /dev/vfio
```
Use the `VFIO` device created in the same way as in the pass-through use-case.
Beware that the guest needs the NVIDIA guest drivers, so one would need to build
a new guest `OS` image.
### NVIDIA vGPU MIG-backed
We will not go into detail about what `MIG` is; briefly, it is a technology to
partition the hardware into independent instances with guaranteed quality of
service. For more details see the [NVIDIA Multi-Instance GPU User Guide](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/).
First enable `MIG` mode for a GPU; depending on the platform you are running,
a reboot may be necessary. Some platforms support GPU reset.
```sh
$ sudo nvidia-smi -mig 1
```
If the platform supports a GPU reset, run the following command; otherwise you
will get a warning to reboot the server.
```sh
$ sudo nvidia-smi --gpu-reset
```
By default, the driver provides a number of profiles that users can opt into
when configuring the MIG feature.
```sh
$ sudo nvidia-smi mig -lgip
+-----------------------------------------------------------------------------+
| GPU instance profiles: |
| GPU Name ID Instances Memory P2P SM DEC ENC |
| Free/Total GiB CE JPEG OFA |
|=============================================================================|
| 0 MIG 1g.10gb 19 7/7 9.50 No 14 0 0 |
| 1 0 0 |
+-----------------------------------------------------------------------------+
| 0 MIG 1g.10gb+me 20 1/1 9.50 No 14 1 0 |
| 1 1 1 |
+-----------------------------------------------------------------------------+
| 0 MIG 2g.20gb 14 3/3 19.50 No 28 1 0 |
| 2 0 0 |
+-----------------------------------------------------------------------------+
...
```
Create the GPU instances that correspond to the `vGPU` types of the `MIG-backed`
`vGPUs` that you will create; see [NVIDIA A100 PCIe 80GB Virtual GPU Types](https://docs.nvidia.com/grid/13.0/grid-vgpu-user-guide/index.html#vgpu-types-nvidia-a100-pcie-80gb).
```sh
# MIG 1g.10gb --> vGPU A100D-1-10C
$ sudo nvidia-smi mig -cgi 19
```
List the GPU instances and get the GPU instance id to create the compute
instance.
```sh
$ sudo nvidia-smi mig -lgi # list the created GPU instances
$ sudo nvidia-smi mig -cci -gi 9 # each GPU instance can have several compute
# instances. Instance -> Workload
```
Verify that the compute instances were created within the GPU instance:
```sh
$ nvidia-smi
... snip ...
+-----------------------------------------------------------------------------+
| MIG devices: |
+------------------+----------------------+-----------+-----------------------+
| GPU GI CI MIG | Memory-Usage | Vol| Shared |
| ID ID Dev | BAR1-Usage | SM Unc| CE ENC DEC OFA JPG|
| | | ECC| |
|==================+======================+===========+=======================|
| 0 9 0 0 | 0MiB / 9728MiB | 14 0 | 1 0 0 0 0 |
| | 0MiB / 4095MiB | | |
+------------------+----------------------+-----------+-----------------------+
... snip ...
```
We can use the [snippet](#list-all-available-vgpu-instances) from before to list
the available `vGPU` instances, this time `MIG-backed`.
```sh
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 |GRID A100D-1-10C | nvidia-699 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.5 | 1 |GRID A100D-1-10C | nvidia-699 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:01.6 | 1 |GRID A100D-1-10C | nvidia-699 |
... snip ...
```
Repeat the steps after the [snippet](#list-all-available-vgpu-instances) listing
to create the corresponding `mdev` device and use the guest `OS` created in the
previous section with `time-sliced` `vGPUs`.
## Install NVIDIA Driver + Toolkit in Kata Containers Guest OS
Consult the [Developer-Guide](https://github.com/kata-containers/kata-containers/blob/main/docs/Developer-Guide.md#create-a-rootfs-image) on how to create a
rootfs base image for a distribution of your choice. This is going to be used as
@@ -308,12 +583,9 @@ Enable the `guest_hook_path` in Kata's `configuration.toml`
guest_hook_path = "/usr/share/oci/hooks"
```
As the last step one can remove the additional packages and files that were added
to the `$ROOTFS_DIR` to keep it as small as possible.
Having built an NVIDIA rootfs and kernel, we can now run any GPU container
without installing the drivers into the container. Check the NVIDIA device
status with `nvidia-smi`:
```sh
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/nvidia/cuda:11.6.0-base-ubuntu20.04" cuda nvidia-smi
@@ -339,309 +611,8 @@ Fri Mar 18 10:36:59 2022
+-----------------------------------------------------------------------------+
```
## Usage Examples with Kata Containers
The following sections give usage examples for the different modes.
### NVIDIA GPU passthrough mode
Use the following steps to pass an NVIDIA GPU device in passthrough mode with Kata:
1. Find the Bus-Device-Function (BDF) for the GPU device on the host:
```sh
$ sudo lspci -nn -D | grep -i nvidia
0000:d0:00.0 3D controller [0302]: NVIDIA Corporation Device [10de:20b9] (rev a1)
```
> PCI address `0000:d0:00.0` is assigned to the hardware GPU device.
> `10de:20b9` is the device ID of the hardware GPU device.
2. Find the IOMMU group for the GPU device:
```sh
$ BDF="0000:d0:00.0"
$ readlink -e /sys/bus/pci/devices/$BDF/iommu_group
```
The previous output shows that the GPU belongs to IOMMU group 192. The next
step is to bind the GPU to the VFIO-PCI driver.
```sh
$ BDF="0000:d0:00.0"
$ DEV="/sys/bus/pci/devices/$BDF"
$ echo "vfio-pci" > $DEV/driver_override
$ echo $BDF > $DEV/driver/unbind
$ echo $BDF > /sys/bus/pci/drivers_probe
# To return the device to the standard driver, we simply clear the
# driver_override and reprobe the device, ex:
$ echo > $DEV/driver_override
$ echo $BDF > $DEV/driver/unbind
$ echo $BDF > /sys/bus/pci/drivers_probe
```
3. Check the IOMMU group number under `/dev/vfio`:
```sh
$ ls -l /dev/vfio
total 0
crw------- 1 zvonkok zvonkok 243, 0 Mar 18 03:06 192
crw-rw-rw- 1 root root 10, 196 Mar 18 02:27 vfio
```
4. Start a Kata container with the GPU device:
```sh
# You may need to `modprobe vhost-vsock` if you get
# host system doesn't support vsock: stat /dev/vhost-vsock
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch uname -r
```
5. Run `lspci` within the container to verify the GPU device is seen in the list
of the PCI devices. Note the vendor-device id of the GPU (`10de:20b9`) in the `lspci` output.
```sh
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch sh -c "lspci -nn | grep '10de:20b9'"
```
6. Additionally, you can check the PCI BARs space of the NVIDIA GPU device in the container:
```sh
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch sh -c "lspci -s 02:00.0 -vv | grep Region"
```
> **Note**: If the `Region` entries show up in the output, the BAR space of the
> NVIDIA GPU has been successfully allocated.
### NVIDIA vGPU mode
NVIDIA vGPU is a licensed product on all supported GPU boards. A software license
is required to enable all vGPU features within the guest VM. NVIDIA vGPU manager
needs to be installed on the host to configure GPUs in vGPU mode. See
[NVIDIA Virtual GPU Software Documentation v14.0 through 14.1](https://docs.nvidia.com/grid/14.0/)
for more details.
#### NVIDIA vGPU time-sliced
In the time-sliced mode, the GPU is not partitioned and the workload uses the
whole GPU and shares access to the GPU engines. Processes are scheduled in
series. The best-effort scheduler is the default and can be exchanged for
other scheduling policies; see the documentation above for how to do that.
Beware: if you had `MIG` enabled before, disable `MIG` on the GPU if you want
to use `time-sliced` `vGPU`.
```sh
$ sudo nvidia-smi -mig 0
```
Enable the virtual functions for the physical GPU in the `sysfs` file system.
```sh
$ sudo /usr/lib/nvidia/sriov-manage -e 0000:41:00.0
```
Get the `BDF` of the available virtual function on the GPU, and choose one for the
following steps.
```sh
$ cd /sys/bus/pci/devices/0000:41:00.0/
$ ls -l | grep virtfn
```
##### List all available vGPU instances
The following shell snippet walks the `sysfs` and prints only the instances
that are available, i.e., that can be created.
```sh
# The 00.0 is often the PF of the device. The VFs will have the function in the
# BDF incremented by some values so e.g. the very first VF is 0000:41:00.4
cd /sys/bus/pci/devices/0000:41:00.0/
for vf in $(ls -d virtfn*)
do
BDF=$(basename $(readlink -f $vf))
for md in $(ls -d $vf/mdev_supported_types/*)
do
AVAIL=$(cat $md/available_instances)
NAME=$(cat $md/name)
DIR=$(basename $md)
if [ $AVAIL -gt 0 ]; then
echo "| BDF | INSTANCES | NAME | DIR |"
echo "+--------------+-----------+----------------+------------+"
printf "| %12s |%10d |%15s | %10s |\n\n" "$BDF" "$AVAIL" "$NAME" "$DIR"
fi
done
done
```
If there are available instances, you get something like this (for the first VF).
Beware that the output is highly dependent on the GPU you have; if there is no
output, check again whether `MIG` is really disabled.
```sh
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-4C | nvidia-692 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-8C | nvidia-693 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-10C | nvidia-694 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-16C | nvidia-695 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-20C | nvidia-696 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-40C | nvidia-697 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-80C | nvidia-698 |
```
Change to the `mdev_supported_types` directory for the virtual function on which
you want to create the `vGPU`. Taking the first output as an example:
```sh
$ cd virtfn0/mdev_supported_types/nvidia-692
$ UUIDGEN=$(uuidgen)
$ sudo bash -c "echo $UUIDGEN > create"
```
Confirm that the `vGPU` was created. You should see the `UUID` pointing to a
subdirectory of the `sysfs` space.
```sh
$ ls -l /sys/bus/mdev/devices/
```
Get the `IOMMU` group number and verify there is a `VFIO` device created to use
with Kata.
```sh
$ ls -l /sys/bus/mdev/devices/*/
$ ls -l /dev/vfio
```
Use the `VFIO` device created in the same way as in the passthrough use-case.
Beware that the guest needs the NVIDIA guest drivers, so one would need to build
a new guest `OS` image.
#### NVIDIA vGPU MIG-backed
We will not go into detail about what `MIG` is; briefly, it is a technology to
partition the hardware into independent instances with guaranteed quality of
service. For more details see the
[NVIDIA Multi-Instance GPU User Guide](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/).
First enable `MIG` mode for a GPU; depending on the platform you are running,
a reboot may be necessary. Some platforms support GPU reset.
```sh
$ sudo nvidia-smi -mig 1
```
If the platform supports a GPU reset, run the following command; otherwise you
will get a warning to reboot the server.
```sh
$ sudo nvidia-smi --gpu-reset
```
By default, the driver provides a number of profiles that users can opt into
when configuring the MIG feature.
```sh
$ sudo nvidia-smi mig -lgip
+-----------------------------------------------------------------------------+
| GPU instance profiles: |
| GPU Name ID Instances Memory P2P SM DEC ENC |
| Free/Total GiB CE JPEG OFA |
|=============================================================================|
| 0 MIG 1g.10gb 19 7/7 9.50 No 14 0 0 |
| 1 0 0 |
+-----------------------------------------------------------------------------+
| 0 MIG 1g.10gb+me 20 1/1 9.50 No 14 1 0 |
| 1 1 1 |
+-----------------------------------------------------------------------------+
| 0 MIG 2g.20gb 14 3/3 19.50 No 28 1 0 |
| 2 0 0 |
+-----------------------------------------------------------------------------+
...
```
Create the GPU instances that correspond to the `vGPU` types of the `MIG-backed`
`vGPUs` that you will create; see
[NVIDIA A100 PCIe 80GB Virtual GPU Types](https://docs.nvidia.com/grid/13.0/grid-vgpu-user-guide/index.html#vgpu-types-nvidia-a100-pcie-80gb).
```sh
# MIG 1g.10gb --> vGPU A100D-1-10C
$ sudo nvidia-smi mig -cgi 19
```
List the GPU instances and get the GPU instance id to create the compute
instance.
```sh
$ sudo nvidia-smi mig -lgi # list the created GPU instances
$ sudo nvidia-smi mig -cci -gi 9 # each GPU instance can have several compute
# instances. Instance -> Workload
```
Verify that the compute instances were created within the GPU instance:
```sh
$ nvidia-smi
... snip ...
+-----------------------------------------------------------------------------+
| MIG devices: |
+------------------+----------------------+-----------+-----------------------+
| GPU GI CI MIG | Memory-Usage | Vol| Shared |
| ID ID Dev | BAR1-Usage | SM Unc| CE ENC DEC OFA JPG|
| | | ECC| |
|==================+======================+===========+=======================|
| 0 9 0 0 | 0MiB / 9728MiB | 14 0 | 1 0 0 0 0 |
| | 0MiB / 4095MiB | | |
+------------------+----------------------+-----------+-----------------------+
... snip ...
```
We can use the [snippet](#list-all-available-vgpu-instances) from before to list
the available `vGPU` instances, this time `MIG-backed`.
```sh
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 |GRID A100D-1-10C | nvidia-699 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.5 | 1 |GRID A100D-1-10C | nvidia-699 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:01.6 | 1 |GRID A100D-1-10C | nvidia-699 |
... snip ...
```
Repeat the steps after the [snippet](#list-all-available-vgpu-instances) listing
to create the corresponding `mdev` device and use the guest `OS` created in the
previous section with `time-sliced` `vGPUs`.
As the last step one can remove the additional packages and files that were added
to the `$ROOTFS_DIR` to keep it as small as possible.
## References


@@ -1,20 +1,24 @@
# Table of Contents
**Note:** This guide used to contain an end-to-end flow to build a
custom Kata Containers root filesystem with the QAT out-of-tree SR-IOV virtual
function driver and run QAT-enabled containers. That flow is no longer necessary,
so the instructions were dropped. If the use-case is still of interest, please file
an issue in either of the QAT Kubernetes-specific repos linked below.
# Introduction
Intel® QuickAssist Technology (QAT) provides hardware acceleration
for security (cryptography) and compression. Kata Containers can enable
these acceleration functions for containers using QAT SR-IOV with the
support from [Intel QAT Device Plugin for Kubernetes](https://github.com/intel/intel-device-plugins-for-kubernetes)
or [Intel QAT DRA Resource Driver for Kubernetes](https://github.com/intel/intel-resource-drivers-for-kubernetes).
for security (cryptography) and compression. These instructions cover the
steps for the latest [Ubuntu LTS release](https://ubuntu.com/download/desktop),
which already includes the QAT host driver. They can be adapted to
any Linux distribution. They guide the user on how to download
the kernel sources, compile kernel driver modules against those sources, and
load them onto the host, as well as how to prepare a specially built Kata
Containers kernel and a custom Kata Containers rootfs.
## More Information
* Download kernel sources
* Compile Kata kernel
* Compile kernel driver modules against those sources
* Download rootfs
* Add driver modules to rootfs
* Build rootfs image
## Helpful Links before starting
[Intel® QuickAssist Technology at `01.org`](https://www.intel.com/content/www/us/en/developer/topic-technology/open/quick-assist-technology/overview.html)
@@ -22,6 +26,554 @@ or [Intel QAT DRA Resource Driver for Kubernetes](https://github.com/intel/intel
[Intel Device Plugin for Kubernetes](https://github.com/intel/intel-device-plugins-for-kubernetes)
[Intel DRA Resource Driver for Kubernetes](https://github.com/intel/intel-resource-drivers-for-kubernetes)
[Intel® QuickAssist Technology for Crypto Poll Mode Driver](https://dpdk-docs.readthedocs.io/en/latest/cryptodevs/qat.html)
## Steps to enable Intel® QAT in Kata Containers
There are some steps to complete only once, some steps to complete with every
reboot, and some steps to complete when the host kernel changes.
## Script variables
The following list of variables must be set before running through the
scripts. These variables refer to locations to store modules and configuration
files on the host and links to the drivers to use. Modify these as
needed to point to updated drivers or different install locations.
### Set environment variables (Every Reboot)
Make sure to check [`01.org`](https://www.intel.com/content/www/us/en/developer/topic-technology/open/quick-assist-technology/overview.html) for
the latest driver.
```bash
$ export QAT_DRIVER_VER=qat1.7.l.4.14.0-00031.tar.gz
$ export QAT_DRIVER_URL=https://downloadmirror.intel.com/30178/eng/${QAT_DRIVER_VER}
$ export QAT_CONF_LOCATION=~/QAT_conf
$ export QAT_DOCKERFILE=https://raw.githubusercontent.com/intel/intel-device-plugins-for-kubernetes/main/demo/openssl-qat-engine/Dockerfile
$ export QAT_SRC=~/src/QAT
$ export GOPATH=~/src/go
$ export KATA_KERNEL_LOCATION=~/kata
$ export KATA_ROOTFS_LOCATION=~/kata
```
## Prepare the Ubuntu Host
The host could be a bare metal instance or a virtual machine. If using a
virtual machine, make sure that KVM nesting is enabled. The following
instructions reference an Intel® C62X chipset. Some of the instructions must be
modified if using a different Intel® QAT device. The Intel® QAT chipset can be
identified by executing the following.
### Identify which PCI Bus the Intel® QAT card is on
```bash
$ for i in 0434 0435 37c8 1f18 1f19; do lspci -d 8086:$i; done
```
### Install necessary packages for Ubuntu
These packages are necessary to compile the Kata kernel, Intel® QAT driver, and to
prepare the rootfs for Kata. [Docker](https://docs.docker.com/engine/install/ubuntu/)
also needs to be installed to be able to build the rootfs. To test that
everything works, a Kubernetes pod is started requesting Intel® QAT resources. For
passthrough of the virtual functions, the kernel boot parameters need to
include `intel_iommu=on`.
```bash
$ sudo apt update
$ sudo apt install -y golang-go build-essential python pkg-config zlib1g-dev libudev-dev bison libelf-dev flex libtool automake autotools-dev autoconf bc libpixman-1-dev coreutils libssl-dev
$ sudo sed -i 's/GRUB_CMDLINE_LINUX_DEFAULT=""/GRUB_CMDLINE_LINUX_DEFAULT="intel_iommu=on"/' /etc/default/grub
$ sudo update-grub
$ sudo reboot
```
### Download Intel® QAT drivers
This will download the [Intel® QAT drivers](https://www.intel.com/content/www/us/en/developer/topic-technology/open/quick-assist-technology/overview.html).
Make sure to check the website for the latest version.
```bash
$ mkdir -p $QAT_SRC
$ cd $QAT_SRC
$ curl -L $QAT_DRIVER_URL | tar zx
```
### Copy Intel® QAT configuration files and enable virtual functions
Modify the instructions below as necessary if using a different Intel® QAT hardware
platform. You can learn more about customizing configuration files at the
[Intel® QAT Engine repository](https://github.com/intel/QAT_Engine/#copy-the-correct-intel-quickassist-technology-driver-config-files).
This section starts from a base config file and changes the `SSL` section to
`SHIM` to support the OpenSSL engine. There are more tweaks that you can make
depending on the use case and how many Intel® QAT engines should be run. You
can find more information about how to customize in the
[Intel® QuickAssist Technology Software for Linux* - Programmer's Guide.](https://www.intel.com/content/www/us/en/content-details/709196/intel-quickassist-technology-api-programmer-s-guide.html)
> **Note: This section assumes that a Intel® QAT `c6xx` platform is used.**
```bash
$ mkdir -p $QAT_CONF_LOCATION
$ cp $QAT_SRC/quickassist/utilities/adf_ctl/conf_files/c6xxvf_dev0.conf.vm $QAT_CONF_LOCATION/c6xxvf_dev0.conf
$ sed -i 's/\[SSL\]/\[SHIM\]/g' $QAT_CONF_LOCATION/c6xxvf_dev0.conf
```
### Expose and Bind Intel® QAT virtual functions to VFIO-PCI (Every reboot)
To enable virtual functions, the host OS should have IOMMU groups enabled. In
the UEFI Firmware Intel® Virtualization Technology for Directed I/O
(Intel® VT-d) must be enabled. Also, the kernel boot parameter should be
`intel_iommu=on` or `intel_iommu=igfx_off`. This should have been set from
the instructions above. Check the output of `/proc/cmdline` to confirm. The
following commands assume you installed an Intel® QAT card, IOMMU is on, and
VT-d is enabled. The vendor and device ID are added to the `VFIO-PCI` driver so
that each exposed virtual function can be bound to the `VFIO-PCI` driver. Once
complete, each virtual function passes into a Kata Containers container using
the PCIe device passthrough feature. For Kubernetes, the
[Intel device plugin](https://github.com/intel/intel-device-plugins-for-kubernetes)
for Kubernetes handles the binding of the driver, but the VFs still must be
enabled.
```bash
$ sudo modprobe vfio-pci
$ QAT_PCI_BUS_PF_NUMBERS=$((lspci -d :435 && lspci -d :37c8 && lspci -d :19e2 && lspci -d :6f54) | cut -d ' ' -f 1)
$ QAT_PCI_BUS_PF_1=$(echo $QAT_PCI_BUS_PF_NUMBERS | cut -d ' ' -f 1)
$ echo 16 | sudo tee /sys/bus/pci/devices/0000:$QAT_PCI_BUS_PF_1/sriov_numvfs
$ QAT_PCI_ID_VF=$(cat /sys/bus/pci/devices/0000:${QAT_PCI_BUS_PF_1}/virtfn0/uevent | grep PCI_ID)
$ QAT_VENDOR_AND_ID_VF=$(echo ${QAT_PCI_ID_VF/PCI_ID=} | sed 's/:/ /')
$ echo $QAT_VENDOR_AND_ID_VF | sudo tee --append /sys/bus/pci/drivers/vfio-pci/new_id
```
Loop through all the virtual functions and bind them to the VFIO driver:
```bash
$ for f in /sys/bus/pci/devices/0000:$QAT_PCI_BUS_PF_1/virtfn*
do QAT_PCI_BUS_VF=$(basename $(readlink $f))
echo $QAT_PCI_BUS_VF | sudo tee --append /sys/bus/pci/drivers/c6xxvf/unbind
echo $QAT_PCI_BUS_VF | sudo tee --append /sys/bus/pci/drivers/vfio-pci/bind
done
```
### Check Intel® QAT virtual functions are enabled
If the following command returns empty output, the virtual functions are not
properly enabled. The command checks the enumerated device IDs for just the
virtual functions. Using the Intel® QAT as an example, the physical device ID
is `37c8` and the virtual function device ID is `37c9`. The command checks
whether VFs are enabled for any of the currently known Intel® QAT device IDs. The
subsequent `ls` command should show the 16 VFs bound to `VFIO-PCI`.
```bash
$ for i in 0442 0443 37c9 19e3; do lspci -d 8086:$i; done
```
Another way to check is to see what PCI devices that `VFIO-PCI` is mapped to.
It should match the device ID's of the VF's.
```bash
$ ls -la /sys/bus/pci/drivers/vfio-pci
```
## Prepare Kata Containers
### Download Kata kernel Source
This example automatically uses the latest Kata kernel supported by Kata. It
follows the instructions from the
[packaging kernel repository](../../tools/packaging/kernel)
and uses the latest Kata kernel
[config](../../tools/packaging/kernel/configs).
There are some patches that must be installed as well, which the
`build-kernel.sh` script should automatically apply. If you are using a
different kernel version, then you might need to manually apply them. Since
the Kata Containers kernel has a minimal set of kernel flags set, you must
create an Intel® QAT kernel fragment with the necessary `CONFIG_CRYPTO_*` options set.
Update the config to set some of the `CRYPTO` flags to enabled. This might
change with different kernel versions. The following instructions were tested
with kernel `v5.4.0-64-generic`.
```bash
$ mkdir -p $GOPATH
$ cd $GOPATH
$ go get -v github.com/kata-containers/kata-containers
$ cat << EOF > $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/configs/fragments/common/qat.conf
CONFIG_PCIEAER=y
CONFIG_UIO=y
CONFIG_CRYPTO_HW=y
CONFIG_CRYPTO_DEV_QAT_C62XVF=m
CONFIG_CRYPTO_CBC=y
CONFIG_MODULES=y
CONFIG_MODULE_SIG=y
CONFIG_CRYPTO_AUTHENC=y
CONFIG_CRYPTO_DH=y
EOF
$ $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/build-kernel.sh setup
```
### Build Kata kernel
```bash
$ cd $GOPATH
$ export LINUX_VER=$(ls -d kata-linux-*)
$ sed -i 's/EXTRAVERSION =/EXTRAVERSION = .qat.container/' $LINUX_VER/Makefile
$ $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/build-kernel.sh build
```
### Copy Kata kernel
```bash
$ export KATA_KERNEL_NAME=vmlinux-${LINUX_VER}_qat
$ mkdir -p $KATA_KERNEL_LOCATION
$ cp ${GOPATH}/${LINUX_VER}/vmlinux ${KATA_KERNEL_LOCATION}/${KATA_KERNEL_NAME}
```
### Prepare Kata root filesystem
These instructions build upon the OS builder instructions located in the
[Developer Guide](../Developer-Guide.md). At this point it is recommended that
[Docker](https://docs.docker.com/engine/install/ubuntu/) is installed first, and
then [Kata-deploy](../../tools/packaging/kata-deploy)
is used to install Kata. This makes sure that the correct `agent` version
is installed into the rootfs in the steps below.
The following instructions use Ubuntu as the root filesystem with systemd as
the init and will add in the `kmod` binary, which is not a standard binary in
a Kata rootfs image. The `kmod` binary is necessary to load the Intel® QAT
kernel modules when the virtual machine rootfs boots.
```bash
$ export OSBUILDER=$GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder
$ export ROOTFS_DIR=${OSBUILDER}/rootfs-builder/rootfs
$ export EXTRA_PKGS='kmod'
```
Make sure that the `kata-agent` version matches the installed `kata-runtime`
version. Also make sure the `kata-runtime` install location is in your `PATH`
variable. The following `AGENT_VERSION` can be set manually to match
the `kata-runtime` version if the following commands don't work.
```bash
$ export PATH=$PATH:/opt/kata/bin
$ cd $GOPATH
$ export AGENT_VERSION=$(kata-runtime version | head -n 1 | grep -o "[0-9.]\+")
$ cd ${OSBUILDER}/rootfs-builder
$ sudo rm -rf ${ROOTFS_DIR}
$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SECCOMP=no ./rootfs.sh ubuntu'
```
### Compile Intel® QAT drivers for Kata Containers kernel and add to Kata Containers rootfs
After the Kata Containers kernel builds with the proper configuration flags,
you must build the Intel® QAT drivers against that Kata Containers kernel
version in a similar way they were previously built for the host OS. You must
set the `KERNEL_SOURCE_ROOT` variable to the Kata Containers kernel source
directory and build the Intel® QAT drivers again. The `make` command will
install the Intel® QAT modules into the Kata rootfs.
```bash
$ cd $GOPATH
$ export LINUX_VER=$(ls -d kata*)
$ export KERNEL_MAJOR_VERSION=$(awk '/^VERSION =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
$ export KERNEL_PATHLEVEL=$(awk '/^PATCHLEVEL =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
$ export KERNEL_SUBLEVEL=$(awk '/^SUBLEVEL =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
$ export KERNEL_EXTRAVERSION=$(awk '/^EXTRAVERSION =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
$ export KERNEL_ROOTFS_DIR=${KERNEL_MAJOR_VERSION}.${KERNEL_PATHLEVEL}.${KERNEL_SUBLEVEL}${KERNEL_EXTRAVERSION}
$ cd $QAT_SRC
$ KERNEL_SOURCE_ROOT=$GOPATH/$LINUX_VER ./configure --enable-icp-sriov=guest
$ sudo -E make all -j $(nproc)
$ sudo -E make INSTALL_MOD_PATH=$ROOTFS_DIR qat-driver-install -j $(nproc)
```
The `usdm_drv` module also needs to be copied into the rootfs modules path and
`depmod` should be run.
```bash
$ sudo cp $QAT_SRC/build/usdm_drv.ko $ROOTFS_DIR/lib/modules/${KERNEL_ROOTFS_DIR}/updates/drivers
$ sudo depmod -a -b ${ROOTFS_DIR} ${KERNEL_ROOTFS_DIR}
$ cd ${OSBUILDER}/image-builder
$ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh ${ROOTFS_DIR}'
```
> **Note: Ignore any errors on modules.builtin and modules.order when running
> `depmod`.**
### Copy Kata rootfs
```bash
$ mkdir -p $KATA_ROOTFS_LOCATION
$ cp ${OSBUILDER}/image-builder/kata-containers.img $KATA_ROOTFS_LOCATION
```
## Verify Intel® QAT works in a container
The following instructions use an OpenSSL Dockerfile that builds the
Intel® QAT engine to allow OpenSSL to offload crypto functions. It is a
convenient way to test that VFIO device passthrough for the Intel® QAT VFs are
working properly with the Kata Containers VM.
### Build OpenSSL Intel® QAT engine container
Use the OpenSSL Intel® QAT [Dockerfile](https://github.com/intel/intel-device-plugins-for-kubernetes/tree/main/demo/openssl-qat-engine)
to build a container image with an optimized OpenSSL engine for
Intel® QAT. Using `docker build` with the Kata Containers runtime can sometimes
have issues. Therefore, make sure that `runc` is the default Docker container
runtime.
```bash
$ cd $QAT_SRC
$ curl -O $QAT_DOCKERFILE
$ sudo docker build -t openssl-qat-engine .
```
> **Note: The Intel® QAT driver version in this container might not match the
> Intel® QAT driver compiled and loaded on the host when compiling.**
### Test Intel® QAT with the ctr tool
The `ctr` tool can be used to interact with the containerd daemon. It may be
more convenient to use this tool to verify the kernel and image instead of
setting up a Kubernetes cluster. The correct Kata runtimes need to be added
to the containerd `config.toml`. Below is a sample snippet that can be added
to allow QEMU and Cloud Hypervisor (CLH) to work with `ctr`.
```
[plugins.cri.containerd.runtimes.kata-qemu]
runtime_type = "io.containerd.kata-qemu.v2"
privileged_without_host_devices = true
pod_annotations = ["io.katacontainers.*"]
[plugins.cri.containerd.runtimes.kata-qemu.options]
ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration-qemu.toml"
[plugins.cri.containerd.runtimes.kata-clh]
runtime_type = "io.containerd.kata-clh.v2"
privileged_without_host_devices = true
pod_annotations = ["io.katacontainers.*"]
[plugins.cri.containerd.runtimes.kata-clh.options]
ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration-clh.toml"
```
In addition, containerd expects the shim binary to be in `/usr/local/bin`, so
add the following small wrapper scripts to be able to use either QEMU or
Cloud Hypervisor with Kata.
```bash
$ echo '#!/usr/bin/env bash' | sudo tee /usr/local/bin/containerd-shim-kata-qemu-v2
$ echo 'KATA_CONF_FILE=/opt/kata/share/defaults/kata-containers/configuration-qemu.toml /opt/kata/bin/containerd-shim-kata-v2 $@' | sudo tee -a /usr/local/bin/containerd-shim-kata-qemu-v2
$ sudo chmod +x /usr/local/bin/containerd-shim-kata-qemu-v2
$ echo '#!/usr/bin/env bash' | sudo tee /usr/local/bin/containerd-shim-kata-clh-v2
$ echo 'KATA_CONF_FILE=/opt/kata/share/defaults/kata-containers/configuration-clh.toml /opt/kata/bin/containerd-shim-kata-v2 $@' | sudo tee -a /usr/local/bin/containerd-shim-kata-clh-v2
$ sudo chmod +x /usr/local/bin/containerd-shim-kata-clh-v2
```
After the OpenSSL image is built and imported into containerd, an Intel® QAT
virtual function exposed in the step above can be added to the `ctr` command.
Make sure to change the `/dev/vfio` number to one that actually exists on the
host system. When using the `ctr` tool, the `configuration.toml` for Kata needs
to point to the custom Kata kernel and rootfs built above and the Intel® QAT
modules in the Kata rootfs need to load at boot. The following steps assume that
`kata-deploy` was used to install Kata and QEMU is being tested. If using a
different hypervisor, different install method for Kata, or a different
Intel® QAT chipset then the command will need to be modified.
> **Note: The following was tested with
[containerd v1.4.6](https://github.com/containerd/containerd/releases/tag/v1.4.6).**
```bash
$ config_file="/opt/kata/share/defaults/kata-containers/configuration-qemu.toml"
$ sudo sed -i "/kernel =/c kernel = "\"${KATA_ROOTFS_LOCATION}/${KATA_KERNEL_NAME}\""" $config_file
$ sudo sed -i "/image =/c image = "\"${KATA_KERNEL_LOCATION}/kata-containers.img\""" $config_file
$ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 modules-load=usdm_drv,qat_c62xvf"/g' $config_file
$ sudo docker save -o openssl-qat-engine.tar openssl-qat-engine:latest
$ sudo ctr images import openssl-qat-engine.tar
$ sudo ctr run --runtime io.containerd.kata-qemu.v2 --privileged -t --rm --device=/dev/vfio/180 --mount type=bind,src=/dev,dst=/dev,options=rbind:rw --mount type=bind,src=${QAT_CONF_LOCATION}/c6xxvf_dev0.conf,dst=/etc/c6xxvf_dev0.conf,options=rbind:rw docker.io/library/openssl-qat-engine:latest bash
```
Below are some commands to run in the container image to verify that Intel® QAT
is working:
```sh
root@67561dc2757a/ # cat /proc/modules
qat_c62xvf 16384 - - Live 0xffffffffc00d9000 (OE)
usdm_drv 86016 - - Live 0xffffffffc00e8000 (OE)
intel_qat 249856 - - Live 0xffffffffc009b000 (OE)
root@67561dc2757a/ # adf_ctl restart
Restarting all devices.
Processing /etc/c6xxvf_dev0.conf
root@67561dc2757a/ # adf_ctl status
Checking status of all devices.
There is 1 QAT acceleration device(s) in the system:
qat_dev0 - type: c6xxvf, inst_id: 0, node_id: 0, bsf: 0000:01:01.0, #accel: 1 #engines: 1 state: up
root@67561dc2757a/ # openssl engine -c -t qat-hw
(qat-hw) Reference implementation of QAT crypto engine v0.6.1
[RSA, DSA, DH, AES-128-CBC-HMAC-SHA1, AES-128-CBC-HMAC-SHA256, AES-256-CBC-HMAC-SHA1, AES-256-CBC-HMAC-SHA256, TLS1-PRF, HKDF, X25519, X448]
[ available ]
```
### Test Intel® QAT in Kubernetes
Start a Kubernetes cluster with containerd as the CRI. The host should
already be set up with 16 virtual functions of the Intel® QAT card bound to
`VFIO-PCI`. Verify this by looking in `/dev/vfio` for a listing of devices.
You might need to disable Docker before initializing Kubernetes. Be aware
that the OpenSSL container image built above will need to be exported from
Docker and imported into containerd.
If Kata is installed through [`kata-deploy`](../../tools/packaging/kata-deploy/README.md)
there will be multiple `configuration.toml` files associated with different
hypervisors. Rather than add in the custom Kata kernel, Kata rootfs, and
kernel modules to each `configuration.toml` as the default, instead use
[annotations](../how-to/how-to-load-kernel-modules-with-kata.md)
in the Kubernetes YAML file to tell Kata which kernel and rootfs to use. The
easy way to do this is to use `kata-deploy` which will install the Kata binaries
to `/opt` and properly configure the `/etc/containerd/config.toml` with annotation
support. However, the `configuration.toml` needs to enable support for
annotations as well. The following configures both QEMU and Cloud Hypervisor
`configuration.toml` files that are currently available with Kata Container
versions 2.0 and higher.
```bash
$ sudo sed -i 's/enable_annotations\s=\s\[\]/enable_annotations = [".*"]/' /opt/kata/share/defaults/kata-containers/configuration-qemu.toml
$ sudo sed -i 's/enable_annotations\s=\s\[\]/enable_annotations = [".*"]/' /opt/kata/share/defaults/kata-containers/configuration-clh.toml
```
Export the OpenSSL image from Docker and import into containerd.
```bash
$ sudo docker save -o openssl-qat-engine.tar openssl-qat-engine:latest
$ sudo ctr -n=k8s.io images import openssl-qat-engine.tar
```
The [Intel® QAT Plugin](https://github.com/intel/intel-device-plugins-for-kubernetes/blob/main/cmd/qat_plugin/README.md)
needs to be started so that the virtual functions can be discovered and
used by Kubernetes.
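A minimal sketch of deploying the plugin with its upstream kustomization
follows; the `ref` value is an assumption, so consult the plugin README for
the recommended release:
```bash
$ kubectl apply -k 'https://github.com/intel/intel-device-plugins-for-kubernetes/deployments/qat_plugin?ref=main'
```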
The following YAML file can be used to start a Kata container with Intel® QAT
support. If Kata is installed with `kata-deploy`, then the containerd
`configuration.toml` should have all of the Kata runtime classes already
populated and annotations supported. To use an Intel® QAT virtual function, the
Intel® QAT plugin needs to be started after the VF's are bound to `VFIO-PCI` as
described [above](#expose-and-bind-intel-qat-virtual-functions-to-vfio-pci-every-reboot).
Edit the following to point to the correct Kata kernel and rootfs location
built with Intel® QAT support.
```bash
$ cat << EOF > kata-openssl-qat.yaml
apiVersion: v1
kind: Pod
metadata:
name: kata-openssl-qat
labels:
app: kata-openssl-qat
annotations:
io.katacontainers.config.hypervisor.kernel: "$KATA_KERNEL_LOCATION/$KATA_KERNEL_NAME"
io.katacontainers.config.hypervisor.image: "$KATA_ROOTFS_LOCATION/kata-containers.img"
io.katacontainers.config.hypervisor.kernel_params: "modules-load=usdm_drv,qat_c62xvf"
spec:
runtimeClassName: kata-qemu
containers:
- name: kata-openssl-qat
image: docker.io/library/openssl-qat-engine:latest
imagePullPolicy: IfNotPresent
resources:
limits:
qat.intel.com/generic: 1
cpu: 1
securityContext:
capabilities:
add: ["IPC_LOCK", "SYS_ADMIN"]
volumeMounts:
- mountPath: /etc/c6xxvf_dev0.conf
name: etc-mount
- mountPath: /dev
name: dev-mount
volumes:
- name: dev-mount
hostPath:
path: /dev
- name: etc-mount
hostPath:
path: $QAT_CONF_LOCATION/c6xxvf_dev0.conf
EOF
```
Use `kubectl` to start the pod. Verify that Intel® QAT card acceleration is
working with the Intel® QAT engine.
```bash
$ kubectl apply -f kata-openssl-qat.yaml
```
```sh
$ kubectl exec -it kata-openssl-qat -- adf_ctl restart
Restarting all devices.
Processing /etc/c6xxvf_dev0.conf
$ kubectl exec -it kata-openssl-qat -- adf_ctl status
Checking status of all devices.
There is 1 QAT acceleration device(s) in the system:
qat_dev0 - type: c6xxvf, inst_id: 0, node_id: 0, bsf: 0000:01:01.0, #accel: 1 #engines: 1 state: up
$ kubectl exec -it kata-openssl-qat -- openssl engine -c -t qat-hw
(qat-hw) Reference implementation of QAT crypto engine v0.6.1
[RSA, DSA, DH, AES-128-CBC-HMAC-SHA1, AES-128-CBC-HMAC-SHA256, AES-256-CBC-HMAC-SHA1, AES-256-CBC-HMAC-SHA256, TLS1-PRF, HKDF, X25519, X448]
[ available ]
```
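For a quick sanity benchmark through the engine (a minimal sketch; `openssl
speed` options vary between OpenSSL versions):
```bash
$ kubectl exec -it kata-openssl-qat -- openssl speed -engine qat-hw rsa2048
```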
### Troubleshooting
* Check that `/dev/vfio` has VFs enabled.
```sh
$ ls /dev/vfio
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 vfio
```
* Check that the required kernel modules are loaded inside the Kata Container.
```sh
bash-5.0# grep -E "qat|usdm_drv" /proc/modules
qat_c62xvf 16384 - - Live 0x0000000000000000 (O)
usdm_drv 86016 - - Live 0x0000000000000000 (O)
intel_qat 184320 - - Live 0x0000000000000000 (O)
```
* Verify that at least the first `c6xxvf_dev0.conf` file is mounted inside the
container's `/etc` directory. You need one configuration file for each VF
passed into the container.
```sh
bash-5.0# ls /etc
c6xxvf_dev0.conf c6xxvf_dev11.conf c6xxvf_dev14.conf c6xxvf_dev3.conf c6xxvf_dev6.conf c6xxvf_dev9.conf resolv.conf
c6xxvf_dev1.conf c6xxvf_dev12.conf c6xxvf_dev15.conf c6xxvf_dev4.conf c6xxvf_dev7.conf hostname
c6xxvf_dev10.conf c6xxvf_dev13.conf c6xxvf_dev2.conf c6xxvf_dev5.conf c6xxvf_dev8.conf hosts
```
* Check `dmesg` inside the container to see if there are any issues with the
Intel® QAT driver.
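For example (using the pod name from the YAML above):
```sh
$ kubectl exec -it kata-openssl-qat -- dmesg | grep -i qat
```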
* If there are issues building the OpenSSL Intel® QAT container image, check
that runc is the default runtime for building containers.
```sh
$ cat /etc/systemd/system/docker.service.d/50-runtime.conf
[Service]
Environment="DOCKER_DEFAULT_RUNTIME=--default-runtime runc"
```
## Optional Scripts
### Verify Intel® QAT card counters are incremented
To check the built in firmware counters, the Intel® QAT driver has to be compiled
and installed to the host and can't rely on the built in host driver. The
counters will increase when the accelerator is actively being used. To verify
Intel® QAT is actively accelerating the containerized application, use the
following instructions to check if any of the counters increment. Make
sure to change the PCI Device ID to match whats in the system.
```bash
$ for i in 0434 0435 37c8 1f18 1f19; do lspci -d 8086:$i; done
$ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b1\:00.0/fw_counters
$ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b3\:00.0/fw_counters
$ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b5\:00.0/fw_counters
```
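As an optional helper, the following is a minimal sketch of a script that
samples one device's `fw_counters` file twice and reports whether the counters
moved while your workload was running. The BDF (`0000:b1:00.0`) is an example;
replace it with one from your system.
```bash
#!/bin/bash
# Sample the firmware counters of one Intel QAT device twice, a few
# seconds apart, and report whether they changed in between.
# NOTE: the BDF below is an example; adjust it to match the lspci output.
DEV="/sys/kernel/debug/qat_c6xx_0000:b1:00.0/fw_counters"

BEFORE=$(sudo cat "$DEV")
sleep 5
AFTER=$(sudo cat "$DEV")

if [ "$BEFORE" != "$AFTER" ]; then
    echo "Counters incremented: the accelerator is in use."
else
    echo "No change: the accelerator appears idle."
fi
```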

View File

@@ -1,3 +1,3 @@
[toolchain]
# Keep in sync with versions.yaml
channel = "1.89"
channel = "1.85.1"

src/agent/Cargo.lock (generated, 103 changed lines)
View File

@@ -459,9 +459,15 @@ version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
dependencies = [
"bit-vec",
"bit-vec 0.8.0",
]
[[package]]
name = "bit-vec"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
[[package]]
name = "bit-vec"
version = "0.8.0"
@@ -659,6 +665,30 @@ dependencies = [
"shlex",
]
[[package]]
name = "cdi"
version = "0.1.0"
source = "git+https://github.com/cncf-tags/container-device-interface-rs?rev=3b1e83dda5efcc83c7a4f134466ec006b37109c9#3b1e83dda5efcc83c7a4f134466ec006b37109c9"
dependencies = [
"anyhow",
"clap",
"const_format",
"jsonschema",
"lazy_static",
"libc",
"nix 0.24.3",
"notify",
"oci-spec",
"once_cell",
"path-clean",
"regex",
"semver",
"serde",
"serde_derive",
"serde_json",
"serde_yaml",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
@@ -778,31 +808,6 @@ dependencies = [
"unicode-xid",
]
[[package]]
name = "container-device-interface"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2605001b0e8214dae8af146a43ccaa965d960403e330f174c21327154530df8b"
dependencies = [
"anyhow",
"clap",
"const_format",
"jsonschema",
"lazy_static",
"libc",
"nix 0.24.3",
"notify",
"oci-spec",
"once_cell",
"path-clean",
"regex",
"semver",
"serde",
"serde_derive",
"serde_json",
"serde_yaml",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
@@ -1207,9 +1212,9 @@ dependencies = [
[[package]]
name = "fancy-regex"
version = "0.16.2"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "998b056554fbe42e03ae0e152895cd1a7e1002aec800fdc6635d20270260c46f"
checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298"
dependencies = [
"bit-set",
"regex-automata 0.4.9",
@@ -1244,6 +1249,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece"
dependencies = [
"crc32fast",
"libz-sys",
"miniz_oxide",
]
@@ -2007,9 +2013,9 @@ dependencies = [
[[package]]
name = "jsonschema"
version = "0.33.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d46662859bc5f60a145b75f4632fbadc84e829e45df6c5de74cfc8e05acb96b5"
checksum = "f1b46a0365a611fbf1d2143104dcf910aada96fafd295bab16c60b802bf6fa1d"
dependencies = [
"ahash 0.8.12",
"base64 0.22.1",
@@ -2043,11 +2049,11 @@ dependencies = [
"async-trait",
"base64 0.22.1",
"capctl",
"cdi",
"cfg-if",
"cgroups-rs",
"clap",
"const_format",
"container-device-interface",
"derivative",
"futures",
"ipnetwork",
@@ -2058,7 +2064,7 @@ dependencies = [
"libc",
"log",
"logging",
"mem-agent",
"mem-agent-lib",
"netlink-packet-core",
"netlink-packet-route",
"netlink-sys 0.7.0",
@@ -2259,6 +2265,17 @@ dependencies = [
"uuid 0.8.2",
]
[[package]]
name = "libz-sys"
version = "1.1.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b70e7a7df205e92a1a4cd9aaae7898dac0aa555503cc0a649494d0d60e7651d"
dependencies = [
"cc",
"pkg-config",
"vcpkg",
]
[[package]]
name = "linux-raw-sys"
version = "0.3.8"
@@ -2333,7 +2350,7 @@ dependencies = [
]
[[package]]
name = "mem-agent"
name = "mem-agent-lib"
version = "0.2.0"
dependencies = [
"anyhow",
@@ -3405,9 +3422,9 @@ dependencies = [
[[package]]
name = "referencing"
version = "0.33.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e9c261f7ce75418b3beadfb3f0eb1299fe8eb9640deba45ffa2cb783098697d"
checksum = "c8eff4fa778b5c2a57e85c5f2fe3a709c52f0e60d23146e2151cbef5893f420e"
dependencies = [
"ahash 0.8.12",
"fluent-uri 0.3.2",
@@ -3701,7 +3718,7 @@ dependencies = [
"anyhow",
"async-trait",
"awaitgroup",
"bit-vec",
"bit-vec 0.6.3",
"capctl",
"caps",
"cfg-if",
@@ -4021,9 +4038,12 @@ checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
[[package]]
name = "slab"
version = "0.4.11"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589"
checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67"
dependencies = [
"autocfg",
]
[[package]]
name = "slash-formatter"
@@ -4305,7 +4325,6 @@ checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683"
name = "test-utils"
version = "0.1.0"
dependencies = [
"libc",
"nix 0.26.4",
]
@@ -4804,6 +4823,12 @@ version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "943ce29a8a743eb10d6082545d861b24f9d1b160b7d741e0f2cdf726bec909c5"
[[package]]
name = "vcpkg"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "version_check"
version = "0.9.5"

View File

@@ -13,12 +13,8 @@ lazy_static = "1.3.0"
ttrpc = { version = "0.8.4", features = ["async"], default-features = false }
protobuf = "3.7.2"
libc = "0.2.94"
# Notes:
# - Needs to stay in sync with libs
# - Upgrading to 0.27+ will require code changes (see #11842)
# Notes: nix needs to stay in sync with libs
nix = "0.26.4"
capctl = "0.2.0"
scan_fmt = "0.2.6"
scopeguard = "1.0.0"
@@ -85,10 +81,10 @@ kata-agent-policy = { path = "policy" }
rustjail = { path = "rustjail" }
vsock-exporter = { path = "vsock-exporter" }
mem-agent = { path = "../libs/mem-agent" }
mem-agent = { path = "../mem-agent", package = "mem-agent-lib" }
kata-sys-util = { path = "../libs/kata-sys-util" }
kata-types = { path = "../libs/kata-types", features = ["safe-path"] }
kata-types = { path = "../libs/kata-types" }
# Note: this crate sets the slog 'max_*' features which allows the log level
# to be modified at runtime.
logging = { path = "../libs/logging" }
@@ -167,6 +163,9 @@ clap.workspace = true
strum.workspace = true
strum_macros.workspace = true
# Agent Policy
cdi = { git = "https://github.com/cncf-tags/container-device-interface-rs", rev = "3b1e83dda5efcc83c7a4f134466ec006b37109c9" }
# Local dependencies
kata-agent-policy = { workspace = true, optional = true }
mem-agent.workspace = true
@@ -186,8 +185,6 @@ base64 = "0.22"
sha2 = "0.10.8"
async-compression = { version = "0.4.22", features = ["tokio", "gzip"] }
container-device-interface = "0.1.1"
[target.'cfg(target_arch = "s390x")'.dependencies]
pv_core = { git = "https://github.com/ibm-s390-linux/s390-tools", rev = "4942504a9a2977d49989a5e5b7c1c8e07dc0fa41", package = "s390_pv_core" }
@@ -206,7 +203,6 @@ lto = true
seccomp = ["rustjail/seccomp"]
standard-oci-runtime = ["rustjail/standard-oci-runtime"]
agent-policy = ["kata-agent-policy"]
init-data = []
[[bin]]
name = "kata-agent"

View File

@@ -41,14 +41,6 @@ ifeq ($(AGENT_POLICY),yes)
override EXTRA_RUSTFEATURES += agent-policy
endif
##VAR INIT_DATA=yes|no define if agent enables the init data feature
INIT_DATA ?= yes
# Enable the init data fature of rust build
ifeq ($(INIT_DATA),yes)
override EXTRA_RUSTFEATURES += init-data
endif
include ../../utils.mk
##VAR STANDARD_OCI_RUNTIME=yes|no define if agent enables standard oci runtime feature
@@ -130,7 +122,7 @@ $(TARGET): $(GENERATED_CODE) $(TARGET_PATH)
$(TARGET_PATH): show-summary
@RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) $(if $(findstring release,$(BUILD_TYPE)),--release) $(EXTRA_RUSTFEATURES)
$(GENERATED_FILES): %: %.in $(VERSION_FILE)
$(GENERATED_FILES): %: %.in
@sed $(foreach r,$(GENERATED_REPLACEMENTS),-e 's|@$r@|$($r)|g') "$<" > "$@"
##TARGET optimize: optimized build

Some files were not shown because too many files have changed in this diff.