mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-03-06 12:52:07 +00:00
Compare commits
4 Commits
3.26.0
...
sprt/test-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
29e74892f4 | ||
|
|
5c5e2c68bc | ||
|
|
fd4f7638e5 | ||
|
|
a5641e27db |
12
.github/actionlint.yaml
vendored
12
.github/actionlint.yaml
vendored
@@ -7,24 +7,20 @@
|
||||
self-hosted-runner:
|
||||
# Labels of self-hosted runner that linter should ignore
|
||||
labels:
|
||||
- amd64-nvidia-a100
|
||||
- amd64-nvidia-h100-snp
|
||||
- arm64-k8s
|
||||
- ubuntu-22.04-arm
|
||||
- garm-ubuntu-2004
|
||||
- garm-ubuntu-2004-smaller
|
||||
- garm-ubuntu-2204
|
||||
- garm-ubuntu-2304
|
||||
- garm-ubuntu-2304-smaller
|
||||
- garm-ubuntu-2204-smaller
|
||||
- ppc64le
|
||||
- ppc64le-k8s
|
||||
- ppc64le-small
|
||||
- ubuntu-24.04-ppc64le
|
||||
- ubuntu-24.04-s390x
|
||||
- k8s-ppc64le
|
||||
- metrics
|
||||
- ppc64le
|
||||
- riscv-builder
|
||||
- sev-snp
|
||||
- s390x
|
||||
- s390x-large
|
||||
- tdx
|
||||
- ubuntu-24.04-arm
|
||||
- amd64-nvidia-a100
|
||||
|
||||
4
.github/dependabot.yml
vendored
4
.github/dependabot.yml
vendored
@@ -12,6 +12,7 @@ updates:
|
||||
- "/src/tools/agent-ctl"
|
||||
- "/src/tools/genpolicy"
|
||||
- "/src/tools/kata-ctl"
|
||||
- "/src/tools/runk"
|
||||
- "/src/tools/trace-forwarder"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
@@ -65,9 +66,6 @@ updates:
|
||||
rustix:
|
||||
patterns:
|
||||
- rustix
|
||||
slab:
|
||||
patterns:
|
||||
- slab
|
||||
time:
|
||||
patterns:
|
||||
- time
|
||||
|
||||
11
.github/workflows/actionlint.yaml
vendored
11
.github/workflows/actionlint.yaml
vendored
@@ -2,17 +2,24 @@ name: Lint GHA workflows
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
pull_request_target:
|
||||
types:
|
||||
- opened
|
||||
- edited
|
||||
- reopened
|
||||
- synchronize
|
||||
paths:
|
||||
- '.github/workflows/**'
|
||||
|
||||
permissions: {}
|
||||
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
run-actionlint:
|
||||
name: run-actionlint
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
runs-on: ubuntu-24.04
|
||||
|
||||
114
.github/workflows/basic-ci-amd64.yaml
vendored
114
.github/workflows/basic-ci-amd64.yaml
vendored
@@ -17,7 +17,6 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
run-containerd-sandboxapi:
|
||||
name: run-containerd-sandboxapi
|
||||
strategy:
|
||||
# We can set this to true whenever we're 100% sure that
|
||||
# the all the tests are not flaky, otherwise we'll fail
|
||||
@@ -66,12 +65,11 @@ jobs:
|
||||
run: bash tests/integration/cri-containerd/gha-run.sh run
|
||||
|
||||
run-containerd-stability:
|
||||
name: run-containerd-stability
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
containerd_version: ['lts', 'active']
|
||||
vmm: ['clh', 'cloud-hypervisor', 'dragonball', 'qemu', 'qemu-runtime-rs']
|
||||
vmm: ['clh', 'cloud-hypervisor', 'dragonball', 'qemu', 'stratovirt']
|
||||
runs-on: ubuntu-22.04
|
||||
env:
|
||||
CONTAINERD_VERSION: ${{ matrix.containerd_version }}
|
||||
@@ -109,7 +107,6 @@ jobs:
|
||||
run: bash tests/stability/gha-run.sh run
|
||||
|
||||
run-nydus:
|
||||
name: run-nydus
|
||||
strategy:
|
||||
# We can set this to true whenever we're 100% sure that
|
||||
# the all the tests are not flaky, otherwise we'll fail
|
||||
@@ -117,7 +114,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
containerd_version: ['lts', 'active']
|
||||
vmm: ['clh', 'qemu', 'dragonball', 'qemu-runtime-rs']
|
||||
vmm: ['clh', 'qemu', 'dragonball', 'stratovirt']
|
||||
runs-on: ubuntu-22.04
|
||||
env:
|
||||
CONTAINERD_VERSION: ${{ matrix.containerd_version }}
|
||||
@@ -147,24 +144,49 @@ jobs:
|
||||
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
|
||||
path: kata-artifacts
|
||||
|
||||
- name: get-kata-tools-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
|
||||
path: kata-tools-artifacts
|
||||
|
||||
- name: Install kata
|
||||
run: bash tests/integration/nydus/gha-run.sh install-kata kata-artifacts
|
||||
|
||||
- name: Install kata-tools
|
||||
run: bash tests/integration/nydus/gha-run.sh install-kata-tools kata-tools-artifacts
|
||||
|
||||
- name: Run nydus tests
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/nydus/gha-run.sh run
|
||||
|
||||
run-runk:
|
||||
# Skip runk tests as we have no maintainers. TODO: Decide when to remove altogether
|
||||
if: false
|
||||
runs-on: ubuntu-22.04
|
||||
env:
|
||||
CONTAINERD_VERSION: lts
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
ref: ${{ inputs.commit-hash }}
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Rebase atop of the latest target branch
|
||||
run: |
|
||||
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash tests/integration/runk/gha-run.sh install-dependencies
|
||||
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
|
||||
path: kata-artifacts
|
||||
|
||||
- name: Install kata
|
||||
run: bash tests/integration/runk/gha-run.sh install-kata kata-artifacts
|
||||
|
||||
- name: Run runk tests
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/runk/gha-run.sh run
|
||||
|
||||
run-tracing:
|
||||
name: run-tracing
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -209,7 +231,6 @@ jobs:
|
||||
run: bash tests/functional/tracing/gha-run.sh run
|
||||
|
||||
run-vfio:
|
||||
name: run-vfio
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -252,8 +273,53 @@ jobs:
|
||||
timeout-minutes: 15
|
||||
run: bash tests/functional/vfio/gha-run.sh run
|
||||
|
||||
run-docker-tests:
|
||||
strategy:
|
||||
# We can set this to true whenever we're 100% sure that
|
||||
# all the tests are not flaky, otherwise we'll fail them
|
||||
# all due to a single flaky instance.
|
||||
fail-fast: false
|
||||
matrix:
|
||||
vmm:
|
||||
- clh
|
||||
- qemu
|
||||
- dragonball
|
||||
- cloud-hypervisor
|
||||
runs-on: ubuntu-22.04
|
||||
env:
|
||||
KATA_HYPERVISOR: ${{ matrix.vmm }}
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
ref: ${{ inputs.commit-hash }}
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Rebase atop of the latest target branch
|
||||
run: |
|
||||
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash tests/integration/docker/gha-run.sh install-dependencies
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
|
||||
path: kata-artifacts
|
||||
|
||||
- name: Install kata
|
||||
run: bash tests/integration/docker/gha-run.sh install-kata kata-artifacts
|
||||
|
||||
- name: Run docker smoke test
|
||||
timeout-minutes: 5
|
||||
run: bash tests/integration/docker/gha-run.sh run
|
||||
|
||||
run-nerdctl-tests:
|
||||
name: run-nerdctl-tests
|
||||
strategy:
|
||||
# We can set this to true whenever we're 100% sure that
|
||||
# all the tests are not flaky, otherwise we'll fail them
|
||||
@@ -265,7 +331,6 @@ jobs:
|
||||
- dragonball
|
||||
- qemu
|
||||
- cloud-hypervisor
|
||||
- qemu-runtime-rs
|
||||
runs-on: ubuntu-22.04
|
||||
env:
|
||||
KATA_HYPERVISOR: ${{ matrix.vmm }}
|
||||
@@ -314,7 +379,6 @@ jobs:
|
||||
retention-days: 1
|
||||
|
||||
run-kata-agent-apis:
|
||||
name: run-kata-agent-apis
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
@@ -340,16 +404,8 @@ jobs:
|
||||
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
|
||||
path: kata-artifacts
|
||||
|
||||
- name: get-kata-tools-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
|
||||
path: kata-tools-artifacts
|
||||
|
||||
- name: Install kata & kata-tools
|
||||
run: |
|
||||
bash tests/functional/kata-agent-apis/gha-run.sh install-kata kata-artifacts
|
||||
bash tests/functional/kata-agent-apis/gha-run.sh install-kata-tools kata-tools-artifacts
|
||||
- name: Install kata
|
||||
run: bash tests/functional/kata-agent-apis/gha-run.sh install-kata kata-artifacts
|
||||
|
||||
- name: Run kata agent api tests with agent-ctl
|
||||
run: bash tests/functional/kata-agent-apis/gha-run.sh run
|
||||
|
||||
42
.github/workflows/basic-ci-s390x.yaml
vendored
42
.github/workflows/basic-ci-s390x.yaml
vendored
@@ -17,7 +17,6 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
run-containerd-sandboxapi:
|
||||
name: run-containerd-sandboxapi
|
||||
strategy:
|
||||
# We can set this to true whenever we're 100% sure that
|
||||
# the all the tests are not flaky, otherwise we'll fail
|
||||
@@ -66,7 +65,6 @@ jobs:
|
||||
run: bash tests/integration/cri-containerd/gha-run.sh run
|
||||
|
||||
run-containerd-stability:
|
||||
name: run-containerd-stability
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -106,3 +104,43 @@ jobs:
|
||||
- name: Run containerd-stability tests
|
||||
timeout-minutes: 15
|
||||
run: bash tests/stability/gha-run.sh run
|
||||
|
||||
run-docker-tests:
|
||||
strategy:
|
||||
# We can set this to true whenever we're 100% sure that
|
||||
# all the tests are not flaky, otherwise we'll fail them
|
||||
# all due to a single flaky instance.
|
||||
fail-fast: false
|
||||
matrix:
|
||||
vmm: ['qemu']
|
||||
runs-on: s390x-large
|
||||
env:
|
||||
KATA_HYPERVISOR: ${{ matrix.vmm }}
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
ref: ${{ inputs.commit-hash }}
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Rebase atop of the latest target branch
|
||||
run: |
|
||||
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash tests/integration/docker/gha-run.sh install-dependencies
|
||||
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: kata-static-tarball-s390x${{ inputs.tarball-suffix }}
|
||||
path: kata-artifacts
|
||||
|
||||
- name: Install kata
|
||||
run: bash tests/integration/docker/gha-run.sh install-kata kata-artifacts
|
||||
|
||||
- name: Run docker smoke test
|
||||
timeout-minutes: 5
|
||||
run: bash tests/integration/docker/gha-run.sh run
|
||||
|
||||
@@ -17,7 +17,6 @@ permissions: {}
|
||||
name: Build checks preview riscv64
|
||||
jobs:
|
||||
check:
|
||||
name: check
|
||||
runs-on: ${{ inputs.instance }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@@ -124,11 +123,9 @@ jobs:
|
||||
echo "GITHUB_RUNNER_CI_NON_VIRT=true" >> "$GITHUB_ENV"
|
||||
- name: Running `${{ matrix.command }}` for ${{ matrix.component.name }}
|
||||
run: |
|
||||
cd "${COMPONENT_PATH}"
|
||||
${COMMAND}
|
||||
cd ${{ matrix.component.path }}
|
||||
${{ matrix.command }}
|
||||
env:
|
||||
COMMAND: ${{ matrix.command }}
|
||||
COMPONENT_PATH: ${{ matrix.component.path }}
|
||||
RUST_BACKTRACE: "1"
|
||||
RUST_LIB_BACKTRACE: "0"
|
||||
SKIP_GO_VERSION_CHECK: "1"
|
||||
|
||||
18
.github/workflows/build-checks.yaml
vendored
18
.github/workflows/build-checks.yaml
vendored
@@ -11,13 +11,7 @@ permissions: {}
|
||||
name: Build checks
|
||||
jobs:
|
||||
check:
|
||||
name: check
|
||||
runs-on: >-
|
||||
${{
|
||||
( contains(inputs.instance, 's390x') && matrix.component.name == 'runtime' ) && 's390x' ||
|
||||
( contains(inputs.instance, 'ppc64le') && (matrix.component.name == 'runtime' || matrix.component.name == 'agent') ) && 'ppc64le' ||
|
||||
inputs.instance
|
||||
}}
|
||||
runs-on: ${{ inputs.instance }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -52,7 +46,6 @@ jobs:
|
||||
path: src/libs
|
||||
needs:
|
||||
- rust
|
||||
- protobuf-compiler
|
||||
- name: agent-ctl
|
||||
path: src/tools/agent-ctl
|
||||
needs:
|
||||
@@ -63,7 +56,6 @@ jobs:
|
||||
path: src/tools/kata-ctl
|
||||
needs:
|
||||
- rust
|
||||
- protobuf-compiler
|
||||
- name: trace-forwarder
|
||||
path: src/tools/trace-forwarder
|
||||
needs:
|
||||
@@ -73,8 +65,6 @@ jobs:
|
||||
needs:
|
||||
- rust
|
||||
- protobuf-compiler
|
||||
instance:
|
||||
- ${{ inputs.instance }}
|
||||
|
||||
steps:
|
||||
- name: Adjust a permission for repo
|
||||
@@ -136,11 +126,9 @@ jobs:
|
||||
echo "GITHUB_RUNNER_CI_NON_VIRT=true" >> "$GITHUB_ENV"
|
||||
- name: Running `${{ matrix.command }}` for ${{ matrix.component.name }}
|
||||
run: |
|
||||
cd "${COMPONENT_PATH}"
|
||||
eval "${COMMAND}"
|
||||
cd ${{ matrix.component.path }}
|
||||
${{ matrix.command }}
|
||||
env:
|
||||
COMMAND: ${{ matrix.command }}
|
||||
COMPONENT_PATH: ${{ matrix.component.path }}
|
||||
RUST_BACKTRACE: "1"
|
||||
RUST_LIB_BACKTRACE: "0"
|
||||
SKIP_GO_VERSION_CHECK: "1"
|
||||
|
||||
@@ -30,7 +30,6 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
build-asset:
|
||||
name: build-asset
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -41,11 +40,16 @@ jobs:
|
||||
matrix:
|
||||
asset:
|
||||
- agent
|
||||
- agent-ctl
|
||||
- busybox
|
||||
- cloud-hypervisor
|
||||
- cloud-hypervisor-glibc
|
||||
- coco-guest-components
|
||||
- csi-kata-directvolume
|
||||
- firecracker
|
||||
- genpolicy
|
||||
- kata-ctl
|
||||
- kata-manager
|
||||
- kernel
|
||||
- kernel-confidential
|
||||
- kernel-dragonball-experimental
|
||||
@@ -54,11 +58,12 @@ jobs:
|
||||
- nydus
|
||||
- ovmf
|
||||
- ovmf-sev
|
||||
- ovmf-tdx
|
||||
- pause-image
|
||||
- qemu
|
||||
- qemu-snp-experimental
|
||||
- qemu-tdx-experimental
|
||||
- stratovirt
|
||||
- trace-forwarder
|
||||
- virtiofsd
|
||||
stage:
|
||||
- ${{ inputs.stage }}
|
||||
@@ -91,6 +96,7 @@ jobs:
|
||||
- name: Build ${{ matrix.asset }}
|
||||
id: build
|
||||
run: |
|
||||
[[ "${KATA_ASSET}" == *"nvidia"* ]] && echo "KBUILD_SIGN_PIN=${{ secrets.KBUILD_SIGN_PIN }}" >> "${GITHUB_ENV}"
|
||||
make "${KATA_ASSET}-tarball"
|
||||
build_dir=$(readlink -f build)
|
||||
# store-artifact does not work with symlink
|
||||
@@ -104,19 +110,16 @@ jobs:
|
||||
ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
|
||||
KBUILD_SIGN_PIN: ${{ contains(matrix.asset, 'nvidia') && secrets.KBUILD_SIGN_PIN || '' }}
|
||||
|
||||
- name: Parse OCI image name and digest
|
||||
id: parse-oci-segments
|
||||
if: ${{ env.PERFORM_ATTESTATION == 'yes' }}
|
||||
env:
|
||||
KATA_ASSET: ${{ matrix.asset }}
|
||||
run: |
|
||||
oci_image="$(<"build/${KATA_ASSET}-oci-image")"
|
||||
oci_image="$(<"build/${{ matrix.asset }}-oci-image")"
|
||||
echo "oci-name=${oci_image%@*}" >> "$GITHUB_OUTPUT"
|
||||
echo "oci-digest=${oci_image#*@}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- uses: oras-project/setup-oras@22ce207df3b08e061f537244349aac6ae1d214f6 # v1.2.4
|
||||
- uses: oras-project/setup-oras@5c0b487ce3fe0ce3ab0d034e63669e426e294e4d # v1.2.2
|
||||
if: ${{ env.PERFORM_ATTESTATION == 'yes' }}
|
||||
with:
|
||||
version: "1.2.0"
|
||||
@@ -148,13 +151,12 @@ jobs:
|
||||
if: ${{ startsWith(matrix.asset, 'kernel-nvidia-gpu') }}
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: kata-artifacts-amd64-${{ matrix.asset }}-modules${{ inputs.tarball-suffix }}
|
||||
path: kata-build/kata-static-${{ matrix.asset }}-modules.tar.zst
|
||||
name: kata-artifacts-amd64-${{ matrix.asset }}-headers${{ inputs.tarball-suffix }}
|
||||
path: kata-build/kata-static-${{ matrix.asset }}-headers.tar.zst
|
||||
retention-days: 15
|
||||
if-no-files-found: error
|
||||
|
||||
build-asset-rootfs:
|
||||
name: build-asset-rootfs
|
||||
runs-on: ubuntu-22.04
|
||||
needs: build-asset
|
||||
permissions:
|
||||
@@ -166,8 +168,6 @@ jobs:
|
||||
- rootfs-image
|
||||
- rootfs-image-confidential
|
||||
- rootfs-image-mariner
|
||||
- rootfs-image-nvidia-gpu
|
||||
- rootfs-image-nvidia-gpu-confidential
|
||||
- rootfs-initrd
|
||||
- rootfs-initrd-confidential
|
||||
- rootfs-initrd-nvidia-gpu
|
||||
@@ -203,6 +203,7 @@ jobs:
|
||||
- name: Build ${{ matrix.asset }}
|
||||
id: build
|
||||
run: |
|
||||
[[ "${KATA_ASSET}" == *"nvidia"* ]] && echo "KBUILD_SIGN_PIN=${{ secrets.KBUILD_SIGN_PIN }}" >> "${GITHUB_ENV}"
|
||||
./tests/gha-adjust-to-use-prebuilt-components.sh kata-artifacts "${KATA_ASSET}"
|
||||
make "${KATA_ASSET}-tarball"
|
||||
build_dir=$(readlink -f build)
|
||||
@@ -217,7 +218,6 @@ jobs:
|
||||
ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
|
||||
KBUILD_SIGN_PIN: ${{ contains(matrix.asset, 'nvidia') && secrets.KBUILD_SIGN_PIN || '' }}
|
||||
|
||||
- name: store-artifact ${{ matrix.asset }}
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
@@ -229,7 +229,6 @@ jobs:
|
||||
|
||||
# We don't need the binaries installed in the rootfs as part of the release tarball, so can delete them now we've built the rootfs
|
||||
remove-rootfs-binary-artifacts:
|
||||
name: remove-rootfs-binary-artifacts
|
||||
runs-on: ubuntu-22.04
|
||||
needs: build-asset-rootfs
|
||||
strategy:
|
||||
@@ -237,8 +236,8 @@ jobs:
|
||||
asset:
|
||||
- busybox
|
||||
- coco-guest-components
|
||||
- kernel-nvidia-gpu-modules
|
||||
- kernel-nvidia-gpu-confidential-modules
|
||||
- kernel-nvidia-gpu-headers
|
||||
- kernel-nvidia-gpu-confidential-headers
|
||||
- pause-image
|
||||
steps:
|
||||
- uses: geekyeggo/delete-artifact@f275313e70c08f6120db482d7a6b98377786765b # v5.1.0
|
||||
@@ -247,7 +246,6 @@ jobs:
|
||||
|
||||
# We don't need the binaries installed in the rootfs as part of the release tarball, so can delete them now we've built the rootfs
|
||||
remove-rootfs-binary-artifacts-for-release:
|
||||
name: remove-rootfs-binary-artifacts-for-release
|
||||
runs-on: ubuntu-22.04
|
||||
needs: build-asset-rootfs
|
||||
strategy:
|
||||
@@ -261,7 +259,6 @@ jobs:
|
||||
name: kata-artifacts-amd64-${{ matrix.asset}}${{ inputs.tarball-suffix }}
|
||||
|
||||
build-asset-shim-v2:
|
||||
name: build-asset-shim-v2
|
||||
runs-on: ubuntu-22.04
|
||||
needs: [build-asset, build-asset-rootfs, remove-rootfs-binary-artifacts, remove-rootfs-binary-artifacts-for-release]
|
||||
permissions:
|
||||
@@ -323,7 +320,6 @@ jobs:
|
||||
if-no-files-found: error
|
||||
|
||||
create-kata-tarball:
|
||||
name: create-kata-tarball
|
||||
runs-on: ubuntu-22.04
|
||||
needs: [build-asset, build-asset-rootfs, build-asset-shim-v2]
|
||||
permissions:
|
||||
@@ -359,104 +355,3 @@ jobs:
|
||||
path: kata-static.tar.zst
|
||||
retention-days: 15
|
||||
if-no-files-found: error
|
||||
|
||||
build-tools-asset:
|
||||
name: build-tools-asset
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
strategy:
|
||||
matrix:
|
||||
asset:
|
||||
- agent-ctl
|
||||
- csi-kata-directvolume
|
||||
- genpolicy
|
||||
- kata-ctl
|
||||
- kata-manager
|
||||
- trace-forwarder
|
||||
stage:
|
||||
- ${{ inputs.stage }}
|
||||
steps:
|
||||
- name: Login to Kata Containers quay.io
|
||||
if: ${{ inputs.push-to-registry == 'yes' }}
|
||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
||||
with:
|
||||
registry: quay.io
|
||||
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
|
||||
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
ref: ${{ inputs.commit-hash }}
|
||||
fetch-depth: 0 # This is needed in order to keep the commit ids history
|
||||
persist-credentials: false
|
||||
|
||||
- name: Rebase atop of the latest target branch
|
||||
run: |
|
||||
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: Build ${{ matrix.asset }}
|
||||
id: build
|
||||
run: |
|
||||
make "${KATA_ASSET}-tarball"
|
||||
build_dir=$(readlink -f build)
|
||||
# store-artifact does not work with symlink
|
||||
mkdir -p kata-tools-build && cp "${build_dir}"/kata-static-"${KATA_ASSET}"*.tar.* kata-tools-build/.
|
||||
env:
|
||||
KATA_ASSET: ${{ matrix.asset }}
|
||||
TAR_OUTPUT: ${{ matrix.asset }}.tar.gz
|
||||
PUSH_TO_REGISTRY: ${{ inputs.push-to-registry }}
|
||||
ARTEFACT_REGISTRY: ghcr.io
|
||||
ARTEFACT_REGISTRY_USERNAME: ${{ github.actor }}
|
||||
ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
|
||||
|
||||
- name: store-artifact ${{ matrix.asset }}
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: kata-tools-artifacts-amd64-${{ matrix.asset }}${{ inputs.tarball-suffix }}
|
||||
path: kata-tools-build/kata-static-${{ matrix.asset }}.tar.zst
|
||||
retention-days: 15
|
||||
if-no-files-found: error
|
||||
|
||||
create-kata-tools-tarball:
|
||||
name: create-kata-tools-tarball
|
||||
runs-on: ubuntu-22.04
|
||||
needs: [build-tools-asset]
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
ref: ${{ inputs.commit-hash }}
|
||||
fetch-depth: 0
|
||||
fetch-tags: true
|
||||
persist-credentials: false
|
||||
- name: Rebase atop of the latest target branch
|
||||
run: |
|
||||
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
- name: get-artifacts
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
pattern: kata-tools-artifacts-amd64-*${{ inputs.tarball-suffix }}
|
||||
path: kata-tools-artifacts
|
||||
merge-multiple: true
|
||||
- name: merge-artifacts
|
||||
run: |
|
||||
./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-tools-artifacts versions.yaml kata-tools-static.tar.zst
|
||||
env:
|
||||
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
|
||||
- name: store-artifacts
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
|
||||
path: kata-tools-static.tar.zst
|
||||
retention-days: 15
|
||||
if-no-files-found: error
|
||||
|
||||
@@ -23,15 +23,12 @@ on:
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD:
|
||||
required: false
|
||||
KBUILD_SIGN_PIN:
|
||||
required: true
|
||||
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
build-asset:
|
||||
name: build-asset
|
||||
runs-on: ubuntu-24.04-arm
|
||||
runs-on: ubuntu-22.04-arm
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
@@ -47,10 +44,10 @@ jobs:
|
||||
- kernel
|
||||
- kernel-dragonball-experimental
|
||||
- kernel-nvidia-gpu
|
||||
- kernel-cca-confidential
|
||||
- nydus
|
||||
- ovmf
|
||||
- qemu
|
||||
- stratovirt
|
||||
- virtiofsd
|
||||
env:
|
||||
PERFORM_ATTESTATION: ${{ matrix.asset == 'agent' && inputs.push-to-registry == 'yes' && 'yes' || 'no' }}
|
||||
@@ -90,19 +87,16 @@ jobs:
|
||||
ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
|
||||
KBUILD_SIGN_PIN: ${{ contains(matrix.asset, 'nvidia') && secrets.KBUILD_SIGN_PIN || '' }}
|
||||
|
||||
- name: Parse OCI image name and digest
|
||||
id: parse-oci-segments
|
||||
if: ${{ env.PERFORM_ATTESTATION == 'yes' }}
|
||||
env:
|
||||
KATA_ASSET: ${{ matrix.asset }}
|
||||
run: |
|
||||
oci_image="$(<"build/${KATA_ASSET}-oci-image")"
|
||||
oci_image="$(<"build/${{ matrix.asset }}-oci-image")"
|
||||
echo "oci-name=${oci_image%@*}" >> "$GITHUB_OUTPUT"
|
||||
echo "oci-digest=${oci_image#*@}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- uses: oras-project/setup-oras@22ce207df3b08e061f537244349aac6ae1d214f6 # v1.2.4
|
||||
- uses: oras-project/setup-oras@5c0b487ce3fe0ce3ab0d034e63669e426e294e4d # v1.2.2
|
||||
if: ${{ env.PERFORM_ATTESTATION == 'yes' }}
|
||||
with:
|
||||
version: "1.2.0"
|
||||
@@ -134,14 +128,13 @@ jobs:
|
||||
if: ${{ startsWith(matrix.asset, 'kernel-nvidia-gpu') }}
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: kata-artifacts-arm64-${{ matrix.asset }}-modules${{ inputs.tarball-suffix }}
|
||||
path: kata-build/kata-static-${{ matrix.asset }}-modules.tar.zst
|
||||
name: kata-artifacts-arm64-${{ matrix.asset }}-headers${{ inputs.tarball-suffix }}
|
||||
path: kata-build/kata-static-${{ matrix.asset }}-headers.tar.zst
|
||||
retention-days: 15
|
||||
if-no-files-found: error
|
||||
|
||||
build-asset-rootfs:
|
||||
name: build-asset-rootfs
|
||||
runs-on: ubuntu-24.04-arm
|
||||
runs-on: ubuntu-22.04-arm
|
||||
needs: build-asset
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -150,7 +143,6 @@ jobs:
|
||||
matrix:
|
||||
asset:
|
||||
- rootfs-image
|
||||
- rootfs-image-nvidia-gpu
|
||||
- rootfs-initrd
|
||||
- rootfs-initrd-nvidia-gpu
|
||||
steps:
|
||||
@@ -197,7 +189,6 @@ jobs:
|
||||
ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
|
||||
KBUILD_SIGN_PIN: ${{ contains(matrix.asset, 'nvidia') && secrets.KBUILD_SIGN_PIN || '' }}
|
||||
|
||||
- name: store-artifact ${{ matrix.asset }}
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
@@ -209,14 +200,13 @@ jobs:
|
||||
|
||||
# We don't need the binaries installed in the rootfs as part of the release tarball, so can delete them now we've built the rootfs
|
||||
remove-rootfs-binary-artifacts:
|
||||
name: remove-rootfs-binary-artifacts
|
||||
runs-on: ubuntu-24.04-arm
|
||||
runs-on: ubuntu-22.04-arm
|
||||
needs: build-asset-rootfs
|
||||
strategy:
|
||||
matrix:
|
||||
asset:
|
||||
- busybox
|
||||
- kernel-nvidia-gpu-modules
|
||||
- kernel-nvidia-gpu-headers
|
||||
steps:
|
||||
- uses: geekyeggo/delete-artifact@f275313e70c08f6120db482d7a6b98377786765b # v5.1.0
|
||||
with:
|
||||
@@ -224,8 +214,7 @@ jobs:
|
||||
|
||||
# We don't need the binaries installed in the rootfs as part of the release tarball, so can delete them now we've built the rootfs
|
||||
remove-rootfs-binary-artifacts-for-release:
|
||||
name: remove-rootfs-binary-artifacts-for-release
|
||||
runs-on: ubuntu-24.04-arm
|
||||
runs-on: ubuntu-22.04-arm
|
||||
needs: build-asset-rootfs
|
||||
strategy:
|
||||
matrix:
|
||||
@@ -238,8 +227,7 @@ jobs:
|
||||
name: kata-artifacts-arm64-${{ matrix.asset}}${{ inputs.tarball-suffix }}
|
||||
|
||||
build-asset-shim-v2:
|
||||
name: build-asset-shim-v2
|
||||
runs-on: ubuntu-24.04-arm
|
||||
runs-on: ubuntu-22.04-arm
|
||||
needs: [build-asset, build-asset-rootfs, remove-rootfs-binary-artifacts, remove-rootfs-binary-artifacts-for-release]
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -298,8 +286,7 @@ jobs:
|
||||
if-no-files-found: error
|
||||
|
||||
create-kata-tarball:
|
||||
name: create-kata-tarball
|
||||
runs-on: ubuntu-24.04-arm
|
||||
runs-on: ubuntu-22.04-arm
|
||||
needs: [build-asset, build-asset-rootfs, build-asset-shim-v2]
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
@@ -28,11 +28,10 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
build-asset:
|
||||
name: build-asset
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
runs-on: ubuntu-24.04-ppc64le
|
||||
runs-on: ppc64le
|
||||
strategy:
|
||||
matrix:
|
||||
asset:
|
||||
@@ -88,8 +87,7 @@ jobs:
|
||||
if-no-files-found: error
|
||||
|
||||
build-asset-rootfs:
|
||||
name: build-asset-rootfs
|
||||
runs-on: ubuntu-24.04-ppc64le
|
||||
runs-on: ppc64le
|
||||
needs: build-asset
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -155,7 +153,6 @@ jobs:
|
||||
|
||||
# We don't need the binaries installed in the rootfs as part of the release tarball, so can delete them now we've built the rootfs
|
||||
remove-rootfs-binary-artifacts:
|
||||
name: remove-rootfs-binary-artifacts
|
||||
runs-on: ubuntu-22.04
|
||||
needs: build-asset-rootfs
|
||||
strategy:
|
||||
@@ -169,8 +166,7 @@ jobs:
|
||||
name: kata-artifacts-ppc64le-${{ matrix.asset}}${{ inputs.tarball-suffix }}
|
||||
|
||||
build-asset-shim-v2:
|
||||
name: build-asset-shim-v2
|
||||
runs-on: ubuntu-24.04-ppc64le
|
||||
runs-on: ppc64le
|
||||
needs: [build-asset, build-asset-rootfs, remove-rootfs-binary-artifacts]
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -229,8 +225,7 @@ jobs:
|
||||
if-no-files-found: error
|
||||
|
||||
create-kata-tarball:
|
||||
name: create-kata-tarball
|
||||
runs-on: ubuntu-24.04-ppc64le
|
||||
runs-on: ppc64le
|
||||
needs: [build-asset, build-asset-rootfs, build-asset-shim-v2]
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
@@ -20,12 +20,14 @@ on:
|
||||
required: false
|
||||
type: string
|
||||
default: ""
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD:
|
||||
required: true
|
||||
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
build-asset:
|
||||
name: build-asset
|
||||
runs-on: riscv-builder
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -38,6 +40,14 @@ jobs:
|
||||
- kernel
|
||||
- virtiofsd
|
||||
steps:
|
||||
- name: Login to Kata Containers quay.io
|
||||
if: ${{ inputs.push-to-registry == 'yes' }}
|
||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
||||
with:
|
||||
registry: quay.io
|
||||
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
|
||||
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
ref: ${{ inputs.commit-hash }}
|
||||
@@ -71,5 +81,5 @@ jobs:
|
||||
with:
|
||||
name: kata-artifacts-riscv64-${{ matrix.asset }}${{ inputs.tarball-suffix }}
|
||||
path: kata-build/kata-static-${{ matrix.asset }}.tar.zst
|
||||
retention-days: 3
|
||||
retention-days: 15
|
||||
if-no-files-found: error
|
||||
|
||||
@@ -31,8 +31,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
build-asset:
|
||||
name: build-asset
|
||||
runs-on: ubuntu-24.04-s390x
|
||||
runs-on: s390x
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
@@ -91,10 +90,8 @@ jobs:
|
||||
- name: Parse OCI image name and digest
|
||||
id: parse-oci-segments
|
||||
if: ${{ env.PERFORM_ATTESTATION == 'yes' }}
|
||||
env:
|
||||
ASSET: ${{ matrix.asset }}
|
||||
run: |
|
||||
oci_image="$(<"build/${ASSET}-oci-image")"
|
||||
oci_image="$(<"build/${{ matrix.asset }}-oci-image")"
|
||||
echo "oci-name=${oci_image%@*}" >> "$GITHUB_OUTPUT"
|
||||
echo "oci-digest=${oci_image#*@}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
@@ -122,7 +119,6 @@ jobs:
|
||||
if-no-files-found: error
|
||||
|
||||
build-asset-rootfs:
|
||||
name: build-asset-rootfs
|
||||
runs-on: s390x
|
||||
needs: build-asset
|
||||
permissions:
|
||||
@@ -190,7 +186,6 @@ jobs:
|
||||
if-no-files-found: error
|
||||
|
||||
build-asset-boot-image-se:
|
||||
name: build-asset-boot-image-se
|
||||
runs-on: s390x
|
||||
needs: [build-asset, build-asset-rootfs]
|
||||
permissions:
|
||||
@@ -240,7 +235,6 @@ jobs:
|
||||
|
||||
# We don't need the binaries installed in the rootfs as part of the release tarball, so can delete them now we've built the rootfs
|
||||
remove-rootfs-binary-artifacts:
|
||||
name: remove-rootfs-binary-artifacts
|
||||
runs-on: ubuntu-22.04
|
||||
needs: [build-asset-rootfs, build-asset-boot-image-se]
|
||||
strategy:
|
||||
@@ -256,8 +250,7 @@ jobs:
|
||||
name: kata-artifacts-s390x-${{ matrix.asset}}${{ inputs.tarball-suffix }}
|
||||
|
||||
build-asset-shim-v2:
|
||||
name: build-asset-shim-v2
|
||||
runs-on: ubuntu-24.04-s390x
|
||||
runs-on: s390x
|
||||
needs: [build-asset, build-asset-rootfs, remove-rootfs-binary-artifacts]
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -318,8 +311,7 @@ jobs:
|
||||
if-no-files-found: error
|
||||
|
||||
create-kata-tarball:
|
||||
name: create-kata-tarball
|
||||
runs-on: ubuntu-24.04-s390x
|
||||
runs-on: s390x
|
||||
needs:
|
||||
- build-asset
|
||||
- build-asset-rootfs
|
||||
|
||||
75
.github/workflows/build-kubectl-image.yaml
vendored
75
.github/workflows/build-kubectl-image.yaml
vendored
@@ -1,75 +0,0 @@
|
||||
name: Build kubectl multi-arch image
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Run every Sunday at 00:00 UTC
|
||||
- cron: '0 0 * * 0'
|
||||
workflow_dispatch:
|
||||
# Allow manual triggering
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- 'tools/packaging/kubectl/Dockerfile'
|
||||
- '.github/workflows/build-kubectl-image.yaml'
|
||||
|
||||
permissions: {}
|
||||
|
||||
env:
|
||||
REGISTRY: quay.io
|
||||
IMAGE_NAME: kata-containers/kubectl
|
||||
|
||||
jobs:
|
||||
build-and-push:
|
||||
name: Build and push multi-arch image
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 # v3.6.0
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
|
||||
|
||||
- name: Login to Quay.io
|
||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
|
||||
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
|
||||
- name: Get kubectl version
|
||||
id: kubectl-version
|
||||
run: |
|
||||
KUBECTL_VERSION=$(curl -L -s https://dl.k8s.io/release/stable.txt)
|
||||
echo "version=${KUBECTL_VERSION}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Generate image metadata
|
||||
id: meta
|
||||
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
|
||||
with:
|
||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
tags: |
|
||||
type=raw,value=latest
|
||||
type=raw,value={{date 'YYYYMMDD'}}
|
||||
type=raw,value=${{ steps.kubectl-version.outputs.version }}
|
||||
type=sha,prefix=
|
||||
|
||||
- name: Build and push multi-arch image
|
||||
uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25 # v5.4.0
|
||||
with:
|
||||
context: tools/packaging/kubectl/
|
||||
file: tools/packaging/kubectl/Dockerfile
|
||||
platforms: linux/amd64,linux/arm64,linux/s390x,linux/ppc64le
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
1
.github/workflows/cargo-deny-runner.yaml
vendored
1
.github/workflows/cargo-deny-runner.yaml
vendored
@@ -15,7 +15,6 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
cargo-deny-runner:
|
||||
name: cargo-deny-runner
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
|
||||
34
.github/workflows/ci-nightly-riscv.yaml
vendored
34
.github/workflows/ci-nightly-riscv.yaml
vendored
@@ -1,34 +0,0 @@
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 5 * * *'
|
||||
|
||||
name: Nightly CI for RISC-V
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
build-kata-static-tarball-riscv:
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
id-token: write
|
||||
attestations: write
|
||||
uses: ./.github/workflows/build-kata-static-tarball-riscv64.yaml
|
||||
with:
|
||||
tarball-suffix: -${{ github.sha }}
|
||||
commit-hash: ${{ github.sha }}
|
||||
target-branch: ${{ github.ref_name }}
|
||||
|
||||
build-checks-preview:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
instance:
|
||||
- "riscv-builder"
|
||||
uses: ./.github/workflows/build-checks-preview-riscv64.yaml
|
||||
with:
|
||||
instance: ${{ matrix.instance }}
|
||||
1
.github/workflows/ci-nightly-s390x.yaml
vendored
1
.github/workflows/ci-nightly-s390x.yaml
vendored
@@ -8,7 +8,6 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
check-internal-test-result:
|
||||
name: check-internal-test-result
|
||||
runs-on: s390x
|
||||
strategy:
|
||||
fail-fast: false
|
||||
|
||||
2
.github/workflows/ci-on-push.yaml
vendored
2
.github/workflows/ci-on-push.yaml
vendored
@@ -1,6 +1,6 @@
|
||||
name: Kata Containers CI
|
||||
on:
|
||||
pull_request_target: # zizmor: ignore[dangerous-triggers] See #11332.
|
||||
pull_request_target:
|
||||
branches:
|
||||
- 'main'
|
||||
types:
|
||||
|
||||
1
.github/workflows/ci-weekly.yaml
vendored
1
.github/workflows/ci-weekly.yaml
vendored
@@ -66,7 +66,6 @@ jobs:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
|
||||
build-and-publish-tee-confidential-unencrypted-image:
|
||||
name: build-and-publish-tee-confidential-unencrypted-image
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
53
.github/workflows/ci.yaml
vendored
53
.github/workflows/ci.yaml
vendored
@@ -86,8 +86,6 @@ jobs:
|
||||
tarball-suffix: -${{ inputs.tag }}
|
||||
commit-hash: ${{ inputs.commit-hash }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
secrets:
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
|
||||
publish-kata-deploy-payload-arm64:
|
||||
needs: build-kata-static-tarball-arm64
|
||||
@@ -102,7 +100,7 @@ jobs:
|
||||
tag: ${{ inputs.tag }}-arm64
|
||||
commit-hash: ${{ inputs.commit-hash }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
runner: ubuntu-24.04-arm
|
||||
runner: ubuntu-22.04-arm
|
||||
arch: arm64
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
@@ -134,6 +132,20 @@ jobs:
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
|
||||
build-kata-static-tarball-riscv64:
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
id-token: write
|
||||
attestations: write
|
||||
uses: ./.github/workflows/build-kata-static-tarball-riscv64.yaml
|
||||
with:
|
||||
tarball-suffix: -${{ inputs.tag }}
|
||||
commit-hash: ${{ inputs.commit-hash }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
|
||||
publish-kata-deploy-payload-s390x:
|
||||
needs: build-kata-static-tarball-s390x
|
||||
permissions:
|
||||
@@ -147,7 +159,7 @@ jobs:
|
||||
tag: ${{ inputs.tag }}-s390x
|
||||
commit-hash: ${{ inputs.commit-hash }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
runner: ubuntu-24.04-s390x
|
||||
runner: s390x
|
||||
arch: s390x
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
@@ -165,13 +177,12 @@ jobs:
|
||||
tag: ${{ inputs.tag }}-ppc64le
|
||||
commit-hash: ${{ inputs.commit-hash }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
runner: ubuntu-24.04-ppc64le
|
||||
runner: ppc64le
|
||||
arch: ppc64le
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
|
||||
build-and-publish-tee-confidential-unencrypted-image:
|
||||
name: build-and-publish-tee-confidential-unencrypted-image
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
@@ -213,7 +224,6 @@ jobs:
|
||||
file: tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/Dockerfile
|
||||
|
||||
publish-csi-driver-amd64:
|
||||
name: publish-csi-driver-amd64
|
||||
needs: build-kata-static-tarball-amd64
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -233,14 +243,14 @@ jobs:
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: get-kata-tools-tarball
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: kata-tools-static-tarball-amd64-${{ inputs.tag }}
|
||||
path: kata-tools-artifacts
|
||||
name: kata-static-tarball-amd64-${{ inputs.tag }}
|
||||
path: kata-artifacts
|
||||
|
||||
- name: Install kata-tools
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
|
||||
- name: Install tools
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-artifacts
|
||||
|
||||
- name: Copy binary into Docker context
|
||||
run: |
|
||||
@@ -297,6 +307,18 @@ jobs:
|
||||
AZ_TENANT_ID: ${{ secrets.AZ_TENANT_ID }}
|
||||
AZ_SUBSCRIPTION_ID: ${{ secrets.AZ_SUBSCRIPTION_ID }}
|
||||
|
||||
run-k8s-tests-on-amd64:
|
||||
if: ${{ inputs.skip-test != 'yes' }}
|
||||
needs: publish-kata-deploy-payload-amd64
|
||||
uses: ./.github/workflows/run-k8s-tests-on-amd64.yaml
|
||||
with:
|
||||
registry: ghcr.io
|
||||
repo: ${{ github.repository_owner }}/kata-deploy-ci
|
||||
tag: ${{ inputs.tag }}-amd64
|
||||
commit-hash: ${{ inputs.commit-hash }}
|
||||
pr-number: ${{ inputs.pr-number }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
|
||||
run-k8s-tests-on-arm64:
|
||||
if: ${{ inputs.skip-test != 'yes' }}
|
||||
needs: publish-kata-deploy-payload-arm64
|
||||
@@ -314,7 +336,6 @@ jobs:
|
||||
needs: publish-kata-deploy-payload-amd64
|
||||
uses: ./.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml
|
||||
with:
|
||||
tarball-suffix: -${{ inputs.tag }}
|
||||
registry: ghcr.io
|
||||
repo: ${{ github.repository_owner }}/kata-deploy-ci
|
||||
tag: ${{ inputs.tag }}-amd64
|
||||
@@ -416,11 +437,13 @@ jobs:
|
||||
{ containerd_version: lts, vmm: clh },
|
||||
{ containerd_version: lts, vmm: dragonball },
|
||||
{ containerd_version: lts, vmm: qemu },
|
||||
{ containerd_version: lts, vmm: stratovirt },
|
||||
{ containerd_version: lts, vmm: cloud-hypervisor },
|
||||
{ containerd_version: lts, vmm: qemu-runtime-rs },
|
||||
{ containerd_version: active, vmm: clh },
|
||||
{ containerd_version: active, vmm: dragonball },
|
||||
{ containerd_version: active, vmm: qemu },
|
||||
{ containerd_version: active, vmm: stratovirt },
|
||||
{ containerd_version: active, vmm: cloud-hypervisor },
|
||||
{ containerd_version: active, vmm: qemu-runtime-rs },
|
||||
]
|
||||
@@ -468,13 +491,13 @@ jobs:
|
||||
tarball-suffix: -${{ inputs.tag }}
|
||||
commit-hash: ${{ inputs.commit-hash }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
runner: ppc64le-small
|
||||
runner: ppc64le
|
||||
arch: ppc64le
|
||||
containerd_version: ${{ matrix.params.containerd_version }}
|
||||
vmm: ${{ matrix.params.vmm }}
|
||||
|
||||
run-cri-containerd-tests-arm64:
|
||||
if: false
|
||||
if: ${{ inputs.skip-test != 'yes' }}
|
||||
needs: build-kata-static-tarball-arm64
|
||||
strategy:
|
||||
fail-fast: false
|
||||
|
||||
1
.github/workflows/cleanup-resources.yaml
vendored
1
.github/workflows/cleanup-resources.yaml
vendored
@@ -8,7 +8,6 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
cleanup-resources:
|
||||
name: cleanup-resources
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
id-token: write # Used for OIDC access to log into Azure
|
||||
|
||||
12
.github/workflows/darwin-tests.yaml
vendored
12
.github/workflows/darwin-tests.yaml
vendored
@@ -15,17 +15,8 @@ concurrency:
|
||||
name: Darwin tests
|
||||
jobs:
|
||||
test:
|
||||
name: test
|
||||
runs-on: macos-latest
|
||||
steps:
|
||||
- name: Install Protoc
|
||||
run: |
|
||||
f=$(mktemp)
|
||||
curl -sSLo "$f" https://github.com/protocolbuffers/protobuf/releases/download/v28.2/protoc-28.2-osx-aarch_64.zip
|
||||
mkdir -p "$HOME/.local"
|
||||
unzip -d "$HOME/.local" "$f"
|
||||
echo "$HOME/.local/bin" >> "${GITHUB_PATH}"
|
||||
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
@@ -36,8 +27,5 @@ jobs:
|
||||
./tests/install_go.sh -f -p
|
||||
echo "/usr/local/go/bin" >> "${GITHUB_PATH}"
|
||||
|
||||
- name: Install Rust
|
||||
run: ./tests/install_rust.sh
|
||||
|
||||
- name: Build utils
|
||||
run: ./ci/darwin-test.sh
|
||||
|
||||
7
.github/workflows/docs-url-alive-check.yaml
vendored
7
.github/workflows/docs-url-alive-check.yaml
vendored
@@ -1,14 +1,12 @@
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 23 * * 0'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions: {}
|
||||
|
||||
name: Docs URL Alive Check
|
||||
jobs:
|
||||
test:
|
||||
name: test
|
||||
runs-on: ubuntu-22.04
|
||||
# don't run this action on forks
|
||||
if: github.repository_owner == 'kata-containers'
|
||||
@@ -17,12 +15,13 @@ jobs:
|
||||
steps:
|
||||
- name: Set env
|
||||
run: |
|
||||
echo "GOPATH=${GITHUB_WORKSPACE}" >> "$GITHUB_ENV"
|
||||
echo "GOPATH=${{ github.workspace }}" >> "$GITHUB_ENV"
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
path: ./src/github.com/${{ github.repository }}
|
||||
|
||||
- name: Install golang
|
||||
run: |
|
||||
@@ -31,4 +30,4 @@ jobs:
|
||||
|
||||
- name: Docs URL Alive Check
|
||||
run: |
|
||||
make docs-url-alive-check
|
||||
cd "${GOPATH}/src/github.com/${{ github.repository }}" && make docs-url-alive-check
|
||||
|
||||
32
.github/workflows/docs.yaml
vendored
32
.github/workflows/docs.yaml
vendored
@@ -1,32 +0,0 @@
|
||||
name: Documentation
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
permissions: {}
|
||||
jobs:
|
||||
deploy-docs:
|
||||
name: deploy-docs
|
||||
permissions:
|
||||
contents: read
|
||||
pages: write
|
||||
id-token: write
|
||||
environment:
|
||||
name: github-pages
|
||||
url: ${{ steps.deployment.outputs.page_url }}
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/configure-pages@v5
|
||||
- uses: actions/checkout@v5
|
||||
with:
|
||||
persist-credentials: false
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: 3.x
|
||||
- run: pip install zensical
|
||||
- run: zensical build --clean
|
||||
- uses: actions/upload-pages-artifact@v4
|
||||
with:
|
||||
path: site
|
||||
- uses: actions/deploy-pages@v4
|
||||
id: deployment
|
||||
1
.github/workflows/gatekeeper-skipper.yaml
vendored
1
.github/workflows/gatekeeper-skipper.yaml
vendored
@@ -35,7 +35,6 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
skipper:
|
||||
name: skipper
|
||||
runs-on: ubuntu-22.04
|
||||
outputs:
|
||||
skip_build: ${{ steps.skipper.outputs.skip_build }}
|
||||
|
||||
5
.github/workflows/gatekeeper.yaml
vendored
5
.github/workflows/gatekeeper.yaml
vendored
@@ -5,14 +5,12 @@ name: Gatekeeper
|
||||
# reporting the status.
|
||||
|
||||
on:
|
||||
pull_request_target: # zizmor: ignore[dangerous-triggers] See #11332.
|
||||
pull_request_target:
|
||||
types:
|
||||
- opened
|
||||
- synchronize
|
||||
- reopened
|
||||
- edited
|
||||
- labeled
|
||||
- unlabeled
|
||||
|
||||
permissions: {}
|
||||
|
||||
@@ -22,7 +20,6 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
gatekeeper:
|
||||
name: gatekeeper
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
actions: read
|
||||
|
||||
8
.github/workflows/govulncheck.yaml
vendored
8
.github/workflows/govulncheck.yaml
vendored
@@ -7,7 +7,6 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
govulncheck:
|
||||
name: govulncheck
|
||||
runs-on: ubuntu-22.04
|
||||
strategy:
|
||||
matrix:
|
||||
@@ -40,14 +39,11 @@ jobs:
|
||||
- name: Build runtime binaries
|
||||
run: |
|
||||
cd src/runtime
|
||||
make "${MAKE_TARGET}"
|
||||
make ${{ matrix.make_target }}
|
||||
env:
|
||||
MAKE_TARGET: ${{ matrix.make_target }}
|
||||
SKIP_GO_VERSION_CHECK: "1"
|
||||
|
||||
- name: Run govulncheck on ${{ matrix.binary }}
|
||||
env:
|
||||
BINARY: ${{ matrix.binary }}
|
||||
run: |
|
||||
cd src/runtime
|
||||
bash ../../tests/govulncheck-runner.sh "./${BINARY}"
|
||||
bash ../../tests/govulncheck-runner.sh "./${{ matrix.binary }}"
|
||||
|
||||
40
.github/workflows/kata-runtime-classes-sync.yaml
vendored
Normal file
40
.github/workflows/kata-runtime-classes-sync.yaml
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
on:
|
||||
pull_request:
|
||||
types:
|
||||
- opened
|
||||
- edited
|
||||
- reopened
|
||||
- synchronize
|
||||
|
||||
permissions: {}
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
kata-deploy-runtime-classes-check:
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
- name: Ensure the split out runtime classes match the all-in-one file
|
||||
run: |
|
||||
pushd tools/packaging/kata-deploy/runtimeclasses/
|
||||
echo "::group::Combine runtime classes"
|
||||
for runtimeClass in $(find . -type f \( -name "*.yaml" -and -not -name "kata-runtimeClasses.yaml" \) | sort); do
|
||||
echo "Adding ${runtimeClass} to the resultingRuntimeClasses.yaml"
|
||||
cat "${runtimeClass}" >> resultingRuntimeClasses.yaml;
|
||||
done
|
||||
echo "::endgroup::"
|
||||
echo "::group::Displaying the content of resultingRuntimeClasses.yaml"
|
||||
cat resultingRuntimeClasses.yaml
|
||||
echo "::endgroup::"
|
||||
echo ""
|
||||
echo "::group::Displaying the content of kata-runtimeClasses.yaml"
|
||||
cat kata-runtimeClasses.yaml
|
||||
echo "::endgroup::"
|
||||
echo ""
|
||||
diff resultingRuntimeClasses.yaml kata-runtimeClasses.yaml
|
||||
@@ -1,35 +0,0 @@
|
||||
name: nydus-snapshotter-version-sync
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types:
|
||||
- opened
|
||||
- edited
|
||||
- reopened
|
||||
- synchronize
|
||||
|
||||
permissions: {}
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
nydus-snapshotter-version-check:
|
||||
name: nydus-snapshotter-version-check
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
- name: Ensure nydus-snapshotter-version is in sync inside our repo
|
||||
run: |
|
||||
dockerfile_version=$(grep "ARG NYDUS_SNAPSHOTTER_VERSION" tools/packaging/kata-deploy/Dockerfile | cut -f2 -d'=')
|
||||
versions_version=$(yq ".externals.nydus-snapshotter.version | explode(.)" versions.yaml)
|
||||
if [[ "${dockerfile_version}" != "${versions_version}" ]]; then
|
||||
echo "nydus-snapshotter version must be the same in the following places: "
|
||||
echo "- versions.yaml: ${versions_version}"
|
||||
echo "- tools/packaging/kata-deploy/Dockerfile: ${dockerfile_version}"
|
||||
exit 1
|
||||
fi
|
||||
45
.github/workflows/payload-after-push.yaml
vendored
45
.github/workflows/payload-after-push.yaml
vendored
@@ -39,7 +39,6 @@ jobs:
|
||||
target-branch: ${{ github.ref_name }}
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
|
||||
build-assets-s390x:
|
||||
permissions:
|
||||
@@ -97,7 +96,7 @@ jobs:
|
||||
repo: kata-containers/kata-deploy-ci
|
||||
tag: kata-containers-latest-arm64
|
||||
target-branch: ${{ github.ref_name }}
|
||||
runner: ubuntu-24.04-arm
|
||||
runner: ubuntu-22.04-arm
|
||||
arch: arm64
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
@@ -131,13 +130,12 @@ jobs:
|
||||
repo: kata-containers/kata-deploy-ci
|
||||
tag: kata-containers-latest-ppc64le
|
||||
target-branch: ${{ github.ref_name }}
|
||||
runner: ubuntu-24.04-ppc64le
|
||||
runner: ppc64le
|
||||
arch: ppc64le
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
|
||||
publish-manifest:
|
||||
name: publish-manifest
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -162,42 +160,3 @@ jobs:
|
||||
env:
|
||||
KATA_DEPLOY_IMAGE_TAGS: "kata-containers-latest"
|
||||
KATA_DEPLOY_REGISTRIES: "quay.io/kata-containers/kata-deploy-ci"
|
||||
|
||||
upload-helm-chart-tarball:
|
||||
name: upload-helm-chart-tarball
|
||||
needs: publish-manifest
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
packages: write # needed to push the helm chart to ghcr.io
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Install helm
|
||||
uses: azure/setup-helm@fe7b79cd5ee1e45176fcad797de68ecaf3ca4814 # v4.2.0
|
||||
id: install
|
||||
|
||||
- name: Login to the OCI registries
|
||||
env:
|
||||
QUAY_DEPLOYER_USERNAME: ${{ vars.QUAY_DEPLOYER_USERNAME }}
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
GITHUB_TOKEN: ${{ github.token }}
|
||||
run: |
|
||||
echo "${QUAY_DEPLOYER_PASSWORD}" | helm registry login quay.io --username "${QUAY_DEPLOYER_USERNAME}" --password-stdin
|
||||
echo "${GITHUB_TOKEN}" | helm registry login ghcr.io --username "${GITHUB_ACTOR}" --password-stdin
|
||||
|
||||
- name: Push helm chart to the OCI registries
|
||||
run: |
|
||||
echo "Adjusting the Chart.yaml and values.yaml"
|
||||
yq eval '.version = "0.0.0-dev" | .appVersion = "0.0.0-dev"' -i tools/packaging/kata-deploy/helm-chart/kata-deploy/Chart.yaml
|
||||
yq eval '.image.reference = "quay.io/kata-containers/kata-deploy-ci" | .image.tag = "kata-containers-latest"' -i tools/packaging/kata-deploy/helm-chart/kata-deploy/values.yaml
|
||||
|
||||
echo "Generating the chart package"
|
||||
helm dependencies update tools/packaging/kata-deploy/helm-chart/kata-deploy
|
||||
helm package tools/packaging/kata-deploy/helm-chart/kata-deploy
|
||||
|
||||
echo "Pushing the chart to the OCI registries"
|
||||
helm push "kata-deploy-0.0.0-dev.tgz" oci://quay.io/kata-containers/kata-deploy-charts
|
||||
helm push "kata-deploy-0.0.0-dev.tgz" oci://ghcr.io/kata-containers/kata-deploy-charts
|
||||
|
||||
@@ -38,7 +38,6 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
kata-payload:
|
||||
name: kata-payload
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
@@ -50,24 +49,6 @@ jobs:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Remove unnecessary directories to free up space
|
||||
run: |
|
||||
sudo rm -rf /usr/local/.ghcup
|
||||
sudo rm -rf /opt/hostedtoolcache/CodeQL
|
||||
sudo rm -rf /usr/local/lib/android
|
||||
sudo rm -rf /usr/share/dotnet
|
||||
sudo rm -rf /opt/ghc
|
||||
sudo rm -rf /usr/local/share/boost
|
||||
sudo rm -rf /usr/lib/jvm
|
||||
sudo rm -rf /usr/share/swift
|
||||
sudo rm -rf /usr/local/share/powershell
|
||||
sudo rm -rf /usr/local/julia*
|
||||
sudo rm -rf /opt/az
|
||||
sudo rm -rf /usr/local/share/chromium
|
||||
sudo rm -rf /opt/microsoft
|
||||
sudo rm -rf /opt/google
|
||||
sudo rm -rf /usr/lib/firefox
|
||||
|
||||
- name: Rebase atop of the latest target branch
|
||||
run: |
|
||||
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
|
||||
|
||||
1
.github/workflows/release-amd64.yaml
vendored
1
.github/workflows/release-amd64.yaml
vendored
@@ -29,7 +29,6 @@ jobs:
|
||||
attestations: write
|
||||
|
||||
kata-deploy:
|
||||
name: kata-deploy
|
||||
needs: build-kata-static-tarball-amd64
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
6
.github/workflows/release-arm64.yaml
vendored
6
.github/workflows/release-arm64.yaml
vendored
@@ -8,8 +8,6 @@ on:
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD:
|
||||
required: true
|
||||
KBUILD_SIGN_PIN:
|
||||
required: true
|
||||
|
||||
permissions: {}
|
||||
|
||||
@@ -21,7 +19,6 @@ jobs:
|
||||
stage: release
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
@@ -29,12 +26,11 @@ jobs:
|
||||
attestations: write
|
||||
|
||||
kata-deploy:
|
||||
name: kata-deploy
|
||||
needs: build-kata-static-tarball-arm64
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
runs-on: ubuntu-24.04-arm
|
||||
runs-on: ubuntu-22.04-arm
|
||||
steps:
|
||||
- name: Login to Kata Containers ghcr.io
|
||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
||||
|
||||
3
.github/workflows/release-ppc64le.yaml
vendored
3
.github/workflows/release-ppc64le.yaml
vendored
@@ -26,12 +26,11 @@ jobs:
|
||||
attestations: write
|
||||
|
||||
kata-deploy:
|
||||
name: kata-deploy
|
||||
needs: build-kata-static-tarball-ppc64le
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
runs-on: ubuntu-24.04-ppc64le
|
||||
runs-on: ppc64le
|
||||
steps:
|
||||
- name: Login to Kata Containers ghcr.io
|
||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
||||
|
||||
3
.github/workflows/release-s390x.yaml
vendored
3
.github/workflows/release-s390x.yaml
vendored
@@ -30,12 +30,11 @@ jobs:
|
||||
|
||||
|
||||
kata-deploy:
|
||||
name: kata-deploy
|
||||
needs: build-kata-static-tarball-s390x
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
runs-on: ubuntu-24.04-s390x
|
||||
runs-on: s390x
|
||||
steps:
|
||||
- name: Login to Kata Containers ghcr.io
|
||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
||||
|
||||
33
.github/workflows/release.yaml
vendored
33
.github/workflows/release.yaml
vendored
@@ -6,7 +6,6 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
release:
|
||||
name: release
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
contents: write # needed for the `gh release create` command
|
||||
@@ -49,7 +48,6 @@ jobs:
|
||||
target-arch: arm64
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
|
||||
build-and-push-assets-s390x:
|
||||
needs: release
|
||||
@@ -79,7 +77,6 @@ jobs:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
|
||||
publish-multi-arch-images:
|
||||
name: publish-multi-arch-images
|
||||
runs-on: ubuntu-22.04
|
||||
needs: [build-and-push-assets-amd64, build-and-push-assets-arm64, build-and-push-assets-s390x, build-and-push-assets-ppc64le]
|
||||
permissions:
|
||||
@@ -117,7 +114,6 @@ jobs:
|
||||
KATA_DEPLOY_REGISTRIES: "quay.io/kata-containers/kata-deploy ghcr.io/kata-containers/kata-deploy"
|
||||
|
||||
upload-multi-arch-static-tarball:
|
||||
name: upload-multi-arch-static-tarball
|
||||
needs: [build-and-push-assets-amd64, build-and-push-assets-arm64, build-and-push-assets-s390x, build-and-push-assets-ppc64le]
|
||||
permissions:
|
||||
contents: write # needed for the `gh release` commands
|
||||
@@ -181,25 +177,7 @@ jobs:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
ARCHITECTURE: ppc64le
|
||||
|
||||
- name: Set KATA_TOOLS_STATIC_TARBALL env var
|
||||
run: |
|
||||
tarball=$(pwd)/kata-tools-static.tar.zst
|
||||
echo "KATA_TOOLS_STATIC_TARBALL=${tarball}" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Download amd64 tools artifacts
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: kata-tools-static-tarball-amd64
|
||||
|
||||
- name: Upload amd64 static tarball tools to GitHub
|
||||
run: |
|
||||
./tools/packaging/release/release.sh upload-kata-tools-static-tarball
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
ARCHITECTURE: amd64
|
||||
|
||||
upload-versions-yaml:
|
||||
name: upload-versions-yaml
|
||||
needs: release
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
@@ -217,7 +195,6 @@ jobs:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
|
||||
upload-cargo-vendored-tarball:
|
||||
name: upload-cargo-vendored-tarball
|
||||
needs: release
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
@@ -235,7 +212,6 @@ jobs:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
|
||||
upload-libseccomp-tarball:
|
||||
name: upload-libseccomp-tarball
|
||||
needs: release
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
@@ -253,7 +229,6 @@ jobs:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
|
||||
upload-helm-chart-tarball:
|
||||
name: upload-helm-chart-tarball
|
||||
needs: release
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
@@ -278,11 +253,10 @@ jobs:
|
||||
- name: Login to the OCI registries
|
||||
env:
|
||||
QUAY_DEPLOYER_USERNAME: ${{ vars.QUAY_DEPLOYER_USERNAME }}
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
GITHUB_TOKEN: ${{ github.token }}
|
||||
GITHUB_ACTOR: ${{ github.actor }}
|
||||
run: |
|
||||
echo "${QUAY_DEPLOYER_PASSWORD}" | helm registry login quay.io --username "${QUAY_DEPLOYER_USERNAME}" --password-stdin
|
||||
echo "${GITHUB_TOKEN}" | helm registry login ghcr.io --username "${GITHUB_ACTOR}" --password-stdin
|
||||
echo "${{ secrets.QUAY_DEPLOYER_PASSWORD }}" | helm registry login quay.io --username "${QUAY_DEPLOYER_USERNAME}" --password-stdin
|
||||
echo "${{ github.token }}" | helm registry login ghcr.io --username "${GITHUB_ACTOR}" --password-stdin
|
||||
|
||||
- name: Push helm chart to the OCI registries
|
||||
run: |
|
||||
@@ -291,7 +265,6 @@ jobs:
|
||||
helm push "kata-deploy-${release_version}.tgz" oci://ghcr.io/kata-containers/kata-deploy-charts
|
||||
|
||||
publish-release:
|
||||
name: publish-release
|
||||
needs: [ build-and-push-assets-amd64, build-and-push-assets-arm64, build-and-push-assets-s390x, build-and-push-assets-ppc64le, publish-multi-arch-images, upload-multi-arch-static-tarball, upload-versions-yaml, upload-cargo-vendored-tarball, upload-libseccomp-tarball ]
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
|
||||
24
.github/workflows/run-k8s-tests-on-aks.yaml
vendored
24
.github/workflows/run-k8s-tests-on-aks.yaml
vendored
@@ -38,7 +38,6 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
run-k8s-tests:
|
||||
name: run-k8s-tests
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -49,6 +48,7 @@ jobs:
|
||||
- dragonball
|
||||
- qemu
|
||||
- qemu-runtime-rs
|
||||
- stratovirt
|
||||
- cloud-hypervisor
|
||||
instance-type:
|
||||
- small
|
||||
@@ -58,13 +58,16 @@ jobs:
|
||||
vmm: clh
|
||||
instance-type: small
|
||||
genpolicy-pull-method: oci-distribution
|
||||
auto-generate-policy: yes
|
||||
- host_os: cbl-mariner
|
||||
vmm: clh
|
||||
instance-type: small
|
||||
genpolicy-pull-method: containerd
|
||||
auto-generate-policy: yes
|
||||
- host_os: cbl-mariner
|
||||
vmm: clh
|
||||
instance-type: normal
|
||||
auto-generate-policy: yes
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -78,8 +81,10 @@ jobs:
|
||||
KATA_HOST_OS: ${{ matrix.host_os }}
|
||||
KATA_HYPERVISOR: ${{ matrix.vmm }}
|
||||
KUBERNETES: "vanilla"
|
||||
USING_NFD: "false"
|
||||
K8S_TEST_HOST_TYPE: ${{ matrix.instance-type }}
|
||||
GENPOLICY_PULL_METHOD: ${{ matrix.genpolicy-pull-method }}
|
||||
AUTO_GENERATE_POLICY: ${{ matrix.auto-generate-policy }}
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
@@ -93,14 +98,14 @@ jobs:
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: get-kata-tools-tarball
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
|
||||
path: kata-tools-artifacts
|
||||
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
|
||||
path: kata-artifacts
|
||||
|
||||
- name: Install kata-tools
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
|
||||
- name: Install kata
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-artifacts
|
||||
|
||||
- name: Download Azure CLI
|
||||
uses: azure/setup-kubectl@776406bce94f63e41d621b960d78ee25c8b76ede # v4.0.1
|
||||
@@ -135,19 +140,14 @@ jobs:
|
||||
run: bash tests/integration/kubernetes/gha-run.sh get-cluster-credentials
|
||||
|
||||
- name: Deploy Kata
|
||||
timeout-minutes: 20
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-aks
|
||||
|
||||
- name: Run tests
|
||||
timeout-minutes: 60
|
||||
run: bash tests/integration/kubernetes/gha-run.sh run-tests
|
||||
|
||||
- name: Report tests
|
||||
if: always()
|
||||
run: bash tests/integration/kubernetes/gha-run.sh report-tests
|
||||
|
||||
- name: Refresh OIDC token in case access token expired
|
||||
if: always()
|
||||
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
|
||||
with:
|
||||
client-id: ${{ secrets.AZ_APPID }}
|
||||
|
||||
114
.github/workflows/run-k8s-tests-on-amd64.yaml
vendored
Normal file
114
.github/workflows/run-k8s-tests-on-amd64.yaml
vendored
Normal file
@@ -0,0 +1,114 @@
|
||||
name: CI | Run kubernetes tests on amd64
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
registry:
|
||||
required: true
|
||||
type: string
|
||||
repo:
|
||||
required: true
|
||||
type: string
|
||||
tag:
|
||||
required: true
|
||||
type: string
|
||||
pr-number:
|
||||
required: true
|
||||
type: string
|
||||
commit-hash:
|
||||
required: false
|
||||
type: string
|
||||
target-branch:
|
||||
required: false
|
||||
type: string
|
||||
default: ""
|
||||
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
run-k8s-tests-amd64:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
vmm:
|
||||
- clh #cloud-hypervisor
|
||||
- dragonball
|
||||
- fc #firecracker
|
||||
- qemu
|
||||
- cloud-hypervisor
|
||||
container_runtime:
|
||||
- containerd
|
||||
snapshotter:
|
||||
- devmapper
|
||||
k8s:
|
||||
- k3s
|
||||
include:
|
||||
- vmm: qemu
|
||||
container_runtime: crio
|
||||
snapshotter: ""
|
||||
k8s: k0s
|
||||
runs-on: ubuntu-22.04
|
||||
env:
|
||||
DOCKER_REGISTRY: ${{ inputs.registry }}
|
||||
DOCKER_REPO: ${{ inputs.repo }}
|
||||
DOCKER_TAG: ${{ inputs.tag }}
|
||||
GH_PR_NUMBER: ${{ inputs.pr-number }}
|
||||
KATA_HYPERVISOR: ${{ matrix.vmm }}
|
||||
KUBERNETES: ${{ matrix.k8s }}
|
||||
KUBERNETES_EXTRA_PARAMS: ${{ matrix.container_runtime != 'crio' && '' || '--cri-socket remote:unix:///var/run/crio/crio.sock --kubelet-extra-args --cgroup-driver="systemd"' }}
|
||||
SNAPSHOTTER: ${{ matrix.snapshotter }}
|
||||
USING_NFD: "false"
|
||||
K8S_TEST_HOST_TYPE: all
|
||||
CONTAINER_RUNTIME: ${{ matrix.container_runtime }}
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
ref: ${{ inputs.commit-hash }}
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Rebase atop of the latest target branch
|
||||
run: |
|
||||
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: Configure CRI-O
|
||||
if: matrix.container_runtime == 'crio'
|
||||
run: bash tests/integration/kubernetes/gha-run.sh setup-crio
|
||||
|
||||
- name: Deploy ${{ matrix.k8s }}
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-k8s
|
||||
env:
|
||||
CONTAINER_RUNTIME: ${{ matrix.container_runtime }}
|
||||
|
||||
- name: Configure the ${{ matrix.snapshotter }} snapshotter
|
||||
if: matrix.snapshotter != ''
|
||||
run: bash tests/integration/kubernetes/gha-run.sh configure-snapshotter
|
||||
|
||||
- name: Deploy Kata
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
|
||||
|
||||
- name: Install `bats`
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-bats
|
||||
|
||||
- name: Run tests
|
||||
timeout-minutes: 30
|
||||
run: bash tests/integration/kubernetes/gha-run.sh run-tests
|
||||
|
||||
- name: Collect artifacts ${{ matrix.vmm }}
|
||||
if: always()
|
||||
run: bash tests/integration/kubernetes/gha-run.sh collect-artifacts
|
||||
continue-on-error: true
|
||||
|
||||
- name: Archive artifacts ${{ matrix.vmm }}
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: k8s-tests-${{ matrix.vmm }}-${{ matrix.snapshotter }}-${{ matrix.k8s }}-${{ inputs.tag }}
|
||||
path: /tmp/artifacts
|
||||
retention-days: 1
|
||||
|
||||
- name: Delete kata-deploy
|
||||
if: always()
|
||||
timeout-minutes: 5
|
||||
run: bash tests/integration/kubernetes/gha-run.sh cleanup
|
||||
11
.github/workflows/run-k8s-tests-on-arm64.yaml
vendored
11
.github/workflows/run-k8s-tests-on-arm64.yaml
vendored
@@ -26,13 +26,11 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
run-k8s-tests-on-arm64:
|
||||
name: run-k8s-tests-on-arm64
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
vmm:
|
||||
- qemu
|
||||
- qemu-runtime-rs
|
||||
k8s:
|
||||
- kubeadm
|
||||
runs-on: arm64-k8s
|
||||
@@ -43,6 +41,7 @@ jobs:
|
||||
GH_PR_NUMBER: ${{ inputs.pr-number }}
|
||||
KATA_HYPERVISOR: ${{ matrix.vmm }}
|
||||
KUBERNETES: ${{ matrix.k8s }}
|
||||
USING_NFD: "false"
|
||||
K8S_TEST_HOST_TYPE: all
|
||||
TARGET_ARCH: "aarch64"
|
||||
steps:
|
||||
@@ -59,7 +58,7 @@ jobs:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: Deploy Kata
|
||||
timeout-minutes: 20
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
|
||||
|
||||
- name: Install `bats`
|
||||
@@ -69,10 +68,6 @@ jobs:
|
||||
timeout-minutes: 30
|
||||
run: bash tests/integration/kubernetes/gha-run.sh run-tests
|
||||
|
||||
- name: Report tests
|
||||
if: always()
|
||||
run: bash tests/integration/kubernetes/gha-run.sh report-tests
|
||||
|
||||
- name: Collect artifacts ${{ matrix.vmm }}
|
||||
if: always()
|
||||
run: bash tests/integration/kubernetes/gha-run.sh collect-artifacts
|
||||
@@ -87,5 +82,5 @@ jobs:
|
||||
|
||||
- name: Delete kata-deploy
|
||||
if: always()
|
||||
timeout-minutes: 15
|
||||
timeout-minutes: 5
|
||||
run: bash tests/integration/kubernetes/gha-run.sh cleanup
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
name: CI | Run NVIDIA GPU kubernetes tests on amd64
|
||||
name: CI | Run NVIDIA GPU kubernetes tests on arm64
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
tarball-suffix:
|
||||
required: true
|
||||
type: string
|
||||
registry:
|
||||
required: true
|
||||
type: string
|
||||
@@ -32,24 +29,23 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
run-nvidia-gpu-tests-on-amd64:
|
||||
name: run-${{ matrix.environment.name }}-tests-on-amd64
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
environment: [
|
||||
{ name: nvidia-gpu, vmm: qemu-nvidia-gpu, runner: amd64-nvidia-a100 },
|
||||
{ name: nvidia-gpu-snp, vmm: qemu-nvidia-gpu-snp, runner: amd64-nvidia-h100-snp },
|
||||
]
|
||||
runs-on: ${{ matrix.environment.runner }}
|
||||
vmm:
|
||||
- qemu-nvidia-gpu
|
||||
k8s:
|
||||
- kubeadm
|
||||
runs-on: amd64-nvidia-a100
|
||||
env:
|
||||
DOCKER_REGISTRY: ${{ inputs.registry }}
|
||||
DOCKER_REPO: ${{ inputs.repo }}
|
||||
DOCKER_TAG: ${{ inputs.tag }}
|
||||
GH_PR_NUMBER: ${{ inputs.pr-number }}
|
||||
KATA_HYPERVISOR: ${{ matrix.environment.vmm }}
|
||||
KUBERNETES: kubeadm
|
||||
KBS: ${{ matrix.environment.name == 'nvidia-gpu-snp' && 'true' || 'false' }}
|
||||
K8S_TEST_HOST_TYPE: baremetal
|
||||
KATA_HYPERVISOR: ${{ matrix.vmm }}
|
||||
KUBERNETES: ${{ matrix.k8s }}
|
||||
USING_NFD: "false"
|
||||
K8S_TEST_HOST_TYPE: all
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
@@ -63,69 +59,31 @@ jobs:
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: get-kata-tools-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
|
||||
path: kata-tools-artifacts
|
||||
|
||||
- name: Install kata-tools
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
|
||||
|
||||
- name: Uninstall previous `kbs-client`
|
||||
if: matrix.environment.name != 'nvidia-gpu'
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh uninstall-kbs-client
|
||||
|
||||
- name: Deploy CoCo KBS
|
||||
if: matrix.environment.name != 'nvidia-gpu'
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-coco-kbs
|
||||
env:
|
||||
NVIDIA_VERIFIER_MODE: remote
|
||||
KBS_INGRESS: nodeport
|
||||
|
||||
- name: Install `kbs-client`
|
||||
if: matrix.environment.name != 'nvidia-gpu'
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-kbs-client
|
||||
|
||||
- name: Deploy Kata
|
||||
timeout-minutes: 20
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
|
||||
|
||||
- name: Install `bats`
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-bats
|
||||
|
||||
- name: Run tests ${{ matrix.environment.vmm }}
|
||||
- name: Run tests
|
||||
timeout-minutes: 30
|
||||
run: bash tests/integration/kubernetes/gha-run.sh run-nv-tests
|
||||
env:
|
||||
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
|
||||
|
||||
- name: Report tests
|
||||
if: always()
|
||||
run: bash tests/integration/kubernetes/gha-run.sh report-tests
|
||||
|
||||
- name: Collect artifacts ${{ matrix.environment.vmm }}
|
||||
- name: Collect artifacts ${{ matrix.vmm }}
|
||||
if: always()
|
||||
run: bash tests/integration/kubernetes/gha-run.sh collect-artifacts
|
||||
continue-on-error: true
|
||||
|
||||
- name: Archive artifacts ${{ matrix.environment.vmm }}
|
||||
- name: Archive artifacts ${{ matrix.vmm }}
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: k8s-tests-${{ matrix.environment.vmm }}-kubeadm-${{ inputs.tag }}
|
||||
name: k8s-tests-${{ matrix.vmm }}-${{ matrix.k8s }}-${{ inputs.tag }}
|
||||
path: /tmp/artifacts
|
||||
retention-days: 1
|
||||
|
||||
- name: Delete kata-deploy
|
||||
if: always()
|
||||
timeout-minutes: 15
|
||||
timeout-minutes: 5
|
||||
run: bash tests/integration/kubernetes/gha-run.sh cleanup
|
||||
|
||||
- name: Delete CoCo KBS
|
||||
if: always() && matrix.environment.name != 'nvidia-gpu'
|
||||
timeout-minutes: 10
|
||||
run: |
|
||||
bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs
|
||||
|
||||
19
.github/workflows/run-k8s-tests-on-ppc64le.yaml
vendored
19
.github/workflows/run-k8s-tests-on-ppc64le.yaml
vendored
@@ -26,7 +26,6 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
run-k8s-tests:
|
||||
name: run-k8s-tests
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -34,7 +33,7 @@ jobs:
|
||||
- qemu
|
||||
k8s:
|
||||
- kubeadm
|
||||
runs-on: ppc64le-k8s
|
||||
runs-on: k8s-ppc64le
|
||||
env:
|
||||
DOCKER_REGISTRY: ${{ inputs.registry }}
|
||||
DOCKER_REPO: ${{ inputs.repo }}
|
||||
@@ -43,6 +42,7 @@ jobs:
|
||||
GOPATH: ${{ github.workspace }}
|
||||
KATA_HYPERVISOR: ${{ matrix.vmm }}
|
||||
KUBERNETES: ${{ matrix.k8s }}
|
||||
USING_NFD: "false"
|
||||
TARGET_ARCH: "ppc64le"
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
@@ -62,20 +62,19 @@ jobs:
|
||||
./tests/install_go.sh -f -p
|
||||
echo "/usr/local/go/bin" >> "$GITHUB_PATH"
|
||||
|
||||
- name: Prepare the runner for k8s test suite
|
||||
run: bash "${HOME}/scripts/k8s_cluster_prepare.sh"
|
||||
- name: Prepare the runner for k8s cluster creation
|
||||
run: bash "${HOME}/scripts/k8s_cluster_cleanup.sh"
|
||||
|
||||
- name: Check if cluster is healthy to run the tests
|
||||
run: bash "${HOME}/scripts/k8s_cluster_check.sh"
|
||||
- name: Create k8s cluster using kubeadm
|
||||
run: bash "${HOME}/scripts/k8s_cluster_create.sh"
|
||||
|
||||
- name: Deploy Kata
|
||||
timeout-minutes: 20
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-kubeadm
|
||||
|
||||
- name: Run tests
|
||||
timeout-minutes: 30
|
||||
run: bash tests/integration/kubernetes/gha-run.sh run-tests
|
||||
|
||||
- name: Report tests
|
||||
if: always()
|
||||
run: bash tests/integration/kubernetes/gha-run.sh report-tests
|
||||
- name: Delete cluster and post cleanup actions
|
||||
run: bash "${HOME}/scripts/k8s_cluster_cleanup.sh"
|
||||
|
||||
16
.github/workflows/run-k8s-tests-on-zvsi.yaml
vendored
16
.github/workflows/run-k8s-tests-on-zvsi.yaml
vendored
@@ -29,7 +29,6 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
run-k8s-tests:
|
||||
name: run-k8s-tests
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -46,9 +45,11 @@ jobs:
|
||||
include:
|
||||
- snapshotter: devmapper
|
||||
pull-type: default
|
||||
using-nfd: true
|
||||
deploy-cmd: configure-snapshotter
|
||||
- snapshotter: nydus
|
||||
pull-type: guest-pull
|
||||
using-nfd: false
|
||||
deploy-cmd: deploy-snapshotter
|
||||
exclude:
|
||||
- snapshotter: overlayfs
|
||||
@@ -74,6 +75,7 @@ jobs:
|
||||
KUBERNETES: ${{ matrix.k8s }}
|
||||
PULL_TYPE: ${{ matrix.pull-type }}
|
||||
SNAPSHOTTER: ${{ matrix.snapshotter }}
|
||||
USING_NFD: ${{ matrix.using-nfd }}
|
||||
TARGET_ARCH: "s390x"
|
||||
AUTHENTICATED_IMAGE_USER: ${{ vars.AUTHENTICATED_IMAGE_USER }}
|
||||
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
|
||||
@@ -103,13 +105,11 @@ jobs:
|
||||
# qemu-runtime-rs only works with overlayfs
|
||||
# See: https://github.com/kata-containers/kata-containers/issues/10066
|
||||
- name: Configure the ${{ matrix.snapshotter }} snapshotter
|
||||
env:
|
||||
DEPLOY_CMD: ${{ matrix.deploy-cmd }}
|
||||
run: bash tests/integration/kubernetes/gha-run.sh "${DEPLOY_CMD}"
|
||||
run: bash tests/integration/kubernetes/gha-run.sh ${{ matrix.deploy-cmd }}
|
||||
if: ${{ matrix.snapshotter != 'overlayfs' }}
|
||||
|
||||
- name: Deploy Kata
|
||||
timeout-minutes: 20
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-zvsi
|
||||
|
||||
- name: Uninstall previous `kbs-client`
|
||||
@@ -131,18 +131,12 @@ jobs:
|
||||
timeout-minutes: 60
|
||||
run: bash tests/integration/kubernetes/gha-run.sh run-tests
|
||||
|
||||
- name: Report tests
|
||||
if: always()
|
||||
run: bash tests/integration/kubernetes/gha-run.sh report-tests
|
||||
|
||||
- name: Delete kata-deploy
|
||||
if: always()
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh cleanup-zvsi
|
||||
|
||||
- name: Delete CoCo KBS
|
||||
if: always()
|
||||
timeout-minutes: 10
|
||||
run: |
|
||||
if [ "${KBS}" == "true" ]; then
|
||||
bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs
|
||||
|
||||
@@ -40,13 +40,11 @@ permissions: {}
|
||||
jobs:
|
||||
# Generate jobs for testing CoCo on non-TEE environments
|
||||
run-stability-k8s-tests-coco-nontee:
|
||||
name: run-stability-k8s-tests-coco-nontee
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
vmm:
|
||||
- qemu-coco-dev
|
||||
- qemu-coco-dev-runtime-rs
|
||||
snapshotter:
|
||||
- nydus
|
||||
pull-type:
|
||||
@@ -71,6 +69,7 @@ jobs:
|
||||
AUTHENTICATED_IMAGE_USER: ${{ vars.AUTHENTICATED_IMAGE_USER }}
|
||||
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
|
||||
SNAPSHOTTER: ${{ matrix.snapshotter }}
|
||||
USING_NFD: "false"
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
@@ -84,14 +83,14 @@ jobs:
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: get-kata-tools-tarball
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
|
||||
path: kata-tools-artifacts
|
||||
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
|
||||
path: kata-artifacts
|
||||
|
||||
- name: Install kata-tools
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
|
||||
- name: Install kata
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-artifacts
|
||||
|
||||
- name: Log into the Azure account
|
||||
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
|
||||
@@ -140,12 +139,7 @@ jobs:
|
||||
timeout-minutes: 300
|
||||
run: bash tests/stability/gha-stability-run.sh run-tests
|
||||
|
||||
- name: Report tests
|
||||
if: always()
|
||||
run: bash tests/integration/kubernetes/gha-run.sh report-tests
|
||||
|
||||
- name: Refresh OIDC token in case access token expired
|
||||
if: always()
|
||||
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
|
||||
with:
|
||||
client-id: ${{ secrets.AZ_APPID }}
|
||||
|
||||
268
.github/workflows/run-kata-coco-tests.yaml
vendored
268
.github/workflows/run-kata-coco-tests.yaml
vendored
@@ -39,17 +39,17 @@ on:
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
run-k8s-tests-on-tee:
|
||||
name: run-k8s-tests-on-tee
|
||||
run-k8s-tests-on-tdx:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- runner: tdx
|
||||
vmm: qemu-tdx
|
||||
- runner: sev-snp
|
||||
vmm: qemu-snp
|
||||
runs-on: ${{ matrix.runner }}
|
||||
vmm:
|
||||
- qemu-tdx
|
||||
snapshotter:
|
||||
- nydus
|
||||
pull-type:
|
||||
- guest-pull
|
||||
runs-on: tdx
|
||||
env:
|
||||
DOCKER_REGISTRY: ${{ inputs.registry }}
|
||||
DOCKER_REPO: ${{ inputs.repo }}
|
||||
@@ -57,14 +57,15 @@ jobs:
|
||||
GH_PR_NUMBER: ${{ inputs.pr-number }}
|
||||
KATA_HYPERVISOR: ${{ matrix.vmm }}
|
||||
KUBERNETES: "vanilla"
|
||||
USING_NFD: "true"
|
||||
KBS: "true"
|
||||
K8S_TEST_HOST_TYPE: "baremetal"
|
||||
KBS_INGRESS: "nodeport"
|
||||
SNAPSHOTTER: "nydus"
|
||||
PULL_TYPE: "guest-pull"
|
||||
SNAPSHOTTER: ${{ matrix.snapshotter }}
|
||||
PULL_TYPE: ${{ matrix.pull-type }}
|
||||
AUTHENTICATED_IMAGE_USER: ${{ vars.AUTHENTICATED_IMAGE_USER }}
|
||||
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
|
||||
GH_ITA_KEY: ${{ secrets.ITA_KEY }}
|
||||
ITA_KEY: ${{ secrets.ITA_KEY }}
|
||||
AUTO_GENERATE_POLICY: "yes"
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
@@ -79,18 +80,13 @@ jobs:
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: get-kata-tools-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
|
||||
path: kata-tools-artifacts
|
||||
|
||||
- name: Install kata-tools
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
|
||||
- name: Deploy Snapshotter
|
||||
timeout-minutes: 5
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-snapshotter
|
||||
|
||||
- name: Deploy Kata
|
||||
timeout-minutes: 20
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-tdx
|
||||
|
||||
- name: Uninstall previous `kbs-client`
|
||||
timeout-minutes: 10
|
||||
@@ -99,8 +95,6 @@ jobs:
|
||||
- name: Deploy CoCo KBS
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-coco-kbs
|
||||
env:
|
||||
ITA_KEY: ${{ env.KATA_HYPERVISOR == 'qemu-tdx' && env.GH_ITA_KEY || '' }}
|
||||
|
||||
- name: Install `kbs-client`
|
||||
timeout-minutes: 10
|
||||
@@ -114,21 +108,102 @@ jobs:
|
||||
timeout-minutes: 100
|
||||
run: bash tests/integration/kubernetes/gha-run.sh run-tests
|
||||
|
||||
- name: Report tests
|
||||
if: always()
|
||||
run: bash tests/integration/kubernetes/gha-run.sh report-tests
|
||||
|
||||
- name: Delete kata-deploy
|
||||
if: always()
|
||||
timeout-minutes: 15
|
||||
run: bash tests/integration/kubernetes/gha-run.sh cleanup
|
||||
run: bash tests/integration/kubernetes/gha-run.sh cleanup-tdx
|
||||
|
||||
- name: Delete Snapshotter
|
||||
if: always()
|
||||
run: bash tests/integration/kubernetes/gha-run.sh cleanup-snapshotter
|
||||
|
||||
- name: Delete CoCo KBS
|
||||
if: always()
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs
|
||||
|
||||
- name: Delete CSI driver
|
||||
timeout-minutes: 5
|
||||
run: bash tests/integration/kubernetes/gha-run.sh delete-csi-driver
|
||||
|
||||
run-k8s-tests-sev-snp:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
vmm:
|
||||
- qemu-snp
|
||||
snapshotter:
|
||||
- nydus
|
||||
pull-type:
|
||||
- guest-pull
|
||||
runs-on: sev-snp
|
||||
env:
|
||||
DOCKER_REGISTRY: ${{ inputs.registry }}
|
||||
DOCKER_REPO: ${{ inputs.repo }}
|
||||
DOCKER_TAG: ${{ inputs.tag }}
|
||||
GH_PR_NUMBER: ${{ inputs.pr-number }}
|
||||
KATA_HYPERVISOR: ${{ matrix.vmm }}
|
||||
KUBECONFIG: /home/kata/.kube/config
|
||||
KUBERNETES: "vanilla"
|
||||
USING_NFD: "false"
|
||||
KBS: "true"
|
||||
KBS_INGRESS: "nodeport"
|
||||
K8S_TEST_HOST_TYPE: "baremetal"
|
||||
SNAPSHOTTER: ${{ matrix.snapshotter }}
|
||||
PULL_TYPE: ${{ matrix.pull-type }}
|
||||
AUTHENTICATED_IMAGE_USER: ${{ vars.AUTHENTICATED_IMAGE_USER }}
|
||||
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
|
||||
AUTO_GENERATE_POLICY: "yes"
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
ref: ${{ inputs.commit-hash }}
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Rebase atop of the latest target branch
|
||||
run: |
|
||||
[[ "${KATA_HYPERVISOR}" == "qemu-tdx" ]] && echo "ITA_KEY=${GH_ITA_KEY}" >> "${GITHUB_ENV}"
|
||||
bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs
|
||||
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: Deploy Snapshotter
|
||||
timeout-minutes: 5
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-snapshotter
|
||||
|
||||
- name: Deploy Kata
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-snp
|
||||
|
||||
- name: Uninstall previous `kbs-client`
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh uninstall-kbs-client
|
||||
|
||||
- name: Deploy CoCo KBS
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-coco-kbs
|
||||
|
||||
- name: Install `kbs-client`
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-kbs-client
|
||||
|
||||
- name: Deploy CSI driver
|
||||
timeout-minutes: 5
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-csi-driver
|
||||
|
||||
- name: Run tests
|
||||
timeout-minutes: 50
|
||||
run: bash tests/integration/kubernetes/gha-run.sh run-tests
|
||||
|
||||
- name: Delete kata-deploy
|
||||
if: always()
|
||||
run: bash tests/integration/kubernetes/gha-run.sh cleanup-snp
|
||||
|
||||
- name: Delete Snapshotter
|
||||
if: always()
|
||||
run: bash tests/integration/kubernetes/gha-run.sh cleanup-snapshotter
|
||||
|
||||
- name: Delete CoCo KBS
|
||||
if: always()
|
||||
run: bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs
|
||||
|
||||
- name: Delete CSI driver
|
||||
timeout-minutes: 5
|
||||
@@ -136,21 +211,15 @@ jobs:
|
||||
|
||||
# Generate jobs for testing CoCo on non-TEE environments
|
||||
run-k8s-tests-coco-nontee:
|
||||
name: run-k8s-tests-coco-nontee
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
vmm:
|
||||
- qemu-coco-dev
|
||||
- qemu-coco-dev-runtime-rs
|
||||
snapshotter:
|
||||
- nydus
|
||||
pull-type:
|
||||
- guest-pull
|
||||
include:
|
||||
- pull-type: experimental-force-guest-pull
|
||||
vmm: qemu-coco-dev
|
||||
snapshotter: ""
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
id-token: write # Used for OIDC access to log into Azure
|
||||
@@ -170,12 +239,13 @@ jobs:
|
||||
AUTHENTICATED_IMAGE_USER: ${{ vars.AUTHENTICATED_IMAGE_USER }}
|
||||
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
|
||||
SNAPSHOTTER: ${{ matrix.snapshotter }}
|
||||
EXPERIMENTAL_FORCE_GUEST_PULL: ${{ matrix.pull-type == 'experimental-force-guest-pull' && matrix.vmm || '' }}
|
||||
# Caution: the current ingress controller used to expose the KBS service
|
||||
# requires many vCPUs, leaving only a few for the tests. Depending on the
|
||||
# host type chosen, this will result in the creation of a cluster with
|
||||
# insufficient resources.
|
||||
K8S_TEST_HOST_TYPE: "all"
|
||||
USING_NFD: "false"
|
||||
AUTO_GENERATE_POLICY: "yes"
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
@@ -189,14 +259,14 @@ jobs:
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: get-kata-tools-tarball
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
|
||||
path: kata-tools-artifacts
|
||||
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
|
||||
path: kata-artifacts
|
||||
|
||||
- name: Install kata-tools
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
|
||||
- name: Install kata
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-artifacts
|
||||
|
||||
- name: Log into the Azure account
|
||||
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
|
||||
@@ -225,12 +295,13 @@ jobs:
|
||||
- name: Download credentials for the Kubernetes CLI to use them
|
||||
run: bash tests/integration/kubernetes/gha-run.sh get-cluster-credentials
|
||||
|
||||
- name: Deploy Snapshotter
|
||||
timeout-minutes: 5
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-snapshotter
|
||||
|
||||
- name: Deploy Kata
|
||||
timeout-minutes: 20
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-aks
|
||||
env:
|
||||
USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: ${{ env.SNAPSHOTTER == 'nydus' }}
|
||||
AUTO_GENERATE_POLICY: ${{ env.PULL_TYPE == 'experimental-force-guest-pull' && 'no' || 'yes' }}
|
||||
|
||||
- name: Deploy CoCo KBS
|
||||
timeout-minutes: 10
|
||||
@@ -253,7 +324,6 @@ jobs:
|
||||
run: bash tests/integration/kubernetes/gha-run.sh report-tests
|
||||
|
||||
- name: Refresh OIDC token in case access token expired
|
||||
if: always()
|
||||
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
|
||||
with:
|
||||
client-id: ${{ secrets.AZ_APPID }}
|
||||
@@ -262,104 +332,4 @@ jobs:
|
||||
|
||||
- name: Delete AKS cluster
|
||||
if: always()
|
||||
timeout-minutes: 15
|
||||
run: bash tests/integration/kubernetes/gha-run.sh delete-cluster
|
||||
|
||||
# Generate jobs for testing CoCo on non-TEE environments with erofs-snapshotter
|
||||
run-k8s-tests-coco-nontee-with-erofs-snapshotter:
|
||||
name: run-k8s-tests-coco-nontee-with-erofs-snapshotter
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
vmm:
|
||||
- qemu-coco-dev
|
||||
snapshotter:
|
||||
- erofs
|
||||
pull-type:
|
||||
- default
|
||||
runs-on: ubuntu-24.04
|
||||
environment: ci
|
||||
env:
|
||||
DOCKER_REGISTRY: ${{ inputs.registry }}
|
||||
DOCKER_REPO: ${{ inputs.repo }}
|
||||
DOCKER_TAG: ${{ inputs.tag }}
|
||||
GH_PR_NUMBER: ${{ inputs.pr-number }}
|
||||
KATA_HYPERVISOR: ${{ matrix.vmm }}
|
||||
# Some tests rely on that variable to run (or not)
|
||||
KBS: "false"
|
||||
# Set the KBS ingress handler (empty string disables handling)
|
||||
KBS_INGRESS: ""
|
||||
KUBERNETES: "vanilla"
|
||||
CONTAINER_ENGINE: "containerd"
|
||||
CONTAINER_ENGINE_VERSION: "v2.2"
|
||||
PULL_TYPE: ${{ matrix.pull-type }}
|
||||
SNAPSHOTTER: ${{ matrix.snapshotter }}
|
||||
USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: "true"
|
||||
K8S_TEST_HOST_TYPE: "all"
|
||||
# We are skipping the auto generated policy tests for now,
|
||||
# but those should be enabled as soon as we work on that.
|
||||
AUTO_GENERATE_POLICY: "no"
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
ref: ${{ inputs.commit-hash }}
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Rebase atop of the latest target branch
|
||||
run: |
|
||||
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: get-kata-tools-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
|
||||
path: kata-tools-artifacts
|
||||
|
||||
- name: Install kata-tools
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
|
||||
|
||||
- name: Remove unnecessary directories to free up space
|
||||
run: |
|
||||
sudo rm -rf /usr/local/.ghcup
|
||||
sudo rm -rf /opt/hostedtoolcache/CodeQL
|
||||
sudo rm -rf /usr/local/lib/android
|
||||
sudo rm -rf /usr/share/dotnet
|
||||
sudo rm -rf /opt/ghc
|
||||
sudo rm -rf /usr/local/share/boost
|
||||
sudo rm -rf /usr/lib/jvm
|
||||
sudo rm -rf /usr/share/swift
|
||||
sudo rm -rf /usr/local/share/powershell
|
||||
sudo rm -rf /usr/local/julia*
|
||||
sudo rm -rf /opt/az
|
||||
sudo rm -rf /usr/local/share/chromium
|
||||
sudo rm -rf /opt/microsoft
|
||||
sudo rm -rf /opt/google
|
||||
sudo rm -rf /usr/lib/firefox
|
||||
|
||||
- name: Deploy kubernetes
|
||||
timeout-minutes: 15
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-k8s
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
|
||||
- name: Install `bats`
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-bats
|
||||
|
||||
- name: Deploy Kata
|
||||
timeout-minutes: 20
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
|
||||
|
||||
- name: Deploy CSI driver
|
||||
timeout-minutes: 5
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-csi-driver
|
||||
|
||||
- name: Run tests
|
||||
timeout-minutes: 80
|
||||
run: bash tests/integration/kubernetes/gha-run.sh run-tests
|
||||
|
||||
- name: Report tests
|
||||
if: always()
|
||||
run: bash tests/integration/kubernetes/gha-run.sh report-tests
|
||||
|
||||
@@ -33,7 +33,6 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
run-kata-deploy-tests:
|
||||
name: run-kata-deploy-tests
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -59,6 +58,7 @@ jobs:
|
||||
KATA_HOST_OS: ${{ matrix.host_os }}
|
||||
KATA_HYPERVISOR: ${{ matrix.vmm }}
|
||||
KUBERNETES: "vanilla"
|
||||
USING_NFD: "false"
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
@@ -102,12 +102,7 @@ jobs:
|
||||
- name: Run tests
|
||||
run: bash tests/functional/kata-deploy/gha-run.sh run-tests
|
||||
|
||||
- name: Report tests
|
||||
if: always()
|
||||
run: bash tests/integration/kubernetes/gha-run.sh report-tests
|
||||
|
||||
- name: Refresh OIDC token in case access token expired
|
||||
if: always()
|
||||
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
|
||||
with:
|
||||
client-id: ${{ secrets.AZ_APPID }}
|
||||
|
||||
24
.github/workflows/run-kata-deploy-tests.yaml
vendored
24
.github/workflows/run-kata-deploy-tests.yaml
vendored
@@ -26,7 +26,6 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
run-kata-deploy-tests:
|
||||
name: run-kata-deploy-tests
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -45,6 +44,7 @@ jobs:
|
||||
GH_PR_NUMBER: ${{ inputs.pr-number }}
|
||||
KATA_HYPERVISOR: ${{ matrix.vmm }}
|
||||
KUBERNETES: ${{ matrix.k8s }}
|
||||
USING_NFD: "false"
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
@@ -58,24 +58,6 @@ jobs:
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: Remove unnecessary directories to free up space
|
||||
run: |
|
||||
sudo rm -rf /usr/local/.ghcup
|
||||
sudo rm -rf /opt/hostedtoolcache/CodeQL
|
||||
sudo rm -rf /usr/local/lib/android
|
||||
sudo rm -rf /usr/share/dotnet
|
||||
sudo rm -rf /opt/ghc
|
||||
sudo rm -rf /usr/local/share/boost
|
||||
sudo rm -rf /usr/lib/jvm
|
||||
sudo rm -rf /usr/share/swift
|
||||
sudo rm -rf /usr/local/share/powershell
|
||||
sudo rm -rf /usr/local/julia*
|
||||
sudo rm -rf /opt/az
|
||||
sudo rm -rf /usr/local/share/chromium
|
||||
sudo rm -rf /opt/microsoft
|
||||
sudo rm -rf /opt/google
|
||||
sudo rm -rf /usr/lib/firefox
|
||||
|
||||
- name: Deploy ${{ matrix.k8s }}
|
||||
run: bash tests/functional/kata-deploy/gha-run.sh deploy-k8s
|
||||
|
||||
@@ -84,7 +66,3 @@ jobs:
|
||||
|
||||
- name: Run tests
|
||||
run: bash tests/functional/kata-deploy/gha-run.sh run-tests
|
||||
|
||||
- name: Report tests
|
||||
if: always()
|
||||
run: bash tests/functional/kata-deploy/gha-run.sh report-tests
|
||||
|
||||
@@ -17,7 +17,6 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
run-monitor:
|
||||
name: run-monitor
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
|
||||
2
.github/workflows/run-metrics.yaml
vendored
2
.github/workflows/run-metrics.yaml
vendored
@@ -26,7 +26,6 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
run-metrics:
|
||||
name: run-metrics
|
||||
strategy:
|
||||
# We can set this to true whenever we're 100% sure that
|
||||
# the all the tests are not flaky, otherwise we'll fail
|
||||
@@ -44,6 +43,7 @@ jobs:
|
||||
DOCKER_TAG: ${{ inputs.tag }}
|
||||
GH_PR_NUMBER: ${{ inputs.pr-number }}
|
||||
K8S_TEST_HOST_TYPE: "baremetal"
|
||||
USING_NFD: "false"
|
||||
KUBERNETES: kubeadm
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
53
.github/workflows/run-runk-tests.yaml
vendored
Normal file
53
.github/workflows/run-runk-tests.yaml
vendored
Normal file
@@ -0,0 +1,53 @@
|
||||
name: CI | Run runk tests
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
tarball-suffix:
|
||||
required: false
|
||||
type: string
|
||||
commit-hash:
|
||||
required: false
|
||||
type: string
|
||||
target-branch:
|
||||
required: false
|
||||
type: string
|
||||
default: ""
|
||||
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
run-runk:
|
||||
# Skip runk tests as we have no maintainers. TODO: Decide when to remove altogether
|
||||
if: false
|
||||
runs-on: ubuntu-22.04
|
||||
env:
|
||||
CONTAINERD_VERSION: lts
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
ref: ${{ inputs.commit-hash }}
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Rebase atop of the latest target branch
|
||||
run: |
|
||||
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash tests/integration/runk/gha-run.sh install-dependencies
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
|
||||
path: kata-artifacts
|
||||
|
||||
- name: Install kata
|
||||
run: bash tests/integration/runk/gha-run.sh install-kata kata-artifacts
|
||||
|
||||
- name: Run runk tests
|
||||
run: bash tests/integration/runk/gha-run.sh run
|
||||
1
.github/workflows/shellcheck.yaml
vendored
1
.github/workflows/shellcheck.yaml
vendored
@@ -18,7 +18,6 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
shellcheck:
|
||||
name: shellcheck
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- name: Checkout the code
|
||||
|
||||
1
.github/workflows/shellcheck_required.yaml
vendored
1
.github/workflows/shellcheck_required.yaml
vendored
@@ -19,7 +19,6 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
shellcheck-required:
|
||||
name: shellcheck-required
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- name: Checkout the code
|
||||
|
||||
10
.github/workflows/stale.yaml
vendored
10
.github/workflows/stale.yaml
vendored
@@ -6,21 +6,13 @@ on:
|
||||
|
||||
permissions: {}
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
stale:
|
||||
name: stale
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
actions: write # Needed to manage caches for state persistence across runs
|
||||
pull-requests: write # Needed to add/remove labels, post comments, or close PRs
|
||||
steps:
|
||||
- uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0
|
||||
with:
|
||||
stale-pr-message: 'This PR has been opened without activity for 180 days. Please comment on the issue or it will be closed in 7 days.'
|
||||
stale-pr-message: 'This PR has been opened without with no activity for 180 days. Comment on the issue otherwise it will be closed in 7 days'
|
||||
days-before-pr-stale: 180
|
||||
days-before-pr-close: 7
|
||||
days-before-issue-stale: -1
|
||||
|
||||
18
.github/workflows/static-checks-self-hosted.yaml
vendored
18
.github/workflows/static-checks-self-hosted.yaml
vendored
@@ -28,9 +28,21 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
instance:
|
||||
- "ubuntu-24.04-arm"
|
||||
- "ubuntu-24.04-s390x"
|
||||
- "ubuntu-24.04-ppc64le"
|
||||
- "ubuntu-22.04-arm"
|
||||
- "s390x"
|
||||
- "ppc64le"
|
||||
uses: ./.github/workflows/build-checks.yaml
|
||||
with:
|
||||
instance: ${{ matrix.instance }}
|
||||
|
||||
build-checks-preview:
|
||||
needs: skipper
|
||||
if: ${{ needs.skipper.outputs.skip_static != 'yes' }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
instance:
|
||||
- "riscv-builder"
|
||||
uses: ./.github/workflows/build-checks-preview-riscv64.yaml
|
||||
with:
|
||||
instance: ${{ matrix.instance }}
|
||||
|
||||
22
.github/workflows/static-checks.yaml
vendored
22
.github/workflows/static-checks.yaml
vendored
@@ -22,7 +22,6 @@ jobs:
|
||||
target-branch: ${{ github.event.pull_request.base.ref }}
|
||||
|
||||
check-kernel-config-version:
|
||||
name: check-kernel-config-version
|
||||
needs: skipper
|
||||
if: ${{ needs.skipper.outputs.skip_static != 'yes' }}
|
||||
runs-on: ubuntu-22.04
|
||||
@@ -55,7 +54,6 @@ jobs:
|
||||
instance: ubuntu-22.04
|
||||
|
||||
build-checks-depending-on-kvm:
|
||||
name: build-checks-depending-on-kvm
|
||||
runs-on: ubuntu-22.04
|
||||
needs: skipper
|
||||
if: ${{ needs.skipper.outputs.skip_static != 'yes' }}
|
||||
@@ -90,16 +88,13 @@ jobs:
|
||||
- name: Running `${{ matrix.command }}` for ${{ matrix.component }}
|
||||
run: |
|
||||
export PATH="$PATH:${HOME}/.cargo/bin"
|
||||
cd "${COMPONENT_PATH}"
|
||||
eval "${COMMAND}"
|
||||
cd ${{ matrix.component-path }}
|
||||
${{ matrix.command }}
|
||||
env:
|
||||
COMMAND: ${{ matrix.command }}
|
||||
COMPONENT_PATH: ${{ matrix.component-path }}
|
||||
RUST_BACKTRACE: "1"
|
||||
RUST_LIB_BACKTRACE: "0"
|
||||
|
||||
static-checks:
|
||||
name: static-checks
|
||||
runs-on: ubuntu-22.04
|
||||
needs: skipper
|
||||
if: ${{ needs.skipper.outputs.skip_static != 'yes' }}
|
||||
@@ -122,13 +117,13 @@ jobs:
|
||||
path: ./src/github.com/${{ github.repository }}
|
||||
- name: Install yq
|
||||
run: |
|
||||
cd "${GOPATH}/src/github.com/${GITHUB_REPOSITORY}"
|
||||
cd "${GOPATH}/src/github.com/${{ github.repository }}"
|
||||
./ci/install_yq.sh
|
||||
env:
|
||||
INSTALL_IN_GOPATH: false
|
||||
- name: Install golang
|
||||
run: |
|
||||
cd "${GOPATH}/src/github.com/${GITHUB_REPOSITORY}"
|
||||
cd "${GOPATH}/src/github.com/${{ github.repository }}"
|
||||
./tests/install_go.sh -f -p
|
||||
echo "/usr/local/go/bin" >> "$GITHUB_PATH"
|
||||
- name: Install system dependencies
|
||||
@@ -136,7 +131,7 @@ jobs:
|
||||
sudo apt-get update && sudo apt-get -y install moreutils hunspell hunspell-en-gb hunspell-en-us pandoc
|
||||
- name: Install open-policy-agent
|
||||
run: |
|
||||
cd "${GOPATH}/src/github.com/${GITHUB_REPOSITORY}"
|
||||
cd "${GOPATH}/src/github.com/${{ github.repository }}"
|
||||
./tests/install_opa.sh
|
||||
- name: Install regorus
|
||||
env:
|
||||
@@ -144,13 +139,11 @@ jobs:
|
||||
ARTEFACT_REGISTRY_USERNAME: "${{ github.actor }}"
|
||||
ARTEFACT_REGISTRY_PASSWORD: "${{ secrets.GITHUB_TOKEN }}"
|
||||
run: |
|
||||
"${GOPATH}/src/github.com/${GITHUB_REPOSITORY}/tests/install_regorus.sh"
|
||||
"${GOPATH}/src/github.com/${{ github.repository }}/tests/install_regorus.sh"
|
||||
- name: Run check
|
||||
env:
|
||||
CMD: ${{ matrix.cmd }}
|
||||
run: |
|
||||
export PATH="${PATH}:${GOPATH}/bin"
|
||||
cd "${GOPATH}/src/github.com/${GITHUB_REPOSITORY}" && ${CMD}
|
||||
cd "${GOPATH}/src/github.com/${{ github.repository }}" && ${{ matrix.cmd }}
|
||||
|
||||
govulncheck:
|
||||
needs: skipper
|
||||
@@ -158,7 +151,6 @@ jobs:
|
||||
uses: ./.github/workflows/govulncheck.yaml
|
||||
|
||||
codegen:
|
||||
name: codegen
|
||||
runs-on: ubuntu-22.04
|
||||
needs: skipper
|
||||
if: ${{ needs.skipper.outputs.skip_static != 'yes' }}
|
||||
|
||||
11
.github/workflows/zizmor.yaml
vendored
11
.github/workflows/zizmor.yaml
vendored
@@ -1,6 +1,8 @@
|
||||
name: GHA security analysis
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ["main"]
|
||||
pull_request:
|
||||
|
||||
permissions: {}
|
||||
@@ -11,8 +13,10 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
zizmor:
|
||||
name: zizmor
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
contents: read
|
||||
security-events: write
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
@@ -21,9 +25,6 @@ jobs:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Run zizmor
|
||||
uses: zizmorcore/zizmor-action@e673c3917a1aef3c65c972347ed84ccd013ecda4 # v0.2.0
|
||||
uses: zizmorcore/zizmor-action@f52a838cfabf134edcbaa7c8b3677dde20045018 # v0.1.1
|
||||
with:
|
||||
advanced-security: false
|
||||
annotations: true
|
||||
persona: auditor
|
||||
version: v1.13.0
|
||||
|
||||
3
.github/zizmor.yml
vendored
3
.github/zizmor.yml
vendored
@@ -1,3 +0,0 @@
|
||||
rules:
|
||||
undocumented-permissions:
|
||||
disable: true
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -18,5 +18,3 @@ src/tools/log-parser/kata-log-parser
|
||||
tools/packaging/static-build/agent/install_libseccomp.sh
|
||||
.envrc
|
||||
.direnv
|
||||
**/.DS_Store
|
||||
site/
|
||||
|
||||
140
Cargo.toml
140
Cargo.toml
@@ -1,140 +0,0 @@
|
||||
[workspace.package]
|
||||
authors = ["The Kata Containers community <kata-dev@lists.katacontainers.io>"]
|
||||
edition = "2018"
|
||||
license = "Apache-2.0"
|
||||
rust-version = "1.88"
|
||||
|
||||
[workspace]
|
||||
members = [
|
||||
# Dragonball
|
||||
"src/dragonball",
|
||||
"src/dragonball/dbs_acpi",
|
||||
"src/dragonball/dbs_address_space",
|
||||
"src/dragonball/dbs_allocator",
|
||||
"src/dragonball/dbs_arch",
|
||||
"src/dragonball/dbs_boot",
|
||||
"src/dragonball/dbs_device",
|
||||
"src/dragonball/dbs_interrupt",
|
||||
"src/dragonball/dbs_legacy_devices",
|
||||
"src/dragonball/dbs_pci",
|
||||
"src/dragonball/dbs_tdx",
|
||||
"src/dragonball/dbs_upcall",
|
||||
"src/dragonball/dbs_utils",
|
||||
"src/dragonball/dbs_virtio_devices",
|
||||
|
||||
# runtime-rs
|
||||
"src/runtime-rs",
|
||||
"src/runtime-rs/crates/agent",
|
||||
"src/runtime-rs/crates/hypervisor",
|
||||
"src/runtime-rs/crates/persist",
|
||||
"src/runtime-rs/crates/resource",
|
||||
"src/runtime-rs/crates/runtimes",
|
||||
"src/runtime-rs/crates/service",
|
||||
"src/runtime-rs/crates/shim",
|
||||
"src/runtime-rs/crates/shim-ctl",
|
||||
"src/runtime-rs/tests/utils",
|
||||
]
|
||||
resolver = "2"
|
||||
|
||||
# TODO: Add all excluded crates to root workspace
|
||||
exclude = [
|
||||
"src/agent",
|
||||
"src/tools",
|
||||
"src/libs",
|
||||
|
||||
# kata-deploy binary is standalone and has its own Cargo.toml for now
|
||||
"tools/packaging/kata-deploy/binary",
|
||||
|
||||
# We are cloning and building rust packages under
|
||||
# "tools/packaging/kata-deploy/local-build/build" folder, which may mislead
|
||||
# those packages to think they are part of the kata root workspace
|
||||
"tools/packaging/kata-deploy/local-build/build",
|
||||
]
|
||||
|
||||
[workspace.dependencies]
|
||||
# Rust-VMM crates
|
||||
event-manager = "0.2.1"
|
||||
kvm-bindings = "0.6.0"
|
||||
kvm-ioctls = "=0.12.1"
|
||||
linux-loader = "0.8.0"
|
||||
seccompiler = "0.5.0"
|
||||
vfio-bindings = "0.3.0"
|
||||
vfio-ioctls = "0.1.0"
|
||||
virtio-bindings = "0.1.0"
|
||||
virtio-queue = "0.7.0"
|
||||
vm-fdt = "0.2.0"
|
||||
vm-memory = "0.10.0"
|
||||
vm-superio = "0.5.0"
|
||||
vmm-sys-util = "0.11.0"
|
||||
|
||||
# Local dependencies from Dragonball Sandbox crates
|
||||
dragonball = { path = "src/dragonball" }
|
||||
dbs-acpi = { path = "src/dragonball/dbs_acpi" }
|
||||
dbs-address-space = { path = "src/dragonball/dbs_address_space" }
|
||||
dbs-allocator = { path = "src/dragonball/dbs_allocator" }
|
||||
dbs-arch = { path = "src/dragonball/dbs_arch" }
|
||||
dbs-boot = { path = "src/dragonball/dbs_boot" }
|
||||
dbs-device = { path = "src/dragonball/dbs_device" }
|
||||
dbs-interrupt = { path = "src/dragonball/dbs_interrupt" }
|
||||
dbs-legacy-devices = { path = "src/dragonball/dbs_legacy_devices" }
|
||||
dbs-pci = { path = "src/dragonball/dbs_pci" }
|
||||
dbs-tdx = { path = "src/dragonball/dbs_tdx" }
|
||||
dbs-upcall = { path = "src/dragonball/dbs_upcall" }
|
||||
dbs-utils = { path = "src/dragonball/dbs_utils" }
|
||||
dbs-virtio-devices = { path = "src/dragonball/dbs_virtio_devices" }
|
||||
|
||||
# Local dependencies from runtime-rs
|
||||
agent = { path = "src/runtime-rs/crates/agent" }
|
||||
hypervisor = { path = "src/runtime-rs/crates/hypervisor" }
|
||||
persist = { path = "src/runtime-rs/crates/persist" }
|
||||
resource = { path = "src/runtime-rs/crates/resource" }
|
||||
runtimes = { path = "src/runtime-rs/crates/runtimes" }
|
||||
service = { path = "src/runtime-rs/crates/service" }
|
||||
tests_utils = { path = "src/runtime-rs/tests/utils" }
|
||||
ch-config = { path = "src/runtime-rs/crates/hypervisor/ch-config" }
|
||||
common = { path = "src/runtime-rs/crates/runtimes/common" }
|
||||
linux_container = { path = "src/runtime-rs/crates/runtimes/linux_container" }
|
||||
virt_container = { path = "src/runtime-rs/crates/runtimes/virt_container" }
|
||||
wasm_container = { path = "src/runtime-rs/crates/runtimes/wasm_container" }
|
||||
|
||||
# Local dependencies from `src/lib`
|
||||
kata-sys-util = { path = "src/libs/kata-sys-util" }
|
||||
kata-types = { path = "src/libs/kata-types", features = ["safe-path"] }
|
||||
logging = { path = "src/libs/logging" }
|
||||
protocols = { path = "src/libs/protocols", features = ["async"] }
|
||||
runtime-spec = { path = "src/libs/runtime-spec" }
|
||||
safe-path = { path = "src/libs/safe-path" }
|
||||
shim-interface = { path = "src/libs/shim-interface" }
|
||||
test-utils = { path = "src/libs/test-utils" }
|
||||
|
||||
# Outside dependencies
|
||||
actix-rt = "2.7.0"
|
||||
anyhow = "1.0"
|
||||
async-trait = "0.1.48"
|
||||
containerd-shim = { version = "0.10.0", features = ["async"] }
|
||||
containerd-shim-protos = { version = "0.10.0", features = ["async"] }
|
||||
go-flag = "0.1.0"
|
||||
hyper = "0.14.20"
|
||||
hyperlocal = "0.8.0"
|
||||
lazy_static = "1.4"
|
||||
libc = "0.2"
|
||||
log = "0.4.14"
|
||||
netns-rs = "0.1.0"
|
||||
# Note: nix needs to stay sync'd with libs versions
|
||||
nix = "0.26.4"
|
||||
oci-spec = { version = "0.8.1", features = ["runtime"] }
|
||||
protobuf = "3.7.2"
|
||||
rand = "0.8.4"
|
||||
serde = { version = "1.0.145", features = ["derive"] }
|
||||
serde_json = "1.0.91"
|
||||
sha2 = "0.10.9"
|
||||
slog = "2.5.2"
|
||||
slog-scope = "4.4.0"
|
||||
strum = { version = "0.24.0", features = ["derive"] }
|
||||
tempfile = "3.19.1"
|
||||
thiserror = "1.0"
|
||||
tokio = "1.46.1"
|
||||
tracing = "0.1.41"
|
||||
tracing-opentelemetry = "0.18.0"
|
||||
ttrpc = "0.8.4"
|
||||
url = "2.5.4"
|
||||
7
Makefile
7
Makefile
@@ -18,6 +18,7 @@ TOOLS =
|
||||
TOOLS += agent-ctl
|
||||
TOOLS += kata-ctl
|
||||
TOOLS += log-parser
|
||||
TOOLS += runk
|
||||
TOOLS += trace-forwarder
|
||||
|
||||
STANDARD_TARGETS = build check clean install static-checks-build test vendor
|
||||
@@ -49,14 +50,10 @@ docs-url-alive-check:
|
||||
build-and-publish-kata-debug:
|
||||
bash tools/packaging/kata-debug/kata-debug-build-and-upload-payload.sh ${KATA_DEBUG_REGISTRY} ${KATA_DEBUG_TAG}
|
||||
|
||||
docs-serve:
|
||||
docker run --rm -p 8000:8000 -v ./docs:/docs:ro -v ${PWD}/zensical.toml:/zensical.toml:ro zensical/zensical serve --config-file /zensical.toml -a 0.0.0.0:8000
|
||||
|
||||
.PHONY: \
|
||||
all \
|
||||
kata-tarball \
|
||||
install-tarball \
|
||||
default \
|
||||
static-checks \
|
||||
docs-url-alive-check \
|
||||
docs-serve
|
||||
docs-url-alive-check
|
||||
|
||||
@@ -139,6 +139,7 @@ The table below lists the remaining parts of the project:
|
||||
| [`agent-ctl`](src/tools/agent-ctl) | utility | Tool that provides low-level access for testing the agent. |
|
||||
| [`kata-ctl`](src/tools/kata-ctl) | utility | Tool that provides advanced commands and debug facilities. |
|
||||
| [`trace-forwarder`](src/tools/trace-forwarder) | utility | Agent tracing helper. |
|
||||
| [`runk`](src/tools/runk) | utility | Standard OCI container runtime based on the agent. |
|
||||
| [`ci`](.github/workflows) | CI | Continuous Integration configuration files and scripts. |
|
||||
| [`ocp-ci`](ci/openshift-ci/README.md) | CI | Continuous Integration configuration for the OpenShift pipelines. |
|
||||
| [`katacontainers.io`](https://github.com/kata-containers/www.katacontainers.io) | Source for the [`katacontainers.io`](https://www.katacontainers.io) site. |
|
||||
|
||||
@@ -8,7 +8,6 @@ set -e
|
||||
|
||||
cidir=$(dirname "$0")
|
||||
runtimedir=${cidir}/../src/runtime
|
||||
genpolicydir=${cidir}/../src/tools/genpolicy
|
||||
|
||||
build_working_packages() {
|
||||
# working packages:
|
||||
@@ -41,11 +40,3 @@ build_working_packages() {
|
||||
}
|
||||
|
||||
build_working_packages
|
||||
|
||||
build_genpolicy() {
|
||||
echo "building genpolicy"
|
||||
pushd "${genpolicydir}" &>/dev/null
|
||||
make TRIPLE=aarch64-apple-darwin build
|
||||
}
|
||||
|
||||
build_genpolicy
|
||||
|
||||
@@ -11,10 +11,6 @@ script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
|
||||
source "${script_dir}/../tests/common.bash"
|
||||
|
||||
# Path to the ORAS cache helper for downloading tarballs (sourced when needed)
|
||||
# Use ORAS_CACHE_HELPER env var (set by build.sh in Docker) or fallback to repo path
|
||||
oras_cache_helper="${ORAS_CACHE_HELPER:-${script_dir}/../tools/packaging/scripts/download-with-oras-cache.sh}"
|
||||
|
||||
# The following variables if set on the environment will change the behavior
|
||||
# of gperf and libseccomp configure scripts, that may lead this script to
|
||||
# fail. So let's ensure they are unset here.
|
||||
@@ -48,9 +44,6 @@ fi
|
||||
gperf_tarball="gperf-${gperf_version}.tar.gz"
|
||||
gperf_tarball_url="${gperf_url}/${gperf_tarball}"
|
||||
|
||||
# Use ORAS cache for gperf downloads (gperf upstream can be unreliable)
|
||||
USE_ORAS_CACHE="${USE_ORAS_CACHE:-yes}"
|
||||
|
||||
# We need to build the libseccomp library from sources to create a static
|
||||
# library for the musl libc.
|
||||
# However, ppc64le, riscv64 and s390x have no musl targets in Rust. Hence, we do
|
||||
@@ -75,23 +68,7 @@ trap finish EXIT
|
||||
build_and_install_gperf() {
|
||||
echo "Build and install gperf version ${gperf_version}"
|
||||
mkdir -p "${gperf_install_dir}"
|
||||
|
||||
# Use ORAS cache if available and enabled
|
||||
if [[ "${USE_ORAS_CACHE}" == "yes" ]] && [[ -f "${oras_cache_helper}" ]]; then
|
||||
echo "Using ORAS cache for gperf download"
|
||||
source "${oras_cache_helper}"
|
||||
local cached_tarball
|
||||
cached_tarball=$(download_component gperf "$(pwd)")
|
||||
if [[ -f "${cached_tarball}" ]]; then
|
||||
gperf_tarball="${cached_tarball}"
|
||||
else
|
||||
echo "ORAS cache download failed, falling back to direct download"
|
||||
curl -sLO "${gperf_tarball_url}"
|
||||
fi
|
||||
else
|
||||
curl -sLO "${gperf_tarball_url}"
|
||||
fi
|
||||
|
||||
curl -sLO "${gperf_tarball_url}"
|
||||
tar -xf "${gperf_tarball}"
|
||||
pushd "gperf-${gperf_version}"
|
||||
# Unset $CC for configure, we will always use native for gperf
|
||||
|
||||
@@ -46,12 +46,16 @@ fi
|
||||
[[ ${SELINUX_PERMISSIVE} == "yes" ]] && oc delete -f "${deployments_dir}/machineconfig_selinux.yaml.in"
|
||||
|
||||
# Delete kata-containers
|
||||
helm uninstall kata-deploy --wait --namespace kube-system
|
||||
pushd "${katacontainers_repo_dir}/tools/packaging/kata-deploy" || { echo "Failed to push to ${katacontainers_repo_dir}/tools/packaging/kata-deploy"; exit 125; }
|
||||
oc delete -f kata-deploy/base/kata-deploy.yaml
|
||||
oc -n kube-system wait --timeout=10m --for=delete -l name=kata-deploy pod
|
||||
oc apply -f kata-cleanup/base/kata-cleanup.yaml
|
||||
echo "Wait for all related pods to be gone"
|
||||
( repeats=1; for _ in $(seq 1 600); do
|
||||
oc get pods -l name="kubelet-kata-cleanup" --no-headers=true -n kube-system 2>&1 | grep "No resources found" -q && ((repeats++)) || repeats=1
|
||||
[[ "${repeats}" -gt 5 ]] && echo kata-cleanup finished && break
|
||||
sleep 1
|
||||
done) || { echo "There are still some kata-cleanup related pods after 600 iterations"; oc get all -n kube-system; exit 1; }
|
||||
oc delete -f kata-cleanup/base/kata-cleanup.yaml
|
||||
oc delete -f kata-rbac/base/kata-rbac.yaml
|
||||
oc delete -f runtimeclasses/kata-runtimeClasses.yaml
|
||||
|
||||
@@ -43,22 +43,19 @@ WORKAROUND_9206_CRIO=${WORKAROUND_9206_CRIO:-no}
|
||||
# Leverage kata-deploy to install Kata Containers in the cluster.
|
||||
#
|
||||
apply_kata_deploy() {
|
||||
if ! command -v helm &>/dev/null; then
|
||||
echo "Helm not installed, installing in current location..."
|
||||
PATH=".:${PATH}"
|
||||
curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | HELM_INSTALL_DIR='.' bash -s -- --no-sudo
|
||||
fi
|
||||
local deploy_file="tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
|
||||
pushd "${katacontainers_repo_dir}" || die
|
||||
sed -ri "s#(\s+image:) .*#\1 ${KATA_DEPLOY_IMAGE}#" "${deploy_file}"
|
||||
|
||||
info "Applying kata-deploy"
|
||||
oc apply -f tools/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml
|
||||
oc label --overwrite ns kube-system pod-security.kubernetes.io/enforce=privileged pod-security.kubernetes.io/warn=baseline pod-security.kubernetes.io/audit=baseline
|
||||
local version chart
|
||||
version='0.0.0-dev'
|
||||
chart="oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy"
|
||||
oc apply -f "${deploy_file}"
|
||||
oc -n kube-system wait --timeout=10m --for=condition=Ready -l name=kata-deploy pod
|
||||
|
||||
# Ensure any potential leftover is cleaned up ... and this secret usually is not in case of previous failures
|
||||
oc delete secret sh.helm.release.v1.kata-deploy.v1 -n kube-system || true
|
||||
|
||||
echo "Installing kata using helm ${chart} ${version} (sha printed in helm output)"
|
||||
helm install kata-deploy --wait --namespace kube-system --set "image.reference=${KATA_DEPLOY_IMAGE%%:*},image.tag=${KATA_DEPLOY_IMAGE##*:}" "${chart}" --version "${version}"
|
||||
info "Adding the kata runtime classes"
|
||||
oc apply -f tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml
|
||||
popd || die
|
||||
}
|
||||
|
||||
|
||||
@@ -177,13 +174,13 @@ wait_for_app_pods_message() {
|
||||
local namespace="$5"
|
||||
[[ -z "${pod_count}" ]] && pod_count=1
|
||||
[[ -z "${timeout}" ]] && timeout=60
|
||||
[[ -n "${namespace}" ]] && namespace=("-n" "${namespace}")
|
||||
[[ -n "${namespace}" ]] && namespace=" -n ${namespace} "
|
||||
local pod
|
||||
local pods
|
||||
local i
|
||||
SECONDS=0
|
||||
while :; do
|
||||
mapfile -t pods < <(oc get pods -l app="${app}" --no-headers=true "${namespace[@]}" | awk '{print $1}')
|
||||
mapfile -t pods < <(oc get pods -l app="${app}" --no-headers=true "${namespace}" | awk '{print $1}')
|
||||
[[ "${#pods}" -ge "${pod_count}" ]] && break
|
||||
if [[ "${SECONDS}" -gt "${timeout}" ]]; then
|
||||
printf "Unable to find ${pod_count} pods for '-l app=\"${app}\"' in ${SECONDS}s (%s)" "${pods[@]}"
|
||||
@@ -193,7 +190,7 @@ wait_for_app_pods_message() {
|
||||
local log
|
||||
for pod in "${pods[@]}"; do
|
||||
while :; do
|
||||
log=$(oc logs "${namespace[@]}" "${pod}")
|
||||
log=$(oc logs "${namespace}" "${pod}")
|
||||
echo "${log}" | grep "${message}" -q && echo "Found $(echo "${log}" | grep "${message}") in ${pod}'s log (${SECONDS})" && break;
|
||||
if [[ "${SECONDS}" -gt "${timeout}" ]]; then
|
||||
echo -n "Message '${message}' not present in '${pod}' pod of the '-l app=\"${app}\"' "
|
||||
|
||||
@@ -12,33 +12,6 @@
|
||||
|
||||
SCRIPT_DIR=$(dirname "$0")
|
||||
|
||||
##################
|
||||
# Helper functions
|
||||
##################
|
||||
|
||||
# Sparse "git clone" supporting old git version
|
||||
# $1 - origin
|
||||
# $2 - revision
|
||||
# $3- - sparse checkout paths
|
||||
# Note: uses pushd to change into the clonned directory!
|
||||
git_sparse_clone() {
|
||||
local origin="$1"
|
||||
local revision="$2"
|
||||
shift 2
|
||||
local sparse_paths=("$@")
|
||||
|
||||
local repo
|
||||
repo=$(basename -s .git "${origin}")
|
||||
|
||||
git init "${repo}"
|
||||
pushd "${repo}" || exit 1
|
||||
git remote add origin "${origin}"
|
||||
git fetch --depth 1 origin "${revision}"
|
||||
git sparse-checkout init --cone
|
||||
git sparse-checkout set "${sparse_paths[@]}"
|
||||
git checkout FETCH_HEAD
|
||||
}
|
||||
|
||||
###############################
|
||||
# Disable security to allow e2e
|
||||
###############################
|
||||
@@ -143,50 +116,33 @@ az network vnet subnet update \
|
||||
for NODE_NAME in $(kubectl get nodes -o jsonpath='{.items[*].metadata.name}'); do [[ "${NODE_NAME}" =~ 'worker' ]] && kubectl label node "${NODE_NAME}" node.kubernetes.io/worker=; done
|
||||
|
||||
# CAA artifacts
|
||||
if [[ -z "${CAA_TAG}" ]]; then
|
||||
if [[ -n "${CAA_IMAGE}" ]]; then
|
||||
echo "CAA_IMAGE (${CAA_IMAGE}) is set but CAA_TAG isn't, which is not supported. Please specify both or none"
|
||||
exit 1
|
||||
fi
|
||||
TAGS="$(curl https://quay.io/api/v1/repository/confidential-containers/cloud-api-adaptor/tag/?onlyActiveTags=true)"
|
||||
DIGEST=$(echo "${TAGS}" | jq -r '.tags[] | select(.name | contains("latest-amd64")) | .manifest_digest')
|
||||
CAA_TAG="$(echo "${TAGS}" | jq -r '.tags[] | select(.manifest_digest | contains("'"${DIGEST}"'")) | .name' | grep -v "latest")"
|
||||
fi
|
||||
if [[ -z "${CAA_IMAGE}" ]]; then
|
||||
CAA_IMAGE="quay.io/confidential-containers/cloud-api-adaptor"
|
||||
fi
|
||||
CAA_IMAGE="quay.io/confidential-containers/cloud-api-adaptor"
|
||||
TAGS="$(curl https://quay.io/api/v1/repository/confidential-containers/cloud-api-adaptor/tag/?onlyActiveTags=true)"
|
||||
DIGEST=$(echo "${TAGS}" | jq -r '.tags[] | select(.name | contains("latest-amd64")) | .manifest_digest')
|
||||
CAA_TAG="$(echo "${TAGS}" | jq -r '.tags[] | select(.manifest_digest | contains("'"${DIGEST}"'")) | .name' | grep -v "latest")"
|
||||
|
||||
# Get latest PP image
|
||||
#
|
||||
# You can list the CI images by:
|
||||
# az sig image-version list-community --location "eastus" --public-gallery-name "cocopodvm-d0e4f35f-5530-4b9c-8596-112487cdea85" --gallery-image-definition "podvm_image0" --output table
|
||||
# or the release images by:
|
||||
# az sig image-version list-community --location "eastus" --public-gallery-name "cococommunity-42d8482d-92cd-415b-b332-7648bd978eff" --gallery-image-definition "peerpod-podvm-fedora" --output table
|
||||
# or the release debug images by:
|
||||
# az sig image-version list-community --location "eastus" --public-gallery-name "cococommunity-42d8482d-92cd-415b-b332-7648bd978eff" --gallery-image-definition "peerpod-podvm-fedora-debug" --output table
|
||||
#
|
||||
# Note there are other flavours of the released images, you can list them by:
|
||||
# az sig image-definition list-community --location "eastus" --public-gallery-name "cococommunity-42d8482d-92cd-415b-b332-7648bd978eff" --output table
|
||||
if [[ -z "${PP_IMAGE_ID}" ]]; then
|
||||
SUCCESS_TIME=$(curl -s \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
"https://api.github.com/repos/confidential-containers/cloud-api-adaptor/actions/workflows/azure-nightly-build.yml/runs?status=success" \
|
||||
| jq -r '.workflow_runs[0].updated_at')
|
||||
PP_IMAGE_ID="/CommunityGalleries/cocopodvm-d0e4f35f-5530-4b9c-8596-112487cdea85/Images/podvm_image0/Versions/$(date -u -jf "%Y-%m-%dT%H:%M:%SZ" "${SUCCESS_TIME}" "+%Y.%m.%d" 2>/dev/null || date -d "${SUCCESS_TIME}" +%Y.%m.%d)"
|
||||
fi
|
||||
SUCCESS_TIME=$(curl -s \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
"https://api.github.com/repos/confidential-containers/cloud-api-adaptor/actions/workflows/azure-nightly-build.yml/runs?status=success" \
|
||||
| jq -r '.workflow_runs[0].updated_at')
|
||||
PP_IMAGE_ID="/CommunityGalleries/cocopodvm-d0e4f35f-5530-4b9c-8596-112487cdea85/Images/podvm_image0/Versions/$(date -u -jf "%Y-%m-%dT%H:%M:%SZ" "${SUCCESS_TIME}" "+%Y.%m.%d" 2>/dev/null || date -d "${SUCCESS_TIME}" +%Y.%m.%d)"
|
||||
|
||||
echo "AZURE_REGION=\"${AZURE_REGION}\""
|
||||
echo "PP_REGION=\"${PP_REGION}\""
|
||||
echo "AZURE_RESOURCE_GROUP=\"${AZURE_RESOURCE_GROUP}\""
|
||||
echo "PP_RESOURCE_GROUP=\"${PP_RESOURCE_GROUP}\""
|
||||
echo "PP_SUBNET_ID=\"${PP_SUBNET_ID}\""
|
||||
echo "CAA_IMAGE=\"${CAA_IMAGE}\""
|
||||
echo "CAA_TAG=\"${CAA_TAG}\""
|
||||
echo "PP_IMAGE_ID=\"${PP_IMAGE_ID}\""
|
||||
echo "AZURE_REGION: \"${AZURE_REGION}\""
|
||||
echo "PP_REGION: \"${PP_REGION}\""
|
||||
echo "AZURE_RESOURCE_GROUP: \"${AZURE_RESOURCE_GROUP}\""
|
||||
echo "PP_RESOURCE_GROUP: \"${PP_RESOURCE_GROUP}\""
|
||||
echo "PP_SUBNET_ID: \"${PP_SUBNET_ID}\""
|
||||
echo "CAA_TAG: \"${CAA_TAG}\""
|
||||
echo "PP_IMAGE_ID: \"${PP_IMAGE_ID}\""
|
||||
|
||||
# Clone and configure caa
|
||||
git_sparse_clone "https://github.com/confidential-containers/cloud-api-adaptor.git" "${CAA_GIT_SHA:-main}" "src/cloud-api-adaptor/install/"
|
||||
echo "CAA_GIT_SHA=\"$(git rev-parse HEAD)\""
|
||||
git clone --depth 1 --no-checkout https://github.com/confidential-containers/cloud-api-adaptor.git
|
||||
pushd cloud-api-adaptor
|
||||
git sparse-checkout init --cone
|
||||
git sparse-checkout set src/cloud-api-adaptor/install/
|
||||
git checkout
|
||||
echo "CAA_GIT_SHA: \"$(git rev-parse HEAD)\""
|
||||
pushd src/cloud-api-adaptor
|
||||
cat <<EOF > install/overlays/azure/workload-identity.yaml
|
||||
apiVersion: apps/v1
|
||||
@@ -252,8 +208,12 @@ echo "AZURE_CLIENT_SECRET=${AZURE_CLIENT_SECRET}" >> install/overlays/azure/serv
|
||||
echo "AZURE_TENANT_ID=${AZURE_TENANT_ID}" >> install/overlays/azure/service-principal.env
|
||||
|
||||
# Deploy Operator
|
||||
git_sparse_clone "https://github.com/confidential-containers/operator" "${OPERATOR_SHA:-main}" "config/"
|
||||
echo "OPERATOR_SHA=\"$(git rev-parse HEAD)\""
|
||||
git clone --depth 1 --no-checkout https://github.com/confidential-containers/operator
|
||||
pushd operator
|
||||
git sparse-checkout init --cone
|
||||
git sparse-checkout set "config/"
|
||||
git checkout
|
||||
echo "OPERATOR_SHA: \"$(git rev-parse HEAD)\""
|
||||
oc apply -k "config/release"
|
||||
oc apply -k "config/samples/ccruntime/peer-pods"
|
||||
popd
|
||||
@@ -267,7 +227,7 @@ popd
|
||||
SECONDS=0
|
||||
( while [[ "${SECONDS}" -lt 360 ]]; do
|
||||
kubectl get runtimeclass | grep -q kata-remote && exit 0
|
||||
done; exit 1 ) || { echo "kata-remote runtimeclass not initialized in 60s"; kubectl -n confidential-containers-system get all; echo; echo "kubectl -n confidential-containers-system describe all"; kubectl -n confidential-containers-system describe all; echo; echo CAA; kubectl -n confidential-containers-system logs daemonset.apps/cloud-api-adaptor-daemonset; echo pre-install; kubectl -n confidential-containers-system logs daemonset.apps/cc-operator-pre-install-daemon; echo install; kubectl -n confidential-containers-system logs daemonset.apps/cc-operator-daemon-install; exit 1; }
|
||||
done; exit 1 ) || { echo "kata-remote runtimeclass not initialized in 60s"; kubectl -n confidential-containers-system get all; echo; echo CAA; kubectl -n confidential-containers-system logs daemonset.apps/cloud-api-adaptor-daemonset; echo pre-install; kubectl -n confidential-containers-system logs daemonset.apps/cc-operator-pre-install-daemon; echo install; kubectl -n confidential-containers-system logs daemonset.apps/cc-operator-daemon-install; exit 1; }
|
||||
|
||||
|
||||
################
|
||||
|
||||
@@ -125,7 +125,7 @@ If you want to enable SELinux in Permissive mode, add `enforcing=0` to the kerne
|
||||
Enable full debug as follows:
|
||||
|
||||
```bash
|
||||
$ sudo sed -i -E 's/^(\s*enable_debug\s*=\s*)false/\1true/' /etc/kata-containers/configuration.toml
|
||||
$ sudo sed -i -e 's/^# *\(enable_debug\).*=.*$/\1 = true/g' /etc/kata-containers/configuration.toml
|
||||
$ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.log=debug initcall_debug"/g' /etc/kata-containers/configuration.toml
|
||||
```
|
||||
|
||||
@@ -450,7 +450,7 @@ You can build and install the guest kernel image as shown [here](../tools/packag
|
||||
# Install a hypervisor
|
||||
|
||||
When setting up Kata using a [packaged installation method](install/README.md#installing-on-a-linux-system), the
|
||||
`QEMU` VMM is installed automatically. Cloud-Hypervisor, Firecracker and StratoVirt VMMs are available from the [release tarballs](https://github.com/kata-containers/kata-containers/releases), as well as through [`kata-deploy`](../tools/packaging/kata-deploy/helm-chart/README.md).
|
||||
`QEMU` VMM is installed automatically. Cloud-Hypervisor, Firecracker and StratoVirt VMMs are available from the [release tarballs](https://github.com/kata-containers/kata-containers/releases), as well as through [`kata-deploy`](../tools/packaging/kata-deploy/README.md).
|
||||
You may choose to manually build your VMM/hypervisor.
|
||||
|
||||
## Build a custom QEMU
|
||||
|
||||
@@ -166,65 +166,19 @@ moment.
|
||||
See [this issue](https://github.com/kata-containers/runtime/issues/2812) for more details.
|
||||
[Another issue](https://github.com/kata-containers/kata-containers/issues/1728) focuses on the case of `emptyDir`.
|
||||
|
||||
### Kubernetes [hostPath][k8s-hostpath] volumes
|
||||
## Host resource sharing
|
||||
|
||||
In Kata, Kubernetes hostPath volumes can mount host directories and
|
||||
regular files into the guest VM via filesystem sharing, if it is enabled
|
||||
through the `shared_fs` [configuration][runtime-config] flag.
|
||||
|
||||
By default:
|
||||
|
||||
- Non-TEE environment: Filesystem sharing is used to mount host files.
|
||||
- TEE environment: Filesystem sharing is disabled. Instead, host files
|
||||
are copied into the guest VM when the container starts, and file
|
||||
changes are *not* synchronized between the host and the guest.
|
||||
|
||||
In some cases, the behavior of hostPath volumes in Kata is further
|
||||
different compared to `runc` containers:
|
||||
|
||||
**Mounting host block devices**: When a hostPath volume is of type
|
||||
[`BlockDevice`][k8s-blockdevice], Kata hotplugs the host block device
|
||||
into the guest and exposes it directly to the container.
|
||||
|
||||
**Mounting guest devices**: When the source path of a hostPath volume is
|
||||
under `/dev`, and the path either corresponds to a host device or is not
|
||||
accessible by the Kata shim, the Kata agent bind mounts the source path
|
||||
directly from the *guest* filesystem into the container.
|
||||
|
||||
[runtime-config]: /src/runtime/README.md#configuration
|
||||
[k8s-hostpath]: https://kubernetes.io/docs/concepts/storage/volumes/#hostpath
|
||||
[k8s-blockdevice]: https://kubernetes.io/docs/concepts/storage/volumes/#hostpath-volume-types
|
||||
|
||||
### Mounting `procfs` and `sysfs`
|
||||
|
||||
For security reasons, the following mounts are disallowed:
|
||||
|
||||
| Type | Source | Destination | Rationale |
|
||||
|-------------------|-----------|----------------------------------|----------------|
|
||||
| `bind` | `!= proc` | `/proc` | CVE-2019-16884 |
|
||||
| `bind` | `*` | `/proc/*` (see exceptions below) | CVE-2019-16884 |
|
||||
| `proc \|\| sysfs` | `*` | not a directory (e.g. symlink) | CVE-2019-19921 |
|
||||
|
||||
For bind mounts under /proc, these destinations are allowed:
|
||||
|
||||
* `/proc/cpuinfo`
|
||||
* `/proc/diskstats`
|
||||
* `/proc/meminfo`
|
||||
* `/proc/stat`
|
||||
* `/proc/swaps`
|
||||
* `/proc/uptime`
|
||||
* `/proc/loadavg`
|
||||
* `/proc/net/dev`
|
||||
|
||||
## Privileged containers
|
||||
### Privileged containers
|
||||
|
||||
Privileged support in Kata is essentially different from `runc` containers.
|
||||
The container runs with elevated capabilities within the guest.
|
||||
The container runs with elevated capabilities within the guest and is granted
|
||||
access to guest devices instead of the host devices.
|
||||
This is also true with using `securityContext privileged=true` with Kubernetes.
|
||||
|
||||
Importantly, the default behavior to pass the host devices to a
|
||||
privileged container is not supported in Kata Containers and needs to be
|
||||
disabled, see [Privileged Kata Containers](how-to/privileged.md).
|
||||
The container may also be granted full access to a subset of host devices
|
||||
(https://github.com/kata-containers/runtime/issues/1568).
|
||||
|
||||
See [Privileged Kata Containers](how-to/privileged.md) for how to configure some of this behavior.
|
||||
|
||||
# Appendices
|
||||
|
||||
|
||||
@@ -83,7 +83,3 @@ Documents that help to understand and contribute to Kata Containers.
|
||||
If you have a suggestion for how we can improve the
|
||||
[website](https://katacontainers.io), please raise an issue (or a PR) on
|
||||
[the repository that holds the source for the website](https://github.com/OpenStackweb/kata-netlify-refresh).
|
||||
|
||||
### Toolchain Guidance
|
||||
|
||||
* [Toolchain Guidance](./Toochain-Guidance.md)
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
# Toolchains
|
||||
|
||||
As a community we want to strike a balance between having up-to-date toolchains, to receive the
|
||||
latest security fixes and to be able to benefit from new features and packages, whilst not being
|
||||
too bleeding edge and disrupting downstream and other consumers. As a result we have the following
|
||||
guidelines (note, not hard rules) for our go and rust toolchains that we are attempting to try out:
|
||||
|
||||
## Go toolchain
|
||||
|
||||
Go is released [every six months](https://go.dev/wiki/Go-Release-Cycle) with support for the
|
||||
[last two major release versions](https://go.dev/doc/devel/release#policy). We always want to
|
||||
ensure that we are on a supported version so we receive security fixes. To try and make
|
||||
things easier for some of our users, we aim to be using the older of the two supported major
|
||||
versions, unless there is a compelling reason to adopt the newer version.
|
||||
|
||||
In practice this means that we bump our major version of the go toolchain every six months to
|
||||
version (1.x-1) in response to a new version (1.x) coming out, which makes our current version
|
||||
(1.x-2) no longer supported. We will bump the minor version whenever required to satisfy
|
||||
dependency updates, or security fixes.
|
||||
|
||||
Our go toolchain version is recorded in [`versions.yaml`](../versions.yaml) under
|
||||
`.languages.golang.version` and should match with the version in our `go.mod` files.
|
||||
|
||||
## Rust toolchain
|
||||
|
||||
Rust has a [six week](https://doc.rust-lang.org/book/appendix-05-editions.html#:~:text=The%20Rust%20language%20and%20compiler,these%20tiny%20changes%20add%20up.)
|
||||
release cycle and they only support the latest stable release, so if we wanted to remain on a
|
||||
supported release we would only ever build with the latest stable and bump every 6 weeks.
|
||||
However feedback from our community has indicated that this is a challenge as downstream consumers
|
||||
often want to get rust from their distro, or downstream fork and these struggle to keep up with
|
||||
the six week release schedule. As a result the community has agreed to try out a policy of
|
||||
"stable-2", where we aim to build with a rust version that is two versions behind the latest stable
|
||||
version.
|
||||
|
||||
In practice this should mean that we bump our rust toolchain every six weeks, to version
|
||||
1.x-2 when 1.x is released as stable and we should be picking up the latest point release
|
||||
of that version, if there were any.
|
||||
|
||||
The rust-toolchain that we are using is recorded in [`rust-toolchain.toml`](../rust-toolchain.toml).
|
||||
@@ -198,7 +198,7 @@ fn join_params_with_dash(str: &str, num: i32) -> Result<String> {
|
||||
return Err("number must be positive");
|
||||
}
|
||||
|
||||
let result = format!("{str}-{num}");
|
||||
let result = format!("{}-{}", str, num);
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
@@ -253,13 +253,13 @@ mod tests {
|
||||
// Run the tests
|
||||
for (i, d) in tests.iter().enumerate() {
|
||||
// Create a string containing details of the test
|
||||
let msg = format!("test[{i}]: {d:?}");
|
||||
let msg = format!("test[{}]: {:?}", i, d);
|
||||
|
||||
// Call the function under test
|
||||
let result = join_params_with_dash(d.str, d.num);
|
||||
|
||||
// Update the test details string with the results of the call
|
||||
let msg = format!("{msg}, result: {result:?}");
|
||||
let msg = format!("{}, result: {:?}", msg, result);
|
||||
|
||||
// Perform the checks
|
||||
if d.result.is_ok() {
|
||||
@@ -267,8 +267,8 @@ mod tests {
|
||||
continue;
|
||||
}
|
||||
|
||||
let expected_error = format!("{d.result.as_ref().unwrap_err()}");
|
||||
let actual_error = format!("{result.unwrap_err()}");
|
||||
let expected_error = format!("{}", d.result.as_ref().unwrap_err());
|
||||
let actual_error = format!("{}", result.unwrap_err());
|
||||
assert!(actual_error == expected_error, msg);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 32">
|
||||
<!-- Dark background matching the site -->
|
||||
<rect width="32" height="32" rx="4" fill="#1a1a2e"/>
|
||||
|
||||
<!-- Kata logo scaled and centered -->
|
||||
<g transform="translate(-27, -2) scale(0.75)">
|
||||
<path d="M70.925 25.22L58.572 37.523 46.27 25.22l2.192-2.192 10.11 10.11 10.11-10.11zm-6.575-.2l-3.188-3.188 3.188-3.188 3.188 3.188zm-4.93-2.54l3.736 3.736-3.736 3.736zm-1.694 7.422l-8.07-8.07 8.07-8.07zm1.694-16.14l3.686 3.686-3.686 3.686zm-13.15 4.682L58.572 6.143l12.353 12.303-2.192 2.192-10.16-10.11-10.11 10.11zm26.997 0L58.572 3.752 43.878 18.446l3.387 3.387-3.387 3.387 14.694 14.694L73.266 25.22l-3.337-3.387z" fill="#f15b3e"/>
|
||||
</g>
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 710 B |
@@ -51,7 +51,6 @@ containers started after the VM has been launched.
|
||||
Users can check to see if the container uses the `devicemapper` block
|
||||
device as its rootfs by calling `mount(8)` within the container. If
|
||||
the `devicemapper` block device is used, the root filesystem (`/`)
|
||||
will be mounted from `/dev/vda`. Users can enable direct mounting of
|
||||
the underlying block device by setting the runtime
|
||||
[configuration](README.md#configuration) flag `disable_block_device_use` to
|
||||
`false`.
|
||||
will be mounted from `/dev/vda`. Users can disable direct mounting of
|
||||
the underlying block device through the runtime
|
||||
[configuration](README.md#configuration).
|
||||
|
||||
@@ -31,7 +31,6 @@
|
||||
- [Setting Sysctls with Kata](how-to-use-sysctls-with-kata.md)
|
||||
- [What Is VMCache and How To Enable It](what-is-vm-cache-and-how-do-I-use-it.md)
|
||||
- [What Is VM Templating and How To Enable It](what-is-vm-templating-and-how-do-I-use-it.md)
|
||||
- [How to Use Template in runtime-rs](how-to-use-template-in-runtime-rs.md)
|
||||
- [Privileged Kata Containers](privileged.md)
|
||||
- [How to load kernel modules in Kata Containers](how-to-load-kernel-modules-with-kata.md)
|
||||
- [How to use Kata Containers with `virtio-mem`](how-to-use-virtio-mem-with-kata.md)
|
||||
@@ -49,4 +48,3 @@
|
||||
- [How to use the Kata Agent Policy](how-to-use-the-kata-agent-policy.md)
|
||||
- [How to pull images in the guest](how-to-pull-images-in-guest-with-kata.md)
|
||||
- [How to use mem-agent to decrease the memory usage of Kata container](how-to-use-memory-agent.md)
|
||||
- [How to use seccomp with runtime-rs](how-to-use-seccomp-with-runtime-rs.md)
|
||||
@@ -256,7 +256,7 @@ spec:
|
||||
values:
|
||||
- NODE_NAME
|
||||
volumes:
|
||||
- name: trusted-image-storage
|
||||
- name: trusted-storage
|
||||
persistentVolumeClaim:
|
||||
claimName: trusted-pvc
|
||||
containers:
|
||||
|
||||
@@ -318,7 +318,7 @@ Finally, an operational kata container with IBM Secure Execution is now running.
|
||||
|
||||
It is reasonable to expect that the manual steps mentioned above can be easily executed.
|
||||
Typically, you can use
|
||||
[kata-deploy](https://github.com/kata-containers/kata-containers/blob/main/tools/packaging/kata-deploy/helm-chart/README.md)
|
||||
[kata-deploy](https://github.com/kata-containers/kata-containers/blob/main/tools/packaging/kata-deploy/README.md)
|
||||
to install Kata Containers on a Kubernetes cluster. However, when leveraging IBM Secure Execution,
|
||||
you need to employ the confidential container's
|
||||
[operator](https://github.com/confidential-containers/operator).
|
||||
|
||||
@@ -50,7 +50,7 @@ There are several kinds of Kata configurations and they are listed below.
|
||||
| `io.katacontainers.config.hypervisor.default_max_vcpus` | uint32| the maximum number of vCPUs allocated for the VM by the hypervisor |
|
||||
| `io.katacontainers.config.hypervisor.default_memory` | uint32| the memory assigned for a VM by the hypervisor in `MiB` |
|
||||
| `io.katacontainers.config.hypervisor.default_vcpus` | float32| the default vCPUs assigned for a VM by the hypervisor |
|
||||
| `io.katacontainers.config.hypervisor.disable_block_device_use` | `boolean` | disable hotplugging host block devices to guest VMs for container rootfs |
|
||||
| `io.katacontainers.config.hypervisor.disable_block_device_use` | `boolean` | disallow a block device from being used |
|
||||
| `io.katacontainers.config.hypervisor.disable_image_nvdimm` | `boolean` | specify if a `nvdimm` device should be used as rootfs for the guest (QEMU) |
|
||||
| `io.katacontainers.config.hypervisor.disable_vhost_net` | `boolean` | specify if `vhost-net` is not available on the host |
|
||||
| `io.katacontainers.config.hypervisor.enable_hugepages` | `boolean` | if the memory should be `pre-allocated` from huge pages |
|
||||
@@ -97,8 +97,6 @@ There are several kinds of Kata configurations and they are listed below.
|
||||
| `io.katacontainers.config.hypervisor.use_legacy_serial` | `boolean` | uses legacy serial device for guest's console (QEMU) |
|
||||
| `io.katacontainers.config.hypervisor.default_gpus` | uint32 | the minimum number of GPUs required for the VM. Only used by remote hypervisor to help with instance selection |
|
||||
| `io.katacontainers.config.hypervisor.default_gpu_model` | string | the GPU model required for the VM. Only used by remote hypervisor to help with instance selection |
|
||||
| `io.katacontainers.config.hypervisor.block_device_num_queues` | `usize` | The number of queues to use for block devices (runtime-rs only) |
|
||||
| `io.katacontainers.config.hypervisor.block_device_queue_size` | uint32 | The size of the queue to use for block devices (runtime-rs only) |
|
||||
|
||||
## Container Options
|
||||
| Key | Value Type | Comments |
|
||||
|
||||
@@ -104,20 +104,12 @@ LOW_WATER_MARK=32768
|
||||
sudo dmsetup create "${POOL_NAME}" \
|
||||
--table "0 ${LENGTH_IN_SECTORS} thin-pool ${META_DEV} ${DATA_DEV} ${DATA_BLOCK_SIZE} ${LOW_WATER_MARK}"
|
||||
|
||||
# Determine plugin name based on containerd config version
|
||||
CONFIG_VERSION=$(containerd config dump | awk '/^version/ {print $3}')
|
||||
if [ "$CONFIG_VERSION" -ge 2 ]; then
|
||||
PLUGIN="io.containerd.snapshotter.v1.devmapper"
|
||||
else
|
||||
PLUGIN="devmapper"
|
||||
fi
|
||||
|
||||
cat << EOF
|
||||
#
|
||||
# Add this to your config.toml configuration file and restart containerd daemon
|
||||
#
|
||||
[plugins]
|
||||
[plugins."${PLUGIN}"]
|
||||
[plugins.devmapper]
|
||||
pool_name = "${POOL_NAME}"
|
||||
root_path = "${DATA_DIR}"
|
||||
base_image_size = "10GB"
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
## Introduction
|
||||
|
||||
To enhance security, Kata Containers supports using seccomp to restrict the hypervisor's system calls. Previously, this was only supported for a subset of hypervisors in runtime-go. Now, the runtime-rs also supports seccomp. This document describes how to enable/disable the seccomp feature for the corresponding hypervisor in runtime-rs.
|
||||
|
||||
## Pre-requisites
|
||||
|
||||
1. Ensure your system's kernel supports **seccomp**.
|
||||
2. Confirm that each of the following virtual machines can run correctly on your system.
|
||||
|
||||
## Configure seccomp
|
||||
|
||||
With the exception of `qemu`, seccomp is enabled by default for all other supported hypervisors. Their corresponding built-in functionalities are also enabled by default.
|
||||
|
||||
### QEMU
|
||||
|
||||
As with runtime-go, you need to modify the following in your **configuration file**. These parameters will be passed directly to the `qemu` startup command line. For more details on the parameters, you can refer to: [https://www.qemu.org/docs/master/system/qemu-manpage.html](https://www.qemu.org/docs/master/system/qemu-manpage.html)
|
||||
|
||||
``` toml
|
||||
# Qemu seccomp sandbox feature
|
||||
# comma-separated list of seccomp sandbox features to control the syscall access.
|
||||
# For example, `seccompsandbox= "on,obsolete=deny,spawn=deny,resourcecontrol=deny"`
|
||||
# Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
|
||||
# Another note: enabling this feature may reduce performance, you may enable
|
||||
# /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html
|
||||
seccompsandbox="on,obsolete=deny,spawn=deny,resourcecontrol=deny"
|
||||
```
|
||||
### Cloud Hypervisor, Firecracker and Dragonball
|
||||
|
||||
The **seccomp** functionality is enabled by default for the following three hypervisors: `cloud hypervisor`, `firecracker`, and `dragonball`.
|
||||
|
||||
The seccomp rules for `cloud hypervisor` and `firecracker` are built directly into their executable files. For `dragonball`, the relevant configuration is currently located at `src/runtime-rs/crates/hypervisor/src/dragonball/seccomp.rs`.
|
||||
|
||||
To disable this functionality for these hypervisors, you can modify the following configuration options in your **configuration file**.
|
||||
|
||||
``` toml
|
||||
# Disable the 'seccomp' feature from Cloud Hypervisor, firecracker or dragonball, default false
|
||||
disable_seccomp = true
|
||||
```
|
||||
|
||||
## Implementation details
|
||||
|
||||
For `qemu`, `cloud hypervisor`, and `firecracker`, their **seccomp** functionality is built into the respective executable files you are using. **runtime-rs** simply provides command-line arguments for their launch based on the configuration file.
|
||||
|
||||
For `dragonball`, a set of allowed system calls is currently provided for the entire **runtime-rs** process, and the process is prevented from using any system calls outside of this whitelist. As mentioned above, this set is located at `src/runtime-rs/crates/hypervisor/src/dragonball/seccomp.rs`.
|
||||
@@ -1,119 +0,0 @@
|
||||
# How to Use Template in runtime-rs
|
||||
|
||||
## What is VM Templating
|
||||
|
||||
VM templating is a Kata Containers feature that enables new VM creation using a cloning technique. When enabled, new VMs are created by cloning from a pre-created template VM, and they will share the same initramfs, kernel and agent memory in readonly mode. It is very much like a process fork done by the kernel but here we *fork* VMs.
|
||||
|
||||
For more details on VM templating, refer to the [What is VM templating and how do I use it](./what-is-vm-templating-and-how-do-I-use-it.md) article.
|
||||
|
||||
## How to Enable VM Templating
|
||||
|
||||
VM templating can be enabled by changing your Kata Containers config file (`/opt/kata/share/defaults/kata-containers/runtime-rs/configuration.toml`, overridden by `/etc/kata-containers/configuration.toml` if provided) such that:
|
||||
|
||||
- `qemu` version `v4.1.0` or above is specified in `hypervisor.qemu`->`path` section
|
||||
- `enable_template = true`
|
||||
- `template_path = "/run/vc/vm/template"` (default value, can be customized as needed)
|
||||
- `initrd =` is set
|
||||
- `image =` option is commented out or removed
|
||||
- `shared_fs =` option is commented out or removed
|
||||
- `default_memory =` should be set to more than 256MB
|
||||
|
||||
Then you can create a VM template for later usage by calling:
|
||||
|
||||
### Initialize and create the VM template
|
||||
The `factory init` command creates a VM template by launching a new VM, initializing the Kata Agent, then pausing and saving its state (memory and device snapshots) to the template directory. This saved template is used to rapidly clone new VMs using QEMU's memory sharing capabilities.
|
||||
|
||||
```bash
|
||||
sudo kata-ctl factory init
|
||||
```
|
||||
|
||||
### Check the status of the VM template
|
||||
|
||||
The `factory status` command checks whether a VM template currently exists by verifying the presence of template files (memory snapshot and device state). It will output "VM factory is on" if the template exists, or "VM factory is off" otherwise.
|
||||
|
||||
```bash
|
||||
sudo kata-ctl factory status
|
||||
```
|
||||
|
||||
### Destroy and clean up the VM template
|
||||
|
||||
The `factory destroy` command removes the VM template by removing the `tmpfs` filesystem and deleting the template directory along with all its contents.
|
||||
|
||||
```bash
|
||||
sudo kata-ctl factory destroy
|
||||
```
|
||||
|
||||
## How to Create a New VM from VM Template
|
||||
In the Go version of Kata Containers, the VM templating mechanism is implemented using virtio-9p (9pfs). However, 9pfs is not supported in runtime-rs due to its poor performance, limited cache coherence, and security risks. Instead, runtime-rs adopts `VirtioFS` as the default mechanism to provide rootfs for containers and VMs.
|
||||
|
||||
Yet, when enabling the VM template mechanism, `VirtioFS` introduces conflicts in memory sharing because its DAX-based shared memory mapping overlaps with the template's page-sharing design. To resolve these conflicts and ensure strict isolation between cloned VMs, runtime-rs replaces `VirtioFS` with the snapshotter approach — specifically, the `blockfile` snapshotter.
|
||||
|
||||
The `blockfile` snapshotter is used in runtime-rs because it provides each VM with an independent block-based root filesystem, ensuring strong isolation and full compatibility with the VM templating mechanism.
|
||||
|
||||
### Configure Snapshotter
|
||||
|
||||
#### Check if `Blockfile` Snapshotter is Available
|
||||
```bash
|
||||
ctr plugins ls | grep blockfile
|
||||
```
|
||||
|
||||
If not available, continue with the following steps:
|
||||
|
||||
#### Create Scratch File
|
||||
```bash
|
||||
dd if=/dev/zero of=/opt/containerd/blockfile bs=1M count=500
|
||||
sudo mkfs.ext4 /opt/containerd/blockfile
|
||||
```
|
||||
|
||||
#### Configure containerd
|
||||
Edit the containerd configuration file:
|
||||
```bash
|
||||
sudo vim /etc/containerd/config.toml
|
||||
```
|
||||
Add or modify the following configuration for the `blockfile` snapshotter:
|
||||
```toml
|
||||
[plugins."io.containerd.snapshotter.v1.blockfile"]
|
||||
scratch_file = "/opt/containerd/blockfile"
|
||||
root_path = ""
|
||||
fs_type = "ext4"
|
||||
mount_options = []
|
||||
recreate_scratch = true
|
||||
```
|
||||
|
||||
#### Restart containerd
|
||||
After modifying the configuration, restart containerd to apply changes:
|
||||
|
||||
```bash
|
||||
sudo systemctl restart containerd
|
||||
```
|
||||
|
||||
### Run Container with `blockfile` Snapshotter
|
||||
After the VM template is created, you can pull an image and run a container using the `blockfile` snapshotter:
|
||||
|
||||
```bash
|
||||
ctr run --rm -t --snapshotter blockfile docker.io/library/busybox:latest template sh
|
||||
```
|
||||
|
||||
We can verify whether a VM was launched from a template or started normally by checking the launch parameters — if the parameters contain `incoming`, it indicates that the VM was started from a template rather than created directly.
|
||||
|
||||
## Performance Test
|
||||
|
||||
The comparative experiment between **template-based VM** creation and **direct VM** creation showed that the template-based approach reduced startup latency to ≈ **73.2%** of the baseline (average launch time of **0.6s** vs. **0.82s**, a ≈ 26.8% reduction) and memory usage to ≈ **79.8%** of the baseline (average memory usage of **178.2 MiB** vs. **223.2 MiB**, a ≈ 20.2% reduction), demonstrating significant improvements in VM startup efficiency and resource utilization.
|
||||
|
||||
The test script is as follows:
|
||||
|
||||
```bash
|
||||
# Clear the page cache, dentries, and inodes to free up memory
|
||||
echo 3 | sudo tee /proc/sys/vm/drop_caches
|
||||
|
||||
# Display the current memory usage
|
||||
free -h
|
||||
|
||||
# Create 100 normal VMs and template-based VMs, and track the time
|
||||
time for I in $(seq 100); do
|
||||
echo -n " ${I}th" # Display the iteration number
|
||||
ctr run -d --runtime io.containerd.kata.v2 --snapshotter blockfile docker.io/library/busybox:latest normal/template${I}
|
||||
done
|
||||
|
||||
# Display the memory usage again after running the test
|
||||
free -h
|
||||
@@ -32,24 +32,11 @@ Kubernetes users can encode in `base64` format their Policy documents, and add t
|
||||
|
||||
### Encode a Policy file
|
||||
|
||||
For example, the [`allow-all-except-exec-process.rego`](../../src/kata-opa/allow-all-except-exec-process.rego) sample policy file is different from the [default Policy](../../src/kata-opa/allow-all.rego) because it rejects any `ExecProcess` requests. To encode this policy file, you need to:
|
||||
- Embed the policy inside an init data struct
|
||||
- Compress
|
||||
- Base64 encode
|
||||
For example:
|
||||
For example, the [`allow-all-except-exec-process.rego`](../../src/kata-opa/allow-all-except-exec-process.rego) sample policy file is different from the [default Policy](../../src/kata-opa/allow-all.rego) because it rejects any `ExecProcess` requests. You can encode this policy file:
|
||||
|
||||
```bash
|
||||
$ STRING="$(< allow-all-except-exec-process.rego)"
|
||||
$ cat <<EOF | gzip -c | base64 -w0
|
||||
version = "0.1.0"
|
||||
algorithm = "sha256"
|
||||
|
||||
[data]
|
||||
"policy.rego" = '''
|
||||
$STRING
|
||||
'''
|
||||
EOF
|
||||
H4sIAAAAAAAAA42UTW/TQBCG7/4Vq/QQOCQKQXCo1ENIAkRqiGWnpBJCaGKP7RXrXTM7DnV/PRMiVUh07R582J3H8/XO7AnJa2fVjRrNpm+ms1EEpnSkuarPd76C+bv3oyj6lgPD92jUOKOzbkpYupEA4/E4ulJL13Sky4rVq+y1ms/mb9VWZ+S8K1iM1DgClijRlcBpvLqf3OoMrcfJJkfLutBI12rRQFbhZD6dCRfJ4SeUqOSz/OMSNopyLKA1rBZ5vkjiLyhBj458gr9a9KyubxRTi/9i6W9oQualcR5TzrUNElLZR20waCcExqWzDNoi9WMp2PzoHkLQSi7JdQPUJ+QtMuksWLQQu912fZK+BZHz7QolaRN0c6s9bywjFZBhL5W4lsPEFuvPjhvTlh+6mNwx2MudNdLDZXwnf4SYGFo/3O64NWZTy+SEgAQhT1lECQZKsHan4UgXLGUw+FWTzHjh0woIt661HGxJgh4xT0RoV6/w1IO19XAOKfJFTxmxva6DRQsX/12jIKBLC0Y0Er2DuUutxMM5nak9QaZt2cOwf4En1ww42nN3OK+w14/B4u+a/CWLesHWTYU1Eph+GS/w0470Y/1LcgDNA40/yKOMzw/tE7N+wOx/NwUYj9H5qf4DsX93tO4FAAA=
|
||||
$ base64 -w 0 allow-all-except-exec-process.rego
|
||||
cGFja2FnZSBhZ2VudF9wb2xpY3kKCmRlZmF1bHQgQWRkQVJQTmVpZ2hib3JzUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgQWRkU3dhcFJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IENsb3NlU3RkaW5SZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBDb3B5RmlsZVJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IENyZWF0ZUNvbnRhaW5lclJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IENyZWF0ZVNhbmRib3hSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBEZXN0cm95U2FuZGJveFJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IEdldE1ldHJpY3NSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBHZXRPT01FdmVudFJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IEd1ZXN0RGV0YWlsc1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IExpc3RJbnRlcmZhY2VzUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgTGlzdFJvdXRlc1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IE1lbUhvdHBsdWdCeVByb2JlUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgT25saW5lQ1BVTWVtUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUGF1c2VDb250YWluZXJSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBQdWxsSW1hZ2VSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBSZWFkU3RyZWFtUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVtb3ZlQ29udGFpbmVyUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVtb3ZlU3RhbGVWaXJ0aW9mc1NoYXJlTW91bnRzUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVzZWVkUmFuZG9tRGV2UmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVzdW1lQ29udGFpbmVyUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgU2V0R3Vlc3REYXRlVGltZVJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFNldFBvbGljeVJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFNpZ25hbFByb2Nlc3NSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBTdGFydENvbnRhaW5lclJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFN0YXJ0VHJhY2luZ1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFN0YXRzQ29udGFpbmVyUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgU3RvcFRyYWNpbmdSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBUdHlXaW5SZXNpemVSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVDb250YWluZXJSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVFcGhlbWVyYWxNb3VudHNSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVJbnRlcmZhY2VSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVSb3V0ZXNSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBXYWl0UHJvY2Vzc1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFdyaXRlU3RyZWFtUmVxdWVzdCA6PSB0cnVlCgpkZWZhdWx0IEV4ZWNQcm9jZXNzUmVxdWVzdCA6PSBmYWxzZQo=
|
||||
```
|
||||
|
||||
### Attach the Policy to a pod
|
||||
@@ -62,7 +49,7 @@ kind: Pod
|
||||
metadata:
|
||||
name: policy-exec-rejected
|
||||
annotations:
|
||||
io.katacontainers.config.hypervisor.cc_init_data: H4sIAAAAAAAAA42UTW/TQBCG7/4Vq/QQOCQKQXCo1ENIAkRqiGWnpBJCaGKP7RXrXTM7DnV/PRMiVUh07R582J3H8/XO7AnJa2fVjRrNpm+ms1EEpnSkuarPd76C+bv3oyj6lgPD92jUOKOzbkpYupEA4/E4ulJL13Sky4rVq+y1ms/mb9VWZ+S8K1iM1DgClijRlcBpvLqf3OoMrcfJJkfLutBI12rRQFbhZD6dCRfJ4SeUqOSz/OMSNopyLKA1rBZ5vkjiLyhBj458gr9a9KyubxRTi/9i6W9oQualcR5TzrUNElLZR20waCcExqWzDNoi9WMp2PzoHkLQSi7JdQPUJ+QtMuksWLQQu912fZK+BZHz7QolaRN0c6s9bywjFZBhL5W4lsPEFuvPjhvTlh+6mNwx2MudNdLDZXwnf4SYGFo/3O64NWZTy+SEgAQhT1lECQZKsHan4UgXLGUw+FWTzHjh0woIt661HGxJgh4xT0RoV6/w1IO19XAOKfJFTxmxva6DRQsX/12jIKBLC0Y0Er2DuUutxMM5nak9QaZt2cOwf4En1ww42nN3OK+w14/B4u+a/CWLesHWTYU1Eph+GS/w0470Y/1LcgDNA40/yKOMzw/tE7N+wOx/NwUYj9H5qf4DsX93tO4FAAA=
|
||||
io.katacontainers.config.agent.policy: cGFja2FnZSBhZ2VudF9wb2xpY3kKCmRlZmF1bHQgQWRkQVJQTmVpZ2hib3JzUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgQWRkU3dhcFJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IENsb3NlU3RkaW5SZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBDb3B5RmlsZVJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IENyZWF0ZUNvbnRhaW5lclJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IENyZWF0ZVNhbmRib3hSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBEZXN0cm95U2FuZGJveFJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IEdldE1ldHJpY3NSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBHZXRPT01FdmVudFJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IEd1ZXN0RGV0YWlsc1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IExpc3RJbnRlcmZhY2VzUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgTGlzdFJvdXRlc1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IE1lbUhvdHBsdWdCeVByb2JlUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgT25saW5lQ1BVTWVtUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUGF1c2VDb250YWluZXJSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBQdWxsSW1hZ2VSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBSZWFkU3RyZWFtUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVtb3ZlQ29udGFpbmVyUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVtb3ZlU3RhbGVWaXJ0aW9mc1NoYXJlTW91bnRzUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVzZWVkUmFuZG9tRGV2UmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgUmVzdW1lQ29udGFpbmVyUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgU2V0R3Vlc3REYXRlVGltZVJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFNldFBvbGljeVJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFNpZ25hbFByb2Nlc3NSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBTdGFydENvbnRhaW5lclJlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFN0YXJ0VHJhY2luZ1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFN0YXRzQ29udGFpbmVyUmVxdWVzdCA6PSB0cnVlCmRlZmF1bHQgU3RvcFRyYWNpbmdSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBUdHlXaW5SZXNpemVSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVDb250YWluZXJSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVFcGhlbWVyYWxNb3VudHNSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVJbnRlcmZhY2VSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBVcGRhdGVSb3V0ZXNSZXF1ZXN0IDo9IHRydWUKZGVmYXVsdCBXYWl0UHJvY2Vzc1JlcXVlc3QgOj0gdHJ1ZQpkZWZhdWx0IFdyaXRlU3RyZWFtUmVxdWVzdCA6PSB0cnVlCgpkZWZhdWx0IEV4ZWNQcm9jZXNzUmVxdWVzdCA6PSBmYWxzZQo=
|
||||
spec:
|
||||
runtimeClassName: kata
|
||||
containers:
|
||||
@@ -79,7 +66,7 @@ Create the pod:
|
||||
$ kubectl apply -f pod1.yaml
|
||||
```
|
||||
|
||||
While creating the Pod sandbox, the Kata Shim will notice the `io.katacontainers.config.hypervisor.cc_init_data` annotation and will create the init data device on the host and mount it on the guest as a block device. The agent then reads the init data struct from this device and sets the policy if present.
|
||||
While creating the Pod sandbox, the Kata Shim will notice the `io.katacontainers.config.agent.policy` annotation and will send the Policy document to the Kata Agent - by sending a `SetPolicy` request. Note that this request will fail if the default Policy, included in the Guest image, doesn't allow this `SetPolicy` request. If the `SetPolicy` request is rejected by the Guest, the Kata Shim will fail to start the Pod sandbox.
|
||||
|
||||
# How is the Policy being enforced?
|
||||
|
||||
|
||||
@@ -6,4 +6,4 @@ Container deployments utilize explicit or implicit file sharing between host fil
|
||||
|
||||
As of the 2.0 release of Kata Containers, [virtio-fs](https://virtio-fs.gitlab.io/) is the default filesystem sharing mechanism.
|
||||
|
||||
virtio-fs support works out of the box for `cloud-hypervisor` and `qemu`, when Kata Containers is deployed using `kata-deploy`. Learn more about `kata-deploy` and how to use `kata-deploy` in Kubernetes [here](../../tools/packaging/kata-deploy/helm-chart/README.md).
|
||||
virtio-fs support works out of the box for `cloud-hypervisor` and `qemu`, when Kata Containers is deployed using `kata-deploy`. Learn more about `kata-deploy` and how to use `kata-deploy` in Kubernetes [here](../../tools/packaging/kata-deploy/README.md#kubernetes-quick-start).
|
||||
|
||||
@@ -1,25 +1,22 @@
|
||||
# Privileged Kata Containers
|
||||
|
||||
> [!WARNING]
|
||||
> Whilst this functionality is supported, it can decrease the security of Kata Containers if not configured correctly.
|
||||
|
||||
Kata Containers supports creation of containers that are "privileged" (i.e. have additional capabilities and access
|
||||
that is not normally granted).
|
||||
|
||||
## Enabling privileged containers without host devices
|
||||
## Warnings
|
||||
|
||||
> [!TIP]
|
||||
> When Kata Containers is installed through
|
||||
> [kata-deploy](/tools/packaging/kata-deploy/helm-chart/README.md#kata-deploy-helm-chart), this mitigation is configured
|
||||
> out of the box, hence there is no action required in that case.
|
||||
**Warning:** Whilst this functionality is supported, it can decrease the security of Kata Containers if not configured
|
||||
correctly.
|
||||
|
||||
By default, a privileged container attempts to expose all devices from the host. This is generally not supported in Kata
|
||||
Containers as the container is running a different kernel than the host.
|
||||
### Host Devices
|
||||
|
||||
Instead, the following sections document how to disable this behavior in different container runtimes. Note that this
|
||||
mitigation does not affect a container's ability to mount *guest* devices.
|
||||
By default, when privileged is enabled for a container, all the `/dev/*` block devices from the host are mounted
|
||||
into the guest. This will allow the privileged container inside the Kata guest to gain access to mount any block device
|
||||
from the host, a potentially undesirable side-effect that decreases the security of Kata.
|
||||
|
||||
## Containerd
|
||||
The following sections document how to configure this behavior in different container runtimes.
|
||||
|
||||
#### Containerd
|
||||
|
||||
Containerd allows configuring the privileged host devices behavior for each runtime in the containerd config. This is
|
||||
done with the `privileged_without_host_devices` option. Setting this to `true` will disable hot plugging of the host
|
||||
@@ -46,7 +43,7 @@ See below example config:
|
||||
- [How to use Kata Containers and containerd with Kubernetes](how-to-use-k8s-with-containerd-and-kata.md)
|
||||
- [Containerd CRI config documentation](https://github.com/containerd/containerd/blob/main/docs/cri/config.md)
|
||||
|
||||
## CRI-O
|
||||
#### CRI-O
|
||||
|
||||
Similar to containerd, CRI-O allows configuring the privileged host devices
|
||||
behavior for each runtime in the CRI config. This is done with the
|
||||
|
||||
@@ -8,11 +8,50 @@ Kata Containers requires nested virtualization or bare metal. Check
|
||||
[hardware requirements](./../../README.md#hardware-requirements) to see if your system is capable of running Kata
|
||||
Containers.
|
||||
|
||||
The Kata Deploy Helm chart is the preferred way to install all of the binaries and
|
||||
## Packaged installation methods
|
||||
|
||||
The packaged installation method uses your distribution's native package format (such as RPM or DEB).
|
||||
|
||||
> **Note:**
|
||||
>
|
||||
> We encourage you to select an installation method that provides
|
||||
> automatic updates, to ensure you get the latest security updates and
|
||||
> bug fixes.
|
||||
|
||||
| Installation method | Description | Automatic updates | Use case |
|
||||
|------------------------------------------------------|----------------------------------------------------------------------------------------------|-------------------|-----------------------------------------------------------------------------------------------|
|
||||
| [Using official distro packages](#official-packages) | Kata packages provided by Linux distributions official repositories | yes | Recommended for most users. |
|
||||
| [Automatic](#automatic-installation) | Run a single command to install a full system | **No!** | For those wanting the latest release quickly. |
|
||||
| [Using kata-deploy Helm chart](#kata-deploy-helm-chart) | The preferred way to deploy the Kata Containers distributed binaries on a Kubernetes cluster | **No!** | Best way to give Kata Containers a try on an already up and running Kubernetes cluster. |
|
||||
|
||||
### Kata Deploy Helm Chart
|
||||
|
||||
The Kata Deploy Helm chart is a convenient way to install all of the binaries and
|
||||
artifacts required to run Kata Containers on Kubernetes.
|
||||
|
||||
[Use Kata Deploy Helm Chart](/tools/packaging/kata-deploy/helm-chart/README.md) to install Kata Containers on a Kubernetes Cluster.
|
||||
|
||||
### Official packages
|
||||
|
||||
Kata packages are provided by official distribution repositories for:
|
||||
|
||||
| Distribution (link to installation guide) | Minimum versions |
|
||||
|----------------------------------------------------------|--------------------------------------------------------------------------------|
|
||||
| [CentOS](centos-installation-guide.md) | 8 |
|
||||
| [Fedora](fedora-installation-guide.md) | 34 |
|
||||
|
||||
### Automatic Installation
|
||||
|
||||
[Use `kata-manager`](/utils/README.md) to automatically install a working Kata Containers system.
|
||||
|
||||
## Installing on a Cloud Service Platform
|
||||
|
||||
* [Amazon Web Services (AWS)](aws-installation-guide.md)
|
||||
* [Google Compute Engine (GCE)](gce-installation-guide.md)
|
||||
* [Microsoft Azure](azure-installation-guide.md)
|
||||
* [Minikube](minikube-installation-guide.md)
|
||||
* [VEXXHOST OpenStack Cloud](vexxhost-installation-guide.md)
|
||||
|
||||
## Further information
|
||||
|
||||
* [upgrading document](../Upgrading.md)
|
||||
|
||||
135
docs/install/aws-installation-guide.md
Normal file
135
docs/install/aws-installation-guide.md
Normal file
@@ -0,0 +1,135 @@
|
||||
# Install Kata Containers on Amazon Web Services
|
||||
|
||||
Kata Containers on Amazon Web Services (AWS) makes use of [i3.metal](https://aws.amazon.com/ec2/instance-types/i3/) instances. Most of the installation procedure is identical to that for Kata on your preferred distribution, except that you have to run it on bare metal instances since AWS doesn't support nested virtualization yet. This guide walks you through creating an i3.metal instance.
|
||||
|
||||
## Install and Configure AWS CLI
|
||||
|
||||
### Requirements
|
||||
|
||||
* Python:
|
||||
* Python 2 version 2.6.5+
|
||||
* Python 3 version 3.3+
|
||||
|
||||
### Install
|
||||
|
||||
Install with this command:
|
||||
|
||||
```bash
|
||||
$ pip install awscli --upgrade --user
|
||||
```
|
||||
|
||||
### Configure
|
||||
|
||||
First, verify it:
|
||||
|
||||
```bash
|
||||
$ aws --version
|
||||
```
|
||||
|
||||
Then configure it:
|
||||
|
||||
```bash
|
||||
$ aws configure
|
||||
```
|
||||
|
||||
Specify the required parameters:
|
||||
|
||||
```
|
||||
AWS Access Key ID []: <your-key-id-from-iam>
|
||||
AWS Secret Access Key []: <your-secret-access-key-from-iam>
|
||||
Default region name []: <your-aws-region-for-your-i3-metal-instance>
|
||||
Default output format [None]: <yaml-or-json-or-empty>
|
||||
```
|
||||
|
||||
Alternatively, you can create the files: `~/.aws/credentials` and `~/.aws/config`:
|
||||
|
||||
```bash
|
||||
$ cat <<EOF > ~/.aws/credentials
|
||||
[default]
|
||||
aws_access_key_id = <your-key-id-from-iam>
|
||||
aws_secret_access_key = <your-secret-access-key-from-iam>
|
||||
EOF
|
||||
$ cat <<EOF > ~/.aws/config
|
||||
[default]
|
||||
region = <your-aws-region-for-your-i3-metal-instance>
|
||||
EOF
|
||||
```
|
||||
|
||||
For more information on how to get AWS credentials please refer to [this guide](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html). Alternatively, you can ask the administrator of your AWS account to issue one with the AWS CLI:
|
||||
|
||||
```sh
|
||||
$ aws_username="myusername"
|
||||
$ aws iam create-access-key --user-name="$aws_username"
|
||||
```
|
||||
|
||||
More general AWS CLI guidelines can be found [here](https://docs.aws.amazon.com/cli/latest/userguide/installing.html).
|
||||
|
||||
## Create or Import an EC2 SSH key pair
|
||||
|
||||
You will need this to access your instance.
|
||||
|
||||
To create:
|
||||
|
||||
```bash
|
||||
$ aws ec2 create-key-pair --key-name MyKeyPair | grep KeyMaterial | cut -d: -f2- | tr -d ' \n\"\,' > MyKeyPair.pem
|
||||
$ chmod 400 MyKeyPair.pem
|
||||
```
|
||||
|
||||
Alternatively, to import using your public SSH key:
|
||||
|
||||
```bash
|
||||
$ aws ec2 import-key-pair --key-name "MyKeyPair" --public-key-material file://MyKeyPair.pub
|
||||
```
|
||||
|
||||
## Launch i3.metal instance
|
||||
|
||||
Get the latest Bionic Ubuntu AMI (Amazon Machine Image) or the latest AMI for the Linux distribution you would like to use. For example:
|
||||
|
||||
```bash
|
||||
$ aws ec2 describe-images --owners 099720109477 --filters "Name=name,Values=ubuntu/images/hvm-ssd/ubuntu-bionic-18.04-amd64-server*" --query 'sort_by(Images, &CreationDate)[].ImageId '
|
||||
```
|
||||
|
||||
This command will produce output similar to the following:
|
||||
|
||||
```
|
||||
[
|
||||
...
|
||||
"ami-063aa838bd7631e0b",
|
||||
"ami-03d5270fcb641f79b"
|
||||
]
|
||||
```
|
||||
|
||||
Launch the EC2 instance and pick up the `INSTANCEID`:
|
||||
|
||||
```bash
|
||||
$ aws ec2 run-instances --image-id ami-03d5270fcb641f79b --count 1 --instance-type i3.metal --key-name MyKeyPair --associate-public-ip-address > /tmp/aws.json
|
||||
$ export INSTANCEID=$(grep InstanceId /tmp/aws.json | cut -d: -f2- | tr -d ' \n\"\,')
|
||||
```
|
||||
|
||||
Wait for the instance to come up; the output of the following command should be `running`:
|
||||
|
||||
```bash
|
||||
$ aws ec2 describe-instances --instance-id=${INSTANCEID} | grep running | cut -d: -f2- | tr -d ' \"\,'
|
||||
```
|
||||
|
||||
Get the public IP address for the instance:
|
||||
|
||||
```bash
|
||||
$ export IP=$(aws ec2 describe-instances --instance-id=${INSTANCEID} | grep PublicIpAddress | cut -d: -f2- | tr -d ' \n\"\,')
|
||||
```
|
||||
|
||||
Refer to [this guide](https://docs.aws.amazon.com/cli/latest/userguide/cli-ec2-launch.html) for more details on how to launch instances with the AWS CLI.
|
||||
|
||||
SSH into the machine
|
||||
|
||||
```bash
|
||||
$ ssh -i MyKeyPair.pem ubuntu@${IP}
|
||||
```
|
||||
|
||||
Go on to the next step.
|
||||
|
||||
## Install Kata
|
||||
|
||||
The process for installing Kata itself on bare metal is identical to that of a virtualization-enabled VM.
|
||||
|
||||
For detailed information on installing Kata on your distribution of choice, see the [Kata Containers installation user guides](../install/README.md).
|
||||
18
docs/install/azure-installation-guide.md
Normal file
18
docs/install/azure-installation-guide.md
Normal file
@@ -0,0 +1,18 @@
|
||||
# Install Kata Containers on Microsoft Azure
|
||||
|
||||
Kata Containers on Azure use nested virtualization to provide an identical installation
|
||||
experience to Kata on your preferred Linux distribution.
|
||||
|
||||
This guide assumes you have an Azure account set up and tools to remotely login to your virtual
|
||||
machine (SSH). Instructions will use the Azure Portal to avoid
|
||||
local dependencies and setup.
|
||||
|
||||
## Create a new virtual machine with nesting support
|
||||
|
||||
Create a new virtual machine with:
|
||||
* Nesting support (v3 series)
|
||||
* your distro of choice
|
||||
|
||||
## Set up with distribution specific quick start
|
||||
|
||||
Follow distribution specific [install guides](../install/README.md#packaged-installation-methods).
|
||||
21
docs/install/centos-installation-guide.md
Normal file
21
docs/install/centos-installation-guide.md
Normal file
@@ -0,0 +1,21 @@
|
||||
# Install Kata Containers on CentOS
|
||||
|
||||
1. Install the Kata Containers components with the following commands:
|
||||
|
||||
```bash
|
||||
$ sudo -E dnf install -y centos-release-advanced-virtualization
|
||||
$ sudo -E dnf module disable -y virt:rhel
|
||||
$ source /etc/os-release
|
||||
$ cat <<EOF | sudo -E tee /etc/yum.repos.d/kata-containers.repo
|
||||
[kata-containers]
|
||||
name=Kata Containers
|
||||
baseurl=http://mirror.centos.org/\$contentdir/\$releasever/virt/\$basearch/kata-containers
|
||||
enabled=1
|
||||
gpgcheck=1
|
||||
skip_if_unavailable=1
|
||||
EOF
|
||||
$ sudo -E dnf install -y kata-containers
|
||||
```
|
||||
|
||||
2. Decide which container manager to use and select the corresponding link that follows:
|
||||
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)
|
||||
10
docs/install/fedora-installation-guide.md
Normal file
10
docs/install/fedora-installation-guide.md
Normal file
@@ -0,0 +1,10 @@
|
||||
# Install Kata Containers on Fedora
|
||||
|
||||
1. Install the Kata Containers components with the following commands:
|
||||
|
||||
```bash
|
||||
$ sudo -E dnf -y install kata-containers
|
||||
```
|
||||
|
||||
2. Decide which container manager to use and select the corresponding link that follows:
|
||||
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)
|
||||
127
docs/install/gce-installation-guide.md
Normal file
127
docs/install/gce-installation-guide.md
Normal file
@@ -0,0 +1,127 @@
|
||||
# Install Kata Containers on Google Compute Engine
|
||||
|
||||
Kata Containers on Google Compute Engine (GCE) makes use of [nested virtualization](https://cloud.google.com/compute/docs/instances/enable-nested-virtualization-vm-instances). Most of the installation procedure is identical to that for Kata on your preferred distribution, but enabling nested virtualization currently requires extra steps on GCE. This guide walks you through creating an image and instance with nested virtualization enabled. Note that `kata-runtime check` checks for nested virtualization, but does not fail if support is not found.
|
||||
|
||||
As a prerequisite, this guide assumes an installed and configured instance of the [Google Cloud SDK](https://cloud.google.com/sdk/downloads). For a zero-configuration option, all of the commands below have been tested under [Google Cloud Shell](https://cloud.google.com/shell/) (as of Jun 2018). Verify your `gcloud` installation and configuration:
|
||||
|
||||
```bash
|
||||
$ gcloud info || { echo "ERROR: no Google Cloud SDK"; exit 1; }
|
||||
```
|
||||
|
||||
## Create an Image with Nested Virtualization Enabled
|
||||
|
||||
VM images on GCE are grouped into families under projects. Officially supported images are automatically discoverable with `gcloud compute images list`. That command produces a list similar to the following (likely with different image names):
|
||||
|
||||
```bash
|
||||
$ gcloud compute images list
|
||||
NAME PROJECT FAMILY DEPRECATED STATUS
|
||||
centos-7-v20180523 centos-cloud centos-7 READY
|
||||
coreos-stable-1745-5-0-v20180531 coreos-cloud coreos-stable READY
|
||||
cos-beta-67-10575-45-0 cos-cloud cos-beta READY
|
||||
cos-stable-66-10452-89-0 cos-cloud cos-stable READY
|
||||
debian-9-stretch-v20180510 debian-cloud debian-9 READY
|
||||
rhel-7-v20180522 rhel-cloud rhel-7 READY
|
||||
sles-11-sp4-v20180523 suse-cloud sles-11 READY
|
||||
ubuntu-1604-xenial-v20180522 ubuntu-os-cloud ubuntu-1604-lts READY
|
||||
ubuntu-1804-bionic-v20180522 ubuntu-os-cloud ubuntu-1804-lts READY
|
||||
```
|
||||
|
||||
Each distribution has its own project, and each project can host images for multiple versions of the distribution, typically grouped into families. We recommend you select images by project and family, rather than by name. This ensures any scripts or other automation always works with a non-deprecated image, including security updates, updates to GCE-specific scripts, etc.
|
||||
|
||||
### Create the Image
|
||||
|
||||
The following example (substitute your preferred distribution project and image family) produces an image with nested virtualization enabled in your currently active GCE project:
|
||||
|
||||
```bash
|
||||
$ SOURCE_IMAGE_PROJECT=ubuntu-os-cloud
|
||||
$ SOURCE_IMAGE_FAMILY=ubuntu-1804-lts
|
||||
$ IMAGE_NAME=${SOURCE_IMAGE_FAMILY}-nested
|
||||
|
||||
$ gcloud compute images create \
|
||||
--source-image-project $SOURCE_IMAGE_PROJECT \
|
||||
--source-image-family $SOURCE_IMAGE_FAMILY \
|
||||
--licenses=https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx \
|
||||
$IMAGE_NAME
|
||||
```
|
||||
|
||||
If successful, `gcloud` reports that the image was created. Verify that the image has the nested virtualization license with `gcloud compute images describe $IMAGE_NAME`. This produces output like the following (some fields have been removed for clarity and to redact personal info):
|
||||
|
||||
```yaml
|
||||
diskSizeGb: '10'
|
||||
kind: compute#image
|
||||
licenseCodes:
|
||||
- '1002001'
|
||||
- '5926592092274602096'
|
||||
licenses:
|
||||
- https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx
|
||||
- https://www.googleapis.com/compute/v1/projects/ubuntu-os-cloud/global/licenses/ubuntu-1804-lts
|
||||
name: ubuntu-1804-lts-nested
|
||||
sourceImage: https://www.googleapis.com/compute/v1/projects/ubuntu-os-cloud/global/images/ubuntu-1804-bionic-v20180522
|
||||
sourceImageId: '3280575157699667619'
|
||||
sourceType: RAW
|
||||
status: READY
|
||||
```
|
||||
|
||||
The primary criterion of interest here is the presence of the `enable-vmx` license. Without that license Kata does not work. The presence of that license instructs the Google Compute Engine hypervisor to enable Intel's VT-x instructions in virtual machines created from the image. Note that nested virtualization is only available in VMs running on Intel Haswell or later CPU micro-architectures.
|
||||
|
||||
### Verify VMX is Available
|
||||
|
||||
Assuming you created a nested-enabled image using the previous instructions, verify that VMs created from this image are VMX-enabled with the following:
|
||||
|
||||
1. Create a VM from the image created previously:
|
||||
|
||||
```bash
|
||||
$ gcloud compute instances create \
|
||||
--image $IMAGE_NAME \
|
||||
--machine-type n1-standard-2 \
|
||||
--min-cpu-platform "Intel Broadwell" \
|
||||
kata-testing
|
||||
```
|
||||
|
||||
> **NOTE**: In most zones the `--min-cpu-platform` argument can be omitted. It is only necessary in GCE Zones that include hosts based on Intel's Ivybridge platform.
|
||||
|
||||
2. Verify that the VMX CPUID flag is set:
|
||||
|
||||
```bash
|
||||
$ gcloud compute ssh kata-testing
|
||||
|
||||
# While ssh'd into the VM:
|
||||
$ [ -z "$(lscpu|grep GenuineIntel)" ] && { echo "ERROR: Need an Intel CPU"; exit 1; }
|
||||
```
|
||||
|
||||
If this fails, ensure you created your instance from the correct image and that the previously listed `enable-vmx` license is included.
|
||||
|
||||
## Install Kata
|
||||
|
||||
The process for installing Kata itself on a virtualization-enabled VM is identical to that for bare metal.
|
||||
|
||||
For detailed information on installing Kata on your distribution of choice, see the [Kata Containers installation user guides](../install/README.md).
|
||||
|
||||
## Create a Kata-enabled Image
|
||||
|
||||
Optionally, after installing Kata, create an image to preserve the fruits of your labor:
|
||||
|
||||
```bash
|
||||
$ gcloud compute instances stop kata-testing
|
||||
$ gcloud compute images create \
|
||||
--source-disk kata-testing \
|
||||
kata-base
|
||||
```
|
||||
|
||||
The result is an image that includes any changes made to the `kata-testing` instance as well as the `enable-vmx` flag. Verify this with `gcloud compute images describe kata-base`. The result, which omits some fields for clarity, should be similar to the following:
|
||||
|
||||
```yaml
|
||||
diskSizeGb: '10'
|
||||
kind: compute#image
|
||||
licenseCodes:
|
||||
- '1002001'
|
||||
- '5926592092274602096'
|
||||
licenses:
|
||||
- https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx
|
||||
- https://www.googleapis.com/compute/v1/projects/ubuntu-os-cloud/global/licenses/ubuntu-1804-lts
|
||||
name: kata-base
|
||||
selfLink: https://www.googleapis.com/compute/v1/projects/my-kata-project/global/images/kata-base
|
||||
sourceDisk: https://www.googleapis.com/compute/v1/projects/my-kata-project/zones/us-west1-a/disks/kata-testing
|
||||
sourceType: RAW
|
||||
status: READY
|
||||
```
|
||||
@@ -32,7 +32,7 @@ architectures:
|
||||
|
||||
### Kata Deploy Installation
|
||||
|
||||
Follow the [`kata-deploy`](../../tools/packaging/kata-deploy/helm-chart/README.md).
|
||||
Follow the [`kata-deploy`](../../tools/packaging/kata-deploy/README.md).
|
||||
### Official packages
|
||||
`ToDo`
|
||||
### Automatic Installation
|
||||
|
||||
@@ -103,8 +103,48 @@ $ minikube ssh "grep -c -E 'vmx|svm' /proc/cpuinfo"
|
||||
|
||||
## Installing Kata Containers
|
||||
|
||||
You can now install the Kata Containers runtime components
|
||||
[following the official instructions](../../tools/packaging/kata-deploy/helm-chart).
|
||||
You can now install the Kata Containers runtime components. You will need a local copy of some Kata
|
||||
Containers components to help with this, and then use `kubectl` on the host (that Minikube has already
|
||||
configured for you) to deploy them:
|
||||
|
||||
```sh
|
||||
$ git clone https://github.com/kata-containers/kata-containers.git
|
||||
$ cd kata-containers/tools/packaging/kata-deploy
|
||||
$ kubectl apply -f kata-rbac/base/kata-rbac.yaml
|
||||
$ kubectl apply -f kata-deploy/base/kata-deploy.yaml
|
||||
```
|
||||
|
||||
This installs the Kata Containers components into `/opt/kata` inside the Minikube node. It can take
|
||||
a few minutes for the operation to complete. You can check the installation has worked by checking
|
||||
the status of the `kata-deploy` pod, which will be executing
|
||||
[this script](../../tools/packaging/kata-deploy/scripts/kata-deploy.sh),
|
||||
and will be executing a `sleep infinity` once it has successfully completed its work.
|
||||
You can accomplish this by running the following:
|
||||
|
||||
```sh
|
||||
$ podname=$(kubectl -n kube-system get pods -o=name | grep -F kata-deploy | sed 's?pod/??')
|
||||
$ kubectl -n kube-system exec ${podname} -- ps -ef | grep -F infinity
|
||||
```
|
||||
|
||||
> *NOTE:* This check only works for single node clusters, which is the default for Minikube.
|
||||
> For multi-node clusters, the check would need to be adapted to check `kata-deploy` had
|
||||
> completed on all nodes.
|
||||
|
||||
## Enabling Kata Containers
|
||||
|
||||
Now you have installed the Kata Containers components in the Minikube node. Next, you need to configure
|
||||
Kubernetes `RuntimeClass` to know when to use Kata Containers to run a pod.
|
||||
|
||||
### Register the runtime
|
||||
|
||||
Now register the `kata qemu` runtime with that class. This should result in no errors:
|
||||
|
||||
```sh
|
||||
$ cd kata-containers/tools/packaging/kata-deploy/runtimeclasses
|
||||
$ kubectl apply -f kata-runtimeClasses.yaml
|
||||
```
|
||||
|
||||
The Kata Containers installation process should be complete and enabled in the Minikube cluster.
|
||||
|
||||
## Testing Kata Containers
|
||||
|
||||
|
||||
16
docs/install/vexxhost-installation-guide.md
Normal file
16
docs/install/vexxhost-installation-guide.md
Normal file
@@ -0,0 +1,16 @@
|
||||
# Install Kata Containers on VEXXHOST
|
||||
|
||||
Kata Containers on VEXXHOST use nested virtualization to provide an identical
|
||||
installation experience to Kata on your preferred Linux distribution.
|
||||
|
||||
This guide assumes you have an OpenStack public cloud account set up and tools
|
||||
to remotely connect to your virtual machine (SSH).
|
||||
|
||||
## Create a new virtual machine with nesting support
|
||||
|
||||
All regions support nested virtualization using the V2 flavors (those prefixed
|
||||
with v2). The recommended machine type for container workloads is the `v2-highcpu` range.
|
||||
|
||||
## Set up with distribution specific quick start
|
||||
|
||||
Follow distribution specific [install guides](../install/README.md#packaged-installation-methods).
|
||||
@@ -48,7 +48,7 @@ $ make test
|
||||
- Run a test in the current package in verbose mode:
|
||||
|
||||
```bash
|
||||
# Example
|
||||
# Example
|
||||
$ test="config::tests::test_get_log_level"
|
||||
|
||||
$ cargo test "$test" -vv -- --exact --nocapture
|
||||
@@ -223,7 +223,7 @@ What's wrong with this function?
|
||||
|
||||
```rust
|
||||
fn foo(config: &Config, path_prefix: String, container_id: String, pid: String) -> Result<()> {
|
||||
let mut full_path = format!("{path_prefix}/{container_id}");
|
||||
let mut full_path = format!("{}/{}", path_prefix, container_id);
|
||||
|
||||
let _ = remove_recursively(&mut full_path);
|
||||
|
||||
|
||||
@@ -3,4 +3,4 @@
|
||||
Kata Containers supports passing certain GPUs from the host into the container. Select the GPU vendor for detailed information:
|
||||
|
||||
- [Intel Discrete GPUs](Intel-Discrete-GPU-passthrough-and-Kata.md)/[Intel Integrated GPUs](Intel-GPU-passthrough-and-Kata.md)
|
||||
- [NVIDIA GPUs](NVIDIA-GPU-passthrough-and-Kata.md) and [Enabling NVIDIA GPU workloads using GPU passthrough with Kata Containers](NVIDIA-GPU-passthrough-and-Kata-QEMU.md)
|
||||
- [NVIDIA](NVIDIA-GPU-passthrough-and-Kata.md)
|
||||
|
||||
@@ -1,569 +0,0 @@
|
||||
# Enabling NVIDIA GPU workloads using GPU passthrough with Kata Containers
|
||||
|
||||
This page provides:
|
||||
1. A description of the components involved when running GPU workloads with
|
||||
Kata Containers using the NVIDIA TEE and non-TEE GPU runtime classes.
|
||||
1. An explanation of the orchestration flow on a Kubernetes node for this
|
||||
scenario.
|
||||
1. A deployment guide that enables you to utilize these runtime classes.
|
||||
|
||||
The goal is to educate readers familiar with Kubernetes and Kata Containers
|
||||
on NVIDIA's reference implementation which is reflected in Kata CI's build
|
||||
and test framework. With this, we aim to enable readers to leverage this
|
||||
stack, or to use the principles behind this stack in order to run GPU
|
||||
workloads on their variant of the Kata Containers stack.
|
||||
|
||||
We assume the reader is familiar with Kubernetes, Kata Containers, and
|
||||
Confidential Containers.
|
||||
|
||||
> **Note:**
|
||||
>
|
||||
> The current supported mode for enabling GPU workloads in the TEE scenario
|
||||
> is single GPU passthrough (one GPU per pod) on AMD64 platforms (AMD SEV-SNP
|
||||
> being the only supported TEE scenario so far with support for Intel TDX being
|
||||
> on the way).
|
||||
|
||||
## Component Overview
|
||||
|
||||
Before providing deployment guidance, we describe the components involved to
|
||||
support running GPU workloads. We start from a top to bottom perspective
|
||||
from the NVIDIA GPU operator via the Kata runtime to the components within
|
||||
the NVIDIA GPU Utility Virtual Machine (UVM) root filesystem.
|
||||
|
||||
### NVIDIA GPU Operator
|
||||
|
||||
A central component is the
|
||||
[NVIDIA GPU operator](https://github.com/NVIDIA/gpu-operator) which can be
|
||||
deployed onto your cluster as a helm chart. Installing the GPU operator
|
||||
delivers various operands on your nodes in the form of Kubernetes DaemonSets.
|
||||
These operands are vital to support the flow of orchestrating pod manifests
|
||||
using NVIDIA GPU runtime classes with GPU passthrough on your nodes. Without
|
||||
getting into the details, the most important operands and their
|
||||
responsibilities are:
|
||||
|
||||
- **nvidia-vfio-manager:** Binding discovered NVIDIA GPUs to the `vfio-pci`
|
||||
driver for VFIO passthrough.
|
||||
- **nvidia-cc-manager:** Transitioning GPUs into confidential computing (CC)
|
||||
and non-CC mode (see the
|
||||
[NVIDIA/k8s-cc-manager](https://github.com/NVIDIA/k8s-cc-manager)
|
||||
repository).
|
||||
- **nvidia-kata-manager:** Creating host-side CDI specifications for GPU
|
||||
passthrough, resulting in the file `/var/run/cdi/nvidia.yaml`, containing
|
||||
`kind: nvidia.com/pgpu` (see the
|
||||
[NVIDIA/k8s-kata-manager](https://github.com/NVIDIA/k8s-kata-manager)
|
||||
repository).
|
||||
- **nvidia-sandbox-device-plugin** (see the
|
||||
[NVIDIA/sandbox-device-plugin](https://github.com/NVIDIA/sandbox-device-plugin)
|
||||
repository):
|
||||
- Allocating GPUs during pod deployment.
|
||||
- Discovering NVIDIA GPUs, their capabilities, and advertising these to
|
||||
the Kubernetes control plane (allocatable resources as type
|
||||
`nvidia.com/pgpu` resources will appear for the node and GPU Device IDs
|
||||
will be registered with Kubelet). These GPUs can thus be allocated as
|
||||
container resources in your pod manifests. See below GPU operator
|
||||
deployment instructions for the use of the key `pgpu`, controlled via a
|
||||
variable.
|
||||
|
||||
To summarize, the GPU operator manages the GPUs on each node, allowing for
|
||||
simple orchestration of pod manifests using Kata Containers. Once the cluster
|
||||
with GPU operator and Kata bits is up and running, the end user can schedule
|
||||
Kata NVIDIA GPU workloads, using resource limits and the
|
||||
`kata-qemu-nvidia-gpu` or `kata-qemu-nvidia-gpu-snp` runtime classes, for
|
||||
example:
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
...
|
||||
spec:
|
||||
...
|
||||
runtimeClassName: kata-qemu-nvidia-gpu-snp
|
||||
...
|
||||
resources:
|
||||
limits:
|
||||
"nvidia.com/pgpu": 1
|
||||
...
|
||||
```
|
||||
|
||||
When this happens, the Kubelet calls into the sandbox device plugin to
|
||||
allocate a GPU. The sandbox device plugin returns `DeviceSpec` entries to the
|
||||
Kubelet for the allocated GPU. The Kubelet uses internal device IDs for
|
||||
tracking of allocated GPUs and includes the device specifications in the CRI
|
||||
request when scheduling the pod through containerd. Containerd processes the
|
||||
device specifications and includes the device configuration in the OCI
|
||||
runtime spec used to invoke the Kata runtime during the create container
|
||||
request.
|
||||
|
||||
### Kata runtime
|
||||
|
||||
The Kata runtime for the NVIDIA GPU handlers is configured to cold-plug VFIO
|
||||
devices (`cold_plug_vfio` is set to `root-port` while
|
||||
`hot_plug_vfio` is set to `no-port`). Cold-plug is by design the only
|
||||
supported mode for NVIDIA GPU passthrough of the NVIDIA reference stack.
|
||||
|
||||
With cold-plug, the Kata runtime attaches the GPU at VM launch time, when
|
||||
creating the pod sandbox. This happens *before* the create container request,
|
||||
i.e., before the Kata runtime receives the OCI spec including device
|
||||
configurations from containerd. Thus, a mechanism to acquire the device
|
||||
information is required. This is done by the runtime calling the
|
||||
`coldPlugDevices()` function during sandbox creation. In this function,
|
||||
the runtime queries Kubelet's Pod Resources API to discover allocated GPU
|
||||
device IDs (e.g., `nvidia.com/pgpu = [vfio0]`). The runtime formats these as
|
||||
CDI device identifiers and injects them into the OCI spec using
|
||||
`config.InjectCDIDevices()`. The runtime then consults the host CDI
|
||||
specifications and determines the device path the GPU is backed by
|
||||
(e.g., `/dev/vfio/devices/vfio0`). Finally, the runtime resolves the device's
|
||||
PCI BDF (e.g., `0000:21:00.0`) and cold-plugs the GPU by launching QEMU with
|
||||
relevant parameters for device passthrough (e.g.,
|
||||
`-device vfio-pci,host=0000:21:00.0,x-pci-vendor-id=0x10de,x-pci-device-id=0x2321,bus=rp0,iommufd=iommufdvfio-faf829f2ea7aec330`).
|
||||
|
||||
The runtime also creates *inner runtime* CDI annotations
|
||||
which map host VFIO devices to guest GPU devices. These are annotations
|
||||
intended for the kata-agent, here referred to as the inner runtime (inside the
|
||||
UVM), to properly handle GPU passthrough into containers. These annotations
|
||||
serve as metadata providing the kata-agent with the information needed to
|
||||
attach the passthrough devices to the correct container.
|
||||
The annotations are key-value pairs consisting of `cdi.k8s.io/vfio<num>` keys
|
||||
(derived from the host VFIO device path, e.g., `/dev/vfio/devices/vfio1`) and
|
||||
`nvidia.com/gpu=<index>` values (referencing the corresponding device in the
|
||||
guest CDI spec). These annotations are injected by the runtime during container
|
||||
creation via the `annotateContainerWithVFIOMetadata` function (see
|
||||
`container.go`).
|
||||
|
||||
We continue describing the orchestration flow inside the UVM in the next
|
||||
section.
|
||||
|
||||
### Kata NVIDIA GPU UVM
|
||||
|
||||
#### UVM composition
|
||||
|
||||
To better understand the orchestration flow inside the NVIDIA GPU UVM, we
|
||||
first look at the components its root filesystem contains. Should you decide
|
||||
to use your own root filesystem to enable NVIDIA GPU scenarios, this should
|
||||
give you a good idea of what ingredients you need.
|
||||
|
||||
From a file system perspective, the UVM is composed of two files: a standard
|
||||
Kata kernel image and the NVIDIA GPU rootfs in initrd or disk image format.
|
||||
These two files are being utilized for the QEMU launch command when the UVM
|
||||
is created.
|
||||
|
||||
The two most important pieces in Kata Containers' build recipes for the
|
||||
NVIDIA GPU root filesystem are the `nvidia_chroot.sh` and `nvidia_rootfs.sh`
|
||||
files. The build follows a two-stage process. In the first stage, a
|
||||
full-fledged Ubuntu-based root filesystem is composed within a chroot
|
||||
environment. In this stage, NVIDIA kernel modules are built and signed
|
||||
against the current Kata kernel and relevant NVIDIA packages are installed.
|
||||
In the second stage, a chiseled build is performed: Only relevant contents
|
||||
from the first stage are copied and compressed into a new distro-less root
|
||||
filesystem folder. Kata's build infrastructure then turns this root
|
||||
filesystem into the NVIDIA initrd and image files.
|
||||
|
||||
The resulting root filesystem contains the following software components:
|
||||
|
||||
- NVRC - the
|
||||
[NVIDIA Runtime Container init system](https://github.com/NVIDIA/nvrc/tree/main)
|
||||
- NVIDIA drivers (kernel modules)
|
||||
- NVIDIA user space driver libraries
|
||||
- NVIDIA user space tools
|
||||
- kata-agent
|
||||
- confidential computing guest components: the attestation agent,
|
||||
confidential data hub and api-server-rest binaries
|
||||
- CRI-O pause container (for the guest image-pull method)
|
||||
- BusyBox utilities (provides a base set of libraries and binaries, and a
|
||||
linker)
|
||||
- some supporting files, such as a file containing a list of supported GPU
|
||||
device IDs which NVRC reads
|
||||
|
||||
#### UVM orchestration flow
|
||||
|
||||
When the Kata runtime asks QEMU to launch the VM, the UVM's Linux kernel
|
||||
boots and mounts the root filesystem. After this, NVRC starts as the initial
|
||||
process.
|
||||
|
||||
NVRC scans for NVIDIA GPUs on the PCI bus, loads the
|
||||
NVIDIA kernel modules, waits for driver initialization, creates the device nodes,
|
||||
and initializes the GPU hardware (using the `nvidia-smi` binary). NVRC also
|
||||
creates the guest-side CDI specification file (using the
|
||||
`nvidia-ctk cdi generate` command). This file specifies devices of
|
||||
`kind: nvidia.com/gpu`, i.e., GPUs appearing to be physical GPUs on regular
|
||||
bare metal systems. The guest CDI specification also contains `containerEdits`
|
||||
for each device, specifying device nodes (e.g., `/dev/nvidia0`,
|
||||
`/dev/nvidiactl`), library mounts, and environment variables to be mounted
|
||||
into the container which receives the passthrough GPU.
|
||||
|
||||
Then, NVRC forks the Kata agent while continuing to run as the
|
||||
init system. This allows NVRC to handle ongoing GPU management tasks
|
||||
while kata-agent focuses on container lifecycle management. See the
|
||||
[NVRC sources](https://github.com/NVIDIA/nvrc/blob/main/src/main.rs) for an
|
||||
overview of the steps carried out by NVRC.
|
||||
|
||||
When the Kata runtime sends the create container request, the Kata agent
|
||||
parses the inner runtime CDI annotation. For example, for the inner runtime
|
||||
annotation `"cdi.k8s.io/vfio1": "nvidia.com/gpu=0"`, the agent looks up device
|
||||
`0` in the guest CDI specification with `kind: nvidia.com/gpu`.
|
||||
|
||||
The Kata agent also reads the guest CDI specification's `containerEdits`
|
||||
section and injects relevant contents into the OCI spec of the respective
|
||||
container. The Kata agent then creates and starts a `rustjail` container
|
||||
based on the final OCI spec. The container now has relevant device nodes,
|
||||
binaries and low-level libraries available, and can start a user application
|
||||
linked against the CUDA runtime API (e.g., `libcudart.so` and other
|
||||
libraries). When used, the CUDA runtime API in turn calls the CUDA driver
|
||||
API and kernel drivers, interacting with the pass-through GPU device.
|
||||
|
||||
An additional step is exercised in our CI samples: when using images from an
|
||||
authenticated registry, the guest-pull mechanism triggers attestation using
|
||||
Trustee's Key Broker Service (KBS) for secure release of the NGC API
|
||||
authentication key used to access the NVCR container registry. As part of
|
||||
this, the attestation agent exercises composite attestation and transitions
|
||||
the GPU into `Ready` state (without this, the GPU has to explicitly be
|
||||
transitioned into `Ready` state by passing the `nvrc.smi.srs=1` kernel
|
||||
parameter via the shim config, causing NVRC to transition the GPU into the
|
||||
`Ready` state).
|
||||
|
||||
## Deployment Guidance
|
||||
|
||||
This guidance assumes you use bare-metal machines with proper support for
|
||||
Kata's non-TEE and TEE GPU workload deployment scenarios for your Kubernetes
|
||||
nodes. We provide guidance based on the upstream Kata CI procedures for the
|
||||
NVIDIA GPU CI validation jobs. Note that this setup:
|
||||
|
||||
- uses the guest image pull method to pull container image layers
|
||||
- uses the genpolicy tool to attach Kata agent security policies to the pod
|
||||
manifest
|
||||
- has dedicated (composite) attestation tests, a CUDA vectorAdd test, and a
|
||||
NIM/RAG test sample with secure API key release
|
||||
|
||||
A similar deployment guide and scenario description can be found in NVIDIA resources
|
||||
under
|
||||
[Early Access: NVIDIA GPU Operator with Confidential Containers based on Kata](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/confidential-containers.html).
|
||||
|
||||
### Requirements
|
||||
|
||||
The requirements for the TEE scenario are:
|
||||
|
||||
- Ubuntu 25.10 as host OS
|
||||
- CPU with AMD SEV-SNP support with proper BIOS/UEFI version and settings
|
||||
- CC-capable Hopper/Blackwell GPU with proper VBIOS version.
|
||||
|
||||
BIOS and VBIOS configuration is out of scope for this guide. Other resources,
|
||||
such as the documentation found on the
|
||||
[NVIDIA Trusted Computing Solutions](https://docs.nvidia.com/nvtrust/index.html)
|
||||
page and the above linked NVIDIA documentation, provide guidance on
|
||||
selecting proper hardware and on properly configuring its firmware and OS.
|
||||
|
||||
### Installation
|
||||
|
||||
#### Containerd and Kubernetes
|
||||
|
||||
First, set up your Kubernetes cluster. For instance, in Kata CI, our NVIDIA
|
||||
jobs use a single-node vanilla Kubernetes cluster with a 2.x containerd
|
||||
version and Kata's current supported Kubernetes version. We set this cluster
|
||||
up using the `deploy_k8s` function from `tests/integration/kubernetes/gha-run.sh`
|
||||
as follows:
|
||||
|
||||
```bash
|
||||
$ export KUBERNETES="vanilla"
|
||||
$ export CONTAINER_ENGINE="containerd"
|
||||
$ export CONTAINER_ENGINE_VERSION="v2.1"
|
||||
$ source tests/gha-run-k8s-common.sh
|
||||
$ deploy_k8s
|
||||
```
|
||||
|
||||
> **Note:**
|
||||
>
|
||||
> We recommend configuring your Kubelet with a higher
|
||||
> `runtimeRequestTimeout` timeout value than the two minute default timeout.
|
||||
> Using the guest-pull mechanism, pulling large images may take a significant
|
||||
> amount of time and may delay container start, possibly leading your Kubelet
|
||||
> to de-allocate your pod before it transitions from the *container created*
|
||||
> to the *container running* state.
|
||||
|
||||
> **Note:**
|
||||
>
|
||||
> The NVIDIA GPU runtime classes use VFIO cold-plug which, as
|
||||
> described above, requires the Kata runtime to query Kubelet's Pod Resources
|
||||
> API to discover allocated GPU devices during sandbox creation. For
|
||||
> Kubernetes versions **older than 1.34**, you must explicitly enable the
|
||||
> `KubeletPodResourcesGet` feature gate in your Kubelet configuration. For
|
||||
> Kubernetes 1.34 and later, this feature is enabled by default.
|
||||
|
||||
#### GPU Operator
|
||||
|
||||
Assuming you have the helm tools installed, deploy the latest version of the
|
||||
GPU Operator as a helm chart (minimum version: `v25.10.0`):
|
||||
|
||||
```bash
|
||||
$ helm repo add nvidia https://helm.ngc.nvidia.com/nvidia && helm repo update
|
||||
$ helm install --wait --generate-name \
|
||||
-n gpu-operator --create-namespace \
|
||||
nvidia/gpu-operator \
|
||||
--set sandboxWorkloads.enabled=true \
|
||||
--set sandboxWorkloads.defaultWorkload=vm-passthrough \
|
||||
--set kataManager.enabled=true \
|
||||
--set kataManager.config.runtimeClasses=null \
|
||||
--set kataManager.repository=nvcr.io/nvidia/cloud-native \
|
||||
--set kataManager.image=k8s-kata-manager \
|
||||
--set kataManager.version=v0.2.4 \
|
||||
--set ccManager.enabled=true \
|
||||
--set ccManager.defaultMode=on \
|
||||
--set ccManager.repository=nvcr.io/nvidia/cloud-native \
|
||||
--set ccManager.image=k8s-cc-manager \
|
||||
--set ccManager.version=v0.2.0 \
|
||||
--set sandboxDevicePlugin.repository=nvcr.io/nvidia/cloud-native \
|
||||
--set sandboxDevicePlugin.image=nvidia-sandbox-device-plugin \
|
||||
--set sandboxDevicePlugin.version=v0.0.1 \
|
||||
--set 'sandboxDevicePlugin.env[0].name=P_GPU_ALIAS' \
|
||||
--set 'sandboxDevicePlugin.env[0].value=pgpu' \
|
||||
--set nfd.enabled=true \
|
||||
--set nfd.nodefeaturerules=true
|
||||
```
|
||||
|
||||
> **Note:**
|
||||
>
|
||||
> For heterogeneous clusters with different GPU types, you can omit
|
||||
> the `P_GPU_ALIAS` environment variable lines. This will cause the sandbox
|
||||
> device plugin to create GPU model-specific resource types (e.g.,
|
||||
> `nvidia.com/GH100_H100L_94GB`) instead of the generic `nvidia.com/pgpu`,
|
||||
> which in turn can be used by pods through respective resource limits.
|
||||
> For simplicity, this guide uses the generic alias.
|
||||
|
||||
> **Note:**
|
||||
>
|
||||
> Using `--set sandboxWorkloads.defaultWorkload=vm-passthrough` causes all
|
||||
> your nodes to be labeled for GPU VM passthrough. Remove this parameter if
|
||||
> you intend to only use selected nodes for this scenario, and label these
|
||||
> nodes by hand, using:
|
||||
> `kubectl label node <node-name> nvidia.com/gpu.workload.config=vm-passthrough`.
|
||||
|
||||
#### Kata Containers
|
||||
|
||||
Install the latest Kata Containers helm chart, similar to
|
||||
[existing documentation](https://github.com/kata-containers/kata-containers/blob/main/tools/packaging/kata-deploy/helm-chart/README.md)
|
||||
(minimum version: `3.24.0`).
|
||||
|
||||
```bash
|
||||
$ export VERSION=$(curl -sSL https://api.github.com/repos/kata-containers/kata-containers/releases/latest | jq .tag_name | tr -d '"')
|
||||
$ export CHART="oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy"
|
||||
|
||||
$ helm install kata-deploy \
|
||||
--namespace kata-system \
|
||||
--create-namespace \
|
||||
-f "https://raw.githubusercontent.com/kata-containers/kata-containers/refs/tags/${VERSION}/tools/packaging/kata-deploy/helm-chart/kata-deploy/try-kata-nvidia-gpu.values.yaml" \
|
||||
--set nfd.enabled=false \
|
||||
--set shims.qemu-nvidia-gpu-tdx.enabled=false \
|
||||
--wait --timeout 10m --atomic \
|
||||
"${CHART}" --version "${VERSION}"
|
||||
```
|
||||
|
||||
#### Trustee's KBS for remote attestation
|
||||
|
||||
For our Kata CI runners we use Trustee's KBS for composite attestation for
|
||||
secure key release, for instance, for test scenarios which use authenticated
|
||||
container images. In such scenarios, the credentials to access the
|
||||
authenticated container registry are only released to the confidential guest
|
||||
after successful attestation. Please see the section below for more
|
||||
information about this.
|
||||
|
||||
```bash
|
||||
$ export NVIDIA_VERIFIER_MODE="remote"
|
||||
$ export KBS_INGRESS="nodeport"
|
||||
$ bash tests/integration/kubernetes/gha-run.sh deploy-coco-kbs
|
||||
$ bash tests/integration/kubernetes/gha-run.sh install-kbs-client
|
||||
```
|
||||
|
||||
Please note that Trustee can also be deployed via any other upstream
|
||||
mechanism as documented by the
|
||||
[confidential-containers repository](https://github.com/confidential-containers/trustee).
|
||||
For our architecture it is important to set up KBS in the remote verifier
|
||||
mode which requires entering a licensing agreement with NVIDIA, see the
|
||||
[notes in confidential-containers repository](https://github.com/confidential-containers/trustee/blob/main/deps/verifier/src/nvidia/README.md).
|
||||
|
||||
### Cluster validation and preparation
|
||||
|
||||
If you did not use the `sandboxWorkloads.defaultWorkload=vm-passthrough`
|
||||
parameter during GPU operator deployment, label your nodes for GPU VM
|
||||
passthrough. For example, to use all nodes for GPU passthrough, run:
|
||||
|
||||
```bash
|
||||
$ kubectl label nodes --all nvidia.com/gpu.workload.config=vm-passthrough --overwrite
|
||||
```
|
||||
|
||||
Check if the `nvidia-cc-manager` pod is running if you intend to run GPU TEE
|
||||
scenarios. If not, you need to manually label the node as CC capable. Current
|
||||
GPU Operator node feature rules do not yet recognize all CC capable GPU PCI
|
||||
IDs. Run the following command:
|
||||
|
||||
```bash
|
||||
$ kubectl label nodes --all nvidia.com/cc.capable=true
|
||||
```
|
||||
|
||||
After this, ensure the `nvidia-cc-manager` pod is running. With the suggested
|
||||
parameters for GPU Operator deployment, the `nvidia-cc-manager` will
|
||||
automatically transition the GPU into CC mode.
|
||||
|
||||
After deployment, you can transition your node(s) to the desired CC state,
|
||||
using either the `on` or `off` value, depending on your scenario. For the
|
||||
non-CC scenario, transition to the `off` state via:
|
||||
`kubectl label nodes --all nvidia.com/cc.mode=off` and wait until all pods
|
||||
are back running. When an actual change is exercised, various GPU operator
|
||||
operands will be restarted.
|
||||
|
||||
Ensure all pods are running:
|
||||
|
||||
```bash
|
||||
$ kubectl get pods -A
|
||||
```
|
||||
|
||||
On your node(s), ensure correct driver binding. Your GPU device should be
|
||||
bound to the VFIO driver, i.e., showing `Kernel driver in use: vfio-pci`
|
||||
when running:
|
||||
|
||||
```bash
|
||||
$ lspci -nnk -d 10de:
|
||||
```
|
||||
|
||||
### Run the CUDA vectorAdd sample
|
||||
|
||||
Create the following file, named `cuda-vectoradd-kata.yaml.in`:
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: cuda-vectoradd-kata
|
||||
namespace: default
|
||||
annotations:
|
||||
io.katacontainers.config.hypervisor.kernel_params: "nvrc.smi.srs=1"
|
||||
spec:
|
||||
runtimeClassName: ${GPU_RUNTIME_CLASS_NAME}
|
||||
restartPolicy: Never
|
||||
containers:
|
||||
- name: cuda-vectoradd
|
||||
image: "nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0-ubuntu22.04"
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/pgpu: "1"
|
||||
memory: 16Gi
|
||||
```
|
||||
|
||||
Depending on your scenario and on the CC state, export your desired runtime
|
||||
class name by defining the environment variable:
|
||||
|
||||
```bash
|
||||
$ export GPU_RUNTIME_CLASS_NAME="kata-qemu-nvidia-gpu-snp"
|
||||
```
|
||||
|
||||
Then, deploy the sample Kubernetes pod manifest and observe the pod logs:
|
||||
|
||||
```bash
|
||||
$ envsubst < ./cuda-vectoradd-kata.yaml.in | kubectl apply -f -
|
||||
$ kubectl wait --for=condition=Ready pod/cuda-vectoradd-kata --timeout=60s
|
||||
$ kubectl logs -n default cuda-vectoradd-kata
|
||||
```
|
||||
|
||||
Expect the following output:
|
||||
|
||||
```
|
||||
[Vector addition of 50000 elements]
|
||||
Copy input data from the host memory to the CUDA device
|
||||
CUDA kernel launch with 196 blocks of 256 threads
|
||||
Copy output data from the CUDA device to the host memory
|
||||
Test PASSED
|
||||
Done
|
||||
```
|
||||
|
||||
To stop the pod, run: `kubectl delete pod cuda-vectoradd-kata`.
|
||||
|
||||
### Next steps
|
||||
|
||||
#### Transition between CC and non-CC mode
|
||||
|
||||
Use the previously described node labeling approach to transition between
|
||||
the CC and non-CC mode. In case of the non-CC mode, you can use the
|
||||
`kata-qemu-nvidia-gpu` value for the `GPU_RUNTIME_CLASS_NAME` runtime class
|
||||
variable in the above CUDA vectorAdd sample. The `kata-qemu-nvidia-gpu-snp`
|
||||
runtime class will **NOT** work in this mode - and vice versa.
|
||||
|
||||
#### Run Kata CI tests locally
|
||||
|
||||
Upstream Kata CI runs the CUDA vectorAdd test, a composite attestation test,
|
||||
and a basic NIM/RAG deployment. Running CI tests for the TEE GPU scenario
|
||||
requires KBS to be deployed (except for the CUDA vectorAdd test). The best
|
||||
place to get started running these tests locally is to look into our
|
||||
[NVIDIA CI workflow manifest](https://github.com/kata-containers/kata-containers/blob/main/.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml)
|
||||
and into the underlying
|
||||
[run_kubernetes_nv_tests.sh](https://github.com/kata-containers/kata-containers/blob/main/tests/integration/kubernetes/run_kubernetes_nv_tests.sh)
|
||||
script. For example, to run the CUDA vectorAdd scenario against the TEE GPU
|
||||
runtime class use the following commands:
|
||||
|
||||
```bash
|
||||
# create the kata runtime class the test framework uses
|
||||
$ export KATA_HYPERVISOR=qemu-nvidia-gpu-snp
|
||||
$ kubectl delete runtimeclass kata --ignore-not-found
|
||||
$ kubectl get runtimeclass "kata-${KATA_HYPERVISOR}" -o json | \
|
||||
jq '.metadata.name = "kata" | del(.metadata.uid, .metadata.resourceVersion, .metadata.creationTimestamp)' | \
|
||||
kubectl apply -f -
|
||||
$ cd tests/integration/kubernetes
|
||||
$ K8S_TEST_NV="k8s-nvidia-cuda.bats" ./gha-run.sh run-nv-tests
|
||||
```
|
||||
|
||||
> **Note:**
|
||||
>
|
||||
> The other scenarios require an NGC API key to run, i.e., to export the
|
||||
> `NGC_API_KEY` variable with a valid NGC API key.
|
||||
|
||||
#### Deploy pods using attestation
|
||||
|
||||
Attestation is a fundamental piece of the confidential containers solution.
|
||||
In our upstream CI we use attestation, for instance, for leveraging the
|
||||
authenticated container image pull mechanism where container images reside
|
||||
in the authenticated NVCR registry (`k8s-nvidia-nim.bats`), and for
|
||||
requesting secrets from KBS (`k8s-confidential-attestation.bats`). KBS will
|
||||
release the image pull secret to a confidential guest. To get the
|
||||
authentication credentials from inside the guest, KBS must already be
|
||||
deployed and configured. In our CI samples, we configure KBS with the guest
|
||||
image pull secret, a resource policy, and launch the pod with certain kernel
|
||||
command line parameters:
|
||||
`"agent.image_registry_auth=kbs:///default/credentials/nvcr agent.aa_kbc_params=cc_kbc::${CC_KBS_ADDR}"`.
|
||||
|
||||
The `agent.aa_kbc_params` option is a general configuration for attestation.
|
||||
For your use case, you need to set the IP address and port under which KBS
|
||||
is reachable through the `CC_KBS_ADDR` variable (see our CI sample). This
|
||||
tells the guest how to reach KBS. Something like this must be set whenever
|
||||
attestation is used, but on its own this parameter does not trigger
|
||||
attestation. The `agent.image_registry_auth` option tells the guest to ask
|
||||
for a resource from KBS and use it as the authentication configuration. When
|
||||
this is set, the guest will request this resource at boot (and trigger
|
||||
attestation) regardless of which image is being pulled.
|
||||
|
||||
To deploy your own pods using authenticated container images, or secure key
|
||||
release for attestation, follow steps similar to our mentioned CI samples.
|
||||
|
||||
#### Deploy pods with Kata agent security policies
|
||||
|
||||
With GPU passthrough being supported by the
|
||||
[genpolicy tool](https://github.com/kata-containers/kata-containers/tree/main/src/tools/genpolicy),
|
||||
you can use the tool to create a Kata agent security policy. Our CI deploys
|
||||
all sample pod manifests with a Kata agent security policy.
|
||||
|
||||
#### Deploy pods using your own containers and manifests
|
||||
|
||||
You can author pod manifests leveraging your own containers, for instance,
|
||||
containers built using the CUDA container toolkit. We recommend starting
|
||||
with a CUDA base container.
|
||||
|
||||
The GPU is transitioned into the `Ready` state via attestation, for instance,
|
||||
when pulling authenticated images. If your deployment scenario does not use
|
||||
attestation, please refer back to the CUDA vectorAdd pod manifest. In this
|
||||
manifest, we ensure that NVRC sets the GPU to `Ready` state by adding the
|
||||
following annotation in the manifest:
|
||||
`io.katacontainers.config.hypervisor.kernel_params: "nvrc.smi.srs=1"`
|
||||
|
||||
> **Notes:**
|
||||
>
|
||||
> - musl-based container images (e.g., using Alpine), or distro-less
|
||||
> containers are not supported.
|
||||
> - for the TEE scenario, only single-GPU passthrough per pod is supported,
|
||||
> so your pod resource limit must be: `nvidia.com/pgpu: "1"` (on a system
|
||||
> with multiple GPUs, you can thus pass through one GPU per pod).
|
||||
@@ -1,25 +1,10 @@
|
||||
# Using NVIDIA GPU device with Kata Containers
|
||||
|
||||
This page gives an overview on the different modes in which GPUs can be passed
|
||||
to a Kata Containers container, provides host system requirements, explains how
|
||||
Kata Containers guest components can be built to support the NVIDIA GPU
|
||||
scenario, and gives practical usage examples using `ctr`.
|
||||
|
||||
Please see the guide
|
||||
[Enabling NVIDIA GPU workloads using GPU passthrough with Kata Containers](NVIDIA-GPU-passthrough-and-Kata-QEMU.md)
|
||||
for documentation of an end-to-end reference implementation of a Kata
|
||||
Containers stack for GPU passthrough using QEMU, the go-based Kata Runtime,
|
||||
and an NVIDIA-specific root filesystem. This reference implementation is built
|
||||
and validated in Kata's CI, and it can be used to test GPU workloads with Kata
|
||||
components and Kubernetes out of the box.
|
||||
|
||||
## Comparison between Passthrough and vGPU Modes
|
||||
|
||||
An NVIDIA GPU device can be passed to a Kata Containers container using GPU
|
||||
passthrough (NVIDIA GPU passthrough mode) as well as GPU mediated passthrough
|
||||
passthrough (NVIDIA GPU pass-through mode) as well as GPU mediated passthrough
|
||||
(NVIDIA `vGPU` mode).
|
||||
|
||||
NVIDIA GPU passthrough mode, an entire physical GPU is directly assigned to one
|
||||
NVIDIA GPU pass-through mode, an entire physical GPU is directly assigned to one
|
||||
VM, bypassing the NVIDIA Virtual GPU Manager. In this mode of operation, the GPU
|
||||
is accessed exclusively by the NVIDIA driver running in the VM to which it is
|
||||
assigned. The GPU is not shared among VMs.
|
||||
@@ -35,20 +20,18 @@ with [MIG-slices](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/).
|
||||
|
||||
| Technology | Description | Behavior | Detail |
|
||||
| --- | --- | --- | --- |
|
||||
| NVIDIA GPU passthrough mode | GPU passthrough | Physical GPU assigned to a single VM | Direct GPU assignment to VM without limitation |
|
||||
| NVIDIA GPU pass-through mode | GPU passthrough | Physical GPU assigned to a single VM | Direct GPU assignment to VM without limitation |
|
||||
| NVIDIA vGPU time-sliced | GPU time-sliced | Physical GPU time-sliced for multiple VMs | Mediated passthrough |
|
||||
| NVIDIA vGPU MIG-backed | GPU with MIG-slices | Physical GPU MIG-sliced for multiple VMs | Mediated passthrough |
|
||||
|
||||
## Host Requirements
|
||||
## Hardware Requirements
|
||||
|
||||
### Hardware
|
||||
|
||||
NVIDIA GPUs recommended for virtualization:
|
||||
NVIDIA GPUs Recommended for Virtualization:
|
||||
|
||||
- NVIDIA Tesla (T4, M10, P6, V100 or newer)
|
||||
- NVIDIA Quadro RTX 6000/8000
|
||||
|
||||
### Firmware
|
||||
## Host BIOS Requirements
|
||||
|
||||
Some hardware requires a larger PCI BARs window, for example, NVIDIA Tesla P100,
|
||||
K40m
|
||||
@@ -72,7 +55,9 @@ Some hardware vendors use a different name in BIOS, such as:
|
||||
If one is using a GPU based on the Ampere architecture and later additionally
|
||||
SR-IOV needs to be enabled for the `vGPU` use-case.
|
||||
|
||||
### Kernel
|
||||
The following steps outline the workflow for using an NVIDIA GPU with Kata.
|
||||
|
||||
## Host Kernel Requirements
|
||||
|
||||
The following configurations need to be enabled on your host kernel:
|
||||
|
||||
@@ -85,13 +70,7 @@ The following configurations need to be enabled on your host kernel:
|
||||
Your host kernel needs to be booted with `intel_iommu=on` on the kernel command
|
||||
line.
|
||||
|
||||
## Build the Kata Components
|
||||
|
||||
This section explains how to build an environment with Kata Containers bits
|
||||
supporting the GPU scenario. We first deploy and configure the regular Kata
|
||||
components, then describe how to build the guest kernel and root filesystem.
|
||||
|
||||
### Install and configure Kata Containers
|
||||
## Install and configure Kata Containers
|
||||
|
||||
To use non-large BARs devices (for example, NVIDIA Tesla T4), you need Kata
|
||||
version 1.3.0 or above. Follow the [Kata Containers setup
|
||||
@@ -122,7 +101,7 @@ hotplug_vfio_on_root_bus = true
|
||||
pcie_root_port = 1
|
||||
```
|
||||
|
||||
### Build guest kernel with GPU support
|
||||
## Build Kata Containers kernel with GPU support
|
||||
|
||||
The default guest kernel installed with Kata Containers does not provide GPU
|
||||
support. To use an NVIDIA GPU with Kata Containers, you need to build a kernel
|
||||
@@ -181,11 +160,11 @@ code, using `Dragonball VMM` for NVIDIA GPU `hot-plug/hot-unplug` requires apply
|
||||
addition to the above kernel configuration items. Follow these steps to build for NVIDIA GPU `hot-[un]plug`
|
||||
for `Dragonball`:
|
||||
|
||||
```sh
|
||||
# Prepare .config to support both upcall and nvidia gpu
|
||||
```sh
|
||||
# Prepare .config to support both upcall and nvidia gpu
|
||||
$ ./build-kernel.sh -v 5.10.25 -e -t dragonball -g nvidia -f setup
|
||||
|
||||
# Build guest kernel to support both upcall and nvidia gpu
|
||||
# Build guest kernel to support both upcall and nvidia gpu
|
||||
$ ./build-kernel.sh -v 5.10.25 -e -t dragonball -g nvidia build
|
||||
|
||||
# Install guest kernel to support both upcall and nvidia gpu
|
||||
@@ -217,7 +196,303 @@ Before using the new guest kernel, please update the `kernel` parameters in
|
||||
kernel = "/usr/share/kata-containers/vmlinuz-nvidia-gpu.container"
|
||||
```
|
||||
|
||||
### Build Guest OS with NVIDIA Driver and Toolkit
|
||||
## NVIDIA GPU pass-through mode with Kata Containers
|
||||
|
||||
Use the following steps to pass an NVIDIA GPU device in pass-through mode with Kata:
|
||||
|
||||
1. Find the Bus-Device-Function (BDF) for the GPU device on the host:
|
||||
|
||||
```sh
|
||||
$ sudo lspci -nn -D | grep -i nvidia
|
||||
0000:d0:00.0 3D controller [0302]: NVIDIA Corporation Device [10de:20b9] (rev a1)
|
||||
```
|
||||
|
||||
> PCI address `0000:d0:00.0` is assigned to the hardware GPU device.
|
||||
> `10de:20b9` is the device ID of the hardware GPU device.
|
||||
|
||||
2. Find the IOMMU group for the GPU device:
|
||||
|
||||
```sh
|
||||
$ BDF="0000:d0:00.0"
|
||||
$ readlink -e /sys/bus/pci/devices/$BDF/iommu_group
|
||||
```
|
||||
|
||||
The previous output shows that the GPU belongs to IOMMU group 192. The next
|
||||
step is to bind the GPU to the VFIO-PCI driver.
|
||||
|
||||
```sh
|
||||
$ BDF="0000:d0:00.0"
|
||||
$ DEV="/sys/bus/pci/devices/$BDF"
|
||||
$ echo "vfio-pci" > $DEV/driver_override
|
||||
$ echo $BDF > $DEV/driver/unbind
|
||||
$ echo $BDF > /sys/bus/pci/drivers_probe
|
||||
# To return the device to the standard driver, we simply clear the
|
||||
# driver_override and reprobe the device, ex:
|
||||
$ echo > $DEV/driver_override
|
||||
$ echo $BDF > $DEV/driver/unbind
|
||||
$ echo $BDF > /sys/bus/pci/drivers_probe
|
||||
```
|
||||
|
||||
3. Check the IOMMU group number under `/dev/vfio`:
|
||||
|
||||
```sh
|
||||
$ ls -l /dev/vfio
|
||||
total 0
|
||||
crw------- 1 zvonkok zvonkok 243, 0 Mar 18 03:06 192
|
||||
crw-rw-rw- 1 root root 10, 196 Mar 18 02:27 vfio
|
||||
```
|
||||
|
||||
4. Start a Kata container with the GPU device:
|
||||
|
||||
```sh
|
||||
# You may need to `modprobe vhost-vsock` if you get
|
||||
# host system doesn't support vsock: stat /dev/vhost-vsock
|
||||
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch uname -r
|
||||
```
|
||||
|
||||
5. Run `lspci` within the container to verify the GPU device is seen in the list
|
||||
of the PCI devices. Note the vendor-device id of the GPU (`10de:20b9`) in the `lspci` output.
|
||||
|
||||
```sh
|
||||
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch sh -c "lspci -nn | grep '10de:20b9'"
|
||||
```
|
||||
|
||||
6. Additionally, you can check the PCI BARs space of the NVIDIA GPU device in the container:
|
||||
|
||||
```sh
|
||||
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch sh -c "lspci -s 02:00.0 -vv | grep Region"
|
||||
```
|
||||
|
||||
> **Note**: If you see a message similar to the above, the BAR space of the NVIDIA
|
||||
> GPU has been successfully allocated.
|
||||
|
||||
## NVIDIA vGPU mode with Kata Containers
|
||||
|
||||
NVIDIA vGPU is a licensed product on all supported GPU boards. A software license
|
||||
is required to enable all vGPU features within the guest VM. NVIDIA vGPU manager
|
||||
needs to be installed on the host to configure GPUs in vGPU mode. See [NVIDIA Virtual GPU Software Documentation v14.0 through 14.1](https://docs.nvidia.com/grid/14.0/) for more details.
|
||||
|
||||
### NVIDIA vGPU time-sliced
|
||||
|
||||
In the time-sliced mode, the GPU is not partitioned and the workload uses the
|
||||
whole GPU and shares access to the GPU engines. Processes are scheduled in
|
||||
series. The best effort scheduler is the default one and can be exchanged by
|
||||
other scheduling policies; see the documentation above for how to do that.
|
||||
|
||||
If you had `MIG` enabled before, be sure to disable `MIG` on the GPU if you want
|
||||
to use `time-sliced` `vGPU`.
|
||||
|
||||
```sh
|
||||
$ sudo nvidia-smi -mig 0
|
||||
```
|
||||
|
||||
Enable the virtual functions for the physical GPU in the `sysfs` file system.
|
||||
|
||||
```sh
|
||||
$ sudo /usr/lib/nvidia/sriov-manage -e 0000:41:00.0
|
||||
```
|
||||
|
||||
Get the `BDF` of the available virtual function on the GPU, and choose one for the
|
||||
following steps.
|
||||
|
||||
```sh
|
||||
$ cd /sys/bus/pci/devices/0000:41:00.0/
|
||||
$ ls -l | grep virtfn
|
||||
```
|
||||
|
||||
#### List all available vGPU instances
|
||||
|
||||
The following shell snippet will walk the `sysfs` and only print instances
|
||||
that are available, that can be created.
|
||||
|
||||
```sh
|
||||
# The 00.0 is often the PF of the device. The VFs will have the function in the
|
||||
# BDF incremented by some values so e.g. the very first VF is 0000:41:00.4
|
||||
|
||||
cd /sys/bus/pci/devices/0000:41:00.0/
|
||||
|
||||
for vf in $(ls -d virtfn*)
|
||||
do
|
||||
BDF=$(basename $(readlink -f $vf))
|
||||
for md in $(ls -d $vf/mdev_supported_types/*)
|
||||
do
|
||||
AVAIL=$(cat $md/available_instances)
|
||||
NAME=$(cat $md/name)
|
||||
DIR=$(basename $md)
|
||||
|
||||
if [ $AVAIL -gt 0 ]; then
|
||||
echo "| BDF | INSTANCES | NAME | DIR |"
|
||||
echo "+--------------+-----------+----------------+------------+"
|
||||
printf "| %12s |%10d |%15s | %10s |\n\n" "$BDF" "$AVAIL" "$NAME" "$DIR"
|
||||
fi
|
||||
|
||||
done
|
||||
done
|
||||
```
|
||||
|
||||
If there are available instances you get something like this (for the first VF),
|
||||
beware that the output is highly dependent on the GPU you have, if there is no
|
||||
output check again if `MIG` is really disabled.
|
||||
|
||||
```sh
|
||||
| BDF | INSTANCES | NAME | DIR |
|
||||
+--------------+-----------+----------------+------------+
|
||||
| 0000:41:00.4 | 1 | GRID A100D-4C | nvidia-692 |
|
||||
|
||||
| BDF | INSTANCES | NAME | DIR |
|
||||
+--------------+-----------+----------------+------------+
|
||||
| 0000:41:00.4 | 1 | GRID A100D-8C | nvidia-693 |
|
||||
|
||||
| BDF | INSTANCES | NAME | DIR |
|
||||
+--------------+-----------+----------------+------------+
|
||||
| 0000:41:00.4 | 1 | GRID A100D-10C | nvidia-694 |
|
||||
|
||||
| BDF | INSTANCES | NAME | DIR |
|
||||
+--------------+-----------+----------------+------------+
|
||||
| 0000:41:00.4 | 1 | GRID A100D-16C | nvidia-695 |
|
||||
|
||||
| BDF | INSTANCES | NAME | DIR |
|
||||
+--------------+-----------+----------------+------------+
|
||||
| 0000:41:00.4 | 1 | GRID A100D-20C | nvidia-696 |
|
||||
|
||||
| BDF | INSTANCES | NAME | DIR |
|
||||
+--------------+-----------+----------------+------------+
|
||||
| 0000:41:00.4 | 1 | GRID A100D-40C | nvidia-697 |
|
||||
|
||||
| BDF | INSTANCES | NAME | DIR |
|
||||
+--------------+-----------+----------------+------------+
|
||||
| 0000:41:00.4 | 1 | GRID A100D-80C | nvidia-698 |
|
||||
|
||||
```
|
||||
|
||||
Change to the `mdev_supported_types` directory for the virtual function on which
|
||||
you want to create the `vGPU`. Taking the first output as an example:
|
||||
|
||||
```sh
|
||||
$ cd virtfn0/mdev_supported_types/nvidia-692
|
||||
$ UUIDGEN=$(uuidgen)
|
||||
$ sudo bash -c "echo $UUIDGEN > create"
|
||||
```
|
||||
|
||||
Confirm that the `vGPU` was created. You should see the `UUID` pointing to a
|
||||
subdirectory of the `sysfs` space.
|
||||
|
||||
```sh
|
||||
$ ls -l /sys/bus/mdev/devices/
|
||||
```
|
||||
|
||||
Get the `IOMMU` group number and verify there is a `VFIO` device created to use
|
||||
with Kata.
|
||||
|
||||
```sh
|
||||
$ ls -l /sys/bus/mdev/devices/*/
|
||||
$ ls -l /dev/vfio
|
||||
```
|
||||
|
||||
Use the `VFIO` device created in the same way as in the pass-through use-case.
|
||||
Beware that the guest needs the NVIDIA guest drivers, so one would need to build
|
||||
a new guest `OS` image.
|
||||
|
||||
### NVIDIA vGPU MIG-backed
|
||||
|
||||
We're not going into detail about what `MIG` is, but briefly, it is a technology to
|
||||
partition the hardware into independent instances with guaranteed quality of
|
||||
service. For more details see [NVIDIA Multi-Instance GPU User Guide](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/).
|
||||
|
||||
First enable `MIG` mode for a GPU, depending on the platform you're running
|
||||
a reboot would be necessary. Some platforms support GPU reset.
|
||||
|
||||
```sh
|
||||
$ sudo nvidia-smi -mig 1
|
||||
```
|
||||
|
||||
If the platform supports a GPU reset, one can run the command below; otherwise you will get a
|
||||
warning to reboot the server.
|
||||
|
||||
```sh
|
||||
$ sudo nvidia-smi --gpu-reset
|
||||
```
|
||||
|
||||
The driver by default provides a number of profiles that users can opt into when
|
||||
configuring the MIG feature.
|
||||
|
||||
```sh
|
||||
$ sudo nvidia-smi mig -lgip
|
||||
+-----------------------------------------------------------------------------+
|
||||
| GPU instance profiles: |
|
||||
| GPU Name ID Instances Memory P2P SM DEC ENC |
|
||||
| Free/Total GiB CE JPEG OFA |
|
||||
|=============================================================================|
|
||||
| 0 MIG 1g.10gb 19 7/7 9.50 No 14 0 0 |
|
||||
| 1 0 0 |
|
||||
+-----------------------------------------------------------------------------+
|
||||
| 0 MIG 1g.10gb+me 20 1/1 9.50 No 14 1 0 |
|
||||
| 1 1 1 |
|
||||
+-----------------------------------------------------------------------------+
|
||||
| 0 MIG 2g.20gb 14 3/3 19.50 No 28 1 0 |
|
||||
| 2 0 0 |
|
||||
+-----------------------------------------------------------------------------+
|
||||
...
|
||||
```
|
||||
|
||||
Create the GPU instances that correspond to the `vGPU` types of the `MIG-backed`
|
||||
`vGPUs` that you will create, see [NVIDIA A100 PCIe 80GB Virtual GPU Types](https://docs.nvidia.com/grid/13.0/grid-vgpu-user-guide/index.html#vgpu-types-nvidia-a100-pcie-80gb).
|
||||
|
||||
```sh
|
||||
# MIG 1g.10gb --> vGPU A100D-1-10C
|
||||
$ sudo nvidia-smi mig -cgi 19
|
||||
```
|
||||
|
||||
List the GPU instances and get the GPU instance id to create the compute
|
||||
instance.
|
||||
|
||||
```sh
|
||||
$ sudo nvidia-smi mig -lgi # list the created GPU instances
|
||||
$ sudo nvidia-smi mig -cci -gi 9 # each GPU instance can have several compute
|
||||
# instances. Instance -> Workload
|
||||
```
|
||||
|
||||
Verify that the compute instances were created within the GPU instance
|
||||
|
||||
```sh
|
||||
$ nvidia-smi
|
||||
... snip ...
|
||||
+-----------------------------------------------------------------------------+
|
||||
| MIG devices: |
|
||||
+------------------+----------------------+-----------+-----------------------+
|
||||
| GPU GI CI MIG | Memory-Usage | Vol| Shared |
|
||||
| ID ID Dev | BAR1-Usage | SM Unc| CE ENC DEC OFA JPG|
|
||||
| | | ECC| |
|
||||
|==================+======================+===========+=======================|
|
||||
| 0 9 0 0 | 0MiB / 9728MiB | 14 0 | 1 0 0 0 0 |
|
||||
| | 0MiB / 4095MiB | | |
|
||||
+------------------+----------------------+-----------+-----------------------+
|
||||
... snip ...
|
||||
```
|
||||
|
||||
We can use the [snippet](#list-all-available-vgpu-instances) from before to list
|
||||
the available `vGPU` instances, this time `MIG-backed`.
|
||||
|
||||
```sh
|
||||
| BDF | INSTANCES | NAME | DIR |
|
||||
+--------------+-----------+----------------+------------+
|
||||
| 0000:41:00.4 | 1 |GRID A100D-1-10C | nvidia-699 |
|
||||
|
||||
| BDF | INSTANCES | NAME | DIR |
|
||||
+--------------+-----------+----------------+------------+
|
||||
| 0000:41:00.5 | 1 |GRID A100D-1-10C | nvidia-699 |
|
||||
|
||||
| BDF | INSTANCES | NAME | DIR |
|
||||
+--------------+-----------+----------------+------------+
|
||||
| 0000:41:01.6 | 1 |GRID A100D-1-10C | nvidia-699 |
|
||||
... snip ...
|
||||
```
|
||||
|
||||
Repeat the steps after the [snippet](#list-all-available-vgpu-instances) listing
|
||||
to create the corresponding `mdev` device and use the guest `OS` created in the
|
||||
previous section with `time-sliced` `vGPUs`.
|
||||
|
||||
## Install NVIDIA Driver + Toolkit in Kata Containers Guest OS
|
||||
|
||||
Consult the [Developer-Guide](https://github.com/kata-containers/kata-containers/blob/main/docs/Developer-Guide.md#create-a-rootfs-image) on how to create a
|
||||
rootfs base image for a distribution of your choice. This is going to be used as
|
||||
@@ -308,12 +583,9 @@ Enable the `guest_hook_path` in Kata's `configuration.toml`
|
||||
guest_hook_path = "/usr/share/oci/hooks"
|
||||
```
|
||||
|
||||
As the last step one can remove the additional packages and files that were added
|
||||
to the `$ROOTFS_DIR` to keep it as small as possible.
|
||||
|
||||
Having built an NVIDIA rootfs and kernel, we can now run any GPU container
|
||||
without installing the drivers into the container. Check NVIDIA device status
|
||||
with `nvidia-smi`:
|
||||
with `nvidia-smi`
|
||||
|
||||
```sh
|
||||
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/nvidia/cuda:11.6.0-base-ubuntu20.04" cuda nvidia-smi
|
||||
@@ -339,309 +611,8 @@ Fri Mar 18 10:36:59 2022
|
||||
+-----------------------------------------------------------------------------+
|
||||
```
|
||||
|
||||
## Usage Examples with Kata Containers
|
||||
|
||||
The following sections give usage examples for this based on the different modes.
|
||||
|
||||
### NVIDIA GPU passthrough mode
|
||||
|
||||
Use the following steps to pass an NVIDIA GPU device in passthrough mode with Kata:
|
||||
|
||||
1. Find the Bus-Device-Function (BDF) for the GPU device on the host:
|
||||
|
||||
```sh
|
||||
$ sudo lspci -nn -D | grep -i nvidia
|
||||
0000:d0:00.0 3D controller [0302]: NVIDIA Corporation Device [10de:20b9] (rev a1)
|
||||
```
|
||||
|
||||
> PCI address `0000:d0:00.0` is assigned to the hardware GPU device.
|
||||
> `10de:20b9` is the device ID of the hardware GPU device.
|
||||
|
||||
2. Find the IOMMU group for the GPU device:
|
||||
|
||||
```sh
|
||||
$ BDF="0000:d0:00.0"
|
||||
$ readlink -e /sys/bus/pci/devices/$BDF/iommu_group
|
||||
```
|
||||
|
||||
The previous output shows that the GPU belongs to IOMMU group 192. The next
|
||||
step is to bind the GPU to the VFIO-PCI driver.
|
||||
|
||||
```sh
|
||||
$ BDF="0000:d0:00.0"
|
||||
$ DEV="/sys/bus/pci/devices/$BDF"
|
||||
$ echo "vfio-pci" > $DEV/driver_override
|
||||
$ echo $BDF > $DEV/driver/unbind
|
||||
$ echo $BDF > /sys/bus/pci/drivers_probe
|
||||
# To return the device to the standard driver, we simply clear the
|
||||
# driver_override and reprobe the device, ex:
|
||||
$ echo > $DEV/driver_override
|
||||
$ echo $BDF > $DEV/driver/unbind
|
||||
$ echo $BDF > /sys/bus/pci/drivers_probe
|
||||
```
|
||||
|
||||
3. Check the IOMMU group number under `/dev/vfio`:
|
||||
|
||||
```sh
|
||||
$ ls -l /dev/vfio
|
||||
total 0
|
||||
crw------- 1 zvonkok zvonkok 243, 0 Mar 18 03:06 192
|
||||
crw-rw-rw- 1 root root 10, 196 Mar 18 02:27 vfio
|
||||
```
|
||||
|
||||
4. Start a Kata container with the GPU device:
|
||||
|
||||
```sh
|
||||
# You may need to `modprobe vhost-vsock` if you get
|
||||
# host system doesn't support vsock: stat /dev/vhost-vsock
|
||||
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch uname -r
|
||||
```
|
||||
|
||||
5. Run `lspci` within the container to verify the GPU device is seen in the list
|
||||
of the PCI devices. Note the vendor-device id of the GPU (`10de:20b9`) in the `lspci` output.
|
||||
|
||||
```sh
|
||||
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch sh -c "lspci -nn | grep '10de:20b9'"
|
||||
```
|
||||
|
||||
6. Additionally, you can check the PCI BARs space of the NVIDIA GPU device in the container:
|
||||
|
||||
```sh
|
||||
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch sh -c "lspci -s 02:00.0 -vv | grep Region"
|
||||
```
|
||||
|
||||
> **Note**: If you see a message similar to the above, the BAR space of the NVIDIA
|
||||
> GPU has been successfully allocated.
|
||||
|
||||
### NVIDIA vGPU mode
|
||||
|
||||
NVIDIA vGPU is a licensed product on all supported GPU boards. A software license
|
||||
is required to enable all vGPU features within the guest VM. NVIDIA vGPU manager
|
||||
needs to be installed on the host to configure GPUs in vGPU mode. See
|
||||
[NVIDIA Virtual GPU Software Documentation v14.0 through 14.1](https://docs.nvidia.com/grid/14.0/)
|
||||
for more details.
|
||||
|
||||
#### NVIDIA vGPU time-sliced
|
||||
|
||||
In the time-sliced mode, the GPU is not partitioned and the workload uses the
|
||||
whole GPU and shares access to the GPU engines. Processes are scheduled in
|
||||
series. The best effort scheduler is the default one and can be exchanged by
|
||||
other scheduling policies; see the documentation above for how to do that.
|
||||
|
||||
If you had `MIG` enabled before, be sure to disable `MIG` on the GPU if you want
|
||||
to use `time-sliced` `vGPU`.
|
||||
|
||||
```sh
|
||||
$ sudo nvidia-smi -mig 0
|
||||
```
|
||||
|
||||
Enable the virtual functions for the physical GPU in the `sysfs` file system.
|
||||
|
||||
```sh
|
||||
$ sudo /usr/lib/nvidia/sriov-manage -e 0000:41:00.0
|
||||
```
|
||||
|
||||
Get the `BDF` of the available virtual function on the GPU, and choose one for the
|
||||
following steps.
|
||||
|
||||
```sh
|
||||
$ cd /sys/bus/pci/devices/0000:41:00.0/
|
||||
$ ls -l | grep virtfn
|
||||
```
|
||||
|
||||
##### List all available vGPU instances
|
||||
|
||||
The following shell snippet will walk the `sysfs` and only print instances
|
||||
that are available, that can be created.
|
||||
|
||||
```sh
|
||||
# The 00.0 is often the PF of the device. The VFs will have the function in the
|
||||
# BDF incremented by some values so e.g. the very first VF is 0000:41:00.4
|
||||
|
||||
cd /sys/bus/pci/devices/0000:41:00.0/
|
||||
|
||||
for vf in $(ls -d virtfn*)
|
||||
do
|
||||
BDF=$(basename $(readlink -f $vf))
|
||||
for md in $(ls -d $vf/mdev_supported_types/*)
|
||||
do
|
||||
AVAIL=$(cat $md/available_instances)
|
||||
NAME=$(cat $md/name)
|
||||
DIR=$(basename $md)
|
||||
|
||||
if [ $AVAIL -gt 0 ]; then
|
||||
echo "| BDF | INSTANCES | NAME | DIR |"
|
||||
echo "+--------------+-----------+----------------+------------+"
|
||||
printf "| %12s |%10d |%15s | %10s |\n\n" "$BDF" "$AVAIL" "$NAME" "$DIR"
|
||||
fi
|
||||
|
||||
done
|
||||
done
|
||||
```
|
||||
|
||||
If there are available instances you get something like this (for the first VF),
|
||||
beware that the output is highly dependent on the GPU you have, if there is no
|
||||
output check again if `MIG` is really disabled.
|
||||
|
||||
```sh
|
||||
| BDF | INSTANCES | NAME | DIR |
|
||||
+--------------+-----------+----------------+------------+
|
||||
| 0000:41:00.4 | 1 | GRID A100D-4C | nvidia-692 |
|
||||
|
||||
| BDF | INSTANCES | NAME | DIR |
|
||||
+--------------+-----------+----------------+------------+
|
||||
| 0000:41:00.4 | 1 | GRID A100D-8C | nvidia-693 |
|
||||
|
||||
| BDF | INSTANCES | NAME | DIR |
|
||||
+--------------+-----------+----------------+------------+
|
||||
| 0000:41:00.4 | 1 | GRID A100D-10C | nvidia-694 |
|
||||
|
||||
| BDF | INSTANCES | NAME | DIR |
|
||||
+--------------+-----------+----------------+------------+
|
||||
| 0000:41:00.4 | 1 | GRID A100D-16C | nvidia-695 |
|
||||
|
||||
| BDF | INSTANCES | NAME | DIR |
|
||||
+--------------+-----------+----------------+------------+
|
||||
| 0000:41:00.4 | 1 | GRID A100D-20C | nvidia-696 |
|
||||
|
||||
| BDF | INSTANCES | NAME | DIR |
|
||||
+--------------+-----------+----------------+------------+
|
||||
| 0000:41:00.4 | 1 | GRID A100D-40C | nvidia-697 |
|
||||
|
||||
| BDF | INSTANCES | NAME | DIR |
|
||||
+--------------+-----------+----------------+------------+
|
||||
| 0000:41:00.4 | 1 | GRID A100D-80C | nvidia-698 |
|
||||
|
||||
```
|
||||
|
||||
Change to the `mdev_supported_types` directory for the virtual function on which
|
||||
you want to create the `vGPU`. Taking the first output as an example:
|
||||
|
||||
```sh
|
||||
$ cd virtfn0/mdev_supported_types/nvidia-692
|
||||
$ UUIDGEN=$(uuidgen)
|
||||
$ sudo bash -c "echo $UUIDGEN > create"
|
||||
```
|
||||
|
||||
Confirm that the `vGPU` was created. You should see the `UUID` pointing to a
|
||||
subdirectory of the `sysfs` space.
|
||||
|
||||
```sh
|
||||
$ ls -l /sys/bus/mdev/devices/
|
||||
```
|
||||
|
||||
Get the `IOMMU` group number and verify there is a `VFIO` device created to use
|
||||
with Kata.
|
||||
|
||||
```sh
|
||||
$ ls -l /sys/bus/mdev/devices/*/
|
||||
$ ls -l /dev/vfio
|
||||
```
|
||||
|
||||
Use the `VFIO` device created in the same way as in the passthrough use-case.
|
||||
Beware that the guest needs the NVIDIA guest drivers, so one would need to build
|
||||
a new guest `OS` image.
|
||||
|
||||
#### NVIDIA vGPU MIG-backed
|
||||
|
||||
We're not going into detail about what `MIG` is, but briefly, it is a technology to
|
||||
partition the hardware into independent instances with guaranteed quality of
|
||||
service. For more details see
|
||||
[NVIDIA Multi-Instance GPU User Guide](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/).
|
||||
|
||||
First enable `MIG` mode for a GPU, depending on the platform you're running
|
||||
a reboot would be necessary. Some platforms support GPU reset.
|
||||
|
||||
```sh
|
||||
$ sudo nvidia-smi -mig 1
|
||||
```
|
||||
|
||||
If the platform supports a GPU reset, one can run the command below; otherwise you will get a
|
||||
warning to reboot the server.
|
||||
|
||||
```sh
|
||||
$ sudo nvidia-smi --gpu-reset
|
||||
```
|
||||
|
||||
The driver by default provides a number of profiles that users can opt into when
|
||||
configuring the MIG feature.
|
||||
|
||||
```sh
|
||||
$ sudo nvidia-smi mig -lgip
|
||||
+-----------------------------------------------------------------------------+
|
||||
| GPU instance profiles: |
|
||||
| GPU Name ID Instances Memory P2P SM DEC ENC |
|
||||
| Free/Total GiB CE JPEG OFA |
|
||||
|=============================================================================|
|
||||
| 0 MIG 1g.10gb 19 7/7 9.50 No 14 0 0 |
|
||||
| 1 0 0 |
|
||||
+-----------------------------------------------------------------------------+
|
||||
| 0 MIG 1g.10gb+me 20 1/1 9.50 No 14 1 0 |
|
||||
| 1 1 1 |
|
||||
+-----------------------------------------------------------------------------+
|
||||
| 0 MIG 2g.20gb 14 3/3 19.50 No 28 1 0 |
|
||||
| 2 0 0 |
|
||||
+-----------------------------------------------------------------------------+
|
||||
...
|
||||
```
|
||||
|
||||
Create the GPU instances that correspond to the `vGPU` types of the `MIG-backed`
|
||||
`vGPUs` that you will create, see
|
||||
[NVIDIA A100 PCIe 80GB Virtual GPU Types](https://docs.nvidia.com/grid/13.0/grid-vgpu-user-guide/index.html#vgpu-types-nvidia-a100-pcie-80gb).
|
||||
|
||||
```sh
|
||||
# MIG 1g.10gb --> vGPU A100D-1-10C
|
||||
$ sudo nvidia-smi mig -cgi 19
|
||||
```
|
||||
|
||||
List the GPU instances and get the GPU instance id to create the compute
|
||||
instance.
|
||||
|
||||
```sh
|
||||
$ sudo nvidia-smi mig -lgi # list the created GPU instances
|
||||
$ sudo nvidia-smi mig -cci -gi 9 # each GPU instance can have several compute
|
||||
# instances. Instance -> Workload
|
||||
```
|
||||
|
||||
Verify that the compute instances were created within the GPU instance
|
||||
|
||||
```sh
|
||||
$ nvidia-smi
|
||||
... snip ...
|
||||
+-----------------------------------------------------------------------------+
|
||||
| MIG devices: |
|
||||
+------------------+----------------------+-----------+-----------------------+
|
||||
| GPU GI CI MIG | Memory-Usage | Vol| Shared |
|
||||
| ID ID Dev | BAR1-Usage | SM Unc| CE ENC DEC OFA JPG|
|
||||
| | | ECC| |
|
||||
|==================+======================+===========+=======================|
|
||||
| 0 9 0 0 | 0MiB / 9728MiB | 14 0 | 1 0 0 0 0 |
|
||||
| | 0MiB / 4095MiB | | |
|
||||
+------------------+----------------------+-----------+-----------------------+
|
||||
... snip ...
|
||||
```
|
||||
|
||||
We can use the [snippet](#list-all-available-vgpu-instances) from before to list
|
||||
the available `vGPU` instances, this time `MIG-backed`.
|
||||
|
||||
```sh
|
||||
| BDF | INSTANCES | NAME | DIR |
|
||||
+--------------+-----------+----------------+------------+
|
||||
| 0000:41:00.4 | 1 |GRID A100D-1-10C | nvidia-699 |
|
||||
|
||||
| BDF | INSTANCES | NAME | DIR |
|
||||
+--------------+-----------+----------------+------------+
|
||||
| 0000:41:00.5 | 1 |GRID A100D-1-10C | nvidia-699 |
|
||||
|
||||
| BDF | INSTANCES | NAME | DIR |
|
||||
+--------------+-----------+----------------+------------+
|
||||
| 0000:41:01.6 | 1 |GRID A100D-1-10C | nvidia-699 |
|
||||
... snip ...
|
||||
```
|
||||
|
||||
Repeat the steps after the [snippet](#list-all-available-vgpu-instances) listing
|
||||
to create the corresponding `mdev` device and use the guest `OS` created in the
|
||||
previous section with `time-sliced` `vGPUs`.
|
||||
As the last step one can remove the additional packages and files that were added
|
||||
to the `$ROOTFS_DIR` to keep it as small as possible.
|
||||
|
||||
## References
|
||||
|
||||
|
||||
@@ -1,20 +1,24 @@
|
||||
# Table of Contents
|
||||
|
||||
**Note:** This guide used to contain an end-to-end flow to build a
|
||||
custom Kata containers root filesystem with QAT out-of-tree SR-IOV virtual
|
||||
function driver and run QAT enabled containers. The former is no longer necessary
|
||||
so the instructions are dropped. If the use-case is still of interest, please file
|
||||
an issue in either of the QAT Kubernetes specific repos linked below.
|
||||
|
||||
# Introduction
|
||||
|
||||
Intel® QuickAssist Technology (QAT) provides hardware acceleration
|
||||
for security (cryptography) and compression. Kata Containers can enable
|
||||
these acceleration functions for containers using QAT SR-IOV with the
|
||||
support from [Intel QAT Device Plugin for Kubernetes](https://github.com/intel/intel-device-plugins-for-kubernetes)
|
||||
or [Intel QAT DRA Resource Driver for Kubernetes](https://github.com/intel/intel-resource-drivers-for-kubernetes).
|
||||
for security (cryptography) and compression. These instructions cover the
|
||||
steps for the latest [Ubuntu LTS release](https://ubuntu.com/download/desktop)
|
||||
which already includes the QAT host driver. These instructions can be adapted to
|
||||
any Linux distribution. These instructions guide the user on how to download
|
||||
the kernel sources, compile kernel driver modules against those sources, and
|
||||
load them onto the host as well as preparing a specially built Kata Containers
|
||||
kernel and custom Kata Containers rootfs.
|
||||
|
||||
## More Information
|
||||
* Download kernel sources
|
||||
* Compile Kata kernel
|
||||
* Compile kernel driver modules against those sources
|
||||
* Download rootfs
|
||||
* Add driver modules to rootfs
|
||||
* Build rootfs image
|
||||
|
||||
## Helpful Links before starting
|
||||
|
||||
[Intel® QuickAssist Technology at `01.org`](https://www.intel.com/content/www/us/en/developer/topic-technology/open/quick-assist-technology/overview.html)
|
||||
|
||||
@@ -22,6 +26,554 @@ or [Intel QAT DRA Resource Driver for Kubernetes](https://github.com/intel/intel
|
||||
|
||||
[Intel Device Plugin for Kubernetes](https://github.com/intel/intel-device-plugins-for-kubernetes)
|
||||
|
||||
[Intel DRA Resource Driver for Kubernetes](https://github.com/intel/intel-resource-drivers-for-kubernetes)
|
||||
|
||||
[Intel® QuickAssist Technology for Crypto Poll Mode Driver](https://dpdk-docs.readthedocs.io/en/latest/cryptodevs/qat.html)
|
||||
|
||||
## Steps to enable Intel® QAT in Kata Containers
|
||||
|
||||
There are some steps to complete only once, some steps to complete with every
|
||||
reboot, and some steps to complete when the host kernel changes.
|
||||
|
||||
## Script variables
|
||||
|
||||
The following list of variables must be set before running through the
|
||||
scripts. These variables refer to locations to store modules and configuration
|
||||
files on the host and links to the drivers to use. Modify these as
|
||||
needed to point to updated drivers or different install locations.
|
||||
|
||||
### Set environment variables (Every Reboot)
|
||||
|
||||
Make sure to check [`01.org`](https://www.intel.com/content/www/us/en/developer/topic-technology/open/quick-assist-technology/overview.html) for
|
||||
the latest driver.
|
||||
|
||||
```bash
|
||||
$ export QAT_DRIVER_VER=qat1.7.l.4.14.0-00031.tar.gz
|
||||
$ export QAT_DRIVER_URL=https://downloadmirror.intel.com/30178/eng/${QAT_DRIVER_VER}
|
||||
$ export QAT_CONF_LOCATION=~/QAT_conf
|
||||
$ export QAT_DOCKERFILE=https://raw.githubusercontent.com/intel/intel-device-plugins-for-kubernetes/main/demo/openssl-qat-engine/Dockerfile
|
||||
$ export QAT_SRC=~/src/QAT
|
||||
$ export GOPATH=~/src/go
|
||||
$ export KATA_KERNEL_LOCATION=~/kata
|
||||
$ export KATA_ROOTFS_LOCATION=~/kata
|
||||
```
|
||||
|
||||
## Prepare the Ubuntu Host
|
||||
|
||||
The host could be a bare metal instance or a virtual machine. If using a
|
||||
virtual machine, make sure that KVM nesting is enabled. The following
|
||||
instructions reference an Intel® C62X chipset. Some of the instructions must be
|
||||
modified if using a different Intel® QAT device. The Intel® QAT chipset can be
|
||||
identified by executing the following.
|
||||
|
||||
### Identify which PCI Bus the Intel® QAT card is on
|
||||
|
||||
```bash
|
||||
$ for i in 0434 0435 37c8 1f18 1f19; do lspci -d 8086:$i; done
|
||||
```
|
||||
|
||||
### Install necessary packages for Ubuntu
|
||||
|
||||
These packages are necessary to compile the Kata kernel, Intel® QAT driver, and to
|
||||
prepare the rootfs for Kata. [Docker](https://docs.docker.com/engine/install/ubuntu/)
|
||||
also needs to be installed to be able to build the rootfs. To test that
|
||||
everything works a Kubernetes pod is started requesting Intel® QAT resources. For the
|
||||
pass through of the virtual functions the kernel boot parameter needs to have
|
||||
`intel_iommu=on`.
|
||||
|
||||
```bash
|
||||
$ sudo apt update
|
||||
$ sudo apt install -y golang-go build-essential python pkg-config zlib1g-dev libudev-dev bison libelf-dev flex libtool automake autotools-dev autoconf bc libpixman-1-dev coreutils libssl-dev
|
||||
$ sudo sed -i 's/GRUB_CMDLINE_LINUX_DEFAULT=""/GRUB_CMDLINE_LINUX_DEFAULT="intel_iommu=on"/' /etc/default/grub
|
||||
$ sudo update-grub
|
||||
$ sudo reboot
|
||||
```
|
||||
|
||||
### Download Intel® QAT drivers
|
||||
|
||||
This will download the [Intel® QAT drivers](https://www.intel.com/content/www/us/en/developer/topic-technology/open/quick-assist-technology/overview.html).
|
||||
Make sure to check the website for the latest version.
|
||||
|
||||
```bash
|
||||
$ mkdir -p $QAT_SRC
|
||||
$ cd $QAT_SRC
|
||||
$ curl -L $QAT_DRIVER_URL | tar zx
|
||||
```
|
||||
|
||||
### Copy Intel® QAT configuration files and enable virtual functions
|
||||
|
||||
Modify the instructions below as necessary if using a different Intel® QAT hardware
|
||||
platform. You can learn more about customizing configuration files at the
|
||||
[Intel® QAT Engine repository](https://github.com/intel/QAT_Engine/#copy-the-correct-intel-quickassist-technology-driver-config-files).
|
||||
This section starts from a base config file and changes the `SSL` section to
|
||||
`SHIM` to support the OpenSSL engine. There are more tweaks that you can make
|
||||
depending on the use case and how many Intel® QAT engines should be run. You
|
||||
can find more information about how to customize in the
|
||||
[Intel® QuickAssist Technology Software for Linux* - Programmer's Guide.](https://www.intel.com/content/www/us/en/content-details/709196/intel-quickassist-technology-api-programmer-s-guide.html)
|
||||
|
||||
> **Note: This section assumes that an Intel® QAT `c6xx` platform is used.**
|
||||
|
||||
```bash
|
||||
$ mkdir -p $QAT_CONF_LOCATION
|
||||
$ cp $QAT_SRC/quickassist/utilities/adf_ctl/conf_files/c6xxvf_dev0.conf.vm $QAT_CONF_LOCATION/c6xxvf_dev0.conf
|
||||
$ sed -i 's/\[SSL\]/\[SHIM\]/g' $QAT_CONF_LOCATION/c6xxvf_dev0.conf
|
||||
```
|
||||
|
||||
### Expose and Bind Intel® QAT virtual functions to VFIO-PCI (Every reboot)
|
||||
|
||||
To enable virtual functions, the host OS should have IOMMU groups enabled. In
|
||||
the UEFI Firmware Intel® Virtualization Technology for Directed I/O
|
||||
(Intel® VT-d) must be enabled. Also, the kernel boot parameter should be
|
||||
`intel_iommu=on` or `intel_iommu=igfx_off`. This should have been set from
|
||||
the instructions above. Check the output of `/proc/cmdline` to confirm. The
|
||||
following commands assume you installed an Intel® QAT card, IOMMU is on, and
|
||||
VT-d is enabled. The vendor and device ID are added to the `VFIO-PCI` driver so that
|
||||
each exposed virtual function can be bound to the `VFIO-PCI` driver. Once
|
||||
complete, each virtual function can be passed into a Kata Containers container using
|
||||
the PCIe device passthrough feature. For Kubernetes, the
|
||||
[Intel device plugin](https://github.com/intel/intel-device-plugins-for-kubernetes)
|
||||
for Kubernetes handles the binding of the driver, but the VF’s still must be
|
||||
enabled.
|
||||
|
||||
```bash
|
||||
$ sudo modprobe vfio-pci
|
||||
$ QAT_PCI_BUS_PF_NUMBERS=$((lspci -d :435 && lspci -d :37c8 && lspci -d :19e2 && lspci -d :6f54) | cut -d ' ' -f 1)
|
||||
$ QAT_PCI_BUS_PF_1=$(echo $QAT_PCI_BUS_PF_NUMBERS | cut -d ' ' -f 1)
|
||||
$ echo 16 | sudo tee /sys/bus/pci/devices/0000:$QAT_PCI_BUS_PF_1/sriov_numvfs
|
||||
$ QAT_PCI_ID_VF=$(cat /sys/bus/pci/devices/0000:${QAT_PCI_BUS_PF_1}/virtfn0/uevent | grep PCI_ID)
|
||||
$ QAT_VENDOR_AND_ID_VF=$(echo ${QAT_PCI_ID_VF/PCI_ID=} | sed 's/:/ /')
|
||||
$ echo $QAT_VENDOR_AND_ID_VF | sudo tee --append /sys/bus/pci/drivers/vfio-pci/new_id
|
||||
```
|
||||
|
||||
Loop through all the virtual functions and bind to the VFIO driver
|
||||
|
||||
```bash
|
||||
$ for f in /sys/bus/pci/devices/0000:$QAT_PCI_BUS_PF_1/virtfn*
|
||||
do QAT_PCI_BUS_VF=$(basename $(readlink $f))
|
||||
echo $QAT_PCI_BUS_VF | sudo tee --append /sys/bus/pci/drivers/c6xxvf/unbind
|
||||
echo $QAT_PCI_BUS_VF | sudo tee --append /sys/bus/pci/drivers/vfio-pci/bind
|
||||
done
|
||||
```
|
||||
|
||||
### Check Intel® QAT virtual functions are enabled
|
||||
|
||||
If the following command returns empty, then the virtual functions are not
|
||||
properly enabled. This command checks the enumerated device IDs for just the
|
||||
virtual functions. Using the Intel® QAT as an example, the physical device ID
|
||||
is `37c8` and virtual function device ID is `37c9`. The following command checks
|
||||
if VF's are enabled for any of the currently known Intel® QAT device ID's. The
|
||||
following `ls` command should show the 16 VF's bound to `VFIO-PCI`.
|
||||
|
||||
```bash
|
||||
$ for i in 0442 0443 37c9 19e3; do lspci -d 8086:$i; done
|
||||
```
|
||||
|
||||
Another way to check is to see what PCI devices that `VFIO-PCI` is mapped to.
|
||||
It should match the device ID's of the VF's.
|
||||
|
||||
```bash
|
||||
$ ls -la /sys/bus/pci/drivers/vfio-pci
|
||||
```
|
||||
|
||||
## Prepare Kata Containers
|
||||
|
||||
### Download Kata kernel Source
|
||||
|
||||
This example automatically uses the latest Kata kernel supported by Kata. It
|
||||
follows the instructions from the
|
||||
[packaging kernel repository](../../tools/packaging/kernel)
|
||||
and uses the latest Kata kernel
|
||||
[config](../../tools/packaging/kernel/configs).
|
||||
There are some patches that must be installed as well, which the
|
||||
`build-kernel.sh` script should automatically apply. If you are using a
|
||||
different kernel version, then you might need to manually apply them. Since
|
||||
the Kata Containers kernel has a minimal set of kernel flags set, you must
|
||||
create an Intel® QAT kernel fragment with the necessary `CONFIG_CRYPTO_*` options set.
|
||||
Update the config to set some of the `CRYPTO` flags to enabled. This might
|
||||
change with different kernel versions. The following instructions were tested
|
||||
with kernel `v5.4.0-64-generic`.
|
||||
|
||||
```bash
|
||||
$ mkdir -p $GOPATH
|
||||
$ cd $GOPATH
|
||||
$ go get -v github.com/kata-containers/kata-containers
|
||||
$ cat << EOF > $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/configs/fragments/common/qat.conf
|
||||
CONFIG_PCIEAER=y
|
||||
CONFIG_UIO=y
|
||||
CONFIG_CRYPTO_HW=y
|
||||
CONFIG_CRYPTO_DEV_QAT_C62XVF=m
|
||||
CONFIG_CRYPTO_CBC=y
|
||||
CONFIG_MODULES=y
|
||||
CONFIG_MODULE_SIG=y
|
||||
CONFIG_CRYPTO_AUTHENC=y
|
||||
CONFIG_CRYPTO_DH=y
|
||||
EOF
|
||||
$ $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/build-kernel.sh setup
|
||||
```
|
||||
|
||||
### Build Kata kernel
|
||||
|
||||
```bash
|
||||
$ cd $GOPATH
|
||||
$ export LINUX_VER=$(ls -d kata-linux-*)
|
||||
$ sed -i 's/EXTRAVERSION =/EXTRAVERSION = .qat.container/' $LINUX_VER/Makefile
|
||||
$ $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/build-kernel.sh build
|
||||
```
|
||||
|
||||
### Copy Kata kernel
|
||||
|
||||
```bash
|
||||
$ export KATA_KERNEL_NAME=vmlinux-${LINUX_VER}_qat
|
||||
$ mkdir -p $KATA_KERNEL_LOCATION
|
||||
$ cp ${GOPATH}/${LINUX_VER}/vmlinux ${KATA_KERNEL_LOCATION}/${KATA_KERNEL_NAME}
|
||||
```
|
||||
|
||||
### Prepare Kata root filesystem
|
||||
|
||||
These instructions build upon the OS builder instructions located in the
|
||||
[Developer Guide](../Developer-Guide.md). At this point it is recommended that
|
||||
[Docker](https://docs.docker.com/engine/install/ubuntu/) is installed first, and
|
||||
then [Kata-deploy](../../tools/packaging/kata-deploy)
|
||||
is used to install Kata. This will make sure that the correct `agent` version
|
||||
is installed into the rootfs in the steps below.
|
||||
|
||||
The following instructions use Ubuntu as the root filesystem with systemd as
|
||||
the init and will add in the `kmod` binary, which is not a standard binary in
|
||||
a Kata rootfs image. The `kmod` binary is necessary to load the Intel® QAT
|
||||
kernel modules when the virtual machine rootfs boots.
|
||||
|
||||
```bash
|
||||
$ export OSBUILDER=$GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder
|
||||
$ export ROOTFS_DIR=${OSBUILDER}/rootfs-builder/rootfs
|
||||
$ export EXTRA_PKGS='kmod'
|
||||
```
|
||||
|
||||
Make sure that the `kata-agent` version matches the installed `kata-runtime`
|
||||
version. Also make sure the `kata-runtime` install location is in your `PATH`
|
||||
variable. The following `AGENT_VERSION` can be set manually to match
|
||||
the `kata-runtime` version if the following commands don't work.
|
||||
|
||||
```bash
|
||||
$ export PATH=$PATH:/opt/kata/bin
|
||||
$ cd $GOPATH
|
||||
$ export AGENT_VERSION=$(kata-runtime version | head -n 1 | grep -o "[0-9.]\+")
|
||||
$ cd ${OSBUILDER}/rootfs-builder
|
||||
$ sudo rm -rf ${ROOTFS_DIR}
|
||||
$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SECCOMP=no ./rootfs.sh ubuntu'
|
||||
```
|
||||
|
||||
### Compile Intel® QAT drivers for Kata Containers kernel and add to Kata Containers rootfs
|
||||
|
||||
After the Kata Containers kernel builds with the proper configuration flags,
|
||||
you must build the Intel® QAT drivers against that Kata Containers kernel
|
||||
version in a similar way they were previously built for the host OS. You must
|
||||
set the `KERNEL_SOURCE_ROOT` variable to the Kata Containers kernel source
|
||||
directory and build the Intel® QAT drivers again. The `make` command will
|
||||
install the Intel® QAT modules into the Kata rootfs.
|
||||
|
||||
```bash
|
||||
$ cd $GOPATH
|
||||
$ export LINUX_VER=$(ls -d kata*)
|
||||
$ export KERNEL_MAJOR_VERSION=$(awk '/^VERSION =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
|
||||
$ export KERNEL_PATHLEVEL=$(awk '/^PATCHLEVEL =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
|
||||
$ export KERNEL_SUBLEVEL=$(awk '/^SUBLEVEL =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
|
||||
$ export KERNEL_EXTRAVERSION=$(awk '/^EXTRAVERSION =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
|
||||
$ export KERNEL_ROOTFS_DIR=${KERNEL_MAJOR_VERSION}.${KERNEL_PATHLEVEL}.${KERNEL_SUBLEVEL}${KERNEL_EXTRAVERSION}
|
||||
$ cd $QAT_SRC
|
||||
$ KERNEL_SOURCE_ROOT=$GOPATH/$LINUX_VER ./configure --enable-icp-sriov=guest
|
||||
$ sudo -E make all -j $(nproc)
|
||||
$ sudo -E make INSTALL_MOD_PATH=$ROOTFS_DIR qat-driver-install -j $(nproc)
|
||||
```
|
||||
|
||||
The `usdm_drv` module also needs to be copied into the rootfs modules path and
|
||||
`depmod` should be run.
|
||||
|
||||
```bash
|
||||
$ sudo cp $QAT_SRC/build/usdm_drv.ko $ROOTFS_DIR/lib/modules/${KERNEL_ROOTFS_DIR}/updates/drivers
|
||||
$ sudo depmod -a -b ${ROOTFS_DIR} ${KERNEL_ROOTFS_DIR}
|
||||
$ cd ${OSBUILDER}/image-builder
|
||||
$ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh ${ROOTFS_DIR}'
|
||||
```
|
||||
|
||||
> **Note: Ignore any errors on modules.builtin and modules.order when running
|
||||
> `depmod`.**
|
||||
|
||||
### Copy Kata rootfs
|
||||
|
||||
```bash
|
||||
$ mkdir -p $KATA_ROOTFS_LOCATION
|
||||
$ cp ${OSBUILDER}/image-builder/kata-containers.img $KATA_ROOTFS_LOCATION
|
||||
```
|
||||
|
||||
## Verify Intel® QAT works in a container
|
||||
|
||||
The following instructions use an OpenSSL Dockerfile that builds the
|
||||
Intel® QAT engine to allow OpenSSL to offload crypto functions. It is a
|
||||
convenient way to test that VFIO device passthrough for the Intel® QAT VF’s is
|
||||
working properly with the Kata Containers VM.
|
||||
|
||||
### Build OpenSSL Intel® QAT engine container
|
||||
|
||||
Use the OpenSSL Intel® QAT [Dockerfile](https://github.com/intel/intel-device-plugins-for-kubernetes/tree/main/demo/openssl-qat-engine)
|
||||
to build a container image with an optimized OpenSSL engine for
|
||||
Intel® QAT. Using `docker build` with the Kata Containers runtime can sometimes
|
||||
have issues. Therefore, make sure that `runc` is the default Docker container
|
||||
runtime.
|
||||
|
||||
```bash
|
||||
$ cd $QAT_SRC
|
||||
$ curl -O $QAT_DOCKERFILE
|
||||
$ sudo docker build -t openssl-qat-engine .
|
||||
```
|
||||
|
||||
> **Note: The Intel® QAT driver version in this container might not match the
|
||||
> Intel® QAT driver compiled and loaded on the host when compiling.**
|
||||
|
||||
### Test Intel® QAT with the ctr tool
|
||||
|
||||
The `ctr` tool can be used to interact with the containerd daemon. It may be
|
||||
more convenient to use this tool to verify the kernel and image instead of
|
||||
setting up a Kubernetes cluster. The correct Kata runtimes need to be added
|
||||
to the containerd `config.toml`. Below is a sample snippet that can be added
|
||||
to allow QEMU and Cloud Hypervisor (CLH) to work with `ctr`.
|
||||
|
||||
```
|
||||
[plugins.cri.containerd.runtimes.kata-qemu]
|
||||
runtime_type = "io.containerd.kata-qemu.v2"
|
||||
privileged_without_host_devices = true
|
||||
pod_annotations = ["io.katacontainers.*"]
|
||||
[plugins.cri.containerd.runtimes.kata-qemu.options]
|
||||
ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration-qemu.toml"
|
||||
[plugins.cri.containerd.runtimes.kata-clh]
|
||||
runtime_type = "io.containerd.kata-clh.v2"
|
||||
privileged_without_host_devices = true
|
||||
pod_annotations = ["io.katacontainers.*"]
|
||||
[plugins.cri.containerd.runtimes.kata-clh.options]
|
||||
ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration-clh.toml"
|
||||
```
|
||||
|
||||
In addition, containerd expects the binary to be in `/usr/local/bin` so add
|
||||
this small script so that it redirects to be able to use either QEMU or
|
||||
Cloud Hypervisor with Kata.
|
||||
|
||||
```bash
|
||||
$ echo '#!/usr/bin/env bash' | sudo tee /usr/local/bin/containerd-shim-kata-qemu-v2
|
||||
$ echo 'KATA_CONF_FILE=/opt/kata/share/defaults/kata-containers/configuration-qemu.toml /opt/kata/bin/containerd-shim-kata-v2 $@' | sudo tee -a /usr/local/bin/containerd-shim-kata-qemu-v2
|
||||
$ sudo chmod +x /usr/local/bin/containerd-shim-kata-qemu-v2
|
||||
$ echo '#!/usr/bin/env bash' | sudo tee /usr/local/bin/containerd-shim-kata-clh-v2
|
||||
$ echo 'KATA_CONF_FILE=/opt/kata/share/defaults/kata-containers/configuration-clh.toml /opt/kata/bin/containerd-shim-kata-v2 $@' | sudo tee -a /usr/local/bin/containerd-shim-kata-clh-v2
|
||||
$ sudo chmod +x /usr/local/bin/containerd-shim-kata-clh-v2
|
||||
```
|
||||
|
||||
After the OpenSSL image is built and imported into containerd, an Intel® QAT
|
||||
virtual function exposed in the step above can be added to the `ctr` command.
|
||||
Make sure to change the `/dev/vfio` number to one that actually exists on the
|
||||
host system. When using the `ctr` tool, the `configuration.toml` for Kata needs
|
||||
to point to the custom Kata kernel and rootfs built above and the Intel® QAT
|
||||
modules in the Kata rootfs need to load at boot. The following steps assume that
|
||||
`kata-deploy` was used to install Kata and QEMU is being tested. If using a
|
||||
different hypervisor, different install method for Kata, or a different
|
||||
Intel® QAT chipset then the command will need to be modified.
|
||||
|
||||
> **Note: The following was tested with
|
||||
[containerd v1.4.6](https://github.com/containerd/containerd/releases/tag/v1.4.6).**
|
||||
|
||||
```bash
|
||||
$ config_file="/opt/kata/share/defaults/kata-containers/configuration-qemu.toml"
|
||||
$ sudo sed -i "/kernel =/c kernel = "\"${KATA_ROOTFS_LOCATION}/${KATA_KERNEL_NAME}\""" $config_file
|
||||
$ sudo sed -i "/image =/c image = "\"${KATA_KERNEL_LOCATION}/kata-containers.img\""" $config_file
|
||||
$ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 modules-load=usdm_drv,qat_c62xvf"/g' $config_file
|
||||
$ sudo docker save -o openssl-qat-engine.tar openssl-qat-engine:latest
|
||||
$ sudo ctr images import openssl-qat-engine.tar
|
||||
$ sudo ctr run --runtime io.containerd.run.kata-qemu.v2 --privileged -t --rm --device=/dev/vfio/180 --mount type=bind,src=/dev,dst=/dev,options=rbind:rw --mount type=bind,src=${QAT_CONF_LOCATION}/c6xxvf_dev0.conf,dst=/etc/c6xxvf_dev0.conf,options=rbind:rw docker.io/library/openssl-qat-engine:latest bash
|
||||
```
|
||||
|
||||
Below are some commands to run in the container image to verify Intel® QAT is
|
||||
working
|
||||
|
||||
```sh
|
||||
root@67561dc2757a/ # cat /proc/modules
|
||||
qat_c62xvf 16384 - - Live 0xffffffffc00d9000 (OE)
|
||||
usdm_drv 86016 - - Live 0xffffffffc00e8000 (OE)
|
||||
intel_qat 249856 - - Live 0xffffffffc009b000 (OE)
|
||||
|
||||
root@67561dc2757a/ # adf_ctl restart
|
||||
Restarting all devices.
|
||||
Processing /etc/c6xxvf_dev0.conf
|
||||
|
||||
root@67561dc2757a/ # adf_ctl status
|
||||
Checking status of all devices.
|
||||
There is 1 QAT acceleration device(s) in the system:
|
||||
qat_dev0 - type: c6xxvf, inst_id: 0, node_id: 0, bsf: 0000:01:01.0, #accel: 1 #engines: 1 state: up
|
||||
|
||||
root@67561dc2757a/ # openssl engine -c -t qat-hw
|
||||
(qat-hw) Reference implementation of QAT crypto engine v0.6.1
|
||||
[RSA, DSA, DH, AES-128-CBC-HMAC-SHA1, AES-128-CBC-HMAC-SHA256, AES-256-CBC-HMAC-SHA1, AES-256-CBC-HMAC-SHA256, TLS1-PRF, HKDF, X25519, X448]
|
||||
[ available ]
|
||||
```
|
||||
|
||||
### Test Intel® QAT in Kubernetes
|
||||
|
||||
Start a Kubernetes cluster with containerd as the CRI. The host should
|
||||
already be setup with 16 virtual functions of the Intel® QAT card bound to
|
||||
`VFIO-PCI`. Verify this by looking in `/dev/vfio` for a listing of devices.
|
||||
You might need to disable Docker before initializing Kubernetes. Be aware
|
||||
that the OpenSSL container image built above will need to be exported from
|
||||
Docker and imported into containerd.
|
||||
|
||||
If Kata is installed through [`kata-deploy`](../../tools/packaging/kata-deploy/README.md)
|
||||
there will be multiple `configuration.toml` files associated with different
|
||||
hypervisors. Rather than add in the custom Kata kernel, Kata rootfs, and
|
||||
kernel modules to each `configuration.toml` as the default, instead use
|
||||
[annotations](../how-to/how-to-load-kernel-modules-with-kata.md)
|
||||
in the Kubernetes YAML file to tell Kata which kernel and rootfs to use. The
|
||||
easy way to do this is to use `kata-deploy` which will install the Kata binaries
|
||||
to `/opt` and properly configure the `/etc/containerd/config.toml` with annotation
|
||||
support. However, the `configuration.toml` needs to enable support for
|
||||
annotations as well. The following configures both QEMU and Cloud Hypervisor
|
||||
`configuration.toml` files that are currently available with Kata Containers
|
||||
versions 2.0 and higher.
|
||||
|
||||
```bash
|
||||
$ sudo sed -i 's/enable_annotations\s=\s\[\]/enable_annotations = [".*"]/' /opt/kata/share/defaults/kata-containers/configuration-qemu.toml
|
||||
$ sudo sed -i 's/enable_annotations\s=\s\[\]/enable_annotations = [".*"]/' /opt/kata/share/defaults/kata-containers/configuration-clh.toml
|
||||
```
|
||||
|
||||
Export the OpenSSL image from Docker and import into containerd.
|
||||
|
||||
```bash
|
||||
$ sudo docker save -o openssl-qat-engine.tar openssl-qat-engine:latest
|
||||
$ sudo ctr -n=k8s.io images import openssl-qat-engine.tar
|
||||
```
|
||||
|
||||
The [Intel® QAT Plugin](https://github.com/intel/intel-device-plugins-for-kubernetes/blob/main/cmd/qat_plugin/README.md)
|
||||
needs to be started so that the virtual functions can be discovered and
|
||||
used by Kubernetes.
|
||||
|
||||
The following YAML file can be used to start a Kata container with Intel® QAT
|
||||
support. If Kata is installed with `kata-deploy`, then the containerd
|
||||
`configuration.toml` should have all of the Kata runtime classes already
|
||||
populated and annotations supported. To use an Intel® QAT virtual function, the
|
||||
Intel® QAT plugin needs to be started after the VF's are bound to `VFIO-PCI` as
|
||||
described [above](#expose-and-bind-intel-qat-virtual-functions-to-vfio-pci-every-reboot).
|
||||
Edit the following to point to the correct Kata kernel and rootfs location
|
||||
built with Intel® QAT support.
|
||||
|
||||
```bash
|
||||
$ cat << EOF > kata-openssl-qat.yaml
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: kata-openssl-qat
|
||||
labels:
|
||||
app: kata-openssl-qat
|
||||
annotations:
|
||||
io.katacontainers.config.hypervisor.kernel: "$KATA_KERNEL_LOCATION/$KATA_KERNEL_NAME"
|
||||
io.katacontainers.config.hypervisor.image: "$KATA_ROOTFS_LOCATION/kata-containers.img"
|
||||
io.katacontainers.config.hypervisor.kernel_params: "modules-load=usdm_drv,qat_c62xvf"
|
||||
spec:
|
||||
runtimeClassName: kata-qemu
|
||||
containers:
|
||||
- name: kata-openssl-qat
|
||||
image: docker.io/library/openssl-qat-engine:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
resources:
|
||||
limits:
|
||||
qat.intel.com/generic: 1
|
||||
cpu: 1
|
||||
securityContext:
|
||||
capabilities:
|
||||
add: ["IPC_LOCK", "SYS_ADMIN"]
|
||||
volumeMounts:
|
||||
- mountPath: /etc/c6xxvf_dev0.conf
|
||||
name: etc-mount
|
||||
- mountPath: /dev
|
||||
name: dev-mount
|
||||
volumes:
|
||||
- name: dev-mount
|
||||
hostPath:
|
||||
path: /dev
|
||||
- name: etc-mount
|
||||
hostPath:
|
||||
path: $QAT_CONF_LOCATION/c6xxvf_dev0.conf
|
||||
EOF
|
||||
```
|
||||
|
||||
Use `kubectl` to start the pod. Verify that Intel® QAT card acceleration is
|
||||
working with the Intel® QAT engine.
|
||||
```bash
|
||||
$ kubectl apply -f kata-openssl-qat.yaml
|
||||
```
|
||||
|
||||
```sh
|
||||
$ kubectl exec -it kata-openssl-qat -- adf_ctl restart
|
||||
Restarting all devices.
|
||||
Processing /etc/c6xxvf_dev0.conf
|
||||
|
||||
$ kubectl exec -it kata-openssl-qat -- adf_ctl status
|
||||
Checking status of all devices.
|
||||
There is 1 QAT acceleration device(s) in the system:
|
||||
qat_dev0 - type: c6xxvf, inst_id: 0, node_id: 0, bsf: 0000:01:01.0, #accel: 1 #engines: 1 state: up
|
||||
|
||||
$ kubectl exec -it kata-openssl-qat -- openssl engine -c -t qat-hw
|
||||
(qat-hw) Reference implementation of QAT crypto engine v0.6.1
|
||||
[RSA, DSA, DH, AES-128-CBC-HMAC-SHA1, AES-128-CBC-HMAC-SHA256, AES-256-CBC-HMAC-SHA1, AES-256-CBC-HMAC-SHA256, TLS1-PRF, HKDF, X25519, X448]
|
||||
[ available ]
|
||||
```
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
* Check that `/dev/vfio` has VF’s enabled.
|
||||
|
||||
```sh
|
||||
$ ls /dev/vfio
|
||||
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 vfio
|
||||
```
|
||||
|
||||
* Check that the modules load when inside the Kata Container.
|
||||
|
||||
```sh
|
||||
bash-5.0# grep -E "qat|usdm_drv" /proc/modules
|
||||
qat_c62xvf 16384 - - Live 0x0000000000000000 (O)
|
||||
usdm_drv 86016 - - Live 0x0000000000000000 (O)
|
||||
intel_qat 184320 - - Live 0x0000000000000000 (O)
|
||||
```
|
||||
|
||||
* Verify that at least the first `c6xxvf_dev0.conf` file mounts inside the
|
||||
container image in `/etc`. You will need one configuration file for each VF
|
||||
passed into the container.
|
||||
|
||||
```sh
|
||||
bash-5.0# ls /etc
|
||||
c6xxvf_dev0.conf c6xxvf_dev11.conf c6xxvf_dev14.conf c6xxvf_dev3.conf c6xxvf_dev6.conf c6xxvf_dev9.conf resolv.conf
|
||||
c6xxvf_dev1.conf c6xxvf_dev12.conf c6xxvf_dev15.conf c6xxvf_dev4.conf c6xxvf_dev7.conf hostname
|
||||
c6xxvf_dev10.conf c6xxvf_dev13.conf c6xxvf_dev2.conf c6xxvf_dev5.conf c6xxvf_dev8.conf hosts
|
||||
```
|
||||
|
||||
* Check `dmesg` inside the container to see if there are any issues with the
|
||||
Intel® QAT driver.
|
||||
|
||||
* If there are issues building the OpenSSL Intel® QAT container image, then
|
||||
check to make sure that `runc` is the default runtime for building containers.
|
||||
|
||||
```sh
|
||||
$ cat /etc/systemd/system/docker.service.d/50-runtime.conf
|
||||
[Service]
|
||||
Environment="DOCKER_DEFAULT_RUNTIME=--default-runtime runc"
|
||||
```
|
||||
|
||||
## Optional Scripts
|
||||
|
||||
### Verify Intel® QAT card counters are incremented
|
||||
|
||||
To check the built in firmware counters, the Intel® QAT driver has to be compiled
|
||||
and installed to the host and can't rely on the built in host driver. The
|
||||
counters will increase when the accelerator is actively being used. To verify
|
||||
Intel® QAT is actively accelerating the containerized application, use the
|
||||
following instructions to check if any of the counters increment. Make
|
||||
sure to change the PCI Device ID to match what's in the system.
|
||||
|
||||
```bash
|
||||
$ for i in 0434 0435 37c8 1f18 1f19; do lspci -d 8086:$i; done
|
||||
$ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b1\:00.0/fw_counters
|
||||
$ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b3\:00.0/fw_counters
|
||||
$ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b5\:00.0/fw_counters
|
||||
```
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
[toolchain]
|
||||
# Keep in sync with versions.yaml
|
||||
channel = "1.89"
|
||||
channel = "1.85.1"
|
||||
|
||||
103
src/agent/Cargo.lock
generated
103
src/agent/Cargo.lock
generated
@@ -459,9 +459,15 @@ version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
|
||||
dependencies = [
|
||||
"bit-vec",
|
||||
"bit-vec 0.8.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bit-vec"
|
||||
version = "0.6.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
|
||||
|
||||
[[package]]
|
||||
name = "bit-vec"
|
||||
version = "0.8.0"
|
||||
@@ -659,6 +665,30 @@ dependencies = [
|
||||
"shlex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cdi"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/cncf-tags/container-device-interface-rs?rev=3b1e83dda5efcc83c7a4f134466ec006b37109c9#3b1e83dda5efcc83c7a4f134466ec006b37109c9"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
"const_format",
|
||||
"jsonschema",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"nix 0.24.3",
|
||||
"notify",
|
||||
"oci-spec",
|
||||
"once_cell",
|
||||
"path-clean",
|
||||
"regex",
|
||||
"semver",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"serde_json",
|
||||
"serde_yaml",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
@@ -778,31 +808,6 @@ dependencies = [
|
||||
"unicode-xid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "container-device-interface"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2605001b0e8214dae8af146a43ccaa965d960403e330f174c21327154530df8b"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
"const_format",
|
||||
"jsonschema",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"nix 0.24.3",
|
||||
"notify",
|
||||
"oci-spec",
|
||||
"once_cell",
|
||||
"path-clean",
|
||||
"regex",
|
||||
"semver",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"serde_json",
|
||||
"serde_yaml",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "core-foundation-sys"
|
||||
version = "0.8.7"
|
||||
@@ -1207,9 +1212,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "fancy-regex"
|
||||
version = "0.16.2"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "998b056554fbe42e03ae0e152895cd1a7e1002aec800fdc6635d20270260c46f"
|
||||
checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298"
|
||||
dependencies = [
|
||||
"bit-set",
|
||||
"regex-automata 0.4.9",
|
||||
@@ -1244,6 +1249,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece"
|
||||
dependencies = [
|
||||
"crc32fast",
|
||||
"libz-sys",
|
||||
"miniz_oxide",
|
||||
]
|
||||
|
||||
@@ -2007,9 +2013,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "jsonschema"
|
||||
version = "0.33.0"
|
||||
version = "0.30.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d46662859bc5f60a145b75f4632fbadc84e829e45df6c5de74cfc8e05acb96b5"
|
||||
checksum = "f1b46a0365a611fbf1d2143104dcf910aada96fafd295bab16c60b802bf6fa1d"
|
||||
dependencies = [
|
||||
"ahash 0.8.12",
|
||||
"base64 0.22.1",
|
||||
@@ -2043,11 +2049,11 @@ dependencies = [
|
||||
"async-trait",
|
||||
"base64 0.22.1",
|
||||
"capctl",
|
||||
"cdi",
|
||||
"cfg-if",
|
||||
"cgroups-rs",
|
||||
"clap",
|
||||
"const_format",
|
||||
"container-device-interface",
|
||||
"derivative",
|
||||
"futures",
|
||||
"ipnetwork",
|
||||
@@ -2058,7 +2064,7 @@ dependencies = [
|
||||
"libc",
|
||||
"log",
|
||||
"logging",
|
||||
"mem-agent",
|
||||
"mem-agent-lib",
|
||||
"netlink-packet-core",
|
||||
"netlink-packet-route",
|
||||
"netlink-sys 0.7.0",
|
||||
@@ -2259,6 +2265,17 @@ dependencies = [
|
||||
"uuid 0.8.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libz-sys"
|
||||
version = "1.1.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b70e7a7df205e92a1a4cd9aaae7898dac0aa555503cc0a649494d0d60e7651d"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"pkg-config",
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.3.8"
|
||||
@@ -2333,7 +2350,7 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mem-agent"
|
||||
name = "mem-agent-lib"
|
||||
version = "0.2.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
@@ -3405,9 +3422,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "referencing"
|
||||
version = "0.33.0"
|
||||
version = "0.30.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9e9c261f7ce75418b3beadfb3f0eb1299fe8eb9640deba45ffa2cb783098697d"
|
||||
checksum = "c8eff4fa778b5c2a57e85c5f2fe3a709c52f0e60d23146e2151cbef5893f420e"
|
||||
dependencies = [
|
||||
"ahash 0.8.12",
|
||||
"fluent-uri 0.3.2",
|
||||
@@ -3701,7 +3718,7 @@ dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
"awaitgroup",
|
||||
"bit-vec",
|
||||
"bit-vec 0.6.3",
|
||||
"capctl",
|
||||
"caps",
|
||||
"cfg-if",
|
||||
@@ -4021,9 +4038,12 @@ checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
|
||||
|
||||
[[package]]
|
||||
name = "slab"
|
||||
version = "0.4.11"
|
||||
version = "0.4.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589"
|
||||
checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "slash-formatter"
|
||||
@@ -4305,7 +4325,6 @@ checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683"
|
||||
name = "test-utils"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"nix 0.26.4",
|
||||
]
|
||||
|
||||
@@ -4804,6 +4823,12 @@ version = "1.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "943ce29a8a743eb10d6082545d861b24f9d1b160b7d741e0f2cdf726bec909c5"
|
||||
|
||||
[[package]]
|
||||
name = "vcpkg"
|
||||
version = "0.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.5"
|
||||
|
||||
@@ -13,12 +13,8 @@ lazy_static = "1.3.0"
|
||||
ttrpc = { version = "0.8.4", features = ["async"], default-features = false }
|
||||
protobuf = "3.7.2"
|
||||
libc = "0.2.94"
|
||||
|
||||
# Notes:
|
||||
# - Needs to stay in sync with libs
|
||||
# - Upgrading to 0.27+ will require code changes (see #11842)
|
||||
# Notes: nix needs to stay in sync with libs
|
||||
nix = "0.26.4"
|
||||
|
||||
capctl = "0.2.0"
|
||||
scan_fmt = "0.2.6"
|
||||
scopeguard = "1.0.0"
|
||||
@@ -85,10 +81,10 @@ kata-agent-policy = { path = "policy" }
|
||||
rustjail = { path = "rustjail" }
|
||||
vsock-exporter = { path = "vsock-exporter" }
|
||||
|
||||
mem-agent = { path = "../libs/mem-agent" }
|
||||
mem-agent = { path = "../mem-agent", package = "mem-agent-lib" }
|
||||
|
||||
kata-sys-util = { path = "../libs/kata-sys-util" }
|
||||
kata-types = { path = "../libs/kata-types", features = ["safe-path"] }
|
||||
kata-types = { path = "../libs/kata-types" }
|
||||
# Note: this crate sets the slog 'max_*' features which allows the log level
|
||||
# to be modified at runtime.
|
||||
logging = { path = "../libs/logging" }
|
||||
@@ -167,6 +163,9 @@ clap.workspace = true
|
||||
strum.workspace = true
|
||||
strum_macros.workspace = true
|
||||
|
||||
# Agent Policy
|
||||
cdi = { git = "https://github.com/cncf-tags/container-device-interface-rs", rev = "3b1e83dda5efcc83c7a4f134466ec006b37109c9" }
|
||||
|
||||
# Local dependencies
|
||||
kata-agent-policy = { workspace = true, optional = true }
|
||||
mem-agent.workspace = true
|
||||
@@ -186,8 +185,6 @@ base64 = "0.22"
|
||||
sha2 = "0.10.8"
|
||||
async-compression = { version = "0.4.22", features = ["tokio", "gzip"] }
|
||||
|
||||
container-device-interface = "0.1.1"
|
||||
|
||||
[target.'cfg(target_arch = "s390x")'.dependencies]
|
||||
pv_core = { git = "https://github.com/ibm-s390-linux/s390-tools", rev = "4942504a9a2977d49989a5e5b7c1c8e07dc0fa41", package = "s390_pv_core" }
|
||||
|
||||
@@ -206,7 +203,6 @@ lto = true
|
||||
seccomp = ["rustjail/seccomp"]
|
||||
standard-oci-runtime = ["rustjail/standard-oci-runtime"]
|
||||
agent-policy = ["kata-agent-policy"]
|
||||
init-data = []
|
||||
|
||||
[[bin]]
|
||||
name = "kata-agent"
|
||||
|
||||
@@ -41,14 +41,6 @@ ifeq ($(AGENT_POLICY),yes)
|
||||
override EXTRA_RUSTFEATURES += agent-policy
|
||||
endif
|
||||
|
||||
##VAR INIT_DATA=yes|no define if agent enables the init data feature
|
||||
INIT_DATA ?= yes
|
||||
|
||||
# Enable the init data feature of rust build
|
||||
ifeq ($(INIT_DATA),yes)
|
||||
override EXTRA_RUSTFEATURES += init-data
|
||||
endif
|
||||
|
||||
include ../../utils.mk
|
||||
|
||||
##VAR STANDARD_OCI_RUNTIME=yes|no define if agent enables standard oci runtime feature
|
||||
@@ -130,7 +122,7 @@ $(TARGET): $(GENERATED_CODE) $(TARGET_PATH)
|
||||
$(TARGET_PATH): show-summary
|
||||
@RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) $(if $(findstring release,$(BUILD_TYPE)),--release) $(EXTRA_RUSTFEATURES)
|
||||
|
||||
$(GENERATED_FILES): %: %.in $(VERSION_FILE)
|
||||
$(GENERATED_FILES): %: %.in
|
||||
@sed $(foreach r,$(GENERATED_REPLACEMENTS),-e 's|@$r@|$($r)|g') "$<" > "$@"
|
||||
|
||||
##TARGET optimize: optimized build
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user