Compare commits

..

3 Commits

Author SHA1 Message Date
Fupan Li
c376ab5de7 runtime-rs: fix the issue of hot-unplug memory smaller
It should do nothing instead of return an error when
hot-unplug the memory to the size smaller than static
plugged memory size.

Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
2025-11-10 14:17:44 +08:00
Fupan Li
200abc815c tests: fix the issue of missing teardown pods
Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
2025-11-10 10:30:17 +08:00
Fupan Li
333d12dc47 runtime: fix the issue of update interface error
Since the network device hotplug is an asynchronous operation,
it's possible that the hotplug operation had returned, but
the network device hasn't ready in guest, thus it's better to
retry on this operation to wait until the device ready in guest.

Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
2025-11-07 11:29:57 +08:00
986 changed files with 26816 additions and 49168 deletions

View File

@@ -10,6 +10,11 @@ self-hosted-runner:
- amd64-nvidia-a100
- amd64-nvidia-h100-snp
- arm64-k8s
- containerd-v1.7-overlayfs
- containerd-v2.0-overlayfs
- containerd-v2.1-overlayfs
- containerd-v2.2
- containerd-v2.2-overlayfs
- garm-ubuntu-2004
- garm-ubuntu-2004-smaller
- garm-ubuntu-2204
@@ -20,11 +25,10 @@ self-hosted-runner:
- ppc64le-k8s
- ppc64le-small
- ubuntu-24.04-ppc64le
- ubuntu-24.04-s390x
- metrics
- riscv-builder
- sev-snp
- s390x
- s390x-large
- tdx
- ubuntu-24.04-arm
- ubuntu-22.04-arm

View File

@@ -12,6 +12,7 @@ updates:
- "/src/tools/agent-ctl"
- "/src/tools/genpolicy"
- "/src/tools/kata-ctl"
- "/src/tools/runk"
- "/src/tools/trace-forwarder"
schedule:
interval: "daily"

View File

@@ -71,7 +71,7 @@ jobs:
fail-fast: false
matrix:
containerd_version: ['lts', 'active']
vmm: ['clh', 'cloud-hypervisor', 'dragonball', 'qemu', 'qemu-runtime-rs']
vmm: ['clh', 'cloud-hypervisor', 'dragonball', 'qemu']
runs-on: ubuntu-22.04
env:
CONTAINERD_VERSION: ${{ matrix.containerd_version }}
@@ -117,7 +117,7 @@ jobs:
fail-fast: false
matrix:
containerd_version: ['lts', 'active']
vmm: ['clh', 'qemu', 'dragonball', 'qemu-runtime-rs']
vmm: ['clh', 'qemu', 'dragonball']
runs-on: ubuntu-22.04
env:
CONTAINERD_VERSION: ${{ matrix.containerd_version }}
@@ -147,22 +147,49 @@ jobs:
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-artifacts
- name: get-kata-tools-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-tools-artifacts
- name: Install kata
run: bash tests/integration/nydus/gha-run.sh install-kata kata-artifacts
- name: Install kata-tools
run: bash tests/integration/nydus/gha-run.sh install-kata-tools kata-tools-artifacts
- name: Run nydus tests
timeout-minutes: 10
run: bash tests/integration/nydus/gha-run.sh run
run-runk:
name: run-runk
# Skip runk tests as we have no maintainers. TODO: Decide when to remove altogether
if: false
runs-on: ubuntu-22.04
env:
CONTAINERD_VERSION: lts
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.commit-hash }}
fetch-depth: 0
persist-credentials: false
- name: Rebase atop of the latest target branch
run: |
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: Install dependencies
run: bash tests/integration/runk/gha-run.sh install-dependencies
- name: get-kata-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-artifacts
- name: Install kata
run: bash tests/integration/runk/gha-run.sh install-kata kata-artifacts
- name: Run runk tests
timeout-minutes: 10
run: bash tests/integration/runk/gha-run.sh run
run-tracing:
name: run-tracing
strategy:
@@ -252,6 +279,50 @@ jobs:
timeout-minutes: 15
run: bash tests/functional/vfio/gha-run.sh run
run-docker-tests:
name: run-docker-tests
strategy:
# We can set this to true whenever we're 100% sure that
# all the tests are not flaky, otherwise we'll fail them
# all due to a single flaky instance.
fail-fast: false
matrix:
vmm:
- qemu
runs-on: ubuntu-22.04
env:
KATA_HYPERVISOR: ${{ matrix.vmm }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.commit-hash }}
fetch-depth: 0
persist-credentials: false
- name: Rebase atop of the latest target branch
run: |
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: Install dependencies
run: bash tests/integration/docker/gha-run.sh install-dependencies
env:
GH_TOKEN: ${{ github.token }}
- name: get-kata-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-artifacts
- name: Install kata
run: bash tests/integration/docker/gha-run.sh install-kata kata-artifacts
- name: Run docker smoke test
timeout-minutes: 5
run: bash tests/integration/docker/gha-run.sh run
run-nerdctl-tests:
name: run-nerdctl-tests
strategy:
@@ -265,7 +336,6 @@ jobs:
- dragonball
- qemu
- cloud-hypervisor
- qemu-runtime-rs
runs-on: ubuntu-22.04
env:
KATA_HYPERVISOR: ${{ matrix.vmm }}
@@ -340,16 +410,8 @@ jobs:
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-artifacts
- name: get-kata-tools-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-tools-artifacts
- name: Install kata & kata-tools
run: |
bash tests/functional/kata-agent-apis/gha-run.sh install-kata kata-artifacts
bash tests/functional/kata-agent-apis/gha-run.sh install-kata-tools kata-tools-artifacts
- name: Install kata
run: bash tests/functional/kata-agent-apis/gha-run.sh install-kata kata-artifacts
- name: Run kata agent api tests with agent-ctl
run: bash tests/functional/kata-agent-apis/gha-run.sh run

View File

@@ -106,3 +106,44 @@ jobs:
- name: Run containerd-stability tests
timeout-minutes: 15
run: bash tests/stability/gha-run.sh run
run-docker-tests:
name: run-docker-tests
strategy:
# We can set this to true whenever we're 100% sure that
# all the tests are not flaky, otherwise we'll fail them
# all due to a single flaky instance.
fail-fast: false
matrix:
vmm: ['qemu']
runs-on: s390x-large
env:
KATA_HYPERVISOR: ${{ matrix.vmm }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.commit-hash }}
fetch-depth: 0
persist-credentials: false
- name: Rebase atop of the latest target branch
run: |
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: Install dependencies
run: bash tests/integration/docker/gha-run.sh install-dependencies
- name: get-kata-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-static-tarball-s390x${{ inputs.tarball-suffix }}
path: kata-artifacts
- name: Install kata
run: bash tests/integration/docker/gha-run.sh install-kata kata-artifacts
- name: Run docker smoke test
timeout-minutes: 5
run: bash tests/integration/docker/gha-run.sh run

View File

@@ -12,12 +12,7 @@ name: Build checks
jobs:
check:
name: check
runs-on: >-
${{
( contains(inputs.instance, 's390x') && matrix.component.name == 'runtime' ) && 's390x' ||
( contains(inputs.instance, 'ppc64le') && (matrix.component.name == 'runtime' || matrix.component.name == 'agent') ) && 'ppc64le' ||
inputs.instance
}}
runs-on: ${{ matrix.component.name == 'runtime' && inputs.instance == 'ubuntu-24.04-s390x' && 's390x' || matrix.component.name == 'runtime' && inputs.instance == 'ubuntu-24.04-ppc64le' && 'ppc64le' || inputs.instance }}
strategy:
fail-fast: false
matrix:
@@ -73,8 +68,6 @@ jobs:
needs:
- rust
- protobuf-compiler
instance:
- ${{ inputs.instance }}
steps:
- name: Adjust a permission for repo

View File

@@ -41,11 +41,16 @@ jobs:
matrix:
asset:
- agent
- agent-ctl
- busybox
- cloud-hypervisor
- cloud-hypervisor-glibc
- coco-guest-components
- csi-kata-directvolume
- firecracker
- genpolicy
- kata-ctl
- kata-manager
- kernel
- kernel-confidential
- kernel-dragonball-experimental
@@ -54,11 +59,11 @@ jobs:
- nydus
- ovmf
- ovmf-sev
- ovmf-tdx
- pause-image
- qemu
- qemu-snp-experimental
- qemu-tdx-experimental
- trace-forwarder
- virtiofsd
stage:
- ${{ inputs.stage }}
@@ -116,7 +121,7 @@ jobs:
echo "oci-name=${oci_image%@*}" >> "$GITHUB_OUTPUT"
echo "oci-digest=${oci_image#*@}" >> "$GITHUB_OUTPUT"
- uses: oras-project/setup-oras@22ce207df3b08e061f537244349aac6ae1d214f6 # v1.2.4
- uses: oras-project/setup-oras@5c0b487ce3fe0ce3ab0d034e63669e426e294e4d # v1.2.2
if: ${{ env.PERFORM_ATTESTATION == 'yes' }}
with:
version: "1.2.0"
@@ -148,8 +153,8 @@ jobs:
if: ${{ startsWith(matrix.asset, 'kernel-nvidia-gpu') }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: kata-artifacts-amd64-${{ matrix.asset }}-modules${{ inputs.tarball-suffix }}
path: kata-build/kata-static-${{ matrix.asset }}-modules.tar.zst
name: kata-artifacts-amd64-${{ matrix.asset }}-headers${{ inputs.tarball-suffix }}
path: kata-build/kata-static-${{ matrix.asset }}-headers.tar.zst
retention-days: 15
if-no-files-found: error
@@ -166,8 +171,6 @@ jobs:
- rootfs-image
- rootfs-image-confidential
- rootfs-image-mariner
- rootfs-image-nvidia-gpu
- rootfs-image-nvidia-gpu-confidential
- rootfs-initrd
- rootfs-initrd-confidential
- rootfs-initrd-nvidia-gpu
@@ -237,8 +240,8 @@ jobs:
asset:
- busybox
- coco-guest-components
- kernel-nvidia-gpu-modules
- kernel-nvidia-gpu-confidential-modules
- kernel-nvidia-gpu-headers
- kernel-nvidia-gpu-confidential-headers
- pause-image
steps:
- uses: geekyeggo/delete-artifact@f275313e70c08f6120db482d7a6b98377786765b # v5.1.0
@@ -359,104 +362,3 @@ jobs:
path: kata-static.tar.zst
retention-days: 15
if-no-files-found: error
build-tools-asset:
name: build-tools-asset
runs-on: ubuntu-22.04
permissions:
contents: read
packages: write
strategy:
matrix:
asset:
- agent-ctl
- csi-kata-directvolume
- genpolicy
- kata-ctl
- kata-manager
- trace-forwarder
stage:
- ${{ inputs.stage }}
steps:
- name: Login to Kata Containers quay.io
if: ${{ inputs.push-to-registry == 'yes' }}
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.commit-hash }}
fetch-depth: 0 # This is needed in order to keep the commit ids history
persist-credentials: false
- name: Rebase atop of the latest target branch
run: |
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: Build ${{ matrix.asset }}
id: build
run: |
make "${KATA_ASSET}-tarball"
build_dir=$(readlink -f build)
# store-artifact does not work with symlink
mkdir -p kata-tools-build && cp "${build_dir}"/kata-static-"${KATA_ASSET}"*.tar.* kata-tools-build/.
env:
KATA_ASSET: ${{ matrix.asset }}
TAR_OUTPUT: ${{ matrix.asset }}.tar.gz
PUSH_TO_REGISTRY: ${{ inputs.push-to-registry }}
ARTEFACT_REGISTRY: ghcr.io
ARTEFACT_REGISTRY_USERNAME: ${{ github.actor }}
ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
TARGET_BRANCH: ${{ inputs.target-branch }}
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
- name: store-artifact ${{ matrix.asset }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: kata-tools-artifacts-amd64-${{ matrix.asset }}${{ inputs.tarball-suffix }}
path: kata-tools-build/kata-static-${{ matrix.asset }}.tar.zst
retention-days: 15
if-no-files-found: error
create-kata-tools-tarball:
name: create-kata-tools-tarball
runs-on: ubuntu-22.04
needs: [build-tools-asset]
permissions:
contents: read
packages: write
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.commit-hash }}
fetch-depth: 0
fetch-tags: true
persist-credentials: false
- name: Rebase atop of the latest target branch
run: |
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: get-artifacts
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
pattern: kata-tools-artifacts-amd64-*${{ inputs.tarball-suffix }}
path: kata-tools-artifacts
merge-multiple: true
- name: merge-artifacts
run: |
./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-tools-artifacts versions.yaml kata-tools-static.tar.zst
env:
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
- name: store-artifacts
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-tools-static.tar.zst
retention-days: 15
if-no-files-found: error

View File

@@ -31,7 +31,7 @@ permissions: {}
jobs:
build-asset:
name: build-asset
runs-on: ubuntu-24.04-arm
runs-on: ubuntu-22.04-arm
permissions:
contents: read
packages: write
@@ -102,7 +102,7 @@ jobs:
echo "oci-name=${oci_image%@*}" >> "$GITHUB_OUTPUT"
echo "oci-digest=${oci_image#*@}" >> "$GITHUB_OUTPUT"
- uses: oras-project/setup-oras@22ce207df3b08e061f537244349aac6ae1d214f6 # v1.2.4
- uses: oras-project/setup-oras@5c0b487ce3fe0ce3ab0d034e63669e426e294e4d # v1.2.2
if: ${{ env.PERFORM_ATTESTATION == 'yes' }}
with:
version: "1.2.0"
@@ -134,14 +134,14 @@ jobs:
if: ${{ startsWith(matrix.asset, 'kernel-nvidia-gpu') }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: kata-artifacts-arm64-${{ matrix.asset }}-modules${{ inputs.tarball-suffix }}
path: kata-build/kata-static-${{ matrix.asset }}-modules.tar.zst
name: kata-artifacts-arm64-${{ matrix.asset }}-headers${{ inputs.tarball-suffix }}
path: kata-build/kata-static-${{ matrix.asset }}-headers.tar.zst
retention-days: 15
if-no-files-found: error
build-asset-rootfs:
name: build-asset-rootfs
runs-on: ubuntu-24.04-arm
runs-on: ubuntu-22.04-arm
needs: build-asset
permissions:
contents: read
@@ -150,7 +150,6 @@ jobs:
matrix:
asset:
- rootfs-image
- rootfs-image-nvidia-gpu
- rootfs-initrd
- rootfs-initrd-nvidia-gpu
steps:
@@ -210,13 +209,13 @@ jobs:
# We don't need the binaries installed in the rootfs as part of the release tarball, so can delete them now we've built the rootfs
remove-rootfs-binary-artifacts:
name: remove-rootfs-binary-artifacts
runs-on: ubuntu-24.04-arm
runs-on: ubuntu-22.04-arm
needs: build-asset-rootfs
strategy:
matrix:
asset:
- busybox
- kernel-nvidia-gpu-modules
- kernel-nvidia-gpu-headers
steps:
- uses: geekyeggo/delete-artifact@f275313e70c08f6120db482d7a6b98377786765b # v5.1.0
with:
@@ -225,7 +224,7 @@ jobs:
# We don't need the binaries installed in the rootfs as part of the release tarball, so can delete them now we've built the rootfs
remove-rootfs-binary-artifacts-for-release:
name: remove-rootfs-binary-artifacts-for-release
runs-on: ubuntu-24.04-arm
runs-on: ubuntu-22.04-arm
needs: build-asset-rootfs
strategy:
matrix:
@@ -239,7 +238,7 @@ jobs:
build-asset-shim-v2:
name: build-asset-shim-v2
runs-on: ubuntu-24.04-arm
runs-on: ubuntu-22.04-arm
needs: [build-asset, build-asset-rootfs, remove-rootfs-binary-artifacts, remove-rootfs-binary-artifacts-for-release]
permissions:
contents: read
@@ -299,7 +298,7 @@ jobs:
create-kata-tarball:
name: create-kata-tarball
runs-on: ubuntu-24.04-arm
runs-on: ubuntu-22.04-arm
needs: [build-asset, build-asset-rootfs, build-asset-shim-v2]
permissions:
contents: read

View File

@@ -32,7 +32,7 @@ jobs:
permissions:
contents: read
packages: write
runs-on: ubuntu-24.04-ppc64le
runs-on: ppc64le-small
strategy:
matrix:
asset:
@@ -89,7 +89,7 @@ jobs:
build-asset-rootfs:
name: build-asset-rootfs
runs-on: ubuntu-24.04-ppc64le
runs-on: ppc64le-small
needs: build-asset
permissions:
contents: read
@@ -170,7 +170,7 @@ jobs:
build-asset-shim-v2:
name: build-asset-shim-v2
runs-on: ubuntu-24.04-ppc64le
runs-on: ppc64le-small
needs: [build-asset, build-asset-rootfs, remove-rootfs-binary-artifacts]
permissions:
contents: read
@@ -230,7 +230,7 @@ jobs:
create-kata-tarball:
name: create-kata-tarball
runs-on: ubuntu-24.04-ppc64le
runs-on: ppc64le-small
needs: [build-asset, build-asset-rootfs, build-asset-shim-v2]
permissions:
contents: read

View File

@@ -20,6 +20,9 @@ on:
required: false
type: string
default: ""
secrets:
QUAY_DEPLOYER_PASSWORD:
required: true
permissions: {}
@@ -38,6 +41,14 @@ jobs:
- kernel
- virtiofsd
steps:
- name: Login to Kata Containers quay.io
if: ${{ inputs.push-to-registry == 'yes' }}
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.commit-hash }}
@@ -71,5 +82,5 @@ jobs:
with:
name: kata-artifacts-riscv64-${{ matrix.asset }}${{ inputs.tarball-suffix }}
path: kata-build/kata-static-${{ matrix.asset }}.tar.zst
retention-days: 3
retention-days: 15
if-no-files-found: error

View File

@@ -32,7 +32,7 @@ permissions: {}
jobs:
build-asset:
name: build-asset
runs-on: ubuntu-24.04-s390x
runs-on: s390x
permissions:
contents: read
packages: write
@@ -257,7 +257,7 @@ jobs:
build-asset-shim-v2:
name: build-asset-shim-v2
runs-on: ubuntu-24.04-s390x
runs-on: s390x
needs: [build-asset, build-asset-rootfs, remove-rootfs-binary-artifacts]
permissions:
contents: read
@@ -319,7 +319,7 @@ jobs:
create-kata-tarball:
name: create-kata-tarball
runs-on: ubuntu-24.04-s390x
runs-on: s390x
needs:
- build-asset
- build-asset-rootfs

View File

@@ -1,75 +0,0 @@
name: Build kubectl multi-arch image
on:
schedule:
# Run every Sunday at 00:00 UTC
- cron: '0 0 * * 0'
workflow_dispatch:
# Allow manual triggering
push:
branches:
- main
paths:
- 'tools/packaging/kubectl/Dockerfile'
- '.github/workflows/build-kubectl-image.yaml'
permissions: {}
env:
REGISTRY: quay.io
IMAGE_NAME: kata-containers/kubectl
jobs:
build-and-push:
name: Build and push multi-arch image
runs-on: ubuntu-24.04
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false
- name: Set up QEMU
uses: docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 # v3.6.0
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Login to Quay.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
with:
registry: ${{ env.REGISTRY }}
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
- name: Get kubectl version
id: kubectl-version
run: |
KUBECTL_VERSION=$(curl -L -s https://dl.k8s.io/release/stable.txt)
echo "version=${KUBECTL_VERSION}" >> "$GITHUB_OUTPUT"
- name: Generate image metadata
id: meta
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
type=raw,value=latest
type=raw,value={{date 'YYYYMMDD'}}
type=raw,value=${{ steps.kubectl-version.outputs.version }}
type=sha,prefix=
- name: Build and push multi-arch image
uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25 # v5.4.0
with:
context: tools/packaging/kubectl/
file: tools/packaging/kubectl/Dockerfile
platforms: linux/amd64,linux/arm64,linux/s390x,linux/ppc64le
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max

View File

@@ -1,34 +0,0 @@
on:
schedule:
- cron: '0 5 * * *'
name: Nightly CI for RISC-V
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
permissions: {}
jobs:
build-kata-static-tarball-riscv:
permissions:
contents: read
packages: write
id-token: write
attestations: write
uses: ./.github/workflows/build-kata-static-tarball-riscv64.yaml
with:
tarball-suffix: -${{ github.sha }}
commit-hash: ${{ github.sha }}
target-branch: ${{ github.ref_name }}
build-checks-preview:
strategy:
fail-fast: false
matrix:
instance:
- "riscv-builder"
uses: ./.github/workflows/build-checks-preview-riscv64.yaml
with:
instance: ${{ matrix.instance }}

View File

@@ -102,7 +102,7 @@ jobs:
tag: ${{ inputs.tag }}-arm64
commit-hash: ${{ inputs.commit-hash }}
target-branch: ${{ inputs.target-branch }}
runner: ubuntu-24.04-arm
runner: ubuntu-22.04-arm
arch: arm64
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
@@ -134,6 +134,20 @@ jobs:
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
build-kata-static-tarball-riscv64:
permissions:
contents: read
packages: write
id-token: write
attestations: write
uses: ./.github/workflows/build-kata-static-tarball-riscv64.yaml
with:
tarball-suffix: -${{ inputs.tag }}
commit-hash: ${{ inputs.commit-hash }}
target-branch: ${{ inputs.target-branch }}
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
publish-kata-deploy-payload-s390x:
needs: build-kata-static-tarball-s390x
permissions:
@@ -147,7 +161,7 @@ jobs:
tag: ${{ inputs.tag }}-s390x
commit-hash: ${{ inputs.commit-hash }}
target-branch: ${{ inputs.target-branch }}
runner: ubuntu-24.04-s390x
runner: s390x
arch: s390x
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
@@ -165,7 +179,7 @@ jobs:
tag: ${{ inputs.tag }}-ppc64le
commit-hash: ${{ inputs.commit-hash }}
target-branch: ${{ inputs.target-branch }}
runner: ubuntu-24.04-ppc64le
runner: ppc64le-small
arch: ppc64le
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
@@ -233,14 +247,14 @@ jobs:
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: get-kata-tools-tarball
- name: get-kata-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64-${{ inputs.tag }}
path: kata-tools-artifacts
name: kata-static-tarball-amd64-${{ inputs.tag }}
path: kata-artifacts
- name: Install kata-tools
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
- name: Install tools
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-artifacts
- name: Copy binary into Docker context
run: |
@@ -314,7 +328,6 @@ jobs:
needs: publish-kata-deploy-payload-amd64
uses: ./.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml
with:
tarball-suffix: -${{ inputs.tag }}
registry: ghcr.io
repo: ${{ github.repository_owner }}/kata-deploy-ci
tag: ${{ inputs.tag }}-amd64
@@ -474,7 +487,7 @@ jobs:
vmm: ${{ matrix.params.vmm }}
run-cri-containerd-tests-arm64:
if: false
if: ${{ inputs.skip-test != 'yes' }}
needs: build-kata-static-tarball-arm64
strategy:
fail-fast: false

View File

@@ -1,32 +0,0 @@
name: Documentation
on:
push:
branches:
- main
permissions: {}
jobs:
deploy-docs:
name: deploy-docs
permissions:
contents: read
pages: write
id-token: write
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
runs-on: ubuntu-latest
steps:
- uses: actions/configure-pages@v5
- uses: actions/checkout@v5
with:
persist-credentials: false
- uses: actions/setup-python@v5
with:
python-version: 3.x
- run: pip install zensical
- run: zensical build --clean
- uses: actions/upload-pages-artifact@v4
with:
path: site
- uses: actions/deploy-pages@v4
id: deployment

View File

@@ -10,9 +10,7 @@ on:
- opened
- synchronize
- reopened
- edited
- labeled
- unlabeled
permissions: {}

View File

@@ -0,0 +1,43 @@
name: kata-runtime-classes-sync
on:
pull_request:
types:
- opened
- edited
- reopened
- synchronize
permissions: {}
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
kata-deploy-runtime-classes-check:
name: kata-deploy-runtime-classes-check
runs-on: ubuntu-22.04
steps:
- name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false
- name: Ensure the split out runtime classes match the all-in-one file
run: |
pushd tools/packaging/kata-deploy/runtimeclasses/
echo "::group::Combine runtime classes"
for runtimeClass in $(find . -type f \( -name "*.yaml" -and -not -name "kata-runtimeClasses.yaml" \) | sort); do
echo "Adding ${runtimeClass} to the resultingRuntimeClasses.yaml"
cat "${runtimeClass}" >> resultingRuntimeClasses.yaml;
done
echo "::endgroup::"
echo "::group::Displaying the content of resultingRuntimeClasses.yaml"
cat resultingRuntimeClasses.yaml
echo "::endgroup::"
echo ""
echo "::group::Displaying the content of kata-runtimeClasses.yaml"
cat kata-runtimeClasses.yaml
echo "::endgroup::"
echo ""
diff resultingRuntimeClasses.yaml kata-runtimeClasses.yaml

View File

@@ -97,7 +97,7 @@ jobs:
repo: kata-containers/kata-deploy-ci
tag: kata-containers-latest-arm64
target-branch: ${{ github.ref_name }}
runner: ubuntu-24.04-arm
runner: ubuntu-22.04-arm
arch: arm64
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
@@ -131,7 +131,7 @@ jobs:
repo: kata-containers/kata-deploy-ci
tag: kata-containers-latest-ppc64le
target-branch: ${{ github.ref_name }}
runner: ubuntu-24.04-ppc64le
runner: ppc64le-small
arch: ppc64le
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}

View File

@@ -50,24 +50,6 @@ jobs:
fetch-depth: 0
persist-credentials: false
- name: Remove unnecessary directories to free up space
run: |
sudo rm -rf /usr/local/.ghcup
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo rm -rf /usr/local/lib/android
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf /usr/local/share/boost
sudo rm -rf /usr/lib/jvm
sudo rm -rf /usr/share/swift
sudo rm -rf /usr/local/share/powershell
sudo rm -rf /usr/local/julia*
sudo rm -rf /opt/az
sudo rm -rf /usr/local/share/chromium
sudo rm -rf /opt/microsoft
sudo rm -rf /opt/google
sudo rm -rf /usr/lib/firefox
- name: Rebase atop of the latest target branch
run: |
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"

View File

@@ -34,7 +34,7 @@ jobs:
permissions:
contents: read
packages: write
runs-on: ubuntu-24.04-arm
runs-on: ubuntu-22.04-arm
steps:
- name: Login to Kata Containers ghcr.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0

View File

@@ -31,7 +31,7 @@ jobs:
permissions:
contents: read
packages: write
runs-on: ubuntu-24.04-ppc64le
runs-on: ppc64le-small
steps:
- name: Login to Kata Containers ghcr.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0

View File

@@ -35,7 +35,7 @@ jobs:
permissions:
contents: read
packages: write
runs-on: ubuntu-24.04-s390x
runs-on: s390x
steps:
- name: Login to Kata Containers ghcr.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0

View File

@@ -181,23 +181,6 @@ jobs:
GH_TOKEN: ${{ github.token }}
ARCHITECTURE: ppc64le
- name: Set KATA_TOOLS_STATIC_TARBALL env var
run: |
tarball=$(pwd)/kata-tools-static.tar.zst
echo "KATA_TOOLS_STATIC_TARBALL=${tarball}" >> "$GITHUB_ENV"
- name: Download amd64 tools artifacts
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64
- name: Upload amd64 static tarball tools to GitHub
run: |
./tools/packaging/release/release.sh upload-kata-tools-static-tarball
env:
GH_TOKEN: ${{ github.token }}
ARCHITECTURE: amd64
upload-versions-yaml:
name: upload-versions-yaml
needs: release

View File

@@ -0,0 +1,167 @@
name: CI | Run containerd guest pull stability tests
on:
schedule:
- cron: "0 */1 * * *" #run every hour
permissions: {}
# This job relies on k8s pre-installed using kubeadm
jobs:
run-containerd-guest-pull-stability-tests:
name: run-containerd-guest-pull-stability-tests-${{ matrix.environment.test-type }}-${{ matrix.environment.containerd }}
strategy:
fail-fast: false
matrix:
environment: [
{ test-type: multi-snapshotter, containerd: v2.2 },
{ test-type: force-guest-pull, containerd: v1.7 },
{ test-type: force-guest-pull, containerd: v2.0 },
{ test-type: force-guest-pull, containerd: v2.1 },
{ test-type: force-guest-pull, containerd: v2.2 },
]
env:
# I don't want those to be inside double quotes, so I'm deliberately ignoring the double quotes here.
IMAGES_LIST: quay.io/mongodb/mongodb-community-server@sha256:8b73733842da21b6bbb6df4d7b2449229bb3135d2ec8c6880314d88205772a11 ghcr.io/edgelesssys/redis@sha256:ecb0a964c259a166a1eb62f0eb19621d42bd1cce0bc9bb0c71c828911d4ba93d
runs-on: containerd-${{ matrix.environment.test-type }}-${{ matrix.environment.containerd }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false
- name: Rotate the journal
run: sudo journalctl --rotate --vacuum-time 1s
- name: Pull the kata-deploy image to be used
run: sudo ctr -n k8s.io image pull quay.io/kata-containers/kata-deploy-ci:kata-containers-latest
- name: Deploy Kata Containers
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
env:
KATA_HYPERVISOR: qemu-coco-dev
KUBERNETES: vanilla
SNAPSHOTTER: ${{ matrix.environment.test-type == 'multi-snapshotter' && 'nydus' || '' }}
USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: ${{ matrix.environment.test-type == 'multi-snapshotter' }}
EXPERIMENTAL_FORCE_GUEST_PULL: ${{ matrix.environment.test-type == 'force-guest-pull' && 'qemu-coco-dev' || '' }}
# This is needed as we may hit the createContainerTimeout
- name: Adjust Kata Containers' create_container_timeout
run: |
sudo sed -i -e 's/^\(create_container_timeout\).*=.*$/\1 = 600/g' /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
grep "create_container_timeout.*=" /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
# This is needed in order to have enough tmpfs space inside the guest to pull the image
- name: Adjust Kata Containers' default_memory
run: |
sudo sed -i -e 's/^\(default_memory\).*=.*$/\1 = 4096/g' /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
grep "default_memory.*=" /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
- name: Run a few containers using overlayfs
run: |
# I don't want those to be inside double quotes, so I'm deliberately ignoring the double quotes here
# shellcheck disable=SC2086
for img in ${IMAGES_LIST}; do
echo "overlayfs | Using on image: ${img}"
pod="$(echo ${img} | tr ':.@/' '-' | awk '{print substr($0,1,56)}')"
kubectl run "${pod}" \
-it --rm \
--restart=Never \
--image="${img}" \
--image-pull-policy=Always \
--pod-running-timeout=10m \
-- uname -r
done
- name: Run a the same few containers using a different snapshotter
run: |
# I don't want those to be inside double quotes, so I'm deliberately ignoring the double quotes here
# shellcheck disable=SC2086
for img in ${IMAGES_LIST}; do
echo "nydus | Using on image: ${img}"
pod="kata-$(echo ${img} | tr ':.@/' '-' | awk '{print substr($0,1,56)}')"
kubectl run "${pod}" \
-it --rm \
--restart=Never \
--image="${img}" \
--image-pull-policy=Always \
--pod-running-timeout=10m \
--overrides='{
"spec": {
"runtimeClassName": "kata-qemu-coco-dev"
}
}' \
-- uname -r
done
- name: Uninstall Kata Containers
run: bash tests/integration/kubernetes/gha-run.sh cleanup
env:
KATA_HYPERVISOR: qemu-coco-dev
KUBERNETES: vanilla
SNAPSHOTTER: nydus
USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: true
- name: Run a few containers using overlayfs
run: |
# I don't want those to be inside double quotes, so I'm deliberately ignoring the double quotes here
# shellcheck disable=SC2086
for img in ${IMAGES_LIST}; do
echo "overlayfs | Using on image: ${img}"
pod="$(echo ${img} | tr ':.@/' '-' | awk '{print substr($0,1,56)}')"
kubectl run "${pod}" \
-it --rm \
--restart=Never \
--image=${img} \
--image-pull-policy=Always \
--pod-running-timeout=10m \
-- uname -r
done
- name: Deploy Kata Containers
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
env:
KATA_HYPERVISOR: qemu-coco-dev
KUBERNETES: vanilla
SNAPSHOTTER: nydus
USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: true
# This is needed as we may hit the createContainerTimeout
- name: Adjust Kata Containers' create_container_timeout
run: |
sudo sed -i -e 's/^\(create_container_timeout\).*=.*$/\1 = 600/g' /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
grep "create_container_timeout.*=" /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
# This is needed in order to have enough tmpfs space inside the guest to pull the image
- name: Adjust Kata Containers' default_memory
run: |
sudo sed -i -e 's/^\(default_memory\).*=.*$/\1 = 4096/g' /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
grep "default_memory.*=" /opt/kata/share/defaults/kata-containers/configuration-qemu-coco-dev.toml
- name: Run a the same few containers using a different snapshotter
run: |
# I don't want those to be inside double quotes, so I'm deliberately ignoring the double quotes here
# shellcheck disable=SC2086
for img in ${IMAGES_LIST}; do
echo "nydus | Using on image: ${img}"
pod="kata-$(echo ${img} | tr ':.@/' '-' | awk '{print substr($0,1,56)}')"
kubectl run "${pod}" \
-it --rm \
--restart=Never \
--image="${img}" \
--image-pull-policy=Always \
--pod-running-timeout=10m \
--overrides='{
"spec": {
"runtimeClassName": "kata-qemu-coco-dev"
}
}' \
-- uname -r
done
- name: Uninstall Kata Containers
run: bash tests/integration/kubernetes/gha-run.sh cleanup || true
if: always()
env:
KATA_HYPERVISOR: qemu-coco-dev
KUBERNETES: vanilla
SNAPSHOTTER: nydus
USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: true

View File

@@ -93,14 +93,14 @@ jobs:
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: get-kata-tools-tarball
- name: get-kata-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-tools-artifacts
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-artifacts
- name: Install kata-tools
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
- name: Install kata
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-artifacts
- name: Download Azure CLI
uses: azure/setup-kubectl@776406bce94f63e41d621b960d78ee25c8b76ede # v4.0.1
@@ -142,10 +142,6 @@ jobs:
timeout-minutes: 60
run: bash tests/integration/kubernetes/gha-run.sh run-tests
- name: Report tests
if: always()
run: bash tests/integration/kubernetes/gha-run.sh report-tests
- name: Refresh OIDC token in case access token expired
if: always()
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0

View File

@@ -32,7 +32,6 @@ jobs:
matrix:
vmm:
- qemu
- qemu-runtime-rs
k8s:
- kubeadm
runs-on: arm64-k8s
@@ -69,10 +68,6 @@ jobs:
timeout-minutes: 30
run: bash tests/integration/kubernetes/gha-run.sh run-tests
- name: Report tests
if: always()
run: bash tests/integration/kubernetes/gha-run.sh report-tests
- name: Collect artifacts ${{ matrix.vmm }}
if: always()
run: bash tests/integration/kubernetes/gha-run.sh collect-artifacts

View File

@@ -1,10 +1,7 @@
name: CI | Run NVIDIA GPU kubernetes tests on amd64
name: CI | Run NVIDIA GPU kubernetes tests on arm64
on:
workflow_call:
inputs:
tarball-suffix:
required: true
type: string
registry:
required: true
type: string
@@ -48,8 +45,7 @@ jobs:
GH_PR_NUMBER: ${{ inputs.pr-number }}
KATA_HYPERVISOR: ${{ matrix.environment.vmm }}
KUBERNETES: kubeadm
KBS: ${{ matrix.environment.name == 'nvidia-gpu-snp' && 'true' || 'false' }}
K8S_TEST_HOST_TYPE: baremetal
K8S_TEST_HOST_TYPE: all
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
@@ -63,33 +59,6 @@ jobs:
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: get-kata-tools-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-tools-artifacts
- name: Install kata-tools
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
- name: Uninstall previous `kbs-client`
if: matrix.environment.name != 'nvidia-gpu'
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh uninstall-kbs-client
- name: Deploy CoCo KBS
if: matrix.environment.name != 'nvidia-gpu'
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-coco-kbs
env:
NVIDIA_VERIFIER_MODE: remote
KBS_INGRESS: nodeport
- name: Install `kbs-client`
if: matrix.environment.name != 'nvidia-gpu'
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh install-kbs-client
- name: Deploy Kata
timeout-minutes: 20
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
@@ -102,11 +71,6 @@ jobs:
run: bash tests/integration/kubernetes/gha-run.sh run-nv-tests
env:
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
- name: Report tests
if: always()
run: bash tests/integration/kubernetes/gha-run.sh report-tests
- name: Collect artifacts ${{ matrix.environment.vmm }}
if: always()
run: bash tests/integration/kubernetes/gha-run.sh collect-artifacts
@@ -123,9 +87,3 @@ jobs:
if: always()
timeout-minutes: 15
run: bash tests/integration/kubernetes/gha-run.sh cleanup
- name: Delete CoCo KBS
if: always() && matrix.environment.name != 'nvidia-gpu'
timeout-minutes: 10
run: |
bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs

View File

@@ -75,7 +75,3 @@ jobs:
- name: Run tests
timeout-minutes: 30
run: bash tests/integration/kubernetes/gha-run.sh run-tests
- name: Report tests
if: always()
run: bash tests/integration/kubernetes/gha-run.sh report-tests

View File

@@ -131,18 +131,12 @@ jobs:
timeout-minutes: 60
run: bash tests/integration/kubernetes/gha-run.sh run-tests
- name: Report tests
if: always()
run: bash tests/integration/kubernetes/gha-run.sh report-tests
- name: Delete kata-deploy
if: always()
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh cleanup-zvsi
- name: Delete CoCo KBS
if: always()
timeout-minutes: 10
run: |
if [ "${KBS}" == "true" ]; then
bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs

View File

@@ -46,7 +46,6 @@ jobs:
matrix:
vmm:
- qemu-coco-dev
- qemu-coco-dev-runtime-rs
snapshotter:
- nydus
pull-type:
@@ -84,14 +83,14 @@ jobs:
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: get-kata-tools-tarball
- name: get-kata-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-tools-artifacts
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-artifacts
- name: Install kata-tools
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
- name: Install kata
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-artifacts
- name: Log into the Azure account
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
@@ -140,10 +139,6 @@ jobs:
timeout-minutes: 300
run: bash tests/stability/gha-stability-run.sh run-tests
- name: Report tests
if: always()
run: bash tests/integration/kubernetes/gha-run.sh report-tests
- name: Refresh OIDC token in case access token expired
if: always()
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0

View File

@@ -79,15 +79,6 @@ jobs:
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: get-kata-tools-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-tools-artifacts
- name: Install kata-tools
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
- name: Deploy Kata
timeout-minutes: 20
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
@@ -120,12 +111,10 @@ jobs:
- name: Delete kata-deploy
if: always()
timeout-minutes: 15
run: bash tests/integration/kubernetes/gha-run.sh cleanup
- name: Delete CoCo KBS
if: always()
timeout-minutes: 10
run: |
[[ "${KATA_HYPERVISOR}" == "qemu-tdx" ]] && echo "ITA_KEY=${GH_ITA_KEY}" >> "${GITHUB_ENV}"
bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs
@@ -142,14 +131,12 @@ jobs:
matrix:
vmm:
- qemu-coco-dev
- qemu-coco-dev-runtime-rs
snapshotter:
- nydus
pull-type:
- guest-pull
include:
- pull-type: experimental-force-guest-pull
vmm: qemu-coco-dev
snapshotter: ""
runs-on: ubuntu-22.04
permissions:
@@ -170,7 +157,6 @@ jobs:
AUTHENTICATED_IMAGE_USER: ${{ vars.AUTHENTICATED_IMAGE_USER }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
SNAPSHOTTER: ${{ matrix.snapshotter }}
EXPERIMENTAL_FORCE_GUEST_PULL: ${{ matrix.pull-type == 'experimental-force-guest-pull' && matrix.vmm || '' }}
# Caution: current ingress controller used to expose the KBS service
# requires much vCPUs, lefting only a few for the tests. Depending on the
# host type chose it will result on the creation of a cluster with
@@ -189,14 +175,14 @@ jobs:
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: get-kata-tools-tarball
- name: get-kata-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-tools-artifacts
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-artifacts
- name: Install kata-tools
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
- name: Install kata
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-artifacts
- name: Log into the Azure account
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
@@ -229,6 +215,7 @@ jobs:
timeout-minutes: 20
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-aks
env:
EXPERIMENTAL_FORCE_GUEST_PULL: ${{ env.PULL_TYPE == 'experimental-force-guest-pull' && env.KATA_HYPERVISOR || '' }}
USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: ${{ env.SNAPSHOTTER == 'nydus' }}
AUTO_GENERATE_POLICY: ${{ env.PULL_TYPE == 'experimental-force-guest-pull' && 'no' || 'yes' }}
@@ -262,7 +249,6 @@ jobs:
- name: Delete AKS cluster
if: always()
timeout-minutes: 15
run: bash tests/integration/kubernetes/gha-run.sh delete-cluster
# Generate jobs for testing CoCo on non-TEE environments with erofs-snapshotter
@@ -312,15 +298,6 @@ jobs:
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: get-kata-tools-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-tools-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-tools-artifacts
- name: Install kata-tools
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
- name: Remove unnecessary directories to free up space
run: |
sudo rm -rf /usr/local/.ghcup
@@ -329,6 +306,7 @@ jobs:
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf /usr/local/share/boost
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
sudo rm -rf /usr/lib/jvm
sudo rm -rf /usr/share/swift
sudo rm -rf /usr/local/share/powershell

View File

@@ -102,10 +102,6 @@ jobs:
- name: Run tests
run: bash tests/functional/kata-deploy/gha-run.sh run-tests
- name: Report tests
if: always()
run: bash tests/integration/kubernetes/gha-run.sh report-tests
- name: Refresh OIDC token in case access token expired
if: always()
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0

View File

@@ -66,6 +66,7 @@ jobs:
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf /usr/local/share/boost
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
sudo rm -rf /usr/lib/jvm
sudo rm -rf /usr/share/swift
sudo rm -rf /usr/local/share/powershell
@@ -84,7 +85,3 @@ jobs:
- name: Run tests
run: bash tests/functional/kata-deploy/gha-run.sh run-tests
- name: Report tests
if: always()
run: bash tests/functional/kata-deploy/gha-run.sh report-tests

54
.github/workflows/run-runk-tests.yaml vendored Normal file
View File

@@ -0,0 +1,54 @@
name: CI | Run runk tests
on:
workflow_call:
inputs:
tarball-suffix:
required: false
type: string
commit-hash:
required: false
type: string
target-branch:
required: false
type: string
default: ""
permissions: {}
jobs:
run-runk:
name: run-runk
# Skip runk tests as we have no maintainers. TODO: Decide when to remove altogether
if: false
runs-on: ubuntu-22.04
env:
CONTAINERD_VERSION: lts
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.commit-hash }}
fetch-depth: 0
persist-credentials: false
- name: Rebase atop of the latest target branch
run: |
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: Install dependencies
run: bash tests/integration/runk/gha-run.sh install-dependencies
env:
GH_TOKEN: ${{ github.token }}
- name: get-kata-tarball
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
path: kata-artifacts
- name: Install kata
run: bash tests/integration/runk/gha-run.sh install-kata kata-artifacts
- name: Run runk tests
run: bash tests/integration/runk/gha-run.sh run

View File

@@ -6,21 +6,14 @@ on:
permissions: {}
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
stale:
name: stale
runs-on: ubuntu-22.04
permissions:
actions: write # Needed to manage caches for state persistence across runs
pull-requests: write # Needed to add/remove labels, post comments, or close PRs
steps:
- uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0
with:
stale-pr-message: 'This PR has been opened without activity for 180 days. Please comment on the issue or it will be closed in 7 days.'
stale-pr-message: 'This PR has been opened without with no activity for 180 days. Comment on the issue otherwise it will be closed in 7 days'
days-before-pr-stale: 180
days-before-pr-close: 7
days-before-issue-stale: -1

View File

@@ -28,9 +28,21 @@ jobs:
fail-fast: false
matrix:
instance:
- "ubuntu-24.04-arm"
- "ubuntu-24.04-s390x"
- "ubuntu-22.04-arm"
- "s390x"
- "ubuntu-24.04-ppc64le"
uses: ./.github/workflows/build-checks.yaml
with:
instance: ${{ matrix.instance }}
build-checks-preview:
needs: skipper
if: ${{ needs.skipper.outputs.skip_static != 'yes' }}
strategy:
fail-fast: false
matrix:
instance:
- "riscv-builder"
uses: ./.github/workflows/build-checks-preview-riscv64.yaml
with:
instance: ${{ matrix.instance }}

2
.gitignore vendored
View File

@@ -18,5 +18,3 @@ src/tools/log-parser/kata-log-parser
tools/packaging/static-build/agent/install_libseccomp.sh
.envrc
.direnv
**/.DS_Store
site/

View File

@@ -1,140 +0,0 @@
[workspace.package]
authors = ["The Kata Containers community <kata-dev@lists.katacontainers.io>"]
edition = "2018"
license = "Apache-2.0"
rust-version = "1.88"
[workspace]
members = [
# Dragonball
"src/dragonball",
"src/dragonball/dbs_acpi",
"src/dragonball/dbs_address_space",
"src/dragonball/dbs_allocator",
"src/dragonball/dbs_arch",
"src/dragonball/dbs_boot",
"src/dragonball/dbs_device",
"src/dragonball/dbs_interrupt",
"src/dragonball/dbs_legacy_devices",
"src/dragonball/dbs_pci",
"src/dragonball/dbs_tdx",
"src/dragonball/dbs_upcall",
"src/dragonball/dbs_utils",
"src/dragonball/dbs_virtio_devices",
# runtime-rs
"src/runtime-rs",
"src/runtime-rs/crates/agent",
"src/runtime-rs/crates/hypervisor",
"src/runtime-rs/crates/persist",
"src/runtime-rs/crates/resource",
"src/runtime-rs/crates/runtimes",
"src/runtime-rs/crates/service",
"src/runtime-rs/crates/shim",
"src/runtime-rs/crates/shim-ctl",
"src/runtime-rs/tests/utils",
]
resolver = "2"
# TODO: Add all excluded crates to root workspace
exclude = [
"src/agent",
"src/tools",
"src/libs",
# kata-deploy binary is standalone and has its own Cargo.toml for now
"tools/packaging/kata-deploy/binary",
# We are cloning and building rust packages under
# "tools/packaging/kata-deploy/local-build/build" folder, which may mislead
# those packages to think they are part of the kata root workspace
"tools/packaging/kata-deploy/local-build/build",
]
[workspace.dependencies]
# Rust-VMM crates
event-manager = "0.2.1"
kvm-bindings = "0.6.0"
kvm-ioctls = "=0.12.1"
linux-loader = "0.8.0"
seccompiler = "0.5.0"
vfio-bindings = "0.3.0"
vfio-ioctls = "0.1.0"
virtio-bindings = "0.1.0"
virtio-queue = "0.7.0"
vm-fdt = "0.2.0"
vm-memory = "0.10.0"
vm-superio = "0.5.0"
vmm-sys-util = "0.11.0"
# Local dependencies from Dragonball Sandbox crates
dragonball = { path = "src/dragonball" }
dbs-acpi = { path = "src/dragonball/dbs_acpi" }
dbs-address-space = { path = "src/dragonball/dbs_address_space" }
dbs-allocator = { path = "src/dragonball/dbs_allocator" }
dbs-arch = { path = "src/dragonball/dbs_arch" }
dbs-boot = { path = "src/dragonball/dbs_boot" }
dbs-device = { path = "src/dragonball/dbs_device" }
dbs-interrupt = { path = "src/dragonball/dbs_interrupt" }
dbs-legacy-devices = { path = "src/dragonball/dbs_legacy_devices" }
dbs-pci = { path = "src/dragonball/dbs_pci" }
dbs-tdx = { path = "src/dragonball/dbs_tdx" }
dbs-upcall = { path = "src/dragonball/dbs_upcall" }
dbs-utils = { path = "src/dragonball/dbs_utils" }
dbs-virtio-devices = { path = "src/dragonball/dbs_virtio_devices" }
# Local dependencies from runtime-rs
agent = { path = "src/runtime-rs/crates/agent" }
hypervisor = { path = "src/runtime-rs/crates/hypervisor" }
persist = { path = "src/runtime-rs/crates/persist" }
resource = { path = "src/runtime-rs/crates/resource" }
runtimes = { path = "src/runtime-rs/crates/runtimes" }
service = { path = "src/runtime-rs/crates/service" }
tests_utils = { path = "src/runtime-rs/tests/utils" }
ch-config = { path = "src/runtime-rs/crates/hypervisor/ch-config" }
common = { path = "src/runtime-rs/crates/runtimes/common" }
linux_container = { path = "src/runtime-rs/crates/runtimes/linux_container" }
virt_container = { path = "src/runtime-rs/crates/runtimes/virt_container" }
wasm_container = { path = "src/runtime-rs/crates/runtimes/wasm_container" }
# Local dependencies from `src/lib`
kata-sys-util = { path = "src/libs/kata-sys-util" }
kata-types = { path = "src/libs/kata-types", features = ["safe-path"] }
logging = { path = "src/libs/logging" }
protocols = { path = "src/libs/protocols", features = ["async"] }
runtime-spec = { path = "src/libs/runtime-spec" }
safe-path = { path = "src/libs/safe-path" }
shim-interface = { path = "src/libs/shim-interface" }
test-utils = { path = "src/libs/test-utils" }
# Outside dependencies
actix-rt = "2.7.0"
anyhow = "1.0"
async-trait = "0.1.48"
containerd-shim = { version = "0.10.0", features = ["async"] }
containerd-shim-protos = { version = "0.10.0", features = ["async"] }
go-flag = "0.1.0"
hyper = "0.14.20"
hyperlocal = "0.8.0"
lazy_static = "1.4"
libc = "0.2"
log = "0.4.14"
netns-rs = "0.1.0"
# Note: nix needs to stay sync'd with libs versions
nix = "0.26.4"
oci-spec = { version = "0.8.1", features = ["runtime"] }
protobuf = "3.7.2"
rand = "0.8.4"
serde = { version = "1.0.145", features = ["derive"] }
serde_json = "1.0.91"
sha2 = "0.10.9"
slog = "2.5.2"
slog-scope = "4.4.0"
strum = { version = "0.24.0", features = ["derive"] }
tempfile = "3.19.1"
thiserror = "1.0"
tokio = "1.46.1"
tracing = "0.1.41"
tracing-opentelemetry = "0.18.0"
ttrpc = "0.8.4"
url = "2.5.4"

View File

@@ -18,6 +18,7 @@ TOOLS =
TOOLS += agent-ctl
TOOLS += kata-ctl
TOOLS += log-parser
TOOLS += runk
TOOLS += trace-forwarder
STANDARD_TARGETS = build check clean install static-checks-build test vendor
@@ -49,14 +50,10 @@ docs-url-alive-check:
build-and-publish-kata-debug:
bash tools/packaging/kata-debug/kata-debug-build-and-upload-payload.sh ${KATA_DEBUG_REGISTRY} ${KATA_DEBUG_TAG}
docs-serve:
docker run --rm -p 8000:8000 -v ./docs:/docs:ro -v ${PWD}/zensical.toml:/zensical.toml:ro zensical/zensical serve --config-file /zensical.toml -a 0.0.0.0:8000
.PHONY: \
all \
kata-tarball \
install-tarball \
default \
static-checks \
docs-url-alive-check \
docs-serve
docs-url-alive-check

View File

@@ -139,6 +139,7 @@ The table below lists the remaining parts of the project:
| [`agent-ctl`](src/tools/agent-ctl) | utility | Tool that provides low-level access for testing the agent. |
| [`kata-ctl`](src/tools/kata-ctl) | utility | Tool that provides advanced commands and debug facilities. |
| [`trace-forwarder`](src/tools/trace-forwarder) | utility | Agent tracing helper. |
| [`runk`](src/tools/runk) | utility | Standard OCI container runtime based on the agent. |
| [`ci`](.github/workflows) | CI | Continuous Integration configuration files and scripts. |
| [`ocp-ci`](ci/openshift-ci/README.md) | CI | Continuous Integration configuration for the OpenShift pipelines. |
| [`katacontainers.io`](https://github.com/kata-containers/www.katacontainers.io) | Source for the [`katacontainers.io`](https://www.katacontainers.io) site. |

View File

@@ -1 +1 @@
3.26.0
3.22.0

View File

@@ -11,10 +11,6 @@ script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${script_dir}/../tests/common.bash"
# Path to the ORAS cache helper for downloading tarballs (sourced when needed)
# Use ORAS_CACHE_HELPER env var (set by build.sh in Docker) or fallback to repo path
oras_cache_helper="${ORAS_CACHE_HELPER:-${script_dir}/../tools/packaging/scripts/download-with-oras-cache.sh}"
# The following variables if set on the environment will change the behavior
# of gperf and libseccomp configure scripts, that may lead this script to
# fail. So let's ensure they are unset here.
@@ -48,9 +44,6 @@ fi
gperf_tarball="gperf-${gperf_version}.tar.gz"
gperf_tarball_url="${gperf_url}/${gperf_tarball}"
# Use ORAS cache for gperf downloads (gperf upstream can be unreliable)
USE_ORAS_CACHE="${USE_ORAS_CACHE:-yes}"
# We need to build the libseccomp library from sources to create a static
# library for the musl libc.
# However, ppc64le, riscv64 and s390x have no musl targets in Rust. Hence, we do
@@ -75,23 +68,7 @@ trap finish EXIT
build_and_install_gperf() {
echo "Build and install gperf version ${gperf_version}"
mkdir -p "${gperf_install_dir}"
# Use ORAS cache if available and enabled
if [[ "${USE_ORAS_CACHE}" == "yes" ]] && [[ -f "${oras_cache_helper}" ]]; then
echo "Using ORAS cache for gperf download"
source "${oras_cache_helper}"
local cached_tarball
cached_tarball=$(download_component gperf "$(pwd)")
if [[ -f "${cached_tarball}" ]]; then
gperf_tarball="${cached_tarball}"
else
echo "ORAS cache download failed, falling back to direct download"
curl -sLO "${gperf_tarball_url}"
fi
else
curl -sLO "${gperf_tarball_url}"
fi
curl -sLO "${gperf_tarball_url}"
tar -xf "${gperf_tarball}"
pushd "gperf-${gperf_version}"
# Unset $CC for configure, we will always use native for gperf

View File

@@ -46,12 +46,16 @@ fi
[[ ${SELINUX_PERMISSIVE} == "yes" ]] && oc delete -f "${deployments_dir}/machineconfig_selinux.yaml.in"
# Delete kata-containers
helm uninstall kata-deploy --wait --namespace kube-system
pushd "${katacontainers_repo_dir}/tools/packaging/kata-deploy" || { echo "Failed to push to ${katacontainers_repo_dir}/tools/packaging/kata-deploy"; exit 125; }
oc delete -f kata-deploy/base/kata-deploy.yaml
oc -n kube-system wait --timeout=10m --for=delete -l name=kata-deploy pod
oc apply -f kata-cleanup/base/kata-cleanup.yaml
echo "Wait for all related pods to be gone"
( repeats=1; for _ in $(seq 1 600); do
oc get pods -l name="kubelet-kata-cleanup" --no-headers=true -n kube-system 2>&1 | grep "No resources found" -q && ((repeats++)) || repeats=1
[[ "${repeats}" -gt 5 ]] && echo kata-cleanup finished && break
sleep 1
done) || { echo "There are still some kata-cleanup related pods after 600 iterations"; oc get all -n kube-system; exit 1; }
oc delete -f kata-cleanup/base/kata-cleanup.yaml
oc delete -f kata-rbac/base/kata-rbac.yaml
oc delete -f runtimeclasses/kata-runtimeClasses.yaml

View File

@@ -51,13 +51,13 @@ apply_kata_deploy() {
oc label --overwrite ns kube-system pod-security.kubernetes.io/enforce=privileged pod-security.kubernetes.io/warn=baseline pod-security.kubernetes.io/audit=baseline
local version chart
version='0.0.0-dev'
version=$(curl -sSL https://api.github.com/repos/kata-containers/kata-containers/releases/latest | jq .tag_name | tr -d '"')
chart="oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy"
# Ensure any potential leftover is cleaned up ... and this secret usually is not in case of previous failures
oc delete secret sh.helm.release.v1.kata-deploy.v1 -n kube-system || true
echo "Installing kata using helm ${chart} ${version} (sha printed in helm output)"
echo "Installing kata using helm ${chart} ${version}"
helm install kata-deploy --wait --namespace kube-system --set "image.reference=${KATA_DEPLOY_IMAGE%%:*},image.tag=${KATA_DEPLOY_IMAGE##*:}" "${chart}" --version "${version}"
}

View File

@@ -157,16 +157,6 @@ if [[ -z "${CAA_IMAGE}" ]]; then
fi
# Get latest PP image
#
# You can list the CI images by:
# az sig image-version list-community --location "eastus" --public-gallery-name "cocopodvm-d0e4f35f-5530-4b9c-8596-112487cdea85" --gallery-image-definition "podvm_image0" --output table
# or the release images by:
# az sig image-version list-community --location "eastus" --public-gallery-name "cococommunity-42d8482d-92cd-415b-b332-7648bd978eff" --gallery-image-definition "peerpod-podvm-fedora" --output table
# or the release debug images by:
# az sig image-version list-community --location "eastus" --public-gallery-name "cococommunity-42d8482d-92cd-415b-b332-7648bd978eff" --gallery-image-definition "peerpod-podvm-fedora-debug" --output table
#
# Note there are other flavours of the released images, you can list them by:
# az sig image-definition list-community --location "eastus" --public-gallery-name "cococommunity-42d8482d-92cd-415b-b332-7648bd978eff" --output table
if [[ -z "${PP_IMAGE_ID}" ]]; then
SUCCESS_TIME=$(curl -s \
-H "Accept: application/vnd.github+json" \

View File

@@ -125,7 +125,7 @@ If you want to enable SELinux in Permissive mode, add `enforcing=0` to the kerne
Enable full debug as follows:
```bash
$ sudo sed -i -E 's/^(\s*enable_debug\s*=\s*)false/\1true/' /etc/kata-containers/configuration.toml
$ sudo sed -i -e 's/^# *\(enable_debug\).*=.*$/\1 = true/g' /etc/kata-containers/configuration.toml
$ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.log=debug initcall_debug"/g' /etc/kata-containers/configuration.toml
```

View File

@@ -83,7 +83,3 @@ Documents that help to understand and contribute to Kata Containers.
If you have a suggestion for how we can improve the
[website](https://katacontainers.io), please raise an issue (or a PR) on
[the repository that holds the source for the website](https://github.com/OpenStackweb/kata-netlify-refresh).
### Toolchain Guidance
* [Toolchain Guidance](./Toochain-Guidance.md)

View File

@@ -1,39 +0,0 @@
# Toolchains
As a community we want to strike a balance between having up-to-date toolchains, to receive the
latest security fixes and to be able to benefit from new features and packages, whilst not being
too bleeding edge and disrupting downstream and other consumers. As a result we have the following
guidelines (note, not hard rules) for our go and rust toolchains that we are attempting to try out:
## Go toolchain
Go is released [every six months](https://go.dev/wiki/Go-Release-Cycle) with support for the
[last two major release versions](https://go.dev/doc/devel/release#policy). We always want to
ensure that we are on a supported version so we receive security fixes. To try and make
things easier for some of our users, we aim to be using the older of the two supported major
versions, unless there is a compelling reason to adopt the newer version.
In practice this means that we bump our major version of the go toolchain every six months to
version (1.x-1) in response to a new version (1.x) coming out, which makes our current version
(1.x-2) no longer supported. We will bump the minor version whenever required to satisfy
dependency updates, or security fixes.
Our go toolchain version is recorded in [`versions.yaml`](../versions.yaml) under
`.languages.golang.version` and should match with the version in our `go.mod` files.
## Rust toolchain
Rust has a [six week](https://doc.rust-lang.org/book/appendix-05-editions.html#:~:text=The%20Rust%20language%20and%20compiler,these%20tiny%20changes%20add%20up.)
release cycle and they only support the latest stable release, so if we wanted to remain on a
supported release we would only ever build with the latest stable and bump every 6 weeks.
However feedback from our community has indicated that this is a challenge as downstream consumers
often want to get rust from their distro, or downstream fork and these struggle to keep up with
the six week release schedule. As a result the community has agreed to try out a policy of
"stable-2", where we aim to build with a rust version that is two versions behind the latest stable
version.
In practice this should mean that we bump our rust toolchain every six weeks, to version
1.x-2 when 1.x is released as stable and we should be picking up the latest point release
of that version, if there were any.
The rust-toolchain that we are using is recorded in [`rust-toolchain.toml`](../rust-toolchain.toml).

View File

@@ -198,7 +198,7 @@ fn join_params_with_dash(str: &str, num: i32) -> Result<String> {
return Err("number must be positive");
}
let result = format!("{str}-{num}");
let result = format!("{}-{}", str, num);
Ok(result)
}
@@ -253,13 +253,13 @@ mod tests {
// Run the tests
for (i, d) in tests.iter().enumerate() {
// Create a string containing details of the test
let msg = format!("test[{i}]: {d:?}");
let msg = format!("test[{}]: {:?}", i, d);
// Call the function under test
let result = join_params_with_dash(d.str, d.num);
// Update the test details string with the results of the call
let msg = format!("{msg}, result: {result:?}");
let msg = format!("{}, result: {:?}", msg, result);
// Perform the checks
if d.result.is_ok() {
@@ -267,8 +267,8 @@ mod tests {
continue;
}
let expected_error = format!("{d.result.as_ref().unwrap_err()}");
let actual_error = format!("{result.unwrap_err()}");
let expected_error = format!("{}", d.result.as_ref().unwrap_err());
let actual_error = format!("{}", result.unwrap_err());
assert!(actual_error == expected_error, msg);
}
}

View File

@@ -1,9 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 32">
<!-- Dark background matching the site -->
<rect width="32" height="32" rx="4" fill="#1a1a2e"/>
<!-- Kata logo scaled and centered -->
<g transform="translate(-27, -2) scale(0.75)">
<path d="M70.925 25.22L58.572 37.523 46.27 25.22l2.192-2.192 10.11 10.11 10.11-10.11zm-6.575-.2l-3.188-3.188 3.188-3.188 3.188 3.188zm-4.93-2.54l3.736 3.736-3.736 3.736zm-1.694 7.422l-8.07-8.07 8.07-8.07zm1.694-16.14l3.686 3.686-3.686 3.686zm-13.15 4.682L58.572 6.143l12.353 12.303-2.192 2.192-10.16-10.11-10.11 10.11zm26.997 0L58.572 3.752 43.878 18.446l3.387 3.387-3.387 3.387 14.694 14.694L73.266 25.22l-3.337-3.387z" fill="#f15b3e"/>
</g>
</svg>

Before

Width:  |  Height:  |  Size: 710 B

View File

@@ -51,7 +51,6 @@ containers started after the VM has been launched.
Users can check to see if the container uses the `devicemapper` block
device as its rootfs by calling `mount(8)` within the container. If
the `devicemapper` block device is used, the root filesystem (`/`)
will be mounted from `/dev/vda`. Users can enable direct mounting of
the underlying block device by setting the runtime
[configuration](README.md#configuration) flag `disable_block_device_use` to
`false`.
will be mounted from `/dev/vda`. Users can disable direct mounting of
the underlying block device through the runtime
[configuration](README.md#configuration).

View File

@@ -256,7 +256,7 @@ spec:
values:
- NODE_NAME
volumes:
- name: trusted-image-storage
- name: trusted-storage
persistentVolumeClaim:
claimName: trusted-pvc
containers:

View File

@@ -50,7 +50,7 @@ There are several kinds of Kata configurations and they are listed below.
| `io.katacontainers.config.hypervisor.default_max_vcpus` | uint32| the maximum number of vCPUs allocated for the VM by the hypervisor |
| `io.katacontainers.config.hypervisor.default_memory` | uint32| the memory assigned for a VM by the hypervisor in `MiB` |
| `io.katacontainers.config.hypervisor.default_vcpus` | float32| the default vCPUs assigned for a VM by the hypervisor |
| `io.katacontainers.config.hypervisor.disable_block_device_use` | `boolean` | disable hotplugging host block devices to guest VMs for container rootfs |
| `io.katacontainers.config.hypervisor.disable_block_device_use` | `boolean` | disallow a block device from being used |
| `io.katacontainers.config.hypervisor.disable_image_nvdimm` | `boolean` | specify if a `nvdimm` device should be used as rootfs for the guest (QEMU) |
| `io.katacontainers.config.hypervisor.disable_vhost_net` | `boolean` | specify if `vhost-net` is not available on the host |
| `io.katacontainers.config.hypervisor.enable_hugepages` | `boolean` | if the memory should be `pre-allocated` from huge pages |
@@ -97,8 +97,6 @@ There are several kinds of Kata configurations and they are listed below.
| `io.katacontainers.config.hypervisor.use_legacy_serial` | `boolean` | uses legacy serial device for guest's console (QEMU) |
| `io.katacontainers.config.hypervisor.default_gpus` | uint32 | the minimum number of GPUs required for the VM. Only used by remote hypervisor to help with instance selection |
| `io.katacontainers.config.hypervisor.default_gpu_model` | string | the GPU model required for the VM. Only used by remote hypervisor to help with instance selection |
| `io.katacontainers.config.hypervisor.block_device_num_queues` | `usize` | The number of queues to use for block devices (runtime-rs only) |
| `io.katacontainers.config.hypervisor.block_device_queue_size` | uint32 | The size of the of the queue to use for block devices (runtime-rs only) |
## Container Options
| Key | Value Type | Comments |

View File

@@ -103,8 +103,48 @@ $ minikube ssh "grep -c -E 'vmx|svm' /proc/cpuinfo"
## Installing Kata Containers
You can now install the Kata Containers runtime components
[following the official instructions](../../tools/packaging/kata-deploy/helm-chart).
You can now install the Kata Containers runtime components. You will need a local copy of some Kata
Containers components to help with this, and then use `kubectl` on the host (that Minikube has already
configured for you) to deploy them:
```sh
$ git clone https://github.com/kata-containers/kata-containers.git
$ cd kata-containers/tools/packaging/kata-deploy
$ kubectl apply -f kata-rbac/base/kata-rbac.yaml
$ kubectl apply -f kata-deploy/base/kata-deploy.yaml
```
This installs the Kata Containers components into `/opt/kata` inside the Minikube node. It can take
a few minutes for the operation to complete. You can check the installation has worked by checking
the status of the `kata-deploy` pod, which will be executing
[this script](../../tools/packaging/kata-deploy/scripts/kata-deploy.sh),
and will be executing a `sleep infinity` once it has successfully completed its work.
You can accomplish this by running the following:
```sh
$ podname=$(kubectl -n kube-system get pods -o=name | grep -F kata-deploy | sed 's?pod/??')
$ kubectl -n kube-system exec ${podname} -- ps -ef | grep -F infinity
```
> *NOTE:* This check only works for single node clusters, which is the default for Minikube.
> For multi-node clusters, the check would need to be adapted to check `kata-deploy` had
> completed on all nodes.
## Enabling Kata Containers
Now you have installed the Kata Containers components in the Minikube node. Next, you need to configure
Kubernetes `RuntimeClass` to know when to use Kata Containers to run a pod.
### Register the runtime
Now register the `kata qemu` runtime with that class. This should result in no errors:
```sh
$ cd kata-containers/tools/packaging/kata-deploy/runtimeclasses
$ kubectl apply -f kata-runtimeClasses.yaml
```
The Kata Containers installation process should be complete and enabled in the Minikube cluster.
## Testing Kata Containers

View File

@@ -48,7 +48,7 @@ $ make test
- Run a test in the current package in verbose mode:
```bash
# Example
# Example
$ test="config::tests::test_get_log_level"
$ cargo test "$test" -vv -- --exact --nocapture
@@ -223,7 +223,7 @@ What's wrong with this function?
```rust
fn foo(config: &Config, path_prefix: String, container_id: String, pid: String) -> Result<()> {
let mut full_path = format!("{path_prefix}/{container_id}");
let mut full_path = format!("{}/{}", path_prefix, container_id);
let _ = remove_recursively(&mut full_path);

View File

@@ -3,4 +3,4 @@
Kata Containers supports passing certain GPUs from the host into the container. Select the GPU vendor for detailed information:
- [Intel Discrete GPUs](Intel-Discrete-GPU-passthrough-and-Kata.md)/[Intel Integrated GPUs](Intel-GPU-passthrough-and-Kata.md)
- [NVIDIA GPUs](NVIDIA-GPU-passthrough-and-Kata.md) and [Enabling NVIDIA GPU workloads using GPU passthrough with Kata Containers](NVIDIA-GPU-passthrough-and-Kata-QEMU.md)
- [NVIDIA](NVIDIA-GPU-passthrough-and-Kata.md)

View File

@@ -1,569 +0,0 @@
# Enabling NVIDIA GPU workloads using GPU passthrough with Kata Containers
This page provides:
1. A description of the components involved when running GPU workloads with
Kata Containers using the NVIDIA TEE and non-TEE GPU runtime classes.
1. An explanation of the orchestration flow on a Kubernetes node for this
scenario.
1. A deployment guide enabling to utilize these runtime classes.
The goal is to educate readers familiar with Kubernetes and Kata Containers
on NVIDIA's reference implementation which is reflected in Kata CI's build
and test framework. With this, we aim to enable readers to leverage this
stack, or to use the principles behind this stack in order to run GPU
workloads on their variant of the Kata Containers stack.
We assume the reader is familiar with Kubernetes, Kata Containers, and
Confidential Containers.
> **Note:**
>
> The current supported mode for enabling GPU workloads in the TEE scenario
> is single GPU passthrough (one GPU per pod) on AMD64 platforms (AMD SEV-SNP
> being the only supported TEE scenario so far with support for Intel TDX being
> on the way).
## Component Overview
Before providing deployment guidance, we describe the components involved to
support running GPU workloads. We start from a top to bottom perspective
from the NVIDIA GPU operator via the Kata runtime to the components within
the NVIDIA GPU Utility Virtual Machine (UVM) root filesystem.
### NVIDIA GPU Operator
A central component is the
[NVIDIA GPU operator](https://github.com/NVIDIA/gpu-operator) which can be
deployed onto your cluster as a helm chart. Installing the GPU operator
delivers various operands on your nodes in the form of Kubernetes DaemonSets.
These operands are vital to support the flow of orchestrating pod manifests
using NVIDIA GPU runtime classes with GPU passthrough on your nodes. Without
getting into the details, the most important operands and their
responsibilities are:
- **nvidia-vfio-manager:** Binding discovered NVIDIA GPUs to the `vfio-pci`
driver for VFIO passthrough.
- **nvidia-cc-manager:** Transitioning GPUs into confidential computing (CC)
and non-CC mode (see the
[NVIDIA/k8s-cc-manager](https://github.com/NVIDIA/k8s-cc-manager)
repository).
- **nvidia-kata-manager:** Creating host-side CDI specifications for GPU
passthrough, resulting in the file `/var/run/cdi/nvidia.yaml`, containing
`kind: nvidia.com/pgpu` (see the
[NVIDIA/k8s-kata-manager](https://github.com/NVIDIA/k8s-kata-manager)
repository).
- **nvidia-sandbox-device-plugin** (see the
[NVIDIA/sandbox-device-plugin](https://github.com/NVIDIA/sandbox-device-plugin)
repository):
- Allocating GPUs during pod deployment.
- Discovering NVIDIA GPUs, their capabilities, and advertising these to
the Kubernetes control plane (allocatable resources as type
`nvidia.com/pgpu` resources will appear for the node and GPU Device IDs
will be registered with Kubelet). These GPUs can thus be allocated as
container resources in your pod manifests. See below GPU operator
deployment instructions for the use of the key `pgpu`, controlled via a
variable.
To summarize, the GPU operator manages the GPUs on each node, allowing for
simple orchestration of pod manifests using Kata Containers. Once the cluster
with GPU operator and Kata bits is up and running, the end user can schedule
Kata NVIDIA GPU workloads, using resource limits and the
`kata-qemu-nvidia-gpu` or `kata-qemu-nvidia-gpu-snp` runtime classes, for
example:
```yaml
apiVersion: v1
kind: Pod
...
spec:
...
runtimeClassName: kata-qemu-nvidia-gpu-snp
...
resources:
limits:
"nvidia.com/pgpu": 1
...
```
When this happens, the Kubelet calls into the sandbox device plugin to
allocate a GPU. The sandbox device plugin returns `DeviceSpec` entries to the
Kubelet for the allocated GPU. The Kubelet uses internal device IDs for
tracking of allocated GPUs and includes the device specifications in the CRI
request when scheduling the pod through containerd. Containerd processes the
device specifications and includes the device configuration in the OCI
runtime spec used to invoke the Kata runtime during the create container
request.
### Kata runtime
The Kata runtime for the NVIDIA GPU handlers is configured to cold-plug VFIO
devices (`cold_plug_vfio` is set to `root-port` while
`hot_plug_vfio` is set to `no-port`). Cold-plug is by design the only
supported mode for NVIDIA GPU passthrough of the NVIDIA reference stack.
With cold-plug, the Kata runtime attaches the GPU at VM launch time, when
creating the pod sandbox. This happens *before* the create container request,
i.e., before the Kata runtime receives the OCI spec including device
configurations from containerd. Thus, a mechanism to acquire the device
information is required. This is done by the runtime calling the
`coldPlugDevices()` function during sandbox creation. In this function,
the runtime queries Kubelet's Pod Resources API to discover allocated GPU
device IDs (e.g., `nvidia.com/pgpu = [vfio0]`). The runtime formats these as
CDI device identifiers and injects them into the OCI spec using
`config.InjectCDIDevices()`. The runtime then consults the host CDI
specifications and determines the device path the GPU is backed by
(e.g., `/dev/vfio/devices/vfio0`). Finally, the runtime resolves the device's
PCI BDF (e.g., `0000:21:00`) and cold-plugs the GPU by launching QEMU with
relevant parameters for device passthrough (e.g.,
`-device vfio-pci,host=0000:21:00.0,x-pci-vendor-id=0x10de,x-pci-device-id=0x2321,bus=rp0,iommufd=iommufdvfio-faf829f2ea7aec330`).
The runtime also creates *inner runtime* CDI annotations
which map host VFIO devices to guest GPU devices. These are annotations
intended for the kata-agent, here referred to as the inner runtime (inside the
UVM), to properly handle GPU passthrough into containers. These annotations
serve as metadata providing the kata-agent with the information needed to
attach the passthrough devices to the correct container.
The annotations are key-value pairs consisting of `cdi.k8s.io/vfio<num>` keys
(derived from the host VFIO device path, e.g., `/dev/vfio/devices/vfio1`) and
`nvidia.com/gpu=<index>` values (referencing the corresponding device in the
guest CDI spec). These annotations are injected by the runtime during container
creation via the `annotateContainerWithVFIOMetadata` function (see
`container.go`).
We continue describing the orchestration flow inside the UVM in the next
section.
### Kata NVIDIA GPU UVM
#### UVM composition
To better understand the orchestration flow inside the NVIDIA GPU UVM, we
first look at the components its root filesystem contains. Should you decide
to use your own root filesystem to enable NVIDIA GPU scenarios, this should
give you a good idea on what ingredients you need.
From a file system perspective, the UVM is composed of two files: a standard
Kata kernel image and the NVIDIA GPU rootfs in initrd or disk image format.
These two files are being utilized for the QEMU launch command when the UVM
is created.
The two most important pieces in Kata Container's build recipes for the
NVIDIA GPU root filesystem are the `nvidia_chroot.sh` and `nvidia_rootfs.sh`
files. The build follows a two-stage process. In the first stage, a
full-fledged Ubuntu-based root filesystem is composed within a chroot
environment. In this stage, NVIDIA kernel modules are built and signed
against the current Kata kernel and relevant NVIDIA packages are installed.
In the second stage, a chiseled build is performed: Only relevant contents
from the first stage are copied and compressed into a new distro-less root
filesystem folder. Kata's build infrastructure then turns this root
filesystem into the NVIDIA initrd and image files.
The resulting root filesystem contains the following software components:
- NVRC - the
[NVIDIA Runtime Container init system](https://github.com/NVIDIA/nvrc/tree/main)
- NVIDIA drivers (kernel modules)
- NVIDIA user space driver libraries
- NVIDIA user space tools
- kata-agent
- confidential computing guest components: the attestation agent,
confidential data hub and api-server-rest binaries
- CRI-O pause container (for the guest image-pull method)
- BusyBox utilities (provides a base set of libraries and binaries, and a
linker)
- some supporting files, such as file containing a list of supported GPU
device IDs which NVRC reads
#### UVM orchestration flow
When the Kata runtime asks QEMU to launch the VM, the UVM's Linux kernel
boots and mounts the root filesystem. After this, NVRC starts as the initial
process.
NVRC scans for NVIDIA GPUs on the PCI bus, loads the
NVIDIA kernel modules, waits for driver initialization, creates the device nodes,
and initializes the GPU hardware (using the `nvidia-smi` binary). NVRC also
creates the guest-side CDI specification file (using the
`nvidia-ctk cdi generate` command). This file specifies devices of
`kind: nvidia.com/gpu`, i.e., GPUs appearing to be physical GPUs on regular
bare metal systems. The guest CDI specification also contains `containerEdits`
for each device, specifying device nodes (e.g., `/dev/nvidia0`,
`/dev/nvidiactl`), library mounts, and environment variables to be mounted
into the container which receives the passthrough GPU.
Then, NVRC forks the Kata agent while continuing to run as the
init system. This allows NVRC to handle ongoing GPU management tasks
while kata-agent focuses on container lifecycle management. See the
[NVRC sources](https://github.com/NVIDIA/nvrc/blob/main/src/main.rs) for an
overview on the steps carried out by NVRC.
When the Kata runtime sends the create container request, the Kata agent
parses the inner runtime CDI annotation. For example, for the inner runtime
annotation `"cdi.k8s.io/vfio1": "nvidia.com/gpu=0"`, the agent looks up device
`0` in the guest CDI specification with `kind: nvidia.com/gpu`.
The Kata agent also reads the guest CDI specification's `containerEdits`
section and injects relevant contents into the OCI spec of the respective
container. The kata agent then creates and starts a `rustjail` container
based on the final OCI spec. The container now has relevant device nodes,
binaries and low-level libraries available, and can start a user application
linked against the CUDA runtime API (e.g., `libcudart.so` and other
libraries). When used, the CUDA runtime API in turn calls the CUDA driver
API and kernel drivers, interacting with the pass-through GPU device.
An additional step is exercised in our CI samples: when using images from an
authenticated registry, the guest-pull mechanism triggers attestation using
trustee's Key Broker Service (KBS) for secure release of the NGC API
authentication key used to access the NVCR container registry. As part of
this, the attestation agent exercises composite attestation and transitions
the GPU into `Ready` state (without this, the GPU has to explicitly be
transitioned into `Ready` state by passing the `nvrc.smi.srs=1` kernel
parameter via the shim config, causing NVRC to transition the GPU into the
`Ready` state).
## Deployment Guidance
This guidance assumes you use bare-metal machines with proper support for
Kata's non-TEE and TEE GPU workload deployment scenarios for your Kubernetes
nodes. We provide guidance based on the upstream Kata CI procedures for the
NVIDIA GPU CI validation jobs. Note that, this setup:
- uses the guest image pull method to pull container image layers
- uses the genpolicy tool to attach Kata agent security policies to the pod
manifest
- has dedicated (composite) attestation tests, a CUDA vectorAdd test, and a
NIM/RA test sample with secure API key release
A similar deployment guide and scenario description can be found in NVIDIA resources
under
[Early Access: NVIDIA GPU Operator with Confidential Containers based on Kata](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/confidential-containers.html).
### Requirements
The requirements for the TEE scenario are:
- Ubuntu 25.10 as host OS
- CPU with AMD SEV-SNP support with proper BIOS/UEFI version and settings
- CC-capable Hopper/Blackwell GPU with proper VBIOS version.
BIOS and VBIOS configuration is out of scope for this guide. Other resources,
such as the documentation found on the
[NVIDIA Trusted Computing Solutions](https://docs.nvidia.com/nvtrust/index.html)
page and the above linked NVIDIA documentation, provide guidance on
selecting proper hardware and on properly configuring its firmware and OS.
### Installation
#### Containerd and Kubernetes
First, set up your Kubernetes cluster. For instance, in Kata CI, our NVIDIA
jobs use a single-node vanilla Kubernetes cluster with a 2.x containerd
version and Kata's current supported Kubernetes version. We set this cluster
up using the `deploy_k8s` function from `tests/integration/kubernetes/gha-run.sh`
as follows:
```bash
$ export KUBERNETES="vanilla"
$ export CONTAINER_ENGINE="containerd"
$ export CONTAINER_ENGINE_VERSION="v2.1"
$ source tests/gha-run-k8s-common.sh
$ deploy_k8s
```
> **Note:**
>
> We recommend to configure your Kubelet with a higher
> `runtimeRequestTimeout` timeout value than the two minute default timeout.
> Using the guest-pull mechanism, pulling large images may take a significant
> amount of time and may delay container start, possibly leading your Kubelet
> to de-allocate your pod before it transitions from the *container created*
> to the *container running* state.
> **Note:**
>
> The NVIDIA GPU runtime classes use VFIO cold-plug which, as
> described above, requires the Kata runtime to query Kubelet's Pod Resources
> API to discover allocated GPU devices during sandbox creation. For
> Kubernetes versions **older than 1.34**, you must explicitly enable the
> `KubeletPodResourcesGet` feature gate in your Kubelet configuration. For
> Kubernetes 1.34 and later, this feature is enabled by default.
#### GPU Operator
Assuming you have the helm tools installed, deploy the latest version of the
GPU Operator as a helm chart (minimum version: `v25.10.0`):
```bash
$ helm repo add nvidia https://helm.ngc.nvidia.com/nvidia && helm repo update
$ helm install --wait --generate-name \
-n gpu-operator --create-namespace \
nvidia/gpu-operator \
--set sandboxWorkloads.enabled=true \
--set sandboxWorkloads.defaultWorkload=vm-passthrough \
--set kataManager.enabled=true \
--set kataManager.config.runtimeClasses=null \
--set kataManager.repository=nvcr.io/nvidia/cloud-native \
--set kataManager.image=k8s-kata-manager \
--set kataManager.version=v0.2.4 \
--set ccManager.enabled=true \
--set ccManager.defaultMode=on \
--set ccManager.repository=nvcr.io/nvidia/cloud-native \
--set ccManager.image=k8s-cc-manager \
--set ccManager.version=v0.2.0 \
--set sandboxDevicePlugin.repository=nvcr.io/nvidia/cloud-native \
--set sandboxDevicePlugin.image=nvidia-sandbox-device-plugin \
--set sandboxDevicePlugin.version=v0.0.1 \
--set 'sandboxDevicePlugin.env[0].name=P_GPU_ALIAS' \
--set 'sandboxDevicePlugin.env[0].value=pgpu' \
--set nfd.enabled=true \
--set nfd.nodefeaturerules=true
```
> **Note:**
>
> For heterogeneous clusters with different GPU types, you can omit
> the `P_GPU_ALIAS` environment variable lines. This will cause the sandbox
> device plugin to create GPU model-specific resource types (e.g.,
> `nvidia.com/GH100_H100L_94GB`) instead of the generic `nvidia.com/pgpu`,
> which in turn can be used by pods through respective resource limits.
> For simplicity, this guide uses the generic alias.
> **Note:**
>
> Using `--set sandboxWorkloads.defaultWorkload=vm-passthrough` causes all
> your nodes to be labeled for GPU VM passthrough. Remove this parameter if
> you intend to only use selected nodes for this scenario, and label these
> nodes by hand, using:
> `kubectl label node <node-name> nvidia.com/gpu.workload.config=vm-passthrough`.
#### Kata Containers
Install the latest Kata Containers helm chart, similar to
[existing documentation](https://github.com/kata-containers/kata-containers/blob/main/tools/packaging/kata-deploy/helm-chart/README.md)
(minimum version: `3.24.0`).
```bash
$ export VERSION=$(curl -sSL https://api.github.com/repos/kata-containers/kata-containers/releases/latest | jq .tag_name | tr -d '"')
$ export CHART="oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy"
$ helm install kata-deploy \
--namespace kata-system \
--create-namespace \
-f "https://raw.githubusercontent.com/kata-containers/kata-containers/refs/tags/${VERSION}/tools/packaging/kata-deploy/helm-chart/kata-deploy/try-kata-nvidia-gpu.values.yaml" \
--set nfd.enabled=false \
--set shims.qemu-nvidia-gpu-tdx.enabled=false \
--wait --timeout 10m --atomic \
"${CHART}" --version "${VERSION}"
```
#### Trustee's KBS for remote attestation
For our Kata CI runners we use Trustee's KBS for composite attestation for
secure key release, for instance, for test scenarios which use authenticated
container images. In such scenarios, the credentials to access the
authenticated container registry are only released to the confidential guest
after successful attestation. Please see the section below for more
information about this.
```bash
$ export NVIDIA_VERIFIER_MODE="remote"
$ export KBS_INGRESS="nodeport"
$ bash tests/integration/kubernetes/gha-run.sh deploy-coco-kbs
$ bash tests/integration/kubernetes/gha-run.sh install-kbs-client
```
Please note, that Trustee can also be deployed via any other upstream
mechanism as documented by the
[confidential-containers repository](https://github.com/confidential-containers/trustee).
For our architecture it is important to set up KBS in the remote verifier
mode which requires entering a licensing agreement with NVIDIA, see the
[notes in confidential-containers repository](https://github.com/confidential-containers/trustee/blob/main/deps/verifier/src/nvidia/README.md).
### Cluster validation and preparation
If you did not use the `sandboxWorkloads.defaultWorkload=vm-passthrough`
parameter during GPU operator deployment, label your nodes for GPU VM
passthrough, for the example of using all nodes for GPU passthrough, run:
```bash
$ kubectl label nodes --all nvidia.com/gpu.workload.config=vm-passthrough --overwrite
```
Check if the `nvidia-cc-manager` pod is running if you intend to run GPU TEE
scenarios. If not, you need to manually label the node as CC capable. Current
GPU Operator node feature rules do not yet recognize all CC capable GPU PCI
IDs. Run the following command:
```bash
$ kubectl label nodes --all nvidia.com/cc.capable=true
```
After this, assure the `nvidia-cc-manager` pod is running. With the suggested
parameters for GPU Operator deployment, the `nvidia-cc-manager` will
automatically transition the GPU into CC mode.
After deployment, you can transition your node(s) to the desired CC state,
using either the `on` or `off` value, depending on your scenario. For the
non-CC scenario, transition to the `off` state via:
`kubectl label nodes --all nvidia.com/cc.mode=off` and wait until all pods
are back running. When an actual change is exercised, various GPU operator
operands will be restarted.
Ensure all pods are running:
```bash
$ kubectl get pods -A
```
On your node(s), ensure for correct driver binding. Your GPU device should be
bound to the VFIO driver, i.e., showing `Kernel driver in use: vfio-pci`
when running:
```bash
$ lspci -nnk -d 10de:
```
### Run the CUDA vectorAdd sample
Create the following file:
```yaml
apiVersion: v1
kind: Pod
metadata:
name: cuda-vectoradd-kata
namespace: default
annotations:
io.katacontainers.config.hypervisor.kernel_params: "nvrc.smi.srs=1"
spec:
runtimeClassName: ${GPU_RUNTIME_CLASS_NAME}
restartPolicy: Never
containers:
- name: cuda-vectoradd
image: "nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0-ubuntu22.04"
resources:
limits:
nvidia.com/pgpu: "1"
memory: 16Gi
```
Depending on your scenario and on the CC state, export your desired runtime
class name define the environment variable:
```bash
$ export GPU_RUNTIME_CLASS_NAME="kata-qemu-nvidia-gpu-snp"
```
Then, deploy the sample Kubernetes pod manifest and observe the pod logs:
```bash
$ envsubst < ./cuda-vectoradd-kata.yaml.in | kubectl apply -f -
$ kubectl wait --for=condition=Ready pod/cuda-vectoradd-kata --timeout=60s
$ kubectl logs -n default cuda-vectoradd-kata
```
Expect the following output:
```
[Vector addition of 50000 elements]
Copy input data from the host memory to the CUDA device
CUDA kernel launch with 196 blocks of 256 threads
Copy output data from the CUDA device to the host memory
Test PASSED
Done
```
To stop the pod, run: `kubectl delete pod cuda-vectoradd-kata`.
### Next steps
#### Transition between CC and non-CC mode
Use the previously described node labeling approach to transition between
the CC and non-CC mode. In case of the non-CC mode, you can use the
`kata-qemu-nvidia-gpu` value for the `GPU_RUNTIME_CLASS_NAME` runtime class
variable in the above CUDA vectorAdd sample. The `kata-qemu-nvidia-gpu-snp`
runtime class will **NOT** work in this mode - and vice versa.
#### Run Kata CI tests locally
Upstream Kata CI runs the CUDA vectorAdd test, a composite attestation test,
and a basic NIM/RAG deployment. Running CI tests for the TEE GPU scenario
requires KBS to be deployed (except for the CUDA vectorAdd test). The best
place to get started running these tests locally is to look into our
[NVIDIA CI workflow manifest](https://github.com/kata-containers/kata-containers/blob/main/.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml)
and into the underling
[run_kubernetes_nv_tests.sh](https://github.com/kata-containers/kata-containers/blob/main/tests/integration/kubernetes/run_kubernetes_nv_tests.sh)
script. For example, to run the CUDA vectorAdd scenario against the TEE GPU
runtime class use the following commands:
```bash
# create the kata runtime class the test framework uses
$ export KATA_HYPERVISOR=qemu-nvidia-gpu-snp
$ kubectl delete runtimeclass kata --ignore-not-found
$ kubectl get runtimeclass "kata-${KATA_HYPERVISOR}" -o json | \
jq '.metadata.name = "kata" | del(.metadata.uid, .metadata.resourceVersion, .metadata.creationTimestamp)' | \
kubectl apply -f -
$ cd tests/integration/kubernetes
$ K8S_TEST_NV="k8s-nvidia-cuda.bats" ./gha-run.sh run-nv-tests
```
> **Note:**
>
> The other scenarios require an NGC API key to run, i.e., to export the
> `NGC_API_KEY` variable with a valid NGC API key.
#### Deploy pods using attestation
Attestation is a fundamental piece of the confidential containers solution.
In our upstream CI we use attestation at the example of leveraging the
authenticated container image pull mechanism where container images reside
in the authenticated NVCR registry (`k8s-nvidia-nim.bats`), and for
requesting secrets from KBS (`k8s-confidential-attestation.bats`). KBS will
release the image pull secret to a confidential guest. To get the
authentication credentials from inside the guest, KBS must already be
deployed and configured. In our CI samples, we configure KBS with the guest
image pull secret, a resource policy, and launch the pod with certain kernel
command line parameters:
`"agent.image_registry_auth=kbs:///default/credentials/nvcr agent.aa_kbc_params=cc_kbc::${CC_KBS_ADDR}"`.
The `agent.aa_kbc_params` option is a general configuration for attestation.
For your use case, you need to set the IP address and port under which KBS
is reachable through the `CC_KBS_ADDR` variable (see our CI sample). This
tells the guest how to reach KBS. Something like this must be set whenever
attestation is used, but on its own this parameter does not trigger
attestation. The `agent.image_registry_auth` option tells the guest to ask
for a resource from KBS and use it as the authentication configuration. When
this is set, the guest will request this resource at boot (and trigger
attestation) regardless of which image is being pulled.
To deploy your own pods using authenticated container images, or secure key
release for attestation, follow steps similar to our mentioned CI samples.
#### Deploy pods with Kata agent security policies
With GPU passthrough being supported by the
[genpolicy tool](https://github.com/kata-containers/kata-containers/tree/main/src/tools/genpolicy),
you can use the tool to create a Kata agent security policy. Our CI deploys
all sample pod manifests with a Kata agent security policy.
#### Deploy pods using your own containers and manifests
You can author pod manifests leveraging your own containers, for instance,
containers built using the CUDA container toolkit. We recommend to start
with a CUDA base container.
The GPU is transitioned into the `Ready` state via attestation, for instance,
when pulling authenticated images. If your deployment scenario does not use
attestation, please refer back to the CUDA vectorAdd pod manifest. In this
manifest, we ensure that NVRC sets the GPU to `Ready` state by adding the
following annotation in the manifest:
`io.katacontainers.config.hypervisor.kernel_params: "nvrc.smi.srs=1"`
> **Notes:**
>
> - musl-based container images (e.g., using Alpine), or distro-less
> containers are not supported.
> - for the TEE scenario, only single-GPU passthrough per pod is supported,
> so your pod resource limit must be: `nvidia.com/pgpu: "1"` (on a system
> with multiple GPUs, you can thus pass through one GPU per pod).

View File

@@ -1,25 +1,10 @@
# Using NVIDIA GPU device with Kata Containers
This page gives an overview on the different modes in which GPUs can be passed
to a Kata Containers container, provides host system requirements, explains how
Kata Containers guest components can be built to support the NVIDIA GPU
scenario, and gives practical usage examples using `ctr`.
Please see the guide
[Enabling NVIDIA GPU workloads using GPU passthrough with Kata Containers](NVIDIA-GPU-passthrough-and-Kata-QEMU.md)
for a documentation of an end-to-end reference implementation of a Kata
Containers stack for GPU passthrough using QEMU, the go-based Kata Runtime,
and an NVIDIA-specific root filesystem. This reference implementation is built
and validated in Kata's CI, and it can be used to test GPU workloads with Kata
components and Kubernetes out of the box.
## Comparison between Passthrough and vGPU Modes
An NVIDIA GPU device can be passed to a Kata Containers container using GPU
passthrough (NVIDIA GPU passthrough mode) as well as GPU mediated passthrough
passthrough (NVIDIA GPU pass-through mode) as well as GPU mediated passthrough
(NVIDIA `vGPU` mode).
NVIDIA GPU passthrough mode, an entire physical GPU is directly assigned to one
NVIDIA GPU pass-through mode, an entire physical GPU is directly assigned to one
VM, bypassing the NVIDIA Virtual GPU Manager. In this mode of operation, the GPU
is accessed exclusively by the NVIDIA driver running in the VM to which it is
assigned. The GPU is not shared among VMs.
@@ -35,20 +20,18 @@ with [MIG-slices](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/).
| Technology | Description | Behavior | Detail |
| --- | --- | --- | --- |
| NVIDIA GPU passthrough mode | GPU passthrough | Physical GPU assigned to a single VM | Direct GPU assignment to VM without limitation |
| NVIDIA GPU pass-through mode | GPU passthrough | Physical GPU assigned to a single VM | Direct GPU assignment to VM without limitation |
| NVIDIA vGPU time-sliced | GPU time-sliced | Physical GPU time-sliced for multiple VMs | Mediated passthrough |
| NVIDIA vGPU MIG-backed | GPU with MIG-slices | Physical GPU MIG-sliced for multiple VMs | Mediated passthrough |
## Host Requirements
## Hardware Requirements
### Hardware
NVIDIA GPUs recommended for virtualization:
NVIDIA GPUs Recommended for Virtualization:
- NVIDIA Tesla (T4, M10, P6, V100 or newer)
- NVIDIA Quadro RTX 6000/8000
### Firmware
## Host BIOS Requirements
Some hardware requires a larger PCI BARs window, for example, NVIDIA Tesla P100,
K40m
@@ -72,7 +55,9 @@ Some hardware vendors use a different name in BIOS, such as:
If one is using a GPU based on the Ampere architecture and later additionally
SR-IOV needs to be enabled for the `vGPU` use-case.
### Kernel
The following steps outline the workflow for using an NVIDIA GPU with Kata.
## Host Kernel Requirements
The following configurations need to be enabled on your host kernel:
@@ -85,13 +70,7 @@ The following configurations need to be enabled on your host kernel:
Your host kernel needs to be booted with `intel_iommu=on` on the kernel command
line.
## Build the Kata Components
This section explains how to build an environment with Kata Containers bits
supporting the GPU scenario. We first deploy and configure the regular Kata
components, then describe how to build the guest kernel and root filesystem.
### Install and configure Kata Containers
## Install and configure Kata Containers
To use non-large BARs devices (for example, NVIDIA Tesla T4), you need Kata
version 1.3.0 or above. Follow the [Kata Containers setup
@@ -122,7 +101,7 @@ hotplug_vfio_on_root_bus = true
pcie_root_port = 1
```
### Build guest kernel with GPU support
## Build Kata Containers kernel with GPU support
The default guest kernel installed with Kata Containers does not provide GPU
support. To use an NVIDIA GPU with Kata Containers, you need to build a kernel
@@ -181,11 +160,11 @@ code, using `Dragonball VMM` for NVIDIA GPU `hot-plug/hot-unplug` requires apply
addition to the above kernel configuration items. Follow these steps to build for NVIDIA GPU `hot-[un]plug`
for `Dragonball`:
```sh
# Prepare .config to support both upcall and nvidia gpu
```sh
# Prepare .config to support both upcall and nvidia gpu
$ ./build-kernel.sh -v 5.10.25 -e -t dragonball -g nvidia -f setup
# Build guest kernel to support both upcall and nvidia gpu
# Build guest kernel to support both upcall and nvidia gpu
$ ./build-kernel.sh -v 5.10.25 -e -t dragonball -g nvidia build
# Install guest kernel to support both upcall and nvidia gpu
@@ -217,7 +196,303 @@ Before using the new guest kernel, please update the `kernel` parameters in
kernel = "/usr/share/kata-containers/vmlinuz-nvidia-gpu.container"
```
### Build Guest OS with NVIDIA Driver and Toolkit
## NVIDIA GPU pass-through mode with Kata Containers
Use the following steps to pass an NVIDIA GPU device in pass-through mode with Kata:
1. Find the Bus-Device-Function (BDF) for the GPU device on the host:
```sh
$ sudo lspci -nn -D | grep -i nvidia
0000:d0:00.0 3D controller [0302]: NVIDIA Corporation Device [10de:20b9] (rev a1)
```
> PCI address `0000:d0:00.0` is assigned to the hardware GPU device.
> `10de:20b9` is the device ID of the hardware GPU device.
2. Find the IOMMU group for the GPU device:
```sh
$ BDF="0000:d0:00.0"
$ readlink -e /sys/bus/pci/devices/$BDF/iommu_group
```
The previous output shows that the GPU belongs to IOMMU group 192. The next
step is to bind the GPU to the VFIO-PCI driver.
```sh
$ BDF="0000:d0:00.0"
$ DEV="/sys/bus/pci/devices/$BDF"
$ echo "vfio-pci" > $DEV/driver_override
$ echo $BDF > $DEV/driver/unbind
$ echo $BDF > /sys/bus/pci/drivers_probe
# To return the device to the standard driver, we simply clear the
# driver_override and reprobe the device, ex:
$ echo > $DEV/preferred_driver
$ echo $BDF > $DEV/driver/unbind
$ echo $BDF > /sys/bus/pci/drivers_probe
```
3. Check the IOMMU group number under `/dev/vfio`:
```sh
$ ls -l /dev/vfio
total 0
crw------- 1 zvonkok zvonkok 243, 0 Mar 18 03:06 192
crw-rw-rw- 1 root root 10, 196 Mar 18 02:27 vfio
```
4. Start a Kata container with the GPU device:
```sh
# You may need to `modprobe vhost-vsock` if you get
# host system doesn't support vsock: stat /dev/vhost-vsock
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch uname -r
```
5. Run `lspci` within the container to verify the GPU device is seen in the list
of the PCI devices. Note the vendor-device id of the GPU (`10de:20b9`) in the `lspci` output.
```sh
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch sh -c "lspci -nn | grep '10de:20b9'"
```
6. Additionally, you can check the PCI BARs space of the NVIDIA GPU device in the container:
```sh
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch sh -c "lspci -s 02:00.0 -vv | grep Region"
```
> **Note**: If you see a message similar to the above, the BAR space of the NVIDIA
> GPU has been successfully allocated.
## NVIDIA vGPU mode with Kata Containers
NVIDIA vGPU is a licensed product on all supported GPU boards. A software license
is required to enable all vGPU features within the guest VM. NVIDIA vGPU manager
needs to be installed on the host to configure GPUs in vGPU mode. See [NVIDIA Virtual GPU Software Documentation v14.0 through 14.1](https://docs.nvidia.com/grid/14.0/) for more details.
### NVIDIA vGPU time-sliced
In the time-sliced mode, the GPU is not partitioned and the workload uses the
whole GPU and shares access to the GPU engines. Processes are scheduled in
series. The best effort scheduler is the default one and can be exchanged by
other scheduling policies see the documentation above how to do that.
Beware if you had `MIG` enabled before to disable `MIG` on the GPU if you want
to use `time-sliced` `vGPU`.
```sh
$ sudo nvidia-smi -mig 0
```
Enable the virtual functions for the physical GPU in the `sysfs` file system.
```sh
$ sudo /usr/lib/nvidia/sriov-manage -e 0000:41:00.0
```
Get the `BDF` of the available virtual function on the GPU, and choose one for the
following steps.
```sh
$ cd /sys/bus/pci/devices/0000:41:00.0/
$ ls -l | grep virtfn
```
#### List all available vGPU instances
The following shell snippet will walk the `sysfs` and only print instances
that are available, that can be created.
```sh
# The 00.0 is often the PF of the device the VFs will have the funciont in the
# BDF incremented by some values so e.g. the very first VF is 0000:41:00.4
cd /sys/bus/pci/devices/0000:41:00.0/
for vf in $(ls -d virtfn*)
do
BDF=$(basename $(readlink -f $vf))
for md in $(ls -d $vf/mdev_supported_types/*)
do
AVAIL=$(cat $md/available_instances)
NAME=$(cat $md/name)
DIR=$(basename $md)
if [ $AVAIL -gt 0 ]; then
echo "| BDF | INSTANCES | NAME | DIR |"
echo "+--------------+-----------+----------------+------------+"
printf "| %12s |%10d |%15s | %10s |\n\n" "$BDF" "$AVAIL" "$NAME" "$DIR"
fi
done
done
```
If there are available instances you get something like this (for the first VF),
beware that the output is highly dependent on the GPU you have, if there is no
output check again if `MIG` is really disabled.
```sh
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-4C | nvidia-692 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-8C | nvidia-693 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-10C | nvidia-694 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-16C | nvidia-695 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-20C | nvidia-696 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-40C | nvidia-697 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-80C | nvidia-698 |
```
Change to the `mdev_supported_types` directory for the virtual function on which
you want to create the `vGPU`. Taking the first output as an example:
```sh
$ cd virtfn0/mdev_supported_types/nvidia-692
$ UUIDGEN=$(uuidgen)
$ sudo bash -c "echo $UUIDGEN > create"
```
Confirm that the `vGPU` was created. You should see the `UUID` pointing to a
subdirectory of the `sysfs` space.
```sh
$ ls -l /sys/bus/mdev/devices/
```
Get the `IOMMU` group number and verify there is a `VFIO` device created to use
with Kata.
```sh
$ ls -l /sys/bus/mdev/devices/*/
$ ls -l /dev/vfio
```
Use the `VFIO` device created in the same way as in the pass-through use-case.
Beware that the guest needs the NVIDIA guest drivers, so one would need to build
a new guest `OS` image.
### NVIDIA vGPU MIG-backed
We're not going into detail what `MIG` is but briefly it is a technology to
partition the hardware into independent instances with guaranteed quality of
service. For more details see [NVIDIA Multi-Instance GPU User Guide](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/).
First enable `MIG` mode for a GPU, depending on the platform you're running
a reboot would be necessary. Some platforms support GPU reset.
```sh
$ sudo nvidia-smi -mig 1
```
If the platform supports a GPU reset one can run, otherwise you will get a
warning to reboot the server.
```sh
$ sudo nvidia-smi --gpu-reset
```
The driver per default provides a number of profiles that users can opt-in when
configuring the MIG feature.
```sh
$ sudo nvidia-smi mig -lgip
+-----------------------------------------------------------------------------+
| GPU instance profiles: |
| GPU Name ID Instances Memory P2P SM DEC ENC |
| Free/Total GiB CE JPEG OFA |
|=============================================================================|
| 0 MIG 1g.10gb 19 7/7 9.50 No 14 0 0 |
| 1 0 0 |
+-----------------------------------------------------------------------------+
| 0 MIG 1g.10gb+me 20 1/1 9.50 No 14 1 0 |
| 1 1 1 |
+-----------------------------------------------------------------------------+
| 0 MIG 2g.20gb 14 3/3 19.50 No 28 1 0 |
| 2 0 0 |
+-----------------------------------------------------------------------------+
...
```
Create the GPU instances that correspond to the `vGPU` types of the `MIG-backed`
`vGPUs` that you will create [NVIDIA A100 PCIe 80GB Virtual GPU Types](https://docs.nvidia.com/grid/13.0/grid-vgpu-user-guide/index.html#vgpu-types-nvidia-a100-pcie-80gb).
```sh
# MIG 1g.10gb --> vGPU A100D-1-10C
$ sudo nvidia-smi mig -cgi 19
```
List the GPU instances and get the GPU instance id to create the compute
instance.
```sh
$ sudo nvidia-smi mig -lgi # list the created GPU instances
$ sudo nvidia-smi mig -cci -gi 9 # each GPU instance can have several compute
# instances. Instance -> Workload
```
Verify that the compute instances were created within the GPU instance
```sh
$ nvidia-smi
... snip ...
+-----------------------------------------------------------------------------+
| MIG devices: |
+------------------+----------------------+-----------+-----------------------+
| GPU GI CI MIG | Memory-Usage | Vol| Shared |
| ID ID Dev | BAR1-Usage | SM Unc| CE ENC DEC OFA JPG|
| | | ECC| |
|==================+======================+===========+=======================|
| 0 9 0 0 | 0MiB / 9728MiB | 14 0 | 1 0 0 0 0 |
| | 0MiB / 4095MiB | | |
+------------------+----------------------+-----------+-----------------------+
... snip ...
```
We can use the [snippet](#list-all-available-vgpu-instances) from before to list
the available `vGPU` instances, this time `MIG-backed`.
```sh
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 |GRID A100D-1-10C | nvidia-699 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.5 | 1 |GRID A100D-1-10C | nvidia-699 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:01.6 | 1 |GRID A100D-1-10C | nvidia-699 |
... snip ...
```
Repeat the steps after the [snippet](#list-all-available-vgpu-instances) listing
to create the corresponding `mdev` device and use the guest `OS` created in the
previous section with `time-sliced` `vGPUs`.
## Install NVIDIA Driver + Toolkit in Kata Containers Guest OS
Consult the [Developer-Guide](https://github.com/kata-containers/kata-containers/blob/main/docs/Developer-Guide.md#create-a-rootfs-image) on how to create a
rootfs base image for a distribution of your choice. This is going to be used as
@@ -308,12 +583,9 @@ Enable the `guest_hook_path` in Kata's `configuration.toml`
guest_hook_path = "/usr/share/oci/hooks"
```
As the last step one can remove the additional packages and files that were added
to the `$ROOTFS_DIR` to keep it as small as possible.
One has built a NVIDIA rootfs, kernel and now we can run any GPU container
without installing the drivers into the container. Check NVIDIA device status
with `nvidia-smi`:
with `nvidia-smi`
```sh
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/nvidia/cuda:11.6.0-base-ubuntu20.04" cuda nvidia-smi
@@ -339,309 +611,8 @@ Fri Mar 18 10:36:59 2022
+-----------------------------------------------------------------------------+
```
## Usage Examples with Kata Containers
The following sections give usage examples for this based on the different modes.
### NVIDIA GPU passthrough mode
Use the following steps to pass an NVIDIA GPU device in passthrough mode with Kata:
1. Find the Bus-Device-Function (BDF) for the GPU device on the host:
```sh
$ sudo lspci -nn -D | grep -i nvidia
0000:d0:00.0 3D controller [0302]: NVIDIA Corporation Device [10de:20b9] (rev a1)
```
> PCI address `0000:d0:00.0` is assigned to the hardware GPU device.
> `10de:20b9` is the device ID of the hardware GPU device.
2. Find the IOMMU group for the GPU device:
```sh
$ BDF="0000:d0:00.0"
$ readlink -e /sys/bus/pci/devices/$BDF/iommu_group
```
The previous output shows that the GPU belongs to IOMMU group 192. The next
step is to bind the GPU to the VFIO-PCI driver.
```sh
$ BDF="0000:d0:00.0"
$ DEV="/sys/bus/pci/devices/$BDF"
$ echo "vfio-pci" > $DEV/driver_override
$ echo $BDF > $DEV/driver/unbind
$ echo $BDF > /sys/bus/pci/drivers_probe
# To return the device to the standard driver, we simply clear the
# driver_override and reprobe the device, ex:
$ echo > $DEV/preferred_driver
$ echo $BDF > $DEV/driver/unbind
$ echo $BDF > /sys/bus/pci/drivers_probe
```
3. Check the IOMMU group number under `/dev/vfio`:
```sh
$ ls -l /dev/vfio
total 0
crw------- 1 zvonkok zvonkok 243, 0 Mar 18 03:06 192
crw-rw-rw- 1 root root 10, 196 Mar 18 02:27 vfio
```
4. Start a Kata container with the GPU device:
```sh
# You may need to `modprobe vhost-vsock` if you get
# host system doesn't support vsock: stat /dev/vhost-vsock
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch uname -r
```
5. Run `lspci` within the container to verify the GPU device is seen in the list
of the PCI devices. Note the vendor-device id of the GPU (`10de:20b9`) in the `lspci` output.
```sh
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch sh -c "lspci -nn | grep '10de:20b9'"
```
6. Additionally, you can check the PCI BARs space of the NVIDIA GPU device in the container:
```sh
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch sh -c "lspci -s 02:00.0 -vv | grep Region"
```
> **Note**: If you see a message similar to the above, the BAR space of the NVIDIA
> GPU has been successfully allocated.
### NVIDIA vGPU mode
NVIDIA vGPU is a licensed product on all supported GPU boards. A software license
is required to enable all vGPU features within the guest VM. NVIDIA vGPU manager
needs to be installed on the host to configure GPUs in vGPU mode. See
[NVIDIA Virtual GPU Software Documentation v14.0 through 14.1](https://docs.nvidia.com/grid/14.0/)
for more details.
#### NVIDIA vGPU time-sliced
In the time-sliced mode, the GPU is not partitioned and the workload uses the
whole GPU and shares access to the GPU engines. Processes are scheduled in
series. The best effort scheduler is the default one and can be exchanged by
other scheduling policies see the documentation above how to do that.
Beware if you had `MIG` enabled before to disable `MIG` on the GPU if you want
to use `time-sliced` `vGPU`.
```sh
$ sudo nvidia-smi -mig 0
```
Enable the virtual functions for the physical GPU in the `sysfs` file system.
```sh
$ sudo /usr/lib/nvidia/sriov-manage -e 0000:41:00.0
```
Get the `BDF` of the available virtual function on the GPU, and choose one for the
following steps.
```sh
$ cd /sys/bus/pci/devices/0000:41:00.0/
$ ls -l | grep virtfn
```
##### List all available vGPU instances
The following shell snippet will walk the `sysfs` and only print instances
that are available, that can be created.
```sh
# The 00.0 is often the PF of the device. The VFs will have the function in the
# BDF incremented by some values so e.g. the very first VF is 0000:41:00.4
cd /sys/bus/pci/devices/0000:41:00.0/
for vf in $(ls -d virtfn*)
do
BDF=$(basename $(readlink -f $vf))
for md in $(ls -d $vf/mdev_supported_types/*)
do
AVAIL=$(cat $md/available_instances)
NAME=$(cat $md/name)
DIR=$(basename $md)
if [ $AVAIL -gt 0 ]; then
echo "| BDF | INSTANCES | NAME | DIR |"
echo "+--------------+-----------+----------------+------------+"
printf "| %12s |%10d |%15s | %10s |\n\n" "$BDF" "$AVAIL" "$NAME" "$DIR"
fi
done
done
```
If there are available instances you get something like this (for the first VF),
beware that the output is highly dependent on the GPU you have, if there is no
output check again if `MIG` is really disabled.
```sh
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-4C | nvidia-692 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-8C | nvidia-693 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-10C | nvidia-694 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-16C | nvidia-695 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-20C | nvidia-696 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-40C | nvidia-697 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-80C | nvidia-698 |
```
Change to the `mdev_supported_types` directory for the virtual function on which
you want to create the `vGPU`. Taking the first output as an example:
```sh
$ cd virtfn0/mdev_supported_types/nvidia-692
$ UUIDGEN=$(uuidgen)
$ sudo bash -c "echo $UUIDGEN > create"
```
Confirm that the `vGPU` was created. You should see the `UUID` pointing to a
subdirectory of the `sysfs` space.
```sh
$ ls -l /sys/bus/mdev/devices/
```
Get the `IOMMU` group number and verify there is a `VFIO` device created to use
with Kata.
```sh
$ ls -l /sys/bus/mdev/devices/*/
$ ls -l /dev/vfio
```
Use the `VFIO` device created in the same way as in the passthrough use-case.
Beware that the guest needs the NVIDIA guest drivers, so one would need to build
a new guest `OS` image.
#### NVIDIA vGPU MIG-backed
We're not going into detail what `MIG` is but briefly it is a technology to
partition the hardware into independent instances with guaranteed quality of
service. For more details see
[NVIDIA Multi-Instance GPU User Guide](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/).
First enable `MIG` mode for a GPU, depending on the platform you're running
a reboot would be necessary. Some platforms support GPU reset.
```sh
$ sudo nvidia-smi -mig 1
```
If the platform supports a GPU reset one can run, otherwise you will get a
warning to reboot the server.
```sh
$ sudo nvidia-smi --gpu-reset
```
The driver per default provides a number of profiles that users can opt-in when
configuring the MIG feature.
```sh
$ sudo nvidia-smi mig -lgip
+-----------------------------------------------------------------------------+
| GPU instance profiles: |
| GPU Name ID Instances Memory P2P SM DEC ENC |
| Free/Total GiB CE JPEG OFA |
|=============================================================================|
| 0 MIG 1g.10gb 19 7/7 9.50 No 14 0 0 |
| 1 0 0 |
+-----------------------------------------------------------------------------+
| 0 MIG 1g.10gb+me 20 1/1 9.50 No 14 1 0 |
| 1 1 1 |
+-----------------------------------------------------------------------------+
| 0 MIG 2g.20gb 14 3/3 19.50 No 28 1 0 |
| 2 0 0 |
+-----------------------------------------------------------------------------+
...
```
Create the GPU instances that correspond to the `vGPU` types of the `MIG-backed`
`vGPUs` that you will create
[NVIDIA A100 PCIe 80GB Virtual GPU Types](https://docs.nvidia.com/grid/13.0/grid-vgpu-user-guide/index.html#vgpu-types-nvidia-a100-pcie-80gb).
```sh
# MIG 1g.10gb --> vGPU A100D-1-10C
$ sudo nvidia-smi mig -cgi 19
```
List the GPU instances and get the GPU instance id to create the compute
instance.
```sh
$ sudo nvidia-smi mig -lgi # list the created GPU instances
$ sudo nvidia-smi mig -cci -gi 9 # each GPU instance can have several compute
# instances. Instance -> Workload
```
Verify that the compute instances were created within the GPU instance
```sh
$ nvidia-smi
... snip ...
+-----------------------------------------------------------------------------+
| MIG devices: |
+------------------+----------------------+-----------+-----------------------+
| GPU GI CI MIG | Memory-Usage | Vol| Shared |
| ID ID Dev | BAR1-Usage | SM Unc| CE ENC DEC OFA JPG|
| | | ECC| |
|==================+======================+===========+=======================|
| 0 9 0 0 | 0MiB / 9728MiB | 14 0 | 1 0 0 0 0 |
| | 0MiB / 4095MiB | | |
+------------------+----------------------+-----------+-----------------------+
... snip ...
```
We can use the [snippet](#list-all-available-vgpu-instances) from before to list
the available `vGPU` instances, this time `MIG-backed`.
```sh
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 |GRID A100D-1-10C | nvidia-699 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.5 | 1 |GRID A100D-1-10C | nvidia-699 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:01.6 | 1 |GRID A100D-1-10C | nvidia-699 |
... snip ...
```
Repeat the steps after the [snippet](#list-all-available-vgpu-instances) listing
to create the corresponding `mdev` device and use the guest `OS` created in the
previous section with `time-sliced` `vGPUs`.
As the last step one can remove the additional packages and files that were added
to the `$ROOTFS_DIR` to keep it as small as possible.
## References

View File

@@ -1,20 +1,24 @@
# Table of Contents
**Note:**: This guide used to contain an end-to-end flow to build a
custom Kata containers root filesystem with QAT out-of-tree SR-IOV virtual
function driver and run QAT enabled containers. The former is no longer necessary
so the instructions are dropped. If the use-case is still of interest, please file
an issue in either of the QAT Kubernetes specific repos linked below.
# Introduction
Intel® QuickAssist Technology (QAT) provides hardware acceleration
for security (cryptography) and compression. Kata Containers can enable
these acceleration functions for containers using QAT SR-IOV with the
support from [Intel QAT Device Plugin for Kubernetes](https://github.com/intel/intel-device-plugins-for-kubernetes)
or [Intel QAT DRA Resource Driver for Kubernetes](https://github.com/intel/intel-resource-drivers-for-kubernetes).
for security (cryptography) and compression. These instructions cover the
steps for the latest [Ubuntu LTS release](https://ubuntu.com/download/desktop)
which already include the QAT host driver. These instructions can be adapted to
any Linux distribution. These instructions guide the user on how to download
the kernel sources, compile kernel driver modules against those sources, and
load them onto the host as well as preparing a specially built Kata Containers
kernel and custom Kata Containers rootfs.
## More Information
* Download kernel sources
* Compile Kata kernel
* Compile kernel driver modules against those sources
* Download rootfs
* Add driver modules to rootfs
* Build rootfs image
## Helpful Links before starting
[Intel® QuickAssist Technology at `01.org`](https://www.intel.com/content/www/us/en/developer/topic-technology/open/quick-assist-technology/overview.html)
@@ -22,6 +26,554 @@ or [Intel QAT DRA Resource Driver for Kubernetes](https://github.com/intel/intel
[Intel Device Plugin for Kubernetes](https://github.com/intel/intel-device-plugins-for-kubernetes)
[Intel DRA Resource Driver for Kubernetes](https://github.com/intel/intel-resource-drivers-for-kubernetes)
[Intel® QuickAssist Technology for Crypto Poll Mode Driver](https://dpdk-docs.readthedocs.io/en/latest/cryptodevs/qat.html)
## Steps to enable Intel® QAT in Kata Containers
There are some steps to complete only once, some steps to complete with every
reboot, and some steps to complete when the host kernel changes.
## Script variables
The following list of variables must be set before running through the
scripts. These variables refer to locations to store modules and configuration
files on the host and links to the drivers to use. Modify these as
needed to point to updated drivers or different install locations.
### Set environment variables (Every Reboot)
Make sure to check [`01.org`](https://www.intel.com/content/www/us/en/developer/topic-technology/open/quick-assist-technology/overview.html) for
the latest driver.
```bash
$ export QAT_DRIVER_VER=qat1.7.l.4.14.0-00031.tar.gz
$ export QAT_DRIVER_URL=https://downloadmirror.intel.com/30178/eng/${QAT_DRIVER_VER}
$ export QAT_CONF_LOCATION=~/QAT_conf
$ export QAT_DOCKERFILE=https://raw.githubusercontent.com/intel/intel-device-plugins-for-kubernetes/main/demo/openssl-qat-engine/Dockerfile
$ export QAT_SRC=~/src/QAT
$ export GOPATH=~/src/go
$ export KATA_KERNEL_LOCATION=~/kata
$ export KATA_ROOTFS_LOCATION=~/kata
```
## Prepare the Ubuntu Host
The host could be a bare metal instance or a virtual machine. If using a
virtual machine, make sure that KVM nesting is enabled. The following
instructions reference an Intel® C62X chipset. Some of the instructions must be
modified if using a different Intel® QAT device. The Intel® QAT chipset can be
identified by executing the following.
### Identify which PCI Bus the Intel® QAT card is on
```bash
$ for i in 0434 0435 37c8 1f18 1f19; do lspci -d 8086:$i; done
```
### Install necessary packages for Ubuntu
These packages are necessary to compile the Kata kernel, Intel® QAT driver, and to
prepare the rootfs for Kata. [Docker](https://docs.docker.com/engine/install/ubuntu/)
also needs to be installed to be able to build the rootfs. To test that
everything works a Kubernetes pod is started requesting Intel® QAT resources. For the
pass through of the virtual functions the kernel boot parameter needs to have
`INTEL_IOMMU=on`.
```bash
$ sudo apt update
$ sudo apt install -y golang-go build-essential python pkg-config zlib1g-dev libudev-dev bison libelf-dev flex libtool automake autotools-dev autoconf bc libpixman-1-dev coreutils libssl-dev
$ sudo sed -i 's/GRUB_CMDLINE_LINUX_DEFAULT=""/GRUB_CMDLINE_LINUX_DEFAULT="intel_iommu=on"/' /etc/default/grub
$ sudo update-grub
$ sudo reboot
```
### Download Intel® QAT drivers
This will download the [Intel® QAT drivers](https://www.intel.com/content/www/us/en/developer/topic-technology/open/quick-assist-technology/overview.html).
Make sure to check the website for the latest version.
```bash
$ mkdir -p $QAT_SRC
$ cd $QAT_SRC
$ curl -L $QAT_DRIVER_URL | tar zx
```
### Copy Intel® QAT configuration files and enable virtual functions
Modify the instructions below as necessary if using a different Intel® QAT hardware
platform. You can learn more about customizing configuration files at the
[Intel® QAT Engine repository](https://github.com/intel/QAT_Engine/#copy-the-correct-intel-quickassist-technology-driver-config-files)
This section starts from a base config file and changes the `SSL` section to
`SHIM` to support the OpenSSL engine. There are more tweaks that you can make
depending on the use case and how many Intel® QAT engines should be run. You
can find more information about how to customize in the
[Intel® QuickAssist Technology Software for Linux* - Programmer's Guide.](https://www.intel.com/content/www/us/en/content-details/709196/intel-quickassist-technology-api-programmer-s-guide.html)
> **Note: This section assumes that a Intel® QAT `c6xx` platform is used.**
```bash
$ mkdir -p $QAT_CONF_LOCATION
$ cp $QAT_SRC/quickassist/utilities/adf_ctl/conf_files/c6xxvf_dev0.conf.vm $QAT_CONF_LOCATION/c6xxvf_dev0.conf
$ sed -i 's/\[SSL\]/\[SHIM\]/g' $QAT_CONF_LOCATION/c6xxvf_dev0.conf
```
### Expose and Bind Intel® QAT virtual functions to VFIO-PCI (Every reboot)
To enable virtual functions, the host OS should have IOMMU groups enabled. In
the UEFI Firmware Intel® Virtualization Technology for Directed I/O
(Intel® VT-d) must be enabled. Also, the kernel boot parameter should be
`intel_iommu=on` or `intel_iommu=ifgx_off`. This should have been set from
the instructions above. Check the output of `/proc/cmdline` to confirm. The
following commands assume you installed an Intel® QAT card, IOMMU is on, and
VT-d is enabled. The vendor and device ID add to the `VFIO-PCI` driver so that
each exposed virtual function can be bound to the `VFIO-PCI` driver. Once
complete, each virtual function passes into a Kata Containers container using
the PCIe device passthrough feature. For Kubernetes, the
[Intel device plugin](https://github.com/intel/intel-device-plugins-for-kubernetes)
for Kubernetes handles the binding of the driver, but the VFs still must be
enabled.
```bash
$ sudo modprobe vfio-pci
$ QAT_PCI_BUS_PF_NUMBERS=$((lspci -d :435 && lspci -d :37c8 && lspci -d :19e2 && lspci -d :6f54) | cut -d ' ' -f 1)
$ QAT_PCI_BUS_PF_1=$(echo $QAT_PCI_BUS_PF_NUMBERS | cut -d ' ' -f 1)
$ echo 16 | sudo tee /sys/bus/pci/devices/0000:$QAT_PCI_BUS_PF_1/sriov_numvfs
$ QAT_PCI_ID_VF=$(cat /sys/bus/pci/devices/0000:${QAT_PCI_BUS_PF_1}/virtfn0/uevent | grep PCI_ID)
$ QAT_VENDOR_AND_ID_VF=$(echo ${QAT_PCI_ID_VF/PCI_ID=} | sed 's/:/ /')
$ echo $QAT_VENDOR_AND_ID_VF | sudo tee --append /sys/bus/pci/drivers/vfio-pci/new_id
```
Loop through all the virtual functions and bind to the VFIO driver
```bash
$ for f in /sys/bus/pci/devices/0000:$QAT_PCI_BUS_PF_1/virtfn*
do QAT_PCI_BUS_VF=$(basename $(readlink $f))
echo $QAT_PCI_BUS_VF | sudo tee --append /sys/bus/pci/drivers/c6xxvf/unbind
echo $QAT_PCI_BUS_VF | sudo tee --append /sys/bus/pci/drivers/vfio-pci/bind
done
```
### Check Intel® QAT virtual functions are enabled
If the following command returns empty, then the virtual functions are not
properly enabled. This command checks the enumerated device IDs for just the
virtual functions. Using the Intel® QAT as an example, the physical device ID
is `37c8` and virtual function device ID is `37c9`. The following command checks
if VF's are enabled for any of the currently known Intel® QAT device ID's. The
following `ls` command should show the 16 VF's bound to `VFIO-PCI`.
```bash
$ for i in 0442 0443 37c9 19e3; do lspci -d 8086:$i; done
```
Another way to check is to see what PCI devices that `VFIO-PCI` is mapped to.
It should match the device ID's of the VF's.
```bash
$ ls -la /sys/bus/pci/drivers/vfio-pci
```
## Prepare Kata Containers
### Download Kata kernel Source
This example automatically uses the latest Kata kernel supported by Kata. It
follows the instructions from the
[packaging kernel repository](../../tools/packaging/kernel)
and uses the latest Kata kernel
[config](../../tools/packaging/kernel/configs).
There are some patches that must be installed as well, which the
`build-kernel.sh` script should automatically apply. If you are using a
different kernel version, then you might need to manually apply them. Since
the Kata Containers kernel has a minimal set of kernel flags set, you must
create a Intel® QAT kernel fragment with the necessary `CONFIG_CRYPTO_*` options set.
Update the config to set some of the `CRYPTO` flags to enabled. This might
change with different kernel versions. The following instructions were tested
with kernel `v5.4.0-64-generic`.
```bash
$ mkdir -p $GOPATH
$ cd $GOPATH
$ go get -v github.com/kata-containers/kata-containers
$ cat << EOF > $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/configs/fragments/common/qat.conf
CONFIG_PCIEAER=y
CONFIG_UIO=y
CONFIG_CRYPTO_HW=y
CONFIG_CRYPTO_DEV_QAT_C62XVF=m
CONFIG_CRYPTO_CBC=y
CONFIG_MODULES=y
CONFIG_MODULE_SIG=y
CONFIG_CRYPTO_AUTHENC=y
CONFIG_CRYPTO_DH=y
EOF
$ $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/build-kernel.sh setup
```
### Build Kata kernel
```bash
$ cd $GOPATH
$ export LINUX_VER=$(ls -d kata-linux-*)
$ sed -i 's/EXTRAVERSION =/EXTRAVERSION = .qat.container/' $LINUX_VER/Makefile
$ $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/build-kernel.sh build
```
### Copy Kata kernel
```bash
$ export KATA_KERNEL_NAME=vmlinux-${LINUX_VER}_qat
$ mkdir -p $KATA_KERNEL_LOCATION
$ cp ${GOPATH}/${LINUX_VER}/vmlinux ${KATA_KERNEL_LOCATION}/${KATA_KERNEL_NAME}
```
### Prepare Kata root filesystem
These instructions build upon the OS builder instructions located in the
[Developer Guide](../Developer-Guide.md). At this point it is recommended that
[Docker](https://docs.docker.com/engine/install/ubuntu/) is installed first, and
then [Kata-deploy](../../tools/packaging/kata-deploy)
is use to install Kata. This will make sure that the correct `agent` version
is installed into the rootfs in the steps below.
The following instructions use Ubuntu as the root filesystem with systemd as
the init and will add in the `kmod` binary, which is not a standard binary in
a Kata rootfs image. The `kmod` binary is necessary to load the Intel® QAT
kernel modules when the virtual machine rootfs boots.
```bash
$ export OSBUILDER=$GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder
$ export ROOTFS_DIR=${OSBUILDER}/rootfs-builder/rootfs
$ export EXTRA_PKGS='kmod'
```
Make sure that the `kata-agent` version matches the installed `kata-runtime`
version. Also make sure the `kata-runtime` install location is in your `PATH`
variable. The following `AGENT_VERSION` can be set manually to match
the `kata-runtime` version if the following commands don't work.
```bash
$ export PATH=$PATH:/opt/kata/bin
$ cd $GOPATH
$ export AGENT_VERSION=$(kata-runtime version | head -n 1 | grep -o "[0-9.]\+")
$ cd ${OSBUILDER}/rootfs-builder
$ sudo rm -rf ${ROOTFS_DIR}
$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SECCOMP=no ./rootfs.sh ubuntu'
```
### Compile Intel® QAT drivers for Kata Containers kernel and add to Kata Containers rootfs
After the Kata Containers kernel builds with the proper configuration flags,
you must build the Intel® QAT drivers against that Kata Containers kernel
version in a similar way they were previously built for the host OS. You must
set the `KERNEL_SOURCE_ROOT` variable to the Kata Containers kernel source
directory and build the Intel® QAT drivers again. The `make` command will
install the Intel® QAT modules into the Kata rootfs.
```bash
$ cd $GOPATH
$ export LINUX_VER=$(ls -d kata*)
$ export KERNEL_MAJOR_VERSION=$(awk '/^VERSION =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
$ export KERNEL_PATHLEVEL=$(awk '/^PATCHLEVEL =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
$ export KERNEL_SUBLEVEL=$(awk '/^SUBLEVEL =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
$ export KERNEL_EXTRAVERSION=$(awk '/^EXTRAVERSION =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
$ export KERNEL_ROOTFS_DIR=${KERNEL_MAJOR_VERSION}.${KERNEL_PATHLEVEL}.${KERNEL_SUBLEVEL}${KERNEL_EXTRAVERSION}
$ cd $QAT_SRC
$ KERNEL_SOURCE_ROOT=$GOPATH/$LINUX_VER ./configure --enable-icp-sriov=guest
$ sudo -E make all -j $(nproc)
$ sudo -E make INSTALL_MOD_PATH=$ROOTFS_DIR qat-driver-install -j $(nproc)
```
The `usdm_drv` module also needs to be copied into the rootfs modules path and
`depmod` should be run.
```bash
$ sudo cp $QAT_SRC/build/usdm_drv.ko $ROOTFS_DIR/lib/modules/${KERNEL_ROOTFS_DIR}/updates/drivers
$ sudo depmod -a -b ${ROOTFS_DIR} ${KERNEL_ROOTFS_DIR}
$ cd ${OSBUILDER}/image-builder
$ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh ${ROOTFS_DIR}'
```
> **Note: Ignore any errors on modules.builtin and modules.order when running
> `depmod`.**
### Copy Kata rootfs
```bash
$ mkdir -p $KATA_ROOTFS_LOCATION
$ cp ${OSBUILDER}/image-builder/kata-containers.img $KATA_ROOTFS_LOCATION
```
## Verify Intel® QAT works in a container
The following instructions uses a OpenSSL Dockerfile that builds the
Intel® QAT engine to allow OpenSSL to offload crypto functions. It is a
convenient way to test that VFIO device passthrough for the Intel® QAT VFs are
working properly with the Kata Containers VM.
### Build OpenSSL Intel® QAT engine container
Use the OpenSSL Intel® QAT [Dockerfile](https://github.com/intel/intel-device-plugins-for-kubernetes/tree/main/demo/openssl-qat-engine)
to build a container image with an optimized OpenSSL engine for
Intel® QAT. Using `docker build` with the Kata Containers runtime can sometimes
have issues. Therefore, make sure that `runc` is the default Docker container
runtime.
```bash
$ cd $QAT_SRC
$ curl -O $QAT_DOCKERFILE
$ sudo docker build -t openssl-qat-engine .
```
> **Note: The Intel® QAT driver version in this container might not match the
> Intel® QAT driver compiled and loaded on the host when compiling.**
### Test Intel® QAT with the ctr tool
The `ctr` tool can be used to interact with the containerd daemon. It may be
more convenient to use this tool to verify the kernel and image instead of
setting up a Kubernetes cluster. The correct Kata runtimes need to be added
to the containerd `config.toml`. Below is a sample snippet that can be added
to allow QEMU and Cloud Hypervisor (CLH) to work with `ctr`.
```
[plugins.cri.containerd.runtimes.kata-qemu]
runtime_type = "io.containerd.kata-qemu.v2"
privileged_without_host_devices = true
pod_annotations = ["io.katacontainers.*"]
[plugins.cri.containerd.runtimes.kata-qemu.options]
ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration-qemu.toml"
[plugins.cri.containerd.runtimes.kata-clh]
runtime_type = "io.containerd.kata-clh.v2"
privileged_without_host_devices = true
pod_annotations = ["io.katacontainers.*"]
[plugins.cri.containerd.runtimes.kata-clh.options]
ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration-clh.toml"
```
In addition, containerd expects the binary to be in `/usr/local/bin` so add
this small script so that it redirects to be able to use either QEMU or
Cloud Hypervisor with Kata.
```bash
$ echo '#!/usr/bin/env bash' | sudo tee /usr/local/bin/containerd-shim-kata-qemu-v2
$ echo 'KATA_CONF_FILE=/opt/kata/share/defaults/kata-containers/configuration-qemu.toml /opt/kata/bin/containerd-shim-kata-v2 $@' | sudo tee -a /usr/local/bin/containerd-shim-kata-qemu-v2
$ sudo chmod +x /usr/local/bin/containerd-shim-kata-qemu-v2
$ echo '#!/usr/bin/env bash' | sudo tee /usr/local/bin/containerd-shim-kata-clh-v2
$ echo 'KATA_CONF_FILE=/opt/kata/share/defaults/kata-containers/configuration-clh.toml /opt/kata/bin/containerd-shim-kata-v2 $@' | sudo tee -a /usr/local/bin/containerd-shim-kata-clh-v2
$ sudo chmod +x /usr/local/bin/containerd-shim-kata-clh-v2
```
After the OpenSSL image is built and imported into containerd, a Intel® QAT
virtual function exposed in the step above can be added to the `ctr` command.
Make sure to change the `/dev/vfio` number to one that actually exists on the
host system. When using the `ctr` tool, the`configuration.toml` for Kata needs
to point to the custom Kata kernel and rootfs built above and the Intel® QAT
modules in the Kata rootfs need to load at boot. The following steps assume that
`kata-deploy` was used to install Kata and QEMU is being tested. If using a
different hypervisor, different install method for Kata, or a different
Intel® QAT chipset then the command will need to be modified.
> **Note: The following was tested with
[containerd v1.4.6](https://github.com/containerd/containerd/releases/tag/v1.4.6).**
```bash
$ config_file="/opt/kata/share/defaults/kata-containers/configuration-qemu.toml"
$ sudo sed -i "/kernel =/c kernel = "\"${KATA_ROOTFS_LOCATION}/${KATA_KERNEL_NAME}\""" $config_file
$ sudo sed -i "/image =/c image = "\"${KATA_KERNEL_LOCATION}/kata-containers.img\""" $config_file
$ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 modules-load=usdm_drv,qat_c62xvf"/g' $config_file
$ sudo docker save -o openssl-qat-engine.tar openssl-qat-engine:latest
$ sudo ctr images import openssl-qat-engine.tar
$ sudo ctr run --runtime io.containerd.run.kata-qemu.v2 --privileged -t --rm --device=/dev/vfio/180 --mount type=bind,src=/dev,dst=/dev,options=rbind:rw --mount type=bind,src=${QAT_CONF_LOCATION}/c6xxvf_dev0.conf,dst=/etc/c6xxvf_dev0.conf,options=rbind:rw docker.io/library/openssl-qat-engine:latest bash
```
Below are some commands to run in the container image to verify Intel® QAT is
working
```sh
root@67561dc2757a/ # cat /proc/modules
qat_c62xvf 16384 - - Live 0xffffffffc00d9000 (OE)
usdm_drv 86016 - - Live 0xffffffffc00e8000 (OE)
intel_qat 249856 - - Live 0xffffffffc009b000 (OE)
root@67561dc2757a/ # adf_ctl restart
Restarting all devices.
Processing /etc/c6xxvf_dev0.conf
root@67561dc2757a/ # adf_ctl status
Checking status of all devices.
There is 1 QAT acceleration device(s) in the system:
qat_dev0 - type: c6xxvf, inst_id: 0, node_id: 0, bsf: 0000:01:01.0, #accel: 1 #engines: 1 state: up
root@67561dc2757a/ # openssl engine -c -t qat-hw
(qat-hw) Reference implementation of QAT crypto engine v0.6.1
[RSA, DSA, DH, AES-128-CBC-HMAC-SHA1, AES-128-CBC-HMAC-SHA256, AES-256-CBC-HMAC-SHA1, AES-256-CBC-HMAC-SHA256, TLS1-PRF, HKDF, X25519, X448]
[ available ]
```
### Test Intel® QAT in Kubernetes
Start a Kubernetes cluster with containerd as the CRI. The host should
already be setup with 16 virtual functions of the Intel® QAT card bound to
`VFIO-PCI`. Verify this by looking in `/dev/vfio` for a listing of devices.
You might need to disable Docker before initializing Kubernetes. Be aware
that the OpenSSL container image built above will need to be exported from
Docker and imported into containerd.
If Kata is installed through [`kata-deploy`](../../tools/packaging/kata-deploy/helm-chart/README.md)
there will be multiple `configuration.toml` files associated with different
hypervisors. Rather than add in the custom Kata kernel, Kata rootfs, and
kernel modules to each `configuration.toml` as the default, instead use
[annotations](../how-to/how-to-load-kernel-modules-with-kata.md)
in the Kubernetes YAML file to tell Kata which kernel and rootfs to use. The
easy way to do this is to use `kata-deploy` which will install the Kata binaries
to `/opt` and properly configure the `/etc/containerd/config.toml` with annotation
support. However, the `configuration.toml` needs to enable support for
annotations as well. The following configures both QEMU and Cloud Hypervisor
`configuration.toml` files that are currently available with Kata Container
versions 2.0 and higher.
```bash
$ sudo sed -i 's/enable_annotations\s=\s\[\]/enable_annotations = [".*"]/' /opt/kata/share/defaults/kata-containers/configuration-qemu.toml
$ sudo sed -i 's/enable_annotations\s=\s\[\]/enable_annotations = [".*"]/' /opt/kata/share/defaults/kata-containers/configuration-clh.toml
```
Export the OpenSSL image from Docker and import into containerd.
```bash
$ sudo docker save -o openssl-qat-engine.tar openssl-qat-engine:latest
$ sudo ctr -n=k8s.io images import openssl-qat-engine.tar
```
The [Intel® QAT Plugin](https://github.com/intel/intel-device-plugins-for-kubernetes/blob/main/cmd/qat_plugin/README.md)
needs to be started so that the virtual functions can be discovered and
used by Kubernetes.
The following YAML file can be used to start a Kata container with Intel® QAT
support. If Kata is installed with `kata-deploy`, then the containerd
`configuration.toml` should have all of the Kata runtime classes already
populated and annotations supported. To use a Intel® QAT virtual function, the
Intel® QAT plugin needs to be started after the VF's are bound to `VFIO-PCI` as
described [above](#expose-and-bind-intel-qat-virtual-functions-to-vfio-pci-every-reboot).
Edit the following to point to the correct Kata kernel and rootfs location
built with Intel® QAT support.
```bash
$ cat << EOF > kata-openssl-qat.yaml
apiVersion: v1
kind: Pod
metadata:
name: kata-openssl-qat
labels:
app: kata-openssl-qat
annotations:
io.katacontainers.config.hypervisor.kernel: "$KATA_KERNEL_LOCATION/$KATA_KERNEL_NAME"
io.katacontainers.config.hypervisor.image: "$KATA_ROOTFS_LOCATION/kata-containers.img"
io.katacontainers.config.hypervisor.kernel_params: "modules-load=usdm_drv,qat_c62xvf"
spec:
runtimeClassName: kata-qemu
containers:
- name: kata-openssl-qat
image: docker.io/library/openssl-qat-engine:latest
imagePullPolicy: IfNotPresent
resources:
limits:
qat.intel.com/generic: 1
cpu: 1
securityContext:
capabilities:
add: ["IPC_LOCK", "SYS_ADMIN"]
volumeMounts:
- mountPath: /etc/c6xxvf_dev0.conf
name: etc-mount
- mountPath: /dev
name: dev-mount
volumes:
- name: dev-mount
hostPath:
path: /dev
- name: etc-mount
hostPath:
path: $QAT_CONF_LOCATION/c6xxvf_dev0.conf
EOF
```
Use `kubectl` to start the pod. Verify that Intel® QAT card acceleration is
working with the Intel® QAT engine.
```bash
$ kubectl apply -f kata-openssl-qat.yaml
```
```sh
$ kubectl exec -it kata-openssl-qat -- adf_ctl restart
Restarting all devices.
Processing /etc/c6xxvf_dev0.conf
$ kubectl exec -it kata-openssl-qat -- adf_ctl status
Checking status of all devices.
There is 1 QAT acceleration device(s) in the system:
qat_dev0 - type: c6xxvf, inst_id: 0, node_id: 0, bsf: 0000:01:01.0, #accel: 1 #engines: 1 state: up
$ kubectl exec -it kata-openssl-qat -- openssl engine -c -t qat-hw
(qat-hw) Reference implementation of QAT crypto engine v0.6.1
[RSA, DSA, DH, AES-128-CBC-HMAC-SHA1, AES-128-CBC-HMAC-SHA256, AES-256-CBC-HMAC-SHA1, AES-256-CBC-HMAC-SHA256, TLS1-PRF, HKDF, X25519, X448]
[ available ]
```
### Troubleshooting
* Check that `/dev/vfio` has VFs enabled.
```sh
$ ls /dev/vfio
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 vfio
```
* Check that the modules load when inside the Kata Container.
```sh
bash-5.0# grep -E "qat|usdm_drv" /proc/modules
qat_c62xvf 16384 - - Live 0x0000000000000000 (O)
usdm_drv 86016 - - Live 0x0000000000000000 (O)
intel_qat 184320 - - Live 0x0000000000000000 (O)
```
* Verify that at least the first `c6xxvf_dev0.conf` file mounts inside the
container image in `/etc`. You will need one configuration file for each VF
passed into the container.
```sh
bash-5.0# ls /etc
c6xxvf_dev0.conf c6xxvf_dev11.conf c6xxvf_dev14.conf c6xxvf_dev3.conf c6xxvf_dev6.conf c6xxvf_dev9.conf resolv.conf
c6xxvf_dev1.conf c6xxvf_dev12.conf c6xxvf_dev15.conf c6xxvf_dev4.conf c6xxvf_dev7.conf hostname
c6xxvf_dev10.conf c6xxvf_dev13.conf c6xxvf_dev2.conf c6xxvf_dev5.conf c6xxvf_dev8.conf hosts
```
* Check `dmesg` inside the container to see if there are any issues with the
Intel® QAT driver.
* If there are issues building the OpenSSL Intel® QAT container image, then
check to make sure that runc is the default runtime for building container.
```sh
$ cat /etc/systemd/system/docker.service.d/50-runtime.conf
[Service]
Environment="DOCKER_DEFAULT_RUNTIME=--default-runtime runc"
```
## Optional Scripts
### Verify Intel® QAT card counters are incremented
To check the built in firmware counters, the Intel® QAT driver has to be compiled
and installed to the host and can't rely on the built in host driver. The
counters will increase when the accelerator is actively being used. To verify
Intel® QAT is actively accelerating the containerized application, use the
following instructions to check if any of the counters increment. Make
sure to change the PCI Device ID to match whats in the system.
```bash
$ for i in 0434 0435 37c8 1f18 1f19; do lspci -d 8086:$i; done
$ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b1\:00.0/fw_counters
$ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b3\:00.0/fw_counters
$ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b5\:00.0/fw_counters
```

View File

@@ -1,3 +1,3 @@
[toolchain]
# Keep in sync with versions.yaml
channel = "1.89"
channel = "1.85.1"

45
src/agent/Cargo.lock generated
View File

@@ -459,9 +459,15 @@ version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
dependencies = [
"bit-vec",
"bit-vec 0.8.0",
]
[[package]]
name = "bit-vec"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
[[package]]
name = "bit-vec"
version = "0.8.0"
@@ -780,9 +786,9 @@ dependencies = [
[[package]]
name = "container-device-interface"
version = "0.1.2"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2605001b0e8214dae8af146a43ccaa965d960403e330f174c21327154530df8b"
checksum = "653849f0c250f73d9afab4b2a9a6b07adaee1f34c44ffa6f2d2c3f9392002c1a"
dependencies = [
"anyhow",
"clap",
@@ -1207,9 +1213,9 @@ dependencies = [
[[package]]
name = "fancy-regex"
version = "0.16.2"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "998b056554fbe42e03ae0e152895cd1a7e1002aec800fdc6635d20270260c46f"
checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298"
dependencies = [
"bit-set",
"regex-automata 0.4.9",
@@ -1244,6 +1250,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece"
dependencies = [
"crc32fast",
"libz-sys",
"miniz_oxide",
]
@@ -2007,9 +2014,9 @@ dependencies = [
[[package]]
name = "jsonschema"
version = "0.33.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d46662859bc5f60a145b75f4632fbadc84e829e45df6c5de74cfc8e05acb96b5"
checksum = "f1b46a0365a611fbf1d2143104dcf910aada96fafd295bab16c60b802bf6fa1d"
dependencies = [
"ahash 0.8.12",
"base64 0.22.1",
@@ -2259,6 +2266,17 @@ dependencies = [
"uuid 0.8.2",
]
[[package]]
name = "libz-sys"
version = "1.1.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b70e7a7df205e92a1a4cd9aaae7898dac0aa555503cc0a649494d0d60e7651d"
dependencies = [
"cc",
"pkg-config",
"vcpkg",
]
[[package]]
name = "linux-raw-sys"
version = "0.3.8"
@@ -3405,9 +3423,9 @@ dependencies = [
[[package]]
name = "referencing"
version = "0.33.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e9c261f7ce75418b3beadfb3f0eb1299fe8eb9640deba45ffa2cb783098697d"
checksum = "c8eff4fa778b5c2a57e85c5f2fe3a709c52f0e60d23146e2151cbef5893f420e"
dependencies = [
"ahash 0.8.12",
"fluent-uri 0.3.2",
@@ -3701,7 +3719,7 @@ dependencies = [
"anyhow",
"async-trait",
"awaitgroup",
"bit-vec",
"bit-vec 0.6.3",
"capctl",
"caps",
"cfg-if",
@@ -4305,7 +4323,6 @@ checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683"
name = "test-utils"
version = "0.1.0"
dependencies = [
"libc",
"nix 0.26.4",
]
@@ -4804,6 +4821,12 @@ version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "943ce29a8a743eb10d6082545d861b24f9d1b160b7d741e0f2cdf726bec909c5"
[[package]]
name = "vcpkg"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "version_check"
version = "0.9.5"

View File

@@ -186,7 +186,7 @@ base64 = "0.22"
sha2 = "0.10.8"
async-compression = { version = "0.4.22", features = ["tokio", "gzip"] }
container-device-interface = "0.1.1"
container-device-interface = "0.1.0"
[target.'cfg(target_arch = "s390x")'.dependencies]
pv_core = { git = "https://github.com/ibm-s390-linux/s390-tools", rev = "4942504a9a2977d49989a5e5b7c1c8e07dc0fa41", package = "s390_pv_core" }
@@ -206,7 +206,6 @@ lto = true
seccomp = ["rustjail/seccomp"]
standard-oci-runtime = ["rustjail/standard-oci-runtime"]
agent-policy = ["kata-agent-policy"]
init-data = []
[[bin]]
name = "kata-agent"

View File

@@ -41,14 +41,6 @@ ifeq ($(AGENT_POLICY),yes)
override EXTRA_RUSTFEATURES += agent-policy
endif
##VAR INIT_DATA=yes|no define if agent enables the init data feature
INIT_DATA ?= yes
# Enable the init data fature of rust build
ifeq ($(INIT_DATA),yes)
override EXTRA_RUSTFEATURES += init-data
endif
include ../../utils.mk
##VAR STANDARD_OCI_RUNTIME=yes|no define if agent enables standard oci runtime feature
@@ -130,7 +122,7 @@ $(TARGET): $(GENERATED_CODE) $(TARGET_PATH)
$(TARGET_PATH): show-summary
@RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) $(if $(findstring release,$(BUILD_TYPE)),--release) $(EXTRA_RUSTFEATURES)
$(GENERATED_FILES): %: %.in $(VERSION_FILE)
$(GENERATED_FILES): %: %.in
@sed $(foreach r,$(GENERATED_REPLACEMENTS),-e 's|@$r@|$($r)|g') "$<" > "$@"
##TARGET optimize: optimized build

View File

@@ -10,7 +10,7 @@ use anyhow::{bail, Result};
use slog::{debug, error, info, warn};
use tokio::io::AsyncWriteExt;
static POLICY_LOG_FILE: &str = "/tmp/policy.jsonl";
static POLICY_LOG_FILE: &str = "/tmp/policy.txt";
static POLICY_DEFAULT_FILE: &str = "/etc/kata-opa/default-policy.rego";
/// Convenience macro to obtain the scope logger
@@ -26,7 +26,7 @@ pub struct AgentPolicy {
/// When true policy errors are ignored, for debug purposes.
allow_failures: bool,
/// "/tmp/policy.jsonl" log file for policy activity.
/// "/tmp/policy.txt" log file for policy activity.
log_file: Option<tokio::fs::File>,
/// Regorus engine
@@ -213,7 +213,7 @@ impl AgentPolicy {
// The Policy text can be obtained directly from the pod YAML.
}
_ => {
let log_entry = format!("{{\"kind\":\"{ep}\",\"request\":{input}}}\n");
let log_entry = format!("[\"ep\":\"{ep}\",{input}],\n\n");
if let Err(e) = log_file.write_all(log_entry.as_bytes()).await {
warn!(sl!(), "policy: log_eval_input: write_all failed: {}", e);

View File

@@ -44,7 +44,7 @@ async-trait.workspace = true
inotify = "0.9.2"
libseccomp = { version = "0.3.0", optional = true }
zbus = "3.12.0"
bit-vec = "0.8.0"
bit-vec = "0.6.3"
xattr = "0.2.3"
# Local dependencies

View File

@@ -21,7 +21,7 @@ fn to_capshashset(cfd_log: RawFd, capabilities: &Option<HashSet<LinuxCapability>
let binding: HashSet<LinuxCapability> = HashSet::new();
let caps = capabilities.as_ref().unwrap_or(&binding);
for cap in caps.iter() {
match Capability::from_str(&format!("CAP_{cap}")) {
match Capability::from_str(&format!("CAP_{}", cap)) {
Err(_) => {
log_child!(cfd_log, "{} is not a cap", &cap.to_string());
continue;

View File

@@ -1097,7 +1097,7 @@ impl Manager {
devices_group_info
);
Self::setup_allowed_all_mode(pod_cg).with_context(|| {
format!("Setup allowed all devices mode for {pod_cpath}")
format!("Setup allowed all devices mode for {}", pod_cpath)
})?;
devices_group_info.allowed_all = true;
}
@@ -1109,11 +1109,11 @@ impl Manager {
if !is_allowded_all {
Self::setup_devcg_whitelist(pod_cg).with_context(|| {
format!("Setup device cgroup whitelist for {pod_cpath}")
format!("Setup device cgroup whitelist for {}", pod_cpath)
})?;
} else {
Self::setup_allowed_all_mode(pod_cg)
.with_context(|| format!("Setup allowed all mode for {pod_cpath}"))?;
.with_context(|| format!("Setup allowed all mode for {}", pod_cpath))?;
devices_group_info.allowed_all = true;
}
@@ -1132,7 +1132,7 @@ impl Manager {
if let Some(devices_group_info) = devices_group_info.as_ref() {
if !devices_group_info.allowed_all {
Self::setup_devcg_whitelist(&cg)
.with_context(|| format!("Setup device cgroup whitelist for {cpath}"))?;
.with_context(|| format!("Setup device cgroup whitelist for {}", cpath))?;
}
}

View File

@@ -57,8 +57,8 @@ fn parse_parent(slice: String) -> Result<String> {
if subslice.is_empty() {
return Err(anyhow!("invalid slice name: {}", slice));
}
slice_path = format!("{slice_path}/{prefix}{subslice}{SLICE_SUFFIX}");
prefix = format!("{prefix}{subslice}-");
slice_path = format!("{}/{}{}{}", slice_path, prefix, subslice, SLICE_SUFFIX);
prefix = format!("{}{}-", prefix, subslice);
}
slice_path.remove(0);
Ok(slice_path)
@@ -68,9 +68,9 @@ fn get_unit_name(prefix: String, name: String) -> String {
if name.ends_with(SLICE_SUFFIX) {
name
} else if prefix.is_empty() {
format!("{name}{SCOPE_SUFFIX}")
format!("{}{}", name, SCOPE_SUFFIX)
} else {
format!("{prefix}-{name}{SCOPE_SUFFIX}")
format!("{}-{}{}", prefix, name, SCOPE_SUFFIX)
}
}

View File

@@ -346,7 +346,7 @@ pub fn init_child() {
Ok(_) => log_child!(cfd_log, "temporary parent process exit successfully"),
Err(e) => {
log_child!(cfd_log, "temporary parent process exit:child exit: {:?}", e);
let _ = write_sync(cwfd, SYNC_FAILED, format!("{e:?}").as_str());
let _ = write_sync(cwfd, SYNC_FAILED, format!("{:?}", e).as_str());
}
}
}
@@ -544,13 +544,13 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
sched::setns(fd, s).or_else(|e| {
if s == CloneFlags::CLONE_NEWUSER {
if e != Errno::EINVAL {
let _ = write_sync(cwfd, SYNC_FAILED, format!("{e:?}").as_str());
let _ = write_sync(cwfd, SYNC_FAILED, format!("{:?}", e).as_str());
return Err(e);
}
Ok(())
} else {
let _ = write_sync(cwfd, SYNC_FAILED, format!("{e:?}").as_str());
let _ = write_sync(cwfd, SYNC_FAILED, format!("{:?}", e).as_str());
Err(e)
}
})?;
@@ -685,7 +685,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
let _ = write_sync(
cwfd,
SYNC_FAILED,
format!("setgroups failed: {e:?}").as_str(),
format!("setgroups failed: {:?}", e).as_str(),
);
})?;
}
@@ -808,7 +808,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
if init {
let fd = fcntl::open(
format!("/proc/self/fd/{fifofd}").as_str(),
format!("/proc/self/fd/{}", fifofd).as_str(),
OFlag::O_RDONLY | OFlag::O_CLOEXEC,
Mode::from_bits_truncate(0),
)?;
@@ -1171,14 +1171,14 @@ impl BaseContainer for LinuxContainer {
.stderr(child_stderr)
.env(INIT, format!("{}", p.init))
.env(NO_PIVOT, format!("{}", self.config.no_pivot_root))
.env(CRFD_FD, format!("{crfd}"))
.env(CWFD_FD, format!("{cwfd}"))
.env(CLOG_FD, format!("{cfd_log}"))
.env(CRFD_FD, format!("{}", crfd))
.env(CWFD_FD, format!("{}", cwfd))
.env(CLOG_FD, format!("{}", cfd_log))
.env(CONSOLE_SOCKET_FD, console_name)
.env(PIDNS_ENABLED, format!("{}", pidns.enabled));
if p.init {
child = child.env(FIFO_FD, format!("{fifofd}"));
child = child.env(FIFO_FD, format!("{}", fifofd));
}
if pidns.fd.is_some() {
@@ -1588,11 +1588,9 @@ async fn join_namespaces(
cm.apply(p.pid)?;
}
if p.init {
if let Some(resource) = res {
info!(logger, "set properties to cgroups!");
cm.set(resource, false)?;
}
if p.init && res.is_some() {
info!(logger, "set properties to cgroups!");
cm.set(res.unwrap(), false)?;
}
info!(logger, "notify child to continue");
@@ -1689,7 +1687,7 @@ impl LinuxContainer {
return anyhow!(e).context(format!("container {} already exists", id.as_str()));
}
anyhow!(e).context(format!("fail to create container directory {root}"))
anyhow!(e).context(format!("fail to create container directory {}", root))
})?;
unistd::chown(
@@ -1697,7 +1695,7 @@ impl LinuxContainer {
Some(unistd::getuid()),
Some(unistd::getgid()),
)
.context(format!("Cannot change owner of container {id} root"))?;
.context(format!("Cannot change owner of container {} root", id))?;
let spec = config.spec.as_ref().unwrap();
let linux_cgroups_path = spec

View File

@@ -528,7 +528,7 @@ pub fn pivot_rootfs<P: ?Sized + NixPath + std::fmt::Debug>(path: &P) -> Result<(
// Change to the new root so that the pivot_root actually acts on it.
unistd::fchdir(newroot)?;
pivot_root(".", ".").context(format!("failed to pivot_root on {path:?}"))?;
pivot_root(".", ".").context(format!("failed to pivot_root on {:?}", path))?;
// Currently our "." is oldroot (according to the current kernel code).
// However, purely for safety, we will fchdir(oldroot) since there isn't
@@ -752,6 +752,15 @@ fn parse_mount(m: &Mount) -> (MsFlags, MsFlags, String) {
(flags, pgflags, data.join(","))
}
// This function constructs a canonicalized path by combining the `rootfs` and `unsafe_path` elements.
// The resulting path is guaranteed to be ("below" / "in a directory under") the `rootfs` directory.
//
// Parameters:
//
// - `rootfs` is the absolute path to the root of the containers root filesystem directory.
// - `unsafe_path` is path inside a container. It is unsafe since it may try to "escape" from the containers
// rootfs by using one or more "../" path elements or is its a symlink to path.
fn mount_from(
cfd_log: RawFd,
m: &Mount,
@@ -920,7 +929,7 @@ fn create_devices(devices: &[LinuxDevice], bind: bool) -> Result<()> {
for dev in DEFAULT_DEVICES.iter() {
let dev_path = dev.path().display().to_string();
let path = Path::new(&dev_path[1..]);
op(dev, path).context(format!("Creating container device {dev:?}"))?;
op(dev, path).context(format!("Creating container device {:?}", dev))?;
}
for dev in devices {
let dev_path = &dev.path();
@@ -932,9 +941,9 @@ fn create_devices(devices: &[LinuxDevice], bind: bool) -> Result<()> {
anyhow!(msg)
})?;
if let Some(dir) = path.parent() {
fs::create_dir_all(dir).context(format!("Creating container device {dev:?}"))?;
fs::create_dir_all(dir).context(format!("Creating container device {:?}", dev))?;
}
op(dev, path).context(format!("Creating container device {dev:?}"))?;
op(dev, path).context(format!("Creating container device {:?}", dev))?;
}
stat::umask(old);
Ok(())

View File

@@ -18,10 +18,10 @@ pub fn is_enabled() -> Result<bool> {
pub fn add_mount_label(data: &mut String, label: &str) {
if data.is_empty() {
let context = format!("context=\"{label}\"");
let context = format!("context=\"{}\"", label);
data.push_str(&context);
} else {
let context = format!(",context=\"{label}\"");
let context = format!(",context=\"{}\"", label);
data.push_str(&context);
}
}

View File

@@ -68,7 +68,7 @@ mod tests {
#[test]
fn test_from_str() {
let device = Address::from_str("a.1").unwrap();
assert_eq!(format!("{device}"), "0a.0001");
assert_eq!(format!("{}", device), "0a.0001");
assert!(Address::from_str("").is_err());
assert!(Address::from_str(".").is_err());

View File

@@ -102,10 +102,10 @@ mod tests {
fn test_new_device() {
// Valid devices
let device = Device::new(0, 0).unwrap();
assert_eq!(format!("{device}"), "0.0.0000");
assert_eq!(format!("{}", device), "0.0.0000");
let device = Device::new(3, 0xffff).unwrap();
assert_eq!(format!("{device}"), "0.3.ffff");
assert_eq!(format!("{}", device), "0.3.ffff");
// Invalid device
let device = Device::new(4, 0);
@@ -116,13 +116,13 @@ mod tests {
fn test_device_from_str() {
// Valid devices
let device = Device::from_str("0.0.0").unwrap();
assert_eq!(format!("{device}"), "0.0.0000");
assert_eq!(format!("{}", device), "0.0.0000");
let device = Device::from_str("0.0.0000").unwrap();
assert_eq!(format!("{device}"), "0.0.0000");
assert_eq!(format!("{}", device), "0.0.0000");
let device = Device::from_str("0.3.ffff").unwrap();
assert_eq!(format!("{device}"), "0.3.ffff");
assert_eq!(format!("{}", device), "0.3.ffff");
// Invalid devices
let device = Device::from_str("0.0");

View File

@@ -110,7 +110,7 @@ impl CDHClient {
pub async fn get_resource(&self, resource_path: &str) -> Result<Vec<u8>> {
let req = GetResourceRequest {
ResourcePath: format!("kbs://{resource_path}"),
ResourcePath: format!("kbs://{}", resource_path),
..Default::default()
};
let res = self

View File

@@ -260,7 +260,7 @@ impl Default for AgentConfig {
debug_console_vport: 0,
log_vport: 0,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: format!("{VSOCK_ADDR}:{DEFAULT_AGENT_VSOCK_PORT}"),
server_addr: format!("{}:{}", VSOCK_ADDR, DEFAULT_AGENT_VSOCK_PORT),
passfd_listener_port: 0,
cgroup_no_v1: String::from(""),
unified_cgroup_hierarchy: false,
@@ -269,7 +269,7 @@ impl Default for AgentConfig {
no_proxy: String::from(""),
guest_components_rest_api: GuestComponentsFeatures::default(),
guest_components_procs: GuestComponentsProcs::default(),
secure_storage_integrity: true,
secure_storage_integrity: false,
#[cfg(feature = "agent-policy")]
policy_file: String::from(""),
mem_agent: None,
@@ -417,7 +417,7 @@ impl AgentConfig {
// generate our config from it.
// The agent will fail to start if the configuration file is not present,
// or if it can't be parsed properly.
if param.starts_with(format!("{CONFIG_FILE}=").as_str()) {
if param.starts_with(format!("{}=", CONFIG_FILE).as_str()) {
let config_file = get_string_value(param)?;
return AgentConfig::from_config_file(&config_file)
.context("AgentConfig from kernel cmdline");
@@ -651,7 +651,7 @@ impl AgentConfig {
#[instrument]
pub fn from_config_file(file: &str) -> Result<AgentConfig> {
let config = fs::read_to_string(file)
.with_context(|| format!("Failed to read config file {file}"))?;
.with_context(|| format!("Failed to read config file {}", file))?;
AgentConfig::from_str(&config)
}
@@ -668,7 +668,7 @@ impl AgentConfig {
}
if let Ok(value) = env::var(TRACING_ENV_VAR) {
let name_value = format!("{TRACING_ENV_VAR}={value}");
let name_value = format!("{}={}", TRACING_ENV_VAR, value);
self.tracing = get_bool_value(&name_value).unwrap_or(false);
}
@@ -911,7 +911,7 @@ mod tests {
no_proxy: "",
guest_components_rest_api: GuestComponentsFeatures::default(),
guest_components_procs: GuestComponentsProcs::default(),
secure_storage_integrity: true,
secure_storage_integrity: false,
#[cfg(feature = "agent-policy")]
policy_file: "",
mem_agent: None,
@@ -1364,7 +1364,7 @@ mod tests {
},
TestData {
contents: "",
secure_storage_integrity: true,
secure_storage_integrity: false,
..Default::default()
},
TestData {
@@ -1442,7 +1442,7 @@ mod tests {
// Now, test various combinations of file contents and environment
// variables.
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{i}]: {d:?}");
let msg = format!("test[{}]: {:?}", i, d);
let file_path = dir.path().join("cmdline");
@@ -1470,36 +1470,40 @@ mod tests {
let config =
AgentConfig::from_cmdline(filename, vec![]).expect("Failed to parse command line");
assert_eq!(d.debug_console, config.debug_console, "{msg}");
assert_eq!(d.dev_mode, config.dev_mode, "{msg}");
assert_eq!(d.cgroup_no_v1, config.cgroup_no_v1, "{msg}");
assert_eq!(d.debug_console, config.debug_console, "{}", msg);
assert_eq!(d.dev_mode, config.dev_mode, "{}", msg);
assert_eq!(d.cgroup_no_v1, config.cgroup_no_v1, "{}", msg);
assert_eq!(
d.unified_cgroup_hierarchy, config.unified_cgroup_hierarchy,
"{msg}"
"{}",
msg
);
assert_eq!(d.log_level, config.log_level, "{msg}");
assert_eq!(d.hotplug_timeout, config.hotplug_timeout, "{msg}");
assert_eq!(d.container_pipe_size, config.container_pipe_size, "{msg}");
assert_eq!(d.server_addr, config.server_addr, "{msg}");
assert_eq!(d.tracing, config.tracing, "{msg}");
assert_eq!(d.https_proxy, config.https_proxy, "{msg}");
assert_eq!(d.no_proxy, config.no_proxy, "{msg}");
assert_eq!(d.log_level, config.log_level, "{}", msg);
assert_eq!(d.hotplug_timeout, config.hotplug_timeout, "{}", msg);
assert_eq!(d.container_pipe_size, config.container_pipe_size, "{}", msg);
assert_eq!(d.server_addr, config.server_addr, "{}", msg);
assert_eq!(d.tracing, config.tracing, "{}", msg);
assert_eq!(d.https_proxy, config.https_proxy, "{}", msg);
assert_eq!(d.no_proxy, config.no_proxy, "{}", msg);
assert_eq!(
d.guest_components_rest_api, config.guest_components_rest_api,
"{msg}"
"{}",
msg
);
assert_eq!(
d.guest_components_procs, config.guest_components_procs,
"{msg}"
"{}",
msg
);
assert_eq!(
d.secure_storage_integrity, config.secure_storage_integrity,
"{msg}"
"{}",
msg
);
#[cfg(feature = "agent-policy")]
assert_eq!(d.policy_file, config.policy_file, "{msg}");
assert_eq!(d.policy_file, config.policy_file, "{}", msg);
assert_eq!(d.mem_agent, config.mem_agent, "{msg}");
assert_eq!(d.mem_agent, config.mem_agent, "{}", msg);
for v in vars_to_unset {
env::remove_var(v);
@@ -1564,7 +1568,7 @@ mod tests {
#[case("panic", Ok(slog::Level::Critical))]
fn test_logrus_to_slog_level(#[case] input: &str, #[case] expected: Result<slog::Level>) {
let result = logrus_to_slog_level(input);
let msg = format!("expected: {expected:?}, result: {result:?}");
let msg = format!("expected: {:?}, result: {:?}", expected, result);
assert_result!(expected, result, msg);
}
@@ -1589,7 +1593,7 @@ mod tests {
#[case("agent.log=panic", Ok(slog::Level::Critical))]
fn test_get_log_level(#[case] input: &str, #[case] expected: Result<slog::Level>) {
let result = get_log_level(input);
let msg = format!("expected: {expected:?}, result: {result:?}");
let msg = format!("expected: {:?}, result: {:?}", expected, result);
assert_result!(expected, result, msg);
}
@@ -1632,7 +1636,7 @@ Caused by:
#[case("agent.cdi_timeout=320", Ok(time::Duration::from_secs(320)))]
fn test_timeout(#[case] param: &str, #[case] expected: Result<time::Duration>) {
let result = get_timeout(param);
let msg = format!("expected: {expected:?}, result: {result:?}");
let msg = format!("expected: {:?}, result: {:?}", expected, result);
assert_result!(expected, result, msg);
}
@@ -1672,7 +1676,7 @@ Caused by:
)))]
fn test_get_container_pipe_size(#[case] param: &str, #[case] expected: Result<i32>) {
let result = get_container_pipe_size(param);
let msg = format!("expected: {expected:?}, result: {result:?}");
let msg = format!("expected: {:?}, result: {:?}", expected, result);
assert_result!(expected, result, msg);
}
@@ -1693,7 +1697,7 @@ Caused by:
#[case("x= = ", Ok(" = ".into()))]
fn test_get_string_value(#[case] param: &str, #[case] expected: Result<String>) {
let result = get_string_value(param);
let msg = format!("expected: {expected:?}, result: {result:?}");
let msg = format!("expected: {:?}, result: {:?}", expected, result);
assert_result!(expected, result, msg);
}
@@ -1712,7 +1716,7 @@ Caused by:
#[case] expected: Result<GuestComponentsFeatures>,
) {
let result = get_guest_components_features_value(input);
let msg = format!("expected: {expected:?}, result: {result:?}");
let msg = format!("expected: {:?}, result: {:?}", expected, result);
assert_result!(expected, result, msg);
}
@@ -1735,7 +1739,7 @@ Caused by:
#[case] expected: Result<GuestComponentsProcs>,
) {
let result = get_guest_components_procs_value(param);
let msg = format!("expected: {expected:?}, result: {result:?}");
let msg = format!("expected: {:?}, result: {:?}", expected, result);
assert_result!(expected, result, msg);
}

View File

@@ -16,10 +16,7 @@ use crate::pci;
use crate::sandbox::Sandbox;
use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher};
use anyhow::{anyhow, Context, Result};
#[cfg(target_arch = "s390x")]
use kata_types::device::DRIVER_BLK_CCW_TYPE;
use kata_types::device::{DRIVER_BLK_MMIO_TYPE, DRIVER_BLK_PCI_TYPE};
use kata_types::device::{DRIVER_BLK_CCW_TYPE, DRIVER_BLK_MMIO_TYPE, DRIVER_BLK_PCI_TYPE};
use protocols::agent::Device;
use regex::Regex;
use std::path::Path;
@@ -31,7 +28,6 @@ use tracing::instrument;
#[derive(Debug)]
pub struct VirtioBlkPciDeviceHandler {}
#[cfg(target_arch = "s390x")]
#[derive(Debug)]
pub struct VirtioBlkCcwDeviceHandler {}
@@ -56,7 +52,6 @@ impl DeviceHandler for VirtioBlkPciDeviceHandler {
}
}
#[cfg(target_arch = "s390x")]
#[async_trait::async_trait]
impl DeviceHandler for VirtioBlkCcwDeviceHandler {
#[instrument]
@@ -169,7 +164,7 @@ pub struct VirtioBlkPciMatcher {
impl VirtioBlkPciMatcher {
pub fn new(relpath: &str) -> VirtioBlkPciMatcher {
let root_bus = create_pci_root_bus_path();
let re = format!(r"^{root_bus}{relpath}/virtio[0-9]+/block/");
let re = format!(r"^{}{}/virtio[0-9]+/block/", root_bus, relpath);
VirtioBlkPciMatcher {
rex: Regex::new(&re).expect("BUG: failed to compile VirtioBlkPciMatcher regex"),
@@ -191,7 +186,7 @@ pub struct VirtioBlkMmioMatcher {
impl VirtioBlkMmioMatcher {
pub fn new(devname: &str) -> VirtioBlkMmioMatcher {
VirtioBlkMmioMatcher {
suffix: format!(r"/block/{devname}"),
suffix: format!(r"/block/{}", devname),
}
}
}
@@ -211,8 +206,10 @@ pub struct VirtioBlkCCWMatcher {
#[cfg(target_arch = "s390x")]
impl VirtioBlkCCWMatcher {
pub fn new(root_bus_path: &str, device: &ccw::Device) -> Self {
let re =
format!(r"^{root_bus_path}/0\.[0-3]\.[0-9a-f]{{1,4}}/{device}/virtio[0-9]+/block/");
let re = format!(
r"^{}/0\.[0-3]\.[0-9a-f]{{1,4}}/{}/virtio[0-9]+/block/",
root_bus_path, device
);
VirtioBlkCCWMatcher {
rex: Regex::new(&re).expect("BUG: failed to compile VirtioBlkCCWMatcher regex"),
}
@@ -241,12 +238,12 @@ mod tests {
uev_a.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string();
uev_a.subsystem = BLOCK.to_string();
uev_a.devname = devname.to_string();
uev_a.devpath = format!("{root_bus}{relpath_a}/virtio4/block/{devname}");
uev_a.devpath = format!("{}{}/virtio4/block/{}", root_bus, relpath_a, devname);
let matcher_a = VirtioBlkPciMatcher::new(relpath_a);
let mut uev_b = uev_a.clone();
let relpath_b = "/0000:00:0a.0/0000:00:0b.0";
uev_b.devpath = format!("{root_bus}{relpath_b}/virtio0/block/{devname}");
uev_b.devpath = format!("{}{}/virtio0/block/{}", root_bus, relpath_b, devname);
let matcher_b = VirtioBlkPciMatcher::new(relpath_b);
assert!(matcher_a.is_match(&uev_a));
@@ -267,7 +264,10 @@ mod tests {
uev.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string();
uev.subsystem = subsystem.to_string();
uev.devname = devname.to_string();
uev.devpath = format!("{root_bus}/0.0.0001/{relpath}/virtio1/{subsystem}/{devname}");
uev.devpath = format!(
"{}/0.0.0001/{}/virtio1/{}/{}",
root_bus, relpath, subsystem, devname
);
// Valid path
let device = ccw::Device::from_str(relpath).unwrap();
@@ -275,25 +275,40 @@ mod tests {
assert!(matcher.is_match(&uev));
// Invalid paths
uev.devpath = format!("{root_bus}/0.0.0001/0.0.0003/virtio1/{subsystem}/{devname}");
uev.devpath = format!(
"{}/0.0.0001/0.0.0003/virtio1/{}/{}",
root_bus, subsystem, devname
);
assert!(!matcher.is_match(&uev));
uev.devpath = format!("0.0.0001/{relpath}/virtio1/{subsystem}/{devname}");
uev.devpath = format!("0.0.0001/{}/virtio1/{}/{}", relpath, subsystem, devname);
assert!(!matcher.is_match(&uev));
uev.devpath = format!("{root_bus}/0.0.0001/{relpath}/virtio/{subsystem}/{devname}");
uev.devpath = format!(
"{}/0.0.0001/{}/virtio/{}/{}",
root_bus, relpath, subsystem, devname
);
assert!(!matcher.is_match(&uev));
uev.devpath = format!("{root_bus}/0.0.0001/{relpath}/virtio1");
uev.devpath = format!("{}/0.0.0001/{}/virtio1", root_bus, relpath);
assert!(!matcher.is_match(&uev));
uev.devpath = format!("{root_bus}/1.0.0001/{relpath}/virtio1/{subsystem}/{devname}");
uev.devpath = format!(
"{}/1.0.0001/{}/virtio1/{}/{}",
root_bus, relpath, subsystem, devname
);
assert!(!matcher.is_match(&uev));
uev.devpath = format!("{root_bus}/0.4.0001/{relpath}/virtio1/{subsystem}/{devname}");
uev.devpath = format!(
"{}/0.4.0001/{}/virtio1/{}/{}",
root_bus, relpath, subsystem, devname
);
assert!(!matcher.is_match(&uev));
uev.devpath = format!("{root_bus}/0.0.10000/{relpath}/virtio1/{subsystem}/{devname}");
uev.devpath = format!(
"{}/0.0.10000/{}/virtio1/{}/{}",
root_bus, relpath, subsystem, devname
);
assert!(!matcher.is_match(&uev));
}
@@ -306,13 +321,17 @@ mod tests {
uev_a.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string();
uev_a.subsystem = BLOCK.to_string();
uev_a.devname = devname_a.to_string();
uev_a.devpath =
format!("/sys/devices/virtio-mmio-cmdline/virtio-mmio.0/virtio0/block/{devname_a}");
uev_a.devpath = format!(
"/sys/devices/virtio-mmio-cmdline/virtio-mmio.0/virtio0/block/{}",
devname_a
);
let matcher_a = VirtioBlkMmioMatcher::new(devname_a);
let mut uev_b = uev_a.clone();
uev_b.devpath =
format!("/sys/devices/virtio-mmio-cmdline/virtio-mmio.4/virtio4/block/{devname_b}");
uev_b.devpath = format!(
"/sys/devices/virtio-mmio-cmdline/virtio-mmio.4/virtio4/block/{}",
devname_b
);
let matcher_b = VirtioBlkMmioMatcher::new(devname_b);
assert!(matcher_a.is_match(&uev_a));

View File

@@ -425,7 +425,7 @@ pub fn update_env_pci(
let mut guest_addrs = Vec::<String>::new();
for host_addr_str in val.split(',') {
let host_addr = pci::Address::from_str(host_addr_str)
.with_context(|| format!("Can't parse {name} environment variable"))?;
.with_context(|| format!("Can't parse {} environment variable", name))?;
let host_guest = pcimap
.get(cid)
.ok_or_else(|| anyhow!("No PCI mapping found for container {}", cid))?;
@@ -433,11 +433,11 @@ pub fn update_env_pci(
.get(&host_addr)
.ok_or_else(|| anyhow!("Unable to translate host PCI address {}", host_addr))?;
guest_addrs.push(format!("{guest_addr}"));
addr_map.insert(host_addr_str.to_string(), format!("{guest_addr}"));
guest_addrs.push(format!("{}", guest_addr));
addr_map.insert(host_addr_str.to_string(), format!("{}", guest_addr));
}
pci_dev_map.insert(format!("{name}_INFO"), addr_map);
pci_dev_map.insert(format!("{}_INFO", name), addr_map);
envvar.replace_range(eqpos + 1.., guest_addrs.join(",").as_str());
}
@@ -526,7 +526,7 @@ fn update_spec_devices(
"Missing devices in OCI spec: {:?}",
updates
.keys()
.map(|d| format!("{d:?}"))
.map(|d| format!("{:?}", d))
.collect::<Vec<_>>()
.join(" ")
));
@@ -572,7 +572,7 @@ pub fn pcipath_to_sysfs(root_bus_sysfs: &str, pcipath: &pci::Path) -> Result<Str
for i in 0..pcipath.len() {
let bdf = format!("{}:{}", bus, pcipath[i]);
relpath = format!("{relpath}/{bdf}");
relpath = format!("{}/{}", relpath, bdf);
if i == pcipath.len() - 1 {
// Final device need not be a bridge
@@ -580,7 +580,7 @@ pub fn pcipath_to_sysfs(root_bus_sysfs: &str, pcipath: &pci::Path) -> Result<Str
}
// Find out the bus exposed by bridge
let bridgebuspath = format!("{root_bus_sysfs}{relpath}/pci_bus");
let bridgebuspath = format!("{}{}/pci_bus", root_bus_sysfs, relpath);
let mut files: Vec<_> = fs::read_dir(&bridgebuspath)?.collect();
match files.pop() {
@@ -1120,7 +1120,7 @@ mod tests {
// Create mock sysfs files to indicate that 0000:00:02.0 is a bridge to bus 01
let bridge2bus = "0000:01";
let bus2path = format!("{bridge2path}/pci_bus/{bridge2bus}");
let bus2path = format!("{}/pci_bus/{}", bridge2path, bridge2bus);
fs::create_dir_all(bus2path).unwrap();
@@ -1134,9 +1134,9 @@ mod tests {
assert!(relpath.is_err());
// Create mock sysfs files for a bridge at 0000:01:03.0 to bus 02
let bridge3path = format!("{bridge2path}/0000:01:03.0");
let bridge3path = format!("{}/0000:01:03.0", bridge2path);
let bridge3bus = "0000:02";
let bus3path = format!("{bridge3path}/pci_bus/{bridge3bus}");
let bus3path = format!("{}/pci_bus/{}", bridge3path, bridge3bus);
fs::create_dir_all(bus3path).unwrap();
@@ -1169,7 +1169,7 @@ mod tests {
let devname = "vda";
let root_bus = create_pci_root_bus_path();
let relpath = "/0000:00:0a.0/0000:03:0b.0";
let devpath = format!("{root_bus}{relpath}/virtio4/block/{devname}");
let devpath = format!("{}{}/virtio4/block/{}", root_bus, relpath, devname);
let mut uev = crate::uevent::Uevent::default();
uev.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string();
@@ -1272,7 +1272,7 @@ mod tests {
cdi_timeout,
)
.await;
println!("modfied spec {spec:?}");
println!("modfied spec {:?}", spec);
assert!(res.is_ok(), "{}", res.err().unwrap());
let linux = spec.linux().as_ref().unwrap();

View File

@@ -69,7 +69,7 @@ impl NetPciMatcher {
let root_bus = create_pci_root_bus_path();
NetPciMatcher {
devpath: format!("{root_bus}{relpath}"),
devpath: format!("{}{}", root_bus, relpath),
}
}
}
@@ -106,7 +106,10 @@ struct NetCcwMatcher {
#[cfg(target_arch = "s390x")]
impl NetCcwMatcher {
pub fn new(root_bus_path: &str, device: &ccw::Device) -> Self {
let re = format!(r"{root_bus_path}/0\.[0-3]\.[0-9a-f]{{1,4}}/{device}/virtio[0-9]+/net/");
let re = format!(
r"{}/0\.[0-3]\.[0-9a-f]{{1,4}}/{}/virtio[0-9]+/net/",
root_bus_path, device
);
NetCcwMatcher {
re: Regex::new(&re).expect("BUG: failed to compile NetCCWMatcher regex"),
}
@@ -136,7 +139,7 @@ mod tests {
let mut uev_a = crate::uevent::Uevent::default();
uev_a.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string();
uev_a.devpath = format!("{root_bus}{relpath_a}");
uev_a.devpath = format!("{}{}", root_bus, relpath_a);
uev_a.subsystem = String::from("net");
uev_a.interface = String::from("eth0");
let matcher_a = NetPciMatcher::new(relpath_a);
@@ -144,7 +147,7 @@ mod tests {
let relpath_b = "/0000:00:02.0/0000:01:02.0";
let mut uev_b = uev_a.clone();
uev_b.devpath = format!("{root_bus}{relpath_b}");
uev_b.devpath = format!("{}{}", root_bus, relpath_b);
let matcher_b = NetPciMatcher::new(relpath_b);
assert!(matcher_a.is_match(&uev_a));
@@ -155,7 +158,7 @@ mod tests {
let relpath_c = "/0000:00:02.0/0000:01:03.0";
let net_substr = "/net/eth0";
let mut uev_c = uev_a.clone();
uev_c.devpath = format!("{root_bus}{relpath_c}{net_substr}");
uev_c.devpath = format!("{}{}{}", root_bus, relpath_c, net_substr);
let matcher_c = NetPciMatcher::new(relpath_c);
assert!(matcher_c.is_match(&uev_c));

View File

@@ -67,7 +67,7 @@ pub struct PmemBlockMatcher {
impl PmemBlockMatcher {
pub fn new(devname: &str) -> PmemBlockMatcher {
let suffix = format!(r"/block/{devname}");
let suffix = format!(r"/block/{}", devname);
PmemBlockMatcher { suffix }
}

View File

@@ -58,7 +58,7 @@ pub struct ScsiBlockMatcher {
impl ScsiBlockMatcher {
pub fn new(scsi_addr: &str) -> ScsiBlockMatcher {
let search = format!(r"/0:0:{scsi_addr}/block/");
let search = format!(r"/0:0:{}/block/", scsi_addr);
ScsiBlockMatcher { search }
}
@@ -118,14 +118,18 @@ mod tests {
uev_a.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string();
uev_a.subsystem = BLOCK.to_string();
uev_a.devname = devname.to_string();
uev_a.devpath =
format!("{root_bus}/0000:00:00.0/virtio0/host0/target0:0:0/0:0:{addr_a}/block/sda");
uev_a.devpath = format!(
"{}/0000:00:00.0/virtio0/host0/target0:0:0/0:0:{}/block/sda",
root_bus, addr_a
);
let matcher_a = ScsiBlockMatcher::new(addr_a);
let mut uev_b = uev_a.clone();
let addr_b = "2:0";
uev_b.devpath =
format!("{root_bus}/0000:00:00.0/virtio0/host0/target0:0:2/0:0:{addr_b}/block/sdb");
uev_b.devpath = format!(
"{}/0000:00:00.0/virtio0/host0/target0:0:2/0:0:{}/block/sdb",
root_bus, addr_b
);
let matcher_b = ScsiBlockMatcher::new(addr_b);
assert!(matcher_a.is_match(&uev_a));

View File

@@ -170,7 +170,7 @@ pub struct VfioMatcher {
impl VfioMatcher {
pub fn new(grp: IommuGroup) -> VfioMatcher {
VfioMatcher {
syspath: format!("/devices/virtual/vfio/{grp}"),
syspath: format!("/devices/virtual/vfio/{}", grp),
}
}
}
@@ -215,7 +215,7 @@ impl PciMatcher {
pub fn new(relpath: &str) -> Result<PciMatcher> {
let root_bus = create_pci_root_bus_path();
Ok(PciMatcher {
devpath: format!("{root_bus}{relpath}"),
devpath: format!("{}{}", root_bus, relpath),
})
}
}
@@ -425,12 +425,12 @@ mod tests {
let mut uev_a = crate::uevent::Uevent::default();
uev_a.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string();
uev_a.devname = format!("vfio/{grpa}");
uev_a.devpath = format!("/devices/virtual/vfio/{grpa}");
uev_a.devname = format!("vfio/{}", grpa);
uev_a.devpath = format!("/devices/virtual/vfio/{}", grpa);
let matcher_a = VfioMatcher::new(grpa);
let mut uev_b = uev_a.clone();
uev_b.devpath = format!("/devices/virtual/vfio/{grpb}");
uev_b.devpath = format!("/devices/virtual/vfio/{}", grpb);
let matcher_b = VfioMatcher::new(grpb);
assert!(matcher_a.is_match(&uev_a));
@@ -531,12 +531,12 @@ mod tests {
async fn test_vfio_ap_matcher() {
let subsystem = "ap";
let card = "0a";
let relpath = format!("{card}.0001");
let relpath = format!("{}.0001", card);
let mut uev = Uevent::default();
uev.action = U_EVENT_ACTION_ADD.to_string();
uev.subsystem = subsystem.to_string();
uev.devpath = format!("{AP_ROOT_BUS_PATH}/card{card}/{relpath}");
uev.devpath = format!("{}/card{}/{}", AP_ROOT_BUS_PATH, card, relpath);
let ap_address = ap::Address::from_str(&relpath).unwrap();
let matcher = ApMatcher::new(ap_address);
@@ -548,7 +548,7 @@ mod tests {
assert!(!matcher.is_match(&uev_remove));
let mut uev_other_device = uev.clone();
uev_other_device.devpath = format!("{AP_ROOT_BUS_PATH}/card{card}/{card}.0002");
uev_other_device.devpath = format!("{}/card{}/{}.0002", AP_ROOT_BUS_PATH, card, card);
assert!(!matcher.is_match(&uev_other_device));
}
}

View File

@@ -9,7 +9,6 @@
// SPDX-License-Identifier: Apache-2.0
//
#[cfg(feature = "init-data")]
use std::{os::unix::fs::FileTypeExt, path::Path};
use anyhow::{bail, Context, Result};
@@ -38,24 +37,8 @@ pub const AA_CONFIG_PATH: &str = concatcp!(INITDATA_PATH, "/aa.toml");
pub const CDH_CONFIG_PATH: &str = concatcp!(INITDATA_PATH, "/cdh.toml");
/// Magic number of initdata device
#[cfg(feature = "init-data")]
pub const INITDATA_MAGIC_NUMBER: &[u8] = b"initdata";
/// initdata device with disk type 'vd*'
#[cfg(feature = "init-data")]
const INITDATA_PREFIX_DISK_VDX: &str = "vd";
/// initdata device with disk type 'sd*'
#[cfg(feature = "init-data")]
const INITDATA_PREFIX_DISK_SDX: &str = "sd";
#[cfg(not(feature = "init-data"))]
async fn detect_initdata_device(logger: &Logger) -> Result<Option<String>> {
debug!(logger, "Initdata is disabled");
Ok(None)
}
#[cfg(feature = "init-data")]
async fn detect_initdata_device(logger: &Logger) -> Result<Option<String>> {
let dev_dir = Path::new("/dev");
let mut read_dir = tokio::fs::read_dir(dev_dir).await?;
@@ -63,15 +46,9 @@ async fn detect_initdata_device(logger: &Logger) -> Result<Option<String>> {
let filename = entry.file_name();
let filename = filename.to_string_lossy();
debug!(logger, "Initdata check device `{filename}`");
// Currently there're two disk types supported:
// virtio-blk (vd*) and virtio-scsi (sd*)
if !filename.starts_with(INITDATA_PREFIX_DISK_VDX)
&& !filename.starts_with(INITDATA_PREFIX_DISK_SDX)
{
if !filename.starts_with("vd") {
continue;
}
let path = entry.path();
debug!(logger, "Initdata find potential device: `{path:?}`");

View File

@@ -301,12 +301,12 @@ async fn real_main(init_mode: bool) -> std::result::Result<(), Box<dyn std::erro
tracer::end_tracing();
}
eprintln!("{NAME} shutdown complete");
eprintln!("{} shutdown complete", NAME);
let mut wait_errors: Vec<tokio::task::JoinError> = vec![];
for result in results {
if let Err(e) = result {
eprintln!("wait task error: {e:#?}");
eprintln!("wait task error: {:#?}", e);
wait_errors.push(e);
}
}
@@ -746,7 +746,7 @@ mod tests {
skip_if_root!();
}
let msg = format!("test[{i}]: {d:?}");
let msg = format!("test[{}]: {:?}", i, d);
let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC).unwrap();
defer!({
// XXX: Never try to close rfd, because it will be closed by PipeStream in
@@ -759,7 +759,7 @@ mod tests {
shutdown_tx.send(true).unwrap();
let result = create_logger_task(rfd, d.vsock_port, shutdown_rx).await;
let msg = format!("{msg}, result: {result:?}");
let msg = format!("{}, result: {:?}", msg, result);
assert_result!(d.result, result, msg);
}
}

View File

@@ -239,7 +239,7 @@ fn update_guest_metrics() {
Ok(kernel_stats) => {
set_gauge_vec_cpu_time(&GUEST_CPU_TIME, "total", &kernel_stats.total);
for (i, cpu_time) in kernel_stats.cpu_time.iter().enumerate() {
set_gauge_vec_cpu_time(&GUEST_CPU_TIME, format!("{i}").as_str(), cpu_time);
set_gauge_vec_cpu_time(&GUEST_CPU_TIME, format!("{}", i).as_str(), cpu_time);
}
}
}

View File

@@ -88,7 +88,7 @@ pub fn baremount(
let destination_str = destination.to_string_lossy();
if let Ok(m) = get_linux_mount_info(destination_str.deref()) {
if m.fs_type == fs_type && !flags.contains(MsFlags::MS_REMOUNT) {
if m.fs_type == fs_type {
slog_info!(logger, "{source:?} is already mounted at {destination:?}");
return Ok(());
}
@@ -177,7 +177,7 @@ pub fn get_mount_fs_type_from_file(mount_file: &str, mount_point: &str) -> Resul
let content = fs::read_to_string(mount_file)
.map_err(|e| anyhow!("read mount file {}: {}", mount_file, e))?;
let re = Regex::new(format!("device .+ mounted on {mount_point} with fstype (.+)").as_str())?;
let re = Regex::new(format!("device .+ mounted on {} with fstype (.+)", mount_point).as_str())?;
// Read the file line by line using the lines() iterator from std::io::BufRead.
for line in content.lines() {
@@ -355,7 +355,8 @@ mod tests {
let flags = MsFlags::MS_RDONLY;
let options = "mode=755";
println!(
"testing if already mounted baremount({source:?} {destination:?} {fs_type:?})"
"testing if already mounted baremount({:?} {:?} {:?})",
source, destination, fs_type
);
assert!(baremount(source, destination, fs_type, flags, options, &logger).is_ok());
}
@@ -460,7 +461,7 @@ mod tests {
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{i}]: {d:?}");
let msg = format!("test[{}]: {:?}", i, d);
skip_loop_by_user!(msg, d.test_user);
@@ -504,7 +505,7 @@ mod tests {
let result = baremount(src, dest, d.fs_type, d.flags, d.options, &logger);
let msg = format!("{msg}: result: {result:?}");
let msg = format!("{}: result: {:?}", msg, result);
if d.error_contains.is_empty() {
assert!(result.is_ok(), "{}", msg);
@@ -516,7 +517,7 @@ mod tests {
}
let err = result.unwrap_err();
let error_msg = format!("{err}");
let error_msg = format!("{}", err);
assert!(
error_msg.contains(d.error_contains),
@@ -618,11 +619,11 @@ mod tests {
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{i}]: {d:?}");
let msg = format!("test[{}]: {:?}", i, d);
let result = remove_mounts(&d.mounts);
let msg = format!("{msg}: result: {result:?}");
let msg = format!("{}: result: {:?}", msg, result);
if d.error_contains.is_empty() {
assert!(result.is_ok(), "{}", msg);
@@ -698,15 +699,15 @@ mod tests {
.iter()
.enumerate()
{
let msg = format!("missing mount file test[{i}] with mountpoint: {mp}");
let msg = format!("missing mount file test[{}] with mountpoint: {}", i, mp);
let result = get_mount_fs_type_from_file("", mp);
let err = result.unwrap_err();
let msg = format!("{msg}: error: {err}");
let msg = format!("{}: error: {}", msg, err);
assert!(
format!("{err}").contains("No such file or directory"),
format!("{}", err).contains("No such file or directory"),
"{}",
msg
);
@@ -714,7 +715,7 @@ mod tests {
// Now, test various combinations of file contents
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{i}]: {d:?}");
let msg = format!("test[{}]: {:?}", i, d);
let file_path = dir.path().join("mount_stats");
@@ -731,7 +732,7 @@ mod tests {
let result = get_mount_fs_type_from_file(filename, d.mount_point);
// add more details if an assertion fails
let msg = format!("{msg}: result: {result:?}");
let msg = format!("{}: result: {:?}", msg, result);
if d.error_contains.is_empty() {
let fs_type = result.unwrap();
@@ -874,7 +875,7 @@ mod tests {
);
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{i}]: {d:?}");
let msg = format!("test[{}]: {:?}", i, d);
let file_path = dir.path().join("cgroups");
let filename = file_path
@@ -888,7 +889,7 @@ mod tests {
.unwrap_or_else(|_| panic!("{}: failed to write file contents", msg));
let result = get_cgroup_mounts(&logger, filename, false);
let msg = format!("{msg}: result: {result:?}");
let msg = format!("{}: result: {:?}", msg, result);
if !d.error_contains.is_empty() {
assert!(result.is_err(), "{}", msg);
@@ -973,7 +974,7 @@ mod tests {
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{i}]: {d:?}");
let msg = format!("test[{}]: {:?}", i, d);
skip_loop_by_user!(msg, d.test_user);
let drain = slog::Discard;
@@ -1010,7 +1011,7 @@ mod tests {
nix::mount::umount(&dest).unwrap();
}
let msg = format!("{msg}: result: {result:?}");
let msg = format!("{}: result: {:?}", msg, result);
if d.error_contains.is_empty() {
assert!(result.is_ok(), "{}", msg);
} else {
@@ -1061,14 +1062,14 @@ mod tests {
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{i}]: {d:?}");
let msg = format!("test[{}]: {:?}", i, d);
let result = parse_mount_options(&d.options_vec).unwrap();
let msg = format!("{msg}: result: {result:?}");
let msg = format!("{}: result: {:?}", msg, result);
let expected_result = (d.result.0, d.result.1.to_owned());
assert_eq!(expected_result, result, "{msg}");
assert_eq!(expected_result, result, "{}", msg);
}
}
}

View File

@@ -41,9 +41,9 @@ pub enum LinkFilter<'a> {
impl fmt::Display for LinkFilter<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
LinkFilter::Name(name) => write!(f, "Name: {name}"),
LinkFilter::Index(idx) => write!(f, "Index: {idx}"),
LinkFilter::Address(addr) => write!(f, "Address: {addr}"),
LinkFilter::Name(name) => write!(f, "Name: {}", name),
LinkFilter::Index(idx) => write!(f, "Index: {}", idx),
LinkFilter::Address(addr) => write!(f, "Address: {}", addr),
}
}
}
@@ -272,7 +272,7 @@ impl Handle {
use LinkAttribute as Nla;
let mac_addr = parse_mac_address(addr)
.with_context(|| format!("Failed to parse MAC address: {addr}"))?;
.with_context(|| format!("Failed to parse MAC address: {}", addr))?;
// Hardware filter might not be supported by netlink,
// we may have to dump link list and then find the target link.
@@ -401,10 +401,11 @@ impl Handle {
}
if let RouteAttribute::Oif(index) = attribute {
route.device = match self.find_link(LinkFilter::Index(*index)).await {
Ok(link) => link.name(),
Err(_) => String::new(),
};
route.device = self
.find_link(LinkFilter::Index(*index))
.await
.context(format!("error looking up device {index}"))?
.name();
}
}
@@ -924,7 +925,7 @@ mod tests {
/// Helper function to check if the result is a netlink EACCES error
fn is_netlink_permission_error<T>(result: &Result<T>) -> bool {
if let Err(e) = result {
let error_string = format!("{e:?}");
let error_string = format!("{:?}", e);
if error_string.contains("code: Some(-13)") {
println!("INFO: skipping test - netlink operations are restricted in this environment (EACCES)");
return true;
@@ -1000,10 +1001,14 @@ mod tests {
.unwrap()
.list_routes()
.await
.context(format!("available devices: {devices:?}"))
.context(format!("available devices: {:?}", devices))
.expect("Failed to list routes");
assert_ne!(all.len(), 0);
for r in &all {
assert_ne!(r.device.len(), 0);
}
}
#[tokio::test]
@@ -1186,7 +1191,7 @@ mod tests {
);
assert_eq!(
stdout.trim(),
format!("{TEST_ARP_IP} lladdr {mac} PERMANENT")
format!("{} lladdr {} PERMANENT", TEST_ARP_IP, mac)
);
clean_env_for_test_add_one_arp_neighbor(TEST_DUMMY_INTERFACE, TEST_ARP_IP);

View File

@@ -191,13 +191,13 @@ mod tests {
fn test_slotfn() {
// Valid slots
let sf = SlotFn::new(0x00, 0x0).unwrap();
assert_eq!(format!("{sf}"), "00.0");
assert_eq!(format!("{}", sf), "00.0");
let sf = SlotFn::from_str("00.0").unwrap();
assert_eq!(format!("{sf}"), "00.0");
assert_eq!(format!("{}", sf), "00.0");
let sf = SlotFn::from_str("00").unwrap();
assert_eq!(format!("{sf}"), "00.0");
assert_eq!(format!("{}", sf), "00.0");
let sf = SlotFn::new(31, 7).unwrap();
let sf2 = SlotFn::from_str("1f.7").unwrap();
@@ -256,12 +256,12 @@ mod tests {
let sf1f_7 = SlotFn::new(0x1f, 7).unwrap();
let addr = Address::new(0, 0, sf0_0);
assert_eq!(format!("{addr}"), "0000:00:00.0");
assert_eq!(format!("{}", addr), "0000:00:00.0");
let addr2 = Address::from_str("0000:00:00.0").unwrap();
assert_eq!(addr, addr2);
let addr = Address::new(0xffff, 0xff, sf1f_7);
assert_eq!(format!("{addr}"), "ffff:ff:1f.7");
assert_eq!(format!("{}", addr), "ffff:ff:1f.7");
let addr2 = Address::from_str("ffff:ff:1f.7").unwrap();
assert_eq!(addr, addr2);
@@ -299,7 +299,7 @@ mod tests {
// Valid paths
let pcipath = Path::new(vec![sf3_0]).unwrap();
assert_eq!(format!("{pcipath}"), "03.0");
assert_eq!(format!("{}", pcipath), "03.0");
let pcipath2 = Path::from_str("03.0").unwrap();
assert_eq!(pcipath, pcipath2);
let pcipath2 = Path::from_str("03").unwrap();
@@ -308,7 +308,7 @@ mod tests {
assert_eq!(pcipath[0], sf3_0);
let pcipath = Path::new(vec![sf3_0, sf4_0]).unwrap();
assert_eq!(format!("{pcipath}"), "03.0/04.0");
assert_eq!(format!("{}", pcipath), "03.0/04.0");
let pcipath2 = Path::from_str("03.0/04.0").unwrap();
assert_eq!(pcipath, pcipath2);
let pcipath2 = Path::from_str("03/04").unwrap();
@@ -318,7 +318,7 @@ mod tests {
assert_eq!(pcipath[1], sf4_0);
let pcipath = Path::new(vec![sf3_0, sf4_0, sf5_0]).unwrap();
assert_eq!(format!("{pcipath}"), "03.0/04.0/05.0");
assert_eq!(format!("{}", pcipath), "03.0/04.0/05.0");
let pcipath2 = Path::from_str("03.0/04.0/05.0").unwrap();
assert_eq!(pcipath, pcipath2);
let pcipath2 = Path::from_str("03/04/05").unwrap();
@@ -329,7 +329,7 @@ mod tests {
assert_eq!(pcipath[2], sf5_0);
let pcipath = Path::new(vec![sfa_5, sfb_6, sfc_7]).unwrap();
assert_eq!(format!("{pcipath}"), "0a.5/0b.6/0c.7");
assert_eq!(format!("{}", pcipath), "0a.5/0b.6/0c.7");
let pcipath2 = Path::from_str("0a.5/0b.6/0c.7").unwrap();
assert_eq!(pcipath, pcipath2);
assert_eq!(pcipath.len(), 3);

View File

@@ -72,7 +72,7 @@ use crate::network::setup_guest_dns;
use crate::passfd_io;
use crate::pci;
use crate::random;
use crate::sandbox::{Sandbox, SandboxError};
use crate::sandbox::Sandbox;
use crate::storage::{add_storages, update_ephemeral_mounts, STORAGE_HANDLERS};
use crate::util;
use crate::version::{AGENT_VERSION, API_VERSION};
@@ -138,17 +138,7 @@ fn sl() -> slog::Logger {
// Convenience function to wrap an error and response to ttrpc client
pub fn ttrpc_error(code: ttrpc::Code, err: impl Debug) -> ttrpc::Error {
get_rpc_status(code, format!("{err:?}"))
}
/// Convert SandboxError to ttrpc error with appropriate code.
/// Process not found errors map to NOT_FOUND, others to INVALID_ARGUMENT.
fn sandbox_err_to_ttrpc(err: SandboxError) -> ttrpc::Error {
let code = match &err {
SandboxError::InitProcessNotFound | SandboxError::InvalidExecId => ttrpc::Code::NOT_FOUND,
SandboxError::InvalidContainerId => ttrpc::Code::INVALID_ARGUMENT,
};
ttrpc_error(code, err)
get_rpc_status(code, format!("{:?}", err))
}
#[cfg(not(feature = "agent-policy"))]
@@ -470,9 +460,7 @@ impl AgentService {
let mut sig: libc::c_int = req.signal as libc::c_int;
{
let mut sandbox = self.sandbox.lock().await;
let p = sandbox
.find_container_process(cid.as_str(), eid.as_str())
.map_err(sandbox_err_to_ttrpc)?;
let p = sandbox.find_container_process(cid.as_str(), eid.as_str())?;
// For container initProcess, if it hasn't installed handler for "SIGTERM" signal,
// it will ignore the "SIGTERM" signal sent to it, thus send it "SIGKILL" signal
// instead of "SIGTERM" to terminate it.
@@ -580,9 +568,7 @@ impl AgentService {
let (exit_send, mut exit_recv) = tokio::sync::mpsc::channel(100);
let exit_rx = {
let mut sandbox = self.sandbox.lock().await;
let p = sandbox
.find_container_process(cid.as_str(), eid.as_str())
.map_err(sandbox_err_to_ttrpc)?;
let p = sandbox.find_container_process(cid.as_str(), eid.as_str())?;
p.exit_watchers.push(exit_send);
pid = p.pid;
@@ -679,9 +665,7 @@ impl AgentService {
let term_exit_notifier;
let reader = {
let mut sandbox = self.sandbox.lock().await;
let p = sandbox
.find_container_process(cid.as_str(), eid.as_str())
.map_err(sandbox_err_to_ttrpc)?;
let p = sandbox.find_container_process(cid.as_str(), eid.as_str())?;
term_exit_notifier = p.term_exit_notifier.clone();
@@ -963,7 +947,12 @@ impl agent_ttrpc::AgentService for AgentService {
let p = sandbox
.find_container_process(cid.as_str(), eid.as_str())
.map_err(sandbox_err_to_ttrpc)?;
.map_err(|e| {
ttrpc_error(
ttrpc::Code::INVALID_ARGUMENT,
format!("invalid argument: {:?}", e),
)
})?;
p.close_stdin().await;
@@ -981,7 +970,12 @@ impl agent_ttrpc::AgentService for AgentService {
let mut sandbox = self.sandbox.lock().await;
let p = sandbox
.find_container_process(req.container_id(), req.exec_id())
.map_err(sandbox_err_to_ttrpc)?;
.map_err(|e| {
ttrpc_error(
ttrpc::Code::UNAVAILABLE,
format!("invalid argument: {:?}", e),
)
})?;
let fd = p
.term_master
@@ -996,7 +990,7 @@ impl agent_ttrpc::AgentService for AgentService {
let err = unsafe { libc::ioctl(fd, TIOCSWINSZ, &win) };
Errno::result(err)
.map(drop)
.map_ttrpc_err(|e| format!("ioctl error: {e:?}"))?;
.map_ttrpc_err(|e| format!("ioctl error: {:?}", e))?;
Ok(Empty::new())
}
@@ -1020,20 +1014,20 @@ impl agent_ttrpc::AgentService for AgentService {
#[cfg(not(target_arch = "s390x"))]
{
let pcipath = pci::Path::from_str(&interface.devicePath).map_ttrpc_err(|e| {
format!("Unexpected pci-path for network interface: {e:?}")
format!("Unexpected pci-path for network interface: {:?}", e)
})?;
wait_for_pci_net_interface(&self.sandbox, &pcipath)
.await
.map_ttrpc_err(|e| format!("interface not available: {e:?}"))?;
.map_ttrpc_err(|e| format!("interface not available: {:?}", e))?;
}
#[cfg(target_arch = "s390x")]
{
let ccw_dev = ccw::Device::from_str(&interface.devicePath).map_ttrpc_err(|e| {
format!("Unexpected CCW path for network interface: {e:?}")
format!("Unexpected CCW path for network interface: {:?}", e)
})?;
wait_for_ccw_net_interface(&self.sandbox, &ccw_dev)
.await
.map_ttrpc_err(|e| format!("interface not available: {e:?}"))?;
.map_ttrpc_err(|e| format!("interface not available: {:?}", e))?;
}
}
@@ -1043,7 +1037,7 @@ impl agent_ttrpc::AgentService for AgentService {
.rtnl
.update_interface(&interface)
.await
.map_ttrpc_err(|e| format!("update interface: {e:?}"))?;
.map_ttrpc_err(|e| format!("update interface: {:?}", e))?;
Ok(interface)
}
@@ -1068,13 +1062,13 @@ impl agent_ttrpc::AgentService for AgentService {
.rtnl
.update_routes(new_routes)
.await
.map_ttrpc_err(|e| format!("Failed to update routes: {e:?}"))?;
.map_ttrpc_err(|e| format!("Failed to update routes: {:?}", e))?;
let list = sandbox
.rtnl
.list_routes()
.await
.map_ttrpc_err(|e| format!("Failed to list routes after update: {e:?}"))?;
.map_ttrpc_err(|e| format!("Failed to list routes after update: {:?}", e))?;
Ok(protocols::agent::Routes {
Routes: list,
@@ -1092,7 +1086,7 @@ impl agent_ttrpc::AgentService for AgentService {
update_ephemeral_mounts(sl(), &req.storages, &self.sandbox)
.await
.map_ttrpc_err(|e| format!("Failed to update mounts: {e:?}"))?;
.map_ttrpc_err(|e| format!("Failed to update mounts: {:?}", e))?;
Ok(Empty::new())
}
@@ -1243,7 +1237,7 @@ impl agent_ttrpc::AgentService for AgentService {
.rtnl
.list_interfaces()
.await
.map_ttrpc_err(|e| format!("Failed to list interfaces: {e:?}"))?;
.map_ttrpc_err(|e| format!("Failed to list interfaces: {:?}", e))?;
Ok(protocols::agent::Interfaces {
Interfaces: list,
@@ -1266,7 +1260,7 @@ impl agent_ttrpc::AgentService for AgentService {
.rtnl
.list_routes()
.await
.map_ttrpc_err(|e| format!("list routes: {e:?}"))?;
.map_ttrpc_err(|e| format!("list routes: {:?}", e))?;
Ok(protocols::agent::Routes {
Routes: list,
@@ -1383,7 +1377,7 @@ impl agent_ttrpc::AgentService for AgentService {
.rtnl
.add_arp_neighbors(neighs)
.await
.map_ttrpc_err(|e| format!("Failed to add ARP neighbours: {e:?}"))?;
.map_ttrpc_err(|e| format!("Failed to add ARP neighbours: {:?}", e))?;
Ok(Empty::new())
}
@@ -1603,7 +1597,7 @@ impl agent_ttrpc::AgentService for AgentService {
ma.memcg_set_config_async(mem_agent_memcgconfig_to_memcg_optionconfig(&config))
.await
.map_err(|e| {
let estr = format!("ma.memcg_set_config_async fail: {e}");
let estr = format!("ma.memcg_set_config_async fail: {}", e);
error!(sl(), "{}", estr);
ttrpc::Error::RpcStatus(ttrpc::get_status(ttrpc::Code::INTERNAL, estr))
})?;
@@ -1627,7 +1621,7 @@ impl agent_ttrpc::AgentService for AgentService {
ma.compact_set_config_async(mem_agent_compactconfig_to_compact_optionconfig(&config))
.await
.map_err(|e| {
let estr = format!("ma.compact_set_config_async fail: {e}");
let estr = format!("ma.compact_set_config_async fail: {}", e);
error!(sl(), "{}", estr);
ttrpc::Error::RpcStatus(ttrpc::get_status(ttrpc::Code::INTERNAL, estr))
})?;
@@ -2239,8 +2233,10 @@ fn load_kernel_module(module: &protocols::agent::KernelModule) -> Result<()> {
Some(code) => {
let std_out = String::from_utf8_lossy(&output.stdout);
let std_err = String::from_utf8_lossy(&output.stderr);
let msg =
format!("load_kernel_module return code: {code} stdout:{std_out} stderr:{std_err}");
let msg = format!(
"load_kernel_module return code: {} stdout:{} stderr:{}",
code, std_out, std_err
);
Err(anyhow!(msg))
}
None => Err(anyhow!("Process terminated by signal")),
@@ -2489,9 +2485,9 @@ mod tests {
// Skip test if loading kernel modules is not permitted
// or kernel module is not found
if let Err(e) = &result {
let error_string = format!("{e:?}");
let error_string = format!("{:?}", e);
// Let's print out the error message first
println!("DEBUG: error: {error_string}");
println!("DEBUG: error: {}", error_string);
if error_string.contains("Operation not permitted")
|| error_string.contains("EPERM")
|| error_string.contains("Permission denied")
@@ -2633,12 +2629,12 @@ mod tests {
},
TestData {
create_container: false,
result: Err(anyhow!(crate::sandbox::SandboxError::InvalidContainerId)),
result: Err(anyhow!(crate::sandbox::ERR_INVALID_CONTAINER_ID)),
..Default::default()
},
TestData {
container_id: "8181",
result: Err(anyhow!(crate::sandbox::SandboxError::InvalidContainerId)),
result: Err(anyhow!(crate::sandbox::ERR_INVALID_CONTAINER_ID)),
..Default::default()
},
TestData {
@@ -2652,7 +2648,7 @@ mod tests {
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{i}]: {d:?}");
let msg = format!("test[{}]: {:?}", i, d);
let logger = slog::Logger::root(slog::Discard, o!());
let mut sandbox = Sandbox::new(&logger).unwrap();
@@ -2723,7 +2719,7 @@ mod tests {
// the fd will be closed on Process's dropping.
// unistd::close(wfd).unwrap();
let msg = format!("{msg}, result: {result:?}");
let msg = format!("{}, result: {:?}", msg, result);
assert_result!(d.result, result, msg);
}
}
@@ -2827,7 +2823,7 @@ mod tests {
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{i}]: {d:?}");
let msg = format!("test[{}]: {:?}", i, d);
let logger = slog::Logger::root(slog::Discard, o!());
let mut sandbox = Sandbox::new(&logger).unwrap();
@@ -2847,14 +2843,15 @@ mod tests {
let result = update_container_namespaces(&sandbox, &mut oci, d.use_sandbox_pidns);
let msg = format!("{msg}, result: {result:?}");
let msg = format!("{}, result: {:?}", msg, result);
assert_result!(d.result, result, msg);
if let Some(linux) = oci.linux() {
assert_eq!(
d.expected_namespaces,
linux.namespaces().clone().unwrap(),
"{msg}"
"{}",
msg
);
}
}
@@ -2947,7 +2944,7 @@ mod tests {
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{i}]: {d:?}");
let msg = format!("test[{}]: {:?}", i, d);
let dir = tempdir().expect("failed to make tempdir");
let block_size_path = dir.path().join("block_size_bytes");
@@ -2967,7 +2964,7 @@ mod tests {
hotplug_probe_path.to_str().unwrap(),
);
let msg = format!("{msg}, result: {result:?}");
let msg = format!("{}, result: {:?}", msg, result);
assert_result!(d.result, result, msg);
}
@@ -3077,7 +3074,7 @@ OtherField:other
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{i}]: {d:?}");
let msg = format!("test[{}]: {:?}", i, d);
let dir = tempdir().expect("failed to make tempdir");
let proc_status_file_path = dir.path().join("status");
@@ -3088,9 +3085,9 @@ OtherField:other
let result = is_signal_handled(proc_status_file_path.to_str().unwrap(), d.signum);
let msg = format!("{msg}, result: {result:?}");
let msg = format!("{}, result: {:?}", msg, result);
assert_eq!(d.result, result, "{msg}");
assert_eq!(d.result, result, "{}", msg);
}
}
@@ -3389,9 +3386,9 @@ COMMIT
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{i}]: {d:?}");
let msg = format!("test[{}]: {:?}", i, d);
let result = is_sealed_secret_path(d.source_path);
assert_eq!(d.result, result, "{msg}");
assert_eq!(d.result, result, "{}", msg);
}
}
}

View File

@@ -32,7 +32,6 @@ use rustjail::container::BaseContainer;
use rustjail::container::LinuxContainer;
use rustjail::process::Process;
use slog::Logger;
use thiserror::Error;
use tokio::sync::mpsc::{channel, Receiver, Sender};
use tokio::sync::oneshot;
use tokio::sync::Mutex;
@@ -48,16 +47,7 @@ use crate::storage::StorageDeviceGeneric;
use crate::uevent::{Uevent, UeventMatcher};
use crate::watcher::BindWatcher;
/// Errors that can occur when looking up processes in the sandbox.
#[derive(Debug, Error)]
pub enum SandboxError {
#[error("Invalid container id")]
InvalidContainerId,
#[error("Process not found: init process missing")]
InitProcessNotFound,
#[error("Process not found: invalid exec id")]
InvalidExecId,
}
pub const ERR_INVALID_CONTAINER_ID: &str = "Invalid container id";
type UeventWatcher = (Box<dyn UeventMatcher>, oneshot::Sender<Uevent>);
@@ -258,7 +248,7 @@ impl Sandbox {
}
let mut pid_ns = Namespace::new(&self.logger).get_pid();
pid_ns.path = format!("/proc/{init_pid}/ns/pid");
pid_ns.path = format!("/proc/{}/ns/pid", init_pid);
self.sandbox_pidns = Some(pid_ns);
}
@@ -292,14 +282,10 @@ impl Sandbox {
None
}
pub fn find_container_process(
&mut self,
cid: &str,
eid: &str,
) -> Result<&mut Process, SandboxError> {
pub fn find_container_process(&mut self, cid: &str, eid: &str) -> Result<&mut Process> {
let ctr = self
.get_container(cid)
.ok_or(SandboxError::InvalidContainerId)?;
.ok_or_else(|| anyhow!(ERR_INVALID_CONTAINER_ID))?;
if eid.is_empty() {
let init_pid = ctr.init_process_pid;
@@ -307,11 +293,10 @@ impl Sandbox {
.processes
.values_mut()
.find(|p| p.pid == init_pid)
.ok_or(SandboxError::InitProcessNotFound);
.ok_or_else(|| anyhow!("cannot find init process!"));
}
ctr.get_process(eid)
.map_err(|_| SandboxError::InvalidExecId)
ctr.get_process(eid).map_err(|_| anyhow!("Invalid exec id"))
}
#[instrument]
@@ -726,7 +711,8 @@ mod tests {
let ref_count = new_storage.ref_count().await;
assert_eq!(
ref_count, 1,
"Invalid refcount, got {ref_count} expected 1."
"Invalid refcount, got {} expected 1.",
ref_count
);
// Use the existing sandbox storage
@@ -737,7 +723,8 @@ mod tests {
let ref_count = new_storage.ref_count().await;
assert_eq!(
ref_count, 2,
"Invalid refcount, got {ref_count} expected 2."
"Invalid refcount, got {} expected 2.",
ref_count
);
}
@@ -824,7 +811,11 @@ mod tests {
// Reference counter should decrement to 1.
let storage = &s.storages[storage_path];
let refcount = storage.ref_count().await;
assert_eq!(refcount, 1, "Invalid refcount, got {refcount} expected 1.");
assert_eq!(
refcount, 1,
"Invalid refcount, got {} expected 1.",
refcount
);
assert!(
s.remove_sandbox_storage(storage_path).await.unwrap(),
@@ -968,7 +959,7 @@ mod tests {
assert!(s.sandbox_pidns.is_some());
let ns_path = format!("/proc/{test_pid}/ns/pid");
let ns_path = format!("/proc/{}/ns/pid", test_pid);
assert_eq!(s.sandbox_pidns.unwrap().path, ns_path);
}
@@ -1280,7 +1271,7 @@ mod tests {
let tmpdir_path = tmpdir.path().to_str().unwrap();
for (i, d) in tests.iter().enumerate() {
let current_test_dir_path = format!("{tmpdir_path}/test_{i}");
let current_test_dir_path = format!("{}/test_{}", tmpdir_path, i);
fs::create_dir(&current_test_dir_path).unwrap();
// create numbered directories and fill using root name
@@ -1289,7 +1280,7 @@ mod tests {
"{}/{}{}",
current_test_dir_path, d.directory_autogen_name, j
);
let subfile_path = format!("{subdir_path}/{SYSFS_ONLINE_FILE}");
let subfile_path = format!("{}/{}", subdir_path, SYSFS_ONLINE_FILE);
fs::create_dir(&subdir_path).unwrap();
let mut subfile = File::create(subfile_path).unwrap();
subfile.write_all(b"0").unwrap();
@@ -1316,15 +1307,18 @@ mod tests {
result.is_ok()
);
assert_eq!(result.is_ok(), d.result.is_ok(), "{msg}");
assert_eq!(result.is_ok(), d.result.is_ok(), "{}", msg);
if d.result.is_ok() {
let test_result_val = *d.result.as_ref().ok().unwrap();
let result_val = result.ok().unwrap();
msg = format!("test[{i}]: {d:?}, expected {test_result_val}, actual {result_val}");
msg = format!(
"test[{}]: {:?}, expected {}, actual {}",
i, d, test_result_val, result_val
);
assert_eq!(test_result_val, result_val, "{msg}");
assert_eq!(test_result_val, result_val, "{}", msg);
}
}
}

View File

@@ -44,7 +44,7 @@ async fn handle_sigchild(logger: Logger, sandbox: Arc<Mutex<Sandbox>>) -> Result
if let Some(pid) = wait_status.pid() {
let raw_pid = pid.as_raw();
let child_pid = format!("{raw_pid}");
let child_pid = format!("{}", raw_pid);
let logger = logger.new(o!("child-pid" => child_pid));

View File

@@ -11,10 +11,9 @@ use std::str::FromStr;
use std::sync::Arc;
use anyhow::{anyhow, Context, Result};
#[cfg(target_arch = "s390x")]
use kata_types::device::DRIVER_BLK_CCW_TYPE;
use kata_types::device::{
DRIVER_BLK_MMIO_TYPE, DRIVER_BLK_PCI_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_SCSI_TYPE,
DRIVER_BLK_CCW_TYPE, DRIVER_BLK_MMIO_TYPE, DRIVER_BLK_PCI_TYPE, DRIVER_NVDIMM_TYPE,
DRIVER_SCSI_TYPE,
};
use kata_types::mount::StorageDevice;
use protocols::agent::Storage;
@@ -94,11 +93,9 @@ impl StorageHandler for VirtioBlkPciHandler {
}
}
#[cfg(target_arch = "s390x")]
#[derive(Debug)]
pub struct VirtioBlkCcwHandler {}
#[cfg(target_arch = "s390x")]
#[async_trait::async_trait]
impl StorageHandler for VirtioBlkCcwHandler {
#[instrument]

View File

@@ -467,7 +467,7 @@ mod tests {
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{i}]: {d:?}");
let msg = format!("test[{}]: {:?}", i, d);
skip_loop_by_user!(msg, d.test_user);
@@ -515,7 +515,7 @@ mod tests {
nix::mount::umount(&mount_point).unwrap();
}
let msg = format!("{msg}: result: {result:?}");
let msg = format!("{}: result: {:?}", msg, result);
if d.error_contains.is_empty() {
assert!(result.is_ok(), "{}", msg);
} else {
@@ -576,7 +576,7 @@ mod tests {
let tempdir = tempdir().expect("failed to create tmpdir");
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{i}]: {d:?}");
let msg = format!("test[{}]: {:?}", i, d);
let mount_dir = tempdir.path().join(d.mount_path);
fs::create_dir(&mount_dir)
@@ -663,7 +663,7 @@ mod tests {
let tempdir = tempdir().expect("failed to create tmpdir");
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{i}]: {d:?}");
let msg = format!("test[{}]: {:?}", i, d);
let mount_dir = tempdir.path().join(d.path);
fs::create_dir(&mount_dir)
@@ -674,12 +674,12 @@ mod tests {
// create testing directories and files
for n in 1..COUNT {
let nest_dir = mount_dir.join(format!("nested{n}"));
let nest_dir = mount_dir.join(format!("nested{}", n));
fs::create_dir(&nest_dir)
.unwrap_or_else(|_| panic!("{}: failed to create nest directory", msg));
for f in 1..COUNT {
let filename = nest_dir.join(format!("file{f}"));
let filename = nest_dir.join(format!("file{}", f));
File::create(&filename)
.unwrap_or_else(|_| panic!("{}: failed to create file", msg));
file_mode = filename.as_path().metadata().unwrap().permissions().mode();
@@ -707,9 +707,9 @@ mod tests {
);
for n in 1..COUNT {
let nest_dir = mount_dir.join(format!("nested{n}"));
let nest_dir = mount_dir.join(format!("nested{}", n));
for f in 1..COUNT {
let filename = nest_dir.join(format!("file{f}"));
let filename = nest_dir.join(format!("file{}", f));
let file = Path::new(&filename);
assert_eq!(file.metadata().unwrap().gid(), d.gid);

View File

@@ -147,7 +147,7 @@ mod tests {
let v = vec_locked
.as_deref_mut()
.map_err(|e| std::io::Error::other(e.to_string()))?;
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e.to_string()))?;
std::io::Write::flush(v)
}

View File

@@ -556,9 +556,9 @@ mod tests {
use test_utils::skip_if_not_root;
async fn create_test_storage(dir: &Path, id: &str) -> Result<(protos::Storage, PathBuf)> {
let src_path = dir.join(format!("src{id}"));
let src_path = dir.join(format!("src{}", id));
let src_filename = src_path.to_str().expect("failed to create src filename");
let dest_path = dir.join(format!("dest{id}"));
let dest_path = dir.join(format!("dest{}", id));
let dest_filename = dest_path.to_str().expect("failed to create dest filename");
std::fs::create_dir_all(src_filename).expect("failed to create path");
@@ -682,7 +682,7 @@ mod tests {
// setup storage0: too many files
for i in 1..21 {
fs::write(src0_path.join(format!("{i}.txt")), "original").unwrap();
fs::write(src0_path.join(format!("{}.txt", i)), "original").unwrap();
}
// setup storage1: two small files
@@ -700,7 +700,7 @@ mod tests {
// setup storage3: many files, but still watchable
for i in 1..MAX_ENTRIES_PER_STORAGE {
fs::write(src3_path.join(format!("{i}.txt")), "original").unwrap();
fs::write(src3_path.join(format!("{}.txt", i)), "original").unwrap();
}
let logger = slog::Logger::root(slog::Discard, o!());
@@ -919,7 +919,7 @@ mod tests {
// Up to 15 files should be okay (can watch 15 files + 1 directory)
for i in 1..MAX_ENTRIES_PER_STORAGE {
fs::write(source_dir.path().join(format!("{i}.txt")), "original").unwrap();
fs::write(source_dir.path().join(format!("{}.txt", i)), "original").unwrap();
}
assert_eq!(
@@ -1387,7 +1387,7 @@ mod tests {
assert!(!dest_dir.path().exists());
for i in 1..21 {
fs::write(source_dir.path().join(format!("{i}.txt")), "fluff").unwrap();
fs::write(source_dir.path().join(format!("{}.txt", i)), "fluff").unwrap();
}
// verify non-watched storage is cleaned up correctly

View File

@@ -70,7 +70,7 @@ impl ExportError for Error {
}
fn make_io_error(desc: String) -> std::io::Error {
std::io::Error::other(desc)
std::io::Error::new(ErrorKind::Other, desc)
}
// Send a trace span to the forwarder running on the host.

2835
src/dragonball/Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -9,6 +9,58 @@ repository = "https://github.com/kata-containers/kata-containers.git"
license = "Apache-2.0"
edition = "2018"
[workspace]
members = [
"dbs_acpi",
"dbs_address_space",
"dbs_allocator",
"dbs_arch",
"dbs_boot",
"dbs_device",
"dbs_interrupt",
"dbs_legacy_devices",
"dbs_pci",
"dbs_tdx",
"dbs_upcall",
"dbs_utils",
"dbs_virtio_devices",
]
resolver = "2"
[workspace.dependencies]
# Rust-VMM crates
event-manager = "0.2.1"
kvm-bindings = "0.6.0"
kvm-ioctls = "=0.12.1"
linux-loader = "0.8.0"
seccompiler = "0.5.0"
vfio-bindings = "0.3.0"
vfio-ioctls = "0.1.0"
virtio-bindings = "0.1.0"
virtio-queue = "0.7.0"
vm-fdt = "0.2.0"
vm-memory = "0.10.0"
vm-superio = "0.5.0"
vmm-sys-util = "0.11.0"
# Local dependencies from Dragonball Sandbox crates
dbs-acpi = { path = "dbs_acpi" }
dbs-address-space = { path = "dbs_address_space" }
dbs-allocator = { path = "dbs_allocator" }
dbs-arch = { path = "dbs_arch" }
dbs-boot = { path = "dbs_boot" }
dbs-device = { path = "dbs_device" }
dbs-interrupt = { path = "dbs_interrupt" }
dbs-legacy-devices = { path = "dbs_legacy_devices" }
dbs-pci = { path = "dbs_pci" }
dbs-tdx = { path = "dbs_tdx" }
dbs-upcall = { path = "dbs_upcall" }
dbs-utils = { path = "dbs_utils" }
dbs-virtio-devices = { path = "dbs_virtio_devices" }
# Local dependencies from `src/lib`
test-utils = { path = "../libs/test-utils" }
[dependencies]
anyhow = "1.0.32"
arc-swap = "1.5.0"
@@ -31,12 +83,12 @@ kvm-bindings = { workspace = true }
kvm-ioctls = { workspace = true }
lazy_static = "1.2"
libc = "0.2.39"
linux-loader = { workspace = true }
linux-loader = {workspace = true}
log = "0.4.14"
nix = "0.24.2"
procfs = "0.12.0"
prometheus = { version = "0.14.0", features = ["process"] }
seccompiler = { workspace = true }
seccompiler = {workspace = true}
serde = "1.0.27"
serde_derive = "1.0.27"
serde_json = "1.0.9"
@@ -44,7 +96,7 @@ slog = "2.5.2"
slog-scope = "4.4.0"
thiserror = "1"
tracing = "0.1.41"
vmm-sys-util = { workspace = true }
vmm-sys-util = {workspace = true}
virtio-queue = { workspace = true, optional = true }
vm-memory = { workspace = true, features = ["backend-mmap"] }
crossbeam-channel = "0.5.6"
@@ -66,14 +118,14 @@ virtio-blk = ["dbs-virtio-devices/virtio-blk", "virtio-queue"]
virtio-net = ["dbs-virtio-devices/virtio-net", "virtio-queue"]
# virtio-fs only work on atomic-guest-memory
virtio-fs = [
"dbs-virtio-devices/virtio-fs-pro",
"virtio-queue",
"atomic-guest-memory",
"dbs-virtio-devices/virtio-fs-pro",
"virtio-queue",
"atomic-guest-memory",
]
virtio-mem = [
"dbs-virtio-devices/virtio-mem",
"virtio-queue",
"atomic-guest-memory",
"dbs-virtio-devices/virtio-mem",
"virtio-queue",
"atomic-guest-memory",
]
virtio-balloon = ["dbs-virtio-devices/virtio-balloon", "virtio-queue"]
vhost-net = ["dbs-virtio-devices/vhost-net"]
@@ -84,5 +136,5 @@ host-device = ["dep:vfio-bindings", "dep:vfio-ioctls", "dep:dbs-pci"]
[lints.rust]
unexpected_cfgs = { level = "warn", check-cfg = [
'cfg(feature, values("test-mock"))',
'cfg(feature, values("test-mock"))',
] }

View File

@@ -44,7 +44,7 @@ test:
ifdef SUPPORT_VIRTUALIZATION
@set -e; \
for dir in $(PROJECT_DIRS); do \
bash -c "pushd $${dir} && RUST_BACKTRACE=1 cargo test --all-features --target $(TRIPLE) --no-fail-fast -- --nocapture --test-threads=1 --skip bindgen && popd"; \
bash -c "pushd $${dir} && RUST_BACKTRACE=1 cargo test --all-features --target $(TRIPLE) -- --nocapture --test-threads=1 && popd"; \
done
else
@echo "INFO: skip testing dragonball, it need virtualization support."

View File

@@ -207,7 +207,7 @@ impl<B: Bitmap> GuestMemoryRegion for GuestRegionHybrid<B> {
&self,
offset: MemoryRegionAddress,
count: usize,
) -> guest_memory::Result<VolatileSlice<'_, BS<'_, B>>> {
) -> guest_memory::Result<VolatileSlice<BS<B>>> {
match self {
GuestRegionHybrid::Mmap(region) => region.get_slice(offset, count),
GuestRegionHybrid::Raw(region) => region.get_slice(offset, count),
@@ -246,8 +246,8 @@ impl<B: Bitmap> GuestMemoryHybrid<B> {
/// # Arguments
///
/// * `regions` - The vector of regions.
/// The regions shouldn't overlap and they should be sorted
/// by the starting address.
/// The regions shouldn't overlap and they should be sorted
/// by the starting address.
pub fn from_regions(mut regions: Vec<GuestRegionHybrid<B>>) -> Result<Self, Error> {
Self::from_arc_regions(regions.drain(..).map(Arc::new).collect())
}
@@ -262,8 +262,8 @@ impl<B: Bitmap> GuestMemoryHybrid<B> {
/// # Arguments
///
/// * `regions` - The vector of `Arc` regions.
/// The regions shouldn't overlap and they should be sorted
/// by the starting address.
/// The regions shouldn't overlap and they should be sorted
/// by the starting address.
pub fn from_arc_regions(regions: Vec<Arc<GuestRegionHybrid<B>>>) -> Result<Self, Error> {
if regions.is_empty() {
return Err(Error::NoMemoryRegion);
@@ -359,7 +359,7 @@ impl<B: Bitmap + 'static> GuestMemory for GuestMemoryHybrid<B> {
index.map(|x| self.regions[x].as_ref())
}
fn iter(&self) -> Iter<'_, B> {
fn iter(&self) -> Iter<B> {
Iter(self.regions.iter())
}
}

View File

@@ -202,7 +202,7 @@ impl<B: Bitmap> GuestMemoryRegion for GuestRegionRaw<B> {
&self,
offset: MemoryRegionAddress,
count: usize,
) -> guest_memory::Result<VolatileSlice<'_, BS<'_, B>>> {
) -> guest_memory::Result<VolatileSlice<BS<B>>> {
let offset = offset.raw_value() as usize;
let end = compute_offset(offset, count)?;
if end > self.size {

View File

@@ -21,8 +21,6 @@ libc = ">=0.2.39"
[dev-dependencies]
vm-memory = { workspace = true, features = ["backend-mmap"] }
test-utils = { workspace = true }
nix = { workspace = true }
[package.metadata.docs.rs]
all-features = true

Some files were not shown because too many files have changed in this diff Show More