Compare commits


2 Commits

Author SHA1 Message Date
Mikko Ylinen
358b0184bb Revert "tdx: Update GPU config for the latest TDX stack"
Prefer the "full feature TDVF" instead of the generic OVMF build. See
Option-B in
https://github.com/tianocore/edk2/tree/master/OvmfPkg/IntelTdx#configurations-and-features
for the extra hardening supported.

FIRMWAREPATH_NV also seems to be TDX specific, contrary to what the Makefile
suggests. Therefore, it can be dropped completely.

This reverts commit 66ccc25724.
2026-01-02 13:27:55 +02:00
Mikko Ylinen
29828e2aeb packaging: build OVMF for Intel TDX again
OVMF build for Intel TDX (aka "TDVF") was disabled in favor of Ubuntu/
CentOS pre-upstream releases of Intel TDX.

See 4292c4c3b1.

It's time to re-enable the build and move runtime configurations to
use it (the latter will be done in a later commit).

This is a partial revert of 4292c4c3b with the following changes:
- Stop calling OVMF for Intel TDX "TDVF" and follow the naming distros
use for the TDX-enabled build: OVMF.inteltdx.fd.
- Single binary OVMF.inteltdx.fd is supported using -bios QEMU param.
- Secure Boot infrastructure is disabled since Kata does not support it.

Signed-off-by: Mikko Ylinen <mikko.ylinen@intel.com>
2026-01-02 13:16:32 +02:00
90 changed files with 2045 additions and 4256 deletions

View File

@@ -148,8 +148,8 @@ jobs:
if: ${{ startsWith(matrix.asset, 'kernel-nvidia-gpu') }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
- name: kata-artifacts-amd64-${{ matrix.asset }}-modules${{ inputs.tarball-suffix }}
- path: kata-build/kata-static-${{ matrix.asset }}-modules.tar.zst
+ name: kata-artifacts-amd64-${{ matrix.asset }}-headers${{ inputs.tarball-suffix }}
+ path: kata-build/kata-static-${{ matrix.asset }}-headers.tar.zst
retention-days: 15
if-no-files-found: error
@@ -237,8 +237,8 @@ jobs:
asset:
- busybox
- coco-guest-components
- - kernel-nvidia-gpu-modules
- - kernel-nvidia-gpu-confidential-modules
+ - kernel-nvidia-gpu-headers
+ - kernel-nvidia-gpu-confidential-headers
- pause-image
steps:
- uses: geekyeggo/delete-artifact@f275313e70c08f6120db482d7a6b98377786765b # v5.1.0

View File

@@ -134,8 +134,8 @@ jobs:
if: ${{ startsWith(matrix.asset, 'kernel-nvidia-gpu') }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
- name: kata-artifacts-arm64-${{ matrix.asset }}-modules${{ inputs.tarball-suffix }}
- path: kata-build/kata-static-${{ matrix.asset }}-modules.tar.zst
+ name: kata-artifacts-arm64-${{ matrix.asset }}-headers${{ inputs.tarball-suffix }}
+ path: kata-build/kata-static-${{ matrix.asset }}-headers.tar.zst
retention-days: 15
if-no-files-found: error
@@ -216,7 +216,7 @@ jobs:
matrix:
asset:
- busybox
- - kernel-nvidia-gpu-modules
+ - kernel-nvidia-gpu-headers
steps:
- uses: geekyeggo/delete-artifact@f275313e70c08f6120db482d7a6b98377786765b # v5.1.0
with:

View File

@@ -1,75 +0,0 @@
name: Build kubectl multi-arch image
on:
schedule:
# Run every Sunday at 00:00 UTC
- cron: '0 0 * * 0'
workflow_dispatch:
# Allow manual triggering
push:
branches:
- main
paths:
- 'tools/packaging/kubectl/Dockerfile'
- '.github/workflows/build-kubectl-image.yaml'
permissions: {}
env:
REGISTRY: quay.io
IMAGE_NAME: kata-containers/kubectl
jobs:
build-and-push:
name: Build and push multi-arch image
runs-on: ubuntu-24.04
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false
- name: Set up QEMU
uses: docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 # v3.6.0
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Login to Quay.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
with:
registry: ${{ env.REGISTRY }}
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
- name: Get kubectl version
id: kubectl-version
run: |
KUBECTL_VERSION=$(curl -L -s https://dl.k8s.io/release/stable.txt)
echo "version=${KUBECTL_VERSION}" >> "$GITHUB_OUTPUT"
- name: Generate image metadata
id: meta
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
type=raw,value=latest
type=raw,value={{date 'YYYYMMDD'}}
type=raw,value=${{ steps.kubectl-version.outputs.version }}
type=sha,prefix=
- name: Build and push multi-arch image
uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25 # v5.4.0
with:
context: tools/packaging/kubectl/
file: tools/packaging/kubectl/Dockerfile
platforms: linux/amd64,linux/arm64,linux/s390x,linux/ppc64le
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max

View File

@@ -1,32 +0,0 @@
name: Documentation
on:
push:
branches:
- main
permissions: {}
jobs:
deploy-docs:
name: deploy-docs
permissions:
contents: read
pages: write
id-token: write
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
runs-on: ubuntu-latest
steps:
- uses: actions/configure-pages@v5
- uses: actions/checkout@v5
with:
persist-credentials: false
- uses: actions/setup-python@v5
with:
python-version: 3.x
- run: pip install zensical
- run: zensical build --clean
- uses: actions/upload-pages-artifact@v4
with:
path: site
- uses: actions/deploy-pages@v4
id: deployment

View File

@@ -134,7 +134,7 @@ jobs:
repo: kata-containers/kata-deploy-ci
tag: kata-containers-latest-ppc64le
target-branch: ${{ github.ref_name }}
- runner: ubuntu-24.04-ppc64le
+ runner: ppc64le-small
arch: ppc64le
build-type: "" # Use script-based build (default)
secrets:

.gitignore (vendored): 1 change
View File

@@ -19,4 +19,3 @@ tools/packaging/static-build/agent/install_libseccomp.sh
.envrc
.direnv
**/.DS_Store
- site/

Cargo.lock (generated): 28 changes
View File

@@ -770,6 +770,12 @@ dependencies = [
"libc",
]
+ [[package]]
+ name = "cpuid-bool"
+ version = "0.1.2"
+ source = "registry+https://github.com/rust-lang/crates.io-index"
+ checksum = "8aebca1129a03dc6dc2b127edd729435bbc4a37e1d5f4d7513165089ceb02634"
[[package]]
name = "crc32fast"
version = "1.3.2"
@@ -2372,7 +2378,7 @@ dependencies = [
"nix 0.23.2",
"once_cell",
"serde",
"sha2 0.9.9",
"sha2 0.9.3",
"thiserror 1.0.48",
"uuid 0.8.2",
]
@@ -2993,9 +2999,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
[[package]]
name = "openssl-src"
version = "300.5.4+3.5.4"
version = "300.5.0+3.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a507b3792995dae9b0df8a1c1e3771e8418b7c2d9f0baeba32e6fe8b06c7cb72"
checksum = "e8ce546f549326b0e6052b649198487d91320875da901e7bd11a06d1ee3f9c2f"
dependencies = [
"cc",
]
@@ -3621,9 +3627,9 @@ dependencies = [
[[package]]
name = "qapi"
version = "0.15.0"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b047adab56acc4948d4b9b58693c1f33fd13efef2d6bb5f0f66a47436ceada8"
checksum = "c6412bdd014ebee03ddbbe79ac03a0b622cce4d80ba45254f6357c847f06fa38"
dependencies = [
"bytes",
"futures 0.3.28",
@@ -3658,9 +3664,9 @@ dependencies = [
[[package]]
name = "qapi-qmp"
version = "0.15.0"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45303cac879d89361cad0287ae15f9ae1e7799b904b474152414aeece39b9875"
checksum = "e8b944db7e544d2fa97595e9a000a6ba5c62c426fa185e7e00aabe4b5640b538"
dependencies = [
"qapi-codegen",
"qapi-spec",
@@ -4424,13 +4430,13 @@ dependencies = [
[[package]]
name = "sha2"
version = "0.9.9"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d58a1e1bf39749807d89cf2d98ac2dfa0ff1cb3faa38fbb64dd88ac8013d800"
checksum = "fa827a14b29ab7f44778d14a88d3cb76e949c45083f7dbfa507d0cb699dc12de"
dependencies = [
"block-buffer 0.9.0",
"cfg-if 1.0.0",
"cpufeatures",
"cpuid-bool",
"digest 0.9.0",
"opaque-debug",
]
@@ -4476,7 +4482,7 @@ dependencies = [
"runtimes",
"serial_test 0.10.0",
"service",
"sha2 0.10.9",
"sha2 0.9.3",
"slog",
"slog-async",
"slog-scope",

View File

@@ -127,7 +127,6 @@ protobuf = "3.7.2"
rand = "0.8.4"
serde = { version = "1.0.145", features = ["derive"] }
serde_json = "1.0.91"
sha2 = "0.10.9"
slog = "2.5.2"
slog-scope = "4.4.0"
strum = { version = "0.24.0", features = ["derive"] }

View File

@@ -50,14 +50,10 @@ docs-url-alive-check:
build-and-publish-kata-debug:
bash tools/packaging/kata-debug/kata-debug-build-and-upload-payload.sh ${KATA_DEBUG_REGISTRY} ${KATA_DEBUG_TAG}
- docs-serve:
- docker run --rm -p 8000:8000 -v ./docs:/docs:ro -v ${PWD}/zensical.toml:/zensical.toml:ro zensical/zensical serve --config-file /zensical.toml -a 0.0.0.0:8000
.PHONY: \
all \
kata-tarball \
install-tarball \
default \
static-checks \
- docs-url-alive-check \
- docs-serve
+ docs-url-alive-check

View File

@@ -1 +1 @@
- 3.25.0
+ 3.24.0

View File

@@ -1,9 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 32">
<!-- Dark background matching the site -->
<rect width="32" height="32" rx="4" fill="#1a1a2e"/>
<!-- Kata logo scaled and centered -->
<g transform="translate(-27, -2) scale(0.75)">
<path d="M70.925 25.22L58.572 37.523 46.27 25.22l2.192-2.192 10.11 10.11 10.11-10.11zm-6.575-.2l-3.188-3.188 3.188-3.188 3.188 3.188zm-4.93-2.54l3.736 3.736-3.736 3.736zm-1.694 7.422l-8.07-8.07 8.07-8.07zm1.694-16.14l3.686 3.686-3.686 3.686zm-13.15 4.682L58.572 6.143l12.353 12.303-2.192 2.192-10.16-10.11-10.11 10.11zm26.997 0L58.572 3.752 43.878 18.446l3.387 3.387-3.387 3.387 14.694 14.694L73.266 25.22l-3.337-3.387z" fill="#f15b3e"/>
</g>
</svg>


View File

@@ -256,7 +256,7 @@ spec:
values:
- NODE_NAME
volumes:
- - name: trusted-image-storage
+ - name: trusted-storage
persistentVolumeClaim:
claimName: trusted-pvc
containers:

View File

@@ -3,4 +3,4 @@
Kata Containers supports passing certain GPUs from the host into the container. Select the GPU vendor for detailed information:
- [Intel Discrete GPUs](Intel-Discrete-GPU-passthrough-and-Kata.md)/[Intel Integrated GPUs](Intel-GPU-passthrough-and-Kata.md)
- - [NVIDIA GPUs](NVIDIA-GPU-passthrough-and-Kata.md) and [Enabling NVIDIA GPU workloads using GPU passthrough with Kata Containers](NVIDIA-GPU-passthrough-and-Kata-QEMU.md)
+ - [NVIDIA](NVIDIA-GPU-passthrough-and-Kata.md)

View File

@@ -1,569 +0,0 @@
# Enabling NVIDIA GPU workloads using GPU passthrough with Kata Containers
This page provides:
1. A description of the components involved when running GPU workloads with
Kata Containers using the NVIDIA TEE and non-TEE GPU runtime classes.
1. An explanation of the orchestration flow on a Kubernetes node for this
scenario.
1. A deployment guide enabling you to utilize these runtime classes.
The goal is to educate readers familiar with Kubernetes and Kata Containers
on NVIDIA's reference implementation which is reflected in Kata CI's build
and test framework. With this, we aim to enable readers to leverage this
stack, or to use the principles behind this stack in order to run GPU
workloads on their variant of the Kata Containers stack.
We assume the reader is familiar with Kubernetes, Kata Containers, and
Confidential Containers.
> **Note:**
>
> The current supported mode for enabling GPU workloads in the TEE scenario
> is single GPU passthrough (one GPU per pod) on AMD64 platforms (AMD SEV-SNP
> being the only supported TEE scenario so far with support for Intel TDX being
> on the way).
## Component Overview
Before providing deployment guidance, we describe the components involved in
supporting GPU workloads, proceeding top to bottom:
from the NVIDIA GPU operator via the Kata runtime to the components within
the NVIDIA GPU Utility Virtual Machine (UVM) root filesystem.
### NVIDIA GPU Operator
A central component is the
[NVIDIA GPU operator](https://github.com/NVIDIA/gpu-operator) which can be
deployed onto your cluster as a helm chart. Installing the GPU operator
delivers various operands on your nodes in the form of Kubernetes DaemonSets.
These operands are vital to support the flow of orchestrating pod manifests
using NVIDIA GPU runtime classes with GPU passthrough on your nodes. Without
getting into the details, the most important operands and their
responsibilities are:
- **nvidia-vfio-manager:** Binding discovered NVIDIA GPUs to the `vfio-pci`
driver for VFIO passthrough.
- **nvidia-cc-manager:** Transitioning GPUs into confidential computing (CC)
and non-CC mode (see the
[NVIDIA/k8s-cc-manager](https://github.com/NVIDIA/k8s-cc-manager)
repository).
- **nvidia-kata-manager:** Creating host-side CDI specifications for GPU
passthrough, resulting in the file `/var/run/cdi/nvidia.yaml`, containing
`kind: nvidia.com/pgpu` (see the
[NVIDIA/k8s-kata-manager](https://github.com/NVIDIA/k8s-kata-manager)
repository).
- **nvidia-sandbox-device-plugin** (see the
[NVIDIA/sandbox-device-plugin](https://github.com/NVIDIA/sandbox-device-plugin)
repository):
- Allocating GPUs during pod deployment.
- Discovering NVIDIA GPUs, their capabilities, and advertising these to
the Kubernetes control plane (allocatable resources of type
`nvidia.com/pgpu` will appear for the node, and GPU device IDs
will be registered with the Kubelet). These GPUs can thus be allocated as
container resources in your pod manifests. See the GPU operator deployment
instructions below for the use of the key `pgpu`, which is controlled via a
variable.
To summarize, the GPU operator manages the GPUs on each node, allowing for
simple orchestration of pod manifests using Kata Containers. Once the cluster
with GPU operator and Kata bits is up and running, the end user can schedule
Kata NVIDIA GPU workloads, using resource limits and the
`kata-qemu-nvidia-gpu` or `kata-qemu-nvidia-gpu-snp` runtime classes, for
example:
```yaml
apiVersion: v1
kind: Pod
...
spec:
...
runtimeClassName: kata-qemu-nvidia-gpu-snp
...
resources:
limits:
"nvidia.com/pgpu": 1
...
```
When this happens, the Kubelet calls into the sandbox device plugin to
allocate a GPU. The sandbox device plugin returns `DeviceSpec` entries to the
Kubelet for the allocated GPU. The Kubelet uses internal device IDs to
track allocated GPUs and includes the device specifications in the CRI
request when scheduling the pod through containerd. Containerd processes the
device specifications and includes the device configuration in the OCI
runtime spec used to invoke the Kata runtime during the create container
request.
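To illustrate, the device information travels to the runtime inside the OCI runtime spec (`config.json`). A minimal, hypothetical fragment, rendered here as YAML with illustrative device numbers, could look like:
```yaml
linux:
  devices:
    # VFIO group device allocated by the sandbox device plugin
    - path: /dev/vfio/192   # illustrative group number
      type: c               # character device
      major: 243            # illustrative major/minor numbers
      minor: 0
```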
### Kata runtime
The Kata runtime for the NVIDIA GPU handlers is configured to cold-plug VFIO
devices (`cold_plug_vfio` is set to `root-port` while
`hot_plug_vfio` is set to `no-port`). Cold-plug is by design the only
supported mode for NVIDIA GPU passthrough in the NVIDIA reference stack.
With cold-plug, the Kata runtime attaches the GPU at VM launch time, when
creating the pod sandbox. This happens *before* the create container request,
i.e., before the Kata runtime receives the OCI spec including device
configurations from containerd. Thus, a mechanism to acquire the device
information is required. This is done by the runtime calling the
`coldPlugDevices()` function during sandbox creation. In this function,
the runtime queries Kubelet's Pod Resources API to discover allocated GPU
device IDs (e.g., `nvidia.com/pgpu = [vfio0]`). The runtime formats these as
CDI device identifiers and injects them into the OCI spec using
`config.InjectCDIDevices()`. The runtime then consults the host CDI
specifications and determines the device path the GPU is backed by
(e.g., `/dev/vfio/devices/vfio0`). Finally, the runtime resolves the device's
PCI BDF (e.g., `0000:21:00.0`) and cold-plugs the GPU by launching QEMU with
relevant parameters for device passthrough (e.g.,
`-device vfio-pci,host=0000:21:00.0,x-pci-vendor-id=0x10de,x-pci-device-id=0x2321,bus=rp0,iommufd=iommufdvfio-faf829f2ea7aec330`).
The runtime also creates *inner runtime* CDI annotations
which map host VFIO devices to guest GPU devices. These are annotations
intended for the kata-agent, here referred to as the inner runtime (inside the
UVM), to properly handle GPU passthrough into containers. These annotations
serve as metadata providing the kata-agent with the information needed to
attach the passthrough devices to the correct container.
The annotations are key-value pairs consisting of `cdi.k8s.io/vfio<num>` keys
(derived from the host VFIO device path, e.g., `/dev/vfio/devices/vfio1`) and
`nvidia.com/gpu=<index>` values (referencing the corresponding device in the
guest CDI spec). These annotations are injected by the runtime during container
creation via the `annotateContainerWithVFIOMetadata` function (see
`container.go`).
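For example, a container receiving the first guest GPU backed by the host VFIO device `/dev/vfio/devices/vfio1` carries an annotation of the following form:
```yaml
annotations:
  cdi.k8s.io/vfio1: nvidia.com/gpu=0
```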
We continue describing the orchestration flow inside the UVM in the next
section.
### Kata NVIDIA GPU UVM
#### UVM composition
To better understand the orchestration flow inside the NVIDIA GPU UVM, we
first look at the components its root filesystem contains. Should you decide
to use your own root filesystem to enable NVIDIA GPU scenarios, this should
give you a good idea on what ingredients you need.
From a file system perspective, the UVM is composed of two files: a standard
Kata kernel image and the NVIDIA GPU rootfs in initrd or disk image format.
These two files are used in the QEMU launch command when the UVM
is created.
The two most important pieces in Kata Container's build recipes for the
NVIDIA GPU root filesystem are the `nvidia_chroot.sh` and `nvidia_rootfs.sh`
files. The build follows a two-stage process. In the first stage, a
full-fledged Ubuntu-based root filesystem is composed within a chroot
environment. In this stage, NVIDIA kernel modules are built and signed
against the current Kata kernel and relevant NVIDIA packages are installed.
In the second stage, a chiseled build is performed: Only relevant contents
from the first stage are copied and compressed into a new distro-less root
filesystem folder. Kata's build infrastructure then turns this root
filesystem into the NVIDIA initrd and image files.
The resulting root filesystem contains the following software components:
- NVRC - the
[NVIDIA Runtime Container init system](https://github.com/NVIDIA/nvrc/tree/main)
- NVIDIA drivers (kernel modules)
- NVIDIA user space driver libraries
- NVIDIA user space tools
- kata-agent
- confidential computing guest components: the attestation agent,
confidential data hub and api-server-rest binaries
- CRI-O pause container (for the guest image-pull method)
- BusyBox utilities (provides a base set of libraries and binaries, and a
linker)
- some supporting files, such as a file containing a list of supported GPU
device IDs which NVRC reads
#### UVM orchestration flow
When the Kata runtime asks QEMU to launch the VM, the UVM's Linux kernel
boots and mounts the root filesystem. After this, NVRC starts as the initial
process.
NVRC scans for NVIDIA GPUs on the PCI bus, loads the
NVIDIA kernel modules, waits for driver initialization, creates the device nodes,
and initializes the GPU hardware (using the `nvidia-smi` binary). NVRC also
creates the guest-side CDI specification file (using the
`nvidia-ctk cdi generate` command). This file specifies devices of
`kind: nvidia.com/gpu`, i.e., GPUs appearing to be physical GPUs on regular
bare metal systems. The guest CDI specification also contains `containerEdits`
for each device, specifying device nodes (e.g., `/dev/nvidia0`,
`/dev/nvidiactl`), library mounts, and environment variables to be mounted
into the container which receives the passthrough GPU.
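To make this concrete, here is a heavily abbreviated sketch of what such a generated guest CDI specification might look like; the exact device nodes, mounts, and environment variables depend on the GPU model and driver version:
```yaml
cdiVersion: "0.6.0"
kind: nvidia.com/gpu
devices:
  - name: "0"
    containerEdits:
      deviceNodes:
        - path: /dev/nvidia0
containerEdits:
  deviceNodes:
    - path: /dev/nvidiactl
  mounts:
    # illustrative driver library mount
    - hostPath: /usr/lib64/libcuda.so.1
      containerPath: /usr/lib64/libcuda.so.1
      options: ["ro", "nosuid", "nodev", "bind"]
```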
Then, NVRC forks the Kata agent while continuing to run as the
init system. This allows NVRC to handle ongoing GPU management tasks
while kata-agent focuses on container lifecycle management. See the
[NVRC sources](https://github.com/NVIDIA/nvrc/blob/main/src/main.rs) for an
overview on the steps carried out by NVRC.
When the Kata runtime sends the create container request, the Kata agent
parses the inner runtime CDI annotation. For example, for the inner runtime
annotation `"cdi.k8s.io/vfio1": "nvidia.com/gpu=0"`, the agent looks up device
`0` in the guest CDI specification with `kind: nvidia.com/gpu`.
The Kata agent also reads the guest CDI specification's `containerEdits`
section and injects relevant contents into the OCI spec of the respective
container. The kata agent then creates and starts a `rustjail` container
based on the final OCI spec. The container now has relevant device nodes,
binaries and low-level libraries available, and can start a user application
linked against the CUDA runtime API (e.g., `libcudart.so` and other
libraries). When used, the CUDA runtime API in turn calls the CUDA driver
API and kernel drivers, interacting with the pass-through GPU device.
An additional step is exercised in our CI samples: when using images from an
authenticated registry, the guest-pull mechanism triggers attestation using
trustee's Key Broker Service (KBS) for secure release of the NGC API
authentication key used to access the NVCR container registry. As part of
this, the attestation agent exercises composite attestation and transitions
the GPU into the `Ready` state (without this, the GPU has to be explicitly
transitioned into the `Ready` state by passing the `nvrc.smi.srs=1` kernel
parameter via the shim config, which causes NVRC to perform the transition).
## Deployment Guidance
This guidance assumes you use bare-metal machines with proper support for
Kata's non-TEE and TEE GPU workload deployment scenarios for your Kubernetes
nodes. We provide guidance based on the upstream Kata CI procedures for the
NVIDIA GPU CI validation jobs. Note that this setup:
- uses the guest image pull method to pull container image layers
- uses the genpolicy tool to attach Kata agent security policies to the pod
manifest
- has dedicated (composite) attestation tests, a CUDA vectorAdd test, and a
NIM/RA test sample with secure API key release
A similar deployment guide and scenario description can be found in NVIDIA resources
under
[Early Access: NVIDIA GPU Operator with Confidential Containers based on Kata](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/confidential-containers.html).
### Requirements
The requirements for the TEE scenario are:
- Ubuntu 25.10 as host OS
- CPU with AMD SEV-SNP support with proper BIOS/UEFI version and settings
- CC-capable Hopper/Blackwell GPU with proper VBIOS version.
BIOS and VBIOS configuration is out of scope for this guide. Other resources,
such as the documentation found on the
[NVIDIA Trusted Computing Solutions](https://docs.nvidia.com/nvtrust/index.html)
page and the above linked NVIDIA documentation, provide guidance on
selecting proper hardware and on properly configuring its firmware and OS.
### Installation
#### Containerd and Kubernetes
First, set up your Kubernetes cluster. For instance, in Kata CI, our NVIDIA
jobs use a single-node vanilla Kubernetes cluster with a 2.x containerd
version and Kata's current supported Kubernetes version. We set this cluster
up using the `deploy_k8s` function from `tests/integration/kubernetes/gha-run.sh`
as follows:
```bash
$ export KUBERNETES="vanilla"
$ export CONTAINER_ENGINE="containerd"
$ export CONTAINER_ENGINE_VERSION="v2.1"
$ source tests/gha-run-k8s-common.sh
$ deploy_k8s
```
> **Note:**
>
> We recommend configuring your Kubelet with a higher
> `runtimeRequestTimeout` value than the two-minute default.
> Using the guest-pull mechanism, pulling large images may take a significant
> amount of time and may delay container start, possibly leading your Kubelet
> to de-allocate your pod before it transitions from the *container created*
> to the *container running* state.
> **Note:**
>
> The NVIDIA GPU runtime classes use VFIO cold-plug which, as
> described above, requires the Kata runtime to query Kubelet's Pod Resources
> API to discover allocated GPU devices during sandbox creation. For
> Kubernetes versions **older than 1.34**, you must explicitly enable the
> `KubeletPodResourcesGet` feature gate in your Kubelet configuration. For
> Kubernetes 1.34 and later, this feature is enabled by default.
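Both notes map to Kubelet configuration settings. An illustrative `KubeletConfiguration` fragment (the timeout value is only an example) might look like:
```yaml
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
# raised from the two-minute default to tolerate large guest-pulled images
runtimeRequestTimeout: "30m"
featureGates:
  # only required on Kubernetes versions older than 1.34
  KubeletPodResourcesGet: true
```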
#### GPU Operator
Assuming you have the helm tools installed, deploy the latest version of the
GPU Operator as a helm chart (minimum version: `v25.10.0`):
```bash
$ helm repo add nvidia https://helm.ngc.nvidia.com/nvidia && helm repo update
$ helm install --wait --generate-name \
-n gpu-operator --create-namespace \
nvidia/gpu-operator \
--set sandboxWorkloads.enabled=true \
--set sandboxWorkloads.defaultWorkload=vm-passthrough \
--set kataManager.enabled=true \
--set kataManager.config.runtimeClasses=null \
--set kataManager.repository=nvcr.io/nvidia/cloud-native \
--set kataManager.image=k8s-kata-manager \
--set kataManager.version=v0.2.4 \
--set ccManager.enabled=true \
--set ccManager.defaultMode=on \
--set ccManager.repository=nvcr.io/nvidia/cloud-native \
--set ccManager.image=k8s-cc-manager \
--set ccManager.version=v0.2.0 \
--set sandboxDevicePlugin.repository=nvcr.io/nvidia/cloud-native \
--set sandboxDevicePlugin.image=nvidia-sandbox-device-plugin \
--set sandboxDevicePlugin.version=v0.0.1 \
--set 'sandboxDevicePlugin.env[0].name=P_GPU_ALIAS' \
--set 'sandboxDevicePlugin.env[0].value=pgpu' \
--set nfd.enabled=true \
--set nfd.nodefeaturerules=true
```
> **Note:**
>
> For heterogeneous clusters with different GPU types, you can omit
> the `P_GPU_ALIAS` environment variable lines. This will cause the sandbox
> device plugin to create GPU model-specific resource types (e.g.,
> `nvidia.com/GH100_H100L_94GB`) instead of the generic `nvidia.com/pgpu`,
> which in turn can be used by pods through respective resource limits.
> For simplicity, this guide uses the generic alias.
> **Note:**
>
> Using `--set sandboxWorkloads.defaultWorkload=vm-passthrough` causes all
> your nodes to be labeled for GPU VM passthrough. Remove this parameter if
> you intend to only use selected nodes for this scenario, and label these
> nodes by hand, using:
> `kubectl label node <node-name> nvidia.com/gpu.workload.config=vm-passthrough`.
#### Kata Containers
Install the latest Kata Containers helm chart, similar to
[existing documentation](https://github.com/kata-containers/kata-containers/blob/main/tools/packaging/kata-deploy/helm-chart/README.md)
(minimum version: `3.24.0`).
```bash
$ export VERSION=$(curl -sSL https://api.github.com/repos/kata-containers/kata-containers/releases/latest | jq .tag_name | tr -d '"')
$ export CHART="oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy"
$ helm install kata-deploy \
--namespace kata-system \
--create-namespace \
-f "https://raw.githubusercontent.com/kata-containers/kata-containers/refs/tags/${VERSION}/tools/packaging/kata-deploy/helm-chart/kata-deploy/try-kata-nvidia-gpu.values.yaml" \
--set nfd.enabled=false \
--set shims.qemu-nvidia-gpu-tdx.enabled=false \
--wait --timeout 10m --atomic \
"${CHART}" --version "${VERSION}"
```
#### Trustee's KBS for remote attestation
For our Kata CI runners we use Trustee's KBS with composite attestation for
secure key release, for instance in test scenarios which use authenticated
container images. In such scenarios, the credentials to access the
authenticated container registry are only released to the confidential guest
after successful attestation. Please see the section below for more
information about this.
```bash
$ export NVIDIA_VERIFIER_MODE="remote"
$ export KBS_INGRESS="nodeport"
$ bash tests/integration/kubernetes/gha-run.sh deploy-coco-kbs
$ bash tests/integration/kubernetes/gha-run.sh install-kbs-client
```
Please note that Trustee can also be deployed via any other upstream
mechanism as documented by the
[confidential-containers repository](https://github.com/confidential-containers/trustee).
For our architecture it is important to set up KBS in the remote verifier
mode, which requires entering a licensing agreement with NVIDIA; see the
[notes in confidential-containers repository](https://github.com/confidential-containers/trustee/blob/main/deps/verifier/src/nvidia/README.md).
### Cluster validation and preparation
If you did not use the `sandboxWorkloads.defaultWorkload=vm-passthrough`
parameter during GPU operator deployment, label your nodes for GPU VM
passthrough. For example, to use all nodes for GPU passthrough, run:
```bash
$ kubectl label nodes --all nvidia.com/gpu.workload.config=vm-passthrough --overwrite
```
If you intend to run GPU TEE scenarios, check whether the `nvidia-cc-manager`
pod is running. If it is not, you need to manually label the node as CC capable,
since current GPU Operator node feature rules do not yet recognize all
CC-capable GPU PCI IDs. Run the following command:
```bash
$ kubectl label nodes --all nvidia.com/cc.capable=true
```
After this, ensure the `nvidia-cc-manager` pod is running. With the suggested
parameters for GPU Operator deployment, the `nvidia-cc-manager` will
automatically transition the GPU into CC mode.
After deployment, you can transition your node(s) to the desired CC state,
using either the `on` or `off` value, depending on your scenario. For the
non-CC scenario, transition to the `off` state via:
`kubectl label nodes --all nvidia.com/cc.mode=off` and wait until all pods
are running again. When an actual mode change occurs, various GPU operator
operands will be restarted.
Ensure all pods are running:
```bash
$ kubectl get pods -A
```
On your node(s), verify correct driver binding. Your GPU device should be
bound to the VFIO driver, i.e., showing `Kernel driver in use: vfio-pci`
when running:
```bash
$ lspci -nnk -d 10de:
```
### Run the CUDA vectorAdd sample
Create the following file:
```yaml
apiVersion: v1
kind: Pod
metadata:
name: cuda-vectoradd-kata
namespace: default
annotations:
io.katacontainers.config.hypervisor.kernel_params: "nvrc.smi.srs=1"
spec:
runtimeClassName: ${GPU_RUNTIME_CLASS_NAME}
restartPolicy: Never
containers:
- name: cuda-vectoradd
image: "nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0-ubuntu22.04"
resources:
limits:
nvidia.com/pgpu: "1"
memory: 16Gi
```
Depending on your scenario and on the CC state, define the environment
variable with your desired runtime class name:
```bash
$ export GPU_RUNTIME_CLASS_NAME="kata-qemu-nvidia-gpu-snp"
```
Then, deploy the sample Kubernetes pod manifest and observe the pod logs:
```bash
$ envsubst < ./cuda-vectoradd-kata.yaml.in | kubectl apply -f -
$ kubectl wait --for=condition=Ready pod/cuda-vectoradd-kata --timeout=60s
$ kubectl logs -n default cuda-vectoradd-kata
```
Expect the following output:
```
[Vector addition of 50000 elements]
Copy input data from the host memory to the CUDA device
CUDA kernel launch with 196 blocks of 256 threads
Copy output data from the CUDA device to the host memory
Test PASSED
Done
```
To stop the pod, run: `kubectl delete pod cuda-vectoradd-kata`.
### Next steps
#### Transition between CC and non-CC mode
Use the previously described node labeling approach to transition between
the CC and non-CC mode. In case of the non-CC mode, you can use the
`kata-qemu-nvidia-gpu` value for the `GPU_RUNTIME_CLASS_NAME` runtime class
variable in the above CUDA vectorAdd sample. The `kata-qemu-nvidia-gpu-snp`
runtime class will **NOT** work in this mode - and vice versa.
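For instance, after transitioning the node(s) to `nvidia.com/cc.mode=off`, switch the variable accordingly before re-rendering the pod manifest:
```bash
$ export GPU_RUNTIME_CLASS_NAME="kata-qemu-nvidia-gpu"
```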
#### Run Kata CI tests locally
Upstream Kata CI runs the CUDA vectorAdd test, a composite attestation test,
and a basic NIM/RAG deployment. Running CI tests for the TEE GPU scenario
requires KBS to be deployed (except for the CUDA vectorAdd test). The best
place to get started running these tests locally is to look into our
[NVIDIA CI workflow manifest](https://github.com/kata-containers/kata-containers/blob/main/.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml)
and into the underlying
[run_kubernetes_nv_tests.sh](https://github.com/kata-containers/kata-containers/blob/main/tests/integration/kubernetes/run_kubernetes_nv_tests.sh)
script. For example, to run the CUDA vectorAdd scenario against the TEE GPU
runtime class use the following commands:
```bash
# create the kata runtime class the test framework uses
$ export KATA_HYPERVISOR=qemu-nvidia-gpu-snp
$ kubectl delete runtimeclass kata --ignore-not-found
$ kubectl get runtimeclass "kata-${KATA_HYPERVISOR}" -o json | \
jq '.metadata.name = "kata" | del(.metadata.uid, .metadata.resourceVersion, .metadata.creationTimestamp)' | \
kubectl apply -f -
$ cd tests/integration/kubernetes
$ K8S_TEST_NV="k8s-nvidia-cuda.bats" ./gha-run.sh run-nv-tests
```
> **Note:**
>
> The other scenarios require an NGC API key to run, i.e., you must export the
> `NGC_API_KEY` variable with a valid NGC API key.
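That is, before running those scenarios, export the key (placeholder value shown here):
```bash
$ export NGC_API_KEY="<your-ngc-api-key>"
```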
#### Deploy pods using attestation
Attestation is a fundamental piece of the confidential containers solution.
In our upstream CI we demonstrate attestation by leveraging the
authenticated container image pull mechanism, where container images reside
in the authenticated NVCR registry (`k8s-nvidia-nim.bats`), and for
requesting secrets from KBS (`k8s-confidential-attestation.bats`). KBS will
release the image pull secret to a confidential guest. To get the
authentication credentials from inside the guest, KBS must already be
deployed and configured. In our CI samples, we configure KBS with the guest
image pull secret, a resource policy, and launch the pod with certain kernel
command line parameters:
`"agent.image_registry_auth=kbs:///default/credentials/nvcr agent.aa_kbc_params=cc_kbc::${CC_KBS_ADDR}"`.
The `agent.aa_kbc_params` option is a general configuration for attestation.
For your use case, you need to set the IP address and port under which KBS
is reachable through the `CC_KBS_ADDR` variable (see our CI sample). This
tells the guest how to reach KBS. Something like this must be set whenever
attestation is used, but on its own this parameter does not trigger
attestation. The `agent.image_registry_auth` option tells the guest to ask
for a resource from KBS and use it as the authentication configuration. When
this is set, the guest will request this resource at boot (and trigger
attestation) regardless of which image is being pulled.
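Combined with the annotation mechanism shown earlier for the CUDA vectorAdd sample, a pod using this setup would carry kernel parameters along these lines (substitute `CC_KBS_ADDR` with your KBS endpoint):
```yaml
annotations:
  io.katacontainers.config.hypervisor.kernel_params: >-
    agent.image_registry_auth=kbs:///default/credentials/nvcr
    agent.aa_kbc_params=cc_kbc::${CC_KBS_ADDR}
```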
To deploy your own pods using authenticated container images, or secure key
release for attestation, follow steps similar to our mentioned CI samples.
#### Deploy pods with Kata agent security policies
With GPU passthrough being supported by the
[genpolicy tool](https://github.com/kata-containers/kata-containers/tree/main/src/tools/genpolicy),
you can use the tool to create a Kata agent security policy. Our CI deploys
all sample pod manifests with a Kata agent security policy.
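As a sketch, assuming `genpolicy` is built from the linked sources and invoked with its `-y <manifest>` option to rewrite a pod manifest in place with the policy annotation attached, generating a policy for the earlier sample could look like:
```bash
# attach a generated Kata agent security policy to the pod manifest
$ genpolicy -y cuda-vectoradd-kata.yaml
```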
#### Deploy pods using your own containers and manifests
You can author pod manifests leveraging your own containers, for instance,
containers built using the CUDA container toolkit. We recommend starting
with a CUDA base container.
The GPU is transitioned into the `Ready` state via attestation, for instance,
when pulling authenticated images. If your deployment scenario does not use
attestation, please refer back to the CUDA vectorAdd pod manifest. In this
manifest, we ensure that NVRC sets the GPU to `Ready` state by adding the
following annotation in the manifest:
`io.katacontainers.config.hypervisor.kernel_params: "nvrc.smi.srs=1"`
> **Notes:**
>
> - musl-based container images (e.g., using Alpine), or distro-less
> containers are not supported.
> - for the TEE scenario, only single-GPU passthrough per pod is supported,
> so your pod resource limit must be: `nvidia.com/pgpu: "1"` (on a system
> with multiple GPUs, you can thus pass through one GPU per pod).

View File

@@ -1,25 +1,10 @@
# Using NVIDIA GPU device with Kata Containers
- This page gives an overview on the different modes in which GPUs can be passed
- to a Kata Containers container, provides host system requirements, explains how
- Kata Containers guest components can be built to support the NVIDIA GPU
- scenario, and gives practical usage examples using `ctr`.
- Please see the guide
- [Enabling NVIDIA GPU workloads using GPU passthrough with Kata Containers](NVIDIA-GPU-passthrough-and-Kata-QEMU.md)
- for a documentation of an end-to-end reference implementation of a Kata
- Containers stack for GPU passthrough using QEMU, the go-based Kata Runtime,
- and an NVIDIA-specific root filesystem. This reference implementation is built
- and validated in Kata's CI, and it can be used to test GPU workloads with Kata
- components and Kubernetes out of the box.
## Comparison between Passthrough and vGPU Modes
An NVIDIA GPU device can be passed to a Kata Containers container using GPU
- passthrough (NVIDIA GPU passthrough mode) as well as GPU mediated passthrough
+ passthrough (NVIDIA GPU pass-through mode) as well as GPU mediated passthrough
(NVIDIA `vGPU` mode).
- NVIDIA GPU passthrough mode, an entire physical GPU is directly assigned to one
+ NVIDIA GPU pass-through mode, an entire physical GPU is directly assigned to one
VM, bypassing the NVIDIA Virtual GPU Manager. In this mode of operation, the GPU
is accessed exclusively by the NVIDIA driver running in the VM to which it is
assigned. The GPU is not shared among VMs.
@@ -35,20 +20,18 @@ with [MIG-slices](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/).
| Technology | Description | Behavior | Detail |
| --- | --- | --- | --- |
- | NVIDIA GPU passthrough mode | GPU passthrough | Physical GPU assigned to a single VM | Direct GPU assignment to VM without limitation |
+ | NVIDIA GPU pass-through mode | GPU passthrough | Physical GPU assigned to a single VM | Direct GPU assignment to VM without limitation |
| NVIDIA vGPU time-sliced | GPU time-sliced | Physical GPU time-sliced for multiple VMs | Mediated passthrough |
| NVIDIA vGPU MIG-backed | GPU with MIG-slices | Physical GPU MIG-sliced for multiple VMs | Mediated passthrough |
- ## Host Requirements
+ ## Hardware Requirements
- ### Hardware
- NVIDIA GPUs recommended for virtualization:
+ NVIDIA GPUs Recommended for Virtualization:
- NVIDIA Tesla (T4, M10, P6, V100 or newer)
- NVIDIA Quadro RTX 6000/8000
- ### Firmware
+ ## Host BIOS Requirements
Some hardware requires a larger PCI BAR window, for example, NVIDIA Tesla P100 or
K40m.
@@ -72,7 +55,9 @@ Some hardware vendors use a different name in BIOS, such as:
If one is using a GPU based on the Ampere architecture or later, SR-IOV
additionally needs to be enabled for the `vGPU` use-case.
- ### Kernel
+ The following steps outline the workflow for using an NVIDIA GPU with Kata.
+ ## Host Kernel Requirements
The following configurations need to be enabled on your host kernel:
@@ -85,13 +70,7 @@ The following configurations need to be enabled on your host kernel:
Your host kernel needs to be booted with `intel_iommu=on` on the kernel command
line.
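While the full option list is elided here, one quick way to check for the VFIO-related options, which VFIO passthrough typically requires, is (assuming your distribution ships the kernel config under `/boot`):
```sh
$ grep -E "^CONFIG_VFIO(_PCI|_IOMMU_TYPE1)?=" /boot/config-$(uname -r)
```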
- ## Build the Kata Components
- This section explains how to build an environment with Kata Containers bits
- supporting the GPU scenario. We first deploy and configure the regular Kata
- components, then describe how to build the guest kernel and root filesystem.
- ### Install and configure Kata Containers
+ ## Install and configure Kata Containers
To use non-large BARs devices (for example, NVIDIA Tesla T4), you need Kata
version 1.3.0 or above. Follow the [Kata Containers setup
@@ -122,7 +101,7 @@ hotplug_vfio_on_root_bus = true
pcie_root_port = 1
```
- ### Build guest kernel with GPU support
+ ## Build Kata Containers kernel with GPU support
The default guest kernel installed with Kata Containers does not provide GPU
support. To use an NVIDIA GPU with Kata Containers, you need to build a kernel
@@ -181,11 +160,11 @@ code, using `Dragonball VMM` for NVIDIA GPU `hot-plug/hot-unplug` requires apply
addition to the above kernel configuration items. Follow these steps to build for NVIDIA GPU `hot-[un]plug`
for `Dragonball`:
- ```sh
- # Prepare .config to support both upcall and nvidia gpu
+ ```sh
+ # Prepare .config to support both upcall and nvidia gpu
$ ./build-kernel.sh -v 5.10.25 -e -t dragonball -g nvidia -f setup
- # Build guest kernel to support both upcall and nvidia gpu
+ # Build guest kernel to support both upcall and nvidia gpu
$ ./build-kernel.sh -v 5.10.25 -e -t dragonball -g nvidia build
# Install guest kernel to support both upcall and nvidia gpu
@@ -217,7 +196,303 @@ Before using the new guest kernel, please update the `kernel` parameters in
kernel = "/usr/share/kata-containers/vmlinuz-nvidia-gpu.container"
```
### Build Guest OS with NVIDIA Driver and Toolkit
## NVIDIA GPU pass-through mode with Kata Containers
Use the following steps to pass an NVIDIA GPU device in pass-through mode with Kata:
1. Find the Bus-Device-Function (BDF) for the GPU device on the host:
```sh
$ sudo lspci -nn -D | grep -i nvidia
0000:d0:00.0 3D controller [0302]: NVIDIA Corporation Device [10de:20b9] (rev a1)
```
> PCI address `0000:d0:00.0` is assigned to the hardware GPU device.
> `10de:20b9` is the device ID of the hardware GPU device.
2. Find the IOMMU group for the GPU device:
```sh
$ BDF="0000:d0:00.0"
$ readlink -e /sys/bus/pci/devices/$BDF/iommu_group
/sys/kernel/iommu_groups/192
```
The previous output shows that the GPU belongs to IOMMU group 192. The next
step is to bind the GPU to the VFIO-PCI driver.
```sh
$ BDF="0000:d0:00.0"
$ DEV="/sys/bus/pci/devices/$BDF"
$ echo "vfio-pci" > $DEV/driver_override
$ echo $BDF > $DEV/driver/unbind
$ echo $BDF > /sys/bus/pci/drivers_probe
# To return the device to the standard driver, we simply clear the
# driver_override and reprobe the device, ex:
$ echo > $DEV/driver_override
$ echo $BDF > $DEV/driver/unbind
$ echo $BDF > /sys/bus/pci/drivers_probe
```
3. Check the IOMMU group number under `/dev/vfio`:
```sh
$ ls -l /dev/vfio
total 0
crw------- 1 zvonkok zvonkok 243, 0 Mar 18 03:06 192
crw-rw-rw- 1 root root 10, 196 Mar 18 02:27 vfio
```
4. Start a Kata container with the GPU device:
```sh
# You may need to `modprobe vhost-vsock` if you get
# host system doesn't support vsock: stat /dev/vhost-vsock
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch uname -r
```
5. Run `lspci` within the container to verify the GPU device is seen in the list
of the PCI devices. Note the vendor-device id of the GPU (`10de:20b9`) in the `lspci` output.
```sh
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch sh -c "lspci -nn | grep '10de:20b9'"
```
6. Additionally, you can check the PCI BARs space of the NVIDIA GPU device in the container:
```sh
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch sh -c "lspci -s 02:00.0 -vv | grep Region"
```
> **Note**: If the output lists `Region` entries for the device, the BAR space
> of the NVIDIA GPU has been successfully allocated.
## NVIDIA vGPU mode with Kata Containers
NVIDIA vGPU is a licensed product on all supported GPU boards. A software license
is required to enable all vGPU features within the guest VM. NVIDIA vGPU manager
needs to be installed on the host to configure GPUs in vGPU mode. See [NVIDIA Virtual GPU Software Documentation v14.0 through 14.1](https://docs.nvidia.com/grid/14.0/) for more details.
### NVIDIA vGPU time-sliced
In the time-sliced mode, the GPU is not partitioned and the workload uses the
whole GPU and shares access to the GPU engines. Processes are scheduled in
series. The best-effort scheduler is the default and can be exchanged for
other scheduling policies; see the documentation above for how to do that.
Beware: if you had `MIG` enabled before, disable `MIG` on the GPU if you want
to use `time-sliced` `vGPU`.
```sh
$ sudo nvidia-smi -mig 0
```
Enable the virtual functions for the physical GPU in the `sysfs` file system.
```sh
$ sudo /usr/lib/nvidia/sriov-manage -e 0000:41:00.0
```
Get the `BDF` of the available virtual function on the GPU, and choose one for the
following steps.
```sh
$ cd /sys/bus/pci/devices/0000:41:00.0/
$ ls -l | grep virtfn
```
#### List all available vGPU instances
The following shell snippet will walk the `sysfs` and only print instances
that are available, i.e., that can be created.
```sh
# The 00.0 is often the PF of the device. The VFs will have the function in the
# BDF incremented by some values, so e.g. the very first VF is 0000:41:00.4
cd /sys/bus/pci/devices/0000:41:00.0/
for vf in $(ls -d virtfn*)
do
BDF=$(basename $(readlink -f $vf))
for md in $(ls -d $vf/mdev_supported_types/*)
do
AVAIL=$(cat $md/available_instances)
NAME=$(cat $md/name)
DIR=$(basename $md)
if [ $AVAIL -gt 0 ]; then
echo "| BDF | INSTANCES | NAME | DIR |"
echo "+--------------+-----------+----------------+------------+"
printf "| %12s |%10d |%15s | %10s |\n\n" "$BDF" "$AVAIL" "$NAME" "$DIR"
fi
done
done
```
If there are available instances, you get something like this (for the first VF).
Beware that the output is highly dependent on the GPU you have; if there is no
output, check again whether `MIG` is really disabled.
```sh
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-4C | nvidia-692 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-8C | nvidia-693 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-10C | nvidia-694 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-16C | nvidia-695 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-20C | nvidia-696 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-40C | nvidia-697 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-80C | nvidia-698 |
```
Change to the `mdev_supported_types` directory for the virtual function on which
you want to create the `vGPU`. Taking the first output as an example:
```sh
$ cd virtfn0/mdev_supported_types/nvidia-692
$ UUIDGEN=$(uuidgen)
$ sudo bash -c "echo $UUIDGEN > create"
```
Confirm that the `vGPU` was created. You should see the `UUID` pointing to a
subdirectory of the `sysfs` space.
```sh
$ ls -l /sys/bus/mdev/devices/
```
Get the `IOMMU` group number and verify there is a `VFIO` device created to use
with Kata.
```sh
$ ls -l /sys/bus/mdev/devices/*/
$ ls -l /dev/vfio
```
Use the `VFIO` device created in the same way as in the pass-through use-case.
Beware that the guest needs the NVIDIA guest drivers, so one would need to build
a new guest `OS` image.
### NVIDIA vGPU MIG-backed
We're not going into detail about what `MIG` is, but briefly, it is a technology to
partition the hardware into independent instances with guaranteed quality of
service. For more details see [NVIDIA Multi-Instance GPU User Guide](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/).
First enable `MIG` mode for a GPU; depending on the platform you're running,
a reboot may be necessary. Some platforms support GPU reset.
```sh
$ sudo nvidia-smi -mig 1
```
If the platform supports a GPU reset, one can run the following; otherwise you
will get a warning to reboot the server.
```sh
$ sudo nvidia-smi --gpu-reset
```
The driver by default provides a number of profiles that users can opt into when
configuring the MIG feature.
```sh
$ sudo nvidia-smi mig -lgip
+-----------------------------------------------------------------------------+
| GPU instance profiles: |
| GPU Name ID Instances Memory P2P SM DEC ENC |
| Free/Total GiB CE JPEG OFA |
|=============================================================================|
| 0 MIG 1g.10gb 19 7/7 9.50 No 14 0 0 |
| 1 0 0 |
+-----------------------------------------------------------------------------+
| 0 MIG 1g.10gb+me 20 1/1 9.50 No 14 1 0 |
| 1 1 1 |
+-----------------------------------------------------------------------------+
| 0 MIG 2g.20gb 14 3/3 19.50 No 28 1 0 |
| 2 0 0 |
+-----------------------------------------------------------------------------+
...
```
Create the GPU instances that correspond to the `vGPU` types of the `MIG-backed`
`vGPUs` that you will create; see [NVIDIA A100 PCIe 80GB Virtual GPU Types](https://docs.nvidia.com/grid/13.0/grid-vgpu-user-guide/index.html#vgpu-types-nvidia-a100-pcie-80gb).
```sh
# MIG 1g.10gb --> vGPU A100D-1-10C
$ sudo nvidia-smi mig -cgi 19
```
List the GPU instances and get the GPU instance id to create the compute
instance.
```sh
$ sudo nvidia-smi mig -lgi # list the created GPU instances
$ sudo nvidia-smi mig -cci -gi 9 # each GPU instance can have several compute
# instances. Instance -> Workload
```
Verify that the compute instances were created within the GPU instance:
```sh
$ nvidia-smi
... snip ...
+-----------------------------------------------------------------------------+
| MIG devices: |
+------------------+----------------------+-----------+-----------------------+
| GPU GI CI MIG | Memory-Usage | Vol| Shared |
| ID ID Dev | BAR1-Usage | SM Unc| CE ENC DEC OFA JPG|
| | | ECC| |
|==================+======================+===========+=======================|
| 0 9 0 0 | 0MiB / 9728MiB | 14 0 | 1 0 0 0 0 |
| | 0MiB / 4095MiB | | |
+------------------+----------------------+-----------+-----------------------+
... snip ...
```
We can use the [snippet](#list-all-available-vgpu-instances) from before to list
the available `vGPU` instances, this time `MIG-backed`.
```sh
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 |GRID A100D-1-10C | nvidia-699 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.5 | 1 |GRID A100D-1-10C | nvidia-699 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:01.6 | 1 |GRID A100D-1-10C | nvidia-699 |
... snip ...
```
Repeat the steps after the [snippet](#list-all-available-vgpu-instances) listing
to create the corresponding `mdev` device and use the guest `OS` created in the
previous section with `time-sliced` `vGPUs`.
## Install NVIDIA Driver + Toolkit in Kata Containers Guest OS
Consult the [Developer-Guide](https://github.com/kata-containers/kata-containers/blob/main/docs/Developer-Guide.md#create-a-rootfs-image) on how to create a
rootfs base image for a distribution of your choice. This is going to be used as
@@ -308,12 +583,9 @@ Enable the `guest_hook_path` in Kata's `configuration.toml`
guest_hook_path = "/usr/share/oci/hooks"
```
- As the last step, one can remove the additional packages and files that were added
- to the `$ROOTFS_DIR` to keep it as small as possible.
One has now built an NVIDIA rootfs and kernel, and we can run any GPU container
without installing the drivers into the container. Check NVIDIA device status
- with `nvidia-smi`:
+ with `nvidia-smi`
```sh
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/nvidia/cuda:11.6.0-base-ubuntu20.04" cuda nvidia-smi
@@ -339,309 +611,8 @@ Fri Mar 18 10:36:59 2022
+-----------------------------------------------------------------------------+
```
## Usage Examples with Kata Containers
The following sections give usage examples based on the different modes.
### NVIDIA GPU passthrough mode
Use the following steps to pass an NVIDIA GPU device in passthrough mode with Kata:
1. Find the Bus-Device-Function (BDF) for the GPU device on the host:
```sh
$ sudo lspci -nn -D | grep -i nvidia
0000:d0:00.0 3D controller [0302]: NVIDIA Corporation Device [10de:20b9] (rev a1)
```
> PCI address `0000:d0:00.0` is assigned to the hardware GPU device.
> `10de:20b9` is the device ID of the hardware GPU device.
2. Find the IOMMU group for the GPU device:
```sh
$ BDF="0000:d0:00.0"
$ readlink -e /sys/bus/pci/devices/$BDF/iommu_group
/sys/kernel/iommu_groups/192
```
The previous output shows that the GPU belongs to IOMMU group 192. The next
step is to bind the GPU to the VFIO-PCI driver.
```sh
$ BDF="0000:d0:00.0"
$ DEV="/sys/bus/pci/devices/$BDF"
$ echo "vfio-pci" > $DEV/driver_override
$ echo $BDF > $DEV/driver/unbind
$ echo $BDF > /sys/bus/pci/drivers_probe
# To return the device to the standard driver, we simply clear the
# driver_override and reprobe the device, ex:
$ echo > $DEV/driver_override
$ echo $BDF > $DEV/driver/unbind
$ echo $BDF > /sys/bus/pci/drivers_probe
```
3. Check the IOMMU group number under `/dev/vfio`:
```sh
$ ls -l /dev/vfio
total 0
crw------- 1 zvonkok zvonkok 243, 0 Mar 18 03:06 192
crw-rw-rw- 1 root root 10, 196 Mar 18 02:27 vfio
```
4. Start a Kata container with the GPU device:
```sh
# You may need to `modprobe vhost-vsock` if you get
# host system doesn't support vsock: stat /dev/vhost-vsock
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch uname -r
```
5. Run `lspci` within the container to verify the GPU device is seen in the list
of the PCI devices. Note the vendor-device id of the GPU (`10de:20b9`) in the `lspci` output.
```sh
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch sh -c "lspci -nn | grep '10de:20b9'"
```
6. Additionally, you can check the PCI BARs space of the NVIDIA GPU device in the container:
```sh
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch sh -c "lspci -s 02:00.0 -vv | grep Region"
```
> **Note**: If the output lists `Region` entries for the device, the BAR space
> of the NVIDIA GPU has been successfully allocated.
### NVIDIA vGPU mode
NVIDIA vGPU is a licensed product on all supported GPU boards. A software license
is required to enable all vGPU features within the guest VM. NVIDIA vGPU manager
needs to be installed on the host to configure GPUs in vGPU mode. See
[NVIDIA Virtual GPU Software Documentation v14.0 through 14.1](https://docs.nvidia.com/grid/14.0/)
for more details.
#### NVIDIA vGPU time-sliced
In the time-sliced mode, the GPU is not partitioned and the workload uses the
whole GPU and shares access to the GPU engines. Processes are scheduled in
series. The best-effort scheduler is the default and can be exchanged for
other scheduling policies; see the documentation above for how to do that.
Beware: if you had `MIG` enabled before, disable `MIG` on the GPU if you want
to use `time-sliced` `vGPU`.
```sh
$ sudo nvidia-smi -mig 0
```
Enable the virtual functions for the physical GPU in the `sysfs` file system.
```sh
$ sudo /usr/lib/nvidia/sriov-manage -e 0000:41:00.0
```
Get the `BDF` of the available virtual function on the GPU, and choose one for the
following steps.
```sh
$ cd /sys/bus/pci/devices/0000:41:00.0/
$ ls -l | grep virtfn
```
##### List all available vGPU instances
The following shell snippet will walk the `sysfs` and only print instances
that are available, i.e., that can be created.
```sh
# The 00.0 is often the PF of the device. The VFs will have the function in the
# BDF incremented by some values so e.g. the very first VF is 0000:41:00.4
cd /sys/bus/pci/devices/0000:41:00.0/
for vf in $(ls -d virtfn*)
do
BDF=$(basename $(readlink -f $vf))
for md in $(ls -d $vf/mdev_supported_types/*)
do
AVAIL=$(cat $md/available_instances)
NAME=$(cat $md/name)
DIR=$(basename $md)
if [ $AVAIL -gt 0 ]; then
echo "| BDF | INSTANCES | NAME | DIR |"
echo "+--------------+-----------+----------------+------------+"
printf "| %12s |%10d |%15s | %10s |\n\n" "$BDF" "$AVAIL" "$NAME" "$DIR"
fi
done
done
```
If there are available instances, you get something like this (for the first VF).
Beware that the output is highly dependent on the GPU you have; if there is no
output, check again whether `MIG` is really disabled.
```sh
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-4C | nvidia-692 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-8C | nvidia-693 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-10C | nvidia-694 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-16C | nvidia-695 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-20C | nvidia-696 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-40C | nvidia-697 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-80C | nvidia-698 |
```
Change to the `mdev_supported_types` directory for the virtual function on which
you want to create the `vGPU`. Taking the first output as an example:
```sh
$ cd virtfn0/mdev_supported_types/nvidia-692
$ UUIDGEN=$(uuidgen)
$ sudo bash -c "echo $UUIDGEN > create"
```
Confirm that the `vGPU` was created. You should see the `UUID` pointing to a
subdirectory of the `sysfs` space.
```sh
$ ls -l /sys/bus/mdev/devices/
```
Get the `IOMMU` group number and verify there is a `VFIO` device created to use
with Kata.
```sh
$ ls -l /sys/bus/mdev/devices/*/
$ ls -l /dev/vfio
```
Use the `VFIO` device that was created in the same way as in the passthrough
use-case. Beware that the guest needs the NVIDIA guest drivers, so you need to
build a new guest `OS` image.
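If you need to tear the `vGPU` down again, the `mdev` `sysfs` interface also
provides a `remove` node; a minimal sketch, reusing the `UUID` from above:
```sh
$ sudo bash -c "echo 1 > /sys/bus/mdev/devices/$UUIDGEN/remove"
```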
#### NVIDIA vGPU MIG-backed
We will not go into detail about what `MIG` is; briefly, it is a technology to
partition the hardware into independent instances with guaranteed quality of
service. For more details see
[NVIDIA Multi-Instance GPU User Guide](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/).
First enable `MIG` mode for a GPU. Depending on the platform you are running,
a reboot may be necessary; some platforms support a GPU reset instead.
```sh
$ sudo nvidia-smi -mig 1
```
If the platform supports a GPU reset, run the following command; otherwise you
will get a warning to reboot the server.
```sh
$ sudo nvidia-smi --gpu-reset
```
By default the driver provides a number of profiles that users can opt in to
when configuring the `MIG` feature.
```sh
$ sudo nvidia-smi mig -lgip
+-----------------------------------------------------------------------------+
| GPU instance profiles: |
| GPU Name ID Instances Memory P2P SM DEC ENC |
| Free/Total GiB CE JPEG OFA |
|=============================================================================|
| 0 MIG 1g.10gb 19 7/7 9.50 No 14 0 0 |
| 1 0 0 |
+-----------------------------------------------------------------------------+
| 0 MIG 1g.10gb+me 20 1/1 9.50 No 14 1 0 |
| 1 1 1 |
+-----------------------------------------------------------------------------+
| 0 MIG 2g.20gb 14 3/3 19.50 No 28 1 0 |
| 2 0 0 |
+-----------------------------------------------------------------------------+
...
```
Create the GPU instances that correspond to the `vGPU` types of the `MIG-backed`
`vGPUs` you want to create; see
[NVIDIA A100 PCIe 80GB Virtual GPU Types](https://docs.nvidia.com/grid/13.0/grid-vgpu-user-guide/index.html#vgpu-types-nvidia-a100-pcie-80gb).
```sh
# MIG 1g.10gb --> vGPU A100D-1-10C
$ sudo nvidia-smi mig -cgi 19
```
List the GPU instances and get the GPU instance id to create the compute
instance.
```sh
$ sudo nvidia-smi mig -lgi # list the created GPU instances
$ sudo nvidia-smi mig -cci -gi 9 # each GPU instance can have several compute
# instances. Instance -> Workload
```
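Optionally, list the created compute instances directly as well:
```sh
$ sudo nvidia-smi mig -lci # list the created compute instances
```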
Verify that the compute instances were created within the GPU instance:
```sh
$ nvidia-smi
... snip ...
+-----------------------------------------------------------------------------+
| MIG devices: |
+------------------+----------------------+-----------+-----------------------+
| GPU GI CI MIG | Memory-Usage | Vol| Shared |
| ID ID Dev | BAR1-Usage | SM Unc| CE ENC DEC OFA JPG|
| | | ECC| |
|==================+======================+===========+=======================|
| 0 9 0 0 | 0MiB / 9728MiB | 14 0 | 1 0 0 0 0 |
| | 0MiB / 4095MiB | | |
+------------------+----------------------+-----------+-----------------------+
... snip ...
```
We can use the [snippet](#list-all-available-vgpu-instances) from before to list
the available `vGPU` instances, which are now `MIG-backed`.
```sh
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 |GRID A100D-1-10C | nvidia-699 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.5 | 1 |GRID A100D-1-10C | nvidia-699 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:01.6 | 1 |GRID A100D-1-10C | nvidia-699 |
... snip ...
```
Repeat the steps after the [snippet](#list-all-available-vgpu-instances) listing
to create the corresponding `mdev` device, and reuse the guest `OS` image created
in the previous section for `time-sliced` `vGPUs`.
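When the `MIG-backed` `vGPUs` are no longer needed, the setup can be torn down
in reverse order; a sketch (destroy compute instances, then GPU instances, then
optionally disable `MIG` again):
```sh
$ sudo nvidia-smi mig -dci # destroy the compute instances
$ sudo nvidia-smi mig -dgi # destroy the GPU instances
$ sudo nvidia-smi -mig 0   # optionally disable MIG mode again
```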
As a last step, you can remove the additional packages and files that were added
to `$ROOTFS_DIR` to keep it as small as possible.
## References

View File

@@ -1,20 +1,24 @@
# Table of Contents
**Note:** This guide used to contain an end-to-end flow to build a
custom Kata Containers root filesystem with the QAT out-of-tree SR-IOV virtual
function driver and run QAT-enabled containers. The former is no longer necessary,
so the instructions were dropped. If the use-case is still of interest, please file
an issue in either of the QAT Kubernetes specific repos linked below.
# Introduction
Intel® QuickAssist Technology (QAT) provides hardware acceleration
for security (cryptography) and compression. Kata Containers can enable
these acceleration functions for containers using QAT SR-IOV with the
support from [Intel QAT Device Plugin for Kubernetes](https://github.com/intel/intel-device-plugins-for-kubernetes)
or [Intel QAT DRA Resource Driver for Kubernetes](https://github.com/intel/intel-resource-drivers-for-kubernetes).
for security (cryptography) and compression. These instructions cover the
steps for the latest [Ubuntu LTS release](https://ubuntu.com/download/desktop)
which already include the QAT host driver. These instructions can be adapted to
any Linux distribution. These instructions guide the user on how to download
the kernel sources, compile kernel driver modules against those sources, and
load them onto the host as well as preparing a specially built Kata Containers
kernel and custom Kata Containers rootfs.
## More Information
* Download kernel sources
* Compile Kata kernel
* Compile kernel driver modules against those sources
* Download rootfs
* Add driver modules to rootfs
* Build rootfs image
## Helpful Links before starting
[Intel® QuickAssist Technology at `01.org`](https://www.intel.com/content/www/us/en/developer/topic-technology/open/quick-assist-technology/overview.html)
@@ -22,6 +26,554 @@ or [Intel QAT DRA Resource Driver for Kubernetes](https://github.com/intel/intel
[Intel Device Plugin for Kubernetes](https://github.com/intel/intel-device-plugins-for-kubernetes)
[Intel DRA Resource Driver for Kubernetes](https://github.com/intel/intel-resource-drivers-for-kubernetes)
[Intel® QuickAssist Technology for Crypto Poll Mode Driver](https://dpdk-docs.readthedocs.io/en/latest/cryptodevs/qat.html)
## Steps to enable Intel® QAT in Kata Containers
There are some steps to complete only once, some steps to complete with every
reboot, and some steps to complete when the host kernel changes.
## Script variables
The following list of variables must be set before running through the
scripts. These variables refer to locations to store modules and configuration
files on the host and links to the drivers to use. Modify these as
needed to point to updated drivers or different install locations.
### Set environment variables (Every Reboot)
Make sure to check [`01.org`](https://www.intel.com/content/www/us/en/developer/topic-technology/open/quick-assist-technology/overview.html) for
the latest driver.
```bash
$ export QAT_DRIVER_VER=qat1.7.l.4.14.0-00031.tar.gz
$ export QAT_DRIVER_URL=https://downloadmirror.intel.com/30178/eng/${QAT_DRIVER_VER}
$ export QAT_CONF_LOCATION=~/QAT_conf
$ export QAT_DOCKERFILE=https://raw.githubusercontent.com/intel/intel-device-plugins-for-kubernetes/main/demo/openssl-qat-engine/Dockerfile
$ export QAT_SRC=~/src/QAT
$ export GOPATH=~/src/go
$ export KATA_KERNEL_LOCATION=~/kata
$ export KATA_ROOTFS_LOCATION=~/kata
```
## Prepare the Ubuntu Host
The host could be a bare metal instance or a virtual machine. If using a
virtual machine, make sure that KVM nesting is enabled. The following
instructions reference an Intel® C62X chipset. Some of the instructions must be
modified if using a different Intel® QAT device. The Intel® QAT chipset can be
identified by executing the following.
### Identify which PCI Bus the Intel® QAT card is on
```bash
$ for i in 0434 0435 37c8 1f18 1f19; do lspci -d 8086:$i; done
```
### Install necessary packages for Ubuntu
These packages are necessary to compile the Kata kernel and the Intel® QAT
driver, and to prepare the rootfs for Kata. [Docker](https://docs.docker.com/engine/install/ubuntu/)
also needs to be installed to be able to build the rootfs. To test that
everything works, a Kubernetes pod is started requesting Intel® QAT resources.
For the passthrough of the virtual functions, the kernel boot parameter needs
to include `intel_iommu=on`.
```bash
$ sudo apt update
$ sudo apt install -y golang-go build-essential python pkg-config zlib1g-dev libudev-dev bison libelf-dev flex libtool automake autotools-dev autoconf bc libpixman-1-dev coreutils libssl-dev
$ sudo sed -i 's/GRUB_CMDLINE_LINUX_DEFAULT=""/GRUB_CMDLINE_LINUX_DEFAULT="intel_iommu=on"/' /etc/default/grub
$ sudo update-grub
$ sudo reboot
```
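After the reboot, it is worth confirming that the IOMMU parameter took effect
before continuing; for example:
```bash
$ grep -o "intel_iommu=on" /proc/cmdline
$ sudo dmesg | grep -i -e DMAR -e IOMMU | head
```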
### Download Intel® QAT drivers
This will download the [Intel® QAT drivers](https://www.intel.com/content/www/us/en/developer/topic-technology/open/quick-assist-technology/overview.html).
Make sure to check the website for the latest version.
```bash
$ mkdir -p $QAT_SRC
$ cd $QAT_SRC
$ curl -L $QAT_DRIVER_URL | tar zx
```
### Copy Intel® QAT configuration files and enable virtual functions
Modify the instructions below as necessary if using a different Intel® QAT hardware
platform. You can learn more about customizing configuration files at the
[Intel® QAT Engine repository](https://github.com/intel/QAT_Engine/#copy-the-correct-intel-quickassist-technology-driver-config-files).
This section starts from a base config file and changes the `SSL` section to
`SHIM` to support the OpenSSL engine. There are more tweaks that you can make
depending on the use case and how many Intel® QAT engines should be run. You
can find more information about how to customize in the
[Intel® QuickAssist Technology Software for Linux* - Programmer's Guide.](https://www.intel.com/content/www/us/en/content-details/709196/intel-quickassist-technology-api-programmer-s-guide.html)
> **Note: This section assumes that an Intel® QAT `c6xx` platform is used.**
```bash
$ mkdir -p $QAT_CONF_LOCATION
$ cp $QAT_SRC/quickassist/utilities/adf_ctl/conf_files/c6xxvf_dev0.conf.vm $QAT_CONF_LOCATION/c6xxvf_dev0.conf
$ sed -i 's/\[SSL\]/\[SHIM\]/g' $QAT_CONF_LOCATION/c6xxvf_dev0.conf
```
### Expose and Bind Intel® QAT virtual functions to VFIO-PCI (Every reboot)
To enable virtual functions, the host OS should have IOMMU groups enabled. In
the UEFI Firmware Intel® Virtualization Technology for Directed I/O
(Intel® VT-d) must be enabled. Also, the kernel boot parameter should be
`intel_iommu=on` or `intel_iommu=on,igfx_off`. This should have been set by
the instructions above; check the output of `/proc/cmdline` to confirm. The
following commands assume you installed an Intel® QAT card, IOMMU is on, and
VT-d is enabled. The vendor and device ID are added to the `VFIO-PCI` driver so
that each exposed virtual function can be bound to the `VFIO-PCI` driver. Once
complete, each virtual function passes into a Kata Containers container using
the PCIe device passthrough feature. For Kubernetes, the
[Intel device plugin](https://github.com/intel/intel-device-plugins-for-kubernetes)
for Kubernetes handles the binding of the driver, but the VFs still must be
enabled.
```bash
$ sudo modprobe vfio-pci
$ QAT_PCI_BUS_PF_NUMBERS=$((lspci -d :435 && lspci -d :37c8 && lspci -d :19e2 && lspci -d :6f54) | cut -d ' ' -f 1)
$ QAT_PCI_BUS_PF_1=$(echo $QAT_PCI_BUS_PF_NUMBERS | cut -d ' ' -f 1)
$ echo 16 | sudo tee /sys/bus/pci/devices/0000:$QAT_PCI_BUS_PF_1/sriov_numvfs
$ QAT_PCI_ID_VF=$(cat /sys/bus/pci/devices/0000:${QAT_PCI_BUS_PF_1}/virtfn0/uevent | grep PCI_ID)
$ QAT_VENDOR_AND_ID_VF=$(echo ${QAT_PCI_ID_VF/PCI_ID=} | sed 's/:/ /')
$ echo $QAT_VENDOR_AND_ID_VF | sudo tee --append /sys/bus/pci/drivers/vfio-pci/new_id
```
Loop through all the virtual functions and bind to the VFIO driver
```bash
$ for f in /sys/bus/pci/devices/0000:$QAT_PCI_BUS_PF_1/virtfn*
do QAT_PCI_BUS_VF=$(basename $(readlink $f))
echo $QAT_PCI_BUS_VF | sudo tee --append /sys/bus/pci/drivers/c6xxvf/unbind
echo $QAT_PCI_BUS_VF | sudo tee --append /sys/bus/pci/drivers/vfio-pci/bind
done
```
### Check Intel® QAT virtual functions are enabled
If the following command returns empty, then the virtual functions are not
properly enabled. This command checks the enumerated device IDs for just the
virtual functions. Using the Intel® QAT `c6xx` as an example, the physical device
ID is `37c8` and the virtual function device ID is `37c9`. The following command
checks if VFs are enabled for any of the currently known Intel® QAT device IDs.
The subsequent `ls` command should show the 16 VFs bound to `VFIO-PCI`.
```bash
$ for i in 0442 0443 37c9 19e3; do lspci -d 8086:$i; done
```
Another way to check is to see which PCI devices `VFIO-PCI` is mapped to. The
list should match the device IDs of the VFs.
```bash
$ ls -la /sys/bus/pci/drivers/vfio-pci
```
## Prepare Kata Containers
### Download Kata kernel Source
This example automatically uses the latest Kata kernel supported by Kata. It
follows the instructions from the
[packaging kernel repository](../../tools/packaging/kernel)
and uses the latest Kata kernel
[config](../../tools/packaging/kernel/configs).
There are some patches that must be installed as well, which the
`build-kernel.sh` script should automatically apply. If you are using a
different kernel version, then you might need to manually apply them. Since
the Kata Containers kernel has a minimal set of kernel flags set, you must
create an Intel® QAT kernel fragment with the necessary `CONFIG_CRYPTO_*` options set.
Update the config to set some of the `CRYPTO` flags to enabled. This might
change with different kernel versions. The following instructions were tested
with kernel `v5.4.0-64-generic`.
```bash
$ mkdir -p $GOPATH
$ cd $GOPATH
$ go get -v github.com/kata-containers/kata-containers
$ cat << EOF > $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/configs/fragments/common/qat.conf
CONFIG_PCIEAER=y
CONFIG_UIO=y
CONFIG_CRYPTO_HW=y
CONFIG_CRYPTO_DEV_QAT_C62XVF=m
CONFIG_CRYPTO_CBC=y
CONFIG_MODULES=y
CONFIG_MODULE_SIG=y
CONFIG_CRYPTO_AUTHENC=y
CONFIG_CRYPTO_DH=y
EOF
$ $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/build-kernel.sh setup
```
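To double-check that the Intel® QAT fragment was picked up, you can grep the
generated kernel config in the source directory that the `setup` step creates
(the `kata-linux-*` directory name below is an assumption based on the default
naming):
```bash
$ grep "CONFIG_CRYPTO_DEV_QAT" $GOPATH/kata-linux-*/.config
```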
### Build Kata kernel
```bash
$ cd $GOPATH
$ export LINUX_VER=$(ls -d kata-linux-*)
$ sed -i 's/EXTRAVERSION =/EXTRAVERSION = .qat.container/' $LINUX_VER/Makefile
$ $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/build-kernel.sh build
```
### Copy Kata kernel
```bash
$ export KATA_KERNEL_NAME=vmlinux-${LINUX_VER}_qat
$ mkdir -p $KATA_KERNEL_LOCATION
$ cp ${GOPATH}/${LINUX_VER}/vmlinux ${KATA_KERNEL_LOCATION}/${KATA_KERNEL_NAME}
```
### Prepare Kata root filesystem
These instructions build upon the OS builder instructions located in the
[Developer Guide](../Developer-Guide.md). At this point it is recommended that
[Docker](https://docs.docker.com/engine/install/ubuntu/) is installed first, and
then [Kata-deploy](../../tools/packaging/kata-deploy)
is used to install Kata. This will make sure that the correct `agent` version
is installed into the rootfs in the steps below.
The following instructions use Ubuntu as the root filesystem with systemd as
the init and will add in the `kmod` binary, which is not a standard binary in
a Kata rootfs image. The `kmod` binary is necessary to load the Intel® QAT
kernel modules when the virtual machine rootfs boots.
```bash
$ export OSBUILDER=$GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder
$ export ROOTFS_DIR=${OSBUILDER}/rootfs-builder/rootfs
$ export EXTRA_PKGS='kmod'
```
Make sure that the `kata-agent` version matches the installed `kata-runtime`
version. Also make sure the `kata-runtime` install location is in your `PATH`
variable. The following `AGENT_VERSION` can be set manually to match
the `kata-runtime` version if the following commands don't work.
```bash
$ export PATH=$PATH:/opt/kata/bin
$ cd $GOPATH
$ export AGENT_VERSION=$(kata-runtime version | head -n 1 | grep -o "[0-9.]\+")
$ cd ${OSBUILDER}/rootfs-builder
$ sudo rm -rf ${ROOTFS_DIR}
$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SECCOMP=no ./rootfs.sh ubuntu'
```
### Compile Intel® QAT drivers for Kata Containers kernel and add to Kata Containers rootfs
After the Kata Containers kernel builds with the proper configuration flags,
you must build the Intel® QAT drivers against that Kata Containers kernel
version in a similar way they were previously built for the host OS. You must
set the `KERNEL_SOURCE_ROOT` variable to the Kata Containers kernel source
directory and build the Intel® QAT drivers again. The `make` command will
install the Intel® QAT modules into the Kata rootfs.
```bash
$ cd $GOPATH
$ export LINUX_VER=$(ls -d kata-linux-*)
$ export KERNEL_MAJOR_VERSION=$(awk '/^VERSION =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
$ export KERNEL_PATCHLEVEL=$(awk '/^PATCHLEVEL =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
$ export KERNEL_SUBLEVEL=$(awk '/^SUBLEVEL =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
$ export KERNEL_EXTRAVERSION=$(awk '/^EXTRAVERSION =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
$ export KERNEL_ROOTFS_DIR=${KERNEL_MAJOR_VERSION}.${KERNEL_PATCHLEVEL}.${KERNEL_SUBLEVEL}${KERNEL_EXTRAVERSION}
$ cd $QAT_SRC
$ KERNEL_SOURCE_ROOT=$GOPATH/$LINUX_VER ./configure --enable-icp-sriov=guest
$ sudo -E make all -j $(nproc)
$ sudo -E make INSTALL_MOD_PATH=$ROOTFS_DIR qat-driver-install -j $(nproc)
```
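Before building the image, it does not hurt to verify that the modules actually
landed in the rootfs:
```bash
$ find ${ROOTFS_DIR}/lib/modules -name "*qat*"
```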
The `usdm_drv` module also needs to be copied into the rootfs modules path and
`depmod` should be run.
```bash
$ sudo cp $QAT_SRC/build/usdm_drv.ko $ROOTFS_DIR/lib/modules/${KERNEL_ROOTFS_DIR}/updates/drivers
$ sudo depmod -a -b ${ROOTFS_DIR} ${KERNEL_ROOTFS_DIR}
$ cd ${OSBUILDER}/image-builder
$ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh ${ROOTFS_DIR}'
```
> **Note: Ignore any errors on modules.builtin and modules.order when running
> `depmod`.**
### Copy Kata rootfs
```bash
$ mkdir -p $KATA_ROOTFS_LOCATION
$ cp ${OSBUILDER}/image-builder/kata-containers.img $KATA_ROOTFS_LOCATION
```
## Verify Intel® QAT works in a container
The following instructions use an OpenSSL Dockerfile that builds the
Intel® QAT engine to allow OpenSSL to offload crypto functions. It is a
convenient way to test that VFIO device passthrough for the Intel® QAT VFs is
working properly with the Kata Containers VM.
### Build OpenSSL Intel® QAT engine container
Use the OpenSSL Intel® QAT [Dockerfile](https://github.com/intel/intel-device-plugins-for-kubernetes/tree/main/demo/openssl-qat-engine)
to build a container image with an optimized OpenSSL engine for
Intel® QAT. Using `docker build` with the Kata Containers runtime can sometimes
have issues. Therefore, make sure that `runc` is the default Docker container
runtime.
```bash
$ cd $QAT_SRC
$ curl -O $QAT_DOCKERFILE
$ sudo docker build -t openssl-qat-engine .
```
> **Note: The Intel® QAT driver version in this container might not match the
> Intel® QAT driver compiled and loaded on the host when compiling.**
### Test Intel® QAT with the ctr tool
The `ctr` tool can be used to interact with the containerd daemon. It may be
more convenient to use this tool to verify the kernel and image instead of
setting up a Kubernetes cluster. The correct Kata runtimes need to be added
to the containerd `config.toml`. Below is a sample snippet that can be added
to allow QEMU and Cloud Hypervisor (CLH) to work with `ctr`.
```toml
[plugins.cri.containerd.runtimes.kata-qemu]
runtime_type = "io.containerd.kata-qemu.v2"
privileged_without_host_devices = true
pod_annotations = ["io.katacontainers.*"]
[plugins.cri.containerd.runtimes.kata-qemu.options]
ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration-qemu.toml"
[plugins.cri.containerd.runtimes.kata-clh]
runtime_type = "io.containerd.kata-clh.v2"
privileged_without_host_devices = true
pod_annotations = ["io.katacontainers.*"]
[plugins.cri.containerd.runtimes.kata-clh.options]
ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration-clh.toml"
```
In addition, containerd expects the shim binary to be in `/usr/local/bin`, so
add the following small wrapper scripts to be able to use either QEMU or
Cloud Hypervisor with Kata.
```bash
$ echo '#!/usr/bin/env bash' | sudo tee /usr/local/bin/containerd-shim-kata-qemu-v2
$ echo 'KATA_CONF_FILE=/opt/kata/share/defaults/kata-containers/configuration-qemu.toml /opt/kata/bin/containerd-shim-kata-v2 $@' | sudo tee -a /usr/local/bin/containerd-shim-kata-qemu-v2
$ sudo chmod +x /usr/local/bin/containerd-shim-kata-qemu-v2
$ echo '#!/usr/bin/env bash' | sudo tee /usr/local/bin/containerd-shim-kata-clh-v2
$ echo 'KATA_CONF_FILE=/opt/kata/share/defaults/kata-containers/configuration-clh.toml /opt/kata/bin/containerd-shim-kata-v2 $@' | sudo tee -a /usr/local/bin/containerd-shim-kata-clh-v2
$ sudo chmod +x /usr/local/bin/containerd-shim-kata-clh-v2
```
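containerd only reads its `config.toml` at startup, so restart it after the
changes above (assuming a systemd-managed host):
```bash
$ sudo systemctl restart containerd
```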
After the OpenSSL image is built and imported into containerd, an Intel® QAT
virtual function exposed in the step above can be added to the `ctr` command.
Make sure to change the `/dev/vfio` number to one that actually exists on the
host system. When using the `ctr` tool, the `configuration.toml` for Kata needs
to point to the custom Kata kernel and rootfs built above, and the Intel® QAT
modules in the Kata rootfs need to load at boot. The following steps assume that
`kata-deploy` was used to install Kata and QEMU is being tested. If using a
different hypervisor, a different install method for Kata, or a different
Intel® QAT chipset, then the commands will need to be modified.
> **Note: The following was tested with
[containerd v1.4.6](https://github.com/containerd/containerd/releases/tag/v1.4.6).**
```bash
$ config_file="/opt/kata/share/defaults/kata-containers/configuration-qemu.toml"
$ sudo sed -i "/kernel =/c kernel = "\"${KATA_ROOTFS_LOCATION}/${KATA_KERNEL_NAME}\""" $config_file
$ sudo sed -i "/image =/c image = "\"${KATA_KERNEL_LOCATION}/kata-containers.img\""" $config_file
$ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 modules-load=usdm_drv,qat_c62xvf"/g' $config_file
$ sudo docker save -o openssl-qat-engine.tar openssl-qat-engine:latest
$ sudo ctr images import openssl-qat-engine.tar
$ sudo ctr run --runtime io.containerd.run.kata-qemu.v2 --privileged -t --rm --device=/dev/vfio/180 --mount type=bind,src=/dev,dst=/dev,options=rbind:rw --mount type=bind,src=${QAT_CONF_LOCATION}/c6xxvf_dev0.conf,dst=/etc/c6xxvf_dev0.conf,options=rbind:rw docker.io/library/openssl-qat-engine:latest bash
```
Below are some commands to run in the container image to verify Intel® QAT is
working
```sh
root@67561dc2757a/ # cat /proc/modules
qat_c62xvf 16384 - - Live 0xffffffffc00d9000 (OE)
usdm_drv 86016 - - Live 0xffffffffc00e8000 (OE)
intel_qat 249856 - - Live 0xffffffffc009b000 (OE)
root@67561dc2757a/ # adf_ctl restart
Restarting all devices.
Processing /etc/c6xxvf_dev0.conf
root@67561dc2757a/ # adf_ctl status
Checking status of all devices.
There is 1 QAT acceleration device(s) in the system:
qat_dev0 - type: c6xxvf, inst_id: 0, node_id: 0, bsf: 0000:01:01.0, #accel: 1 #engines: 1 state: up
root@67561dc2757a/ # openssl engine -c -t qat-hw
(qat-hw) Reference implementation of QAT crypto engine v0.6.1
[RSA, DSA, DH, AES-128-CBC-HMAC-SHA1, AES-128-CBC-HMAC-SHA256, AES-256-CBC-HMAC-SHA1, AES-256-CBC-HMAC-SHA256, TLS1-PRF, HKDF, X25519, X448]
[ available ]
```
### Test Intel® QAT in Kubernetes
Start a Kubernetes cluster with containerd as the CRI. The host should
already be set up with 16 virtual functions of the Intel® QAT card bound to
`VFIO-PCI`. Verify this by looking in `/dev/vfio` for a listing of devices.
You might need to disable Docker before initializing Kubernetes. Be aware
that the OpenSSL container image built above will need to be exported from
Docker and imported into containerd.
If Kata is installed through [`kata-deploy`](../../tools/packaging/kata-deploy/helm-chart/README.md)
there will be multiple `configuration.toml` files associated with different
hypervisors. Rather than adding the custom Kata kernel, Kata rootfs, and
kernel modules to each `configuration.toml` as the default, use
[annotations](../how-to/how-to-load-kernel-modules-with-kata.md)
in the Kubernetes YAML file to tell Kata which kernel and rootfs to use. The
easy way to do this is to use `kata-deploy` which will install the Kata binaries
to `/opt` and properly configure the `/etc/containerd/config.toml` with annotation
support. However, the `configuration.toml` needs to enable support for
annotations as well. The following configures both the QEMU and Cloud Hypervisor
`configuration.toml` files that are currently available with Kata Containers
versions 2.0 and higher.
```bash
$ sudo sed -i 's/enable_annotations\s=\s\[\]/enable_annotations = [".*"]/' /opt/kata/share/defaults/kata-containers/configuration-qemu.toml
$ sudo sed -i 's/enable_annotations\s=\s\[\]/enable_annotations = [".*"]/' /opt/kata/share/defaults/kata-containers/configuration-clh.toml
```
Export the OpenSSL image from Docker and import into containerd.
```bash
$ sudo docker save -o openssl-qat-engine.tar openssl-qat-engine:latest
$ sudo ctr -n=k8s.io images import openssl-qat-engine.tar
```
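You can verify the import succeeded by listing the images in the `k8s.io`
namespace:
```bash
$ sudo ctr -n=k8s.io images ls | grep openssl-qat-engine
```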
The [Intel® QAT Plugin](https://github.com/intel/intel-device-plugins-for-kubernetes/blob/main/cmd/qat_plugin/README.md)
needs to be started so that the virtual functions can be discovered and
used by Kubernetes.
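As a sketch of how deploying the plugin can look (the exact kustomization path
is an assumption; check the plugin README for the deployment method matching
your plugin release):
```bash
$ kubectl apply -k 'https://github.com/intel/intel-device-plugins-for-kubernetes/deployments/qat_plugin'
$ kubectl describe nodes | grep qat.intel.com/generic
```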
The following YAML file can be used to start a Kata container with Intel® QAT
support. If Kata is installed with `kata-deploy`, then the containerd
`config.toml` should have all of the Kata runtime classes already
populated and annotations supported. To use an Intel® QAT virtual function, the
Intel® QAT plugin needs to be started after the VFs are bound to `VFIO-PCI` as
described [above](#expose-and-bind-intel-qat-virtual-functions-to-vfio-pci-every-reboot).
Edit the following to point to the correct Kata kernel and rootfs location
built with Intel® QAT support.
```bash
$ cat << EOF > kata-openssl-qat.yaml
apiVersion: v1
kind: Pod
metadata:
name: kata-openssl-qat
labels:
app: kata-openssl-qat
annotations:
io.katacontainers.config.hypervisor.kernel: "$KATA_KERNEL_LOCATION/$KATA_KERNEL_NAME"
io.katacontainers.config.hypervisor.image: "$KATA_ROOTFS_LOCATION/kata-containers.img"
io.katacontainers.config.hypervisor.kernel_params: "modules-load=usdm_drv,qat_c62xvf"
spec:
runtimeClassName: kata-qemu
containers:
- name: kata-openssl-qat
image: docker.io/library/openssl-qat-engine:latest
imagePullPolicy: IfNotPresent
resources:
limits:
qat.intel.com/generic: 1
cpu: 1
securityContext:
capabilities:
add: ["IPC_LOCK", "SYS_ADMIN"]
volumeMounts:
- mountPath: /etc/c6xxvf_dev0.conf
name: etc-mount
- mountPath: /dev
name: dev-mount
volumes:
- name: dev-mount
hostPath:
path: /dev
- name: etc-mount
hostPath:
path: $QAT_CONF_LOCATION/c6xxvf_dev0.conf
EOF
```
Use `kubectl` to start the pod. Verify that Intel® QAT card acceleration is
working with the Intel® QAT engine.
```bash
$ kubectl apply -f kata-openssl-qat.yaml
```
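Wait for the pod to reach the `Running` state before executing commands in it:
```bash
$ kubectl get pod kata-openssl-qat
```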
```sh
$ kubectl exec -it kata-openssl-qat -- adf_ctl restart
Restarting all devices.
Processing /etc/c6xxvf_dev0.conf
$ kubectl exec -it kata-openssl-qat -- adf_ctl status
Checking status of all devices.
There is 1 QAT acceleration device(s) in the system:
qat_dev0 - type: c6xxvf, inst_id: 0, node_id: 0, bsf: 0000:01:01.0, #accel: 1 #engines: 1 state: up
$ kubectl exec -it kata-openssl-qat -- openssl engine -c -t qat-hw
(qat-hw) Reference implementation of QAT crypto engine v0.6.1
[RSA, DSA, DH, AES-128-CBC-HMAC-SHA1, AES-128-CBC-HMAC-SHA256, AES-256-CBC-HMAC-SHA1, AES-256-CBC-HMAC-SHA256, TLS1-PRF, HKDF, X25519, X448]
[ available ]
```
### Troubleshooting
* Check that `/dev/vfio` has VFs enabled.
```sh
$ ls /dev/vfio
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 vfio
```
* Check that the modules are loaded inside the Kata Container.
```sh
bash-5.0# grep -E "qat|usdm_drv" /proc/modules
qat_c62xvf 16384 - - Live 0x0000000000000000 (O)
usdm_drv 86016 - - Live 0x0000000000000000 (O)
intel_qat 184320 - - Live 0x0000000000000000 (O)
```
* Verify that at least the first `c6xxvf_dev0.conf` file is mounted inside the
container in `/etc`. You will need one configuration file for each VF
passed into the container.
```sh
bash-5.0# ls /etc
c6xxvf_dev0.conf c6xxvf_dev11.conf c6xxvf_dev14.conf c6xxvf_dev3.conf c6xxvf_dev6.conf c6xxvf_dev9.conf resolv.conf
c6xxvf_dev1.conf c6xxvf_dev12.conf c6xxvf_dev15.conf c6xxvf_dev4.conf c6xxvf_dev7.conf hostname
c6xxvf_dev10.conf c6xxvf_dev13.conf c6xxvf_dev2.conf c6xxvf_dev5.conf c6xxvf_dev8.conf hosts
```
* Check `dmesg` inside the container to see if there are any issues with the
Intel® QAT driver.
* If there are issues building the OpenSSL Intel® QAT container image, then
check to make sure that `runc` is the default runtime for building containers.
```sh
$ cat /etc/systemd/system/docker.service.d/50-runtime.conf
[Service]
Environment="DOCKER_DEFAULT_RUNTIME=--default-runtime runc"
```
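If you have to add or change that drop-in file, reload systemd and restart
Docker for it to take effect:
```sh
$ sudo systemctl daemon-reload
$ sudo systemctl restart docker
```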
## Optional Scripts
### Verify Intel® QAT card counters are incremented
To check the built-in firmware counters, the Intel® QAT driver has to be compiled
and installed on the host; you cannot rely on the distribution's built-in host
driver. The counters increase when the accelerator is actively being used. To
verify Intel® QAT is actively accelerating the containerized application, use the
following instructions to check if any of the counters increment. Make sure
to change the PCI device ID to match what's in the system.
```bash
$ for i in 0434 0435 37c8 1f18 1f19; do lspci -d 8086:$i; done
$ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b1\:00.0/fw_counters
$ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b3\:00.0/fw_counters
$ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b5\:00.0/fw_counters
```

View File

@@ -1,3 +1,3 @@
[toolchain]
# Keep in sync with versions.yaml
channel = "1.89"
channel = "1.88"

View File

@@ -269,7 +269,7 @@ impl Default for AgentConfig {
no_proxy: String::from(""),
guest_components_rest_api: GuestComponentsFeatures::default(),
guest_components_procs: GuestComponentsProcs::default(),
secure_storage_integrity: true,
secure_storage_integrity: false,
#[cfg(feature = "agent-policy")]
policy_file: String::from(""),
mem_agent: None,
@@ -911,7 +911,7 @@ mod tests {
no_proxy: "",
guest_components_rest_api: GuestComponentsFeatures::default(),
guest_components_procs: GuestComponentsProcs::default(),
secure_storage_integrity: true,
secure_storage_integrity: false,
#[cfg(feature = "agent-policy")]
policy_file: "",
mem_agent: None,
@@ -1364,7 +1364,7 @@ mod tests {
},
TestData {
contents: "",
secure_storage_integrity: true,
secure_storage_integrity: false,
..Default::default()
},
TestData {

View File

@@ -53,7 +53,6 @@ vm-memory = { workspace = true, features = [
test-utils = { workspace = true }
[features]
test-resources = []
virtio-mmio = []
virtio-vsock = ["virtio-mmio"]
virtio-net = ["virtio-mmio"]

View File

@@ -1250,12 +1250,11 @@ pub mod tests {
#[cfg(feature = "test-resources")]
fn test_fs_manipulate_backend_fs() {
let source = "/test_resources/nydus-rs/bootstrap/image_v2.boot";
let source_path = PathBuf::from(&source);
let source_path = PathBuf::from(source);
let bootstrapfile = source_path.to_str().unwrap().to_string();
if !source_path.exists() {
eprintln!("Test resource file not found: {}", source);
return;
panic!("Test resource file not found: {}", bootstrapfile);
}
let bootstrapfile = source.to_string();
// mount
{
// invalid fs type

View File

@@ -676,11 +676,11 @@ mod tests {
);
assert_eq!(VirtioDevice::<Arc<GuestMemoryMmap<()>>, QueueSync, GuestRegionMmap>::get_avail_features(&dev, 2), 0);
let config: [u8; 8] = [0; 8];
let _ = VirtioDevice::<Arc<GuestMemoryMmap<()>>, QueueSync, GuestRegionMmap>::write_config(
VirtioDevice::<Arc<GuestMemoryMmap<()>>, QueueSync, GuestRegionMmap>::write_config(
&mut dev, 0, &config,
);
let mut data: [u8; 8] = [1; 8];
let _ = VirtioDevice::<Arc<GuestMemoryMmap<()>>, QueueSync, GuestRegionMmap>::read_config(
VirtioDevice::<Arc<GuestMemoryMmap<()>>, QueueSync, GuestRegionMmap>::read_config(
&mut dev, 0, &mut data,
);
assert_eq!(config, data);

View File

@@ -519,7 +519,6 @@ impl<R: Req> Endpoint<R> {
/// * - OversizedMsg: message size is too big.
/// * - PartialMessage: received a partial message.
/// * - IncorrectFds: wrong number of attached fds.
#[allow(dead_code)]
pub fn send_message_with_payload<T: Sized, P: Sized>(
&mut self,
hdr: &VhostUserMsgHeader<R>,

View File

@@ -214,7 +214,7 @@ mod tests {
fn test_push_wrap() {
let mut txbuf = TxBuf::default();
let mut sink = TestSink::new();
let tmp: Vec<u8> = vec![0; TxBuf::SIZE - 2];
let mut tmp: Vec<u8> = vec![0; TxBuf::SIZE - 2];
txbuf.push(tmp.as_slice()).unwrap();
txbuf.flush_to(&mut sink).unwrap();
sink.clear();

View File

@@ -75,7 +75,7 @@ pub const DEFAULT_QEMU_GUEST_KERNEL_PARAMS: &str = "";
pub const DEFAULT_QEMU_FIRMWARE_PATH: &str = "";
pub const DEFAULT_QEMU_MEMORY_SIZE_MB: u32 = 128;
pub const DEFAULT_QEMU_MEMORY_SLOTS: u32 = 128;
pub const DEFAULT_QEMU_PCI_BRIDGES: u32 = 1;
pub const DEFAULT_QEMU_PCI_BRIDGES: u32 = 2;
pub const MAX_QEMU_PCI_BRIDGES: u32 = 5;
pub const MAX_QEMU_VCPUS: u32 = 256;
pub const MIN_QEMU_MEMORY_SIZE_MB: u32 = 64;

View File

@@ -133,17 +133,6 @@ PKGLIBEXECDIR := $(LIBEXECDIR)/$(PROJECT_DIR)
FIRMWAREPATH :=
FIRMWAREVOLUMEPATH :=
ROOTMEASURECONFIG ?= ""
KERNELTDXPARAMS += $(ROOTMEASURECONFIG)
# TDX
DEFSHAREDFS_QEMU_TDX_VIRTIOFS := none
FIRMWARETDXPATH := $(PREFIXDEPS)/share/ovmf/OVMF.inteltdx.fd
# SEV-SNP
FIRMWARE_SNP_PATH := $(PREFIXDEPS)/share/ovmf/AMDSEV.fd
FIRMWARE_VOLUME_SNP_PATH :=
##VAR DEFVCPUS=<number> Default number of vCPUs
DEFVCPUS := 1
##VAR DEFMAXVCPUS=<number> Default maximum number of vCPUs
@@ -160,7 +149,7 @@ DEFMEMSLOTS := 10
# Default maximum memory in MiB
DEFMAXMEMSZ := 0
##VAR DEFBRIDGES=<number> Default number of bridges
DEFBRIDGES := 1
DEFBRIDGES := 0
DEFENABLEANNOTATIONS := [\"enable_iommu\", \"virtio_fs_extra_args\", \"kernel_params\", \"default_vcpus\", \"default_memory\"]
DEFENABLEANNOTATIONS_COCO := [\"enable_iommu\", \"virtio_fs_extra_args\", \"kernel_params\", \"default_vcpus\", \"default_memory\", \"cc_init_data\"]
DEFDISABLEGUESTSECCOMP := true
@@ -187,7 +176,6 @@ DEFVIRTIOFSQUEUESIZE ?= 1024
# Make sure you quote args.
DEFVIRTIOFSEXTRAARGS ?= [\"--thread-pool-size=1\", \"-o\", \"announce_submounts\"]
DEFENABLEIOTHREADS := false
DEFINDEPIOTHREADS := 0
DEFENABLEVHOSTUSERSTORE := false
DEFVHOSTUSERSTOREPATH := $(PKGRUNDIR)/vhost-user
DEFVALIDVHOSTUSERSTOREPATHS := [\"$(DEFVHOSTUSERSTOREPATH)\"]
@@ -204,8 +192,6 @@ QEMUTDXQUOTEGENERATIONSERVICESOCKETPORT := 4050
DEFCREATECONTAINERTIMEOUT ?= 30
DEFCREATECONTAINERTIMEOUT_COCO ?= 60
DEFSTATICRESOURCEMGMT_COCO = true
DEFDISABLEIMAGENVDIMM ?= false
DEFPODRESOURCEAPISOCK := ""
SED = sed
CLI_DIR = cmd
@@ -258,7 +244,6 @@ ifneq (,$(DBCMD))
RUNTIMENAME := virt_container
PIPESIZE := 1
DBSHAREDFS := inline-virtio-fs
DEF_DGB_BRIDGES := 0
endif
ifneq (,$(CLHCMD))
@@ -306,30 +291,6 @@ ifneq (,$(QEMUCMD))
CONFIGS += $(CONFIG_QEMU)
CONFIG_FILE_QEMU_TDX = configuration-qemu-tdx-runtime-rs.toml
CONFIG_QEMU_TDX = config/$(CONFIG_FILE_QEMU_TDX)
CONFIG_QEMU_TDX_IN = $(CONFIG_QEMU_TDX).in
CONFIG_PATH_QEMU_TDX = $(abspath $(CONFDIR)/$(CONFIG_FILE_QEMU_TDX))
CONFIG_PATHS += $(CONFIG_PATH_QEMU_TDX)
SYSCONFIG_QEMU_TDX = $(abspath $(SYSCONFDIR)/$(CONFIG_FILE_QEMU_TDX))
SYSCONFIG_PATHS += $(SYSCONFIG_QEMU_TDX)
CONFIGS += $(CONFIG_QEMU_TDX)
CONFIG_FILE_QEMU_SNP = configuration-qemu-snp-runtime-rs.toml
CONFIG_QEMU_SNP = config/$(CONFIG_FILE_QEMU_SNP)
CONFIG_QEMU_SNP_IN = $(CONFIG_QEMU_SNP).in
CONFIG_PATH_QEMU_SNP = $(abspath $(CONFDIR)/$(CONFIG_FILE_QEMU_SNP))
CONFIG_PATHS += $(CONFIG_PATH_QEMU_SNP)
SYSCONFIG_QEMU_SNP = $(abspath $(SYSCONFDIR)/$(CONFIG_FILE_QEMU_SNP))
SYSCONFIG_PATHS += $(SYSCONFIG_QEMU_SNP)
CONFIGS += $(CONFIG_QEMU_SNP)
CONFIG_FILE_QEMU_SE = configuration-qemu-se-runtime-rs.toml
CONFIG_QEMU_SE = config/$(CONFIG_FILE_QEMU_SE)
CONFIG_QEMU_SE_IN = $(CONFIG_QEMU_SE).in
@@ -560,7 +521,6 @@ USER_VARS += DEFVIRTIOFSEXTRAARGS
USER_VARS += DEFENABLEANNOTATIONS
USER_VARS += DEFENABLEANNOTATIONS_COCO
USER_VARS += DEFENABLEIOTHREADS
USER_VARS += DEFINDEPIOTHREADS
USER_VARS += DEFSECCOMPSANDBOXPARAM
USER_VARS += DEFGUESTSELINUXLABEL
USER_VARS += DEFENABLEVHOSTUSERSTORE
@@ -581,7 +541,6 @@ USER_VARS += DEFSTATICRESOURCEMGMT_FC
USER_VARS += DEFSTATICRESOURCEMGMT_CLH
USER_VARS += DEFSTATICRESOURCEMGMT_QEMU
USER_VARS += DEFSTATICRESOURCEMGMT_COCO
USER_VARS += DEFDISABLEIMAGENVDIMM
USER_VARS += DEFBINDMOUNTS
USER_VARS += DEFVFIOMODE
USER_VARS += DEFVFIOMODE_SE
@@ -593,7 +552,6 @@ USER_VARS += HYPERVISOR_QEMU
USER_VARS += HYPERVISOR_FC
USER_VARS += PIPESIZE
USER_VARS += DBSHAREDFS
USER_VARS += DEF_DGB_BRIDGES
USER_VARS += KATA_INSTALL_GROUP
USER_VARS += KATA_INSTALL_OWNER
USER_VARS += KATA_INSTALL_CFG_PERMS
@@ -602,13 +560,6 @@ USER_VARS += DEFFORCEGUESTPULL
USER_VARS += QEMUTDXQUOTEGENERATIONSERVICESOCKETPORT
USER_VARS += DEFCREATECONTAINERTIMEOUT
USER_VARS += DEFCREATECONTAINERTIMEOUT_COCO
USER_VARS += QEMUTDXEXPERIMENTALCMD
USER_VARS += FIRMWARE_SNP_PATH
USER_VARS += FIRMWARE_VOLUME_SNP_PATH
USER_VARS += KERNELTDXPARAMS
USER_VARS += DEFSHAREDFS_QEMU_TDX_VIRTIOFS
USER_VARS += FIRMWARETDXPATH
USER_VARS += DEFPODRESOURCEAPISOCK
SOURCES := \
$(shell find . 2>&1 | grep -E '.*\.rs$$') \
@@ -646,8 +597,6 @@ GENERATED_VARS = \
VERSION \
CONFIG_DB_IN \
CONFIG_FC_IN \
CONFIG_QEMU_TDX_IN \
CONFIG_QEMU_SNP_IN \
$(USER_VARS)

View File

@@ -92,11 +92,10 @@ default_maxvcpus = @DEFMAXVCPUS_DB@
# * Up to 5 PCI bridges can be cold plugged per VM.
# This limitation could be a bug in the kernel
# Default number of bridges per SB/VM:
# unspecified or 0 --> will be set to @DEF_DGB_BRIDGES@
# unspecified or 0 --> will be set to @DEFBRIDGES@
# > 1 <= 5 --> will be set to the specified number
# > 5 --> will be set to 5
# As Dragonball VMM does not support PCI hotplug options, it should be set to 0.
default_bridges = @DEF_DGB_BRIDGES@
default_bridges = @DEFBRIDGES@
# Reclaim guest freed memory.
# Enabling this will result in the VM balloon device having f_reporting=on set.

View File

@@ -1,770 +0,0 @@
# Copyright (c) 2017-2019 Intel Corporation
# Copyright (c) 2021 Adobe Inc.
# Copyright (c) 2024 IBM Corp.
# Copyright (c) 2025-2026 Ant Group
#
# SPDX-License-Identifier: Apache-2.0
#
# XXX: WARNING: this file is auto-generated.
# XXX:
# XXX: Source file: "@CONFIG_QEMU_IN@"
# XXX: Project:
# XXX: Name: @PROJECT_NAME@
# XXX: Type: @PROJECT_TYPE@
[hypervisor.qemu]
path = "@QEMUPATH@"
kernel = "@KERNELPATH_COCO@"
initrd = "@INITRDCONFIDENTIALPATH@"
# image = "@IMAGECONFIDENTIALPATH@"
machine_type = "@MACHINETYPE@"
# Enable confidential guest support.
# Toggling that setting may trigger different hardware features, ranging
# from memory encryption to both memory and CPU-state encryption and integrity.
# The Kata Containers runtime dynamically detects the available feature set and
# aims at enabling the largest possible one, returning an error if none is
# available, or none is supported by the hypervisor.
#
# Known limitations:
# * Does not work by design:
# - CPU Hotplug
# - Memory Hotplug
# - NVDIMM devices
#
# Default false
confidential_guest = true
# Enable AMD SEV-SNP confidential guests
# In case of using confidential guests on AMD hardware that supports SEV-SNP,
# the following enables SEV-SNP guests. Default true
sev_snp_guest = true
# SNP 'ID Block' and 'ID Authentication Information Structure'.
# If one of snp_id_block or snp_id_auth is specified, the other must be specified, too.
# Notice that the default SNP policy of QEMU (0x30000) is used by Kata, if not explicitly
# set via 'snp_guest_policy' option. The IDBlock contains the guest policy as field, and
# it must match the value from 'snp_guest_policy' or, if unset, the QEMU default policy.
#
# 96-byte, base64-encoded blob to provide the ID Block structure for the
# SNP_LAUNCH_FINISH command defined in the SEV-SNP firmware ABI (QEMU default: all-zero)
snp_id_block = ""
# 4096-byte, base64-encoded blob to provide the ID Authentication Information Structure
# for the SNP_LAUNCH_FINISH command defined in the SEV-SNP firmware ABI (QEMU default: all-zero)
snp_id_auth = ""
# SNP Guest Policy, the POLICY parameter to the SNP_LAUNCH_START command.
# If unset, the QEMU default policy (0x30000) will be used.
# Notice that the guest policy is enforced at VM launch, and your pod VMs
# won't start at all if the policy denies it. This will be indicated by a
# 'SNP_LAUNCH_START' error.
snp_guest_policy = 196608
# rootfs filesystem type:
# - ext4 (default)
# - xfs
# - erofs
rootfs_type = @DEFROOTFSTYPE@
# Block storage driver to be used for the VM rootfs is backed
# by a block device. This is virtio-blk-pci, virtio-blk-mmio or nvdimm
vm_rootfs_driver = "virtio-blk-pci"
# Enable running QEMU VMM as a non-root user.
# By default the QEMU VMM runs as root. When this is set to true, the QEMU VMM process runs as
# a random non-root user. See documentation for the limitations of this mode.
rootless = false
# List of valid annotation names for the hypervisor
# Each member of the list is a regular expression, which is the base name
# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
enable_annotations = @DEFENABLEANNOTATIONS_COCO@
# List of valid annotations values for the hypervisor
# Each member of the list is a path pattern as described by glob(3).
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @QEMUVALIDHYPERVISORPATHS@
valid_hypervisor_paths = @QEMUVALIDHYPERVISORPATHS@
# Optional space-separated list of options to pass to the guest kernel.
# For example, use `kernel_params = "vsyscall=emulate"` if you are having
# trouble running pre-2.15 glibc.
#
# WARNING: - any parameter specified here will take priority over the default
# parameter value of the same name used to start the virtual machine.
# Do not set values here unless you understand the impact of doing so as you
# may stop the virtual machine from booting.
# To see the list of default parameters, enable hypervisor debug, create a
# container and look for 'default-kernel-parameters' log entries.
kernel_params = "@KERNELPARAMS@"
# Path to the firmware.
# If you want that qemu uses the default firmware leave this option empty
firmware = "@FIRMWARE_SNP_PATH@"
# Path to the firmware volume.
# firmware TDVF or OVMF can be split into FIRMWARE_VARS.fd (UEFI variables
# as configuration) and FIRMWARE_CODE.fd (UEFI program image). UEFI variables
# can be customized per each user while UEFI code is kept same.
firmware_volume = "@FIRMWARE_VOLUME_SNP_PATH@"
# Machine accelerators
# comma-separated list of machine accelerators to pass to the hypervisor.
# For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"`
machine_accelerators = "@MACHINEACCELERATORS@"
# Qemu seccomp sandbox feature
# comma-separated list of seccomp sandbox features to control the syscall access.
# For example, `seccompsandbox= "on,obsolete=deny,spawn=deny,resourcecontrol=deny"`
# Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
# Another note: enabling this feature may reduce performance, you may enable
# /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html
# Recommended value when enabling: "on,obsolete=deny,spawn=deny,resourcecontrol=deny"
seccompsandbox = "@DEFSECCOMPSANDBOXPARAM@"
# CPU features
# comma-separated list of cpu features to pass to the cpu
# For example, `cpu_features = "pmu=off,vmx=off"
cpu_features = "@CPUFEATURES@"
# Default number of vCPUs per SB/VM:
# unspecified or 0 --> will be set to @DEFVCPUS@
# < 0 --> will be set to the actual number of physical cores
# > 0 <= number of physical cores --> will be set to the specified number
# > number of physical cores --> will be set to the actual number of physical cores
default_vcpus = @DEFVCPUS_QEMU@
# Default maximum number of vCPUs per SB/VM:
# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number
# of vCPUs supported by KVM if that number is exceeded
# > 0 <= number of physical cores --> will be set to the specified number
# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number
# of vCPUs supported by KVM if that number is exceeded
# WARNING: Depending on the architecture, the maximum number of vCPUs supported by KVM is used when
# the actual number of physical cores is greater than it.
# WARNING: Be aware that this value impacts the virtual machine's memory footprint and the CPU
# hotplug functionality. For example, `default_maxvcpus = 240` specifies that up to 240 vCPUs
# can be added to a SB/VM, but the memory footprint will be big. Another example, with
# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of
# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable,
# unless you know what you are doing.
# NOTICE: on arm platform with gicv2 interrupt controller, set it to 8.
default_maxvcpus = @DEFMAXVCPUS_QEMU@
# Bridges can be used to hot plug devices.
# Limitations:
# * Currently only pci bridges are supported
# * Up to 30 devices per bridge can be hot plugged.
# * Up to 5 PCI bridges can be cold plugged per VM.
# This limitation could be a bug in qemu or in the kernel
# Default number of bridges per SB/VM:
# unspecified or 0 --> will be set to @DEFBRIDGES@
# > 1 <= 5 --> will be set to the specified number
# > 5 --> will be set to 5
default_bridges = @DEFBRIDGES@
# Default memory size in MiB for SB/VM.
# If unspecified then it will be set @DEFMEMSZ@ MiB.
default_memory = @DEFMEMSZ@
#
# Default memory slots per SB/VM.
# If unspecified then it will be set @DEFMEMSLOTS@.
# This will determine the number of times that memory can be hot-added to the sandbox/VM.
memory_slots = @DEFMEMSLOTS@
# Default maximum memory in MiB per SB / VM
# unspecified or == 0 --> will be set to the actual amount of physical RAM
# > 0 <= amount of physical RAM --> will be set to the specified number
# > amount of physical RAM --> will be set to the actual amount of physical RAM
default_maxmemory = @DEFMAXMEMSZ@
# This size in MiB will be added to the hypervisor's maximum memory.
# It is the memory address space for the NVDIMM device.
# If set block storage driver (block_device_driver) to "nvdimm",
# should set memory_offset to the size of block device.
# Default 0
memory_offset = 0
# Specifies virtio-mem will be enabled or not.
# Please note that this option should be used with the command
# "echo 1 > /proc/sys/vm/overcommit_memory".
# Default false
enable_virtio_mem = false
# Disable block device from being used for a container's rootfs.
# In case of a storage driver like devicemapper where a container's
# root file system is backed by a block device, the block device is passed
# directly to the hypervisor for performance reasons.
# This flag prevents the block device from being passed to the hypervisor,
# virtio-fs is used instead to pass the rootfs.
disable_block_device_use = @DEFDISABLEBLOCK@
# Shared file system type:
# - virtio-fs (default)
# - virtio-fs-nydus
# - none
shared_fs = "none"
# Path to vhost-user-fs daemon.
virtio_fs_daemon = "@DEFVIRTIOFSDAEMON@"
# List of valid annotations values for the virtiofs daemon
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @DEFVALIDVIRTIOFSDAEMONPATHS@
valid_virtio_fs_daemon_paths = @DEFVALIDVIRTIOFSDAEMONPATHS@
# Default size of DAX cache in MiB
virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@
# Default size of virtqueues
virtio_fs_queue_size = @DEFVIRTIOFSQUEUESIZE@
# Extra args for virtiofsd daemon
#
# Format example:
# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"]
# Examples:
# Set virtiofsd log level to debug : ["-o", "log_level=debug"] or ["-d"]
#
# see `virtiofsd -h` for possible options.
virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@
# Cache mode:
#
# - never
# Metadata, data, and pathname lookup are not cached in guest. They are
# always fetched from host and any changes are immediately pushed to host.
#
# - metadata
# Metadata and pathname lookup are cached in guest and never expire.
# Data is never cached in guest.
#
# - auto
# Metadata and pathname lookup cache expires after a configured amount of
# time (default is 1 second). Data is cached while the file is open (close
# to open consistency).
#
# - always
# Metadata, data, and pathname lookup are cached in guest and never expire.
virtio_fs_cache = "@DEFVIRTIOFSCACHE@"
# Block storage driver to be used for the hypervisor in case the container
# rootfs is backed by a block device. This is virtio-scsi, virtio-blk
# or nvdimm.
block_device_driver = "@DEFBLOCKSTORAGEDRIVER_QEMU@"
# aio is the I/O mechanism used by qemu
# Options:
#
# - threads
# Pthread based disk I/O.
#
# - native
# Native Linux I/O.
#
# - io_uring
# Linux io_uring API. This provides the fastest I/O operations on Linux, requires kernel>5.1 and
# qemu >=5.0.
block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@"
# Specifies cache-related options will be set to block devices or not.
# Default false
block_device_cache_set = false
# Specifies cache-related options for block devices.
# Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
# Default false
block_device_cache_direct = false
# Specifies cache-related options for block devices.
# Denotes whether flush requests for the device are ignored.
# Default false
block_device_cache_noflush = false
# Enable iothreads (data-plane) to be used. This causes IO to be
# handled in a separate IO thread. This is currently only implemented
# for SCSI.
#
enable_iothreads = @DEFENABLEIOTHREADS@
# Independent IOThreads enables IO to be processed in a separate thread, it is
# for QEMU hotplug device attach to iothread, like virtio-blk.
indep_iothreads = @DEFINDEPIOTHREADS@
# Enable pre allocation of VM RAM, default false
# Enabling this will result in lower container density
# as all of the memory will be allocated and locked
# This is useful when you want to reserve all the memory
# upfront or in the cases where you want memory latencies
# to be very predictable
# Default false
enable_mem_prealloc = false
# Reclaim guest freed memory.
# Enabling this will result in the VM balloon device having f_reporting=on set.
# Then the hypervisor will use it to reclaim guest freed memory.
# This is useful for reducing the amount of memory used by a VM.
# Enabling this feature may sometimes reduce the speed of memory access in
# the VM.
#
# Default false
reclaim_guest_freed_memory = false
# Enable huge pages for VM RAM, default false
# Enabling this will result in the VM memory
# being allocated using huge pages.
# This is useful when you want to use vhost-user network
# stacks within the container. This will automatically
# result in memory pre allocation
enable_hugepages = false
# Enable vhost-user storage device, default false
# Enabling this will result in some Linux reserved block type
# major range 240-254 being chosen to represent vhost-user devices.
enable_vhost_user_store = @DEFENABLEVHOSTUSERSTORE@
# The base directory specifically used for vhost-user devices.
# Its sub-path "block" is used for block devices; "block/sockets" is
# where we expect vhost-user sockets to live; "block/devices" is where
# simulated block device nodes for vhost-user devices to live.
vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
# Enable vIOMMU, default false
# Enabling this will result in the VM having a vIOMMU device
# This will also add the following options to the kernel's
# command line: intel_iommu=on,iommu=pt
enable_iommu = false
# Enable IOMMU_PLATFORM, default false
# Enabling this will result in the VM device having iommu_platform=on set
enable_iommu_platform = false
# List of valid annotations values for the vhost user store path
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @DEFVALIDVHOSTUSERSTOREPATHS@
valid_vhost_user_store_paths = @DEFVALIDVHOSTUSERSTOREPATHS@
# The timeout for reconnecting on non-server spdk sockets when the remote end goes away.
# qemu will delay this many seconds and then attempt to reconnect.
# Zero disables reconnecting, and the default is zero.
vhost_user_reconnect_timeout_sec = 0
# Enable file based guest memory support. The default is an empty string which
# will disable this feature. In the case of virtio-fs, this is enabled
# automatically and '/dev/shm' is used as the backing folder.
# This option will be ignored if VM templating is enabled.
file_mem_backend = ""
# List of valid annotations values for the file_mem_backend annotation
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @DEFVALIDFILEMEMBACKENDS@
valid_file_mem_backends = @DEFVALIDFILEMEMBACKENDS@
# -pflash can add image file to VM. The arguments of it should be in format
# of ["/path/to/flash0.img", "/path/to/flash1.img"]
pflashes = []
# This option changes the default hypervisor and kernel parameters
# to enable debug output where available. Debug also enables the HMP socket.
#
# Default false
enable_debug = false
# Disable the customizations done in the runtime when it detects
# that it is running on top a VMM. This will result in the runtime
# behaving as it would when running on bare metal.
#
disable_nesting_checks = true
# If false and nvdimm is supported, use nvdimm device to plug guest image.
# Otherwise virtio-block device is used.
#
# nvdimm is not supported when `confidential_guest = true`.
disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
# Before hot plugging a PCIe device, you need to add a pcie_root_port device.
# Use this parameter when using some large PCI bar devices, such as Nvidia GPU
# The value means the number of pcie_root_port
# Default 0
pcie_root_port = 0
# If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
# security (vhost-net runs in ring0) for network I/O performance.
disable_vhost_net = false
# This option allows to add an extra HMP or QMP socket when `enable_debug = true`
#
# WARNING: Anyone with access to the extra socket can take full control of
# Qemu. This is for debugging purpose only and must *NEVER* be used in
# production.
#
# Valid values are :
# - "hmp"
# - "qmp"
# - "qmp-pretty" (same as "qmp" with pretty json formatting)
#
# If set to the empty string "", no extra monitor socket is added. This is
# the default.
#extra_monitor_socket = "hmp"
#
# Default entropy source.
# The path to a host source of entropy (including a real hardware RNG)
# /dev/urandom and /dev/random are two main options.
# Be aware that /dev/random is a blocking source of entropy. If the host
# runs out of entropy, the VMs boot time will increase leading to get startup
# timeouts.
# The source of entropy /dev/urandom is non-blocking and provides a
# generally acceptable source of entropy. It should work well for pretty much
# all practical purposes.
entropy_source = "@DEFENTROPYSOURCE@"
# List of valid annotations values for entropy_source
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @DEFVALIDENTROPYSOURCES@
valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
# Path to OCI hook binaries in the *guest rootfs*.
# This does not affect host-side hooks which must instead be added to
# the OCI spec passed to the runtime.
#
# You can create a rootfs with hooks by customizing the osbuilder scripts:
# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder
#
# Hooks must be stored in a subdirectory of guest_hook_path according to their
# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}".
# The agent will scan these directories for executable files and add them, in
# lexicographical order, to the lifecycle of the guest container.
# Hooks are executed in the runtime namespace of the guest. See the official documentation:
# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
# Warnings will be logged if any error is encountered while scanning for hooks,
# but it will not abort container execution.
# Recommended value when enabling: "/usr/share/oci/hooks"
guest_hook_path = ""
#
# Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM).
# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic.
# Default 0-sized value means unlimited rate.
rx_rate_limiter_max_rate = 0
# Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM).
# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block)
# to discipline traffic.
# Default 0-sized value means unlimited rate.
tx_rate_limiter_max_rate = 0
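#
# Example (illustrative values): to cap inbound and outbound traffic at
# roughly 100 Mbit/s each, set both limits to 100000000 bits/sec:
# rx_rate_limiter_max_rate = 100000000
# tx_rate_limiter_max_rate = 100000000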
# Set where to save the guest memory dump file.
# If set, when a GUEST_PANICKED event occurs,
# guest memory will be dumped to the host filesystem under guest_memory_dump_path.
# This directory will be created automatically if it does not exist.
#
# The dumped file (also called vmcore) can be processed with crash or gdb.
#
# WARNING:
# Dumping the guest's memory can take very long, depending on the amount of
# guest memory, and can use a lot of disk space.
# Recommended value when enabling: "/var/crash/kata"
guest_memory_dump_path = ""
# Whether to enable paging.
# Basically, if you want to use "gdb" rather than "crash",
# or need the guest-virtual addresses in the ELF vmcore,
# then you should enable paging.
#
# See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details
guest_memory_dump_paging = false
# Enable swap in the guest. Default false.
# When enable_guest_swap is enabled, insert a raw file to the guest as the swap device
# if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness")
# is bigger than 0.
# The size of the swap device should be
# swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes.
# If swap_in_bytes is not set, the size should be memory_limit_in_bytes.
# If neither swap_in_bytes nor memory_limit_in_bytes is set, the size should
# be default_memory.
enable_guest_swap = false
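#
# Worked example (illustrative values): with annotations setting
# swap_in_bytes = 2147483648 (2 GiB) and memory_limit_in_bytes = 1073741824
# (1 GiB), and swappiness > 0, the inserted swap device would be
# 2 GiB - 1 GiB = 1 GiB.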
# use legacy serial for guest console if available and implemented for architecture. Default false
use_legacy_serial = false
# disable applying SELinux on the VMM process (default false)
disable_selinux = @DEFDISABLESELINUX@
# disable applying SELinux on the container process
# If set to false, the type `container_t` is applied to the container process by default.
# Note: To enable guest SELinux, the guest rootfs must be a CentOS rootfs created and built
# with `SELINUX=yes`.
# (default: true)
disable_guest_selinux = @DEFDISABLEGUESTSELINUX@
[factory]
# VM templating support. Once enabled, new VMs are created from template
# using vm cloning. They will share the same initial kernel, initramfs and
# agent memory by mapping it readonly. It helps speed up new container
# creation and saves a lot of memory if there are many kata containers running
# on the same host.
#
# When disabled, new VMs are created from scratch.
#
# Note: Requires "initrd=" to be set ("image=" is not supported).
#
# Default false
enable_template = false
# Specifies the path of template.
#
# Default "/run/vc/vm/template"
template_path = "/run/vc/vm/template"
# The number of caches of VMCache:
# unspecified or == 0 --> VMCache is disabled
# > 0 --> will be set to the specified number
#
# VMCache is a function that creates VMs as caches before using it.
# It helps speed up new container creation.
# The function consists of a server and some clients communicating
# through Unix socket. The protocol is gRPC in protocols/cache/cache.proto.
# The VMCache server will create some VMs and cache them by factory cache.
# It will convert the VM to gRPC format and transport it when it gets
# requests from clients.
# Factory grpccache is the VMCache client. It will request gRPC format
# VM and convert it back to a VM. If VMCache function is enabled,
# kata-runtime will request VM from factory grpccache when it creates
# a new sandbox.
#
# Default 0
vm_cache_number = 0
# Specify the address of the Unix socket that is used by VMCache.
#
# Default /var/run/kata-containers/cache.sock
vm_cache_endpoint = "/var/run/kata-containers/cache.sock"
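#
# Example (a minimal sketch): keep three pre-created VMs ready on the
# default socket:
# vm_cache_number = 3
# vm_cache_endpoint = "/var/run/kata-containers/cache.sock"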
[agent.@PROJECT_TYPE@]
# If enabled, make the agent display debug-level messages.
# (default: disabled)
enable_debug = false
# Enable agent tracing.
#
# If enabled, the agent will generate OpenTelemetry trace spans.
#
# Notes:
#
# - If the runtime also has tracing enabled, the agent spans will be
# associated with the appropriate runtime parent span.
# - If enabled, the runtime will wait for the container to shutdown,
# increasing the container shutdown time slightly.
#
# (default: disabled)
enable_tracing = false
# Comma separated list of kernel modules and their parameters.
# These modules will be loaded in the guest kernel using modprobe(8).
# The following example can be used to load two kernel modules with parameters
# - kernel_modules=["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1", "i915 enable_ppgtt=0"]
# The first word is considered as the module name and the rest as its parameters.
# Container will not be started when:
# * A kernel module is specified and the modprobe command is not installed in the guest
# or it fails loading the module.
# * The module is not available in the guest or it doesn't meet the guest kernel
# requirements, like architecture and version.
#
kernel_modules = []
# Enable debug console.
# If enabled, users can connect to the guest OS running inside the hypervisor
# through the "kata-runtime exec <sandbox-id>" command
debug_console_enabled = false
# Agent dial timeout in milliseconds.
# (default: 10)
dial_timeout_ms = 10
# Agent reconnect timeout in milliseconds.
# Retry count = reconnect_timeout_ms / dial_timeout_ms (default: 300)
# If you find the pod cannot connect to the agent when starting, consider
# increasing this value to increase the number of retries.
# Do not change the value of dial_timeout_ms unless you know what you
# are doing.
# (default: 3000)
reconnect_timeout_ms = 3000
# Create Container Request Timeout
# This timeout value is used to set the maximum duration for the agent to process a CreateContainerRequest.
# It's also used to ensure that workloads, especially those involving large image pulls within the guest,
# have sufficient time to complete.
#
# Effective Timeout Determination:
# The effective timeout for a CreateContainerRequest is determined by taking the minimum of the following two values:
# - create_container_timeout: The timeout value configured for creating containers (default: 30,000 milliseconds).
# - runtime-request-timeout: The timeout value specified in the Kubelet configuration described at the link below:
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout)
# Defaults to @DEFCREATECONTAINERTIMEOUT_COCO@ second(s)
create_container_timeout = @DEFCREATECONTAINERTIMEOUT_COCO@
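#
# Worked example (illustrative values): with create_container_timeout = 60000
# (60 seconds) and the Kubelet's runtime-request-timeout left at its default
# of 2 minutes, the effective timeout is min(60s, 120s) = 60s, so large
# in-guest image pulls must complete within 60 seconds.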
[runtime]
# If enabled, the runtime will log additional debug messages to the
# system log
# (default: disabled)
enable_debug = false
#
# Internetworking model
# Determines how the VM should be connected to the
# container network interface
# Options:
#
# - macvtap
# Used when the Container network interface can be bridged using
# macvtap.
#
# - none
# Used with a customized network. Only creates a tap device. No veth pair.
#
# - tcfilter
# Uses tc filter rules to redirect traffic from the network interface
# provided by the plugin to a tap interface connected to the VM.
#
internetworking_model="@DEFNETWORKMODEL_QEMU@"
name="@RUNTIMENAME@"
hypervisor_name="@HYPERVISOR_QEMU@"
agent_name="@PROJECT_TYPE@"
# disable guest seccomp
# Determines whether container seccomp profiles are passed to the virtual
# machine and applied by the kata agent. If set to true, seccomp is not applied
# within the guest
# (default: true)
disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@
# vCPU pinning settings
# If enabled, each vCPU thread will be scheduled to a fixed CPU.
# Required condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
enable_vcpus_pinning = false
# Apply a custom SELinux security policy to the container process inside the VM.
# This is used when you want to apply a type other than the default `container_t`,
# so general users should not uncomment and apply it.
# (format: "user:role:type")
# Note: You cannot specify MCS policy with the label because the sensitivity levels and
# categories are determined automatically by high-level container runtimes such as containerd.
guest_selinux_label = "@DEFGUESTSELINUXLABEL@"
# If enabled, the runtime will create opentracing.io traces and spans.
# (See https://www.jaegertracing.io/docs/getting-started).
# (default: disabled)
enable_tracing = false
# Set the full url to the Jaeger HTTP Thrift collector.
# The default if not set will be "http://localhost:14268/api/traces"
jaeger_endpoint = ""
# Sets the username to be used if basic auth is required for Jaeger.
jaeger_user = ""
# Sets the password to be used if basic auth is required for Jaeger.
jaeger_password = ""
# If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
# This option may have some potential impacts on your host. It should only be used when you know what you're doing.
# `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only
# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
# (like OVS) directly.
# (default: false)
disable_new_netns = false
# if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
# The container cgroups in the host are not created, just one single cgroup per sandbox.
# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
# The sandbox cgroup is constrained if there is no container type annotation.
# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY_QEMU@
# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
# when a hardware architecture or hypervisor solution is utilized which does not support CPU and/or memory hotplug.
# Compatibility for determining appropriate sandbox (VM) size:
# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
# does not yet support sandbox sizing annotations.
# - When running single containers using a tool like ctr, container sizing information will be available.
static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_COCO@
# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro) into the sandbox's shared path.
# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
# These will not be exposed to the container workloads, and are only provided for potential guest services.
sandbox_bind_mounts = @DEFBINDMOUNTS@
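#
# Example (hypothetical host path): expose a read-only configuration
# directory to guest services:
# sandbox_bind_mounts = ["/usr/share/my-guest-config"]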
# VFIO Mode
# Determines how VFIO devices should be presented to the container.
# Options:
#
# - vfio
# Matches behaviour of OCI runtimes (e.g. runc) as much as
# possible. VFIO devices will appear in the container as VFIO
# character devices under /dev/vfio. The exact names may differ
# from the host (they need to match the VM's IOMMU group numbers
# rather than the host's)
#
# - guest-kernel
# This is a Kata-specific behaviour that's useful in certain cases.
# The VFIO device is managed by whatever driver in the VM kernel
# claims it. This means it will appear as one or more device nodes
# or network interfaces depending on the nature of the device.
# Using this mode requires specially built workloads that know how
# to locate the relevant device interfaces within the VM.
#
vfio_mode = "@DEFVFIOMODE@"
# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@
# Enabled experimental feature list, format: ["a", "b"].
# Experimental features are features not stable enough for production,
# they may break compatibility, and are prepared for a big version bump.
# Supported experimental features:
# for example:
# experimental=["force_guest_pull"]
# which enables force_guest_pull mode in CoCo scenarios.
# (default: [])
experimental = @DEFAULTEXPFEATURES@
# If enabled, user can run pprof tools with shim v2 process through kata-monitor.
# (default: false)
enable_pprof = false
# Base directory of directly attachable network config.
# Network devices for VM-based containers are allowed to be placed in the
# host netns to eliminate as many hops as possible, which is what we
# called a "Directly Attachable Network". The config, set by special CNI
# plugins, is used to tell the Kata containers what devices are attached
# to the hypervisor.
# (default: /run/kata-containers/dans)
dan_conf = "@DEFDANCONF@"
# pod_resource_api_sock specifies the unix socket for the Kubelet's
# PodResource API endpoint. If empty, kubernetes based cold plug
# will not be attempted. In order for this feature to work, the
# KubeletPodResourcesGet featureGate must be enabled in Kubelet,
# if using Kubelet older than 1.34.
#
# The pod resource API's socket is relative to the Kubelet's root-dir,
# which is defined by the cluster admin, and its location is:
# ${KubeletRootDir}/pod-resources/kubelet.sock
#
# cold_plug_vfio (see hypervisor config) acts as a feature gate:
# cold_plug_vfio = no_port (default) => no cold plug
# cold_plug_vfio != no_port AND pod_resource_api_sock = "" => need
# explicit CDI annotation for cold plug (applies mainly
# to non-k8s cases)
# cold_plug_vfio != no_port AND pod_resource_api_sock != "" => kubelet
# based cold plug.
pod_resource_api_sock = "@DEFPODRESOURCEAPISOCK@"
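#
# Example (illustrative; the Kubelet root-dir varies per cluster): for
# kubelet-based cold plug, combine a non-default cold_plug_vfio in the
# hypervisor section with the PodResources socket, e.g.:
# cold_plug_vfio = "root-port"
# pod_resource_api_sock = "/var/lib/kubelet/pod-resources/kubelet.sock"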

View File

@@ -1,746 +0,0 @@
# Copyright (c) 2017-2019 Intel Corporation
# Copyright (c) 2021 Adobe Inc.
# Copyright (c) 2025-2026 Ant Group
#
# SPDX-License-Identifier: Apache-2.0
#
# XXX: WARNING: this file is auto-generated.
# XXX:
# XXX: Source file: "@CONFIG_QEMU_IN@"
# XXX: Project:
# XXX: Name: @PROJECT_NAME@
# XXX: Type: @PROJECT_TYPE@
[hypervisor.qemu]
path = "@QEMUPATH@"
kernel = "@KERNELPATH_COCO@"
image = "@IMAGECONFIDENTIALPATH@"
# initrd = "@INITRDPATH@"
machine_type = "@MACHINETYPE@"
tdx_quote_generation_service_socket_port = @QEMUTDXQUOTEGENERATIONSERVICESOCKETPORT@
# rootfs filesystem type:
# - ext4 (default)
# - xfs
# - erofs
rootfs_type = @DEFROOTFSTYPE@
# Block storage driver to be used when the VM rootfs is backed
# by a block device. This is virtio-blk-pci, virtio-blk-mmio or nvdimm.
vm_rootfs_driver = "virtio-blk-pci"
# Enable confidential guest support.
# Toggling that setting may trigger different hardware features, ranging
# from memory encryption to both memory and CPU-state encryption and integrity.
# The Kata Containers runtime dynamically detects the available feature set and
# aims at enabling the largest possible one, returning an error if none is
# available, or none is supported by the hypervisor.
#
# Known limitations:
# * Does not work by design:
# - CPU Hotplug
# - Memory Hotplug
# - NVDIMM devices
#
# Default false
confidential_guest = true
# Enable running QEMU VMM as a non-root user.
# By default the QEMU VMM runs as root. When this is set to true, the QEMU VMM process runs as
# a non-root random user. See documentation for the limitations of this mode.
rootless = false
# List of valid annotation names for the hypervisor
# Each member of the list is a regular expression, which is the base name
# of the annotation, e.g. "path" for "io.katacontainers.config.hypervisor.path"
enable_annotations = @DEFENABLEANNOTATIONS_COCO@
# List of valid annotations values for the hypervisor
# Each member of the list is a path pattern as described by glob(3).
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @QEMUVALIDHYPERVISORPATHS@
valid_hypervisor_paths = @QEMUVALIDHYPERVISORPATHS@
# Optional space-separated list of options to pass to the guest kernel.
# For example, use `kernel_params = "vsyscall=emulate"` if you are having
# trouble running pre-2.15 glibc.
#
# WARNING: - any parameter specified here will take priority over the default
# parameter value of the same name used to start the virtual machine.
# Do not set values here unless you understand the impact of doing so as you
# may stop the virtual machine from booting.
# To see the list of default parameters, enable hypervisor debug, create a
# container and look for 'default-kernel-parameters' log entries.
kernel_params = "@KERNELTDXPARAMS@"
# Path to the firmware.
# If you want qemu to use the default firmware, leave this option empty
firmware = "@FIRMWARETDXPATH@"
# Path to the firmware volume.
# Firmware TDVF or OVMF can be split into FIRMWARE_VARS.fd (UEFI variables
# as configuration) and FIRMWARE_CODE.fd (UEFI program image). UEFI variables
# can be customized per user while the UEFI code is kept the same.
firmware_volume = "@FIRMWAREVOLUMEPATH@"
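#
# Example (hypothetical paths for a split firmware build):
# firmware = "/usr/share/ovmf/OVMF_CODE.fd"
# firmware_volume = "/usr/share/ovmf/OVMF_VARS.fd"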
# Machine accelerators
# comma-separated list of machine accelerators to pass to the hypervisor.
# For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"`
machine_accelerators = "@MACHINEACCELERATORS@"
# Qemu seccomp sandbox feature
# comma-separated list of seccomp sandbox features to control the syscall access.
# For example, `seccompsandbox= "on,obsolete=deny,spawn=deny,resourcecontrol=deny"`
# Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
# Another note: enabling this feature may reduce performance; you may enable
# /proc/sys/net/core/bpf_jit_enable to reduce the impact. See https://man7.org/linux/man-pages/man8/bpfc.8.html
# Recommended value when enabling: "on,obsolete=deny,spawn=deny,resourcecontrol=deny"
seccompsandbox = "@DEFSECCOMPSANDBOXPARAM@"
# CPU features
# comma-separated list of cpu features to pass to the cpu
# For example, `cpu_features = "pmu=off,vmx=off"`
cpu_features = "@CPUFEATURES@"
# Default number of vCPUs per SB/VM:
# unspecified or 0 --> will be set to @DEFVCPUS@
# < 0 --> will be set to the actual number of physical cores
# > 0 <= number of physical cores --> will be set to the specified number
# > number of physical cores --> will be set to the actual number of physical cores
default_vcpus = 1
# Default maximum number of vCPUs per SB/VM:
# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number
# of vCPUs supported by KVM if that number is exceeded
# > 0 <= number of physical cores --> will be set to the specified number
# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number
# of vCPUs supported by KVM if that number is exceeded
# WARNING: Depending on the architecture, the maximum number of vCPUs supported by KVM is used when
# the actual number of physical cores is greater than it.
# WARNING: Be aware that this value impacts the virtual machine's memory footprint and CPU
# hotplug functionality. For example, `default_maxvcpus = 240` specifies that up to 240 vCPUs
# can be added to a SB/VM, but the memory footprint will be big. Another example, with
# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of
# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable,
# unless you know what you are doing.
# NOTICE: on arm platform with gicv2 interrupt controller, set it to 8.
default_maxvcpus = @DEFMAXVCPUS@
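#
# Worked example (illustrative): on a host with 16 physical cores,
# default_vcpus = 1 boots each VM with one vCPU and default_maxvcpus = 16
# allows hot plugging up to 15 more; default_vcpus = -1 would instead boot
# with all 16 vCPUs.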
# Bridges can be used to hot plug devices.
# Limitations:
# * Currently only PCI bridges are supported.
# * Up to 30 devices per bridge can be hot plugged.
# * Up to 5 PCI bridges can be cold plugged per VM.
# This limitation could be a bug in qemu or in the kernel
# Default number of bridges per SB/VM:
# unspecified or 0 --> will be set to @DEFBRIDGES@
# > 1 <= 5 --> will be set to the specified number
# > 5 --> will be set to 5
default_bridges = @DEFBRIDGES@
# Default memory size in MiB for SB/VM.
# If unspecified then it will be set to @DEFMEMSZ@ MiB.
default_memory = @DEFMEMSZ@
#
# Default memory slots per SB/VM.
# If unspecified then it will be set to @DEFMEMSLOTS@.
# This will determine the number of times memory can be hot-added to the sandbox/VM.
memory_slots = @DEFMEMSLOTS@
# Default maximum memory in MiB per SB / VM
# unspecified or == 0 --> will be set to the actual amount of physical RAM
# > 0 <= amount of physical RAM --> will be set to the specified number
# > amount of physical RAM --> will be set to the actual amount of physical RAM
default_maxmemory = @DEFMAXMEMSZ@
# This size in MiB will be added to the hypervisor's maximum memory.
# It is the memory address space for the NVDIMM device.
# If the block storage driver (block_device_driver) is set to "nvdimm",
# memory_offset should be set to the size of the block device.
# Default 0
memory_offset = 0
# Specifies whether virtio-mem will be enabled.
# Please note that this option should be used with the command
# "echo 1 > /proc/sys/vm/overcommit_memory".
# Default false
enable_virtio_mem = false
# Disable block device from being used for a container's rootfs.
# In case of a storage driver like devicemapper where a container's
# root file system is backed by a block device, the block device is passed
# directly to the hypervisor for performance reasons.
# This flag prevents the block device from being passed to the hypervisor;
# virtio-fs is used instead to pass the rootfs.
disable_block_device_use = @DEFDISABLEBLOCK@
# Shared file system type:
# - virtio-fs (default)
# - virtio-fs-nydus
# - none
shared_fs = "@DEFSHAREDFS_QEMU_TDX_VIRTIOFS@"
# Path to vhost-user-fs daemon.
virtio_fs_daemon = "@DEFVIRTIOFSDAEMON@"
# List of valid annotations values for the virtiofs daemon
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @DEFVALIDVIRTIOFSDAEMONPATHS@
valid_virtio_fs_daemon_paths = @DEFVALIDVIRTIOFSDAEMONPATHS@
# Default size of DAX cache in MiB
virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@
# Default size of virtqueues
virtio_fs_queue_size = @DEFVIRTIOFSQUEUESIZE@
# Extra args for virtiofsd daemon
#
# Format example:
# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"]
# Examples:
# Set virtiofsd log level to debug : ["-o", "log_level=debug"] or ["-d"]
#
# see `virtiofsd -h` for possible options.
virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@
# Cache mode:
#
# - never
# Metadata, data, and pathname lookup are not cached in guest. They are
# always fetched from host and any changes are immediately pushed to host.
#
# - metadata
# Metadata and pathname lookup are cached in guest and never expire.
# Data is never cached in guest.
#
# - auto
# Metadata and pathname lookup cache expires after a configured amount of
# time (default is 1 second). Data is cached while the file is open (close
# to open consistency).
#
# - always
# Metadata, data, and pathname lookup are cached in guest and never expire.
virtio_fs_cache = "@DEFVIRTIOFSCACHE@"
# Block storage driver to be used for the hypervisor in case the container
# rootfs is backed by a block device. This is virtio-scsi, virtio-blk
# or nvdimm.
block_device_driver = "@DEFBLOCKSTORAGEDRIVER_QEMU@"
# aio is the I/O mechanism used by qemu
# Options:
#
# - threads
# Pthread based disk I/O.
#
# - native
# Native Linux I/O.
#
# - io_uring
# Linux io_uring API. This provides the fastest I/O operations on Linux, requires kernel>5.1 and
# qemu >=5.0.
block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@"
# Specifies whether cache-related options will be set for block devices.
# Default false
block_device_cache_set = false
# Specifies cache-related options for block devices.
# Denotes whether O_DIRECT (bypassing the host page cache) is used.
# Default false
block_device_cache_direct = false
# Specifies cache-related options for block devices.
# Denotes whether flush requests for the device are ignored.
# Default false
block_device_cache_noflush = false
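#
# Example (a sketch): to approximate QEMU's "cache=none" behaviour (bypass
# the host page cache but honour guest flush requests), one combination is:
# block_device_cache_set = true
# block_device_cache_direct = true
# block_device_cache_noflush = false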
# Enable iothreads (data-plane) to be used. This causes IO to be
# handled in a separate IO thread. This is currently implemented
# for virtio-scsi and virtio-blk.
#
enable_iothreads = @DEFENABLEIOTHREADS@
# Independent IOThreads enable IO to be processed in a separate thread; this is
# used when hot plugging QEMU devices attached to an iothread, such as virtio-blk.
indep_iothreads = @DEFINDEPIOTHREADS@
# Enable pre-allocation of VM RAM, default false
# Enabling this will result in lower container density
# as all of the memory will be allocated and locked.
# This is useful when you want to reserve all the memory
# upfront or in the cases where you want memory latencies
# to be very predictable
# Default false
enable_mem_prealloc = false
# Reclaim guest freed memory.
# Enabling this will result in the VM balloon device having f_reporting=on set.
# Then the hypervisor will use it to reclaim guest freed memory.
# This is useful for reducing the amount of memory used by a VM.
# Enabling this feature may sometimes reduce the speed of memory access in
# the VM.
#
# Default false
reclaim_guest_freed_memory = false
# Enable huge pages for VM RAM, default false
# Enabling this will result in the VM memory
# being allocated using huge pages.
# This is useful when you want to use vhost-user network
# stacks within the container. This will automatically
# result in memory pre-allocation
enable_hugepages = false
# Enable vhost-user storage device, default false
# Enabling this will result in some Linux reserved block type
# major range 240-254 being chosen to represent vhost-user devices.
enable_vhost_user_store = @DEFENABLEVHOSTUSERSTORE@
# The base directory specifically used for vhost-user devices.
# Its sub-path "block" is used for block devices; "block/sockets" is
# where we expect vhost-user sockets to live; "block/devices" is where
# simulated block device nodes for vhost-user devices live.
vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
# Enable vIOMMU, default false
# Enabling this will result in the VM having a vIOMMU device
# This will also add the following options to the kernel's
# command line: intel_iommu=on,iommu=pt
enable_iommu = false
# Enable IOMMU_PLATFORM, default false
# Enabling this will result in the VM device having iommu_platform=on set
enable_iommu_platform = false
# List of valid annotations values for the vhost user store path
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @DEFVALIDVHOSTUSERSTOREPATHS@
valid_vhost_user_store_paths = @DEFVALIDVHOSTUSERSTOREPATHS@
# The timeout for reconnecting on non-server spdk sockets when the remote end goes away.
# qemu will delay this many seconds and then attempt to reconnect.
# Zero disables reconnecting, and the default is zero.
vhost_user_reconnect_timeout_sec = 0
# Enable file based guest memory support. The default is an empty string which
# will disable this feature. In the case of virtio-fs, this is enabled
# automatically and '/dev/shm' is used as the backing folder.
# This option will be ignored if VM templating is enabled.
file_mem_backend = "@DEFFILEMEMBACKEND@"
# List of valid annotations values for the file_mem_backend annotation
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @DEFVALIDFILEMEMBACKENDS@
valid_file_mem_backends = @DEFVALIDFILEMEMBACKENDS@
# -pflash can add image files to the VM. Its arguments should be in the format
# ["/path/to/flash0.img", "/path/to/flash1.img"]
pflashes = []
# This option changes the default hypervisor and kernel parameters
# to enable debug output where available. Debug also enables the HMP socket.
#
# Default false
enable_debug = false
# This option allows to add an extra HMP or QMP socket when `enable_debug = true`
#
# WARNING: Anyone with access to the extra socket can take full control of
# Qemu. This is for debugging purpose only and must *NEVER* be used in
# production.
#
# Valid values are :
# - "hmp"
# - "qmp"
# - "qmp-pretty" (same as "qmp" with pretty json formatting)
#
# If set to the empty string "", no extra monitor socket is added. This is
# the default.
extra_monitor_socket = ""
# Disable the customizations done in the runtime when it detects
# that it is running on top of a VMM. This will result in the runtime
# behaving as it would when running on bare metal.
#
disable_nesting_checks = false
# If false and nvdimm is supported, use nvdimm device to plug guest image.
# Otherwise virtio-block device is used.
#
# nvdimm is not supported when `confidential_guest = true`.
disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
# Before hot plugging a PCIe device, you need to add a pcie_root_port device.
# Use this parameter when using some large PCI bar devices, such as Nvidia GPU
# The value means the number of pcie_root_port
# Default 0
pcie_root_port = 0
# If the vhost-net backend for virtio-net is not desired, set this to true. Default is false, which trades off
# security (vhost-net runs in ring 0) for network I/O performance.
disable_vhost_net = false
#
# Default entropy source.
# The path to a host source of entropy (including a real hardware RNG)
# /dev/urandom and /dev/random are two main options.
# Be aware that /dev/random is a blocking source of entropy. If the host
# runs out of entropy, the VM's boot time will increase, possibly leading to
# startup timeouts.
# The source of entropy /dev/urandom is non-blocking and provides a
# generally acceptable source of entropy. It should work well for pretty much
# all practical purposes.
entropy_source = "@DEFENTROPYSOURCE@"
# List of valid annotations values for entropy_source
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @DEFVALIDENTROPYSOURCES@
valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
# Path to OCI hook binaries in the *guest rootfs*.
# This does not affect host-side hooks which must instead be added to
# the OCI spec passed to the runtime.
#
# You can create a rootfs with hooks by customizing the osbuilder scripts:
# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder
#
# Hooks must be stored in a subdirectory of guest_hook_path according to their
# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}".
# The agent will scan these directories for executable files and add them, in
# lexicographical order, to the lifecycle of the guest container.
# Hooks are executed in the runtime namespace of the guest. See the official documentation:
# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
# Warnings will be logged if any error is encountered while scanning for hooks,
# but errors will not abort container execution.
# Recommended value when enabling: "/usr/share/oci/hooks"
guest_hook_path = ""
#
# Use rx Rate Limiter to control network I/O inbound bandwidth (size in bits/sec for SB/VM).
# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic.
# Default 0-sized value means unlimited rate.
rx_rate_limiter_max_rate = 0
# Use tx Rate Limiter to control network I/O outbound bandwidth (size in bits/sec for SB/VM).
# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block)
# to discipline traffic.
# Default 0-sized value means unlimited rate.
tx_rate_limiter_max_rate = 0
# Set where to save the guest memory dump file.
# If set, when a GUEST_PANICKED event occurs,
# guest memory will be dumped to the host filesystem under guest_memory_dump_path.
# This directory will be created automatically if it does not exist.
#
# The dumped file (also called vmcore) can be processed with crash or gdb.
#
# WARNING:
# Dumping the guest's memory can take very long, depending on the amount of
# guest memory, and can use a lot of disk space.
# Recommended value when enabling: "/var/crash/kata"
guest_memory_dump_path = ""
# Whether to enable paging.
# Basically, if you want to use "gdb" rather than "crash",
# or need the guest-virtual addresses in the ELF vmcore,
# then you should enable paging.
#
# See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details
guest_memory_dump_paging = false
# Enable swap in the guest. Default false.
# When enable_guest_swap is enabled, insert a raw file to the guest as the swap device
# if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness")
# is bigger than 0.
# The size of the swap device should be
# swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes.
# If swap_in_bytes is not set, the size should be memory_limit_in_bytes.
# If neither swap_in_bytes nor memory_limit_in_bytes is set, the size should
# be default_memory.
enable_guest_swap = false
# use legacy serial for guest console if available and implemented for architecture. Default false
use_legacy_serial = false
# disable applying SELinux on the VMM process (default false)
disable_selinux = @DEFDISABLESELINUX@
# disable applying SELinux on the container process
# If set to false, the type `container_t` is applied to the container process by default.
# Note: To enable guest SELinux, the guest rootfs must be a CentOS rootfs created and built
# with `SELINUX=yes`.
# (default: true)
disable_guest_selinux = @DEFDISABLEGUESTSELINUX@
[factory]
# VM templating support. Once enabled, new VMs are created from template
# using vm cloning. They will share the same initial kernel, initramfs and
# agent memory by mapping it readonly. It helps speed up new container
# creation and saves a lot of memory if there are many kata containers running
# on the same host.
#
# When disabled, new VMs are created from scratch.
#
# Note: Requires "initrd=" to be set ("image=" is not supported).
#
# Default false
enable_template = false
# Specifies the path of template.
#
# Default "/run/vc/vm/template"
template_path = "/run/vc/vm/template"
# The number of caches of VMCache:
# unspecified or == 0 --> VMCache is disabled
# > 0 --> will be set to the specified number
#
# VMCache is a function that creates VMs as caches before using it.
# It helps speed up new container creation.
# The function consists of a server and some clients communicating
# through Unix socket. The protocol is gRPC in protocols/cache/cache.proto.
# The VMCache server will create some VMs and cache them by factory cache.
# It will convert the VM to gRPC format and transport it when it gets
# requests from clients.
# Factory grpccache is the VMCache client. It will request gRPC format
# VM and convert it back to a VM. If VMCache function is enabled,
# kata-runtime will request VM from factory grpccache when it creates
# a new sandbox.
#
# Default 0
vm_cache_number = 0
# Specify the address of the Unix socket that is used by VMCache.
#
# Default /var/run/kata-containers/cache.sock
vm_cache_endpoint = "/var/run/kata-containers/cache.sock"
[agent.@PROJECT_TYPE@]
# If enabled, make the agent display debug-level messages.
# (default: disabled)
enable_debug = false
# Enable agent tracing.
#
# If enabled, the agent will generate OpenTelemetry trace spans.
#
# Notes:
#
# - If the runtime also has tracing enabled, the agent spans will be
# associated with the appropriate runtime parent span.
# - If enabled, the runtime will wait for the container to shutdown,
# increasing the container shutdown time slightly.
#
# (default: disabled)
enable_tracing = false
# Comma separated list of kernel modules and their parameters.
# These modules will be loaded in the guest kernel using modprobe(8).
# The following example can be used to load two kernel modules with parameters
# - kernel_modules=["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1", "i915 enable_ppgtt=0"]
# The first word is considered as the module name and the rest as its parameters.
# Container will not be started when:
# * A kernel module is specified and the modprobe command is not installed in the guest
# or it fails loading the module.
# * The module is not available in the guest or it doesn't meet the guest kernel
# requirements, like architecture and version.
#
kernel_modules = []
# Enable debug console.
# If enabled, users can connect to the guest OS running inside the hypervisor
# through the "kata-runtime exec <sandbox-id>" command
debug_console_enabled = false
# Agent dial timeout in milliseconds.
# (default: 10)
dial_timeout_ms = 10
# Agent reconnect timeout in milliseconds.
# Retry count = reconnect_timeout_ms / dial_timeout_ms (default: 300)
# If you find the pod cannot connect to the agent when starting, consider
# increasing this value to increase the number of retries.
# Do not change the value of dial_timeout_ms unless you know what you
# are doing.
# (default: 3000)
reconnect_timeout_ms = 3000
# Create Container Request Timeout
# This timeout value is used to set the maximum duration for the agent to process a CreateContainerRequest.
# It's also used to ensure that workloads, especially those involving large image pulls within the guest,
# have sufficient time to complete.
#
# Effective Timeout Determination:
# The effective timeout for a CreateContainerRequest is determined by taking the minimum of the following two values:
# - create_container_timeout: The timeout value configured for creating containers (default: 30,000 milliseconds).
# - runtime-request-timeout: The timeout value specified in the Kubelet configuration described at the link below:
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout)
# Defaults to @DEFCREATECONTAINERTIMEOUT_COCO@ second(s)
create_container_timeout = @DEFCREATECONTAINERTIMEOUT_COCO@
[runtime]
# If enabled, the runtime will log additional debug messages to the
# system log
# (default: disabled)
enable_debug = false
#
# Internetworking model
# Determines how the VM should be connected to the
# container network interface
# Options:
#
# - macvtap
# Used when the Container network interface can be bridged using
# macvtap.
#
# - none
# Used with a customized network. Only creates a tap device. No veth pair.
#
# - tcfilter
# Uses tc filter rules to redirect traffic from the network interface
# provided by the plugin to a tap interface connected to the VM.
#
internetworking_model = "@DEFNETWORKMODEL_QEMU@"
name="@RUNTIMENAME@"
hypervisor_name="@HYPERVISOR_QEMU@"
agent_name="@PROJECT_TYPE@"
# disable guest seccomp
# Determines whether container seccomp profiles are passed to the virtual
# machine and applied by the kata agent. If set to true, seccomp is not applied
# within the guest
# (default: true)
disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@
# vCPU pinning settings
# If enabled, each vCPU thread will be scheduled to a fixed CPU.
# Required condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
enable_vcpus_pinning = false
# Apply a custom SELinux security policy to the container process inside the VM.
# This is used when you want to apply a type other than the default `container_t`,
# so general users should not uncomment and apply it.
# (format: "user:role:type")
# Note: You cannot specify MCS policy with the label because the sensitivity levels and
# categories are determined automatically by high-level container runtimes such as containerd.
# Example value when enabling: "system_u:system_r:container_t"
guest_selinux_label = "@DEFGUESTSELINUXLABEL@"
# If enabled, the runtime will create opentracing.io traces and spans.
# (See https://www.jaegertracing.io/docs/getting-started).
# (default: disabled)
enable_tracing = false
# Set the full url to the Jaeger HTTP Thrift collector.
# The default if not set will be "http://localhost:14268/api/traces"
jaeger_endpoint = ""
# Sets the username to be used if basic auth is required for Jaeger.
jaeger_user = ""
# Sets the password to be used if basic auth is required for Jaeger.
jaeger_password = ""
# If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
# This option may have some potential impacts on your host. It should only be used when you know what you're doing.
# `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only
# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
# (like OVS) directly.
# (default: false)
disable_new_netns = false
# if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
# The container cgroups in the host are not created, just one single cgroup per sandbox.
# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
# The sandbox cgroup is constrained if there is no container type annotation.
# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY_QEMU@
# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
# when a hardware architecture or hypervisor solution is utilized which does not support CPU and/or memory hotplug.
# Compatibility for determining appropriate sandbox (VM) size:
# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
# does not yet support sandbox sizing annotations.
# - When running single containers using a tool like ctr, container sizing information will be available.
static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_COCO@
# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro) into the sandbox's shared path.
# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
# These will not be exposed to the container workloads, and are only provided for potential guest services.
sandbox_bind_mounts = @DEFBINDMOUNTS@
# VFIO Mode
# Determines how VFIO devices should be presented to the container.
# Options:
#
# - vfio
# Matches behaviour of OCI runtimes (e.g. runc) as much as
# possible. VFIO devices will appear in the container as VFIO
# character devices under /dev/vfio. The exact names may differ
# from the host (they need to match the VM's IOMMU group numbers
# rather than the host's)
#
# - guest-kernel
# This is a Kata-specific behaviour that's useful in certain cases.
# The VFIO device is managed by whatever driver in the VM kernel
# claims it. This means it will appear as one or more device nodes
# or network interfaces depending on the nature of the device.
# Using this mode requires specially built workloads that know how
# to locate the relevant device interfaces within the VM.
#
vfio_mode = "@DEFVFIOMODE@"
# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@
# Enabled experimental feature list, format: ["a", "b"].
# Experimental features are features not stable enough for production,
# they may break compatibility, and are prepared for a big version bump.
# Supported experimental features:
# for example:
# experimental=["force_guest_pull"]
# which enables force_guest_pull mode in CoCo scenarios.
# (default: [])
experimental = @DEFAULTEXPFEATURES@
# If enabled, user can run pprof tools with shim v2 process through kata-monitor.
# (default: false)
enable_pprof = false
# Base directory of directly attachable network config.
# Network devices for VM-based containers are allowed to be placed in the
# host netns to eliminate as many hops as possible, which is what we
# called a "Directly Attachable Network". The config, set by special CNI
# plugins, is used to tell the Kata containers what devices are attached
# to the hypervisor.
# (default: /run/kata-containers/dans)
dan_conf = "@DEFDANCONF@"
# pod_resource_api_sock specifies the unix socket for the Kubelet's
# PodResource API endpoint. If empty, kubernetes based cold plug
# will not be attempted. In order for this feature to work, the
# KubeletPodResourcesGet featureGate must be enabled in Kubelet,
# if using Kubelet older than 1.34.
#
# The pod resource API's socket is relative to the Kubelet's root-dir,
# which is defined by the cluster admin, and its location is:
# ${KubeletRootDir}/pod-resources/kubelet.sock
#
# cold_plug_vfio (see hypervisor config) acts as a feature gate:
# cold_plug_vfio = no_port (default) => no cold plug
# cold_plug_vfio != no_port AND pod_resource_api_sock = "" => need
# explicit CDI annotation for cold plug (applies mainly
# to non-k8s cases)
# cold_plug_vfio != no_port AND pod_resource_api_sock != "" => kubelet
# based cold plug.
pod_resource_api_sock = "@DEFPODRESOURCEAPISOCK@"

View File

@@ -33,9 +33,9 @@ oci-spec = { workspace = true }
futures = "0.3.25"
safe-path = "0.1.0"
crossbeam-channel = "0.5.6"
qapi = { version = "0.15", features = ["qmp", "async-tokio-all"] }
qapi-spec = "0.3.2"
qapi-qmp = "0.15.0"
qapi = { version = "0.14", features = ["qmp", "async-tokio-all"] }
qapi-spec = "0.3.1"
qapi-qmp = "0.14.0"
hyperlocal = { workspace = true }
hyper = { workspace = true, features = ["client"] }

View File

@@ -29,7 +29,6 @@ use kata_types::{
};
use nix::unistd::{setgid, setuid, Gid, Uid};
use persist::sandbox_persist::Persist;
use qapi_qmp::MigrationStatus;
use std::cmp::Ordering;
use std::convert::TryInto;
use std::path::Path;
@@ -37,7 +36,6 @@ use std::process::Stdio;
use std::time::Duration;
use tokio::time::sleep;
use tokio::time::Instant;
use tokio::{
io::{AsyncBufReadExt, BufReader},
process::{Child, ChildStderr, Command},
@@ -333,70 +331,16 @@ impl QemuInner {
}
pub async fn wait_for_migration(&mut self) -> Result<()> {
// Ensure QMP is connected.
if self.qmp.is_none() {
return Err(anyhow!("QMP is not connected"));
}
let qmp = self
.qmp
.as_mut()
.context("failed to get QMP connection for boot from template")?;
// Helper to evaluate the migration state returned by `query-migrate`.
let migrate_completed = |st: Option<MigrationStatus>| -> Result<bool> {
match st {
Some(MigrationStatus::completed) => Ok(true), // done
Some(MigrationStatus::failed) | Some(MigrationStatus::cancelled) => {
Err(anyhow!("migration ended early: {:?}", st))
}
_ => Ok(false), // still running / unknown
}
};
// If already finished, just return Ok(()).
let mi = qmp.execute_query_migrate().await?;
match migrate_completed(mi.status) {
Ok(true) => return Ok(()),
Ok(false) => {
info!(sl!(), "migration not yet completed, entering wait loop");
}
Err(e) => return Err(e),
}
// Overall timeout for migration.
// Regarding why the timeout is set to 280ms and whether it should be adjusted, we need more empirical data.
// For now, we will keep using the previous configuration.
let timeout = Duration::from_millis(280);
// Polling interval: start small, then back off to reduce load.
let poll_interval = Duration::from_millis(20);
// Deadline with a timeout.
let deadline = Instant::now()
.checked_add(timeout)
.ok_or_else(|| anyhow!("timeout overflow"))?;
loop {
// Query migration status via QMP.
let mi = qmp.execute_query_migrate().await?;
match migrate_completed(mi.status) {
Ok(true) => return Ok(()),
Ok(false) => {
info!(sl!(), "migration still not completed, continuing wait loop");
}
Err(e) => return Err(e),
}
// Stop waiting once we hit the timeout.
let now = Instant::now();
if now >= deadline {
return Err(anyhow!("wait_for_migration timeout after {:?}", timeout));
}
// Sleep until next tick, but never beyond deadline
sleep(poll_interval.min(deadline - now)).await;
}
// The result format returned by QEMU version 9.1.2 does not match
// the expected format of the existing QAPI version 0.14.
// However, no issues were found when tested with QAPI version 0.15.
// Therefore, we will temporarily skip this issue.
sleep(Duration::from_millis(280)).await;
Ok(())
}
pub(crate) async fn stop_vm(&mut self) -> Result<()> {

View File

@@ -14,7 +14,7 @@ use kata_types::rootless::is_rootless;
use nix::sys::socket::{sendmsg, ControlMessage, MsgFlags};
use qapi_qmp::{
self as qmp, BlockdevAioOptions, BlockdevOptions, BlockdevOptionsBase,
BlockdevOptionsGenericFormat, BlockdevOptionsRaw, BlockdevRef, MigrationInfo, PciDeviceInfo,
BlockdevOptionsGenericFormat, BlockdevOptionsRaw, BlockdevRef, PciDeviceInfo,
};
use qapi_qmp::{migrate, migrate_incoming, migrate_set_capabilities};
use qapi_qmp::{MigrationCapability, MigrationCapabilityStatus};
@@ -98,27 +98,20 @@ impl Qmp {
pub fn execute_migration(&mut self, uri: &str) -> Result<()> {
self.qmp
.execute(&migrate {
channels: None,
detach: None,
resume: None,
uri: Some(uri.to_string()),
blk: None,
inc: None,
uri: uri.to_string(),
})
.map(|_| ())
.context("execute migration")
}
pub async fn execute_query_migrate(&mut self) -> Result<MigrationInfo> {
let migrate_info = self.qmp.execute(&qmp::query_migrate {})?;
Ok(migrate_info)
}
pub fn execute_migration_incoming(&mut self, uri: &str) -> Result<()> {
self.qmp
.execute(&migrate_incoming {
channels: None,
exit_on_error: None,
uri: Some(uri.to_string()),
uri: uri.to_string(),
})
.map(|_| ())
.context("execute migration incoming")
@@ -277,7 +270,6 @@ impl Qmp {
pmem: None,
readonly: None,
mem_path: "/dev/shm".to_owned(),
rom: None,
},
});
self.qmp.execute(&memory_backend)?;
@@ -842,6 +834,7 @@ impl Qmp {
| qmp::CpuInfoFast::mips64(cpu_info)
| qmp::CpuInfoFast::mips64el(cpu_info)
| qmp::CpuInfoFast::mipsel(cpu_info)
| qmp::CpuInfoFast::nios2(cpu_info)
| qmp::CpuInfoFast::or1k(cpu_info)
| qmp::CpuInfoFast::ppc(cpu_info)
| qmp::CpuInfoFast::ppc64(cpu_info)

View File

@@ -526,7 +526,7 @@ mod tests {
}
let tmp2 = TempDir::new().expect("create tmp2");
let tmp2_path = tmp2.keep();
let tmp2_path = tmp2.into_path();
let _ = fs::remove_dir_all(&tmp2_path);
let target2 = tmp2_path.join("foo").join("bar");

View File

@@ -24,7 +24,7 @@ libc = { workspace = true }
log = { workspace = true }
nix = { workspace = true }
protobuf = { workspace = true }
sha2 = { workspace = true }
sha2 = "=0.9.3"
slog = { workspace = true, features = [
"std",
"release_max_level_trace",

View File

@@ -234,7 +234,7 @@ DEFDISABLESELINUX := false
DEFDISABLEGUESTSELINUX := true
# Default is empty string "" to match the default golang (when commented out in config).
# Most users will want to set this to "system_u:system_r:container_t" for SELinux support.
DEFGUESTSELINUXLABEL :=
DEFGUESTSELINUXLABEL :=
# Default SeccompSandbox param
# The same default policy is used by libvirt
@@ -291,7 +291,6 @@ DEFSTATICRESOURCEMGMT_TEE = true
DEFSTATICRESOURCEMGMT_NV = true
DEFDISABLEIMAGENVDIMM ?= false
DEFDISABLEIMAGENVDIMM_NV = true
DEFBINDMOUNTS := []
@@ -477,11 +476,13 @@ ifneq (,$(QEMUCMD))
DEFAULTVFIOPORT_NV = root-port
DEFAULTPCIEROOTPORT_NV = 8
# Disable the devtmpfs mount in guest. NVRC does this, and later kata-agent
# attempts this as well in a non-failing manner. Otherwise, NVRC fails when
# using an image and /dev is already mounted.
KERNELPARAMS_NV = "cgroup_no_v1=all"
KERNELPARAMS_NV += "devtmpfs.mount=0"
KERNELPARAMS_NV = "agent.hotplug_timeout=20"
KERNELPARAMS_NV += "cgroup_no_v1=all"
KERNELTDXPARAMS_NV = $(KERNELPARAMS_NV)
KERNELTDXPARAMS_NV += "authorize_allow_devs=pci:ALL"
KERNELSNPPARAMS_NV = $(KERNELPARAMS_NV)
# Setting this to false can lead to cgroup leakages in the host
# Best practice for production is to set this to true
@@ -659,6 +660,8 @@ USER_VARS += DEFAULTMEMORY_NV
USER_VARS += DEFAULTVFIOPORT_NV
USER_VARS += DEFAULTPCIEROOTPORT_NV
USER_VARS += KERNELPARAMS_NV
USER_VARS += KERNELTDXPARAMS_NV
USER_VARS += KERNELSNPPARAMS_NV
USER_VARS += DEFAULTTIMEOUT_NV
USER_VARS += DEFSANDBOXCGROUPONLY_NV
USER_VARS += DEFROOTFSTYPE
@@ -785,7 +788,6 @@ USER_VARS += DEFVFIOMODE
USER_VARS += DEFVFIOMODE_SE
USER_VARS += BUILDFLAGS
USER_VARS += DEFDISABLEIMAGENVDIMM
USER_VARS += DEFDISABLEIMAGENVDIMM_NV
USER_VARS += DEFCCAMEASUREMENTALGO
USER_VARS += DEFSHAREDFS_QEMU_CCA_VIRTIOFS
USER_VARS += DEFPODRESOURCEAPISOCK

View File

@@ -90,7 +90,7 @@ snp_guest_policy = 196608
# may stop the virtual machine from booting.
# To see the list of default parameters, enable hypervisor debug, create a
# container and look for 'default-kernel-parameters' log entries.
kernel_params = "@KERNELPARAMS_NV@"
kernel_params = "@KERNELSNPPARAMS_NV@"
# Path to the firmware.
# If you want that qemu uses the default firmware leave this option empty
@@ -379,7 +379,7 @@ msize_9p = @DEFMSIZE9P@
# Otherwise virtio-block device is used.
#
# nvdimm is not supported when `confidential_guest = true`.
disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM_NV@
disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
# Before hot plugging a PCIe device, you need to add a pcie_root_port device.
# Use this parameter when using some large PCI bar devices, such as Nvidia GPU

View File

@@ -67,7 +67,7 @@ valid_hypervisor_paths = @QEMUTDXEXPERIMENTALVALIDHYPERVISORPATHS@
# may stop the virtual machine from booting.
# To see the list of default parameters, enable hypervisor debug, create a
# container and look for 'default-kernel-parameters' log entries.
kernel_params = "@KERNELPARAMS_NV@"
kernel_params = "@KERNELTDXPARAMS_NV@"
# Path to the firmware.
# If you want that qemu uses the default firmware leave this option empty
@@ -356,7 +356,7 @@ msize_9p = @DEFMSIZE9P@
# Otherwise virtio-block device is used.
#
# nvdimm is not supported when `confidential_guest = true`.
disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM_NV@
disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
# Before hot plugging a PCIe device, you need to add a pcie_root_port device.
# Use this parameter when using some large PCI bar devices, such as Nvidia GPU

View File

@@ -353,7 +353,7 @@ msize_9p = @DEFMSIZE9P@
# Otherwise virtio-block device is used.
#
# nvdimm is not supported when `confidential_guest = true`.
disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM_NV@
disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
# Enable hot-plugging of VFIO devices to a bridge-port,
# root-port or switch-port.

View File

@@ -1415,13 +1415,6 @@ func (s *Sandbox) startVM(ctx context.Context, prestartHookFunc func(context.Con
if err != nil {
return err
}
// If we want the network, scan the netns again to update the network
// configuration after the prestart hooks have run.
if !s.config.NetworkConfig.DisableNewNetwork {
if _, err := s.network.AddEndpoints(ctx, s, nil, false); err != nil {
return err
}
}
}
if err := s.network.Run(ctx, func() error {
@@ -2552,18 +2545,9 @@ func (s *Sandbox) resourceControllerDelete() error {
return err
}
// When sandbox_cgroup_only is enabled, all Kata threads live in the
// sandbox controller and systemd can move tasks as part of unit deletion.
// In that mode, a systemd-formatted cgroup path is not a filesystem path,
// so MoveTo would fail with "invalid group path".
// Keep MoveTo for the case of using cgroupfs paths and for the
// non-sandbox_cgroup_only mode. In that mode, Kata may use an overhead
// cgroup in which case an explicit MoveTo is used to drain tasks.
if !(resCtrl.IsSystemdCgroup(s.state.SandboxCgroupPath) && s.config.SandboxCgroupOnly) {
resCtrlParent := sandboxController.Parent()
if err := sandboxController.MoveTo(resCtrlParent); err != nil {
return err
}
resCtrlParent := sandboxController.Parent()
if err := sandboxController.MoveTo(resCtrlParent); err != nil {
return err
}
if err := sandboxController.Delete(); err != nil {
@@ -2576,12 +2560,9 @@ func (s *Sandbox) resourceControllerDelete() error {
return err
}
// See comment at above MoveTo: Avoid this action as systemd moves tasks on unit deletion.
if !(resCtrl.IsSystemdCgroup(s.state.OverheadCgroupPath) && s.config.SandboxCgroupOnly) {
resCtrlParent := overheadController.Parent()
if err := s.overheadController.MoveTo(resCtrlParent); err != nil {
return err
}
resCtrlParent := overheadController.Parent()
if err := s.overheadController.MoveTo(resCtrlParent); err != nil {
return err
}
if err := overheadController.Delete(); err != nil {

View File

@@ -54,7 +54,7 @@ version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"cipher",
"cpufeatures",
"zeroize",
@@ -246,12 +246,13 @@ dependencies = [
[[package]]
name = "async-compression"
version = "0.4.37"
version = "0.4.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d10e4f991a553474232bc0a31799f6d24b034a84c0971d80d2e2f78b2e576e40"
checksum = "93c1f86859c1af3d514fa19e8323147ff10ea98684e6c7b307912509f50e67b2"
dependencies = [
"compression-codecs",
"compression-core",
"futures-core",
"futures-io",
"pin-project-lite",
"tokio",
@@ -291,7 +292,7 @@ checksum = "0fc5b45d93ef0529756f812ca52e44c221b35341892d3dcc34132ac02f3dd2af"
dependencies = [
"async-lock 2.8.0",
"autocfg",
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"concurrent-queue",
"futures-lite 1.13.0",
"log",
@@ -310,7 +311,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1237c0ae75a0f3765f58910ff9cdd0a12eeb39ab2f4c7de23262f337f0aacbb3"
dependencies = [
"async-lock 3.4.0",
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"concurrent-queue",
"futures-io",
"futures-lite 2.0.0",
@@ -352,7 +353,7 @@ dependencies = [
"async-lock 2.8.0",
"async-signal",
"blocking",
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"event-listener 3.1.0",
"futures-lite 1.13.0",
"rustix 0.38.34",
@@ -379,7 +380,7 @@ dependencies = [
"async-io 2.4.1",
"async-lock 3.4.0",
"atomic-waker",
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"futures-core",
"futures-io",
"rustix 1.0.7",
@@ -388,6 +389,28 @@ dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "async-stream"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476"
dependencies = [
"async-stream-impl",
"futures-core",
"pin-project-lite",
]
[[package]]
name = "async-stream-impl"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.87",
]
[[package]]
name = "async-task"
version = "4.7.1"
@@ -396,9 +419,9 @@ checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de"
[[package]]
name = "async-trait"
version = "0.1.89"
version = "0.1.88"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5"
dependencies = [
"proc-macro2",
"quote",
@@ -464,10 +487,11 @@ dependencies = [
[[package]]
name = "axum"
version = "0.8.8"
version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8"
checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
dependencies = [
"async-trait",
"axum-core",
"bytes 1.7.2",
"futures-util",
@@ -480,26 +504,29 @@ dependencies = [
"mime",
"percent-encoding",
"pin-project-lite",
"serde_core",
"rustversion",
"serde",
"sync_wrapper",
"tower",
"tower 0.5.2",
"tower-layer",
"tower-service",
]
[[package]]
name = "axum-core"
version = "0.5.6"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1"
checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
dependencies = [
"async-trait",
"bytes 1.7.2",
"futures-core",
"futures-util",
"http 1.1.0",
"http-body 1.0.1",
"http-body-util",
"mime",
"pin-project-lite",
"rustversion",
"sync_wrapper",
"tower-layer",
"tower-service",
@@ -512,7 +539,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6"
dependencies = [
"addr2line",
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"libc",
"miniz_oxide",
"object",
@@ -838,7 +865,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fbdc32a78afc325d71a48d13084f1c3ddf67cc5dc06c6e5439a8630b14612cad"
dependencies = [
"bitflags 1.3.2",
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"libc",
]
@@ -907,9 +934,9 @@ checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
[[package]]
name = "cfg-if"
version = "1.0.4"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
[[package]]
name = "cfg_aliases"
@@ -1055,9 +1082,9 @@ checksum = "2382f75942f4b3be3690fe4f86365e9c853c1587d6ee58212cebf6e2a9ccd101"
[[package]]
name = "compression-codecs"
version = "0.4.36"
version = "0.4.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "00828ba6fd27b45a448e57dbfe84f1029d4c9f26b368157e9a448a5f49a2ec2a"
checksum = "680dc087785c5230f8e8843e2e57ac7c1c90488b6a91b88caa265410568f441b"
dependencies = [
"compression-core",
"flate2",
@@ -1068,9 +1095,9 @@ dependencies = [
[[package]]
name = "compression-core"
version = "0.4.31"
version = "0.4.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d"
checksum = "3a9b614a5787ef0c8802a55766480563cb3a93b435898c422ed2a359cf811582"
[[package]]
name = "concurrent-queue"
@@ -1138,7 +1165,7 @@ version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
]
[[package]]
@@ -1147,7 +1174,7 @@ version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6eb9105919ca8e40d437fc9cbb8f1975d916f1bd28afe795a48aae32a2cc8920"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-epoch",
@@ -1170,7 +1197,7 @@ version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fca89a0e215bab21874660c67903c5f143333cab1da83d041c7ded6053774751"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"crossbeam-epoch",
"crossbeam-utils",
]
@@ -1182,7 +1209,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e3681d554572a651dda4186cd47240627c3d0114d45a95f6ad27f2f22e7548d"
dependencies = [
"autocfg",
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"crossbeam-utils",
]
@@ -1192,7 +1219,7 @@ version = "0.3.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adc6598521bb5a83d491e8c1fe51db7296019d2ca3cb93cc6c2a20369a4d78a2"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"crossbeam-utils",
]
@@ -1202,7 +1229,7 @@ version = "0.8.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3a430a770ebd84726f584a90ee7f020d28db52c6d02138900f22341f866d39c"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
]
[[package]]
@@ -1274,7 +1301,7 @@ version = "4.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"cpufeatures",
"curve25519-dalek-derive",
"digest 0.10.7",
@@ -1510,7 +1537,7 @@ version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"dirs-sys-next",
]
@@ -1827,7 +1854,7 @@ version = "0.2.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35c0522e981e68cbfa8c3f978441a5f34b30b96e146b33cd3359176b50fe8586"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"libc",
"libredox",
"windows-sys 0.59.0",
@@ -1853,9 +1880,9 @@ checksum = "b3ea1ec5f8307826a5b71094dd91fc04d4ae75d5709b20ad351c7fb4815c86ec"
[[package]]
name = "flate2"
version = "1.1.8"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369"
checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d"
dependencies = [
"crc32fast",
"miniz_oxide",
@@ -2030,7 +2057,7 @@ version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"js-sys",
"libc",
"wasi",
@@ -2103,7 +2130,7 @@ dependencies = [
"futures-core",
"futures-sink",
"http 1.1.0",
"indexmap 2.13.0",
"indexmap 2.6.0",
"slab",
"tokio",
"tokio-util",
@@ -2121,9 +2148,9 @@ dependencies = [
[[package]]
name = "hashbrown"
version = "0.16.1"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"
[[package]]
name = "heck"
@@ -2591,14 +2618,14 @@ dependencies = [
[[package]]
name = "image-rs"
version = "0.1.0"
source = "git+https://github.com/confidential-containers/guest-components?rev=026694d44d4ec483465d2fa5f80a0376166b174d#026694d44d4ec483465d2fa5f80a0376166b174d"
source = "git+https://github.com/confidential-containers/guest-components?rev=048ddaec4ecd6ee45c845d69bc39416908764560#048ddaec4ecd6ee45c845d69bc39416908764560"
dependencies = [
"anyhow",
"astral-tokio-tar",
"async-compression",
"async-trait",
"base64 0.22.1",
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"filetime",
"flate2",
"futures",
@@ -2623,7 +2650,7 @@ dependencies = [
"thiserror 2.0.12",
"tokio",
"tokio-util",
"toml 0.9.11+spec-1.1.0",
"toml 0.8.23",
"tonic",
"url",
"walkdir",
@@ -2644,14 +2671,13 @@ dependencies = [
[[package]]
name = "indexmap"
version = "2.13.0"
version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017"
checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da"
dependencies = [
"equivalent",
"hashbrown 0.16.1",
"hashbrown 0.15.2",
"serde",
"serde_core",
]
[[package]]
@@ -2692,7 +2718,7 @@ version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
]
[[package]]
@@ -2713,7 +2739,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b86e202f00093dcba4275d4636b93ef9dd75d025ae560d2521b45ea28ab49013"
dependencies = [
"bitflags 2.6.0",
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"libc",
]
@@ -2960,7 +2986,7 @@ version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"windows-targets 0.52.6",
]
@@ -3034,9 +3060,9 @@ dependencies = [
[[package]]
name = "log"
version = "0.4.29"
version = "0.4.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"
[[package]]
name = "logging"
@@ -3065,9 +3091,9 @@ dependencies = [
[[package]]
name = "matchit"
version = "0.8.4"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
[[package]]
name = "md-5"
@@ -3075,7 +3101,7 @@ version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"digest 0.10.7",
]
@@ -3147,7 +3173,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
dependencies = [
"adler2",
"simd-adler32",
]
[[package]]
@@ -3168,7 +3193,7 @@ version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39a6bfcc6c8c7eed5ee98b9c3e33adc726054389233e201c95dab2d41a3839d2"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"downcast",
"fragile",
"mockall_derive",
@@ -3182,7 +3207,7 @@ version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25ca3004c2efe9011bd4e461bd8256445052b9615405b4f7ea43fc8ca5c20898"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"proc-macro2",
"quote",
"syn 2.0.87",
@@ -3208,7 +3233,7 @@ checksum = "8f3790c00a0150112de0f4cd161e3d7fc4b2d8a5542ffc35f099a2562aecb35c"
dependencies = [
"bitflags 1.3.2",
"cc",
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"libc",
"memoffset 0.6.5",
]
@@ -3220,7 +3245,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa52e972a9a719cecb6864fb88568781eb706bac2cd1d4f04a648542dbf78069"
dependencies = [
"bitflags 1.3.2",
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"libc",
"memoffset 0.6.5",
]
@@ -3233,7 +3258,7 @@ checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4"
dependencies = [
"autocfg",
"bitflags 1.3.2",
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"libc",
]
@@ -3244,7 +3269,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b"
dependencies = [
"bitflags 1.3.2",
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"libc",
"memoffset 0.7.1",
"pin-utils",
@@ -3257,7 +3282,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6"
dependencies = [
"bitflags 2.6.0",
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"cfg_aliases",
"libc",
]
@@ -3422,9 +3447,9 @@ dependencies = [
[[package]]
name = "oci-spec"
version = "0.8.4"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc3da52b83ce3258fbf29f66ac784b279453c2ac3c22c5805371b921ede0d308"
checksum = "2eb4684653aeaba48dea019caa17b2773e1212e281d50b6fa759f36fe032239d"
dependencies = [
"const_format",
"derive_builder",
@@ -3440,11 +3465,11 @@ dependencies = [
[[package]]
name = "ocicrypt-rs"
version = "0.1.0"
source = "git+https://github.com/confidential-containers/guest-components?rev=026694d44d4ec483465d2fa5f80a0376166b174d#026694d44d4ec483465d2fa5f80a0376166b174d"
source = "git+https://github.com/confidential-containers/guest-components?rev=048ddaec4ecd6ee45c845d69bc39416908764560#048ddaec4ecd6ee45c845d69bc39416908764560"
dependencies = [
"anyhow",
"base64 0.22.1",
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"protos",
"serde",
"serde_json",
@@ -3607,7 +3632,7 @@ version = "0.9.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"libc",
"redox_syscall 0.5.7",
"smallvec",
@@ -3736,7 +3761,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db"
dependencies = [
"fixedbitset 0.4.2",
"indexmap 2.13.0",
"indexmap 2.6.0",
]
[[package]]
@@ -3872,7 +3897,7 @@ checksum = "4b2d323e8ca7996b3e23126511a523f7e62924d93ecd5ae73b333815b0eb3dce"
dependencies = [
"autocfg",
"bitflags 1.3.2",
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"concurrent-queue",
"libc",
"log",
@@ -3886,7 +3911,7 @@ version = "3.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b53a684391ad002dd6a596ceb6c74fd004fdce75f4be2e3f615068abbea5fd50"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"concurrent-queue",
"hermit-abi 0.5.2",
"pin-project-lite",
@@ -3912,7 +3937,7 @@ version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d1fe60d06143b2430aa532c94cfe9e29783047f06c0d7fd359a9a51b729fa25"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"cpufeatures",
"opaque-debug",
"universal-hash",
@@ -4058,12 +4083,12 @@ dependencies = [
[[package]]
name = "prost"
version = "0.14.3"
version = "0.13.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568"
checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5"
dependencies = [
"bytes 1.7.2",
"prost-derive 0.14.3",
"prost-derive 0.13.5",
]
[[package]]
@@ -4099,9 +4124,9 @@ dependencies = [
[[package]]
name = "prost-derive"
version = "0.14.3"
version = "0.13.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b"
checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d"
dependencies = [
"anyhow",
"itertools 0.11.0",
@@ -4153,7 +4178,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4aeaa1f2460f1d348eeaeed86aea999ce98c1bded6f089ff8514c9d9dbdc973"
dependencies = [
"anyhow",
"indexmap 2.13.0",
"indexmap 2.6.0",
"log",
"protobuf",
"protobuf-support",
@@ -4187,11 +4212,10 @@ dependencies = [
[[package]]
name = "protos"
version = "0.1.0"
source = "git+https://github.com/confidential-containers/guest-components?rev=026694d44d4ec483465d2fa5f80a0376166b174d#026694d44d4ec483465d2fa5f80a0376166b174d"
source = "git+https://github.com/confidential-containers/guest-components?rev=048ddaec4ecd6ee45c845d69bc39416908764560#048ddaec4ecd6ee45c845d69bc39416908764560"
dependencies = [
"prost 0.14.3",
"prost 0.13.5",
"tonic",
"tonic-prost",
]
[[package]]
@@ -4216,9 +4240,9 @@ dependencies = [
[[package]]
name = "qapi"
version = "0.15.0"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b047adab56acc4948d4b9b58693c1f33fd13efef2d6bb5f0f66a47436ceada8"
checksum = "c6412bdd014ebee03ddbbe79ac03a0b622cce4d80ba45254f6357c847f06fa38"
dependencies = [
"bytes 1.7.2",
"futures",
@@ -4253,9 +4277,9 @@ dependencies = [
[[package]]
name = "qapi-qmp"
version = "0.15.0"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45303cac879d89361cad0287ae15f9ae1e7799b904b474152414aeece39b9875"
checksum = "e8b944db7e544d2fa97595e9a000a6ba5c62c426fa185e7e00aabe4b5640b538"
dependencies = [
"qapi-codegen",
"qapi-spec",
@@ -4526,7 +4550,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
dependencies = [
"cc",
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"getrandom",
"libc",
"untrusted 0.9.0",
@@ -4582,9 +4606,9 @@ dependencies = [
[[package]]
name = "rsa"
version = "0.9.9"
version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40a0376c50d0358279d9d643e4bf7b7be212f1f4ff1da9070a7b54d22ef75c88"
checksum = "5d0e5124fcb30e76a7e79bfee683a2746db83784b86289f6251b54b7950a0dfc"
dependencies = [
"const-oid",
"digest 0.10.7",
@@ -4616,7 +4640,7 @@ version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6d5f2436026b4f6e79dc829837d467cc7e9a55ee40e750d716713540715a2df"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"ordered-multimap",
]
@@ -4713,7 +4737,7 @@ dependencies = [
"bit-vec 0.8.0",
"capctl",
"caps",
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"cgroups-rs",
"futures",
"inotify",
@@ -4951,9 +4975,9 @@ checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b"
[[package]]
name = "sequoia-openpgp"
version = "2.1.0"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0e334ce3ec5b9b47d86a80563b3ecec435f59acf37e86058b3b686a42c5a2ba"
checksum = "015e5fc3d023418b9db98ca9a7f3e90b305872eeafe5ca45c5c32b5eb335c1e8"
dependencies = [
"aes",
"aes-gcm",
@@ -4966,7 +4990,6 @@ dependencies = [
"bzip2",
"camellia",
"cast5",
"cbc",
"cfb-mode",
"chrono",
"cipher",
@@ -5012,11 +5035,10 @@ dependencies = [
[[package]]
name = "serde"
version = "1.0.228"
version = "1.0.217"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
dependencies = [
"serde_core",
"serde_derive",
]
@@ -5060,20 +5082,11 @@ dependencies = [
"serde",
]
[[package]]
name = "serde_core"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.228"
version = "1.0.217"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
dependencies = [
"proc-macro2",
"quote",
@@ -5124,11 +5137,11 @@ dependencies = [
[[package]]
name = "serde_spanned"
version = "1.0.4"
version = "0.6.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8bbf91e5a4d6315eee45e704372590b30e260ee83af6639d64557f51b067776"
checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3"
dependencies = [
"serde_core",
"serde",
]
[[package]]
@@ -5153,7 +5166,7 @@ dependencies = [
"chrono",
"hex",
"indexmap 1.9.3",
"indexmap 2.13.0",
"indexmap 2.6.0",
"schemars",
"serde",
"serde_derive",
@@ -5180,7 +5193,7 @@ version = "0.9.34+deprecated"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
dependencies = [
"indexmap 2.13.0",
"indexmap 2.6.0",
"itoa",
"ryu",
"serde",
@@ -5193,7 +5206,7 @@ version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"cpufeatures",
"digest 0.10.7",
]
@@ -5216,7 +5229,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d58a1e1bf39749807d89cf2d98ac2dfa0ff1cb3faa38fbb64dd88ac8013d800"
dependencies = [
"block-buffer 0.9.0",
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"cpufeatures",
"digest 0.9.0",
"opaque-debug",
@@ -5228,7 +5241,7 @@ version = "0.10.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"cpufeatures",
"digest 0.10.7",
]
@@ -5283,14 +5296,14 @@ dependencies = [
[[package]]
name = "sigstore"
version = "0.13.0"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52bba786054331bdc89e90f74373b68a6c3b63c9754cf20e3a4a629d0165fe38"
checksum = "43427f0d642cfed11bd596608148ee4476dd75f938888aa13a9c4e176fe14225"
dependencies = [
"async-trait",
"aws-lc-rs",
"base64 0.22.1",
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"chrono",
"const-oid",
"crypto_secretbox",
@@ -5328,12 +5341,6 @@ dependencies = [
"zeroize",
]
[[package]]
name = "simd-adler32"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
[[package]]
name = "simdutf8"
version = "0.1.4"
@@ -5566,9 +5573,9 @@ dependencies = [
[[package]]
name = "sync_wrapper"
version = "1.0.2"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394"
dependencies = [
"futures-core",
]
@@ -5615,7 +5622,7 @@ version = "3.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"fastrand 1.9.0",
"redox_syscall 0.3.5",
"rustix 0.37.28",
@@ -5694,7 +5701,7 @@ version = "1.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"once_cell",
]
@@ -5896,17 +5903,14 @@ dependencies = [
[[package]]
name = "toml"
version = "0.9.11+spec-1.1.0"
version = "0.8.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3afc9a848309fe1aaffaed6e1546a7a14de1f935dc9d89d32afd9a44bab7c46"
checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362"
dependencies = [
"indexmap 2.13.0",
"serde_core",
"serde",
"serde_spanned",
"toml_datetime 0.7.5+spec-1.1.0",
"toml_parser",
"toml_writer",
"winnow 0.7.14",
"toml_datetime",
"toml_edit 0.22.27",
]
[[package]]
@@ -5914,14 +5918,8 @@ name = "toml_datetime"
version = "0.6.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
[[package]]
name = "toml_datetime"
version = "0.7.5+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347"
dependencies = [
"serde_core",
"serde",
]
[[package]]
@@ -5930,8 +5928,8 @@ version = "0.19.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421"
dependencies = [
"indexmap 2.13.0",
"toml_datetime 0.6.11",
"indexmap 2.6.0",
"toml_datetime",
"winnow 0.5.40",
]
@@ -5941,32 +5939,27 @@ version = "0.22.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
dependencies = [
"indexmap 2.13.0",
"toml_datetime 0.6.11",
"winnow 0.7.14",
"indexmap 2.6.0",
"serde",
"serde_spanned",
"toml_datetime",
"toml_write",
"winnow 0.7.11",
]
[[package]]
name = "toml_parser"
version = "1.0.6+spec-1.1.0"
name = "toml_write"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44"
dependencies = [
"winnow 0.7.14",
]
[[package]]
name = "toml_writer"
version = "1.0.6+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607"
checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
[[package]]
name = "tonic"
version = "0.14.2"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb7613188ce9f7df5bfe185db26c5814347d110db17920415cf2fbcad85e7203"
checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
dependencies = [
"async-stream",
"async-trait",
"axum",
"base64 0.22.1",
@@ -5980,25 +5973,34 @@ dependencies = [
"hyper-util",
"percent-encoding",
"pin-project",
"socket2 0.6.0",
"sync_wrapper",
"prost 0.13.5",
"socket2 0.5.10",
"tokio",
"tokio-stream",
"tower",
"tower 0.4.13",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "tonic-prost"
version = "0.14.2"
name = "tower"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "66bd50ad6ce1252d87ef024b3d64fe4c3cf54a86fb9ef4c631fdd0ded7aeaa67"
checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
dependencies = [
"bytes 1.7.2",
"prost 0.14.3",
"tonic",
"futures-core",
"futures-util",
"indexmap 1.9.3",
"pin-project",
"pin-project-lite",
"rand",
"slab",
"tokio",
"tokio-util",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
@@ -6009,15 +6011,10 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
dependencies = [
"futures-core",
"futures-util",
"indexmap 2.13.0",
"pin-project-lite",
"slab",
"sync_wrapper",
"tokio",
"tokio-util",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
@@ -6347,7 +6344,7 @@ version = "0.2.93"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"once_cell",
"wasm-bindgen-macro",
]
@@ -6373,7 +6370,7 @@ version = "0.4.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61e9300f63a621e96ed275155c108eb6f843b6a26d053f122ab69724559dc8ed"
dependencies = [
"cfg-if 1.0.4",
"cfg-if 1.0.1",
"js-sys",
"wasm-bindgen",
"web-sys",
@@ -6805,9 +6802,9 @@ dependencies = [
[[package]]
name = "winnow"
version = "0.7.14"
version = "0.7.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829"
checksum = "74c7b26e3480b707944fc872477815d29a8e429d2f93a1ce000f5fa84a15cbcd"
dependencies = [
"memchr",
]

View File

@@ -43,7 +43,8 @@ serde = { version = "1.0.131", features = ["derive"] }
serde_json = "1.0.73"
# Image pull/unpack
image-rs = { git = "https://github.com/confidential-containers/guest-components", rev = "026694d44d4ec483465d2fa5f80a0376166b174d", features = [
image-rs = { git = "https://github.com/confidential-containers/guest-components", rev = "048ddaec4ecd6ee45c845d69bc39416908764560", features = [
"snapshot-overlayfs",
"oci-client-rustls",
"signature-cosign-rustls",
] }

View File

@@ -1899,9 +1899,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
[[package]]
name = "openssl-src"
version = "300.5.4+3.5.4"
version = "300.5.0+3.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a507b3792995dae9b0df8a1c1e3771e8418b7c2d9f0baeba32e6fe8b06c7cb72"
checksum = "e8ce546f549326b0e6052b649198487d91320875da901e7bd11a06d1ee3f9c2f"
dependencies = [
"cc",
]

View File

@@ -8,7 +8,6 @@ ACPI/AB
acpi
ACS/AB
API/AB
api/AB # when used in links like "api.github.com"
AUFS # Another Union FS
AWS/AB
BDF/AB
@@ -53,7 +52,6 @@ memdisk/B
MDEV/AB
NEMU/AB
NIC/AB
nv/AB # NVIDIA abbreviation (lowercase)
NVDIMM/AB
OCI/AB
OVMF/AB
@@ -94,7 +92,6 @@ vCPU/AB
VETH/AB
VF/AB
VFIO/AB
vfio/AB # For terms like "vfio-pci"
VGPU/AB
vhost/AB
VHOST/AB
@@ -122,8 +119,8 @@ msg/AB
UDS
dbs # Dragonball Sandbox
TDX
tdx
mptable
tdx
mptable
fdt
gic
msr

View File

@@ -16,7 +16,6 @@ bootloader/AB
centric/B
checkbox/A
chipset/AB
chroot # Unix change root command
codebase
commandline
config/AB
@@ -39,7 +38,6 @@ ethernet
filename/AB
filesystem/AB
freeform
genpolicy/AB # Kata policy generation tool
goroutine/AB
hostname/AB
hostPath
@@ -127,7 +125,6 @@ untrusted
untrusting
userid/AB
userspace/B
vectorAdd # CUDA sample name
vendored
vendoring
versioning

View File

@@ -46,7 +46,6 @@ Kata/B
Kibana/B
Kubelet/B
Kubernetes/B
kubernetes/B # when used in links or paths
Launchpad/B
LevelDB/B
libcontainer/B

View File

@@ -1,4 +1,4 @@
409
403
ACPI/AB
ACS/AB
API/AB
@@ -33,6 +33,7 @@ Docker/B
Dockerfile/AB
Dragonball/B
EulerOS/B
erofs/A
FS/AB
FaaS/B
Facebook/B
@@ -194,7 +195,6 @@ aarch64/B
ack/A
acpi
amd64/B
api/AB
arg
arm64/B
auditability
@@ -210,7 +210,6 @@ centric/B
cgroup/AB
checkbox/A
chipset/AB
chroot
ci/AB
cnn/B
codebase
@@ -248,7 +247,6 @@ fio/B
freeform
fs/B
gRPC/AB
genpolicy/AB
gic
golang/B
goroutine/AB
@@ -278,7 +276,6 @@ jq/B
k8s/B
kata
keypair/A
kubernetes/B
libcontainer/B
libelf/B
libvirt/B
@@ -306,8 +303,8 @@ nack/AB
namespace/ABCD
netlink
netns/AB
nv/AB
nvidia/A
nydus/A
onwards
openSUSE/B
openshift/B
@@ -390,11 +387,9 @@ util/A
vCPU/AB
vGPU
vSphere/B
vectorAdd
vendored
vendoring
versioning
vfio/AB
vhost/AB
virtcontainers/B
virtio/AB

View File

@@ -52,10 +52,6 @@ setup() {
export HELM_K8S_DISTRIBUTION="${KUBERNETES}"
# Enable deployment verification (verifies Kata Containers
# VM kernel isolation by comparing node vs pod kernel)
export HELM_VERIFY_DEPLOYMENT="true"
helm_helper
echo "::group::kata-deploy logs"

View File

@@ -28,7 +28,6 @@ HELM_SHIMS="${HELM_SHIMS:-}"
HELM_SNAPSHOTTER_HANDLER_MAPPING="${HELM_SNAPSHOTTER_HANDLER_MAPPING:-}"
HELM_EXPERIMENTAL_SETUP_SNAPSHOTTER="${HELM_EXPERIMENTAL_SETUP_SNAPSHOTTER:-}"
HELM_EXPERIMENTAL_FORCE_GUEST_PULL="${HELM_EXPERIMENTAL_FORCE_GUEST_PULL:-}"
HELM_VERIFY_DEPLOYMENT="${HELM_VERIFY_DEPLOYMENT:-false}"
KATA_DEPLOY_WAIT_TIMEOUT="${KATA_DEPLOY_WAIT_TIMEOUT:-600}"
KATA_HOST_OS="${KATA_HOST_OS:-}"
KUBERNETES="${KUBERNETES:-}"
@@ -129,7 +128,7 @@ function create_cluster() {
--node-count 1 \
--generate-ssh-keys \
--tags "${tags[@]}" \
$([[ "${KATA_HOST_OS}" = "cbl-mariner" ]] && echo "--os-sku AzureLinux --workload-runtime KataVmIsolation")
$([[ "${KATA_HOST_OS}" = "cbl-mariner" ]] && echo "--os-sku AzureLinux --workload-runtime KataMshvVmIsolation")
}
function install_bats() {
@@ -820,53 +819,10 @@ function helm_helper() {
[[ -n "${HELM_HOST_OS}" ]] && yq -i ".env.hostOS=\"${HELM_HOST_OS}\"" "${values_yaml}"
fi
# Enable verification during deployment if HELM_VERIFY_DEPLOYMENT is set
# Creates a simple verification pod that runs with the Kata runtime
local helm_set_file_args=""
if [[ "${HELM_VERIFY_DEPLOYMENT}" == "true" ]]; then
# Determine runtime class from HELM_DEFAULT_SHIM or default to kata-qemu
local runtime_class="kata-qemu"
if [[ -n "${HELM_DEFAULT_SHIM}" ]]; then
runtime_class="kata-${HELM_DEFAULT_SHIM}"
fi
local verification_yaml
verification_yaml=$(mktemp)
cat > "${verification_yaml}" << 'VERIFICATION_POD_EOF'
apiVersion: v1
kind: Pod
metadata:
name: kata-deploy-verify
spec:
runtimeClassName: RUNTIME_CLASS_PLACEHOLDER
restartPolicy: Never
nodeSelector:
katacontainers.io/kata-runtime: "true"
containers:
- name: verify
image: quay.io/kata-containers/alpine-bash-curl:latest
imagePullPolicy: Always
command:
- sh
- -c
- |
echo "=== Kata Verification ==="
echo "Kernel: $(uname -r)"
echo "SUCCESS: Pod running with Kata runtime"
VERIFICATION_POD_EOF
# Replace runtime class placeholder
sed -i "s|RUNTIME_CLASS_PLACEHOLDER|${runtime_class}|g" "${verification_yaml}"
echo "Enabling deployment verification with runtimeClass: ${runtime_class}"
helm_set_file_args="--set-file verification.pod=${verification_yaml}"
# Clean up temp file on exit
trap "rm -f ${verification_yaml}" EXIT
fi
echo "::group::Final kata-deploy manifests used in the test"
cat "${values_yaml}"
echo ""
# ${helm_set_file_args} is intentionally left unquoted
helm template "${helm_chart_dir}" --values "${values_yaml}" ${helm_set_file_args} --namespace kube-system
helm template "${helm_chart_dir}" --values "${values_yaml}" --namespace kube-system
[[ "$(yq .image.reference "${values_yaml}")" = "${HELM_IMAGE_REFERENCE}" ]] || die "Failed to set image reference"
[[ "$(yq .image.tag "${values_yaml}")" = "${HELM_IMAGE_TAG}" ]] || die "Failed to set image tag"
echo "::endgroup::"
@@ -881,8 +837,7 @@ VERIFICATION_POD_EOF
# Retry loop for helm install to prevent transient failures due to instantly unreachable cluster
set +e # Disable immediate exit on failure
while true; do
# ${helm_set_file_args} is intentionally left unquoted
helm upgrade --install kata-deploy "${helm_chart_dir}" --values "${values_yaml}" ${helm_set_file_args} --namespace kube-system --debug
helm upgrade --install kata-deploy "${helm_chart_dir}" --values "${values_yaml}" --namespace kube-system --debug
ret=${?}
if [[ ${ret} -eq 0 ]]; then
echo "Helm install succeeded!"

View File

@@ -116,6 +116,11 @@ setup() {
"${create_container_timeout}"
fi
# Enable dm-integrity in guest
set_metadata_annotation "${pod_config}" \
"io.katacontainers.config.hypervisor.kernel_params" \
"agent.secure_storage_integrity=true"
# Set annotation to pull image in guest
set_metadata_annotation "${pod_config}" \
"io.containerd.cri.runtime-handler" \
@@ -156,6 +161,11 @@ setup() {
"io.katacontainers.config.runtime.create_container_timeout" \
"${create_container_timeout}"
# Enable dm-integrity in guest
set_metadata_annotation "${pod_config}" \
"io.katacontainers.config.hypervisor.kernel_params" \
"agent.secure_storage_integrity=true"
# Set annotation to pull image in guest
set_metadata_annotation "${pod_config}" \
"io.containerd.cri.runtime-handler" \
@@ -209,6 +219,11 @@ setup() {
"io.katacontainers.config.runtime.create_container_timeout" \
"${create_container_timeout}"
# Enable dm-integrity in guest
set_metadata_annotation "${pod_config}" \
"io.katacontainers.config.hypervisor.kernel_params" \
"agent.secure_storage_integrity=true"
# Set annotation to pull image in guest
set_metadata_annotation "${pod_config}" \
"io.containerd.cri.runtime-handler" \

View File

@@ -61,9 +61,7 @@ setup() {
# Sleep necessary to check liveness probe returns a failure code
sleep "$sleep_liveness"
# For k8s up to 1.34 we need to check for "Started container"
# For k8s 1.35 and onwards we need to check for "Container started"
kubectl describe pod "$pod_name" | grep -E "Started container|Container started"
kubectl describe pod "$pod_name" | grep "Started container"
}
@@ -89,9 +87,7 @@ setup() {
# Sleep necessary to check liveness probe returns a failure code
sleep "$sleep_liveness"
# For k8s up to 1.34 we need to check for "Started container"
# For k8s 1.35 and onwards we need to check for "Container started"
kubectl describe pod "$pod_name" | grep -E "Started container|Container started"
kubectl describe pod "$pod_name" | grep "Started container"
}
teardown() {

View File

@@ -265,7 +265,7 @@ new_pod_config() {
# The runtimeclass is not optional.
[ -n "$runtimeclass" ] || return 1
new_config=$(mktemp "${BATS_FILE_TMPDIR}/pod-config.XXXXXX.yaml")
new_config=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${base_config}").XXX")
IMAGE="$image" RUNTIMECLASS="$runtimeclass" envsubst < "$base_config" > "$new_config"
echo "$new_config"
@@ -323,12 +323,11 @@ set_container_command() {
local container_idx="${2}"
shift 2
echo "YAML file: ${yaml}, and setting container[${container_idx}] command to: $*"
# Set the full command array once (yq v4 syntax)
local arr
arr="$(printf '"%s",' "$@" | sed 's/,$//')"
yq -i e ".spec.containers[${container_idx}].command = [${arr}]" "${yaml}"
for command_value in "$@"; do
yq -i \
'.spec.containers['"${container_idx}"'].command += ["'"${command_value}"'"]' \
"${yaml}"
done
}
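The printf/sed pipeline above turns the positional arguments into a quoted, comma-separated list, so the whole `command` array can be assigned in a single yq v4 expression instead of one `+=` per element. A small illustrative sketch of the transformation (the argument values are examples only; arguments containing double quotes would need extra escaping):
```sh
# Illustrative only; mirrors the pipeline in set_container_command above.
set -- sh -c "sleep 30"
arr="$(printf '"%s",' "$@" | sed 's/,$//')"
echo "${arr}"    # -> "sh","-c","sleep 30"
# yq then receives: .spec.containers[0].command = ["sh","-c","sleep 30"]
```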
# Set the node name on configuration spec.

View File

@@ -182,6 +182,17 @@ The Kata Containers kernel and rootfs images are by design "minimal". If advanced,
site specific, or customized features are required, then building a customized
kernel and/or rootfs may be required.
Below are some examples that may be useful when generating a
customized system.
#### Intel® QuickAssist Technology (QAT) customized kernel and rootfs
As documented in the
[Intel® QAT Kata use-case documentation](../../docs/use-cases/using-Intel-QAT-and-kata.md),
enabling this hardware requires a customized kernel and rootfs to work with Kata.
To ease building of the kernel and rootfs, a [Dockerfile](./dockerfiles/QAT) is
supplied that, when run, generates the required kernel and rootfs binaries; a sketch of the invocation follows.
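The commands below mirror those documented in the QAT README added later in this change; run them from the directory containing the QAT Dockerfile:
```sh
# Build and run the QAT osbuilder image (commands as documented in the QAT README):
docker build --label kataqat --tag kataqat:latest .
mkdir ./output
docker run -ti --rm --privileged -v /dev:/dev -v "$(pwd)/output:/output" kataqat
```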
## Testing
```

View File

@@ -0,0 +1,66 @@
# Copyright (c) 2020 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Kata osbuilder 'works best' on Fedora
FROM fedora:38
# Version of the Dockerfile - update if you change this file to avoid 'stale'
# images being pulled from the registry.
# Set AGENT_VERSION as an env variable to specify a specific version of Kata Agent to install
LABEL DOCKERFILE_VERSION="2.3"
ENV QAT_DRIVER_VER "QAT.L.4.15.0-00011.tar.gz"
ENV QAT_DRIVER_URL "https://downloadmirror.intel.com/649693/${QAT_DRIVER_VER}"
ENV QAT_CONFIGURE_OPTIONS "--enable-icp-sriov=guest"
ENV KATA_REPO_VERSION "main"
ENV AGENT_VERSION ""
ENV ROOTFS_OS "centos"
ENV OUTPUT_DIR "/output"
RUN dnf install -y \
bc \
bison \
debootstrap \
diffutils \
e2fsprogs \
elfutils-libelf-devel \
findutils \
flex \
gcc \
gcc-c++ \
git \
kiwi \
kmod \
openssl \
openssl-devel \
make \
parted \
patch \
qemu-img \
systemd-devel \
sudo \
xz \
yasm && \
dnf clean all
# Add in non-privileged user
RUN useradd qatbuilder -p "" && \
echo "qatbuilder ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
# Pull in our local files
COPY ./run.sh /input/
COPY ./qat.conf /input/
# Change to a less privileged user before running the commands
USER qatbuilder
# Output is placed in the /output directory.
# We could make this a VOLUME to force it to be attached to the host, but let's
# just leave it as a container dir that can then be overridden from a host commandline
# volume setup.
# VOLUME /output
# By default build everything
CMD ["/input/run.sh"]

View File

@@ -0,0 +1,81 @@
## Introduction
The files in this directory can be used to build a Kata Containers rootfs
and kernel modified to support Intel® QuickAssist Technology (QAT)
hardware. It is designed to work with Kata Containers versions 2.0 and higher.
To load the driver modules properly, systemd must be used as init; it is not adequate
to use the agent as the init. Because of this, Alpine is not a valid base OS image
to use. The following rootfs OSes have been tested with this Dockerfile.
* Clear Linux
* Debian
* Ubuntu
The generated files will need to be copied and configured into your Kata Containers
setup.
Please see the
[Using Intel® QuickAssist Technology and Kata](../../../../docs/use-cases/using-Intel-QAT-and-kata.md)
documentation for more specific details on how to configure a host system and
enable acceleration of workloads.
## Building
The image build and run are executed using Docker, from within this `QAT` folder.
It is required to use **all** the files in this directory to build the Docker
image:
```sh
$ docker build --label kataqat --tag kataqat:latest .
$ mkdir ./output
$ docker run -ti --rm --privileged -v /dev:/dev -v $(pwd)/output:/output kataqat
```
> **Note:** The `--privileged` and `-v /dev:/dev` arguments to `docker run` are
> necessary to enable the scripts within the container to generate a rootfs file system.
When complete, the generated files will be placed into the output directory.
Sample config files that have been modified with a `[SHIM]` section are also
placed into the `configs` subdirectory as a reference that can be used with
Kata Containers.
```sh
# ls -lR output
output:
total 136656
drwxr-xr-x 2 root root 4096 Feb 11 23:59 configs
-rw-r--r-- 1 root root 134217728 Feb 11 23:59 kata-containers.img
-rw-r--r-- 1 root root 5710336 Feb 11 23:59 vmlinuz-kata-linux-5.4.71-84_qat
output/configs:
total 20
-rw-r--r-- 1 root root 4082 Feb 11 23:59 200xxvf_dev0.conf
-rw-r--r-- 1 root root 4082 Feb 11 23:59 c3xxxvf_dev0.conf
-rw-r--r-- 1 root root 4082 Feb 11 23:59 c6xxvf_dev0.conf
-rw-r--r-- 1 root root 4082 Feb 11 23:59 d15xxvf_dev0.conf
-rw-r--r-- 1 root root 4082 Feb 11 23:59 dh895xccvf_dev0.conf
```
## Options
A number of parameters to the scripts are configured in the `Dockerfile`, and thus can be overridden
on the commandline. `AGENT_VERSION` is not set, so by default the
latest stable version of Kata Containers is used.
| Variable | Definition | Default value |
| -------- | ---------- | ------------- |
| `AGENT_VERSION` | Kata agent that is installed into the rootfs | |
| `KATA_REPO_VERSION` | Kata Branch or Tag to build from | `main` |
| `OUTPUT_DIR` | Directory inside container where results are stored | `/output` |
| `QAT_CONFIGURE_OPTIONS` | `configure` options for QAT driver | `--enable-icp-sriov=guest` |
| `QAT_DRIVER_URL` | URL to curl QAT driver from | `https://downloadmirror.intel.com/649693/${QAT_DRIVER_VER}` |
| `QAT_DRIVER_VER` | QAT driver version to use | `QAT.L.4.15.0-00011.tar.gz` |
| `ROOTFS_OS` | Operating system to use for the rootfs | `centos` |
Variables can be set on the `docker run` commandline, for example:
```sh
$ docker run -ti --rm --privileged -e "AGENT_VERSION=2.0.0" -v /dev:/dev -v ${PWD}/output:/output kataqat
```

View File

@@ -0,0 +1,17 @@
#
# Copyright (c) 2021 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
CONFIG_PCIEAER=y
CONFIG_UIO=y
CONFIG_CRYPTO_HW=y
# This is a hack. By setting this QAT device as a module, we get the kernel
# to configure/build all the other parts required for QAT - and then later we
# build and load the out-of-tree QAT kernel modules instead of this one.
CONFIG_CRYPTO_DEV_QAT_C62XVF=m
CONFIG_CRYPTO_CBC=y
CONFIG_MODULES=y
CONFIG_MODULE_SIG=y
CONFIG_CRYPTO_AUTHENC=y
CONFIG_CRYPTO_DH=y

View File

@@ -0,0 +1,174 @@
#!/usr/bin/env bash
#
# Copyright (c) 2021 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
set -u
# NOTE: Some env variables are set in the Dockerfile - those that are
# intended to be overridable.
export QAT_SRC=~/src/QAT
export ROOTFS_DIR=~/src/rootfs
export GOPATH=~/src/go
export PATH=${PATH}:/usr/local/go/bin:${GOPATH}/bin
kata_repo=github.com/kata-containers/kata-containers
kata_repo_path=${GOPATH}/src/${kata_repo}
grab_qat_drivers()
{
/bin/echo -e "\n\e[1;42mDownload and extract the drivers\e[0m"
mkdir -p $QAT_SRC
cd $QAT_SRC
wget $QAT_DRIVER_URL
if [ ! -f ${QAT_SRC}/${QAT_DRIVER_VER} ];then
/bin/echo -e "\e[1;41mQAT Driver ${QAT_DRIVER_VER} doesn't exist\e[0m"
echo "Check https://01.org/intel-quickassist-technology to find the latest"
echo "QAT driver version, update the Dockerfile, and try again."
exit 1
fi
tar xzf ${QAT_DRIVER_VER}
}
grab_kata_repos()
{
# Check out all the repos we will use now, so we can try and ensure they use the specified branch
# Only check out the branch needed, and make it shallow and thus space/bandwidth efficient
# Use a green prompt with white text for easy viewing
/bin/echo -e "\n\e[1;42mClone and checkout Kata repo\e[0m"
[ -d "${kata_repo_path}" ] || git clone --single-branch --branch $KATA_REPO_VERSION --depth=1 https://${kata_repo} ${kata_repo_path}
}
configure_kernel()
{
cp /input/qat.conf ${kata_repo_path}/tools/packaging/kernel/configs/fragments/common/qat.conf
# We need yq and go to grab kernel versions etc.
${kata_repo_path}/ci/install_yq.sh
${kata_repo_path}/tests/install_go.sh -p
cd ${kata_repo_path}
/bin/echo -e "\n\e[1;42mDownload and configure Kata kernel with CRYPTO support\e[0m"
./tools/packaging/kernel/build-kernel.sh setup
}
build_kernel()
{
cd ${kata_repo_path}
LINUX_VER=$(ls -d kata-linux-*)
sed -i 's/EXTRAVERSION =/EXTRAVERSION = .qat.container/' $LINUX_VER/Makefile
/bin/echo -e "\n\e[1;42mBuild Kata kernel with CRYPTO support\e[0m"
./tools/packaging/kernel/build-kernel.sh build
}
build_rootfs()
{
# Due to an issue with debootstrap unmounting /proc when running in a
# --privileged container, change into /proc to keep it from being umounted.
# This should only be done for Ubuntu and Debian based OSes. Other OS
# distributions had issues when building the rootfs from /proc
if [ "${ROOTFS_OS}" == "ubuntu" ]; then
cd /proc
fi
/bin/echo -e "\n\e[1;42mDownload ${ROOTFS_OS} based rootfs\e[0m"
sudo -E SECCOMP=no EXTRA_PKGS='kmod' ${kata_repo_path}/tools/osbuilder/rootfs-builder/rootfs.sh $ROOTFS_OS
}
build_qat_drivers()
{
/bin/echo -e "\n\e[1;42mCompile driver modules\e[0m"
cd ${kata_repo_path}
linux_kernel_path=${kata_repo_path}/${LINUX_VER}
KERNEL_MAJOR_VERSION=$(awk '/^VERSION =/{print $NF}' ${linux_kernel_path}/Makefile)
KERNEL_PATCHLEVEL=$(awk '/^PATCHLEVEL =/{print $NF}' ${linux_kernel_path}/Makefile)
KERNEL_SUBLEVEL=$(awk '/^SUBLEVEL =/{print $NF}' ${linux_kernel_path}/Makefile)
KERNEL_EXTRAVERSION=$(awk '/^EXTRAVERSION =/{print $NF}' ${linux_kernel_path}/Makefile)
KERNEL_ROOTFS_DIR=${KERNEL_MAJOR_VERSION}.${KERNEL_PATCHLEVEL}.${KERNEL_SUBLEVEL}${KERNEL_EXTRAVERSION}
cd $QAT_SRC
KERNEL_SOURCE_ROOT=${linux_kernel_path} ./configure ${QAT_CONFIGURE_OPTIONS}
make all -j $(nproc)
}
add_qat_to_rootfs()
{
/bin/echo -e "\n\e[1;42mCopy driver modules to rootfs\e[0m"
cd $QAT_SRC
sudo -E make INSTALL_MOD_PATH=${ROOTFS_DIR} qat-driver-install -j$(nproc --ignore=1)
sudo cp $QAT_SRC/build/usdm_drv.ko ${ROOTFS_DIR}/lib/modules/${KERNEL_ROOTFS_DIR}/updates/drivers
sudo depmod -a -b ${ROOTFS_DIR} ${KERNEL_ROOTFS_DIR}
cd ${kata_repo_path}/tools/osbuilder/image-builder
/bin/echo -e "\n\e[1;42mBuild rootfs image\e[0m"
sudo -E ./image_builder.sh ${ROOTFS_DIR}
}
copy_outputs()
{
/bin/echo -e "\n\e[1;42mCopy kernel and rootfs to the output directory and provide sample configuration files\e[0m"
mkdir -p ${OUTPUT_DIR} || true
sudo cp ${linux_kernel_path}/arch/x86/boot/bzImage $OUTPUT_DIR/vmlinuz-${LINUX_VER}_qat
sudo cp ${linux_kernel_path}/vmlinux $OUTPUT_DIR/vmlinux-${LINUX_VER}_qat
sudo cp ${kata_repo_path}/tools/osbuilder/image-builder/kata-containers.img $OUTPUT_DIR
sudo mkdir -p ${OUTPUT_DIR}/configs || true
# Change extension from .conf.vm to just .conf and change the SSL section to
# SHIM so it works with Kata containers
for f in $QAT_SRC/quickassist/utilities/adf_ctl/conf_files/*.conf.vm; do
output_conf_file=$(basename -- "$f" .conf.vm).conf
sudo cp -- "$f" "${OUTPUT_DIR}/configs/${output_conf_file}"
sudo sed -i 's/\[SSL\]/\[SHIM\]/g' ${OUTPUT_DIR}/configs/${output_conf_file}
done
/bin/echo -e "Check the ./output directory for the kernel and rootfs\n"
}
help() {
cat << EOF
Usage: $0 [-h] [options]
Description:
This script builds kernel and rootfs artifacts for Kata Containers,
configured and built to support QAT hardware.
Options:
-d, Enable debug mode
-h, Show this help
EOF
}
main()
{
local check_in_container=${OUTPUT_DIR:-}
if [ -z "${check_in_container}" ]; then
echo "Error: 'OUTPUT_DIR' not set" >&2
echo "$0 should be run using the Dockerfile supplied." >&2
exit 1
fi
local OPTIND
while getopts "dh" opt;do
case ${opt} in
d)
set -x
;;
h)
help
exit 0;
;;
?)
# parse failure
help
echo "ERROR: Failed to parse arguments"
exit 1
;;
esac
done
shift $((OPTIND-1))
sudo chown -R qatbuilder:qatbuilder /home/qatbuilder
grab_qat_drivers
grab_kata_repos
configure_kernel
build_kernel
build_rootfs
build_qat_drivers
add_qat_to_rootfs
copy_outputs
}
main "$@"

View File

@@ -18,21 +18,62 @@ die() {
exit 1
}
arch_target=$1
nvidia_gpu_stack="$2"
base_os="$3"
run_file_name=$2
run_fm_file_name=$3
arch_target=$4
nvidia_gpu_stack="$5"
driver_version=""
driver_type="-open"
supported_gpu_devids="/supported-gpu.devids"
base_os="noble"
APT_INSTALL="apt -o Dpkg::Options::='--force-confdef' -o Dpkg::Options::='--force-confold' -yqq --no-install-recommends install"
export KBUILD_SIGN_PIN="${6:-}"
export DEBIAN_FRONTEND=noninteractive
is_feature_enabled() {
local feature="$1"
[[ ",${nvidia_gpu_stack}," == *",${feature},"* ]]
# Check if feature is in the comma-separated list
if [[ ",${nvidia_gpu_stack}," == *",${feature},"* ]]; then
return 0
else
return 1
fi
}
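Wrapping both the list and the probe in commas makes the membership test exact, so a feature name cannot match as a substring of another entry. A small illustrative check (the stack value below is an example, not taken from the build):
```sh
# Example values only; source is_feature_enabled from above before running.
nvidia_gpu_stack="compute,version=550,driver=open"
is_feature_enabled "compute" && echo "compute enabled"       # ",compute," matches
is_feature_enabled "comp"    || echo "no substring matches"  # ",comp," is absent
```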
set_driver_version_type() {
echo "chroot: Setting the correct driver version"
if [[ ",${nvidia_gpu_stack}," == *",latest,"* ]]; then
driver_version="latest"
elif [[ ",${nvidia_gpu_stack}," == *",lts,"* ]]; then
driver_version="lts"
elif [[ "${nvidia_gpu_stack}" =~ version=([^,]+) ]]; then
driver_version="${BASH_REMATCH[1]}"
else
echo "No known driver spec found. Please specify \"latest\", \"lts\", or \"version=<VERSION>\"."
exit 1
fi
echo "chroot: driver_version: ${driver_version}"
echo "chroot: Setting the correct driver type"
# driver -> enable open or closed drivers
if [[ "${nvidia_gpu_stack}" =~ (^|,)driver=open($|,) ]]; then
driver_type="-open"
elif [[ "${nvidia_gpu_stack}" =~ (^|,)driver=closed($|,) ]]; then
driver_type=""
fi
echo "chroot: driver_type: ${driver_type}"
}
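The parser accepts `latest`, `lts`, or `version=<VERSION>` for the version and `driver=open`/`driver=closed` for the type, all in one comma-separated string. A hedged sketch of how a few inputs resolve (the stack strings are illustrative; defaults follow the function above):
```sh
# Illustrative only; assumes set_driver_version_type from above is sourced.
for nvidia_gpu_stack in "compute,latest,driver=open" "compute,lts,driver=closed" "compute,version=550"; do
    driver_version=""
    driver_type="-open"   # script default
    set_driver_version_type
    echo "${nvidia_gpu_stack} -> version=${driver_version} type='${driver_type}'"
done
```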
install_nvidia_ctk() {
echo "chroot: Installing NVIDIA GPU container runtime"
apt list nvidia-container-toolkit-base -a
# Base gives a nvidia-ctk and the nvidia-container-runtime
eval "${APT_INSTALL}" nvidia-container-toolkit-base=1.17.6-1
}
@@ -42,61 +83,222 @@ install_nvidia_fabricmanager() {
echo "chroot: Skipping NVIDIA fabricmanager installation"
return
}
echo "chroot: Install NVIDIA fabricmanager"
eval "${APT_INSTALL}" nvidia-fabricmanager libnvidia-nscq
apt-mark hold nvidia-fabricmanager libnvidia-nscq
# If the fabricmanager run file exists, install from it; otherwise use distribution packages
if [[ -f /"${run_fm_file_name}" ]]; then
install_nvidia_fabricmanager_from_run_file
else
install_nvidia_fabricmanager_from_distribution
fi
}
install_nvidia_fabricmanager_from_run_file() {
echo "chroot: Install NVIDIA fabricmanager from run file"
pushd / >> /dev/null
chmod +x "${run_fm_file_name}"
./"${run_fm_file_name}" --nox11
popd >> /dev/null
}
install_nvidia_fabricmanager_from_distribution() {
echo "chroot: Install NVIDIA fabricmanager from distribution"
eval "${APT_INSTALL}" nvidia-fabricmanager-"${driver_version}" libnvidia-nscq-"${driver_version}"
apt-mark hold nvidia-fabricmanager-"${driver_version}" libnvidia-nscq-"${driver_version}"
}
check_kernel_sig_config() {
[[ -n ${kernel_version} ]] || die "kernel_version is not set"
[[ -e /lib/modules/"${kernel_version}"/build/scripts/config ]] || die "Cannot find /lib/modules/${kernel_version}/build/scripts/config"
# make sure the used kernel has the proper CONFIG(s) set
readonly scripts_config=/lib/modules/"${kernel_version}"/build/scripts/config
[[ "$("${scripts_config}" --file "/boot/config-${kernel_version}" --state CONFIG_MODULE_SIG)" == "y" ]] || die "Kernel config CONFIG_MODULE_SIG must be =Y"
[[ "$("${scripts_config}" --file "/boot/config-${kernel_version}" --state CONFIG_MODULE_SIG_FORCE)" == "y" ]] || die "Kernel config CONFIG_MODULE_SIG_FORCE must be =Y"
[[ "$("${scripts_config}" --file "/boot/config-${kernel_version}" --state CONFIG_MODULE_SIG_ALL)" == "y" ]] || die "Kernel config CONFIG_MODULE_SIG_ALL must be =Y"
[[ "$("${scripts_config}" --file "/boot/config-${kernel_version}" --state CONFIG_MODULE_SIG_SHA512)" == "y" ]] || die "Kernel config CONFIG_MODULE_SIG_SHA512 must be =Y"
[[ "$("${scripts_config}" --file "/boot/config-${kernel_version}" --state CONFIG_SYSTEM_TRUSTED_KEYS)" == "" ]] || die "Kernel config CONFIG_SYSTEM_TRUSTED_KEYS must be =\"\""
[[ "$("${scripts_config}" --file "/boot/config-${kernel_version}" --state CONFIG_SYSTEM_TRUSTED_KEYRING)" == "y" ]] || die "Kernel config CONFIG_SYSTEM_TRUSTED_KEYRING must be =Y"
}
build_nvidia_drivers() {
is_feature_enabled "compute" || {
echo "chroot: Skipping NVIDIA drivers build"
return
}
echo "chroot: Build NVIDIA drivers"
pushd "${driver_source_files}" >> /dev/null
local certs_dir
local kernel_version
local ARCH
for version in /lib/modules/*; do
kernel_version=$(basename "${version}")
certs_dir=/lib/modules/"${kernel_version}"/build/certs
signing_key=${certs_dir}/signing_key.pem
echo "chroot: Building GPU modules for: ${kernel_version}"
cp /boot/System.map-"${kernel_version}" /lib/modules/"${kernel_version}"/build/System.map
if [[ "${arch_target}" == "aarch64" ]]; then
ln -sf /lib/modules/"${kernel_version}"/build/arch/arm64 /lib/modules/"${kernel_version}"/build/arch/aarch64
ARCH=arm64
fi
if [[ "${arch_target}" == "x86_64" ]]; then
ln -sf /lib/modules/"${kernel_version}"/build/arch/x86 /lib/modules/"${kernel_version}"/build/arch/amd64
ARCH=x86_64
fi
echo "chroot: Building GPU modules for: ${kernel_version} ${ARCH}"
make -j "$(nproc)" CC=gcc SYSSRC=/lib/modules/"${kernel_version}"/build > /dev/null
if [[ -n "${KBUILD_SIGN_PIN}" ]]; then
mkdir -p "${certs_dir}" && mv /signing_key.* "${certs_dir}"/.
check_kernel_sig_config
fi
make INSTALL_MOD_STRIP=1 -j "$(nproc)" CC=gcc SYSSRC=/lib/modules/"${kernel_version}"/build modules_install
make -j "$(nproc)" CC=gcc SYSSRC=/lib/modules/"${kernel_version}"/build clean > /dev/null
# The make clean above should also clear the certs directory, but in case something
# went wrong, make sure the signing_key.pem is removed
[[ -e "${signing_key}" ]] && rm -f "${signing_key}"
done
# Save the modules for later so that a linux-image purge does not remove them
tar cvfa /lib/modules.save_from_purge.tar.zst /lib/modules
popd >> /dev/null
}
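When `KBUILD_SIGN_PIN` is set, the kernel's `CONFIG_MODULE_SIG_ALL` machinery signs the modules during `modules_install`. A hypothetical spot-check that the installed modules carry a signature (the `find` pattern and module layout are assumptions, as paths vary by driver packaging):
```sh
# Hypothetical verification sketch; adjust the find pattern to your layout.
for m in $(find /lib/modules -name 'nvidia*.ko*'); do
    modinfo "${m}" | grep -E '^(signer|sig_key|sig_hashalgo)' \
        || echo "WARNING: ${m} appears to be unsigned"
done
```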
install_userspace_components() {
# Extract the driver=XXX part first, then get the value
if [[ "${nvidia_gpu_stack}" =~ driver=([^,]+) ]]; then
driver_version="${BASH_REMATCH[1]}"
if [[ ! -f /"${run_file_name}" ]]; then
echo "chroot: Skipping NVIDIA userspace runfile components installation"
return
fi
echo "chroot: driver_version: ${driver_version}"
eval "${APT_INSTALL}" nvidia-driver-pinning-"${driver_version}"
eval "${APT_INSTALL}" nvidia-imex nvidia-firmware \
libnvidia-cfg1 libnvidia-gl libnvidia-extra \
libnvidia-decode libnvidia-fbc1 libnvidia-encode \
libnvidia-nscq
pushd /NVIDIA-* >> /dev/null
# if aarch64 we need to remove --no-install-compat32-libs
if [[ "${arch_target}" == "aarch64" ]]; then
./nvidia-installer --no-kernel-modules --no-systemd --no-nvidia-modprobe -s --x-prefix=/root
else
./nvidia-installer --no-kernel-modules --no-systemd --no-nvidia-modprobe -s --x-prefix=/root --no-install-compat32-libs
fi
popd >> /dev/null
apt-mark hold nvidia-imex nvidia-firmware \
libnvidia-cfg1 libnvidia-gl libnvidia-extra \
libnvidia-decode libnvidia-fbc1 libnvidia-encode \
libnvidia-nscq
}
prepare_run_file_drivers() {
if [[ "${driver_version}" == "latest" ]]; then
driver_version=""
echo "chroot: Resetting driver version not supported with run-file"
elif [[ "${driver_version}" == "lts" ]]; then
driver_version=""
echo "chroot: Resetting driver version not supported with run-file"
fi
echo "chroot: Prepare NVIDIA run file drivers"
pushd / >> /dev/null
chmod +x "${run_file_name}"
./"${run_file_name}" -x
mkdir -p /usr/share/nvidia/rim/
# Sooner or later, RIM files will only be available remotely
RIMFILE=$(ls NVIDIA-*/RIM_GH100PROD.swidtag)
if [[ -e "${RIMFILE}" ]]; then
cp NVIDIA-*/RIM_GH100PROD.swidtag /usr/share/nvidia/rim/.
fi
popd >> /dev/null
}
prepare_distribution_drivers() {
if [[ "${driver_version}" == "latest" ]]; then
driver_version=$(apt-cache search --names-only 'nvidia-headless-no-dkms-.?.?.?-server-open' | sort | awk '{ print $1 }' | tail -n 1 | cut -d'-' -f5)
elif [[ "${driver_version}" == "lts" ]]; then
driver_version="580"
fi
echo "chroot: Prepare NVIDIA distribution drivers"
eval "${APT_INSTALL}" nvidia-headless-no-dkms-"${driver_version}-server${driver_type}" \
nvidia-kernel-common-"${driver_version}"-server \
nvidia-imex-"${driver_version}" \
nvidia-utils-"${driver_version}"-server \
libnvidia-cfg1-"${driver_version}"-server \
libnvidia-gl-"${driver_version}"-server \
libnvidia-extra-"${driver_version}"-server \
libnvidia-decode-"${driver_version}"-server \
libnvidia-fbc1-"${driver_version}"-server \
libnvidia-encode-"${driver_version}"-server \
libnvidia-nscq-"${driver_version}"
}
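# Illustrative resolution of driver_version="latest": apt-cache search prints lines like
#   nvidia-headless-no-dkms-570-server-open - NVIDIA headless metapackage ...
#   nvidia-headless-no-dkms-580-server-open - NVIDIA headless metapackage ...
# so sort + tail -n 1 keeps the highest release and cut -d'-' -f5 yields "580".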
prepare_nvidia_drivers() {
local driver_source_dir=""
if [[ -f /"${run_file_name}" ]]; then
prepare_run_file_drivers
for source_dir in /NVIDIA-*; do
if [[ -d "${source_dir}" ]]; then
driver_source_files="${source_dir}"/kernel${driver_type}
driver_source_dir="${source_dir}"
break
fi
done
get_supported_gpus_from_run_file "${driver_source_dir}"
else
prepare_distribution_drivers
for source_dir in /usr/src/nvidia*; do
if [[ -d "${source_dir}" ]]; then
driver_source_files="${source_dir}"
driver_source_dir="${source_dir}"
break
fi
done
get_supported_gpus_from_distro_drivers "${driver_source_dir}"
fi
}
install_build_dependencies() {
echo "chroot: Install NVIDIA drivers build dependencies"
eval "${APT_INSTALL}" make gcc gawk kmod libvulkan1 pciutils jq zstd linuxptp xz-utils
}
setup_apt_repositories() {
echo "chroot: Setup APT repositories"
# Architecture to mirror mapping
declare -A arch_to_mirror=(
["x86_64"]="us.archive.ubuntu.com/ubuntu"
["aarch64"]="ports.ubuntu.com/ubuntu-ports"
)
local mirror="${arch_to_mirror[${arch_target}]}"
[[ -z "${mirror}" ]] && die "Unknown arch_target: ${arch_target}"
local deb_arch="amd64"
[[ "${arch_target}" == "aarch64" ]] && deb_arch="arm64"
mkdir -p /var/cache/apt/archives/partial /var/log/apt \
/var/lib/dpkg/{info,updates,alternatives,triggers,parts}
mkdir -p /var/cache/apt/archives/partial
mkdir -p /var/log/apt
mkdir -p /var/lib/dpkg/info
mkdir -p /var/lib/dpkg/updates
mkdir -p /var/lib/dpkg/alternatives
mkdir -p /var/lib/dpkg/triggers
mkdir -p /var/lib/dpkg/parts
touch /var/lib/dpkg/status
rm -f /etc/apt/sources.list.d/*
key="/usr/share/keyrings/ubuntu-archive-keyring.gpg"
comp="main restricted universe multiverse"
if [[ "${arch_target}" == "x86_64" ]]; then
cat <<-CHROOT_EOF > /etc/apt/sources.list.d/"${base_os}".list
deb [arch=amd64 signed-by=/usr/share/keyrings/ubuntu-archive-keyring.gpg] http://us.archive.ubuntu.com/ubuntu ${base_os} main restricted universe multiverse
deb [arch=amd64 signed-by=/usr/share/keyrings/ubuntu-archive-keyring.gpg] http://us.archive.ubuntu.com/ubuntu ${base_os}-updates main restricted universe multiverse
deb [arch=amd64 signed-by=/usr/share/keyrings/ubuntu-archive-keyring.gpg] http://us.archive.ubuntu.com/ubuntu ${base_os}-security main restricted universe multiverse
deb [arch=amd64 signed-by=/usr/share/keyrings/ubuntu-archive-keyring.gpg] http://us.archive.ubuntu.com/ubuntu ${base_os}-backports main restricted universe multiverse
CHROOT_EOF
fi
cat <<-CHROOT_EOF > /etc/apt/sources.list.d/"${base_os}".list
deb [arch=${deb_arch} signed-by=${key}] http://${mirror} ${base_os} ${comp}
deb [arch=${deb_arch} signed-by=${key}] http://${mirror} ${base_os}-updates ${comp}
deb [arch=${deb_arch} signed-by=${key}] http://${mirror} ${base_os}-security ${comp}
deb [arch=${deb_arch} signed-by=${key}] http://${mirror} ${base_os}-backports ${comp}
CHROOT_EOF
if [[ "${arch_target}" == "aarch64" ]]; then
cat <<-CHROOT_EOF > /etc/apt/sources.list.d/"${base_os}".list
deb [arch=arm64 signed-by=/usr/share/keyrings/ubuntu-archive-keyring.gpg] http://ports.ubuntu.com/ubuntu-ports ${base_os} main restricted universe multiverse
deb [arch=arm64 signed-by=/usr/share/keyrings/ubuntu-archive-keyring.gpg] http://ports.ubuntu.com/ubuntu-ports ${base_os}-updates main restricted universe multiverse
deb [arch=arm64 signed-by=/usr/share/keyrings/ubuntu-archive-keyring.gpg] http://ports.ubuntu.com/ubuntu-ports ${base_os}-security main restricted universe multiverse
deb [arch=arm64 signed-by=/usr/share/keyrings/ubuntu-archive-keyring.gpg] http://ports.ubuntu.com/ubuntu-ports ${base_os}-backports main restricted universe multiverse
CHROOT_EOF
fi
local arch="${arch_target}"
[[ ${arch_target} == "aarch64" ]] && arch="sbsa"
@@ -108,24 +310,60 @@ setup_apt_repositories() {
curl -fsSL -O "https://developer.download.nvidia.com/compute/cuda/repos/${osver}/${arch}/${keyring}"
dpkg -i "${keyring}" && rm -f "${keyring}"
# Set priorities: CUDA repos highest, Ubuntu non-driver next, Ubuntu blocked for driver packages
# Set priorities: Ubuntu repos highest, NVIDIA Container Toolkit next, CUDA repo blocked for driver packages
cat <<-CHROOT_EOF > /etc/apt/preferences.d/nvidia-priority
# Prioritize Ubuntu repositories (highest priority)
Package: *
Pin: $(dirname "${mirror}")
Pin-Priority: 400
Pin: origin us.archive.ubuntu.com
Pin-Priority: 1000
Package: *
Pin: origin ports.ubuntu.com
Pin-Priority: 1000
# NVIDIA Container Toolkit (medium priority for toolkit only)
Package: nvidia-container-toolkit* libnvidia-container*
Pin: origin nvidia.github.io
Pin-Priority: 500
# Block all nvidia and libnvidia packages from CUDA repository
Package: nvidia-* libnvidia-*
Pin: $(dirname "${mirror}")
Pin: origin developer.download.nvidia.com
Pin-Priority: -1
# Allow non-driver CUDA packages from CUDA repository (low priority)
Package: *
Pin: origin developer.download.nvidia.com
Pin-Priority: 800
Pin-Priority: 100
CHROOT_EOF
apt update
}
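# Sketch: the resulting pinning can be spot-checked inside the chroot with
#   apt-cache policy nvidia-imex
# which should show the Ubuntu mirror at priority 1000 and driver packages
# from developer.download.nvidia.com pinned to -1 (illustrative output).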
install_kernel_dependencies() {
dpkg -i /linux-*deb
}
get_supported_gpus_from_run_file() {
local source_dir="$1"
local supported_gpus_json="${source_dir}"/supported-gpus/supported-gpus.json
jq . < "${supported_gpus_json}" | grep '"devid"' | awk '{ print $2 }' | tr -d ',"' > "${supported_gpu_devids}"
}
get_supported_gpus_from_distro_drivers() {
local supported_gpus_json="./usr/share/doc/nvidia-kernel-common-${driver_version}-server/supported-gpus.json"
jq . < "${supported_gpus_json}" | grep '"devid"' | awk '{ print $2 }' | tr -d ',"' > "${supported_gpu_devids}"
}
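# Illustrative: supported-gpus.json entries carry fields like
#   { "devid": "0x2330", "name": "NVIDIA H100 80GB HBM3", ... }
# so the grep/awk/tr pipeline above leaves one hex device ID per line in
# ${supported_gpu_devids}.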
export_driver_version() {
for modules_version in /lib/modules/*; do
modinfo "${modules_version}"/kernel/drivers/video/nvidia.ko | grep ^version | awk '{ print $2 }' > /nvidia_driver_version
break
done
}
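# Illustrative: modinfo prints a line like
#   version:        580.65.06
# so /nvidia_driver_version ends up holding just "580.65.06".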
install_nvidia_dcgm() {
is_feature_enabled "dcgm" || {
echo "chroot: Skipping NVIDIA DCGM installation"
@@ -141,12 +379,49 @@ install_nvidia_dcgm() {
cleanup_rootfs() {
echo "chroot: Cleanup NVIDIA GPU rootfs"
apt-mark hold libstdc++6 libzstd1 libgnutls30t64 pciutils linuxptp libnftnl11
apt-mark hold libstdc++6 libzstd1 libgnutls30t64 pciutils
if [[ -n "${driver_version}" ]]; then
apt-mark hold libnvidia-cfg1-"${driver_version}"-server \
nvidia-utils-"${driver_version}"-server \
nvidia-kernel-common-"${driver_version}"-server \
nvidia-imex-"${driver_version}" \
nvidia-compute-utils-"${driver_version}"-server \
libnvidia-compute-"${driver_version}"-server \
libnvidia-gl-"${driver_version}"-server \
libnvidia-extra-"${driver_version}"-server \
libnvidia-decode-"${driver_version}"-server \
libnvidia-fbc1-"${driver_version}"-server \
libnvidia-encode-"${driver_version}"-server \
libnvidia-nscq-"${driver_version}" \
linuxptp libnftnl11
fi
kernel_headers=$(dpkg --get-selections | cut -f1 | grep linux-headers)
linux_images=$(dpkg --get-selections | cut -f1 | grep linux-image)
for i in ${kernel_headers} ${linux_images}; do
apt purge -yqq "${i}"
done
apt purge -yqq jq make gcc xz-utils linux-libc-dev
if [[ -n "${driver_version}" ]]; then
apt purge -yqq nvidia-headless-no-dkms-"${driver_version}"-server"${driver_type}" \
nvidia-kernel-source-"${driver_version}"-server"${driver_type}"
fi
apt autoremove -yqq
apt clean
apt autoclean
for modules_version in /lib/modules/*; do
ln -sf "${modules_version}" /lib/modules/"$(uname -r)"
touch "${modules_version}"/modules.order
touch "${modules_version}"/modules.builtin
depmod -a
done
rm -rf /var/lib/apt/lists/* /var/cache/apt/* /var/log/apt /var/cache/debconf
rm -f /etc/apt/sources.list
rm -f /usr/bin/nvidia-ngx-updater /usr/bin/nvidia-container-runtime
@@ -155,14 +430,23 @@ cleanup_rootfs() {
# Clear and regenerate the ld cache
rm -f /etc/ld.so.cache
ldconfig
tar xvf /lib/modules.save_from_purge.tar.zst -C /
rm -f /lib/modules.save_from_purge.tar.zst
}
# Start of script
echo "chroot: Setup NVIDIA GPU rootfs stage one"
set_driver_version_type
setup_apt_repositories
install_kernel_dependencies
install_build_dependencies
prepare_nvidia_drivers
build_nvidia_drivers
install_userspace_components
install_nvidia_fabricmanager
install_nvidia_ctk
export_driver_version
install_nvidia_dcgm
cleanup_rootfs

View File

@@ -41,27 +41,29 @@ fi
readonly stage_one="${BUILD_DIR:?}/rootfs-${VARIANT:?}-stage-one"
setup_nvidia-nvrc() {
local url ver
local nvrc=NVRC-${machine_arch}-unknown-linux-musl
url=$(get_package_version_from_kata_yaml "externals.nvrc.url")
ver=$(get_package_version_from_kata_yaml "externals.nvrc.version")
local rootfs_type=${1:-""}
local dl="${url}/${ver}"
curl -fsSL -o "${BUILD_DIR}/${nvrc}.tar.xz" "${dl}/${nvrc}.tar.xz"
curl -fsSL -o "${BUILD_DIR}/${nvrc}.tar.xz.sig" "${dl}/${nvrc}.tar.xz.sig"
curl -fsSL -o "${BUILD_DIR}/${nvrc}.tar.xz.cert" "${dl}/${nvrc}.tar.xz.cert"
BIN="NVRC${rootfs_type:+"-${rootfs_type}"}"
TARGET=${machine_arch}-unknown-linux-musl
URL=$(get_package_version_from_kata_yaml "externals.nvrc.url")
VER=$(get_package_version_from_kata_yaml "externals.nvrc.version")
local id="^https://github.com/NVIDIA/nvrc/.github/workflows/.+@refs/heads/main$"
local oidc="https://token.actions.githubusercontent.com"
local DL="${URL}/${VER}"
curl -fsSL -o "${BUILD_DIR}/${BIN}-${TARGET}.tar.xz" "${DL}/${BIN}-${TARGET}.tar.xz"
curl -fsSL -o "${BUILD_DIR}/${BIN}-${TARGET}.tar.xz.sig" "${DL}/${BIN}-${TARGET}.tar.xz.sig"
curl -fsSL -o "${BUILD_DIR}/${BIN}-${TARGET}.tar.xz.cert" "${DL}/${BIN}-${TARGET}.tar.xz.cert"
ID="^https://github.com/NVIDIA/nvrc/.github/workflows/.+@refs/heads/main$"
OIDC="https://token.actions.githubusercontent.com"
# Only allow releases built by GitHub Actions from the NVIDIA/nvrc main branch
cosign verify-blob \
--rekor-url https://rekor.sigstore.dev \
--certificate "${BUILD_DIR}/${nvrc}.tar.xz.cert" \
--signature "${BUILD_DIR}/${nvrc}.tar.xz.sig" \
--certificate-identity-regexp "${id}" \
--certificate-oidc-issuer "${oidc}" \
"${BUILD_DIR}/${nvrc}.tar.xz"
cosign verify-blob \
--rekor-url https://rekor.sigstore.dev \
--certificate "${BUILD_DIR}/${BIN}-${TARGET}.tar.xz.cert" \
--signature "${BUILD_DIR}/${BIN}-${TARGET}.tar.xz.sig" \
--certificate-identity-regexp "${ID}" \
--certificate-oidc-issuer "${OIDC}" \
"${BUILD_DIR}/${BIN}-${TARGET}.tar.xz"
}
setup_nvidia_gpu_rootfs_stage_one() {
@@ -79,31 +81,47 @@ setup_nvidia_gpu_rootfs_stage_one() {
chmod +x ./nvidia_chroot.sh
local nvrc=NVRC-${machine_arch}-unknown-linux-musl
if [[ ! -e "${BUILD_DIR}/${nvrc}.tar.xz" ]]; then
setup_nvidia-nvrc
local BIN="NVRC${rootfs_type:+"-${rootfs_type}"}"
local TARGET=${machine_arch}-unknown-linux-musl
if [[ ! -e "${BUILD_DIR}/${BIN}-${TARGET}.tar.xz" ]]; then
setup_nvidia-nvrc "${rootfs_type}"
fi
tar -xvf "${BUILD_DIR}/${nvrc}.tar.xz" -C ./bin/
tar -xvf "${BUILD_DIR}/${BIN}-${TARGET}.tar.xz" -C ./bin/
local appendix="${rootfs_type:+"-${rootfs_type}"}"
if echo "${NVIDIA_GPU_STACK}" | grep -q '\<dragonball\>'; then
appendix="-dragonball-experimental"
fi
# Install the precompiled kernel modules shipped with the kernel
mkdir -p ./lib/modules/
tar --zstd -xvf "${BUILD_DIR}"/kata-static-kernel-nvidia-gpu"${appendix}"-modules.tar.zst -C ./lib/modules/
# We need the kernel packages for building the drivers; they will be
# uninstalled and removed from the rootfs once the build finishes.
tar --zstd -xvf "${BUILD_DIR}"/kata-static-kernel-nvidia-gpu"${appendix}"-headers.tar.zst -C .
# If we find a locally downloaded run file, build the kernel modules
# with it; otherwise use the distribution packages. Run files may have
# more recent drivers available than the distribution packages.
local run_file_name="nvidia-driver.run"
if [[ -f ${BUILD_DIR}/${run_file_name} ]]; then
cp -L "${BUILD_DIR}"/"${run_file_name}" ./"${run_file_name}"
fi
local run_fm_file_name="nvidia-fabricmanager.run"
if [[ -f ${BUILD_DIR}/${run_fm_file_name} ]]; then
cp -L "${BUILD_DIR}"/"${run_fm_file_name}" ./"${run_fm_file_name}"
fi
mount --rbind /dev ./dev
mount --make-rslave ./dev
mount -t proc /proc ./proc
chroot . /bin/bash -c "/nvidia_chroot.sh ${machine_arch} ${NVIDIA_GPU_STACK} noble"
chroot . /bin/bash -c "/nvidia_chroot.sh $(uname -r) ${run_file_name} \
${run_fm_file_name} ${machine_arch} ${NVIDIA_GPU_STACK} ${KBUILD_SIGN_PIN}"
umount -R ./dev
umount ./proc
rm ./nvidia_chroot.sh
rm ./*.deb
tar cfa "${stage_one}.tar.zst" --remove-files -- *
@@ -165,6 +183,7 @@ chisseled_dcgm() {
chisseled_compute() {
echo "nvidia: chisseling GPU"
cp -a "${stage_one}"/nvidia_driver_version .
cp -a "${stage_one}"/lib/modules/* lib/modules/.
libdir="lib/${machine_arch}-linux-gnu"
@@ -175,15 +194,6 @@ chisseled_compute() {
cp -a "${stage_one}/${libdir}"/libc.so.6* "${libdir}"/.
cp -a "${stage_one}/${libdir}"/libm.so.6* "${libdir}"/.
cp -a "${stage_one}/${libdir}"/librt.so.1* "${libdir}"/.
# nvidia-persistenced dependencies for CUDA repo and >= 590
cp -a "${stage_one}/${libdir}"/libtirpc.so.3* "${libdir}"/.
cp -a "${stage_one}/${libdir}"/libgssapi_krb5.so.2* "${libdir}"/.
cp -a "${stage_one}/${libdir}"/libkrb5.so.3* "${libdir}"/.
cp -a "${stage_one}/${libdir}"/libkrb5support.so.0* "${libdir}"/.
cp -a "${stage_one}/${libdir}"/libk5crypto.so.3* "${libdir}"/.
cp -a "${stage_one}/${libdir}"/libcom_err.so.2* "${libdir}"/.
cp -a "${stage_one}/${libdir}"/libkeyutils.so.1* "${libdir}"/.
cp -a "${stage_one}/etc/netconfig" etc/.
[[ "${type}" == "confidential" ]] && cp -a "${stage_one}/${libdir}"/libnvidia-pkcs11* "${libdir}"/.
@@ -212,13 +222,19 @@ chisseled_gpudirect() {
}
setup_nvrc_init_symlinks() {
local nvrc="NVRC-${machine_arch}-unknown-linux-musl"
local rootfs_type=${1:-""}
local bin="NVRC${rootfs_type:+"-${rootfs_type}"}"
local target=${machine_arch}-unknown-linux-musl
# make sure NVRC is the init process for the initrd and image case
ln -sf /bin/"${nvrc}" init
ln -sf /bin/"${nvrc}" sbin/init
ln -sf /bin/"${bin}-${target}" init
ln -sf /bin/"${bin}-${target}" sbin/init
}
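# Illustrative: with rootfs_type="confidential" on x86_64 this yields
#   init -> /bin/NVRC-confidential-x86_64-unknown-linux-musl
#   sbin/init -> /bin/NVRC-confidential-x86_64-unknown-linux-musl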
chisseled_init() {
local rootfs_type=${1:-""}
echo "nvidia: chisseling init"
tar --zstd -xvf "${BUILD_DIR}"/kata-static-busybox.tar.zst -C .
@@ -232,19 +248,21 @@ chisseled_init() {
libdir=lib/"${machine_arch}"-linux-gnu
cp -a "${stage_one}"/"${libdir}"/libgcc_s.so.1* "${libdir}"/.
local nvrc="NVRC-${machine_arch}-unknown-linux-musl"
bin="NVRC${rootfs_type:+"-${rootfs_type}"}"
target=${machine_arch}-unknown-linux-musl
cp -a "${stage_one}/bin/${nvrc}" bin/.
cp -a "${stage_one}/bin/${nvrc}".cert bin/.
cp -a "${stage_one}/bin/${nvrc}".sig bin/.
cp -a "${stage_one}/bin/${bin}-${target}" bin/.
cp -a "${stage_one}/bin/${bin}-${target}".cert bin/.
cp -a "${stage_one}/bin/${bin}-${target}".sig bin/.
setup_nvrc_init_symlinks
setup_nvrc_init_symlinks "${rootfs_type}"
cp -a "${stage_one}"/usr/bin/kata-agent usr/bin/.
if [[ "${AGENT_POLICY}" == "yes" ]]; then
cp -a "${stage_one}"/etc/kata-opa etc/.
fi
cp -a "${stage_one}"/etc/resolv.conf etc/.
cp -a "${stage_one}"/supported-gpu.devids .
cp -a "${stage_one}"/lib/firmware/nvidia lib/firmware/.
cp -a "${stage_one}"/sbin/ldconfig.real sbin/ldconfig
@@ -332,7 +350,7 @@ setup_nvidia_gpu_rootfs_stage_two() {
pushd "${stage_two}" >> /dev/null
# Only step needed from stage_two (see chisseled_init)
setup_nvrc_init_symlinks
setup_nvrc_init_symlinks "${type}"
else
echo "nvidia: chisseling the following stack components: ${stack}"
@@ -343,7 +361,7 @@ setup_nvidia_gpu_rootfs_stage_two() {
pushd "${stage_two}" >> /dev/null
chisseled_init
chisseled_init "${type}"
chisseled_iptables
IFS=',' read -r -a stack_components <<< "${NVIDIA_GPU_STACK}"

View File

@@ -52,7 +52,7 @@ build_initrd() {
GUEST_HOOKS_TARBALL="${GUEST_HOOKS_TARBALL}"
if [[ "${image_initrd_suffix}" == "nvidia-gpu"* ]]; then
nvidia_driver_version=$(get_from_kata_deps .externals.nvidia.driver.version)
nvidia_driver_version=$(cat "${builddir}"/initrd-image/*/nvidia_driver_version)
artifact_name=${artifact_name/.initrd/"-${nvidia_driver_version}".initrd}
fi
@@ -81,7 +81,7 @@ build_image() {
GUEST_HOOKS_TARBALL="${GUEST_HOOKS_TARBALL}"
if [[ "${image_initrd_suffix}" == "nvidia-gpu"* ]]; then
nvidia_driver_version=$(get_from_kata_deps .externals.nvidia.driver.version)
nvidia_driver_version=$(cat "${builddir}"/rootfs-image/*/nvidia_driver_version)
artifact_name=${artifact_name/.image/"-${nvidia_driver_version}".image}
fi
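# Illustrative: with nvidia_driver_version=580.65.06, an artifact named
# kata-containers.image becomes kata-containers-580.65.06.image.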

View File

@@ -893,6 +893,7 @@ dependencies = [
"tempfile",
"thiserror 1.0.69",
"tokio",
"toml",
"toml_edit",
"walkdir",
]
@@ -1727,6 +1728,15 @@ dependencies = [
"tokio",
]
[[package]]
name = "toml"
version = "0.5.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234"
dependencies = [
"serde",
]
[[package]]
name = "toml_datetime"
version = "0.6.11"

View File

@@ -23,6 +23,7 @@ env_logger = "0.10"
clap = { version = "4.5", features = ["derive"] }
# TOML parsing and manipulation
toml = "0.5.8"
toml_edit = "0.22"
# YAML parsing and manipulation
@@ -49,4 +50,4 @@ regex = "1.10"
tokio = { version = "1.38", features = ["rt-multi-thread", "macros", "time"] }
[dev-dependencies]
tempfile = "3.8"
tempfile = "3.8"

View File

@@ -15,13 +15,13 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: "3.25.0"
version: "3.24.0"
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "3.25.0"
appVersion: "3.24.0"
dependencies:
- name: node-feature-discovery

View File

@@ -3,46 +3,6 @@
# SPDX-License-Identifier: Apache-2.0
#
{{/*
Expand the name of the chart.
*/}}
{{- define "kata-deploy.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
*/}}
{{- define "kata-deploy.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Common labels
*/}}
{{- define "kata-deploy.labels" -}}
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
{{ include "kata-deploy.selectorLabels" . }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels
*/}}
{{- define "kata-deploy.selectorLabels" -}}
app.kubernetes.io/name: {{ include "kata-deploy.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Set the correct containerd conf path depending on the k8s distribution
*/}}

View File

@@ -1,161 +0,0 @@
{{- /*
Copyright (c) 2026 The Kata Containers Authors
SPDX-License-Identifier: Apache-2.0
Verification Job - runs after kata-deploy installation to validate Kata is working.
Only created when verification.pod is provided.
*/ -}}
{{- if .Values.verification.pod }}
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "kata-deploy.fullname" . }}-verification-spec
namespace: {{ .Release.Namespace }}
labels:
{{- include "kata-deploy.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "-5"
"helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
data:
pod-spec.yaml: |
{{- .Values.verification.pod | nindent 4 }}
---
apiVersion: batch/v1
kind: Job
metadata:
name: {{ include "kata-deploy.fullname" . }}-verify
namespace: {{ .Release.Namespace }}
labels:
{{- include "kata-deploy.labels" . | nindent 4 }}
app.kubernetes.io/component: verification
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "0"
"helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded,hook-failed
spec:
backoffLimit: 3
ttlSecondsAfterFinished: 3600
template:
metadata:
labels:
{{- include "kata-deploy.selectorLabels" . | nindent 8 }}
app.kubernetes.io/component: verification
spec:
restartPolicy: Never
serviceAccountName: {{ include "kata-deploy.fullname" . }}-verification
containers:
- name: verify
image: quay.io/kata-containers/kubectl:latest
command:
- bash
- -c
- |
set -e
VERIFY_NS="{{ .Values.verification.namespace }}"
TIMEOUT="{{ .Values.verification.timeout }}"
echo "=== Kata Deploy Verification ==="
echo "Namespace: ${VERIFY_NS}"
echo "Timeout: ${TIMEOUT}s"
echo ""
# Wait for kata-deploy DaemonSet to be ready
echo "Waiting for kata-deploy DaemonSet to be ready..."
{{- if .Values.env.multiInstallSuffix }}
kubectl rollout status daemonset/{{ .Chart.Name }}-{{ .Values.env.multiInstallSuffix }} -n {{ .Release.Namespace }} --timeout=600s
{{- else }}
kubectl rollout status daemonset/{{ .Chart.Name }} -n {{ .Release.Namespace }} --timeout=600s
{{- end }}
echo ""
echo "Creating verification pod..."
POD_RESOURCE=$(kubectl apply -n "${VERIFY_NS}" -f /config/pod-spec.yaml -o name)
POD_NAME="${POD_RESOURCE#pod/}"
echo "Created: ${POD_NAME}"
# Ensure cleanup runs on any exit (success, failure, or signal)
cleanup() {
echo ""
echo "Cleaning up verification pod..."
kubectl delete pod "${POD_NAME}" -n "${VERIFY_NS}" --ignore-not-found --wait=false
}
trap cleanup EXIT
echo ""
echo "Waiting for verification pod to complete..."
if kubectl wait pod "${POD_NAME}" -n "${VERIFY_NS}" --for=jsonpath='{.status.phase}'=Succeeded --timeout="${TIMEOUT}s"; then
echo ""
echo "=== Verification Pod Logs ==="
kubectl logs "${POD_NAME}" -n "${VERIFY_NS}" || true
echo ""
echo "SUCCESS: Verification passed"
exit 0
else
echo ""
echo "=== Verification Failed ==="
echo "Pod status:"
kubectl describe pod "${POD_NAME}" -n "${VERIFY_NS}" || true
echo ""
echo "Pod logs:"
kubectl logs "${POD_NAME}" -n "${VERIFY_NS}" || true
exit 1
fi
volumeMounts:
- name: pod-spec
mountPath: /config
volumes:
- name: pod-spec
configMap:
name: {{ include "kata-deploy.fullname" . }}-verification-spec
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ include "kata-deploy.fullname" . }}-verification
namespace: {{ .Release.Namespace }}
labels:
{{- include "kata-deploy.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "-10"
"helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded,hook-failed
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: {{ include "kata-deploy.fullname" . }}-verification
labels:
{{- include "kata-deploy.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "-10"
"helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded,hook-failed
rules:
- apiGroups: [""]
resources: ["pods", "pods/log"]
verbs: ["get", "list", "watch", "create", "delete"]
- apiGroups: ["apps"]
resources: ["daemonsets"]
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: {{ include "kata-deploy.fullname" . }}-verification
labels:
{{- include "kata-deploy.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "-10"
"helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded,hook-failed
subjects:
- kind: ServiceAccount
name: {{ include "kata-deploy.fullname" . }}-verification
namespace: {{ .Release.Namespace }}
roleRef:
kind: ClusterRole
name: {{ include "kata-deploy.fullname" . }}-verification
apiGroup: rbac.authorization.k8s.io
{{- end }}

View File

@@ -273,33 +273,3 @@ env:
# deployment, use nodeSelector manually.
node-feature-discovery:
enabled: false
# Verification
# Post-install verification to validate Kata Containers is working correctly.
# When a pod spec is provided, runs a verification pod after installation.
#
# Provide your own pod YAML that validates your specific deployment requirements.
#
verification:
# Namespace where verification pod will be created
namespace: default
# Timeout for verification pod to complete (seconds)
timeout: 180
# Pod spec for verification (optional)
# If provided, a verification job will run after install/upgrade.
# If empty, no verification is performed.
#
# Provide your own pod YAML that validates your deployment works correctly.
# The pod must:
# - Have metadata.name set
# - Include the correct runtimeClassName (e.g., kata-qemu, kata-qemu-snp)
# - Include any annotations needed (e.g., cc_init_data for CoCo)
# - Exit 0 on success, non-zero on failure
#
# Usage:
# helm install kata-deploy ... \
# --set-file verification.pod=/path/to/your-verification-pod.yaml
#
pod: ""

View File

@@ -56,6 +56,7 @@ REPO_COMPONENTS="${REPO_COMPONENTS:-}"
AGENT_POLICY="${AGENT_POLICY:-yes}"
TARGET_BRANCH="${TARGET_BRANCH:-main}"
PUSH_TO_REGISTRY="${PUSH_TO_REGISTRY:-}"
KERNEL_HEADERS_PKG_TYPE="${KERNEL_HEADERS_PKG_TYPE:-deb}"
RELEASE="${RELEASE:-"no"}"
KBUILD_SIGN_PIN="${KBUILD_SIGN_PIN:-}"
RUNTIME_CHOICE="${RUNTIME_CHOICE:-both}"
@@ -144,6 +145,15 @@ EOF
exit "${return_code}"
}
get_kernel_headers_dir() {
local kernel_name="${1:-}"
[ -z "${kernel_name}" ] && die "kernel name is a required argument"
local kernel_headers_dir="${repo_root_dir}/tools/packaging/kata-deploy/local-build/build/${kernel_name}/builddir"
echo "${kernel_headers_dir}"
}
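# Illustrative: get_kernel_headers_dir "kernel-nvidia-gpu-confidential" echoes
#   ${repo_root_dir}/tools/packaging/kata-deploy/local-build/build/kernel-nvidia-gpu-confidential/builddir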
get_kernel_modules_dir() {
local kernel_version="${1:-}"
local kernel_kata_config_version="${2:-}"
@@ -357,29 +367,6 @@ get_latest_kernel_artefact_and_builder_image_version() {
echo "${latest_kernel_artefact}-${latest_kernel_builder_image}"
}
get_latest_kernel_nvidia_confidential_artefact_and_builder_image_version() {
local kernel_version=$(get_from_kata_deps ".assets.kernel.nvidia-confidential.version")
local kernel_kata_config_version="$(cat ${repo_root_dir}/tools/packaging/kernel/kata_config_version)"
local latest_kernel_artefact="${kernel_version}-${kernel_kata_config_version}-$(get_last_modification $(dirname $kernel_builder))"
local latest_kernel_builder_image="$(get_kernel_image_name)"
echo "${latest_kernel_artefact}-${latest_kernel_builder_image}"
}
get_latest_kernel_nvidia_artefact_and_builder_image_version() {
local kernel_version
local kernel_kata_config_version
local latest_kernel_artefact
local latest_kernel_builder_image
kernel_version=$(get_from_kata_deps ".assets.kernel.nvidia.version")
kernel_kata_config_version="$(cat "${repo_root_dir}"/tools/packaging/kernel/kata_config_version)"
latest_kernel_artefact="${kernel_version}-${kernel_kata_config_version}-$(get_last_modification "$(dirname "${kernel_builder}")")"
latest_kernel_builder_image="$(get_kernel_image_name)"
echo "${latest_kernel_artefact}-${latest_kernel_builder_image}"
}
#Install guest image
install_image() {
local variant="${1:-}"
@@ -410,12 +397,6 @@ install_image() {
if [[ "${variant}" == *confidential ]]; then
# For the confidential image we depend on the kernel built in order to ensure that
# measured boot is used
if [[ "${variant}" == "nvidia-gpu-confidential" ]]; then
latest_artefact+="-$(get_latest_kernel_nvidia_confidential_artefact_and_builder_image_version)"
else
latest_artefact+="-$(get_latest_kernel_confidential_artefact_and_builder_image_version)"
fi
latest_artefact+="-$(get_latest_kernel_confidential_artefact_and_builder_image_version)"
latest_artefact+="-$(get_latest_coco_guest_components_artefact_and_builder_image_version)"
latest_artefact+="-$(get_latest_pause_image_artefact_and_builder_image_version)"
@@ -423,7 +404,7 @@ install_image() {
if [[ "${variant}" == "nvidia-gpu" ]]; then
# If we bump the kernel we need to rebuild the image
latest_artefact+="-$(get_latest_kernel_nvidia_artefact_and_builder_image_version "${variant}")"
latest_artefact+="-$(get_latest_kernel_artefact_and_builder_image_version "${variant}")"
fi
latest_builder_image=""
@@ -516,18 +497,14 @@ install_initrd() {
if [[ "${variant}" == *confidential ]]; then
# For the confidential initrd we depend on the kernel built in order to ensure that
# measured boot is used
if [[ "${variant}" == "nvidia-gpu-confidential" ]]; then
latest_artefact+="-$(get_latest_kernel_nvidia_confidential_artefact_and_builder_image_version)"
else
latest_artefact+="-$(get_latest_kernel_confidential_artefact_and_builder_image_version)"
fi
latest_artefact+="-$(get_latest_kernel_confidential_artefact_and_builder_image_version)"
latest_artefact+="-$(get_latest_coco_guest_components_artefact_and_builder_image_version)"
latest_artefact+="-$(get_latest_pause_image_artefact_and_builder_image_version)"
fi
if [[ "${variant}" == "nvidia-gpu" ]]; then
# If we bump the kernel we need to rebuild the initrd as well
latest_artefact+="-$(get_latest_kernel_nvidia_artefact_and_builder_image_version "${variant}")"
latest_artefact+="-$(get_latest_kernel_artefact_and_builder_image_version "${variant}")"
fi
latest_builder_image=""
@@ -591,8 +568,10 @@ install_initrd_confidential() {
# For all nvidia_gpu targets we can customize the stack that is enabled
# in the VM by setting the NVIDIA_GPU_STACK= environment variable
#
# driver -> driver version is set via versions.yaml making sure kernel
# and rootfs builds are using the same version
# latest | lts | version
# -> use the latest and greatest driver,
# lts release or e.g. version=550.127.1
# driver -> enable open or closed drivers
# compute -> enable the compute GPU stack, includes utility
# graphics -> enable the graphics GPU stack, includes compute
# dcgm -> enable the DCGM stack + DGCM exporter
@@ -606,43 +585,39 @@ install_initrd_confidential() {
#
# The full stack can be enabled by setting all the options like:
#
# NVIDIA_GPU_STACK="compute,dcgm,nvswitch,gpudirect"
# NVIDIA_GPU_STACK="latest,compute,dcgm,nvswitch,gpudirect"
#
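# Hypothetical invocation (build-target name and flag are illustrative):
#
#   NVIDIA_GPU_STACK="lts,compute,dcgm" ./kata-deploy-binaries.sh --build=rootfs-image-nvidia-gpu
#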
# Install NVIDIA GPU image
install_image_nvidia_gpu() {
export AGENT_POLICY
local version=$(get_from_kata_deps .externals.nvidia.driver.version)
EXTRA_PKGS="apt curl ${EXTRA_PKGS}"
NVIDIA_GPU_STACK=${NVIDIA_GPU_STACK:-"driver=${version},compute,dcgm"}
NVIDIA_GPU_STACK=${NVIDIA_GPU_STACK:-"latest,compute,dcgm"}
install_image "nvidia-gpu"
}
# Install NVIDIA GPU initrd
install_initrd_nvidia_gpu() {
export AGENT_POLICY
local version=$(get_from_kata_deps .externals.nvidia.driver.version)
EXTRA_PKGS="apt curl ${EXTRA_PKGS}"
NVIDIA_GPU_STACK=${NVIDIA_GPU_STACK:-"driver=${version},compute,dcgm"}
NVIDIA_GPU_STACK=${NVIDIA_GPU_STACK:-"latest,compute,dcgm"}
install_initrd "nvidia-gpu"
}
# Install NVIDIA GPU confidential image
install_image_nvidia_gpu_confidential() {
export AGENT_POLICY
local version=$(get_from_kata_deps .externals.nvidia.driver.version)
EXTRA_PKGS="apt curl ${EXTRA_PKGS}"
# TODO: export MEASURED_ROOTFS=yes
NVIDIA_GPU_STACK=${NVIDIA_GPU_STACK:-"driver=${version},compute,dcgm"}
NVIDIA_GPU_STACK=${NVIDIA_GPU_STACK:-"latest,compute,dcgm"}
install_image "nvidia-gpu-confidential"
}
# Install NVIDIA GPU confidential initrd
install_initrd_nvidia_gpu_confidential() {
export AGENT_POLICY
local version=$(get_from_kata_deps .externals.nvidia.driver.version)
EXTRA_PKGS="apt curl ${EXTRA_PKGS}"
# TODO: export MEASURED_ROOTFS=yes
NVIDIA_GPU_STACK=${NVIDIA_GPU_STACK:-"driver=${version},compute,dcgm"}
NVIDIA_GPU_STACK=${NVIDIA_GPU_STACK:-"latest,compute,dcgm"}
install_initrd "nvidia-gpu-confidential"
}
@@ -671,12 +646,10 @@ install_cached_kernel_tarball_component() {
case ${kernel_name} in
"kernel-nvidia-gpu"*"")
local modules_dir
modules_dir=$(get_kernel_modules_dir "${kernel_version}" "${kernel_kata_config_version}" "${build_target}")
mkdir -p "${modules_dir}" || true
tar --strip-components=1 --zstd -xvf "${workdir}/kata-static-${kernel_name}-modules.tar.zst" -C "${modules_dir}" || return 1
;;
local kernel_headers_dir=$(get_kernel_headers_dir "${kernel_name}")
mkdir -p "${kernel_headers_dir}" || true
tar --zstd -xvf "${workdir}/${kernel_name}/builddir/kata-static-${kernel_name}-headers.tar.zst" -C "${kernel_headers_dir}" || return 1
;;& # fallthrough in the confidential case we need the modules.tar.zst and for every kernel-nvidia-gpu we need the headers
"kernel"*"-confidential")
local modules_dir=$(get_kernel_modules_dir ${kernel_version} ${kernel_kata_config_version} ${build_target})
mkdir -p "${modules_dir}" || true
@@ -704,24 +677,18 @@ install_kernel_helper() {
kernel_url="$(get_from_kata_deps .assets.kernel.confidential.url)"
fi
if [[ "${kernel_name}" == "kernel-nvidia-gpu" ]]; then
kernel_version="$(get_from_kata_deps .assets.kernel.nvidia.version)"
kernel_url="$(get_from_kata_deps .assets.kernel.nvidia.url)"
if [[ "${kernel_name}" == "kernel"*"-confidential" ]]; then
local kernel_modules_tarball_name="kata-static-${kernel_name}-modules.tar.zst"
local kernel_modules_tarball_path="${workdir}/${kernel_modules_tarball_name}"
extra_tarballs="${kernel_modules_tarball_name}:${kernel_modules_tarball_path}"
fi
if [[ "${kernel_name}" == "kernel-nvidia-gpu-confidential" ]]; then
kernel_version="$(get_from_kata_deps .assets.kernel.nvidia-confidential.version)"
kernel_url="$(get_from_kata_deps .assets.kernel.nvidia-confidential.url)"
if [[ "${kernel_name}" == "kernel-nvidia-gpu*" ]]; then
local kernel_headers_tarball_name="kata-static-${kernel_name}-headers.tar.zst"
local kernel_headers_tarball_path="${workdir}/${kernel_headers_tarball_name}"
extra_tarballs+=" ${kernel_headers_tarball_name}:${kernel_headers_tarball_path}"
fi
case ${kernel_name} in
kernel-nvidia-gpu*|kernel*-confidential)
local kernel_modules_tarball_name="kata-static-${kernel_name}-modules.tar.zst"
local kernel_modules_tarball_path="${workdir}/${kernel_modules_tarball_name}"
extra_tarballs="${kernel_modules_tarball_name}:${kernel_modules_tarball_path}"
;;
esac
default_patches_dir="${repo_root_dir}/tools/packaging/kernel/patches"
install_cached_kernel_tarball_component ${kernel_name} ${extra_tarballs} && return 0
@@ -775,23 +742,23 @@ install_kernel_nvidia_gpu_dragonball_experimental() {
install_kernel_helper \
"assets.kernel-dragonball-experimental" \
"kernel-dragonball-experimental" \
"-e -t dragonball -g nvidia"
"-e -t dragonball -g nvidia -H deb"
}
#Install GPU enabled kernel asset
install_kernel_nvidia_gpu() {
install_kernel_helper \
"assets.kernel.nvidia" \
"assets.kernel" \
"kernel-nvidia-gpu" \
"-g nvidia"
"-g nvidia -H deb"
}
#Install GPU and TEE enabled kernel asset
install_kernel_nvidia_gpu_confidential() {
install_kernel_helper \
"assets.kernel.nvidia-confidential" \
"assets.kernel.confidential" \
"kernel-nvidia-gpu-confidential" \
"-x -g nvidia"
"-x -g nvidia -H deb"
}
install_qemu_helper() {
@@ -1442,21 +1409,33 @@ handle_build() {
case ${build_target} in
kernel-nvidia-gpu*)
local modules_final_tarball_path="${workdir}/kata-static-${build_target}-modules.tar.zst"
if [[ ! -f "${modules_final_tarball_path}" ]]; then
local modules_dir
modules_dir=$(get_kernel_modules_dir "${kernel_version}" "${kernel_kata_config_version}" "${build_target}")
local kernel_headers_final_tarball_path="${workdir}/kata-static-${build_target}-headers.tar.zst"
if [ ! -f "${kernel_headers_final_tarball_path}" ]; then
local kernel_headers_dir
kernel_headers_dir=$(get_kernel_headers_dir "${build_target}")
parent_dir=$(dirname "${modules_dir}")
parent_dir_basename=$(basename "${parent_dir}")
pushd "${kernel_headers_dir}"
find . -type f -name "*.${KERNEL_HEADERS_PKG_TYPE}" -exec tar -rvf kernel-headers.tar {} +
if [ -n "${KBUILD_SIGN_PIN}" ]; then
# For these two we can simply do a `|| true`, as the signing_key.{pem,x509} is either:
# * already in ., because we're using a cached tarball, or
# * moved here now, because we just built the kernel
mv kata-linux-*/certs/signing_key.pem . || true
mv kata-linux-*/certs/signing_key.x509 . || true
pushd "${parent_dir}"
rm -f "${parent_dir_basename}"/build
tar --zstd -cvf "${modules_final_tarball_path}" "."
# Then we can check for the key in ., as it should always be there in both
# cases (cached or freshly built kernel).
head -n1 "signing_key.pem" | grep -q "ENCRYPTED PRIVATE KEY" || die "signing_key.pem is not encrypted"
tar -rvf kernel-headers.tar signing_key.pem signing_key.x509 --remove-files
fi
zstd -T0 kernel-headers.tar -o kernel-headers.tar.zst
mv kernel-headers.tar.zst "${kernel_headers_final_tarball_path}"
popd
fi
tar --zstd -tvf "${modules_final_tarball_path}"
;;
tar --zstd -tvf "${kernel_headers_final_tarball_path}"
;;& # fallthrough in the confidential case we need the modules.tar.zst and for every kernel-nvidia-gpu we need the headers
kernel*-confidential)
local modules_final_tarball_path="${workdir}/kata-static-${build_target}-modules.tar.zst"
if [ ! -f "${modules_final_tarball_path}" ]; then
@@ -1523,7 +1502,18 @@ handle_build() {
)
oci_image="${ARTEFACT_REGISTRY}/${ARTEFACT_REPOSITORY}/cached-artefacts/${build_target}:${normalized_tags}"
case ${build_target} in
kernel-nvidia-gpu*|kernel*-confidential)
kernel-nvidia-gpu)
files_to_push+=(
"kata-static-${build_target}-headers.tar.zst"
)
;;
kernel-nvidia-gpu-confidential)
files_to_push+=(
"kata-static-${build_target}-modules.tar.zst"
"kata-static-${build_target}-headers.tar.zst"
)
;;
kernel*-confidential)
files_to_push+=(
"kata-static-${build_target}-modules.tar.zst"
)

View File

@@ -288,8 +288,8 @@ function is_containerd_capable_of_using_drop_in_files() {
return
fi
local major_version=$(kubectl get node $NODE_NAME -o jsonpath='{.status.nodeInfo.containerRuntimeVersion}' | grep -oE '[0-9]+\.[0-9]+' | head -n1 | cut -d'.' -f1)
if [ $major_version -lt 2 ]; then
local version_major=$(kubectl get node $NODE_NAME -o jsonpath='{.status.nodeInfo.containerRuntimeVersion}' | grep -oE '[0-9]+\.[0-9]+' | cut -d'.' -f1)
if [ $version_major -lt 2 ]; then
# Only containerd 2.0 merges the plugins section from different snippets,
# instead of overwriting the whole section, which makes things considerably
# more complicated for us to deal with.
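# Illustrative: for a containerRuntimeVersion of "containerd://2.1.4" the
# grep/cut pipeline above yields "2", so drop-in files are used, while
# "containerd://1.7.13" yields "1" and the legacy path is taken.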

View File

@@ -514,18 +514,6 @@ setup_kernel() {
cp "${kernel_config_path}" ./.config
ARCH=${arch_target} make oldconfig ${CROSS_BUILD_ARG}
)
info "Fetching NVIDIA driver source code"
if [[ "${gpu_vendor}" == "${VENDOR_NVIDIA}" ]]; then
driver_version=$(get_from_kata_deps .externals.nvidia.driver.version)
driver_url=$(get_from_kata_deps .externals.nvidia.driver.url)
driver_src="open-gpu-kernel-modules-${driver_version}"
info "Downloading NVIDIA driver source code from: ${driver_url}${driver_version}.tar.gz"
[[ -d "${driver_src}" ]] && rm -rf "${driver_src}"
curl -L -o "${driver_version}.tar.gz" "${driver_url}${driver_version}.tar.gz"
tar -xvf "${driver_version}.tar.gz" --transform "s|open-gpu-kernel-modules-${driver_version}|open-gpu-kernel-modules|"
fi
}
build_kernel() {
@@ -543,13 +531,6 @@ build_kernel() {
[ -e "vmlinux" ]
([ "${hypervisor_target}" == "firecracker" ] || [ "${hypervisor_target}" == "cloud-hypervisor" ]) && [ "${arch_target}" == "arm64" ] && [ -e "arch/${arch_target}/boot/Image" ]
popd >>/dev/null
if [[ "${gpu_vendor}" == "${VENDOR_NVIDIA}" ]]; then
pushd open-gpu-kernel-modules
make -j "$(nproc)" CC=gcc SYSSRC="${kernel_path}" > /dev/null
make INSTALL_MOD_STRIP=1 INSTALL_MOD_PATH=${kernel_path} -j "$(nproc)" CC=gcc SYSSRC="${kernel_path}" modules_install
make -j "$(nproc)" CC=gcc SYSSRC="${kernel_path}" clean > /dev/null
fi
}
build_kernel_headers() {

View File

@@ -3,3 +3,4 @@
CONFIG_ARM64_CRYPTO=y
CONFIG_CRYPTO_AES_ARM64=y
CONFIG_CRYPTO_AES_ARM64_CE=y
CONFIG_CRYPTO_SHA256_ARM64=y

View File

@@ -13,46 +13,8 @@ CONFIG_DECOMPRESS_GZIP=y
CONFIG_CRYPTO_USER_API=y
CONFIG_CRYPTO_USER_API_HASH=y
CONFIG_CRYPTO_SHA256=y
# CRYPTO_FIPS dependencies (required in 6.18+)
# CRYPTO_FIPS -> CRYPTO_SELFTESTS -> EXPERT
CONFIG_EXPERT=y
CONFIG_CRYPTO_SELFTESTS=y
CONFIG_CRYPTO_FIPS=y
CONFIG_CRYPTO_ANSI_CPRNG=y
# EXPERT disables many options by default - restore common ones:
# (Architecture-specific options are in x86_64/crypto.conf and arm64/crypto.conf)
CONFIG_PCI_QUIRKS=y
CONFIG_ZONE_DMA=y
CONFIG_BUG=y
CONFIG_IO_URING=y
CONFIG_PCIEASPM=y
CONFIG_FW_LOADER=y
CONFIG_KALLSYMS=y
CONFIG_MEMBARRIER=y
CONFIG_COREDUMP=y
CONFIG_ELF_CORE=y
CONFIG_ADVISE_SYSCALLS=y
CONFIG_CACHESTAT_SYSCALL=y
CONFIG_RSEQ=y
CONFIG_SECRETMEM=y
CONFIG_VT=y
CONFIG_RANDOMIZE_KSTACK_OFFSET=y
CONFIG_ALLOW_DEV_COREDUMP=y
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_VT_CONSOLE=y
CONFIG_PROC_PAGE_MONITOR=y
CONFIG_CRC_OPTIMIZATIONS=y
CONFIG_XZ_DEC_X86=y
CONFIG_XZ_DEC_POWERPC=y
CONFIG_XZ_DEC_ARM=y
CONFIG_XZ_DEC_ARMTHUMB=y
CONFIG_XZ_DEC_ARM64=y
CONFIG_XZ_DEC_SPARC=y
CONFIG_XZ_DEC_RISCV=y
CONFIG_DEBUG_BUGVERBOSE=y
CONFIG_SLUB_DEBUG=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_STACKTRACE=y
# IPsec and VXLAN crypto requirements
CONFIG_CRYPTO_GCM=y

View File

@@ -5,12 +5,6 @@
CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
# Legacy xtables/iptables support (required by 6.18+ for IP_NF_* options)
# This is needed to enable legacy iptables support
CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_IP_NF_IPTABLES_LEGACY=y
CONFIG_IP6_NF_IPTABLES_LEGACY=y
CONFIG_NETFILTER_INGRESS=y
CONFIG_NETFILTER_NETLINK=y
CONFIG_NETFILTER_FAMILY_ARP=y

View File

@@ -1,14 +1,7 @@
# Module signing
# Dependencies required for MODULE_SIG and SYSTEM_TRUSTED_KEYRING
CONFIG_MODULES=y
CONFIG_KEYS=y
CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y
CONFIG_X509_CERTIFICATE_PARSER=y
CONFIG_PKCS7_MESSAGE_PARSER=y
CONFIG_SYSTEM_TRUSTED_KEYRING=y
CONFIG_SYSTEM_TRUSTED_KEYS=""
CONFIG_MODULE_SIG=y
CONFIG_MODULE_SIG_FORCE=y
CONFIG_MODULE_SIG_ALL=y
CONFIG_MODULE_SIG_SHA512=y
CONFIG_SYSTEM_TRUSTED_KEYS=""
CONFIG_SYSTEM_TRUSTED_KEYRING=y

View File

@@ -15,6 +15,12 @@ CONFIG_CRYPTO_HW=y
CONFIG_ZCRYPT=y
# Kernel API for protected key handling
CONFIG_PKEY=y
# s390 hardware accelerated implementation of the SHA-1 secure hash standard
CONFIG_CRYPTO_SHA1_S390=y
# s390 hardware accelerated implementation of the SHA256 secure hash standard
CONFIG_CRYPTO_SHA256_S390=y
# s390 hardware accelerated implementation of the SHA512 secure hash standard
CONFIG_CRYPTO_SHA512_S390=y
# s390 hardware accelerated implementation of the DES cipher algorithm (FIPS 46-2),
# and Triple DES EDE (FIPS 46-3).
CONFIG_CRYPTO_DES_S390=y
@@ -34,5 +40,6 @@ CONFIG_CRYPTO_SHA3_512_S390=y
# Support for ChaCha stream cipher algorithms
CONFIG_CRYPTO_CHACHA20=y
CONFIG_CRYPTO_CHACHA20POLY1305=y
CONFIG_CRYPTO_CHACHA_S390=y
# When PKEY is enabled and dm-crypt wants to use protected keys
CONFIG_CRYPTO_PAES_S390=y

View File

@@ -42,27 +42,3 @@ CONFIG_BLK_DEV_WRITE_MOUNTED
CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE
CONFIG_MHP_DEFAULT_ONLINE_TYPE_ONLINE_AUTO
CONFIG_CRYPTO_CRC32_S390
# Removed in 6.18+ kernels
CONFIG_NF_CT_PROTO_DCCP
# Added in 6.18+ kernels (not present in older kernels)
CONFIG_CRYPTO_SELFTESTS
CONFIG_NETFILTER_XTABLES_LEGACY
CONFIG_IP_NF_IPTABLES_LEGACY
CONFIG_IP6_NF_IPTABLES_LEGACY
CONFIG_CRC_OPTIMIZATIONS
CONFIG_MTRR
CONFIG_X86_PAT
CONFIG_EARLY_PRINTK
CONFIG_PCSPKR_PLATFORM
CONFIG_X86_16BIT
CONFIG_X86_ESPFIX64
CONFIG_X86_UMIP
CONFIG_MODIFY_LDT_SYSCALL
CONFIG_X86_PM_TIMER
CONFIG_ISA_DMA_API
CONFIG_DMI
CONFIG_VGA_ARB
CONFIG_SERIAL_8250_PNP
CONFIG_ZONE_DMA
CONFIG_SECRETMEM

View File

@@ -1,20 +1,2 @@
# x86 cryptographic instructions to improve AES encryption and SHA256 hashing.
CONFIG_CRYPTO_AES_NI_INTEL=y
# x86-specific options disabled by CONFIG_EXPERT that need to be restored:
CONFIG_ZONE_DMA=y
CONFIG_DMI=y
CONFIG_MTRR=y
CONFIG_X86_PAT=y
CONFIG_EARLY_PRINTK=y
CONFIG_SGETMASK_SYSCALL=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_X86_16BIT=y
CONFIG_X86_ESPFIX64=y
CONFIG_X86_UMIP=y
CONFIG_MODIFY_LDT_SYSCALL=y
CONFIG_X86_PM_TIMER=y
CONFIG_ISA_DMA_API=y
CONFIG_VGA_ARB=y
CONFIG_FIRMWARE_MEMMAP=y
CONFIG_SERIAL_8250_PNP=y

View File

@@ -1 +1 @@
176
173

View File

@@ -1,170 +0,0 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Received: from foss.arm.com (foss.arm.com [217.140.110.172])
by smtp.subspace.kernel.org (Postfix) with ESMTP id BCE6D2FFDD5
for <nvdimm@lists.linux.dev>; Wed, 14 Jan 2026 17:49:59 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.140.110.172
ARC-Seal:i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
t=1768413001; cv=none; b=mYidLRrJZn5ooS7h+lyKLsbA8/GKL/ZqDOCHo66hKab86eV5cgpwbWPeudpYGPiMp/QhczPxwDzq1J9qP57FU8xy5AmIFwF6cAn6FPN0BzBWxBQUeT/pxDwfOkvh33RigAd/HACiTa+9waLWNn94H1tPpUOn1SUKYcC2anb/EMA=
ARC-Message-Signature:i=1; a=rsa-sha256; d=subspace.kernel.org;
s=arc-20240116; t=1768413001; c=relaxed/simple;
bh=/AHMpb4+3MfzIhYBPESp8KGt8HTeUy14LUGKGDaY7Jc=;
h=Message-ID:Date:MIME-Version:To:Cc:From:Subject:Content-Type; b=nYCvtwNmfNPRI2kMt6FzMOpG8Xv2GrytpiVJh3K4jRBpJFo3NO0icdYoGz0yjvq1G4vQvh/VrhrLhOVAEHdNkuGQz1yL6qHm/9KniwafY98ihbvaadCAZtdiNtjhZ646/irNi48nnwxquGqUkiPk2n9PqYSVLR9VYf60nr/nAOA=
ARC-Authentication-Results:i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=arm.com; spf=pass smtp.mailfrom=arm.com; arc=none smtp.client-ip=217.140.110.172
Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=arm.com
Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=arm.com
Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14])
by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 4F3F21515;
Wed, 14 Jan 2026 09:49:52 -0800 (PST)
Received: from [10.1.37.132] (unknown [10.1.37.132])
by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPSA id 0CA253F59E;
Wed, 14 Jan 2026 09:49:57 -0800 (PST)
Message-ID: <18af3213-6c46-4611-ba75-da5be5a1c9b0@arm.com>
Date: Wed, 14 Jan 2026 17:49:30 +0000
Precedence: bulk
X-Mailing-List: nvdimm@lists.linux.dev
List-Id: <nvdimm.lists.linux.dev>
List-Subscribe: <mailto:nvdimm+subscribe@lists.linux.dev>
List-Unsubscribe: <mailto:nvdimm+unsubscribe@lists.linux.dev>
MIME-Version: 1.0
User-Agent: Mozilla Thunderbird
Content-Language: en-US
To: linux-kernel@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org, nvdimm@lists.linux.dev,
dan.j.williams@intel.com, willy@infradead.org, jack@suse.cz,
Nick.Connolly@arm.com, ffidencio@nvidia.com
From: Seunguk Shin <seunguk.shin@arm.com>
Subject: [PATCH] fs/dax: check zero or empty entry before converting xarray
Content-Type: text/plain; charset=UTF-8; format=flowed
Content-Transfer-Encoding: 8bit
Trying to convert zero or empty xarray entry causes kernel panic.
[ 0.737679] EXT4-fs (pmem0p1): mounted filesystem
79676804-7c8b-491a-b2a6-9bae3c72af70 ro with ordered data mode. Quota
mode: disabled.
[ 0.737891] VFS: Mounted root (ext4 filesystem) readonly on device 259:1.
[ 0.739119] devtmpfs: mounted
[ 0.739476] Freeing unused kernel memory: 1920K
[ 0.740156] Run /sbin/init as init process
[ 0.740229] with arguments:
[ 0.740286] /sbin/init
[ 0.740321] with environment:
[ 0.740369] HOME=/
[ 0.740400] TERM=linux
[ 0.743162] Unable to handle kernel paging request at virtual address
fffffdffbf000008
[ 0.743285] Mem abort info:
[ 0.743316] ESR = 0x0000000096000006
[ 0.743371] EC = 0x25: DABT (current EL), IL = 32 bits
[ 0.743444] SET = 0, FnV = 0
[ 0.743489] EA = 0, S1PTW = 0
[ 0.743545] FSC = 0x06: level 2 translation fault
[ 0.743610] Data abort info:
[ 0.743656] ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000
[ 0.743720] CM = 0, WnR = 0, TnD = 0, TagAccess = 0
[ 0.743785] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
[ 0.743848] swapper pgtable: 4k pages, 48-bit VAs, pgdp=00000000b9d17000
[ 0.743931] [fffffdffbf000008] pgd=10000000bfa3d403,
p4d=10000000bfa3d403, pud=1000000040bfe403, pmd=0000000000000000
[ 0.744070] Internal error: Oops: 0000000096000006 [#1] SMP
[ 0.748888] CPU: 0 UID: 0 PID: 1 Comm: init Not tainted 6.18.4 #1 NONE
[ 0.749421] pstate: 004000c5 (nzcv daIF +PAN -UAO -TCO -DIT -SSBS
BTYPE=--)
[ 0.749969] pc : dax_disassociate_entry.constprop.0+0x20/0x50
[ 0.750444] lr : dax_insert_entry+0xcc/0x408
[ 0.750802] sp : ffff80008000b9e0
[ 0.751083] x29: ffff80008000b9e0 x28: 0000000000000000 x27:
0000000000000000
[ 0.751682] x26: 0000000001963d01 x25: ffff0000004f7d90 x24:
0000000000000000
[ 0.752264] x23: 0000000000000000 x22: ffff80008000bcc8 x21:
0000000000000011
[ 0.752836] x20: ffff80008000ba90 x19: 0000000001963d01 x18:
0000000000000000
[ 0.753407] x17: 0000000000000000 x16: 0000000000000000 x15:
0000000000000000
[ 0.753970] x14: ffffbf3154b9ae70 x13: 0000000000000000 x12:
ffffbf3154b9ae70
[ 0.754548] x11: ffffffffffffffff x10: 0000000000000000 x9 :
0000000000000000
[ 0.755122] x8 : 000000000000000d x7 : 000000000000001f x6 :
0000000000000000
[ 0.755707] x5 : 0000000000000000 x4 : 0000000000000000 x3 :
fffffdffc0000000
[ 0.756287] x2 : 0000000000000008 x1 : 0000000040000000 x0 :
fffffdffbf000000
[ 0.756871] Call trace:
[ 0.757107] dax_disassociate_entry.constprop.0+0x20/0x50 (P)
[ 0.757592] dax_iomap_pte_fault+0x4fc/0x808
[ 0.757951] dax_iomap_fault+0x28/0x30
[ 0.758258] ext4_dax_huge_fault+0x80/0x2dc
[ 0.758594] ext4_dax_fault+0x10/0x3c
[ 0.758892] __do_fault+0x38/0x12c
[ 0.759175] __handle_mm_fault+0x530/0xcf0
[ 0.759518] handle_mm_fault+0xe4/0x230
[ 0.759833] do_page_fault+0x17c/0x4dc
[ 0.760144] do_translation_fault+0x30/0x38
[ 0.760483] do_mem_abort+0x40/0x8c
[ 0.760771] el0_ia+0x4c/0x170
[ 0.761032] el0t_64_sync_handler+0xd8/0xdc
[ 0.761371] el0t_64_sync+0x168/0x16c
[ 0.761677] Code: f9453021 f2dfbfe3 cb813080 8b001860 (f9400401)
[ 0.762168] ---[ end trace 0000000000000000 ]---
[ 0.762550] note: init[1] exited with irqs disabled
[ 0.762631] Kernel panic - not syncing: Attempted to kill init!
exitcode=0x0000000b
This patch just reorders checking and converting.
Signed-off-by: Seunguk Shin <seunguk.shin@arm.com>
---
fs/dax.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index 516f995a9..69ac2562c 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -443,11 +443,12 @@ static void dax_associate_entry(void *entry, struct address_space *mapping,
unsigned long address, bool shared)
{
unsigned long size = dax_entry_size(entry), index;
- struct folio *folio = dax_to_folio(entry);
+ struct folio *folio;
if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry))
return;
+ folio = dax_to_folio(entry);
index = linear_page_index(vma, address & ~(size - 1));
if (shared && (folio->mapping || dax_folio_is_shared(folio))) {
if (folio->mapping)
@@ -468,21 +469,23 @@ static void dax_associate_entry(void *entry, struct address_space *mapping,
static void dax_disassociate_entry(void *entry, struct address_space *mapping,
bool trunc)
{
- struct folio *folio = dax_to_folio(entry);
+ struct folio *folio;
if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry))
return;
+ folio = dax_to_folio(entry);
dax_folio_put(folio);
}
static struct page *dax_busy_page(void *entry)
{
- struct folio *folio = dax_to_folio(entry);
+ struct folio *folio;
if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry))
return NULL;
+ folio = dax_to_folio(entry);
if (folio_ref_count(folio) - folio_mapcount(folio))
return &folio->page;
else
--
2.43.0

View File

@@ -1,29 +0,0 @@
# Copyright (c) 2026 Kata Contributors
#
# SPDX-License-Identifier: Apache-2.0
# Alpine-based image with kubectl for multi-arch support
# Used for kata-deploy verification jobs and other kubectl operations
ARG ALPINE_VERSION=3.23
FROM alpine:${ALPINE_VERSION}
# Install bash, curl, and download kubectl
# hadolint ignore=DL3018
RUN apk add --no-cache bash curl ca-certificates && \
ARCH=$(uname -m) && \
case "${ARCH}" in \
x86_64) KUBECTL_ARCH=amd64 ;; \
aarch64) KUBECTL_ARCH=arm64 ;; \
ppc64le) KUBECTL_ARCH=ppc64le ;; \
s390x) KUBECTL_ARCH=s390x ;; \
*) echo "Unsupported architecture: ${ARCH}" && exit 1 ;; \
esac && \
KUBECTL_VERSION=$(curl -L -s https://dl.k8s.io/release/stable.txt) && \
curl -fL --progress-bar -o /usr/local/bin/kubectl \
"https://dl.k8s.io/release/${KUBECTL_VERSION}/bin/linux/${KUBECTL_ARCH}/kubectl" && \
chmod +x /usr/local/bin/kubectl && \
kubectl version --client
# Default to bash shell
CMD ["/bin/bash"]

View File

@@ -40,7 +40,6 @@ function _check_required_env_var()
KATA_STATIC_TARBALL) env_var="${KATA_STATIC_TARBALL}" ;;
KATA_DEPLOY_IMAGE_TAGS) env_var="${KATA_DEPLOY_IMAGE_TAGS}" ;;
KATA_DEPLOY_REGISTRIES) env_var="${KATA_DEPLOY_REGISTRIES}" ;;
KATA_TOOLS_STATIC_TARBALL) env_var="${KATA_TOOLS_STATIC_TARBALL}" ;;
*) >&2 _die "Invalid environment variable \"${1}\"" ;;
esac

View File

@@ -4,12 +4,9 @@
FROM ubuntu:22.04
ENV DEBIAN_FRONTEND=noninteractive
ENV INSTALL_IN_GOPATH=false
ARG ARCH
COPY install_yq.sh /usr/bin/install_yq.sh
# kernel deps
RUN apt-get update && \
apt-get install -y --no-install-recommends \
@@ -33,5 +30,3 @@ RUN apt-get update && \
python3 && \
if [ "${ARCH}" != "$(uname -m)" ]; then apt-get install --no-install-recommends -y gcc-"${ARCH}"-linux-gnu binutils-"${ARCH}"-linux-gnu; fi && \
apt-get clean && apt-get autoclean && rm -rf /var/lib/apt/lists/*
RUN install_yq.sh

View File

@@ -1,118 +0,0 @@
#!/usr/bin/env bash
#
# Copyright (c) 2019 IBM
#
# SPDX-License-Identifier: Apache-2.0
#
[[ -n "${DEBUG}" ]] && set -o xtrace
# If we fail for any reason a message will be displayed
die() {
msg="$*"
echo "ERROR: ${msg}" >&2
exit 1
}
function verify_yq_exists() {
local yq_path=$1
local yq_version=$2
local expected="yq (https://github.com/mikefarah/yq/) version ${yq_version}"
if [[ -x "${yq_path}" ]] && [[ "$(${yq_path} --version)"X == "${expected}"X ]]; then
return 0
else
return 1
fi
}
# Install the yq yaml query package from the mikefarah github repo
# Install via binary download, as we may not have golang installed at this point
function install_yq() {
local yq_pkg="github.com/mikefarah/yq"
local yq_version=v4.44.5
local precmd=""
local yq_path=""
INSTALL_IN_GOPATH=${INSTALL_IN_GOPATH:-true}
if [[ "${INSTALL_IN_GOPATH}" == "true" ]]; then
GOPATH=${GOPATH:-${HOME}/go}
mkdir -p "${GOPATH}/bin"
yq_path="${GOPATH}/bin/yq"
else
yq_path="/usr/local/bin/yq"
fi
if verify_yq_exists "${yq_path}" "${yq_version}"; then
echo "yq is already installed in correct version"
return
fi
if [[ "${yq_path}" == "/usr/local/bin/yq" ]]; then
# Check if we need sudo to install yq
if [[ ! -w "/usr/local/bin" ]]; then
# Check if we have sudo privileges
if ! sudo -n true 2>/dev/null; then
die "Please provide sudo privileges to install yq"
else
precmd="sudo"
fi
fi
fi
read -r -a sysInfo <<< "$(uname -sm)"
case "${sysInfo[0]}" in
"Linux" | "Darwin")
goos="${sysInfo[0],}"
;;
"*")
die "OS ${sysInfo[0]} not supported"
;;
esac
case "${sysInfo[1]}" in
"aarch64")
goarch=arm64
;;
"arm64")
# If we're on an apple silicon machine, just assign amd64.
# The version of yq we use doesn't have a darwin arm build,
# but Rosetta can come to the rescue here.
if [[ ${goos} == "Darwin" ]]; then
goarch=amd64
else
goarch=arm64
fi
;;
"riscv64")
goarch=riscv64
;;
"ppc64le")
goarch=ppc64le
;;
"x86_64")
goarch=amd64
;;
"s390x")
goarch=s390x
;;
"*")
die "Arch ${sysInfo[1]} not supported"
;;
esac
# Check curl
if ! command -v "curl" >/dev/null; then
die "Please install curl"
fi
## NOTE: ${var,,} => gives lowercase value of var
local yq_url="https://${yq_pkg}/releases/download/${yq_version}/yq_${goos}_${goarch}"
${precmd} curl -o "${yq_path}" -LSsf "${yq_url}" || die "Download ${yq_url} failed"
${precmd} chmod +x "${yq_path}"
if ! command -v "${yq_path}" >/dev/null; then
die "Cannot not get ${yq_path} executable"
fi
}
install_yq

View File

@@ -25,7 +25,7 @@ ENV PKG_CONFIG_PATH=${OPT_LIB}/pkgconfig:$PKG_CONFIG_PATH
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN mkdir ${RUSTUP_HOME} ${CARGO_HOME}
RUN mkdir ${RUSTUP_HOME} ${CARGO_HOME} && chmod -R a+rwX /opt
RUN apt-get update && \
apt-get --no-install-recommends -y install \
@@ -42,8 +42,7 @@ RUN apt-get update && \
perl \
protobuf-compiler && \
apt-get clean && rm -rf /var/lib/apt/lists/ && \
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain ${RUST_TOOLCHAIN} && \
chmod -R a+rwX /opt
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain ${RUST_TOOLCHAIN}
# Install ORAS CLI for tarball caching
ARG ORAS_VERSION=1.3.0

View File

@@ -12,7 +12,7 @@ ENV PATH="/opt/cargo/bin/:${PATH}"
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN mkdir ${RUSTUP_HOME} ${CARGO_HOME}
RUN mkdir ${RUSTUP_HOME} ${CARGO_HOME} && chmod -R a+rwX ${RUSTUP_HOME} ${CARGO_HOME}
RUN apt-get update && \
apt-get install -y --no-install-recommends \
@@ -24,8 +24,7 @@ RUN apt-get update && \
libseccomp-dev \
unzip && \
apt-get clean && rm -rf /var/lib/lists/ && \
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain ${RUST_TOOLCHAIN} && \
chmod -R a+rwX ${RUSTUP_HOME} ${CARGO_HOME}
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain ${RUST_TOOLCHAIN}
RUN ARCH=$(uname -m); \
rust_arch=""; \

View File

@@ -11,7 +11,7 @@ ENV PATH="/opt/cargo/bin/:${PATH}"
SHELL ["/bin/ash", "-o", "pipefail", "-c"]
RUN mkdir ${RUSTUP_HOME} ${CARGO_HOME}
RUN mkdir ${RUSTUP_HOME} ${CARGO_HOME} && chmod -R a+rwX ${RUSTUP_HOME} ${CARGO_HOME}
RUN apk --no-cache add \
bash \
@@ -21,8 +21,7 @@ RUN apk --no-cache add \
libcap-ng-static \
libseccomp-static \
musl-dev && \
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain ${RUST_TOOLCHAIN} && \
chmod -R a+rwX ${RUSTUP_HOME} ${CARGO_HOME}
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain ${RUST_TOOLCHAIN}
RUN ARCH=$(uname -m); \
rust_arch=""; \

View File

@@ -46,23 +46,20 @@ mapping:
- Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-containerd-stability (active, cloud-hypervisor)
- Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-containerd-stability (active, dragonball)
- Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-containerd-stability (active, qemu)
- Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-containerd-stability (active, qemu-runtime-rs)
- Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-containerd-stability (lts, clh)
- Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-containerd-stability (lts, cloud-hypervisor)
- Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-containerd-stability (lts, dragonball)
- Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-containerd-stability (lts, qemu)
- Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-containerd-stability (lts, qemu-runtime-rs)
- Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-kata-agent-apis
- Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nerdctl-tests (cloud-hypervisor)
- Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nerdctl-tests (dragonball)
- Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nerdctl-tests (qemu)
- Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nerdctl-tests (qemu-runtime-rs)
- Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nydus (active, clh)
- Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nydus (active, dragonball)
- Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nydus (active, qemu)
- Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nydus (lts, clh)
- Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nydus (lts, dragonball)
- Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nydus (lts, qemu)
# - Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nydus (active, clh)
# - Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nydus (active, dragonball)
# - Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nydus (active, qemu)
# - Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nydus (lts, clh)
# - Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nydus (lts, dragonball)
# - Kata Containers CI / kata-containers-ci-on-push / run-basic-amd64-tests / run-nydus (lts, qemu)
- Kata Containers CI / kata-containers-ci-on-push / run-cri-containerd-amd64 (active, cloud-hypervisor) / run-cri-containerd-amd64 (active, cloud-hypervisor)
- Kata Containers CI / kata-containers-ci-on-push / run-cri-containerd-amd64 (active, clh) / run-cri-containerd-amd64 (active, clh)
- Kata Containers CI / kata-containers-ci-on-push / run-cri-containerd-amd64 (active, dragonball) / run-cri-containerd-amd64 (active, dragonball)
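For local inspection, the job list can be dumped with the yq binary installed by the script above (required-tests.yaml is an assumed name for this mapping file):

    yq '.mapping' required-tests.yaml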

View File

@@ -88,8 +88,8 @@ assets:
qemu:
description: "VMM that uses KVM"
url: "https://github.com/qemu/qemu"
version: "v10.2.0"
tag: "v10.2.0"
version: "v10.1.1"
tag: "v10.1.1"
# Do not include any non-full release versions
# Break the line *without CR or space being appended*, to appease
# yamllint, and note the deliberate ' ' at the end of the expression.
@@ -107,12 +107,12 @@ assets:
qemu-snp-experimental:
description: "QEMU with GPU+SNP support"
url: "https://github.com/confidential-containers/qemu.git"
tag: "gpu-snp-20260107"
tag: "gpu-snp-20251217"
qemu-tdx-experimental:
description: "QEMU with GPU+TDX support"
url: "https://github.com/confidential-containers/qemu.git"
tag: "gpu-tdx-20260107"
tag: "gpu-tdx-20251217"
stratovirt:
description: "StratoVirt is an lightweight opensource VMM"
@@ -130,7 +130,7 @@ assets:
version: "noble" # 24.04 LTS
confidential:
name: "ubuntu"
version: "noble" # 24.04 LTS
version: "noble" # 24.04 LTS
nvidia-gpu:
name: "ubuntu"
version: "noble" # 24.04 LTS
@@ -207,19 +207,11 @@ assets:
kernel:
description: "Linux kernel optimised for virtual machines"
url: "https://cdn.kernel.org/pub/linux/kernel/v6.x/"
version: "v6.18.5"
version: "v6.12.47"
confidential:
description: "Linux kernel with x86_64 TEEs (SNP and TDX) support"
url: "https://cdn.kernel.org/pub/linux/kernel/v6.x/"
version: "v6.18.5"
nvidia:
description: "Linux kernel optimised for virtual machines"
url: "https://cdn.kernel.org/pub/linux/kernel/v6.x/"
version: "v6.18.5"
nvidia-confidential:
description: "Linux kernel with x86_64 TEEs (SNP and TDX) support"
url: "https://cdn.kernel.org/pub/linux/kernel/v6.x/"
version: "v6.18.5"
version: "v6.16.7"
kernel-arm-experimental:
description: "Linux kernel with cpu/mem hotplug support on arm64"
@@ -242,16 +234,9 @@ externals:
nvrc:
# yamllint disable-line rule:line-length
desc: "The NVRC project provides a Rust binary that implements a simple init system for microVMs"
version: "v0.1.1"
version: "v0.0.1"
url: "https://github.com/NVIDIA/nvrc/releases/download/"
nvidia:
desc: "NVIDIA driver version"
driver:
version: "590.48.01"
# yamllint disable-line rule:line-length
url: "https://github.com/NVIDIA/open-gpu-kernel-modules/archive/refs/tags/"
busybox:
desc: "The Swiss Army Knife of Embedded Linux"
version: "1.36.1"
@@ -270,18 +255,18 @@ externals:
coco-guest-components:
description: "Provides attested key unwrapping for image decryption"
url: "https://github.com/confidential-containers/guest-components/"
version: "026694d44d4ec483465d2fa5f80a0376166b174d"
version: "dea0fa37f35d05d10af1bf4e7a6ae6ccb2a7d982"
toolchain: "1.85.1"
coco-trustee:
description: "Provides attestation and secret delivery components"
url: "https://github.com/confidential-containers/trustee"
version: "f342a12115c67ea3af769fcd57e450f569ff7377"
version: "94c3958250837420aea266623fe24ac990c7e5bf"
# image / ita_image and image_tag / ita_image_tag must be in sync
image: "ghcr.io/confidential-containers/staged-images/kbs"
image_tag: "f342a12115c67ea3af769fcd57e450f569ff7377"
image_tag: "94c3958250837420aea266623fe24ac990c7e5bf"
ita_image: "ghcr.io/confidential-containers/staged-images/kbs-ita-as"
ita_image_tag: "f342a12115c67ea3af769fcd57e450f569ff7377-x86_64"
ita_image_tag: "94c3958250837420aea266623fe24ac990c7e5bf-x86_64"
toolchain: "1.85.1"
containerd:
@@ -464,12 +449,12 @@ languages:
description: "Rust language"
notes: "'version' is the default minimum version used by this project."
# Keep in sync with rust-toolchain.toml
version: "1.89"
version: "1.88"
meta:
description: |
'newest-version' is the latest version known to work when
building Kata
newest-version: "1.89"
newest-version: "1.88"
golangci-lint:
description: "golangci-lint"
@@ -501,4 +486,4 @@ docker_images:
description: "Proxy server for HTTP, HTTPS, SMTP, POP3 and IMAP protocols"
registry: "quay.io/kata-containers/nginx"
# yamllint disable-line rule:line-length
digest: "sha256:a905609e0f9adc2607f06da2f76893c6da07caa396c41f2806fee162064cfb4b" # 1.15-alpine
digest: "sha256:a905609e0f9adc2607f06da2f76893c6da07caa396c41f2806fee162064cfb4b" # 1.15-alpine

View File

@@ -1,318 +0,0 @@
[project]
# The site_name is shown in the page header and the browser window title
#
# Read more: https://zensical.org/docs/setup/basics/#site_name
site_name = "Kata Containers Docs"
# The site_description is included in the HTML head and should contain a
# meaningful description of the site content for use by search engines.
#
# Read more: https://zensical.org/docs/setup/basics/#site_description
site_description = "Developer and user documentation for the Kata Containers project."
# The site_author attribute. This is used in the HTML head element.
#
# Read more: https://zensical.org/docs/setup/basics/#site_author
site_author = "Kata Containers Community"
repo_url = "https://github.com/kata-containers/kata-containers"
# The site_url is the canonical URL for your site. When building online
# documentation you should set this.
# Read more: https://zensical.org/docs/setup/basics/#site_url
site_url = "https://kata-containers.github.io/kata-containers"
edit_uri = "edit/main/docs/"
# The copyright notice appears in the page footer and can contain an HTML
# fragment.
#
# Read more: https://zensical.org/docs/setup/basics/#copyright
copyright = """
Copyright &copy; 2026 Kata Containers
"""
# Zensical supports both implicit navigation and explicitly defined navigation.
# If you decide not to define a navigation here then Zensical will simply
# derive the navigation structure from the directory structure of your
# "docs_dir". The definition below demonstrates how a navigation structure
# can be defined using TOML syntax.
#
# Read more: https://zensical.org/docs/setup/navigation/
# nav = [
# { "Get started" = "index.md" },
# { "Markdown in 5min" = "markdown.md" },
# ]
# With the "extra_css" option you can add your own CSS styling to customize
# your Zensical project according to your needs. You can add any number of
# CSS files.
#
# The path provided should be relative to the "docs_dir".
#
# Read more: https://zensical.org/docs/customization/#additional-css
#
#extra_css = ["stylesheets/extra.css"]
# With the `extra_javascript` option you can add your own JavaScript to your
# project to customize the behavior according to your needs.
#
# The path provided should be relative to the "docs_dir".
#
# Read more: https://zensical.org/docs/customization/#additional-javascript
#extra_javascript = ["javascripts/extra.js"]
# ----------------------------------------------------------------------------
# Section for configuring theme options
# ----------------------------------------------------------------------------
[project.theme]
# change this to "classic" to use the traditional Material for MkDocs look.
#variant = "classic"
# Zensical allows you to override specific blocks, partials, or whole
# templates as well as to define your own templates. To do this, uncomment
# the custom_dir setting below and set it to a directory in which you
# keep your template overrides.
#
# Read more:
# - https://zensical.org/docs/customization/#extending-the-theme
#
#custom_dir = "overrides"
# With the "favicon" option you can set your own image to use as the icon
# browsers will use in the browser title bar or tab bar. The path provided
# must be relative to the "docs_dir".
#
# Read more:
# - https://zensical.org/docs/setup/logo-and-icons/#favicon
# - https://developer.mozilla.org/en-US/docs/Glossary/Favicon
#
favicon = "assets/favicon.svg"
logo = "assets/favicon.svg"
# Zensical supports more than 60 different languages. This means that the
# labels and tooltips that Zensical's templates produce are translated.
# The "language" option allows you to set the language used. This language
# is also indicated in the HTML head element to help with accessibility
# and guide search engines and translation tools.
#
# The default language is "en" (English). It is possible to create
# sites with multiple languages and configure a language selector. See
# the documentation for details.
#
# Read more:
# - https://zensical.org/docs/setup/language/
#
language = "en"
# Zensical provides a number of feature toggles that change the behavior
# of the documentation site.
features = [
# Zensical includes an announcement bar. This feature allows users to
# dismiss it once they have read the announcement.
# https://zensical.org/docs/setup/header/#announcement-bar
"announce.dismiss",
# If you have a repository configured and turn this feature on, Zensical
# will generate an edit button for the page. This works for common
# repository hosting services.
# https://zensical.org/docs/setup/repository/#code-actions
"content.action.edit",
# If you have a repository configured and turn this feature on, Zensical
# will generate a button that allows the user to view the Markdown
# code for the current page.
# https://zensical.org/docs/setup/repository/#code-actions
"content.action.view",
# Code annotations allow you to add an icon with a tooltip to your
# code blocks to provide explanations at crucial points.
# https://zensical.org/docs/authoring/code-blocks/#code-annotations
"content.code.annotate",
# This feature turns on a button in code blocks that allow users to
# copy the content to their clipboard without first selecting it.
# https://zensical.org/docs/authoring/code-blocks/#code-copy-button
"content.code.copy",
# Code blocks can include a button to allow for the selection of line
# ranges by the user.
# https://zensical.org/docs/authoring/code-blocks/#code-selection-button
"content.code.select",
# Zensical can render footnotes as inline tooltips, so the user can read
# the footnote without leaving the context of the document.
# https://zensical.org/docs/authoring/footnotes/#footnote-tooltips
"content.footnote.tooltips",
# If you have many content tabs that have the same titles (e.g., "Python",
# "JavaScript", "Cobol"), this feature causes all of them to switch to
# at the same time when the user chooses their language in one.
# https://zensical.org/docs/authoring/content-tabs/#linked-content-tabs
"content.tabs.link",
# TODO: not sure I understand this one? Is there a demo of this in the docs?
# https://zensical.org/docs/authoring/tooltips/#improved-tooltips
"content.tooltips",
# With this feature enabled, Zensical will automatically hide parts
# of the header when the user scrolls past a certain point.
# https://zensical.org/docs/setup/header/#automatic-hiding
# "header.autohide",
# Turn on this feature to expand all collapsible sections in the
# navigation sidebar by default.
# https://zensical.org/docs/setup/navigation/#navigation-expansion
# "navigation.expand",
# This feature turns on navigation elements in the footer that allow the
# user to navigate to a next or previous page.
# https://zensical.org/docs/setup/footer/#navigation
"navigation.footer",
# When section index pages are enabled, documents can be directly attached
# to sections, which is particularly useful for providing overview pages.
# https://zensical.org/docs/setup/navigation/#section-index-pages
"navigation.indexes",
# When instant navigation is enabled, clicks on all internal links will be
# intercepted and dispatched via XHR without fully reloading the page.
# https://zensical.org/docs/setup/navigation/#instant-navigation
"navigation.instant",
# With instant prefetching, your site will start to fetch a page once the
# user hovers over a link. This will reduce the perceived loading time
# for the user.
# https://zensical.org/docs/setup/navigation/#instant-prefetching
"navigation.instant.prefetch",
# In order to provide a better user experience on slow connections when
# using instant navigation, a progress indicator can be enabled.
# https://zensical.org/docs/setup/navigation/#progress-indicator
#"navigation.instant.progress",
# When navigation paths are activated, a breadcrumb navigation is rendered
# above the title of each page
# https://zensical.org/docs/setup/navigation/#navigation-path
"navigation.path",
# When pruning is enabled, only the visible navigation items are included
# in the rendered HTML, reducing the size of the built site by 33% or more.
# https://zensical.org/docs/setup/navigation/#navigation-pruning
#"navigation.prune",
# When sections are enabled, top-level sections are rendered as groups in
# the sidebar for viewports above 1220px, but remain as-is on mobile.
# https://zensical.org/docs/setup/navigation/#navigation-sections
"navigation.sections",
# When tabs are enabled, top-level sections are rendered in a menu layer
# below the header for viewports above 1220px, but remain as-is on mobile.
# https://zensical.org/docs/setup/navigation/#navigation-tabs
#"navigation.tabs",
# When sticky tabs are enabled, navigation tabs will lock below the header
# and always remain visible when scrolling down.
# https://zensical.org/docs/setup/navigation/#sticky-navigation-tabs
#"navigation.tabs.sticky",
# A back-to-top button can be shown when the user, after scrolling down,
# starts to scroll up again.
# https://zensical.org/docs/setup/navigation/#back-to-top-button
"navigation.top",
# When anchor tracking is enabled, the URL in the address bar is
# automatically updated with the active anchor as highlighted in the table
# of contents.
# https://zensical.org/docs/setup/navigation/#anchor-tracking
"navigation.tracking",
# When search highlighting is enabled and a user clicks on a search result,
# Zensical will highlight all occurrences after following the link.
# https://zensical.org/docs/setup/search/#search-highlighting
"search.highlight",
# When anchor following for the table of contents is enabled, the sidebar
# is automatically scrolled so that the active anchor is always visible.
# https://zensical.org/docs/setup/navigation/#anchor-following
"toc.follow",
# When navigation integration for the table of contents is enabled, it is
# always rendered as part of the navigation sidebar on the left.
# https://zensical.org/docs/setup/navigation/#navigation-integration
#"toc.integrate",
]
# ----------------------------------------------------------------------------
# In the "palette" subsection you can configure options for the color scheme.
# You can configure different color # schemes, e.g., to turn on dark mode,
# that the user can switch between. Each color scheme can be further
# customized.
#
# Read more:
# - https://zensical.org/docs/setup/colors/
# ----------------------------------------------------------------------------
[[project.theme.palette]]
scheme = "slate"
toggle.icon = "lucide/moon"
toggle.name = "Switch to light mode"
primary = "indigo"
accent = "orange"
[[project.theme.palette]]
scheme = "default"
toggle.icon = "lucide/sun"
toggle.name = "Switch to dark mode"
primary = "indigo"
accent = "orange"
# ----------------------------------------------------------------------------
# In the "font" subsection you can configure the fonts used. By default, fonts
# are loaded from Google Fonts, giving you a wide range of choices from a set
# of suitably licensed fonts. There are options for a normal text font and for
# a monospaced font used in code blocks.
# ----------------------------------------------------------------------------
#[project.theme.font]
#text = "Inter"
#code = "Jetbrains Mono"
# ----------------------------------------------------------------------------
# You can configure your own logo to be shown in the header using the "logo"
# option in the "icons" subsection. The logo can be a path to a file in your
# "docs_dir" or it can be a path to an icon.
#
# Likewise, you can customize the logo used for the repository section of the
# header. Zensical derives the default logo for this from the repository URL.
# See below...
#
# There are other icons you can customize. See the documentation for details.
#
# Read more:
# - https://zensical.org/docs/setup/logo-and-icons
# - https://zensical.org/docs/authoring/icons-emojis/#search
# ----------------------------------------------------------------------------
[project.theme.icon]
#logo = "./images/logo.png"
#repo = "lucide/smile"
# ----------------------------------------------------------------------------
# The "extra" section contains miscellaneous settings.
# ----------------------------------------------------------------------------
#[[project.extra.social]]
#icon = "fontawesome/brands/github"
#link = "https://github.com/user/repo"
[project.markdown_extensions.toc]
permalink = true
[project.markdown_extensions.admonition]
[project.markdown_extensions.pymdownx.highlight]
anchor_linenums = true
line_spans = "__span"
pygments_lang_class = true
[project.markdown_extensions.pymdownx.inlinehilite]
[project.markdown_extensions.pymdownx.snippets]
[project.markdown_extensions.pymdownx.superfences]