Compare commits

...

4 Commits

Author SHA1 Message Date
Fabiano Fidêncio
3d8e3f205a tests: Remove stale qemu-coco-dev* skip from sandbox vCPU allocation test
The skip message said "Requires CPU hotplug which disabled by
static_sandbox_resource_mgmt", but static sandbox resource management
does not require hotplug — it sizes the VM upfront at creation time.

The Go runtime already handled this correctly, and the previous commit
fixed runtime-rs to do the same. Both runtimes now add the workload
vCPU and memory requirements to the base defaults when
static_sandbox_resource_mgmt is enabled, so the VM is created with the
right number of vCPUs without any hotplug.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
Made-with: Cursor
2026-04-07 15:33:50 +02:00
Fabiano Fidêncio
f20cf68b76 runtime-rs: Fix initial vCPU and memory sizing with static sandbox resource management
InitialSizeManager::setup_config() is responsible for applying the
sandbox workload sizing (computed from containerd/CRI-O sandbox
annotations) to the hypervisor configuration before VM creation.

Previously, the workload vCPU count was only logged but never actually
added to default_vcpus, so the VM was always created with only the base
vCPUs from the configuration/annotations. This caused the
k8s-sandbox-vcpus-allocation test to fail with qemu-snp-runtime-rs:
a pod with default_vcpus=0.75 and a container CPU limit of 1.2 should
see ceil(0.75 + 1.2) = 2 vCPUs, but only got 1.

Additionally, the workload memory was being added to default_memory
unconditionally, diverging from the Go runtime which only applies both
CPU and memory additions when static_sandbox_resource_mgmt is enabled.
In the non-static path, adding workload resources here would cause
double-counting: once from setup_config() at sandbox creation, and
again from update_cpu_resources()/update_mem_resources() when
individual containers are added.

Guard both additions behind static_sandbox_resource_mgmt, matching the
Go runtime's behavior in src/runtime/pkg/oci/utils.go:

    if sandboxConfig.StaticResourceMgmt {
        sandboxConfig.HypervisorConfig.NumVCPUsF += sandboxConfig.SandboxResources.WorkloadCPUs
        sandboxConfig.HypervisorConfig.MemorySize += sandboxConfig.SandboxResources.WorkloadMemMB
    }
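
For reference, the runtime-rs side ends up applying the same guard; a
condensed sketch (the full change is in the
InitialSizeManager::setup_config() hunk further down):

    // Condensed from the runtime-rs diff below: both additions now only
    // happen when static sandbox resource management is enabled.
    // (In the actual code each addition sits inside its own "> 0" check.)
    if config.runtime.static_sandbox_resource_mgmt {
        hv.cpu_info.default_vcpus += self.resource.vcpu;
        hv.memory_info.default_memory += self.resource.mem_mb;
    }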

Fixes: k8s-sandbox-vcpus-allocation test failure on qemu-snp-runtime-rs

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
Made-with: Cursor
2026-04-07 15:31:14 +02:00
Fabiano Fidêncio
fed63cc5d1 tests: Support runtime-rs QEMU command line format in attestation test
The k8s-confidential-attestation test extracts the QEMU command line
from journal logs to compute the SNP launch measurement. It only
matched the Go runtime's log format ("launching <path> with: [<args>]"),
but runtime-rs logs differently ("qemu args: <args>").

Handle both formats so the test works with qemu-snp-runtime-rs.

Made-with: Cursor
2026-04-07 14:46:56 +02:00
Fabiano Fidêncio
734e75b7a4 ci: Run runtime-rs tests for SNP
As we're in the process of stabilising runtime-rs for the upcoming
4.0.0 release, we'd better start running as many tests as possible with
it.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-04-07 13:26:05 +02:00
10 changed files with 29 additions and 21 deletions

View File

@@ -53,6 +53,8 @@ jobs:
vmm: qemu-tdx
- runner: sev-snp
vmm: qemu-snp
- runner: sev-snp
vmm: qemu-snp-runtime-rs
runs-on: ${{ matrix.runner }}
env:
DOCKER_REGISTRY: ${{ inputs.registry }}

View File

@@ -142,15 +142,16 @@ impl InitialSizeManager {
if self.resource.vcpu > 0.0 {
info!(sl!(), "resource with vcpu {}", self.resource.vcpu);
if config.runtime.static_sandbox_resource_mgmt {
hv.cpu_info.default_vcpus += self.resource.vcpu;
}
}
self.resource.orig_toml_default_mem = hv.memory_info.default_memory;
if self.resource.mem_mb > 0 {
// since the memory overhead introduced by kata-agent and system components
// will really affect the amount of memory the user can use, so we choose to
// plus the default_memory here, instead of overriding it.
// (if we override the default_memory here, and user apllications still
// use memory as they orignally expected, it would be easy to OOM.)
hv.memory_info.default_memory += self.resource.mem_mb;
info!(sl!(), "resource with memory {}", self.resource.mem_mb);
if config.runtime.static_sandbox_resource_mgmt {
hv.memory_info.default_memory += self.resource.mem_mb;
}
}
Ok(())
}

View File

@@ -635,7 +635,7 @@ function helm_helper() {
base_values_file="${helm_chart_dir}/try-kata-nvidia-gpu.values.yaml"
fi
;;
qemu-snp|qemu-tdx|qemu-se|qemu-se-runtime-rs|qemu-cca|qemu-coco-dev|qemu-coco-dev-runtime-rs)
qemu-snp|qemu-snp-runtime-rs|qemu-tdx|qemu-se|qemu-se-runtime-rs|qemu-cca|qemu-coco-dev|qemu-coco-dev-runtime-rs)
# Use TEE example file
if [[ -f "${helm_chart_dir}/try-kata-tee.values.yaml" ]]; then
base_values_file="${helm_chart_dir}/try-kata-tee.values.yaml"

View File

@@ -11,7 +11,7 @@ source "${BATS_TEST_DIRNAME}/../../common.bash"
load "${BATS_TEST_DIRNAME}/confidential_kbs.sh"
SUPPORTED_GPU_TEE_HYPERVISORS=("qemu-nvidia-gpu-snp" "qemu-nvidia-gpu-tdx")
SUPPORTED_TEE_HYPERVISORS=("qemu-snp" "qemu-tdx" "qemu-se" "qemu-se-runtime-rs" "${SUPPORTED_GPU_TEE_HYPERVISORS[@]}")
SUPPORTED_TEE_HYPERVISORS=("qemu-snp" "qemu-snp-runtime-rs" "qemu-tdx" "qemu-se" "qemu-se-runtime-rs" "${SUPPORTED_GPU_TEE_HYPERVISORS[@]}")
SUPPORTED_NON_TEE_HYPERVISORS=("qemu-coco-dev" "qemu-coco-dev-runtime-rs")
function setup_unencrypted_confidential_pod() {
@@ -36,7 +36,7 @@ function get_remote_command_per_hypervisor() {
qemu-se*)
echo "cd /sys/firmware/uv; cat prot_virt_guest | grep 1"
;;
qemu-snp)
qemu-snp|qemu-snp-runtime-rs)
echo "dmesg | grep \"Memory Encryption Features active:.*SEV-SNP\""
;;
qemu-tdx)

View File

@@ -187,7 +187,7 @@ function deploy_kata() {
# Workaround to avoid modifying the workflow yaml files
case "${KATA_HYPERVISOR}" in
qemu-tdx|qemu-snp|qemu-nvidia-gpu-*)
qemu-tdx|qemu-snp|qemu-snp-runtime-rs|qemu-nvidia-gpu-*)
USE_EXPERIMENTAL_SETUP_SNAPSHOTTER=true
SNAPSHOTTER="nydus"
EXPERIMENTAL_FORCE_GUEST_PULL=false
@@ -447,7 +447,7 @@ function cleanup() {
}
function deploy_snapshotter() {
if [[ "${KATA_HYPERVISOR}" == "qemu-tdx" || "${KATA_HYPERVISOR}" == "qemu-snp" ]]; then
if [[ "${KATA_HYPERVISOR}" == "qemu-tdx" || "${KATA_HYPERVISOR}" == "qemu-snp" || "${KATA_HYPERVISOR}" == "qemu-snp-runtime-rs" ]]; then
echo "[Skip] ${SNAPSHOTTER} is pre-installed in the TEE machine"
return
fi
@@ -461,7 +461,7 @@ function deploy_snapshotter() {
}
function cleanup_snapshotter() {
if [[ "${KATA_HYPERVISOR}" == "qemu-tdx" || "${KATA_HYPERVISOR}" == "qemu-snp" ]]; then
if [[ "${KATA_HYPERVISOR}" == "qemu-tdx" || "${KATA_HYPERVISOR}" == "qemu-snp" || "${KATA_HYPERVISOR}" == "qemu-snp-runtime-rs" ]]; then
echo "[Skip] ${SNAPSHOTTER} is pre-installed in the TEE machine"
return
fi

View File

@@ -146,8 +146,15 @@ setup() {
kbs_set_cpu0_resource_policy
# get measured artifacts from qemu command line of previous test
# Go runtime logs: "launching <path> with: [<args>]"
# runtime-rs logs: "qemu args: <args>"
log_line=$(sudo journalctl -r -x -t kata | grep -m 1 'launching.*qemu.*with:' || true)
qemu_cmd=$(echo "$log_line" | sed 's/.*with: \[\(.*\)\]".*/\1/')
if [[ -n "$log_line" ]]; then
qemu_cmd=$(echo "$log_line" | sed 's/.*with: \[\(.*\)\]".*/\1/')
else
log_line=$(sudo journalctl -r -x -t kata | grep -m 1 'qemu args:' || true)
qemu_cmd=$(echo "$log_line" | sed 's/.*qemu args: //')
fi
[[ -n "$qemu_cmd" ]] || { echo "Could not find QEMU command line"; return 1; }
kernel_path=$(echo "$qemu_cmd" | grep -oP -- '-kernel \K[^ ]+')

View File

@@ -15,7 +15,7 @@ setup() {
[ "${KATA_HYPERVISOR}" == "qemu-se-runtime-rs" ] && skip "Requires CPU hotplug which isn't supported on ${KATA_HYPERVISOR} yet"
[[ "${KATA_HYPERVISOR}" == qemu-coco-dev* ]] && skip "Requires CPU hotplug which disabled by static_sandbox_resource_mgmt"
( [ "${KATA_HYPERVISOR}" == "qemu-tdx" ] || [ "${KATA_HYPERVISOR}" == "qemu-snp" ] || \
[ "${KATA_HYPERVISOR}" == "qemu-se" ] ) \
[ "${KATA_HYPERVISOR}" == "qemu-snp-runtime-rs" ] || [ "${KATA_HYPERVISOR}" == "qemu-se" ] ) \
&& skip "TEEs do not support memory / CPU hotplug"
pod_name="constraints-cpu-test"
@@ -121,7 +121,7 @@ teardown() {
[ "${KATA_HYPERVISOR}" == "qemu-se-runtime-rs" ] && skip "Requires CPU hotplug which isn't supported on ${KATA_HYPERVISOR} yet"
[[ "${KATA_HYPERVISOR}" == qemu-coco-dev* ]] && skip "Requires CPU hotplug which disabled by static_sandbox_resource_mgmt"
( [ "${KATA_HYPERVISOR}" == "qemu-tdx" ] || [ "${KATA_HYPERVISOR}" == "qemu-snp" ] || \
[ "${KATA_HYPERVISOR}" == "qemu-se" ] ) \
[ "${KATA_HYPERVISOR}" == "qemu-snp-runtime-rs" ] || [ "${KATA_HYPERVISOR}" == "qemu-se" ] ) \
&& skip "TEEs do not support memory / CPU hotplug"
# Debugging information

View File

@@ -11,7 +11,6 @@ load "${BATS_TEST_DIRNAME}/tests_common.sh"
setup() {
[ "$(uname -m)" == "aarch64" ] && skip "See: https://github.com/kata-containers/kata-containers/issues/10928"
[[ "${KATA_HYPERVISOR}" == qemu-coco-dev* ]] && skip "Requires CPU hotplug which disabled by static_sandbox_resource_mgmt"
[[ "${KATA_HYPERVISOR}" == "qemu-tdx" ]] && skip "See: https://github.com/kata-containers/kata-containers/issues/12492"
setup_common || die "setup_common failed"
@@ -52,7 +51,6 @@ setup() {
teardown() {
[ "$(uname -m)" == "aarch64" ] && skip "See: https://github.com/kata-containers/kata-containers/issues/10928"
[[ "${KATA_HYPERVISOR}" == qemu-coco-dev* ]] && skip "Requires CPU hotplug which disabled by static_sandbox_resource_mgmt"
[[ "${KATA_HYPERVISOR}" == "qemu-tdx" ]] && skip "See: https://github.com/kata-containers/kata-containers/issues/12492"
for pod in "${pods[@]}"; do

View File

@@ -138,7 +138,7 @@ add_runtime_handler_annotations() {
fi
case "${KATA_HYPERVISOR}" in
qemu-coco-dev | qemu-snp | qemu-tdx | qemu-coco-dev-runtime-rs)
qemu-coco-dev | qemu-snp | qemu-snp-runtime-rs | qemu-tdx | qemu-coco-dev-runtime-rs)
info "Add runtime handler annotations for ${KATA_HYPERVISOR}"
local handler_value="kata-${KATA_HYPERVISOR}"
for K8S_TEST_YAML in runtimeclass_workloads_work/*.yaml

View File

@@ -82,7 +82,7 @@ auto_generate_policy_enabled() {
is_coco_platform() {
case "${KATA_HYPERVISOR}" in
"qemu-tdx"|"qemu-snp"|"qemu-coco-dev"|"qemu-coco-dev-runtime-rs"|"qemu-nvidia-gpu-tdx"|"qemu-nvidia-gpu-snp")
"qemu-tdx"|"qemu-snp"|"qemu-snp-runtime-rs"|"qemu-coco-dev"|"qemu-coco-dev-runtime-rs"|"qemu-nvidia-gpu-tdx"|"qemu-nvidia-gpu-snp")
return 0
;;
*)
@@ -148,7 +148,7 @@ install_genpolicy_drop_ins() {
# 20-* OCI version overlay
if [[ "${KATA_HOST_OS:-}" == "cbl-mariner" ]]; then
cp "${examples_dir}/20-oci-1.2.0-drop-in.json" "${settings_d}/"
elif is_k3s_or_rke2 || is_nvidia_gpu_platform || [[ "${KATA_HYPERVISOR}" == "qemu-snp" ]] || [[ "${KATA_HYPERVISOR}" == "qemu-tdx" ]] || [[ -n "${CONTAINER_ENGINE_VERSION:-}" ]]; then
elif is_k3s_or_rke2 || is_nvidia_gpu_platform || [[ "${KATA_HYPERVISOR}" == "qemu-snp" ]] || [[ "${KATA_HYPERVISOR}" == "qemu-snp-runtime-rs" ]] || [[ "${KATA_HYPERVISOR}" == "qemu-tdx" ]] || [[ -n "${CONTAINER_ENGINE_VERSION:-}" ]]; then
cp "${examples_dir}/20-oci-1.3.0-drop-in.json" "${settings_d}/"
fi
@@ -340,7 +340,7 @@ hard_coded_policy_tests_enabled() {
# CI is testing hard-coded policies just on a the platforms listed here. Outside of CI,
# users can enable testing of the same policies (plus the auto-generated policies) by
# specifying AUTO_GENERATE_POLICY=yes.
local -r enabled_hypervisors=("qemu-coco-dev" "qemu-snp" "qemu-tdx" "qemu-coco-dev-runtime-rs")
local -r enabled_hypervisors=("qemu-coco-dev" "qemu-snp" "qemu-snp-runtime-rs" "qemu-tdx" "qemu-coco-dev-runtime-rs")
for enabled_hypervisor in "${enabled_hypervisors[@]}"
do
if [[ "${enabled_hypervisor}" == "${KATA_HYPERVISOR}" ]]; then