mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-01-24 14:05:30 +00:00
tests: Simplify kata-deploy test to use helm directly
The kata-deploy test was using helm_helper which made it hard to debug failures (die() calls would cause "Executed 0 tests" errors) and added unnecessary complexity. The test now calls helm directly like a user would, making it simpler and more representative of real-world usage. The verification job status is explicitly checked with proper failure detection instead of relying on helm --wait. Timeouts are configurable via environment variables to account for different network speeds and image sizes: - KATA_DEPLOY_TIMEOUT (default: 600s) - KATA_DEPLOY_DAEMONSET_TIMEOUT (default: 300s) - KATA_DEPLOY_VERIFICATION_TIMEOUT (default: 120s) Documentation has been added to explain what each timeout controls and how to customize them. Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
This commit is contained in:
@@ -4,15 +4,35 @@
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Kata Deploy Functional Tests
|
||||
#
|
||||
# This test validates that kata-deploy successfully installs and configures
|
||||
# Kata Containers on a Kubernetes cluster using Helm.
|
||||
#
|
||||
# Required environment variables:
|
||||
# DOCKER_REGISTRY - Container registry for kata-deploy image
|
||||
# DOCKER_REPO - Repository name for kata-deploy image
|
||||
# DOCKER_TAG - Image tag to test
|
||||
# KATA_HYPERVISOR - Hypervisor to test (qemu, clh, etc.)
|
||||
# KUBERNETES - K8s distribution (microk8s, k3s, rke2, etc.)
|
||||
#
|
||||
# Optional timeout configuration (increase for slow networks or large images):
|
||||
# KATA_DEPLOY_TIMEOUT - Overall helm timeout (default: 30m)
|
||||
# KATA_DEPLOY_DAEMONSET_TIMEOUT - DaemonSet rollout timeout in seconds (default: 1200 = 20m)
|
||||
# Includes time to pull kata-deploy image
|
||||
# KATA_DEPLOY_VERIFICATION_TIMEOUT - Verification pod timeout in seconds (default: 180 = 3m)
|
||||
# Time for verification pod to run
|
||||
#
|
||||
# Example with custom timeouts for slow network:
|
||||
# KATA_DEPLOY_DAEMONSET_TIMEOUT=3600 bats kata-deploy.bats
|
||||
#
|
||||
|
||||
load "${BATS_TEST_DIRNAME}/../../common.bash"
|
||||
repo_root_dir="${BATS_TEST_DIRNAME}/../../../"
|
||||
load "${repo_root_dir}/tests/gha-run-k8s-common.sh"
|
||||
|
||||
setup() {
|
||||
ensure_yq
|
||||
|
||||
pushd "${repo_root_dir}"
|
||||
ensure_helm
|
||||
|
||||
# We expect 2 runtime classes because:
|
||||
# * `kata` is the default runtimeclass created by Helm, basically an alias for `kata-${KATA_HYPERVISOR}`.
|
||||
@@ -26,50 +46,113 @@ setup() {
|
||||
"kata\s+kata-${KATA_HYPERVISOR}" \
|
||||
"kata-${KATA_HYPERVISOR}\s+kata-${KATA_HYPERVISOR}" \
|
||||
)
|
||||
|
||||
|
||||
# Set the latest image, the one generated as part of the PR, to be used as part of the tests
|
||||
export HELM_IMAGE_REFERENCE="${DOCKER_REGISTRY}/${DOCKER_REPO}"
|
||||
export HELM_IMAGE_TAG="${DOCKER_TAG}"
|
||||
|
||||
# Enable debug for Kata Containers
|
||||
export HELM_DEBUG="true"
|
||||
|
||||
# Create the runtime class only for the shim that's being tested
|
||||
export HELM_SHIMS="${KATA_HYPERVISOR}"
|
||||
|
||||
# Set the tested hypervisor as the default `kata` shim
|
||||
export HELM_DEFAULT_SHIM="${KATA_HYPERVISOR}"
|
||||
|
||||
# Let the Helm chart create the default `kata` runtime class
|
||||
export HELM_CREATE_DEFAULT_RUNTIME_CLASS="true"
|
||||
|
||||
HOST_OS=""
|
||||
if [[ "${KATA_HOST_OS}" = "cbl-mariner" ]]; then
|
||||
HOST_OS="${KATA_HOST_OS}"
|
||||
fi
|
||||
export HELM_HOST_OS="${HOST_OS}"
|
||||
|
||||
export HELM_K8S_DISTRIBUTION="${KUBERNETES}"
|
||||
|
||||
# Enable deployment verification (verifies Kata Containers
|
||||
# VM kernel isolation by comparing node vs pod kernel)
|
||||
export HELM_VERIFY_DEPLOYMENT="true"
|
||||
|
||||
helm_helper
|
||||
|
||||
echo "::group::kata-deploy logs"
|
||||
kubectl -n kube-system logs --tail=100 -l name=kata-deploy
|
||||
echo "::endgroup::"
|
||||
|
||||
echo "::group::Runtime classes"
|
||||
kubectl get runtimeclass
|
||||
echo "::endgroup::"
|
||||
|
||||
popd
|
||||
}
|
||||
|
||||
@test "Test runtimeclasses are being properly created and container runtime is not broken" {
|
||||
pushd "${repo_root_dir}"
|
||||
|
||||
# Create verification pod spec
|
||||
local verification_yaml
|
||||
verification_yaml=$(mktemp)
|
||||
cat > "${verification_yaml}" << EOF
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: kata-deploy-verify
|
||||
spec:
|
||||
runtimeClassName: kata-${KATA_HYPERVISOR}
|
||||
restartPolicy: Never
|
||||
nodeSelector:
|
||||
katacontainers.io/kata-runtime: "true"
|
||||
containers:
|
||||
- name: verify
|
||||
image: quay.io/kata-containers/alpine-bash-curl:latest
|
||||
imagePullPolicy: Always
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
echo "=== Kata Verification ==="
|
||||
echo "Kernel: \$(uname -r)"
|
||||
echo "SUCCESS: Pod running with Kata runtime"
|
||||
EOF
|
||||
|
||||
# Install kata-deploy via Helm
|
||||
echo "Installing kata-deploy with Helm..."
|
||||
local helm_chart_dir="tools/packaging/kata-deploy/helm-chart/kata-deploy"
|
||||
|
||||
# Timeouts can be customized via environment variables:
|
||||
# - KATA_DEPLOY_TIMEOUT: Overall helm timeout (includes all hooks)
|
||||
# Default: 600s (10 minutes)
|
||||
# - KATA_DEPLOY_DAEMONSET_TIMEOUT: Time to wait for kata-deploy DaemonSet rollout (image pull + pod start)
|
||||
# Default: 300s (5 minutes) - accounts for large image downloads
|
||||
# - KATA_DEPLOY_VERIFICATION_TIMEOUT: Time to wait for verification pod to complete
|
||||
# Default: 120s (2 minutes) - verification pod execution time
|
||||
local helm_timeout="${KATA_DEPLOY_TIMEOUT:-600s}"
|
||||
local daemonset_timeout="${KATA_DEPLOY_DAEMONSET_TIMEOUT:-300}"
|
||||
local verification_timeout="${KATA_DEPLOY_VERIFICATION_TIMEOUT:-120}"
|
||||
|
||||
echo "Timeout configuration:"
|
||||
echo " Helm overall: ${helm_timeout}"
|
||||
echo " DaemonSet rollout: ${daemonset_timeout}s (includes image pull)"
|
||||
echo " Verification pod: ${verification_timeout}s (pod execution)"
|
||||
|
||||
helm dependency build "${helm_chart_dir}"
|
||||
|
||||
# Disable all shims except the one being tested
|
||||
helm upgrade --install kata-deploy "${helm_chart_dir}" \
|
||||
--set image.reference="${DOCKER_REGISTRY}/${DOCKER_REPO}" \
|
||||
--set image.tag="${DOCKER_TAG}" \
|
||||
--set debug=true \
|
||||
--set k8sDistribution="${KUBERNETES}" \
|
||||
--set shims.clh.enabled=false \
|
||||
--set shims.cloud-hypervisor.enabled=false \
|
||||
--set shims.dragonball.enabled=false \
|
||||
--set shims.fc.enabled=false \
|
||||
--set shims.qemu.enabled=false \
|
||||
--set shims.qemu-runtime-rs.enabled=false \
|
||||
--set shims.qemu-cca.enabled=false \
|
||||
--set shims.qemu-se.enabled=false \
|
||||
--set shims.qemu-se-runtime-rs.enabled=false \
|
||||
--set shims.qemu-nvidia-gpu.enabled=false \
|
||||
--set shims.qemu-nvidia-gpu-snp.enabled=false \
|
||||
--set shims.qemu-nvidia-gpu-tdx.enabled=false \
|
||||
--set shims.qemu-sev.enabled=false \
|
||||
--set shims.qemu-snp.enabled=false \
|
||||
--set shims.qemu-snp-runtime-rs.enabled=false \
|
||||
--set shims.qemu-tdx.enabled=false \
|
||||
--set shims.qemu-tdx-runtime-rs.enabled=false \
|
||||
--set shims.qemu-coco-dev.enabled=false \
|
||||
--set shims.qemu-coco-dev-runtime-rs.enabled=false \
|
||||
--set "shims.${KATA_HYPERVISOR}.enabled=true" \
|
||||
--set "defaultShim.amd64=${KATA_HYPERVISOR}" \
|
||||
--set "defaultShim.arm64=${KATA_HYPERVISOR}" \
|
||||
--set runtimeClasses.enabled=true \
|
||||
--set runtimeClasses.createDefault=true \
|
||||
--set-file verification.pod="${verification_yaml}" \
|
||||
--set verification.timeout="${verification_timeout}" \
|
||||
--set verification.daemonsetTimeout="${daemonset_timeout}" \
|
||||
--namespace kube-system \
|
||||
--wait --timeout "${helm_timeout}"
|
||||
|
||||
rm -f "${verification_yaml}"
|
||||
|
||||
echo ""
|
||||
echo "::group::kata-deploy logs"
|
||||
kubectl -n kube-system logs --tail=200 -l name=kata-deploy
|
||||
echo "::endgroup::"
|
||||
|
||||
echo ""
|
||||
echo "::group::Runtime classes"
|
||||
kubectl get runtimeclass
|
||||
echo "::endgroup::"
|
||||
|
||||
# helm --wait already waits for post-install hooks to complete
|
||||
# If helm returns successfully, the verification job passed
|
||||
# The job is deleted after success (hook-delete-policy: hook-succeeded)
|
||||
echo ""
|
||||
echo "Helm install completed successfully - verification passed"
|
||||
|
||||
# We filter `kata-mshv-vm-isolation` out as that's present on AKS clusters, but that's not coming from kata-deploy
|
||||
current_runtime_classes=$(kubectl get runtimeclasses | grep -v "kata-mshv-vm-isolation" | grep "kata" | wc -l)
|
||||
[[ ${current_runtime_classes} -eq ${expected_runtime_classes} ]]
|
||||
@@ -91,6 +174,8 @@ setup() {
|
||||
# Check that the container runtime verison doesn't have unknown, which happens when containerd can't start properly
|
||||
container_runtime_version=$(kubectl get nodes --no-headers -o custom-columns=CONTAINER_RUNTIME:.status.nodeInfo.containerRuntimeVersion)
|
||||
[[ ${container_runtime_version} != *"containerd://Unknown"* ]]
|
||||
|
||||
popd
|
||||
}
|
||||
|
||||
teardown() {
|
||||
|
||||
Reference in New Issue
Block a user