diff --git a/tests/functional/kata-deploy/kata-deploy.bats b/tests/functional/kata-deploy/kata-deploy.bats index 798ccda44f..634dd746eb 100644 --- a/tests/functional/kata-deploy/kata-deploy.bats +++ b/tests/functional/kata-deploy/kata-deploy.bats @@ -4,15 +4,35 @@ # # SPDX-License-Identifier: Apache-2.0 # +# Kata Deploy Functional Tests +# +# This test validates that kata-deploy successfully installs and configures +# Kata Containers on a Kubernetes cluster using Helm. +# +# Required environment variables: +# DOCKER_REGISTRY - Container registry for kata-deploy image +# DOCKER_REPO - Repository name for kata-deploy image +# DOCKER_TAG - Image tag to test +# KATA_HYPERVISOR - Hypervisor to test (qemu, clh, etc.) +# KUBERNETES - K8s distribution (microk8s, k3s, rke2, etc.) +# +# Optional timeout configuration (increase for slow networks or large images): +# KATA_DEPLOY_TIMEOUT - Overall helm timeout (default: 30m) +# KATA_DEPLOY_DAEMONSET_TIMEOUT - DaemonSet rollout timeout in seconds (default: 1200 = 20m) +# Includes time to pull kata-deploy image +# KATA_DEPLOY_VERIFICATION_TIMEOUT - Verification pod timeout in seconds (default: 180 = 3m) +# Time for verification pod to run +# +# Example with custom timeouts for slow network: +# KATA_DEPLOY_DAEMONSET_TIMEOUT=3600 bats kata-deploy.bats +# load "${BATS_TEST_DIRNAME}/../../common.bash" repo_root_dir="${BATS_TEST_DIRNAME}/../../../" load "${repo_root_dir}/tests/gha-run-k8s-common.sh" setup() { - ensure_yq - - pushd "${repo_root_dir}" + ensure_helm # We expect 2 runtime classes because: # * `kata` is the default runtimeclass created by Helm, basically an alias for `kata-${KATA_HYPERVISOR}`. @@ -26,50 +46,113 @@ setup() { "kata\s+kata-${KATA_HYPERVISOR}" \ "kata-${KATA_HYPERVISOR}\s+kata-${KATA_HYPERVISOR}" \ ) - - - # Set the latest image, the one generated as part of the PR, to be used as part of the tests - export HELM_IMAGE_REFERENCE="${DOCKER_REGISTRY}/${DOCKER_REPO}" - export HELM_IMAGE_TAG="${DOCKER_TAG}" - - # Enable debug for Kata Containers - export HELM_DEBUG="true" - - # Create the runtime class only for the shim that's being tested - export HELM_SHIMS="${KATA_HYPERVISOR}" - - # Set the tested hypervisor as the default `kata` shim - export HELM_DEFAULT_SHIM="${KATA_HYPERVISOR}" - - # Let the Helm chart create the default `kata` runtime class - export HELM_CREATE_DEFAULT_RUNTIME_CLASS="true" - - HOST_OS="" - if [[ "${KATA_HOST_OS}" = "cbl-mariner" ]]; then - HOST_OS="${KATA_HOST_OS}" - fi - export HELM_HOST_OS="${HOST_OS}" - - export HELM_K8S_DISTRIBUTION="${KUBERNETES}" - - # Enable deployment verification (verifies Kata Containers - # VM kernel isolation by comparing node vs pod kernel) - export HELM_VERIFY_DEPLOYMENT="true" - - helm_helper - - echo "::group::kata-deploy logs" - kubectl -n kube-system logs --tail=100 -l name=kata-deploy - echo "::endgroup::" - - echo "::group::Runtime classes" - kubectl get runtimeclass - echo "::endgroup::" - - popd } @test "Test runtimeclasses are being properly created and container runtime is not broken" { + pushd "${repo_root_dir}" + + # Create verification pod spec + local verification_yaml + verification_yaml=$(mktemp) + cat > "${verification_yaml}" << EOF +apiVersion: v1 +kind: Pod +metadata: + name: kata-deploy-verify +spec: + runtimeClassName: kata-${KATA_HYPERVISOR} + restartPolicy: Never + nodeSelector: + katacontainers.io/kata-runtime: "true" + containers: + - name: verify + image: quay.io/kata-containers/alpine-bash-curl:latest + imagePullPolicy: Always + command: + - sh + - -c + - | + echo "=== Kata Verification ===" + echo "Kernel: \$(uname -r)" + echo "SUCCESS: Pod running with Kata runtime" +EOF + + # Install kata-deploy via Helm + echo "Installing kata-deploy with Helm..." + local helm_chart_dir="tools/packaging/kata-deploy/helm-chart/kata-deploy" + + # Timeouts can be customized via environment variables: + # - KATA_DEPLOY_TIMEOUT: Overall helm timeout (includes all hooks) + # Default: 600s (10 minutes) + # - KATA_DEPLOY_DAEMONSET_TIMEOUT: Time to wait for kata-deploy DaemonSet rollout (image pull + pod start) + # Default: 300s (5 minutes) - accounts for large image downloads + # - KATA_DEPLOY_VERIFICATION_TIMEOUT: Time to wait for verification pod to complete + # Default: 120s (2 minutes) - verification pod execution time + local helm_timeout="${KATA_DEPLOY_TIMEOUT:-600s}" + local daemonset_timeout="${KATA_DEPLOY_DAEMONSET_TIMEOUT:-300}" + local verification_timeout="${KATA_DEPLOY_VERIFICATION_TIMEOUT:-120}" + + echo "Timeout configuration:" + echo " Helm overall: ${helm_timeout}" + echo " DaemonSet rollout: ${daemonset_timeout}s (includes image pull)" + echo " Verification pod: ${verification_timeout}s (pod execution)" + + helm dependency build "${helm_chart_dir}" + + # Disable all shims except the one being tested + helm upgrade --install kata-deploy "${helm_chart_dir}" \ + --set image.reference="${DOCKER_REGISTRY}/${DOCKER_REPO}" \ + --set image.tag="${DOCKER_TAG}" \ + --set debug=true \ + --set k8sDistribution="${KUBERNETES}" \ + --set shims.clh.enabled=false \ + --set shims.cloud-hypervisor.enabled=false \ + --set shims.dragonball.enabled=false \ + --set shims.fc.enabled=false \ + --set shims.qemu.enabled=false \ + --set shims.qemu-runtime-rs.enabled=false \ + --set shims.qemu-cca.enabled=false \ + --set shims.qemu-se.enabled=false \ + --set shims.qemu-se-runtime-rs.enabled=false \ + --set shims.qemu-nvidia-gpu.enabled=false \ + --set shims.qemu-nvidia-gpu-snp.enabled=false \ + --set shims.qemu-nvidia-gpu-tdx.enabled=false \ + --set shims.qemu-sev.enabled=false \ + --set shims.qemu-snp.enabled=false \ + --set shims.qemu-snp-runtime-rs.enabled=false \ + --set shims.qemu-tdx.enabled=false \ + --set shims.qemu-tdx-runtime-rs.enabled=false \ + --set shims.qemu-coco-dev.enabled=false \ + --set shims.qemu-coco-dev-runtime-rs.enabled=false \ + --set "shims.${KATA_HYPERVISOR}.enabled=true" \ + --set "defaultShim.amd64=${KATA_HYPERVISOR}" \ + --set "defaultShim.arm64=${KATA_HYPERVISOR}" \ + --set runtimeClasses.enabled=true \ + --set runtimeClasses.createDefault=true \ + --set-file verification.pod="${verification_yaml}" \ + --set verification.timeout="${verification_timeout}" \ + --set verification.daemonsetTimeout="${daemonset_timeout}" \ + --namespace kube-system \ + --wait --timeout "${helm_timeout}" + + rm -f "${verification_yaml}" + + echo "" + echo "::group::kata-deploy logs" + kubectl -n kube-system logs --tail=200 -l name=kata-deploy + echo "::endgroup::" + + echo "" + echo "::group::Runtime classes" + kubectl get runtimeclass + echo "::endgroup::" + + # helm --wait already waits for post-install hooks to complete + # If helm returns successfully, the verification job passed + # The job is deleted after success (hook-delete-policy: hook-succeeded) + echo "" + echo "Helm install completed successfully - verification passed" + # We filter `kata-mshv-vm-isolation` out as that's present on AKS clusters, but that's not coming from kata-deploy current_runtime_classes=$(kubectl get runtimeclasses | grep -v "kata-mshv-vm-isolation" | grep "kata" | wc -l) [[ ${current_runtime_classes} -eq ${expected_runtime_classes} ]] @@ -91,6 +174,8 @@ setup() { # Check that the container runtime verison doesn't have unknown, which happens when containerd can't start properly container_runtime_version=$(kubectl get nodes --no-headers -o custom-columns=CONTAINER_RUNTIME:.status.nodeInfo.containerRuntimeVersion) [[ ${container_runtime_version} != *"containerd://Unknown"* ]] + + popd } teardown() {