diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 35c6a40b84..a92f0fbbe8 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -248,11 +248,3 @@ jobs: tarball-suffix: -${{ inputs.tag }} commit-hash: ${{ inputs.commit-hash }} target-branch: ${{ inputs.target-branch }} - - run-tracing-tests: - needs: build-kata-static-tarball-amd64 - uses: ./.github/workflows/run-tracing-tests.yaml - with: - tarball-suffix: -${{ inputs.tag }} - commit-hash: ${{ inputs.commit-hash }} - target-branch: ${{ inputs.target-branch }} diff --git a/.github/workflows/run-tracing-tests.yaml b/.github/workflows/run-tracing-tests.yaml deleted file mode 100644 index ede15aca11..0000000000 --- a/.github/workflows/run-tracing-tests.yaml +++ /dev/null @@ -1,52 +0,0 @@ -name: CI | Run tracing tests -on: - workflow_call: - inputs: - tarball-suffix: - required: false - type: string - commit-hash: - required: false - type: string - target-branch: - required: false - type: string - default: "" - -jobs: - run-tracing: - strategy: - fail-fast: false - matrix: - vmm: - - clh # cloud-hypervisor - - qemu - runs-on: garm-ubuntu-2204-smaller - env: - KATA_HYPERVISOR: ${{ matrix.vmm }} - steps: - - uses: actions/checkout@v4 - with: - ref: ${{ inputs.commit-hash }} - fetch-depth: 0 - - - name: Rebase atop of the latest target branch - run: | - ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" - env: - TARGET_BRANCH: ${{ inputs.target-branch }} - - - name: Install dependencies - run: bash tests/functional/tracing/gha-run.sh install-dependencies - - - name: get-kata-tarball - uses: actions/download-artifact@v3 - with: - name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} - path: kata-artifacts - - - name: Install kata - run: bash tests/functional/tracing/gha-run.sh install-kata kata-artifacts - - - name: Run tracing tests - run: bash tests/functional/tracing/gha-run.sh run diff --git a/tests/common.bash b/tests/common.bash index 34c59e0528..a111445749 100644 --- a/tests/common.bash +++ b/tests/common.bash @@ -547,24 +547,6 @@ EOF sudo systemctl enable --now crio } -function install_docker() { - # Add Docker's official GPG key - sudo apt-get update - sudo apt-get -y install ca-certificates curl gnupg - sudo install -m 0755 -d /etc/apt/keyrings - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg - sudo chmod a+r /etc/apt/keyrings/docker.gpg - - # Add the repository to Apt sources: - echo \ - "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ - "$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" | \ - sudo tee /etc/apt/sources.list.d/docker.list > /dev/null - sudo apt-get update - - sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin -} - # Convert architecture to the name used by golang function arch_to_golang() { local arch="$(uname -m)" diff --git a/tests/functional/tracing/configure_tracing_for_kata.sh b/tests/functional/tracing/configure_tracing_for_kata.sh deleted file mode 100755 index ed97571139..0000000000 --- a/tests/functional/tracing/configure_tracing_for_kata.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -# -# Copyright (c) 2019-2022 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -set -o errexit -set -o nounset -set -o pipefail - -SCRIPT_PATH=$(dirname "$(readlink -f "$0")") -source "${SCRIPT_PATH}/../../common.bash" - -[ "$#" -eq 1 ] || die "Specify enable or disable" - -kata_cfg_file=$(kata-runtime kata-env --json |jq '.Runtime | .Config | .Path' |cut -d\" -f2) - -enable_tracing() { - info "Enabling kata tracing on $kata_cfg_file" - sudo crudini --set "$kata_cfg_file" agent.kata enable_tracing true - sudo crudini --set "$kata_cfg_file" runtime enable_tracing true -} - -disable_tracing() { - info "Disabling kata tracing on $kata_cfg_file" - sudo crudini --set "$kata_cfg_file" agent.kata enable_tracing false - sudo crudini --set "$kata_cfg_file" runtime enable_tracing false -} - -main() { - cmd="$1" - case "$cmd" in - enable ) enable_tracing ;; - disable ) disable_tracing ;; - *) die "invalid command: '$cmd'" ;; - esac -} - -main "$@" diff --git a/tests/functional/tracing/gha-run.sh b/tests/functional/tracing/gha-run.sh deleted file mode 100755 index 3369926629..0000000000 --- a/tests/functional/tracing/gha-run.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/bash -# -# Copyright (c) 2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -set -o errexit -set -o nounset -set -o pipefail - -kata_tarball_dir="${2:-kata-artifacts}" -tracing_dir="$(dirname "$(readlink -f "$0")")" -source "${tracing_dir}/../../common.bash" - -function install_dependencies() { - info "Installing the dependencies needed for running the tracing tests" - - # Dependency list of projects that we can rely on the system packages - # - crudini - # - jq - # - socat - # - tmux - declare -a system_deps=( - crudini - jq - socat - tmux - ) - - sudo apt-get update - sudo apt-get -y install "${system_deps[@]}" - - # Install docker according to the docker's website documentation - install_docker -} - -function run() { - info "Running tracing tests using ${KATA_HYPERVISOR} hypervisor" - - enabling_hypervisor - bash -c ${tracing_dir}/test-agent-shutdown.sh - bash -c ${tracing_dir}/tracing-test.sh -} - -function main() { - action="${1:-}" - case "${action}" in - install-dependencies) install_dependencies ;; - install-kata) install_kata ;; - run) run ;; - *) >&2 die "Invalid argument" ;; - esac -} - -main "$@" diff --git a/tests/functional/tracing/test-agent-shutdown.sh b/tests/functional/tracing/test-agent-shutdown.sh deleted file mode 100755 index 6bad962d8b..0000000000 --- a/tests/functional/tracing/test-agent-shutdown.sh +++ /dev/null @@ -1,1502 +0,0 @@ -#!/bin/bash -# Copyright (c) 2021 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# -#--------------------------------------------------------------------- -# Description: Test the Kata Containers 2.x rust agent shutdown behaviour. -# -# Normally, the kata-agent process running inside the VM is not shut down; -# once the workload ends and the agent has returned the workload return -# value back to the runtime, the runtime simply kills the VM. This is safe -# since nothing the user cares about is running any more. -# -# However, for agent tracing, a graceful agent shutdown is necessary to ensure -# all trace spans are generated. When *static* agent tracing is enabled, the -# runtime relies entirely on the agent to perform a graceful shutdown _and_ -# shut down the VM. -# -# This script tests the kata-agent in two ways: -# -# - "manually" / "standalone" where the agent binary is run directly. -# - Inside a Kata VM, started by a shimv2-capable container manager -# (containerd). -# -# In both cases, the agent is shut down using the agent-ctl tool -# to request the agent shut down gracefully. -# -# Various configuration options are also tested. One of these enables -# the agents built-in (VSOCK) debug console. This test not only enables -# the option but also connects to the created console. -# -# Since this script needs to start various programs with a terminal, -# it uses tmux(1) consistently to simplify the handling logic. -#--------------------------------------------------------------------- - -readonly script_name=${0##*/} - -set -o errexit -set -o nounset -set -o pipefail -set -o errtrace - -SCRIPT_PATH=$(dirname "$(readlink -f "$0")") -source "${SCRIPT_PATH}/../../common.bash" -source "/etc/os-release" || source "/usr/lib/os-release" - -CTR_RUNTIME=${CTR_RUNTIME:-"io.containerd.kata.v2"} - -# Kata always uses this value -EXPECTED_VSOCK_PORT="1024" - -DOCKER_IMAGE=${DOCKER_IMAGE:-"busybox"} -CTR_IMAGE=${CTR_IMAGE:-"quay.io/prometheus/busybox:latest"} - -# Number of times the test should be run -KATA_AGENT_SHUTDOWN_TEST_COUNT=${KATA_AGENT_SHUTDOWN_TEST_COUNT:-1} - -# Default VSOCK port used by the agent -KATA_AGENT_VSOCK_CONSOLE_PORT=${KATA_AGENT_VSOCK_CONSOLE_PORT:-1026} - -# The shutdown test type that represents a "default" / vanilla Kata -# installation (where no debug options are enabled). -VANILLA_TEST_TYPE='default' - -# Name of tmux(1) sessions to create to run Kata VM and local agent in -KATA_TMUX_VM_SESSION="kata-shutdown-test-vm-session" -KATA_TMUX_LOCAL_SESSION="kata-shutdown-test-local-agent-session" - -# Name of tmux(1) session to create to run a debug console in -KATA_TMUX_CONSOLE_SESSION="kata-shutdown-test-console-session" - -# tmux(1) session to run the trace forwarder in -KATA_TMUX_FORWARDER_SESSION="kata-shutdown-test-trace-forwarder-session" - -KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}" - -# List of test types used by configure_kata(). -# -# Each element contains four colon delimited fields: -# -# 1: Name. -# 2: Whether debug should be enabled for the agent+runtime. -# 3: Whether hypervisor debug should be enabled. -# (handled separately due to a previous bug which blocked agent shutdown). -# 4: Whether a VSOCK debug console should be configured and used. -# -# Notes: -# -# - Tests are run in the order found in this array. -# - An array is used (rather than a hash) to ensure the standard/vanilla -# configuration is run *last*. The reason for this being that debug is -# needed to diagnose shutdown errors, so there is no point in runnning -# the default scenario first, in case it fails (and it thus "undebuggable"). -shutdown_test_types=( - 'with-debug:true:false:false' - 'with-debug-console:false:false:true' - 'with-hypervisor-debug:true:true:false' - 'with-everything:true:true:true' - "${VANILLA_TEST_TYPE}:false:false:false" -) - -# Number of fields each entry in the 'shutdown_test_types' array should have. -shutdown_test_type_fields=4 - -# Pseudo test type name that represents all test types defined -# in the 'shutdown_test_types' array. -ALL_TEST_TYPES='all' - -DEFAULT_SHUTDOWN_TEST_TYPE="${ALL_TEST_TYPES}" - -# List of ways of running the agent: -# -# Each element contains two colon delimited fields: -# -# 1: Name used for a particular way of running the agent. -# 2: Description. -agent_test_types=( - 'local:Run agent using agent-ctl tool' - 'vm:Run agent inside a Kata Container' -) - -# Default value from the 'agent_test_types' array. -DEFAULT_AGENT_TEST_TYPE='vm' - -# Set by every call to run_single_agent() -test_start_time= -test_end_time= - -#------------------------------------------------------------------------------- -# Settings - -# values used to wait for local and VM processes to start and end. -wait_time_secs=${WAIT_TIME_SECS:-20} -sleep_time_secs=${SLEEP_TIME_SECS:-1} - -# Time to allow for the agent and VM to shutdown -shutdown_time_secs=${SHUTDOWN_TIME_SECS:-120} - -# Name for the container that will be created -container_id="${CONTAINER_ID:-kata-agent-shutdown-test}" - -# If 'true', don't run any commands, just show what would be run. -dry_run="${DRY_RUN:-false}" - -# If 'true', don't remove logs on a successful run. -keep_logs="${KEEP_LOGS:-false}" - -# Name of socket file used by a local agent. -agent_socket_file="kata-agent.socket" - -# Kata Agent socket URI. -# -# Notes: -# -# - The file is an abstract socket -# (meaning it is not visible in the filesystem). -# -# - The agent and the agent-ctl tool use slightly different -# address formats for abstract sockets. -local_agent_server_addr="unix://${agent_socket_file}" -local_agent_ctl_server_addr="unix://@${agent_socket_file}" - -# Address that is dynamically configured when using CLH before -# starting trace forwarder or container -clh_socket_path= -clh_socket_prefix="/run/vc/vm/" - -ctl_log_file="${PWD}/agent-ctl.log" - -# Log file that must contain agent output. -agent_log_file="${PWD}/kata-agent.log" - -# Set in setup() based on KATA_HYPERVISOR -# Supported hypervisors are qemu and clh -configured_hypervisor= -# String that would appear in config file (qemu or clh) -configured_hypervisor_cfg= - -# Full path to directory containing an OCI bundle based on "$DOCKER_IMAGE", -# which is required by the agent control tool. -bundle_dir=${BUNDLE_DIR:-""} - -#--------------------------------------- -# Default values - -default_arch=$(uname -m) -arch="${arch:-${default_arch}}" - -#------------------------------------------------------------------------------- - -agent_binary="/usr/bin/kata-agent" - -# Maximum debug level -default_agent_log_level="trace" - -agent_log_level=${agent_log_level:-${default_agent_log_level}} - -# Full path to the main configuration file (set by setup()). -kata_cfg_file= - -# Set in setup() based on KATA_HYPERVISOR -hypervisor_binary= - - -#------------------------------------------------------------------------------- - -[ -n "${DEBUG:-}" ] && set -o xtrace - -usage() -{ - cat < : Agent test type to use - (default: '$DEFAULT_AGENT_TEST_TYPE'). - -c : Run specified number of iterations - (default: $KATA_AGENT_SHUTDOWN_TEST_COUNT). - -d : Enable debug (shell trace) output. - -h : Show this help statement. - -k : Keep logs on successful run - (default: logs will be deleted on success). - -l : List all available agent and shutdown test types. - -n : Dry-run mode - show the commands that would be run. - -t : Only run the specified shutdown test type - (default: '$DEFAULT_SHUTDOWN_TEST_TYPE'). - -Notes: - -- These tests should be run *before* the Kata Agent tracing tests, since if - the agent cannot be shut down, static tracing will not work reliably. - -- By default all shutdown test types are run, but only the default agent test - type is run. - -EOF -} - -warn() -{ - echo >&2 "WARNING: $*" -} - -# Run the specified command, or if dry-run mode is enabled, -# just show the command that would be run. -run_cmd() -{ - local cmdline="$@" - - if [ "$dry_run" = 'true' ] - then - info "dry-run: Would run: '$cmdline'" - else - eval $cmdline - fi -} - -# Show a subset of processes (for debugging) -show_procs() -{ - info "Processes" - - local hypervisor - hypervisor="qemu" - [ ${configured_hypervisor} = "clh" ] && hypervisor="cloud-hypervisor" - - local patterns=() - - patterns+=("kata-agent-ctl") - patterns+=("${hypervisor}") - patterns+=("containerd") - patterns+=("ctr") - - local pattern_list - pattern_list=$(echo "${patterns[@]}"|tr ' ' '|') - - local regex - regex="(${pattern_list})" - - ps -efww | egrep -i "$regex" || true -} - -kill_tmux_sessions() -{ - local session - - for session in \ - "$KATA_TMUX_CONSOLE_SESSION" \ - "$KATA_TMUX_FORWARDER_SESSION" \ - "$KATA_TMUX_LOCAL_SESSION" \ - "$KATA_TMUX_VM_SESSION" - do - tmux kill-session -t "$session" &>/dev/null || true - done - - true -} - -get_shutdown_test_type_entry() -{ - local shutdown_test_type="${1:-}" - [ -z "$shutdown_test_type" ] && die "need shutdown test type name" - - local entry - - for entry in "${shutdown_test_types[@]}" - do - local count - count=$(echo "$entry"|tr ':' '\n'|wc -l) - [ "$count" -eq "$shutdown_test_type_fields" ] \ - || die "expected $shutdown_test_type_fields fields, found $count: '$entry'" - - local name - - name=$(echo "$entry"|cut -d: -f1) - - [ "$name" = "$shutdown_test_type" ] \ - && echo "$entry" \ - && break - done - - echo -} - -list_shutdown_test_types() -{ - local entry - local debug_value - local hypervisor_debug_value - local debug_console_value - - printf "# Shutdown test types:\n\n" - - printf "%-24s %-15s %-23s %s\n\n" \ - "Test type" \ - "Debug enabled" \ - "Hypervisor debug" \ - "Debug console used" - - for entry in "${shutdown_test_types[@]}" - do - local name - local debug_value - local hypervisor_debug_value - local debug_console_value - - name=$(echo "$entry"|cut -d: -f1) - debug_value=$(echo "$entry"|cut -d: -f2) - hypervisor_debug_value=$(echo "$entry"|cut -d: -f3) - debug_console_value=$(echo "$entry"|cut -d: -f4) - - printf "%-24s %-15s %-23s %s\n" \ - "$name" \ - "$debug_value" \ - "$hypervisor_debug_value" \ - "$debug_console_value" - done - - echo -} - -list_agent_test_types() -{ - local entry - - printf "# Agent test types:\n\n" - - printf "%-12s %s\n\n" \ - "Agent type" \ - "Description" - - for entry in "${agent_test_types[@]}" - do - local name - local descr - - name=$(echo "$entry"|cut -d: -f1) - descr=$(echo "$entry"|cut -d: -f2-) - - local msg="" - - [ "$name" = "$DEFAULT_AGENT_TEST_TYPE" ] && msg=" (default)" - - printf "%-12s %s%s.\n" \ - "$name" \ - "$descr" \ - "$msg" - done - - echo -} - -list_test_types() -{ - list_agent_test_types - list_shutdown_test_types -} - -# Set Kata options according to test type. -configure_kata() -{ - local shutdown_test_type="${1:-}" - [ -z "$shutdown_test_type" ] && die "need shutdown test type" - - local entry - local debug_value - local hypervisor_debug_value - local debug_console_value - - local entry - entry=$(get_shutdown_test_type_entry "$shutdown_test_type" || true) - [ -z "$entry" ] && die "invalid test type: '$shutdown_test_type'" - - debug_value=$(echo "$entry"|cut -d: -f2) - hypervisor_debug_value=$(echo "$entry"|cut -d: -f3) - debug_console_value=$(echo "$entry"|cut -d: -f4) - - [ -z "$debug_value" ] && \ - die "need debug value for $shutdown_test_type" - - [ -z "$hypervisor_debug_value" ] && \ - die "need hypervisor debug value for $shutdown_test_type" - - [ -z "$debug_console_value" ] && \ - die "need debug console value for $shutdown_test_type" - - toggle_debug "$debug_value" "$hypervisor_debug_value" - toggle_vsock_debug_console "$debug_console_value" - - # Enable agent tracing - # - # Even though this program only tests agent shutdown, static tracing - # must be configured. This is because normally (with tracing - # disabled), the runtime kills the VM after the workload has exited. - # However, if static tracing is enabled, the runtime will not kill the - # VM - the responsibility for shutting down the VM is given to the - # agent process running inside the VM. - - if [ "$shutdown_test_type" = "$VANILLA_TEST_TYPE" ] - then - # We don't need to worry about the 'trace_mode' here since agent tracing - # is *only* enabled if the 'enable_tracing' variable is set. - run_cmd sudo crudini --set "${kata_cfg_file}" 'agent.kata' 'enable_tracing' 'false' - else - run_cmd sudo crudini --set "${kata_cfg_file}" 'agent.kata' 'enable_tracing' 'true' - fi -} - -unconfigure_kata() -{ - info "Resetting configuration to defaults" - - configure_kata "$VANILLA_TEST_TYPE" -} - -# Enable/disable the agent's built-in VSOCK debug console -toggle_vsock_debug_console() -{ - run_cmd sudo crudini --set "${kata_cfg_file}" \ - 'agent.kata' 'debug_console_enabled' "$1" -} - -# Enable/disable debug options. -# -# Note: Don't use 'kata-manager.sh "enable-debug"' since this -# enables all debug (including the problematic hypervisor -# debug - see below). -toggle_debug() -{ - local value="${1:-}" - local hypervisor_debug="${2:-}" - - [ -z "$value" ] && die "need value" - [ -z "$hypervisor_debug" ] && die "need hypervisor debug value" - - # list of confguration.toml sections that have debug options we care about - local debug_sections=() - - debug_sections+=('agent.kata') - debug_sections+=('runtime') - - local section - - for section in "${debug_sections[@]}" - do - run_cmd sudo crudini --set "$kata_cfg_file" "$section" \ - 'enable_debug' "$value" - done - - # XXX: Enabling hypervisor debug for QEMU will make a systemd debug - # console service inoperable (*), but we need to test it anyhow. - # - # (*) - If enabled, it stops "kata-debug.service" from attaching to - # the console and the socat call made on the client hangs until - # the VM is shut down! - local section - - section=$(printf "hypervisor.%s" "$configured_hypervisor_cfg") - - run_cmd sudo crudini --set "$kata_cfg_file" "$section" \ - 'enable_debug' "$hypervisor_debug_value" -} - -# Provide a "semi-valid" vsock address for when dry-run mode is active. -# The URI includes a message telling the user to change it and replace -# with the real VSOCK CID value. -get_dry_run_agent_vsock_address() -{ - echo "vsock://FIXME-CHANGE-TO-VSOCK-CID:${EXPECTED_VSOCK_PORT}" -} - -# Start a debug console shell using the agent's built-in debug console -# feature. -# -# Note: You should be able to use "kata-runtime exec $cid", but that isn't -# working currently. -connect_to_vsock_debug_console() -{ - local agent_addr - - if [ "$dry_run" = 'true' ] - then - agent_addr=$(get_dry_run_agent_vsock_address) - else - agent_addr=$(get_agent_vsock_address || true) - [ -z "$agent_addr" ] && die "cannot determine agent VSOCK address" - fi - - local socat_connect= - if [ $configured_hypervisor = "qemu" ]; then - socat_connect=$(echo "$agent_addr"|sed 's!^vsock://!vsock-connect:!') - elif [ $configured_hypervisor = "clh" ]; then - socat_connect="unix-connect:${clh_socket_path}" - else - die "Cannot configure address for socat, unknown hypervisor: '$configured_hypervisor'" - fi - - run_cmd \ - "tmux new-session \ - -d \ - -s \"$KATA_TMUX_CONSOLE_SESSION\" \ - \"socat \ - '${socat_connect}' \ - stdout\"" - -} - -cleanup() -{ - # Save the result of the last call made before - # this handler was called. - # - # XXX: This *MUST* be the first command in this function! - local failure_ret="$?" - - [ "$dry_run" = 'true' ] && return 0 - - if [ "$failure_ret" -eq 0 ] && [ "$keep_logs" = 'true' ] - then - info "SUCCESS: Test passed, but leaving logs:" - info "" - info "agent log file : ${agent_log_file}" - info "agent-ctl log file : ${ctl_log_file}" - info "OCI bundle directory : ${bundle_dir}" - - return 0 - fi - - local arg="${1:-}" - - if [ $failure_ret -ne 0 ] && [ "$arg" != 'initial' ]; then - warn "ERROR: Test failed" - warn "" - warn "Not cleaning up to help debug failure:" - warn "" - - if [ "${CI:-}" = "true" ] - then - show_procs - - info "VSOCK details" - ss -Hp --vsock || true - - info "agent-ctl log file" - sudo cat "${ctl_log_file}" || true - echo - - info "agent log file" - sudo cat "${agent_log_file}" || true - echo - - else - info "agent-ctl log file : ${ctl_log_file}" - info "agent log file : ${agent_log_file}" - fi - - info "OCI bundle directory : ${bundle_dir}" - - return 0 - fi - - kill_tmux_sessions - - unconfigure_kata - - [ "$arg" != 'initial' ] && [ -d "$bundle_dir" ] && rm -rf "$bundle_dir" - - sudo rm -f \ - "$agent_log_file" \ - "$ctl_log_file" - - clean_env_ctr &>/dev/null || true - - local sandbox_dir="/run/sandbox-ns/" - - # XXX: Without doing this, the agent will hang attempting to create the - # XXX: namespaces (in function "setup_shared_namespaces()") - sudo umount -f "${sandbox_dir}/uts" "${sandbox_dir}/ipc" &>/dev/null || true - sudo rm -rf "${sandbox_dir}" &>/dev/null || true - - # Check that clh socket was deleted - if [ $configured_hypervisor = "clh" ] && [ ! -z $clh_socket_path ]; then - [ -f $clh_socket_path ] && die "CLH socket path $clh_socket_path was not properly cleaned up" - fi - - sudo systemctl restart containerd -} - -setup_containerd() -{ - local file="/etc/containerd/config.toml" - - [ -e "$file" ] || die "missing containerd config file: '$file'" - - # Although the containerd config file is in TOML format, crudini(1) - # won't parse it due to the indentation it uses. - local containerd_debug_enabled - - containerd_debug_enabled=$(sed \ - -e '/./{H;$!d;}' \ - -e 'x;/\[debug\]/!d;' \ - "$file" |\ - grep "level *= *\"debug\"" || true) - - if [ -z "$containerd_debug_enabled" ] - then - cat <<-EOF | sudo tee -a "$file" - [debug] - # Allow Kata Containers debug messages to be propageted - # into the hosts journal. - # (use "journalctl -t kata" to view). - level = "debug" - EOF - - sudo systemctl restart containerd - fi - - sudo ctr image pull "$CTR_IMAGE" - - true -} - -create_oci_rootfs() -{ - local dir="${1:-}" - - [ -z "$dir" ] && die "Need OCI rootfs dir" - - sudo docker export $(sudo docker create "$DOCKER_IMAGE") |\ - tar -C "${dir}" -xvf - >/dev/null -} - -setup_oci_bundle() -{ - bundle_dir="$(mktemp -d)" - export bundle_dir - - info "Creating OCI bundle in directory: '$bundle_dir'" - - local config="${bundle_dir}/config.json" - local rootfs_dir="${bundle_dir}/rootfs/" - - mkdir -p "$rootfs_dir" - - create_oci_rootfs "$rootfs_dir" - - pushd "$bundle_dir" &>/dev/null - runc spec - popd &>/dev/null - - [ -e "$config" ] || die "no OCI config file at ${config}" -} - -setup() -{ - configured_hypervisor="${KATA_HYPERVISOR:-}" - - if [ "${KATA_HYPERVISOR:-}" = "qemu" ]; then - hypervisor_binary="qemu-system-${arch}" - configured_hypervisor_cfg="qemu" - elif [ "${KATA_HYPERVISOR:-}" = "clh" ]; then - hypervisor_binary="cloud-hypervisor" - configured_hypervisor_cfg="clh" - else - local msg="" - msg+="Exiting as hypervisor test dependency not met" - msg+=" (expected 'qemu' or 'cloud-hypervisor', found '$KATA_HYPERVISOR')" - die "$msg" - fi - info "Configured hypervisor is $configured_hypervisor" - - trap cleanup EXIT - - # Don't mess with an existing tmux session - unset TMUX - - [ "$dry_run" = 'false' ] && \ - [ -z "$bundle_dir" ] && \ - setup_oci_bundle || true - - local cmds=() - - # For parsing TOML config files - cmds+=('crudini') - - # For container manager (containerd) - cmds+=('ctr') - - # for OCI bundle creation - cmds+=('docker') - cmds+=('runc') - - # For querying VSOCK sockets - cmds+=('socat') - - # For launching processes - cmds+=('tmux') - - local cmd - - for cmd in "${cmds[@]}" - do - local result - result=$(command -v "$cmd" || true) - [ -n "$result" ] || die "need $cmd" - done - - kata_cfg_file=$(kata-runtime kata-env \ - --json |\ - jq '.Runtime | .Config | .Path' |\ - cut -d\" -f2 || true) - - [ -z "$kata_cfg_file" ] && die "Cannot determine config file" - - sudo mkdir -p $(dirname "$kata_cfg_file") - - #------------------------------ - # Check configured hypervisor - - local hypervisor_section - - hypervisor_section=$(printf "hypervisor.%s\n" "${configured_hypervisor_cfg}") - - local ret - - { crudini --get "${kata_cfg_file}" "${hypervisor_section}" &>/dev/null; ret=$?; } || true - - [ "$ret" -eq 0 ] || \ - die "Configured hypervisor ${configured_hypervisor} does not match config file ${kata_cfg_file}" - - setup_containerd -} - -start_local_agent() -{ - local log_file="${1:-}" - [ -z "$log_file" ] && die "need agent log file" - - local running - running=$(get_local_agent_pid || true) - - [ -n "$running" ] && die "agent already running: '$running'" - - # Note: it's imperative that we capture stderr to the log file - # as the agent writes the shutdown message to this stream! - run_cmd \ - "tmux new-session \ - -d \ - -s \"$KATA_TMUX_LOCAL_SESSION\" \ - \"sudo \ - RUST_BACKTRACE=full \ - KATA_AGENT_LOG_LEVEL=${agent_log_level} \ - KATA_AGENT_SERVER_ADDR=${local_agent_server_addr} \ - ${agent_binary} \ - &> ${log_file}\"" - - [ "$dry_run" = 'false' ] && wait_for_local_agent_to_start || true -} - -# Wait for the agent to finish starting -wait_for_kata_vm_agent_to_start() -{ - local cid="${1:-}" - [ -z "$log_file" ] && die "need container ID" - - # First, check the containerd status of the container - local cmd="sudo ctr task list | grep \"${cid}\" | grep -q \"RUNNING\"" - - info "Waiting for VM to start (cid: '$cid')" - - waitForProcess \ - "$wait_time_secs" \ - "$sleep_time_secs" \ - "$cmd" - - show_procs - - # Next, ensure there is a valid VSOCK address for the VM - info "Waiting for agent VSOCK server" - - cmd="get_agent_vsock_address_simple >/dev/null" - - waitForProcess \ - "$wait_time_secs" \ - "$sleep_time_secs" \ - "$cmd" - - info "Kata VM running" -} - -check_local_agent_alive() -{ - local cmds=() - - cmds+=("-c Check") - - run_agent_ctl \ - "${local_agent_ctl_server_addr}" \ - "${cmds[@]}" - - true -} - -wait_for_local_agent_to_start() -{ - local cmd="check_local_agent_alive" - - info "Waiting for agent process to start" - - waitForProcess \ - "$wait_time_secs" \ - "$sleep_time_secs" \ - "$cmd" - - info "Kata agent process running" -} - -# Create a Kata Container that blocks "forever" -start_agent_in_kata_vm() -{ - local log_file="${1:-}" - [ -z "$log_file" ] && die "need agent log file" - - local snapshotter="" - local ret - - # Allow containerd to run on a ZFS root filesystem - { zfs list &>/dev/null; ret=$?; } || true - [ "$ret" = 0 ] && snapshotter='zfs' - - # Ensure the container blocks forever - local cmd='tail -f /dev/null' - - run_cmd \ - "tmux new-session \ - -d \ - -s \"$KATA_TMUX_VM_SESSION\" \ - \"sudo ctr run \ - --snapshotter '$snapshotter' \ - --runtime '${CTR_RUNTIME}' \ - --rm \ - -t '${CTR_IMAGE}' \ - '$container_id' \ - $cmd\"" - - [ "$dry_run" = 'false' ] && \ - wait_for_kata_vm_agent_to_start "$container_id" || true -} - -start_agent() -{ - local agent_test_type="${1:-}" - [ -z "$agent_test_type" ] && die "need agent test type" - - local log_file="${2:-}" - [ -z "$log_file" ] && die "need agent log file" - - case "$agent_test_type" in - 'local') start_local_agent "$log_file" ;; - 'vm') start_agent_in_kata_vm "$log_file" ;; - *) die "invalid agent test type: '$agent_test_type'" ;; - esac - - true -} - -run_agent_ctl() -{ - local server_addr="${1:-}" - - shift - - local cmds="${*:-}" - - [ -n "$server_addr" ] || die "need agent ttRPC server address" - [ -n "$cmds" ] || die "need commands for agent control tool" - - local agent_ctl_path - agent_ctl_path="/opt/kata/bin/kata-agent-ctl" - - local redirect="&>\"${ctl_log_file}\"" - - if [ "$dry_run" = 'true' ] - then - redirect="" - bundle_dir="FIXME-set-to-OCI-bundle-directory" - fi - - local server_address= - if [ $configured_hypervisor = "qemu" ]; then - server_address="--server-address \"${server_addr}\"" - elif [ $configured_hypervisor = "clh" ]; then - server_address="--server-address \"${server_addr}\" --hybrid-vsock" - else - die "Cannot configure server address, unknown hypervisor: '$configured_hypervisor'" - fi - - run_cmd \ - sudo \ - RUST_BACKTRACE=full \ - "${agent_ctl_path}" \ - -l debug \ - connect \ - "${server_address}" \ - --bundle-dir "${bundle_dir}" \ - "${cmds}" \ - "${redirect}" -} - -# This function "cheats" a little - it gets the agent -# to do some work *and then* stops it. -stop_local_agent() -{ - local cmds=() - - cmds+=("-c Check") - cmds+=("-c GetGuestDetails") - cmds+=("-c 'sleep 1s'") - cmds+=("-c DestroySandbox") - - run_agent_ctl \ - "${local_agent_ctl_server_addr}" \ - "${cmds[@]}" -} - -get_addresses() -{ - local addresses= - - if [ $configured_hypervisor = "qemu" ]; then - addresses=$(ss -Hp --vsock |\ - egrep -v "\" |\ - awk '$2 ~ /^ESTAB$/ {print $6}' |\ - grep ":${EXPECTED_VSOCK_PORT}$") - elif [ $configured_hypervisor = "clh" ]; then - # since we preconfigured the socket, we are checking to see if it is reported - addresses=$(ss -Hp |\ - grep "${clh_socket_path}" |\ - awk '$2 ~ /^ESTAB$/ {print $5}') - else - die "Cannot retrieve address, unknown hypervisor: '$configured_hypervisor'" - fi - - echo ${addresses} -} - -# Doesn't fail. Instead it will return the empty string on error. -get_agent_vsock_address_simple() -{ - local addresses=$(get_addresses) - - [ -z "$addresses" ] && return 1 - - local expected_count=1 - - local count - count=$(echo "$addresses"|wc -l || true) - - [ "$count" -eq "$expected_count" ] || return 1 - - if [ $configured_hypervisor = "qemu" ]; then - local cid - local port - - cid=$(echo "$addresses"|cut -d: -f1) - port=$(echo "$addresses"|cut -d: -f2) - - echo "vsock://${cid}:${port}" - elif [ $configured_hypervisor = "clh" ]; then - address=$(echo "$addresses" | awk 'NR==1{print $1}') - echo "unix://${address}" - else - die "Cannot get agent vsock address, unknown hypervisor: '$configured_hypervisor'" - fi - - return 0 -} - -get_agent_vsock_address() -{ - local addresses=$(get_addresses) - - [ -z "$addresses" ] && die "no VSOCK connections found" - - local expected_count=1 - - local count - count=$(echo "$addresses"|wc -l || true) - - if [ $configured_hypervisor = "qemu" ]; then - # For QEMU we always expect 1 result. For Cloud Hypervisor, if a debug console is configured - # and running, we will have more than 1 result, so only run this check for QEMU - [ "$count" -eq "$expected_count" ] \ - || die "expected $expected_count VSOCK entry, found $count: '$addresses'" - - local cid - local port - - cid=$(echo "$addresses"|cut -d: -f1) - port=$(echo "$addresses"|cut -d: -f2) - - echo "vsock://${cid}:${port}" - elif [ $configured_hypervisor = "clh" ]; then - address=$(echo "$addresses" | awk 'NR==1{print $1}') - echo "unix://${address}" - else - die "Cannot get agent vsock address, unknown hypervisor: '$configured_hypervisor'" - fi -} - -stop_agent_in_kata_vm() -{ - local agent_addr - - if [ "$dry_run" = 'true' ] - then - agent_addr=$(get_dry_run_agent_vsock_address) - else - agent_addr=$(get_agent_vsock_address || true) - [ -z "$agent_addr" ] && \ - die "cannot determine agent VSOCK address for $hypervisor_binary" - fi - - # List of API commands to send to the agent. - local cmds=() - - # Run a couple of query commands first to ensure - # the agent is listening. - cmds+=("-c Check") - cmds+=("-c GetGuestDetails") - - # Creating a container implies creating a sandbox, so request - # agent/VM/container shutdown by asking the agent - # to destroy the sandbox. - cmds+=("-c DestroySandbox") - - run_agent_ctl \ - "${agent_addr}" \ - "${cmds[@]}" - - true -} - -stop_agent() -{ - info "Stopping agent" - - local agent_test_type="${1:-}" - [ -z "$agent_test_type" ] && die "need agent test type" - - local log_file="${2:-}" - [ -z "$log_file" ] && die "need agent-ctl log file" - - case "$agent_test_type" in - 'local') stop_local_agent ;; - 'vm') stop_agent_in_kata_vm ;; - *) die "invalid agent test type: '$agent_test_type'" ;; - esac - - true -} - -get_local_agent_pid() -{ - local pids - - local name - name=$(basename "$agent_binary") - - pids=$(pgrep "$name" || true) - [ -z "$pids" ] && return 0 - - local count - count=$(echo "$pids"|wc -l) - - [ "$count" -gt 1 ] && \ - die "too many agent processes running ($count, '$pids')" - - echo $pids -} - -# Function that writes all agent logs to '$agent_log_file'. -get_agent_log_file() -{ - local agent_test_type="${1:-}" - [ -z "$agent_test_type" ] && die "need agent test type" - - local log_file="${2:-}" - [ -z "$log_file" ] && die "need agent log file" - - info "Getting agent log details" - - case "$agent_test_type" in - # NOP: File should have been created by start_local_agent() - 'local') true ;; - - # Extract journal entries for the duration of the test - 'vm') - sudo journalctl \ - -q \ - -a \ - -o cat \ - -t 'kata' \ - --since="$test_start_time" \ - > "$log_file" - ;; - - *) die "invalid agent test type: '$agent_test_type'" ;; - esac - - [ -e "$log_file" ] || die "no log file: '$log_file'" - [ -s "$log_file" ] || die "empty log file: '$log_file'" - - true -} - -# Function to run to ensure correct behaviour -validate_agent() -{ - local agent_test_type="${1:-}" - local shutdown_test_type="${2:-}" - local log_file="${3:-}" - - [ -z "$agent_test_type" ] && die "need agent test type" - [ -z "$shutdown_test_type" ] && die "need shutdown test type" - [ -z "$log_file" ] && die "need agent log file" - - info "validating" - - get_agent_log_file \ - "$agent_test_type" \ - "$log_file" - - # Regular expression that describes possible agent failures - local regex="(slog::Fuse|Drain|Custom|serialization error|thread.*panicked|stack backtrace:)" - - egrep -q "$regex" "$log_file" && cat $log_file && die "Found agent error in log file: '$log_file'" - - local entry - entry=$(get_shutdown_test_type_entry "$shutdown_test_type" || true) - [ -z "$entry" ] && die "invalid test type: '$shutdown_test_type'" - - local hypervisor_debug=$(echo "$entry"|cut -d: -f3) - local vsock_console=$(echo "$entry"|cut -d: -f4) - - local agent_debug_logs_available='false' - - [ "$hypervisor_debug" = 'true' ] && \ - [ "$vsock_console" = 'false' ] && \ - agent_debug_logs_available='true' - - if [ "$agent_debug_logs_available" = 'true' ] || [ "$agent_test_type" = 'local' ] - then - # The message the agent writes to stderr just before it exits. - local done_msg="\" - - egrep -q "$done_msg" "$log_file" || (cat $log_file && die "missing agent shutdown message") - else - # We can only check for the shutdown message if the agent debug - # logs are available. - info "Not checking for agent shutdown message as hypervisor debug disabled" - fi -} - -setup_agent() -{ - local shutdown_test_type="${1:-}" - [ -z "$shutdown_test_type" ] && die "need shutdown test type" - - kill_tmux_sessions - - configure_kata "$shutdown_test_type" - - true -} - -# Even though this test is not testing tracing, agent tracing needs to be -# enabled to stop the runtime from killing the VM. However, if tracing is -# enabled, the forwarder must be running. To remove the need for Jaeger to -# also be running, run the forwarder in "NOP" mode. -run_trace_forwarder() -{ - local forwarder_binary_path - forwarder_binary_path="/opt/kata/bin/kata-trace-forwarder" - - local socket_path_tf="" - - # If using CLH, socket path must be passed to trace forwarder - if [ $configured_hypervisor = "clh" ]; then - socket_path_tf="--socket-path ${clh_socket_path}" - fi - - run_cmd \ - "tmux new-session \ - -d \ - -s \"$KATA_TMUX_FORWARDER_SESSION\" \ - sudo \"$forwarder_binary_path --dump-only -l trace ${socket_path_tf}\"" -} - -check_agent_stopped() -{ - info "Checking agent stopped" - - local agent_test_type="${1:-}" - [ -z "$agent_test_type" ] && die "need agent test type" - - local cmd= - - case "$agent_test_type" in - 'local') cmd=check_local_agent_stopped ;; - 'vm') cmd=check_vm_stopped ;; - *) die "invalid agent test type: '$agent_test_type'" ;; - esac - - waitForProcess \ - "$shutdown_time_secs" \ - "$sleep_time_secs" \ - "$cmd" - - true -} - -check_local_agent_stopped() -{ - local ret=0 - - local i=0 - local max=20 - - agent_ended="false" - - local agent_pid - agent_pid=$(get_local_agent_pid || true) - - # Agent has finished - [ -z "$agent_pid" ] && return 0 - - for _ in $(seq "$max") - do - { sudo kill -0 "$agent_pid"; ret=$?; } || true - - [ "$ret" -ne 0 ] && agent_ended="true" && break - - sleep 0.2 - done - - [ "$agent_ended" = "false" ] && die "agent still running: pid $agent_pid" || true -} - -get_vm_pid() -{ - pgrep "$hypervisor_binary" -} - -check_vm_stopped() -{ - tmux list-sessions |\ - grep -q "^${KATA_TMUX_VM_SESSION}:" \ - && return 1 - - return 0 -} - -start_debug_console() -{ - local agent_test_type="${1:-}" - local shutdown_test_type="${2:-}" - - [ -z "$agent_test_type" ] && die "need agent test type" - [ -z "$shutdown_test_type" ] && die "need shutdown test type" - - info "Starting debug console" - - case "$agent_test_type" in - 'vm') connect_to_vsock_debug_console ;; - # NOP for a local agent since we cannot connect to the agents - # VSOCK console socket from *outside* the host! - 'local') true ;; - *) die "invalid agent test type: '$agent_test_type'" ;; - esac - - true -} - -run_single_agent() -{ - local agent_test_type="${1:-}" - local shutdown_test_type="${2:-}" - - [ -z "$agent_test_type" ] && die "need agent test type" - [ -z "$shutdown_test_type" ] && die "need shutdown test type" - - local msg - msg=$(printf \ - "Testing agent (agent test type: '%s', shutdown test type: '%s')" \ - "$agent_test_type" \ - "$shutdown_test_type") - info "$msg" - - setup_agent "$shutdown_test_type" - - if [ $configured_hypervisor = "clh" ]; then - # CLH uses hybrid VSOCK which uses a local UNIX socket that we need to specify - socket_path_template=$clh_socket_prefix$(sudo kata-runtime env --json | jq '.Hypervisor.SocketPath') - clh_socket_path=$(echo "$socket_path_template" | sed "s/{ID}/${container_id}/g" | tr -d '"') - [ "$dry_run" = 'false' ] && sudo mkdir -p $(dirname "$clh_socket_path") - fi - - run_trace_forwarder "$shutdown_test_type" - - sleep 5s - - test_start_time=$(date '+%F %T') - - start_agent \ - "$agent_test_type" \ - "$agent_log_file" - - info "Testing agent: shutdown test type: '$shutdown_test_type', agent test type: $agent_test_type" - - local entry - entry=$(get_shutdown_test_type_entry "$shutdown_test_type" || true) - local debug_console=$(echo "$entry"|cut -d: -f4) - [ "$debug_console" = 'true' ] && \ - start_debug_console \ - "$agent_test_type" \ - "$shutdown_test_type" - - stop_agent \ - "$agent_test_type" \ - "$ctl_log_file" - - # We only need to show the set of commands once - [ "$dry_run" = 'true' ] && exit 0 - - test_end_time=$(date '+%F %T') - - check_agent_stopped "$agent_test_type" - - validate_agent \ - "$agent_test_type" \ - "$shutdown_test_type" \ - "$agent_log_file" -} - -run_agent() -{ - local agent_test_type="${1:-}" - local shutdown_test_type="${2:-}" - - [ -z "$agent_test_type" ] && die "need agent test type" - [ -z "$shutdown_test_type" ] && die "need shutdown test type" - - case "$shutdown_test_type" in - "$ALL_TEST_TYPES") - local entry - - # Run all shutdown types - for entry in "${shutdown_test_types[@]}" - do - local name - name=$(echo "$entry"|cut -d: -f1) - - run_single_agent \ - "$agent_test_type" \ - "$name" - - # Clean up between iterations - sudo rm -f \ - "$ctl_log_file" \ - "$agent_log_file" - - local addresses=$(get_addresses || true) - - [ -z "$addresses" ] || \ - die "found unexpected vsock addresses: '$addresses'" - - done - ;; - - *) - run_single_agent \ - "$agent_test_type" \ - "$shutdown_test_type" - ;; - esac - -} - -test_agent_shutdown() -{ - local count="${1:-}" - local agent_test_type="${2:-}" - local shutdown_test_type="${3:-}" - - [ -z "$count" ] && die "need count" - [ -z "$agent_test_type" ] && die "need agent test type" - [ -z "$shutdown_test_type" ] && die "need shutdown test type" - - # Start with a clean environment - [ "$dry_run" = 'false' ] && cleanup initial || true - - local i - - for i in $(seq "$count") - do - [ "$dry_run" = 'false' ] && \ - info "testing agent: run $i of $count" || true - run_agent \ - "$agent_test_type" \ - "$shutdown_test_type" - done - - info "testing agent: completed $count runs" -} - -handle_args() -{ - local opt - - local count="${KATA_AGENT_SHUTDOWN_TEST_COUNT}" - local shutdown_test_type="$DEFAULT_SHUTDOWN_TEST_TYPE" - local agent_test_type="$DEFAULT_AGENT_TEST_TYPE" - - while getopts "a:c:dhklnt:" opt "$@" - do - case "$opt" in - a) agent_test_type="$OPTARG" ;; - c) count="$OPTARG" ;; - d) set -o xtrace ;; - h) usage; exit 0 ;; - k) keep_logs='true' ;; - l) list_test_types; exit 0 ;; - n) dry_run='true' ;; - t) shutdown_test_type="$OPTARG" ;; - *) die "invalid option: '$opt'" ;; - esac - done - - setup - - test_agent_shutdown \ - "$count" \ - "$agent_test_type" \ - "$shutdown_test_type" -} - -main() -{ - handle_args "$@" -} - -main "$@" diff --git a/tests/functional/tracing/tracing-test.sh b/tests/functional/tracing/tracing-test.sh deleted file mode 100755 index e8256ba381..0000000000 --- a/tests/functional/tracing/tracing-test.sh +++ /dev/null @@ -1,540 +0,0 @@ -#!/bin/bash -# Copyright (c) 2019-2022 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -set -o errexit -set -o nounset -set -o pipefail -set -o errtrace - -script_name=${0##*/} -source "/etc/os-release" || source "/usr/lib/os-release" - -# Set to true if all tests pass -success="false" - -DEBUG=${DEBUG:-} - -# If set to any value, do not shut down the Jaeger service. -DEBUG_KEEP_JAEGER=${DEBUG_KEEP_JAEGER:-} -# If set to any value, do not shut down the trace forwarder. -DEBUG_KEEP_FORWARDER=${DEBUG_KEEP_FORWARDER:-} - -[ -n "$DEBUG" ] && set -o xtrace - -SCRIPT_PATH=$(dirname "$(readlink -f "$0")") -source "${SCRIPT_PATH}/../../common.bash" - -RUNTIME="io.containerd.kata.v2" -CONTAINER_IMAGE="quay.io/prometheus/busybox:latest" - -TRACE_LOG_DIR=${TRACE_LOG_DIR:-${KATA_TESTS_LOGDIR}/traces} - -KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}" - -# files for output -formatted_traces_file="kata-traces-formatted.json" -trace_summary_file="span-summary.txt" - -# tmux(1) session to run the trace forwarder in -KATA_TMUX_FORWARDER_SESSION="kata-trace-forwarder-session" - -forwarder_binary="/opt/kata/bin/kata-trace-forwarder" - -# path prefix for CLH socket path -socket_path_prefix="/run/vc/vm/" - -container_id="tracing-test" - -jaeger_server=${jaeger_server:-localhost} -jaeger_ui_port=${jaeger_ui_port:-16686} -jaeger_docker_container_name="jaeger" - -# Span data for testing: -# 1. Existence of spans in jaeger output -# 2. That the relative ordering in the data occurs -# in the jaeger output. -# This is tested to make sure specific spans exist in the output and -# that the order of spans is preserved. -# Ordered in latest in sequence to earliest. -# -# Fields are all span existing span names in relative order from latest -# to earliest call in a sequence of calls. Example (pseudocode): -# func1() { -# span = trace("func1") -# func2() -# end span -# } -# func2() { -# span = trace("func2") -# func3() -# end span -# } -# func3() { -# span = trace("func3") -# end span -# } -# The following data should result in a passing test: -# 'func3:func2:func1' -# 'func3:func2' -# 'func3:func1' -# 'func2:func1' -span_ordering_data=( - 'StartVM:createSandboxFromConfig:create:rootSpan' - 'setup_shared_namespaces:StartVM:createSandboxFromConfig:create:rootSpan' - 'start_container:Start:rootSpan' - 'stopSandbox:Stop:Start:rootSpan' -) - -# Cleanup will remove Jaeger container and -# disable tracing. -cleanup() -{ - local fp="die" - local result="failed" - local dest="$logdir" - - if [ "$success" = "true" ]; then - local fp="info" - result="passed" - - [ -z "$DEBUG_KEEP_JAEGER" ] && stop_jaeger 2>/dev/null || true - - [ -z "$DEBUG_KEEP_FORWARDER" ] && kill_trace_forwarder - - # The tests worked so remove the logs - if [ -n "$DEBUG" ]; then - eval "$fp" "test $result - logs left in '$dest'" - else - "${SCRIPT_PATH}/configure_tracing_for_kata.sh" disable - - [ -d "$logdir" ] && rm -rf "$logdir" || true - fi - - return 0 - fi - - if [ -n "${CI:-}" ]; then - # Running under the CI, so copy the logs to allow them - # to be added as test artifacts. - sudo mkdir -p "$TRACE_LOG_DIR" - sudo cp -a "$logdir"/* "$TRACE_LOG_DIR" - - dest="$TRACE_LOG_DIR" - fi - - eval "$fp" "test $result - logs left in '$dest'" -} - -# Run an operation to generate Jaeger trace spans -create_traces() -{ - sudo ctr image pull "$CONTAINER_IMAGE" - sudo ctr run --runtime "$RUNTIME" --rm "$CONTAINER_IMAGE" "$container_id" true -} - -start_jaeger() -{ - local jaeger_docker_image="jaegertracing/all-in-one:latest" - - sudo docker rm -f "${jaeger_docker_container_name}" - - # Defaults - see https://www.jaegertracing.io/docs/getting-started/ - sudo docker run -d --runtime runc --name "${jaeger_docker_container_name}" \ - -e COLLECTOR_ZIPKIN_HTTP_PORT=9411 \ - -p 5775:5775/udp \ - -p 6831:6831/udp \ - -p 6832:6832/udp \ - -p 5778:5778 \ - -p "${jaeger_ui_port}:${jaeger_ui_port}" \ - -p 14268:14268 \ - -p 9411:9411 \ - "$jaeger_docker_image" - - sudo mkdir -m 0750 -p "$TRACE_LOG_DIR" -} - -stop_jaeger() -{ - sudo docker stop "${jaeger_docker_container_name}" - sudo docker rm -f "${jaeger_docker_container_name}" -} - -get_jaeger_traces() -{ - local service="$1" - [ -z "$service" ] && die "need jaeger service name" - - local traces_url="http://${jaeger_server}:${jaeger_ui_port}/api/traces?service=${service}" - curl -s "${traces_url}" 2>/dev/null -} - -get_trace_summary() -{ - local status="$1" - [ -z "$status" ] && die "need jaeger status JSON" - - echo "${status}" | jq -S '.data[].spans[] | [.spanID, .operationName] | @sh' -} - -get_span_count() -{ - local status="$1" - [ -z "$status" ] && die "need jaeger status JSON" - - # This could be simplified but creating a variable holding the - # summary is useful in debug mode as the summary is displayed. - local trace_summary=$(get_trace_summary "$status" || true) - - [ -z "$trace_summary" ] && die "failed to get trace summary" - - local count=$(echo "${trace_summary}" | wc -l) - - [ -z "$count" ] && count=0 - - echo "$count" -} - -# Returns status from Jaeger web UI -get_jaeger_status() -{ - local service="$1" - local logdir="$2" - - [ -z "$service" ] && die "need jaeger service name" - [ -z "$logdir" ] && die "need logdir" - - local status="" - local span_count=0 - - # Find spans - status=$(get_jaeger_traces "$service" || true) - if [ -n "$status" ]; then - echo "$status" | tee "$logdir/${service}-status.json" - span_count=$(get_span_count "$status") - fi - - [ -z "$status" ] && die "failed to query Jaeger for status" - [ "$span_count" -eq 0 ] && die "failed to find any trace spans" - [ "$span_count" -le 0 ] && die "invalid span count" - - get_trace_summary "$status" > "$logdir/$trace_summary_file" -} - -# Check Jaeger spans for the specified service. -check_jaeger_output() -{ - local service="$1" - local min_spans="$2" - local logdir="$3" - - [ -z "$service" ] && die "need jaeger service name" - [ -z "$min_spans" ] && die "need minimum trace span count" - [ -z "$logdir" ] && die "need logdir" - - local status - local errors=0 - - info "Checking Jaeger status" - - status=$(get_jaeger_status "$service" "$logdir") - - #------------------------------ - # Basic sanity checks - [ -z "$status" ] && die "failed to query status via HTTP" - - local span_lines=$(echo "$status"|jq -S '.data[].spans | length') - [ -z "$span_lines" ] && die "no span status" - - # Log the spans to allow for analysis in case the test fails - echo "$status"|jq -S . > "$logdir/${service}-traces-formatted.json" - - local span_lines_count=$(echo "$span_lines"|wc -l) - - # Total up all span counts - local spans=$(echo "$span_lines"|paste -sd+ -|bc) - [ -z "$spans" ] && die "no spans" - - # Ensure total span count is numeric - echo "$spans"|grep -q "^[0-9][0-9]*$" || die "invalid span count: '$spans'" - - info "found $spans spans (across $span_lines_count traces)" - - # Validate - [ "$spans" -lt "$min_spans" ] && die "expected >= $min_spans spans, got $spans" - - # Look for common errors in span data - local error_msg=$(echo "$status"|jq -S . 2>/dev/null|grep "invalid parent span" || true) - - if [ -n "$error_msg" ]; then - errors=$((errors+1)) - warn "Found invalid parent span errors: $error_msg" - else - errors=$((errors-1)) - [ "$errors" -lt 0 ] && errors=0 - fi - - # Crude but it works - error_or_warning_msgs=$(echo "$status" |\ - jq -S . 2>/dev/null |\ - jq '.data[].spans[].warnings' |\ - grep -E -v "\" |\ - grep -E -v "\[" |\ - grep -E -v "\]" |\ - grep -E -v "clock skew" || true) # ignore clock skew error - - if [ -n "$error_or_warning_msgs" ]; then - errors=$((errors+1)) - warn "Found errors/warnings: $error_or_warning_msgs" - else - errors=$((errors-1)) - [ "$errors" -lt 0 ] && errors=0 - fi - - [ "$errors" -eq 0 ] || die "errors detected" -} - -# Check output for spans in span_ordering_data -check_spans() -{ - local logdir="$1" - [ -z "$logdir" ] && die "need logdir" - - local errors=0 - - # Check for existence of spans in output so we do not do the more - # time consuming test of checking span ordering if it will fail - info "Checking spans: ${span_ordering_data[@]}" - local missing_spans=() - for span_ordering in "${span_ordering_data[@]}"; do - local test_spans=(`echo $span_ordering | tr ':' ' '`) - for s in "${test_spans[@]}"; do - grep -q \'$s\' "$logdir/$trace_summary_file" || missing_spans+=( "$s" ) - done - done - if [ "${#missing_spans[@]}" -gt 0 ]; then - die "Fail: Missing spans: ${missing_spans[@]}" - fi - - # Check relative ordering of spans. We are not checking full trace, just - # that known calls are not out of order based on the test input. - for span_ordering in "${span_ordering_data[@]}"; do # runs maximum length of span_ordering_data - local test_spans=(`echo $span_ordering | tr ':' ' '`) - - # create array for span IDs that match span string - local span_ids=() - for span in "${test_spans[@]}"; do - grep -q \'$span\' "$logdir/$trace_summary_file" || die "Fail: Missing span: $span" - id=$(cat "$logdir/$formatted_traces_file" | jq ".data[].spans[] | select(.operationName==\"$span\") | .spanID") || die "Fail: error with span $span retrieved from traces" - id_formatted=$(echo $id | tr -d '\"' | tr '\n' ':') # format to a string for parsing later, not an array - span_ids+=("$id_formatted") - done - - # We now have 2 parallel arrays where test_spans[n] is the string name and - # span_ids[n] has all possible span IDs for that string separated by a colon - - # Since functions can be called multiple times, we may have multiple results - # for span IDs. - initial_span_ids=(`echo ${span_ids[0]} | tr ':' ' '`) - for initial in "${initial_span_ids[@]}"; do # test parents for all initial spans - # construct array of all parents of first span - local retrieved_spans=() - local current_span="$initial" - [ "$current_span" != "" ] || break - - MAX_DEPTH=20 # to prevent infinite loop due to unforeseen errors - for i in `seq 1 $MAX_DEPTH`; do - retrieved_spans+=("$current_span") - current_span=$(cat "$logdir/$formatted_traces_file" | jq ".data[].spans[] | select(.spanID==\"$current_span\") | .references[].spanID") || die "Fail: error with current_span $current_span retrieved from formatted traces" - [ "$current_span" != "" ] || break - current_span=$(echo $current_span | tr -d '"') - [ $i -lt $MAX_DEPTH ] || die "Fail: max depth reached, error in jq or adjust test depth" - done - - # Keep track of this index so we can ensure we are testing the constructed array in order - # Increment when there is a match between test case and constructed path - local retrieved_ids_index=0 - - local matches=0 - local index=0 - - # TODO: Optimize - for ((index=0; index<${#span_ids[@]}; index++)); do - for ((r_index=$retrieved_ids_index; r_index<${#retrieved_spans[@]}; r_index++)); do - grep -q "${retrieved_spans[$r_index]}" <<< ${span_ids[$index]} && (( retrieved_ids_index=$r_index+1 )) && (( matches+=1 )) && break - done - done - - local last_initial_span_index=${#initial_span_ids[@]}-1 - if [ $matches -eq ${#span_ids[@]} ]; then - info "Pass: spans \"${test_spans[@]}\" found in jaeger output" - break - elif [ $matches -lt ${#span_ids[@]} ] && [ "$initial" = "${initial_span_ids[$last_initial_span_index]}" ]; then - die "Fail: spans \"${test_spans[@]}\" NOT in jaeger output" - fi - # else repeat test for next initial span ID - done - done - - -} - -run_trace_forwarder() -{ - if [ $KATA_HYPERVISOR = "qemu" ]; then - tmux new-session -d -s "$KATA_TMUX_FORWARDER_SESSION" "sudo $forwarder_binary -l trace" - elif [ $KATA_HYPERVISOR = "clh" ]; then - # CLH uses hybrid VSOCK which uses a local UNIX socket that we need to specify - socket_path_template=$socket_path_prefix$(sudo kata-runtime env --json | jq '.Hypervisor.SocketPath') - socket_path=$(echo "$socket_path_template" | sed "s/{ID}/${container_id}/g" | tr -d '"') - sudo mkdir -p $(dirname "$socket_path") - - tmux new-session -d -s "$KATA_TMUX_FORWARDER_SESSION" "sudo $forwarder_binary -l trace --socket-path $socket_path" - else - die "Unsupported hypervisor $KATA_HYPERVISOR" - fi - - info "Verifying trace forwarder in tmux session $KATA_TMUX_FORWARDER_SESSION" - - local cmd="tmux capture-pane -pt $KATA_TMUX_FORWARDER_SESSION | tr -d '\n' | tr -d '\"' | grep -q \"source:kata-trace-forwarder\"" - waitForProcess 10 1 "$cmd" -} - -kill_trace_forwarder() -{ - tmux kill-session -t "$KATA_TMUX_FORWARDER_SESSION" -} - -setup() -{ - # containerd must be running in order to use ctr to generate traces - restart_containerd_service - - local cmds=() - # For container manager (containerd) - cmds+=('ctr') - # For jaeger - cmds+=('docker') - # For launching processes - cmds+=('tmux') - - local cmd - for cmd in "${cmds[@]}" - do - local result - result=$(command -v "$cmd" || true) - [ -n "$result" ] || die "need $cmd" - done - - run_trace_forwarder - - start_jaeger - - "${SCRIPT_PATH}/configure_tracing_for_kata.sh" enable -} - -run_test() -{ - local service="$1" - local min_spans="$2" - local logdir="$3" - - [ -z "$service" ] && die "need service name" - [ -z "$min_spans" ] && die "need minimum span count" - [ -z "$logdir" ] && die "need logdir" - - info "Running test for service '$service'" - - logdir="$logdir/$service" - mkdir -p "$logdir" - - check_jaeger_output "$service" "$min_spans" "$logdir" - check_spans "$logdir" - - info "test passed" -} - -run_tests() -{ - # List of services to check - # - # Format: "name:min-spans" - # - # Where: - # - # - 'name' is the Jaeger service name. - # - 'min-spans' is an integer representing the minimum number of - # trace spans this service should generate. - # - # Notes: - # - # - Uses an array to ensure predictable ordering. - # - All services listed are expected to generate traces - # when create_traces() is called a single time. - local -a services - - services+=("kata:125") - - create_traces - - logdir=$(mktemp -d) - - for service in "${services[@]}" - do - local name=$(echo "${service}"|cut -d: -f1) - local min_spans=$(echo "${service}"|cut -d: -f2) - - run_test "${name}" "${min_spans}" "${logdir}" - done - - info "all tests passed" - success="true" -} - -usage() -{ - cat <] - -Commands: - - clean - Perform cleanup phase only. - help - Show usage. - run - Only run tests (no setup or cleanup). - setup - Perform setup phase only. - -Environment variables: - - CI - if set, save logs of all tests to ${TRACE_LOG_DIR}. - DEBUG - if set, enable tracing and do not cleanup after tests. - DEBUG_KEEP_JAEGER - if set, do not shut down the Jaeger service. - DEBUG_KEEP_FORWARDER - if set, do not shut down the trace forwarder. - -Notes: - - Runs all test phases if no arguments are specified. - -EOF -} - -main() -{ - local cmd="${1:-}" - - case "$cmd" in - clean) success="true"; cleanup; exit 0;; - help|-h|-help|--help) usage; exit 0;; - run) run_tests; exit 0;; - setup) setup; exit 0;; - esac - - trap cleanup EXIT - - setup - - run_tests -} - -main "$@" diff --git a/tests/integration/docker/gha-run.sh b/tests/integration/docker/gha-run.sh index 0a01006684..cab3401ea1 100755 --- a/tests/integration/docker/gha-run.sh +++ b/tests/integration/docker/gha-run.sh @@ -16,7 +16,21 @@ source "${docker_dir}/../../common.bash" function install_dependencies() { info "Installing the dependencies needed for running the docker smoke test" - install_docker + # Add Docker's official GPG key: + sudo apt-get update + sudo apt-get -y install ca-certificates curl gnupg + sudo install -m 0755 -d /etc/apt/keyrings + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg + sudo chmod a+r /etc/apt/keyrings/docker.gpg + + # Add the repository to Apt sources: + echo \ + "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ + "$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + sudo apt-get update + + sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin } function run() {