1
0
mirror of https://github.com/kata-containers/kata-containers.git synced 2025-05-07 07:57:43 +00:00
kata-containers/tests/functional/tracing/tracing-test.sh
stevenhorsman 58672068ff shellcheck: Fix shellcheck SC2145
> Argument mixes string and array. Use * or separate argument.

- Swap echos for printfs and improve formatting
- Replace $@ with $*
- Split arrays into separate arguments

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2025-03-04 09:35:46 +00:00

532 lines
14 KiB
Bash
Executable File

#!/bin/bash
# Copyright (c) 2019-2022 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
set -o errexit
set -o nounset
set -o pipefail
set -o errtrace
script_name=${0##*/}
source "/etc/os-release" || source "/usr/lib/os-release"
# Set to true if all tests pass
success="false"
DEBUG=${DEBUG:-}
# If set to any value, do not shut down the Jaeger service.
DEBUG_KEEP_JAEGER=${DEBUG_KEEP_JAEGER:-}
# If set to any value, do not shut down the trace forwarder.
DEBUG_KEEP_FORWARDER=${DEBUG_KEEP_FORWARDER:-}
[ -n "$DEBUG" ] && set -o xtrace
SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
source "${SCRIPT_PATH}/../../common.bash"
RUNTIME="io.containerd.kata.v2"
CONTAINER_IMAGE="quay.io/prometheus/busybox:latest"
TRACE_LOG_DIR=${TRACE_LOG_DIR:-${KATA_TESTS_LOGDIR}/traces}
KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}"
# files for output
formatted_traces_file="kata-traces-formatted.json"
trace_summary_file="span-summary.txt"
# tmux(1) session to run the trace forwarder in
KATA_TMUX_FORWARDER_SESSION="kata-trace-forwarder-session"
forwarder_binary="/opt/kata/bin/kata-trace-forwarder"
# path prefix for CLH socket path
socket_path_prefix="/run/vc/vm/"
container_id="tracing-test"
jaeger_server=${jaeger_server:-localhost}
jaeger_ui_port=${jaeger_ui_port:-16686}
jaeger_docker_container_name="jaeger"
# Span data for testing:
# 1. Existence of spans in jaeger output
# 2. That the relative ordering in the data occurs
# in the jaeger output.
# This is tested to make sure specific spans exist in the output and
# that the order of spans is preserved.
# Ordered in latest in sequence to earliest.
#
# Fields are all span existing span names in relative order from latest
# to earliest call in a sequence of calls. Example (pseudocode):
# func1() {
# span = trace("func1")
# func2()
# end span
# }
# func2() {
# span = trace("func2")
# func3()
# end span
# }
# func3() {
# span = trace("func3")
# end span
# }
# The following data should result in a passing test:
# 'func3:func2:func1'
# 'func3:func2'
# 'func3:func1'
# 'func2:func1'
span_ordering_data=(
'StartVM:createSandboxFromConfig:create:rootSpan'
'setup_shared_namespaces:StartVM:createSandboxFromConfig:create:rootSpan'
'start_container:Start:rootSpan'
'stopSandbox:Stop:Start:rootSpan'
)
# Cleanup will remove Jaeger container and
# disable tracing.
cleanup()
{
local fp="die"
local result="failed"
local dest="$logdir"
if [ "$success" = "true" ]; then
local fp="info"
result="passed"
[ -z "$DEBUG_KEEP_JAEGER" ] && stop_jaeger 2>/dev/null || true
[ -z "$DEBUG_KEEP_FORWARDER" ] && kill_trace_forwarder
# The tests worked so remove the logs
if [ -n "$DEBUG" ]; then
eval "$fp" "test $result - logs left in '$dest'"
else
"${SCRIPT_PATH}/configure_tracing_for_kata.sh" disable
[ -d "$logdir" ] && rm -rf "$logdir" || true
fi
return 0
fi
eval "$fp" "test $result - logs left in '$dest'"
}
# Run an operation to generate Jaeger trace spans
create_traces()
{
sudo ctr image pull "$CONTAINER_IMAGE"
sudo ctr run --runtime "$RUNTIME" --rm "$CONTAINER_IMAGE" "$container_id" true
}
start_jaeger()
{
local jaeger_docker_image="jaegertracing/all-in-one:latest"
sudo docker rm -f "${jaeger_docker_container_name}"
# Defaults - see https://www.jaegertracing.io/docs/getting-started/
sudo docker run -d --runtime runc --name "${jaeger_docker_container_name}" \
-e COLLECTOR_ZIPKIN_HTTP_PORT=9411 \
-p 5775:5775/udp \
-p 6831:6831/udp \
-p 6832:6832/udp \
-p 5778:5778 \
-p "${jaeger_ui_port}:${jaeger_ui_port}" \
-p 14268:14268 \
-p 9411:9411 \
"$jaeger_docker_image"
sudo mkdir -m 0750 -p "$TRACE_LOG_DIR"
}
stop_jaeger()
{
sudo docker stop "${jaeger_docker_container_name}"
sudo docker rm -f "${jaeger_docker_container_name}"
}
get_jaeger_traces()
{
local service="$1"
[ -z "$service" ] && die "need jaeger service name"
local traces_url="http://${jaeger_server}:${jaeger_ui_port}/api/traces?service=${service}"
curl -s "${traces_url}" 2>/dev/null
}
get_trace_summary()
{
local status="$1"
[ -z "$status" ] && die "need jaeger status JSON"
echo "${status}" | jq -S '.data[].spans[] | [.spanID, .operationName] | @sh'
}
get_span_count()
{
local status="$1"
[ -z "$status" ] && die "need jaeger status JSON"
# This could be simplified but creating a variable holding the
# summary is useful in debug mode as the summary is displayed.
local trace_summary=$(get_trace_summary "$status" || true)
[ -z "$trace_summary" ] && die "failed to get trace summary"
local count=$(echo "${trace_summary}" | wc -l)
[ -z "$count" ] && count=0
echo "$count"
}
# Returns status from Jaeger web UI
get_jaeger_status()
{
local service="$1"
local logdir="$2"
[ -z "$service" ] && die "need jaeger service name"
[ -z "$logdir" ] && die "need logdir"
local status=""
local span_count=0
# Find spans
status=$(get_jaeger_traces "$service" || true)
if [ -n "$status" ]; then
echo "$status" | tee "$logdir/${service}-status.json"
span_count=$(get_span_count "$status")
fi
[ -z "$status" ] && die "failed to query Jaeger for status"
[ "$span_count" -eq 0 ] && die "failed to find any trace spans"
[ "$span_count" -le 0 ] && die "invalid span count"
get_trace_summary "$status" > "$logdir/$trace_summary_file"
}
# Check Jaeger spans for the specified service.
check_jaeger_output()
{
local service="$1"
local min_spans="$2"
local logdir="$3"
[ -z "$service" ] && die "need jaeger service name"
[ -z "$min_spans" ] && die "need minimum trace span count"
[ -z "$logdir" ] && die "need logdir"
local status
local errors=0
info "Checking Jaeger status"
status=$(get_jaeger_status "$service" "$logdir")
#------------------------------
# Basic sanity checks
[ -z "$status" ] && die "failed to query status via HTTP"
local span_lines=$(echo "$status"|jq -S '.data[].spans | length')
[ -z "$span_lines" ] && die "no span status"
# Log the spans to allow for analysis in case the test fails
echo "$status"|jq -S . > "$logdir/${service}-traces-formatted.json"
local span_lines_count=$(echo "$span_lines"|wc -l)
# Total up all span counts
local spans=$(echo "$span_lines"|paste -sd+ -|bc)
[ -z "$spans" ] && die "no spans"
# Ensure total span count is numeric
echo "$spans"|grep -q "^[0-9][0-9]*$" || die "invalid span count: '$spans'"
info "found $spans spans (across $span_lines_count traces)"
# Validate
[ "$spans" -lt "$min_spans" ] && die "expected >= $min_spans spans, got $spans"
# Look for common errors in span data
local error_msg=$(echo "$status"|jq -S . 2>/dev/null|grep "invalid parent span" || true)
if [ -n "$error_msg" ]; then
errors=$((errors+1))
warn "Found invalid parent span errors: $error_msg"
else
errors=$((errors-1))
[ "$errors" -lt 0 ] && errors=0
fi
# Crude but it works
error_or_warning_msgs=$(echo "$status" |\
jq -S . 2>/dev/null |\
jq '.data[].spans[].warnings' |\
grep -E -v "\<null\>" |\
grep -E -v "\[" |\
grep -E -v "\]" |\
grep -E -v "clock skew" || true) # ignore clock skew error
if [ -n "$error_or_warning_msgs" ]; then
errors=$((errors+1))
warn "Found errors/warnings: $error_or_warning_msgs"
else
errors=$((errors-1))
[ "$errors" -lt 0 ] && errors=0
fi
[ "$errors" -eq 0 ] || die "errors detected"
}
# Check output for spans in span_ordering_data
check_spans()
{
local logdir="$1"
[ -z "$logdir" ] && die "need logdir"
local errors=0
# Check for existence of spans in output so we do not do the more
# time consuming test of checking span ordering if it will fail
info "Checking spans:" "${span_ordering_data[@]}"
local missing_spans=()
for span_ordering in "${span_ordering_data[@]}"; do
local test_spans=(`echo $span_ordering | tr ':' ' '`)
for s in "${test_spans[@]}"; do
grep -q \'$s\' "$logdir/$trace_summary_file" || missing_spans+=( "$s" )
done
done
if [ "${#missing_spans[@]}" -gt 0 ]; then
die "Fail: Missing spans:" "${missing_spans[@]}"
fi
# Check relative ordering of spans. We are not checking full trace, just
# that known calls are not out of order based on the test input.
for span_ordering in "${span_ordering_data[@]}"; do # runs maximum length of span_ordering_data
local test_spans=(`echo $span_ordering | tr ':' ' '`)
# create array for span IDs that match span string
local span_ids=()
for span in "${test_spans[@]}"; do
grep -q \'$span\' "$logdir/$trace_summary_file" || die "Fail: Missing span: $span"
id=$(cat "$logdir/$formatted_traces_file" | jq ".data[].spans[] | select(.operationName==\"$span\") | .spanID") || die "Fail: error with span $span retrieved from traces"
id_formatted=$(echo $id | tr -d '\"' | tr '\n' ':') # format to a string for parsing later, not an array
span_ids+=("$id_formatted")
done
# We now have 2 parallel arrays where test_spans[n] is the string name and
# span_ids[n] has all possible span IDs for that string separated by a colon
# Since functions can be called multiple times, we may have multiple results
# for span IDs.
initial_span_ids=(`echo ${span_ids[0]} | tr ':' ' '`)
for initial in "${initial_span_ids[@]}"; do # test parents for all initial spans
# construct array of all parents of first span
local retrieved_spans=()
local current_span="$initial"
[ "$current_span" != "" ] || break
MAX_DEPTH=20 # to prevent infinite loop due to unforeseen errors
for i in `seq 1 $MAX_DEPTH`; do
retrieved_spans+=("$current_span")
current_span=$(cat "$logdir/$formatted_traces_file" | jq ".data[].spans[] | select(.spanID==\"$current_span\") | .references[].spanID") || die "Fail: error with current_span $current_span retrieved from formatted traces"
[ "$current_span" != "" ] || break
current_span=$(echo $current_span | tr -d '"')
[ $i -lt $MAX_DEPTH ] || die "Fail: max depth reached, error in jq or adjust test depth"
done
# Keep track of this index so we can ensure we are testing the constructed array in order
# Increment when there is a match between test case and constructed path
local retrieved_ids_index=0
local matches=0
local index=0
# TODO: Optimize
for ((index=0; index<${#span_ids[@]}; index++)); do
for ((r_index=$retrieved_ids_index; r_index<${#retrieved_spans[@]}; r_index++)); do
grep -q "${retrieved_spans[$r_index]}" <<< ${span_ids[$index]} && (( retrieved_ids_index=$r_index+1 )) && (( matches+=1 )) && break
done
done
local last_initial_span_index=${#initial_span_ids[@]}-1
if [ $matches -eq ${#span_ids[@]} ]; then
info "Pass: spans \"" "${test_spans[@]}" "\" found in jaeger output"
break
elif [ $matches -lt ${#span_ids[@]} ] && [ "$initial" = "${initial_span_ids[$last_initial_span_index]}" ]; then
die "Fail: spans \"" "${test_spans[@]}" "\" NOT in jaeger output"
fi
# else repeat test for next initial span ID
done
done
}
run_trace_forwarder()
{
if [ $KATA_HYPERVISOR = "qemu" ]; then
tmux new-session -d -s "$KATA_TMUX_FORWARDER_SESSION" "sudo $forwarder_binary -l trace"
elif [ $KATA_HYPERVISOR = "clh" ]; then
# CLH uses hybrid VSOCK which uses a local UNIX socket that we need to specify
socket_path_template=$socket_path_prefix$(sudo kata-runtime env --json | jq '.Hypervisor.SocketPath')
socket_path=$(echo "$socket_path_template" | sed "s/{ID}/${container_id}/g" | tr -d '"')
sudo mkdir -p $(dirname "$socket_path")
tmux new-session -d -s "$KATA_TMUX_FORWARDER_SESSION" "sudo $forwarder_binary -l trace --socket-path $socket_path"
else
die "Unsupported hypervisor $KATA_HYPERVISOR"
fi
info "Verifying trace forwarder in tmux session $KATA_TMUX_FORWARDER_SESSION"
local cmd="tmux capture-pane -pt $KATA_TMUX_FORWARDER_SESSION | tr -d '\n' | tr -d '\"' | grep -q \"source:kata-trace-forwarder\""
waitForProcess 10 1 "$cmd"
}
kill_trace_forwarder()
{
tmux kill-session -t "$KATA_TMUX_FORWARDER_SESSION"
}
setup()
{
# containerd must be running in order to use ctr to generate traces
restart_containerd_service
local cmds=()
# For container manager (containerd)
cmds+=('ctr')
# For jaeger
cmds+=('docker')
# For launching processes
cmds+=('tmux')
local cmd
for cmd in "${cmds[@]}"
do
local result
result=$(command -v "$cmd" || true)
[ -n "$result" ] || die "need $cmd"
done
run_trace_forwarder
start_jaeger
"${SCRIPT_PATH}/configure_tracing_for_kata.sh" enable
}
run_test()
{
local service="$1"
local min_spans="$2"
local logdir="$3"
[ -z "$service" ] && die "need service name"
[ -z "$min_spans" ] && die "need minimum span count"
[ -z "$logdir" ] && die "need logdir"
info "Running test for service '$service'"
logdir="$logdir/$service"
mkdir -p "$logdir"
check_jaeger_output "$service" "$min_spans" "$logdir"
check_spans "$logdir"
info "test passed"
}
run_tests()
{
# List of services to check
#
# Format: "name:min-spans"
#
# Where:
#
# - 'name' is the Jaeger service name.
# - 'min-spans' is an integer representing the minimum number of
# trace spans this service should generate.
#
# Notes:
#
# - Uses an array to ensure predictable ordering.
# - All services listed are expected to generate traces
# when create_traces() is called a single time.
local -a services
services+=("kata:125")
create_traces
logdir=$(mktemp -d)
for service in "${services[@]}"
do
local name=$(echo "${service}"|cut -d: -f1)
local min_spans=$(echo "${service}"|cut -d: -f2)
run_test "${name}" "${min_spans}" "${logdir}"
done
info "all tests passed"
success="true"
}
usage()
{
cat <<EOF
Usage: $script_name [<command>]
Commands:
clean - Perform cleanup phase only.
help - Show usage.
run - Only run tests (no setup or cleanup).
setup - Perform setup phase only.
Environment variables:
CI - if set, save logs of all tests to ${TRACE_LOG_DIR}.
DEBUG - if set, enable tracing and do not cleanup after tests.
DEBUG_KEEP_JAEGER - if set, do not shut down the Jaeger service.
DEBUG_KEEP_FORWARDER - if set, do not shut down the trace forwarder.
Notes:
- Runs all test phases if no arguments are specified.
EOF
}
main()
{
local cmd="${1:-}"
case "$cmd" in
clean) success="true"; cleanup; exit 0;;
help|-h|-help|--help) usage; exit 0;;
run) run_tests; exit 0;;
setup) setup; exit 0;;
esac
trap cleanup EXIT
setup
run_tests
}
main "$@"