mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-05-07 07:57:43 +00:00
> Argument mixes string and array. Use * or separate argument. - Swap echos for printfs and improve formatting - Replace $@ with $* - Split arrays into separate arguments Signed-off-by: stevenhorsman <steven@uk.ibm.com>
532 lines
14 KiB
Bash
Executable File
532 lines
14 KiB
Bash
Executable File
#!/bin/bash
|
|
# Copyright (c) 2019-2022 Intel Corporation
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
|
|
set -o errexit
|
|
set -o nounset
|
|
set -o pipefail
|
|
set -o errtrace
|
|
|
|
script_name=${0##*/}
|
|
source "/etc/os-release" || source "/usr/lib/os-release"
|
|
|
|
# Set to true if all tests pass
|
|
success="false"
|
|
|
|
DEBUG=${DEBUG:-}
|
|
|
|
# If set to any value, do not shut down the Jaeger service.
|
|
DEBUG_KEEP_JAEGER=${DEBUG_KEEP_JAEGER:-}
|
|
# If set to any value, do not shut down the trace forwarder.
|
|
DEBUG_KEEP_FORWARDER=${DEBUG_KEEP_FORWARDER:-}
|
|
|
|
[ -n "$DEBUG" ] && set -o xtrace
|
|
|
|
SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
|
|
source "${SCRIPT_PATH}/../../common.bash"
|
|
|
|
RUNTIME="io.containerd.kata.v2"
|
|
CONTAINER_IMAGE="quay.io/prometheus/busybox:latest"
|
|
|
|
TRACE_LOG_DIR=${TRACE_LOG_DIR:-${KATA_TESTS_LOGDIR}/traces}
|
|
|
|
KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}"
|
|
|
|
# files for output
|
|
formatted_traces_file="kata-traces-formatted.json"
|
|
trace_summary_file="span-summary.txt"
|
|
|
|
# tmux(1) session to run the trace forwarder in
|
|
KATA_TMUX_FORWARDER_SESSION="kata-trace-forwarder-session"
|
|
|
|
forwarder_binary="/opt/kata/bin/kata-trace-forwarder"
|
|
|
|
# path prefix for CLH socket path
|
|
socket_path_prefix="/run/vc/vm/"
|
|
|
|
container_id="tracing-test"
|
|
|
|
jaeger_server=${jaeger_server:-localhost}
|
|
jaeger_ui_port=${jaeger_ui_port:-16686}
|
|
jaeger_docker_container_name="jaeger"
|
|
|
|
# Span data for testing:
|
|
# 1. Existence of spans in jaeger output
|
|
# 2. That the relative ordering in the data occurs
|
|
# in the jaeger output.
|
|
# This is tested to make sure specific spans exist in the output and
|
|
# that the order of spans is preserved.
|
|
# Ordered in latest in sequence to earliest.
|
|
#
|
|
# Fields are all span existing span names in relative order from latest
|
|
# to earliest call in a sequence of calls. Example (pseudocode):
|
|
# func1() {
|
|
# span = trace("func1")
|
|
# func2()
|
|
# end span
|
|
# }
|
|
# func2() {
|
|
# span = trace("func2")
|
|
# func3()
|
|
# end span
|
|
# }
|
|
# func3() {
|
|
# span = trace("func3")
|
|
# end span
|
|
# }
|
|
# The following data should result in a passing test:
|
|
# 'func3:func2:func1'
|
|
# 'func3:func2'
|
|
# 'func3:func1'
|
|
# 'func2:func1'
|
|
span_ordering_data=(
|
|
'StartVM:createSandboxFromConfig:create:rootSpan'
|
|
'setup_shared_namespaces:StartVM:createSandboxFromConfig:create:rootSpan'
|
|
'start_container:Start:rootSpan'
|
|
'stopSandbox:Stop:Start:rootSpan'
|
|
)
|
|
|
|
# Cleanup will remove Jaeger container and
|
|
# disable tracing.
|
|
cleanup()
|
|
{
|
|
local fp="die"
|
|
local result="failed"
|
|
local dest="$logdir"
|
|
|
|
if [ "$success" = "true" ]; then
|
|
local fp="info"
|
|
result="passed"
|
|
|
|
[ -z "$DEBUG_KEEP_JAEGER" ] && stop_jaeger 2>/dev/null || true
|
|
|
|
[ -z "$DEBUG_KEEP_FORWARDER" ] && kill_trace_forwarder
|
|
|
|
# The tests worked so remove the logs
|
|
if [ -n "$DEBUG" ]; then
|
|
eval "$fp" "test $result - logs left in '$dest'"
|
|
else
|
|
"${SCRIPT_PATH}/configure_tracing_for_kata.sh" disable
|
|
|
|
[ -d "$logdir" ] && rm -rf "$logdir" || true
|
|
fi
|
|
|
|
return 0
|
|
fi
|
|
|
|
eval "$fp" "test $result - logs left in '$dest'"
|
|
}
|
|
|
|
# Run an operation to generate Jaeger trace spans
|
|
create_traces()
|
|
{
|
|
sudo ctr image pull "$CONTAINER_IMAGE"
|
|
sudo ctr run --runtime "$RUNTIME" --rm "$CONTAINER_IMAGE" "$container_id" true
|
|
}
|
|
|
|
start_jaeger()
|
|
{
|
|
local jaeger_docker_image="jaegertracing/all-in-one:latest"
|
|
|
|
sudo docker rm -f "${jaeger_docker_container_name}"
|
|
|
|
# Defaults - see https://www.jaegertracing.io/docs/getting-started/
|
|
sudo docker run -d --runtime runc --name "${jaeger_docker_container_name}" \
|
|
-e COLLECTOR_ZIPKIN_HTTP_PORT=9411 \
|
|
-p 5775:5775/udp \
|
|
-p 6831:6831/udp \
|
|
-p 6832:6832/udp \
|
|
-p 5778:5778 \
|
|
-p "${jaeger_ui_port}:${jaeger_ui_port}" \
|
|
-p 14268:14268 \
|
|
-p 9411:9411 \
|
|
"$jaeger_docker_image"
|
|
|
|
sudo mkdir -m 0750 -p "$TRACE_LOG_DIR"
|
|
}
|
|
|
|
stop_jaeger()
|
|
{
|
|
sudo docker stop "${jaeger_docker_container_name}"
|
|
sudo docker rm -f "${jaeger_docker_container_name}"
|
|
}
|
|
|
|
get_jaeger_traces()
|
|
{
|
|
local service="$1"
|
|
[ -z "$service" ] && die "need jaeger service name"
|
|
|
|
local traces_url="http://${jaeger_server}:${jaeger_ui_port}/api/traces?service=${service}"
|
|
curl -s "${traces_url}" 2>/dev/null
|
|
}
|
|
|
|
get_trace_summary()
|
|
{
|
|
local status="$1"
|
|
[ -z "$status" ] && die "need jaeger status JSON"
|
|
|
|
echo "${status}" | jq -S '.data[].spans[] | [.spanID, .operationName] | @sh'
|
|
}
|
|
|
|
get_span_count()
|
|
{
|
|
local status="$1"
|
|
[ -z "$status" ] && die "need jaeger status JSON"
|
|
|
|
# This could be simplified but creating a variable holding the
|
|
# summary is useful in debug mode as the summary is displayed.
|
|
local trace_summary=$(get_trace_summary "$status" || true)
|
|
|
|
[ -z "$trace_summary" ] && die "failed to get trace summary"
|
|
|
|
local count=$(echo "${trace_summary}" | wc -l)
|
|
|
|
[ -z "$count" ] && count=0
|
|
|
|
echo "$count"
|
|
}
|
|
|
|
# Returns status from Jaeger web UI
|
|
get_jaeger_status()
|
|
{
|
|
local service="$1"
|
|
local logdir="$2"
|
|
|
|
[ -z "$service" ] && die "need jaeger service name"
|
|
[ -z "$logdir" ] && die "need logdir"
|
|
|
|
local status=""
|
|
local span_count=0
|
|
|
|
# Find spans
|
|
status=$(get_jaeger_traces "$service" || true)
|
|
if [ -n "$status" ]; then
|
|
echo "$status" | tee "$logdir/${service}-status.json"
|
|
span_count=$(get_span_count "$status")
|
|
fi
|
|
|
|
[ -z "$status" ] && die "failed to query Jaeger for status"
|
|
[ "$span_count" -eq 0 ] && die "failed to find any trace spans"
|
|
[ "$span_count" -le 0 ] && die "invalid span count"
|
|
|
|
get_trace_summary "$status" > "$logdir/$trace_summary_file"
|
|
}
|
|
|
|
# Check Jaeger spans for the specified service.
|
|
check_jaeger_output()
|
|
{
|
|
local service="$1"
|
|
local min_spans="$2"
|
|
local logdir="$3"
|
|
|
|
[ -z "$service" ] && die "need jaeger service name"
|
|
[ -z "$min_spans" ] && die "need minimum trace span count"
|
|
[ -z "$logdir" ] && die "need logdir"
|
|
|
|
local status
|
|
local errors=0
|
|
|
|
info "Checking Jaeger status"
|
|
|
|
status=$(get_jaeger_status "$service" "$logdir")
|
|
|
|
#------------------------------
|
|
# Basic sanity checks
|
|
[ -z "$status" ] && die "failed to query status via HTTP"
|
|
|
|
local span_lines=$(echo "$status"|jq -S '.data[].spans | length')
|
|
[ -z "$span_lines" ] && die "no span status"
|
|
|
|
# Log the spans to allow for analysis in case the test fails
|
|
echo "$status"|jq -S . > "$logdir/${service}-traces-formatted.json"
|
|
|
|
local span_lines_count=$(echo "$span_lines"|wc -l)
|
|
|
|
# Total up all span counts
|
|
local spans=$(echo "$span_lines"|paste -sd+ -|bc)
|
|
[ -z "$spans" ] && die "no spans"
|
|
|
|
# Ensure total span count is numeric
|
|
echo "$spans"|grep -q "^[0-9][0-9]*$" || die "invalid span count: '$spans'"
|
|
|
|
info "found $spans spans (across $span_lines_count traces)"
|
|
|
|
# Validate
|
|
[ "$spans" -lt "$min_spans" ] && die "expected >= $min_spans spans, got $spans"
|
|
|
|
# Look for common errors in span data
|
|
local error_msg=$(echo "$status"|jq -S . 2>/dev/null|grep "invalid parent span" || true)
|
|
|
|
if [ -n "$error_msg" ]; then
|
|
errors=$((errors+1))
|
|
warn "Found invalid parent span errors: $error_msg"
|
|
else
|
|
errors=$((errors-1))
|
|
[ "$errors" -lt 0 ] && errors=0
|
|
fi
|
|
|
|
# Crude but it works
|
|
error_or_warning_msgs=$(echo "$status" |\
|
|
jq -S . 2>/dev/null |\
|
|
jq '.data[].spans[].warnings' |\
|
|
grep -E -v "\<null\>" |\
|
|
grep -E -v "\[" |\
|
|
grep -E -v "\]" |\
|
|
grep -E -v "clock skew" || true) # ignore clock skew error
|
|
|
|
if [ -n "$error_or_warning_msgs" ]; then
|
|
errors=$((errors+1))
|
|
warn "Found errors/warnings: $error_or_warning_msgs"
|
|
else
|
|
errors=$((errors-1))
|
|
[ "$errors" -lt 0 ] && errors=0
|
|
fi
|
|
|
|
[ "$errors" -eq 0 ] || die "errors detected"
|
|
}
|
|
|
|
# Check output for spans in span_ordering_data
|
|
check_spans()
|
|
{
|
|
local logdir="$1"
|
|
[ -z "$logdir" ] && die "need logdir"
|
|
|
|
local errors=0
|
|
|
|
# Check for existence of spans in output so we do not do the more
|
|
# time consuming test of checking span ordering if it will fail
|
|
info "Checking spans:" "${span_ordering_data[@]}"
|
|
local missing_spans=()
|
|
for span_ordering in "${span_ordering_data[@]}"; do
|
|
local test_spans=(`echo $span_ordering | tr ':' ' '`)
|
|
for s in "${test_spans[@]}"; do
|
|
grep -q \'$s\' "$logdir/$trace_summary_file" || missing_spans+=( "$s" )
|
|
done
|
|
done
|
|
if [ "${#missing_spans[@]}" -gt 0 ]; then
|
|
die "Fail: Missing spans:" "${missing_spans[@]}"
|
|
fi
|
|
|
|
# Check relative ordering of spans. We are not checking full trace, just
|
|
# that known calls are not out of order based on the test input.
|
|
for span_ordering in "${span_ordering_data[@]}"; do # runs maximum length of span_ordering_data
|
|
local test_spans=(`echo $span_ordering | tr ':' ' '`)
|
|
|
|
# create array for span IDs that match span string
|
|
local span_ids=()
|
|
for span in "${test_spans[@]}"; do
|
|
grep -q \'$span\' "$logdir/$trace_summary_file" || die "Fail: Missing span: $span"
|
|
id=$(cat "$logdir/$formatted_traces_file" | jq ".data[].spans[] | select(.operationName==\"$span\") | .spanID") || die "Fail: error with span $span retrieved from traces"
|
|
id_formatted=$(echo $id | tr -d '\"' | tr '\n' ':') # format to a string for parsing later, not an array
|
|
span_ids+=("$id_formatted")
|
|
done
|
|
|
|
# We now have 2 parallel arrays where test_spans[n] is the string name and
|
|
# span_ids[n] has all possible span IDs for that string separated by a colon
|
|
|
|
# Since functions can be called multiple times, we may have multiple results
|
|
# for span IDs.
|
|
initial_span_ids=(`echo ${span_ids[0]} | tr ':' ' '`)
|
|
for initial in "${initial_span_ids[@]}"; do # test parents for all initial spans
|
|
# construct array of all parents of first span
|
|
local retrieved_spans=()
|
|
local current_span="$initial"
|
|
[ "$current_span" != "" ] || break
|
|
|
|
MAX_DEPTH=20 # to prevent infinite loop due to unforeseen errors
|
|
for i in `seq 1 $MAX_DEPTH`; do
|
|
retrieved_spans+=("$current_span")
|
|
current_span=$(cat "$logdir/$formatted_traces_file" | jq ".data[].spans[] | select(.spanID==\"$current_span\") | .references[].spanID") || die "Fail: error with current_span $current_span retrieved from formatted traces"
|
|
[ "$current_span" != "" ] || break
|
|
current_span=$(echo $current_span | tr -d '"')
|
|
[ $i -lt $MAX_DEPTH ] || die "Fail: max depth reached, error in jq or adjust test depth"
|
|
done
|
|
|
|
# Keep track of this index so we can ensure we are testing the constructed array in order
|
|
# Increment when there is a match between test case and constructed path
|
|
local retrieved_ids_index=0
|
|
|
|
local matches=0
|
|
local index=0
|
|
|
|
# TODO: Optimize
|
|
for ((index=0; index<${#span_ids[@]}; index++)); do
|
|
for ((r_index=$retrieved_ids_index; r_index<${#retrieved_spans[@]}; r_index++)); do
|
|
grep -q "${retrieved_spans[$r_index]}" <<< ${span_ids[$index]} && (( retrieved_ids_index=$r_index+1 )) && (( matches+=1 )) && break
|
|
done
|
|
done
|
|
|
|
local last_initial_span_index=${#initial_span_ids[@]}-1
|
|
if [ $matches -eq ${#span_ids[@]} ]; then
|
|
info "Pass: spans \"" "${test_spans[@]}" "\" found in jaeger output"
|
|
break
|
|
elif [ $matches -lt ${#span_ids[@]} ] && [ "$initial" = "${initial_span_ids[$last_initial_span_index]}" ]; then
|
|
die "Fail: spans \"" "${test_spans[@]}" "\" NOT in jaeger output"
|
|
fi
|
|
# else repeat test for next initial span ID
|
|
done
|
|
done
|
|
|
|
|
|
}
|
|
|
|
run_trace_forwarder()
|
|
{
|
|
if [ $KATA_HYPERVISOR = "qemu" ]; then
|
|
tmux new-session -d -s "$KATA_TMUX_FORWARDER_SESSION" "sudo $forwarder_binary -l trace"
|
|
elif [ $KATA_HYPERVISOR = "clh" ]; then
|
|
# CLH uses hybrid VSOCK which uses a local UNIX socket that we need to specify
|
|
socket_path_template=$socket_path_prefix$(sudo kata-runtime env --json | jq '.Hypervisor.SocketPath')
|
|
socket_path=$(echo "$socket_path_template" | sed "s/{ID}/${container_id}/g" | tr -d '"')
|
|
sudo mkdir -p $(dirname "$socket_path")
|
|
|
|
tmux new-session -d -s "$KATA_TMUX_FORWARDER_SESSION" "sudo $forwarder_binary -l trace --socket-path $socket_path"
|
|
else
|
|
die "Unsupported hypervisor $KATA_HYPERVISOR"
|
|
fi
|
|
|
|
info "Verifying trace forwarder in tmux session $KATA_TMUX_FORWARDER_SESSION"
|
|
|
|
local cmd="tmux capture-pane -pt $KATA_TMUX_FORWARDER_SESSION | tr -d '\n' | tr -d '\"' | grep -q \"source:kata-trace-forwarder\""
|
|
waitForProcess 10 1 "$cmd"
|
|
}
|
|
|
|
kill_trace_forwarder()
|
|
{
|
|
tmux kill-session -t "$KATA_TMUX_FORWARDER_SESSION"
|
|
}
|
|
|
|
setup()
|
|
{
|
|
# containerd must be running in order to use ctr to generate traces
|
|
restart_containerd_service
|
|
|
|
local cmds=()
|
|
# For container manager (containerd)
|
|
cmds+=('ctr')
|
|
# For jaeger
|
|
cmds+=('docker')
|
|
# For launching processes
|
|
cmds+=('tmux')
|
|
|
|
local cmd
|
|
for cmd in "${cmds[@]}"
|
|
do
|
|
local result
|
|
result=$(command -v "$cmd" || true)
|
|
[ -n "$result" ] || die "need $cmd"
|
|
done
|
|
|
|
run_trace_forwarder
|
|
|
|
start_jaeger
|
|
|
|
"${SCRIPT_PATH}/configure_tracing_for_kata.sh" enable
|
|
}
|
|
|
|
run_test()
|
|
{
|
|
local service="$1"
|
|
local min_spans="$2"
|
|
local logdir="$3"
|
|
|
|
[ -z "$service" ] && die "need service name"
|
|
[ -z "$min_spans" ] && die "need minimum span count"
|
|
[ -z "$logdir" ] && die "need logdir"
|
|
|
|
info "Running test for service '$service'"
|
|
|
|
logdir="$logdir/$service"
|
|
mkdir -p "$logdir"
|
|
|
|
check_jaeger_output "$service" "$min_spans" "$logdir"
|
|
check_spans "$logdir"
|
|
|
|
info "test passed"
|
|
}
|
|
|
|
run_tests()
|
|
{
|
|
# List of services to check
|
|
#
|
|
# Format: "name:min-spans"
|
|
#
|
|
# Where:
|
|
#
|
|
# - 'name' is the Jaeger service name.
|
|
# - 'min-spans' is an integer representing the minimum number of
|
|
# trace spans this service should generate.
|
|
#
|
|
# Notes:
|
|
#
|
|
# - Uses an array to ensure predictable ordering.
|
|
# - All services listed are expected to generate traces
|
|
# when create_traces() is called a single time.
|
|
local -a services
|
|
|
|
services+=("kata:125")
|
|
|
|
create_traces
|
|
|
|
logdir=$(mktemp -d)
|
|
|
|
for service in "${services[@]}"
|
|
do
|
|
local name=$(echo "${service}"|cut -d: -f1)
|
|
local min_spans=$(echo "${service}"|cut -d: -f2)
|
|
|
|
run_test "${name}" "${min_spans}" "${logdir}"
|
|
done
|
|
|
|
info "all tests passed"
|
|
success="true"
|
|
}
|
|
|
|
usage()
|
|
{
|
|
cat <<EOF
|
|
|
|
Usage: $script_name [<command>]
|
|
|
|
Commands:
|
|
|
|
clean - Perform cleanup phase only.
|
|
help - Show usage.
|
|
run - Only run tests (no setup or cleanup).
|
|
setup - Perform setup phase only.
|
|
|
|
Environment variables:
|
|
|
|
CI - if set, save logs of all tests to ${TRACE_LOG_DIR}.
|
|
DEBUG - if set, enable tracing and do not cleanup after tests.
|
|
DEBUG_KEEP_JAEGER - if set, do not shut down the Jaeger service.
|
|
DEBUG_KEEP_FORWARDER - if set, do not shut down the trace forwarder.
|
|
|
|
Notes:
|
|
- Runs all test phases if no arguments are specified.
|
|
|
|
EOF
|
|
}
|
|
|
|
main()
|
|
{
|
|
local cmd="${1:-}"
|
|
|
|
case "$cmd" in
|
|
clean) success="true"; cleanup; exit 0;;
|
|
help|-h|-help|--help) usage; exit 0;;
|
|
run) run_tests; exit 0;;
|
|
setup) setup; exit 0;;
|
|
esac
|
|
|
|
trap cleanup EXIT
|
|
|
|
setup
|
|
|
|
run_tests
|
|
}
|
|
|
|
main "$@"
|