metrics: enable launch-times test on gha-run metrics script

This PR enables launch-times test on gha metrics workflow. Fixes: #7049 Signed-off-by: David Esparza <david.esparza.borquez@intel.com>
2025-07-14 23:55:49 +00:00 · 2023-06-21 12:39:16 -06:00 · 2023-06-21 12:39:16 -06:00 · 8593594247
commit 8593594247
parent 267e97f9c0
3 changed files with 536 additions and 16 deletions
--- a/tests/metrics/gha-run.sh
+++ b/tests/metrics/gha-run.sh
@ -11,6 +11,7 @@ set -o pipefail

 kata_tarball_dir=${2:-kata-artifacts}
 metrics_dir="$(dirname "$(readlink -f "$0")")"
+source "${metrics_dir}/../common.bash"

 create_symbolic_links() {
 	hypervisor="${1:-qemu}"
@ -18,39 +19,76 @@ create_symbolic_links() {
 	local source_configuration_file="/opt/kata/share/defaults/kata-containers/configuration-${hypervisor}.toml"

 	if [ ${hypervisor} != 'qemu' ] && [ ${hypervisor} != 'clh' ]; then
-		exit 2
+		die "Failed to set the configuration.toml: '${hypervisor}' is not recognized as a valid hypervisor name."
 	fi

 	sudo ln -sf "${source_configuration_file}" "${link_configuration_file}"
 }

+# Configures containerd
+overwrite_containerd_config() {
+	containerd_config="/etc/containerd/config.toml"
+	sudo rm "${containerd_config}"
+	sudo tee "${containerd_config}" << EOF
+version = 2
+[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
+  SystemdCgroup = true
+
+[plugins]
+  [plugins."io.containerd.grpc.v1.cri"]
+    [plugins."io.containerd.grpc.v1.cri".containerd]
+      default_runtime_name = "kata"
+      [plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
+        [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata]
+          runtime_type = "io.containerd.kata.v2"
+EOF
+}
+
 install_kata() {
-	local katadir="/opt/kata"
-	local destdir="/"
+	local kata_tarball="kata-static.tar.xz"
+	declare -r katadir="/opt/kata"
+	declare -r destdir="/"
+	declare -r local_bin_dir="/usr/local/bin/"

 	# Removing previous kata installation
 	sudo rm -rf "${katadir}"

 	pushd ${kata_tarball_dir}
-	for c in kata-static-*.tar.xz; do
-		echo "untarring tarball "${c}" into ${destdir}"
-		sudo tar -xvf "${c}" -C "${destdir}"
-	done
+	sudo tar -xvf "${kata_tarball}" -C "${destdir}"
 	popd
+
+	# create symbolic links to kata components
+	for b in "${katadir}/bin/*" ; do
+		sudo ln -sf "${b}" "${local_bin_dir}/$(basename $b)"
+	done
+
+	check_containerd_config_for_kata
+	restart_containerd_service
+}
+
+check_containerd_config_for_kata() {
+	# check containerd config
+	declare -r line1="default_runtime_name = \"kata\""
+	declare -r line2="runtime_type = \"io.containerd.kata.v2\""
+	declare -r num_lines_containerd=2
+	declare -r containerd_path="/etc/containerd/config.toml"
+	local count_matches=$(grep -ic  "$line1\|$line2" ${containerd_path})
+
+	if [ $count_matches = $num_lines_containerd ]; then
+		info "containerd ok"
+	else
+		info "overwriting containerd configuration w/ a valid one"
+		overwrite_containerd_config
+	fi
 }

 function run_test_launchtimes() {
 	hypervisor="${1}"

-	echo "Running launchtimes tests: "
+	info "Running Launch Time test using ${hypervisor} hypervisor"

 	create_symbolic_links "${hypervisor}"
-
-	if [ "${hypervisor}" = 'qemu' ]; then
-		echo "qemu"
-	elif [ "${hypervisor}" = 'clh' ]; then
-		echo "clh"
-	fi
+	bash tests/metrics/time/launch_times.sh -i public.ecr.aws/ubuntu/ubuntu:latest -n 20
 }

 function main() {
@ -59,7 +97,7 @@ function main() {
 		install-kata) install_kata ;;
 		run-test-launchtimes-qemu) run_test_launchtimes "qemu" ;;
 		run-test-launchtimes-clh) run_test_launchtimes "clh" ;;
-		*) >&2 echo "Invalid argument"; exit 2 ;;
+		*) >&2 die "Invalid argument" ;;
 	esac
 }

--- a/tests/metrics/lib/common.bash
+++ b/tests/metrics/lib/common.bash
@ -8,7 +8,7 @@ THIS_FILE=$(readlink -f ${BASH_SOURCE[0]})
 LIB_DIR=${THIS_FILE%/*}
 RESULT_DIR="${LIB_DIR}/../results"

-source ${LIB_DIR}/../../lib/common.bash
+source ${LIB_DIR}/../../common.bash
 source ${LIB_DIR}/json.bash
 source /etc/os-release || source /usr/lib/os-release

--- a/tests/metrics/time/launch_times.sh
+++ b/tests/metrics/time/launch_times.sh
@ -0,0 +1,482 @@
+#!/bin/bash
+# Copyright (c) 2017-2023 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+#  Description of the test:
+#  This test takes a number of time measurements through the complete
+#  launch/shutdown cycle of a single container.
+#  From those measurements it derives a number of time measures, such as:
+#   - time to payload execution
+#   - time to get to VM kernel
+#   - time in VM kernel boot
+#   - time to quit
+#   - total time (from launch to finished)
+#
+# Note, the <image> used for this test must support the full 'date' command
+# syntax - the date from busybox for instance *does not* support this, so
+# will not work with this test.
+#
+# Note, this test launches a single container at a time, that quits - thus,
+# this test measures times for the 'first container' only. This test does
+# not look for any scalability slowdowns as the number of running containers
+# increases for instance - that is handled in other tests
+
+set -e
+
+SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
+source "${SCRIPT_PATH}/../lib/common.bash"
+
+# Calculating the kernel time from dmesg stamps only really works for VM
+# based runtimes - we dynamically enable it if we find we are using a known
+# VM runtime
+CALCULATE_KERNEL=
+
+REQUIRED_CMDS=("bc" "awk")
+
+# set the total number of decimal digits after the decimal point 
+# for representing the calculations results
+CALC_SCALE=4
+
+# The date command format we use to ensure we capture the ns timings
+# Note the no-0-padding - 0 padding the results breaks bc in some cases
+DATECMD="date -u +%-s:%-N"
+
+# The modern Linux RNG is extremely fast at generating entropy on demand
+# and does not need to have as large a store of entropy anymore as the value
+# of 256 was found to work well with common cryptographic algorithms
+entropy_level="256"
+
+# Grabs the number of iterations performed
+num_iters=0
+
+# sets to this max number of repetitons for failed runs
+MAX_REPETITIONS=3
+
+# The individual results are stored in an array
+declare -a total_result_ds
+declare -a to_workload_ds
+declare -a in_kernel_ds
+declare -a to_kernel_ds
+declare -a to_quit_ds
+# data_is_valid value 1 represent not valid
+# data_is_valid value 0 represent is valid
+data_is_valid=0
+
+check_entropy_level() {
+	retries="10"
+	for i in $(seq 1 "$retries"); do
+		if [ $(cat /proc/sys/kernel/random/entropy_avail) -ge ${entropy_level} ]; then
+			break;
+		fi
+		sleep 1
+	done
+	if [ $(cat /proc/sys/kernel/random/entropy_avail) -lt ${entropy_level} ]; then
+		die "Not enough entropy level to run this test"
+	fi
+}
+
+# convert a 'seconds:nanoseconds' string into nanoseconds
+sn_to_ns() {
+	# !!: Remove 0's from beginning otherwise the number will be converted to octal
+	s=$(echo ${1%:*} | sed 's/^0*//g')
+	ns=$(echo ${1##*:} | sed 's/^0*//g')
+	# use shell magic to strip out the 's' and 'ns' fields and print
+	# them as a 0-padded ns string...
+	printf "%d%09d" ${s} ${ns}
+}
+
+# convert 'nanoseconds' (since epoch) into a 'float' seconds
+ns_to_s() {
+	printf "%.0${CALC_SCALE}f" $(bc <<< "scale=$CALC_SCALE; $1 / 1000000000")
+}
+
+run_workload() {
+	# L_CALC_SCALE is set to accounting a significant
+	# number of decimal digits after the decimal points
+	# for 'bc' performing math in kernel period estimation
+	L_CALC_SCALE=13
+	start_time=$($DATECMD)
+
+	# Check entropy level of the host
+	check_entropy_level
+
+	# Run the image and command and capture the results into an array...
+	declare workload_result
+	readarray -n 0 workload_result < <(sudo -E "${CTR_EXE}" run --rm --runtime=${CTR_RUNTIME} ${IMAGE} test bash -c "$DATECMD $DMESGCMD")
+
+	end_time=$($DATECMD)
+
+	# Delay this calculation until after we have run - do not want
+	# to measure it in the results
+	start_time=$(sn_to_ns $start_time)
+	end_time=$(sn_to_ns $end_time)
+
+	# Extract the 'date' info from the first line of the log
+	# This script assumes the VM clock is in sync with the host clock...
+	workload_time=${workload_result[0]}
+	workload_time=$(echo $workload_time | tr -d '\r')
+	workload_time=$(sn_to_ns $workload_time)
+
+	# How long did the whole launch/quit take
+	total_period=$((end_time-start_time))
+	# How long did it take to get to the workload
+	workload_period=$((workload_time-start_time))
+	# How long did it take to quit
+	shutdown_period=$((end_time-workload_time))
+
+	if [ -n "$CALCULATE_KERNEL" ]; then
+		# Grab the last kernel dmesg time
+		# In our case, we need to find the last real kernel line before
+		# the systemd lines begin. The last:
+		# 'Freeing unused kernel' line is a reasonable
+		# 'last in kernel line' to look for.
+		# We make a presumption here that as we are in a cold-boot VM
+		# kernel, the first dmesg is at '0 seconds', so the timestamp
+		# of that last line is the length of time in the kernel.
+		kernel_last_line=$( (fgrep "Freeing unused kernel" <<- EOF
+				${workload_result[@]}
+			EOF
+			) | tail -1 )
+
+		if [ -z "$kernel_last_line" ]; then
+			echo "No kernel last line"
+			for l in "${workload_result[@]}"; do
+				echo ">: [$l]"
+			done
+			die "No kernel last line"
+		fi
+
+		kernel_period=$(echo $kernel_last_line | awk '{print $2}' | tr -d "]")
+
+		# And we can then work out how much time it took to get to the kernel
+		to_kernel_period=$(printf "%f" $(bc <<<"scale=$L_CALC_SCALE; $(ns_to_s $workload_period) - $kernel_period"))
+	else
+		kernel_period="0.0"
+		to_kernel_period="0.0"
+	fi
+
+	total_result="$(ns_to_s $total_period)"
+	to_workload="$(ns_to_s $workload_period)"
+	in_kernel=$kernel_period
+	to_kernel=$to_kernel_period
+	to_quit=$(ns_to_s $shutdown_period)
+
+	tr_is_neg=$(echo $total_result'<='0.0 | bc -l)
+	tw_is_neg=$(echo $to_workload'<='0.0 | bc -l)
+	ik_is_neg=$(echo $in_kernel'<='0.0 | bc -l)
+	tk_is_neg=$(echo $to_kernel'<='0.0 | bc -l)
+	tq_is_neg=$(echo $to_quit'<='0.0 | bc -l)
+
+	data_is_valid=0
+	if [ $tr_is_neg -eq 1 ] || [ $tw_is_neg -eq 1 ] || [ $ik_is_neg -eq 1 ] || [ $tk_is_neg -eq 1 ] || [ $tq_is_neg -eq 1 ]; then
+		data_is_valid=1
+	else
+		# Insert results individually
+		total_result_ds+=($total_result)
+		to_workload_ds+=($to_workload)
+		in_kernel_ds+=($in_kernel)
+		to_kernel_ds+=($to_kernel)
+		to_quit_ds+=($to_quit)
+	fi
+
+	((num_iters+=1))
+
+	# If we are doing an (optional) scaling test, then we launch a permanent container
+	# between each of our 'test' containers. The aim being to see if our launch times
+	# are linear with the number of running containers or not
+	if [ -n "$SCALING" ]; then
+		sudo -E "${CTR_EXE}" run --runtime=${CTR_RUNTIME} -d ${IMAGE} test bash -c "tail -f /dev/null"
+	fi
+}
+
+# Writes a JSON  with the measurements
+# results per execution
+write_individual_results() {
+	for i in "${!total_result_ds[@]}"; do
+		local json="$(cat << EOF
+	{
+		"total": {
+			"Result": ${total_result_ds[i]},
+			"Units": "s"
+		},
+		"to-workload": {
+			"Result": ${to_workload_ds[i]},
+			"Units": "s"
+		},
+		"in-kernel": {
+			"Result": ${in_kernel_ds[i]},
+			"Units": "s"
+		},
+		"to-kernel": {
+			"Result": ${to_kernel_ds[i]},
+			"Units": "s"
+		},
+		"to-quit": {
+			"Result": ${to_quit_ds[i]},
+			"Units": "s"
+		}
+	}
+EOF
+)"
+		metrics_json_add_array_element "$json"
+	done
+}
+
+init () {
+	TEST_ARGS="image=${IMAGE} runtime=${CTR_RUNTIME} units=seconds"
+
+	# We set the generic name here, but we save the different time results separately,
+	# and append the actual detail to the name at the time of saving...
+	TEST_NAME="boot times"
+
+	# If we are scaling, note that in the name
+	[ -n "$SCALING" ] && TEST_NAME="${TEST_NAME} scaling"
+
+	echo "Executing test: ${TEST_NAME} ${TEST_ARGS}"
+	check_cmds "${REQUIRED_CMDS[@]}"
+
+	# For non-VM runtimes, we don't use the output of dmesg, and
+	# we have seen it cause some test instabilities, so do not invo>
+	# it if not needed.
+	if [ "${CTR_RUNTIME}" == "io.containerd.runc.v2" ]; then
+		DMESGCMD=""
+	else
+		CALCULATE_KERNEL=1
+		DMESGCMD="; dmesg"
+	fi
+
+	# Start from a fairly clean environment
+	init_env
+	check_images "$IMAGE"
+}
+
+# Computes the average of the data
+calc_avg_array() {
+	data=("$@")
+	avg=0
+	LSCALE=6
+	size="${#data[@]}"
+
+	[ -z "$data" ] && die "List of results was not passed to the calc_avg_array() function when trying to calculate the average result."
+	[ $size -eq 0 ] && die "Division by zero: The number of items is 0 when trying to calculate the average result."
+
+	sum=$(IFS='+'; echo "scale=4; ${data[*]}" | bc)
+	avg=$(echo "scale=$LSCALE; $sum / $size" | bc)
+	printf "%.0${CALC_SCALE}f" $avg
+}
+
+
+# Computes the standard deviation of the data
+calc_sd_array() {
+	data=("$@")
+	sum_sqr_n=0
+	size=${#data[@]}
+
+	# LSCALE is the scale used for calculations in the middle
+	# CALC_SCALE is the scale used for the result
+	LSCALE=13
+	CALC_SCALE=6
+
+	[ -z "$data" ] && die "List results was not passed to the calc_sd_result() function when trying to calculate the standard deviation result."
+	[ $size -eq 0 ] && die "Division by zero: The number of items is 0 when trying to calculate the standard deviation result."
+
+
+	# [1] sum data
+	sum_data=$(IFS='+'; echo "scale=$LSCALE; ${data[*]}" | bc)
+
+	# [2] square the sum of data
+	pow_2_sum_data=$(echo "scale=$LSCALE; $sum_data ^ 2" | bc)
+
+	# [3] divide the square of data by the num of items
+	div_sqr_n=$(echo "scale=$LSCALE; $pow_2_sum_data / $size" | bc)
+
+	# [4] Sum of the sqr of each item
+	for i in "${data[@]}"; do
+		sqr_n=$(echo "scale=$LSCALE; $i ^ 2" | bc)
+		sum_sqr_n=$(echo "scale=$LSCALE; $sqr_n + $sum_sqr_n" | bc)
+	done
+
+	# substract [4] from [3]
+	subs=$(echo "scale=$LSCALE; $sum_sqr_n - $div_sqr_n" | bc)
+
+	# get variance
+	var=$(echo "scale=$LSCALE; $subs / $size" | bc)
+
+	# get standard deviation
+	sd=$(echo "scale=$LSCALE; sqrt($var)" | bc)
+
+	# if sd is zero, limit the decimal scale to 1 digit
+	sd_is_zero=$(echo $sd'=='0.0 | bc -l)
+	[ $sd_is_zero -eq 1 ] && CALC_SCALE=1
+
+	printf "%.0${CALC_SCALE}f" $sd
+}
+
+# Computes the Coefficient of variation.
+# The result is given as percentage.
+calc_cov_array() {
+	sd=$1
+	mean=$2
+
+	# LSCALE used for consider more decimals digits than usual in cov estimation.
+	# CALC_SCALE is the scale used to return the result.
+	LSCALE=13
+	CALC_SCALE=6
+
+	mean_is_zero=$(echo $mean'=='0.0 | bc -l)
+
+	[ -z "$sd" ] && die "Standard deviation was not passed to the calc_cov_array() function when trying to calculate the CoV result."
+	[ -z "$mean" ] && die "Mean was not passed to the calc_cov_array() function when trying to calculate the CoV result."
+	[ $mean_is_zero -eq 1 ] && die "Division by zero: Mean value passed is 0 when trying to get CoV result."
+
+	cov=$(echo "scale=$LSCALE; $sd / $mean" | bc)
+	cov=$(echo "scale=$LSCALE; $cov * 100" | bc)
+
+	# if cov is zero, limit the decimal scale to 1 digit
+	cov_is_zero=$(echo $cov'=='0.0 | bc -l)
+	[ $cov_is_zero -eq 1 ] && CALC_SCALE=1
+
+	printf "%.0${CALC_SCALE}f" $cov
+}
+
+# Writes a JSON with the statistics results
+# for each launch time metric
+write_stats_results() {
+	size="${#total_result_ds[@]}"
+	avg_total_result=$(calc_avg_array "${total_result_ds[@]}")
+	avg_to_workload=$(calc_avg_array "${to_workload_ds[@]}")
+	avg_in_kernel=$(calc_avg_array "${in_kernel_ds[@]}")
+	avg_to_kernel=$(calc_avg_array "${to_kernel_ds[@]}")
+	avg_to_quit=$(calc_avg_array "${to_quit_ds[@]}")
+
+	sd_total_result=$(calc_sd_array "${total_result_ds[@]}")
+	sd_to_workload=$(calc_sd_array "${to_workload_ds[@]}")
+	sd_in_kernel=$(calc_sd_array "${in_kernel_ds[@]}")
+	sd_to_kernel=$(calc_sd_array "${to_kernel_ds[@]}")
+	sd_to_quit=$(calc_sd_array "${to_quit_ds[@]}")
+
+	cov_total_result=$(calc_cov_array ${sd_total_result} ${avg_total_result})
+	cov_to_workload=$(calc_cov_array ${sd_to_workload} ${avg_to_workload})
+	cov_in_kernel=$(calc_cov_array ${sd_in_kernel} ${avg_in_kernel})
+	cov_to_kernel=$(calc_cov_array ${sd_to_kernel} ${avg_to_kernel})
+	cov_to_quit=$(calc_cov_array ${sd_to_quit} ${avg_to_quit})
+
+	local json="$(cat << EOF
+	{
+	"size": $size,
+	"total": {
+		"avg": $avg_total_result,
+		"sd": $sd_total_result,
+		"cov": $cov_total_result
+	},
+	"to-workload": {
+		"avg": $avg_to_workload,
+		"sd": $sd_to_workload,
+		"cov": $cov_to_workload
+	},
+	"in-kernel": {
+		"avg": $avg_in_kernel,
+		"sd": $sd_in_kernel,
+		"cov": $cov_in_kernel
+	},
+	"to-kernel_avg": {
+		"avg": $avg_to_kernel,
+		"sd": $sd_to_kernel,
+		"cov": $cov_to_kernel
+	},
+	"to-quit": {
+		"avg": $avg_to_quit,
+		"sd": $sd_to_quit,
+		"cov": $cov_to_quit
+	}
+	}
+EOF
+)"
+	metrics_json_add_array_element "$json"
+}
+
+help() {
+	usage=$(cat << EOF
+Usage: $0 [-h] [options]
+   Description:
+        This script takes time measurements for different
+	stages of a boot/run/rm cycle
+   Options:
+        -h,         Help
+        -i <name>,  Image name (mandatory)
+        -n <n>,     Number of containers to run (mandatory)
+        -s,         Enable scaling (keep containers running)
+EOF
+)
+	echo "$usage"
+}
+
+main() {
+	local OPTIND
+	while getopts "dhi:n:s" opt;do
+		case ${opt} in
+		h)
+		    help
+		    exit 0;
+		    ;;
+		i)
+		    IMAGE="${OPTARG}"
+		    ;;
+		n)
+		    TIMES="${OPTARG}"
+		    ;;
+		s)
+		    SCALING=true
+		    ;;
+		?)
+		    # parse failure
+		    help
+		    die "Failed to parse arguments"
+		    ;;
+		esac
+	done
+	shift $((OPTIND-1))
+
+	[ -z "$IMAGE" ] && help && die "Mandatory IMAGE name not supplied"
+	[ -z "$TIMES" ] && help && die "Mandatory nunmber of containers not supplied"
+	# Although this is mandatory, the 'lib/common.bash' environment can set
+	# it, so we may not fail if it is not set on the command line...
+	[ -z "$RUNTIME" ] && help && die "Mandatory runtime argument not supplied"
+
+	init
+	j=0
+	max_reps=$MAX_REPETITIONS
+
+	while [ $j -lt $TIMES ]; do
+
+		echo " run $num_iters"
+		run_workload
+
+		if [ $data_is_valid -eq 0 ]; then
+			j=$(( j + 1 ))
+			# if valid result then reset 'max_reps' to initial value
+			max_reps=$MAX_REPETITIONS
+			continue
+		fi
+
+		echo "Skipping run due to invalid result"
+		((max_reps-=1))
+
+		if [ $max_reps -lt 0 ]; then
+			die "Max. num of repetitions reached for run: $j"
+		fi
+	done
+
+	metrics_json_init
+	metrics_json_start_array
+	write_stats_results
+	metrics_json_end_array "Statistics"
+	metrics_json_start_array
+	write_individual_results
+	metrics_json_end_array "Results"
+	metrics_json_save
+	clean_env_ctr
+}
+
+main "$@"