Mirror of https://github.com/kata-containers/kata-containers.git
	tests: Add soak parallel stability test
This PR adds the soak parallel stability test.

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
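For reference, here is a minimal sketch of how the test could be run locally. The variables are the script's own tunables, shown with the defaults it defines; the actual CI entry point is not part of this change, so treat the invocation below as an assumption rather than the documented way to launch it.

# Hypothetical local run; values mirror the defaults set inside the script.
ITERATIONS=5 \
MAX_CONTAINERS=110 \
MEM_CUTOFF=$((2*1024*1024*1024)) \
KATA_HYPERVISOR=qemu \
./tests/stability/soak_parallel_rm.sh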
tests/stability/soak_parallel_rm.sh (new executable file) | 208
@@ -0,0 +1,208 @@
#!/bin/bash
#
# Copyright (c) 2017-2018, 2020 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# This test will run a number of parallel containers, and then try to
# 'rm -f' them all at the same time. It will check after each run and
# rm that we have the expected number of containers, shims,
# qemus and runtimes active.
# The goals are twofold:
# - spot any stuck or non-started components
# - catch any hang-ups

cidir=$(dirname "$0")
source "${cidir}/../metrics/lib/common.bash"
source "/etc/os-release" || source "/usr/lib/os-release"
set -x

# How many times will we run the test loop...
ITERATIONS="${ITERATIONS:-5}"

# the system 'free available' level where we stop running the tests, as otherwise
#  the system can crawl to a halt, and/or start refusing to launch new VMs anyway
# We choose 2G, as that is one of the default VM sizes for Kata
MEM_CUTOFF="${MEM_CUTOFF:-(2*1024*1024*1024)}"

# do we need a command argument for this payload?
COMMAND="${COMMAND:-tail -f /dev/null}"

# Runtime path
RUNTIME_PATH=$(command -v $RUNTIME)

# The place where virtcontainers keeps its active pod info
# This is ultimately what 'kata-runtime list' uses to get its info, but
# we can also check it for sanity directly
VC_POD_DIR="${VC_POD_DIR:-/run/vc/sbs}"

# let's cap the test. If you want to run until you hit the memory limit
# then just set this to a very large number
MAX_CONTAINERS="${MAX_CONTAINERS:-110}"

KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}"

check_vsock_active() {
	vsock_configured=$($RUNTIME_PATH kata-env | awk '/UseVSock/ {print $3}')
	vsock_supported=$($RUNTIME_PATH kata-env | awk '/SupportVSock/ {print $3}')
	if [ "$vsock_configured" == true ] && [ "$vsock_supported" == true ]; then
		return 0
	else
		return 1
	fi
}

count_containers() {
	sudo ctr c list -q | wc -l
}

check_all_running() {
	local goterror=0

	echo "Checking ${how_many} containers have all relevant components"

	# check what ctr thinks
	how_many_running=$(count_containers)

	if (( ${how_many_running} != ${how_many} )); then
		echo "Wrong number of containers running (${how_many_running} != ${how_many}) - stopping"
		((goterror++))
	fi

	# Only check for Kata components if we are using a Kata runtime
	if (( $check_kata_components )); then

		# check we have the right number of shims
		how_many_shims=$(pgrep -a -f ${SHIM_PATH} | grep containerd.sock | wc -l)
		# one shim process per container...
		if (( ${how_many_running} != ${how_many_shims} )); then
			echo "Wrong number of shims running (${how_many_running} != ${how_many_shims}) - stopping"
			((goterror++))
		fi

		# check we have the right number of VMs
		if [[ "$KATA_HYPERVISOR" != "dragonball" ]]; then
			how_many_vms=$(pgrep -a $(basename ${HYPERVISOR_PATH} | cut -d '-' -f1) | wc -l)
			if (( ${how_many_running} != ${how_many_vms} )); then
				echo "Wrong number of $KATA_HYPERVISOR running (${how_many_running} != ${how_many_vms}) - stopping"
				((goterror++))
			fi
		fi

		# if this is the Kata runtime, check how many pods virtcontainers thinks we have
		if [[ "$RUNTIME" == "containerd-shim-kata-v2" ]]; then
			if [ -d "${VC_POD_DIR}" ]; then
				num_vc_pods=$(sudo ls -1 ${VC_POD_DIR} | wc -l)

				if (( ${how_many_running} != ${num_vc_pods} )); then
					echo "Wrong number of pods in $VC_POD_DIR (${how_many_running} != ${num_vc_pods}) - stopping"
					((goterror++))
				fi
			fi
		fi
	fi

	if (( goterror != 0 )); then
		show_system_ctr_state
		die "Got $goterror errors, quitting"
	fi
}

# reported system 'available' memory
get_system_avail() {
	echo $(free -b | head -2 | tail -1 | awk '{print $7}')
}

go() {
	echo "Running..."

	how_many=0

	while true; do {
		check_all_running

		local i
		for ((i=1; i<= ${MAX_CONTAINERS}; i++)); do
			containers+=($(random_name))
			sudo ctr run --runtime=${CTR_RUNTIME} -d ${nginx_image} ${containers[-1]} sh -c ${COMMAND}
			((how_many++))
		done

		if (( ${how_many} >= ${MAX_CONTAINERS} )); then
			echo "And we have hit the max ${how_many} containers"
			return
		fi

		how_much=$(get_system_avail)
		if (( ${how_much} < ${MEM_CUTOFF} )); then
			echo "And we are out of memory on container ${how_many} (${how_much} < ${MEM_CUTOFF})"
			return
		fi
	}
	done
}

count_mounts() {
	echo $(mount | wc -l)
}

check_mounts() {
	final_mount_count=$(count_mounts)

	if [[ $final_mount_count -lt $initial_mount_count ]]; then
		echo "Final mount count does not match initial count (${final_mount_count} != ${initial_mount_count})"
	fi
}

init() {
	restart_containerd_service
	extract_kata_env
	clean_env_ctr

	# remember how many mount points we had before we do anything
	# and then sanity check we end up with no new ones dangling at the end
	initial_mount_count=$(count_mounts)

	# Only check Kata items if we are using a Kata runtime
	if [[ "$RUNTIME" == "containerd-shim-kata-v2" ]]; then
		echo "Checking Kata runtime"
		check_kata_components=1
	else
		echo "Not a Kata runtime, not checking for Kata components"
		check_kata_components=0
	fi

	versions_file="${cidir}/../versions.yaml"
	nginx_version=$("${GOPATH}/bin/yq" read "$versions_file" "docker_images.nginx.version")
	nginx_image="docker.io/library/nginx:$nginx_version"

	# Pull nginx image
	sudo ctr image pull ${nginx_image}
	if [ $? != 0 ]; then
		die "Unable to retrieve docker image ${nginx_image}"
	fi
}

spin() {
	local i
	for ((i=1; i<= ITERATIONS; i++)); do {
		echo "Start iteration $i of $ITERATIONS"
		# spin them up
		go
		# check we are in a sane state
		check_all_running
		# shut them all down
		clean_env_ctr
		# Note there should be none running
		how_many=0
		# and check they all died
		check_all_running
		# and that we have no dangling mounts
		check_mounts
	}
	done

}

init
spin
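The script decides pass or fail in check_all_running(), which expects the ctr container count, the shim count, the hypervisor count and the virtcontainers pod count to all agree. If a run dies with a "Wrong number of ..." message, the same counts can be reproduced by hand with the commands the script itself uses. In this sketch SHIM_PATH and HYPERVISOR_PATH stand in for the paths that extract_kata_env resolves on the machine under test (for example containerd-shim-kata-v2 and the configured qemu binary).

# Manual spot checks mirroring check_all_running(); paths are placeholders.
sudo ctr c list -q | wc -l                                           # containers ctr reports
pgrep -a -f "${SHIM_PATH}" | grep containerd.sock | wc -l            # one shim per container
pgrep -a "$(basename "${HYPERVISOR_PATH}" | cut -d '-' -f1)" | wc -l # one VM per container
sudo ls -1 /run/vc/sbs | wc -l                                       # pods tracked under VC_POD_DIR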
tests/versions.yaml

@@ -395,3 +395,11 @@ plugins:
       available on a Kubernetes host.
     url: "https://github.com/k8snetworkplumbingwg/sriov-network-device-plugin"
     version: "b7f6d3e0679796e907ecca88cfab0e32e326850d"
+
+docker_images:
+  description: "Docker hub images used for testing"
+
+  nginx:
+    description: "Proxy server for HTTP, HTTPS, SMTP, POP3 and IMAP protocols"
+    url: "https://hub.docker.com/_/nginx/"
+    version: "1.15-alpine"
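This is the entry that init() in the new script resolves into the image it pulls. A small sketch of that lookup, assuming the hunk above lands in tests/versions.yaml (the "${cidir}/../versions.yaml" path the script references) and the same yq 'read' syntax the script already uses:

# Hypothetical standalone version of the lookup performed in init().
nginx_version=$("${GOPATH}/bin/yq" read tests/versions.yaml "docker_images.nginx.version")
nginx_image="docker.io/library/nginx:${nginx_version}"   # -> docker.io/library/nginx:1.15-alpine
sudo ctr image pull "${nginx_image}"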