From 0f04d527d9cb0f0a981bda377ed7853f34d78802 Mon Sep 17 00:00:00 2001
From: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
Date: Fri, 6 Oct 2023 15:23:03 +0000
Subject: [PATCH 1/4] tests: Enable soak parallel test

This PR enables the soak parallel test in the stability test suite.

Fixes #8153

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
---
 tests/stability/gha-run.sh | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/stability/gha-run.sh b/tests/stability/gha-run.sh
index 66e6f21c05..e05ae5aa40 100755
--- a/tests/stability/gha-run.sh
+++ b/tests/stability/gha-run.sh
@@ -8,6 +8,7 @@
 set -o errexit
 set -o nounset
 set -o pipefail
+set -x
 
 kata_tarball_dir="${2:-kata-artifacts}"
 stability_dir="$(dirname "$(readlink -f "$0")")"
@@ -16,8 +17,8 @@ source "${stability_dir}/../common.bash"
 function run() {
 	info "Running soak parallel stability tests using ${KATA_HYPERVISOR} hypervisor"
 
-	# export ITERATIONS=2 MAX_CONTAINERS=20
-	# bash "${stability_dir}/soak_parallel_rm.sh"
+	export ITERATIONS=2 MAX_CONTAINERS=20
+	bash "${stability_dir}/soak_parallel_rm.sh"
 }
 
 function main() {

From dec3951ca55fbb1c2e3dd23551a815fcdebe7d11 Mon Sep 17 00:00:00 2001
From: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
Date: Fri, 6 Oct 2023 15:27:28 +0000
Subject: [PATCH 2/4] tests: Add soak parallel stability test

This PR adds the soak parallel stability test.

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
---
 tests/stability/soak_parallel_rm.sh | 208 ++++++++++++++++++++++++++++
 versions.yaml                       |   8 ++
 2 files changed, 216 insertions(+)
 create mode 100755 tests/stability/soak_parallel_rm.sh

diff --git a/tests/stability/soak_parallel_rm.sh b/tests/stability/soak_parallel_rm.sh
new file mode 100755
index 0000000000..6bf74ea611
--- /dev/null
+++ b/tests/stability/soak_parallel_rm.sh
@@ -0,0 +1,208 @@
+#!/bin/bash
+#
+# Copyright (c) 2017-2018, 2020 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# This test will run a number of parallel containers, and then try to
+# 'rm -f' them all at the same time. After each run and each rm it checks
+# that we have the expected number of containers, shims,
+# qemus and runtimes active.
+# The goals are twofold:
+# - spot any stuck or non-started components
+# - catch any hang ups
+
+cidir=$(dirname "$0")
+source "${cidir}/../metrics/lib/common.bash"
+source "/etc/os-release" || source "/usr/lib/os-release"
+set -x
+
+# How many times will we run the test loop...
+ITERATIONS="${ITERATIONS:-5}"
+
+# the system 'free available' level where we stop running the tests, as otherwise
+#  the system can crawl to a halt, and/or start refusing to launch new VMs anyway
+# We choose 2G, as that is one of the default VM sizes for Kata
+MEM_CUTOFF="${MEM_CUTOFF:-(2*1024*1024*1024)}"
+
+# do we need a command argument for this payload?
+COMMAND="${COMMAND:-tail -f /dev/null}"
+
+# Runtime path
+RUNTIME_PATH=$(command -v $RUNTIME)
+
+# The place where virtcontainers keeps its active pod info
+# This is ultimately what 'kata-runtime list' uses to get its info, but
+# we can also check it for sanity directly
+VC_POD_DIR="${VC_POD_DIR:-/run/vc/sbs}"
+
+# let's cap the test. If you want to run until you hit the memory limit
+# then just set this to a very large number
+MAX_CONTAINERS="${MAX_CONTAINERS:-110}"
+
+KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}"
+
+check_vsock_active() {
+	vsock_configured=$($RUNTIME_PATH kata-env | awk '/UseVSock/ {print $3}')
+	vsock_supported=$($RUNTIME_PATH kata-env | awk '/SupportVSock/ {print $3}')
+	if [ "$vsock_configured" == true ] && [ "$vsock_supported" == true ]; then
+		return 0
+	else
+		return 1
+	fi
+}
+
+count_containers() {
+	sudo ctr c list -q | wc -l
+}
+
+check_all_running() {
+	local goterror=0
+
+	echo "Checking ${how_many} containers have all relevant components"
+
+	# check what docker thinks
+	how_many_running=$(count_containers)
+
+	if (( ${how_many_running} != ${how_many} )); then
+		echo "Wrong number of containers running (${how_many_running} != ${how_many}) - stopping"
+		((goterror++))
+	fi
+
+	# Only check for Kata components if we are using a Kata runtime
+	if (( $check_kata_components )); then
+
+		# check we have the right number of shims
+		how_many_shims=$(pgrep -a -f ${SHIM_PATH} | grep containerd.sock | wc -l)
+		# one shim process per container...
+		if (( ${how_many_running} != ${how_many_shims} )); then
+			echo "Wrong number of shims running (${how_many_running} != ${how_many_shims}) - stopping"
+			((goterror++))
+		fi
+
+		# check we have the right number of vm's
+		if [[ "$KATA_HYPERVISOR" != "dragonball" ]]; then
+			how_many_vms=$(pgrep -a $(basename ${HYPERVISOR_PATH} | cut -d '-' -f1) | wc -l)
+			if (( ${how_many_running} != ${how_many_vms} )); then
+				echo "Wrong number of $KATA_HYPERVISOR running (${how_many_running} != ${how_many_vms}) - stopping"
+				((goterror++))
+			fi
+		fi
+
+		# if this is kata-runtime, check how many pods virtcontainers thinks we have
+		if [[ "$RUNTIME" == "containerd-shim-kata-v2" ]]; then
+			if [ -d "${VC_POD_DIR}" ]; then
+				num_vc_pods=$(sudo ls -1 ${VC_POD_DIR} | wc -l)
+
+				if (( ${how_many_running} != ${num_vc_pods} )); then
+					echo "Wrong number of pods in $VC_POD_DIR (${how_many_running} != ${num_vc_pods}) - stopping)"
+					((goterror++))
+				fi
+			fi
+		fi
+	fi
+
+	if (( goterror != 0 )); then
+		show_system_ctr_state
+		die "Got $goterror errors, quitting"
+	fi
+}
+
+# reported system 'available' memory
+get_system_avail() {
+	echo $(free -b | head -2 | tail -1 | awk '{print $7}')
+}
+
+go() {
+	echo "Running..."
+
+	how_many=0
+
+	while true; do { # each pass: verify state, start MAX_CONTAINERS containers, then test the exit conditions
+		check_all_running
+
+		local i
+		for ((i=1; i<= ${MAX_CONTAINERS}; i++)); do
+			containers+=("$(random_name)")
+			sudo ctr run --runtime="${CTR_RUNTIME}" -d "${nginx_image}" "${containers[-1]}" sh -c "${COMMAND}" # quoted: sh -c must receive the payload as ONE string
+			((how_many++))
+		done
+
+		if (( ${how_many} >= ${MAX_CONTAINERS} )); then
+			echo "And we have hit the max ${how_many} containers"
+			return
+		fi
+
+		how_much=$(get_system_avail)
+		if (( ${how_much} < ${MEM_CUTOFF} )); then
+			echo "And we are out of memory on container ${how_many} (${how_much} < ${MEM_CUTOFF})"
+			return
+		fi
+	}
+	done
+}
+
+count_mounts() {
+	echo $(mount | wc -l)
+}
+
+check_mounts() {
+	final_mount_count=$(count_mounts)
+
+	if [[ $final_mount_count < $initial_mount_count ]]; then
+		echo "Final mount count does not match initial count (${final_mount_count} != ${initial_mount_count})"
+	fi
+}
+
+init() {
+	restart_containerd_service
+	extract_kata_env
+	clean_env_ctr
+
+	# remember how many mount points we had before we do anything
+	# and then sanity check we end up with no new ones dangling at the end
+	initial_mount_count=$(count_mounts)
+
+	# Only check Kata items if we are using a Kata runtime
+	if [[ "$RUNTIME" == "containerd-shim-kata-v2" ]]; then
+		echo "Checking Kata runtime"
+		check_kata_components=1
+	else
+		echo "Not a Kata runtime, not checking for Kata components"
+		check_kata_components=0
+	fi
+
+	versions_file="${cidir}/../../versions.yaml"
+	nginx_version=$("${GOPATH}/bin/yq" read "$versions_file" "docker_images.nginx.version")
+	nginx_image="docker.io/library/nginx:$nginx_version"
+
+	# Pull nginx image; the tag is read from the repo-root versions.yaml
+	# (cidir is tests/stability, hence ../..). Give up if the pull fails.
+	if ! sudo ctr image pull "${nginx_image}"; then
+		die "Unable to retrieve docker image ${nginx_image}"
+	fi
+}
+
+spin() {
+	local i
+	for ((i=1; i<= ITERATIONS; i++)); do {
+		echo "Start iteration $i of $ITERATIONS"
+		#spin them up
+		go
+		#check we are in a sane state
+		check_all_running
+		#shut them all down
+		clean_env_ctr
+		#Note there should be none running
+		how_many=0
+		#and check they all died
+		check_all_running
+		#and that we have no dangling mounts
+		check_mounts
+	}
+	done
+
+}
+
+init
+spin
diff --git a/versions.yaml b/versions.yaml
index 47c74d907d..7851f3deb6 100644
--- a/versions.yaml
+++ b/versions.yaml
@@ -395,3 +395,11 @@ plugins:
       available on a Kubernetes host.
     url: "https://github.com/k8snetworkplumbingwg/sriov-network-device-plugin"
     version: "b7f6d3e0679796e907ecca88cfab0e32e326850d"
+
+docker_images:
+  description: "Docker hub images used for testing"
+
+  nginx:
+    description: "Proxy server for HTTP, HTTPS, SMTP, POP3 and IMAP protocols"
+    url: "https://hub.docker.com/_/nginx/"
+    version: "1.15-alpine"

From 84e3d884e4c7dfde4e5bec38920af8952a9df60f Mon Sep 17 00:00:00 2001
From: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
Date: Fri, 6 Oct 2023 18:41:43 +0000
Subject: [PATCH 3/4] gha: Add general dependencies to stability tests

This PR adds the general dependencies to stability tests.

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
---
 tests/stability/gha-run.sh | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tests/stability/gha-run.sh b/tests/stability/gha-run.sh
index e05ae5aa40..586e45015c 100755
--- a/tests/stability/gha-run.sh
+++ b/tests/stability/gha-run.sh
@@ -14,6 +14,27 @@ kata_tarball_dir="${2:-kata-artifacts}"
 stability_dir="$(dirname "$(readlink -f "$0")")"
 source "${stability_dir}/../common.bash"
 
+function install_dependencies() {
+	info "Installing the dependencies needed for running the containerd-stability tests"
+
+	declare -a system_deps=(
+		jq
+	)
+
+	sudo apt-get update
+	sudo apt-get -y install "${system_deps[@]}"
+
+	ensure_yq
+
+	declare -a github_deps
+	github_deps[0]="cri_containerd:$(get_from_kata_deps "externals.containerd.${CONTAINERD_VERSION}")"
+
+	for github_dep in "${github_deps[@]}"; do
+		IFS=":" read -r -a dep <<< "${github_dep}"
+		install_${dep[0]} "${dep[1]}"
+	done
+}
+
 function run() {
 	info "Running soak parallel stability tests using ${KATA_HYPERVISOR} hypervisor"
 
@@ -24,6 +45,7 @@ function run() {
 function main() {
 	action="${1:-}"
 	case "${action}" in
+		install-dependencies) install_dependencies ;;
 		install-kata) install_kata ;;
 		enabling-hypervisor) enabling_hypervisor ;;
 		run) run ;;

From e786b2b019dab49716e2ab40b7cb57b19686859d Mon Sep 17 00:00:00 2001
From: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
Date: Mon, 9 Oct 2023 17:09:32 +0000
Subject: [PATCH 4/4] gha: Add install dependencies for stability tests

This PR adds the install-dependencies step for the stability tests to the CI workflow.

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
---
 .github/workflows/basic-ci-amd64.yaml |  3 ++
 tests/stability/gha-run.sh            |  1 -
 tests/stability/soak_parallel_rm.sh   | 44 +++++++++++++--------------
 3 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/basic-ci-amd64.yaml b/.github/workflows/basic-ci-amd64.yaml
index 5780605ede..6ac0a52401 100644
--- a/.github/workflows/basic-ci-amd64.yaml
+++ b/.github/workflows/basic-ci-amd64.yaml
@@ -78,6 +78,9 @@ jobs:
         env:
           TARGET_BRANCH: ${{ inputs.target-branch }}
 
+      - name: Install dependencies
+        run: bash tests/stability/gha-run.sh install-dependencies
+
       - name: get-kata-tarball
         uses: actions/download-artifact@v3
         with:
diff --git a/tests/stability/gha-run.sh b/tests/stability/gha-run.sh
index 586e45015c..01672534a4 100755
--- a/tests/stability/gha-run.sh
+++ b/tests/stability/gha-run.sh
@@ -8,7 +8,6 @@
 set -o errexit
 set -o nounset
 set -o pipefail
-set -x
 
 kata_tarball_dir="${2:-kata-artifacts}"
 stability_dir="$(dirname "$(readlink -f "$0")")"
diff --git a/tests/stability/soak_parallel_rm.sh b/tests/stability/soak_parallel_rm.sh
index 6bf74ea611..c05c2fade0 100755
--- a/tests/stability/soak_parallel_rm.sh
+++ b/tests/stability/soak_parallel_rm.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright (c) 2017-2018, 2020 Intel Corporation
+# Copyright (c) 2017-2023 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -42,7 +42,7 @@ MAX_CONTAINERS="${MAX_CONTAINERS:-110}"
 
 KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}"
 
-check_vsock_active() {
+function check_vsock_active() {
 	vsock_configured=$($RUNTIME_PATH kata-env | awk '/UseVSock/ {print $3}')
 	vsock_supported=$($RUNTIME_PATH kata-env | awk '/SupportVSock/ {print $3}')
 	if [ "$vsock_configured" == true ] && [ "$vsock_supported" == true ]; then
@@ -52,20 +52,20 @@ check_vsock_active() {
 	fi
 }
 
-count_containers() {
+function count_containers() {
 	sudo ctr c list -q | wc -l
 }
 
-check_all_running() {
+function check_all_running() {
 	local goterror=0
 
-	echo "Checking ${how_many} containers have all relevant components"
+	info "Checking ${how_many} containers have all relevant components"
 
 	# check what docker thinks
 	how_many_running=$(count_containers)
 
 	if (( ${how_many_running} != ${how_many} )); then
-		echo "Wrong number of containers running (${how_many_running} != ${how_many}) - stopping"
+		info "Wrong number of containers running (${how_many_running} != ${how_many}) - stopping"
 		((goterror++))
 	fi
 
@@ -76,7 +76,7 @@ check_all_running() {
 		how_many_shims=$(pgrep -a -f ${SHIM_PATH} | grep containerd.sock | wc -l)
 		# one shim process per container...
 		if (( ${how_many_running} != ${how_many_shims} )); then
-			echo "Wrong number of shims running (${how_many_running} != ${how_many_shims}) - stopping"
+			info "Wrong number of shims running (${how_many_running} != ${how_many_shims}) - stopping"
 			((goterror++))
 		fi
 
@@ -84,7 +84,7 @@ check_all_running() {
 		if [[ "$KATA_HYPERVISOR" != "dragonball" ]]; then
 			how_many_vms=$(pgrep -a $(basename ${HYPERVISOR_PATH} | cut -d '-' -f1) | wc -l)
 			if (( ${how_many_running} != ${how_many_vms} )); then
-				echo "Wrong number of $KATA_HYPERVISOR running (${how_many_running} != ${how_many_vms}) - stopping"
+				info "Wrong number of $KATA_HYPERVISOR running (${how_many_running} != ${how_many_vms}) - stopping"
 				((goterror++))
 			fi
 		fi
@@ -95,7 +95,7 @@ check_all_running() {
 				num_vc_pods=$(sudo ls -1 ${VC_POD_DIR} | wc -l)
 
 				if (( ${how_many_running} != ${num_vc_pods} )); then
-					echo "Wrong number of pods in $VC_POD_DIR (${how_many_running} != ${num_vc_pods}) - stopping)"
+					info "Wrong number of pods in $VC_POD_DIR (${how_many_running} != ${num_vc_pods}) - stopping)"
 					((goterror++))
 				fi
 			fi
@@ -109,12 +109,12 @@ check_all_running() {
 }
 
 # reported system 'available' memory
-get_system_avail() {
+function get_system_avail() {
 	echo $(free -b | head -2 | tail -1 | awk '{print $7}')
 }
 
-go() {
-	echo "Running..."
+function go() {
+	info "Running..."
 
 	how_many=0
 
@@ -129,32 +129,32 @@ go() {
 		done
 
 		if (( ${how_many} >= ${MAX_CONTAINERS} )); then
-			echo "And we have hit the max ${how_many} containers"
+			info "And we have hit the max ${how_many} containers"
 			return
 		fi
 
 		how_much=$(get_system_avail)
 		if (( ${how_much} < ${MEM_CUTOFF} )); then
-			echo "And we are out of memory on container ${how_many} (${how_much} < ${MEM_CUTOFF})"
+			info "And we are out of memory on container ${how_many} (${how_much} < ${MEM_CUTOFF})"
 			return
 		fi
 	}
 	done
 }
 
-count_mounts() {
+function count_mounts() {
 	echo $(mount | wc -l)
 }
 
-check_mounts() {
+function check_mounts() {
 	final_mount_count=$(count_mounts)
 
-	if [[ $final_mount_count < $initial_mount_count ]]; then
-		echo "Final mount count does not match initial count (${final_mount_count} != ${initial_mount_count})"
+	if (( final_mount_count != initial_mount_count )); then # numeric test; '<' inside [[ ]] compares strings
+		info "Final mount count does not match initial count (${final_mount_count} != ${initial_mount_count})"
 	fi
 }
 
-init() {
+function init() {
 	restart_containerd_service
 	extract_kata_env
 	clean_env_ctr
@@ -165,10 +165,10 @@ init() {
 
 	# Only check Kata items if we are using a Kata runtime
 	if [[ "$RUNTIME" == "containerd-shim-kata-v2" ]]; then
-		echo "Checking Kata runtime"
+		info "Checking Kata runtime"
 		check_kata_components=1
 	else
-		echo "Not a Kata runtime, not checking for Kata components"
+		info "Not a Kata runtime, not checking for Kata components"
 		check_kata_components=0
 	fi
 
@@ -183,10 +183,10 @@ init() {
 	fi
 }
 
-spin() {
+function spin() {
 	local i
 	for ((i=1; i<= ITERATIONS; i++)); do {
-		echo "Start iteration $i of $ITERATIONS"
+		info "Start iteration $i of $ITERATIONS"
 		#spin them up
 		go
 		#check we are in a sane state