From 0f04d527d9cb0f0a981bda377ed7853f34d78802 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Fri, 6 Oct 2023 15:23:03 +0000 Subject: [PATCH 1/4] tests: Enable soak parallel test This PR enables the soak parallel test for the stability tests. Fixes #8153 Signed-off-by: Gabriela Cervantes --- tests/stability/gha-run.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/stability/gha-run.sh b/tests/stability/gha-run.sh index 66e6f21c05..e05ae5aa40 100755 --- a/tests/stability/gha-run.sh +++ b/tests/stability/gha-run.sh @@ -8,6 +8,7 @@ set -o errexit set -o nounset set -o pipefail +set -x kata_tarball_dir="${2:-kata-artifacts}" stability_dir="$(dirname "$(readlink -f "$0")")" @@ -16,8 +17,8 @@ source "${stability_dir}/../common.bash" function run() { info "Running soak parallel stability tests using ${KATA_HYPERVISOR} hypervisor" - # export ITERATIONS=2 MAX_CONTAINERS=20 - # bash "${stability_dir}/soak_parallel_rm.sh" + export ITERATIONS=2 MAX_CONTAINERS=20 + bash "${stability_dir}/soak_parallel_rm.sh" } function main() { From dec3951ca55fbb1c2e3dd23551a815fcdebe7d11 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Fri, 6 Oct 2023 15:27:28 +0000 Subject: [PATCH 2/4] tests: Add soak parallel stability test This PR adds the soak parallel stability test. Signed-off-by: Gabriela Cervantes --- tests/stability/soak_parallel_rm.sh | 208 ++++++++++++++++++++++++++++ versions.yaml | 8 ++ 2 files changed, 216 insertions(+) create mode 100755 tests/stability/soak_parallel_rm.sh diff --git a/tests/stability/soak_parallel_rm.sh b/tests/stability/soak_parallel_rm.sh new file mode 100755 index 0000000000..6bf74ea611 --- /dev/null +++ b/tests/stability/soak_parallel_rm.sh @@ -0,0 +1,208 @@ +#!/bin/bash +# +# Copyright (c) 2017-2018, 2020 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +# This test will run a number of parallel containers, and then try to +# 'rm -f' them all at the same time. 
It will check after each run and +# rm that we have the expected number of containers, shims, +# qemus and runtimes active +# The goals are twofold: +# - spot any stuck or non-started components +# - catch any hang ups + +cidir=$(dirname "$0") +source "${cidir}/../metrics/lib/common.bash" +source "/etc/os-release" || source "/usr/lib/os-release" +set -x + +# How many times will we run the test loop... +ITERATIONS="${ITERATIONS:-5}" + +# the system 'free available' level where we stop running the tests, as otherwise +# the system can crawl to a halt, and/or start refusing to launch new VMs anyway +# We choose 2G, as that is one of the default VM sizes for Kata +MEM_CUTOFF="${MEM_CUTOFF:-(2*1024*1024*1024)}" + +# do we need a command argument for this payload? +COMMAND="${COMMAND:-tail -f /dev/null}" + +# Runtime path +RUNTIME_PATH=$(command -v $RUNTIME) + +# The place where virtcontainers keeps its active pod info +# This is ultimately what 'kata-runtime list' uses to get its info, but +# we can also check it for sanity directly +VC_POD_DIR="${VC_POD_DIR:-/run/vc/sbs}" + +# let's cap the test. 
If you want to run until you hit the memory limit +# then just set this to a very large number +MAX_CONTAINERS="${MAX_CONTAINERS:-110}" + +KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}" + +check_vsock_active() { + vsock_configured=$($RUNTIME_PATH kata-env | awk '/UseVSock/ {print $3}') + vsock_supported=$($RUNTIME_PATH kata-env | awk '/SupportVSock/ {print $3}') + if [ "$vsock_configured" == true ] && [ "$vsock_supported" == true ]; then + return 0 + else + return 1 + fi +} + +count_containers() { + sudo ctr c list -q | wc -l +} + +check_all_running() { + local goterror=0 + + echo "Checking ${how_many} containers have all relevant components" + + # check what docker thinks + how_many_running=$(count_containers) + + if (( ${how_many_running} != ${how_many} )); then + echo "Wrong number of containers running (${how_many_running} != ${how_many}) - stopping" + ((goterror++)) + fi + + # Only check for Kata components if we are using a Kata runtime + if (( $check_kata_components )); then + + # check we have the right number of shims + how_many_shims=$(pgrep -a -f ${SHIM_PATH} | grep containerd.sock | wc -l) + # one shim process per container... 
+ if (( ${how_many_running} != ${how_many_shims} )); then + echo "Wrong number of shims running (${how_many_running} != ${how_many_shims}) - stopping" + ((goterror++)) + fi + + # check we have the right number of vm's + if [[ "$KATA_HYPERVISOR" != "dragonball" ]]; then + how_many_vms=$(pgrep -a $(basename ${HYPERVISOR_PATH} | cut -d '-' -f1) | wc -l) + if (( ${how_many_running} != ${how_many_vms} )); then + echo "Wrong number of $KATA_HYPERVISOR running (${how_many_running} != ${how_many_vms}) - stopping" + ((goterror++)) + fi + fi + + # if this is kata-runtime, check how many pods virtcontainers thinks we have + if [[ "$RUNTIME" == "containerd-shim-kata-v2" ]]; then + if [ -d "${VC_POD_DIR}" ]; then + num_vc_pods=$(sudo ls -1 ${VC_POD_DIR} | wc -l) + + if (( ${how_many_running} != ${num_vc_pods} )); then + echo "Wrong number of pods in $VC_POD_DIR (${how_many_running} != ${num_vc_pods}) - stopping)" + ((goterror++)) + fi + fi + fi + fi + + if (( goterror != 0 )); then + show_system_ctr_state + die "Got $goterror errors, quitting" + fi +} + +# reported system 'available' memory +get_system_avail() { + echo $(free -b | head -2 | tail -1 | awk '{print $7}') +} + +go() { + echo "Running..." 
+ + how_many=0 + + while true; do { + check_all_running + + local i + for ((i=1; i<= ${MAX_CONTAINERS}; i++)); do + containers+=($(random_name)) + sudo ctr run --runtime=${CTR_RUNTIME} -d ${nginx_image} ${containers[-1]} sh -c ${COMMAND} + ((how_many++)) + done + + if (( ${how_many} >= ${MAX_CONTAINERS} )); then + echo "And we have hit the max ${how_many} containers" + return + fi + + how_much=$(get_system_avail) + if (( ${how_much} < ${MEM_CUTOFF} )); then + echo "And we are out of memory on container ${how_many} (${how_much} < ${MEM_CUTOFF})" + return + fi + } + done +} + +count_mounts() { + echo $(mount | wc -l) +} + +check_mounts() { + final_mount_count=$(count_mounts) + + if [[ $final_mount_count < $initial_mount_count ]]; then + echo "Final mount count does not match initial count (${final_mount_count} != ${initial_mount_count})" + fi +} + +init() { + restart_containerd_service + extract_kata_env + clean_env_ctr + + # remember how many mount points we had before we do anything + # and then sanity check we end up with no new ones dangling at the end + initial_mount_count=$(count_mounts) + + # Only check Kata items if we are using a Kata runtime + if [[ "$RUNTIME" == "containerd-shim-kata-v2" ]]; then + echo "Checking Kata runtime" + check_kata_components=1 + else + echo "Not a Kata runtime, not checking for Kata components" + check_kata_components=0 + fi + + versions_file="${cidir}/../versions.yaml" + nginx_version=$("${GOPATH}/bin/yq" read "$versions_file" "docker_images.nginx.version") + nginx_image="docker.io/library/nginx:$nginx_version" + + # Pull nginx image + sudo ctr image pull ${nginx_image} + if [ $? 
!= 0 ]; then + die "Unable to retry docker image ${nginx_image}" + fi +} + +spin() { + local i + for ((i=1; i<= ITERATIONS; i++)); do { + echo "Start iteration $i of $ITERATIONS" + #spin them up + go + #check we are in a sane state + check_all_running + #shut them all down + clean_env_ctr + #Note there should be none running + how_many=0 + #and check they all died + check_all_running + #and that we have no dangling mounts + check_mounts + } + done + +} + +init +spin diff --git a/versions.yaml b/versions.yaml index 47c74d907d..7851f3deb6 100644 --- a/versions.yaml +++ b/versions.yaml @@ -395,3 +395,11 @@ plugins: available on a Kubernetes host. url: "https://github.com/k8snetworkplumbingwg/sriov-network-device-plugin" version: "b7f6d3e0679796e907ecca88cfab0e32e326850d" + +docker_images: + description: "Docker hub images used for testing" + + nginx: + description: "Proxy server for HTTP, HTTPS, SMTP, POP3 and IMAP protocols" + url: "https://hub.docker.com/_/nginx/" + version: "1.15-alpine" From 84e3d884e4c7dfde4e5bec38920af8952a9df60f Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Fri, 6 Oct 2023 18:41:43 +0000 Subject: [PATCH 3/4] gha: Add general dependencies to stability tests This PR adds the general dependencies to stability tests. 
Signed-off-by: Gabriela Cervantes --- tests/stability/gha-run.sh | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/stability/gha-run.sh b/tests/stability/gha-run.sh index e05ae5aa40..586e45015c 100755 --- a/tests/stability/gha-run.sh +++ b/tests/stability/gha-run.sh @@ -14,6 +14,27 @@ kata_tarball_dir="${2:-kata-artifacts}" stability_dir="$(dirname "$(readlink -f "$0")")" source "${stability_dir}/../common.bash" +function install_dependencies() { + info "Installing the dependencies needed for running the containerd-stability tests" + + declare -a system_deps=( + jq + ) + + sudo apt-get update + sudo apt-get -y install "${system_deps[@]}" + + ensure_yq + + declare -a github_deps + github_deps[0]="cri_containerd:$(get_from_kata_deps "externals.containerd.${CONTAINERD_VERSION}")" + + for github_dep in "${github_deps[@]}"; do + IFS=":" read -r -a dep <<< "${github_dep}" + install_${dep[0]} "${dep[1]}" + done +} + function run() { info "Running soak parallel stability tests using ${KATA_HYPERVISOR} hypervisor" @@ -24,6 +45,7 @@ function run() { function main() { action="${1:-}" case "${action}" in + install-dependencies) install_dependencies ;; install-kata) install_kata ;; enabling-hypervisor) enabling_hypervisor ;; run) run ;; From e786b2b019dab49716e2ab40b7cb57b19686859d Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Mon, 9 Oct 2023 17:09:32 +0000 Subject: [PATCH 4/4] gha: Add install dependencies for stability tests This PR adds the install dependencies for stability tests. 
Signed-off-by: Gabriela Cervantes --- .github/workflows/basic-ci-amd64.yaml | 3 ++ tests/stability/gha-run.sh | 1 - tests/stability/soak_parallel_rm.sh | 44 +++++++++++++-------------- 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/.github/workflows/basic-ci-amd64.yaml b/.github/workflows/basic-ci-amd64.yaml index 5780605ede..6ac0a52401 100644 --- a/.github/workflows/basic-ci-amd64.yaml +++ b/.github/workflows/basic-ci-amd64.yaml @@ -78,6 +78,9 @@ jobs: env: TARGET_BRANCH: ${{ inputs.target-branch }} + - name: Install dependencies + run: bash tests/stability/gha-run.sh install-dependencies + - name: get-kata-tarball uses: actions/download-artifact@v3 with: diff --git a/tests/stability/gha-run.sh b/tests/stability/gha-run.sh index 586e45015c..01672534a4 100755 --- a/tests/stability/gha-run.sh +++ b/tests/stability/gha-run.sh @@ -8,7 +8,6 @@ set -o errexit set -o nounset set -o pipefail -set -x kata_tarball_dir="${2:-kata-artifacts}" stability_dir="$(dirname "$(readlink -f "$0")")" diff --git a/tests/stability/soak_parallel_rm.sh b/tests/stability/soak_parallel_rm.sh index 6bf74ea611..c05c2fade0 100755 --- a/tests/stability/soak_parallel_rm.sh +++ b/tests/stability/soak_parallel_rm.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# Copyright (c) 2017-2018, 2020 Intel Corporation +# Copyright (c) 2017-2023 Intel Corporation # # SPDX-License-Identifier: Apache-2.0 # @@ -42,7 +42,7 @@ MAX_CONTAINERS="${MAX_CONTAINERS:-110}" KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}" -check_vsock_active() { +function check_vsock_active() { vsock_configured=$($RUNTIME_PATH kata-env | awk '/UseVSock/ {print $3}') vsock_supported=$($RUNTIME_PATH kata-env | awk '/SupportVSock/ {print $3}') if [ "$vsock_configured" == true ] && [ "$vsock_supported" == true ]; then @@ -52,20 +52,20 @@ check_vsock_active() { fi } -count_containers() { +function count_containers() { sudo ctr c list -q | wc -l } -check_all_running() { +function check_all_running() { local goterror=0 - echo "Checking ${how_many} 
containers have all relevant components" + info "Checking ${how_many} containers have all relevant components" # check what docker thinks how_many_running=$(count_containers) if (( ${how_many_running} != ${how_many} )); then - echo "Wrong number of containers running (${how_many_running} != ${how_many}) - stopping" + info "Wrong number of containers running (${how_many_running} != ${how_many}) - stopping" ((goterror++)) fi @@ -76,7 +76,7 @@ check_all_running() { how_many_shims=$(pgrep -a -f ${SHIM_PATH} | grep containerd.sock | wc -l) # one shim process per container... if (( ${how_many_running} != ${how_many_shims} )); then - echo "Wrong number of shims running (${how_many_running} != ${how_many_shims}) - stopping" + info "Wrong number of shims running (${how_many_running} != ${how_many_shims}) - stopping" ((goterror++)) fi @@ -84,7 +84,7 @@ check_all_running() { if [[ "$KATA_HYPERVISOR" != "dragonball" ]]; then how_many_vms=$(pgrep -a $(basename ${HYPERVISOR_PATH} | cut -d '-' -f1) | wc -l) if (( ${how_many_running} != ${how_many_vms} )); then - echo "Wrong number of $KATA_HYPERVISOR running (${how_many_running} != ${how_many_vms}) - stopping" + info "Wrong number of $KATA_HYPERVISOR running (${how_many_running} != ${how_many_vms}) - stopping" ((goterror++)) fi fi @@ -95,7 +95,7 @@ check_all_running() { num_vc_pods=$(sudo ls -1 ${VC_POD_DIR} | wc -l) if (( ${how_many_running} != ${num_vc_pods} )); then - echo "Wrong number of pods in $VC_POD_DIR (${how_many_running} != ${num_vc_pods}) - stopping)" + info "Wrong number of pods in $VC_POD_DIR (${how_many_running} != ${num_vc_pods}) - stopping)" ((goterror++)) fi fi @@ -109,12 +109,12 @@ check_all_running() { } # reported system 'available' memory -get_system_avail() { +function get_system_avail() { echo $(free -b | head -2 | tail -1 | awk '{print $7}') } -go() { - echo "Running..." +function go() { + info "Running..." 
how_many=0 @@ -129,32 +129,32 @@ go() { done if (( ${how_many} >= ${MAX_CONTAINERS} )); then - echo "And we have hit the max ${how_many} containers" + info "And we have hit the max ${how_many} containers" return fi how_much=$(get_system_avail) if (( ${how_much} < ${MEM_CUTOFF} )); then - echo "And we are out of memory on container ${how_many} (${how_much} < ${MEM_CUTOFF})" + info "And we are out of memory on container ${how_many} (${how_much} < ${MEM_CUTOFF})" return fi } done } -count_mounts() { +function count_mounts() { echo $(mount | wc -l) } -check_mounts() { +function check_mounts() { final_mount_count=$(count_mounts) if [[ $final_mount_count < $initial_mount_count ]]; then - echo "Final mount count does not match initial count (${final_mount_count} != ${initial_mount_count})" + info "Final mount count does not match initial count (${final_mount_count} != ${initial_mount_count})" fi } -init() { +function init() { restart_containerd_service extract_kata_env clean_env_ctr @@ -165,10 +165,10 @@ init() { # Only check Kata items if we are using a Kata runtime if [[ "$RUNTIME" == "containerd-shim-kata-v2" ]]; then - echo "Checking Kata runtime" + info "Checking Kata runtime" check_kata_components=1 else - echo "Not a Kata runtime, not checking for Kata components" + info "Not a Kata runtime, not checking for Kata components" check_kata_components=0 fi @@ -183,10 +183,10 @@ init() { fi } -spin() { +function spin() { local i for ((i=1; i<= ITERATIONS; i++)); do { - echo "Start iteration $i of $ITERATIONS" + info "Start iteration $i of $ITERATIONS" #spin them up go #check we are in a sane state