mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-06-28 08:17:37 +00:00
Merge pull request #8154 from GabyCT/topic/addstability
tests: Enable soak parallel stability test
This commit is contained in:
commit
538131ab44
3
.github/workflows/basic-ci-amd64.yaml
vendored
3
.github/workflows/basic-ci-amd64.yaml
vendored
@ -78,6 +78,9 @@ jobs:
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash tests/stability/gha-run.sh install-dependencies
|
||||
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
|
@ -13,16 +13,38 @@ kata_tarball_dir="${2:-kata-artifacts}"
|
||||
stability_dir="$(dirname "$(readlink -f "$0")")"
|
||||
source "${stability_dir}/../common.bash"
|
||||
|
||||
function install_dependencies() {
|
||||
info "Installing the dependencies needed for running the containerd-stability tests"
|
||||
|
||||
declare -a system_deps=(
|
||||
jq
|
||||
)
|
||||
|
||||
sudo apt-get update
|
||||
sudo apt-get -y install "${system_deps[@]}"
|
||||
|
||||
ensure_yq
|
||||
|
||||
declare -a github_deps
|
||||
github_deps[0]="cri_containerd:$(get_from_kata_deps "externals.containerd.${CONTAINERD_VERSION}")"
|
||||
|
||||
for github_dep in "${github_deps[@]}"; do
|
||||
IFS=":" read -r -a dep <<< "${github_dep}"
|
||||
install_${dep[0]} "${dep[1]}"
|
||||
done
|
||||
}
|
||||
|
||||
function run() {
|
||||
info "Running soak parallel stability tests using ${KATA_HYPERVISOR} hypervisor"
|
||||
|
||||
# export ITERATIONS=2 MAX_CONTAINERS=20
|
||||
# bash "${stability_dir}/soak_parallel_rm.sh"
|
||||
export ITERATIONS=2 MAX_CONTAINERS=20
|
||||
bash "${stability_dir}/soak_parallel_rm.sh"
|
||||
}
|
||||
|
||||
function main() {
|
||||
action="${1:-}"
|
||||
case "${action}" in
|
||||
install-dependencies) install_dependencies ;;
|
||||
install-kata) install_kata ;;
|
||||
enabling-hypervisor) enabling_hypervisor ;;
|
||||
run) run ;;
|
||||
|
208
tests/stability/soak_parallel_rm.sh
Executable file
208
tests/stability/soak_parallel_rm.sh
Executable file
@ -0,0 +1,208 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2017-2023 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# This test will run a number of parallel containers, and then try to
|
||||
# 'rm -f' them all at the same time. It will check after each run and
|
||||
# rm that we have the expected number of containers, shims,
|
||||
# qemus and runtimes active
|
||||
# The goals are two fold:
|
||||
# - spot any stuck or non-started components
|
||||
# - catch any hang ups
|
||||
|
||||
cidir=$(dirname "$0")
|
||||
source "${cidir}/../metrics/lib/common.bash"
|
||||
source "/etc/os-release" || source "/usr/lib/os-release"
|
||||
set -x
|
||||
|
||||
# How many times will we run the test loop...
|
||||
ITERATIONS="${ITERATIONS:-5}"
|
||||
|
||||
# the system 'free available' level where we stop running the tests, as otherwise
|
||||
# the system can crawl to a halt, and/or start refusing to launch new VMs anyway
|
||||
# We choose 2G, as that is one of the default VM sizes for Kata
|
||||
MEM_CUTOFF="${MEM_CUTOFF:-(2*1024*1024*1024)}"
|
||||
|
||||
# do we need a command argument for this payload?
|
||||
COMMAND="${COMMAND:-tail -f /dev/null}"
|
||||
|
||||
# Runtime path
|
||||
RUNTIME_PATH=$(command -v $RUNTIME)
|
||||
|
||||
# The place where virtcontainers keeps its active pod info
|
||||
# This is ultimately what 'kata-runtime list' uses to get its info, but
|
||||
# we can also check it for sanity directly
|
||||
VC_POD_DIR="${VC_POD_DIR:-/run/vc/sbs}"
|
||||
|
||||
# let's cap the test. If you want to run until you hit the memory limit
|
||||
# then just set this to a very large number
|
||||
MAX_CONTAINERS="${MAX_CONTAINERS:-110}"
|
||||
|
||||
KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}"
|
||||
|
||||
function check_vsock_active() {
|
||||
vsock_configured=$($RUNTIME_PATH kata-env | awk '/UseVSock/ {print $3}')
|
||||
vsock_supported=$($RUNTIME_PATH kata-env | awk '/SupportVSock/ {print $3}')
|
||||
if [ "$vsock_configured" == true ] && [ "$vsock_supported" == true ]; then
|
||||
return 0
|
||||
else
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
function count_containers() {
|
||||
sudo ctr c list -q | wc -l
|
||||
}
|
||||
|
||||
function check_all_running() {
|
||||
local goterror=0
|
||||
|
||||
info "Checking ${how_many} containers have all relevant components"
|
||||
|
||||
# check what docker thinks
|
||||
how_many_running=$(count_containers)
|
||||
|
||||
if (( ${how_many_running} != ${how_many} )); then
|
||||
info "Wrong number of containers running (${how_many_running} != ${how_many}) - stopping"
|
||||
((goterror++))
|
||||
fi
|
||||
|
||||
# Only check for Kata components if we are using a Kata runtime
|
||||
if (( $check_kata_components )); then
|
||||
|
||||
# check we have the right number of shims
|
||||
how_many_shims=$(pgrep -a -f ${SHIM_PATH} | grep containerd.sock | wc -l)
|
||||
# one shim process per container...
|
||||
if (( ${how_many_running} != ${how_many_shims} )); then
|
||||
info "Wrong number of shims running (${how_many_running} != ${how_many_shims}) - stopping"
|
||||
((goterror++))
|
||||
fi
|
||||
|
||||
# check we have the right number of vm's
|
||||
if [[ "$KATA_HYPERVISOR" != "dragonball" ]]; then
|
||||
how_many_vms=$(pgrep -a $(basename ${HYPERVISOR_PATH} | cut -d '-' -f1) | wc -l)
|
||||
if (( ${how_many_running} != ${how_many_vms} )); then
|
||||
info "Wrong number of $KATA_HYPERVISOR running (${how_many_running} != ${how_many_vms}) - stopping"
|
||||
((goterror++))
|
||||
fi
|
||||
fi
|
||||
|
||||
# if this is kata-runtime, check how many pods virtcontainers thinks we have
|
||||
if [[ "$RUNTIME" == "containerd-shim-kata-v2" ]]; then
|
||||
if [ -d "${VC_POD_DIR}" ]; then
|
||||
num_vc_pods=$(sudo ls -1 ${VC_POD_DIR} | wc -l)
|
||||
|
||||
if (( ${how_many_running} != ${num_vc_pods} )); then
|
||||
info "Wrong number of pods in $VC_POD_DIR (${how_many_running} != ${num_vc_pods}) - stopping)"
|
||||
((goterror++))
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
if (( goterror != 0 )); then
|
||||
show_system_ctr_state
|
||||
die "Got $goterror errors, quitting"
|
||||
fi
|
||||
}
|
||||
|
||||
# reported system 'available' memory
|
||||
function get_system_avail() {
|
||||
echo $(free -b | head -2 | tail -1 | awk '{print $7}')
|
||||
}
|
||||
|
||||
function go() {
|
||||
info "Running..."
|
||||
|
||||
how_many=0
|
||||
|
||||
while true; do {
|
||||
check_all_running
|
||||
|
||||
local i
|
||||
for ((i=1; i<= ${MAX_CONTAINERS}; i++)); do
|
||||
containers+=($(random_name))
|
||||
sudo ctr run --runtime=${CTR_RUNTIME} -d ${nginx_image} ${containers[-1]} sh -c ${COMMAND}
|
||||
((how_many++))
|
||||
done
|
||||
|
||||
if (( ${how_many} >= ${MAX_CONTAINERS} )); then
|
||||
info "And we have hit the max ${how_many} containers"
|
||||
return
|
||||
fi
|
||||
|
||||
how_much=$(get_system_avail)
|
||||
if (( ${how_much} < ${MEM_CUTOFF} )); then
|
||||
info "And we are out of memory on container ${how_many} (${how_much} < ${MEM_CUTOFF})"
|
||||
return
|
||||
fi
|
||||
}
|
||||
done
|
||||
}
|
||||
|
||||
function count_mounts() {
|
||||
echo $(mount | wc -l)
|
||||
}
|
||||
|
||||
function check_mounts() {
|
||||
final_mount_count=$(count_mounts)
|
||||
|
||||
if [[ $final_mount_count < $initial_mount_count ]]; then
|
||||
info "Final mount count does not match initial count (${final_mount_count} != ${initial_mount_count})"
|
||||
fi
|
||||
}
|
||||
|
||||
function init() {
|
||||
restart_containerd_service
|
||||
extract_kata_env
|
||||
clean_env_ctr
|
||||
|
||||
# remember how many mount points we had before we do anything
|
||||
# and then sanity check we end up with no new ones dangling at the end
|
||||
initial_mount_count=$(count_mounts)
|
||||
|
||||
# Only check Kata items if we are using a Kata runtime
|
||||
if [[ "$RUNTIME" == "containerd-shim-kata-v2" ]]; then
|
||||
info "Checking Kata runtime"
|
||||
check_kata_components=1
|
||||
else
|
||||
info "Not a Kata runtime, not checking for Kata components"
|
||||
check_kata_components=0
|
||||
fi
|
||||
|
||||
versions_file="${cidir}/../versions.yaml"
|
||||
nginx_version=$("${GOPATH}/bin/yq" read "$versions_file" "docker_images.nginx.version")
|
||||
nginx_image="docker.io/library/nginx:$nginx_version"
|
||||
|
||||
# Pull nginx image
|
||||
sudo ctr image pull ${nginx_image}
|
||||
if [ $? != 0 ]; then
|
||||
die "Unable to retry docker image ${nginx_image}"
|
||||
fi
|
||||
}
|
||||
|
||||
function spin() {
|
||||
local i
|
||||
for ((i=1; i<= ITERATIONS; i++)); do {
|
||||
info "Start iteration $i of $ITERATIONS"
|
||||
#spin them up
|
||||
go
|
||||
#check we are in a sane state
|
||||
check_all_running
|
||||
#shut them all down
|
||||
clean_env_ctr
|
||||
#Note there should be none running
|
||||
how_many=0
|
||||
#and check they all died
|
||||
check_all_running
|
||||
#and that we have no dangling mounts
|
||||
check_mounts
|
||||
}
|
||||
done
|
||||
|
||||
}
|
||||
|
||||
init
|
||||
spin
|
@ -395,3 +395,11 @@ plugins:
|
||||
available on a Kubernetes host.
|
||||
url: "https://github.com/k8snetworkplumbingwg/sriov-network-device-plugin"
|
||||
version: "b7f6d3e0679796e907ecca88cfab0e32e326850d"
|
||||
|
||||
docker_images:
|
||||
description: "Docker hub images used for testing"
|
||||
|
||||
nginx:
|
||||
description: "Proxy server for HTTP, HTTPS, SMTP, POP3 and IMAP protocols"
|
||||
url: "https://hub.docker.com/_/nginx/"
|
||||
version: "1.15-alpine"
|
||||
|
Loading…
Reference in New Issue
Block a user