kata-containers/tests/metrics/density/fast_footprint.sh
Gabriela Cervantes 52ef092489 metrics: Update fast footprint script to use grep
This PR updates the fast footprint script to remove the use
of egrep as this command has been deprecated and change it
to use grep command.

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2024-09-30 17:43:08 +00:00

434 lines
11 KiB
Bash
Executable File

#!/bin/bash
# Copyright (c) 2017-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# A script to gather memory 'footprint' information as we launch more
# and more containers
#
# The script gathers information about both user and kernel space consumption
# Output is into a .json file, named using some of the config component names
# (such as footprint-busybox.json)
# Work out the directory this script really lives in (readlink -f resolves
# any symlinks in $0) and load the shared helper library relative to it.
SCRIPT_PATH=$(readlink -f "$0")
SCRIPT_PATH=$(dirname "${SCRIPT_PATH}")
. "${SCRIPT_PATH}/../lib/common.bash"
# Every tunable below can be supplied through the environment. The ':='
# expansion only installs the default when the variable is unset or empty,
# so a value passed in by the caller always wins.

# Seconds to let freshly launched containers come up and finish their
# initialisation before measuring. Larger payloads can need several seconds.
: "${PAYLOAD_SLEEP:=10}"

# Upper bound, in seconds, on how long we wait for KSM to 'settle down'
# before giving up and carrying on regardless.
: "${KSM_WAIT_TIME:=300}"

# Upper bound, in seconds, on how long we poll ctr for all the containers
# to finish launching.
: "${CTR_POLL_TIMEOUT:=300}"

# Number of containers launched in parallel before each PAYLOAD_SLEEP nap.
: "${PARALLELISM:=10}"

### Default workload: a small busybox image that just idles.
: "${PAYLOAD:=quay.io/prometheus/busybox:latest}"
: "${PAYLOAD_ARGS:=tail -f /dev/null}"

### RUNTIME is picked up from the environment by common.bash; set RUNTIME
### in your environment to over-ride it.

### Cutoff checks that decide when the test stops launching containers.
# Launch at most this many containers.
: "${NUM_CONTAINERS:=100}"
# Stop once this much memory has been consumed (measured from MemFree).
# Kept as an arithmetic expression string; it is evaluated where it is
# compared.
: "${MAX_MEMORY_CONSUMED:=256*1024*1024*1024}"
# Stop once MemFree drops to this level. Also an expression string.
: "${MIN_MEMORY_FREE:=2*1024*1024*1024}"

# Commands that must be installed for this script to work.
REQUIRED_COMMANDS="smem awk"

# Dropping the system caches before measuring gives less noisy results,
# closer to the un-reclaimable footprint.
: "${DUMP_CACHES:=1}"

# Used in the name of the file the results are stored in.
: "${TEST_NAME:=fast-footprint-busybox}"

############# end of configurable items ###################

# Baseline readings, remembered so later measurements can be reported as
# differences from the starting point.
base_mem_avail=0
base_mem_free=0
# Drop the kernel caches so the measurements that follow give a more
# precise (or at least different) view of the real footprint.
# Writes 3 to /proc/sys/vm/drop_caches from a root shell; the redirection
# has to happen inside the sudo'd shell for the write to be privileged.
dump_caches() {
    sudo bash -c 'echo 3 > /proc/sys/vm/drop_caches'
}
# Bring the system to a known state and record the starting point.
# Globals written: base_mem_avail, base_mem_free.
# Globals read:    REQUIRED_COMMANDS, CTR_EXE, PAYLOAD.
# The helpers called here are not defined in this file (presumably they
# come from the sourced common.bash).
init() {
    restart_containerd_service

    # Deliberately unquoted: each required command becomes its own argument.
    check_cmds $REQUIRED_COMMANDS

    # Fetch the payload image up front.
    sudo -E "${CTR_EXE}" image pull "${PAYLOAD}"

    # KSM detection first (the original note says this may alter the test
    # name when KSM is enabled), then the common environment reset.
    check_for_ksm
    init_env

    # From here on results can be stored.
    metrics_json_init

    # Baselines: column 7 of the second line of 'free -b', and the free
    # memory figure reported by get_memfree.
    base_mem_avail=$(free -b | head -n 2 | tail -n 1 | awk '{print $7}')
    base_mem_free=$(get_memfree)

    # Record the configuration used for this run alongside the results.
    save_config
}
# Store the configuration of this run in the results as a one-element
# JSON array named "Config".
# Numeric settings are emitted bare; the two memory limits and the
# dump_caches flag are emitted as strings.
save_config() {
    local config_json

    metrics_json_start_array

    config_json=$(cat <<EOF
{
"testname": "${TEST_NAME}",
"payload": "${PAYLOAD}",
"payload_args": "${PAYLOAD_ARGS}",
"payload_sleep": ${PAYLOAD_SLEEP},
"ksm_settle_time": ${KSM_WAIT_TIME},
"num_containers": ${NUM_CONTAINERS},
"parallelism": ${PARALLELISM},
"max_memory_consumed": "${MAX_MEMORY_CONSUMED}",
"min_memory_free": "${MIN_MEMORY_FREE}",
"dump_caches": "${DUMP_CACHES}"
}
EOF
    )

    metrics_json_add_array_element "${config_json}"
    metrics_json_end_array "Config"
}
# Wrap up a run: write out the collected results first, then tear down
# the ctr environment.
cleanup() {
    metrics_json_save
    clean_env_ctr
}
# Print the USS of the process(es) whose name starts with $1.
# Takes field 4 of the last line of 'smem -t -P "^$1"' and multiplies it
# by 1024 before printing.
# Note: 'item' is a plain global here, as it is elsewhere in this file.
get_proc_uss() {
    item=$(
        sudo smem -t -P "^$1" \
            | tail -n 1 \
            | awk '{print $4}'
    )
    (( item *= 1024 ))
    echo ${item}
}
# Print the PSS of the process(es) whose name starts with $1.
# Takes field 5 of the last line of 'smem -t -P "^$1"' and multiplies it
# by 1024 before printing.
# Note: 'item' is a plain global here, as it is elsewhere in this file.
get_proc_pss() {
    item=$(
        sudo smem -t -P "^$1" \
            | tail -n 1 \
            | awk '{print $5}'
    )
    (( item *= 1024 ))
    echo ${item}
}
# Record the PSS of the whole of userspace (every process) as an
# "all_pss" results fragment.
# This shows any impact on the rest of the system: for instance dockerd
# grows as containers are launched, and that growth belongs in the total
# memory breakdown.
# 'item' is intentionally left global: another function in this file
# reads it after this one has run.
# NOTE(review): the value is multiplied by 1024 yet the fragment is
# labelled "KB" - confirm what consumers of the results expect.
grab_all_pss() {
    local fragment

    # Field 5 of the last line of 'smem -t'.
    item=$(sudo smem -t | tail -n 1 | awk '{print $5}')
    (( item *= 1024 ))

    printf -v fragment '"all_pss": {\n"pss": %s,\n"Units": "KB"\n}' "${item}"
    metrics_json_add_array_fragment "${fragment}"
}
# Record the userspace memory figure reported by 'smem -w' as a
# "user_smem" results fragment.
# 'item' is intentionally left global: another function in this file
# reads it after this one has run.
# NOTE(review): the value is multiplied by 1024 yet the fragment is
# labelled "KB" - confirm what consumers of the results expect.
grab_user_smem() {
    local fragment

    # Field 3 of the fifth line of 'smem -w' (the userspace row).
    item=$(sudo smem -w | head -n 5 | tail -n 1 | awk '{print $3}')
    (( item *= 1024 ))

    printf -v fragment '"user_smem": {\n"userspace": %s,\n"Units": "KB"\n}' "${item}"
    metrics_json_add_array_fragment "${fragment}"
}
# Record the kernel slab total as a "slab" results fragment.
# The total is read straight from the "Slab:" line of /proc/meminfo,
# which is easier than doing the sums over slabinfo.
# 'item' is intentionally left global: another function in this file
# reads 'item' and must keep seeing the same value it sees today.
# NOTE(review): the value is multiplied by 1024 yet the fragment is
# labelled "KB" - confirm what consumers of the results expect.
grab_slab() {
    local fragment

    item=$(grep -F "Slab:" /proc/meminfo | awk '{print $2}')
    (( item *= 1024 ))

    printf -v fragment '"slab": {\n"slab": %s,\n"Units": "KB"\n}' "${item}"
    metrics_json_add_array_fragment "${fragment}"
}
# Print the free-memory figure taken from 'smem -w': field 4 of the sixth
# line of its output, multiplied by 1024.
# 'mem_free' is a plain global, exactly as before.
get_memfree() {
    mem_free=$(
        sudo smem -w \
            | head -n 6 \
            | tail -n 1 \
            | awk '{print $4}'
    )
    (( mem_free *= 1024 ))
    echo ${mem_free}
}
# Record a system-wide memory overview as a "system" results fragment.
# Globals read: base_mem_avail, base_mem_free (baselines stored at init).
# Emits:
#   avail / cached   - columns 7 and 6 of the second line of 'free -b'
#   avail_decr       - base_mem_avail minus the current avail
#   smem_free        - current free memory as reported by get_memfree
#   free_decr        - base_mem_free minus the current smem_free
#   anon / mapped / meminfo_cached
#                    - AnonPages, Mapped and Cached from /proc/meminfo,
#                      each multiplied by 1024
# NOTE(review): the meminfo values are multiplied by 1024 and 'free -b' is
# used for avail/cached, yet the fragment is labelled "KB" - confirm what
# consumers of the results expect.
grab_system() {
    local avail avail_decr cached smem_free free_decr
    local anon mapped meminfo_cached json

    # avail memory, from 'free'
    avail=$(free -b | head -2 | tail -1 | awk '{print $7}')
    avail_decr=$(( base_mem_avail - avail ))

    # cached memory, from 'free'
    cached=$(free -b | head -2 | tail -1 | awk '{print $6}')

    # free memory from smem; the decrease is measured against the value
    # just read. (This used to subtract the unrelated global 'item', left
    # behind by whichever grab_* helper ran last.)
    smem_free=$(get_memfree)
    free_decr=$(( base_mem_free - smem_free ))

    # Anon pages
    anon=$(grep -F "AnonPages:" /proc/meminfo | awk '{print $2}')
    (( anon *= 1024 ))

    # Mapped pages
    mapped=$(grep "^Mapped:" /proc/meminfo | awk '{print $2}')
    (( mapped *= 1024 ))

    # Cached (anchored so that e.g. SwapCached does not match)
    meminfo_cached=$(grep "^Cached:" /proc/meminfo | awk '{print $2}')
    (( meminfo_cached *= 1024 ))

    json=$(cat <<EOF
"system": {
"avail": $avail,
"avail_decr": $avail_decr,
"cached": $cached,
"smem_free": $smem_free,
"free_decr": $free_decr,
"anon": $anon,
"mapped": $mapped,
"meminfo_cached": $meminfo_cached,
"Units": "KB"
}
EOF
    )

    metrics_json_add_array_fragment "$json"
}
# Take one complete set of measurements and close the current results
# array element.
# Globals read: DUMP_CACHES - caches are dropped first unless it is empty
#               or "0". (A plain non-empty test treated "0" as enabled, so
#               the flag could not be switched off with 0.)
grab_stats() {
    # If configured, dump the caches so we get a more stable view of what
    # our static footprint really is.
    if [[ -n "${DUMP_CACHES}" && "${DUMP_CACHES}" != "0" ]]; then
        dump_caches
    fi

    # User space data: PSS across all of userspace, then the userspace
    # figure as reported by smem.
    grab_all_pss
    grab_user_smem

    # System overview: free, available and cached memory.
    grab_system

    # Kernel data. The total kernel space taken can be worked out as
    #   ktotal = ((free-avail)-user)
    # which is what smem does internally, so it is not grabbed separately.
    # Finer kernel detail is still collected where available: slab total.
    grab_slab

    metrics_json_close_array_element
}
# Decide whether one of the memory cutoffs has been reached.
# Prints 1 when the test should stop launching containers, 0 otherwise.
# Globals read: MIN_MEMORY_FREE, MAX_MEMORY_CONSUMED (both may be
#               arithmetic expression strings), base_mem_free.
# Memory consumed is measured free-against-free: the baseline free memory
# stored at init minus the current free memory. (The previous code
# subtracted the current free memory from the 'available' baseline, which
# compares two different quantities.)
check_limits() {
    local mem_free mem_consumed

    mem_free=$(get_memfree)
    if (( mem_free <= MIN_MEMORY_FREE )); then
        echo 1
        return
    fi

    mem_consumed=$(( base_mem_free - mem_free ))
    if (( mem_consumed >= MAX_MEMORY_CONSUMED )); then
        echo 1
        return
    fi

    echo 0
}
# Start a single detached container in the background and remember its
# name in the global 'containers' array.
# PAYLOAD_ARGS is passed quoted so that 'sh -c' receives the complete
# command line as one string; unquoted, only its first word would be the
# command and the remaining words would become positional parameters.
_fast_footprint_launch_one() {
    containers+=("$(random_name)")
    sudo -E "${CTR_EXE}" run -d --runtime="${CTR_RUNTIME}" "${PAYLOAD}" \
        "${containers[-1]}" sh -c "${PAYLOAD_ARGS}" &
}

# Launch NUM_CONTAINERS containers in batches of PARALLELISM.
# After each full batch: sleep PAYLOAD_SLEEP seconds (when set), then stop
# early if check_limits reports that a memory cutoff was reached. Any
# remainder smaller than a batch is launched at the end.
# Globals read:    NUM_CONTAINERS, PARALLELISM, PAYLOAD_SLEEP, CTR_EXE,
#                  CTR_RUNTIME, PAYLOAD, PAYLOAD_ARGS.
# Globals written: containers (reset, then one name per launch).
# Returns: 1 if PARALLELISM is below 1 (it is used as a divisor),
#          0 otherwise. Launches are backgrounded and not waited for here.
launch_containers() {
    local parloops leftovers iter n

    if (( PARALLELISM < 1 )); then
        echo "PARALLELISM must be at least 1 (got '${PARALLELISM}')" >&2
        return 1
    fi

    parloops=$(( NUM_CONTAINERS / PARALLELISM ))
    leftovers=$(( NUM_CONTAINERS - (parloops * PARALLELISM) ))

    echo "Launching ${parloops}x${PARALLELISM} containers + ${leftovers} extras"

    containers=()

    # Counted loops rather than 'seq': some seq implementations count
    # downwards for 'seq 1 0' instead of printing nothing.
    for (( iter = 1; iter <= parloops; iter++ )); do
        echo "Launch iteration ${iter}"

        for (( n = 1; n <= PARALLELISM; n++ )); do
            _fast_footprint_launch_one
        done

        if [[ -n "${PAYLOAD_SLEEP}" ]]; then
            sleep "${PAYLOAD_SLEEP}"
        fi

        # check if we have hit one of our limits and need to wrap up the tests
        if (( $(check_limits) )); then
            echo "Ran out of resources, check_limits failed"
            return
        fi
    done

    for (( n = 1; n <= leftovers; n++ )); do
        _fast_footprint_launch_one
    done
}
# Poll ctr until the expected number of containers exists, or give up
# after CTR_POLL_TIMEOUT seconds.
# Globals read: NUM_CONTAINERS, CTR_POLL_TIMEOUT, CTR_EXE, containers.
# The expected count is normally NUM_CONTAINERS. When the 'containers'
# array holds fewer (but more than zero) entries - which is what happens
# when launching stopped early on a resource limit - that smaller count is
# used, so an intentionally shortened run is not reported as a timeout.
# On timeout: runs cleanup, then die.
wait_containers() {
    local t numcontainers
    # nap 3s between checks
    local step=3
    local expected="${NUM_CONTAINERS}"
    local launched="${#containers[@]}"

    if (( launched > 0 && launched < expected )); then
        expected="${launched}"
    fi

    for (( t = 0; t < CTR_POLL_TIMEOUT; t += step )); do
        numcontainers=$(sudo -E "${CTR_EXE}" c list -q | wc -l)
        if (( numcontainers >= expected )); then
            echo "All containers now launched (${t}s)"
            return
        fi
        echo "Waiting for containers to launch (${numcontainers} at ${t}s)"
        sleep "${step}"
    done

    echo "Timed out waiting for containers to launch (${t}s)"
    cleanup
    die "Timed out waiting for containers to launch (${t}s)"
}
# Run the measurement cycle: stats before any container is launched,
# launch everything, wait for the launches (and for KSM when it is on),
# stats again, then close the "Results" array.
# Globals read: ksm_on (not set in this file; presumably set by the KSM
#               check run during init), KSM_WAIT_TIME.
go() {
    # Init the json cycle for this save
    metrics_json_start_array

    # Grab the first set of stats before we run any containers.
    grab_stats

    launch_containers
    wait_containers

    # [[ ]] with a default: an unset or empty ksm_on simply means "off"
    # instead of producing a test syntax error.
    if [[ "${ksm_on:-}" == "1" ]]; then
        echo "Waiting for KSM to settle..."
        wait_ksm_settle "${KSM_WAIT_TIME}"
    fi

    grab_stats

    # Wrap up the results array
    metrics_json_end_array "Results"
}
# Print every environment variable that tunes this script, with its
# current value and a one-line description.
# Values are printed through '%s', so backslashes inside a value (for
# example in PAYLOAD_ARGS) are shown literally rather than being
# interpreted as escape sequences.
show_vars() {
    printf '\nEnvironment variables:\n'
    printf '\tName (default)\n'
    printf '\t\tDescription\n'

    # printf re-applies the format to each (name, value, description) triple.
    printf '\t%s (%s)\n\t\t%s\n' \
        "PAYLOAD" "${PAYLOAD}" \
        "The ctr image to run" \
        "PAYLOAD_ARGS" "${PAYLOAD_ARGS}" \
        "The command run inside each container (handed to 'sh -c')" \
        "PAYLOAD_SLEEP" "${PAYLOAD_SLEEP}" \
        "Seconds to sleep between launch and measurement, to allow settling" \
        "KSM_WAIT_TIME" "${KSM_WAIT_TIME}" \
        "Seconds to wait for KSM to settle before we take the final measure" \
        "CTR_POLL_TIMEOUT" "${CTR_POLL_TIMEOUT}" \
        "Seconds to poll for ctr to finish launching containers" \
        "PARALLELISM" "${PARALLELISM}" \
        "Number of containers we launch in parallel" \
        "NUM_CONTAINERS" "${NUM_CONTAINERS}" \
        "The total number of containers to run" \
        "MAX_MEMORY_CONSUMED" "${MAX_MEMORY_CONSUMED}" \
        "The maximum amount of memory to be consumed before terminating" \
        "MIN_MEMORY_FREE" "${MIN_MEMORY_FREE}" \
        "The minimum amount of memory allowed to be free before terminating" \
        "DUMP_CACHES" "${DUMP_CACHES}" \
        "A flag to note if the system caches should be dumped before capturing stats" \
        "TEST_NAME" "${TEST_NAME}" \
        "Can be set to over-ride the default JSON results filename"
}
# Print the usage text followed by the list of tunable environment
# variables. The usage text is also left in the global 'usage', as before.
help() {
    local -a usage_lines=(
        "Usage: $0 [-h] [options]"
        "Description:"
        "Launch a series of workloads and take memory metric measurements after"
        "each launch."
        "Options:"
        "-h, Help page."
    )

    # Join with newlines, then drop the final one so the text carries no
    # trailing newline of its own.
    printf -v usage '%s\n' "${usage_lines[@]}"
    usage=${usage%$'\n'}

    echo "${usage}"
    show_vars
}
# Entry point: parse the command line, then run init, go and cleanup in
# that order.
# Options:
#   -h  print the help text and exit 0.
# Any other option prints the help text to stderr and exits 1, so a
# mistyped flag no longer starts a full measurement run.
main() {
    local OPTIND
    local opt

    while getopts "h" opt; do
        case "${opt}" in
            h)
                help
                exit 0
                ;;
            *)
                help >&2
                exit 1
                ;;
        esac
    done
    shift $(( OPTIND - 1 ))

    init
    go
    cleanup
}
# Run the script: hand every command-line argument to main unchanged.
main "$@"