This PR updates the fast footprint script to remove the use of egrep, as this command has been deprecated, and changes it to use the grep command.

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
#!/bin/bash
# Copyright (c) 2017-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# A script to gather memory 'footprint' information as we launch more
# and more containers
#
# The script gathers information about both user and kernel space consumption
# Output is into a .json file, named using some of the config component names
# (such as footprint-busybox.json)

# Pull in some common, useful, items
SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
source "${SCRIPT_PATH}/../lib/common.bash"

# Note that all vars that can be set from outside the script (that is,
# passed in the ENV), use the ':-' setting to allow being over-ridden

# Default sleep, in seconds, to let containers come up and finish their
# initialisation before we take the measures. Some of the larger
# containers can take a number of seconds to get running.
PAYLOAD_SLEEP="${PAYLOAD_SLEEP:-10}"

# How long, in seconds, do we wait for KSM to 'settle down', before we
# timeout and just continue anyway.
KSM_WAIT_TIME="${KSM_WAIT_TIME:-300}"

# How long, in seconds, do we poll for ctr to complete launching all the
# containers?
CTR_POLL_TIMEOUT="${CTR_POLL_TIMEOUT:-300}"

# How many containers do we launch in parallel before taking the PAYLOAD_SLEEP
# nap
PARALLELISM="${PARALLELISM:-10}"

### The default config - run a small busybox image
# Define what we will be running (app under test)
# Default is we run busybox, as a 'small' workload
PAYLOAD="${PAYLOAD:-quay.io/prometheus/busybox:latest}"
PAYLOAD_ARGS="${PAYLOAD_ARGS:-tail -f /dev/null}"

###
# which RUNTIME we use is picked up from the env in
# common.bash. You can over-ride by setting RUNTIME in your env

###
# Define the cutoff checks for when we stop running the test
# Run up to this many containers
NUM_CONTAINERS="${NUM_CONTAINERS:-100}"
# Run until we have consumed this much memory (from MemFree)
MAX_MEMORY_CONSUMED="${MAX_MEMORY_CONSUMED:-256*1024*1024*1024}"
# Run until we have this much MemFree left
MIN_MEMORY_FREE="${MIN_MEMORY_FREE:-2*1024*1024*1024}"
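# Note: the two limits above are stored as arithmetic strings; bash only
# evaluates them when they are used inside an arithmetic context (the
# (( ... )) comparisons in check_limits()).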

# Tools we need to have installed in order to operate
REQUIRED_COMMANDS="smem awk"

# If we 'dump' the system caches before we measure then we get less
# noise in the results - they show more what our un-reclaimable footprint is
DUMP_CACHES="${DUMP_CACHES:-1}"

# Affects the name of the file to store the results in
TEST_NAME="${TEST_NAME:-fast-footprint-busybox}"

############# end of configurable items ###################

# vars to remember where we started so we can calc diffs
base_mem_avail=0
base_mem_free=0

# dump the kernel caches, so we get a more precise (or just different)
# view of what our footprint really is.
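# Writing 3 to /proc/sys/vm/drop_caches asks the kernel to drop the page
# cache as well as reclaimable slab objects (dentries and inodes).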
function dump_caches() {
	sudo bash -c "echo 3 > /proc/sys/vm/drop_caches"
}

function init() {
	restart_containerd_service

	check_cmds $REQUIRED_COMMANDS
	sudo -E "${CTR_EXE}" image pull "$PAYLOAD"

	# Modify the test name if running with KSM enabled
	check_for_ksm

	# Use the common init func to get to a known state
	init_env

	# Prepare to start storing results
	metrics_json_init

	# Store up baseline measures
	base_mem_avail=$(free -b | head -2 | tail -1 | awk '{print $7}')
	base_mem_free=$(get_memfree)

	# Store our configuration for this run
	save_config
}

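# Record the run configuration as a single-element "Config" array in the
# results JSON, so the results file is self-describing.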
save_config(){
	metrics_json_start_array

	local json="$(cat << EOF
	{
		"testname": "${TEST_NAME}",
		"payload": "${PAYLOAD}",
		"payload_args": "${PAYLOAD_ARGS}",
		"payload_sleep": ${PAYLOAD_SLEEP},
		"ksm_settle_time": ${KSM_WAIT_TIME},
		"num_containers": ${NUM_CONTAINERS},
		"parallelism": ${PARALLELISM},
		"max_memory_consumed": "${MAX_MEMORY_CONSUMED}",
		"min_memory_free": "${MIN_MEMORY_FREE}",
		"dump_caches": "${DUMP_CACHES}"
	}
EOF
)"
	metrics_json_add_array_element "$json"
	metrics_json_end_array "Config"
}

function cleanup() {
	# Finish storing the results
	metrics_json_save

	clean_env_ctr
}

# helper function to get USS of process in arg1
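# Note: the two helpers below rely on 'smem -t' appending a totals row; on
# that row the Command column is blank, so (with smem's default column
# layout) field 4 is the total USS and field 5 the total PSS, both in KB.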
function get_proc_uss() {
	item=$(sudo smem -t -P "^$1" | tail -1 | awk '{print $4}')
	((item*=1024))
	echo $item
}

# helper function to get PSS of process in arg1
function get_proc_pss() {
	item=$(sudo smem -t -P "^$1" | tail -1 | awk '{print $5}')
	((item*=1024))
	echo $item
}

# Get the PSS for the whole of userspace (all processes)
# This allows us to see if we had any impact on the rest of the system, for instance
# containerd grows as we launch containers, so we should account for that in our total
# memory breakdown
function grab_all_pss() {
	item=$(sudo smem -t | tail -1 | awk '{print $5}')
	((item*=1024))

	local json="$(cat << EOF
	"all_pss": {
		"pss": $item,
		"Units": "KB"
	}
EOF
)"

	metrics_json_add_array_fragment "$json"
}

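# The 'smem -w' system view prints one row per area (firmware/hardware,
# kernel image, kernel dynamic memory, userspace memory, free memory), in KB;
# the head/tail pipelines here and in get_memfree() pick a row, and awk picks
# a column from it.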
function grab_user_smem() {
	# userspace
	item=$(sudo smem -w | head -5 | tail -1 | awk '{print $3}')
	((item*=1024))

	local json="$(cat << EOF
	"user_smem": {
		"userspace": $item,
		"Units": "KB"
	}
EOF
)"

	metrics_json_add_array_fragment "$json"
}

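# /proc/meminfo reports sizes in kB; like the smem values above, they are
# converted to bytes before being stored.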
function grab_slab() {
	# Grabbing slab total from meminfo is easier than doing the math
	# on slabinfo
	item=$(grep -F "Slab:" /proc/meminfo | awk '{print $2}')
	((item*=1024))

	local json="$(cat << EOF
	"slab": {
		"slab": $item,
		"Units": "KB"
	}
EOF
)"

	metrics_json_add_array_fragment "$json"
}

function get_memfree() {
	mem_free=$(sudo smem -w | head -6 | tail -1 | awk '{print $4}')
	((mem_free*=1024))
	echo $mem_free
}

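# Snapshot system-wide memory numbers, both as absolute values and as deltas
# from the baselines captured in init().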
function grab_system() {

	# avail memory, from 'free'
	local avail=$(free -b | head -2 | tail -1 | awk '{print $7}')
	local avail_decr=$((base_mem_avail-avail))

	# cached memory, from 'free'
	local cached=$(free -b | head -2 | tail -1 | awk '{print $6}')

	# free memory from smem
	local smem_free=$(get_memfree)
	local free_decr=$((base_mem_free-smem_free))

	# Anon pages
	local anon=$(grep -F "AnonPages:" /proc/meminfo | awk '{print $2}')
	((anon*=1024))

	# Mapped pages
	local mapped=$(grep "^Mapped:" /proc/meminfo | awk '{print $2}')
	((mapped*=1024))

	# Cached
	local meminfo_cached=$(grep "^Cached:" /proc/meminfo | awk '{print $2}')
	((meminfo_cached*=1024))

	local json="$(cat << EOF
	"system": {
		"avail": $avail,
		"avail_decr": $avail_decr,
		"cached": $cached,
		"smem_free": $smem_free,
		"free_decr": $free_decr,
		"anon": $anon,
		"mapped": $mapped,
		"meminfo_cached": $meminfo_cached,
		"Units": "KB"
	}
EOF
)"

	metrics_json_add_array_fragment "$json"
}

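# Collect one element of the "Results" array: each grab_* call below adds a
# named JSON fragment, and metrics_json_close_array_element() then seals them
# into a single array element.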
function grab_stats() {
	# If configured, dump the caches so we get a more stable
	# view of what our static footprint really is
	if (( DUMP_CACHES )); then
		dump_caches
	fi

	# user space data
	# PSS taken all userspace
	grab_all_pss
	# user as reported by smem
	grab_user_smem

	# System overview data
	# System free and cached
	grab_system

	# kernel data
	# The 'total kernel space taken' we can work out as:
	# ktotal = ((free-avail)-user)
	# So, we don't grab that number from smem, as that is what it does
	# internally anyhow.
	# Still try to grab any finer kernel details that we can though

	# totals from slabinfo
	grab_slab

	metrics_json_close_array_element
}

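# check_limits echoes 1 (stop) or 0 (keep going) rather than using a return
# code, so the caller can test it directly in an arithmetic context, e.g.
#   if (($(check_limits))); then ... fi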
function check_limits() {
	mem_free=$(get_memfree)
	if ((mem_free <= MIN_MEMORY_FREE)); then
		echo 1
		return
	fi

	mem_consumed=$((base_mem_avail-mem_free))
	if ((mem_consumed >= MAX_MEMORY_CONSUMED)); then
		echo 1
		return
	fi

	echo 0
}

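# Containers are launched in batches of PARALLELISM, with a PAYLOAD_SLEEP nap
# and a limit check after each batch. For example, NUM_CONTAINERS=25 with
# PARALLELISM=10 gives parloops=2 full batches plus leftovers=5 extra
# containers launched at the end.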
launch_containers() {
	local parloops leftovers

	(( parloops=${NUM_CONTAINERS}/${PARALLELISM} ))
	(( leftovers=${NUM_CONTAINERS} - (${parloops}*${PARALLELISM}) ))

	echo "Launching ${parloops}x${PARALLELISM} containers + ${leftovers} extras"

	containers=()

	local iter n
	for iter in $(seq 1 $parloops); do
		echo "Launch iteration ${iter}"
		for n in $(seq 1 $PARALLELISM); do
			containers+=($(random_name))
			sudo -E "${CTR_EXE}" run -d --runtime="${CTR_RUNTIME}" "${PAYLOAD}" "${containers[-1]}" sh -c "${PAYLOAD_ARGS}" &
		done

		if [[ $PAYLOAD_SLEEP ]]; then
			sleep $PAYLOAD_SLEEP
		fi

		# check if we have hit one of our limits and need to wrap up the tests
		if (($(check_limits))); then
			echo "Ran out of resources, check_limits failed"
			return
		fi
	done

	for n in $(seq 1 $leftovers); do
		containers+=($(random_name))
		sudo -E "${CTR_EXE}" run -d --runtime="${CTR_RUNTIME}" "${PAYLOAD}" "${containers[-1]}" sh -c "${PAYLOAD_ARGS}" &
	done
}

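# The launches above are backgrounded, so poll 'ctr c list' until the expected
# number of containers shows up, or give up after CTR_POLL_TIMEOUT seconds.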
wait_containers() {
	local t numcontainers
	# nap 3s between checks
	local step=3

	for ((t=0; t<${CTR_POLL_TIMEOUT}; t+=step)); do

		numcontainers=$(sudo -E "${CTR_EXE}" c list -q | wc -l)

		if (( numcontainers >= ${NUM_CONTAINERS} )); then
			echo "All containers now launched (${t}s)"
			return
		else
			echo "Waiting for containers to launch (${numcontainers} at ${t}s)"
		fi
		sleep ${step}
	done

	echo "Timed out waiting for containers to launch (${t}s)"
	cleanup
	die "Timed out waiting for containers to launch (${t}s)"
}

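# go() drives the test: it takes a baseline snapshot before any containers
# run, launches the workload, and takes a second snapshot once all containers
# are up (and KSM has settled, if it is enabled).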
function go() {
	# Init the json cycle for this save
	metrics_json_start_array

	# Grab the first set of stats before we run any containers.
	grab_stats

	launch_containers
	wait_containers

	if [ "$ksm_on" == "1" ]; then
		echo "Waiting for KSM to settle..."
		wait_ksm_settle ${KSM_WAIT_TIME}
	fi

	grab_stats

	# Wrap up the results array
	metrics_json_end_array "Results"
}

function show_vars()
{
	echo -e "\nEnvironment variables:"
	echo -e "\tName (default)"
	echo -e "\t\tDescription"
	echo -e "\tPAYLOAD (${PAYLOAD})"
	echo -e "\t\tThe ctr image to run"
	echo -e "\tPAYLOAD_ARGS (${PAYLOAD_ARGS})"
	echo -e "\t\tThe command run (via 'sh -c') inside each container"
	echo -e "\tPAYLOAD_SLEEP (${PAYLOAD_SLEEP})"
	echo -e "\t\tSeconds to sleep between launch and measurement, to allow settling"
	echo -e "\tKSM_WAIT_TIME (${KSM_WAIT_TIME})"
	echo -e "\t\tSeconds to wait for KSM to settle before we take the final measure"
	echo -e "\tCTR_POLL_TIMEOUT (${CTR_POLL_TIMEOUT})"
	echo -e "\t\tSeconds to poll for ctr to finish launching containers"
	echo -e "\tPARALLELISM (${PARALLELISM})"
	echo -e "\t\tNumber of containers we launch in parallel"
	echo -e "\tNUM_CONTAINERS (${NUM_CONTAINERS})"
	echo -e "\t\tThe total number of containers to run"
	echo -e "\tMAX_MEMORY_CONSUMED (${MAX_MEMORY_CONSUMED})"
	echo -e "\t\tThe maximum amount of memory to be consumed before terminating"
	echo -e "\tMIN_MEMORY_FREE (${MIN_MEMORY_FREE})"
	echo -e "\t\tThe minimum amount of memory allowed to be free before terminating"
	echo -e "\tDUMP_CACHES (${DUMP_CACHES})"
	echo -e "\t\tA flag to note if the system caches should be dumped before capturing stats"
	echo -e "\tTEST_NAME (${TEST_NAME})"
	echo -e "\t\tCan be set to over-ride the default JSON results filename"

}

function help()
{
	usage=$(cat << EOF
Usage: $0 [-h] [options]
Description:
	Launch a series of workloads and take memory metric measurements after
	each launch.
Options:
	-h,	Help page.
EOF
)
	echo "$usage"
	show_vars
}

function main() {

	local OPTIND
	while getopts "h" opt;do
		case ${opt} in
		h)
			help
			exit 0;
			;;
		esac
	done
	shift $((OPTIND-1))

	init
	go
	cleanup
}

main "$@"