#!/bin/bash
# Copyright (c) 2017-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# A script to gather memory 'footprint' information as we launch more
# and more containers
#
# The script gathers information about both user and kernel space consumption
# Output is into a .json file, named using some of the config component names
# (such as footprint-busybox.json)

# Pull in some common, useful, items
SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
source "${SCRIPT_PATH}/../lib/common.bash"

# Every tunable below can be supplied through the environment. The ':='
# expansion only assigns the default shown when the variable is unset or
# empty, so values passed in by the caller win.

# Seconds to sleep after launching a batch of containers, giving them time
# to come up and finish initialising before we measure. Larger containers
# can need several seconds to get running.
: "${PAYLOAD_SLEEP:=10}"

# Upper bound, in seconds, on how long we wait for KSM to 'settle down'
# before giving up and carrying on regardless.
: "${KSM_WAIT_TIME:=300}"

# Upper bound, in seconds, on how long we poll ctr while waiting for all of
# the containers to be launched.
: "${CTR_POLL_TIMEOUT:=300}"

# Number of containers launched in parallel before each PAYLOAD_SLEEP nap
: "${PARALLELISM:=10}"

### The default config - run a small busybox image
# The application under test: by default busybox, as a 'small' workload
: "${PAYLOAD:=quay.io/prometheus/busybox:latest}"
: "${PAYLOAD_ARGS:=tail -f /dev/null}"

###
# The RUNTIME in use is picked up from the env in common.bash.
# Over-ride it by setting RUNTIME in your env

###
# Cutoff checks that decide when the test stops launching containers
# Launch at most this many containers
: "${NUM_CONTAINERS:=100}"
# Stop once this much memory has been consumed (from MemFree).
# Held as an arithmetic expression that is evaluated where it is compared.
: "${MAX_MEMORY_CONSUMED:=256*1024*1024*1024}"
# Stop once MemFree has dropped to this level (also an expression)
: "${MIN_MEMORY_FREE:=2*1024*1024*1024}"

# External tools that must be installed for the script to work
REQUIRED_COMMANDS="smem awk"

# Flag for 'dumping' the system caches before each measurement. Doing so
# gives less noise in the results, which then better reflect the
# un-reclaimable footprint.
: "${DUMP_CACHES:=1}"

# Feeds into the name of the file that the results are stored in
: "${TEST_NAME:=fast-footprint-busybox}"

############# end of configurable items ###################

# Baseline memory readings. init() overwrites these with the values sampled
# before any container is launched, and later samples are subtracted from
# them to work out how much memory has gone.
base_mem_avail=0
base_mem_free=0

# Drop the kernel caches by writing '3' to the drop_caches knob as root, so
# that the following measurements give a more precise (or just different)
# view of what our footprint really is.
dump_caches() {
	local -r drop_caches_knob="/proc/sys/vm/drop_caches"

	sudo bash -c "echo 3 > ${drop_caches_knob}"
}

# Bring the system to a known state and record the baseline memory figures.
# Globals written: base_mem_avail, base_mem_free
init() {
	restart_containerd_service

	# REQUIRED_COMMANDS is deliberately unquoted: each word is one command
	check_cmds $REQUIRED_COMMANDS
	sudo -E "${CTR_EXE}" image pull "${PAYLOAD}"

	# The test name gets modified if we are running with KSM enabled
	check_for_ksm

	# Common init function, gets us to a known state
	init_env

	# Get ready to store results
	metrics_json_init

	# Baseline measures: field 7 of the second line printed by 'free -b',
	# and the free memory figure that get_memfree reports
	base_mem_avail=$(
		free -b \
			| head -2 \
			| tail -1 \
			| awk '{print $7}'
	)
	base_mem_free=$(get_memfree)

	# Record the configuration used for this run
	save_config
}

# Record the settings used for this run as a one-element "Config" array in
# the JSON results.
save_config() {
	# Numeric settings are emitted bare, the others as JSON strings
	local config_json
	config_json="$(cat << EOF
	{
		"testname": "${TEST_NAME}",
		"payload": "${PAYLOAD}",
		"payload_args": "${PAYLOAD_ARGS}",
		"payload_sleep": ${PAYLOAD_SLEEP},
		"ksm_settle_time": ${KSM_WAIT_TIME},
		"num_containers": ${NUM_CONTAINERS},
		"parallelism": ${PARALLELISM},
		"max_memory_consumed": "${MAX_MEMORY_CONSUMED}",
		"min_memory_free": "${MIN_MEMORY_FREE}",
		"dump_caches": "${DUMP_CACHES}"
	}
EOF
)"

	metrics_json_start_array
	metrics_json_add_array_element "${config_json}"
	metrics_json_end_array "Config"
}

# Wrap up a run: save the collected metrics, then clean the ctr environment.
cleanup() {
	# Results are written out first ...
	metrics_json_save

	# ... and only then is the environment cleaned
	# (presumably this removes the launched containers - see common.bash)
	clean_env_ctr
}

# Helper to get the USS of the process(es) matching arg1.
# Arguments: $1 - process name; passed to 'smem -P' anchored with '^'
# Outputs:   field 4 of the last line printed by 'smem -t', multiplied by
#            1024, on stdout. A missing or non-numeric reading prints 0.
function get_proc_uss() {
	# Kept local so the caller's variables are never clobbered
	local uss_kb
	uss_kb=$(sudo smem -t -P "^$1" | tail -1 | awk '{print $4}')
	echo "$(( uss_kb * 1024 ))"
}

# Helper to get the PSS of the process(es) matching arg1.
# Arguments: $1 - process name; passed to 'smem -P' anchored with '^'
# Outputs:   field 5 of the last line printed by 'smem -t', multiplied by
#            1024, on stdout. A missing or non-numeric reading prints 0.
function get_proc_pss() {
	# Kept local so the caller's variables are never clobbered
	local pss_kb
	pss_kb=$(sudo smem -t -P "^$1" | tail -1 | awk '{print $5}')
	echo "$(( pss_kb * 1024 ))"
}

# Get the PSS for the whole of userspace (all processes)
#  This allows us to see if we had any impact on the rest of the system, for
#  instance dockerd grows as we launch containers, so we should account for
#  that in our total memory breakdown.
# The reading is field 5 of the last line printed by 'smem -t', multiplied
# by 1024, and is added to the current JSON array element as "all_pss".
function grab_all_pss() {
	# Local, so the reading cannot leak into other functions
	local pss
	pss=$(sudo smem -t | tail -1 | awk '{print $5}')
	((pss*=1024))

	# NOTE(review): the value is scaled by 1024 yet labelled "KB" - confirm
	# which unit the consumers of this JSON expect before changing either.
	local json
	json="$(cat << EOF
		"all_pss": {
			"pss": $pss,
			"Units": "KB"
		}
EOF
)"

	metrics_json_add_array_fragment "$json"
}

# Record the userspace memory figure reported by 'smem -w'.
# The reading is field 3 of the fifth line of that output, multiplied by
# 1024, and is added to the current JSON array element as "user_smem".
function grab_user_smem() {
	# userspace - kept local so the reading cannot leak into other functions
	local userspace
	userspace=$(sudo smem -w | head -5 | tail -1 | awk '{print $3}')
	((userspace*=1024))

	# NOTE(review): the value is scaled by 1024 yet labelled "KB" - confirm
	# which unit the consumers of this JSON expect before changing either.
	local json
	json="$(cat << EOF
		"user_smem": {
			"userspace": $userspace,
			"Units": "KB"
		}
EOF
)"

	metrics_json_add_array_fragment "$json"
}

# Record the kernel slab total.
# The reading is the second field of the "Slab:" line of /proc/meminfo,
# multiplied by 1024, and is added to the current JSON array element as
# "slab".
function grab_slab() {
	# Grabbing slab total from meminfo is easier than doing the math
	# on slabinfo. Kept local so the reading cannot leak elsewhere.
	local slab
	slab=$(grep -F "Slab:" /proc/meminfo | awk '{print $2}')
	((slab*=1024))

	# NOTE(review): the value is scaled by 1024 yet labelled "KB" - confirm
	# which unit the consumers of this JSON expect before changing either.
	local json
	json="$(cat << EOF
		"slab": {
			"slab": $slab,
			"Units": "KB"
		}
EOF
)"

	metrics_json_add_array_fragment "$json"
}

# Print the amount of free memory according to smem.
# Outputs: field 4 of the sixth line printed by 'smem -w', multiplied by
#          1024, on stdout. A missing or non-numeric reading prints 0.
function get_memfree() {
	# Local, so callers that keep their own 'mem_free' are not clobbered
	local free_kb
	free_kb=$(sudo smem -w | head -6 | tail -1 | awk '{print $4}')
	echo "$(( free_kb * 1024 ))"
}

# Record a system-wide memory overview as the "system" fragment of the
# current JSON array element.
# Globals read: base_mem_avail, base_mem_free (the baselines set by init)
# Fields emitted:
#   avail, cached   - fields 7 and 6 of the second line of 'free -b'
#   avail_decr      - base_mem_avail minus avail
#   smem_free       - whatever get_memfree reports
#   free_decr       - base_mem_free minus smem_free
#   anon, mapped, meminfo_cached - the AnonPages, Mapped and Cached values
#                     from /proc/meminfo, each multiplied by 1024
function grab_system() {

	# avail and cached memory, both taken from one 'free' snapshot so that
	# the two figures describe the same instant
	local -a mem_row
	read -r -a mem_row < <(free -b | head -2 | tail -1)
	local avail="${mem_row[6]}"
	local cached="${mem_row[5]}"
	local avail_decr=$((base_mem_avail-avail))

	# free memory from smem. The decrease is worked out from the value
	# sampled here, not from a variable left behind by another function.
	local smem_free
	smem_free=$(get_memfree)
	local free_decr=$((base_mem_free-smem_free))

	# Anon pages
	local anon
	anon=$(grep -F "AnonPages:" /proc/meminfo | awk '{print $2}')
	((anon*=1024))

	# Mapped pages
	local mapped
	mapped=$(grep "^Mapped:" /proc/meminfo | awk '{print $2}')
	((mapped*=1024))

	# Cached
	local meminfo_cached
	meminfo_cached=$(grep "^Cached:" /proc/meminfo | awk '{print $2}')
	((meminfo_cached*=1024))

	# NOTE(review): the "KB" label is kept as found; the meminfo values are
	# scaled by 1024 and 'free' is run with -b - confirm the unit that the
	# consumers of this JSON expect.
	local json
	json="$(cat << EOF
		"system": {
			"avail": $avail,
			"avail_decr": $avail_decr,
			"cached": $cached,
			"smem_free": $smem_free,
			"free_decr": $free_decr,
			"anon": $anon,
			"mapped": $mapped,
			"meminfo_cached": $meminfo_cached,
			"Units": "KB"
		}
EOF
)"

	metrics_json_add_array_fragment "$json"
}

# Take one complete sample of memory statistics and close the JSON array
# element that holds it.
# Globals read: DUMP_CACHES - caches are dropped first unless it is empty
#               or "0"
function grab_stats() {
	# If configured, dump the caches so we get a more stable
	# view of what our static footprint really is.
	# A bare non-empty test would treat "0" as enabled, leaving no way to
	# switch the dump off, so "0" is checked for explicitly.
	if [[ -n "${DUMP_CACHES}" && "${DUMP_CACHES}" != "0" ]]; then
		dump_caches
	fi

	# user space data
		# PSS taken all userspace
	grab_all_pss
		# user as reported by smem
	grab_user_smem

	# System overview data
		# System free and cached
	grab_system

	# kernel data
		# The 'total kernel space taken' we can work out as:
		# ktotal = ((free-avail)-user)
		# So, we don't grab that number from smem, as that is what it does
		# internally anyhow.
		# Still try to grab any finer kernel details that we can though

		# totals from slabinfo
	grab_slab

	metrics_json_close_array_element
}

# Check whether one of the memory cutoffs has been reached.
# Globals read: MIN_MEMORY_FREE, MAX_MEMORY_CONSUMED, base_mem_free
# Outputs: "1" on stdout when a limit has been hit, "0" otherwise
function check_limits() {
	local mem_free mem_consumed

	mem_free=$(get_memfree)
	if ((mem_free <= MIN_MEMORY_FREE)); then
		echo 1
		return
	fi

	# Consumption is the drop in free memory since the baseline was taken.
	# Both operands must be the same metric, so the free memory baseline is
	# used here rather than the 'available' one.
	mem_consumed=$((base_mem_free-mem_free))
	if ((mem_consumed >= MAX_MEMORY_CONSUMED)); then
		echo 1
		return
	fi

	echo 0
}

# Launch NUM_CONTAINERS containers in batches of PARALLELISM, followed by
# one final batch for any remainder.
# Globals read:    NUM_CONTAINERS, PARALLELISM, PAYLOAD, PAYLOAD_ARGS,
#                  PAYLOAD_SLEEP, CTR_EXE, CTR_RUNTIME
# Globals written: containers - the names of every container launched
# Every 'ctr run' is put in the background and is not waited for here.
launch_containers() {
	local parloops leftovers

	(( parloops = NUM_CONTAINERS / PARALLELISM ))
	(( leftovers = NUM_CONTAINERS % PARALLELISM ))

	echo "Launching ${parloops}x${PARALLELISM} containers + ${leftovers} etras"

	containers=()

	local iter n
	for (( iter = 1; iter <= parloops; iter++ )); do
		echo "Launch iteration ${iter}"
		for (( n = 1; n <= PARALLELISM; n++ )); do
			containers+=("$(random_name)")
			# PAYLOAD_ARGS has to reach 'sh -c' as one single word. Left
			# unquoted, only its first word would be the command string
			# and the rest would become positional parameters.
			sudo -E "${CTR_EXE}" run -d --runtime="${CTR_RUNTIME}" "${PAYLOAD}" "${containers[-1]}" sh -c "${PAYLOAD_ARGS}" &
		done

		if [[ -n "${PAYLOAD_SLEEP}" ]]; then
			sleep "${PAYLOAD_SLEEP}"
		fi

		# check if we have hit one of our limits and need to wrap up the tests
		# NOTE(review): fewer than NUM_CONTAINERS containers exist after
		# this early return, so a caller that then waits for the full
		# count will only stop at its timeout - confirm that is intended.
		if (($(check_limits))); then
			echo "Ran out of resources, check_limits failed"
			return
		fi
	done

	for (( n = 1; n <= leftovers; n++ )); do
		containers+=("$(random_name)")
		sudo -E "${CTR_EXE}" run -d --runtime="${CTR_RUNTIME}" "${PAYLOAD}" "${containers[-1]}" sh -c "${PAYLOAD_ARGS}" &
	done
}

# Poll ctr until at least NUM_CONTAINERS containers are listed, giving up
# after CTR_POLL_TIMEOUT seconds. On timeout the run is cleaned up and the
# script is terminated through die.
wait_containers() {
	# Seconds to nap between two polls
	local interval=3
	local elapsed=0
	local running

	while (( elapsed < ${CTR_POLL_TIMEOUT} )); do
		# One line of output per container known to ctr
		running=$(sudo -E "${CTR_EXE}" c list -q | wc -l)

		if (( running >= ${NUM_CONTAINERS} )); then
			echo "All containers now launched (${elapsed}s)"
			return
		fi

		echo "Waiting for containers to launch (${running} at ${elapsed}s)"
		sleep ${interval}
		(( elapsed += interval ))
	done

	# Only reached once the polling budget has been used up
	echo "Timed out waiting for containers to launch (${elapsed}s)"
	cleanup
	die "Timed out waiting for containers to launch (${elapsed}s)"
}

# Run the measurement cycle: sample, launch the containers, wait for them
# (and for KSM when it is on), sample again, then close the "Results" array.
# Globals read: ksm_on (an unset or empty value is treated as off),
#               KSM_WAIT_TIME
function go() {
	# Init the json cycle for this save
	metrics_json_start_array

	# Grab the first set of stats before we run any containers.
	grab_stats

	launch_containers
	wait_containers

	# Quoted and defaulted so that an unset or empty ksm_on simply means
	# "off" rather than raising a test syntax error
	if [[ "${ksm_on:-0}" == "1" ]]; then
		echo "Wating for KSM to settle..."
		wait_ksm_settle "${KSM_WAIT_TIME}"
	fi

	grab_stats

	# Wrap up the results array
	metrics_json_end_array "Results"
}

# Print every environment variable that configures the script, along with
# its current value and a one line description.
show_vars() {
	# Flat table of pairs: the variable name, then its description
	local -a var_table=(
		"PAYLOAD" "The ctr image to run"
		"PAYLOAD_ARGS" "Any extra arguments passed into the docker 'run' command"
		"PAYLOAD_SLEEP" "Seconds to sleep between launch and measurement, to allow settling"
		"KSM_WAIT_TIME" "Seconds to wait for KSM to settle before we take the final measure"
		"CTR_POLL_TIMEOUT" "Seconds to poll for ctr to finish launching containers"
		"PARALLELISM" "Number of containers we launch in parallel"
		"NUM_CONTAINERS" "The total number of containers to run"
		"MAX_MEMORY_CONSUMED" "The maximum amount of memory to be consumed before terminating"
		"MIN_MEMORY_FREE" "The minimum amount of memory allowed to be free before terminating"
		"DUMP_CACHES" "A flag to note if the system caches should be dumped before capturing stats"
		"TEST_NAME" "Can be set to over-ride the default JSON results filename"
	)

	echo -e "\nEvironment variables:"
	echo -e "\tName (default)"
	echo -e "\t\tDescription"

	local idx var_name var_desc
	for (( idx = 0; idx < ${#var_table[@]}; idx += 2 )); do
		var_name="${var_table[idx]}"
		var_desc="${var_table[idx + 1]}"
		# ${!var_name} gives the value of the variable named by var_name
		echo -e "\t${var_name} (${!var_name})"
		echo -e "\t\t${var_desc}"
	done
}

# Print the usage text followed by the list of configuration variables.
# Note: 'usage' is assigned without 'local', so it stays set afterwards.
help() {
	usage="Usage: $0 [-h] [options]
   Description:
	Launch a series of workloads and take memory metric measurements after
	each launch.
   Options:
        -h,    Help page."

	printf '%s\n' "${usage}"
	show_vars
}

# Script body: handle the command line, then run the init/go/cleanup cycle.
# Options: -h  print the help page and exit with status 0
main() {
	# A function-local OPTIND makes getopts start from the first argument
	local OPTIND

	while getopts "h" opt; do
		if [[ "${opt}" == "h" ]]; then
			help
			exit 0
		fi
	done
	shift $((OPTIND-1))

	init
	go
	cleanup
}

# Script entry point: hand every command line argument over to main
main "$@"