mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-04-28 03:42:09 +00:00
metrics: Add TensorFlow ResNet50 FP32 benchmark
This PR adds a TensorFlow ResNet50 FP32 benchmark for Kata metrics.

Fixes #7735

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
This commit is contained in:
parent
e7e4cc2182
commit
4b7d72c4a8
161
tests/metrics/machine_learning/tensorflow_resnet50_fp32.sh
Executable file
161
tests/metrics/machine_learning/tensorflow_resnet50_fp32.sh
Executable file
@ -0,0 +1,161 @@
|
||||
#!/bin/bash
#
# Copyright (c) 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# Launches a number of containers and runs the TensorFlow ResNet50 FP32
# inference benchmark in each of them.
#
# Usage: <script> <count> <timeout>

set -o pipefail

# General env
SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
source "${SCRIPT_PATH}/../lib/common.bash"

# Container image used for the benchmark and the Dockerfile it is built from.
IMAGE="docker.io/library/resnet50_fp32:latest"
DOCKERFILE="${SCRIPT_PATH}/resnet50_fp32_dockerfile/Dockerfile"
# Scratch file that accumulates the per-container throughput values.
# Created under the system temporary directory (like src_dir below) so the
# script does not depend on the current working directory being writable.
tensorflow_file=$(mktemp --tmpdir tensorflowresults.XXXXXXXXXX)
# Positional arguments: number of containers and launch timeout.
NUM_CONTAINERS="$1"
TIMEOUT="$2"
TEST_NAME="resnet50_fp32"
# Command that keeps each container alive until the workload is exec'ed.
PAYLOAD_ARGS="tail -f /dev/null"
TESTDIR="${TESTDIR:-/testdir}"
# Options to control the start of the workload using a trigger-file
dst_dir="/host"
src_dir=$(mktemp --tmpdir -d tensorflowresnet.XXXXXXXXXX)
# Host directory src_dir is bind-mounted read-only at dst_dir in the guest.
MOUNT_OPTIONS="type=bind,src=$src_dir,dst=$dst_dir,options=rbind:ro"
start_script="resnet50_fp32_start.sh"
# CMD points to the script that starts the workload
CMD="$dst_dir/$start_script"
# NOTE(review): trigger_file is not assigned anywhere in this script;
# presumably it is expected to come from common.bash - TODO confirm.
# If it is unset, both paths below resolve to the directory itself.
guest_trigger_file="$dst_dir/$trigger_file"
host_trigger_file="$src_dir/$trigger_file"
# Placeholder; main() overwrites it with the pid count of an idle container.
INITIAL_NUM_PIDS=1
# Shell snippets executed inside the containers:
#  CMD_FILE    - counts the 'Average Throughput' lines in the results file.
#  CMD_RESULTS - extracts the throughput values as a comma-terminated list.
CMD_FILE="cat results | grep 'Average Throughput' | wc -l"
CMD_RESULTS="cat results | grep 'Average Throughput' | cut -d':' -f2 | cut -d' ' -f2 | tr '\n' ','"
|
||||
|
||||
# Exit handler: removes the temporary artifacts created by this script.
# Globals:
#   tensorflow_file (read) - scratch file holding the collected results.
#   src_dir (read)         - temporary host directory shared with containers.
# Both are removed so that an early exit (for example the argument check in
# main) does not leave the mktemp directory behind. Empty or unset values
# are skipped so rm is never invoked with an empty path.
function remove_tmp_file() {
	local path
	for path in "${tensorflow_file:-}" "${src_dir:-}"; do
		[[ -n "${path}" ]] || continue
		rm -rf -- "${path}"
	done
}

trap remove_tmp_file EXIT
|
||||
|
||||
# Prints the usage text for this script to stdout.
# The script name shown in the usage line is taken from $0.
function help() {
cat << EOF
Usage: $0 <count> <timeout>
Description:
This script launches n number of containers
to run the ResNet50 fp32 model using a Tensorflow
container.
Options:
<count> : Number of containers to run.
<timeout> : Timeout to launch the containers.
EOF
}
|
||||
|
||||
# Writes the workload launcher into the directory shared with the containers.
# Globals:
#   src_dir (read)      - host directory that is bind-mounted in the guest.
#   start_script (read) - file name of the launcher inside src_dir.
# The generated script runs the ResNet50 FP32 inference benchmark and appends
# its output to a file named 'results' in the container's working directory.
function create_start_script() {
	local script="${src_dir}/${start_script}"

	# Drop any stale launcher first so the file is always regenerated.
	rm -rf "${script}"

	# Quoted delimiter: the body is written literally, nothing is expanded
	# on the host.
	cat <<'EOF' >"${script}"
#!/bin/bash
python3.8 models/benchmarks/launch_benchmark.py --benchmark-only --framework tensorflow --model-name resnet50 --precision fp32 --mode inference --in-graph /resnet50_fp32_pretrained_model.pb --batch-size 1 --num-intra-threads 16 >> results
EOF
	chmod +x "${script}"
}
|
||||
|
||||
# Runs the ResNet50 FP32 workload in every container and records the results.
# Globals (read):
#   containers          - array of container names (set by the caller).
#   CTR_EXE             - container CLI used to exec into the containers.
#   CMD                 - path of the workload launcher inside the guest.
#   CMD_FILE            - snippet passed to collect_results.
#   CMD_RESULTS         - snippet that prints the comma-terminated throughputs.
#   host_trigger_file   - path touched on the host once the workloads return.
#   tensorflow_file     - scratch file that accumulates the throughputs.
#   NUM_CONTAINERS      - divisor used for the average.
# Outputs:
#   Adds one JSON element (Result, Average, Units) through the metrics_json_*
#   helpers and closes the "Results" array.
function resnet50_fp32_test() {
	local CMD_EXPORT_VAR="export KMP_AFFINITY=granularity=fine,verbose,compact && export OMP_NUM_THREADS=16"
	local pids=()
	local i pid

	info "Running ResNet50 FP32 Tensorflow test"
	for i in "${containers[@]}"; do
		# The exports are part of the same shell invocation as the workload:
		# variables exported by a separate exec belong to that short-lived
		# process only and would never reach the benchmark.
		# The command is backgrounded directly rather than through $(...),
		# which would try to execute whatever the command printed.
		sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_EXPORT_VAR} && ${CMD}" &
		pids+=("$!")
	done

	# wait for all pids
	for pid in "${pids[@]}"; do
		wait "${pid}"
	done

	touch "${host_trigger_file}"
	info "All containers are running the workload..."

	# NOTE(review): presumably blocks until every container reports its
	# throughput line - defined in common.bash, confirm there.
	collect_results "${CMD_FILE}"

	for i in "${containers[@]}"; do
		sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_RESULTS}" >> "${tensorflow_file}"
	done

	# CMD_RESULTS terminates every value with a comma; remove only that
	# final separator so the last value keeps all of its digits.
	local resnet50_fp32_results
	resnet50_fp32_results=$(cat "${tensorflow_file}")
	resnet50_fp32_results="${resnet50_fp32_results%,}"

	# Turn "a,b,c" into "(a+b+c)/N" and let bc evaluate it.
	local average_resnet50_fp32
	average_resnet50_fp32=$(echo "${resnet50_fp32_results}" | sed "s/,/+/g;s/.*/(&)\/${NUM_CONTAINERS}/" | bc -l)

	local json
	json="$(cat << EOF
{
	"ResNet50_fp32": {
		"Result": "${resnet50_fp32_results}",
		"Average": "${average_resnet50_fp32}",
		"Units": "images/s"
	}
}
EOF
)"
	metrics_json_add_array_element "$json"
	metrics_json_end_array "Results"
}
|
||||
|
||||
# Entry point: validates the arguments, starts the containers, runs the
# benchmark and stores the metrics.
# Arguments:
#   $1 - number of containers to run (also available as NUM_CONTAINERS).
#   $2 - timeout to launch the containers.
# Globals:
#   NUM_CONTAINERS, IMAGE, DOCKERFILE, CTR_EXE, CTR_RUNTIME, MOUNT_OPTIONS,
#   PAYLOAD_ARGS, src_dir (read); INITIAL_NUM_PIDS (written).
# Exits with status 1 when the arguments are invalid.
function main() {
	# Verify enough arguments
	if [ $# != 2 ]; then
		echo >&2 "error: Not enough arguments [$*]"
		help
		exit 1
	fi

	# The count drives the creation loop, the containers[-1] lookup and the
	# divisor of the average, so it has to be a positive integer.
	if ! [[ "${NUM_CONTAINERS}" =~ ^[1-9][0-9]*$ ]]; then
		echo >&2 "error: <count> must be a positive integer [${NUM_CONTAINERS}]"
		help
		exit 1
	fi

	local i=0
	# 'containers' is intentionally visible to resnet50_fp32_test, which is
	# called from this function and reads the array.
	local containers=()
	local not_started_count="${NUM_CONTAINERS}"

	# Check tools/commands dependencies
	local cmds=("awk" "docker" "bc")
	check_cmds "${cmds[@]}"
	check_ctr_images "${IMAGE}" "${DOCKERFILE}"

	init_env
	create_start_script

	info "Creating ${NUM_CONTAINERS} containers"

	for ((i = 1; i <= NUM_CONTAINERS; i++)); do
		containers+=("$(random_name)")
		sudo -E "${CTR_EXE}" run -d --runtime "${CTR_RUNTIME}" --mount="${MOUNT_OPTIONS}" "${IMAGE}" "${containers[-1]}" sh -c "${PAYLOAD_ARGS}"
		((not_started_count--))
		info "${not_started_count} remaining containers"
	done

	metrics_json_init
	metrics_json_start_array

	# Check that the requested number of containers are up
	check_containers_are_up "${NUM_CONTAINERS}"

	# Check that the requested number of containers are running
	check_containers_are_running "${NUM_CONTAINERS}"

	# Get the initial number of pids in a single container before the workload starts
	INITIAL_NUM_PIDS=$(sudo -E "${CTR_EXE}" t metrics "${containers[-1]}" | grep pids.current | xargs | cut -d ' ' -f 2)
	((INITIAL_NUM_PIDS++))

	resnet50_fp32_test

	metrics_json_save

	sudo rm -rf "${src_dir}"

	clean_env_ctr
}
|
||||
# Script entry point: forward all command-line arguments to main.
main "$@"
|
Loading…
Reference in New Issue
Block a user