Merge pull request #7653 from GabyCT/topic/tensorflowfp32

metrics: Add Tensorflow ResNet50 int8 benchmark
2025-09-01 08:56:32 +00:00 · 2023-08-17 10:44:25 -06:00
parent e470a650e0 bade6a5c3b
commit 4ba684e6e4
3 changed files with 193 additions and 4 deletions
--- a/tests/metrics/machine_learning/README.md
+++ b/tests/metrics/machine_learning/README.md
@@ -1,4 +1,4 @@
-# Kata Containers Tensorflow Metrics
+# Kata Containers TensorFlow Metrics
 Kata Containers provides a series of performance tests using the
 TensorFlow reference benchmarks (tf_cnn_benchmarks).
@@ -30,16 +30,16 @@ Individual tests can be run by hand, for example:
 $ cd metrics/machine_learning
 $ ./tensorflow.sh 40 100
 ```
-# Kata Containers Tensorflow `MobileNet` Metrics
+# Kata Containers TensorFlow `MobileNet` Metrics
 `MobileNets` are small, low-latency, low-power models parameterized to meet the resource 
 constraints of a variety of use cases. They can be built upon for classification, detection, 
 embeddings and segmentation similar to how other popular large scale models, such as Inception, are used. 
 `MobileNets` can be run efficiently on mobile devices with `Tensorflow` Lite.
-Kata Containers provides a test for running `MobileNet V1` inference using Intel-Optimized `Tensorflow`.
+Kata Containers provides a test for running `MobileNet V1` inference using Intel-Optimized `TensorFlow`.
-## Running the `Tensorflow` `MobileNet` test
+## Running the `TensorFlow` `MobileNet` test
 Individual test can be run by hand, for example:
 ```
@@ -47,3 +47,16 @@ $ cd metrics/machine_learning
 $ ./tensorflow_mobilenet_benchmark.sh 25 60
 ```
 # Kata Containers TensorFlow `ResNet50` Metrics
 `ResNet50` is an image classification model pre-trained on the `ImageNet` dataset.
 Kata Containers provides a test for running `ResNet50` inference using Intel-Optimized
 `TensorFlow`.
 ## Running the `TensorFlow` `ResNet50` test
 Individual test can be run by hand, for example:
 ```
 $ cd metrics/machine_learning
 $ ./tensorflow_resnet50_int8.sh 25 60
 ```
--- a/tests/metrics/machine_learning/resnet50_int8_dockerfile/Dockerfile
+++ b/tests/metrics/machine_learning/resnet50_int8_dockerfile/Dockerfile
@@ -0,0 +1,21 @@
 # Copyright (c) 2023 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 # Usage: FROM [image name]
 FROM ubuntu:20.04
 ENV DEBIAN_FRONTEND=noninteractive
 # Version of the Dockerfile
 LABEL DOCKERFILE_VERSION="1.0"
 RUN apt-get update && \
 	apt-get install -y --no-install-recommends wget nano curl build-essential git && \
 	apt-get install -y python3.8  python3-pip && \
 	pip install --no-cache-dir intel-tensorflow-avx512==2.8.0 && \
 	pip install --no-cache-dir protobuf==3.20.* && \
 	wget -q https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/resnet50_int8_pretrained_model.pb && \
 	git clone https://github.com/IntelAI/models.git
 CMD ["/bin/bash"]
--- a/tests/metrics/machine_learning/tensorflow_resnet50_int8.sh
+++ b/tests/metrics/machine_learning/tensorflow_resnet50_int8.sh
@@ -0,0 +1,155 @@
 #!/bin/bash
 #
 # Copyright (c) 2023 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 set -o pipefial
 # General env
 SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
 source "${SCRIPT_PATH}/../lib/common.bash"
 IMAGE="docker.io/library/resnet50int8:latest"
 DOCKERFILE="${SCRIPT_PATH}/resnet50_int8_dockerfile/Dockerfile"
 tensorflow_file=$(mktemp tensorflowresults.XXXXXXXXXX)
 NUM_CONTAINERS="$1"
 TIMEOUT="$2"
 TEST_NAME="tensorflow-resnet50int8"
 PAYLOAD_ARGS="tail -f /dev/null"
 TESTDIR="${TESTDIR:-/testdir}"
 # Options to control the start of the workload using a trigger-file
 dst_dir="/host"
 src_dir=$(mktemp --tmpdir -d tensorflowresnet50int8.XXXXXXXXXX)
 MOUNT_OPTIONS="type=bind,src=$src_dir,dst=$dst_dir,options=rbind:ro"
 start_script="resnet50int8_start.sh"
 # CMD points to the script that starts the workload
 # export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX
 CMD="export KMP_AFFINITY=granularity=fine,verbose,compact && export OMP_NUM_THREADS=16 && $dst_dir/$start_script"
 guest_trigger_file="$dst_dir/$trigger_file"
 host_trigger_file="$src_dir/$trigger_file"
 INITIAL_NUM_PIDS=1
 CMD_FILE="cat results | grep 'Throughput' | wc -l"
 CMD_RESULTS="cat results | grep 'Throughput' | cut -d':' -f2 | cut -d' ' -f2 | tr '\n' ','"
 function remove_tmp_file() {
 	rm -rf "${tensorflow_file}"
 }
 trap remove_tmp_file EXIT
 function help() {
 cat << EOF
 Usage: $0 <count> <timeout>
 	Description:
 		This script launches n number of containers
 		to run the ResNet50 int8 using a Tensorflow
 		container.
 	Options:
 		<count> : Number of containers to run.
 		<timeout> : Timeout to launch the containers.
 EOF
 }
 function create_start_script() {
 	local script="${src_dir}/${start_script}"
 	rm -rf "${script}"
 cat <<EOF >>"${script}"
 #!/bin/bash
 python3.8 models/benchmarks/launch_benchmark.py --benchmark-only --framework tensorflow --model-name resnet50  --precision int8 --mode inference --in-graph /resnet50_int8_pretrained_model.pb --batch-size 116 --num-intra-threads 16 >> results
 EOF
 	chmod +x "${script}"
 }
 function resnet50_int8_test() {
 	info "Running ResNet50 Int8 Tensorflow test"
 	local pids=()
 	local j=0
 	for i in "${containers[@]}"; do
 		$(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD}")&
 		pids["${j}"]=$!
 		((j++))
 	done
 	# wait for all pids
 	for pid in ${pids[*]}; do
 		wait "${pid}"
 	done
 	touch "${host_trigger_file}"
 	info "All containers are running the workload..."
 	collect_results "${CMD_FILE}"
 	for i in "${containers[@]}"; do
 		sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_RESULTS}"  >> "${tensorflow_file}"
 	done
 	local resnet50_int8_results=$(cat "${tensorflow_file}" | sed 's/.$//')
 	local average_resnet50_int8=$(echo "${resnet50_int8_results}" | sed 's/.$//'| sed "s/,/+/g;s/.*/(&)\/2/g" | bc -l)
 	local json="$(cat << EOF
 	{
 		"ResNet50Int8": {
 			"Result": "${resnet50_int8_results}",
 			"Average": "${average_resnet50_int8}",
 			"Units": "images/s"
 		}
 	}
 EOF
 )"
 	metrics_json_add_array_element "$json"
 	metrics_json_end_array "Results"
 }
 function main() {
 	# Verify enough arguments
 	if [ $# != 2 ]; then
 		echo >&2 "error: Not enough arguments [$@]"
 		help
 		exit 1
 	fi
 	local i=0
 	local containers=()
 	local not_started_count="${NUM_CONTAINERS}"
 	# Check tools/commands dependencies
 	cmds=("awk" "docker" "bc")
 	check_cmds "${cmds[@]}"
 	check_ctr_images "${IMAGE}" "${DOCKERFILE}"
 	init_env
 	create_start_script
 	info "Creating ${NUM_CONTAINERS} containers"
 	for ((i=1; i<= "${NUM_CONTAINERS}"; i++)); do
 		containers+=($(random_name))
 		sudo -E "${CTR_EXE}" run -d --runtime "${CTR_RUNTIME}" --mount="${MOUNT_OPTIONS}" "${IMAGE}" "${containers[-1]}" sh -c "${PAYLOAD_ARGS}"
 		((not_started_count--))
 		info "$not_started_count remaining containers"
 	done
 	metrics_json_init
 	metrics_json_start_array
 	# Check that the requested number of containers are running
 	check_containers_are_up "${NUM_CONTAINERS}"
 	# Check that the requested number of containers are running
 	check_containers_are_running "${NUM_CONTAINERS}"
 	# Get the initial number of pids in a single container before the workload starts
 	INITIAL_NUM_PIDS=$(sudo -E "${CTR_EXE}" t metrics "${containers[-1]}" | grep pids.current | grep pids.current | xargs | cut -d ' ' -f 2)
 	((INITIAL_NUM_PIDS++))
 	resnet50_int8_test
 	metrics_json_save
 	sudo rm -rf "${src_dir}"
 	clean_env_ctr
 }
 main "$@"