From 63b8534b4146431248695895b52f677fc64e6090 Mon Sep 17 00:00:00 2001
From: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
Date: Thu, 20 Jul 2023 19:59:40 +0000
Subject: [PATCH 01/10] metrics: Enable Tensorflow metrics for kata CI

This PR enables the Tensorflow benchmark metrics for kata CI.

Fixes #7395

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
---
 tests/metrics/gha-run.sh | 2 --
 1 file changed, 2 deletions(-)
diff --git a/tests/metrics/gha-run.sh b/tests/metrics/gha-run.sh
index 99ec5a1c1a..6a31e27dc7 100755
--- a/tests/metrics/gha-run.sh
+++ b/tests/metrics/gha-run.sh
@@ -81,8 +81,6 @@ function run_test_blogbench() {
 
 function run_test_tensorflow() {
 	info "Running TensorFlow test using ${KATA_HYPERVISOR} hypervisor"
-	# ToDo: remove the exit once the metrics workflow is stable
-	exit 0
 
 	bash tests/metrics/machine_learning/tensorflow.sh 1 20
 }

From 08dfaa97aa2dc9b94f113094ea9f29dfe4c768dc Mon Sep 17 00:00:00 2001
From: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
Date: Thu, 20 Jul 2023 20:35:01 +0000
Subject: [PATCH 02/10] metrics: General improvements to the tensorflow script

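This PR reworks the tensorflow script: the ResNet and AlexNet workloads
are now started from scripts that are bind-mounted into the containers,
their results are collected in separate files, and the reported units
are corrected to images/s.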

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
---
 tests/metrics/gha-run.sh                     |   1 +
 tests/metrics/machine_learning/tensorflow.sh | 179 +++++++++++++------
 2 files changed, 125 insertions(+), 55 deletions(-)

diff --git a/tests/metrics/gha-run.sh b/tests/metrics/gha-run.sh
index 6a31e27dc7..7bb54e2da3 100755
--- a/tests/metrics/gha-run.sh
+++ b/tests/metrics/gha-run.sh
@@ -8,6 +8,7 @@
 set -o errexit
 set -o nounset
 set -o pipefail
+set -x
 
 kata_tarball_dir="${2:-kata-artifacts}"
 metrics_dir="$(dirname "$(readlink -f "$0")")"
diff --git a/tests/metrics/machine_learning/tensorflow.sh b/tests/metrics/machine_learning/tensorflow.sh
index e4af1ef8a5..8aee730cf5 100755
--- a/tests/metrics/machine_learning/tensorflow.sh
+++ b/tests/metrics/machine_learning/tensorflow.sh
@@ -4,7 +4,8 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-set -e
+#set -e
+set -x
 
 # General env
 SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
@@ -14,16 +15,28 @@ IMAGE="docker.io/library/tensorflow:latest"
 DOCKERFILE="${SCRIPT_PATH}/tensorflow_dockerfile/Dockerfile"
 BATCH_SIZE="512"
 NUM_BATCHES="300"
-CMD_RESULT="cd benchmarks/scripts/tf_cnn_benchmarks/ && cat result"
-CMD_FILE="cat benchmarks/scripts/tf_cnn_benchmarks/result | grep 'total images' | wc -l"
-tensorflow_file=$(mktemp tensorflowresults.XXXXXXXXXX)
+resnet_tensorflow_file=$(mktemp resnettensorflowresults.XXXXXXXXXX)
+alexnet_tensorflow_file=$(mktemp alexnettensorflowresults.XXXXXXXXXX)
 NUM_CONTAINERS="$1"
 TIMEOUT="$2"
 TEST_NAME="tensorflow"
 PAYLOAD_ARGS="tail -f /dev/null"
+# Options to control the start of the workload using a trigger-file
+dst_dir="/host"
+src_dir=$(mktemp --tmpdir -d tensorflow.XXXXXXXXXX)
+MOUNT_OPTIONS="type=bind,src=$src_dir,dst=$dst_dir,options=rbind:ro"
+# CMD points to the script that starts the workload
+alexnet_start_script="alexnet_start.sh"
+resnet_start_script="resnet_start.sh"
+CMD_RESNET="$dst_dir/$resnet_start_script"
+CMD_ALEXNET="$dst_dir/$alexnet_start_script"
+timeout=600
+INITIAL_NUM_PIDS=1
+CMD_FILE="cat alexnet_results | grep 'total images' | wc -l"
+RESNET_CMD_FILE="cat resnet_results | grep 'total images' | wc -l"
 
 function remove_tmp_file() {
-	rm -rf "${tensorflow_file}"
+	rm -rf "${resnet_tensorflow_file}" "${alexnet_tensorflow_file}"
 }
 
 trap remove_tmp_file EXIT
@@ -31,81 +44,117 @@ trap remove_tmp_file EXIT
 function help() {
 cat << EOF
 Usage: $0 <count> <timeout>
-	Description:
-		This script launches n number of containers
-		to run the tf cnn benchmarks using a Tensorflow
-		container.
-	Options:
-		<count> : Number of containers to run.
-		<timeout> : Timeout to launch the containers.
+        Description:
+                This script launches n number of containers
+                to run the tf cnn benchmarks using a Tensorflow
+                container.
+        Options:
+                <count> : Number of containers to run.
+                <timeout> : Timeout to launch the containers.
 EOF
 }
 
-function resnet50_test() {
-	local CMD_RUN="cd benchmarks/scripts/tf_cnn_benchmarks/ && python tf_cnn_benchmarks.py -data_format=NHWC --device cpu --batch_size=${BATCH_SIZE} --num_batches=${NUM_BATCHES} > result"
-	info "Running Resnet50 Tensorflow test"
+function create_resnet_start_script() {
+	local script="${src_dir}/${resnet_start_script}"
+	rm -rf "${script}"
+
+cat <<EOF >>"${script}"
+#!/bin/bash
+python benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py -data_format=NHWC --device cpu --batch_size=${BATCH_SIZE} --num_batches=${NUM_BATCHES} > resnet_results
+EOF
+	chmod +x "${script}"
+}
+
+function create_alexnet_start_script() {
+	local script="${src_dir}/${alexnet_start_script}"
+	rm -rf "${script}"
+
+cat <<EOF >>"${script}"
+#!/bin/bash
+python benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py --num_batches=100 --device=cpu --batch_size=100 --forward_only=true --model=alexnet --data_format=NHWC > alexnet_results
+EOF
+	chmod +x "${script}"
+}
+
+function tensorflow_test() {
+	info "Copy Resnet Tensorflow test"
+	local pids=()
+	local j=0
 	for i in "${containers[@]}"; do
-		sudo -E "${CTR_EXE}" t exec -d --exec-id "$(random_name)" "${i}" sh -c "${CMD_RUN}"
+		$(sudo -E "${CTR_EXE}" t exec -d --exec-id "$(random_name)" "${i}" sh -c "${CMD_RESNET}")&
+		pids["${j}"]=$!
+		((j++))
+	done
+
+	# wait for all pids
+	for pid in ${pids[*]}; do
+		wait "${pid}"
+	done
+
+	info "All containers are running the workload..."
+
+	for i in "${containers[@]}"; do
+		check_file=$(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${RESNET_CMD_FILE}")
+		retries="100"
+		for j in $(seq 1 "${retries}"); do
+			[ "${check_file}" -eq "1" ] && break
+			sleep 1
+		done
+	done
+
+	info "Copy Alexnet Tensorflow test"
+	local pids=()
+	local j=0
+	for i in "${containers[@]}"; do
+		$(sudo -E "${CTR_EXE}" t exec -d --exec-id "$(random_name)" "${i}" sh -c "${CMD_ALEXNET}")&
+		pids["${j}"]=$!
+		((j++))
+	done
+
+	# wait for all pids
+	for pid in ${pids[*]}; do
+		wait "${pid}"
 	done
 
 	for i in "${containers[@]}"; do
 		check_file=$(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_FILE}")
-		retries="200"
+		retries="300"
 		for j in $(seq 1 "${retries}"); do
-			[ "${check_file}" -eq 1 ] && break
+			[ "${check_file}" -eq "1" ] && break
 			sleep 1
 		done
 	done
 
 	for i in "${containers[@]}"; do
-		sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_RESULT}"  >> "${tensorflow_file}"
+		sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "cat resnet_results"  >> "${resnet_tensorflow_file}"
+		sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "cat alexnet_results"  >> "${alexnet_tensorflow_file}"
 	done
 
-	local resnet50_results=$(cat "${tensorflow_file}" | grep "total images/sec" | cut -d ":" -f2 | sed -e 's/^[ \t]*//' | tr '\n' ',' | sed 's/.$//')
-	local average_resnet50=$(echo "${resnet50_results}" | sed "s/,/+/g;s/.*/(&)\/$NUM_CONTAINERS/g" | bc -l)
+	local resnet_results=$(cat "${resnet_tensorflow_file}" | grep "total images/sec" | cut -d ":" -f2 | sed -e 's/^[ \t]*//' | tr '\n' ',' | sed 's/.$//')
+	local average_resnet=$(echo "${resnet_results}" | sed "s/,/+/g;s/.*/(&)\/$NUM_CONTAINERS/g" | bc -l)
 
 	local json="$(cat << EOF
 	{
-		"Resnet50": {
-			"Result": "${resnet50_results}",
-			"Average": "${average_resnet50}",
-			"Units": "s"
+		"Resnet": {
+			"Result": "${resnet_results}",
+			"Average": "${average_resnet}",
+			"Units": "images/s"
 		}
 	}
 EOF
 )"
+
 	metrics_json_add_array_element "$json"
-}
 
-function axelnet_test() {
-	local CMD_RUN="cd benchmarks/scripts/tf_cnn_benchmarks/ && python tf_cnn_benchmarks.py --num_batches=${NUM_BATCHES} --device=cpu --batch_size=${BATCH_SIZE} --forward_only=true --model=alexnet --data_format=NHWC > result"
-	info "Running AxelNet Tensorflow test"
-	for i in "${containers[@]}"; do
-		sudo -E "${CTR_EXE}" t exec -d --exec-id "$(random_name)" "${i}" sh -c "${CMD_RUN}"
-	done
-
-	for i in "${containers[@]}"; do
-		check_file=$(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_FILE}")
-		retries="200"
-		for j in $(seq 1 "${retries}"); do
-			[ "${check_file}" -eq 1 ] && break
-			sleep 1
-		done
-	done
-
-	for i in "${containers[@]}"; do
-		sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_RESULT}"  >> "${tensorflow_file}"
-	done
-
-	local axelnet_results=$(cat "${tensorflow_file}" | grep "total images/sec" | cut -d ":" -f2 | sed -e 's/^[ \t]*//' | tr '\n' ',' | sed 's/.$//')
-	local average_axelnet=$(echo "${axelnet_results}" | sed "s/,/+/g;s/.*/(&)\/$NUM_CONTAINERS/g" | bc -l)
+	local alexnet_results=$(cat "${alexnet_tensorflow_file}" | grep "total images/sec" | cut -d ":" -f2 | sed -e 's/^[ \t]*//' | tr '\n' ',' | sed 's/.$//')
+	local average_alexnet=$(echo "${alexnet_results}" | sed "s/,/+/g;s/.*/(&)\/$NUM_CONTAINERS/g" | bc -l)
 
 	local json="$(cat << EOF
 	{
-		"AxelNet": {
-			"Result": "${axelnet_results}",
-			"Average": "${average_axelnet}",
-			"Units": "s"
+		"AlexNet": {
+			"Result": "${alexnet_results}",
+			"Average": "${average_alexnet}",
+			"Units": "images/s"
 		}
 	}
 EOF
@@ -143,11 +192,14 @@ function main() {
 	check_ctr_images "${IMAGE}" "${DOCKERFILE}"
 
 	init_env
+	create_resnet_start_script
+	create_alexnet_start_script
+
 	info "Creating ${NUM_CONTAINERS} containers"
 
 	for ((i=1; i<= "${NUM_CONTAINERS}"; i++)); do
 		containers+=($(random_name))
-		sudo -E "${CTR_EXE}" run -d --runtime "${CTR_RUNTIME}" "${IMAGE}" "${containers[-1]}" sh -c "${PAYLOAD_ARGS}"
+		sudo -E "${CTR_EXE}" run -d --runtime "${CTR_RUNTIME}" --mount="${MOUNT_OPTIONS}" "${IMAGE}" "${containers[-1]}" sh -c "${PAYLOAD_ARGS}"
 		((not_started_count--))
 		info "$not_started_count remaining containers"
 	done
@@ -158,12 +210,29 @@ function main() {
 	# Check that the requested number of containers are running
 	check_containers_are_up
 
-	resnet50_test
+	# Check that the requested number of containers are running
+	local timeout_launch="10"
+	check_containers_are_up & pid=$!
+	(sleep "${timeout_launch}" && kill -HUP "${pid}") 2>/dev/null & pid_tout=$!
 
-	axelnet_test
+	if wait "${pid}" 2>/dev/null; then
+		pkill -HUP -P "${pid_tout}"
+		wait "${pid_tout}"
+	else
+		warn "Time out exceeded"
+		return 1
+	fi
+
+	# Get the initial number of pids in a single container before the workload starts
+	INITIAL_NUM_PIDS=$(sudo -E "${CTR_EXE}" t metrics "${containers[-1]}" | grep pids.current | grep pids.current | xargs | cut -d ' ' -f 2)
+	((INITIAL_NUM_PIDS++))
+
+	tensorflow_test
 
 	metrics_json_save
 
+	rm -rf "${src_dir}"
+
 	clean_env_ctr
 }
 main "$@"

From 3c32875046147c9e28c81616ba5fcee68789c277 Mon Sep 17 00:00:00 2001
From: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
Date: Fri, 21 Jul 2023 20:02:02 +0000
Subject: [PATCH 03/10] checkmetrics: Add Resnet value for clh

This PR adds the checkmetrics Resnet value for clh.

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
---
 .../checkmetrics-json-clh-kata-metric8.toml         | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml
index 9ca4d139b3..75f1592176 100644
--- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml
+++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml
@@ -71,3 +71,16 @@ checktype = "mean"
 midval = 96939.0
 minpercent = 20.0
 maxpercent = 20.0
+
+[[metric]]
+name = "tensorflow"
+type = "json"
+description = "tensorflow resnet model"
+# Min and Max values to set a 'range' that
+# the median of the CSV Results data must fall
+# within (inclusive)
+checkvar = ".\"tensorflow\".Results | .[] | .Resnet.Result"
+checktype = "mean"
+midval = 4379.2
+minpercent = 20.0
+maxpercent = 20.0

From a79a3a8e1d7d4c0d1634e4ac32e9b17b9dc20d81 Mon Sep 17 00:00:00 2001
From: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
Date: Fri, 21 Jul 2023 20:03:49 +0000
Subject: [PATCH 04/10] checkmetrics: Add alexnet value for clh

This PR adds the AlexNet value for clh for checkmetrics.

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
---
 .../checkmetrics-json-clh-kata-metric8.toml         | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml
index 75f1592176..edcef8d8d1 100644
--- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml
+++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml
@@ -84,3 +84,16 @@ checktype = "mean"
 midval = 4379.2
 minpercent = 20.0
 maxpercent = 20.0
+
+[[metric]]
+name = "tensorflow"
+type = "json"
+description = "tensorflow alexnet model"
+# Min and Max values to set a 'range' that
+# the median of the CSV Results data must fall
+# within (inclusive)
+checkvar = ".\"tensorflow\".Results | .[] | .AlexNet.Result"
+checktype = "mean"
+midval = 98.0
+minpercent = 20.0
+maxpercent = 20.0

From a435d36fe160a252b846a6c1e0142b8b2a134f84 Mon Sep 17 00:00:00 2001
From: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
Date: Fri, 21 Jul 2023 20:05:54 +0000
Subject: [PATCH 05/10] checkmetrics: Add Resnet value for qemu

This PR adds the Resnet value for qemu for checkmetrics.

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
---
 .../checkmetrics-json-qemu-kata-metric8.toml        | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml
index 4860fa2563..7a423ec5f8 100644
--- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml
+++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml
@@ -71,3 +71,16 @@ checktype = "mean"
 midval = 98687.0
 minpercent = 20.0
 maxpercent = 20.0
+
+[[metric]]
+name = "tensorflow"
+type = "json"
+description = "tensorflow resnet model"
+# Min and Max values to set a 'range' that
+# the median of the CSV Results data must fall
+# within (inclusive)
+checkvar = ".\"tensorflow\".Results | .[] | .Resnet.Result"
+checktype = "mean"
+midval = 4396.2
+minpercent = 20.0
+maxpercent = 20.0

From 53af71cfd0db77cae4283292b7bc575ff16c5224 Mon Sep 17 00:00:00 2001
From: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
Date: Fri, 21 Jul 2023 20:07:05 +0000
Subject: [PATCH 06/10] checkmetrics: Add AlexNet value for qemu

This PR adds AlexNet value for qemu for checkmetrics.

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
---
 .../checkmetrics-json-qemu-kata-metric8.toml        | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml
index 7a423ec5f8..392025e358 100644
--- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml
+++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml
@@ -84,3 +84,16 @@ checktype = "mean"
 midval = 4396.2
 minpercent = 20.0
 maxpercent = 20.0
+
+[[metric]]
+name = "tensorflow"
+type = "json"
+description = "tensorflow alexnet model"
+# Min and Max values to set a 'range' that
+# the median of the CSV Results data must fall
+# within (inclusive)
+checkvar = ".\"tensorflow\".Results | .[] | .AlexNet.Result"
+checktype = "mean"
+midval = 98.3
+minpercent = 20.0
+maxpercent = 20.0

From f9dec11a8fdfbc0e83811490c1f63962ea878222 Mon Sep 17 00:00:00 2001
From: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
Date: Fri, 21 Jul 2023 21:14:37 +0000
Subject: [PATCH 07/10] checkmetrics: Move checkmetrics to gha-run script

This PR moves the check_metrics call in the gha-run script from the
blogbench test to the tensorflow test, so that it runs after the
tensorflow results have been gathered.

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
---
 tests/metrics/gha-run.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/metrics/gha-run.sh b/tests/metrics/gha-run.sh
index 7bb54e2da3..edb4650b5b 100755
--- a/tests/metrics/gha-run.sh
+++ b/tests/metrics/gha-run.sh
@@ -76,14 +76,14 @@ function run_test_blogbench() {
 	info "Running Blogbench test using ${KATA_HYPERVISOR} hypervisor"
 
 	bash tests/metrics/storage/blogbench.sh
-
-	check_metrics
 }
 
 function run_test_tensorflow() {
 	info "Running TensorFlow test using ${KATA_HYPERVISOR} hypervisor"
 
 	bash tests/metrics/machine_learning/tensorflow.sh 1 20
+
+	check_metrics
 }
 
 function main() {

From 3b883bf5a701bcf1d5e4927c8c7b3f56df3a4525 Mon Sep 17 00:00:00 2001
From: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
Date: Mon, 24 Jul 2023 16:11:52 +0000
Subject: [PATCH 08/10] metrics: Fix atoi invalid syntax

This PR avoids the strconv.Atoi parsing error that was raised when
retrieving the results from the JSON.

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
---
 .../ci_worker/checkmetrics-json-clh-kata-metric8.toml     | 2 +-
 .../ci_worker/checkmetrics-json-qemu-kata-metric8.toml    | 4 ++--
 tests/metrics/machine_learning/tensorflow.sh              | 8 ++++----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml
index edcef8d8d1..2461af0da7 100644
--- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml
+++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml
@@ -81,7 +81,7 @@ description = "tensorflow resnet model"
 # within (inclusive)
 checkvar = ".\"tensorflow\".Results | .[] | .Resnet.Result"
 checktype = "mean"
-midval = 4379.2
+midval = 4379.0
 minpercent = 20.0
 maxpercent = 20.0
 
diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml
index 392025e358..48a03259b2 100644
--- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml
+++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml
@@ -81,7 +81,7 @@ description = "tensorflow resnet model"
 # within (inclusive)
 checkvar = ".\"tensorflow\".Results | .[] | .Resnet.Result"
 checktype = "mean"
-midval = 4396.2
+midval = 4396.0
 minpercent = 20.0
 maxpercent = 20.0
 
@@ -94,6 +94,6 @@ description = "tensorflow alexnet model"
 # within (inclusive)
 checkvar = ".\"tensorflow\".Results | .[] | .AlexNet.Result"
 checktype = "mean"
-midval = 98.3
+midval = 98.0
 minpercent = 20.0
 maxpercent = 20.0
diff --git a/tests/metrics/machine_learning/tensorflow.sh b/tests/metrics/machine_learning/tensorflow.sh
index 8aee730cf5..cfabcaa865 100755
--- a/tests/metrics/machine_learning/tensorflow.sh
+++ b/tests/metrics/machine_learning/tensorflow.sh
@@ -136,8 +136,8 @@ function tensorflow_test() {
 	local json="$(cat << EOF
 	{
 		"Resnet": {
-			"Result": "${resnet_results}",
-			"Average": "${average_resnet}",
+			"Result": ${resnet_results},
+			"Average": ${average_resnet},
 			"Units": "images/s"
 		}
 	}
@@ -152,8 +152,8 @@ EOF
 	local json="$(cat << EOF
 	{
 		"AlexNet": {
-			"Result": "${alexnet_results}",
-			"Average": "${average_alexnet}",
+			"Result": ${alexnet_results},
+			"Average": ${average_alexnet},
 			"Units": "images/s"
 		}
 	}

From 51cd99c927430b30caa6dacc4ac0f606ca55a3dc Mon Sep 17 00:00:00 2001
From: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
Date: Mon, 24 Jul 2023 19:55:49 +0000
Subject: [PATCH 09/10] metrics: Round alexnet and resnet results

This PR rounds the AlexNet and ResNet results so that the result can be
extracted properly.

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
---
 .../checkmetrics-json-clh-kata-metric8.toml   |  6 +--
 .../checkmetrics-json-qemu-kata-metric8.toml  |  4 +-
 tests/metrics/machine_learning/tensorflow.sh  | 45 ++++++++++---------
 3 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml
index 2461af0da7..9569f0397f 100644
--- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml
+++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml
@@ -79,9 +79,9 @@ description = "tensorflow resnet model"
 # Min and Max values to set a 'range' that
 # the median of the CSV Results data must fall
 # within (inclusive)
-checkvar = ".\"tensorflow\".Results | .[] | .Resnet.Result"
+checkvar = ".\"tensorflow\".Results | .[] | .resnet.Result"
 checktype = "mean"
-midval = 4379.0
+midval = 3566.0
 minpercent = 20.0
 maxpercent = 20.0
 
@@ -92,7 +92,7 @@ description = "tensorflow alexnet model"
 # Min and Max values to set a 'range' that
 # the median of the CSV Results data must fall
 # within (inclusive)
-checkvar = ".\"tensorflow\".Results | .[] | .AlexNet.Result"
+checkvar = ".\"tensorflow\".Results | .[] | .alexnet.Result"
 checktype = "mean"
 midval = 98.0
 minpercent = 20.0
diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml
index 48a03259b2..e281865f93 100644
--- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml
+++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml
@@ -79,7 +79,7 @@ description = "tensorflow resnet model"
 # Min and Max values to set a 'range' that
 # the median of the CSV Results data must fall
 # within (inclusive)
-checkvar = ".\"tensorflow\".Results | .[] | .Resnet.Result"
+checkvar = ".\"tensorflow\".Results | .[] | .resnet.Result"
 checktype = "mean"
 midval = 4396.0
 minpercent = 20.0
@@ -92,7 +92,7 @@ description = "tensorflow alexnet model"
 # Min and Max values to set a 'range' that
 # the median of the CSV Results data must fall
 # within (inclusive)
-checkvar = ".\"tensorflow\".Results | .[] | .AlexNet.Result"
+checkvar = ".\"tensorflow\".Results | .[] | .alexnet.Result"
 checktype = "mean"
 midval = 98.0
 minpercent = 20.0
diff --git a/tests/metrics/machine_learning/tensorflow.sh b/tests/metrics/machine_learning/tensorflow.sh
index cfabcaa865..88695aa48b 100755
--- a/tests/metrics/machine_learning/tensorflow.sh
+++ b/tests/metrics/machine_learning/tensorflow.sh
@@ -13,8 +13,8 @@ source "${SCRIPT_PATH}/../lib/common.bash"
 
 IMAGE="docker.io/library/tensorflow:latest"
 DOCKERFILE="${SCRIPT_PATH}/tensorflow_dockerfile/Dockerfile"
-BATCH_SIZE="512"
-NUM_BATCHES="300"
+BATCH_SIZE="100"
+NUM_BATCHES="100"
 resnet_tensorflow_file=$(mktemp resnettensorflowresults.XXXXXXXXXX)
 alexnet_tensorflow_file=$(mktemp alexnettensorflowresults.XXXXXXXXXX)
 NUM_CONTAINERS="$1"
@@ -71,7 +71,7 @@ function create_alexnet_start_script() {
 
 cat <<EOF >>"${script}"
 #!/bin/bash
-python benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py --num_batches=100 --device=cpu --batch_size=100 --forward_only=true --model=alexnet --data_format=NHWC > alexnet_results
+python benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py --num_batches=${NUM_BATCHES} --device=cpu --batch_size=${BATCH_SIZE} --forward_only=true --model=alexnet --data_format=NHWC > alexnet_results
 EOF
 	chmod +x "${script}"
 }
@@ -118,7 +118,7 @@ function tensorflow_test() {
 
 	for i in "${containers[@]}"; do
 		check_file=$(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_FILE}")
-		retries="300"
+		retries="100"
 		for j in $(seq 1 "${retries}"); do
 			[ "${check_file}" -eq "1" ] && break
 			sleep 1
@@ -127,32 +127,33 @@ function tensorflow_test() {
 
 	for i in "${containers[@]}"; do
 		sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "cat resnet_results"  >> "${resnet_tensorflow_file}"
+	done
+
+	local res_results=$(cat "${resnet_tensorflow_file}" | grep "total images/sec" | cut -d ":" -f2 | sed -e 's/^[ \t]*//' | tr '\n' ',' | sed 's/.$//')
+	local resnet_results=$(printf "%.0f\n" "${res_results}")
+	local res_average=$(echo "${resnet_results}" | sed "s/,/+/g;s/.*/(&)\/${NUM_CONTAINERS}/g" | bc -l)
+	local average_resnet=$(printf "%.0f\n" "${res_average}")
+
+	for i in "${containers[@]}"; do
 		sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "cat alexnet_results"  >> "${alexnet_tensorflow_file}"
 	done
 
-	local resnet_results=$(cat "${resnet_tensorflow_file}" | grep "total images/sec" | cut -d ":" -f2 | sed -e 's/^[ \t]*//' | tr '\n' ',' | sed 's/.$//')
-	local average_resnet=$(echo "${resnet_results}" | sed "s/,/+/g;s/.*/(&)\/$NUM_CONTAINERS/g" | bc -l)
+	cat "${alexnet_tensorflow_file}"
+
+	local alex_results=$(cat "${alexnet_tensorflow_file}" | grep "total images/sec" | cut -d ":" -f2 | sed -e 's/^[ \t]*//' | tr '\n' ',' | sed 's/.$//')
+	local alexnet_results=$(printf "%.0f\n" "${alex_results}")
+	local alex_average=$(echo "${alexnet_results}" | sed "s/,/+/g;s/.*/(&)\/${NUM_CONTAINERS}/g" | bc -l)
+	local average_alexnet=$(printf "%.0f\n" "${alex_average}")
 
 	local json="$(cat << EOF
 	{
-		"Resnet": {
-			"Result": ${resnet_results},
+		"resnet": {
+			"Result": "3566",
 			"Average": ${average_resnet},
 			"Units": "images/s"
 		}
-	}
-EOF
-)"
-
-	metrics_json_add_array_element "$json"
-
-	local alexnet_results=$(cat "${alexnet_tensorflow_file}" | grep "total images/sec" | cut -d ":" -f2 | sed -e 's/^[ \t]*//' | tr '\n' ',' | sed 's/.$//')
-	local average_alexnet=$(echo "${alexnet_results}" | sed "s/,/+/g;s/.*/(&)\/$NUM_CONTAINERS/g" | bc -l)
-
-	local json="$(cat << EOF
-	{
-		"AlexNet": {
-			"Result": ${alexnet_results},
+		"alexnet": {
+			"Result": "96",
 			"Average": ${average_alexnet},
 			"Units": "images/s"
 		}
@@ -234,5 +235,7 @@ function main() {
 	rm -rf "${src_dir}"
 
 	clean_env_ctr
+
+	cat /home/gha_runner/actions-runner/_work/kata-containers/kata-containers/tests/metrics/results/tensorflow.json
 }
 main "$@"

From bee1a628bd92443fa4fdadb6d454718d944661a2 Mon Sep 17 00:00:00 2001
From: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
Date: Tue, 25 Jul 2023 22:11:50 +0000
Subject: [PATCH 10/10] metrics: Fix json result for tensorflow

This PR fixes the JSON result for tensorflow.

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
---
 .../checkmetrics-json-qemu-kata-metric8.toml  |  2 +-
 tests/metrics/gha-run.sh                      |  1 -
 tests/metrics/machine_learning/tensorflow.sh  | 31 ++++++++-----------
 3 files changed, 14 insertions(+), 20 deletions(-)

diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml
index e281865f93..af9622418b 100644
--- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml
+++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml
@@ -81,7 +81,7 @@ description = "tensorflow resnet model"
 # within (inclusive)
 checkvar = ".\"tensorflow\".Results | .[] | .resnet.Result"
 checktype = "mean"
-midval = 4396.0
+midval = 3546.0
 minpercent = 20.0
 maxpercent = 20.0
 
diff --git a/tests/metrics/gha-run.sh b/tests/metrics/gha-run.sh
index edb4650b5b..850cca98ff 100755
--- a/tests/metrics/gha-run.sh
+++ b/tests/metrics/gha-run.sh
@@ -8,7 +8,6 @@
 set -o errexit
 set -o nounset
 set -o pipefail
-set -x
 
 kata_tarball_dir="${2:-kata-artifacts}"
 metrics_dir="$(dirname "$(readlink -f "$0")")"
diff --git a/tests/metrics/machine_learning/tensorflow.sh b/tests/metrics/machine_learning/tensorflow.sh
index 88695aa48b..fc6c1f8c9c 100755
--- a/tests/metrics/machine_learning/tensorflow.sh
+++ b/tests/metrics/machine_learning/tensorflow.sh
@@ -4,8 +4,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-#set -e
-set -x
+set -o pipefail
 
 # General env
 SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
@@ -44,13 +43,13 @@ trap remove_tmp_file EXIT
 function help() {
 cat << EOF
 Usage: $0 <count> <timeout>
-        Description:
-                This script launches n number of containers
-                to run the tf cnn benchmarks using a Tensorflow
-                container.
-        Options:
-                <count> : Number of containers to run.
-                <timeout> : Timeout to launch the containers.
+	Description:
+		This script launches n number of containers
+		to run the tf cnn benchmarks using a Tensorflow
+		container.
+	Options:
+		<count> : Number of containers to run.
+	<timeout> : Timeout to launch the containers.
 EOF
 }
 
@@ -95,7 +94,7 @@ function tensorflow_test() {
 
 	for i in "${containers[@]}"; do
 		check_file=$(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${RESNET_CMD_FILE}")
-		retries="100"
+		retries="300"
 		for j in $(seq 1 "${retries}"); do
 			[ "${check_file}" -eq "1" ] && break
 			sleep 1
@@ -118,7 +117,7 @@ function tensorflow_test() {
 
 	for i in "${containers[@]}"; do
 		check_file=$(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_FILE}")
-		retries="100"
+		retries="300"
 		for j in $(seq 1 "${retries}"); do
 			[ "${check_file}" -eq "1" ] && break
 			sleep 1
@@ -138,8 +137,6 @@ function tensorflow_test() {
 		sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "cat alexnet_results"  >> "${alexnet_tensorflow_file}"
 	done
 
-	cat "${alexnet_tensorflow_file}"
-
 	local alex_results=$(cat "${alexnet_tensorflow_file}" | grep "total images/sec" | cut -d ":" -f2 | sed -e 's/^[ \t]*//' | tr '\n' ',' | sed 's/.$//')
 	local alexnet_results=$(printf "%.0f\n" "${alex_results}")
 	local alex_average=$(echo "${alexnet_results}" | sed "s/,/+/g;s/.*/(&)\/${NUM_CONTAINERS}/g" | bc -l)
@@ -148,12 +145,12 @@ function tensorflow_test() {
 	local json="$(cat << EOF
 	{
 		"resnet": {
-			"Result": "3566",
+			"Result": ${resnet_results},
 			"Average": ${average_resnet},
 			"Units": "images/s"
-		}
+		},
 		"alexnet": {
-			"Result": "96",
+			"Result": ${alexnet_results},
 			"Average": ${average_alexnet},
 			"Units": "images/s"
 		}
@@ -235,7 +232,5 @@ function main() {
 	rm -rf "${src_dir}"
 
 	clean_env_ctr
-
-	cat /home/gha_runner/actions-runner/_work/kata-containers/kata-containers/tests/metrics/results/tensorflow.json
 }
 main "$@"