Merge pull request #7783 from GabyCT/topic/makereport

metrics: Add metrics report script
Fabiano Fidêncio 2023-08-30 17:11:39 +02:00 committed by GitHub
commit aa2b51a831
13 changed files with 1560 additions and 0 deletions

@@ -0,0 +1,97 @@
#!/bin/bash
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Take the data found in subdirectories of the metrics 'results' directory,
# and turn them into a PDF report. Use a Dockerfile containing all the tooling
# and scripts we need to do that.
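#
# Example (hypothetical directory names): with results laid out as
#   metrics/results/kata-run/boot-times.json
#   metrics/results/runc-run/boot-times.json
# running './makereport.sh' produces 'output/metrics_report.pdf', comparing
# the 'kata-run' and 'runc-run' data sets.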
set -e
SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
source "${SCRIPT_PATH}/../lib/common.bash"
IMAGE="${IMAGE:-metrics-report}"
DOCKERFILE="${SCRIPT_PATH}/report_dockerfile/Dockerfile"
HOST_INPUT_DIR="${SCRIPT_PATH}/../results"
R_ENV_FILE="${HOST_INPUT_DIR}/Env.R"
HOST_OUTPUT_DIR="${SCRIPT_PATH}/output"
GUEST_INPUT_DIR="/inputdir/"
GUEST_OUTPUT_DIR="/outputdir/"
# If in debugging mode, we also map in the scripts dir so you can
# dynamically edit and re-load them at the R prompt
HOST_SCRIPT_DIR="${SCRIPT_PATH}/report_dockerfile"
GUEST_SCRIPT_DIR="/scripts/"
setup() {
echo "Checking subdirectories"
check_subdir="$(ls -dx ${HOST_INPUT_DIR}/*/ 2> /dev/null | wc -l)"
if [ "$check_subdir" -eq 0 ]; then
die "No subdirs in [${HOST_INPUT_DIR}] to read results from."
fi
echo "Checking Dockerfile"
check_dockerfiles_images "$IMAGE" "$DOCKERFILE"
mkdir -p "$HOST_OUTPUT_DIR"
echo "inputdir=\"${GUEST_INPUT_DIR}\"" > ${R_ENV_FILE}
echo "outputdir=\"${GUEST_OUTPUT_DIR}\"" >> ${R_ENV_FILE}
# A bit of a hack to get an R-syntax list of the directories to process.
# We also need them as short relative names, not host-side paths.
resultdirs="$(cd ${HOST_INPUT_DIR}; ls -dx */)"
resultdirslist=$(echo ${resultdirs} | sed 's/ \+/", "/g')
echo "resultdirs=c(" >> ${R_ENV_FILE}
echo " \"${resultdirslist}\"" >> ${R_ENV_FILE}
echo ")" >> ${R_ENV_FILE}
}
run() {
docker run -ti --rm -v ${HOST_INPUT_DIR}:${GUEST_INPUT_DIR} -v ${HOST_OUTPUT_DIR}:${GUEST_OUTPUT_DIR} ${extra_volumes} ${IMAGE} ${extra_command}
ls -la ${HOST_OUTPUT_DIR}/*
}
help() {
usage=$(cat << EOF
Usage: $0 [-h] [options]
Description:
This script generates a metrics report document
from the results directory one level up in the
directory tree (../results).
Options:
-d, Run in debug (interactive) mode
-h, Print this help
EOF
)
echo "$usage"
}
main() {
local OPTIND
while getopts "dh" opt; do
case ${opt} in
d)
# In debug mode, run a shell instead of the default report generation
extra_command="bash"
extra_volumes="-v ${HOST_SCRIPT_DIR}:${GUEST_SCRIPT_DIR}"
;;
h)
help
exit 0
;;
?)
# parse failure
help
die "Failed to parse arguments"
;;
esac
done
shift $((OPTIND-1))
setup
run
}
main "$@"

@@ -0,0 +1,44 @@
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Set up an Ubuntu image with the components needed to generate a
# metrics report. That includes:
# - R
# - The R 'tidyverse'
# - pandoc
# - The report generation R files and helper scripts
# Start with the base rocker tidyverse.
# We would have used the 'verse' base, which already has some of the document
# processing installed, but I could not figure out how to add the extra bits
# we needed to the lite TeX install it uses.
# We pin the base image to a specific tag, rather than using 'latest', so we
# are not exposed to unexpected changes when the 'latest' base image is updated.
FROM rocker/tidyverse:3.6.0
# Version of the Dockerfile
LABEL DOCKERFILE_VERSION="1.2"
# Without this, some of the package installs stop to ask interactive questions...
ENV DEBIAN_FRONTEND=noninteractive
# Install the extra doc processing parts we need for our Rmarkdown PDF flow.
RUN apt-get update -qq && \
apt-get install -y --no-install-recommends \
texlive-latex-base \
texlive-fonts-recommended \
latex-xcolor && \
apt-get clean && \
rm -rf /var/lib/apt/lists
# Install the extra R packages we need.
RUN install2.r --error --deps TRUE \
gridExtra \
ggpubr
# Pull in our actual worker scripts
COPY . /scripts
# By default generate the report
CMD ["/scripts/genreport.sh"]

@@ -0,0 +1,122 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Display details for the 'Device Under Test', for all data sets being processed.
suppressMessages(suppressWarnings(library(tidyr))) # for gather().
library(tibble)
suppressMessages(suppressWarnings(library(plyr))) # rbind.fill
# So we can plot multiple graphs
library(gridExtra) # together.
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable.
suppressMessages(library(jsonlite)) # to load the data.
# A list of all the known results files that might contain the information we need.
resultsfiles=c(
"boot-times.json",
"memory-footprint.json",
"memory-footprint-ksm.json",
"memory-footprint-inside-container.json"
)
data=c()
stats=c()
stats_names=c()
# For each set of results
for (currentdir in resultdirs) {
count=1
dirstats=c()
for (resultsfile in resultsfiles) {
fname=paste(inputdir, currentdir, resultsfile, sep="/")
if ( !file.exists(fname)) {
#warning(paste("Skipping non-existent file: ", fname))
next
}
# Derive the name from the test result dirname
datasetname=basename(currentdir)
# Import the data
fdata=fromJSON(fname)
if (length(fdata$'kata-env') != 0 ) {
# We have kata-runtime data
dirstats=tibble("Run Ver"=as.character(fdata$'kata-env'$Runtime$Version$Semver))
dirstats=cbind(dirstats, "Run SHA"=as.character(fdata$'kata-env'$Runtime$Version$Commit))
pver=as.character(fdata$'kata-env'$Proxy$Version)
pver=sub("^[[:alpha:][:blank:]-]*", "", pver)
# uncomment if you want to drop the commit sha as well
#pver=sub("([[:digit:].]*).*", "\\1", pver)
dirstats=cbind(dirstats, "Proxy Ver"=pver)
# Trim the shim string
sver=as.character(fdata$'kata-env'$Shim$Version)
sver=sub("^[[:alpha:][:blank:]-]*", "", sver)
# uncomment if you want to drop the commit sha as well
#sver=sub("([[:digit:].]*).*", "\\1", sver)
dirstats=cbind(dirstats, "Shim Ver"=sver)
# Default QEMU ver string is far too long and noisy - trim.
hver=as.character(fdata$'kata-env'$Hypervisor$Version)
hver=sub("^[[:alpha:][:blank:]]*", "", hver)
hver=sub("([[:digit:].]*).*", "\\1", hver)
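# e.g. a raw string such as "QEMU emulator version 2.11.2 (v2.11.2-dirty)"
# (hypothetical) is reduced to just "2.11.2" by the two subs above.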
dirstats=cbind(dirstats, "Hyper Ver"=hver)
iver=as.character(fdata$'kata-env'$Image$Path)
iver=sub("^[[:alpha:]/-]*", "", iver)
dirstats=cbind(dirstats, "Image Ver"=iver)
kver=as.character(fdata$'kata-env'$Kernel$Path)
kver=sub("^[[:alpha:]/-]*", "", kver)
dirstats=cbind(dirstats, "Guest Krnl"=kver)
dirstats=cbind(dirstats, "Host arch"=as.character(fdata$'kata-env'$Host$Architecture))
dirstats=cbind(dirstats, "Host Distro"=as.character(fdata$'kata-env'$Host$Distro$Name))
dirstats=cbind(dirstats, "Host DistVer"=as.character(fdata$'kata-env'$Host$Distro$Version))
dirstats=cbind(dirstats, "Host Model"=as.character(fdata$'kata-env'$Host$CPU$Model))
dirstats=cbind(dirstats, "Host Krnl"=as.character(fdata$'kata-env'$Host$Kernel))
dirstats=cbind(dirstats, "runtime"=as.character(fdata$test$runtime))
break
} else {
if (length(fdata$'runc-env') != 0 ) {
dirstats=tibble("Run Ver"=as.character(fdata$'runc-env'$Version$Semver))
dirstats=cbind(dirstats, "Run SHA"=as.character(fdata$'runc-env'$Version$Commit))
dirstats=cbind(dirstats, "runtime"=as.character(fdata$test$runtime))
} else {
dirstats=tibble("runtime"="Unknown")
}
break
}
}
if ( length(dirstats) == 0 ) {
warning(paste("No valid data found for directory ", currentdir))
}
# use plyr rbind.fill so we can combine disparate version info frames
stats=rbind.fill(stats, dirstats)
stats_names=rbind(stats_names, datasetname)
}
rownames(stats) = stats_names
# Rotate the tibble so we get data dirs as the columns
spun_stats = as_tibble(cbind(What=names(stats), t(stats)))
# Build us a text table of numerical results
stats_plot = suppressWarnings(ggtexttable(data.frame(spun_stats, check.names=FALSE),
theme=ttheme(base_size=6),
rows=NULL
))
# It may seem odd doing a grid of 1x1, but it should ensure we get a uniform format and
# layout to match the other charts and tables in the report.
master_plot = grid.arrange(
stats_plot,
nrow=1,
ncol=1 )

@@ -0,0 +1,269 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Display details for `fio` random read storage IO tests.
library(ggplot2) # ability to plot nicely
library(gridExtra) # So we can plot multiple graphs together
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable
suppressMessages(library(jsonlite)) # to load the data
suppressMessages(suppressWarnings(library(tidyr))) # for gather
library(tibble)
testnames=c(
"fio-randread-128",
"fio-randread-256",
"fio-randread-512",
"fio-randread-1k",
"fio-randread-2k",
"fio-randread-4k",
"fio-randread-8k",
"fio-randread-16k",
"fio-randread-32k",
"fio-randread-64k"
)
data2=c()
all_ldata=c()
all_ldata2=c()
stats=c()
rstats=c()
rstats_names=c()
# Where to store up the stats for the tables
read_bw_stats=c()
read_iops_stats=c()
read_lat95_stats=c()
read_lat99_stats=c()
# For each set of results
for (currentdir in resultdirs) {
bw_dirstats=c()
iops_dirstats=c()
lat95_dirstats=c()
lat99_dirstats=c()
# Derive the name from the test result dirname
datasetname=basename(currentdir)
for (testname in testnames) {
fname=paste(inputdir, currentdir, testname, '.json', sep="")
if ( !file.exists(fname)) {
#warning(paste("Skipping non-existent file: ", fname))
next
}
# Import the data
fdata=fromJSON(fname)
# De-reference the data nested under the test name
fdata=fdata[[testname]]
blocksize=fdata$Raw$'global options'$bs
# Extract the latency data - it comes as a table of percentiles, so
# we have to do a little work...
clat=data.frame(clat_ns=fdata$Raw$jobs[[1]]$read$clat_ns$percentile)
# Generate a clat data set with 'clean' percentile numbers so
# we can sensibly plot it later on.
clat2=clat
colnames(clat2)<-sub("clat_ns.", "", colnames(clat2))
colnames(clat2)<-sub("0000", "", colnames(clat2))
ldata2=gather(clat2)
colnames(ldata2)[colnames(ldata2)=="key"] <- "percentile"
colnames(ldata2)[colnames(ldata2)=="value"] <- "ms"
ldata2$ms=ldata2$ms/1000000 #ns->ms
ldata2=cbind(ldata2, runtime=rep(datasetname, length(ldata2$percentile)))
ldata2=cbind(ldata2, blocksize=rep(blocksize, length(ldata2$percentile)))
# Pull the 95 and 99 percentile numbers for the boxplot
# Plotting all values for all runtimes and blocksizes is just way too
# noisy to make a meaningful picture, so we use this subset.
# Our values fall more in the range of ms...
pc95data=tibble(percentile=clat$clat_ns.95.000000/1000000)
pc95data=cbind(pc95data, runtime=rep(paste(datasetname, "95pc", sep="-"), length(pc95data$percentile)))
pc99data=tibble(percentile=clat$clat_ns.99.000000/1000000)
pc99data=cbind(pc99data, runtime=rep(paste(datasetname, "99pc", sep="-"), length(pc95data$percentile)))
ldata=rbind(pc95data, pc99data)
ldata=cbind(ldata, blocksize=rep(blocksize, length(ldata$percentile)))
# We want total bandwidth, so that is the sum of the bandwidths
# from all the read 'jobs'.
mdata=data.frame(read_bw_mps=as.numeric(sum(fdata$Raw$jobs[[1]]$read$bw)/1024))
mdata=cbind(mdata, iops_tot=as.numeric(sum(fdata$Raw$jobs[[1]]$read$iops)))
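# Note: fio reports per-job bandwidth in KiB/s, so dividing the summed value
# by 1024 yields MiB/s; e.g. four jobs at 25600 KiB/s each (illustrative
# numbers) sum to 102400 KiB/s, i.e. 100 MiB/s.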
mdata=cbind(mdata, runtime=rep(datasetname, length(mdata[, "read_bw_mps"]) ))
mdata=cbind(mdata, blocksize=rep(blocksize, length(mdata[, "read_bw_mps"]) ))
# Extract the stats tables
bw_dirstats=rbind(bw_dirstats, round(mdata$read_bw_mps, digits=1))
# Rowname hack to get the blocksize recorded
rownames(bw_dirstats)[nrow(bw_dirstats)]=blocksize
iops_dirstats=rbind(iops_dirstats, round(mdata$iops_tot, digits=1))
rownames(iops_dirstats)[nrow(iops_dirstats)]=blocksize
# And do the 95 and 99 percentiles as tables as well
lat95_dirstats=rbind(lat95_dirstats, round(mean(clat$clat_ns.95.000000)/1000000, digits=1))
rownames(lat95_dirstats)[nrow(lat95_dirstats)]=blocksize
lat99_dirstats=rbind(lat99_dirstats, round(mean(clat$clat_ns.99.000000)/1000000, digits=1))
rownames(lat99_dirstats)[nrow(lat99_dirstats)]=blocksize
# Collect up as sets across all files and runtimes.
data2=rbind(data2, mdata)
all_ldata=rbind(all_ldata, ldata)
all_ldata2=rbind(all_ldata2, ldata2)
}
# Collect up for each dir we process into a column
read_bw_stats=cbind(read_bw_stats, bw_dirstats)
colnames(read_bw_stats)[ncol(read_bw_stats)]=datasetname
read_iops_stats=cbind(read_iops_stats, iops_dirstats)
colnames(read_iops_stats)[ncol(read_iops_stats)]=datasetname
read_lat95_stats=cbind(read_lat95_stats, lat95_dirstats)
colnames(read_lat95_stats)[ncol(read_lat95_stats)]=datasetname
read_lat99_stats=cbind(read_lat99_stats, lat99_dirstats)
colnames(read_lat99_stats)[ncol(read_lat99_stats)]=datasetname
}
# To get a nice looking table, we need to extract the rownames into their
# own column
read_bw_stats=cbind(Bandwidth=rownames(read_bw_stats), read_bw_stats)
read_bw_stats=cbind(read_bw_stats, Units=rep("MiB/s", nrow(read_bw_stats)))
read_iops_stats=cbind(IOPS=rownames(read_iops_stats), read_iops_stats)
read_iops_stats=cbind(read_iops_stats, Units=rep("IOP/s", nrow(read_iops_stats)))
read_lat95_stats=cbind('lat 95pc'=rownames(read_lat95_stats), read_lat95_stats)
read_lat95_stats=cbind(read_lat95_stats, Units=rep("ms", nrow(read_lat95_stats)))
read_lat99_stats=cbind('lat 99pc'=rownames(read_lat99_stats), read_lat99_stats)
read_lat99_stats=cbind(read_lat99_stats, Units=rep("ms", nrow(read_lat99_stats)))
# Bandwidth line plot
read_bw_line_plot <- ggplot() +
geom_line( data=data2, aes(blocksize, read_bw_mps, group=runtime, color=runtime)) +
ylim(0, NA) +
ggtitle("Random Read total bandwidth") +
xlab("Blocksize") +
ylab("Bandwidth (MiB/s)") +
theme(
axis.text.x=element_text(angle=90),
legend.position=c(0.35,0.8),
legend.title=element_text(size=5),
legend.text=element_text(size=5),
legend.background = element_rect(fill=alpha('blue', 0.2))
)
# IOPS line plot
read_iops_line_plot <- ggplot() +
geom_line( data=data2, aes(blocksize, iops_tot, group=runtime, color=runtime)) +
ylim(0, NA) +
ggtitle("Random Read total IOPS") +
xlab("Blocksize") +
ylab("IOPS") +
theme(
axis.text.x=element_text(angle=90),
legend.position=c(0.35,0.8),
legend.title=element_text(size=5),
legend.text=element_text(size=5),
legend.background = element_rect(fill=alpha('blue', 0.2))
)
# 95 and 99 percentile box plot
read_clat_box_plot <- ggplot() +
geom_boxplot( data=all_ldata, aes(blocksize, percentile, color=runtime)) +
stat_summary( data=all_ldata, aes(blocksize, percentile, group=runtime, color=runtime), fun.y=mean, geom="line") +
ylim(0, NA) +
ggtitle("Random Read completion latency", subtitle="95&99 percentiles, boxplot over jobs") +
xlab("Blocksize") +
ylab("Latency (ms)") +
theme(axis.text.x=element_text(angle=90)) +
# Use the 'Paired' colour palette as we are setting these up as pairs of
# 95 and 99 percentiles, and it is much easier to visually group them by
# runtime if we use this colourmap.
scale_colour_brewer(palette="Paired")
# it would be nice to use the same legend theme as the other plots on this
# page, but because of the number of entries it tends to flow off the picture.
# theme(
# axis.text.x=element_text(angle=90),
# legend.position=c(0.35,0.8),
# legend.title=element_text(size=5),
# legend.text=element_text(size=5),
# legend.background = element_rect(fill=alpha('blue', 0.2))
# )
# As the boxplot is actually quite hard to interpret, also show a linegraph
# of all the percentiles for a single blocksize.
which_blocksize='4k'
clat_line_subtitle=paste("For blocksize", which_blocksize, sep=" ")
single_blocksize=subset(all_ldata2, blocksize==which_blocksize)
clat_line=aggregate(
single_blocksize$ms,
by=list(
percentile=single_blocksize$percentile,
blocksize=single_blocksize$blocksize,
runtime=single_blocksize$runtime
),
FUN=mean
)
clat_line$percentile=as.numeric(clat_line$percentile)
read_clat_line_plot <- ggplot() +
geom_line( data=clat_line, aes(percentile, x, group=runtime, color=runtime)) +
ylim(0, NA) +
ggtitle("Random Read completion latency percentiles", subtitle=clat_line_subtitle) +
xlab("Percentile") +
ylab("Time (ms)") +
theme(
axis.text.x=element_text(angle=90),
legend.position=c(0.35,0.8),
legend.title=element_text(size=5),
legend.text=element_text(size=5),
legend.background = element_rect(fill=alpha('blue', 0.2))
)
# Output the pretty pictures
graphics_plot = grid.arrange(
read_bw_line_plot,
read_iops_line_plot,
read_clat_box_plot,
read_clat_line_plot,
nrow=2,
ncol=2 )
# A bit of an odd tweak to force a pagebreak between the pictures and
# the tables. This only works because we have a `results='asis'` in the Rmd
# R fragment.
cat("\n\n\\pagebreak\n")
read_bw_stats_plot = suppressWarnings(ggtexttable(read_bw_stats,
theme=ttheme(base_size=10),
rows=NULL
))
read_iops_stats_plot = suppressWarnings(ggtexttable(read_iops_stats,
theme=ttheme(base_size=10),
rows=NULL
))
read_lat95_stats_plot = suppressWarnings(ggtexttable(read_lat95_stats,
theme=ttheme(base_size=10),
rows=NULL
))
read_lat99_stats_plot = suppressWarnings(ggtexttable(read_lat99_stats,
theme=ttheme(base_size=10),
rows=NULL
))
# and then the statistics tables
stats_plot = grid.arrange(
read_bw_stats_plot,
read_iops_stats_plot,
read_lat95_stats_plot,
read_lat99_stats_plot,
nrow=4,
ncol=1 )

@@ -0,0 +1,260 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Display details for 'fio' random writes storage IO tests.
library(ggplot2) # ability to plot nicely
library(gridExtra) # So we can plot multiple graphs together
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable
suppressMessages(library(jsonlite)) # to load the data
suppressMessages(suppressWarnings(library(tidyr))) # for gather
library(tibble)
testnames=c(
"fio-randwrite-128",
"fio-randwrite-256",
"fio-randwrite-512",
"fio-randwrite-1k",
"fio-randwrite-2k",
"fio-randwrite-4k",
"fio-randwrite-8k",
"fio-randwrite-16k",
"fio-randwrite-32k",
"fio-randwrite-64k"
)
data2=c()
all_ldata=c()
all_ldata2=c()
stats=c()
rstats=c()
rstats_names=c()
# Where to store up the stats for the tables
write_bw_stats=c()
write_iops_stats=c()
write_lat95_stats=c()
write_lat99_stats=c()
# For each set of results
for (currentdir in resultdirs) {
bw_dirstats=c()
iops_dirstats=c()
lat95_dirstats=c()
lat99_dirstats=c()
# Derive the name from the test result dirname
datasetname=basename(currentdir)
for (testname in testnames) {
fname=paste(inputdir, currentdir, testname, '.json', sep="")
if ( !file.exists(fname)) {
#warning(paste("Skipping non-existent file: ", fname))
next
}
# Import the data
fdata=fromJSON(fname)
# De-nest the test specific named data
fdata=fdata[[testname]]
blocksize=fdata$Raw$'global options'$bs
# Extract the latency data - it comes as a table of percentiles, so
# we have to do a little work...
clat=data.frame(clat_ns=fdata$Raw$jobs[[1]]$write$clat_ns$percentile)
# Generate a clat data set with 'clean' percentile numbers so
# we can sensibly plot it later on.
clat2=clat
colnames(clat2)<-sub("clat_ns.", "", colnames(clat2))
colnames(clat2)<-sub("0000", "", colnames(clat2))
ldata2=gather(clat2)
colnames(ldata2)[colnames(ldata2)=="key"] <- "percentile"
colnames(ldata2)[colnames(ldata2)=="value"] <- "ms"
ldata2$ms=ldata2$ms/1000000 #ns->ms
ldata2=cbind(ldata2, runtime=rep(datasetname, length(ldata2$percentile)))
ldata2=cbind(ldata2, blocksize=rep(blocksize, length(ldata2$percentile)))
# Pull the 95 and 99 percentiles for the boxplot diagram.
# Our values fall more in the range of ms...
pc95data=tibble(percentile=clat$clat_ns.95.000000/1000000)
pc95data=cbind(pc95data, runtime=rep(paste(datasetname, "95pc", sep="-"), length(pc95data$percentile)))
pc99data=tibble(percentile=clat$clat_ns.99.000000/1000000)
pc99data=cbind(pc99data, runtime=rep(paste(datasetname, "99pc", sep="-"), length(pc95data$percentile)))
ldata=rbind(pc95data, pc99data)
ldata=cbind(ldata, blocksize=rep(blocksize, length(ldata$percentile)))
# We want total bandwidth, so that is the sum of the bandwidths
# from all the write 'jobs'.
mdata=data.frame(write_bw_mps=as.numeric(sum(fdata$Raw$jobs[[1]]$write$bw)/1024))
mdata=cbind(mdata, iops_tot=as.numeric(sum(fdata$Raw$jobs[[1]]$write$iops)))
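# As with the read data, fio reports per-job bandwidth in KiB/s, so the
# summed value divided by 1024 is MiB/s.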
mdata=cbind(mdata, runtime=rep(datasetname, length(mdata[, "write_bw_mps"]) ))
mdata=cbind(mdata, blocksize=rep(blocksize, length(mdata[, "write_bw_mps"]) ))
# Extract the stats tables
bw_dirstats=rbind(bw_dirstats, round(mdata$write_bw_mps, digits=1))
# Rowname hack to get the blocksize recorded
rownames(bw_dirstats)[nrow(bw_dirstats)]=blocksize
iops_dirstats=rbind(iops_dirstats, round(mdata$iops_tot, digits=1))
rownames(iops_dirstats)[nrow(iops_dirstats)]=blocksize
# And do the 95 and 99 percentiles as tables as well
lat95_dirstats=rbind(lat95_dirstats, round(mean(clat$clat_ns.95.000000)/1000000, digits=1))
rownames(lat95_dirstats)[nrow(lat95_dirstats)]=blocksize
lat99_dirstats=rbind(lat99_dirstats, round(mean(clat$clat_ns.99.000000)/1000000, digits=1))
rownames(lat99_dirstats)[nrow(lat99_dirstats)]=blocksize
# Store away as single sets
data2=rbind(data2, mdata)
all_ldata=rbind(all_ldata, ldata)
all_ldata2=rbind(all_ldata2, ldata2)
}
# Collect up for each dir we process into a column
write_bw_stats=cbind(write_bw_stats, bw_dirstats)
colnames(write_bw_stats)[ncol(write_bw_stats)]=datasetname
write_iops_stats=cbind(write_iops_stats, iops_dirstats)
colnames(write_iops_stats)[ncol(write_iops_stats)]=datasetname
write_lat95_stats=cbind(write_lat95_stats, lat95_dirstats)
colnames(write_lat95_stats)[ncol(write_lat95_stats)]=datasetname
write_lat99_stats=cbind(write_lat99_stats, lat99_dirstats)
colnames(write_lat99_stats)[ncol(write_lat99_stats)]=datasetname
}
# To get a nice looking table, we need to extract the rownames into their
# own column
write_bw_stats=cbind(Bandwidth=rownames(write_bw_stats), write_bw_stats)
write_bw_stats=cbind(write_bw_stats, Units=rep("MiB/s", nrow(write_bw_stats)))
write_iops_stats=cbind(IOPS=rownames(write_iops_stats), write_iops_stats)
write_iops_stats=cbind(write_iops_stats, Units=rep("IOP/s", nrow(write_iops_stats)))
write_lat95_stats=cbind('lat 95pc'=rownames(write_lat95_stats), write_lat95_stats)
write_lat95_stats=cbind(write_lat95_stats, Units=rep("ms", nrow(write_lat95_stats)))
write_lat99_stats=cbind('lat 99pc'=rownames(write_lat99_stats), write_lat99_stats)
write_lat99_stats=cbind(write_lat99_stats, Units=rep("ms", nrow(write_lat99_stats)))
# lineplot of total bandwidth across blocksizes.
write_bw_line_plot <- ggplot() +
geom_line( data=data2, aes(blocksize, write_bw_mps, group=runtime, color=runtime)) +
ylim(0, NA) +
ggtitle("Random Write total bandwidth") +
xlab("Blocksize") +
ylab("Bandwidth (MiB/s)") +
theme(
axis.text.x=element_text(angle=90),
legend.position=c(0.35,0.8),
legend.title=element_text(size=5),
legend.text=element_text(size=5),
legend.background = element_rect(fill=alpha('blue', 0.2))
)
# lineplot of IOPS across blocksizes
write_iops_line_plot <- ggplot() +
geom_line( data=data2, aes(blocksize, iops_tot, group=runtime, color=runtime)) +
ylim(0, NA) +
ggtitle("Random Write total IOPS") +
xlab("Blocksize") +
ylab("IOPS") +
theme(
axis.text.x=element_text(angle=90),
legend.position=c(0.35,0.8),
legend.title=element_text(size=5),
legend.text=element_text(size=5),
legend.background = element_rect(fill=alpha('blue', 0.2))
)
# boxplot of 95 and 99 percentiles covering the parallel jobs, shown across
# the blocksizes.
write_clat_box_plot <- ggplot() +
geom_boxplot( data=all_ldata, aes(blocksize, percentile, color=runtime)) +
stat_summary( data=all_ldata, aes(blocksize, percentile, group=runtime, color=runtime), fun.y=mean, geom="line") +
ylim(0, NA) +
ggtitle("Random Write completion latency", subtitle="95&99 Percentiles, boxplot across jobs") +
xlab("Blocksize") +
ylab("Latency (ms)") +
theme(axis.text.x=element_text(angle=90)) +
# Use the 'Paired' colour palette as we are setting these up as pairs of
# 95 and 99 percentiles, and it is much easier to visually group them by
# runtime if we use this colourmap.
scale_colour_brewer(palette="Paired")
# completion latency line plot across the percentiles, for a specific blocksize only
# as otherwise the graph would be far too noisy.
which_blocksize='4k'
clat_line_subtitle=paste("For blocksize", which_blocksize, sep=" ")
single_blocksize=subset(all_ldata2, blocksize==which_blocksize)
clat_line=aggregate(
single_blocksize$ms,
by=list(
percentile=single_blocksize$percentile,
blocksize=single_blocksize$blocksize,
runtime=single_blocksize$runtime
),
FUN=mean
)
clat_line$percentile=as.numeric(clat_line$percentile)
write_clat_line_plot <- ggplot() +
geom_line( data=clat_line, aes(percentile, x, group=runtime, color=runtime)) +
ylim(0, NA) +
ggtitle("Random Write completion latency percentiles", subtitle=clat_line_subtitle) +
xlab("Percentile") +
ylab("Time (ms)") +
theme(
axis.text.x=element_text(angle=90),
legend.position=c(0.35,0.8),
legend.title=element_text(size=5),
legend.text=element_text(size=5),
legend.background = element_rect(fill=alpha('blue', 0.2))
)
master_plot = grid.arrange(
write_bw_line_plot,
write_iops_line_plot,
write_clat_box_plot,
write_clat_line_plot,
nrow=2,
ncol=2 )
# A bit of an odd tweak to force a pagebreak between the pictures and
# the tables. This only works because we have a `results='asis'` in the Rmd
# R fragment.
cat("\n\n\\pagebreak\n")
write_bw_stats_plot = suppressWarnings(ggtexttable(write_bw_stats,
theme=ttheme(base_size=10),
rows=NULL
))
write_iops_stats_plot = suppressWarnings(ggtexttable(write_iops_stats,
theme=ttheme(base_size=10),
rows=NULL
))
write_lat95_stats_plot = suppressWarnings(ggtexttable(write_lat95_stats,
theme=ttheme(base_size=10),
rows=NULL
))
write_lat99_stats_plot = suppressWarnings(ggtexttable(write_lat99_stats,
theme=ttheme(base_size=10),
rows=NULL
))
# and then the statistics tables
stats_plot = grid.arrange(
write_bw_stats_plot,
write_iops_stats_plot,
write_lat95_stats_plot,
write_lat99_stats_plot,
nrow=4,
ncol=1 )

@@ -0,0 +1,111 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Show system memory reduction, and hence container 'density', by analysing the
# scaling footprint data results and the 'system free' memory.
library(ggplot2) # ability to plot nicely.
# So we can plot multiple graphs
library(gridExtra) # together.
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable.
suppressMessages(library(jsonlite)) # to load the data.
testnames=c(
paste("footprint-busybox.*", test_name_extra, sep=""),
paste("footprint-mysql.*", test_name_extra, sep=""),
paste("footprint-elasticsearch.*", test_name_extra, sep="")
)
data=c()
stats=c()
rstats=c()
rstats_names=c()
for (currentdir in resultdirs) {
count=1
dirstats=c()
for (testname in testnames) {
matchdir=paste(inputdir, currentdir, sep="")
matchfile=paste(testname, '\\.json', sep="")
files=list.files(matchdir, pattern=matchfile)
if ( length(files) == 0 ) {
#warning(paste("Pattern [", matchdir, "/", matchfile, "] matched nothing"))
}
for (ffound in files) {
fname=paste(inputdir, currentdir, ffound, sep="")
if ( !file.exists(fname)) {
warning(paste("Skipping non-existent file: ", fname))
next
}
# Derive the name from the test result dirname
datasetname=basename(currentdir)
# Import the data
fdata=fromJSON(fname)
# De-nest the test name specific data
shortname=substr(ffound, 1, nchar(ffound)-nchar(".json"))
fdata=fdata[[shortname]]
payload=fdata$Config$payload
testname=paste(datasetname, payload)
cdata=data.frame(avail_mb=as.numeric(fdata$Results$system$avail)/(1024*1024))
cdata=cbind(cdata, avail_decr=as.numeric(fdata$Results$system$avail_decr))
cdata=cbind(cdata, count=seq_len(length(cdata[, "avail_mb"])))
cdata=cbind(cdata, testname=rep(testname, length(cdata[, "avail_mb"]) ))
cdata=cbind(cdata, payload=rep(payload, length(cdata[, "avail_mb"]) ))
cdata=cbind(cdata, dataset=rep(datasetname, length(cdata[, "avail_mb"]) ))
# Gather our statistics
sdata=data.frame(num_containers=length(cdata[, "avail_mb"]))
# Pick out the last avail_decr value - which in theory should be
# the most we have consumed...
sdata=cbind(sdata, mem_consumed=cdata[, "avail_decr"][length(cdata[, "avail_decr"])])
sdata=cbind(sdata, avg_bytes_per_c=sdata$mem_consumed / sdata$num_containers)
sdata=cbind(sdata, runtime=testname)
# Store away as a single set
data=rbind(data, cdata)
stats=rbind(stats, sdata)
s = c(
"Test"=testname,
"n"=sdata$num_containers,
"size"=(sdata$mem_consumed) / 1024,
"kb/n"=round((sdata$mem_consumed / sdata$num_containers) / 1024, digits=1),
"n/Gb"= round((1*1024*1024*1024) / (sdata$mem_consumed / sdata$num_containers), digits=1)
)
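# Illustrative numbers: if 256 containers consumed 1 GiB in total, then
# 'kb/n' would be 4096 Kb per container and 'n/Gb' would be 256.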
rstats=rbind(rstats, s)
count = count + 1
}
}
}
# Set up the text table headers
colnames(rstats)=c("Test", "n", "Tot_Kb", "avg_Kb", "n_per_Gb")
# Build us a text table of numerical results
stats_plot = suppressWarnings(ggtexttable(data.frame(rstats),
theme=ttheme(base_size=10),
rows=NULL
))
# plot how samples varied over 'time'
line_plot <- ggplot() +
geom_point( data=data, aes(count, avail_mb, group=testname, color=payload, shape=dataset)) +
geom_line( data=data, aes(count, avail_mb, group=testname, color=payload)) +
xlab("Containers") +
ylab("System Avail (Mb)") +
ggtitle("System Memory free") +
ylim(0, NA) +
theme(axis.text.x=element_text(angle=90))
master_plot = grid.arrange(
line_plot,
stats_plot,
nrow=2,
ncol=1 )

@@ -0,0 +1,19 @@
#!/bin/bash
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
REPORTNAME="metrics_report.pdf"
cd scripts
Rscript --slave -e "library(knitr);knit('pdf.Rmd')"
Rscript --slave -e "library(knitr);pandoc('pdf.md', format='latex')"
Rscript --slave -e "library(knitr);knit('html.Rmd')"
Rscript --slave -e "library(knitr);pandoc('html.md', format='html')"
cp /scripts/pdf.pdf /outputdir/${REPORTNAME}
cp /scripts/figure/*.png /outputdir/
echo "PNGs of graphs and tables can be found in the output directory."
echo "The report, named ${REPORTNAME}, can be found in the output directory"

@@ -0,0 +1,23 @@
---
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
title: "Kata Containers metrics report"
author: "Auto generated"
date: "`r format(Sys.time(), '%d %B, %Y')`"
output:
html_document:
urlcolor: blue
---
```{r setup, include=FALSE}
# Set these opts to get png images which fit into the report pages better
opts_chunk$set(dev = 'png')
# Pick up any env set by the invoking script, such as the root dir of the
# results data tree
source("/inputdir/Env.R")
```
```{r child = 'metrics_report.Rmd'}
```

@@ -0,0 +1,157 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Display how long the various phases of a container lifecycle (run, execute,
# die, etc.) take.
library(ggplot2) # ability to plot nicely.
suppressMessages(suppressWarnings(library(tidyr))) # for gather().
# So we can plot multiple graphs
library(gridExtra) # together.
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable.
suppressMessages(library(jsonlite)) # to load the data.
testnames=c(
"boot-times"
)
data=c()
stats=c()
rstats=c()
rstats_names=c()
# For each set of results
for (currentdir in resultdirs) {
count=1
dirstats=c()
for (testname in testnames) {
fname=paste(inputdir, currentdir, testname, '.json', sep="")
if ( !file.exists(fname)) {
warning(paste("Skipping non-existent file: ", fname))
next
}
# Derive the name from the test result dirname
datasetname=basename(currentdir)
# Import the data
fdata=fromJSON(fname)
# De-nest the test specific name
fdata=fdata[[testname]]
cdata=data.frame(workload=as.numeric(fdata$Results$'to-workload'$Result))
cdata=cbind(cdata, quit=as.numeric(fdata$Results$'to-quit'$Result))
cdata=cbind(cdata, tokernel=as.numeric(fdata$Results$'to-kernel'$Result))
cdata=cbind(cdata, inkernel=as.numeric(fdata$Results$'in-kernel'$Result))
cdata=cbind(cdata, total=as.numeric(fdata$Results$'total'$Result))
cdata=cbind(cdata, count=seq_len(length(cdata[,"workload"])))
cdata=cbind(cdata, runtime=rep(datasetname, length(cdata[, "workload"]) ))
# Calculate some stats for total time
sdata=data.frame(workload_mean=mean(cdata$workload))
sdata=cbind(sdata, workload_min=min(cdata$workload))
sdata=cbind(sdata, workload_max=max(cdata$workload))
sdata=cbind(sdata, workload_sd=sd(cdata$workload))
sdata=cbind(sdata, workload_cov=((sdata$workload_sd / sdata$workload_mean) * 100))
sdata=cbind(sdata, runtime=datasetname)
sdata=cbind(sdata, quit_mean = mean(cdata$quit))
sdata=cbind(sdata, quit_min = min(cdata$quit))
sdata=cbind(sdata, quit_max = max(cdata$quit))
sdata=cbind(sdata, quit_sd = sd(cdata$quit))
sdata=cbind(sdata, quit_cov = (sdata$quit_sd / sdata$quit_mean) * 100)
sdata=cbind(sdata, tokernel_mean = mean(cdata$tokernel))
sdata=cbind(sdata, inkernel_mean = mean(cdata$inkernel))
sdata=cbind(sdata, total_mean = mean(cdata$total))
# Store away as a single set
data=rbind(data, cdata)
stats=rbind(stats, sdata)
# Store away some stats for the text table
dirstats[count]=round(sdata$tokernel_mean, digits=2)
count = count + 1
dirstats[count]=round(sdata$inkernel_mean, digits=2)
count = count + 1
dirstats[count]=round(sdata$workload_mean, digits=2)
count = count + 1
dirstats[count]=round(sdata$quit_mean, digits=2)
count = count + 1
dirstats[count]=round(sdata$total_mean, digits=2)
count = count + 1
}
rstats=rbind(rstats, dirstats)
rstats_names=rbind(rstats_names, datasetname)
}
unts=c("s", "s", "s", "s", "s")
rstats=rbind(rstats, unts)
rstats_names=rbind(rstats_names, "Units")
# If we have only 2 sets of results, then we can do some more
# stats math for the text table
if (length(resultdirs) == 2) {
# This is a touch hard wired - but we *know* we only have two
# datasets...
diff=c()
for( i in 1:5) {
difference = as.double(rstats[2,i]) - as.double(rstats[1,i])
val = 100 * (difference/as.double(rstats[1,i]))
diff[i] = paste(round(val, digits=2), "%", sep=" ")
}
rstats=rbind(rstats, diff)
rstats_names=rbind(rstats_names, "Diff")
}
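# Illustrative: if a phase took 1.20s in dataset 1 and 1.50s in dataset 2,
# the Diff row would show "25 %" for that column.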
rstats=cbind(rstats_names, rstats)
# Set up the text table headers
colnames(rstats)=c("Results", "2k", "ik", "2w", "2q", "tot")
# Build us a text table of numerical results
stats_plot = suppressWarnings(ggtexttable(data.frame(rstats, check.names=FALSE),
theme=ttheme(base_size=8),
rows=NULL
))
# plot how samples varied over 'time'
line_plot <- ggplot() +
geom_line( data=data, aes(count, workload, color=runtime)) +
geom_smooth( data=data, aes(count, workload, color=runtime), se=FALSE, method="loess") +
xlab("Iteration") +
ylab("Time (s)") +
ggtitle("Boot to workload", subtitle="First container") +
ylim(0, NA) +
theme(axis.text.x=element_text(angle=90))
boot_boxplot <- ggplot() +
geom_boxplot( data=data, aes(runtime, workload, color=runtime), show.legend=FALSE) +
ylim(0, NA) +
ylab("Time (s)")
# convert the stats to a long format so we can more easily do a side-by-side barplot
longstats <- gather(stats, measure, value, workload_mean, quit_mean, inkernel_mean, tokernel_mean, total_mean)
bar_plot <- ggplot() +
geom_bar( data=longstats, aes(measure, value, fill=runtime), stat="identity", position="dodge", show.legend=FALSE) +
xlab("Phase") +
ylab("Time (s)") +
ggtitle("Lifecycle phase times", subtitle="Mean") +
ylim(0, NA) +
theme(axis.text.x=element_text(angle=90))
master_plot = grid.arrange(
bar_plot,
line_plot,
stats_plot,
boot_boxplot,
nrow=2,
ncol=2 )

@@ -0,0 +1,142 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Analyse the runtime component memory footprint data.
library(ggplot2) # ability to plot nicely.
# So we can plot multiple graphs
library(gridExtra) # together.
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable.
suppressMessages(library(jsonlite)) # to load the data.
testnames=c(
"memory-footprint-inside-container"
)
data=c()
rstats=c()
rstats_rows=c()
rstats_cols=c()
# For each set of results
for (currentdir in resultdirs) {
dirstats=c()
# For each memory footprint measure
for (testname in testnames) {
# R seems not to like double path slashes '//' ?
fname=paste(inputdir, currentdir, testname, '.json', sep="")
if ( !file.exists(fname)) {
warning(paste("Skipping non-existent file: ", fname))
next
}
# Derive the name from the test result dirname
datasetname=basename(currentdir)
# Import the data
fdata=fromJSON(fname)
fdata=fdata[[testname]]
# Copy the results into shorter, more accessible names
fdata$requested=fdata$Results$memrequest$Result
fdata$total=fdata$Results$memtotal$Result
fdata$free=fdata$Results$memfree$Result
fdata$avail=fdata$Results$memavailable$Result
# And lets work out what % we have 'lost' between the amount requested
# and the total the container actually sees.
fdata$lost=fdata$requested - fdata$total
fdata$pctotal= 100 * (fdata$lost/ fdata$requested)
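# Illustrative: a container that requested 2097152 Kb but only sees a total
# of 2043252 Kb has 'lost' 53900 Kb, i.e. ~2.57% consumed.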
fdata$Runtime=rep(datasetname, length(fdata$requested) )
# Store away the bits we need
data=rbind(data, data.frame(
Result=fdata$requested,
Type="requested",
Runtime=fdata$Runtime ))
data=rbind(data, data.frame(
Result=fdata$total,
Type="total",
Runtime=fdata$Runtime ))
data=rbind(data, data.frame(
Result=fdata$free,
Type="free",
Runtime=fdata$Runtime ))
data=rbind(data, data.frame(
Result=fdata$avail,
Type="avail",
Runtime=fdata$Runtime ))
data=rbind(data, data.frame(
Result=fdata$lost,
Type="lost",
Runtime=fdata$Runtime ))
data=rbind(data, data.frame(
Result=fdata$pctotal,
Type="% consumed",
Runtime=fdata$Runtime ))
# Store away some stats for the text table
dirstats=rbind(dirstats, round(fdata$requested, digits=2) )
dirstats=rbind(dirstats, round(fdata$total, digits=2) )
dirstats=rbind(dirstats, round(fdata$free, digits=2) )
dirstats=rbind(dirstats, round(fdata$avail, digits=2) )
dirstats=rbind(dirstats, round(fdata$lost, digits=2) )
dirstats=rbind(dirstats, round(fdata$pctotal, digits=2) )
}
rstats=cbind(rstats, dirstats)
rstats_cols=append(rstats_cols, datasetname)
}
rstats_rows=c("Requested", "Total", "Free", "Avail", "Consumed", "% Consumed")
unts=c("Kb", "Kb", "Kb", "Kb", "Kb", "%")
rstats=cbind(rstats, unts)
rstats_cols=append(rstats_cols, "Units")
# If we have only 2 sets of results, then we can do some more
# stats math for the text table
if (length(resultdirs) == 2) {
# This is a touch hard wired - but we *know* we only have two
# datasets...
diff=c()
# Just the first five entries - a percentage diff is meaningless for the '% Consumed' row
for (n in 1:5) {
difference = (as.double(rstats[n,2]) - as.double(rstats[n,1]))
val = 100 * (difference/as.double(rstats[n,1]))
diff=rbind(diff, round(val, digits=2))
}
# Add a blank entry for the other entries
diff=rbind(diff, "")
rstats=cbind(rstats, diff)
rstats_cols=append(rstats_cols, "Diff %")
}
# Build us a text table of numerical results
stats_plot = suppressWarnings(ggtexttable(data.frame(rstats),
theme=ttheme(base_size=10),
rows=rstats_rows, cols=rstats_cols
))
bardata <- subset(data, Type %in% c("requested", "total", "free", "avail"))
# plot how samples varied over 'time'
barplot <- ggplot() +
geom_bar(data=bardata, aes(Type, Result, fill=Runtime), stat="identity", position="dodge") +
xlab("Measure") +
ylab("Size (Kb)") +
ggtitle("In-container memory statistics") +
ylim(0, NA) +
theme(axis.text.x=element_text(angle=90))
master_plot = grid.arrange(
barplot,
stats_plot,
nrow=2,
ncol=1 )

@@ -0,0 +1,121 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Analyse the runtime component memory footprint data.
library(ggplot2) # ability to plot nicely.
# So we can plot multiple graphs
library(gridExtra) # together.
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable.
suppressMessages(library(jsonlite)) # to load the data.
testnames=c(
"memory-footprint",
"memory-footprint-ksm"
)
resultsfilesshort=c(
"noKSM",
"KSM"
)
data=c()
rstats=c()
rstats_names=c()
# For each set of results
for (currentdir in resultdirs) {
count=1
dirstats=c()
# For the two different types of memory footprint measures
for (testname in testnames) {
# R seems not to like double path slashes '//' ?
fname=paste(inputdir, currentdir, testname, '.json', sep="")
if ( !file.exists(fname)) {
warning(paste("Skipping non-existent file: ", fname))
next
}
# Derive the name from the test result dirname
datasetname=basename(currentdir)
datasetvariant=resultsfilesshort[count]
# Import the data
fdata=fromJSON(fname)
fdata=fdata[[testname]]
# Copy the average result into a shorter, more accessible name
fdata$Result=fdata$Results$average$Result
fdata$variant=rep(datasetvariant, length(fdata$Result) )
fdata$Runtime=rep(datasetname, length(fdata$Result) )
fdata$Count=seq_len(length(fdata$Result))
# Calculate some stats
fdata.mean = mean(fdata$Result)
fdata.min = min(fdata$Result)
fdata.max = max(fdata$Result)
fdata.sd = sd(fdata$Result)
fdata.cov = (fdata.sd / fdata.mean) * 100
# Store away the bits we need
data=rbind(data, data.frame(
Result=fdata$Result,
Count=fdata$Count,
Runtime=fdata$Runtime,
variant=fdata$variant ) )
# Store away some stats for the text table
dirstats[count]=round(fdata.mean, digits=2)
count = count + 1
}
rstats=rbind(rstats, dirstats)
rstats_names=rbind(rstats_names, datasetname)
}
rstats=cbind(rstats_names, rstats)
unts=rep("Kb", length(resultdirs))
# If we have only 2 sets of results, then we can do some more
# stats math for the text table
if (length(resultdirs) == 2) {
# This is a touch hard wired - but we *know* we only have two
# datasets...
diff=c("diff")
difference = (as.double(rstats[2,2]) - as.double(rstats[1,2]))
val = 100 * (difference/as.double(rstats[1,2]))
diff[2] = round(val, digits=2)
difference = (as.double(rstats[2,3]) - as.double(rstats[1,3]))
val = 100 * (difference/as.double(rstats[1,3]))
diff[3] = round(val, digits=2)
rstats=rbind(rstats, diff)
unts[3]="%"
}
rstats=cbind(rstats, unts)
# Set up the text table headers
colnames(rstats)=c("Results", resultsfilesshort, "Units")
# Build us a text table of numerical results
stats_plot = suppressWarnings(ggtexttable(data.frame(rstats),
theme=ttheme(base_size=10),
rows=NULL
))
# plot how samples varied over 'time'
point_plot <- ggplot() +
geom_point( data=data, aes(Runtime, Result, color=variant), position=position_dodge(0.1)) +
xlab("Dataset") +
ylab("Size (Kb)") +
ggtitle("Average PSS footprint", subtitle="per container") +
ylim(0, NA) +
theme(axis.text.x=element_text(angle=90))
master_plot = grid.arrange(
point_plot,
stats_plot,
nrow=1,
ncol=2 )

@@ -0,0 +1,63 @@
---
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
---
\pagebreak
# Introduction
This report compares the metrics between multiple sets of data generated from
the [Kata Containers report generation scripts](https://github.com/kata-containers/kata-containers/tree/main/metrics/report/README.md).
This report was generated using the data from the **`r resultdirs`** results directories.
\pagebreak
# Container scaling system footprint
This [test](https://github.com/kata-containers/kata-containers/blob/main/metrics/density/footprint_data.sh)
measures the system memory footprint impact whilst running an increasing number
of containers. For this test, [KSM](https://en.wikipedia.org/wiki/Kernel_same-page_merging)
is enabled. The results show how system memory is consumed for different sized
containers, and the average system memory footprint cost and density (how many
containers you can fit per Gb) are calculated.
```{r footprint-density, echo=FALSE, fig.cap="System Memory density"}
test_name_extra="-ksm"
source('footprint-density.R')
rm(test_name_extra)
```
\pagebreak
# Memory used inside container
This [test](https://github.com/kata-containers/kata-containers/blob/main/metrics/density/memory_usage_inside_container.sh)
measures the memory taken up inside a container by the container runtime. It shows the difference between the amount of memory requested for the container and the amount the container can actually 'see'.
The *% Consumed* is the key row in the table, which compares the *Requested* against *Total* values.
```{r mem-in-cont, echo=FALSE, fig.cap="Memory used inside the container"}
source('mem-in-cont.R')
```
\pagebreak
# Container boot lifecycle times
This [test](https://github.com/kata-containers/kata-containers/blob/main/metrics/time/launch_times.sh)
uses the `date` command on the host and in the container, as well as data from the container
kernel `dmesg`, to ascertain how long different phases of the create/boot/run/delete
Docker container lifecycle take for the first launched container.
To decode the stats table: the prefixes are 'to (`2`)' and '`i`n', and the suffixes are '`k`ernel', '`w`orkload' and '`q`uit'. 'tot' is the total time for a complete container start-to-finish cycle.
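For example, `2k` is the 'to kernel' time, `ik` the 'in kernel' time, and `2w` the 'to workload' time.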
```{r lifecycle-time, echo=FALSE, fig.cap="Execution lifecycle times"}
source('lifecycle-time.R')
```
\pagebreak
# Test setup details
This table describes the test system details, as derived from the information contained
in the test results files.
```{r dut, echo=FALSE, fig.cap="System configuration details"}
source('dut-details.R')
```

@@ -0,0 +1,132 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Analyse the runtime component memory footprint data.
library(ggplot2) # ability to plot nicely.
# So we can plot multiple graphs
library(gridExtra) # together.
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable.
suppressMessages(library(jsonlite)) # to load the data.
testnames=c(
"cpu-information"
)
resultsfilesshort=c(
"CPU"
)
data=c()
rstats=c()
rstats_rows=c()
rstats_cols=c()
Gdenom = (1000.0 * 1000.0 * 1000.0)
# For each set of results
for (currentdir in resultdirs) {
dirstats=c()
count=1
# For each results file (only cpu-information here)
for (testname in testnames) {
# R seems not to like double path slashes '//' ?
fname=paste(inputdir, currentdir, testname, '.json', sep="")
if ( !file.exists(fname)) {
warning(paste("Skipping non-existent file: ", fname))
next
}
# Derive the name from the test result dirname
datasetname=basename(currentdir)
datasetvariant=resultsfilesshort[count]
# Import the data
fdata=fromJSON(fname)
fdata=fdata[[testname]]
# Copy the results into shorter, more accessible names
fdata$ips=fdata$Results$"instructions per cycle"$Result
fdata$Gcycles=fdata$Results$cycles$Result / Gdenom
fdata$Ginstructions=fdata$Results$instructions$Result / Gdenom
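# e.g. a raw count of 12,300,000,000 cycles becomes 12.3 Gcycles
# (illustrative numbers).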
fdata$variant=rep(datasetvariant, length(fdata$ips) )
fdata$Runtime=rep(datasetname, length(fdata$ips) )
# Store away the bits we need
data=rbind(data, data.frame(
Result=fdata$ips,
Type="ips",
Runtime=fdata$Runtime,
variant=fdata$variant ) )
data=rbind(data, data.frame(
Result=fdata$Gcycles,
Type="Gcycles",
Runtime=fdata$Runtime,
variant=fdata$variant ) )
data=rbind(data, data.frame(
Result=fdata$Ginstructions,
Type="Ginstr",
Runtime=fdata$Runtime,
variant=fdata$variant ) )
# Store away some stats for the text table
dirstats=rbind(dirstats, round(fdata$ips, digits=2) )
dirstats=rbind(dirstats, round(fdata$Gcycles, digits=2) )
dirstats=rbind(dirstats, round(fdata$Ginstructions, digits=2) )
}
rstats=cbind(rstats, dirstats)
rstats_cols=append(rstats_cols, datasetname)
}
rstats_rows=c("IPS", "GCycles", "GInstr")
unts=c("Ins/Cyc", "G", "G")
rstats=cbind(rstats, unts)
rstats_cols=append(rstats_cols, "Units")
# If we have only 2 sets of results, then we can do some more
# stats math for the text table
if (length(resultdirs) == 2) {
# This is a touch hard wired - but we *know* we only have two
# datasets...
diff=c()
for (n in 1:3) {
difference = (as.double(rstats[n,2]) - as.double(rstats[n,1]))
val = 100 * (difference/as.double(rstats[n,1]))
diff=rbind(diff, round(val, digits=2))
}
rstats=cbind(rstats, diff)
rstats_cols=append(rstats_cols, "Diff %")
}
# Build us a text table of numerical results
stats_plot = suppressWarnings(ggtexttable(data.frame(rstats),
theme=ttheme(base_size=10),
rows=rstats_rows, cols=rstats_cols
))
# plot how samples varied over 'time'
ipsdata <- subset(data, Type %in% c("ips"))
ips_plot <- ggplot() +
geom_bar(data=ipsdata, aes(Type, Result, fill=Runtime), stat="identity", position="dodge") +
xlab("Measure") +
ylab("IPS") +
ggtitle("Instructions Per Cycle") +
ylim(0, NA) +
theme(axis.text.x=element_text(angle=90))
cycdata <- subset(data, Type %in% c("Gcycles", "Ginstr"))
cycles_plot <- ggplot() +
geom_bar(data=cycdata, aes(Type, Result, fill=Runtime), stat="identity", position="dodge", show.legend=FALSE) +
xlab("Measure") +
ylab("Count (G)") +
ggtitle("Cycles and Instructions") +
ylim(0, NA) +
theme(axis.text.x=element_text(angle=90))
master_plot = grid.arrange(
ips_plot,
cycles_plot,
stats_plot,
nrow=2,
ncol=2 )