From 69781fc027ec86371b77c92de3d58253ba396d13 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 29 Aug 2023 16:25:14 +0000 Subject: [PATCH 01/10] metrics: Add metrics report script This PR adds metrics report script for kata metrics. Fixes #7782 Signed-off-by: Gabriela Cervantes --- tests/metrics/report/makereport.sh | 97 ++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100755 tests/metrics/report/makereport.sh diff --git a/tests/metrics/report/makereport.sh b/tests/metrics/report/makereport.sh new file mode 100755 index 0000000000..3926d7567f --- /dev/null +++ b/tests/metrics/report/makereport.sh @@ -0,0 +1,97 @@ +#!/bin/bash +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Take the data found in subdirectories of the metrics 'results' directory, +# and turn them into a PDF report. Use a Dockerfile containing all the tooling +# and scripts we need to do that. + +set -e + +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") +source "${SCRIPT_PATH}/../lib/common.bash" + +IMAGE="${IMAGE:-metrics-report}" +DOCKERFILE="${SCRIPT_PATH}/report_dockerfile/Dockerfile" + +HOST_INPUT_DIR="${SCRIPT_PATH}/../results" +R_ENV_FILE="${HOST_INPUT_DIR}/Env.R" +HOST_OUTPUT_DIR="${SCRIPT_PATH}/output" + +GUEST_INPUT_DIR="/inputdir/" +GUEST_OUTPUT_DIR="/outputdir/" + +# If in debugging mode, we also map in the scripts dir so you can +# dynamically edit and re-load them at the R prompt +HOST_SCRIPT_DIR="${SCRIPT_PATH}/report_dockerfile" +GUEST_SCRIPT_DIR="/scripts/" + +setup() { + echo "Checking subdirectories" + check_subdir="$(ls -dx ${HOST_INPUT_DIR}/*/ 2> /dev/null | wc -l)" + if [ $check_subdir -eq 0 ]; then + die "No subdirs in [${HOST_INPUT_DIR}] to read results from." 
+ fi + + echo "Checking Dockerfile" + check_dockerfiles_images "$IMAGE" "$DOCKERFILE" + + mkdir -p "$HOST_OUTPUT_DIR" && true + + echo "inputdir=\"${GUEST_INPUT_DIR}\"" > ${R_ENV_FILE} + echo "outputdir=\"${GUEST_OUTPUT_DIR}\"" >> ${R_ENV_FILE} + + # A bit of a hack to get an R syntax'd list of dirs to process + # Also, need it as not host-side dir path - so short relative names + resultdirs="$(cd ${HOST_INPUT_DIR}; ls -dx */)" + resultdirslist=$(echo ${resultdirs} | sed 's/ \+/", "/g') + echo "resultdirs=c(" >> ${R_ENV_FILE} + echo " \"${resultdirslist}\"" >> ${R_ENV_FILE} + echo ")" >> ${R_ENV_FILE} +} + +run() { + docker run -ti --rm -v ${HOST_INPUT_DIR}:${GUEST_INPUT_DIR} -v ${HOST_OUTPUT_DIR}:${GUEST_OUTPUT_DIR} ${extra_volumes} ${IMAGE} ${extra_command} + ls -la ${HOST_OUTPUT_DIR}/* +} + +help() { + usage=$(cat << EOF +Usage: $0 [-h] [options] + Description: + This script generates a metrics report document + from the results directory one level up in the + directory tree (../results). + Options: + -d, Run in debug (interactive) mode + -h, Print this help +EOF +) + echo "$usage" +} + +main() { + + local OPTIND + while getopts "d" opt;do + case ${opt} in + d) + # In debug mode, run a shell instead of the default report generation + extra_command="bash" + extra_volumes="-v ${HOST_SCRIPT_DIR}:${GUEST_SCRIPT_DIR}" + ;; + ?) + # parse failure + help + die "Failed to parse arguments" + ;; + esac + done + shift $((OPTIND-1)) + + setup + run +} + +main "$@" From 08812074d12d6980193b4e167784fecc58ac6b8c Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 29 Aug 2023 16:28:32 +0000 Subject: [PATCH 02/10] metrics: Add report dockerfile This PR adds the report dockerfile for kata metrics. 
Signed-off-by: Gabriela Cervantes --- .../report/report_dockerfile/Dockerfile | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 tests/metrics/report/report_dockerfile/Dockerfile diff --git a/tests/metrics/report/report_dockerfile/Dockerfile b/tests/metrics/report/report_dockerfile/Dockerfile new file mode 100644 index 0000000000..74c5026524 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/Dockerfile @@ -0,0 +1,44 @@ +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Set up an Ubuntu image with the components needed to generate a +# metrics report. That includes: +# - R +# - The R 'tidyverse' +# - pandoc +# - The report generation R files and helper scripts + +# Start with the base rocker tidyverse. +# We would have used the 'verse' base, that already has some of the docs processing +# installed, but I could not figure out how to add in the extra bits we needed to +# the lite tex version is uses. +# Here we specify a tag for base image instead of using latest to let it free from +# the risk from the update of latest base image. +FROM rocker/tidyverse:3.6.0 + +# Version of the Dockerfile +LABEL DOCKERFILE_VERSION="1.2" + +# Without this some of the package installs stop to try and ask questions... +ENV DEBIAN_FRONTEND=noninteractive + +# Install the extra doc processing parts we need for our Rmarkdown PDF flow. +RUN apt-get update -qq && \ + apt-get install -y --no-install-recommends \ + texlive-latex-base \ + texlive-fonts-recommended \ + latex-xcolor && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists + +# Install the extra R packages we need. +RUN install2.r --error --deps TRUE \ + gridExtra \ + ggpubr + +# Pull in our actual worker scripts +COPY . 
/scripts + +# By default generate the report +CMD ["/scripts/genreport.sh"] From fce2487971a27f217f613859e38d00aca95e4dda Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 29 Aug 2023 16:45:22 +0000 Subject: [PATCH 03/10] metrics: Add metrics report R files This PR adds the metrics report R files. Signed-off-by: Gabriela Cervantes --- .../report/report_dockerfile/dut-details.R | 122 ++++++++ .../report/report_dockerfile/fio-reads.R | 269 ++++++++++++++++++ .../report/report_dockerfile/fio-writes.R | 260 +++++++++++++++++ .../report_dockerfile/footprint-density.R | 111 ++++++++ .../report/report_dockerfile/lifecycle-time.R | 157 ++++++++++ .../report/report_dockerfile/mem-in-cont.R | 142 +++++++++ .../report_dockerfile/memory-footprint.R | 121 ++++++++ .../report/report_dockerfile/network-cpu.R | 132 +++++++++ 8 files changed, 1314 insertions(+) create mode 100644 tests/metrics/report/report_dockerfile/dut-details.R create mode 100644 tests/metrics/report/report_dockerfile/fio-reads.R create mode 100644 tests/metrics/report/report_dockerfile/fio-writes.R create mode 100644 tests/metrics/report/report_dockerfile/footprint-density.R create mode 100644 tests/metrics/report/report_dockerfile/lifecycle-time.R create mode 100644 tests/metrics/report/report_dockerfile/mem-in-cont.R create mode 100644 tests/metrics/report/report_dockerfile/memory-footprint.R create mode 100644 tests/metrics/report/report_dockerfile/network-cpu.R diff --git a/tests/metrics/report/report_dockerfile/dut-details.R b/tests/metrics/report/report_dockerfile/dut-details.R new file mode 100644 index 0000000000..3b16c8c2b7 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/dut-details.R @@ -0,0 +1,122 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Display details for the 'Device Under Test', for all data sets being processed. + +suppressMessages(suppressWarnings(library(tidyr))) # for gather(). 
+library(tibble) +suppressMessages(suppressWarnings(library(plyr))) # rbind.fill + # So we can plot multiple graphs +library(gridExtra) # together. +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. +suppressMessages(library(jsonlite)) # to load the data. + +# A list of all the known results files we might find the information inside. +resultsfiles=c( + "boot-times.json", + "memory-footprint.json", + "memory-footprint-ksm.json", + "memory-footprint-inside-container.json" + ) + +data=c() +stats=c() +stats_names=c() + +# For each set of results +for (currentdir in resultdirs) { + count=1 + dirstats=c() + for (resultsfile in resultsfiles) { + fname=paste(inputdir, currentdir, resultsfile, sep="/") + if ( !file.exists(fname)) { + #warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Derive the name from the test result dirname + datasetname=basename(currentdir) + + # Import the data + fdata=fromJSON(fname) + + if (length(fdata$'kata-env') != 0 ) { + # We have kata-runtime data + dirstats=tibble("Run Ver"=as.character(fdata$'kata-env'$Runtime$Version$Semver)) + dirstats=cbind(dirstats, "Run SHA"=as.character(fdata$'kata-env'$Runtime$Version$Commit)) + + pver=as.character(fdata$'kata-env'$Proxy$Version) + pver=sub("^[[:alpha:][:blank:]-]*", "", pver) + # uncomment if you want to drop the commit sha as well + #pver=sub("([[:digit:].]*).*", "\\1", pver) + dirstats=cbind(dirstats, "Proxy Ver"=pver) + + # Trim the shim string + sver=as.character(fdata$'kata-env'$Shim$Version) + sver=sub("^[[:alpha:][:blank:]-]*", "", sver) + # uncomment if you want to drop the commit sha as well + #sver=sub("([[:digit:].]*).*", "\\1", sver) + dirstats=cbind(dirstats, "Shim Ver"=sver) + + # Default QEMU ver string is far too long and noisy - trim. 
+ hver=as.character(fdata$'kata-env'$Hypervisor$Version) + hver=sub("^[[:alpha:][:blank:]]*", "", hver) + hver=sub("([[:digit:].]*).*", "\\1", hver) + dirstats=cbind(dirstats, "Hyper Ver"=hver) + + iver=as.character(fdata$'kata-env'$Image$Path) + iver=sub("^[[:alpha:]/-]*", "", iver) + dirstats=cbind(dirstats, "Image Ver"=iver) + + kver=as.character(fdata$'kata-env'$Kernel$Path) + kver=sub("^[[:alpha:]/-]*", "", kver) + dirstats=cbind(dirstats, "Guest Krnl"=kver) + + dirstats=cbind(dirstats, "Host arch"=as.character(fdata$'kata-env'$Host$Architecture)) + dirstats=cbind(dirstats, "Host Distro"=as.character(fdata$'kata-env'$Host$Distro$Name)) + dirstats=cbind(dirstats, "Host DistVer"=as.character(fdata$'kata-env'$Host$Distro$Version)) + dirstats=cbind(dirstats, "Host Model"=as.character(fdata$'kata-env'$Host$CPU$Model)) + dirstats=cbind(dirstats, "Host Krnl"=as.character(fdata$'kata-env'$Host$Kernel)) + dirstats=cbind(dirstats, "runtime"=as.character(fdata$test$runtime)) + + break + } else { + if (length(fdata$'runc-env') != 0 ) { + dirstats=tibble("Run Ver"=as.character(fdata$'runc-env'$Version$Semver)) + dirstats=cbind(dirstats, "Run SHA"=as.character(fdata$'runc-env'$Version$Commit)) + dirstats=cbind(dirstats, "runtime"=as.character(fdata$test$runtime)) + } else { + dirstats=tibble("runtime"="Unknown") + } + break + } + } + + if ( length(dirstats) == 0 ) { + warning(paste("No valid data found for directory ", currentdir)) + } + + # use plyr rbind.fill so we can combine disparate version info frames + stats=rbind.fill(stats, dirstats) + stats_names=rbind(stats_names, datasetname) +} + +rownames(stats) = stats_names + +# Rotate the tibble so we get data dirs as the columns +spun_stats = as_tibble(cbind(What=names(stats), t(stats))) + +# Build us a text table of numerical results +stats_plot = suppressWarnings(ggtexttable(data.frame(spun_stats, check.names=FALSE), + theme=ttheme(base_size=6), + rows=NULL + )) + +# It may seem odd doing a grid of 1x1, but it should 
ensure we get a uniform format and +# layout to match the other charts and tables in the report. +master_plot = grid.arrange( + stats_plot, + nrow=1, + ncol=1 ) diff --git a/tests/metrics/report/report_dockerfile/fio-reads.R b/tests/metrics/report/report_dockerfile/fio-reads.R new file mode 100644 index 0000000000..5f3f1c212a --- /dev/null +++ b/tests/metrics/report/report_dockerfile/fio-reads.R @@ -0,0 +1,269 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Display details for `fio` random read storage IO tests. + + +library(ggplot2) # ability to plot nicely +library(gridExtra) # So we can plot multiple graphs together +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable +suppressMessages(library(jsonlite)) # to load the data +suppressMessages(suppressWarnings(library(tidyr))) # for gather +library(tibble) + +testnames=c( + "fio-randread-128", + "fio-randread-256", + "fio-randread-512", + "fio-randread-1k", + "fio-randread-2k", + "fio-randread-4k", + "fio-randread-8k", + "fio-randread-16k", + "fio-randread-32k", + "fio-randread-64k" + ) + +data2=c() +all_ldata=c() +all_ldata2=c() +stats=c() +rstats=c() +rstats_names=c() + +# Where to store up the stats for the tables +read_bw_stats=c() +read_iops_stats=c() +read_lat95_stats=c() +read_lat99_stats=c() + +# For each set of results +for (currentdir in resultdirs) { + bw_dirstats=c() + iops_dirstats=c() + lat95_dirstats=c() + lat99_dirstats=c() + # Derive the name from the test result dirname + datasetname=basename(currentdir) + + for (testname in testnames) { + fname=paste(inputdir, currentdir, testname, '.json', sep="") + if ( !file.exists(fname)) { + #warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Import the data + fdata=fromJSON(fname) + # De-ref the test named unique data + fdata=fdata[[testname]] + + blocksize=fdata$Raw$'global options'$bs + + # Extract the latency data - it comes as a table of 
percentiles, so + # we have to do a little work... + clat=data.frame(clat_ns=fdata$Raw$jobs[[1]]$read$clat_ns$percentile) + + # Generate a clat data set with 'clean' percentile numbers so + # we can sensibly plot it later on. + clat2=clat + colnames(clat2)<-sub("clat_ns.", "", colnames(clat2)) + colnames(clat2)<-sub("0000", "", colnames(clat2)) + ldata2=gather(clat2) + colnames(ldata2)[colnames(ldata2)=="key"] <- "percentile" + colnames(ldata2)[colnames(ldata2)=="value"] <- "ms" + ldata2$ms=ldata2$ms/1000000 #ns->ms + ldata2=cbind(ldata2, runtime=rep(datasetname, length(ldata2$percentile))) + ldata2=cbind(ldata2, blocksize=rep(blocksize, length(ldata2$percentile))) + + # Pull the 95 and 99 percentile numbers for the boxplot + # Plotting all values for all runtimes and blocksizes is just way too + # noisy to make a meaninful picture, so we use this subset. + # Our values fall more in the range of ms... + pc95data=tibble(percentile=clat$clat_ns.95.000000/1000000) + pc95data=cbind(pc95data, runtime=rep(paste(datasetname, "95pc", sep="-"), length(pc95data$percentile))) + pc99data=tibble(percentile=clat$clat_ns.99.000000/1000000) + pc99data=cbind(pc99data, runtime=rep(paste(datasetname, "99pc", sep="-"), length(pc95data$percentile))) + ldata=rbind(pc95data, pc99data) + ldata=cbind(ldata, blocksize=rep(blocksize, length(ldata$percentile))) + + # We want total bandwidth, so that is the sum of the bandwidths + # from all the read 'jobs'. 
+ mdata=data.frame(read_bw_mps=as.numeric(sum(fdata$Raw$jobs[[1]]$read$bw)/1024)) + mdata=cbind(mdata, iops_tot=as.numeric(sum(fdata$Raw$jobs[[1]]$read$iops))) + mdata=cbind(mdata, runtime=rep(datasetname, length(mdata[, "read_bw_mps"]) )) + mdata=cbind(mdata, blocksize=rep(blocksize, length(mdata[, "read_bw_mps"]) )) + + # Extract the stats tables + bw_dirstats=rbind(bw_dirstats, round(mdata$read_bw_mps, digits=1)) + # Rowname hack to get the blocksize recorded + rownames(bw_dirstats)[nrow(bw_dirstats)]=blocksize + + iops_dirstats=rbind(iops_dirstats, round(mdata$iops_tot, digits=1)) + rownames(iops_dirstats)[nrow(iops_dirstats)]=blocksize + + # And do the 95 and 99 percentiles as tables as well + lat95_dirstats=rbind(lat95_dirstats, round(mean(clat$clat_ns.95.000000)/1000000, digits=1)) + rownames(lat95_dirstats)[nrow(lat95_dirstats)]=blocksize + lat99_dirstats=rbind(lat99_dirstats, round(mean(clat$clat_ns.99.000000)/1000000, digits=1)) + rownames(lat99_dirstats)[nrow(lat99_dirstats)]=blocksize + + # Collect up as sets across all files and runtimes. 
+ data2=rbind(data2, mdata) + all_ldata=rbind(all_ldata, ldata) + all_ldata2=rbind(all_ldata2, ldata2) + } + + # Collect up for each dir we process into a column + read_bw_stats=cbind(read_bw_stats, bw_dirstats) + colnames(read_bw_stats)[ncol(read_bw_stats)]=datasetname + + read_iops_stats=cbind(read_iops_stats, iops_dirstats) + colnames(read_iops_stats)[ncol(read_iops_stats)]=datasetname + + read_lat95_stats=cbind(read_lat95_stats, lat95_dirstats) + colnames(read_lat95_stats)[ncol(read_lat95_stats)]=datasetname + read_lat99_stats=cbind(read_lat99_stats, lat99_dirstats) + colnames(read_lat99_stats)[ncol(read_lat99_stats)]=datasetname +} + +# To get a nice looking table, we need to extract the rownames into their +# own column +read_bw_stats=cbind(Bandwidth=rownames(read_bw_stats), read_bw_stats) +read_bw_stats=cbind(read_bw_stats, Units=rep("MB/s", nrow(read_bw_stats))) + +read_iops_stats=cbind(IOPS=rownames(read_iops_stats), read_iops_stats) +read_iops_stats=cbind(read_iops_stats, Units=rep("IOP/s", nrow(read_iops_stats))) + +read_lat95_stats=cbind('lat 95pc'=rownames(read_lat95_stats), read_lat95_stats) +read_lat95_stats=cbind(read_lat95_stats, Units=rep("ms", nrow(read_lat95_stats))) +read_lat99_stats=cbind('lat 99pc'=rownames(read_lat99_stats), read_lat99_stats) +read_lat99_stats=cbind(read_lat99_stats, Units=rep("ms", nrow(read_lat99_stats))) + +# Bandwidth line plot +read_bw_line_plot <- ggplot() + + geom_line( data=data2, aes(blocksize, read_bw_mps, group=runtime, color=runtime)) + + ylim(0, NA) + + ggtitle("Random Read total bandwidth") + + xlab("Blocksize") + + ylab("Bandwidth (MiB/s)") + + theme( + axis.text.x=element_text(angle=90), + legend.position=c(0.35,0.8), + legend.title=element_text(size=5), + legend.text=element_text(size=5), + legend.background = element_rect(fill=alpha('blue', 0.2)) + ) + +# IOPS line plot +read_iops_line_plot <- ggplot() + + geom_line( data=data2, aes(blocksize, iops_tot, group=runtime, color=runtime)) + + ylim(0, NA) + + 
ggtitle("Random Read total IOPS") + + xlab("Blocksize") + + ylab("IOPS") + + theme( + axis.text.x=element_text(angle=90), + legend.position=c(0.35,0.8), + legend.title=element_text(size=5), + legend.text=element_text(size=5), + legend.background = element_rect(fill=alpha('blue', 0.2)) + ) + +# 95 and 99 percentile box plot +read_clat_box_plot <- ggplot() + + geom_boxplot( data=all_ldata, aes(blocksize, percentile, color=runtime)) + + stat_summary( data=all_ldata, aes(blocksize, percentile, group=runtime, color=runtime), fun.y=mean, geom="line") + + ylim(0, NA) + + ggtitle("Random Read completion latency", subtitle="95&99 percentiles, boxplot over jobs") + + xlab("Blocksize") + + ylab("Latency (ms)") + + theme(axis.text.x=element_text(angle=90)) + + # Use the 'paired' colour matrix as we are setting these up as pairs of + # 95 and 99 percentiles, and it is much easier to visually group those to + # each runtime if we use this colourmap. + scale_colour_brewer(palette="Paired") +# it would be nice to use the same legend theme as the other plots on this +# page, but because of the number of entries it tends to flow off the picture. +# theme( +# axis.text.x=element_text(angle=90), +# legend.position=c(0.35,0.8), +# legend.title=element_text(size=5), +# legend.text=element_text(size=5), +# legend.background = element_rect(fill=alpha('blue', 0.2)) +# ) + +# As the boxplot is actually quite hard to interpret, also show a linegraph +# of all the percentiles for a single blocksize. 
+which_blocksize='4k' +clat_line_subtitle=paste("For blocksize", which_blocksize, sep=" ") +single_blocksize=subset(all_ldata2, blocksize==which_blocksize) +clat_line=aggregate( + single_blocksize$ms, + by=list( + percentile=single_blocksize$percentile, + blocksize=single_blocksize$blocksize, + runtime=single_blocksize$runtime + ), + FUN=mean +) + +clat_line$percentile=as.numeric(clat_line$percentile) + +read_clat_line_plot <- ggplot() + + geom_line( data=clat_line, aes(percentile, x, group=runtime, color=runtime)) + + ylim(0, NA) + + ggtitle("Random Read completion latency percentiles", subtitle=clat_line_subtitle) + + xlab("Percentile") + + ylab("Time (ms)") + + theme( + axis.text.x=element_text(angle=90), + legend.position=c(0.35,0.8), + legend.title=element_text(size=5), + legend.text=element_text(size=5), + legend.background = element_rect(fill=alpha('blue', 0.2)) + ) + +# Output the pretty pictures +graphics_plot = grid.arrange( + read_bw_line_plot, + read_iops_line_plot, + read_clat_box_plot, + read_clat_line_plot, + nrow=2, + ncol=2 ) + +# A bit of an odd tweak to force a pagebreak between the pictures and +# the tables. This only works because we have a `results='asis'` in the Rmd +# R fragment. 
+cat("\n\n\\pagebreak\n") + +read_bw_stats_plot = suppressWarnings(ggtexttable(read_bw_stats, + theme=ttheme(base_size=10), + rows=NULL + )) + +read_iops_stats_plot = suppressWarnings(ggtexttable(read_iops_stats, + theme=ttheme(base_size=10), + rows=NULL + )) + +read_lat95_stats_plot = suppressWarnings(ggtexttable(read_lat95_stats, + theme=ttheme(base_size=10), + rows=NULL + )) +read_lat99_stats_plot = suppressWarnings(ggtexttable(read_lat99_stats, + theme=ttheme(base_size=10), + rows=NULL + )) + +# and then the statistics tables +stats_plot = grid.arrange( + read_bw_stats_plot, + read_iops_stats_plot, + read_lat95_stats_plot, + read_lat99_stats_plot, + nrow=4, + ncol=1 ) diff --git a/tests/metrics/report/report_dockerfile/fio-writes.R b/tests/metrics/report/report_dockerfile/fio-writes.R new file mode 100644 index 0000000000..5fa2231766 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/fio-writes.R @@ -0,0 +1,260 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Display details for 'fio' random writes storage IO tests. 
+ + +library(ggplot2) # ability to plot nicely +library(gridExtra) # So we can plot multiple graphs together +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable +suppressMessages(library(jsonlite)) # to load the data +suppressMessages(suppressWarnings(library(tidyr))) # for gather +library(tibble) + +testnames=c( + "fio-randwrite-128", + "fio-randwrite-256", + "fio-randwrite-512", + "fio-randwrite-1k", + "fio-randwrite-2k", + "fio-randwrite-4k", + "fio-randwrite-8k", + "fio-randwrite-16k", + "fio-randwrite-32k", + "fio-randwrite-64k" + ) + +data2=c() +all_ldata=c() +all_ldata2=c() +stats=c() +rstats=c() +rstats_names=c() + + +# Where to store up the stats for the tables +write_bw_stats=c() +write_iops_stats=c() +write_lat95_stats=c() +write_lat99_stats=c() + +# For each set of results +for (currentdir in resultdirs) { + bw_dirstats=c() + iops_dirstats=c() + lat95_dirstats=c() + lat99_dirstats=c() + # Derive the name from the test result dirname + datasetname=basename(currentdir) + + for (testname in testnames) { + fname=paste(inputdir, currentdir, testname, '.json', sep="") + if ( !file.exists(fname)) { + #warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Import the data + fdata=fromJSON(fname) + # De-nest the test specific named data + fdata=fdata[[testname]] + + blocksize=fdata$Raw$'global options'$bs + + # Extract the latency data - it comes as a table of percentiles, so + # we have to do a little work... + clat=data.frame(clat_ns=fdata$Raw$jobs[[1]]$write$clat_ns$percentile) + + # Generate a clat data set with 'clean' percentile numbers so + # we can sensibly plot it later on. 
+ clat2=clat + colnames(clat2)<-sub("clat_ns.", "", colnames(clat2)) + colnames(clat2)<-sub("0000", "", colnames(clat2)) + ldata2=gather(clat2) + colnames(ldata2)[colnames(ldata2)=="key"] <- "percentile" + colnames(ldata2)[colnames(ldata2)=="value"] <- "ms" + ldata2$ms=ldata2$ms/1000000 #ns->ms + ldata2=cbind(ldata2, runtime=rep(datasetname, length(ldata2$percentile))) + ldata2=cbind(ldata2, blocksize=rep(blocksize, length(ldata2$percentile))) + + # Pull the 95 and 99 percentiles for the boxplot diagram. + # Our values fall more in the range of ms... + pc95data=tibble(percentile=clat$clat_ns.95.000000/1000000) + pc95data=cbind(pc95data, runtime=rep(paste(datasetname, "95pc", sep="-"), length(pc95data$percentile))) + pc99data=tibble(percentile=clat$clat_ns.99.000000/1000000) + pc99data=cbind(pc99data, runtime=rep(paste(datasetname, "99pc", sep="-"), length(pc95data$percentile))) + ldata=rbind(pc95data, pc99data) + ldata=cbind(ldata, blocksize=rep(blocksize, length(ldata$percentile))) + + # We want total bandwidth, so that is the sum of the bandwidths + # from all the write 'jobs'. 
+ mdata=data.frame(write_bw_mps=as.numeric(sum(fdata$Raw$jobs[[1]]$write$bw)/1024)) + mdata=cbind(mdata, iops_tot=as.numeric(sum(fdata$Raw$jobs[[1]]$write$iops))) + mdata=cbind(mdata, runtime=rep(datasetname, length(mdata[, "write_bw_mps"]) )) + mdata=cbind(mdata, blocksize=rep(blocksize, length(mdata[, "write_bw_mps"]) )) + + # Extract the stats tables + bw_dirstats=rbind(bw_dirstats, round(mdata$write_bw_mps, digits=1)) + # Rowname hack to get the blocksize recorded + rownames(bw_dirstats)[nrow(bw_dirstats)]=blocksize + + iops_dirstats=rbind(iops_dirstats, round(mdata$iops_tot, digits=1)) + rownames(iops_dirstats)[nrow(iops_dirstats)]=blocksize + + # And do the 95 and 99 percentiles as tables as well + lat95_dirstats=rbind(lat95_dirstats, round(mean(clat$clat_ns.95.000000)/1000000, digits=1)) + rownames(lat95_dirstats)[nrow(lat95_dirstats)]=blocksize + lat99_dirstats=rbind(lat99_dirstats, round(mean(clat$clat_ns.99.000000)/1000000, digits=1)) + rownames(lat99_dirstats)[nrow(lat99_dirstats)]=blocksize + + # Store away as single sets + data2=rbind(data2, mdata) + all_ldata=rbind(all_ldata, ldata) + all_ldata2=rbind(all_ldata2, ldata2) + } + + # Collect up for each dir we process into a column + write_bw_stats=cbind(write_bw_stats, bw_dirstats) + colnames(write_bw_stats)[ncol(write_bw_stats)]=datasetname + + write_iops_stats=cbind(write_iops_stats, iops_dirstats) + colnames(write_iops_stats)[ncol(write_iops_stats)]=datasetname + + write_lat95_stats=cbind(write_lat95_stats, lat95_dirstats) + colnames(write_lat95_stats)[ncol(write_lat95_stats)]=datasetname + write_lat99_stats=cbind(write_lat99_stats, lat99_dirstats) + colnames(write_lat99_stats)[ncol(write_lat99_stats)]=datasetname +} + +# To get a nice looking table, we need to extract the rownames into their +# own column +write_bw_stats=cbind(Bandwidth=rownames(write_bw_stats), write_bw_stats) +write_bw_stats=cbind(write_bw_stats, Units=rep("MB/s", nrow(write_bw_stats))) + 
+write_iops_stats=cbind(IOPS=rownames(write_iops_stats), write_iops_stats) +write_iops_stats=cbind(write_iops_stats, Units=rep("IOP/s", nrow(write_iops_stats))) + +write_lat95_stats=cbind('lat 95pc'=rownames(write_lat95_stats), write_lat95_stats) +write_lat95_stats=cbind(write_lat95_stats, Units=rep("ms", nrow(write_lat95_stats))) +write_lat99_stats=cbind('lat 99pc'=rownames(write_lat99_stats), write_lat99_stats) +write_lat99_stats=cbind(write_lat99_stats, Units=rep("ms", nrow(write_lat99_stats))) + +# lineplot of total bandwidth across blocksizes. +write_bw_line_plot <- ggplot() + + geom_line( data=data2, aes(blocksize, write_bw_mps, group=runtime, color=runtime)) + + ylim(0, NA) + + ggtitle("Random Write total bandwidth") + + xlab("Blocksize") + + ylab("Bandwidth (MiB/s)") + + theme( + axis.text.x=element_text(angle=90), + legend.position=c(0.35,0.8), + legend.title=element_text(size=5), + legend.text=element_text(size=5), + legend.background = element_rect(fill=alpha('blue', 0.2)) + ) + +# lineplot of IOPS across blocksizes +write_iops_line_plot <- ggplot() + + geom_line( data=data2, aes(blocksize, iops_tot, group=runtime, color=runtime)) + + ylim(0, NA) + + ggtitle("Random Write total IOPS") + + xlab("Blocksize") + + ylab("IOPS") + + theme( + axis.text.x=element_text(angle=90), + legend.position=c(0.35,0.8), + legend.title=element_text(size=5), + legend.text=element_text(size=5), + legend.background = element_rect(fill=alpha('blue', 0.2)) + ) + +# boxplot of 95 and 99 percentiles covering the parallel jobs, shown across +# the blocksizes. 
+write_clat_box_plot <- ggplot() + + geom_boxplot( data=all_ldata, aes(blocksize, percentile, color=runtime)) + + stat_summary( data=all_ldata, aes(blocksize, percentile, group=runtime, color=runtime), fun.y=mean, geom="line") + + ylim(0, NA) + + ggtitle("Random Write completion latency", subtitle="95&99 Percentiles, boxplot across jobs") + + xlab("Blocksize") + + ylab("Latency (ms)") + + theme(axis.text.x=element_text(angle=90)) + + # Use the 'paired' colour matrix as we are setting these up as pairs of + # 95 and 99 percentiles, and it is much easier to visually group those to + # each runtime if we use this colourmap. + scale_colour_brewer(palette="Paired") + + +# completion latency line plot across the percentiles, for a specific blocksize only +# as otherwise the graph would be far too noisy. +which_blocksize='4k' +clat_line_subtitle=paste("For blocksize", which_blocksize, sep=" ") +single_blocksize=subset(all_ldata2, blocksize==which_blocksize) +clat_line=aggregate( + single_blocksize$ms, + by=list( + percentile=single_blocksize$percentile, + blocksize=single_blocksize$blocksize, + runtime=single_blocksize$runtime + ), + FUN=mean +) + +clat_line$percentile=as.numeric(clat_line$percentile) + +write_clat_line_plot <- ggplot() + + geom_line( data=clat_line, aes(percentile, x, group=runtime, color=runtime)) + + ylim(0, NA) + + ggtitle("Random Write completion latency percentiles", subtitle=clat_line_subtitle) + + xlab("Percentile") + + ylab("Time (ms)") + + theme( + axis.text.x=element_text(angle=90), + legend.position=c(0.35,0.8), + legend.title=element_text(size=5), + legend.text=element_text(size=5), + legend.background = element_rect(fill=alpha('blue', 0.2)) + ) + +master_plot = grid.arrange( + write_bw_line_plot, + write_iops_line_plot, + write_clat_box_plot, + write_clat_line_plot, + nrow=2, + ncol=2 ) + +# A bit of an odd tweak to force a pagebreak between the pictures and +# the tables. 
This only works because we have a `results='asis'` in the Rmd +# R fragment. +cat("\n\n\\pagebreak\n") + +write_bw_stats_plot = suppressWarnings(ggtexttable(write_bw_stats, + theme=ttheme(base_size=10), + rows=NULL + )) + +write_iops_stats_plot = suppressWarnings(ggtexttable(write_iops_stats, + theme=ttheme(base_size=10), + rows=NULL + )) + +write_lat95_stats_plot = suppressWarnings(ggtexttable(write_lat95_stats, + theme=ttheme(base_size=10), + rows=NULL + )) +write_lat99_stats_plot = suppressWarnings(ggtexttable(write_lat99_stats, + theme=ttheme(base_size=10), + rows=NULL + )) + +# and then the statistics tables +stats_plot = grid.arrange( + write_bw_stats_plot, + write_iops_stats_plot, + write_lat95_stats_plot, + write_lat99_stats_plot, + nrow=4, + ncol=1 ) diff --git a/tests/metrics/report/report_dockerfile/footprint-density.R b/tests/metrics/report/report_dockerfile/footprint-density.R new file mode 100644 index 0000000000..3539dc9cc9 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/footprint-density.R @@ -0,0 +1,111 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Show system memory reduction, and hence container 'density', by analysing the +# scaling footprint data results and the 'system free' memory. + +library(ggplot2) # ability to plot nicely. + # So we can plot multiple graphs +library(gridExtra) # together. +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. +suppressMessages(library(jsonlite)) # to load the data. 
+ +testnames=c( + paste("footprint-busybox.*", test_name_extra, sep=""), + paste("footprint-mysql.*", test_name_extra, sep=""), + paste("footprint-elasticsearch.*", test_name_extra, sep="") +) + +data=c() +stats=c() +rstats=c() +rstats_names=c() + +for (currentdir in resultdirs) { + count=1 + dirstats=c() + for (testname in testnames) { + matchdir=paste(inputdir, currentdir, sep="") + matchfile=paste(testname, '\\.json', sep="") + files=list.files(matchdir, pattern=matchfile) + if ( length(files) == 0 ) { + #warning(paste("Pattern [", matchdir, "/", matchfile, "] matched nothing")) + } + for (ffound in files) { + fname=paste(inputdir, currentdir, ffound, sep="") + if ( !file.exists(fname)) { + warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Derive the name from the test result dirname + datasetname=basename(currentdir) + + # Import the data + fdata=fromJSON(fname) + # De-nest the test name specific data + shortname=substr(ffound, 1, nchar(ffound)-nchar(".json")) + fdata=fdata[[shortname]] + + payload=fdata$Config$payload + testname=paste(datasetname, payload) + + cdata=data.frame(avail_mb=as.numeric(fdata$Results$system$avail)/(1024*1024)) + cdata=cbind(cdata, avail_decr=as.numeric(fdata$Results$system$avail_decr)) + cdata=cbind(cdata, count=seq_len(length(cdata[, "avail_mb"]))) + cdata=cbind(cdata, testname=rep(testname, length(cdata[, "avail_mb"]) )) + cdata=cbind(cdata, payload=rep(payload, length(cdata[, "avail_mb"]) )) + cdata=cbind(cdata, dataset=rep(datasetname, length(cdata[, "avail_mb"]) )) + + # Gather our statistics + sdata=data.frame(num_containers=length(cdata[, "avail_mb"])) + # Pick out the last avail_decr value - which in theory should be + # the most we have consumed... 
+ sdata=cbind(sdata, mem_consumed=cdata[, "avail_decr"][length(cdata[, "avail_decr"])]) + sdata=cbind(sdata, avg_bytes_per_c=sdata$mem_consumed / sdata$num_containers) + sdata=cbind(sdata, runtime=testname) + + # Store away as a single set + data=rbind(data, cdata) + stats=rbind(stats, sdata) + + s = c( + "Test"=testname, + "n"=sdata$num_containers, + "size"=(sdata$mem_consumed) / 1024, + "kb/n"=round((sdata$mem_consumed / sdata$num_containers) / 1024, digits=1), + "n/Gb"= round((1*1024*1024*1024) / (sdata$mem_consumed / sdata$num_containers), digits=1) + ) + + rstats=rbind(rstats, s) + count = count + 1 + } + } +} + +# Set up the text table headers +colnames(rstats)=c("Test", "n", "Tot_Kb", "avg_Kb", "n_per_Gb") + +# Build us a text table of numerical results +stats_plot = suppressWarnings(ggtexttable(data.frame(rstats), + theme=ttheme(base_size=10), + rows=NULL + )) + +# plot how samples varioed over 'time' +line_plot <- ggplot() + + geom_point( data=data, aes(count, avail_mb, group=testname, color=payload, shape=dataset)) + + geom_line( data=data, aes(count, avail_mb, group=testname, color=payload)) + + xlab("Containers") + + ylab("System Avail (Mb)") + + ggtitle("System Memory free") + + ylim(0, NA) + + theme(axis.text.x=element_text(angle=90)) + +master_plot = grid.arrange( + line_plot, + stats_plot, + nrow=2, + ncol=1 ) diff --git a/tests/metrics/report/report_dockerfile/lifecycle-time.R b/tests/metrics/report/report_dockerfile/lifecycle-time.R new file mode 100644 index 0000000000..4969f23c97 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/lifecycle-time.R @@ -0,0 +1,157 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Display how long the various phases of a container lifecycle (run, execute, die etc. +# take. + +library(ggplot2) # ability to plot nicely. +suppressMessages(suppressWarnings(library(tidyr))) # for gather(). 
+ # So we can plot multiple graphs +library(gridExtra) # together. +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. +suppressMessages(library(jsonlite)) # to load the data. + +testnames=c( + "boot-times" +) + +data=c() +stats=c() +rstats=c() +rstats_names=c() + +# For each set of results +for (currentdir in resultdirs) { + count=1 + dirstats=c() + for (testname in testnames) { + fname=paste(inputdir, currentdir, testname, '.json', sep="") + if ( !file.exists(fname)) { + warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Derive the name from the test result dirname + datasetname=basename(currentdir) + + # Import the data + fdata=fromJSON(fname) + # De-nest the test specific name + fdata=fdata[[testname]] + + cdata=data.frame(workload=as.numeric(fdata$Results$'to-workload'$Result)) + cdata=cbind(cdata, quit=as.numeric(fdata$Results$'to-quit'$Result)) + + cdata=cbind(cdata, tokernel=as.numeric(fdata$Results$'to-kernel'$Result)) + cdata=cbind(cdata, inkernel=as.numeric(fdata$Results$'in-kernel'$Result)) + cdata=cbind(cdata, total=as.numeric(fdata$Results$'total'$Result)) + + cdata=cbind(cdata, count=seq_len(length(cdata[,"workload"]))) + cdata=cbind(cdata, runtime=rep(datasetname, length(cdata[, "workload"]) )) + + # Calculate some stats for total time + sdata=data.frame(workload_mean=mean(cdata$workload)) + sdata=cbind(sdata, workload_min=min(cdata$workload)) + sdata=cbind(sdata, workload_max=max(cdata$workload)) + sdata=cbind(sdata, workload_sd=sd(cdata$workload)) + sdata=cbind(sdata, workload_cov=((sdata$workload_sd / sdata$workload_mean) * 100)) + sdata=cbind(sdata, runtime=datasetname) + + sdata=cbind(sdata, quit_mean = mean(cdata$quit)) + sdata=cbind(sdata, quit_min = min(cdata$quit)) + sdata=cbind(sdata, quit_max = max(cdata$quit)) + sdata=cbind(sdata, quit_sd = sd(cdata$quit)) + sdata=cbind(sdata, quit_cov = (sdata$quit_sd / sdata$quit_mean) * 100) + + sdata=cbind(sdata, tokernel_mean = mean(cdata$tokernel)) + 
sdata=cbind(sdata, inkernel_mean = mean(cdata$inkernel)) + sdata=cbind(sdata, total_mean = mean(cdata$total)) + + # Store away as a single set + data=rbind(data, cdata) + stats=rbind(stats, sdata) + + # Store away some stats for the text table + dirstats[count]=round(sdata$tokernel_mean, digits=2) + count = count + 1 + dirstats[count]=round(sdata$inkernel_mean, digits=2) + count = count + 1 + dirstats[count]=round(sdata$workload_mean, digits=2) + count = count + 1 + dirstats[count]=round(sdata$quit_mean, digits=2) + count = count + 1 + dirstats[count]=round(sdata$total_mean, digits=2) + count = count + 1 + } + rstats=rbind(rstats, dirstats) + rstats_names=rbind(rstats_names, datasetname) +} + +unts=c("s", "s", "s", "s", "s") +rstats=rbind(rstats, unts) +rstats_names=rbind(rstats_names, "Units") + + +# If we have only 2 sets of results, then we can do some more +# stats math for the text table +if (length(resultdirs) == 2) { + # This is a touch hard wired - but we *know* we only have two + # datasets... 
+ diff=c() + for( i in 1:5) { + difference = as.double(rstats[2,i]) - as.double(rstats[1,i]) + val = 100 * (difference/as.double(rstats[1,i])) + diff[i] = paste(round(val, digits=2), "%", sep=" ") + } + + rstats=rbind(rstats, diff) + rstats_names=rbind(rstats_names, "Diff") +} + +rstats=cbind(rstats_names, rstats) + +# Set up the text table headers +colnames(rstats)=c("Results", "2k", "ik", "2w", "2q", "tot") + + +# Build us a text table of numerical results +stats_plot = suppressWarnings(ggtexttable(data.frame(rstats, check.names=FALSE), + theme=ttheme(base_size=8), + rows=NULL + )) + +# plot how samples varioed over 'time' +line_plot <- ggplot() + + geom_line( data=data, aes(count, workload, color=runtime)) + + geom_smooth( data=data, aes(count, workload, color=runtime), se=FALSE, method="loess") + + xlab("Iteration") + + ylab("Time (s)") + + ggtitle("Boot to workload", subtitle="First container") + + ylim(0, NA) + + theme(axis.text.x=element_text(angle=90)) + +boot_boxplot <- ggplot() + + geom_boxplot( data=data, aes(runtime, workload, color=runtime), show.legend=FALSE) + + ylim(0, NA) + + ylab("Time (s)") + +# convert the stats to a long format so we can more easily do a side-by-side barplot +longstats <- gather(stats, measure, value, workload_mean, quit_mean, inkernel_mean, tokernel_mean, total_mean) + +bar_plot <- ggplot() + + geom_bar( data=longstats, aes(measure, value, fill=runtime), stat="identity", position="dodge", show.legend=FALSE) + + xlab("Phase") + + ylab("Time (s)") + + ggtitle("Lifecycle phase times", subtitle="Mean") + + ylim(0, NA) + + theme(axis.text.x=element_text(angle=90)) + +master_plot = grid.arrange( + bar_plot, + line_plot, + stats_plot, + boot_boxplot, + nrow=2, + ncol=2 ) diff --git a/tests/metrics/report/report_dockerfile/mem-in-cont.R b/tests/metrics/report/report_dockerfile/mem-in-cont.R new file mode 100644 index 0000000000..8dede6b997 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/mem-in-cont.R @@ -0,0 +1,142 @@ 
+#!/usr/bin/env Rscript
+# Copyright (c) 2018-2023 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Analyse the memory usage visible inside the container.
+
+library(ggplot2)	# ability to plot nicely.
+			# So we can plot multiple graphs
+library(gridExtra)	# together.
+suppressMessages(suppressWarnings(library(ggpubr)))	# for ggtexttable.
+suppressMessages(library(jsonlite))			# to load the data.
+
+testnames=c(
+    "memory-footprint-inside-container"
+)
+
+data=c()
+rstats=c()
+rstats_rows=c()
+rstats_cols=c()
+
+# For each set of results
+for (currentdir in resultdirs) {
+    dirstats=c()
+    # For the two different types of memory footprint measures
+    for (testname in testnames) {
+        # R seems not to like double path slashes '//' ?
+        fname=paste(inputdir, currentdir, testname, '.json', sep="")
+        if ( !file.exists(fname)) {
+            warning(paste("Skipping non-existent file: ", fname))
+            next
+        }
+
+        # Derive the name from the test result dirname
+        datasetname=basename(currentdir)
+
+        # Import the data
+        fdata=fromJSON(fname)
+        fdata=fdata[[testname]]
+        # Copy the average result into a shorter, more accessible name
+        fdata$requested=fdata$Results$memrequest$Result
+        fdata$total=fdata$Results$memtotal$Result
+        fdata$free=fdata$Results$memfree$Result
+        fdata$avail=fdata$Results$memavailable$Result
+
+        # And let's work out what % we have 'lost' between the amount requested
+        # and the total the container actually sees.
+ fdata$lost=fdata$requested - fdata$total + fdata$pctotal= 100 * (fdata$lost/ fdata$requested) + + fdata$Runtime=rep(datasetname, length(fdata$Result) ) + + # Store away the bits we need + data=rbind(data, data.frame( + Result=fdata$requested, + Type="requested", + Runtime=fdata$Runtime )) + + data=rbind(data, data.frame( + Result=fdata$total, + Type="total", + Runtime=fdata$Runtime )) + + data=rbind(data, data.frame( + Result=fdata$free, + Type="free", + Runtime=fdata$Runtime )) + + data=rbind(data, data.frame( + Result=fdata$avail, + Type="avail", + Runtime=fdata$Runtime )) + + data=rbind(data, data.frame( + Result=fdata$lost, + Type="lost", + Runtime=fdata$Runtime )) + + data=rbind(data, data.frame( + Result=fdata$pctotal, + Type="% consumed", + Runtime=fdata$Runtime )) + + # Store away some stats for the text table + dirstats=rbind(dirstats, round(fdata$requested, digits=2) ) + dirstats=rbind(dirstats, round(fdata$total, digits=2) ) + dirstats=rbind(dirstats, round(fdata$free, digits=2) ) + dirstats=rbind(dirstats, round(fdata$avail, digits=2) ) + dirstats=rbind(dirstats, round(fdata$lost, digits=2) ) + dirstats=rbind(dirstats, round(fdata$pctotal, digits=2) ) + } + rstats=cbind(rstats, dirstats) + rstats_cols=append(rstats_cols, datasetname) +} + +rstats_rows=c("Requested", "Total", "Free", "Avail", "Consumed", "% Consumed") + +unts=c("Kb", "Kb", "Kb", "Kb", "Kb", "%") +rstats=cbind(rstats, unts) +rstats_cols=append(rstats_cols, "Units") + +# If we have only 2 sets of results, then we can do some more +# stats math for the text table +if (length(resultdirs) == 2) { + # This is a touch hard wired - but we *know* we only have two + # datasets... 
+    diff=c()
+    # Just the first five entries - meaningless for the pctotal entry
+    for (n in 1:5) {
+        difference = (as.double(rstats[n,2]) - as.double(rstats[n,1]))
+        val = 100 * (difference/as.double(rstats[n,1]))
+        diff=rbind(diff, round(val, digits=2))
+    }
+
+    # Add a blank entry for the other entries
+    diff=rbind(diff, "")
+    rstats=cbind(rstats, diff)
+    rstats_cols=append(rstats_cols, "Diff %")
+}
+
+# Build us a text table of numerical results
+stats_plot = suppressWarnings(ggtexttable(data.frame(rstats),
+    theme=ttheme(base_size=10),
+    rows=rstats_rows, cols=rstats_cols
+    ))
+
+bardata <- subset(data, Type %in% c("requested", "total", "free", "avail"))
+# plot how samples varied over 'time'
+barplot <- ggplot() +
+    geom_bar(data=bardata, aes(Type, Result, fill=Runtime), stat="identity", position="dodge") +
+    xlab("Measure") +
+    ylab("Size (Kb)") +
+    ggtitle("In-container memory statistics") +
+    ylim(0, NA) +
+    theme(axis.text.x=element_text(angle=90))
+
+master_plot = grid.arrange(
+    barplot,
+    stats_plot,
+    nrow=2,
+    ncol=1 )
diff --git a/tests/metrics/report/report_dockerfile/memory-footprint.R b/tests/metrics/report/report_dockerfile/memory-footprint.R
new file mode 100644
index 0000000000..4d5f53129d
--- /dev/null
+++ b/tests/metrics/report/report_dockerfile/memory-footprint.R
@@ -0,0 +1,121 @@
+#!/usr/bin/env Rscript
+# Copyright (c) 2018-2023 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Analyse the runtime component memory footprint data.
+
+library(ggplot2)	# ability to plot nicely.
+			# So we can plot multiple graphs
+library(gridExtra)	# together.
+suppressMessages(suppressWarnings(library(ggpubr)))	# for ggtexttable.
+suppressMessages(library(jsonlite))			# to load the data.
+ +testnames=c( + "memory-footprint", + "memory-footprint-ksm" +) + +resultsfilesshort=c( + "noKSM", + "KSM" +) + +data=c() +rstats=c() +rstats_names=c() + +# For each set of results +for (currentdir in resultdirs) { + count=1 + dirstats=c() + # For the two different types of memory footprint measures + for (testname in testnames) { + # R seems not to like double path slashes '//' ? + fname=paste(inputdir, currentdir, testname, '.json', sep="") + if ( !file.exists(fname)) { + warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Derive the name from the test result dirname + datasetname=basename(currentdir) + datasetvariant=resultsfilesshort[count] + + # Import the data + fdata=fromJSON(fname) + fdata=fdata[[testname]] + # Copy the average result into a shorter, more accesible name + fdata$Result=fdata$Results$average$Result + fdata$variant=rep(datasetvariant, length(fdata$Result) ) + fdata$Runtime=rep(datasetname, length(fdata$Result) ) + fdata$Count=seq_len(length(fdata$Result)) + + # Calculate some stats + fdata.mean = mean(fdata$Result) + fdata.min = min(fdata$Result) + fdata.max = max(fdata$Result) + fdata.sd = sd(fdata$Result) + fdata.cov = (fdata.sd / fdata.mean) * 100 + + # Store away the bits we need + data=rbind(data, data.frame( + Result=fdata$Result, + Count=fdata$Count, + Runtime=fdata$Runtime, + variant=fdata$variant ) ) + + # Store away some stats for the text table + dirstats[count]=round(fdata.mean, digits=2) + + count = count + 1 + } + rstats=rbind(rstats, dirstats) + rstats_names=rbind(rstats_names, datasetname) +} + +rstats=cbind(rstats_names, rstats) +unts=rep("Kb", length(resultdirs)) + +# If we have only 2 sets of results, then we can do some more +# stats math for the text table +if (length(resultdirs) == 2) { + # This is a touch hard wired - but we *know* we only have two + # datasets... 
+ diff=c("diff") + difference = (as.double(rstats[2,2]) - as.double(rstats[1,2])) + val = 100 * (difference/as.double(rstats[1,2])) + diff[2] = round(val, digits=2) + difference = (as.double(rstats[2,3]) - as.double(rstats[1,3])) + val = 100 * (difference/as.double(rstats[1,3])) + diff[3] = round(val, digits=2) + rstats=rbind(rstats, diff) + + unts[3]="%" +} + +rstats=cbind(rstats, unts) + +# Set up the text table headers +colnames(rstats)=c("Results", resultsfilesshort, "Units") + +# Build us a text table of numerical results +stats_plot = suppressWarnings(ggtexttable(data.frame(rstats), + theme=ttheme(base_size=10), + rows=NULL + )) + +# plot how samples varioed over 'time' +point_plot <- ggplot() + + geom_point( data=data, aes(Runtime, Result, color=variant), position=position_dodge(0.1)) + + xlab("Dataset") + + ylab("Size (Kb)") + + ggtitle("Average PSS footprint", subtitle="per container") + + ylim(0, NA) + + theme(axis.text.x=element_text(angle=90)) + +master_plot = grid.arrange( + point_plot, + stats_plot, + nrow=1, + ncol=2 ) diff --git a/tests/metrics/report/report_dockerfile/network-cpu.R b/tests/metrics/report/report_dockerfile/network-cpu.R new file mode 100644 index 0000000000..b6df50b304 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/network-cpu.R @@ -0,0 +1,132 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Analyse the runtime component memory footprint data. + +library(ggplot2) # ability to plot nicely. + # So we can plot multiple graphs +library(gridExtra) # together. +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. +suppressMessages(library(jsonlite)) # to load the data. 
+ +testnames=c( + "cpu-information" +) + +resultsfilesshort=c( + "CPU" +) + +data=c() +rstats=c() +rstats_rows=c() +rstats_cols=c() + +Gdenom = (1000.0 * 1000.0 * 1000.0) + +# For each set of results +for (currentdir in resultdirs) { + dirstats=c() + # For the two different types of memory footprint measures + for (testname in testnames) { + # R seems not to like double path slashes '//' ? + fname=paste(inputdir, currentdir, testname, '.json', sep="") + if ( !file.exists(fname)) { + warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Derive the name from the test result dirname + datasetname=basename(currentdir) + datasetvariant=resultsfilesshort[count] + + # Import the data + fdata=fromJSON(fname) + fdata=fdata[[testname]] + # Copy the average result into a shorter, more accesible name + fdata$ips=fdata$Results$"instructions per cycle"$Result + fdata$Gcycles=fdata$Results$cycles$Result / Gdenom + fdata$Ginstructions=fdata$Results$instructions$Result / Gdenom + fdata$variant=rep(datasetvariant, length(fdata$Result) ) + fdata$Runtime=rep(datasetname, length(fdata$Result) ) + + # Store away the bits we need + data=rbind(data, data.frame( + Result=fdata$ips, + Type="ips", + Runtime=fdata$Runtime, + variant=fdata$variant ) ) + data=rbind(data, data.frame( + Result=fdata$Gcycles, + Type="Gcycles", + Runtime=fdata$Runtime, + variant=fdata$variant ) ) + data=rbind(data, data.frame( + Result=fdata$Ginstructions, + Type="Ginstr", + Runtime=fdata$Runtime, + variant=fdata$variant ) ) + + # Store away some stats for the text table + dirstats=rbind(dirstats, round(fdata$ips, digits=2) ) + dirstats=rbind(dirstats, round(fdata$Gcycles, digits=2) ) + dirstats=rbind(dirstats, round(fdata$Ginstructions, digits=2) ) + } + rstats=cbind(rstats, dirstats) + rstats_cols=append(rstats_cols, datasetname) +} + +rstats_rows=c("IPS", "GCycles", "GInstr") + +unts=c("Ins/Cyc", "G", "G") +rstats=cbind(rstats, unts) +rstats_cols=append(rstats_cols, "Units") + +# If we have only 
2 sets of results, then we can do some more +# stats math for the text table +if (length(resultdirs) == 2) { + # This is a touch hard wired - but we *know* we only have two + # datasets... + diff=c() + for (n in 1:3) { + difference = (as.double(rstats[n,2]) - as.double(rstats[n,1])) + val = 100 * (difference/as.double(rstats[n,1])) + diff=rbind(diff, round(val, digits=2)) + } + rstats=cbind(rstats, diff) + rstats_cols=append(rstats_cols, "Diff %") +} + +# Build us a text table of numerical results +stats_plot = suppressWarnings(ggtexttable(data.frame(rstats), + theme=ttheme(base_size=10), + rows=rstats_rows, cols=rstats_cols + )) + +# plot how samples varioed over 'time' +ipsdata <- subset(data, Type %in% c("ips")) +ips_plot <- ggplot() + + geom_bar(data=ipsdata, aes(Type, Result, fill=Runtime), stat="identity", position="dodge") + + xlab("Measure") + + ylab("IPS") + + ggtitle("Instructions Per Cycle") + + ylim(0, NA) + + theme(axis.text.x=element_text(angle=90)) + +cycdata <- subset(data, Type %in% c("Gcycles", "Ginstr")) +cycles_plot <- ggplot() + + geom_bar(data=cycdata, aes(Type, Result, fill=Runtime), stat="identity", position="dodge", show.legend=FALSE) + + xlab("Measure") + + ylab("Count (G)") + + ggtitle("Cycles and Instructions") + + ylim(0, NA) + + theme(axis.text.x=element_text(angle=90)) + +master_plot = grid.arrange( + ips_plot, + cycles_plot, + stats_plot, + nrow=2, + ncol=2 ) From 878d1a2e7dab4a912aa5f99c830d09cfa5bca8ce Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 29 Aug 2023 16:50:32 +0000 Subject: [PATCH 04/10] metrics: Generate PNGs alongside the PDF report This PR generates the PNGs for the kata metrics PDF report. 
Signed-off-by: Gabriela Cervantes --- .../report/report_dockerfile/genreport.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 tests/metrics/report/report_dockerfile/genreport.sh diff --git a/tests/metrics/report/report_dockerfile/genreport.sh b/tests/metrics/report/report_dockerfile/genreport.sh new file mode 100644 index 0000000000..5689c93d0f --- /dev/null +++ b/tests/metrics/report/report_dockerfile/genreport.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +REPORTNAME="metrics_report.pdf" + +cd scripts + +Rscript --slave -e "library(knitr);knit('pdf.Rmd')" +Rscript --slave -e "library(knitr);pandoc('pdf.md', format='latex')" + +Rscript --slave -e "library(knitr);knit('html.Rmd')" +Rscript --slave -e "library(knitr);pandoc('html.md', format='html')" + +cp /scripts/pdf.pdf /outputdir/${REPORTNAME} +cp /scripts/figure/*.png /outputdir/ +echo "PNGs of graphs and tables can be found in the output directory." +echo "The report, named ${REPORTNAME}, can be found in the output directory" From 139ffd4f758ea282aa8e3a5312c6151c20c50362 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 29 Aug 2023 17:43:06 +0000 Subject: [PATCH 05/10] metrics: Add report file titles This PR adds report file titles for kata metrics. 
Signed-off-by: Gabriela Cervantes --- .../metrics/report/report_dockerfile/html.Rmd | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 tests/metrics/report/report_dockerfile/html.Rmd diff --git a/tests/metrics/report/report_dockerfile/html.Rmd b/tests/metrics/report/report_dockerfile/html.Rmd new file mode 100644 index 0000000000..86229f335c --- /dev/null +++ b/tests/metrics/report/report_dockerfile/html.Rmd @@ -0,0 +1,23 @@ +--- +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +title: "Kata Containers metrics report" +author: "Auto generated" +date: "`r format(Sys.time(), '%d %B, %Y')`" +output: + html_document: +urlcolor: blue +--- + +```{r setup, include=FALSE} +#Set these opts to get pdf images which fit into beamer slides better +opts_chunk$set(dev = 'png') +# Pick up any env set by the invoking script, such as the root dir of the +# results data tree +source("/inputdir/Env.R") +``` + +```{r child = 'metrics_report.Rmd'} +``` From 8e6d4e6f3d4b2b799fb3fe338b9e69a6a70be6dd Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 29 Aug 2023 17:45:36 +0000 Subject: [PATCH 06/10] metrics: Add metrics reportgen This PR adds metrics reportgen for kata metrics. 
Signed-off-by: Gabriela Cervantes --- .../report/report_dockerfile/metrics_report.Rmd | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 tests/metrics/report/report_dockerfile/metrics_report.Rmd diff --git a/tests/metrics/report/report_dockerfile/metrics_report.Rmd b/tests/metrics/report/report_dockerfile/metrics_report.Rmd new file mode 100644 index 0000000000..6a6cdbbe81 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/metrics_report.Rmd @@ -0,0 +1,15 @@ +--- +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +--- +\pagebreak + +# Introduction +This report compares the metrics between multiple sets of data generated from +the [Kata Containers report generation scripts](https://github.com/kata-containers/tests/tree/main/metrics/report/README.md). + +This report was generated using the data from the **`r resultdirs`** results directories. + +\pagebreak + From 79fbb9d2430d69c862d0bf2ddd7cae2f559572ff Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 29 Aug 2023 17:51:27 +0000 Subject: [PATCH 07/10] metrics: Add scaling system footprint in metrics report This PR adds scaling system footprint in metrics report. Signed-off-by: Gabriela Cervantes --- .../report/report_dockerfile/metrics_report.Rmd | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/metrics/report/report_dockerfile/metrics_report.Rmd b/tests/metrics/report/report_dockerfile/metrics_report.Rmd index 6a6cdbbe81..27f4e2b1d7 100644 --- a/tests/metrics/report/report_dockerfile/metrics_report.Rmd +++ b/tests/metrics/report/report_dockerfile/metrics_report.Rmd @@ -7,9 +7,23 @@ # Introduction This report compares the metrics between multiple sets of data generated from -the [Kata Containers report generation scripts](https://github.com/kata-containers/tests/tree/main/metrics/report/README.md). 
+the [Kata Containers report generation scripts](https://github.com/kata-containers/kata-containers/tree/main/metrics/report/README.md). This report was generated using the data from the **`r resultdirs`** results directories. \pagebreak +# Container scaling system footprint +This [test](https://github.com/kata-containers/kata-containers/blob/main/metrics/density/footprint_data.sh) +measures the system memory footprint impact whilst running an increasing number +of containers. For this test, [KSM](https://en.wikipedia.org/wiki/Kernel_same-page_merging) + is enabled. The results show how system memory is consumed for different sized +containers, and their average system memory footprint cost and density (how many +containers you can fit per Gb) is calculated. + +```{r footprint-density, echo=FALSE, fig.cap="System Memory density"} +test_name_extra="-ksm" +source('footprint-density.R') +rm(test_name_extra) +``` +\pagebreak From 3b0d6538f2c3eae0919b0ae85d377c924d5c9022 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 29 Aug 2023 17:53:17 +0000 Subject: [PATCH 08/10] metrics: Add memory inside container to metrics report This PR adds memory inside container to metrics report. Signed-off-by: Gabriela Cervantes --- .../report/report_dockerfile/metrics_report.Rmd | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/metrics/report/report_dockerfile/metrics_report.Rmd b/tests/metrics/report/report_dockerfile/metrics_report.Rmd index 27f4e2b1d7..68ff50923f 100644 --- a/tests/metrics/report/report_dockerfile/metrics_report.Rmd +++ b/tests/metrics/report/report_dockerfile/metrics_report.Rmd @@ -27,3 +27,15 @@ source('footprint-density.R') rm(test_name_extra) ``` \pagebreak + +# Memory used inside container +This [test](https://github.com/kata-containers/kata-containers/blob/main/metrics/density/memory_usage_inside_container.sh) +measures the memory inside a container taken by the container runtime. 
It shows the difference between the amount of memory requested for the container, and the amount the container can actually 'see'. + +The *% Consumed* is the key row in the table, which compares the *Requested* against *Total* values. + + +```{r mem-in-cont, echo=FALSE, fig.cap="System Memory density"} +source('mem-in-cont.R') +``` +\pagebreak From 17dc1b976044f57c048ff76613b4e91e8dd43186 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 29 Aug 2023 17:55:44 +0000 Subject: [PATCH 09/10] metrics: Add boot lifecycle times to metrics report This PR adds the boot lifecycle times to metrics report. Signed-off-by: Gabriela Cervantes --- .../report/report_dockerfile/metrics_report.Rmd | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/metrics/report/report_dockerfile/metrics_report.Rmd b/tests/metrics/report/report_dockerfile/metrics_report.Rmd index 68ff50923f..dc4dd7b0a3 100644 --- a/tests/metrics/report/report_dockerfile/metrics_report.Rmd +++ b/tests/metrics/report/report_dockerfile/metrics_report.Rmd @@ -39,3 +39,16 @@ The *% Consumed* is the key row in the table, which compares the *Requested* aga source('mem-in-cont.R') ``` \pagebreak + +# Container boot lifecycle times +This [test](https://github.com/kata-containers/kata-containers/blob/main/metrics/time/launch_times.sh) +uses the `date` command on the host and in the container, as well as data from the container +kernel `dmesg`, to ascertain how long different phases of the create/boot/run/delete +Docker container lifecycle take for the first launched container. + +To decode the stats table, the prefixes are 'to(`2`)' and '`i`n'. The suffixes are '`k`ernel', '`w`orkload' and '`q`uit'. 'tot' is the total time for a complete container start-to-finished cycle. 
+ +```{r lifecycle-time, echo=FALSE, fig.cap="Execution lifecycle times"} +source('lifecycle-time.R') +``` +\pagebreak From 7e364716dd658ef77f2df2944ad84b0d34e856a9 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 29 Aug 2023 17:56:53 +0000 Subject: [PATCH 10/10] metrics: Add test setup details to metrics report This PR adds test setup details to metrics report. Signed-off-by: Gabriela Cervantes --- .../metrics/report/report_dockerfile/metrics_report.Rmd | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/metrics/report/report_dockerfile/metrics_report.Rmd b/tests/metrics/report/report_dockerfile/metrics_report.Rmd index dc4dd7b0a3..9dca2b49f6 100644 --- a/tests/metrics/report/report_dockerfile/metrics_report.Rmd +++ b/tests/metrics/report/report_dockerfile/metrics_report.Rmd @@ -52,3 +52,12 @@ To decode the stats table, the prefixes are 'to(`2`)' and '`i`n'. The suffixes a source('lifecycle-time.R') ``` \pagebreak + +# Test setup details + +This table describes the test system details, as derived from the information contained +in the test results files. + + +```{r dut, echo=FALSE, fig.cap="System configuration details"} +source('dut-details.R')