Merge pull request #7783 from GabyCT/topic/makereport

metrics: Add metrics report script
Fabiano Fidêncio 2023-08-30 17:11:39 +02:00 committed by GitHub
commit aa2b51a831
13 changed files with 1560 additions and 0 deletions

@@ -0,0 +1,97 @@
#!/bin/bash
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Take the data found in subdirectories of the metrics 'results' directory,
# and turn them into a PDF report. Use a Dockerfile containing all the tooling
# and scripts we need to do that.
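#
# Example (hypothetical directory names): with results laid out as
#   metrics/results/kata-run/boot-times.json
#   metrics/results/runc-run/boot-times.json
# running './makereport.sh' produces 'output/metrics_report.pdf', comparing
# the 'kata-run' and 'runc-run' data sets.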
set -e
SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
source "${SCRIPT_PATH}/../lib/common.bash"
IMAGE="${IMAGE:-metrics-report}"
DOCKERFILE="${SCRIPT_PATH}/report_dockerfile/Dockerfile"
HOST_INPUT_DIR="${SCRIPT_PATH}/../results"
R_ENV_FILE="${HOST_INPUT_DIR}/Env.R"
HOST_OUTPUT_DIR="${SCRIPT_PATH}/output"
GUEST_INPUT_DIR="/inputdir/"
GUEST_OUTPUT_DIR="/outputdir/"
# If in debugging mode, we also map in the scripts dir so you can
# dynamically edit and re-load them at the R prompt
HOST_SCRIPT_DIR="${SCRIPT_PATH}/report_dockerfile"
GUEST_SCRIPT_DIR="/scripts/"
setup() {
echo "Checking subdirectories"
check_subdir="$(ls -dx ${HOST_INPUT_DIR}/*/ 2> /dev/null | wc -l)"
if [ "$check_subdir" -eq 0 ]; then
die "No subdirs in [${HOST_INPUT_DIR}] to read results from."
fi
echo "Checking Dockerfile"
check_dockerfiles_images "$IMAGE" "$DOCKERFILE"
mkdir -p "$HOST_OUTPUT_DIR"
echo "inputdir=\"${GUEST_INPUT_DIR}\"" > ${R_ENV_FILE}
echo "outputdir=\"${GUEST_OUTPUT_DIR}\"" >> ${R_ENV_FILE}
# A bit of a hack to get an R-syntax list of the directories to process.
# We also need them as short relative names, not host-side paths.
resultdirs="$(cd ${HOST_INPUT_DIR}; ls -dx */)"
resultdirslist=$(echo ${resultdirs} | sed 's/ \+/", "/g')
echo "resultdirs=c(" >> ${R_ENV_FILE}
echo " \"${resultdirslist}\"" >> ${R_ENV_FILE}
echo ")" >> ${R_ENV_FILE}
}
run() {
docker run -ti --rm -v ${HOST_INPUT_DIR}:${GUEST_INPUT_DIR} -v ${HOST_OUTPUT_DIR}:${GUEST_OUTPUT_DIR} ${extra_volumes} ${IMAGE} ${extra_command}
ls -la ${HOST_OUTPUT_DIR}/*
}
help() {
usage=$(cat << EOF
Usage: $0 [-h] [options]
Description:
This script generates a metrics report document
from the results directory one level up in the
directory tree (../results).
Options:
-d, Run in debug (interactive) mode
-h, Print this help
EOF
)
echo "$usage"
}
main() {
local OPTIND
while getopts "dh" opt; do
case ${opt} in
d)
# In debug mode, run a shell instead of the default report generation
extra_command="bash"
extra_volumes="-v ${HOST_SCRIPT_DIR}:${GUEST_SCRIPT_DIR}"
;;
h)
help
exit 0
;;
?)
# parse failure
help
die "Failed to parse arguments"
;;
esac
done
shift $((OPTIND-1))
setup
run
}
main "$@"

@@ -0,0 +1,44 @@
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Set up an Ubuntu image with the components needed to generate a
# metrics report. That includes:
# - R
# - The R 'tidyverse'
# - pandoc
# - The report generation R files and helper scripts
# Start with the base rocker tidyverse.
# We would have used the 'verse' base, which already has some of the document
# processing installed, but I could not figure out how to add the extra bits
# we needed to the lite TeX install it uses.
# We pin the base image to a specific tag, rather than using 'latest', so we
# are not exposed to unexpected changes when the 'latest' base image is updated.
FROM rocker/tidyverse:3.6.0
# Version of the Dockerfile
LABEL DOCKERFILE_VERSION="1.2"
# Without this, some of the package installs stop to ask interactive questions...
ENV DEBIAN_FRONTEND=noninteractive
# Install the extra doc processing parts we need for our Rmarkdown PDF flow.
RUN apt-get update -qq && \
apt-get install -y --no-install-recommends \
texlive-latex-base \
texlive-fonts-recommended \
latex-xcolor && \
apt-get clean && \
rm -rf /var/lib/apt/lists
# Install the extra R packages we need.
RUN install2.r --error --deps TRUE \
gridExtra \
ggpubr
# Pull in our actual worker scripts
COPY . /scripts
# By default generate the report
CMD ["/scripts/genreport.sh"]

@@ -0,0 +1,122 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Display details for the 'Device Under Test', for all data sets being processed.
suppressMessages(suppressWarnings(library(tidyr))) # for gather().
library(tibble)
suppressMessages(suppressWarnings(library(plyr))) # rbind.fill
# So we can plot multiple graphs
library(gridExtra) # together.
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable.
suppressMessages(library(jsonlite)) # to load the data.
# A list of all the known results files that might contain the information we need.
resultsfiles=c(
"boot-times.json",
"memory-footprint.json",
"memory-footprint-ksm.json",
"memory-footprint-inside-container.json"
)
data=c()
stats=c()
stats_names=c()
# For each set of results
for (currentdir in resultdirs) {
count=1
dirstats=c()
for (resultsfile in resultsfiles) {
fname=paste(inputdir, currentdir, resultsfile, sep="/")
if ( !file.exists(fname)) {
#warning(paste("Skipping non-existent file: ", fname))
next
}
# Derive the name from the test result dirname
datasetname=basename(currentdir)
# Import the data
fdata=fromJSON(fname)
if (length(fdata$'kata-env') != 0 ) {
# We have kata-runtime data
dirstats=tibble("Run Ver"=as.character(fdata$'kata-env'$Runtime$Version$Semver))
dirstats=cbind(dirstats, "Run SHA"=as.character(fdata$'kata-env'$Runtime$Version$Commit))
pver=as.character(fdata$'kata-env'$Proxy$Version)
pver=sub("^[[:alpha:][:blank:]-]*", "", pver)
# uncomment if you want to drop the commit sha as well
#pver=sub("([[:digit:].]*).*", "\\1", pver)
dirstats=cbind(dirstats, "Proxy Ver"=pver)
# Trim the shim string
sver=as.character(fdata$'kata-env'$Shim$Version)
sver=sub("^[[:alpha:][:blank:]-]*", "", sver)
# uncomment if you want to drop the commit sha as well
#sver=sub("([[:digit:].]*).*", "\\1", sver)
dirstats=cbind(dirstats, "Shim Ver"=sver)
# Default QEMU ver string is far too long and noisy - trim.
hver=as.character(fdata$'kata-env'$Hypervisor$Version)
hver=sub("^[[:alpha:][:blank:]]*", "", hver)
hver=sub("([[:digit:].]*).*", "\\1", hver)
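# e.g. a raw string such as "QEMU emulator version 2.11.2 (v2.11.2-dirty)"
# (hypothetical) is reduced to just "2.11.2" by the two subs above.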
dirstats=cbind(dirstats, "Hyper Ver"=hver)
iver=as.character(fdata$'kata-env'$Image$Path)
iver=sub("^[[:alpha:]/-]*", "", iver)
dirstats=cbind(dirstats, "Image Ver"=iver)
kver=as.character(fdata$'kata-env'$Kernel$Path)
kver=sub("^[[:alpha:]/-]*", "", kver)
dirstats=cbind(dirstats, "Guest Krnl"=kver)
dirstats=cbind(dirstats, "Host arch"=as.character(fdata$'kata-env'$Host$Architecture))
dirstats=cbind(dirstats, "Host Distro"=as.character(fdata$'kata-env'$Host$Distro$Name))
dirstats=cbind(dirstats, "Host DistVer"=as.character(fdata$'kata-env'$Host$Distro$Version))
dirstats=cbind(dirstats, "Host Model"=as.character(fdata$'kata-env'$Host$CPU$Model))
dirstats=cbind(dirstats, "Host Krnl"=as.character(fdata$'kata-env'$Host$Kernel))
dirstats=cbind(dirstats, "runtime"=as.character(fdata$test$runtime))
break
} else {
if (length(fdata$'runc-env') != 0 ) {
dirstats=tibble("Run Ver"=as.character(fdata$'runc-env'$Version$Semver))
dirstats=cbind(dirstats, "Run SHA"=as.character(fdata$'runc-env'$Version$Commit))
dirstats=cbind(dirstats, "runtime"=as.character(fdata$test$runtime))
} else {
dirstats=tibble("runtime"="Unknown")
}
break
}
}
if ( length(dirstats) == 0 ) {
warning(paste("No valid data found for directory ", currentdir))
}
# use plyr rbind.fill so we can combine disparate version info frames
stats=rbind.fill(stats, dirstats)
stats_names=rbind(stats_names, datasetname)
}
rownames(stats) = stats_names
# Rotate the tibble so we get data dirs as the columns
spun_stats = as_tibble(cbind(What=names(stats), t(stats)))
# Build us a text table of numerical results
stats_plot = suppressWarnings(ggtexttable(data.frame(spun_stats, check.names=FALSE),
theme=ttheme(base_size=6),
rows=NULL
))
# It may seem odd doing a grid of 1x1, but it should ensure we get a uniform format and
# layout to match the other charts and tables in the report.
master_plot = grid.arrange(
stats_plot,
nrow=1,
ncol=1 )

@@ -0,0 +1,269 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Display details for `fio` random read storage IO tests.
library(ggplot2) # ability to plot nicely
library(gridExtra) # So we can plot multiple graphs together
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable
suppressMessages(library(jsonlite)) # to load the data
suppressMessages(suppressWarnings(library(tidyr))) # for gather
library(tibble)
testnames=c(
"fio-randread-128",
"fio-randread-256",
"fio-randread-512",
"fio-randread-1k",
"fio-randread-2k",
"fio-randread-4k",
"fio-randread-8k",
"fio-randread-16k",
"fio-randread-32k",
"fio-randread-64k"
)
data2=c()
all_ldata=c()
all_ldata2=c()
stats=c()
rstats=c()
rstats_names=c()
# Where to store up the stats for the tables
read_bw_stats=c()
read_iops_stats=c()
read_lat95_stats=c()
read_lat99_stats=c()
# For each set of results
for (currentdir in resultdirs) {
bw_dirstats=c()
iops_dirstats=c()
lat95_dirstats=c()
lat99_dirstats=c()
# Derive the name from the test result dirname
datasetname=basename(currentdir)
for (testname in testnames) {
fname=paste(inputdir, currentdir, testname, '.json', sep="")
if ( !file.exists(fname)) {
#warning(paste("Skipping non-existent file: ", fname))
next
}
# Import the data
fdata=fromJSON(fname)
# De-reference the data nested under the test name
fdata=fdata[[testname]]
blocksize=fdata$Raw$'global options'$bs
# Extract the latency data - it comes as a table of percentiles, so
# we have to do a little work...
clat=data.frame(clat_ns=fdata$Raw$jobs[[1]]$read$clat_ns$percentile)
# Generate a clat data set with 'clean' percentile numbers so
# we can sensibly plot it later on.
clat2=clat
colnames(clat2)<-sub("clat_ns.", "", colnames(clat2))
colnames(clat2)<-sub("0000", "", colnames(clat2))
ldata2=gather(clat2)
colnames(ldata2)[colnames(ldata2)=="key"] <- "percentile"
colnames(ldata2)[colnames(ldata2)=="value"] <- "ms"
ldata2$ms=ldata2$ms/1000000 #ns->ms
ldata2=cbind(ldata2, runtime=rep(datasetname, length(ldata2$percentile)))
ldata2=cbind(ldata2, blocksize=rep(blocksize, length(ldata2$percentile)))
# Pull the 95 and 99 percentile numbers for the boxplot
# Plotting all values for all runtimes and blocksizes is just way too
# noisy to make a meaningful picture, so we use this subset.
# Our values fall more in the range of ms...
pc95data=tibble(percentile=clat$clat_ns.95.000000/1000000)
pc95data=cbind(pc95data, runtime=rep(paste(datasetname, "95pc", sep="-"), length(pc95data$percentile)))
pc99data=tibble(percentile=clat$clat_ns.99.000000/1000000)
pc99data=cbind(pc99data, runtime=rep(paste(datasetname, "99pc", sep="-"), length(pc95data$percentile)))
ldata=rbind(pc95data, pc99data)
ldata=cbind(ldata, blocksize=rep(blocksize, length(ldata$percentile)))
# We want total bandwidth, so that is the sum of the bandwidths
# from all the read 'jobs'.
mdata=data.frame(read_bw_mps=as.numeric(sum(fdata$Raw$jobs[[1]]$read$bw)/1024))
mdata=cbind(mdata, iops_tot=as.numeric(sum(fdata$Raw$jobs[[1]]$read$iops)))
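# Note: fio reports per-job bandwidth in KiB/s, so dividing the summed value
# by 1024 yields MiB/s; e.g. four jobs at 25600 KiB/s each (illustrative
# numbers) sum to 102400 KiB/s, i.e. 100 MiB/s.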
mdata=cbind(mdata, runtime=rep(datasetname, length(mdata[, "read_bw_mps"]) ))
mdata=cbind(mdata, blocksize=rep(blocksize, length(mdata[, "read_bw_mps"]) ))
# Extract the stats tables
bw_dirstats=rbind(bw_dirstats, round(mdata$read_bw_mps, digits=1))
# Rowname hack to get the blocksize recorded
rownames(bw_dirstats)[nrow(bw_dirstats)]=blocksize
iops_dirstats=rbind(iops_dirstats, round(mdata$iops_tot, digits=1))
rownames(iops_dirstats)[nrow(iops_dirstats)]=blocksize
# And do the 95 and 99 percentiles as tables as well
lat95_dirstats=rbind(lat95_dirstats, round(mean(clat$clat_ns.95.000000)/1000000, digits=1))
rownames(lat95_dirstats)[nrow(lat95_dirstats)]=blocksize
lat99_dirstats=rbind(lat99_dirstats, round(mean(clat$clat_ns.99.000000)/1000000, digits=1))
rownames(lat99_dirstats)[nrow(lat99_dirstats)]=blocksize
# Collect up as sets across all files and runtimes.
data2=rbind(data2, mdata)
all_ldata=rbind(all_ldata, ldata)
all_ldata2=rbind(all_ldata2, ldata2)
}
# Collect up for each dir we process into a column
read_bw_stats=cbind(read_bw_stats, bw_dirstats)
colnames(read_bw_stats)[ncol(read_bw_stats)]=datasetname
read_iops_stats=cbind(read_iops_stats, iops_dirstats)
colnames(read_iops_stats)[ncol(read_iops_stats)]=datasetname
read_lat95_stats=cbind(read_lat95_stats, lat95_dirstats)
colnames(read_lat95_stats)[ncol(read_lat95_stats)]=datasetname
read_lat99_stats=cbind(read_lat99_stats, lat99_dirstats)
colnames(read_lat99_stats)[ncol(read_lat99_stats)]=datasetname
}
# To get a nice looking table, we need to extract the rownames into their
# own column
read_bw_stats=cbind(Bandwidth=rownames(read_bw_stats), read_bw_stats)
read_bw_stats=cbind(read_bw_stats, Units=rep("MiB/s", nrow(read_bw_stats)))
read_iops_stats=cbind(IOPS=rownames(read_iops_stats), read_iops_stats)
read_iops_stats=cbind(read_iops_stats, Units=rep("IOP/s", nrow(read_iops_stats)))
read_lat95_stats=cbind('lat 95pc'=rownames(read_lat95_stats), read_lat95_stats)
read_lat95_stats=cbind(read_lat95_stats, Units=rep("ms", nrow(read_lat95_stats)))
read_lat99_stats=cbind('lat 99pc'=rownames(read_lat99_stats), read_lat99_stats)
read_lat99_stats=cbind(read_lat99_stats, Units=rep("ms", nrow(read_lat99_stats)))
# Bandwidth line plot
read_bw_line_plot <- ggplot() +
geom_line( data=data2, aes(blocksize, read_bw_mps, group=runtime, color=runtime)) +
ylim(0, NA) +
ggtitle("Random Read total bandwidth") +
xlab("Blocksize") +
ylab("Bandwidth (MiB/s)") +
theme(
axis.text.x=element_text(angle=90),
legend.position=c(0.35,0.8),
legend.title=element_text(size=5),
legend.text=element_text(size=5),
legend.background = element_rect(fill=alpha('blue', 0.2))
)
# IOPS line plot
read_iops_line_plot <- ggplot() +
geom_line( data=data2, aes(blocksize, iops_tot, group=runtime, color=runtime)) +
ylim(0, NA) +
ggtitle("Random Read total IOPS") +
xlab("Blocksize") +
ylab("IOPS") +
theme(
axis.text.x=element_text(angle=90),
legend.position=c(0.35,0.8),
legend.title=element_text(size=5),
legend.text=element_text(size=5),
legend.background = element_rect(fill=alpha('blue', 0.2))
)
# 95 and 99 percentile box plot
read_clat_box_plot <- ggplot() +
geom_boxplot( data=all_ldata, aes(blocksize, percentile, color=runtime)) +
stat_summary( data=all_ldata, aes(blocksize, percentile, group=runtime, color=runtime), fun.y=mean, geom="line") +
ylim(0, NA) +
ggtitle("Random Read completion latency", subtitle="95&99 percentiles, boxplot over jobs") +
xlab("Blocksize") +
ylab("Latency (ms)") +
theme(axis.text.x=element_text(angle=90)) +
# Use the 'Paired' colour palette as we are setting these up as pairs of
# 95 and 99 percentiles, and it is much easier to visually group them by
# runtime if we use this colourmap.
scale_colour_brewer(palette="Paired")
# it would be nice to use the same legend theme as the other plots on this
# page, but because of the number of entries it tends to flow off the picture.
# theme(
# axis.text.x=element_text(angle=90),
# legend.position=c(0.35,0.8),
# legend.title=element_text(size=5),
# legend.text=element_text(size=5),
# legend.background = element_rect(fill=alpha('blue', 0.2))
# )
# As the boxplot is actually quite hard to interpret, also show a linegraph
# of all the percentiles for a single blocksize.
which_blocksize='4k'
clat_line_subtitle=paste("For blocksize", which_blocksize, sep=" ")
single_blocksize=subset(all_ldata2, blocksize==which_blocksize)
clat_line=aggregate(
single_blocksize$ms,
by=list(
percentile=single_blocksize$percentile,
blocksize=single_blocksize$blocksize,
runtime=single_blocksize$runtime
),
FUN=mean
)
clat_line$percentile=as.numeric(clat_line$percentile)
read_clat_line_plot <- ggplot() +
geom_line( data=clat_line, aes(percentile, x, group=runtime, color=runtime)) +
ylim(0, NA) +
ggtitle("Random Read completion latency percentiles", subtitle=clat_line_subtitle) +
xlab("Percentile") +
ylab("Time (ms)") +
theme(
axis.text.x=element_text(angle=90),
legend.position=c(0.35,0.8),
legend.title=element_text(size=5),
legend.text=element_text(size=5),
legend.background = element_rect(fill=alpha('blue', 0.2))
)
# Output the pretty pictures
graphics_plot = grid.arrange(
read_bw_line_plot,
read_iops_line_plot,
read_clat_box_plot,
read_clat_line_plot,
nrow=2,
ncol=2 )
# A bit of an odd tweak to force a pagebreak between the pictures and
# the tables. This only works because we have a `results='asis'` in the Rmd
# R fragment.
cat("\n\n\\pagebreak\n")
read_bw_stats_plot = suppressWarnings(ggtexttable(read_bw_stats,
theme=ttheme(base_size=10),
rows=NULL
))
read_iops_stats_plot = suppressWarnings(ggtexttable(read_iops_stats,
theme=ttheme(base_size=10),
rows=NULL
))
read_lat95_stats_plot = suppressWarnings(ggtexttable(read_lat95_stats,
theme=ttheme(base_size=10),
rows=NULL
))
read_lat99_stats_plot = suppressWarnings(ggtexttable(read_lat99_stats,
theme=ttheme(base_size=10),
rows=NULL
))
# and then the statistics tables
stats_plot = grid.arrange(
read_bw_stats_plot,
read_iops_stats_plot,
read_lat95_stats_plot,
read_lat99_stats_plot,
nrow=4,
ncol=1 )

@@ -0,0 +1,260 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Display details for 'fio' random writes storage IO tests.
library(ggplot2) # ability to plot nicely
library(gridExtra) # So we can plot multiple graphs together
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable
suppressMessages(library(jsonlite)) # to load the data
suppressMessages(suppressWarnings(library(tidyr))) # for gather
library(tibble)
testnames=c(
"fio-randwrite-128",
"fio-randwrite-256",
"fio-randwrite-512",
"fio-randwrite-1k",
"fio-randwrite-2k",
"fio-randwrite-4k",
"fio-randwrite-8k",
"fio-randwrite-16k",
"fio-randwrite-32k",
"fio-randwrite-64k"
)
data2=c()
all_ldata=c()
all_ldata2=c()
stats=c()
rstats=c()
rstats_names=c()
# Where to store up the stats for the tables
write_bw_stats=c()
write_iops_stats=c()
write_lat95_stats=c()
write_lat99_stats=c()
# For each set of results
for (currentdir in resultdirs) {
bw_dirstats=c()
iops_dirstats=c()
lat95_dirstats=c()
lat99_dirstats=c()
# Derive the name from the test result dirname
datasetname=basename(currentdir)
for (testname in testnames) {
fname=paste(inputdir, currentdir, testname, '.json', sep="")
if ( !file.exists(fname)) {
#warning(paste("Skipping non-existent file: ", fname))
next
}
# Import the data
fdata=fromJSON(fname)
# De-nest the test specific named data
fdata=fdata[[testname]]
blocksize=fdata$Raw$'global options'$bs
# Extract the latency data - it comes as a table of percentiles, so
# we have to do a little work...
clat=data.frame(clat_ns=fdata$Raw$jobs[[1]]$write$clat_ns$percentile)
# Generate a clat data set with 'clean' percentile numbers so
# we can sensibly plot it later on.
clat2=clat
colnames(clat2)<-sub("clat_ns.", "", colnames(clat2))
colnames(clat2)<-sub("0000", "", colnames(clat2))
ldata2=gather(clat2)
colnames(ldata2)[colnames(ldata2)=="key"] <- "percentile"
colnames(ldata2)[colnames(ldata2)=="value"] <- "ms"
ldata2$ms=ldata2$ms/1000000 #ns->ms
ldata2=cbind(ldata2, runtime=rep(datasetname, length(ldata2$percentile)))
ldata2=cbind(ldata2, blocksize=rep(blocksize, length(ldata2$percentile)))
# Pull the 95 and 99 percentiles for the boxplot diagram.
# Our values fall more in the range of ms...
pc95data=tibble(percentile=clat$clat_ns.95.000000/1000000)
pc95data=cbind(pc95data, runtime=rep(paste(datasetname, "95pc", sep="-"), length(pc95data$percentile)))
pc99data=tibble(percentile=clat$clat_ns.99.000000/1000000)
pc99data=cbind(pc99data, runtime=rep(paste(datasetname, "99pc", sep="-"), length(pc95data$percentile)))
ldata=rbind(pc95data, pc99data)
ldata=cbind(ldata, blocksize=rep(blocksize, length(ldata$percentile)))
# We want total bandwidth, so that is the sum of the bandwidths
# from all the write 'jobs'.
mdata=data.frame(write_bw_mps=as.numeric(sum(fdata$Raw$jobs[[1]]$write$bw)/1024))
mdata=cbind(mdata, iops_tot=as.numeric(sum(fdata$Raw$jobs[[1]]$write$iops)))
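# As with the read data, fio reports per-job bandwidth in KiB/s, so the
# summed value divided by 1024 is MiB/s.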
mdata=cbind(mdata, runtime=rep(datasetname, length(mdata[, "write_bw_mps"]) ))
mdata=cbind(mdata, blocksize=rep(blocksize, length(mdata[, "write_bw_mps"]) ))
# Extract the stats tables
bw_dirstats=rbind(bw_dirstats, round(mdata$write_bw_mps, digits=1))
# Rowname hack to get the blocksize recorded
rownames(bw_dirstats)[nrow(bw_dirstats)]=blocksize
iops_dirstats=rbind(iops_dirstats, round(mdata$iops_tot, digits=1))
rownames(iops_dirstats)[nrow(iops_dirstats)]=blocksize
# And do the 95 and 99 percentiles as tables as well
lat95_dirstats=rbind(lat95_dirstats, round(mean(clat$clat_ns.95.000000)/1000000, digits=1))
rownames(lat95_dirstats)[nrow(lat95_dirstats)]=blocksize
lat99_dirstats=rbind(lat99_dirstats, round(mean(clat$clat_ns.99.000000)/1000000, digits=1))
rownames(lat99_dirstats)[nrow(lat99_dirstats)]=blocksize
# Store away as single sets
data2=rbind(data2, mdata)
all_ldata=rbind(all_ldata, ldata)
all_ldata2=rbind(all_ldata2, ldata2)
}
# Collect up for each dir we process into a column
write_bw_stats=cbind(write_bw_stats, bw_dirstats)
colnames(write_bw_stats)[ncol(write_bw_stats)]=datasetname
write_iops_stats=cbind(write_iops_stats, iops_dirstats)
colnames(write_iops_stats)[ncol(write_iops_stats)]=datasetname
write_lat95_stats=cbind(write_lat95_stats, lat95_dirstats)
colnames(write_lat95_stats)[ncol(write_lat95_stats)]=datasetname
write_lat99_stats=cbind(write_lat99_stats, lat99_dirstats)
colnames(write_lat99_stats)[ncol(write_lat99_stats)]=datasetname
}
# To get a nice looking table, we need to extract the rownames into their
# own column
write_bw_stats=cbind(Bandwidth=rownames(write_bw_stats), write_bw_stats)
write_bw_stats=cbind(write_bw_stats, Units=rep("MiB/s", nrow(write_bw_stats)))
write_iops_stats=cbind(IOPS=rownames(write_iops_stats), write_iops_stats)
write_iops_stats=cbind(write_iops_stats, Units=rep("IOP/s", nrow(write_iops_stats)))
write_lat95_stats=cbind('lat 95pc'=rownames(write_lat95_stats), write_lat95_stats)
write_lat95_stats=cbind(write_lat95_stats, Units=rep("ms", nrow(write_lat95_stats)))
write_lat99_stats=cbind('lat 99pc'=rownames(write_lat99_stats), write_lat99_stats)
write_lat99_stats=cbind(write_lat99_stats, Units=rep("ms", nrow(write_lat99_stats)))
# lineplot of total bandwidth across blocksizes.
write_bw_line_plot <- ggplot() +
geom_line( data=data2, aes(blocksize, write_bw_mps, group=runtime, color=runtime)) +
ylim(0, NA) +
ggtitle("Random Write total bandwidth") +
xlab("Blocksize") +
ylab("Bandwidth (MiB/s)") +
theme(
axis.text.x=element_text(angle=90),
legend.position=c(0.35,0.8),
legend.title=element_text(size=5),
legend.text=element_text(size=5),
legend.background = element_rect(fill=alpha('blue', 0.2))
)
# lineplot of IOPS across blocksizes
write_iops_line_plot <- ggplot() +
geom_line( data=data2, aes(blocksize, iops_tot, group=runtime, color=runtime)) +
ylim(0, NA) +
ggtitle("Random Write total IOPS") +
xlab("Blocksize") +
ylab("IOPS") +
theme(
axis.text.x=element_text(angle=90),
legend.position=c(0.35,0.8),
legend.title=element_text(size=5),
legend.text=element_text(size=5),
legend.background = element_rect(fill=alpha('blue', 0.2))
)
# boxplot of 95 and 99 percentiles covering the parallel jobs, shown across
# the blocksizes.
write_clat_box_plot <- ggplot() +
geom_boxplot( data=all_ldata, aes(blocksize, percentile, color=runtime)) +
stat_summary( data=all_ldata, aes(blocksize, percentile, group=runtime, color=runtime), fun.y=mean, geom="line") +
ylim(0, NA) +
ggtitle("Random Write completion latency", subtitle="95&99 Percentiles, boxplot across jobs") +
xlab("Blocksize") +
ylab("Latency (ms)") +
theme(axis.text.x=element_text(angle=90)) +
# Use the 'Paired' colour palette as we are setting these up as pairs of
# 95 and 99 percentiles, and it is much easier to visually group them by
# runtime if we use this colourmap.
scale_colour_brewer(palette="Paired")
# completion latency line plot across the percentiles, for a specific blocksize only
# as otherwise the graph would be far too noisy.
which_blocksize='4k'
clat_line_subtitle=paste("For blocksize", which_blocksize, sep=" ")
single_blocksize=subset(all_ldata2, blocksize==which_blocksize)
clat_line=aggregate(
single_blocksize$ms,
by=list(
percentile=single_blocksize$percentile,
blocksize=single_blocksize$blocksize,
runtime=single_blocksize$runtime
),
FUN=mean
)
clat_line$percentile=as.numeric(clat_line$percentile)
write_clat_line_plot <- ggplot() +
geom_line( data=clat_line, aes(percentile, x, group=runtime, color=runtime)) +
ylim(0, NA) +
ggtitle("Random Write completion latency percentiles", subtitle=clat_line_subtitle) +
xlab("Percentile") +
ylab("Time (ms)") +
theme(
axis.text.x=element_text(angle=90),
legend.position=c(0.35,0.8),
legend.title=element_text(size=5),
legend.text=element_text(size=5),
legend.background = element_rect(fill=alpha('blue', 0.2))
)
master_plot = grid.arrange(
write_bw_line_plot,
write_iops_line_plot,
write_clat_box_plot,
write_clat_line_plot,
nrow=2,
ncol=2 )
# A bit of an odd tweak to force a pagebreak between the pictures and
# the tables. This only works because we have a `results='asis'` in the Rmd
# R fragment.
cat("\n\n\\pagebreak\n")
write_bw_stats_plot = suppressWarnings(ggtexttable(write_bw_stats,
theme=ttheme(base_size=10),
rows=NULL
))
write_iops_stats_plot = suppressWarnings(ggtexttable(write_iops_stats,
theme=ttheme(base_size=10),
rows=NULL
))
write_lat95_stats_plot = suppressWarnings(ggtexttable(write_lat95_stats,
theme=ttheme(base_size=10),
rows=NULL
))
write_lat99_stats_plot = suppressWarnings(ggtexttable(write_lat99_stats,
theme=ttheme(base_size=10),
rows=NULL
))
# and then the statistics tables
stats_plot = grid.arrange(
write_bw_stats_plot,
write_iops_stats_plot,
write_lat95_stats_plot,
write_lat99_stats_plot,
nrow=4,
ncol=1 )

@@ -0,0 +1,111 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Show system memory reduction, and hence container 'density', by analysing the
# scaling footprint data results and the 'system free' memory.
library(ggplot2) # ability to plot nicely.
# So we can plot multiple graphs
library(gridExtra) # together.
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable.
suppressMessages(library(jsonlite)) # to load the data.
testnames=c(
paste("footprint-busybox.*", test_name_extra, sep=""),
paste("footprint-mysql.*", test_name_extra, sep=""),
paste("footprint-elasticsearch.*", test_name_extra, sep="")
)
data=c()
stats=c()
rstats=c()
rstats_names=c()
for (currentdir in resultdirs) {
count=1
dirstats=c()
for (testname in testnames) {
matchdir=paste(inputdir, currentdir, sep="")
matchfile=paste(testname, '\\.json', sep="")
files=list.files(matchdir, pattern=matchfile)
if ( length(files) == 0 ) {
#warning(paste("Pattern [", matchdir, "/", matchfile, "] matched nothing"))
}
for (ffound in files) {
fname=paste(inputdir, currentdir, ffound, sep="")
if ( !file.exists(fname)) {
warning(paste("Skipping non-existent file: ", fname))
next
}
# Derive the name from the test result dirname
datasetname=basename(currentdir)
# Import the data
fdata=fromJSON(fname)
# De-nest the test name specific data
shortname=substr(ffound, 1, nchar(ffound)-nchar(".json"))
fdata=fdata[[shortname]]
payload=fdata$Config$payload
testname=paste(datasetname, payload)
cdata=data.frame(avail_mb=as.numeric(fdata$Results$system$avail)/(1024*1024))
cdata=cbind(cdata, avail_decr=as.numeric(fdata$Results$system$avail_decr))
cdata=cbind(cdata, count=seq_len(length(cdata[, "avail_mb"])))
cdata=cbind(cdata, testname=rep(testname, length(cdata[, "avail_mb"]) ))
cdata=cbind(cdata, payload=rep(payload, length(cdata[, "avail_mb"]) ))
cdata=cbind(cdata, dataset=rep(datasetname, length(cdata[, "avail_mb"]) ))
# Gather our statistics
sdata=data.frame(num_containers=length(cdata[, "avail_mb"]))
# Pick out the last avail_decr value - which in theory should be
# the most we have consumed...
sdata=cbind(sdata, mem_consumed=cdata[, "avail_decr"][length(cdata[, "avail_decr"])])
sdata=cbind(sdata, avg_bytes_per_c=sdata$mem_consumed / sdata$num_containers)
sdata=cbind(sdata, runtime=testname)
# Store away as a single set
data=rbind(data, cdata)
stats=rbind(stats, sdata)
s = c(
"Test"=testname,
"n"=sdata$num_containers,
"size"=(sdata$mem_consumed) / 1024,
"kb/n"=round((sdata$mem_consumed / sdata$num_containers) / 1024, digits=1),
"n/Gb"= round((1*1024*1024*1024) / (sdata$mem_consumed / sdata$num_containers), digits=1)
)
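# Illustrative numbers: if 256 containers consumed 1 GiB in total, then
# 'kb/n' would be 4096 Kb per container and 'n/Gb' would be 256.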
rstats=rbind(rstats, s)
count = count + 1
}
}
}
# Set up the text table headers
colnames(rstats)=c("Test", "n", "Tot_Kb", "avg_Kb", "n_per_Gb")
# Build us a text table of numerical results
stats_plot = suppressWarnings(ggtexttable(data.frame(rstats),
theme=ttheme(base_size=10),
rows=NULL
))
# plot how samples varied over 'time'
line_plot <- ggplot() +
geom_point( data=data, aes(count, avail_mb, group=testname, color=payload, shape=dataset)) +
geom_line( data=data, aes(count, avail_mb, group=testname, color=payload)) +
xlab("Containers") +
ylab("System Avail (Mb)") +
ggtitle("System Memory free") +
ylim(0, NA) +
theme(axis.text.x=element_text(angle=90))
master_plot = grid.arrange(
line_plot,
stats_plot,
nrow=2,
ncol=1 )

@@ -0,0 +1,19 @@
#!/bin/bash
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
REPORTNAME="metrics_report.pdf"
cd scripts
Rscript --slave -e "library(knitr);knit('pdf.Rmd')"
Rscript --slave -e "library(knitr);pandoc('pdf.md', format='latex')"
Rscript --slave -e "library(knitr);knit('html.Rmd')"
Rscript --slave -e "library(knitr);pandoc('html.md', format='html')"
cp /scripts/pdf.pdf /outputdir/${REPORTNAME}
cp /scripts/figure/*.png /outputdir/
echo "PNGs of graphs and tables can be found in the output directory."
echo "The report, named ${REPORTNAME}, can be found in the output directory"

@@ -0,0 +1,23 @@
---
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
title: "Kata Containers metrics report"
author: "Auto generated"
date: "`r format(Sys.time(), '%d %B, %Y')`"
output:
html_document:
urlcolor: blue
---
```{r setup, include=FALSE}
# Set these opts to get png images which fit into the report pages better
opts_chunk$set(dev = 'png')
# Pick up any env set by the invoking script, such as the root dir of the
# results data tree
source("/inputdir/Env.R")
```
```{r child = 'metrics_report.Rmd'}
```

@@ -0,0 +1,157 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Display how long the various phases of a container lifecycle (run, execute,
# die, etc.) take.
library(ggplot2) # ability to plot nicely.
suppressMessages(suppressWarnings(library(tidyr))) # for gather().
# So we can plot multiple graphs
library(gridExtra) # together.
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable.
suppressMessages(library(jsonlite)) # to load the data.
testnames=c(
"boot-times"
)
data=c()
stats=c()
rstats=c()
rstats_names=c()
# For each set of results
for (currentdir in resultdirs) {
count=1
dirstats=c()
for (testname in testnames) {
fname=paste(inputdir, currentdir, testname, '.json', sep="")
if ( !file.exists(fname)) {
warning(paste("Skipping non-existent file: ", fname))
next
}
# Derive the name from the test result dirname
datasetname=basename(currentdir)
# Import the data
fdata=fromJSON(fname)
# De-nest the test specific name
fdata=fdata[[testname]]
cdata=data.frame(workload=as.numeric(fdata$Results$'to-workload'$Result))
cdata=cbind(cdata, quit=as.numeric(fdata$Results$'to-quit'$Result))
cdata=cbind(cdata, tokernel=as.numeric(fdata$Results$'to-kernel'$Result))
cdata=cbind(cdata, inkernel=as.numeric(fdata$Results$'in-kernel'$Result))
cdata=cbind(cdata, total=as.numeric(fdata$Results$'total'$Result))
cdata=cbind(cdata, count=seq_len(length(cdata[,"workload"])))
cdata=cbind(cdata, runtime=rep(datasetname, length(cdata[, "workload"]) ))
# Calculate some stats for total time
sdata=data.frame(workload_mean=mean(cdata$workload))
sdata=cbind(sdata, workload_min=min(cdata$workload))
sdata=cbind(sdata, workload_max=max(cdata$workload))
sdata=cbind(sdata, workload_sd=sd(cdata$workload))
sdata=cbind(sdata, workload_cov=((sdata$workload_sd / sdata$workload_mean) * 100))
sdata=cbind(sdata, runtime=datasetname)
sdata=cbind(sdata, quit_mean = mean(cdata$quit))
sdata=cbind(sdata, quit_min = min(cdata$quit))
sdata=cbind(sdata, quit_max = max(cdata$quit))
sdata=cbind(sdata, quit_sd = sd(cdata$quit))
sdata=cbind(sdata, quit_cov = (sdata$quit_sd / sdata$quit_mean) * 100)
sdata=cbind(sdata, tokernel_mean = mean(cdata$tokernel))
sdata=cbind(sdata, inkernel_mean = mean(cdata$inkernel))
sdata=cbind(sdata, total_mean = mean(cdata$total))
# Store away as a single set
data=rbind(data, cdata)
stats=rbind(stats, sdata)
# Store away some stats for the text table
dirstats[count]=round(sdata$tokernel_mean, digits=2)
count = count + 1
dirstats[count]=round(sdata$inkernel_mean, digits=2)
count = count + 1
dirstats[count]=round(sdata$workload_mean, digits=2)
count = count + 1
dirstats[count]=round(sdata$quit_mean, digits=2)
count = count + 1
dirstats[count]=round(sdata$total_mean, digits=2)
count = count + 1
}
rstats=rbind(rstats, dirstats)
rstats_names=rbind(rstats_names, datasetname)
}
unts=c("s", "s", "s", "s", "s")
rstats=rbind(rstats, unts)
rstats_names=rbind(rstats_names, "Units")
# If we have only 2 sets of results, then we can do some more
# stats math for the text table
if (length(resultdirs) == 2) {
# This is a touch hard wired - but we *know* we only have two
# datasets...
diff=c()
for( i in 1:5) {
difference = as.double(rstats[2,i]) - as.double(rstats[1,i])
val = 100 * (difference/as.double(rstats[1,i]))
diff[i] = paste(round(val, digits=2), "%", sep=" ")
}
rstats=rbind(rstats, diff)
rstats_names=rbind(rstats_names, "Diff")
}
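# Illustrative: if a phase took 1.20s in dataset 1 and 1.50s in dataset 2,
# the Diff row would show "25 %" for that column.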
rstats=cbind(rstats_names, rstats)
# Set up the text table headers
colnames(rstats)=c("Results", "2k", "ik", "2w", "2q", "tot")
# Build us a text table of numerical results
stats_plot = suppressWarnings(ggtexttable(data.frame(rstats, check.names=FALSE),
theme=ttheme(base_size=8),
rows=NULL
))
# plot how samples varied over 'time'
line_plot <- ggplot() +
geom_line( data=data, aes(count, workload, color=runtime)) +
geom_smooth( data=data, aes(count, workload, color=runtime), se=FALSE, method="loess") +
xlab("Iteration") +
ylab("Time (s)") +
ggtitle("Boot to workload", subtitle="First container") +
ylim(0, NA) +
theme(axis.text.x=element_text(angle=90))
boot_boxplot <- ggplot() +
geom_boxplot( data=data, aes(runtime, workload, color=runtime), show.legend=FALSE) +
ylim(0, NA) +
ylab("Time (s)")
# convert the stats to a long format so we can more easily do a side-by-side barplot
longstats <- gather(stats, measure, value, workload_mean, quit_mean, inkernel_mean, tokernel_mean, total_mean)
bar_plot <- ggplot() +
geom_bar( data=longstats, aes(measure, value, fill=runtime), stat="identity", position="dodge", show.legend=FALSE) +
xlab("Phase") +
ylab("Time (s)") +
ggtitle("Lifecycle phase times", subtitle="Mean") +
ylim(0, NA) +
theme(axis.text.x=element_text(angle=90))
master_plot = grid.arrange(
bar_plot,
line_plot,
stats_plot,
boot_boxplot,
nrow=2,
ncol=2 )

@@ -0,0 +1,142 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Analyse the runtime component memory footprint data.
library(ggplot2) # ability to plot nicely.
# So we can plot multiple graphs
library(gridExtra) # together.
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable.
suppressMessages(library(jsonlite)) # to load the data.
testnames=c(
"memory-footprint-inside-container"
)
data=c()
rstats=c()
rstats_rows=c()
rstats_cols=c()
# For each set of results
for (currentdir in resultdirs) {
dirstats=c()
# For each memory footprint measure
for (testname in testnames) {
# R seems not to like double path slashes '//' ?
fname=paste(inputdir, currentdir, testname, '.json', sep="")
if ( !file.exists(fname)) {
warning(paste("Skipping non-existent file: ", fname))
next
}
# Derive the name from the test result dirname
datasetname=basename(currentdir)
# Import the data
fdata=fromJSON(fname)
fdata=fdata[[testname]]
# Copy the results into shorter, more accessible names
fdata$requested=fdata$Results$memrequest$Result
fdata$total=fdata$Results$memtotal$Result
fdata$free=fdata$Results$memfree$Result
fdata$avail=fdata$Results$memavailable$Result
# And lets work out what % we have 'lost' between the amount requested
# and the total the container actually sees.
fdata$lost=fdata$requested - fdata$total
fdata$pctotal= 100 * (fdata$lost/ fdata$requested)
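# Illustrative: a container that requested 2097152 Kb but only sees a total
# of 2043252 Kb has 'lost' 53900 Kb, i.e. ~2.57% consumed.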
fdata$Runtime=rep(datasetname, length(fdata$requested) )
# Store away the bits we need
data=rbind(data, data.frame(
Result=fdata$requested,
Type="requested",
Runtime=fdata$Runtime ))
data=rbind(data, data.frame(
Result=fdata$total,
Type="total",
Runtime=fdata$Runtime ))
data=rbind(data, data.frame(
Result=fdata$free,
Type="free",
Runtime=fdata$Runtime ))
data=rbind(data, data.frame(
Result=fdata$avail,
Type="avail",
Runtime=fdata$Runtime ))
data=rbind(data, data.frame(
Result=fdata$lost,
Type="lost",
Runtime=fdata$Runtime ))
data=rbind(data, data.frame(
Result=fdata$pctotal,
Type="% consumed",
Runtime=fdata$Runtime ))
# Store away some stats for the text table
dirstats=rbind(dirstats, round(fdata$requested, digits=2) )
dirstats=rbind(dirstats, round(fdata$total, digits=2) )
dirstats=rbind(dirstats, round(fdata$free, digits=2) )
dirstats=rbind(dirstats, round(fdata$avail, digits=2) )
dirstats=rbind(dirstats, round(fdata$lost, digits=2) )
dirstats=rbind(dirstats, round(fdata$pctotal, digits=2) )
}
rstats=cbind(rstats, dirstats)
rstats_cols=append(rstats_cols, datasetname)
}
rstats_rows=c("Requested", "Total", "Free", "Avail", "Consumed", "% Consumed")
unts=c("Kb", "Kb", "Kb", "Kb", "Kb", "%")
rstats=cbind(rstats, unts)
rstats_cols=append(rstats_cols, "Units")
# If we have only 2 sets of results, then we can do some more
# stats math for the text table
if (length(resultdirs) == 2) {
# This is a touch hard wired - but we *know* we only have two
# datasets...
diff=c()
# Just the first five entries - a percentage diff is meaningless for the '% Consumed' row
for (n in 1:5) {
difference = (as.double(rstats[n,2]) - as.double(rstats[n,1]))
val = 100 * (difference/as.double(rstats[n,1]))
diff=rbind(diff, round(val, digits=2))
}
# Add a blank entry for the other entries
diff=rbind(diff, "")
rstats=cbind(rstats, diff)
rstats_cols=append(rstats_cols, "Diff %")
}
# Build us a text table of numerical results
stats_plot = suppressWarnings(ggtexttable(data.frame(rstats),
theme=ttheme(base_size=10),
rows=rstats_rows, cols=rstats_cols
))
bardata <- subset(data, Type %in% c("requested", "total", "free", "avail"))
# plot how samples varied over 'time'
barplot <- ggplot() +
geom_bar(data=bardata, aes(Type, Result, fill=Runtime), stat="identity", position="dodge") +
xlab("Measure") +
ylab("Size (Kb)") +
ggtitle("In-container memory statistics") +
ylim(0, NA) +
theme(axis.text.x=element_text(angle=90))
master_plot = grid.arrange(
barplot,
stats_plot,
nrow=2,
ncol=1 )

@@ -0,0 +1,121 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Analyse the runtime component memory footprint data.
library(ggplot2) # ability to plot nicely.
# So we can plot multiple graphs
library(gridExtra) # together.
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable.
suppressMessages(library(jsonlite)) # to load the data.
testnames=c(
"memory-footprint",
"memory-footprint-ksm"
)
resultsfilesshort=c(
"noKSM",
"KSM"
)
data=c()
rstats=c()
rstats_names=c()
# For each set of results
for (currentdir in resultdirs) {
count=1
dirstats=c()
# For the two different types of memory footprint measures
for (testname in testnames) {
# R seems not to like double path slashes '//' ?
fname=paste(inputdir, currentdir, testname, '.json', sep="")
if ( !file.exists(fname)) {
warning(paste("Skipping non-existent file: ", fname))
next
}
# Derive the name from the test result dirname
datasetname=basename(currentdir)
datasetvariant=resultsfilesshort[count]
# Import the data
fdata=fromJSON(fname)
fdata=fdata[[testname]]
# Copy the average result into a shorter, more accessible name
fdata$Result=fdata$Results$average$Result
fdata$variant=rep(datasetvariant, length(fdata$Result) )
fdata$Runtime=rep(datasetname, length(fdata$Result) )
fdata$Count=seq_len(length(fdata$Result))
# Calculate some stats
fdata.mean = mean(fdata$Result)
fdata.min = min(fdata$Result)
fdata.max = max(fdata$Result)
fdata.sd = sd(fdata$Result)
fdata.cov = (fdata.sd / fdata.mean) * 100
# Store away the bits we need
data=rbind(data, data.frame(
Result=fdata$Result,
Count=fdata$Count,
Runtime=fdata$Runtime,
variant=fdata$variant ) )
# Store away some stats for the text table
dirstats[count]=round(fdata.mean, digits=2)
count = count + 1
}
rstats=rbind(rstats, dirstats)
rstats_names=rbind(rstats_names, datasetname)
}
rstats=cbind(rstats_names, rstats)
unts=rep("Kb", length(resultdirs))
# If we have only 2 sets of results, then we can do some more
# stats math for the text table
if (length(resultdirs) == 2) {
# This is a touch hard wired - but we *know* we only have two
# datasets...
diff=c("diff")
difference = (as.double(rstats[2,2]) - as.double(rstats[1,2]))
val = 100 * (difference/as.double(rstats[1,2]))
diff[2] = round(val, digits=2)
difference = (as.double(rstats[2,3]) - as.double(rstats[1,3]))
val = 100 * (difference/as.double(rstats[1,3]))
diff[3] = round(val, digits=2)
rstats=rbind(rstats, diff)
unts[3]="%"
}
rstats=cbind(rstats, unts)
# Set up the text table headers
colnames(rstats)=c("Results", resultsfilesshort, "Units")
# Build us a text table of numerical results
stats_plot = suppressWarnings(ggtexttable(data.frame(rstats),
theme=ttheme(base_size=10),
rows=NULL
))
# plot how samples varied over 'time'
point_plot <- ggplot() +
geom_point( data=data, aes(Runtime, Result, color=variant), position=position_dodge(0.1)) +
xlab("Dataset") +
ylab("Size (Kb)") +
ggtitle("Average PSS footprint", subtitle="per container") +
ylim(0, NA) +
theme(axis.text.x=element_text(angle=90))
master_plot = grid.arrange(
point_plot,
stats_plot,
nrow=1,
ncol=2 )

@@ -0,0 +1,63 @@
---
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
---
\pagebreak
# Introduction
This report compares the metrics between multiple sets of data generated from
the [Kata Containers report generation scripts](https://github.com/kata-containers/kata-containers/tree/main/metrics/report/README.md).
This report was generated using the data from the **`r resultdirs`** results directories.
\pagebreak
# Container scaling system footprint
This [test](https://github.com/kata-containers/kata-containers/blob/main/metrics/density/footprint_data.sh)
measures the system memory footprint impact whilst running an increasing number
of containers. For this test, [KSM](https://en.wikipedia.org/wiki/Kernel_same-page_merging)
is enabled. The results show how system memory is consumed for different sized
containers, and the average system memory footprint cost and density (how many
containers you can fit per Gb) are calculated.
```{r footprint-density, echo=FALSE, fig.cap="System Memory density"}
test_name_extra="-ksm"
source('footprint-density.R')
rm(test_name_extra)
```
\pagebreak
# Memory used inside container
This [test](https://github.com/kata-containers/kata-containers/blob/main/metrics/density/memory_usage_inside_container.sh)
measures the memory taken up inside a container by the container runtime. It shows the difference between the amount of memory requested for the container and the amount the container can actually 'see'.
The *% Consumed* is the key row in the table, which compares the *Requested* against *Total* values.
```{r mem-in-cont, echo=FALSE, fig.cap="Memory used inside the container"}
source('mem-in-cont.R')
```
\pagebreak
# Container boot lifecycle times
This [test](https://github.com/kata-containers/kata-containers/blob/main/metrics/time/launch_times.sh)
uses the `date` command on the host and in the container, as well as data from the container
kernel `dmesg`, to ascertain how long different phases of the create/boot/run/delete
Docker container lifecycle take for the first launched container.
To decode the stats table: the prefixes are 'to (`2`)' and '`i`n', and the suffixes are '`k`ernel', '`w`orkload' and '`q`uit'. 'tot' is the total time for a complete container start-to-finish cycle.
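For example, `2k` is the 'to kernel' time, `ik` the 'in kernel' time, and `2w` the 'to workload' time.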
```{r lifecycle-time, echo=FALSE, fig.cap="Execution lifecycle times"}
source('lifecycle-time.R')
```
\pagebreak
# Test setup details
This table describes the test system details, as derived from the information contained
in the test results files.
```{r dut, echo=FALSE, fig.cap="System configuration details"}
source('dut-details.R')
```

@@ -0,0 +1,132 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Analyse the runtime component memory footprint data.
library(ggplot2) # ability to plot nicely.
# So we can plot multiple graphs
library(gridExtra) # together.
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable.
suppressMessages(library(jsonlite)) # to load the data.
testnames=c(
"cpu-information"
)
resultsfilesshort=c(
"CPU"
)
data=c()
rstats=c()
rstats_rows=c()
rstats_cols=c()
Gdenom = (1000.0 * 1000.0 * 1000.0)
# For each set of results
for (currentdir in resultdirs) {
dirstats=c()
count=1
# For each results file (only cpu-information here)
for (testname in testnames) {
# R seems not to like double path slashes '//' ?
fname=paste(inputdir, currentdir, testname, '.json', sep="")
if ( !file.exists(fname)) {
warning(paste("Skipping non-existent file: ", fname))
next
}
# Derive the name from the test result dirname
datasetname=basename(currentdir)
datasetvariant=resultsfilesshort[count]
# Import the data
fdata=fromJSON(fname)
fdata=fdata[[testname]]
# Copy the results into shorter, more accessible names
fdata$ips=fdata$Results$"instructions per cycle"$Result
fdata$Gcycles=fdata$Results$cycles$Result / Gdenom
fdata$Ginstructions=fdata$Results$instructions$Result / Gdenom
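# e.g. a raw count of 12,300,000,000 cycles becomes 12.3 Gcycles
# (illustrative numbers).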
fdata$variant=rep(datasetvariant, length(fdata$ips) )
fdata$Runtime=rep(datasetname, length(fdata$ips) )
# Store away the bits we need
data=rbind(data, data.frame(
Result=fdata$ips,
Type="ips",
Runtime=fdata$Runtime,
variant=fdata$variant ) )
data=rbind(data, data.frame(
Result=fdata$Gcycles,
Type="Gcycles",
Runtime=fdata$Runtime,
variant=fdata$variant ) )
data=rbind(data, data.frame(
Result=fdata$Ginstructions,
Type="Ginstr",
Runtime=fdata$Runtime,
variant=fdata$variant ) )
# Store away some stats for the text table
dirstats=rbind(dirstats, round(fdata$ips, digits=2) )
dirstats=rbind(dirstats, round(fdata$Gcycles, digits=2) )
dirstats=rbind(dirstats, round(fdata$Ginstructions, digits=2) )
}
rstats=cbind(rstats, dirstats)
rstats_cols=append(rstats_cols, datasetname)
}
rstats_rows=c("IPS", "GCycles", "GInstr")
unts=c("Ins/Cyc", "G", "G")
rstats=cbind(rstats, unts)
rstats_cols=append(rstats_cols, "Units")
# If we have only 2 sets of results, then we can do some more
# stats math for the text table
if (length(resultdirs) == 2) {
# This is a touch hard wired - but we *know* we only have two
# datasets...
diff=c()
for (n in 1:3) {
difference = (as.double(rstats[n,2]) - as.double(rstats[n,1]))
val = 100 * (difference/as.double(rstats[n,1]))
diff=rbind(diff, round(val, digits=2))
}
rstats=cbind(rstats, diff)
rstats_cols=append(rstats_cols, "Diff %")
}
# Build us a text table of numerical results
stats_plot = suppressWarnings(ggtexttable(data.frame(rstats),
theme=ttheme(base_size=10),
rows=rstats_rows, cols=rstats_cols
))
# plot how samples varied over 'time'
ipsdata <- subset(data, Type %in% c("ips"))
ips_plot <- ggplot() +
geom_bar(data=ipsdata, aes(Type, Result, fill=Runtime), stat="identity", position="dodge") +
xlab("Measure") +
ylab("IPS") +
ggtitle("Instructions Per Cycle") +
ylim(0, NA) +
theme(axis.text.x=element_text(angle=90))
cycdata <- subset(data, Type %in% c("Gcycles", "Ginstr"))
cycles_plot <- ggplot() +
geom_bar(data=cycdata, aes(Type, Result, fill=Runtime), stat="identity", position="dodge", show.legend=FALSE) +
xlab("Measure") +
ylab("Count (G)") +
ggtitle("Cycles and Instructions") +
ylim(0, NA) +
theme(axis.text.x=element_text(angle=90))
master_plot = grid.arrange(
ips_plot,
cycles_plot,
stats_plot,
nrow=2,
ncol=2 )