metrics: Add metrics report R files

This PR adds the R scripts used to generate the metrics report: Device Under Test details, `fio` random read and random write storage results, container density (scaling footprint), boot lifecycle times, memory footprint (with and without KSM, and as seen inside the container), and CPU performance counters.

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
Gabriela Cervantes 2023-08-29 16:45:22 +00:00
parent 08812074d1
commit fce2487971
8 changed files with 1314 additions and 0 deletions
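
Note for reviewers: these scripts are not standalone; each fragment is sourced from the report's R Markdown document, which is expected to define `inputdir`, `resultdirs` and (for the density script) `test_name_extra` before the fragment runs. A minimal sketch of that assumed driver, with hypothetical paths and file names:

# Hypothetical driver fragment - the real report Rmd defines these names.
inputdir="/report/results/"               # root holding one subdir per dataset
resultdirs=c("kata-run/", "runc-run/")    # trailing slashes matter: most scripts
                                          # build paths with paste(..., sep="")
test_name_extra=""                        # suffix used by the density script
source("dut-details.R")                   # hypothetical script file name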


@@ -0,0 +1,122 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Display details for the 'Device Under Test', for all data sets being processed.
suppressMessages(suppressWarnings(library(tidyr))) # for gather().
library(tibble)
suppressMessages(suppressWarnings(library(plyr))) # rbind.fill
# So we can plot multiple graphs
library(gridExtra) # together.
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable.
suppressMessages(library(jsonlite)) # to load the data.
# A list of all the known results files we might find the information inside.
resultsfiles=c(
"boot-times.json",
"memory-footprint.json",
"memory-footprint-ksm.json",
"memory-footprint-inside-container.json"
)
data=c()
stats=c()
stats_names=c()
# For each set of results
for (currentdir in resultdirs) {
dirstats=c()
# Derive the dataset name from the result dirname up front, so it is
# defined even when no results file matches below.
datasetname=basename(currentdir)
for (resultsfile in resultsfiles) {
fname=paste(inputdir, currentdir, resultsfile, sep="/")
if ( !file.exists(fname)) {
#warning(paste("Skipping non-existent file: ", fname))
next
}
# Import the data
fdata=fromJSON(fname)
if (length(fdata$'kata-env') != 0 ) {
# We have kata-runtime data
dirstats=tibble("Run Ver"=as.character(fdata$'kata-env'$Runtime$Version$Semver))
dirstats=cbind(dirstats, "Run SHA"=as.character(fdata$'kata-env'$Runtime$Version$Commit))
pver=as.character(fdata$'kata-env'$Proxy$Version)
pver=sub("^[[:alpha:][:blank:]-]*", "", pver)
# uncomment if you want to drop the commit sha as well
#pver=sub("([[:digit:].]*).*", "\\1", pver)
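# e.g. an illustrative "kata-proxy version 1.11.0-abc123" becomes
# "1.11.0-abc123" (everything up to the first digit is stripped).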
dirstats=cbind(dirstats, "Proxy Ver"=pver)
# Trim the shim string
sver=as.character(fdata$'kata-env'$Shim$Version)
sver=sub("^[[:alpha:][:blank:]-]*", "", sver)
# uncomment if you want to drop the commit sha as well
#sver=sub("([[:digit:].]*).*", "\\1", sver)
dirstats=cbind(dirstats, "Shim Ver"=sver)
# Default QEMU ver string is far too long and noisy - trim.
hver=as.character(fdata$'kata-env'$Hypervisor$Version)
hver=sub("^[[:alpha:][:blank:]]*", "", hver)
hver=sub("([[:digit:].]*).*", "\\1", hver)
dirstats=cbind(dirstats, "Hyper Ver"=hver)
iver=as.character(fdata$'kata-env'$Image$Path)
iver=sub("^[[:alpha:]/-]*", "", iver)
dirstats=cbind(dirstats, "Image Ver"=iver)
kver=as.character(fdata$'kata-env'$Kernel$Path)
kver=sub("^[[:alpha:]/-]*", "", kver)
dirstats=cbind(dirstats, "Guest Krnl"=kver)
dirstats=cbind(dirstats, "Host arch"=as.character(fdata$'kata-env'$Host$Architecture))
dirstats=cbind(dirstats, "Host Distro"=as.character(fdata$'kata-env'$Host$Distro$Name))
dirstats=cbind(dirstats, "Host DistVer"=as.character(fdata$'kata-env'$Host$Distro$Version))
dirstats=cbind(dirstats, "Host Model"=as.character(fdata$'kata-env'$Host$CPU$Model))
dirstats=cbind(dirstats, "Host Krnl"=as.character(fdata$'kata-env'$Host$Kernel))
dirstats=cbind(dirstats, "runtime"=as.character(fdata$test$runtime))
break
} else {
if (length(fdata$'runc-env') != 0 ) {
dirstats=tibble("Run Ver"=as.character(fdata$'runc-env'$Version$Semver))
dirstats=cbind(dirstats, "Run SHA"=as.character(fdata$'runc-env'$Version$Commit))
dirstats=cbind(dirstats, "runtime"=as.character(fdata$test$runtime))
} else {
dirstats=tibble("runtime"="Unknown")
}
break
}
}
if ( length(dirstats) == 0 ) {
warning(paste("No valid data found for directory ", currentdir))
}
# use plyr rbind.fill so we can combine disparate version info frames
stats=rbind.fill(stats, dirstats)
stats_names=rbind(stats_names, datasetname)
}
rownames(stats) = stats_names
# Rotate the tibble so we get data dirs as the columns
spun_stats = as_tibble(cbind(What=names(stats), t(stats)))
# Build us a text table of numerical results
stats_plot = suppressWarnings(ggtexttable(data.frame(spun_stats, check.names=FALSE),
theme=ttheme(base_size=6),
rows=NULL
))
# It may seem odd doing a grid of 1x1, but it should ensure we get a uniform format and
# layout to match the other charts and tables in the report.
master_plot = grid.arrange(
stats_plot,
nrow=1,
ncol=1 )


@@ -0,0 +1,269 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Display details for `fio` random read storage IO tests.
library(ggplot2) # ability to plot nicely
library(gridExtra) # So we can plot multiple graphs together
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable
suppressMessages(library(jsonlite)) # to load the data
suppressMessages(suppressWarnings(library(tidyr))) # for gather
library(tibble)
testnames=c(
"fio-randread-128",
"fio-randread-256",
"fio-randread-512",
"fio-randread-1k",
"fio-randread-2k",
"fio-randread-4k",
"fio-randread-8k",
"fio-randread-16k",
"fio-randread-32k",
"fio-randread-64k"
)
data2=c()
all_ldata=c()
all_ldata2=c()
stats=c()
rstats=c()
rstats_names=c()
# Where to store up the stats for the tables
read_bw_stats=c()
read_iops_stats=c()
read_lat95_stats=c()
read_lat99_stats=c()
# For each set of results
for (currentdir in resultdirs) {
bw_dirstats=c()
iops_dirstats=c()
lat95_dirstats=c()
lat99_dirstats=c()
# Derive the name from the test result dirname
datasetname=basename(currentdir)
for (testname in testnames) {
fname=paste(inputdir, currentdir, testname, '.json', sep="")
if ( !file.exists(fname)) {
#warning(paste("Skipping non-existent file: ", fname))
next
}
# Import the data
fdata=fromJSON(fname)
# De-nest the data stored under the test name
fdata=fdata[[testname]]
blocksize=fdata$Raw$'global options'$bs
# Extract the latency data - it comes as a table of percentiles, so
# we have to do a little work...
clat=data.frame(clat_ns=fdata$Raw$jobs[[1]]$read$clat_ns$percentile)
# Generate a clat data set with 'clean' percentile numbers so
# we can sensibly plot it later on.
clat2=clat
colnames(clat2)<-sub("clat_ns.", "", colnames(clat2))
colnames(clat2)<-sub("0000", "", colnames(clat2))
ldata2=gather(clat2)
colnames(ldata2)[colnames(ldata2)=="key"] <- "percentile"
colnames(ldata2)[colnames(ldata2)=="value"] <- "ms"
ldata2$ms=ldata2$ms/1000000 #ns->ms
ldata2=cbind(ldata2, runtime=rep(datasetname, length(ldata2$percentile)))
ldata2=cbind(ldata2, blocksize=rep(blocksize, length(ldata2$percentile)))
# Pull the 95 and 99 percentile numbers for the boxplot
# Plotting all values for all runtimes and blocksizes is just way too
# noisy to make a meaningful picture, so we use this subset.
# Our values fall more in the range of ms...
pc95data=tibble(percentile=clat$clat_ns.95.000000/1000000)
pc95data=cbind(pc95data, runtime=rep(paste(datasetname, "95pc", sep="-"), length(pc95data$percentile)))
pc99data=tibble(percentile=clat$clat_ns.99.000000/1000000)
pc99data=cbind(pc99data, runtime=rep(paste(datasetname, "99pc", sep="-"), length(pc99data$percentile)))
ldata=rbind(pc95data, pc99data)
ldata=cbind(ldata, blocksize=rep(blocksize, length(ldata$percentile)))
# We want total bandwidth, so that is the sum of the bandwidths
# from all the read 'jobs'.
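# (fio reports each job's 'bw' in KiB/s, so dividing the sum by 1024
# gives MiB/s.)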
mdata=data.frame(read_bw_mps=as.numeric(sum(fdata$Raw$jobs[[1]]$read$bw)/1024))
mdata=cbind(mdata, iops_tot=as.numeric(sum(fdata$Raw$jobs[[1]]$read$iops)))
mdata=cbind(mdata, runtime=rep(datasetname, length(mdata[, "read_bw_mps"]) ))
mdata=cbind(mdata, blocksize=rep(blocksize, length(mdata[, "read_bw_mps"]) ))
# Extract the stats tables
bw_dirstats=rbind(bw_dirstats, round(mdata$read_bw_mps, digits=1))
# Rowname hack to get the blocksize recorded
rownames(bw_dirstats)[nrow(bw_dirstats)]=blocksize
iops_dirstats=rbind(iops_dirstats, round(mdata$iops_tot, digits=1))
rownames(iops_dirstats)[nrow(iops_dirstats)]=blocksize
# And do the 95 and 99 percentiles as tables as well
lat95_dirstats=rbind(lat95_dirstats, round(mean(clat$clat_ns.95.000000)/1000000, digits=1))
rownames(lat95_dirstats)[nrow(lat95_dirstats)]=blocksize
lat99_dirstats=rbind(lat99_dirstats, round(mean(clat$clat_ns.99.000000)/1000000, digits=1))
rownames(lat99_dirstats)[nrow(lat99_dirstats)]=blocksize
# Collect up as sets across all files and runtimes.
data2=rbind(data2, mdata)
all_ldata=rbind(all_ldata, ldata)
all_ldata2=rbind(all_ldata2, ldata2)
}
# Collect up for each dir we process into a column
read_bw_stats=cbind(read_bw_stats, bw_dirstats)
colnames(read_bw_stats)[ncol(read_bw_stats)]=datasetname
read_iops_stats=cbind(read_iops_stats, iops_dirstats)
colnames(read_iops_stats)[ncol(read_iops_stats)]=datasetname
read_lat95_stats=cbind(read_lat95_stats, lat95_dirstats)
colnames(read_lat95_stats)[ncol(read_lat95_stats)]=datasetname
read_lat99_stats=cbind(read_lat99_stats, lat99_dirstats)
colnames(read_lat99_stats)[ncol(read_lat99_stats)]=datasetname
}
# To get a nice looking table, we need to extract the rownames into their
# own column
read_bw_stats=cbind(Bandwidth=rownames(read_bw_stats), read_bw_stats)
read_bw_stats=cbind(read_bw_stats, Units=rep("MiB/s", nrow(read_bw_stats)))
read_iops_stats=cbind(IOPS=rownames(read_iops_stats), read_iops_stats)
read_iops_stats=cbind(read_iops_stats, Units=rep("IOP/s", nrow(read_iops_stats)))
read_lat95_stats=cbind('lat 95pc'=rownames(read_lat95_stats), read_lat95_stats)
read_lat95_stats=cbind(read_lat95_stats, Units=rep("ms", nrow(read_lat95_stats)))
read_lat99_stats=cbind('lat 99pc'=rownames(read_lat99_stats), read_lat99_stats)
read_lat99_stats=cbind(read_lat99_stats, Units=rep("ms", nrow(read_lat99_stats)))
# Bandwidth line plot
read_bw_line_plot <- ggplot() +
geom_line( data=data2, aes(blocksize, read_bw_mps, group=runtime, color=runtime)) +
ylim(0, NA) +
ggtitle("Random Read total bandwidth") +
xlab("Blocksize") +
ylab("Bandwidth (MiB/s)") +
theme(
axis.text.x=element_text(angle=90),
legend.position=c(0.35,0.8),
legend.title=element_text(size=5),
legend.text=element_text(size=5),
legend.background = element_rect(fill=alpha('blue', 0.2))
)
# IOPS line plot
read_iops_line_plot <- ggplot() +
geom_line( data=data2, aes(blocksize, iops_tot, group=runtime, color=runtime)) +
ylim(0, NA) +
ggtitle("Random Read total IOPS") +
xlab("Blocksize") +
ylab("IOPS") +
theme(
axis.text.x=element_text(angle=90),
legend.position=c(0.35,0.8),
legend.title=element_text(size=5),
legend.text=element_text(size=5),
legend.background = element_rect(fill=alpha('blue', 0.2))
)
# 95 and 99 percentile box plot
read_clat_box_plot <- ggplot() +
geom_boxplot( data=all_ldata, aes(blocksize, percentile, color=runtime)) +
stat_summary( data=all_ldata, aes(blocksize, percentile, group=runtime, color=runtime), fun=mean, geom="line") + # 'fun' replaces the deprecated 'fun.y' (ggplot2 >= 3.3)
ylim(0, NA) +
ggtitle("Random Read completion latency", subtitle="95&99 percentiles, boxplot over jobs") +
xlab("Blocksize") +
ylab("Latency (ms)") +
theme(axis.text.x=element_text(angle=90)) +
# Use the 'paired' colour matrix as we are setting these up as pairs of
# 95 and 99 percentiles, and it is much easier to visually group those to
# each runtime if we use this colourmap.
scale_colour_brewer(palette="Paired")
# it would be nice to use the same legend theme as the other plots on this
# page, but because of the number of entries it tends to flow off the picture.
# theme(
# axis.text.x=element_text(angle=90),
# legend.position=c(0.35,0.8),
# legend.title=element_text(size=5),
# legend.text=element_text(size=5),
# legend.background = element_rect(fill=alpha('blue', 0.2))
# )
# As the boxplot is actually quite hard to interpret, also show a linegraph
# of all the percentiles for a single blocksize.
which_blocksize='4k'
clat_line_subtitle=paste("For blocksize", which_blocksize, sep=" ")
single_blocksize=subset(all_ldata2, blocksize==which_blocksize)
clat_line=aggregate(
single_blocksize$ms,
by=list(
percentile=single_blocksize$percentile,
blocksize=single_blocksize$blocksize,
runtime=single_blocksize$runtime
),
FUN=mean
)
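# aggregate() names the computed mean column 'x' by default, which is why
# the line plot below maps 'x' to the y axis.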
clat_line$percentile=as.numeric(clat_line$percentile)
read_clat_line_plot <- ggplot() +
geom_line( data=clat_line, aes(percentile, x, group=runtime, color=runtime)) +
ylim(0, NA) +
ggtitle("Random Read completion latency percentiles", subtitle=clat_line_subtitle) +
xlab("Percentile") +
ylab("Time (ms)") +
theme(
axis.text.x=element_text(angle=90),
legend.position=c(0.35,0.8),
legend.title=element_text(size=5),
legend.text=element_text(size=5),
legend.background = element_rect(fill=alpha('blue', 0.2))
)
# Output the pretty pictures
graphics_plot = grid.arrange(
read_bw_line_plot,
read_iops_line_plot,
read_clat_box_plot,
read_clat_line_plot,
nrow=2,
ncol=2 )
# A bit of an odd tweak to force a pagebreak between the pictures and
# the tables. This only works because we have a `results='asis'` in the Rmd
# R fragment.
cat("\n\n\\pagebreak\n")
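# For reference, the knitr chunk sourcing this fragment is assumed to look
# something like (hypothetical chunk label and file name):
#   ```{r fio-rand-read, echo=FALSE, results='asis'}
#   source("fio-rand-read.R")
#   ```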
read_bw_stats_plot = suppressWarnings(ggtexttable(read_bw_stats,
theme=ttheme(base_size=10),
rows=NULL
))
read_iops_stats_plot = suppressWarnings(ggtexttable(read_iops_stats,
theme=ttheme(base_size=10),
rows=NULL
))
read_lat95_stats_plot = suppressWarnings(ggtexttable(read_lat95_stats,
theme=ttheme(base_size=10),
rows=NULL
))
read_lat99_stats_plot = suppressWarnings(ggtexttable(read_lat99_stats,
theme=ttheme(base_size=10),
rows=NULL
))
# and then the statistics tables
stats_plot = grid.arrange(
read_bw_stats_plot,
read_iops_stats_plot,
read_lat95_stats_plot,
read_lat99_stats_plot,
nrow=4,
ncol=1 )


@@ -0,0 +1,260 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Display details for `fio` random write storage IO tests.
library(ggplot2) # ability to plot nicely
library(gridExtra) # So we can plot multiple graphs together
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable
suppressMessages(library(jsonlite)) # to load the data
suppressMessages(suppressWarnings(library(tidyr))) # for gather
library(tibble)
testnames=c(
"fio-randwrite-128",
"fio-randwrite-256",
"fio-randwrite-512",
"fio-randwrite-1k",
"fio-randwrite-2k",
"fio-randwrite-4k",
"fio-randwrite-8k",
"fio-randwrite-16k",
"fio-randwrite-32k",
"fio-randwrite-64k"
)
data2=c()
all_ldata=c()
all_ldata2=c()
stats=c()
rstats=c()
rstats_names=c()
# Where to store up the stats for the tables
write_bw_stats=c()
write_iops_stats=c()
write_lat95_stats=c()
write_lat99_stats=c()
# For each set of results
for (currentdir in resultdirs) {
bw_dirstats=c()
iops_dirstats=c()
lat95_dirstats=c()
lat99_dirstats=c()
# Derive the name from the test result dirname
datasetname=basename(currentdir)
for (testname in testnames) {
fname=paste(inputdir, currentdir, testname, '.json', sep="")
if ( !file.exists(fname)) {
#warning(paste("Skipping non-existent file: ", fname))
next
}
# Import the data
fdata=fromJSON(fname)
# De-nest the test specific named data
fdata=fdata[[testname]]
blocksize=fdata$Raw$'global options'$bs
# Extract the latency data - it comes as a table of percentiles, so
# we have to do a little work...
clat=data.frame(clat_ns=fdata$Raw$jobs[[1]]$write$clat_ns$percentile)
# Generate a clat data set with 'clean' percentile numbers so
# we can sensibly plot it later on.
clat2=clat
colnames(clat2)<-sub("clat_ns.", "", colnames(clat2))
colnames(clat2)<-sub("0000", "", colnames(clat2))
ldata2=gather(clat2)
colnames(ldata2)[colnames(ldata2)=="key"] <- "percentile"
colnames(ldata2)[colnames(ldata2)=="value"] <- "ms"
ldata2$ms=ldata2$ms/1000000 #ns->ms
ldata2=cbind(ldata2, runtime=rep(datasetname, length(ldata2$percentile)))
ldata2=cbind(ldata2, blocksize=rep(blocksize, length(ldata2$percentile)))
# Pull the 95 and 99 percentiles for the boxplot diagram.
# Our values fall more in the range of ms...
pc95data=tibble(percentile=clat$clat_ns.95.000000/1000000)
pc95data=cbind(pc95data, runtime=rep(paste(datasetname, "95pc", sep="-"), length(pc95data$percentile)))
pc99data=tibble(percentile=clat$clat_ns.99.000000/1000000)
pc99data=cbind(pc99data, runtime=rep(paste(datasetname, "99pc", sep="-"), length(pc99data$percentile)))
ldata=rbind(pc95data, pc99data)
ldata=cbind(ldata, blocksize=rep(blocksize, length(ldata$percentile)))
# We want total bandwidth, so that is the sum of the bandwidths
# from all the write 'jobs'.
mdata=data.frame(write_bw_mps=as.numeric(sum(fdata$Raw$jobs[[1]]$write$bw)/1024))
mdata=cbind(mdata, iops_tot=as.numeric(sum(fdata$Raw$jobs[[1]]$write$iops)))
mdata=cbind(mdata, runtime=rep(datasetname, length(mdata[, "write_bw_mps"]) ))
mdata=cbind(mdata, blocksize=rep(blocksize, length(mdata[, "write_bw_mps"]) ))
# Extract the stats tables
bw_dirstats=rbind(bw_dirstats, round(mdata$write_bw_mps, digits=1))
# Rowname hack to get the blocksize recorded
rownames(bw_dirstats)[nrow(bw_dirstats)]=blocksize
iops_dirstats=rbind(iops_dirstats, round(mdata$iops_tot, digits=1))
rownames(iops_dirstats)[nrow(iops_dirstats)]=blocksize
# And do the 95 and 99 percentiles as tables as well
lat95_dirstats=rbind(lat95_dirstats, round(mean(clat$clat_ns.95.000000)/1000000, digits=1))
rownames(lat95_dirstats)[nrow(lat95_dirstats)]=blocksize
lat99_dirstats=rbind(lat99_dirstats, round(mean(clat$clat_ns.99.000000)/1000000, digits=1))
rownames(lat99_dirstats)[nrow(lat99_dirstats)]=blocksize
# Store away as single sets
data2=rbind(data2, mdata)
all_ldata=rbind(all_ldata, ldata)
all_ldata2=rbind(all_ldata2, ldata2)
}
# Collect up for each dir we process into a column
write_bw_stats=cbind(write_bw_stats, bw_dirstats)
colnames(write_bw_stats)[ncol(write_bw_stats)]=datasetname
write_iops_stats=cbind(write_iops_stats, iops_dirstats)
colnames(write_iops_stats)[ncol(write_iops_stats)]=datasetname
write_lat95_stats=cbind(write_lat95_stats, lat95_dirstats)
colnames(write_lat95_stats)[ncol(write_lat95_stats)]=datasetname
write_lat99_stats=cbind(write_lat99_stats, lat99_dirstats)
colnames(write_lat99_stats)[ncol(write_lat99_stats)]=datasetname
}
# To get a nice looking table, we need to extract the rownames into their
# own column
write_bw_stats=cbind(Bandwidth=rownames(write_bw_stats), write_bw_stats)
write_bw_stats=cbind(write_bw_stats, Units=rep("MiB/s", nrow(write_bw_stats)))
write_iops_stats=cbind(IOPS=rownames(write_iops_stats), write_iops_stats)
write_iops_stats=cbind(write_iops_stats, Units=rep("IOP/s", nrow(write_iops_stats)))
write_lat95_stats=cbind('lat 95pc'=rownames(write_lat95_stats), write_lat95_stats)
write_lat95_stats=cbind(write_lat95_stats, Units=rep("ms", nrow(write_lat95_stats)))
write_lat99_stats=cbind('lat 99pc'=rownames(write_lat99_stats), write_lat99_stats)
write_lat99_stats=cbind(write_lat99_stats, Units=rep("ms", nrow(write_lat99_stats)))
# lineplot of total bandwidth across blocksizes.
write_bw_line_plot <- ggplot() +
geom_line( data=data2, aes(blocksize, write_bw_mps, group=runtime, color=runtime)) +
ylim(0, NA) +
ggtitle("Random Write total bandwidth") +
xlab("Blocksize") +
ylab("Bandwidth (MiB/s)") +
theme(
axis.text.x=element_text(angle=90),
legend.position=c(0.35,0.8),
legend.title=element_text(size=5),
legend.text=element_text(size=5),
legend.background = element_rect(fill=alpha('blue', 0.2))
)
# lineplot of IOPS across blocksizes
write_iops_line_plot <- ggplot() +
geom_line( data=data2, aes(blocksize, iops_tot, group=runtime, color=runtime)) +
ylim(0, NA) +
ggtitle("Random Write total IOPS") +
xlab("Blocksize") +
ylab("IOPS") +
theme(
axis.text.x=element_text(angle=90),
legend.position=c(0.35,0.8),
legend.title=element_text(size=5),
legend.text=element_text(size=5),
legend.background = element_rect(fill=alpha('blue', 0.2))
)
# boxplot of 95 and 99 percentiles covering the parallel jobs, shown across
# the blocksizes.
write_clat_box_plot <- ggplot() +
geom_boxplot( data=all_ldata, aes(blocksize, percentile, color=runtime)) +
stat_summary( data=all_ldata, aes(blocksize, percentile, group=runtime, color=runtime), fun=mean, geom="line") +
ylim(0, NA) +
ggtitle("Random Write completion latency", subtitle="95&99 Percentiles, boxplot across jobs") +
xlab("Blocksize") +
ylab("Latency (ms)") +
theme(axis.text.x=element_text(angle=90)) +
# Use the 'paired' colour matrix as we are setting these up as pairs of
# 95 and 99 percentiles, and it is much easier to visually group those to
# each runtime if we use this colourmap.
scale_colour_brewer(palette="Paired")
# completion latency line plot across the percentiles, for a specific blocksize only
# as otherwise the graph would be far too noisy.
which_blocksize='4k'
clat_line_subtitle=paste("For blocksize", which_blocksize, sep=" ")
single_blocksize=subset(all_ldata2, blocksize==which_blocksize)
clat_line=aggregate(
single_blocksize$ms,
by=list(
percentile=single_blocksize$percentile,
blocksize=single_blocksize$blocksize,
runtime=single_blocksize$runtime
),
FUN=mean
)
clat_line$percentile=as.numeric(clat_line$percentile)
write_clat_line_plot <- ggplot() +
geom_line( data=clat_line, aes(percentile, x, group=runtime, color=runtime)) +
ylim(0, NA) +
ggtitle("Random Write completion latency percentiles", subtitle=clat_line_subtitle) +
xlab("Percentile") +
ylab("Time (ms)") +
theme(
axis.text.x=element_text(angle=90),
legend.position=c(0.35,0.8),
legend.title=element_text(size=5),
legend.text=element_text(size=5),
legend.background = element_rect(fill=alpha('blue', 0.2))
)
master_plot = grid.arrange(
write_bw_line_plot,
write_iops_line_plot,
write_clat_box_plot,
write_clat_line_plot,
nrow=2,
ncol=2 )
# A bit of an odd tweak to force a pagebreak between the pictures and
# the tables. This only works because we have a `results='asis'` in the Rmd
# R fragment.
cat("\n\n\\pagebreak\n")
write_bw_stats_plot = suppressWarnings(ggtexttable(write_bw_stats,
theme=ttheme(base_size=10),
rows=NULL
))
write_iops_stats_plot = suppressWarnings(ggtexttable(write_iops_stats,
theme=ttheme(base_size=10),
rows=NULL
))
write_lat95_stats_plot = suppressWarnings(ggtexttable(write_lat95_stats,
theme=ttheme(base_size=10),
rows=NULL
))
write_lat99_stats_plot = suppressWarnings(ggtexttable(write_lat99_stats,
theme=ttheme(base_size=10),
rows=NULL
))
# and then the statistics tables
stats_plot = grid.arrange(
write_bw_stats_plot,
write_iops_stats_plot,
write_lat95_stats_plot,
write_lat99_stats_plot,
nrow=4,
ncol=1 )


@@ -0,0 +1,111 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Show system memory reduction, and hence container 'density', by analysing the
# scaling footprint data results and the 'system free' memory.
library(ggplot2) # ability to plot nicely.
# So we can plot multiple graphs
library(gridExtra) # together.
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable.
suppressMessages(library(jsonlite)) # to load the data.
testnames=c(
paste("footprint-busybox.*", test_name_extra, sep=""),
paste("footprint-mysql.*", test_name_extra, sep=""),
paste("footprint-elasticsearch.*", test_name_extra, sep="")
)
data=c()
stats=c()
rstats=c()
rstats_names=c()
for (currentdir in resultdirs) {
count=1
dirstats=c()
for (testname in testnames) {
matchdir=paste(inputdir, currentdir, sep="")
matchfile=paste(testname, '\\.json', sep="")
files=list.files(matchdir, pattern=matchfile)
if ( length(files) == 0 ) {
#warning(paste("Pattern [", matchdir, "/", matchfile, "] matched nothing"))
}
for (ffound in files) {
fname=paste(inputdir, currentdir, ffound, sep="")
if ( !file.exists(fname)) {
warning(paste("Skipping non-existent file: ", fname))
next
}
# Derive the name from the test result dirname
datasetname=basename(currentdir)
# Import the data
fdata=fromJSON(fname)
# De-nest the test name specific data
shortname=substr(ffound, 1, nchar(ffound)-nchar(".json"))
fdata=fdata[[shortname]]
payload=fdata$Config$payload
testname=paste(datasetname, payload)
cdata=data.frame(avail_mb=as.numeric(fdata$Results$system$avail)/(1024*1024))
cdata=cbind(cdata, avail_decr=as.numeric(fdata$Results$system$avail_decr))
cdata=cbind(cdata, count=seq_len(length(cdata[, "avail_mb"])))
cdata=cbind(cdata, testname=rep(testname, length(cdata[, "avail_mb"]) ))
cdata=cbind(cdata, payload=rep(payload, length(cdata[, "avail_mb"]) ))
cdata=cbind(cdata, dataset=rep(datasetname, length(cdata[, "avail_mb"]) ))
# Gather our statistics
sdata=data.frame(num_containers=length(cdata[, "avail_mb"]))
# Pick out the last avail_decr value - which in theory should be
# the most we have consumed...
sdata=cbind(sdata, mem_consumed=cdata[, "avail_decr"][length(cdata[, "avail_decr"])])
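# (equivalent to tail(cdata$avail_decr, 1))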
sdata=cbind(sdata, avg_bytes_per_c=sdata$mem_consumed / sdata$num_containers)
sdata=cbind(sdata, runtime=testname)
# Store away as a single set
data=rbind(data, cdata)
stats=rbind(stats, sdata)
s = c(
"Test"=testname,
"n"=sdata$num_containers,
"size"=(sdata$mem_consumed) / 1024,
"kb/n"=round((sdata$mem_consumed / sdata$num_containers) / 1024, digits=1),
"n/Gb"= round((1*1024*1024*1024) / (sdata$mem_consumed / sdata$num_containers), digits=1)
)
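# 'kb/n' is the average KB consumed per container; 'n/Gb' estimates how
# many containers would fit in 1 GiB at that average cost.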
rstats=rbind(rstats, s)
count = count + 1
}
}
}
# Set up the text table headers
colnames(rstats)=c("Test", "n", "Tot_Kb", "avg_Kb", "n_per_Gb")
# Build us a text table of numerical results
stats_plot = suppressWarnings(ggtexttable(data.frame(rstats),
theme=ttheme(base_size=10),
rows=NULL
))
# plot how samples varied over 'time'
line_plot <- ggplot() +
geom_point( data=data, aes(count, avail_mb, group=testname, color=payload, shape=dataset)) +
geom_line( data=data, aes(count, avail_mb, group=testname, color=payload)) +
xlab("Containers") +
ylab("System Avail (Mb)") +
ggtitle("System Memory free") +
ylim(0, NA) +
theme(axis.text.x=element_text(angle=90))
master_plot = grid.arrange(
line_plot,
stats_plot,
nrow=2,
ncol=1 )


@@ -0,0 +1,157 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Display how long the various phases of a container lifecycle (run, execute,
# die, etc.) take.
library(ggplot2) # ability to plot nicely.
suppressMessages(suppressWarnings(library(tidyr))) # for gather().
# So we can plot multiple graphs
library(gridExtra) # together.
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable.
suppressMessages(library(jsonlite)) # to load the data.
testnames=c(
"boot-times"
)
data=c()
stats=c()
rstats=c()
rstats_names=c()
# For each set of results
for (currentdir in resultdirs) {
count=1
dirstats=c()
for (testname in testnames) {
fname=paste(inputdir, currentdir, testname, '.json', sep="")
if ( !file.exists(fname)) {
warning(paste("Skipping non-existent file: ", fname))
next
}
# Derive the name from the test result dirname
datasetname=basename(currentdir)
# Import the data
fdata=fromJSON(fname)
# De-nest the test specific name
fdata=fdata[[testname]]
cdata=data.frame(workload=as.numeric(fdata$Results$'to-workload'$Result))
cdata=cbind(cdata, quit=as.numeric(fdata$Results$'to-quit'$Result))
cdata=cbind(cdata, tokernel=as.numeric(fdata$Results$'to-kernel'$Result))
cdata=cbind(cdata, inkernel=as.numeric(fdata$Results$'in-kernel'$Result))
cdata=cbind(cdata, total=as.numeric(fdata$Results$'total'$Result))
cdata=cbind(cdata, count=seq_len(length(cdata[,"workload"])))
cdata=cbind(cdata, runtime=rep(datasetname, length(cdata[, "workload"]) ))
# Calculate some stats for total time
sdata=data.frame(workload_mean=mean(cdata$workload))
sdata=cbind(sdata, workload_min=min(cdata$workload))
sdata=cbind(sdata, workload_max=max(cdata$workload))
sdata=cbind(sdata, workload_sd=sd(cdata$workload))
sdata=cbind(sdata, workload_cov=((sdata$workload_sd / sdata$workload_mean) * 100))
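# 'cov' is the coefficient of variation: the sd as a percentage of the mean.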
sdata=cbind(sdata, runtime=datasetname)
sdata=cbind(sdata, quit_mean = mean(cdata$quit))
sdata=cbind(sdata, quit_min = min(cdata$quit))
sdata=cbind(sdata, quit_max = max(cdata$quit))
sdata=cbind(sdata, quit_sd = sd(cdata$quit))
sdata=cbind(sdata, quit_cov = (sdata$quit_sd / sdata$quit_mean) * 100)
sdata=cbind(sdata, tokernel_mean = mean(cdata$tokernel))
sdata=cbind(sdata, inkernel_mean = mean(cdata$inkernel))
sdata=cbind(sdata, total_mean = mean(cdata$total))
# Store away as a single set
data=rbind(data, cdata)
stats=rbind(stats, sdata)
# Store away some stats for the text table
dirstats[count]=round(sdata$tokernel_mean, digits=2)
count = count + 1
dirstats[count]=round(sdata$inkernel_mean, digits=2)
count = count + 1
dirstats[count]=round(sdata$workload_mean, digits=2)
count = count + 1
dirstats[count]=round(sdata$quit_mean, digits=2)
count = count + 1
dirstats[count]=round(sdata$total_mean, digits=2)
count = count + 1
}
rstats=rbind(rstats, dirstats)
rstats_names=rbind(rstats_names, datasetname)
}
unts=c("s", "s", "s", "s", "s")
rstats=rbind(rstats, unts)
rstats_names=rbind(rstats_names, "Units")
# If we have only 2 sets of results, then we can do some more
# stats math for the text table
if (length(resultdirs) == 2) {
# This is a touch hard wired - but we *know* we only have two
# datasets...
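# e.g. if dataset 1 took 0.8s and dataset 2 took 1.0s, Diff shows
# 100*(1.0-0.8)/0.8 = 25 %.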
diff=c()
for( i in 1:5) {
difference = as.double(rstats[2,i]) - as.double(rstats[1,i])
val = 100 * (difference/as.double(rstats[1,i]))
diff[i] = paste(round(val, digits=2), "%", sep=" ")
}
rstats=rbind(rstats, diff)
rstats_names=rbind(rstats_names, "Diff")
}
rstats=cbind(rstats_names, rstats)
# Set up the text table headers. The columns follow the order the stats
# were stored above: to-kernel (2k), in-kernel (ik), to-workload (2w),
# to-quit (2q) and total (tot).
colnames(rstats)=c("Results", "2k", "ik", "2w", "2q", "tot")
# Build us a text table of numerical results
stats_plot = suppressWarnings(ggtexttable(data.frame(rstats, check.names=FALSE),
theme=ttheme(base_size=8),
rows=NULL
))
# plot how samples varied over 'time'
line_plot <- ggplot() +
geom_line( data=data, aes(count, workload, color=runtime)) +
geom_smooth( data=data, aes(count, workload, color=runtime), se=FALSE, method="loess") +
xlab("Iteration") +
ylab("Time (s)") +
ggtitle("Boot to workload", subtitle="First container") +
ylim(0, NA) +
theme(axis.text.x=element_text(angle=90))
boot_boxplot <- ggplot() +
geom_boxplot( data=data, aes(runtime, workload, color=runtime), show.legend=FALSE) +
ylim(0, NA) +
ylab("Time (s)")
# convert the stats to a long format so we can more easily do a side-by-side barplot
longstats <- gather(stats, measure, value, workload_mean, quit_mean, inkernel_mean, tokernel_mean, total_mean)
bar_plot <- ggplot() +
geom_bar( data=longstats, aes(measure, value, fill=runtime), stat="identity", position="dodge", show.legend=FALSE) +
xlab("Phase") +
ylab("Time (s)") +
ggtitle("Lifecycle phase times", subtitle="Mean") +
ylim(0, NA) +
theme(axis.text.x=element_text(angle=90))
master_plot = grid.arrange(
bar_plot,
line_plot,
stats_plot,
boot_boxplot,
nrow=2,
ncol=2 )


@@ -0,0 +1,142 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Analyse the memory footprint data measured inside the container.
library(ggplot2) # ability to plot nicely.
# So we can plot multiple graphs
library(gridExtra) # together.
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable.
suppressMessages(library(jsonlite)) # to load the data.
testnames=c(
"memory-footprint-inside-container"
)
data=c()
rstats=c()
rstats_rows=c()
rstats_cols=c()
# For each set of results
for (currentdir in resultdirs) {
dirstats=c()
# For each memory footprint results file
for (testname in testnames) {
# R seems not to like double path slashes '//' ?
fname=paste(inputdir, currentdir, testname, '.json', sep="")
if ( !file.exists(fname)) {
warning(paste("Skipping non-existent file: ", fname))
next
}
# Derive the name from the test result dirname
datasetname=basename(currentdir)
# Import the data
fdata=fromJSON(fname)
fdata=fdata[[testname]]
# Copy the average results into shorter, more accessible names
fdata$requested=fdata$Results$memrequest$Result
fdata$total=fdata$Results$memtotal$Result
fdata$free=fdata$Results$memfree$Result
fdata$avail=fdata$Results$memavailable$Result
# And lets work out what % we have 'lost' between the amount requested
# and the total the container actually sees.
fdata$lost=fdata$requested - fdata$total
fdata$pctotal= 100 * (fdata$lost/ fdata$requested)
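# e.g. if 2048 KB were requested and the container sees 1920 KB, then
# lost = 128 KB and pctotal = 6.25 %.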
fdata$Runtime=rep(datasetname, length(fdata$requested) )
# Store away the bits we need
data=rbind(data, data.frame(
Result=fdata$requested,
Type="requested",
Runtime=fdata$Runtime ))
data=rbind(data, data.frame(
Result=fdata$total,
Type="total",
Runtime=fdata$Runtime ))
data=rbind(data, data.frame(
Result=fdata$free,
Type="free",
Runtime=fdata$Runtime ))
data=rbind(data, data.frame(
Result=fdata$avail,
Type="avail",
Runtime=fdata$Runtime ))
data=rbind(data, data.frame(
Result=fdata$lost,
Type="lost",
Runtime=fdata$Runtime ))
data=rbind(data, data.frame(
Result=fdata$pctotal,
Type="% consumed",
Runtime=fdata$Runtime ))
# Store away some stats for the text table
dirstats=rbind(dirstats, round(fdata$requested, digits=2) )
dirstats=rbind(dirstats, round(fdata$total, digits=2) )
dirstats=rbind(dirstats, round(fdata$free, digits=2) )
dirstats=rbind(dirstats, round(fdata$avail, digits=2) )
dirstats=rbind(dirstats, round(fdata$lost, digits=2) )
dirstats=rbind(dirstats, round(fdata$pctotal, digits=2) )
}
rstats=cbind(rstats, dirstats)
rstats_cols=append(rstats_cols, datasetname)
}
rstats_rows=c("Requested", "Total", "Free", "Avail", "Consumed", "% Consumed")
unts=c("Kb", "Kb", "Kb", "Kb", "Kb", "%")
rstats=cbind(rstats, unts)
rstats_cols=append(rstats_cols, "Units")
# If we have only 2 sets of results, then we can do some more
# stats math for the text table
if (length(resultdirs) == 2) {
# This is a touch hard wired - but we *know* we only have two
# datasets...
diff=c()
# Just the first five entries - a diff is meaningless for the % consumed row
for (n in 1:5) {
difference = (as.double(rstats[n,2]) - as.double(rstats[n,1]))
val = 100 * (difference/as.double(rstats[n,1]))
diff=rbind(diff, round(val, digits=2))
}
# Add a blank diff entry for the % consumed row
diff=rbind(diff, "")
rstats=cbind(rstats, diff)
rstats_cols=append(rstats_cols, "Diff %")
}
# Build us a text table of numerical results
stats_plot = suppressWarnings(ggtexttable(data.frame(rstats),
theme=ttheme(base_size=10),
rows=rstats_rows, cols=rstats_cols
))
bardata <- subset(data, Type %in% c("requested", "total", "free", "avail"))
# Bar plot of the memory measures
barplot <- ggplot() +
geom_bar(data=bardata, aes(Type, Result, fill=Runtime), stat="identity", position="dodge") +
xlab("Measure") +
ylab("Size (Kb)") +
ggtitle("In-container memory statistics") +
ylim(0, NA) +
theme(axis.text.x=element_text(angle=90))
master_plot = grid.arrange(
barplot,
stats_plot,
nrow=2,
ncol=1 )


@@ -0,0 +1,121 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Analyse the runtime component memory footprint data.
library(ggplot2) # ability to plot nicely.
# So we can plot multiple graphs
library(gridExtra) # together.
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable.
suppressMessages(library(jsonlite)) # to load the data.
testnames=c(
"memory-footprint",
"memory-footprint-ksm"
)
resultsfilesshort=c(
"noKSM",
"KSM"
)
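# (variant labels matching the testnames above, indexed via 'count' below)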
data=c()
rstats=c()
rstats_names=c()
# For each set of results
for (currentdir in resultdirs) {
count=1
dirstats=c()
# For the two different types of memory footprint measures
for (testname in testnames) {
# R seems not to like double path slashes '//' ?
fname=paste(inputdir, currentdir, testname, '.json', sep="")
if ( !file.exists(fname)) {
warning(paste("Skipping non-existent file: ", fname))
next
}
# Derive the name from the test result dirname
datasetname=basename(currentdir)
datasetvariant=resultsfilesshort[count]
# Import the data
fdata=fromJSON(fname)
fdata=fdata[[testname]]
# Copy the average result into a shorter, more accessible name
fdata$Result=fdata$Results$average$Result
fdata$variant=rep(datasetvariant, length(fdata$Result) )
fdata$Runtime=rep(datasetname, length(fdata$Result) )
fdata$Count=seq_len(length(fdata$Result))
# Calculate some stats
fdata.mean = mean(fdata$Result)
fdata.min = min(fdata$Result)
fdata.max = max(fdata$Result)
fdata.sd = sd(fdata$Result)
fdata.cov = (fdata.sd / fdata.mean) * 100
# Store away the bits we need
data=rbind(data, data.frame(
Result=fdata$Result,
Count=fdata$Count,
Runtime=fdata$Runtime,
variant=fdata$variant ) )
# Store away some stats for the text table
dirstats[count]=round(fdata.mean, digits=2)
count = count + 1
}
rstats=rbind(rstats, dirstats)
rstats_names=rbind(rstats_names, datasetname)
}
rstats=cbind(rstats_names, rstats)
unts=rep("Kb", length(resultdirs))
# If we have only 2 sets of results, then we can do some more
# stats math for the text table
if (length(resultdirs) == 2) {
# This is a touch hard wired - but we *know* we only have two
# datasets...
diff=c("diff")
difference = (as.double(rstats[2,2]) - as.double(rstats[1,2]))
val = 100 * (difference/as.double(rstats[1,2]))
diff[2] = round(val, digits=2)
difference = (as.double(rstats[2,3]) - as.double(rstats[1,3]))
val = 100 * (difference/as.double(rstats[1,3]))
diff[3] = round(val, digits=2)
rstats=rbind(rstats, diff)
unts[3]="%"
}
rstats=cbind(rstats, unts)
# Set up the text table headers
colnames(rstats)=c("Results", resultsfilesshort, "Units")
# Build us a text table of numerical results
stats_plot = suppressWarnings(ggtexttable(data.frame(rstats),
theme=ttheme(base_size=10),
rows=NULL
))
# plot the individual samples for each dataset
point_plot <- ggplot() +
geom_point( data=data, aes(Runtime, Result, color=variant), position=position_dodge(0.1)) +
xlab("Dataset") +
ylab("Size (Kb)") +
ggtitle("Average PSS footprint", subtitle="per container") +
ylim(0, NA) +
theme(axis.text.x=element_text(angle=90))
master_plot = grid.arrange(
point_plot,
stats_plot,
nrow=1,
ncol=2 )


@@ -0,0 +1,132 @@
#!/usr/bin/env Rscript
# Copyright (c) 2018-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Analyse the CPU performance counter ('cpu-information') data.
library(ggplot2) # ability to plot nicely.
# So we can plot multiple graphs
library(gridExtra) # together.
suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable.
suppressMessages(library(jsonlite)) # to load the data.
testnames=c(
"cpu-information"
)
resultsfilesshort=c(
"CPU"
)
data=c()
rstats=c()
rstats_rows=c()
rstats_cols=c()
Gdenom = (1000.0 * 1000.0 * 1000.0)
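# Scale raw perf counter values down to decimal giga (1e9) units.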
# For each set of results
for (currentdir in resultdirs) {
dirstats=c()
# For the single 'cpu-information' results file
for (testname in testnames) {
# R seems not to like double path slashes '//' ?
fname=paste(inputdir, currentdir, testname, '.json', sep="")
if ( !file.exists(fname)) {
warning(paste("Skipping non-existent file: ", fname))
next
}
# Derive the name from the test result dirname
datasetname=basename(currentdir)
datasetvariant=resultsfilesshort[1] # there is only one results file type here
# Import the data
fdata=fromJSON(fname)
fdata=fdata[[testname]]
# Copy the average results into shorter, more accessible names
fdata$ips=fdata$Results$"instructions per cycle"$Result
fdata$Gcycles=fdata$Results$cycles$Result / Gdenom
fdata$Ginstructions=fdata$Results$instructions$Result / Gdenom
fdata$variant=rep(datasetvariant, length(fdata$ips) )
fdata$Runtime=rep(datasetname, length(fdata$ips) )
# Store away the bits we need
data=rbind(data, data.frame(
Result=fdata$ips,
Type="ips",
Runtime=fdata$Runtime,
variant=fdata$variant ) )
data=rbind(data, data.frame(
Result=fdata$Gcycles,
Type="Gcycles",
Runtime=fdata$Runtime,
variant=fdata$variant ) )
data=rbind(data, data.frame(
Result=fdata$Ginstructions,
Type="Ginstr",
Runtime=fdata$Runtime,
variant=fdata$variant ) )
# Store away some stats for the text table
dirstats=rbind(dirstats, round(fdata$ips, digits=2) )
dirstats=rbind(dirstats, round(fdata$Gcycles, digits=2) )
dirstats=rbind(dirstats, round(fdata$Ginstructions, digits=2) )
}
rstats=cbind(rstats, dirstats)
rstats_cols=append(rstats_cols, datasetname)
}
rstats_rows=c("IPS", "GCycles", "GInstr")
unts=c("Ins/Cyc", "G", "G")
rstats=cbind(rstats, unts)
rstats_cols=append(rstats_cols, "Units")
# If we have only 2 sets of results, then we can do some more
# stats math for the text table
if (length(resultdirs) == 2) {
# This is a touch hard wired - but we *know* we only have two
# datasets...
diff=c()
for (n in 1:3) {
difference = (as.double(rstats[n,2]) - as.double(rstats[n,1]))
val = 100 * (difference/as.double(rstats[n,1]))
diff=rbind(diff, round(val, digits=2))
}
rstats=cbind(rstats, diff)
rstats_cols=append(rstats_cols, "Diff %")
}
# Build us a text table of numerical results
stats_plot = suppressWarnings(ggtexttable(data.frame(rstats),
theme=ttheme(base_size=10),
rows=rstats_rows, cols=rstats_cols
))
# plot the counter data as bar charts
ipsdata <- subset(data, Type %in% c("ips"))
ips_plot <- ggplot() +
geom_bar(data=ipsdata, aes(Type, Result, fill=Runtime), stat="identity", position="dodge") +
xlab("Measure") +
ylab("IPS") +
ggtitle("Instructions Per Cycle") +
ylim(0, NA) +
theme(axis.text.x=element_text(angle=90))
cycdata <- subset(data, Type %in% c("Gcycles", "Ginstr"))
cycles_plot <- ggplot() +
geom_bar(data=cycdata, aes(Type, Result, fill=Runtime), stat="identity", position="dodge", show.legend=FALSE) +
xlab("Measure") +
ylab("Count (G)") +
ggtitle("Cycles and Instructions") +
ylim(0, NA) +
theme(axis.text.x=element_text(angle=90))
master_plot = grid.arrange(
ips_plot,
cycles_plot,
stats_plot,
nrow=2,
ncol=2 )