diff --git a/tests/metrics/README.md b/tests/metrics/README.md index 56af313187..9e6272b9c5 100644 --- a/tests/metrics/README.md +++ b/tests/metrics/README.md @@ -209,7 +209,3 @@ set if necessary. ## `checkmetrics` `checkmetrics` is a CLI tool to check a metrics CI results file. For further reference see the [`checkmetrics`](cmd/checkmetrics). - -## Report generator - -See the [report generator](report) documentation. diff --git a/tests/metrics/report/README.md b/tests/metrics/report/README.md deleted file mode 100644 index da586953d1..0000000000 --- a/tests/metrics/report/README.md +++ /dev/null @@ -1,64 +0,0 @@ -# Kata Containers metrics report generator - -The files within this directory can be used to generate a "metrics report" for Kata Containers. The -primary workflow consists of two stages: -1) Run the provided report metrics data gathering scripts on the system(s) you wish to analyze. -2) Run the provided report generation script to analyze the data and generate a report file. - -## Data gathering - -Data gathering is provided by the `grabdata.sh` script. When run, this script executes a set of -tests from the `tests/metrics` directory. The JSON results files will be placed into the -`tests/metrics/results` directory. Once the results are generated, create a suitably named -subdirectory of `tests/metrics/results`, and move the JSON files into it. Repeat this process if -you want to compare multiple sets of results. Note, the report generation scripts process all -subdirectories of `tests/metrics/results` when generating the report. - -> **Note:** By default, the `grabdata.sh` script tries to launch some moderately large containers -> (i.e. 8Gbyte RAM) and may fail to produce some results on a memory constrained system. - -You can restrict the subset of tests run by `grabdata.sh` via its commandline parameters: - -| Option | Description | -| ------ | ------------------------| -| -a | Run all tests (default) | -| -d | Run the density tests | -| -h | Print this help | -| -s | Run the storage tests | -| -t | Run the time tests | - -## Report generation - -Report generation is provided by the `makereport.sh` script. By default this script processes all -subdirectories of the `tests/metrics/results` directory to generate the report. To run in the -default mode, execute the following: - -``` -$ ./makereport.sh -``` - -The report generation tool uses [`Rmarkdown`](https://github.com/rstudio/rmarkdown), [R](https://www.r-project.org/about.html) and -[Pandoc](https://pandoc.org/) to produce a PDF report. To avoid the need for all users to set up a -working environment with all the necessary tooling, the `makereport.sh` script utilises a -`Dockerfile` with the environment pre-defined in order to produce the report. Thus, you need to -have Docker installed on your system in order to run the report generation. The resulting -`metrics_report.pdf` is generated into the `output` subdirectory of the `report` directory. - -## Debugging and development - -To aid in script development and debugging, the `makereport.sh` script offers a debug facility via -the `-d` command line option. Using this option will place you into a `bash` shell within the -running `Dockerfile` image used to generate the report, whilst also mapping your host side `R` -scripts from the `report_dockerfile` subdirectory into the container, thus facilitating a "live" -edit/reload/run development cycle. From there you can examine the Docker image environment, and -execute the generation scripts. E.g., to test the `scaling.R` script, you can execute: - -``` -$ makereport.sh -d -# R -> source('/inputdir/Env.R') -> source('/scripts/lifecycle-time.R') -``` - -You can then edit the `report_dockerfile/lifecycle-time.R` file on the host, and re-run -the `source('/scripts/lifecycle-time.R')` command inside the still running `R` container. diff --git a/tests/metrics/report/grabdata.sh b/tests/metrics/report/grabdata.sh deleted file mode 100755 index 3544339f8a..0000000000 --- a/tests/metrics/report/grabdata.sh +++ /dev/null @@ -1,168 +0,0 @@ -#!/bin/bash -# Copyright (c) 2018-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -# Run a set of the metrics tests to gather data to be used with the report -# generator. The general ideal is to have the tests configured to generate -# useful, meaninful and repeatable (stable, with minimised variance) results. -# If the tests have to be run more or longer to achieve that, then generally -# that is fine - this test is not intended to be quick, it is intended to -# be repeatable. - -# Note - no 'set -e' in this file - if one of the metrics tests fails -# then we wish to continue to try the rest. -# Finally at the end, in some situations, we explicitly exit with a -# failure code if necessary. - -SCRIPT_DIR=$(dirname "$(readlink -f "$0")") -source "${SCRIPT_DIR}/../lib/common.bash" -RESULTS_DIR=${SCRIPT_DIR}/../results - -# By default we run all the tests -RUN_ALL=1 - -help() { - usage=$(cat << EOF -Usage: $0 [-h] [options] - Description: - This script gathers a number of metrics for use in the - report generation script. Which tests are run can be - configured on the commandline. Specifically enabling - individual tests will disable the 'all' option, unless - 'all' is also specified last. - Options: - -a, Run all tests (default). - -d, Run the density tests. - -h, Print this help. - -s, Run the storage tests. - -t, Run the time tests. -EOF -) - echo "$usage" -} - -# Set up the initial state -init() { - metrics_onetime_init - - local OPTIND - while getopts "adhst" opt;do - case ${opt} in - a) - RUN_ALL=1 - ;; - d) - RUN_DENSITY=1 - RUN_ALL= - ;; - h) - help - exit 0; - ;; - s) - RUN_STORAGE=1 - RUN_ALL= - ;; - t) - RUN_TIME=1 - RUN_ALL= - ;; - ?) - # parse failure - help - die "Failed to parse arguments" - ;; - esac - done - shift $((OPTIND-1)) -} - -run_density_ksm() { - echo "Running KSM density tests" - - # Run the memory footprint test - the main test that - # KSM affects. Run for a sufficient number of containers - # (that gives us a fair view of how memory gets shared across - # containers), and a large enough timeout for KSM to settle. - # If KSM has not settled down by then, just take the measurement. - # 'auto' mode should detect when KSM has settled automatically. - bash density/memory_usage.sh 20 300 auto - - # Get a measure for the overhead we take from the container memory - bash density/memory_usage_inside_container.sh -} - -run_density() { - echo "Running non-KSM density tests" - - # Run the density tests - no KSM, so no need to wait for settle - # Set a token short timeout, and use enough containers to get a - # good average measurement. - bash density/memory_usage.sh 20 5 -} - -run_time() { - echo "Running time tests" - # Run the time tests - take time measures for an ubuntu image, over - # 100 'first and only container' launches. - # NOTE - whichever container you test here must support a full 'date' - # command - busybox based containers (including Alpine) will not work. - bash time/launch_times.sh -i public.ecr.aws/ubuntu/ubuntu:latest -n 100 -} - -run_storage() { - echo "Running storage tests" - - bash storage/blogbench.sh -} - - -# Execute metrics scripts -run() { - pushd "$SCRIPT_DIR/.." - - # If KSM is available on this platform, let's run any tests that are - # affected by having KSM on/off first, and then turn it off for the - # rest of the tests, as KSM may introduce some extra noise in the - # results by stealing CPU time for instance. - if [[ -f ${KSM_ENABLE_FILE} ]]; then - # No point enabling and disabling KSM if we have nothing to test. - if [ -n "$RUN_ALL" ] || [ -n "$RUN_DENSITY" ]; then - save_ksm_settings - trap restore_ksm_settings EXIT QUIT KILL - set_ksm_aggressive - - run_density_ksm - - # And now ensure KSM is turned off for the rest of the tests - disable_ksm - fi - else - echo "No KSM control file, skipping KSM tests" - fi - - if [ -n "$RUN_ALL" ] || [ -n "$RUN_TIME" ]; then - run_time - fi - - if [ -n "$RUN_ALL" ] || [ -n "$RUN_DENSITY" ]; then - run_density - fi - - if [ -n "$RUN_ALL" ] || [ -n "$RUN_STORAGE" ]; then - run_storage - fi - - popd -} - -finish() { - echo "Now please create a suitably descriptively named subdirectory in" - echo "$RESULTS_DIR and copy the .json results files into it before running" - echo "this script again." -} - -init "$@" -run -finish diff --git a/tests/metrics/report/makereport.sh b/tests/metrics/report/makereport.sh deleted file mode 100755 index 3926d7567f..0000000000 --- a/tests/metrics/report/makereport.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash -# Copyright (c) 2018-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -# Take the data found in subdirectories of the metrics 'results' directory, -# and turn them into a PDF report. Use a Dockerfile containing all the tooling -# and scripts we need to do that. - -set -e - -SCRIPT_PATH=$(dirname "$(readlink -f "$0")") -source "${SCRIPT_PATH}/../lib/common.bash" - -IMAGE="${IMAGE:-metrics-report}" -DOCKERFILE="${SCRIPT_PATH}/report_dockerfile/Dockerfile" - -HOST_INPUT_DIR="${SCRIPT_PATH}/../results" -R_ENV_FILE="${HOST_INPUT_DIR}/Env.R" -HOST_OUTPUT_DIR="${SCRIPT_PATH}/output" - -GUEST_INPUT_DIR="/inputdir/" -GUEST_OUTPUT_DIR="/outputdir/" - -# If in debugging mode, we also map in the scripts dir so you can -# dynamically edit and re-load them at the R prompt -HOST_SCRIPT_DIR="${SCRIPT_PATH}/report_dockerfile" -GUEST_SCRIPT_DIR="/scripts/" - -setup() { - echo "Checking subdirectories" - check_subdir="$(ls -dx ${HOST_INPUT_DIR}/*/ 2> /dev/null | wc -l)" - if [ $check_subdir -eq 0 ]; then - die "No subdirs in [${HOST_INPUT_DIR}] to read results from." - fi - - echo "Checking Dockerfile" - check_dockerfiles_images "$IMAGE" "$DOCKERFILE" - - mkdir -p "$HOST_OUTPUT_DIR" && true - - echo "inputdir=\"${GUEST_INPUT_DIR}\"" > ${R_ENV_FILE} - echo "outputdir=\"${GUEST_OUTPUT_DIR}\"" >> ${R_ENV_FILE} - - # A bit of a hack to get an R syntax'd list of dirs to process - # Also, need it as not host-side dir path - so short relative names - resultdirs="$(cd ${HOST_INPUT_DIR}; ls -dx */)" - resultdirslist=$(echo ${resultdirs} | sed 's/ \+/", "/g') - echo "resultdirs=c(" >> ${R_ENV_FILE} - echo " \"${resultdirslist}\"" >> ${R_ENV_FILE} - echo ")" >> ${R_ENV_FILE} -} - -run() { - docker run -ti --rm -v ${HOST_INPUT_DIR}:${GUEST_INPUT_DIR} -v ${HOST_OUTPUT_DIR}:${GUEST_OUTPUT_DIR} ${extra_volumes} ${IMAGE} ${extra_command} - ls -la ${HOST_OUTPUT_DIR}/* -} - -help() { - usage=$(cat << EOF -Usage: $0 [-h] [options] - Description: - This script generates a metrics report document - from the results directory one level up in the - directory tree (../results). - Options: - -d, Run in debug (interactive) mode - -h, Print this help -EOF -) - echo "$usage" -} - -main() { - - local OPTIND - while getopts "d" opt;do - case ${opt} in - d) - # In debug mode, run a shell instead of the default report generation - extra_command="bash" - extra_volumes="-v ${HOST_SCRIPT_DIR}:${GUEST_SCRIPT_DIR}" - ;; - ?) - # parse failure - help - die "Failed to parse arguments" - ;; - esac - done - shift $((OPTIND-1)) - - setup - run -} - -main "$@" diff --git a/tests/metrics/report/report_dockerfile/Dockerfile b/tests/metrics/report/report_dockerfile/Dockerfile deleted file mode 100644 index 74c5026524..0000000000 --- a/tests/metrics/report/report_dockerfile/Dockerfile +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) 2018-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -# Set up an Ubuntu image with the components needed to generate a -# metrics report. That includes: -# - R -# - The R 'tidyverse' -# - pandoc -# - The report generation R files and helper scripts - -# Start with the base rocker tidyverse. -# We would have used the 'verse' base, that already has some of the docs processing -# installed, but I could not figure out how to add in the extra bits we needed to -# the lite tex version is uses. -# Here we specify a tag for base image instead of using latest to let it free from -# the risk from the update of latest base image. -FROM rocker/tidyverse:3.6.0 - -# Version of the Dockerfile -LABEL DOCKERFILE_VERSION="1.2" - -# Without this some of the package installs stop to try and ask questions... -ENV DEBIAN_FRONTEND=noninteractive - -# Install the extra doc processing parts we need for our Rmarkdown PDF flow. -RUN apt-get update -qq && \ - apt-get install -y --no-install-recommends \ - texlive-latex-base \ - texlive-fonts-recommended \ - latex-xcolor && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists - -# Install the extra R packages we need. -RUN install2.r --error --deps TRUE \ - gridExtra \ - ggpubr - -# Pull in our actual worker scripts -COPY . /scripts - -# By default generate the report -CMD ["/scripts/genreport.sh"] diff --git a/tests/metrics/report/report_dockerfile/dut-details.R b/tests/metrics/report/report_dockerfile/dut-details.R deleted file mode 100644 index 3b16c8c2b7..0000000000 --- a/tests/metrics/report/report_dockerfile/dut-details.R +++ /dev/null @@ -1,122 +0,0 @@ -#!/usr/bin/env Rscript -# Copyright (c) 2018-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -# Display details for the 'Device Under Test', for all data sets being processed. - -suppressMessages(suppressWarnings(library(tidyr))) # for gather(). -library(tibble) -suppressMessages(suppressWarnings(library(plyr))) # rbind.fill - # So we can plot multiple graphs -library(gridExtra) # together. -suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. -suppressMessages(library(jsonlite)) # to load the data. - -# A list of all the known results files we might find the information inside. -resultsfiles=c( - "boot-times.json", - "memory-footprint.json", - "memory-footprint-ksm.json", - "memory-footprint-inside-container.json" - ) - -data=c() -stats=c() -stats_names=c() - -# For each set of results -for (currentdir in resultdirs) { - count=1 - dirstats=c() - for (resultsfile in resultsfiles) { - fname=paste(inputdir, currentdir, resultsfile, sep="/") - if ( !file.exists(fname)) { - #warning(paste("Skipping non-existent file: ", fname)) - next - } - - # Derive the name from the test result dirname - datasetname=basename(currentdir) - - # Import the data - fdata=fromJSON(fname) - - if (length(fdata$'kata-env') != 0 ) { - # We have kata-runtime data - dirstats=tibble("Run Ver"=as.character(fdata$'kata-env'$Runtime$Version$Semver)) - dirstats=cbind(dirstats, "Run SHA"=as.character(fdata$'kata-env'$Runtime$Version$Commit)) - - pver=as.character(fdata$'kata-env'$Proxy$Version) - pver=sub("^[[:alpha:][:blank:]-]*", "", pver) - # uncomment if you want to drop the commit sha as well - #pver=sub("([[:digit:].]*).*", "\\1", pver) - dirstats=cbind(dirstats, "Proxy Ver"=pver) - - # Trim the shim string - sver=as.character(fdata$'kata-env'$Shim$Version) - sver=sub("^[[:alpha:][:blank:]-]*", "", sver) - # uncomment if you want to drop the commit sha as well - #sver=sub("([[:digit:].]*).*", "\\1", sver) - dirstats=cbind(dirstats, "Shim Ver"=sver) - - # Default QEMU ver string is far too long and noisy - trim. - hver=as.character(fdata$'kata-env'$Hypervisor$Version) - hver=sub("^[[:alpha:][:blank:]]*", "", hver) - hver=sub("([[:digit:].]*).*", "\\1", hver) - dirstats=cbind(dirstats, "Hyper Ver"=hver) - - iver=as.character(fdata$'kata-env'$Image$Path) - iver=sub("^[[:alpha:]/-]*", "", iver) - dirstats=cbind(dirstats, "Image Ver"=iver) - - kver=as.character(fdata$'kata-env'$Kernel$Path) - kver=sub("^[[:alpha:]/-]*", "", kver) - dirstats=cbind(dirstats, "Guest Krnl"=kver) - - dirstats=cbind(dirstats, "Host arch"=as.character(fdata$'kata-env'$Host$Architecture)) - dirstats=cbind(dirstats, "Host Distro"=as.character(fdata$'kata-env'$Host$Distro$Name)) - dirstats=cbind(dirstats, "Host DistVer"=as.character(fdata$'kata-env'$Host$Distro$Version)) - dirstats=cbind(dirstats, "Host Model"=as.character(fdata$'kata-env'$Host$CPU$Model)) - dirstats=cbind(dirstats, "Host Krnl"=as.character(fdata$'kata-env'$Host$Kernel)) - dirstats=cbind(dirstats, "runtime"=as.character(fdata$test$runtime)) - - break - } else { - if (length(fdata$'runc-env') != 0 ) { - dirstats=tibble("Run Ver"=as.character(fdata$'runc-env'$Version$Semver)) - dirstats=cbind(dirstats, "Run SHA"=as.character(fdata$'runc-env'$Version$Commit)) - dirstats=cbind(dirstats, "runtime"=as.character(fdata$test$runtime)) - } else { - dirstats=tibble("runtime"="Unknown") - } - break - } - } - - if ( length(dirstats) == 0 ) { - warning(paste("No valid data found for directory ", currentdir)) - } - - # use plyr rbind.fill so we can combine disparate version info frames - stats=rbind.fill(stats, dirstats) - stats_names=rbind(stats_names, datasetname) -} - -rownames(stats) = stats_names - -# Rotate the tibble so we get data dirs as the columns -spun_stats = as_tibble(cbind(What=names(stats), t(stats))) - -# Build us a text table of numerical results -stats_plot = suppressWarnings(ggtexttable(data.frame(spun_stats, check.names=FALSE), - theme=ttheme(base_size=6), - rows=NULL - )) - -# It may seem odd doing a grid of 1x1, but it should ensure we get a uniform format and -# layout to match the other charts and tables in the report. -master_plot = grid.arrange( - stats_plot, - nrow=1, - ncol=1 ) diff --git a/tests/metrics/report/report_dockerfile/fio-reads.R b/tests/metrics/report/report_dockerfile/fio-reads.R deleted file mode 100644 index 5f3f1c212a..0000000000 --- a/tests/metrics/report/report_dockerfile/fio-reads.R +++ /dev/null @@ -1,269 +0,0 @@ -#!/usr/bin/env Rscript -# Copyright (c) 2018-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -# Display details for `fio` random read storage IO tests. - - -library(ggplot2) # ability to plot nicely -library(gridExtra) # So we can plot multiple graphs together -suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable -suppressMessages(library(jsonlite)) # to load the data -suppressMessages(suppressWarnings(library(tidyr))) # for gather -library(tibble) - -testnames=c( - "fio-randread-128", - "fio-randread-256", - "fio-randread-512", - "fio-randread-1k", - "fio-randread-2k", - "fio-randread-4k", - "fio-randread-8k", - "fio-randread-16k", - "fio-randread-32k", - "fio-randread-64k" - ) - -data2=c() -all_ldata=c() -all_ldata2=c() -stats=c() -rstats=c() -rstats_names=c() - -# Where to store up the stats for the tables -read_bw_stats=c() -read_iops_stats=c() -read_lat95_stats=c() -read_lat99_stats=c() - -# For each set of results -for (currentdir in resultdirs) { - bw_dirstats=c() - iops_dirstats=c() - lat95_dirstats=c() - lat99_dirstats=c() - # Derive the name from the test result dirname - datasetname=basename(currentdir) - - for (testname in testnames) { - fname=paste(inputdir, currentdir, testname, '.json', sep="") - if ( !file.exists(fname)) { - #warning(paste("Skipping non-existent file: ", fname)) - next - } - - # Import the data - fdata=fromJSON(fname) - # De-ref the test named unique data - fdata=fdata[[testname]] - - blocksize=fdata$Raw$'global options'$bs - - # Extract the latency data - it comes as a table of percentiles, so - # we have to do a little work... - clat=data.frame(clat_ns=fdata$Raw$jobs[[1]]$read$clat_ns$percentile) - - # Generate a clat data set with 'clean' percentile numbers so - # we can sensibly plot it later on. - clat2=clat - colnames(clat2)<-sub("clat_ns.", "", colnames(clat2)) - colnames(clat2)<-sub("0000", "", colnames(clat2)) - ldata2=gather(clat2) - colnames(ldata2)[colnames(ldata2)=="key"] <- "percentile" - colnames(ldata2)[colnames(ldata2)=="value"] <- "ms" - ldata2$ms=ldata2$ms/1000000 #ns->ms - ldata2=cbind(ldata2, runtime=rep(datasetname, length(ldata2$percentile))) - ldata2=cbind(ldata2, blocksize=rep(blocksize, length(ldata2$percentile))) - - # Pull the 95 and 99 percentile numbers for the boxplot - # Plotting all values for all runtimes and blocksizes is just way too - # noisy to make a meaninful picture, so we use this subset. - # Our values fall more in the range of ms... - pc95data=tibble(percentile=clat$clat_ns.95.000000/1000000) - pc95data=cbind(pc95data, runtime=rep(paste(datasetname, "95pc", sep="-"), length(pc95data$percentile))) - pc99data=tibble(percentile=clat$clat_ns.99.000000/1000000) - pc99data=cbind(pc99data, runtime=rep(paste(datasetname, "99pc", sep="-"), length(pc95data$percentile))) - ldata=rbind(pc95data, pc99data) - ldata=cbind(ldata, blocksize=rep(blocksize, length(ldata$percentile))) - - # We want total bandwidth, so that is the sum of the bandwidths - # from all the read 'jobs'. - mdata=data.frame(read_bw_mps=as.numeric(sum(fdata$Raw$jobs[[1]]$read$bw)/1024)) - mdata=cbind(mdata, iops_tot=as.numeric(sum(fdata$Raw$jobs[[1]]$read$iops))) - mdata=cbind(mdata, runtime=rep(datasetname, length(mdata[, "read_bw_mps"]) )) - mdata=cbind(mdata, blocksize=rep(blocksize, length(mdata[, "read_bw_mps"]) )) - - # Extract the stats tables - bw_dirstats=rbind(bw_dirstats, round(mdata$read_bw_mps, digits=1)) - # Rowname hack to get the blocksize recorded - rownames(bw_dirstats)[nrow(bw_dirstats)]=blocksize - - iops_dirstats=rbind(iops_dirstats, round(mdata$iops_tot, digits=1)) - rownames(iops_dirstats)[nrow(iops_dirstats)]=blocksize - - # And do the 95 and 99 percentiles as tables as well - lat95_dirstats=rbind(lat95_dirstats, round(mean(clat$clat_ns.95.000000)/1000000, digits=1)) - rownames(lat95_dirstats)[nrow(lat95_dirstats)]=blocksize - lat99_dirstats=rbind(lat99_dirstats, round(mean(clat$clat_ns.99.000000)/1000000, digits=1)) - rownames(lat99_dirstats)[nrow(lat99_dirstats)]=blocksize - - # Collect up as sets across all files and runtimes. - data2=rbind(data2, mdata) - all_ldata=rbind(all_ldata, ldata) - all_ldata2=rbind(all_ldata2, ldata2) - } - - # Collect up for each dir we process into a column - read_bw_stats=cbind(read_bw_stats, bw_dirstats) - colnames(read_bw_stats)[ncol(read_bw_stats)]=datasetname - - read_iops_stats=cbind(read_iops_stats, iops_dirstats) - colnames(read_iops_stats)[ncol(read_iops_stats)]=datasetname - - read_lat95_stats=cbind(read_lat95_stats, lat95_dirstats) - colnames(read_lat95_stats)[ncol(read_lat95_stats)]=datasetname - read_lat99_stats=cbind(read_lat99_stats, lat99_dirstats) - colnames(read_lat99_stats)[ncol(read_lat99_stats)]=datasetname -} - -# To get a nice looking table, we need to extract the rownames into their -# own column -read_bw_stats=cbind(Bandwidth=rownames(read_bw_stats), read_bw_stats) -read_bw_stats=cbind(read_bw_stats, Units=rep("MB/s", nrow(read_bw_stats))) - -read_iops_stats=cbind(IOPS=rownames(read_iops_stats), read_iops_stats) -read_iops_stats=cbind(read_iops_stats, Units=rep("IOP/s", nrow(read_iops_stats))) - -read_lat95_stats=cbind('lat 95pc'=rownames(read_lat95_stats), read_lat95_stats) -read_lat95_stats=cbind(read_lat95_stats, Units=rep("ms", nrow(read_lat95_stats))) -read_lat99_stats=cbind('lat 99pc'=rownames(read_lat99_stats), read_lat99_stats) -read_lat99_stats=cbind(read_lat99_stats, Units=rep("ms", nrow(read_lat99_stats))) - -# Bandwidth line plot -read_bw_line_plot <- ggplot() + - geom_line( data=data2, aes(blocksize, read_bw_mps, group=runtime, color=runtime)) + - ylim(0, NA) + - ggtitle("Random Read total bandwidth") + - xlab("Blocksize") + - ylab("Bandwidth (MiB/s)") + - theme( - axis.text.x=element_text(angle=90), - legend.position=c(0.35,0.8), - legend.title=element_text(size=5), - legend.text=element_text(size=5), - legend.background = element_rect(fill=alpha('blue', 0.2)) - ) - -# IOPS line plot -read_iops_line_plot <- ggplot() + - geom_line( data=data2, aes(blocksize, iops_tot, group=runtime, color=runtime)) + - ylim(0, NA) + - ggtitle("Random Read total IOPS") + - xlab("Blocksize") + - ylab("IOPS") + - theme( - axis.text.x=element_text(angle=90), - legend.position=c(0.35,0.8), - legend.title=element_text(size=5), - legend.text=element_text(size=5), - legend.background = element_rect(fill=alpha('blue', 0.2)) - ) - -# 95 and 99 percentile box plot -read_clat_box_plot <- ggplot() + - geom_boxplot( data=all_ldata, aes(blocksize, percentile, color=runtime)) + - stat_summary( data=all_ldata, aes(blocksize, percentile, group=runtime, color=runtime), fun.y=mean, geom="line") + - ylim(0, NA) + - ggtitle("Random Read completion latency", subtitle="95&99 percentiles, boxplot over jobs") + - xlab("Blocksize") + - ylab("Latency (ms)") + - theme(axis.text.x=element_text(angle=90)) + - # Use the 'paired' colour matrix as we are setting these up as pairs of - # 95 and 99 percentiles, and it is much easier to visually group those to - # each runtime if we use this colourmap. - scale_colour_brewer(palette="Paired") -# it would be nice to use the same legend theme as the other plots on this -# page, but because of the number of entries it tends to flow off the picture. -# theme( -# axis.text.x=element_text(angle=90), -# legend.position=c(0.35,0.8), -# legend.title=element_text(size=5), -# legend.text=element_text(size=5), -# legend.background = element_rect(fill=alpha('blue', 0.2)) -# ) - -# As the boxplot is actually quite hard to interpret, also show a linegraph -# of all the percentiles for a single blocksize. -which_blocksize='4k' -clat_line_subtitle=paste("For blocksize", which_blocksize, sep=" ") -single_blocksize=subset(all_ldata2, blocksize==which_blocksize) -clat_line=aggregate( - single_blocksize$ms, - by=list( - percentile=single_blocksize$percentile, - blocksize=single_blocksize$blocksize, - runtime=single_blocksize$runtime - ), - FUN=mean -) - -clat_line$percentile=as.numeric(clat_line$percentile) - -read_clat_line_plot <- ggplot() + - geom_line( data=clat_line, aes(percentile, x, group=runtime, color=runtime)) + - ylim(0, NA) + - ggtitle("Random Read completion latency percentiles", subtitle=clat_line_subtitle) + - xlab("Percentile") + - ylab("Time (ms)") + - theme( - axis.text.x=element_text(angle=90), - legend.position=c(0.35,0.8), - legend.title=element_text(size=5), - legend.text=element_text(size=5), - legend.background = element_rect(fill=alpha('blue', 0.2)) - ) - -# Output the pretty pictures -graphics_plot = grid.arrange( - read_bw_line_plot, - read_iops_line_plot, - read_clat_box_plot, - read_clat_line_plot, - nrow=2, - ncol=2 ) - -# A bit of an odd tweak to force a pagebreak between the pictures and -# the tables. This only works because we have a `results='asis'` in the Rmd -# R fragment. -cat("\n\n\\pagebreak\n") - -read_bw_stats_plot = suppressWarnings(ggtexttable(read_bw_stats, - theme=ttheme(base_size=10), - rows=NULL - )) - -read_iops_stats_plot = suppressWarnings(ggtexttable(read_iops_stats, - theme=ttheme(base_size=10), - rows=NULL - )) - -read_lat95_stats_plot = suppressWarnings(ggtexttable(read_lat95_stats, - theme=ttheme(base_size=10), - rows=NULL - )) -read_lat99_stats_plot = suppressWarnings(ggtexttable(read_lat99_stats, - theme=ttheme(base_size=10), - rows=NULL - )) - -# and then the statistics tables -stats_plot = grid.arrange( - read_bw_stats_plot, - read_iops_stats_plot, - read_lat95_stats_plot, - read_lat99_stats_plot, - nrow=4, - ncol=1 ) diff --git a/tests/metrics/report/report_dockerfile/fio-writes.R b/tests/metrics/report/report_dockerfile/fio-writes.R deleted file mode 100644 index 5fa2231766..0000000000 --- a/tests/metrics/report/report_dockerfile/fio-writes.R +++ /dev/null @@ -1,260 +0,0 @@ -#!/usr/bin/env Rscript -# Copyright (c) 2018-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -# Display details for 'fio' random writes storage IO tests. - - -library(ggplot2) # ability to plot nicely -library(gridExtra) # So we can plot multiple graphs together -suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable -suppressMessages(library(jsonlite)) # to load the data -suppressMessages(suppressWarnings(library(tidyr))) # for gather -library(tibble) - -testnames=c( - "fio-randwrite-128", - "fio-randwrite-256", - "fio-randwrite-512", - "fio-randwrite-1k", - "fio-randwrite-2k", - "fio-randwrite-4k", - "fio-randwrite-8k", - "fio-randwrite-16k", - "fio-randwrite-32k", - "fio-randwrite-64k" - ) - -data2=c() -all_ldata=c() -all_ldata2=c() -stats=c() -rstats=c() -rstats_names=c() - - -# Where to store up the stats for the tables -write_bw_stats=c() -write_iops_stats=c() -write_lat95_stats=c() -write_lat99_stats=c() - -# For each set of results -for (currentdir in resultdirs) { - bw_dirstats=c() - iops_dirstats=c() - lat95_dirstats=c() - lat99_dirstats=c() - # Derive the name from the test result dirname - datasetname=basename(currentdir) - - for (testname in testnames) { - fname=paste(inputdir, currentdir, testname, '.json', sep="") - if ( !file.exists(fname)) { - #warning(paste("Skipping non-existent file: ", fname)) - next - } - - # Import the data - fdata=fromJSON(fname) - # De-nest the test specific named data - fdata=fdata[[testname]] - - blocksize=fdata$Raw$'global options'$bs - - # Extract the latency data - it comes as a table of percentiles, so - # we have to do a little work... - clat=data.frame(clat_ns=fdata$Raw$jobs[[1]]$write$clat_ns$percentile) - - # Generate a clat data set with 'clean' percentile numbers so - # we can sensibly plot it later on. - clat2=clat - colnames(clat2)<-sub("clat_ns.", "", colnames(clat2)) - colnames(clat2)<-sub("0000", "", colnames(clat2)) - ldata2=gather(clat2) - colnames(ldata2)[colnames(ldata2)=="key"] <- "percentile" - colnames(ldata2)[colnames(ldata2)=="value"] <- "ms" - ldata2$ms=ldata2$ms/1000000 #ns->ms - ldata2=cbind(ldata2, runtime=rep(datasetname, length(ldata2$percentile))) - ldata2=cbind(ldata2, blocksize=rep(blocksize, length(ldata2$percentile))) - - # Pull the 95 and 99 percentiles for the boxplot diagram. - # Our values fall more in the range of ms... - pc95data=tibble(percentile=clat$clat_ns.95.000000/1000000) - pc95data=cbind(pc95data, runtime=rep(paste(datasetname, "95pc", sep="-"), length(pc95data$percentile))) - pc99data=tibble(percentile=clat$clat_ns.99.000000/1000000) - pc99data=cbind(pc99data, runtime=rep(paste(datasetname, "99pc", sep="-"), length(pc95data$percentile))) - ldata=rbind(pc95data, pc99data) - ldata=cbind(ldata, blocksize=rep(blocksize, length(ldata$percentile))) - - # We want total bandwidth, so that is the sum of the bandwidths - # from all the write 'jobs'. - mdata=data.frame(write_bw_mps=as.numeric(sum(fdata$Raw$jobs[[1]]$write$bw)/1024)) - mdata=cbind(mdata, iops_tot=as.numeric(sum(fdata$Raw$jobs[[1]]$write$iops))) - mdata=cbind(mdata, runtime=rep(datasetname, length(mdata[, "write_bw_mps"]) )) - mdata=cbind(mdata, blocksize=rep(blocksize, length(mdata[, "write_bw_mps"]) )) - - # Extract the stats tables - bw_dirstats=rbind(bw_dirstats, round(mdata$write_bw_mps, digits=1)) - # Rowname hack to get the blocksize recorded - rownames(bw_dirstats)[nrow(bw_dirstats)]=blocksize - - iops_dirstats=rbind(iops_dirstats, round(mdata$iops_tot, digits=1)) - rownames(iops_dirstats)[nrow(iops_dirstats)]=blocksize - - # And do the 95 and 99 percentiles as tables as well - lat95_dirstats=rbind(lat95_dirstats, round(mean(clat$clat_ns.95.000000)/1000000, digits=1)) - rownames(lat95_dirstats)[nrow(lat95_dirstats)]=blocksize - lat99_dirstats=rbind(lat99_dirstats, round(mean(clat$clat_ns.99.000000)/1000000, digits=1)) - rownames(lat99_dirstats)[nrow(lat99_dirstats)]=blocksize - - # Store away as single sets - data2=rbind(data2, mdata) - all_ldata=rbind(all_ldata, ldata) - all_ldata2=rbind(all_ldata2, ldata2) - } - - # Collect up for each dir we process into a column - write_bw_stats=cbind(write_bw_stats, bw_dirstats) - colnames(write_bw_stats)[ncol(write_bw_stats)]=datasetname - - write_iops_stats=cbind(write_iops_stats, iops_dirstats) - colnames(write_iops_stats)[ncol(write_iops_stats)]=datasetname - - write_lat95_stats=cbind(write_lat95_stats, lat95_dirstats) - colnames(write_lat95_stats)[ncol(write_lat95_stats)]=datasetname - write_lat99_stats=cbind(write_lat99_stats, lat99_dirstats) - colnames(write_lat99_stats)[ncol(write_lat99_stats)]=datasetname -} - -# To get a nice looking table, we need to extract the rownames into their -# own column -write_bw_stats=cbind(Bandwidth=rownames(write_bw_stats), write_bw_stats) -write_bw_stats=cbind(write_bw_stats, Units=rep("MB/s", nrow(write_bw_stats))) - -write_iops_stats=cbind(IOPS=rownames(write_iops_stats), write_iops_stats) -write_iops_stats=cbind(write_iops_stats, Units=rep("IOP/s", nrow(write_iops_stats))) - -write_lat95_stats=cbind('lat 95pc'=rownames(write_lat95_stats), write_lat95_stats) -write_lat95_stats=cbind(write_lat95_stats, Units=rep("ms", nrow(write_lat95_stats))) -write_lat99_stats=cbind('lat 99pc'=rownames(write_lat99_stats), write_lat99_stats) -write_lat99_stats=cbind(write_lat99_stats, Units=rep("ms", nrow(write_lat99_stats))) - -# lineplot of total bandwidth across blocksizes. -write_bw_line_plot <- ggplot() + - geom_line( data=data2, aes(blocksize, write_bw_mps, group=runtime, color=runtime)) + - ylim(0, NA) + - ggtitle("Random Write total bandwidth") + - xlab("Blocksize") + - ylab("Bandwidth (MiB/s)") + - theme( - axis.text.x=element_text(angle=90), - legend.position=c(0.35,0.8), - legend.title=element_text(size=5), - legend.text=element_text(size=5), - legend.background = element_rect(fill=alpha('blue', 0.2)) - ) - -# lineplot of IOPS across blocksizes -write_iops_line_plot <- ggplot() + - geom_line( data=data2, aes(blocksize, iops_tot, group=runtime, color=runtime)) + - ylim(0, NA) + - ggtitle("Random Write total IOPS") + - xlab("Blocksize") + - ylab("IOPS") + - theme( - axis.text.x=element_text(angle=90), - legend.position=c(0.35,0.8), - legend.title=element_text(size=5), - legend.text=element_text(size=5), - legend.background = element_rect(fill=alpha('blue', 0.2)) - ) - -# boxplot of 95 and 99 percentiles covering the parallel jobs, shown across -# the blocksizes. -write_clat_box_plot <- ggplot() + - geom_boxplot( data=all_ldata, aes(blocksize, percentile, color=runtime)) + - stat_summary( data=all_ldata, aes(blocksize, percentile, group=runtime, color=runtime), fun.y=mean, geom="line") + - ylim(0, NA) + - ggtitle("Random Write completion latency", subtitle="95&99 Percentiles, boxplot across jobs") + - xlab("Blocksize") + - ylab("Latency (ms)") + - theme(axis.text.x=element_text(angle=90)) + - # Use the 'paired' colour matrix as we are setting these up as pairs of - # 95 and 99 percentiles, and it is much easier to visually group those to - # each runtime if we use this colourmap. - scale_colour_brewer(palette="Paired") - - -# completion latency line plot across the percentiles, for a specific blocksize only -# as otherwise the graph would be far too noisy. -which_blocksize='4k' -clat_line_subtitle=paste("For blocksize", which_blocksize, sep=" ") -single_blocksize=subset(all_ldata2, blocksize==which_blocksize) -clat_line=aggregate( - single_blocksize$ms, - by=list( - percentile=single_blocksize$percentile, - blocksize=single_blocksize$blocksize, - runtime=single_blocksize$runtime - ), - FUN=mean -) - -clat_line$percentile=as.numeric(clat_line$percentile) - -write_clat_line_plot <- ggplot() + - geom_line( data=clat_line, aes(percentile, x, group=runtime, color=runtime)) + - ylim(0, NA) + - ggtitle("Random Write completion latency percentiles", subtitle=clat_line_subtitle) + - xlab("Percentile") + - ylab("Time (ms)") + - theme( - axis.text.x=element_text(angle=90), - legend.position=c(0.35,0.8), - legend.title=element_text(size=5), - legend.text=element_text(size=5), - legend.background = element_rect(fill=alpha('blue', 0.2)) - ) - -master_plot = grid.arrange( - write_bw_line_plot, - write_iops_line_plot, - write_clat_box_plot, - write_clat_line_plot, - nrow=2, - ncol=2 ) - -# A bit of an odd tweak to force a pagebreak between the pictures and -# the tables. This only works because we have a `results='asis'` in the Rmd -# R fragment. -cat("\n\n\\pagebreak\n") - -write_bw_stats_plot = suppressWarnings(ggtexttable(write_bw_stats, - theme=ttheme(base_size=10), - rows=NULL - )) - -write_iops_stats_plot = suppressWarnings(ggtexttable(write_iops_stats, - theme=ttheme(base_size=10), - rows=NULL - )) - -write_lat95_stats_plot = suppressWarnings(ggtexttable(write_lat95_stats, - theme=ttheme(base_size=10), - rows=NULL - )) -write_lat99_stats_plot = suppressWarnings(ggtexttable(write_lat99_stats, - theme=ttheme(base_size=10), - rows=NULL - )) - -# and then the statistics tables -stats_plot = grid.arrange( - write_bw_stats_plot, - write_iops_stats_plot, - write_lat95_stats_plot, - write_lat99_stats_plot, - nrow=4, - ncol=1 ) diff --git a/tests/metrics/report/report_dockerfile/footprint-density.R b/tests/metrics/report/report_dockerfile/footprint-density.R deleted file mode 100644 index 3539dc9cc9..0000000000 --- a/tests/metrics/report/report_dockerfile/footprint-density.R +++ /dev/null @@ -1,111 +0,0 @@ -#!/usr/bin/env Rscript -# Copyright (c) 2018-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -# Show system memory reduction, and hence container 'density', by analysing the -# scaling footprint data results and the 'system free' memory. - -library(ggplot2) # ability to plot nicely. - # So we can plot multiple graphs -library(gridExtra) # together. -suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. -suppressMessages(library(jsonlite)) # to load the data. - -testnames=c( - paste("footprint-busybox.*", test_name_extra, sep=""), - paste("footprint-mysql.*", test_name_extra, sep=""), - paste("footprint-elasticsearch.*", test_name_extra, sep="") -) - -data=c() -stats=c() -rstats=c() -rstats_names=c() - -for (currentdir in resultdirs) { - count=1 - dirstats=c() - for (testname in testnames) { - matchdir=paste(inputdir, currentdir, sep="") - matchfile=paste(testname, '\\.json', sep="") - files=list.files(matchdir, pattern=matchfile) - if ( length(files) == 0 ) { - #warning(paste("Pattern [", matchdir, "/", matchfile, "] matched nothing")) - } - for (ffound in files) { - fname=paste(inputdir, currentdir, ffound, sep="") - if ( !file.exists(fname)) { - warning(paste("Skipping non-existent file: ", fname)) - next - } - - # Derive the name from the test result dirname - datasetname=basename(currentdir) - - # Import the data - fdata=fromJSON(fname) - # De-nest the test name specific data - shortname=substr(ffound, 1, nchar(ffound)-nchar(".json")) - fdata=fdata[[shortname]] - - payload=fdata$Config$payload - testname=paste(datasetname, payload) - - cdata=data.frame(avail_mb=as.numeric(fdata$Results$system$avail)/(1024*1024)) - cdata=cbind(cdata, avail_decr=as.numeric(fdata$Results$system$avail_decr)) - cdata=cbind(cdata, count=seq_len(length(cdata[, "avail_mb"]))) - cdata=cbind(cdata, testname=rep(testname, length(cdata[, "avail_mb"]) )) - cdata=cbind(cdata, payload=rep(payload, length(cdata[, "avail_mb"]) )) - cdata=cbind(cdata, dataset=rep(datasetname, length(cdata[, "avail_mb"]) )) - - # Gather our statistics - sdata=data.frame(num_containers=length(cdata[, "avail_mb"])) - # Pick out the last avail_decr value - which in theory should be - # the most we have consumed... - sdata=cbind(sdata, mem_consumed=cdata[, "avail_decr"][length(cdata[, "avail_decr"])]) - sdata=cbind(sdata, avg_bytes_per_c=sdata$mem_consumed / sdata$num_containers) - sdata=cbind(sdata, runtime=testname) - - # Store away as a single set - data=rbind(data, cdata) - stats=rbind(stats, sdata) - - s = c( - "Test"=testname, - "n"=sdata$num_containers, - "size"=(sdata$mem_consumed) / 1024, - "kb/n"=round((sdata$mem_consumed / sdata$num_containers) / 1024, digits=1), - "n/Gb"= round((1*1024*1024*1024) / (sdata$mem_consumed / sdata$num_containers), digits=1) - ) - - rstats=rbind(rstats, s) - count = count + 1 - } - } -} - -# Set up the text table headers -colnames(rstats)=c("Test", "n", "Tot_Kb", "avg_Kb", "n_per_Gb") - -# Build us a text table of numerical results -stats_plot = suppressWarnings(ggtexttable(data.frame(rstats), - theme=ttheme(base_size=10), - rows=NULL - )) - -# plot how samples varioed over 'time' -line_plot <- ggplot() + - geom_point( data=data, aes(count, avail_mb, group=testname, color=payload, shape=dataset)) + - geom_line( data=data, aes(count, avail_mb, group=testname, color=payload)) + - xlab("Containers") + - ylab("System Avail (Mb)") + - ggtitle("System Memory free") + - ylim(0, NA) + - theme(axis.text.x=element_text(angle=90)) - -master_plot = grid.arrange( - line_plot, - stats_plot, - nrow=2, - ncol=1 ) diff --git a/tests/metrics/report/report_dockerfile/genreport.sh b/tests/metrics/report/report_dockerfile/genreport.sh deleted file mode 100644 index 5689c93d0f..0000000000 --- a/tests/metrics/report/report_dockerfile/genreport.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -# Copyright (c) 2018-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -REPORTNAME="metrics_report.pdf" - -cd scripts - -Rscript --slave -e "library(knitr);knit('pdf.Rmd')" -Rscript --slave -e "library(knitr);pandoc('pdf.md', format='latex')" - -Rscript --slave -e "library(knitr);knit('html.Rmd')" -Rscript --slave -e "library(knitr);pandoc('html.md', format='html')" - -cp /scripts/pdf.pdf /outputdir/${REPORTNAME} -cp /scripts/figure/*.png /outputdir/ -echo "PNGs of graphs and tables can be found in the output directory." -echo "The report, named ${REPORTNAME}, can be found in the output directory" diff --git a/tests/metrics/report/report_dockerfile/html.Rmd b/tests/metrics/report/report_dockerfile/html.Rmd deleted file mode 100644 index 86229f335c..0000000000 --- a/tests/metrics/report/report_dockerfile/html.Rmd +++ /dev/null @@ -1,23 +0,0 @@ ---- -# Copyright (c) 2018-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# -title: "Kata Containers metrics report" -author: "Auto generated" -date: "`r format(Sys.time(), '%d %B, %Y')`" -output: - html_document: -urlcolor: blue ---- - -```{r setup, include=FALSE} -#Set these opts to get pdf images which fit into beamer slides better -opts_chunk$set(dev = 'png') -# Pick up any env set by the invoking script, such as the root dir of the -# results data tree -source("/inputdir/Env.R") -``` - -```{r child = 'metrics_report.Rmd'} -``` diff --git a/tests/metrics/report/report_dockerfile/lifecycle-time.R b/tests/metrics/report/report_dockerfile/lifecycle-time.R deleted file mode 100644 index 4969f23c97..0000000000 --- a/tests/metrics/report/report_dockerfile/lifecycle-time.R +++ /dev/null @@ -1,157 +0,0 @@ -#!/usr/bin/env Rscript -# Copyright (c) 2018-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -# Display how long the various phases of a container lifecycle (run, execute, die etc. -# take. - -library(ggplot2) # ability to plot nicely. -suppressMessages(suppressWarnings(library(tidyr))) # for gather(). - # So we can plot multiple graphs -library(gridExtra) # together. -suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. -suppressMessages(library(jsonlite)) # to load the data. - -testnames=c( - "boot-times" -) - -data=c() -stats=c() -rstats=c() -rstats_names=c() - -# For each set of results -for (currentdir in resultdirs) { - count=1 - dirstats=c() - for (testname in testnames) { - fname=paste(inputdir, currentdir, testname, '.json', sep="") - if ( !file.exists(fname)) { - warning(paste("Skipping non-existent file: ", fname)) - next - } - - # Derive the name from the test result dirname - datasetname=basename(currentdir) - - # Import the data - fdata=fromJSON(fname) - # De-nest the test specific name - fdata=fdata[[testname]] - - cdata=data.frame(workload=as.numeric(fdata$Results$'to-workload'$Result)) - cdata=cbind(cdata, quit=as.numeric(fdata$Results$'to-quit'$Result)) - - cdata=cbind(cdata, tokernel=as.numeric(fdata$Results$'to-kernel'$Result)) - cdata=cbind(cdata, inkernel=as.numeric(fdata$Results$'in-kernel'$Result)) - cdata=cbind(cdata, total=as.numeric(fdata$Results$'total'$Result)) - - cdata=cbind(cdata, count=seq_len(length(cdata[,"workload"]))) - cdata=cbind(cdata, runtime=rep(datasetname, length(cdata[, "workload"]) )) - - # Calculate some stats for total time - sdata=data.frame(workload_mean=mean(cdata$workload)) - sdata=cbind(sdata, workload_min=min(cdata$workload)) - sdata=cbind(sdata, workload_max=max(cdata$workload)) - sdata=cbind(sdata, workload_sd=sd(cdata$workload)) - sdata=cbind(sdata, workload_cov=((sdata$workload_sd / sdata$workload_mean) * 100)) - sdata=cbind(sdata, runtime=datasetname) - - sdata=cbind(sdata, quit_mean = mean(cdata$quit)) - sdata=cbind(sdata, quit_min = min(cdata$quit)) - sdata=cbind(sdata, quit_max = max(cdata$quit)) - sdata=cbind(sdata, quit_sd = sd(cdata$quit)) - sdata=cbind(sdata, quit_cov = (sdata$quit_sd / sdata$quit_mean) * 100) - - sdata=cbind(sdata, tokernel_mean = mean(cdata$tokernel)) - sdata=cbind(sdata, inkernel_mean = mean(cdata$inkernel)) - sdata=cbind(sdata, total_mean = mean(cdata$total)) - - # Store away as a single set - data=rbind(data, cdata) - stats=rbind(stats, sdata) - - # Store away some stats for the text table - dirstats[count]=round(sdata$tokernel_mean, digits=2) - count = count + 1 - dirstats[count]=round(sdata$inkernel_mean, digits=2) - count = count + 1 - dirstats[count]=round(sdata$workload_mean, digits=2) - count = count + 1 - dirstats[count]=round(sdata$quit_mean, digits=2) - count = count + 1 - dirstats[count]=round(sdata$total_mean, digits=2) - count = count + 1 - } - rstats=rbind(rstats, dirstats) - rstats_names=rbind(rstats_names, datasetname) -} - -unts=c("s", "s", "s", "s", "s") -rstats=rbind(rstats, unts) -rstats_names=rbind(rstats_names, "Units") - - -# If we have only 2 sets of results, then we can do some more -# stats math for the text table -if (length(resultdirs) == 2) { - # This is a touch hard wired - but we *know* we only have two - # datasets... - diff=c() - for( i in 1:5) { - difference = as.double(rstats[2,i]) - as.double(rstats[1,i]) - val = 100 * (difference/as.double(rstats[1,i])) - diff[i] = paste(round(val, digits=2), "%", sep=" ") - } - - rstats=rbind(rstats, diff) - rstats_names=rbind(rstats_names, "Diff") -} - -rstats=cbind(rstats_names, rstats) - -# Set up the text table headers -colnames(rstats)=c("Results", "2k", "ik", "2w", "2q", "tot") - - -# Build us a text table of numerical results -stats_plot = suppressWarnings(ggtexttable(data.frame(rstats, check.names=FALSE), - theme=ttheme(base_size=8), - rows=NULL - )) - -# plot how samples varioed over 'time' -line_plot <- ggplot() + - geom_line( data=data, aes(count, workload, color=runtime)) + - geom_smooth( data=data, aes(count, workload, color=runtime), se=FALSE, method="loess") + - xlab("Iteration") + - ylab("Time (s)") + - ggtitle("Boot to workload", subtitle="First container") + - ylim(0, NA) + - theme(axis.text.x=element_text(angle=90)) - -boot_boxplot <- ggplot() + - geom_boxplot( data=data, aes(runtime, workload, color=runtime), show.legend=FALSE) + - ylim(0, NA) + - ylab("Time (s)") - -# convert the stats to a long format so we can more easily do a side-by-side barplot -longstats <- gather(stats, measure, value, workload_mean, quit_mean, inkernel_mean, tokernel_mean, total_mean) - -bar_plot <- ggplot() + - geom_bar( data=longstats, aes(measure, value, fill=runtime), stat="identity", position="dodge", show.legend=FALSE) + - xlab("Phase") + - ylab("Time (s)") + - ggtitle("Lifecycle phase times", subtitle="Mean") + - ylim(0, NA) + - theme(axis.text.x=element_text(angle=90)) - -master_plot = grid.arrange( - bar_plot, - line_plot, - stats_plot, - boot_boxplot, - nrow=2, - ncol=2 ) diff --git a/tests/metrics/report/report_dockerfile/mem-in-cont.R b/tests/metrics/report/report_dockerfile/mem-in-cont.R deleted file mode 100644 index 8dede6b997..0000000000 --- a/tests/metrics/report/report_dockerfile/mem-in-cont.R +++ /dev/null @@ -1,142 +0,0 @@ -#!/usr/bin/env Rscript -# Copyright (c) 2018-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -# Analyse the runtime component memory footprint data. - -library(ggplot2) # ability to plot nicely. - # So we can plot multiple graphs -library(gridExtra) # together. -suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. -suppressMessages(library(jsonlite)) # to load the data. - -testnames=c( - "memory-footprint-inside-container" -) - -data=c() -rstats=c() -rstats_rows=c() -rstats_cols=c() - -# For each set of results -for (currentdir in resultdirs) { - dirstats=c() - # For the two different types of memory footprint measures - for (testname in testnames) { - # R seems not to like double path slashes '//' ? - fname=paste(inputdir, currentdir, testname, '.json', sep="") - if ( !file.exists(fname)) { - warning(paste("Skipping non-existent file: ", fname)) - next - } - - # Derive the name from the test result dirname - datasetname=basename(currentdir) - - # Import the data - fdata=fromJSON(fname) - fdata=fdata[[testname]] - # Copy the average result into a shorter, more accesible name - fdata$requested=fdata$Results$memrequest$Result - fdata$total=fdata$Results$memtotal$Result - fdata$free=fdata$Results$memfree$Result - fdata$avail=fdata$Results$memavailable$Result - - # And lets work out what % we have 'lost' between the amount requested - # and the total the container actually sees. - fdata$lost=fdata$requested - fdata$total - fdata$pctotal= 100 * (fdata$lost/ fdata$requested) - - fdata$Runtime=rep(datasetname, length(fdata$Result) ) - - # Store away the bits we need - data=rbind(data, data.frame( - Result=fdata$requested, - Type="requested", - Runtime=fdata$Runtime )) - - data=rbind(data, data.frame( - Result=fdata$total, - Type="total", - Runtime=fdata$Runtime )) - - data=rbind(data, data.frame( - Result=fdata$free, - Type="free", - Runtime=fdata$Runtime )) - - data=rbind(data, data.frame( - Result=fdata$avail, - Type="avail", - Runtime=fdata$Runtime )) - - data=rbind(data, data.frame( - Result=fdata$lost, - Type="lost", - Runtime=fdata$Runtime )) - - data=rbind(data, data.frame( - Result=fdata$pctotal, - Type="% consumed", - Runtime=fdata$Runtime )) - - # Store away some stats for the text table - dirstats=rbind(dirstats, round(fdata$requested, digits=2) ) - dirstats=rbind(dirstats, round(fdata$total, digits=2) ) - dirstats=rbind(dirstats, round(fdata$free, digits=2) ) - dirstats=rbind(dirstats, round(fdata$avail, digits=2) ) - dirstats=rbind(dirstats, round(fdata$lost, digits=2) ) - dirstats=rbind(dirstats, round(fdata$pctotal, digits=2) ) - } - rstats=cbind(rstats, dirstats) - rstats_cols=append(rstats_cols, datasetname) -} - -rstats_rows=c("Requested", "Total", "Free", "Avail", "Consumed", "% Consumed") - -unts=c("Kb", "Kb", "Kb", "Kb", "Kb", "%") -rstats=cbind(rstats, unts) -rstats_cols=append(rstats_cols, "Units") - -# If we have only 2 sets of results, then we can do some more -# stats math for the text table -if (length(resultdirs) == 2) { - # This is a touch hard wired - but we *know* we only have two - # datasets... - diff=c() - # Just the first three entries - meaningless for the pctotal entry - for (n in 1:5) { - difference = (as.double(rstats[n,2]) - as.double(rstats[n,1])) - val = 100 * (difference/as.double(rstats[n,1])) - diff=rbind(diff, round(val, digits=2)) - } - - # Add a blank entry for the other entries - diff=rbind(diff, "") - rstats=cbind(rstats, diff) - rstats_cols=append(rstats_cols, "Diff %") -} - -# Build us a text table of numerical results -stats_plot = suppressWarnings(ggtexttable(data.frame(rstats), - theme=ttheme(base_size=10), - rows=rstats_rows, cols=rstats_cols - )) - -bardata <- subset(data, Type %in% c("requested", "total", "free", "avail")) -# plot how samples varioed over 'time' -barplot <- ggplot() + - geom_bar(data=bardata, aes(Type, Result, fill=Runtime), stat="identity", position="dodge") + - xlab("Measure") + - ylab("Size (Kb)") + - ggtitle("In-container memory statistics") + - ylim(0, NA) + - theme(axis.text.x=element_text(angle=90)) - -master_plot = grid.arrange( - barplot, - stats_plot, - nrow=2, - ncol=1 ) diff --git a/tests/metrics/report/report_dockerfile/memory-footprint.R b/tests/metrics/report/report_dockerfile/memory-footprint.R deleted file mode 100644 index 4d5f53129d..0000000000 --- a/tests/metrics/report/report_dockerfile/memory-footprint.R +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/env Rscript -# Copyright (c) 2018-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -# Analyse the runtime component memory footprint data. - -library(ggplot2) # ability to plot nicely. - # So we can plot multiple graphs -library(gridExtra) # together. -suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. -suppressMessages(library(jsonlite)) # to load the data. - -testnames=c( - "memory-footprint", - "memory-footprint-ksm" -) - -resultsfilesshort=c( - "noKSM", - "KSM" -) - -data=c() -rstats=c() -rstats_names=c() - -# For each set of results -for (currentdir in resultdirs) { - count=1 - dirstats=c() - # For the two different types of memory footprint measures - for (testname in testnames) { - # R seems not to like double path slashes '//' ? - fname=paste(inputdir, currentdir, testname, '.json', sep="") - if ( !file.exists(fname)) { - warning(paste("Skipping non-existent file: ", fname)) - next - } - - # Derive the name from the test result dirname - datasetname=basename(currentdir) - datasetvariant=resultsfilesshort[count] - - # Import the data - fdata=fromJSON(fname) - fdata=fdata[[testname]] - # Copy the average result into a shorter, more accesible name - fdata$Result=fdata$Results$average$Result - fdata$variant=rep(datasetvariant, length(fdata$Result) ) - fdata$Runtime=rep(datasetname, length(fdata$Result) ) - fdata$Count=seq_len(length(fdata$Result)) - - # Calculate some stats - fdata.mean = mean(fdata$Result) - fdata.min = min(fdata$Result) - fdata.max = max(fdata$Result) - fdata.sd = sd(fdata$Result) - fdata.cov = (fdata.sd / fdata.mean) * 100 - - # Store away the bits we need - data=rbind(data, data.frame( - Result=fdata$Result, - Count=fdata$Count, - Runtime=fdata$Runtime, - variant=fdata$variant ) ) - - # Store away some stats for the text table - dirstats[count]=round(fdata.mean, digits=2) - - count = count + 1 - } - rstats=rbind(rstats, dirstats) - rstats_names=rbind(rstats_names, datasetname) -} - -rstats=cbind(rstats_names, rstats) -unts=rep("Kb", length(resultdirs)) - -# If we have only 2 sets of results, then we can do some more -# stats math for the text table -if (length(resultdirs) == 2) { - # This is a touch hard wired - but we *know* we only have two - # datasets... - diff=c("diff") - difference = (as.double(rstats[2,2]) - as.double(rstats[1,2])) - val = 100 * (difference/as.double(rstats[1,2])) - diff[2] = round(val, digits=2) - difference = (as.double(rstats[2,3]) - as.double(rstats[1,3])) - val = 100 * (difference/as.double(rstats[1,3])) - diff[3] = round(val, digits=2) - rstats=rbind(rstats, diff) - - unts[3]="%" -} - -rstats=cbind(rstats, unts) - -# Set up the text table headers -colnames(rstats)=c("Results", resultsfilesshort, "Units") - -# Build us a text table of numerical results -stats_plot = suppressWarnings(ggtexttable(data.frame(rstats), - theme=ttheme(base_size=10), - rows=NULL - )) - -# plot how samples varioed over 'time' -point_plot <- ggplot() + - geom_point( data=data, aes(Runtime, Result, color=variant), position=position_dodge(0.1)) + - xlab("Dataset") + - ylab("Size (Kb)") + - ggtitle("Average PSS footprint", subtitle="per container") + - ylim(0, NA) + - theme(axis.text.x=element_text(angle=90)) - -master_plot = grid.arrange( - point_plot, - stats_plot, - nrow=1, - ncol=2 ) diff --git a/tests/metrics/report/report_dockerfile/metrics_report.Rmd b/tests/metrics/report/report_dockerfile/metrics_report.Rmd deleted file mode 100644 index 9dca2b49f6..0000000000 --- a/tests/metrics/report/report_dockerfile/metrics_report.Rmd +++ /dev/null @@ -1,63 +0,0 @@ ---- -# Copyright (c) 2018-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 ---- -\pagebreak - -# Introduction -This report compares the metrics between multiple sets of data generated from -the [Kata Containers report generation scripts](https://github.com/kata-containers/kata-containers/tree/main/metrics/report/README.md). - -This report was generated using the data from the **`r resultdirs`** results directories. - -\pagebreak - -# Container scaling system footprint -This [test](https://github.com/kata-containers/kata-containers/blob/main/metrics/density/footprint_data.sh) -measures the system memory footprint impact whilst running an increasing number -of containers. For this test, [KSM](https://en.wikipedia.org/wiki/Kernel_same-page_merging) - is enabled. The results show how system memory is consumed for different sized -containers, and their average system memory footprint cost and density (how many -containers you can fit per Gb) is calculated. - -```{r footprint-density, echo=FALSE, fig.cap="System Memory density"} -test_name_extra="-ksm" -source('footprint-density.R') -rm(test_name_extra) -``` -\pagebreak - -# Memory used inside container -This [test](https://github.com/kata-containers/kata-containers/blob/main/metrics/density/memory_usage_inside_container.sh) -measures the memory inside a container taken by the container runtime. It shows the difference between the amount of memory requested for the container, and the amount the container can actually 'see'. - -The *% Consumed* is the key row in the table, which compares the *Requested* against *Total* values. - - -```{r mem-in-cont, echo=FALSE, fig.cap="System Memory density"} -source('mem-in-cont.R') -``` -\pagebreak - -# Container boot lifecycle times -This [test](https://github.com/kata-containers/kata-containers/blob/main/metrics/time/launch_times.sh) -uses the `date` command on the host and in the container, as well as data from the container -kernel `dmesg`, to ascertain how long different phases of the create/boot/run/delete -Docker container lifecycle take for the first launched container. - -To decode the stats table, the prefixes are 'to(`2`)' and '`i`n'. The suffixes are '`k`ernel', '`w`orkload' and '`q`uit'. 'tot' is the total time for a complete container start-to-finished cycle. - -```{r lifecycle-time, echo=FALSE, fig.cap="Execution lifecycle times"} -source('lifecycle-time.R') -``` -\pagebreak - -# Test setup details - -This table describes the test system details, as derived from the information contained -in the test results files. - - -```{r dut, echo=FALSE, fig.cap="System configuration details"} -source('dut-details.R') diff --git a/tests/metrics/report/report_dockerfile/network-cpu.R b/tests/metrics/report/report_dockerfile/network-cpu.R deleted file mode 100644 index b6df50b304..0000000000 --- a/tests/metrics/report/report_dockerfile/network-cpu.R +++ /dev/null @@ -1,132 +0,0 @@ -#!/usr/bin/env Rscript -# Copyright (c) 2018-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -# Analyse the runtime component memory footprint data. - -library(ggplot2) # ability to plot nicely. - # So we can plot multiple graphs -library(gridExtra) # together. -suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. -suppressMessages(library(jsonlite)) # to load the data. - -testnames=c( - "cpu-information" -) - -resultsfilesshort=c( - "CPU" -) - -data=c() -rstats=c() -rstats_rows=c() -rstats_cols=c() - -Gdenom = (1000.0 * 1000.0 * 1000.0) - -# For each set of results -for (currentdir in resultdirs) { - dirstats=c() - # For the two different types of memory footprint measures - for (testname in testnames) { - # R seems not to like double path slashes '//' ? - fname=paste(inputdir, currentdir, testname, '.json', sep="") - if ( !file.exists(fname)) { - warning(paste("Skipping non-existent file: ", fname)) - next - } - - # Derive the name from the test result dirname - datasetname=basename(currentdir) - datasetvariant=resultsfilesshort[count] - - # Import the data - fdata=fromJSON(fname) - fdata=fdata[[testname]] - # Copy the average result into a shorter, more accesible name - fdata$ips=fdata$Results$"instructions per cycle"$Result - fdata$Gcycles=fdata$Results$cycles$Result / Gdenom - fdata$Ginstructions=fdata$Results$instructions$Result / Gdenom - fdata$variant=rep(datasetvariant, length(fdata$Result) ) - fdata$Runtime=rep(datasetname, length(fdata$Result) ) - - # Store away the bits we need - data=rbind(data, data.frame( - Result=fdata$ips, - Type="ips", - Runtime=fdata$Runtime, - variant=fdata$variant ) ) - data=rbind(data, data.frame( - Result=fdata$Gcycles, - Type="Gcycles", - Runtime=fdata$Runtime, - variant=fdata$variant ) ) - data=rbind(data, data.frame( - Result=fdata$Ginstructions, - Type="Ginstr", - Runtime=fdata$Runtime, - variant=fdata$variant ) ) - - # Store away some stats for the text table - dirstats=rbind(dirstats, round(fdata$ips, digits=2) ) - dirstats=rbind(dirstats, round(fdata$Gcycles, digits=2) ) - dirstats=rbind(dirstats, round(fdata$Ginstructions, digits=2) ) - } - rstats=cbind(rstats, dirstats) - rstats_cols=append(rstats_cols, datasetname) -} - -rstats_rows=c("IPS", "GCycles", "GInstr") - -unts=c("Ins/Cyc", "G", "G") -rstats=cbind(rstats, unts) -rstats_cols=append(rstats_cols, "Units") - -# If we have only 2 sets of results, then we can do some more -# stats math for the text table -if (length(resultdirs) == 2) { - # This is a touch hard wired - but we *know* we only have two - # datasets... - diff=c() - for (n in 1:3) { - difference = (as.double(rstats[n,2]) - as.double(rstats[n,1])) - val = 100 * (difference/as.double(rstats[n,1])) - diff=rbind(diff, round(val, digits=2)) - } - rstats=cbind(rstats, diff) - rstats_cols=append(rstats_cols, "Diff %") -} - -# Build us a text table of numerical results -stats_plot = suppressWarnings(ggtexttable(data.frame(rstats), - theme=ttheme(base_size=10), - rows=rstats_rows, cols=rstats_cols - )) - -# plot how samples varioed over 'time' -ipsdata <- subset(data, Type %in% c("ips")) -ips_plot <- ggplot() + - geom_bar(data=ipsdata, aes(Type, Result, fill=Runtime), stat="identity", position="dodge") + - xlab("Measure") + - ylab("IPS") + - ggtitle("Instructions Per Cycle") + - ylim(0, NA) + - theme(axis.text.x=element_text(angle=90)) - -cycdata <- subset(data, Type %in% c("Gcycles", "Ginstr")) -cycles_plot <- ggplot() + - geom_bar(data=cycdata, aes(Type, Result, fill=Runtime), stat="identity", position="dodge", show.legend=FALSE) + - xlab("Measure") + - ylab("Count (G)") + - ggtitle("Cycles and Instructions") + - ylim(0, NA) + - theme(axis.text.x=element_text(angle=90)) - -master_plot = grid.arrange( - ips_plot, - cycles_plot, - stats_plot, - nrow=2, - ncol=2 )