From b76423b31dd7f6639d699400d77480b8e930b5d3 Mon Sep 17 00:00:00 2001
From: Mark Stemm
Date: Fri, 10 Jun 2016 15:35:15 -0700
Subject: [PATCH] Useful scripts to collect/display perf results.

Add shell scripts to make it easier to collect performance results from
traces, live tests, and phoronix tests.

With run_performance_tests.sh you specify the following:
 - a subject program to run, using --root
 - a name to give to this set of results, using --variant
 - a test to run, using --test
 - a file to write the results to, using --results.

For tests that start with "trace", the script runs falco/sysdig on the
trace file and measures the time taken to read it. For other tests, the
script handles starting falco/sysdig, starting a cpu measurement script
(a wrapper around top, just to provide values identical to what you
would see using top) to measure the cpu usage of falco/sysdig, and
running a live test. The measurement interval for cpu usage depends on
the test being run--10 seconds for most tests, 2 seconds for shorter
tests. The output is written as json to the file specified in
--results.

Also add R scripts to easily display the results from the shell script.
plot-live.r shows a line chart of the cpu usage for the provided
variants over time. plot-traces.r shows grouped bar charts of the
user/system/total time taken for the provided variants and traces.

One known bug: you have to turn the results file into valid json by
adding leading/trailing []s before the R scripts can read it.
---
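Example invocation (a sketch; the option names come from usage() in
run_performance_tests.sh, but the paths, variant name, and test argument
below are illustrative only):

    # From the test/ directory: run every trace-file test against a falco
    # build and tag the results as variant "falco-dev". The script decides
    # falco vs. sysdig from whether the --root path contains "falco".
    ./run_performance_tests.sh --root ~/falco/build \
        --variant falco-dev \
        --test trace:all \
        --results ./results.json \
        --output ./program-output.txt

Each run appends one JSON object per measurement to the --results file,
in the shapes produced by time_cmd() and cpu_monitor.sh:

    {"time": "...", "benchmark": "...", "file": "...", "variant": "falco-dev", "elapsed": {"real": ..., "user": ..., "sys": ...}},
    {"sample": 1, "benchmark": "htop", "variant": "falco-dev", "cpu_usage": ...},
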
 test/cpu_monitor.sh           |   9 +
 test/plot-live.r              |  40 +++++
 test/plot-traces.r            |  35 ++++
 test/run_performance_tests.sh | 316 ++++++++++++++++++++++++++++++++++
 4 files changed, 400 insertions(+)
 create mode 100644 test/cpu_monitor.sh
 create mode 100644 test/plot-live.r
 create mode 100644 test/plot-traces.r
 create mode 100644 test/run_performance_tests.sh

diff --git a/test/cpu_monitor.sh b/test/cpu_monitor.sh
new file mode 100644
index 00000000..594536d3
--- /dev/null
+++ b/test/cpu_monitor.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+SUBJ_PID=$1
+BENCHMARK=$2
+VARIANT=$3
+RESULTS_FILE=$4
+CPU_INTERVAL=$5
+
+top -d $CPU_INTERVAL -b -p $SUBJ_PID | grep -E '(falco|sysdig)' --line-buffered | awk -v benchmark=$BENCHMARK -v variant=$VARIANT '{printf("{\"sample\": %d, \"benchmark\": \"%s\", \"variant\": \"%s\", \"cpu_usage\": %s},\n", NR, benchmark, variant, $9); fflush();}' >> $RESULTS_FILE
diff --git a/test/plot-live.r b/test/plot-live.r
new file mode 100644
index 00000000..1305da65
--- /dev/null
+++ b/test/plot-live.r
@@ -0,0 +1,40 @@
+require(jsonlite)
+library(ggplot2)
+library(GetoptLong)
+
+initial.options <- commandArgs(trailingOnly = FALSE)
+file.arg.name <- "--file="
+script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)])
+script.basename <- dirname(script.name)
+
+if (substr(script.basename, 1, 1) != '/') {
+    script.basename = paste(getwd(), script.basename, sep='/')
+}
+
+results = paste(script.basename, "results.json", sep='/')
+output = "./output.png"
+
+GetoptLong(
+    "results=s", "Path to results file",
+    "benchmark=s", "Benchmark from results file to graph",
+    "variant=s@", "Variant(s) to include in graph. Can be specified multiple times",
+    "output=s", "Output graph file"
+)
+
+res <- fromJSON(results, flatten=TRUE)
+
+res2 = res[res$benchmark == benchmark & res$variant %in% variant,]
+
+plot <- ggplot(data=res2, aes(x=sample, y=cpu_usage, group=variant, colour=variant)) +
+    geom_line() +
+    ylab("CPU Usage (%)") +
+    xlab("Time") +
+    ggtitle(sprintf("Falco/Sysdig CPU Usage: %s", benchmark)) + theme(legend.position=c(.2, .88));
+
+print(paste("Writing graph to", output, sep=" "))
+ggsave(file=output)
+
+
+
+
+
diff --git a/test/plot-traces.r b/test/plot-traces.r
new file mode 100644
index 00000000..a38bfaf9
--- /dev/null
+++ b/test/plot-traces.r
@@ -0,0 +1,35 @@
+require(jsonlite)
+library(ggplot2)
+library(reshape)
+
+res <- fromJSON("/home/mstemm/results.txt", flatten=TRUE)
+
+plot <- ggplot(data=res, aes(x=config, y=elapsed.real)) +
+    geom_bar(stat = "summary", fun.y = "mean") +
+    coord_flip() +
+    facet_grid(shortfile ~ .) +
+    ylab("Wall Clock Time (sec)") +
+    xlab("Trace File/Program")
+
+
+ggsave(file="/mnt/sf_mstemm/res-real.png")
+
+plot <- ggplot(data=res, aes(x=config, y=elapsed.user)) +
+    geom_bar(stat = "summary", fun.y = "mean") +
+    coord_flip() +
+    facet_grid(shortfile ~ .) +
+    ylab("User Time (sec)") +
+    xlab("Trace File/Program")
+
+
+ggsave(file="/mnt/sf_mstemm/res-user.png")
+
+res2 <- melt(res, id.vars = c("config", "shortfile"), measure.vars = c("elapsed.sys", "elapsed.user"))
+plot <- ggplot(data=res2, aes(x=config, y=value, fill=variable, order=variable)) +
+    geom_bar(stat = "summary", fun.y = "mean") +
+    coord_flip() +
+    facet_grid(shortfile ~ .) +
+    ylab("User/System Time (sec)") +
+    xlab("Trace File/Program")
+
+ggsave(file="/mnt/sf_mstemm/res-sys-user.png")
diff --git a/test/run_performance_tests.sh b/test/run_performance_tests.sh
new file mode 100644
index 00000000..4bdf2bc3
--- /dev/null
+++ b/test/run_performance_tests.sh
@@ -0,0 +1,316 @@
+#!/bin/bash
+
+#set -x
+
+trap "cleanup; exit" SIGHUP SIGINT SIGTERM
+
+function download_trace_files() {
+
+    (mkdir -p $TRACEDIR && rm -rf $TRACEDIR/traces-perf && curl -fo $TRACEDIR/traces-perf.zip https://s3.amazonaws.com/download.draios.com/falco-tests/traces-perf.zip && unzip -d $TRACEDIR $TRACEDIR/traces-perf.zip && rm -f $TRACEDIR/traces-perf.zip) || exit 1
+
+}
+
+function time_cmd() {
+    cmd="$1"
+    file="$2"
+
+    benchmark=`basename $file .scap`
+
+    echo -n "$benchmark: "
+    for i in `seq 1 5`; do
+        echo -n "$i "
+        time=`date --iso-8601=sec`
+        /usr/bin/time -a -o $RESULTS_FILE --format "{\"time\": \"$time\", \"benchmark\": \"$benchmark\", \"file\": \"$file\", \"variant\": \"$VARIANT\", \"elapsed\": {\"real\": %e, \"user\": %U, \"sys\": %S}}," $cmd >> $OUTPUT_FILE 2>&1
+    done
+    echo ""
+}
+
+function run_falco_on() {
+    file="$1"
+
+    cmd="$ROOT/userspace/falco/falco -c $ROOT/../falco.yaml -r $ROOT/../rules/falco_rules.yaml --option=stdout_output.enabled=false -e $file"
+
+    time_cmd "$cmd" "$file"
+}
+
+function run_sysdig_on() {
+    file="$1"
+
+    cmd="$ROOT/userspace/sysdig/sysdig -N -z -r $file evt.type=none"
+
+    time_cmd "$cmd" "$file"
+}
+
+function run_trace() {
+
+    if [ ! -e $TRACEDIR ]; then
+        download_trace_files
+    fi
+
+    trace_file="$1"
+
+    if [ $trace_file == "all" ]; then
+        files=($TRACEDIR/traces-perf/*.scap)
+    else
+        files=($TRACEDIR/traces-perf/$trace_file.scap)
+    fi
+
+    for file in ${files[@]}; do
+        if [[ $ROOT == *"falco"* ]]; then
+            run_falco_on "$file"
+        else
+            run_sysdig_on "$file"
+        fi
+    done
+}
+
+function start_monitor_cpu_usage() {
+    echo "   monitoring cpu usage for sysdig/falco program"
+
+    setsid bash `dirname $0`/cpu_monitor.sh $SUBJ_PID $live_test $VARIANT $RESULTS_FILE $CPU_INTERVAL &
+    CPU_PID=$!
+    sleep 5
+}
+
+function start_subject_prog() {
+
+    echo "   starting falco/sysdig program"
+    # Do a blocking sudo command now just to ensure we have a password
+    sudo bash -c ""
+
+    if [[ $ROOT == *"falco"* ]]; then
+        sudo $ROOT/userspace/falco/falco -c $ROOT/../falco.yaml -r $ROOT/../rules/falco_rules.yaml --option=stdout_output.enabled=false > ./prog-output.txt 2>&1 &
+    else
+        sudo $ROOT/userspace/sysdig/sysdig -N -z evt.type=none &
+    fi
+
+    SUDO_PID=$!
+    sleep 5
+    SUBJ_PID=`ps -h -o pid --ppid $SUDO_PID`
+
+    if [ -z $SUBJ_PID ]; then
+        echo "Could not find pid of subject program--did it start successfully? Not continuing."
+        exit 1
+    fi
+}
+
+function run_htop() {
+    screen -S htop-screen -d -m /usr/bin/htop -d2
+    sleep 90
+    screen -X -S htop-screen quit
+}
+
+function run_juttle_examples() {
+    pushd $SCRIPTDIR/../../juttle-engine/examples
+    docker-compose -f dc-juttle-engine.yml -f aws-cloudwatch/dc-aws-cloudwatch.yml -f elastic-newstracker/dc-elastic.yml -f github-tutorial/dc-elastic.yml -f nginx_logs/dc-nginx-logs.yml -f postgres-diskstats/dc-postgres.yml -f cadvisor-influx/dc-cadvisor-influx.yml up -d
+    sleep 120
+    docker-compose -f dc-juttle-engine.yml -f aws-cloudwatch/dc-aws-cloudwatch.yml -f elastic-newstracker/dc-elastic.yml -f github-tutorial/dc-elastic.yml -f nginx_logs/dc-nginx-logs.yml -f postgres-diskstats/dc-postgres.yml -f cadvisor-influx/dc-cadvisor-influx.yml stop
+    docker-compose -f dc-juttle-engine.yml -f aws-cloudwatch/dc-aws-cloudwatch.yml -f elastic-newstracker/dc-elastic.yml -f github-tutorial/dc-elastic.yml -f nginx_logs/dc-nginx-logs.yml -f postgres-diskstats/dc-postgres.yml -f cadvisor-influx/dc-cadvisor-influx.yml rm -fv
+    popd
+}
+
+function run_kubernetes_demo() {
+    pushd $SCRIPTDIR/../../infrastructure/test-infrastructures/kubernetes-demo
+    bash run-local.sh
+    bash init.sh
+    sleep 600
+    docker stop $(docker ps -qa)
+    docker rm -fv $(docker ps -qa)
+    popd
+}
+
+function run_live_test() {
+
+    live_test="$1"
+
+    echo "Running live test $live_test"
+
+    case "$live_test" in
+        htop ) CPU_INTERVAL=2;;
+        * ) CPU_INTERVAL=10;;
+    esac
+
+    start_subject_prog
+    start_monitor_cpu_usage
+
+    echo "   starting live program and waiting for it to finish"
+    case "$live_test" in
+        htop ) run_htop ;;
+        juttle-examples ) run_juttle_examples ;;
+        kube-demo ) run_kubernetes_demo ;;
+        * ) usage; cleanup; exit 1 ;;
+    esac
+
+    cleanup
+
+}
+
+function cleanup() {
+
+    if [ -n "$SUBJ_PID" ] ; then
+        echo "   stopping falco/sysdig program $SUBJ_PID"
+        sudo kill $SUBJ_PID
+    fi
+
+    if [ -n "$CPU_PID" ] ; then
+        echo "   stopping cpu monitor program $CPU_PID"
+        kill -- -$CPU_PID
+    fi
+}
+
+run_live_tests() {
+    test="$1"
+
+    if [ $test == "all" ]; then
+        tests="htop juttle-examples kube-demo"
+    else
+        tests=$test
+    fi
+
+    for test in $tests; do
+        run_live_test $test
+    done
+}
+
+function run_phoronix_test() {
+
+    live_test="$1"
+
+    case "$live_test" in
+        pts/aio-stress | pts/fs-mark | pts/iozone | pts/network-loopback | pts/nginx | pts/pybench | pts/redis | pts/sqlite | pts/unpack-linux ) CPU_INTERVAL=2;;
+        * ) CPU_INTERVAL=10;;
+    esac
+
+    echo "Running phoronix test $live_test"
+
+    start_subject_prog
+    start_monitor_cpu_usage
+
+    echo "   starting phoronix test and waiting for it to finish"
+
+    TEST_RESULTS_NAME=$VARIANT FORCE_TIMES_TO_RUN=1 phoronix-test-suite default-run $live_test
+
+    cleanup
+
+}
+
+# To install and configure phoronix:
+#   (redhat instructions, adapt as necessary for ubuntu or other distros)
+#   - install phoronix: yum install phoronix-test-suite.noarch
+#   - install dependencies not handled by phoronix: yum install libaio-devel pcre-devel popt-devel glibc-static zlib-devel nc bc
+#   - fix trivial bugs in tests:
+#       - edit ~/.phoronix-test-suite/installed-tests/pts/network-loopback-1.0.1/network-loopback line "nc -d -l 9999 > /dev/null &" to "nc -d -l 9999 > /dev/null &"
+#       - edit ~/.phoronix-test-suite/test-profiles/pts/nginx-1.1.0/test-definition.xml line "-n 500000 -c 100 http://localhost:8088/test.html" to "-n 500000 -c 100 http://127.0.0.1:8088/test.html"
+#   - phoronix batch-install
+
+function run_phoronix_tests() {
+
+    test="$1"
+
+    if [ $test == "all" ]; then
+        tests="pts/aio-stress pts/apache pts/blogbench pts/compilebench pts/dbench pts/fio pts/fs-mark pts/iozone pts/network-loopback pts/nginx pts/pgbench pts/phpbench pts/postmark pts/pybench pts/redis pts/sqlite pts/unpack-linux"
+    else
+        tests=$test
+    fi
+
+    for test in $tests; do
+        run_phoronix_test $test
+    done
+}
+
+run_tests() {
+
+    IFS=':' read -ra PARTS <<< "$TEST"
+
+    case "${PARTS[0]}" in
+        trace ) run_trace "${PARTS[1]}" ;;
+        live ) run_live_tests "${PARTS[1]}" ;;
+        phoronix ) run_phoronix_tests "${PARTS[1]}" ;;
+        * ) usage; exit 1 ;;
+    esac
+}
+
+usage() {
+    echo "Usage: $0 [options]"
+    echo ""
+    echo "Options:"
+    echo "   -h/--help: show this help"
+    echo "   -v/--variant: a variant name to attach to this set of test results"
+    echo "   -r/--root: root directory containing falco/sysdig binaries (i.e. where you ran 'cmake')"
+    echo "   -R/--results: append test results to this file"
+    echo "   -o/--output: append program output to this file"
+    echo "   -t/--test: test to run. Argument has the following format:"
+    echo "       trace:<trace name>: read the specified trace file."
+    echo "       trace:all means run all traces"
+    echo "       live:<live test>: run the specified live test."
+    echo "       live:all means run all live tests."
+    echo "       possible live tests:"
+    echo "       live:htop: run htop -d2"
+    echo "       live:kube-demo: run kubernetes demo from infrastructure repo"
+    echo "       live:juttle-examples: run a juttle demo environment based on docker-compose"
+    echo "       phoronix:<phoronix test>: run the specified phoronix test."
+    echo "          if <phoronix test> is not 'all', it is passed directly to the command line of \"phoronix-test-suite run <phoronix test>\""
+    echo "          if <phoronix test> is 'all', a built-in set of phoronix tests will be chosen and run"
+    echo "   -T/--tracedir: Look for trace files in this directory. If it doesn't exist, will download trace files from s3"
+}
+
+OPTS=`getopt -o hv:r:R:o:t:T: --long help,variant:,root:,results:,output:,test:,tracedir: -n $0 -- "$@"`
+
+if [ $? != 0 ]; then
+    echo "Exiting" >&2
+    exit 1
+fi
+
+eval set -- "$OPTS"
+
+VARIANT="falco"
+ROOT=`dirname $0`/../build
+SCRIPTDIR=`dirname $0`
+RESULTS_FILE=`dirname $0`/results.json
+OUTPUT_FILE=`dirname $0`/program-output.txt
+TEST=trace:all
+TRACEDIR=/tmp/falco-perf-traces.$USER
+CPU_INTERVAL=10
+
+while true; do
+    case "$1" in
+        -h | --help ) usage; exit 1;;
+        -v | --variant ) VARIANT="$2"; shift 2;;
+        -r | --root ) ROOT="$2"; shift 2;;
+        -R | --results ) RESULTS_FILE="$2"; shift 2;;
+        -o | --output ) OUTPUT_FILE="$2"; shift 2;;
+        -t | --test ) TEST="$2"; shift 2;;
+        -T | --tracedir ) TRACEDIR="$2"; shift 2;;
+        * ) break;;
+    esac
+done
+
+if [ -z $VARIANT ]; then
+    echo "A test variant name must be provided. Not continuing."
+    exit 1
+fi
+
+if [ -z $ROOT ]; then
+    echo "A root directory containing a falco/sysdig binary must be provided. Not continuing."
+    exit 1
+fi
+
+ROOT=`realpath $ROOT`
+
+
+if [ -z $RESULTS_FILE ]; then
+    echo "An output file for test results must be provided. Not continuing."
+    exit 1
+fi
+
+if [ -z $OUTPUT_FILE ]; then
+    echo "A file for program output must be provided. Not continuing."
+    exit 1
+fi
+
+if [ -z $TEST ]; then
+    echo "A test must be provided. Not continuing."
+    exit 1
+fi
+
+run_tests
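
A possible post-processing sketch (file names and the benchmark/variant
values are illustrative): wrap the appended objects in []s, as noted in
the commit message, so the results file is valid json, then plot cpu
usage with plot-live.r. The --variant flag can be repeated because of
the "variant=s@" spec:

    # Wrap the records in [ ] and drop the trailing comma on the last line.
    { echo '['; sed '$ s/,[[:space:]]*$//' results.json; echo ']'; } > results-valid.json

    Rscript plot-live.r --results results-valid.json --benchmark htop \
        --variant falco --variant sysdig --output cpu-usage.png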