kubeshark/performance_analysis/plot_from_tapper_logs.py
Nimrod Gilboa Markevich ab38f4c011
Add profiling tools (#1087)
* Add gin-contrib/pprof dependency

* Run pprof server on agent with --profiler flag

* Add --profiler flag to cli

* Fix error message

* Print cpu usage percentage

* measure cpu of current pid instead of globaly on the system

* Add scripts to plot performance

* Plot packetsCount in analysis

* Concat to DataFrame

* Plot in turbo colorscheme

* Make COLORMAP const

* Fix rss units

* Reduce code repetition by adding function for plotting

* Allow grouping based on filenames

* Temporary: Marked with comments where to disable code for experiments

* Add newline at end of file

* Add tap.cpuprofile flag. Change memprofile flag to tap.memprofile

* create tapper modes for debugging using env vars

* Fix rss plot units (MB instead of bytes)

* Remove comment

* Add info to plot script

* Remove tap.cpumemprofile. Rename tap.memprofile to memprofile

* Remove unused import

* Remove whitespaces

Co-authored-by: M. Mert Yıldıran <mehmet@up9.com>

* Remove whitespaces

Co-authored-by: M. Mert Yıldıran <mehmet@up9.com>

* Remove whitespaces

Co-authored-by: M. Mert Yıldıran <mehmet@up9.com>

* Remove whitespaces

Co-authored-by: M. Mert Yıldıran <mehmet@up9.com>

* Remove whitespaces

Co-authored-by: M. Mert Yıldıran <mehmet@up9.com>

* Remove whitespaces

Co-authored-by: M. Mert Yıldıran <mehmet@up9.com>

* Rename debug env vars

* Create package for debug env vars, read each env var once

* Run go mod tidy

* Increment MatchedPairs before emitting

* Only count cores once

* Count virtual and physical cores

* Add dbgctl replace in cli

* Fix lint: Check return values

* Add tap/dbgctl to test-lint make rule

* Replace tap/dbgctl in all modules

* #run_acceptance_tests

* Copy dbgctl module to docker image

* Debug/profile tapper benchmark (#1093)

* add mizu debug env to avoid all extensions

* add readme + run_tapper_benchmark.sh

* temporary change branch name

* fix readme

* fix MIZU_BENCHMARK_CLIENTS_COUNT env

* change tap target to tcp stream

* track live tcp streams

* pr fixes

* rename tapperPacketsCount to ignored_packets_count

* change mizu tapper to mizu debugg

Co-authored-by: David Levanon <dvdlevanon@gmail.com>
Co-authored-by: M. Mert Yıldıran <mehmet@up9.com>
2022-05-18 15:42:13 +03:00

183 lines
6.9 KiB
Python

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pathlib
import re
import sys
import typing
COLORMAP = plt.get_cmap('turbo')
# Extract cpu and rss samples from log files and plot them
# Input: List of log files
#
# example:
# python plot_from_tapper_logs.py 01_no_pcap_01.log 99_normal_00.log
#
# The script assumes that the log file names start with a number (pattern '\d+')
# and groups based on this number. Files that start will the same number will be plotted with the same color.
# Change group_pattern to an empty string to disable this, or change to a regex of your liking.
def get_sample(name: str, line: str, default_value: float):
pattern = name + r': ?(\d+(\.\d+)?)'
maybe_sample = re.findall(pattern, line)
if len(maybe_sample) == 0:
return default_value
sample = float(maybe_sample[0][0])
return sample
def append_sample(name: str, line: str, samples: typing.List[float]):
sample = get_sample(name, line, -1)
if sample == -1:
return
samples.append(sample)
def extract_samples(f: typing.IO) -> typing.Tuple[pd.Series, pd.Series, pd.Series, pd.Series, pd.Series, pd.Series, pd.Series, pd.Series]:
cpu_samples = []
rss_samples = []
count_samples = []
matched_samples = []
live_samples = []
processed_samples = []
heap_samples = []
goroutines_samples = []
for line in f:
append_sample('cpu', line, cpu_samples)
append_sample('rss', line, rss_samples)
ignored_packets_count = get_sample('"ignoredPacketsCount"', line, -1)
packets_count = get_sample('"packetsCount"', line, -1)
if ignored_packets_count != -1 and packets_count != -1:
count_samples.append(packets_count - ignored_packets_count)
append_sample('"matchedPairs"', line, matched_samples)
append_sample('"liveTcpStreams"', line, live_samples)
append_sample('"processedBytes"', line, processed_samples)
append_sample('mem', line, heap_samples)
append_sample('goroutines', line, goroutines_samples)
cpu_samples = pd.Series(cpu_samples)
rss_samples = pd.Series(rss_samples)
count_samples = pd.Series(count_samples)
matched_samples = pd.Series(matched_samples)
live_samples = pd.Series(live_samples)
processed_samples = pd.Series(processed_samples)
heap_samples = pd.Series(heap_samples)
goroutines_samples = pd.Series(goroutines_samples)
return cpu_samples, rss_samples, count_samples, matched_samples, live_samples, processed_samples, heap_samples, goroutines_samples
def plot(ax, df: pd.DataFrame, title: str, xlabel: str, ylabel: str, group_pattern: typing.Optional[str]):
if group_pattern:
color = get_group_color(df.columns, group_pattern)
df.plot(color=color, ax=ax)
else:
df.plot(cmap=COLORMAP, ax=ax)
ax.ticklabel_format(style='plain')
plt.title(title)
plt.legend()
plt.xlabel(xlabel)
plt.ylabel(ylabel)
def get_group_color(names, pattern):
props = [int(re.findall(pattern, pathlib.Path(name).name)[0]) for name in names]
key = dict(zip(sorted(list(set(props))), range(len(set(props)))))
n_colors = len(key)
color_options = plt.get_cmap('jet')(np.linspace(0, 1, n_colors))
groups = [key[prop] for prop in props]
color = color_options[groups] # type: ignore
return color
if __name__ == '__main__':
filenames = sys.argv[1:]
cpu_samples_all_files = []
rss_samples_all_files = []
count_samples_all_files = []
matched_samples_all_files = []
live_samples_all_files = []
processed_samples_all_files = []
heap_samples_all_files = []
goroutines_samples_all_files = []
for ii, filename in enumerate(filenames):
print("Analyzing {}".format(filename))
with open(filename, 'r') as f:
cpu_samples, rss_samples, count_samples, matched_samples, live_samples, processed_samples, heap_samples, goroutines_samples = extract_samples(f)
cpu_samples.name = pathlib.Path(filename).name
rss_samples.name = pathlib.Path(filename).name
count_samples.name = pathlib.Path(filename).name
matched_samples.name = pathlib.Path(filename).name
live_samples.name = pathlib.Path(filename).name
processed_samples.name = pathlib.Path(filename).name
heap_samples.name = pathlib.Path(filename).name
goroutines_samples.name = pathlib.Path(filename).name
cpu_samples_all_files.append(cpu_samples)
rss_samples_all_files.append(rss_samples)
count_samples_all_files.append(count_samples)
matched_samples_all_files.append(matched_samples)
live_samples_all_files.append(live_samples)
processed_samples_all_files.append(processed_samples)
heap_samples_all_files.append(processed_samples)
goroutines_samples_all_files.append(processed_samples)
cpu_samples_df = pd.concat(cpu_samples_all_files, axis=1)
rss_samples_df = pd.concat(rss_samples_all_files, axis=1)
count_samples_df = pd.concat(count_samples_all_files, axis=1)
matched_samples_df = pd.concat(matched_samples_all_files, axis=1)
live_samples_df = pd.concat(live_samples_all_files, axis=1)
processed_samples_df = pd.concat(processed_samples_all_files, axis=1)
heap_samples_df = pd.concat(heap_samples_all_files, axis=1)
goroutines_samples_df = pd.concat(goroutines_samples_all_files, axis=1)
group_pattern = r'^\d+'
cpu_plot = plt.subplot(8, 2, 1)
plot(cpu_plot, cpu_samples_df, 'cpu', '', 'cpu (%)', group_pattern)
cpu_plot.legend().remove()
mem_plot = plt.subplot(8, 2, 2)
plot(mem_plot, (rss_samples_df / 1024 / 1024), 'rss', '', 'mem (mega)', group_pattern)
mem_plot.legend(loc='center left', bbox_to_anchor=(1, 0.5))
packets_plot = plt.subplot(8, 2, 3)
plot(packets_plot, count_samples_df, 'packetsCount', '', 'packetsCount', group_pattern)
packets_plot.legend().remove()
matched_plot = plt.subplot(8, 2, 4)
plot(matched_plot, matched_samples_df, 'matchedCount', '', 'matchedCount', group_pattern)
matched_plot.legend().remove()
live_plot = plt.subplot(8, 2, 5)
plot(live_plot, live_samples_df, 'liveStreamsCount', '', 'liveStreamsCount', group_pattern)
live_plot.legend().remove()
processed_plot = plt.subplot(8, 2, 6)
plot(processed_plot, (processed_samples_df / 1024 / 1024), 'processedBytes', '', 'bytes (mega)', group_pattern)
processed_plot.legend().remove()
heap_plot = plt.subplot(8, 2, 7)
plot(heap_plot, (heap_samples_df / 1024 / 1024), 'heap', '', 'heap (mega)', group_pattern)
heap_plot.legend().remove()
goroutines_plot = plt.subplot(8, 2, 8)
plot(goroutines_plot, (goroutines_samples_df / 1024 / 1024), 'goroutines', '', 'goroutines', group_pattern)
goroutines_plot.legend().remove()
fig = plt.gcf()
fig.set_size_inches(20, 18)
print('Saving graph to graph.png')
plt.savefig('graph.png', bbox_inches='tight')