mirror of
https://github.com/projectacrn/acrn-hypervisor.git
synced 2025-04-27 11:22:17 +00:00
hv: trace: show cpu usage of vms in pcpu sharing case
To maximize the cpu utilization, core 0 is usually shared by service vm and guest vm. But there are no statistics to show the cpu occupation of each vm. This patch is to provide cpu usage statistic for users. To calculate it, a new trace event is added and marked in scheduling context switch, accompanying with a new python script to analyze the data from acrntrace output. Tracked-On: #8621 Signed-off-by: nacui <na.cui@intel.com> Reviewed-by: Junjie Mao <junjie.mao@intel.com> Reviewed-by: Haiwei Li <haiwei.li@intel.com>
This commit is contained in:
parent
926f2346df
commit
512c98fd79
@ -13,6 +13,7 @@
|
||||
#include <schedule.h>
|
||||
#include <sprintf.h>
|
||||
#include <asm/irq.h>
|
||||
#include <trace.h>
|
||||
|
||||
bool is_idle_thread(const struct thread_object *obj)
|
||||
{
|
||||
@ -172,6 +173,7 @@ void schedule(void)
|
||||
struct thread_object *next = &per_cpu(idle, pcpu_id);
|
||||
struct thread_object *prev = ctl->curr_obj;
|
||||
uint64_t rflag;
|
||||
char name[16];
|
||||
|
||||
obtain_schedule_lock(pcpu_id, &rflag);
|
||||
if (ctl->scheduler->pick_next != NULL) {
|
||||
@ -182,6 +184,10 @@ void schedule(void)
|
||||
/* If we picked different sched object, switch context */
|
||||
if (prev != next) {
|
||||
if (prev != NULL) {
|
||||
memcpy_erms(name, prev->name, 4);
|
||||
memcpy_erms(name + 4, next->name, 4);
|
||||
memset(name + 8, 0, sizeof(name) - 8);
|
||||
TRACE_16STR(TRACE_SCHED_NEXT, name);
|
||||
if (prev->switch_out != NULL) {
|
||||
prev->switch_out(prev);
|
||||
}
|
||||
|
@ -111,7 +111,7 @@ void TRACE_6C(uint32_t evid, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uin
|
||||
#define TRACE_ENTER TRACE_16STR(TRACE_FUNC_ENTER, __func__)
|
||||
#define TRACE_EXIT TRACE_16STR(TRACE_FUNC_EXIT, __func__)
|
||||
|
||||
static inline void TRACE_16STR(uint32_t evid, const char name[])
|
||||
void TRACE_16STR(uint32_t evid, const char name[])
|
||||
{
|
||||
struct trace_entry entry;
|
||||
uint16_t cpu_id = get_pcpu_id();
|
||||
|
@ -20,6 +20,10 @@
|
||||
|
||||
#define TRACE_VM_EXIT 0x10U
|
||||
#define TRACE_VM_ENTER 0X11U
|
||||
|
||||
/* event to calculate cpu usage with shared pcpu */
|
||||
#define TRACE_SCHED_NEXT 0x20U
|
||||
|
||||
#define TRACE_VMEXIT_ENTRY 0x10000U
|
||||
|
||||
#define TRACE_VMEXIT_EXCEPTION_OR_NMI (TRACE_VMEXIT_ENTRY + 0x00000000U)
|
||||
@ -44,5 +48,6 @@
|
||||
void TRACE_2L(uint32_t evid, uint64_t e, uint64_t f);
|
||||
void TRACE_4I(uint32_t evid, uint32_t a, uint32_t b, uint32_t c, uint32_t d);
|
||||
void TRACE_6C(uint32_t evid, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t b1, uint8_t b2);
|
||||
void TRACE_16STR(uint32_t evid, const char name[]);
|
||||
|
||||
#endif /* TRACE_H */
|
||||
|
@ -12,6 +12,7 @@ import getopt
|
||||
import os
|
||||
from vmexit_analyze import analyze_vm_exit
|
||||
from irq_analyze import analyze_irq
|
||||
from cpuusage_analyze import analyze_cpu_usage
|
||||
|
||||
def usage():
|
||||
"""print the usage of the script
|
||||
@ -29,6 +30,7 @@ def usage():
|
||||
-f, --frequency=[unsigned int]: TSC frequency in MHz
|
||||
--vm_exit: to generate vm_exit report
|
||||
--irq: to generate irq related report
|
||||
--cpu_usage: to generate cpu_usage report
|
||||
''')
|
||||
|
||||
def do_analysis(ifile, ofile, analyzer, freq):
|
||||
@ -62,7 +64,7 @@ def main(argv):
|
||||
# Default TSC frequency of MRB in MHz
|
||||
freq = 1881.6
|
||||
opts_short = "hi:o:f:"
|
||||
opts_long = ["ifile=", "ofile=", "frequency=", "vm_exit", "irq"]
|
||||
opts_long = ["ifile=", "ofile=", "frequency=", "vm_exit", "irq", "cpu_usage"]
|
||||
analyzer = []
|
||||
|
||||
try:
|
||||
@ -85,6 +87,8 @@ def main(argv):
|
||||
analyzer.append(analyze_vm_exit)
|
||||
elif opt == "--irq":
|
||||
analyzer.append(analyze_irq)
|
||||
elif opt == "--cpu_usage":
|
||||
analyzer.append(analyze_cpu_usage)
|
||||
else:
|
||||
assert False, "unhandled option"
|
||||
|
||||
|
196
misc/debug_tools/acrn_trace/scripts/cpuusage_analyze.py
Executable file
196
misc/debug_tools/acrn_trace/scripts/cpuusage_analyze.py
Executable file
@ -0,0 +1,196 @@
|
||||
#!/usr/bin/python3
|
||||
# -*- coding: UTF-8 -*-
|
||||
|
||||
"""
|
||||
This script defines the function to do cpu usage analysis
|
||||
"""
|
||||
import sys
|
||||
import string
|
||||
import struct
|
||||
import csv
|
||||
import os
|
||||
|
||||
cpu_id = 0
|
||||
stat_tsc = 0
|
||||
|
||||
# map event TRACE_SCHED_NEXT defined in file trace.h
|
||||
SCHED_NEXT = 0x20
|
||||
|
||||
# max number of vm is 16, another 1 is for hv
|
||||
VM_NUM = 16
|
||||
time_vm_running = [0] * (VM_NUM + 1)
|
||||
count_all_trace = 0
|
||||
count_sched_trace = 0
|
||||
|
||||
# Q: 64-bit tsc, Q: 64-bit event, 16c: char name[16]
|
||||
TRCREC = "QQ16c"
|
||||
|
||||
def process_trace_data(ifile):
|
||||
"""parse the trace data file
|
||||
Args:
|
||||
ifile: input trace data file
|
||||
Return:
|
||||
None
|
||||
"""
|
||||
|
||||
global stat_tsc, cpu_id, time_vm_running, count_all_trace, count_sched_trace
|
||||
|
||||
# The duration of cpu running is tsc_end - tsc_begin
|
||||
tsc_begin = 0
|
||||
tsc_end = 0
|
||||
time_ambiguous = 0
|
||||
|
||||
fd = open(ifile, 'rb')
|
||||
while True:
|
||||
try:
|
||||
count_all_trace += 1
|
||||
line = fd.read(struct.calcsize(TRCREC))
|
||||
if not line:
|
||||
break
|
||||
x = struct.unpack(TRCREC, line)
|
||||
if x[0] == 0:
|
||||
break
|
||||
tsc_end = x[0]
|
||||
if count_all_trace == 1:
|
||||
tsc_begin = tsc_end
|
||||
tsc_last_sched = tsc_begin
|
||||
event = x[1]
|
||||
cpu_id = event >> 56
|
||||
event = event & 0xffffffffffff
|
||||
if event == SCHED_NEXT:
|
||||
count_sched_trace += 1
|
||||
s=''
|
||||
for _ in list(x[2:]):
|
||||
d = _.decode('ascii', errors='ignore')
|
||||
s += d
|
||||
|
||||
if s[:2] == "vm":
|
||||
vm_prev = int(s[2])
|
||||
if s[3] != ":":
|
||||
vm_prev = vm_prev*10 + int(s[3])
|
||||
elif s[:4] == "idle":
|
||||
vm_prev = VM_NUM
|
||||
else:
|
||||
print("Error: trace data is not correct!")
|
||||
return
|
||||
|
||||
if s[4:6] == "vm":
|
||||
vm_next = int(s[6])
|
||||
if s[7] != ":":
|
||||
vm_next = vm_next*10 + int(s[7])
|
||||
elif s[4:8] == "idle":
|
||||
vm_next = VM_NUM
|
||||
else:
|
||||
print("Error: trace data is not correct!")
|
||||
return
|
||||
|
||||
if (count_sched_trace == 1) or (vm_prev == vm_prev_last):
|
||||
time_vm_running[vm_prev] += tsc_end - tsc_last_sched
|
||||
else:
|
||||
print("line %d: last_next =vm%d, current_prev=vm%d" % (count_all_trace, vm_prev, vm_prev_last))
|
||||
print("Warning: last schedule next is not the current task. Trace log is lost.")
|
||||
time_ambiguous += tsc_end - tsc_last_sched
|
||||
|
||||
tsc_last_sched = tsc_end
|
||||
vm_prev_last = vm_next
|
||||
|
||||
except (IOError, struct.error) as e:
|
||||
sys.exit()
|
||||
|
||||
print ("Start trace %d tsc cycle" % (tsc_begin))
|
||||
print ("End trace %d tsc cycle" % (tsc_end))
|
||||
stat_tsc = tsc_end - tsc_begin
|
||||
assert stat_tsc != 0, "total_run_time in statistic is 0,\
|
||||
tsc_end %d, tsc_begin %d"\
|
||||
% (tsc_end, tsc_begin)
|
||||
|
||||
if count_sched_trace == 0:
|
||||
print ("There is no context switch in HV scheduling during this period. "
|
||||
"This CPU may be exclusively owned by one vm.\n"
|
||||
"The CPU usage is 100%")
|
||||
return
|
||||
if time_ambiguous > 0:
|
||||
print("Warning: ambiguous running time: %d tsc cycle, occupying %2.2f%% cpu."
|
||||
% (time_ambiguous, float(time_ambiguous)*100/stat_tsc))
|
||||
|
||||
# the last time
|
||||
time_vm_running[vm_next] += tsc_end - tsc_last_sched
|
||||
|
||||
|
||||
def generate_report(ofile, freq):
|
||||
""" generate analysis report
|
||||
Args:
|
||||
ofile: output report
|
||||
freq: TSC frequency of the device trace data from
|
||||
Return:
|
||||
None
|
||||
"""
|
||||
global stat_tsc, cpu_id, time_vm_running, count_all_trace, count_sched_trace
|
||||
|
||||
if (count_sched_trace == 0):
|
||||
return
|
||||
|
||||
csv_name = ofile + '.csv'
|
||||
try:
|
||||
with open(csv_name, 'a') as filep:
|
||||
f_csv = csv.writer(filep)
|
||||
|
||||
stat_sec = float(stat_tsc) / (float(freq) * 1000 * 1000)
|
||||
print ("Total run time: %d cpu cycles" % (stat_tsc))
|
||||
print ("TSC Freq: %s MHz" % (freq))
|
||||
print ("Total run time: %.2f sec" % (stat_sec))
|
||||
print ("Total trace items: %d" % (count_all_trace))
|
||||
print ("Total scheduling trace: %d" % (count_sched_trace))
|
||||
|
||||
f_csv.writerow(['Total run time(tsc cycles)', 'Total run time(sec)', 'Freq(MHz)'])
|
||||
f_csv.writerow(['%d' % (stat_tsc),
|
||||
'%.2f' % (stat_sec),
|
||||
'%s' % (freq)])
|
||||
|
||||
print ("%-28s\t%-12s\t%-12s\t%-24s\t%-16s" % ("PCPU ID", "VM ID",
|
||||
"VM Running/sec", "VM Running(tsc cycles)", "CPU Usage"))
|
||||
f_csv.writerow(['PCPU ID',
|
||||
'VM_ID',
|
||||
'Time Consumed/sec',
|
||||
'Time Consumed(tsc cycles)',
|
||||
'CPU Usage%'])
|
||||
|
||||
for vmid, tsc in enumerate(time_vm_running):
|
||||
run_tsc = tsc
|
||||
run_per = float(run_tsc) * 100 / float(stat_tsc)
|
||||
run_sec = float(run_tsc) / (float(freq) * 1000 * 1000)
|
||||
if vmid != VM_NUM:
|
||||
print ("%-28d\t%-12d\t%-10.2f\t%-24d\t%-2.2f%%" %
|
||||
(cpu_id, vmid, run_sec, run_tsc, run_per))
|
||||
row = [cpu_id, vmid, '%.2f' % (run_sec), run_tsc, '%2.2f' % (run_per)]
|
||||
else:
|
||||
print ("%-28d\t%-12s\t%-10.2f\t%-24d\t%-2.2f%%" %
|
||||
(cpu_id, 'Idle', run_sec, run_tsc, run_per))
|
||||
row = [cpu_id, 'Idle', '%.2f' % (run_sec), run_tsc, '%2.2f' % (run_per)]
|
||||
f_csv.writerow(row)
|
||||
|
||||
except IOError as err:
|
||||
print ("Output File Error: " + str(err))
|
||||
|
||||
def analyze_cpu_usage(ifile, ofile, freq):
|
||||
"""do cpu usage analysis of each vm
|
||||
Args:
|
||||
ifile: input trace data file
|
||||
ofile: output report file
|
||||
freq: TSC frequency of the host where we capture the trace data
|
||||
Return:
|
||||
None
|
||||
"""
|
||||
|
||||
print("VM CPU usage analysis started... \n\tinput file: %s\n"
|
||||
"\toutput file: %s.csv" % (ifile, ofile))
|
||||
|
||||
if os.stat(ifile).st_size == 0:
|
||||
print("The input trace file is empty. The corresponding CPU may be offline.")
|
||||
return
|
||||
if float(freq) == 0.0:
|
||||
print("The input cpu frequency cannot be zero!")
|
||||
return
|
||||
process_trace_data(ifile)
|
||||
# print cpu usage of each vm
|
||||
generate_report(ofile, freq)
|
@ -20,3 +20,6 @@
|
||||
# For TRACE_4I
|
||||
0x0001001E CPU%(cpu)d 0x%(event)016x %(tsc)d IO instruction [port = %(1)d, direction = %(2)d, sz = %(3)d, cur_context_idx = %(4)d]
|
||||
0x00010000 CPU%(cpu)d 0x%(event)016x %(tsc)d exception or nmi [vector = 0x%(1)08x, err = %(2)d, d3 = %(1)d, d4 = %(2)d]
|
||||
|
||||
# For TRACE_16STR
|
||||
0x00000005 CPU%(cpu)d 0x%(event)016x %(tsc)d sched next [data = %(1)c%(2)c%(3)c%(4)c%(5)c%(6)c%(7)c%(8)c%(9)c%(10)c%(11)c%(12)c%(13)c%(14)c%(15)c]
|
Loading…
Reference in New Issue
Block a user