diff --git a/hypervisor/common/hv_main.c b/hypervisor/common/hv_main.c
index 1fab9b8c1..8ff5aaea9 100644
--- a/hypervisor/common/hv_main.c
+++ b/hypervisor/common/hv_main.c
@@ -17,6 +17,9 @@
 
 void vcpu_thread(struct thread_object *obj)
 {
+#ifdef HV_DEBUG
+	uint64_t vmexit_begin = 0UL, vmexit_end = 0UL;
+#endif
 	struct acrn_vcpu *vcpu = container_of(obj, struct acrn_vcpu, thread_obj);
 	uint32_t basic_exit_reason = 0U;
 	int32_t ret = 0;
@@ -43,6 +46,35 @@ void vcpu_thread(struct thread_object *obj)
 		reset_event(&vcpu->events[VCPU_EVENT_VIRTUAL_INTERRUPT]);
 
 		profiling_vmenter_handler(vcpu);
+#ifdef HV_DEBUG
+		vmexit_end = rdtsc();
+		if (vmexit_begin != 0UL) {
+			uint64_t delta = vmexit_end - vmexit_begin;
+			uint32_t us = (uint32_t)ticks_to_us(delta);
+			uint16_t fls = (uint16_t)(fls32(us) + 1); /* fls32(0) returns 0xFFFF, so fls wraps to 0 when us == 0 */
+			uint16_t index = 0;
+
+			if (fls >= MAX_VMEXIT_LEVEL) {
+				index = MAX_VMEXIT_LEVEL - 1;
+			} else if (fls > 0) { /* fls == 0 means us == 0, keep index at 0 */
+				index = fls - 1;
+			}
+
+			get_cpu_var(vmexit_cnt)[basic_exit_reason][index]++;
+			get_cpu_var(vmexit_time)[basic_exit_reason][0] += delta;
+
+			vcpu->vmexit_cnt[basic_exit_reason][index]++;
+			vcpu->vmexit_time[basic_exit_reason][0] += delta;
+
+			if (us > get_cpu_var(vmexit_time)[basic_exit_reason][1]) {
+				get_cpu_var(vmexit_time)[basic_exit_reason][1] = us;
+			}
+
+			if (us > vcpu->vmexit_time[basic_exit_reason][1]) {
+				vcpu->vmexit_time[basic_exit_reason][1] = us;
+			}
+		}
+#endif
 		TRACE_2L(TRACE_VM_ENTER, 0UL, 0UL);
 		ret = run_vcpu(vcpu);
 		if (ret != 0) {
@@ -54,6 +86,11 @@ void vcpu_thread(struct thread_object *obj)
 
 		basic_exit_reason = vcpu->arch.exit_reason & 0xFFFFU;
 		TRACE_2L(TRACE_VM_EXIT, basic_exit_reason, vcpu_get_rip(vcpu));
+#ifdef HV_DEBUG
+		vmexit_begin = rdtsc();
+		get_cpu_var(vmexit_cnt)[basic_exit_reason][TOTAL_ARRAY_LEVEL - 1]++;
+		vcpu->vmexit_cnt[basic_exit_reason][TOTAL_ARRAY_LEVEL - 1]++;
+#endif
 		vcpu->arch.nrexits++;
 
 		profiling_pre_vmexit_handler(vcpu);
diff --git a/hypervisor/common/schedule.c b/hypervisor/common/schedule.c
index 74c75338b..6ac409349 100644
--- a/hypervisor/common/schedule.c
+++ b/hypervisor/common/schedule.c
@@ -163,6 +163,10 @@ void schedule(void)
 	struct thread_object *prev = ctl->curr_obj;
 	uint64_t rflag;
 
+#ifdef HV_DEBUG
+	get_cpu_var(resched_times)++;
+#endif
+
 	obtain_schedule_lock(pcpu_id, &rflag);
 	if (ctl->scheduler->pick_next != NULL) {
 		next = ctl->scheduler->pick_next(ctl);
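Note: the HV_DEBUG block added to vcpu_thread() buckets each exit-to-entry latency by its most significant bit: the latency is converted to microseconds and fls32() selects one of the MAX_VMEXIT_LEVEL power-of-two ranges (0-2us, 2-4us, ..., 8192us and above). A minimal sketch of the same mapping, for illustration only (latency_bucket is a hypothetical helper name, not part of the patch; fls32() is the hypervisor's find-last-set primitive, which returns 0xFFFFU for a zero input):

	/* illustrative sketch, not part of the patch: map a latency in us to a bucket index */
	static inline uint16_t latency_bucket(uint32_t us)
	{
		uint16_t fls = (uint16_t)(fls32(us) + 1U); /* fls32(0) = 0xFFFFU, so us == 0 wraps to fls == 0 */
		uint16_t index = 0U;

		if (fls >= MAX_VMEXIT_LEVEL) {
			index = MAX_VMEXIT_LEVEL - 1U; /* 8192us and above share the last bucket */
		} else if (fls > 0U) {
			index = fls - 1U; /* e.g. us = 100 -> fls32 = 6 -> bucket 6 (64us - 128us) */
		}
		return index;
	}

This index selects the per-pCPU and per-vCPU vmexit_cnt[reason][index] histogram slots updated above.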
diff --git a/hypervisor/debug/shell.c b/hypervisor/debug/shell.c
index f441343ee..0ea36d89f 100644
--- a/hypervisor/debug/shell.c
+++ b/hypervisor/debug/shell.c
@@ -27,7 +27,7 @@
 #define MAX_STR_SIZE		256U
 #define SHELL_PROMPT_STR	"ACRN:\\>"
 
-#define SHELL_LOG_BUF_SIZE	(PAGE_SIZE * MAX_PCPU_NUM / 2U)
+#define SHELL_LOG_BUF_SIZE	(PAGE_SIZE * MAX_PCPU_NUM)
 static char shell_log_buf[SHELL_LOG_BUF_SIZE];
 
 /* Input Line Other - Switch to the "other" input line (there are only two
@@ -52,6 +52,7 @@ static int32_t shell_cpuid(int32_t argc, char **argv);
 static int32_t shell_reboot(int32_t argc, char **argv);
 static int32_t shell_rdmsr(int32_t argc, char **argv);
 static int32_t shell_wrmsr(int32_t argc, char **argv);
+static int32_t shell_show_vmexit_profile(int32_t argc, char **argv);
 
 static struct shell_cmd shell_cmds[] = {
 	{
@@ -156,6 +157,12 @@ static struct shell_cmd shell_cmds[] = {
 		.help_str = SHELL_CMD_WRMSR_HELP,
 		.fcn = shell_wrmsr,
 	},
+	{
+		.str = SHELL_CMD_VMEXIT,
+		.cmd_param = SHELL_CMD_VMEXIT_PARAM,
+		.help_str = SHELL_CMD_VMEXIT_HELP,
+		.fcn = shell_show_vmexit_profile,
+	},
 };
 
 /* The initial log level*/
@@ -1452,3 +1459,477 @@ static int32_t shell_wrmsr(int32_t argc, char **argv)
 
 	return ret;
 }
+
+static void get_vmexit_profile_per_pcpu(char *str_arg, size_t str_max)
+{
+	char *str = str_arg;
+	uint16_t cpu, i;
+	size_t len, size = str_max;
+	uint16_t pcpu_num = get_pcpu_nums();
+
+	len = snprintf(str, size, "\r\nNow(us) = %16lld\r\n", ticks_to_us(rdtsc()));
+	if (len >= size) {
+		goto overflow;
+	}
+	size -= len;
+	str += len;
+
+	len = snprintf(str, size, "\r\nREASON");
+	if (len >= size) {
+		goto overflow;
+	}
+	size -= len;
+	str += len;
+
+	for (cpu = 0U; cpu < pcpu_num; cpu++) {
+		len = snprintf(str, size, "\t CPU%hu\t US", cpu);
+		if (len >= size) {
+			goto overflow;
+		}
+		size -= len;
+		str += len;
+	}
+
+	for (i = 0U; i < 64U; i++) {
+
+		/* to ignore the 0 count vmexit */
+		for (cpu = 0U; cpu < pcpu_num; cpu++) {
+			if (per_cpu(vmexit_cnt, cpu)[i][TOTAL_ARRAY_LEVEL - 1] != 0)
+				break;
+		}
+
+		if (cpu == pcpu_num)
+			continue;
+
+		len = snprintf(str, size, "\r\n0x%02x", i);
+		if (len >= size) {
+			goto overflow;
+		}
+		size -= len;
+		str += len;
+
+		for (cpu = 0U; cpu < pcpu_num; cpu++) {
+			len = snprintf(str, size, "\t%10lld\t%10lld",
+				per_cpu(vmexit_cnt, cpu)[i][TOTAL_ARRAY_LEVEL - 1],
+				ticks_to_us(per_cpu(vmexit_time, cpu)[i][0]));
+			if (len >= size) {
+				goto overflow;
+			}
+
+			size -= len;
+			str += len;
+		}
+	}
+
+	for (cpu = 0U; cpu < pcpu_num; cpu++) {
+		len = snprintf(str, size, "\r\ncpu%d thread resched: %ld", cpu, per_cpu(resched_times, cpu));
+
+		if (len >= size) {
+			goto overflow;
+		}
+		size -= len;
+		str += len;
+	}
+
+	snprintf(str, size, "\r\n");
+	return;
+
+overflow:
+	printf("buffer size is not enough! please check!\n");
+}
+
+static void get_vmexit_profile_per_vcpu(char *str_arg, size_t str_max)
+{
+	char *str = str_arg;
+	uint16_t vm_id, i, vcpu_id;
+	size_t len, size = str_max;
+	struct acrn_vm *vm;
+	struct acrn_vcpu *vcpu;
+	bool has_vmexit = false;
+
+	len = snprintf(str, size, "\r\nNow(us) = %16lld; total vmexit per vm/vCPU (count & time):\r\n",
+		ticks_to_us(rdtsc()));
+	if (len >= size) {
+		goto overflow;
+	}
+	size -= len;
+	str += len;
+
+	len = snprintf(str, size, "\r\nREASON");
+	if (len >= size) {
+		goto overflow;
+	}
+	size -= len;
+	str += len;
+
+	for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
+		vm = get_vm_from_vmid(vm_id);
+		if (is_poweroff_vm(vm))
+			continue;
+
+		foreach_vcpu(vcpu_id, vm, vcpu) {
+			len = snprintf(str, size, "\t VM%u/vCPU%u\t US", vm_id, vcpu_id);
+			if (len >= size) {
+				goto overflow;
+			}
+			size -= len;
+			str += len;
+		}
+	}
+
+	for (i = 0U; i < 64U; i++) {
+
+		for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
+			vm = get_vm_from_vmid(vm_id);
+
+			if (is_poweroff_vm(vm))
+				continue;
+
+			has_vmexit = false;
+			foreach_vcpu(vcpu_id, vm, vcpu) {
+				if (vcpu->vmexit_cnt[i][TOTAL_ARRAY_LEVEL - 1] != 0) {
+					has_vmexit = true;
+					break;
+				}
+			}
+
+			if (has_vmexit)
+				break;
+		}
+
+		if (vm_id == CONFIG_MAX_VM_NUM)
+			continue;
+
+		len = snprintf(str, size, "\r\n0x%02x", i);
+		if (len >= size) {
+			goto overflow;
+		}
+		size -= len;
+		str += len;
+
+		for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
+			vm = get_vm_from_vmid(vm_id);
+			if (is_poweroff_vm(vm))
+				continue;
+
+			foreach_vcpu(vcpu_id, vm, vcpu) {
+				len = snprintf(str, size, "\t%10lld\t%10lld",
+					vcpu->vmexit_cnt[i][TOTAL_ARRAY_LEVEL - 1],
+					ticks_to_us(vcpu->vmexit_time[i][0]));
+				if (len >= size) {
+					goto overflow;
+				}
+
+				size -= len;
+				str += len;
+			}
+		}
+	}
+
+	snprintf(str, size, "\r\n");
+	return;
+
+overflow:
+	printf("buffer size is not enough! please check!\n");
+}
+
+static const char *level_info[MAX_VMEXIT_LEVEL] = {
+	" 0us - 2us",
+	" 2us - 4us",
+	" 4us - 8us",
+	" 8us - 16us",
+	" 16us - 32us",
+	" 32us - 64us",
+	" 64us - 128us",
+	" 128us - 256us",
+	" 256us - 512us",
+	" 512us -1024us",
+	"1024us -2048us",
+	"2048us -4096us",
+	"4096us -8192us",
+	"8192us - more",
+};
+
+static void get_vmexit_details_per_pcpu(char *str_arg, size_t str_max)
+{
+	char *str = str_arg;
+	uint16_t cpu, i, level;
+	size_t len, size = str_max;
+	uint16_t pcpu_num = get_pcpu_nums();
+
+	len = snprintf(str, size, "\r\nNow=%lldus, for detailed latency of each vmexit on each cpu:",
+		ticks_to_us(rdtsc()));
+	if (len >= size) {
+		goto overflow;
+	}
+	size -= len;
+	str += len;
+
+	for (i = 0U; i < 64U; i++) {
+
+		/* to ignore the 0 count vmexit */
+		for (cpu = 0U; cpu < pcpu_num; cpu++) {
+			if (per_cpu(vmexit_cnt, cpu)[i][TOTAL_ARRAY_LEVEL - 1] != 0)
+				break;
+		}
+
+		if (cpu == pcpu_num)
+			continue;
+
+		len = snprintf(str, size, "\r\n\r\n VMEXIT/0x%02x", i);
+		if (len >= size) {
+			goto overflow;
+		}
+		size -= len;
+		str += len;
+
+		for (cpu = 0U; cpu < pcpu_num; cpu++) {
+			len = snprintf(str, size, " CPU%hu", cpu);
+			if (len >= size) {
+				goto overflow;
+			}
+			size -= len;
+			str += len;
+		}
+
+		for (level = 0; level < MAX_VMEXIT_LEVEL; level++) {
+
+			/* to ignore the 0 count vmexit */
+			for (cpu = 0U; cpu < pcpu_num; cpu++) {
+				if (per_cpu(vmexit_cnt, cpu)[i][level] != 0)
+					break;
+			}
+
+			if (cpu == pcpu_num)
+				continue;
+
+			len = snprintf(str, size, "\r\n%s", level_info[level]);
+			if (len >= size) {
+				goto overflow;
+			}
+			size -= len;
+			str += len;
+
+			for (cpu = 0U; cpu < pcpu_num; cpu++) {
+				len = snprintf(str, size, "%12lld", per_cpu(vmexit_cnt, cpu)[i][level]);
+				if (len >= size) {
+					goto overflow;
+				}
+
+				size -= len;
+				str += len;
+			}
+		}
+
+		len = snprintf(str, size, "\r\n Max Lat(us):");
+		if (len >= size) {
+			goto overflow;
+		}
+		size -= len;
+		str += len;
+
+		for (cpu = 0U; cpu < pcpu_num; cpu++) {
+			len = snprintf(str, size, "%12lld", per_cpu(vmexit_time, cpu)[i][1]);
+			if (len >= size) {
+				goto overflow;
+			}
+			size -= len;
+			str += len;
+		}
+
+	}
+
+	snprintf(str, size, "\r\n");
+	return;
+
+overflow:
+	printf("buffer size is not enough! please check!\n");
+}
+
+
+static void get_vmexit_details_per_vcpu(char *str_arg, size_t str_max)
+{
+	char *str = str_arg;
+	uint16_t vm_id, i, level, vcpu_id;
+	size_t len, size = str_max;
+	struct acrn_vm *vm;
+	struct acrn_vcpu *vcpu;
+	bool has_vmexit = false;
+
+	len = snprintf(str, size, "\r\nNow=%lldus, for detailed latency of each vmexit on each VM/vCPU:",
+		ticks_to_us(rdtsc()));
+	if (len >= size) {
+		goto overflow;
+	}
+	size -= len;
+	str += len;
+
+	for (i = 0U; i < 64U; i++) {
+
+		/* to ignore the 0 count vmexit */
+		for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
+			vm = get_vm_from_vmid(vm_id);
+			if (is_poweroff_vm(vm))
+				continue;
+
+			has_vmexit = false;
+			foreach_vcpu(vcpu_id, vm, vcpu) {
+				if (vcpu->vmexit_cnt[i][TOTAL_ARRAY_LEVEL - 1] != 0) {
+					has_vmexit = true;
+					break;
+				}
+			}
+
+			if (has_vmexit)
+				break;
+		}
+
+		if (vm_id == CONFIG_MAX_VM_NUM)
+			continue;
+
+		len = snprintf(str, size, "\r\n\r\n VMEXIT/0x%02x", i);
+		if (len >= size) {
+			goto overflow;
+		}
+		size -= len;
+		str += len;
+
+		for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
+			vm = get_vm_from_vmid(vm_id);
+			if (is_poweroff_vm(vm))
+				continue;
+
+			foreach_vcpu(vcpu_id, vm, vcpu) {
+				len = snprintf(str, size, " VM%u/vCPU%u", vm_id, vcpu_id);
+				if (len >= size) {
+					goto overflow;
+				}
+				size -= len;
+				str += len;
+			}
+		}
+
+		for (level = 0; level < MAX_VMEXIT_LEVEL; level++) {
+
+			/* to ignore the 0 count vmexit */
+			for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
+				vm = get_vm_from_vmid(vm_id);
+
+				if (is_poweroff_vm(vm))
+					continue;
+
+				has_vmexit = false;
+				foreach_vcpu(vcpu_id, vm, vcpu) {
+					if (vcpu->vmexit_cnt[i][level] != 0) {
+						has_vmexit = true;
+						break;
+					}
+				}
+
+				if (has_vmexit)
+					break;
+			}
+
+			if (vm_id == CONFIG_MAX_VM_NUM)
+				continue;
+
+			len = snprintf(str, size, "\r\n%s", level_info[level]);
+			if (len >= size) {
+				goto overflow;
+			}
+			size -= len;
+			str += len;
+
+			for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
+				vm = get_vm_from_vmid(vm_id);
+				if (is_poweroff_vm(vm))
+					continue;
+
+				foreach_vcpu(vcpu_id, vm, vcpu) {
+					len = snprintf(str, size, "%12lld", vcpu->vmexit_cnt[i][level]);
+					if (len >= size) {
+						goto overflow;
+					}
+
+					size -= len;
+					str += len;
+				}
+			}
+
+		}
+
+		len = snprintf(str, size, "\r\n Max Lat(us):");
+		if (len >= size) {
+			goto overflow;
+		}
+		size -= len;
+		str += len;
+
+		for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
+			vm = get_vm_from_vmid(vm_id);
+			if (is_poweroff_vm(vm))
+				continue;
+
+			foreach_vcpu(vcpu_id, vm, vcpu) {
+				len = snprintf(str, size, "%12lld", vcpu->vmexit_time[i][1]);
+				if (len >= size) {
+					goto overflow;
+				}
+				size -= len;
+				str += len;
+			}
+		}
+	}
+
+	snprintf(str, size, "\r\n");
+	return;
+
+overflow:
+	printf("buffer size is not enough! please check!\n");
+}
+
+static void clear_vmexit_info_buffer(void)
+{
+	uint16_t cpu, vm_id, vcpu_id;
+	uint16_t pcpu_num = get_pcpu_nums();
+	struct acrn_vm *vm;
+	struct acrn_vcpu *vcpu;
+
+	for (cpu = 0U; cpu < pcpu_num; cpu++) {
+		memset(per_cpu(vmexit_cnt, cpu), 0, sizeof(per_cpu(vmexit_cnt, cpu)));
+		memset(per_cpu(vmexit_time, cpu), 0, sizeof(per_cpu(vmexit_time, cpu)));
+		per_cpu(resched_times, cpu) = 0;
+	}
+
+	for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
+		vm = get_vm_from_vmid(vm_id);
+		if (!is_poweroff_vm(vm)) {
+			foreach_vcpu(vcpu_id, vm, vcpu) {
+				memset(vcpu->vmexit_cnt, 0, sizeof(vcpu->vmexit_cnt));
+				memset(vcpu->vmexit_time, 0, sizeof(vcpu->vmexit_time));
+			}
+		}
+	}
+
+}
+
+static int32_t shell_show_vmexit_profile(int32_t argc, char **argv)
+{
+	if ((argc == 2) && (strcmp(argv[1], "clear") == 0)) {
+		clear_vmexit_info_buffer();
+	}
+
+	get_vmexit_profile_per_pcpu(shell_log_buf, SHELL_LOG_BUF_SIZE);
+	shell_puts(shell_log_buf);
+
+	get_vmexit_details_per_pcpu(shell_log_buf, SHELL_LOG_BUF_SIZE);
+	shell_puts(shell_log_buf);
+
+	get_vmexit_profile_per_vcpu(shell_log_buf, SHELL_LOG_BUF_SIZE);
+	shell_puts(shell_log_buf);
+
+	get_vmexit_details_per_vcpu(shell_log_buf, SHELL_LOG_BUF_SIZE);
+	shell_puts(shell_log_buf);
+
+	return 0;
+}
diff --git a/hypervisor/debug/shell_priv.h b/hypervisor/debug/shell_priv.h
index af741aa26..21a010d21 100644
--- a/hypervisor/debug/shell_priv.h
+++ b/hypervisor/debug/shell_priv.h
@@ -106,4 +106,9 @@ struct shell {
 #define SHELL_CMD_WRMSR_PARAM	"[-p] "
 #define SHELL_CMD_WRMSR_HELP	"Write value (in hexadecimal) to the MSR at msr_index (in hexadecimal) for CPU"\
 				" ID pcpu_id"
+
+#define SHELL_CMD_VMEXIT	"vmexit"
+#define SHELL_CMD_VMEXIT_PARAM	NULL
+#define SHELL_CMD_VMEXIT_HELP	"show vmexit profiling ('vmexit clear' resets the counters)"
+
 #endif /* SHELL_PRIV_H */
diff --git a/hypervisor/include/arch/x86/guest/vcpu.h b/hypervisor/include/arch/x86/guest/vcpu.h
index 052a69e1e..4b192cbe6 100644
--- a/hypervisor/include/arch/x86/guest/vcpu.h
+++ b/hypervisor/include/arch/x86/guest/vcpu.h
@@ -274,6 +274,12 @@ struct acrn_vcpu {
 	uint64_t reg_updated;
 
 	struct sched_event events[VCPU_EVENT_NUM];
+
+#ifdef HV_DEBUG
+	uint64_t vmexit_cnt[64][TOTAL_ARRAY_LEVEL];
+	uint64_t vmexit_time[64][2]; /* 0 for total latency (in TSC ticks), 1 for max latency (in us) */
+#endif
+
 } __aligned(PAGE_SIZE);
 
 struct vcpu_dump {
diff --git a/hypervisor/include/arch/x86/guest/vm.h b/hypervisor/include/arch/x86/guest/vm.h
index b0e5b797b..1bef94bc4 100644
--- a/hypervisor/include/arch/x86/guest/vm.h
+++ b/hypervisor/include/arch/x86/guest/vm.h
@@ -156,6 +156,7 @@ struct acrn_vm {
 	uint8_t vrtc_offset;
 
 	uint64_t intr_inject_delay_delta; /* delay of intr injection */
+
 } __aligned(PAGE_SIZE);
 
 /*
diff --git a/hypervisor/include/arch/x86/per_cpu.h b/hypervisor/include/arch/x86/per_cpu.h
index 05dfbf794..b7b0f3c56 100644
--- a/hypervisor/include/arch/x86/per_cpu.h
+++ b/hypervisor/include/arch/x86/per_cpu.h
@@ -19,6 +19,8 @@
 #include
 #include
 
+#include
+
 struct per_cpu_region {
 	/* vmxon_region MUST be 4KB-aligned */
 	uint8_t vmxon_region[PAGE_SIZE];
@@ -26,6 +28,12 @@ struct per_cpu_region {
 #ifdef HV_DEBUG
 	struct shared_buf *sbuf[ACRN_SBUF_ID_MAX];
 	char logbuf[LOG_MESSAGE_MAX_SIZE];
+
+	uint64_t vmexit_cnt[64][TOTAL_ARRAY_LEVEL];
+	uint64_t vmexit_time[64][2]; /* 0 for total latency (in TSC ticks), 1 for max latency (in us) */
+
+	uint64_t resched_times;
+
 	uint32_t npk_log_ref;
 #endif
 	uint64_t irq_count[NR_IRQS];
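For reference, the per-pCPU (struct per_cpu_region) and per-vCPU (struct acrn_vcpu) counters share one layout: vmexit_cnt[reason][0..MAX_VMEXIT_LEVEL-1] is the latency histogram, vmexit_cnt[reason][TOTAL_ARRAY_LEVEL-1] is the total exit count, vmexit_time[reason][0] accumulates latency in TSC ticks, and vmexit_time[reason][1] holds the maximum observed latency in microseconds. A hedged sketch of how a reader could combine these fields into an average latency (avg_exit_latency_us is a hypothetical helper, not added by this patch; ticks_to_us() is the conversion already used above):

	/* illustrative sketch: average latency in us for one exit reason */
	static uint64_t avg_exit_latency_us(uint64_t cnt[][TOTAL_ARRAY_LEVEL],
			uint64_t time[][2], uint32_t reason)
	{
		uint64_t total = cnt[reason][TOTAL_ARRAY_LEVEL - 1];

		return (total != 0UL) ? (ticks_to_us(time[reason][0]) / total) : 0UL;
	}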
diff --git a/hypervisor/include/public/acrn_common.h b/hypervisor/include/public/acrn_common.h
index eddb03401..19cde685a 100644
--- a/hypervisor/include/public/acrn_common.h
+++ b/hypervisor/include/public/acrn_common.h
@@ -46,6 +46,11 @@
 #error "VIOAPIC_RTE_NUM must be larger than 23"
 #endif
 
+#ifdef HV_DEBUG
+#define MAX_VMEXIT_LEVEL 14 /* 14 latency buckets (index 0..13); the extra slot (index 14) holds the total count */
+#define TOTAL_ARRAY_LEVEL (MAX_VMEXIT_LEVEL + 1)
+#endif
+
 /* Generic VM flags from guest OS */
 #define GUEST_FLAG_SECURE_WORLD_ENABLED	(1UL << 0U)	/* Whether secure world is enabled */
 #define GUEST_FLAG_LAPIC_PASSTHROUGH	(1UL << 1U)	/* Whether LAPIC is passed through */
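Usage note: with this patch the hypervisor debug shell gains a vmexit command (registered in shell_cmds[] above). Based on shell_show_vmexit_profile(), running it with no argument dumps the per-pCPU summary, the per-pCPU latency details, and the per-VM/vCPU equivalents; passing clear first resets the per-CPU and per-vCPU counters (including resched_times) and then prints a fresh snapshot:

	ACRN:\> vmexit
	ACRN:\> vmexit clear

All four report sections are staged through shell_log_buf, which is why SHELL_LOG_BUF_SIZE grows from PAGE_SIZE * MAX_PCPU_NUM / 2U to PAGE_SIZE * MAX_PCPU_NUM in this change.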