From 5985c1216e2f467953f406f411b4c28d956a7286 Mon Sep 17 00:00:00 2001 From: "Chinthapally, Manisha" Date: Mon, 22 Oct 2018 16:39:41 -0700 Subject: [PATCH] HV: Added implementation for PMI handler function irq.c/.h: Added new variables (ctx_rflags, ctx_rip, ctx_cs) in irq_desc. On each interrupt this information is populated. Added APIs to access the irq_desc members. profiling.c: profiling_pmi_handler: On each PMI, gets the context and other information that caused it. Tracked-On: projectacrn#1409 Acked-by: Eddie Dong Signed-off-by: Chinthapally, Manisha --- hypervisor/arch/x86/irq.c | 9 +- hypervisor/arch/x86/virq.c | 3 + hypervisor/debug/profiling.c | 151 +++++++++++++++++- hypervisor/include/arch/x86/msr.h | 10 ++ hypervisor/include/common/irq.h | 5 + hypervisor/include/debug/profiling_internal.h | 46 ++++++ 6 files changed, 219 insertions(+), 5 deletions(-) diff --git a/hypervisor/arch/x86/irq.c b/hypervisor/arch/x86/irq.c index b250d33aa..fc1bf9421 100644 --- a/hypervisor/arch/x86/irq.c +++ b/hypervisor/arch/x86/irq.c @@ -12,7 +12,7 @@ static spinlock_t irq_alloc_spinlock = { .head = 0U, .tail = 0U, }; #define IRQ_ALLOC_BITMAP_SIZE INT_DIV_ROUNDUP(NR_IRQS, 64U) static uint64_t irq_alloc_bitmap[IRQ_ALLOC_BITMAP_SIZE]; -static struct irq_desc irq_desc_array[NR_IRQS]; +struct irq_desc irq_desc_array[NR_IRQS]; static uint32_t vector_to_irq[NR_MAX_VECTOR + 1]; spurious_handler_t spurious_handler; @@ -350,7 +350,12 @@ void dispatch_interrupt(const struct intr_excp_ctx *ctx) /* mask irq if possible */ goto ERR; } - +#ifdef PROFILING_ON + /* Saves ctx info into irq_desc */ + desc->ctx_rip = ctx->rip; + desc->ctx_rflags = ctx->rflags; + desc->ctx_cs = ctx->cs; +#endif handle_irq(desc); return; ERR: diff --git a/hypervisor/arch/x86/virq.c b/hypervisor/arch/x86/virq.c index 82e54c0ff..c644fd267 100644 --- a/hypervisor/arch/x86/virq.c +++ b/hypervisor/arch/x86/virq.c @@ -372,6 +372,9 @@ int external_interrupt_vmexit_handler(struct vcpu *vcpu) } ctx.vector = intr_info 
& 0xFFU; + ctx.rip = vcpu_get_rip(vcpu); + ctx.rflags = vcpu_get_rflags(vcpu); + ctx.cs = exec_vmread32(VMX_GUEST_CS_SEL); #ifdef CONFIG_PARTITION_MODE partition_mode_dispatch_interrupt(&ctx); diff --git a/hypervisor/debug/profiling.c b/hypervisor/debug/profiling.c index d60243711..b5207c1af 100644 --- a/hypervisor/debug/profiling.c +++ b/hypervisor/debug/profiling.c @@ -13,7 +13,8 @@ #define MAJOR_VERSION 1 #define MINOR_VERSION 0 - +#define LBR_NUM_REGISTERS 32U +#define PERF_OVF_BIT_MASK 0xC0000070000000FULL #define LVT_PERFCTR_BIT_UNMASK 0xFFFEFFFFU #define LVT_PERFCTR_BIT_MASK 0x10000U #define VALID_DEBUGCTL_BIT_MASK 0x1801U @@ -24,6 +25,8 @@ static bool in_pmu_profiling; static uint32_t profiling_pmi_irq = IRQ_INVALID; +extern struct irq_desc irq_desc_array[NR_IRQS]; + static void profiling_initialize_vmsw(void) { dev_dbg(ACRN_DBG_PROFILING, "%s: entering cpu%d", @@ -324,9 +327,151 @@ static void profiling_handle_msrops(void) /* * Interrupt handler for performance monitoring interrupts */ -static void profiling_pmi_handler(__unused unsigned int irq, __unused void *data) +static void profiling_pmi_handler(unsigned int irq, __unused void *data) { - /* to be implemented */ + uint64_t perf_ovf_status; + uint32_t lvt_perf_ctr; + uint32_t i; + uint32_t group_id; + struct profiling_msr_op *msrop = NULL; + struct pmu_sample *psample = &(get_cpu_var(profiling_info.pmu_sample)); + struct sep_state *ss = &(get_cpu_var(profiling_info.sep_state)); + + if ((ss == NULL) || (psample == NULL)) { + dev_dbg(ACRN_ERR_PROFILING, "%s: exiting cpu%d", + __func__, get_cpu_id()); + return; + } + /* Stop all the counters first */ + msr_write(MSR_IA32_PERF_GLOBAL_CTRL, 0x0U); + + group_id = ss->current_pmi_group_id; + for (i = 0U; i < MAX_MSR_LIST_NUM; i++) { + msrop = &(ss->pmi_entry_msr_list[group_id][i]); + if (msrop != NULL) { + if (msrop->msr_id == (uint32_t)-1) { + break; + } + if (msrop->msr_op_type == (uint8_t)MSR_OP_WRITE) { + msr_write(msrop->msr_id, msrop->value); + } + } + 
} + + ss->total_pmi_count++; + perf_ovf_status = msr_read(MSR_IA32_PERF_GLOBAL_STATUS); + lvt_perf_ctr = (uint32_t)msr_read(MSR_IA32_EXT_APIC_LVT_PMI); + + if (perf_ovf_status == 0U) { + goto reconfig; + } + + if ((perf_ovf_status & 0x80000000000000FULL) == 0U) { + ss->nofrozen_pmi++; + } + + (void)memset(psample, 0U, sizeof(struct pmu_sample)); + + /* Attribute PMI to guest context */ + if ((get_cpu_var(profiling_info.vm_info).vmexit_reason + == VMX_EXIT_REASON_EXTERNAL_INTERRUPT) && + ((uint64_t)get_cpu_var(profiling_info.vm_info).external_vector + == VECTOR_PMI)) { + psample->csample.os_id + =(uint32_t) get_cpu_var(profiling_info.vm_info).guest_vm_id; + (void)memset(psample->csample.task, 0U, 16); + psample->csample.cpu_id = get_cpu_id(); + psample->csample.process_id = 0U; + psample->csample.task_id = 0U; + psample->csample.overflow_status = perf_ovf_status; + psample->csample.rip = get_cpu_var(profiling_info.vm_info).guest_rip; + psample->csample.rflags + = (uint32_t)get_cpu_var(profiling_info.vm_info).guest_rflags; + psample->csample.cs + = (uint32_t)get_cpu_var(profiling_info.vm_info).guest_cs; + get_cpu_var(profiling_info.vm_info).vmexit_reason = 0U; + get_cpu_var(profiling_info.vm_info).external_vector = -1; + /* Attribute PMI to hypervisor context */ + } else { + psample->csample.os_id = 0xFFFFFFFFU; + (void)memcpy_s(psample->csample.task, 16, "VMM\0", 4); + psample->csample.cpu_id = get_cpu_id(); + psample->csample.process_id = 0U; + psample->csample.task_id = 0U; + psample->csample.overflow_status = perf_ovf_status; + psample->csample.rip = irq_desc_array[irq].ctx_rip; + psample->csample.rflags + = (uint32_t)irq_desc_array[irq].ctx_rflags; + psample->csample.cs = (uint32_t)irq_desc_array[irq].ctx_cs; + } + + if ((sep_collection_switch & + (1UL << (uint64_t)LBR_PMU_SAMPLING)) > 0UL) { + psample->lsample.lbr_tos = msr_read(MSR_CORE_LASTBRANCH_TOS); + for (i = 0U; i < LBR_NUM_REGISTERS; i++) { + psample->lsample.lbr_from_ip[i] + = 
msr_read(MSR_CORE_LASTBRANCH_0_FROM_IP + i); + psample->lsample.lbr_to_ip[i] + = msr_read(MSR_CORE_LASTBRANCH_0_TO_IP + i); + } + /* Generate core pmu sample and lbr data */ + (void)profiling_generate_data(COLLECT_PROFILE_DATA, LBR_PMU_SAMPLING); + } else { + /* Generate core pmu sample only */ + (void)profiling_generate_data(COLLECT_PROFILE_DATA, CORE_PMU_SAMPLING); + } + + /* Clear PERF_GLOBAL_OVF_STATUS bits */ + msr_write(MSR_IA32_PERF_GLOBAL_OVF_CTRL, + perf_ovf_status & PERF_OVF_BIT_MASK); + + ss->valid_pmi_count++; + + group_id = ss->current_pmi_group_id; + for (i = 0U; i < MAX_MSR_LIST_NUM; i++) { + msrop = &(ss->pmi_exit_msr_list[group_id][i]); + if (msrop != NULL) { + if (msrop->msr_id == (uint32_t)-1) { + break; + } + if (msrop->msr_op_type == (uint8_t)MSR_OP_WRITE) { + if (msrop->reg_type != (uint8_t)PMU_MSR_DATA) { + if (msrop->msr_id != MSR_IA32_PERF_GLOBAL_CTRL) { + msr_write(msrop->msr_id, msrop->value); + } + } + else { + if (((perf_ovf_status >> msrop->param) & 0x1U) > 0U) { + msr_write(msrop->msr_id, msrop->value); + } + } + } + } + } + +reconfig: + + if (ss->pmu_state == PMU_RUNNING) { + /* Unmask the interrupt */ + lvt_perf_ctr &= LVT_PERFCTR_BIT_UNMASK; + msr_write(MSR_IA32_EXT_APIC_LVT_PMI, lvt_perf_ctr); + group_id = ss->current_pmi_group_id; + for (i = 0U; i < MAX_MSR_LIST_NUM; i++) { + msrop = &(ss->pmi_start_msr_list[group_id][i]); + if (msrop != NULL) { + if (msrop->msr_id == (uint32_t)-1) { + break; + } + if (msrop->msr_op_type == (uint8_t)MSR_OP_WRITE) { + msr_write(msrop->msr_id, msrop->value); + } + } + } + } else { + /* Mask the interrupt */ + lvt_perf_ctr |= LVT_PERFCTR_BIT_MASK; + msr_write(MSR_IA32_EXT_APIC_LVT_PMI, lvt_perf_ctr); + } } /* diff --git a/hypervisor/include/arch/x86/msr.h b/hypervisor/include/arch/x86/msr.h index e118b21c3..7e4e26c0f 100644 --- a/hypervisor/include/arch/x86/msr.h +++ b/hypervisor/include/arch/x86/msr.h @@ -488,6 +488,16 @@ #define MSR_ATOM_LER_TO_LIP 0x000001DEU /* Last exception record to linear IP 
*/ +#ifdef PROFILING_ON +/* Core (and Goldmont) specific MSRs */ +#define MSR_CORE_LASTBRANCH_TOS 0x000001C9U +/* Last branch record stack TOS */ +#define MSR_CORE_LASTBRANCH_0_FROM_IP 0x00000680U +/* Last branch record 0 from IP */ +#define MSR_CORE_LASTBRANCH_0_TO_IP 0x000006C0U +/* Last branch record 0 to IP */ +#endif + /* LINCROFT specific MSRs */ #define MSR_LNC_BIOS_CACHE_AS_RAM 0x000002E0U /* Configure CAR */ diff --git a/hypervisor/include/common/irq.h b/hypervisor/include/common/irq.h index 7d4b160c8..3bedd0853 100644 --- a/hypervisor/include/common/irq.h +++ b/hypervisor/include/common/irq.h @@ -24,6 +24,11 @@ struct irq_desc { uint32_t flags; /* flags for trigger mode/ptdev */ spinlock_t lock; +#ifdef PROFILING_ON + uint64_t ctx_rip; + uint64_t ctx_rflags; + uint64_t ctx_cs; +#endif }; int32_t request_irq(uint32_t req_irq, irq_action_t action_fn, void *priv_data, diff --git a/hypervisor/include/debug/profiling_internal.h b/hypervisor/include/debug/profiling_internal.h index 56b591629..f0ae728f7 100644 --- a/hypervisor/include/debug/profiling_internal.h +++ b/hypervisor/include/debug/profiling_internal.h @@ -33,6 +33,11 @@ enum MSR_CMD_TYPE { MSR_OP_READ_CLEAR }; +enum PMU_MSR_TYPE { + PMU_MSR_CCCR = 0, + PMU_MSR_ESCR, + PMU_MSR_DATA +}; typedef enum IPI_COMMANDS { IPI_MSR_OP = 0, IPI_PMU_CONFIG, @@ -212,6 +217,46 @@ struct sep_state { uint64_t saved_debugctl_value; } __aligned(8); +struct core_pmu_sample { + /* context where PMI is triggered */ + uint32_t os_id; + /* the task id */ + uint32_t task_id; + /* instruction pointer */ + uint64_t rip; + /* the task name */ + char task[16]; + /* physical cpu ID */ + uint32_t cpu_id; + /* the process id */ + uint32_t process_id; + /* perf global status msr value (for overflow status) */ + uint64_t overflow_status; + /* rflags */ + uint32_t rflags; + /* code segment */ + uint32_t cs; +} __aligned(SEP_BUF_ENTRY_SIZE); + +#define NUM_LBR_ENTRY 32 + +struct lbr_pmu_sample { + /* LBR TOS */ + uint64_t lbr_tos; + /* 
LBR FROM IP */ + uint64_t lbr_from_ip[NUM_LBR_ENTRY]; + /* LBR TO IP */ + uint64_t lbr_to_ip[NUM_LBR_ENTRY]; + /* LBR info */ + uint64_t lbr_info[NUM_LBR_ENTRY]; +} __aligned(SEP_BUF_ENTRY_SIZE); + +struct pmu_sample { + /* core pmu sample */ + struct core_pmu_sample csample; + /* lbr pmu sample */ + struct lbr_pmu_sample lsample; +} __aligned(SEP_BUF_ENTRY_SIZE); struct vm_switch_trace { uint64_t vm_enter_tsc; @@ -228,6 +273,7 @@ struct profiling_info_wrapper { struct sep_state sep_state; struct guest_vm_info vm_info; ipi_commands ipi_cmd; + struct pmu_sample pmu_sample; struct vm_switch_trace vm_switch_trace; socwatch_state soc_state; struct sw_msr_op_info sw_msr_op_info;