Mirror of https://github.com/projectacrn/acrn-hypervisor.git (synced 2025-09-21 00:38:28 +00:00)
initial import
internal commit: 14ac2bc2299032fa6714d1fefa7cf0987b3e3085
Signed-off-by: Eddie Dong <eddie.dong@intel.com>
1015  hypervisor/arch/x86/assign.c  Normal file
File diff suppressed because it is too large
650  hypervisor/arch/x86/cpu.c  Normal file
@@ -0,0 +1,650 @@
/*
 * Copyright (C) 2018 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <hypervisor.h>
#include <hv_lib.h>
#include <acrn_common.h>
#include <bsp_extern.h>
#include <hv_arch.h>
#include <schedule.h>
#include <version.h>
#include <hv_debug.h>

#ifdef CONFIG_EFI_STUB
extern uint32_t efi_physical_available_ap_bitmap;
#endif

uint64_t tsc_clock_freq = 1000000000;

spinlock_t cpu_secondary_spinlock = {
	.head = 0,
	.tail = 0
};

spinlock_t up_count_spinlock = {
	.head = 0,
	.tail = 0
};

void *per_cpu_data_base_ptr;
int phy_cpu_num;
unsigned long pcpu_sync = 0;
uint32_t up_count = 0;

DEFINE_CPU_DATA(uint8_t[STACK_SIZE], stack) __aligned(16);
DEFINE_CPU_DATA(uint8_t, lapic_id);
DEFINE_CPU_DATA(void *, vcpu);
DEFINE_CPU_DATA(int, state);

/* TODO: add more capability per requirement */
struct cpu_capability {
	bool tsc_adjust_supported;
	bool ibrs_ibpb_supported;
	bool stibp_supported;
	bool apicv_supported;
	bool monitor_supported;
};
static struct cpu_capability cpu_caps;

static void apicv_cap_detect(void);
static void cpu_set_logical_id(uint32_t logical_id);
static void print_hv_banner(void);
bool check_monitor_support(void);
int cpu_find_logical_id(uint32_t lapic_id);
#ifndef CONFIG_EFI_STUB
static void start_cpus();
#endif
static void pcpu_sync_sleep(unsigned long *sync, int mask_bit);
int ibrs_type;
static void check_cpu_capability(void)
{
	uint32_t eax, ebx, ecx, edx;

	memset(&cpu_caps, 0, sizeof(struct cpu_capability));

	cpuid(CPUID_EXTEND_FEATURE, &eax, &ebx, &ecx, &edx);

	cpu_caps.tsc_adjust_supported = (ebx & CPUID_EBX_TSC_ADJ) ?
					(true) : (false);
	cpu_caps.ibrs_ibpb_supported = (edx & CPUID_EDX_IBRS_IBPB) ?
					(true) : (false);
	cpu_caps.stibp_supported = (edx & CPUID_EDX_STIBP) ?
					(true) : (false);

	/* For speculation defence.
	 * The default way is to set IBRS at vmexit and then do IBPB at vcpu
	 * context switch (ibrs_type == IBRS_RAW).
	 * Now provide an optimized way (ibrs_type == IBRS_OPT) which sets
	 * STIBP and does IBPB at vmexit, since having STIBP always set has
	 * less impact than having IBRS always set. Also, since IBPB is
	 * already done at vmexit, it is not necessary to do so at vcpu
	 * context switch.
	 */
	ibrs_type = IBRS_NONE;

	/* Currently for APL, if retpoline is enabled, then IBRS should not
	 * take effect.
	 * TODO: add IA32_ARCH_CAPABILITIES[1] check; if this bit is set, IBRS
	 * should be set all the time instead of relying on retpoline.
	 */
#ifndef CONFIG_RETPOLINE
	if (cpu_caps.ibrs_ibpb_supported) {
		ibrs_type = IBRS_RAW;
		if (cpu_caps.stibp_supported)
			ibrs_type = IBRS_OPT;
	}
#endif
}

bool check_tsc_adjust_support(void)
{
	return cpu_caps.tsc_adjust_supported;
}

bool check_ibrs_ibpb_support(void)
{
	return cpu_caps.ibrs_ibpb_supported;
}

bool check_stibp_support(void)
{
	return cpu_caps.stibp_supported;
}

static void alloc_phy_cpu_data(int pcpu_num)
{
	phy_cpu_num = pcpu_num;

	per_cpu_data_base_ptr = calloc(1, PER_CPU_DATA_SIZE * pcpu_num);
	ASSERT(per_cpu_data_base_ptr != NULL, "");
}

int __attribute__((weak)) parse_madt(uint8_t *lapic_id_base)
{
	static const uint32_t lapic_id[] = {0, 2, 4, 6};
	uint32_t i;

	for (i = 0; i < ARRAY_SIZE(lapic_id); i++)
		*lapic_id_base++ = lapic_id[i];

	return ARRAY_SIZE(lapic_id);
}

static int init_phy_cpu_storage(void)
{
	int i, pcpu_num = 0;
	int bsp_cpu_id;
	uint8_t bsp_lapic_id = 0;
	uint8_t *lapic_id_base;

	/*
	 * allocate memory to save all lapic_ids detected in parse_madt.
	 * We allocate one 4K page, which can hold the lapic_id info for
	 * up to 4K CPUs.
	 */
	lapic_id_base = alloc_page(CPU_PAGE_SIZE);
	ASSERT(lapic_id_base != NULL, "fail to alloc page");

	pcpu_num = parse_madt(lapic_id_base);
	alloc_phy_cpu_data(pcpu_num);

	for (i = 0; i < pcpu_num; i++) {
		per_cpu(lapic_id, i) = *lapic_id_base++;
#ifdef CONFIG_EFI_STUB
		efi_physical_available_ap_bitmap |= 1 << per_cpu(lapic_id, i);
#endif
	}

	/* free memory after lapic_ids are saved in per_cpu data */
	free(lapic_id_base);

	bsp_lapic_id = get_cur_lapic_id();

#ifdef CONFIG_EFI_STUB
	efi_physical_available_ap_bitmap &= ~(1 << bsp_lapic_id);
#endif

	bsp_cpu_id = cpu_find_logical_id(bsp_lapic_id);
	ASSERT(bsp_cpu_id >= 0, "fail to get phy cpu id");

	return bsp_cpu_id;
}

static void cpu_set_current_state(uint32_t logical_id, int state)
{
	spinlock_obtain(&up_count_spinlock);

	/* Check if state is initializing */
	if (state == CPU_STATE_INITIALIZING) {
		/* Increment CPU up count */
		up_count++;

		/* Save this CPU's logical ID to the TSC AUX MSR */
		cpu_set_logical_id(logical_id);
	}

	/* Set state for the specified CPU */
	per_cpu(state, logical_id) = state;

	spinlock_release(&up_count_spinlock);
}

#ifdef STACK_PROTECTOR
struct stack_canary {
	/* Gcc generates extra code, using [fs:40] to access canary */
	uint8_t reserved[40];
	uint64_t canary;
};

static DEFINE_CPU_DATA(struct stack_canary, stack_canary);

static uint64_t get_random_value(void)
{
	uint64_t random = 0;

	asm volatile ("1: rdrand %%rax\n"
		"jnc 1b\n"
		"mov %%rax, %0\n"
		: "=r"(random) :: );
	return random;
}

static void set_fs_base(void)
{
	struct stack_canary *psc = &get_cpu_var(stack_canary);

	psc->canary = get_random_value();
	msr_write(MSR_IA32_FS_BASE, (uint64_t)psc);
}
#endif

void bsp_boot_init(void)
{
#ifdef HV_DEBUG
	uint64_t start_tsc = rdtsc();
#endif

	/* Clear BSS */
	memset(_ld_bss_start, 0, _ld_bss_end - _ld_bss_start);

	/* Build-time sanity checks to make sure the hard-coded offsets
	 * match the actual offsets!
	 */
	STATIC_ASSERT(offsetof(struct cpu_regs, rax) ==
		VMX_MACHINE_T_GUEST_RAX_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, rbx) ==
		VMX_MACHINE_T_GUEST_RBX_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, rcx) ==
		VMX_MACHINE_T_GUEST_RCX_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, rdx) ==
		VMX_MACHINE_T_GUEST_RDX_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, rbp) ==
		VMX_MACHINE_T_GUEST_RBP_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, rsi) ==
		VMX_MACHINE_T_GUEST_RSI_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, rdi) ==
		VMX_MACHINE_T_GUEST_RDI_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, r8) ==
		VMX_MACHINE_T_GUEST_R8_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, r9) ==
		VMX_MACHINE_T_GUEST_R9_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, r10) ==
		VMX_MACHINE_T_GUEST_R10_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, r11) ==
		VMX_MACHINE_T_GUEST_R11_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, r12) ==
		VMX_MACHINE_T_GUEST_R12_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, r13) ==
		VMX_MACHINE_T_GUEST_R13_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, r14) ==
		VMX_MACHINE_T_GUEST_R14_OFFSET);
	STATIC_ASSERT(offsetof(struct cpu_regs, r15) ==
		VMX_MACHINE_T_GUEST_R15_OFFSET);
	STATIC_ASSERT(offsetof(struct run_context, cr2) ==
		VMX_MACHINE_T_GUEST_CR2_OFFSET);
	STATIC_ASSERT(offsetof(struct run_context, ia32_spec_ctrl) ==
		VMX_MACHINE_T_GUEST_SPEC_CTRL_OFFSET);

	/* Initialize the hypervisor paging */
	init_paging();

	early_init_lapic();

	init_phy_cpu_storage();

	load_gdtr_and_tr();

	/* Switch to run-time stack */
	CPU_SP_WRITE(&get_cpu_var(stack)[STACK_SIZE - 1]);

#ifdef STACK_PROTECTOR
	set_fs_base();
#endif

	check_cpu_capability();

	apicv_cap_detect();

	/* Set state for this CPU to initializing */
	cpu_set_current_state(CPU_BOOT_ID, CPU_STATE_INITIALIZING);

	/* Perform any necessary BSP initialization */
	init_bsp();

	/* Initialize Serial */
	serial_init();

	/* Initialize console */
	console_init();

	/* Print Hypervisor Banner */
	print_hv_banner();

	/* Make sure rdtsc is enabled */
	check_tsc();

	/* Calculate TSC Frequency */
	tsc_clock_freq = tsc_cycles_in_period(1000) / 1000 * 1000000;

	/* Enable logging */
	init_logmsg(LOG_BUF_SIZE,
		LOG_DESTINATION);

#ifdef HV_DEBUG
	/* Log first messages */
	printf("HV version %d.%d-%s-%s build by %s, start time %lluus\r\n",
		HV_MAJOR_VERSION, HV_MINOR_VERSION, HV_BUILD_TIME,
		HV_BUILD_VERSION, HV_BUILD_USER,
		TICKS_TO_US(start_tsc));
#endif
	pr_dbg("Core %d is up", CPU_BOOT_ID);

	/* Warn if the security features are not ready */
	if (!check_ibrs_ibpb_support() && !check_stibp_support()) {
		pr_fatal("SECURITY WARNING!!!!!!");
		pr_fatal("Please apply the latest CPU uCode patch!");
	}

	/* Initialize the shell */
	shell_init();

	/* Initialize interrupts */
	interrupt_init(CPU_BOOT_ID);

	timer_init();
	setup_notification();
	ptdev_init();

	init_scheduler();

#ifndef CONFIG_EFI_STUB
	/* Start all secondary cores */
	start_cpus();

	/* Trigger event to allow secondary CPUs to continue */
	bitmap_set(0, &pcpu_sync);
#else
	memcpy_s(_ld_cpu_secondary_reset_start,
		(unsigned long)&_ld_cpu_secondary_reset_size,
		_ld_cpu_secondary_reset_load,
		(unsigned long)&_ld_cpu_secondary_reset_size);
#endif

	ASSERT(get_cpu_id() == CPU_BOOT_ID, "");

	init_iommu();

	console_setup_timer();

	/* Start initializing the VM for this CPU */
	hv_main(CPU_BOOT_ID);

	/* Control should not come here */
	cpu_halt(CPU_BOOT_ID);
}

void cpu_secondary_init(void)
{
	/* NOTE: Use of local / stack variables in this function is problematic
	 * since the stack is switched in the middle of the function. For this
	 * reason, the logical id is only temporarily stored in a static
	 * variable, but this will be over-written once subsequent CPUs
	 * start-up. Once the spin-lock is released, the cpu_logical_id_get()
	 * API is used to obtain the logical ID
	 */

	/* Switch this CPU to use the same page tables set-up by the
	 * primary/boot CPU
	 */
	enable_paging(get_paging_pml4());
	early_init_lapic();

	/* Find the logical ID of this CPU given the LAPIC ID
	 * temp_logical_id =
	 * cpu_find_logical_id(get_cur_lapic_id());
	 */
	cpu_find_logical_id(get_cur_lapic_id());

	/* Set state for this CPU to initializing */
	cpu_set_current_state(cpu_find_logical_id
		(get_cur_lapic_id()),
		CPU_STATE_INITIALIZING);

	/* Switch to run-time stack */
	CPU_SP_WRITE(&get_cpu_var(stack)[STACK_SIZE - 1]);

#ifdef STACK_PROTECTOR
	set_fs_base();
#endif

	load_gdtr_and_tr();

	/* Make sure rdtsc is enabled */
	check_tsc();

	pr_dbg("Core %d is up", get_cpu_id());

	/* Release secondary boot spin-lock to allow one of the next CPU(s) to
	 * perform this common initialization
	 */
	spinlock_release(&cpu_secondary_spinlock);

	/* Initialize secondary processor interrupts. */
	interrupt_init(get_cpu_id());

	timer_init();

	/* Wait for boot processor to signal all secondary cores to continue */
	pcpu_sync_sleep(&pcpu_sync, 0);

#ifdef CONFIG_EFI_STUB
	bitmap_clr(0, &pcpu_sync);
#endif

	hv_main(get_cpu_id());

	/* Control will only come here for secondary CPUs not configured for
	 * use or if an error occurs in hv_main
	 */
	cpu_halt(get_cpu_id());
}

int cpu_find_logical_id(uint32_t lapic_id)
{
	int i;

	for (i = 0; i < phy_cpu_num; i++) {
		if (per_cpu(lapic_id, i) == lapic_id)
			return i;
	}

	return -1;
}

#ifndef CONFIG_EFI_STUB
/*
 * Start all secondary CPUs.
 */
static void start_cpus()
{
	uint32_t timeout;
	uint32_t expected_up;

	/* Copy segment for AP initialization code below 1MB */
	memcpy_s(_ld_cpu_secondary_reset_start,
		(unsigned long)&_ld_cpu_secondary_reset_size,
		_ld_cpu_secondary_reset_load,
		(unsigned long)&_ld_cpu_secondary_reset_size);

	/* Set the number of CPUs expected to come up */
	expected_up = phy_cpu_num;

	/* Broadcast IPIs to all other CPUs */
	send_startup_ipi(INTR_CPU_STARTUP_ALL_EX_SELF,
		-1U, ((paddr_t) cpu_secondary_reset));

	/* Wait until global count is equal to expected CPU up count or
	 * configured time-out has expired
	 */
	timeout = CPU_UP_TIMEOUT * 1000;
	while ((up_count != expected_up) && (timeout != 0)) {
		/* Delay 10us */
		udelay(10);

		/* Decrement timeout value */
		timeout -= 10;
	}

	/* Check to see if all expected CPUs are actually up */
	if (up_count != expected_up) {
		/* Print error */
		pr_fatal("Secondary CPUs failed to come up");

		/* Error condition - loop endlessly for now */
		do {
		} while (1);
	}
}
#endif

void cpu_halt(uint32_t logical_id)
{
	/* For debug purposes, using a stack variable in the while loop enables
	 * us to modify the value using a JTAG probe and resume if needed.
	 */
	int halt = 1;

	/* Set state to show CPU is halted */
	cpu_set_current_state(logical_id, CPU_STATE_HALTED);

	/* Halt the CPU */
	do {
		asm volatile ("hlt");
	} while (halt);
}

static void cpu_set_logical_id(uint32_t logical_id)
{
	/* Write TSC AUX register */
	msr_write(MSR_IA32_TSC_AUX, (uint64_t) logical_id);
}

static void print_hv_banner(void)
{
	char *boot_msg = "ACRN Hypervisor\n\r";

	/* Print the boot message */
	printf(boot_msg);
}

static void pcpu_sync_sleep(unsigned long *sync, int mask_bit)
{
	int wake_sync = (1 << mask_bit);

	if (check_monitor_support()) {
		/* Wait for the event to be set using monitor/mwait */
		asm volatile ("1: cmpl %%ebx,(%%eax)\n"
			" je 2f\n"
			" monitor\n"
			" mwait\n"
			" jmp 1b\n"
			"2:\n"
			:
			: "a" (sync), "d"(0), "c"(0),
			"b"(wake_sync)
			: "cc");
	} else {
		/* Wait for the event to be set using pause */
		asm volatile ("1: cmpl %%ebx,(%%eax)\n"
			" je 2f\n"
			" pause\n"
			" jmp 1b\n"
			"2:\n"
			:
			: "a" (sync), "d"(0), "c"(0),
			"b"(wake_sync)
			: "cc");
	}
}

/* Check allowed ONE-settings in VMX control */
static bool is_ctrl_setting_allowed(uint64_t msr_val, uint32_t ctrl)
{
	/*
	 * Intel SDM Appendix A.3
	 * - bit X in ctrl can be set to 1
	 *   only if bit 32+X in msr_val is 1
	 */
	return ((((uint32_t)(msr_val >> 32)) & ctrl) == ctrl);
}

static void apicv_cap_detect(void)
{
	uint64_t val64;
	uint32_t ctrl;
	bool result;

	ctrl = VMX_PROCBASED_CTLS_TPR_SHADOW;
	val64 = msr_read(MSR_IA32_VMX_PROCBASED_CTLS);

	result = is_ctrl_setting_allowed(val64, ctrl);
	if (result) {
		ctrl = VMX_PROCBASED_CTLS2_VAPIC |
			VMX_PROCBASED_CTLS2_VAPIC_REGS |
			VMX_PROCBASED_CTLS2_VIRQ;

		val64 = msr_read(MSR_IA32_VMX_PROCBASED_CTLS2);
		result = is_ctrl_setting_allowed(val64, ctrl);
	}

	cpu_caps.apicv_supported = result;
}

bool is_apicv_enabled(void)
{
	return cpu_caps.apicv_supported;
}

static void monitor_cap_detect(void)
{
	uint32_t eax, ebx, ecx, edx;
	uint32_t family;
	uint32_t model;

	/* Run CPUID to determine if MONITOR support is available */
	cpuid(CPUID_FEATURES, &eax, &ebx, &ecx, &edx);

	/* See if MONITOR feature bit is set in ECX */
	if (ecx & CPUID_ECX_MONITOR)
		cpu_caps.monitor_supported = true;

	/* don't use monitor for CPU (family: 0x6 model: 0x5c)
	 * in hypervisor, but still expose it to the guests and
	 * let them handle it correctly
	 */
	family = (eax >> 8) & 0xff;
	if (family == 0xF)
		family += (eax >> 20) & 0xff;

	model = (eax >> 4) & 0xf;
	if (family >= 0x06)
		model += ((eax >> 16) & 0xf) << 4;

	if (cpu_caps.monitor_supported &&
		(family == 0x06) &&
		(model == 0x5c)) {
		cpu_caps.monitor_supported = false;
	}
}

bool check_monitor_support(void)
{
	return cpu_caps.monitor_supported;
}

228  hypervisor/arch/x86/cpu_primary.S  Normal file
@@ -0,0 +1,228 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <cpu.h>
|
||||
#include <mmu.h>
|
||||
#include <gdt.h>
|
||||
#include <idt.h>
|
||||
#include <msr.h>
|
||||
|
||||
/* MULTIBOOT HEADER */
|
||||
#define MULTIBOOT_HEADER_MAGIC 0x1badb002
|
||||
#define MULTIBOOT_HEADER_FLAGS 0x00000002 /*flags bit 1 : enable mem_*, mmap_**/
|
||||
|
||||
.section multiboot_header, "a"
|
||||
|
||||
.align 4
|
||||
|
||||
/* header magic */
|
||||
.long MULTIBOOT_HEADER_MAGIC
|
||||
/* header flags - flags bit 1 : enable mem_*, mmap_* */
|
||||
.long MULTIBOOT_HEADER_FLAGS
|
||||
/* header checksum = -(magic + flags) */
|
||||
.long -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
|
||||
|
||||
.section entry, "ax"
|
||||
|
||||
.align 8
|
||||
.code32
|
||||
|
||||
.global cpu_primary_start_32
|
||||
cpu_primary_start_32:
|
||||
/* Disable interrupts */
|
||||
cli
|
||||
|
||||
/* Clear direction flag */
|
||||
cld
|
||||
|
||||
/* save eax and ebx */
|
||||
movl %eax, %esp
|
||||
movl %ebx, %ebp
|
||||
|
||||
/* detect whether it is in long mode */
|
||||
movl $MSR_IA32_EFER, %ecx
|
||||
rdmsr
|
||||
test $MSR_IA32_EFER_LMA_BIT, %eax
|
||||
|
||||
/* jump to 64bit entry if it is already in long mode */
|
||||
jne cpu_primary_start_64
|
||||
|
||||
/* save the MULTIBOOT magic number & MBI */
|
||||
movl %esp, (boot_regs)
|
||||
movl %ebp, (boot_regs+4)
|
||||
|
||||
/* Disable paging */
|
||||
mov %cr0, %ebx
|
||||
andl $~CR0_PG, %ebx
|
||||
mov %ebx, %cr0
|
||||
|
||||
/* Set DE, PAE, MCE and OS support bits in CR4 */
|
||||
movl $(CR4_DE | CR4_PAE | CR4_MCE | CR4_OSFXSR | CR4_OSXMMEXCPT), %eax
|
||||
mov %eax, %cr4
|
||||
|
||||
/* Set CR3 to PML4 table address */
|
||||
movl $cpu_boot32_page_tables_start, %edi
|
||||
mov %edi, %cr3
|
||||
|
||||
/* Set LME bit in EFER */
|
||||
movl $MSR_IA32_EFER, %ecx
|
||||
rdmsr
|
||||
orl $MSR_IA32_EFER_LME_BIT, %eax
|
||||
wrmsr
|
||||
|
||||
/* Enable paging, protection, numeric error and co-processor
|
||||
monitoring in CR0 to enter long mode */
|
||||
mov %cr0, %ebx
|
||||
orl $(CR0_PG | CR0_PE | CR0_MP | CR0_NE), %ebx
|
||||
mov %ebx, %cr0
|
||||
|
||||
/* Load temporary GDT pointer value */
|
||||
mov $cpu_primary32_gdt_ptr, %ebx
|
||||
lgdt (%ebx)
|
||||
|
||||
/* Perform a long jump to start executing in 64-bit mode */
|
||||
ljmp $HOST_GDT_RING0_CODE_SEL, $primary_start_long_mode
|
||||
|
||||
.code64
|
||||
.org 0x200
|
||||
.global cpu_primary_start_64
|
||||
cpu_primary_start_64:
|
||||
/* save the MULTIBOOT magic number & MBI */
|
||||
movl %edi, (boot_regs)
|
||||
movl %esi, (boot_regs+4)
|
||||
#ifdef CONFIG_EFI_STUB
|
||||
movl %edx, (boot_regs+8)
|
||||
#endif
|
||||
|
||||
primary_start_long_mode:
|
||||
|
||||
/* Fix up the IDT descriptors */
|
||||
movl $HOST_IDT, %edx
|
||||
movl $HOST_IDT_ENTRIES, %ecx
|
||||
.LFixUpIDT_Entries:
|
||||
xorl %eax, %eax
|
||||
xchgl %eax, 12(%edx) /* Set rsvd bits to 0; eax now has
|
||||
high 32 of entry point */
|
||||
xchgl %eax, 8(%edx) /* Set bits 63..32 of entry point;
|
||||
eax now has low 32 of entry point */
|
||||
movw %ax, (%edx) /* Set bits 0-15 of procedure entry
|
||||
point */
|
||||
shr $16, %eax
|
||||
movw %ax, 6(%edx) /* Set bits 16-31 of entry point */
|
||||
addl $X64_IDT_DESC_SIZE,%edx
|
||||
loop .LFixUpIDT_Entries
|
||||
|
||||
/* Load IDT */
|
||||
mov $HOST_IDTR, %rcx
|
||||
lidtq (%rcx)
|
||||
|
||||
/* Load temporary GDT pointer value */
|
||||
mov $cpu_primary32_gdt_ptr, %ebx
|
||||
lgdt (%ebx)
|
||||
|
||||
/* Replace CS with the correct value should we need it */
|
||||
mov $HOST_GDT_RING0_CODE_SEL, %bx
|
||||
mov %bx, jcs
|
||||
movabsq $jmpbuf, %rax
|
||||
rex.w ljmp *(%rax)
|
||||
.data
|
||||
jmpbuf: .quad after
|
||||
jcs: .word 0
|
||||
.text
|
||||
after:
|
||||
|
||||
/* Initialize temporary stack pointer */
|
||||
movq $_ld_bss_end, %rsp
|
||||
add $CPU_PAGE_SIZE,%rsp
|
||||
and $(~(CPU_STACK_ALIGN - 1)),%rsp
|
||||
|
||||
// load all selector registers with appropriate values
|
||||
xor %edx, %edx
|
||||
lldt %dx
|
||||
movl $HOST_GDT_RING0_DATA_SEL,%eax
|
||||
mov %eax,%ss // Was 32bit POC Stack
|
||||
mov %eax,%ds // Was 32bit POC Data
|
||||
mov %eax,%es // Was 32bit POC Data
|
||||
mov %edx,%fs // Was 32bit POC Data
|
||||
mov %edx,%gs // Was 32bit POC CLS
|
||||
|
||||
/* Push sp magic to top of stack for call trace */
|
||||
pushq $SP_BOTTOM_MAGIC
|
||||
/* continue with chipset level initialization */
|
||||
call bsp_boot_init
|
||||
|
||||
loop:
|
||||
jmp loop
|
||||
|
||||
.align 4
|
||||
.global boot_regs
|
||||
boot_regs:
|
||||
.long 0x00000000
|
||||
.long 0x00000000
|
||||
#ifdef CONFIG_EFI_STUB
|
||||
.long 0x00000000
|
||||
#endif
|
||||
|
||||
/* GDT table */
|
||||
.align 4
|
||||
cpu_primary32_gdt:
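/* Descriptor layout (decoded for reference): a null descriptor, then a flat
 * 64-bit ring-0 code segment (0x00af9b000000ffff, G=1, L=1, type 0x9b) and a
 * flat ring-0 data segment (0x00cf93000000ffff, G=1, D/B=1, type 0x93).
 */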
|
||||
.quad 0x0000000000000000
|
||||
.quad 0x00af9b000000ffff
|
||||
.quad 0x00cf93000000ffff
|
||||
cpu_primary32_gdt_end:
|
||||
|
||||
/* GDT pointer */
|
||||
.align 2
|
||||
cpu_primary32_gdt_ptr:
|
||||
.short (cpu_primary32_gdt_end - cpu_primary32_gdt) - 1
|
||||
.quad cpu_primary32_gdt
|
||||
|
||||
/* PML4, PDPT, and PD tables initialized to map first 4 GBytes of memory */
|
||||
.align CPU_PAGE_SIZE
|
||||
.global cpu_boot32_page_tables_start
|
||||
cpu_boot32_page_tables_start:
|
||||
.quad cpu_primary32_pdpt_addr + (IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT)
|
||||
.align CPU_PAGE_SIZE
|
||||
cpu_primary32_pdpt_addr:
|
||||
address = 0
|
||||
.rept 4
|
||||
.quad cpu_primary32_pdt_addr + address + \
|
||||
(IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT)
|
||||
address = address + CPU_PAGE_SIZE
|
||||
.endr
|
||||
.align CPU_PAGE_SIZE
|
||||
cpu_primary32_pdt_addr:
|
||||
address = 0
|
||||
.rept 2048
|
||||
.quad address + (IA32E_PDPTE_PS_BIT | IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT)
|
||||
address = address + 0x200000
|
||||
.endr
|
||||
|
197  hypervisor/arch/x86/cpu_secondary.S  Normal file
@@ -0,0 +1,197 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <spinlock.h>
|
||||
#include <gdt.h>
|
||||
#include <cpu.h>
|
||||
#include <mmu.h>
|
||||
#include <msr.h>
|
||||
|
||||
|
||||
.extern cpu_secondary_init
|
||||
.extern cpu_logical_id
|
||||
.extern _ld_bss_end
|
||||
.extern HOST_GDTR
|
||||
|
||||
.section .cpu_secondary_reset,"ax"
|
||||
|
||||
.align 4
|
||||
.code16
|
||||
.global cpu_secondary_reset
|
||||
cpu_secondary_reset:
|
||||
|
||||
/* Disable local interrupts */
|
||||
|
||||
cli
|
||||
|
||||
/* Set DE, PAE, MCE and OS support bits in CR4 */
|
||||
|
||||
movl $(CR4_DE | CR4_PAE | CR4_MCE | CR4_OSFXSR | CR4_OSXMMEXCPT), %eax
|
||||
mov %eax, %cr4
|
||||
|
||||
/* Set CR3 to PML4 table address */
|
||||
|
||||
movl $CPU_Boot_Page_Tables_Start, %edi
|
||||
mov %edi, %cr3
|
||||
|
||||
/* Set LME bit in EFER */
|
||||
|
||||
movl $MSR_IA32_EFER, %ecx
|
||||
rdmsr
|
||||
orl $MSR_IA32_EFER_LME_BIT, %eax
|
||||
wrmsr
|
||||
|
||||
/* Enable paging, protection, numeric error and co-processor
|
||||
monitoring in CR0 to enter long mode */
|
||||
|
||||
mov %cr0, %ebx
|
||||
orl $(CR0_PG | CR0_PE | CR0_MP | CR0_NE), %ebx
|
||||
mov %ebx, %cr0
|
||||
|
||||
/* Load temporary GDT pointer value */
|
||||
|
||||
mov $cpu_secondary_gdt_ptr, %ebx
|
||||
lgdt (%ebx)
|
||||
|
||||
/* Perform a long jump to start executing in 64-bit mode */
|
||||
|
||||
data32 ljmp $HOST_GDT_RING0_CODE_SEL, $cpu_secondary_long_mode
|
||||
|
||||
.code64
|
||||
cpu_secondary_long_mode:
|
||||
|
||||
/* Set up all other data segment registers */
|
||||
|
||||
movl $HOST_GDT_RING0_DATA_SEL, %eax
|
||||
mov %eax, %ss
|
||||
mov %eax, %ds
|
||||
mov %eax, %es
|
||||
mov %eax, %fs
|
||||
mov %eax, %gs
|
||||
|
||||
/* Obtain secondary CPU spin-lock to serialize
|
||||
booting of secondary cores for a bit */
|
||||
|
||||
spinlock_obtain(cpu_secondary_spinlock)
|
||||
|
||||
/* Initialize temporary stack pointer
|
||||
NOTE: Using the PML4 memory (PDPT address is top of memory
|
||||
for the PML4 page) for the temporary stack
|
||||
as we are only using the very first entry in
|
||||
this page and the stack is growing down from
|
||||
the top of this page. This stack is only
|
||||
used for a VERY short period of time, so
|
||||
this reuse of PML4 memory should be acceptable. */
|
||||
|
||||
movq $cpu_secondary_pdpt_addr, %rsp
|
||||
|
||||
/* Push sp magic to top of stack for call trace */
|
||||
pushq $SP_BOTTOM_MAGIC
|
||||
|
||||
/* Jump to C entry for the AP */
|
||||
|
||||
call cpu_secondary_init
|
||||
|
||||
cpu_secondary_error:
|
||||
|
||||
/* Error condition trap */
|
||||
|
||||
jmp cpu_secondary_error
|
||||
|
||||
/* GDT table */
|
||||
.align 4
|
||||
cpu_secondary_gdt:
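/* Same layout as cpu_primary32_gdt (decoded for reference): a null
 * descriptor, a flat 64-bit ring-0 code segment (0x00af9b000000ffff) and a
 * flat ring-0 data segment (0x00cf93000000ffff).
 */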
|
||||
.quad 0x0000000000000000
|
||||
.quad 0x00af9b000000ffff
|
||||
.quad 0x00cf93000000ffff
|
||||
cpu_secondary_gdt_end:
|
||||
|
||||
/* GDT pointer */
|
||||
.align 2
|
||||
cpu_secondary_gdt_ptr:
|
||||
.short (cpu_secondary_gdt_end - cpu_secondary_gdt) - 1
|
||||
.quad cpu_secondary_gdt
|
||||
|
||||
/* PML4, PDPT, and PD tables initialized to map first 4 GBytes of memory */
|
||||
|
||||
.align CPU_PAGE_SIZE
|
||||
.global CPU_Boot_Page_Tables_Start
|
||||
CPU_Boot_Page_Tables_Start:
|
||||
.quad cpu_secondary_pdpt_addr + (IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT)
|
||||
.align CPU_PAGE_SIZE
|
||||
cpu_secondary_pdpt_addr:
|
||||
address = 0
|
||||
.rept 4
|
||||
.quad cpu_secondary_pdt_addr + address + \
|
||||
(IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT)
|
||||
address = address + CPU_PAGE_SIZE
|
||||
.endr
|
||||
.align CPU_PAGE_SIZE
|
||||
cpu_secondary_pdt_addr:
|
||||
address = 0
|
||||
.rept 2048
|
||||
.quad address + (IA32E_PDPTE_PS_BIT | IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT)
|
||||
address = address + 0x200000
|
||||
.endr
|
||||
|
||||
|
||||
/*******************************************************************
|
||||
* GUEST initial 4G page table
|
||||
*
|
||||
* The guest starts in long mode, so the HV needs to prepare a guest
* identity-mapped page table.
|
||||
*
|
||||
* The guest page tables cover 4G of address space, with 2M page size.
|
||||
*
|
||||
* The HV copies this page table (6 pages) to the guest address
* CPU_Boot_Page_Tables_Start_VM before executing guest instructions.
|
||||
*
|
||||
******************************************************************/
|
||||
.align CPU_PAGE_SIZE
|
||||
.global CPU_Boot_Page_Tables_Start_VM
|
||||
CPU_Boot_Page_Tables_Start_VM:
|
||||
.quad vm_cpu_pdpt_addr + (IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT)
|
||||
.align CPU_PAGE_SIZE
|
||||
vm_cpu_pdpt_addr:
|
||||
address = 0
|
||||
.rept 4
|
||||
.quad vm_cpu_pdt_addr + address + (IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT)
|
||||
address = address + CPU_PAGE_SIZE
|
||||
.endr
|
||||
.align CPU_PAGE_SIZE
|
||||
vm_cpu_pdt_addr:
|
||||
address = 0
|
||||
.rept 2048
|
||||
.quad address + (IA32E_PDPTE_PS_BIT | IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT)
|
||||
address = address + 0x200000
|
||||
.endr
|
||||
|
||||
.end
|
195  hypervisor/arch/x86/cpuid.c  Normal file
@@ -0,0 +1,195 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <cpu.h>
|
||||
|
||||
void emulate_cpuid(struct vcpu *vcpu, uint32_t src_op, uint32_t *eax_ptr,
|
||||
uint32_t *ebx_ptr, uint32_t *ecx_ptr, uint32_t *edx_ptr)
|
||||
{
|
||||
uint32_t apicid = vlapic_get_id(vcpu->arch_vcpu.vlapic);
|
||||
static const char sig[12] = "ACRNACRNACRN";
|
||||
const uint32_t *sigptr = (const uint32_t *)sig;
|
||||
uint32_t count = *ecx_ptr;
|
||||
|
||||
if ((src_op != 0x40000000) && (src_op != 0x40000010))
|
||||
cpuid_count(src_op, count, eax_ptr, ebx_ptr, ecx_ptr, edx_ptr);
|
||||
|
||||
switch (src_op) {
|
||||
/* Virtualize cpuid 0x01 */
|
||||
case 0x01:
|
||||
/* Patching initial APIC ID */
|
||||
*ebx_ptr &= ~APIC_ID_MASK;
|
||||
*ebx_ptr |= (apicid & APIC_ID_MASK);
|
||||
|
||||
/* mask mtrr */
|
||||
*edx_ptr &= ~CPUID_EDX_MTRR;
|
||||
|
||||
/* Patching X2APIC, X2APIC mode is disabled by default. */
|
||||
if (x2apic_enabled)
|
||||
*ecx_ptr |= CPUID_ECX_x2APIC;
|
||||
else
|
||||
*ecx_ptr &= ~CPUID_ECX_x2APIC;
|
||||
|
||||
/* mask pcid */
|
||||
*ecx_ptr &= ~CPUID_ECX_PCID;
|
||||
|
||||
/*mask vmx to guest os */
|
||||
*ecx_ptr &= ~CPUID_ECX_VMX;
|
||||
|
||||
break;
|
||||
|
||||
/* Virtualize cpuid 0x07 */
|
||||
case 0x07:
|
||||
/* mask invpcid */
|
||||
*ebx_ptr &= ~CPUID_EBX_INVPCID;
|
||||
|
||||
break;
|
||||
|
||||
case 0x0a:
|
||||
/* PMU is not supported */
|
||||
*eax_ptr &= ~0xff;
|
||||
break;
|
||||
|
||||
/* Virtualize cpuid 0x0b */
|
||||
case 0x0b:
|
||||
/* Patching X2APIC */
|
||||
if (!x2apic_enabled) {
|
||||
*eax_ptr = 0;
|
||||
*ebx_ptr = 0;
|
||||
*ecx_ptr = 0;
|
||||
*edx_ptr = 0;
|
||||
}
|
||||
break;
|
||||
|
||||
/*
|
||||
* Leaf 0x40000000
|
||||
* This leaf returns the CPUID leaf range supported by the
|
||||
* hypervisor and the hypervisor vendor signature.
|
||||
*
|
||||
* EAX: The maximum input value for CPUID supported by the
|
||||
* hypervisor.
|
||||
* EBX, ECX, EDX: Hypervisor vendor ID signature.
|
||||
*/
|
||||
case 0x40000000:
|
||||
*eax_ptr = 0x40000010;
|
||||
*ebx_ptr = sigptr[0];
|
||||
*ecx_ptr = sigptr[1];
|
||||
*edx_ptr = sigptr[2];
|
||||
break;
|
||||
|
||||
/*
|
||||
* Leaf 0x40000010 - Timing Information.
|
||||
* This leaf returns the current TSC frequency and
|
||||
* current Bus frequency in kHz.
|
||||
*
|
||||
* EAX: (Virtual) TSC frequency in kHz.
|
||||
* TSC frequency is calculated from PIT in ACRN
|
||||
* EBX: (Virtual) Bus (local apic timer) frequency in kHz.
|
||||
* Bus (local apic timer) frequency is hardcoded as
|
||||
* (128 * 1024 * 1024) in ACRN
|
||||
* ECX, EDX: RESERVED (reserved fields are set to zero).
|
||||
*/
|
||||
case 0x40000010:
|
||||
*eax_ptr = (uint32_t)(tsc_clock_freq / 1000);
|
||||
*ebx_ptr = (128 * 1024 * 1024) / 1000;
|
||||
*ecx_ptr = 0;
|
||||
*edx_ptr = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static DEFINE_CPU_DATA(struct cpuid_cache_entry[CPUID_EXTEND_FEATURE_CACHE_MAX],
|
||||
cpuid_cache);
|
||||
|
||||
static inline struct cpuid_cache_entry *find_cpuid_cache_entry(uint32_t op,
|
||||
uint32_t count)
|
||||
{
|
||||
int pcpu_id = get_cpu_id();
|
||||
enum cpuid_cache_idx idx = CPUID_EXTEND_FEATURE_CACHE_MAX;
|
||||
|
||||
if ((count != 0))
|
||||
return NULL;
|
||||
|
||||
switch (op) {
|
||||
case CPUID_VENDORSTRING:
|
||||
idx = CPUID_VENDORSTRING_CACHE_IDX;
|
||||
break;
|
||||
|
||||
case CPUID_FEATURES:
|
||||
idx = CPUID_FEATURES_CACHE_IDX;
|
||||
break;
|
||||
|
||||
case CPUID_EXTEND_FEATURE:
|
||||
idx = CPUID_EXTEND_FEATURE_CACHE_IDX;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (idx == CPUID_EXTEND_FEATURE_CACHE_MAX)
|
||||
return NULL;
|
||||
|
||||
return &per_cpu(cpuid_cache, pcpu_id)[idx];
|
||||
}
|
||||
|
||||
inline void cpuid_count(uint32_t op, uint32_t count,
|
||||
uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
|
||||
{
|
||||
struct cpuid_cache_entry *entry;
|
||||
|
||||
entry = find_cpuid_cache_entry(op, count);
|
||||
|
||||
if (entry == NULL) {
|
||||
native_cpuid_count(op, count, a, b, c, d);
|
||||
} else if (entry->inited) {
|
||||
*a = entry->a;
|
||||
*b = entry->b;
|
||||
*c = entry->c;
|
||||
*d = entry->d;
|
||||
} else {
|
||||
native_cpuid_count(op, count, a, b, c, d);
|
||||
|
||||
entry->a = *a;
|
||||
entry->b = *b;
|
||||
entry->c = *c;
|
||||
entry->d = *d;
|
||||
|
||||
entry->inited = 1;
|
||||
}
|
||||
}
|
||||
|
569  hypervisor/arch/x86/ept.c  Normal file
@@ -0,0 +1,569 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <acrn_hv_defs.h>
|
||||
#include <hv_arch.h>
|
||||
#include <hypercall.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
#include "guest/instr_emul_wrapper.h"
|
||||
#include "guest/instr_emul.h"
|
||||
|
||||
#define ACRN_DBG_EPT 6
|
||||
|
||||
void *create_guest_paging(struct vm *vm)
|
||||
{
|
||||
void *hva_dest;
|
||||
void *hva_src;
|
||||
|
||||
/* copy guest identity mapped 4G page table to guest */
|
||||
hva_dest = GPA2HVA(vm,
|
||||
(uint64_t)CPU_Boot_Page_Tables_Start_VM);
|
||||
hva_src = (void *)(_ld_cpu_secondary_reset_load
|
||||
+ (CPU_Boot_Page_Tables_Start_VM
|
||||
- _ld_cpu_secondary_reset_start));
|
||||
/* 2MB page size, need to copy 6 pages */
|
||||
memcpy_s(hva_dest, 6 * CPU_PAGE_SIZE, hva_src, 6 * CPU_PAGE_SIZE);
|
||||
return (void *)CPU_Boot_Page_Tables_Start_VM;
|
||||
}
|
||||
|
||||
static void *find_next_table(uint32_t table_offset,
|
||||
void *table_base)
|
||||
{
|
||||
uint64_t table_entry;
|
||||
uint64_t table_present;
|
||||
void *sub_table_addr = 0;
|
||||
|
||||
/* Read the table entry */
|
||||
table_entry = MEM_READ64(table_base
|
||||
+ (table_offset * IA32E_COMM_ENTRY_SIZE));
|
||||
|
||||
/* If bit 7 is set, entry is not a subtable. */
|
||||
if ((table_entry & IA32E_PDPTE_PS_BIT)
|
||||
|| (table_entry & IA32E_PDE_PS_BIT))
|
||||
return sub_table_addr;
|
||||
|
||||
/* Set table present bits to any of the read/write/execute bits */
|
||||
table_present = (IA32E_EPT_R_BIT | IA32E_EPT_W_BIT | IA32E_EPT_X_BIT);
|
||||
|
||||
/* Determine if a valid entry exists */
|
||||
if ((table_entry & table_present) == 0) {
|
||||
/* No entry present */
|
||||
return sub_table_addr;
|
||||
}
|
||||
|
||||
/* Get address of the sub-table */
|
||||
sub_table_addr = (void *)(table_entry & IA32E_REF_MASK);
|
||||
|
||||
/* Return the next table in the walk */
|
||||
return sub_table_addr;
|
||||
}
|
||||
|
||||
|
||||
void free_ept_mem(void *pml4_addr)
|
||||
{
|
||||
void *pdpt_addr;
|
||||
void *pde_addr;
|
||||
void *pte_addr;
|
||||
uint32_t pml4_index;
|
||||
uint32_t pdpt_index;
|
||||
uint32_t pde_index;
|
||||
|
||||
for (pml4_index = 0; pml4_index < IA32E_NUM_ENTRIES; pml4_index++) {
|
||||
/* Walk from the PML4 table to the PDPT table */
|
||||
pdpt_addr = find_next_table(pml4_index, pml4_addr);
|
||||
if (pdpt_addr == NULL)
|
||||
continue;
|
||||
|
||||
for (pdpt_index = 0; pdpt_index < IA32E_NUM_ENTRIES;
|
||||
pdpt_index++) {
|
||||
/* Walk from the PDPT table to the PD table */
|
||||
pde_addr = find_next_table(pdpt_index, pdpt_addr);
|
||||
|
||||
if (pde_addr == NULL)
|
||||
continue;
|
||||
|
||||
for (pde_index = 0; pde_index < IA32E_NUM_ENTRIES;
|
||||
pde_index++) {
|
||||
/* Walk from the PD table to the page table */
|
||||
pte_addr = find_next_table(pde_index,
|
||||
pde_addr);
|
||||
|
||||
/* Free page table entry table */
|
||||
if (pte_addr)
|
||||
free(pte_addr);
|
||||
}
|
||||
/* Free page directory entry table */
|
||||
if (pde_addr)
|
||||
free(pde_addr);
|
||||
}
|
||||
free(pdpt_addr);
|
||||
}
|
||||
free(pml4_addr);
|
||||
}
|
||||
|
||||
void destroy_ept(struct vm *vm)
|
||||
{
|
||||
free_ept_mem(vm->arch_vm.ept);
|
||||
free_ept_mem(vm->arch_vm.m2p);
|
||||
}
|
||||
|
||||
uint64_t gpa2hpa_check(struct vm *vm, uint64_t gpa,
|
||||
uint64_t size, int *found, bool assert)
|
||||
{
|
||||
uint64_t hpa = 0;
|
||||
int _found = 0;
|
||||
struct entry_params entry;
|
||||
struct map_params map_params;
|
||||
|
||||
map_params.page_table_type = PT_EPT;
|
||||
map_params.pml4_base = vm->arch_vm.ept;
|
||||
map_params.pml4_inverted = vm->arch_vm.m2p;
|
||||
obtain_last_page_table_entry(&map_params, &entry,
|
||||
(void *)gpa, true);
|
||||
if (entry.entry_present == PT_PRESENT
|
||||
/* if the access crosses several pages, it is not handled for now;
* only print error info
*/
|
||||
&& ((gpa % entry.page_size) + size) <= entry.page_size) {
|
||||
_found = 1;
|
||||
hpa = ((entry.entry_val & (~(entry.page_size - 1)))
|
||||
| (gpa & (entry.page_size - 1)));
|
||||
}
|
||||
|
||||
if (found != NULL)
|
||||
*found = _found;
|
||||
|
||||
if (_found == 0 && assert) {
|
||||
pr_err("VM %d GPA2HPA: failed for gpa 0x%llx",
|
||||
vm->attr.boot_idx, gpa);
|
||||
ASSERT(_found != 0, "GPA2HPA not found");
|
||||
}
|
||||
|
||||
pr_dbg("GPA2HPA: 0x%llx->0x%llx", gpa, hpa);
|
||||
|
||||
return hpa;
|
||||
}
|
||||
|
||||
uint64_t gpa2hpa(struct vm *vm, uint64_t gpa)
|
||||
{
|
||||
return gpa2hpa_check(vm, gpa, 0, NULL, true);
|
||||
}
|
||||
|
||||
uint64_t hpa2gpa(struct vm *vm, uint64_t hpa)
|
||||
{
|
||||
struct entry_params entry;
|
||||
struct map_params map_params;
|
||||
|
||||
map_params.page_table_type = PT_EPT;
|
||||
map_params.pml4_base = vm->arch_vm.ept;
|
||||
map_params.pml4_inverted = vm->arch_vm.m2p;
|
||||
|
||||
obtain_last_page_table_entry(&map_params, &entry,
|
||||
(void *)hpa, false);
|
||||
|
||||
if (entry.entry_present == PT_NOT_PRESENT) {
|
||||
pr_err("VM %d hpa2gpa: failed for hpa 0x%llx",
|
||||
vm->attr.boot_idx, hpa);
|
||||
ASSERT(false, "hpa2gpa not found");
|
||||
}
|
||||
return ((entry.entry_val & (~(entry.page_size - 1)))
|
||||
| (hpa & (entry.page_size - 1)));
|
||||
}
|
||||
|
||||
int is_ept_supported(void)
|
||||
{
|
||||
uint16_t status;
|
||||
uint64_t tmp64;
|
||||
|
||||
/* Read primary processor based VM control. */
|
||||
tmp64 = msr_read(MSR_IA32_VMX_PROCBASED_CTLS);
|
||||
|
||||
/* Check if secondary processor based VM control is available. */
|
||||
if (tmp64 & MMU_MEM_ATTR_BIT_EXECUTE_DISABLE) {
|
||||
/* Read primary processor based VM control. */
|
||||
tmp64 = msr_read(MSR_IA32_VMX_PROCBASED_CTLS2);
|
||||
|
||||
/* Check if EPT is supported. */
|
||||
if (tmp64 & (((uint64_t)VMX_PROCBASED_CTLS2_EPT) << 32)) {
|
||||
/* EPT is present. */
|
||||
status = 1;
|
||||
} else {
|
||||
status = 0;
|
||||
}
|
||||
|
||||
} else {
|
||||
/* Secondary processor based VM control is not present */
|
||||
status = 0;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static int check_hv_mmio_range(struct vm *vm, struct mem_io *mmio)
|
||||
{
|
||||
int status = false;
|
||||
struct list_head *pos;
|
||||
struct mem_io_node *mmio_node;
|
||||
|
||||
|
||||
list_for_each(pos, &vm->mmio_list) {
|
||||
mmio_node = list_entry(pos, struct mem_io_node, list);
|
||||
/* Check if this handler's range covers this memory access */
|
||||
if ((mmio->paddr >= mmio_node->range_start) &&
|
||||
(mmio->paddr + mmio->access_size <=
|
||||
mmio_node->range_end)) {
|
||||
status = true;
|
||||
|
||||
/* Break from loop - only 1 handler allowed to support
|
||||
* a given memory range
|
||||
*/
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return success for now */
|
||||
return status;
|
||||
}
|
||||
|
||||
static int hv_emulate_mmio(struct vcpu *vcpu, struct mem_io *mmio)
|
||||
{
|
||||
int status = -EINVAL;
|
||||
struct list_head *pos;
|
||||
struct mem_io_node *mmio_node;
|
||||
struct vm *vm = vcpu->vm;
|
||||
|
||||
list_for_each(pos, &vm->mmio_list) {
|
||||
mmio_node = list_entry(pos, struct mem_io_node, list);
|
||||
/* Check if this handler's range covers this memory access */
|
||||
if ((mmio->paddr >= mmio_node->range_start) &&
|
||||
(mmio->paddr + mmio->access_size
|
||||
<= mmio_node->range_end)) {
|
||||
|
||||
ASSERT((mmio->paddr % mmio->access_size) == 0,
|
||||
"access size not align with paddr");
|
||||
|
||||
/* Handle this MMIO operation */
|
||||
status = mmio_node->read_write(vcpu, mmio,
|
||||
mmio_node->handler_private_data);
|
||||
|
||||
/* Break from loop - only 1 handler allowed to support
|
||||
* given memory range
|
||||
*/
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return success for now */
|
||||
return status;
|
||||
}
|
||||
|
||||
int register_mmio_emulation_handler(struct vm *vm,
|
||||
hv_mem_io_handler_t read_write, uint64_t start,
|
||||
uint64_t end, void *handler_private_data)
|
||||
{
|
||||
int status = -EINVAL;
|
||||
struct mem_io_node *mmio_node;
|
||||
|
||||
if (vm->hw.created_vcpus > 0 && vm->hw.vcpu_array[0]->launched) {
|
||||
ASSERT(0, "register mmio handler after vm launched");
|
||||
return status;
|
||||
}
|
||||
|
||||
/* Ensure both a read/write handler and range check function exist */
|
||||
if ((read_write != HV_NULL) && (end > start)) {
|
||||
/* Allocate memory for node */
|
||||
mmio_node =
|
||||
(struct mem_io_node *)calloc(1, sizeof(struct mem_io_node));
|
||||
|
||||
/* Ensure memory successfully allocated */
|
||||
if (mmio_node) {
|
||||
/* Fill in information for this node */
|
||||
mmio_node->read_write = read_write;
|
||||
mmio_node->handler_private_data = handler_private_data;
|
||||
|
||||
INIT_LIST_HEAD(&mmio_node->list);
|
||||
list_add(&mmio_node->list, &vm->mmio_list);
|
||||
|
||||
mmio_node->range_start = start;
|
||||
mmio_node->range_end = end;
|
||||
ept_mmap(vm, start, start, end - start,
|
||||
MAP_UNMAP, 0);
|
||||
|
||||
/* Return success */
|
||||
status = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return status to caller */
|
||||
return status;
|
||||
}
|
||||
|
||||
void unregister_mmio_emulation_handler(struct vm *vm, uint64_t start,
|
||||
uint64_t end)
|
||||
{
|
||||
struct list_head *pos, *tmp;
|
||||
struct mem_io_node *mmio_node;
|
||||
|
||||
list_for_each_safe(pos, tmp, &vm->mmio_list) {
|
||||
mmio_node = list_entry(pos, struct mem_io_node, list);
|
||||
|
||||
if ((mmio_node->range_start == start) &&
|
||||
(mmio_node->range_end == end)) {
|
||||
/* assume only one entry found in mmio_list */
|
||||
list_del_init(&mmio_node->list);
|
||||
free(mmio_node);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int dm_emulate_mmio_post(struct vcpu *vcpu)
|
||||
{
|
||||
int ret = 0;
|
||||
int cur = vcpu->vcpu_id;
|
||||
struct vhm_request_buffer *req_buf =
|
||||
(void *)HPA2HVA(vcpu->vm->sw.req_buf);
|
||||
|
||||
vcpu->req.reqs.mmio_request.value =
|
||||
req_buf->req_queue[cur].reqs.mmio_request.value;
|
||||
|
||||
/* VHM emulation data already copy to req, mark to free slot now */
|
||||
req_buf->req_queue[cur].valid = false;
|
||||
|
||||
if (req_buf->req_queue[cur].processed == REQ_STATE_SUCCESS)
|
||||
vcpu->mmio.mmio_status = MMIO_TRANS_VALID;
|
||||
else {
|
||||
vcpu->mmio.mmio_status = MMIO_TRANS_INVALID;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (vcpu->mmio.read_write == HV_MEM_IO_READ) {
|
||||
vcpu->mmio.value = vcpu->req.reqs.mmio_request.value;
|
||||
/* Emulate instruction and update vcpu register set */
|
||||
ret = emulate_instruction(vcpu, &vcpu->mmio);
|
||||
if (ret != 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int dm_emulate_mmio_pre(struct vcpu *vcpu, uint64_t exit_qual)
|
||||
{
|
||||
int status;
|
||||
|
||||
status = analyze_instruction(vcpu, &vcpu->mmio);
|
||||
if (status != 0)
|
||||
return status;
|
||||
|
||||
if (vcpu->mmio.read_write == HV_MEM_IO_WRITE) {
|
||||
status = emulate_instruction(vcpu, &vcpu->mmio);
|
||||
if (status != 0)
|
||||
return status;
|
||||
vcpu->req.reqs.mmio_request.value = vcpu->mmio.value;
|
||||
/* XXX: write access while EPT perm RX -> WP */
|
||||
if ((exit_qual & 0x38) == 0x28)
|
||||
vcpu->req.type = REQ_WP;
|
||||
}
|
||||
|
||||
if (vcpu->req.type == 0)
|
||||
vcpu->req.type = REQ_MMIO;
|
||||
vcpu->req.reqs.mmio_request.direction = vcpu->mmio.read_write;
|
||||
vcpu->req.reqs.mmio_request.address = (long)vcpu->mmio.paddr;
|
||||
vcpu->req.reqs.mmio_request.size = vcpu->mmio.access_size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ept_violation_handler(struct vcpu *vcpu)
|
||||
{
|
||||
int status;
|
||||
uint64_t exit_qual;
|
||||
uint64_t gpa;
|
||||
|
||||
/* Handle page fault from guest */
|
||||
exit_qual = exec_vmread(VMX_EXIT_QUALIFICATION);
|
||||
|
||||
memset(&vcpu->req, 0, sizeof(struct vhm_request));
|
||||
|
||||
/* Specify if read or write operation */
|
||||
if (exit_qual & 0x2) {
|
||||
/* Write operation */
|
||||
vcpu->mmio.read_write = HV_MEM_IO_WRITE;
|
||||
|
||||
/* Get write value from appropriate register in context */
|
||||
/* TODO: Need to figure out how to determine value being
|
||||
* written
|
||||
*/
|
||||
vcpu->mmio.value = 0;
|
||||
} else {
|
||||
/* Read operation */
|
||||
vcpu->mmio.read_write = HV_MEM_IO_READ;
|
||||
|
||||
/* Get sign extension requirements for read */
|
||||
/* TODO: Need to determine how sign extension is determined for
|
||||
* reads
|
||||
*/
|
||||
vcpu->mmio.sign_extend_read = 0;
|
||||
}
|
||||
|
||||
/* Get the guest physical address */
|
||||
gpa = exec_vmread64(VMX_GUEST_PHYSICAL_ADDR_FULL);
|
||||
|
||||
TRACE_2L(TRC_VMEXIT_EPT_VIOLATION, exit_qual, gpa);
|
||||
|
||||
/* Adjust IPA appropriately and OR page offset to get full IPA of abort
|
||||
*/
|
||||
vcpu->mmio.paddr = gpa;
|
||||
|
||||
/* Check if the MMIO access has a HV registered handler */
|
||||
status = check_hv_mmio_range((struct vm *) vcpu->vm, &vcpu->mmio);
|
||||
|
||||
if (status == true) {
|
||||
/* Fetch and decode current vcpu instruction */
|
||||
status = analyze_instruction(vcpu, &vcpu->mmio);
|
||||
|
||||
if (status != 0)
|
||||
goto out;
|
||||
|
||||
if (vcpu->mmio.read_write == HV_MEM_IO_WRITE) {
|
||||
status = emulate_instruction(vcpu, &vcpu->mmio);
|
||||
if (status != 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Call generic memory emulation handler
|
||||
* For MMIO write, call hv_emulate_mmio after
|
||||
* instruction emulation. For MMIO read,
|
||||
* call hv_emulate_mmio at first.
|
||||
*/
|
||||
status = hv_emulate_mmio(vcpu, &vcpu->mmio);
|
||||
|
||||
if (vcpu->mmio.read_write == HV_MEM_IO_READ) {
|
||||
/* Emulate instruction and update vcpu register set */
|
||||
status = emulate_instruction(vcpu, &vcpu->mmio);
|
||||
if (status != 0)
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
/*
* No MMIO handler on the HV side; search the VHM in Dom0 instead.
*
* ACRN inserts the request into the VHM and injects an upcall.
* For an MMIO write, ask the DM to run MMIO emulation after
* instruction emulation; for an MMIO read, ask the DM to run
* MMIO emulation first.
*/
|
||||
status = dm_emulate_mmio_pre(vcpu, exit_qual);
|
||||
if (status != 0)
|
||||
goto out;
|
||||
status = acrn_insert_request_wait(vcpu, &vcpu->req);
|
||||
}
|
||||
|
||||
return status;
|
||||
|
||||
out:
|
||||
pr_fatal("Guest Linear Address: 0x%016llx",
|
||||
exec_vmread(VMX_GUEST_LINEAR_ADDR));
|
||||
|
||||
pr_fatal("Guest Physical Address address: 0x%016llx",
|
||||
gpa);
|
||||
|
||||
ASSERT(status == true, "EPT violation");
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
int ept_misconfig_handler(__unused struct vcpu *vcpu)
|
||||
{
|
||||
int status;
|
||||
|
||||
status = -EINVAL;
|
||||
|
||||
/* TODO - EPT Misconfiguration handler */
|
||||
pr_info("%s, Guest linear address: 0x%016llx ",
|
||||
__func__, exec_vmread64(VMX_GUEST_LINEAR_ADDR));
|
||||
|
||||
pr_info("%s, Guest physical address: 0x%016llx ",
|
||||
__func__, exec_vmread64(VMX_GUEST_PHYSICAL_ADDR_FULL));
|
||||
|
||||
ASSERT(status == 0, "EPT Misconfiguration is not handled.\n");
|
||||
|
||||
TRACE_2L(TRC_VMEXIT_EPT_MISCONFIGURATION, 0, 0);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
int ept_mmap(struct vm *vm, uint64_t hpa,
|
||||
uint64_t gpa, uint64_t size, uint32_t type, uint32_t prot)
|
||||
{
|
||||
struct map_params map_params;
|
||||
int i;
|
||||
struct vcpu *vcpu;
|
||||
|
||||
/* Setup memory map parameters */
|
||||
map_params.page_table_type = PT_EPT;
|
||||
if (vm->arch_vm.ept) {
|
||||
map_params.pml4_base = vm->arch_vm.ept;
|
||||
map_params.pml4_inverted = vm->arch_vm.m2p;
|
||||
} else {
|
||||
map_params.pml4_base =
|
||||
alloc_paging_struct();
|
||||
vm->arch_vm.ept = map_params.pml4_base;
|
||||
map_params.pml4_inverted = alloc_paging_struct();
|
||||
vm->arch_vm.m2p = map_params.pml4_inverted;
|
||||
}
|
||||
|
||||
if (type == MAP_MEM || type == MAP_MMIO) {
|
||||
map_mem(&map_params, (void *)hpa,
|
||||
(void *)gpa, size, prot);
|
||||
|
||||
} else if (type == MAP_UNMAP) {
|
||||
unmap_mem(&map_params, (void *)hpa, (void *)gpa,
|
||||
size, prot);
|
||||
} else
|
||||
ASSERT(0, "unknown map type");
|
||||
|
||||
foreach_vcpu(i, vm, vcpu) {
|
||||
vcpu_make_request(vcpu, ACRN_REQUEST_TLB_FLUSH);
|
||||
}
|
||||
|
||||
dev_dbg(ACRN_DBG_EPT, "ept map: %s hpa: 0x%016llx gpa: 0x%016llx ",
|
||||
type == MAP_UNMAP ? "unmap" : "map", hpa, gpa);
|
||||
dev_dbg(ACRN_DBG_EPT, "size: 0x%016llx prot: 0x%x\n", size, prot);
|
||||
|
||||
return 0;
|
||||
}
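As an aside, here is a minimal usage sketch of ept_mmap (an editor's illustration, not part of this source file): it identity-maps a guest RAM range with write-back attributes and later removes the mapping, mirroring what prepare_vm0_memmap_and_e820 in guest/guest.c below does. The helper name example_map_guest_ram is hypothetical.

/* illustrative only: identity-map one RAM region for a VM with
 * write-back caching, then drop the mapping again
 */
static void example_map_guest_ram(struct vm *vm, uint64_t base, uint64_t len)
{
    uint32_t attr_wb = (MMU_MEM_ATTR_READ |
            MMU_MEM_ATTR_WRITE |
            MMU_MEM_ATTR_EXECUTE |
            MMU_MEM_ATTR_WB_CACHE);

    /* hpa == gpa here, i.e. an identity mapping */
    ept_mmap(vm, base, base, len, MAP_MEM, attr_wb);

    /* ... later, remove the mapping; ept_mmap requests a TLB flush
     * on every vcpu of this VM
     */
    ept_mmap(vm, base, base, len, MAP_UNMAP, 0);
}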
|
84
hypervisor/arch/x86/gdt.c
Normal file
84
hypervisor/arch/x86/gdt.c
Normal file
@@ -0,0 +1,84 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hv_lib.h>
|
||||
#include <cpu.h>
|
||||
#include <gdt.h>
|
||||
|
||||
DEFINE_CPU_DATA(struct tss_64, tss);
|
||||
DEFINE_CPU_DATA(struct host_gdt, gdt);
|
||||
DEFINE_CPU_DATA(uint8_t[STACK_SIZE], mc_stack) __aligned(16);
|
||||
DEFINE_CPU_DATA(uint8_t[STACK_SIZE], df_stack) __aligned(16);
|
||||
DEFINE_CPU_DATA(uint8_t[STACK_SIZE], sf_stack) __aligned(16);
|
||||
|
||||
static void set_tss_desc(union tss_64_descriptor *desc,
|
||||
void *tss, int tss_limit, int type)
|
||||
{
|
||||
uint32_t u1, u2, u3;
|
||||
|
||||
u1 = ((uint64_t)tss << 16) & 0xFFFFFFFF;
|
||||
u2 = (uint64_t)tss & 0xFF000000;
|
||||
u3 = ((uint64_t)tss & 0x00FF0000) >> 16;
|
||||
|
||||
|
||||
desc->low32.value = u1 | (tss_limit & 0xFFFF);
|
||||
desc->base_addr_63_32 = (uint32_t)((uint64_t)tss >> 32);
|
||||
desc->high32.value = (u2 | ((uint32_t)type << 8) | 0x8000 | u3);
|
||||
}
|
||||
|
||||
void load_gdtr_and_tr(void)
|
||||
{
|
||||
struct host_gdt *gdt = &get_cpu_var(gdt);
|
||||
struct host_gdt_descriptor gdtr;
|
||||
struct tss_64 *tss = &get_cpu_var(tss);
|
||||
|
||||
/* first entry is not used */
|
||||
gdt->rsvd = 0xAAAAAAAAAAAAAAAA;
|
||||
/* ring 0 code sel descriptor */
|
||||
gdt->host_gdt_code_descriptor.value = 0x00Af9b000000ffff;
|
||||
/* ring 0 data sel descriptor */
|
||||
gdt->host_gdt_data_descriptor.value = 0x00cf93000000ffff;
|
||||
|
||||
tss->ist1 = (uint64_t)get_cpu_var(mc_stack) + STACK_SIZE;
|
||||
tss->ist2 = (uint64_t)get_cpu_var(df_stack) + STACK_SIZE;
|
||||
tss->ist3 = (uint64_t)get_cpu_var(sf_stack) + STACK_SIZE;
|
||||
tss->ist4 = 0L;
|
||||
|
||||
/* tss descriptor */
|
||||
set_tss_desc(&gdt->host_gdt_tss_descriptors,
|
||||
(void *)tss, sizeof(struct tss_64), TSS_AVAIL);
|
||||
|
||||
gdtr.len = sizeof(struct host_gdt) - 1;
|
||||
gdtr.gdt = gdt;
|
||||
|
||||
asm volatile ("lgdt %0" ::"m"(gdtr));
|
||||
|
||||
CPU_LTR_EXECUTE(HOST_GDT_RING0_CPU_TSS_SEL);
|
||||
}
|
389
hypervisor/arch/x86/guest/guest.c
Normal file
389
hypervisor/arch/x86/guest/guest.c
Normal file
@@ -0,0 +1,389 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <bsp_cfg.h>
|
||||
#include <bsp_extern.h>
|
||||
#include <acrn_hv_defs.h>
|
||||
#include <hv_debug.h>
|
||||
#include <multiboot.h>
|
||||
|
||||
#define BOOT_ARGS_LOAD_ADDR 0x24EFC000
|
||||
|
||||
#define ACRN_DBG_GUEST 6
|
||||
|
||||
/* for VM0 e820 */
|
||||
uint32_t e820_entries;
|
||||
struct e820_entry e820[E820_MAX_ENTRIES];
|
||||
struct e820_mem_params e820_mem;
|
||||
|
||||
inline bool
|
||||
is_vm0(struct vm *vm)
|
||||
{
|
||||
return (vm->attr.boot_idx & 0x7F) == 0;
|
||||
}
|
||||
|
||||
inline struct vcpu *vcpu_from_vid(struct vm *vm, int vcpu_id)
|
||||
{
|
||||
int i;
|
||||
struct vcpu *vcpu;
|
||||
|
||||
foreach_vcpu(i, vm, vcpu) {
|
||||
if (vcpu->vcpu_id == vcpu_id)
|
||||
return vcpu;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
inline struct vcpu *vcpu_from_pid(struct vm *vm, int pcpu_id)
|
||||
{
|
||||
int i;
|
||||
struct vcpu *vcpu;
|
||||
|
||||
foreach_vcpu(i, vm, vcpu) {
|
||||
if (vcpu->pcpu_id == pcpu_id)
|
||||
return vcpu;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
inline struct vcpu *get_primary_vcpu(struct vm *vm)
|
||||
{
|
||||
int i;
|
||||
struct vcpu *vcpu;
|
||||
|
||||
foreach_vcpu(i, vm, vcpu) {
|
||||
if (is_vcpu_bsp(vcpu))
|
||||
return vcpu;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
inline uint64_t vcpumask2pcpumask(struct vm *vm, uint64_t vdmask)
|
||||
{
|
||||
int vcpu_id;
|
||||
uint64_t dmask = 0;
|
||||
struct vcpu *vcpu;
|
||||
|
||||
while ((vcpu_id = bitmap_ffs(&vdmask)) >= 0) {
|
||||
bitmap_clr(vcpu_id, &vdmask);
|
||||
vcpu = vcpu_from_vid(vm, vcpu_id);
|
||||
ASSERT(vcpu, "vcpu_from_vid failed");
|
||||
bitmap_set(vcpu->pcpu_id, &dmask);
|
||||
}
|
||||
|
||||
return dmask;
|
||||
}
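A small illustration of vcpumask2pcpumask (editor's sketch, not part of this file; the helper name is hypothetical): with VM0's vcpus pinned as vcpu0 -> pcpu0 and vcpu1 -> pcpu2, as described in the mapping-table comment in guest/vcpu.c further below, a virtual destination mask of 0x3 translates to the physical mask 0x5.

/* illustrative only: translate a virtual destination mask to a physical one */
static uint64_t example_dest_mask(struct vm *vm)
{
    /* bits 0 and 1 select vcpu0 and vcpu1 of this vm */
    uint64_t vdmask = 0x3UL;

    /* with vcpu0->pcpu0 and vcpu1->pcpu2 this returns (1UL<<0) | (1UL<<2) */
    return vcpumask2pcpumask(vm, vdmask);
}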
|
||||
|
||||
inline bool vm_lapic_disabled(struct vm *vm)
|
||||
{
|
||||
int i;
|
||||
struct vcpu *vcpu;
|
||||
|
||||
foreach_vcpu(i, vm, vcpu) {
|
||||
if (vlapic_enabled(vcpu->arch_vcpu.vlapic))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int init_vm0_boot_info(struct vm *vm)
|
||||
{
|
||||
struct multiboot_module *mods = NULL;
|
||||
struct multiboot_info *mbi = NULL;
|
||||
|
||||
if (!is_vm0(vm)) {
|
||||
pr_err("just for vm0 to get info!");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (boot_regs[0] != MULTIBOOT_INFO_MAGIC) {
|
||||
ASSERT(0, "no multiboot info found");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
mbi = (struct multiboot_info *)((uint64_t)boot_regs[1]);
|
||||
|
||||
dev_dbg(ACRN_DBG_GUEST, "Multiboot detected, flag=0x%x", mbi->mi_flags);
|
||||
if (!(mbi->mi_flags & MULTIBOOT_INFO_HAS_MODS)) {
|
||||
ASSERT(0, "no sos kernel info found");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
dev_dbg(ACRN_DBG_GUEST, "mod counts=%d\n", mbi->mi_mods_count);
|
||||
|
||||
/* mod[0] is for kernel&cmdline, other mod for ramdisk/firmware info*/
|
||||
mods = (struct multiboot_module *)(uint64_t)mbi->mi_mods_addr;
|
||||
|
||||
dev_dbg(ACRN_DBG_GUEST, "mod0 start=0x%x, end=0x%x",
|
||||
mods[0].mm_mod_start, mods[0].mm_mod_end);
|
||||
dev_dbg(ACRN_DBG_GUEST, "cmd addr=0x%x, str=%s", mods[0].mm_string,
|
||||
(char *) (uint64_t)mods[0].mm_string);
|
||||
|
||||
vm->sw.kernel_type = VM_LINUX_GUEST;
|
||||
vm->sw.kernel_info.kernel_src_addr =
|
||||
(void *)(uint64_t)mods[0].mm_mod_start;
|
||||
vm->sw.kernel_info.kernel_size =
|
||||
mods[0].mm_mod_end - mods[0].mm_mod_start;
|
||||
vm->sw.kernel_info.kernel_load_addr =
|
||||
(void *)(uint64_t)mods[0].mm_mod_start;
|
||||
|
||||
vm->sw.linux_info.bootargs_src_addr =
|
||||
(void *)(uint64_t)mods[0].mm_string;
|
||||
vm->sw.linux_info.bootargs_load_addr =
|
||||
(void *)BOOT_ARGS_LOAD_ADDR;
|
||||
vm->sw.linux_info.bootargs_size =
|
||||
strnlen_s((char *)(uint64_t) mods[0].mm_string, MEM_2K);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t gva2gpa(struct vm *vm, uint64_t cr3, uint64_t gva)
|
||||
{
|
||||
int level, index, shift;
|
||||
uint64_t *base, addr, entry, page_size;
|
||||
uint64_t gpa = 0;
|
||||
|
||||
addr = cr3;
|
||||
|
||||
for (level = 3; level >= 0; level--) {
|
||||
addr = addr & IA32E_REF_MASK;
|
||||
base = GPA2HVA(vm, addr);
|
||||
ASSERT(base != NULL, "invalid ptp base.");
|
||||
shift = level * 9 + 12;
|
||||
index = (gva >> shift) & 0x1FF;
|
||||
page_size = 1UL << shift;
|
||||
|
||||
entry = base[index];
|
||||
if (level > 0 && (entry & MMU_32BIT_PDE_PS) != 0)
|
||||
break;
|
||||
addr = entry;
|
||||
}
|
||||
|
||||
entry >>= shift; entry <<= (shift + 12); entry >>= 12;
|
||||
gpa = entry | (gva & (page_size - 1));
|
||||
|
||||
return gpa;
|
||||
}
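A short usage sketch of gva2gpa (editor's illustration, not part of this file; example_gva_to_hva is a hypothetical name): it translates a guest-virtual address through the current context's CR3 and then to a host-virtual address, the same pattern analyze_instruction in guest/instr_emul_wrapper.c below uses for the guest RIP.

/* illustrative only: translate a guest-virtual address of the current
 * vcpu context into a guest-physical address, then into an HVA
 */
static void *example_gva_to_hva(struct vcpu *vcpu, uint64_t gva)
{
    uint64_t cr3 = vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3;
    uint64_t gpa = gva2gpa(vcpu->vm, cr3, gva);

    /* HVA <-> HPA is currently a 1:1 mapping, so GPA2HVA is enough */
    return GPA2HVA(vcpu->vm, gpa);
}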
|
||||
|
||||
void init_e820(void)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
if (boot_regs[0] == MULTIBOOT_INFO_MAGIC) {
|
||||
struct multiboot_info *mbi =
|
||||
(struct multiboot_info *)((uint64_t)boot_regs[1]);
|
||||
pr_info("Multiboot info detected\n");
|
||||
if (mbi->mi_flags & 0x40) {
|
||||
struct multiboot_mmap *mmap =
|
||||
(struct multiboot_mmap *)
|
||||
((uint64_t)mbi->mi_mmap_addr);
|
||||
e820_entries = mbi->mi_mmap_length/
|
||||
sizeof(struct multiboot_mmap);
|
||||
if (e820_entries > E820_MAX_ENTRIES) {
|
||||
pr_err("Too many E820 entries %d\n",
|
||||
e820_entries);
|
||||
e820_entries = E820_MAX_ENTRIES;
|
||||
}
|
||||
dev_dbg(ACRN_DBG_GUEST,
|
||||
"mmap length 0x%x addr 0x%x entries %d\n",
|
||||
mbi->mi_mmap_length, mbi->mi_mmap_addr,
|
||||
e820_entries);
|
||||
for (i = 0; i < e820_entries; i++) {
|
||||
e820[i].baseaddr = mmap[i].baseaddr;
|
||||
e820[i].length = mmap[i].length;
|
||||
e820[i].type = mmap[i].type;
|
||||
|
||||
dev_dbg(ACRN_DBG_GUEST,
|
||||
"mmap table: %d type: 0x%x\n",
|
||||
i, mmap[i].type);
|
||||
dev_dbg(ACRN_DBG_GUEST,
|
||||
"Base: 0x%016llx length: 0x%016llx",
|
||||
mmap[i].baseaddr, mmap[i].length);
|
||||
}
|
||||
}
|
||||
} else
|
||||
ASSERT(0, "no multiboot info found");
|
||||
}
|
||||
|
||||
|
||||
void obtain_e820_mem_info(void)
|
||||
{
|
||||
unsigned int i;
|
||||
struct e820_entry *entry;
|
||||
|
||||
e820_mem.mem_bottom = UINT64_MAX;
|
||||
e820_mem.mem_top = 0x00;
|
||||
e820_mem.max_ram_blk_base = 0;
|
||||
e820_mem.max_ram_blk_size = 0;
|
||||
|
||||
for (i = 0; i < e820_entries; i++) {
|
||||
entry = &e820[i];
|
||||
if (e820_mem.mem_bottom > entry->baseaddr)
|
||||
e820_mem.mem_bottom = entry->baseaddr;
|
||||
|
||||
if (entry->baseaddr + entry->length
|
||||
> e820_mem.mem_top) {
|
||||
e820_mem.mem_top = entry->baseaddr
|
||||
+ entry->length;
|
||||
}
|
||||
|
||||
if (entry->baseaddr == UOS_DEFAULT_START_ADDR
|
||||
&& entry->type == E820_TYPE_RAM) {
|
||||
e820_mem.max_ram_blk_base =
|
||||
entry->baseaddr;
|
||||
e820_mem.max_ram_blk_size = entry->length;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void rebuild_vm0_e820(void)
|
||||
{
|
||||
unsigned int i;
|
||||
uint64_t entry_start;
|
||||
uint64_t entry_end;
|
||||
uint64_t hv_start = CONFIG_RAM_START;
|
||||
uint64_t hv_end = hv_start + CONFIG_RAM_SIZE;
|
||||
struct e820_entry *entry, new_entry = {0};
|
||||
|
||||
/* Hypervisor memory needs to be filtered out of the e820 table:
* the hypervisor itself plus other HV-reserved memory (e.g. for vGT)
*/
|
||||
for (i = 0; i < e820_entries; i++) {
|
||||
entry = &e820[i];
|
||||
entry_start = entry->baseaddr;
|
||||
entry_end = entry->baseaddr + entry->length;
|
||||
|
||||
/* No need to handle these cases */
|
||||
if (entry->type != E820_TYPE_RAM || entry_end <= hv_start
|
||||
|| entry_start >= hv_end) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* filter out hv mem and adjust the length of this entry */
|
||||
if (entry_start < hv_start && entry_end <= hv_end) {
|
||||
entry->length = hv_start - entry_start;
|
||||
continue;
|
||||
}
|
||||
/* filter out hv mem; a new entry needs to be created */
|
||||
if (entry_start < hv_start && entry_end > hv_end) {
|
||||
entry->length = hv_start - entry_start;
|
||||
new_entry.baseaddr = hv_end;
|
||||
new_entry.length = entry_end - hv_end;
|
||||
new_entry.type = E820_TYPE_RAM;
|
||||
continue;
|
||||
}
|
||||
/* This entry is within the range of hv mem
|
||||
* change to E820_TYPE_RESERVED
|
||||
*/
|
||||
if (entry_start >= hv_start && entry_end <= hv_end) {
|
||||
entry->type = E820_TYPE_RESERVED;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (entry_start >= hv_start && entry_start < hv_end
|
||||
&& entry_end > hv_end) {
|
||||
entry->baseaddr = hv_end;
|
||||
entry->length = entry_end - hv_end;
|
||||
continue;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (new_entry.length > 0) {
|
||||
e820_entries++;
|
||||
ASSERT(e820_entries <= E820_MAX_ENTRIES,
|
||||
"e820 entry overflow");
|
||||
entry = &e820[e820_entries - 1];
|
||||
entry->baseaddr = new_entry.baseaddr;
|
||||
entry->length = new_entry.length;
|
||||
entry->type = new_entry.type;
|
||||
}
|
||||
|
||||
}
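For clarity, a worked illustration of the splitting case above (editor's sketch with made-up addresses, not part of this file): when the hypervisor range falls strictly inside one RAM entry, the entry is trimmed to the head and a new RAM entry is appended for the tail.

/* illustrative only, hypothetical addresses: one RAM entry covering the
 * hypervisor on both sides is split into a head and a tail entry
 */
static void example_split(void)
{
    uint64_t entry_start = 0x00100000UL, entry_end = 0x80000000UL;
    uint64_t hv_start = 0x20000000UL, hv_end = 0x22000000UL;

    /* head keeps [entry_start, hv_start) -> length 0x1FF00000 */
    uint64_t head_len = hv_start - entry_start;
    /* tail becomes a new E820_TYPE_RAM entry [hv_end, entry_end) */
    uint64_t tail_len = entry_end - hv_end;

    (void)head_len;
    (void)tail_len;
}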
|
||||
int prepare_vm0_memmap_and_e820(struct vm *vm)
|
||||
{
|
||||
unsigned int i;
|
||||
uint32_t attr_wb = (MMU_MEM_ATTR_READ |
|
||||
MMU_MEM_ATTR_WRITE |
|
||||
MMU_MEM_ATTR_EXECUTE |
|
||||
MMU_MEM_ATTR_WB_CACHE);
|
||||
uint32_t attr_uc = (MMU_MEM_ATTR_READ |
|
||||
MMU_MEM_ATTR_WRITE |
|
||||
MMU_MEM_ATTR_EXECUTE |
|
||||
MMU_MEM_ATTR_UNCACHED);
|
||||
struct e820_entry *entry;
|
||||
|
||||
|
||||
ASSERT(is_vm0(vm), "This func only for vm0");
|
||||
|
||||
rebuild_vm0_e820();
|
||||
dev_dbg(ACRN_DBG_GUEST,
|
||||
"vm0: bottom memory - 0x%llx, top memory - 0x%llx\n",
|
||||
e820_mem.mem_bottom, e820_mem.mem_top);
|
||||
|
||||
/* create real ept map for all ranges with UC */
|
||||
ept_mmap(vm, e820_mem.mem_bottom, e820_mem.mem_bottom,
|
||||
(e820_mem.mem_top - e820_mem.mem_bottom),
|
||||
MAP_MMIO, attr_uc);
|
||||
|
||||
/* update ram entries to WB attr */
|
||||
for (i = 0; i < e820_entries; i++) {
|
||||
entry = &e820[i];
|
||||
if (entry->type == E820_TYPE_RAM)
|
||||
ept_mmap(vm, entry->baseaddr, entry->baseaddr,
|
||||
entry->length, MAP_MEM, attr_wb);
|
||||
}
|
||||
|
||||
|
||||
dev_dbg(ACRN_DBG_GUEST, "VM0 e820 layout:\n");
|
||||
for (i = 0; i < e820_entries; i++) {
|
||||
entry = &e820[i];
|
||||
dev_dbg(ACRN_DBG_GUEST,
|
||||
"e820 table: %d type: 0x%x", i, entry->type);
|
||||
dev_dbg(ACRN_DBG_GUEST,
|
||||
"BaseAddress: 0x%016llx length: 0x%016llx\n",
|
||||
entry->baseaddr, entry->length);
|
||||
}
|
||||
|
||||
/* Unmap the hypervisor itself for safety:
* an EPT violation will be raised if the SOS accesses HV memory
*/
|
||||
ept_mmap(vm, CONFIG_RAM_START, CONFIG_RAM_START,
|
||||
CONFIG_RAM_SIZE, MAP_UNMAP, 0);
|
||||
return 0;
|
||||
}
|
2137
hypervisor/arch/x86/guest/instr_emul.c
Normal file
2137
hypervisor/arch/x86/guest/instr_emul.c
Normal file
File diff suppressed because it is too large
Load Diff
95
hypervisor/arch/x86/guest/instr_emul.h
Normal file
95
hypervisor/arch/x86/guest/instr_emul.h
Normal file
@@ -0,0 +1,95 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* Copyright (c) 2017 Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VMM_INSTRUCTION_EMUL_H_
|
||||
#define _VMM_INSTRUCTION_EMUL_H_
|
||||
|
||||
/*
|
||||
* Callback functions to read and write memory regions.
|
||||
*/
|
||||
typedef int (*mem_region_read_t)(struct vcpu *vcpu, uint64_t gpa,
|
||||
uint64_t *rval, int rsize, void *arg);
|
||||
|
||||
typedef int (*mem_region_write_t)(struct vcpu *vcpu, uint64_t gpa,
|
||||
uint64_t wval, int wsize, void *arg);
|
||||
|
||||
/*
|
||||
* Emulate the decoded 'vie' instruction.
|
||||
*
|
||||
* The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region
|
||||
* containing 'gpa'. 'mrarg' is an opaque argument that is passed into the
|
||||
* callback functions.
|
||||
*
|
||||
* 'void *vm' should be 'struct vm *' when called from kernel context and
|
||||
* 'struct vmctx *' when called from user context.
|
||||
* s
|
||||
*/
|
||||
int vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
|
||||
struct vm_guest_paging *paging, mem_region_read_t mrr,
|
||||
mem_region_write_t mrw, void *mrarg);
|
||||
|
||||
int vie_update_register(struct vcpu *vcpu, enum vm_reg_name reg,
|
||||
uint64_t val, int size);
|
||||
|
||||
/*
|
||||
* Returns 1 if an alignment check exception should be injected and 0 otherwise.
|
||||
*/
|
||||
int vie_alignment_check(int cpl, int operand_size, uint64_t cr0,
|
||||
uint64_t rflags, uint64_t gla);
|
||||
|
||||
/* Returns 1 if the 'gla' is not canonical and 0 otherwise. */
|
||||
int vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla);
|
||||
|
||||
uint64_t vie_size2mask(int size);
|
||||
|
||||
int vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
|
||||
struct seg_desc *desc, uint64_t off, int length, int addrsize, int prot,
|
||||
uint64_t *gla);
|
||||
|
||||
void vie_init(struct vie *vie, const char *inst_bytes, int inst_length);
|
||||
|
||||
/*
|
||||
* Decode the instruction fetched into 'vie' so it can be emulated.
|
||||
*
|
||||
* 'gla' is the guest linear address provided by the hardware assist
|
||||
* that caused the nested page table fault. It is used to verify that
|
||||
* the software instruction decoding is in agreement with the hardware.
|
||||
*
|
||||
* Some hardware assists do not provide the 'gla' to the hypervisor.
|
||||
* To skip the 'gla' verification for this or any other reason pass
|
||||
* in VIE_INVALID_GLA instead.
|
||||
*/
|
||||
#define VIE_INVALID_GLA (1UL << 63) /* a non-canonical address */
|
||||
int vmm_decode_instruction(struct vcpu *vcpu, uint64_t gla,
|
||||
enum vm_cpu_mode cpu_mode, int csd, struct vie *vie);
|
||||
|
||||
int emulate_instruction(struct vcpu *vcpu, struct mem_io *mmio);
|
||||
int analyze_instruction(struct vcpu *vcpu, struct mem_io *mmio);
|
||||
|
||||
#endif /* _VMM_INSTRUCTION_EMUL_H_ */
|
466
hypervisor/arch/x86/guest/instr_emul_wrapper.c
Normal file
466
hypervisor/arch/x86/guest/instr_emul_wrapper.c
Normal file
@@ -0,0 +1,466 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <bsp_extern.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
#include "instr_emul_wrapper.h"
|
||||
#include "instr_emul.h"
|
||||
|
||||
struct emul_cnx {
|
||||
struct vie vie;
|
||||
struct vm_guest_paging paging;
|
||||
struct vcpu *vcpu;
|
||||
struct mem_io *mmio;
|
||||
};
|
||||
|
||||
static DEFINE_CPU_DATA(struct emul_cnx, g_inst_ctxt);
|
||||
|
||||
static int
|
||||
encode_vmcs_seg_desc(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc);
|
||||
|
||||
static int32_t
|
||||
get_vmcs_field(int ident);
|
||||
|
||||
static bool
|
||||
is_segment_register(int reg);
|
||||
|
||||
static bool
|
||||
is_descriptor_table(int reg);
|
||||
|
||||
int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
|
||||
{
|
||||
struct run_context *cur_context;
|
||||
|
||||
if (!vcpu)
|
||||
return -EINVAL;
|
||||
if ((reg >= VM_REG_LAST) || (reg < VM_REG_GUEST_RAX))
|
||||
return -EINVAL;
|
||||
|
||||
if ((reg >= VM_REG_GUEST_RAX) && (reg <= VM_REG_GUEST_RDI)) {
|
||||
cur_context =
|
||||
&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
|
||||
*retval = cur_context->guest_cpu_regs.longs[reg];
|
||||
} else if ((reg > VM_REG_GUEST_RDI) && (reg < VM_REG_LAST)) {
|
||||
int32_t field = get_vmcs_field(reg);
|
||||
|
||||
if (field != -1)
|
||||
*retval = exec_vmread(field);
|
||||
else
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
|
||||
{
|
||||
struct run_context *cur_context;
|
||||
|
||||
if (!vcpu)
|
||||
return -EINVAL;
|
||||
if ((reg >= VM_REG_LAST) || (reg < VM_REG_GUEST_RAX))
|
||||
return -EINVAL;
|
||||
|
||||
if ((reg >= VM_REG_GUEST_RAX) && (reg <= VM_REG_GUEST_RDI)) {
|
||||
cur_context =
|
||||
&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
|
||||
cur_context->guest_cpu_regs.longs[reg] = val;
|
||||
} else if ((reg > VM_REG_GUEST_RDI) && (reg < VM_REG_LAST)) {
|
||||
int32_t field = get_vmcs_field(reg);
|
||||
|
||||
if (field != -1)
|
||||
exec_vmwrite(field, val);
|
||||
else
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vm_set_seg_desc(struct vcpu *vcpu, int seg, struct seg_desc *ret_desc)
|
||||
{
|
||||
int error;
|
||||
uint32_t base, limit, access;
|
||||
|
||||
if ((!vcpu) || (!ret_desc))
|
||||
return -EINVAL;
|
||||
|
||||
if (!is_segment_register(seg) && !is_descriptor_table(seg))
|
||||
return -EINVAL;
|
||||
|
||||
error = encode_vmcs_seg_desc(seg, &base, &limit, &access);
|
||||
if ((error != 0) || (access == 0xffffffff))
|
||||
return -EINVAL;
|
||||
|
||||
exec_vmwrite(base, ret_desc->base);
|
||||
exec_vmwrite(limit, ret_desc->limit);
|
||||
exec_vmwrite(access, ret_desc->access);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vm_get_seg_desc(struct vcpu *vcpu, int seg, struct seg_desc *desc)
|
||||
{
|
||||
int error;
|
||||
uint32_t base, limit, access;
|
||||
|
||||
if ((!vcpu) || (!desc))
|
||||
return -EINVAL;
|
||||
|
||||
if (!is_segment_register(seg) && !is_descriptor_table(seg))
|
||||
return -EINVAL;
|
||||
|
||||
error = encode_vmcs_seg_desc(seg, &base, &limit, &access);
|
||||
if ((error != 0) || (access == 0xffffffff))
|
||||
return -EINVAL;
|
||||
|
||||
desc->base = exec_vmread(base);
|
||||
desc->limit = exec_vmread(limit);
|
||||
desc->access = exec_vmread(access);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vm_restart_instruction(struct vcpu *vcpu)
|
||||
{
|
||||
if (!vcpu)
|
||||
return -EINVAL;
|
||||
|
||||
VCPU_RETAIN_RIP(vcpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool is_descriptor_table(int reg)
|
||||
{
|
||||
switch (reg) {
|
||||
case VM_REG_GUEST_IDTR:
|
||||
case VM_REG_GUEST_GDTR:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool is_segment_register(int reg)
|
||||
{
|
||||
switch (reg) {
|
||||
case VM_REG_GUEST_ES:
|
||||
case VM_REG_GUEST_CS:
|
||||
case VM_REG_GUEST_SS:
|
||||
case VM_REG_GUEST_DS:
|
||||
case VM_REG_GUEST_FS:
|
||||
case VM_REG_GUEST_GS:
|
||||
case VM_REG_GUEST_TR:
|
||||
case VM_REG_GUEST_LDTR:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static int encode_vmcs_seg_desc(int seg, uint32_t *base, uint32_t *lim,
|
||||
uint32_t *acc)
|
||||
{
|
||||
switch (seg) {
|
||||
case VM_REG_GUEST_ES:
|
||||
*base = VMX_GUEST_ES_BASE;
|
||||
*lim = VMX_GUEST_ES_LIMIT;
|
||||
*acc = VMX_GUEST_ES_ATTR;
|
||||
break;
|
||||
case VM_REG_GUEST_CS:
|
||||
*base = VMX_GUEST_CS_BASE;
|
||||
*lim = VMX_GUEST_CS_LIMIT;
|
||||
*acc = VMX_GUEST_CS_ATTR;
|
||||
break;
|
||||
case VM_REG_GUEST_SS:
|
||||
*base = VMX_GUEST_SS_BASE;
|
||||
*lim = VMX_GUEST_SS_LIMIT;
|
||||
*acc = VMX_GUEST_SS_ATTR;
|
||||
break;
|
||||
case VM_REG_GUEST_DS:
|
||||
*base = VMX_GUEST_DS_BASE;
|
||||
*lim = VMX_GUEST_DS_LIMIT;
|
||||
*acc = VMX_GUEST_DS_ATTR;
|
||||
break;
|
||||
case VM_REG_GUEST_FS:
|
||||
*base = VMX_GUEST_FS_BASE;
|
||||
*lim = VMX_GUEST_FS_LIMIT;
|
||||
*acc = VMX_GUEST_FS_ATTR;
|
||||
break;
|
||||
case VM_REG_GUEST_GS:
|
||||
*base = VMX_GUEST_GS_BASE;
|
||||
*lim = VMX_GUEST_GS_LIMIT;
|
||||
*acc = VMX_GUEST_GS_ATTR;
|
||||
break;
|
||||
case VM_REG_GUEST_TR:
|
||||
*base = VMX_GUEST_TR_BASE;
|
||||
*lim = VMX_GUEST_TR_LIMIT;
|
||||
*acc = VMX_GUEST_TR_ATTR;
|
||||
break;
|
||||
case VM_REG_GUEST_LDTR:
|
||||
*base = VMX_GUEST_LDTR_BASE;
|
||||
*lim = VMX_GUEST_LDTR_LIMIT;
|
||||
*acc = VMX_GUEST_LDTR_ATTR;
|
||||
break;
|
||||
case VM_REG_GUEST_IDTR:
|
||||
*base = VMX_GUEST_IDTR_BASE;
|
||||
*lim = VMX_GUEST_IDTR_LIMIT;
|
||||
*acc = 0xffffffff;
|
||||
break;
|
||||
case VM_REG_GUEST_GDTR:
|
||||
*base = VMX_GUEST_GDTR_BASE;
|
||||
*lim = VMX_GUEST_GDTR_LIMIT;
|
||||
*acc = 0xffffffff;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int32_t get_vmcs_field(int ident)
|
||||
{
|
||||
switch (ident) {
|
||||
case VM_REG_GUEST_CR0:
|
||||
return VMX_GUEST_CR0;
|
||||
case VM_REG_GUEST_CR3:
|
||||
return VMX_GUEST_CR3;
|
||||
case VM_REG_GUEST_CR4:
|
||||
return VMX_GUEST_CR4;
|
||||
case VM_REG_GUEST_DR7:
|
||||
return VMX_GUEST_DR7;
|
||||
case VM_REG_GUEST_RSP:
|
||||
return VMX_GUEST_RSP;
|
||||
case VM_REG_GUEST_RIP:
|
||||
return VMX_GUEST_RIP;
|
||||
case VM_REG_GUEST_RFLAGS:
|
||||
return VMX_GUEST_RFLAGS;
|
||||
case VM_REG_GUEST_ES:
|
||||
return VMX_GUEST_ES_SEL;
|
||||
case VM_REG_GUEST_CS:
|
||||
return VMX_GUEST_CS_SEL;
|
||||
case VM_REG_GUEST_SS:
|
||||
return VMX_GUEST_SS_SEL;
|
||||
case VM_REG_GUEST_DS:
|
||||
return VMX_GUEST_DS_SEL;
|
||||
case VM_REG_GUEST_FS:
|
||||
return VMX_GUEST_FS_SEL;
|
||||
case VM_REG_GUEST_GS:
|
||||
return VMX_GUEST_GS_SEL;
|
||||
case VM_REG_GUEST_TR:
|
||||
return VMX_GUEST_TR_SEL;
|
||||
case VM_REG_GUEST_LDTR:
|
||||
return VMX_GUEST_LDTR_SEL;
|
||||
case VM_REG_GUEST_EFER:
|
||||
return VMX_GUEST_IA32_EFER_FULL;
|
||||
case VM_REG_GUEST_PDPTE0:
|
||||
return VMX_GUEST_PDPTE0_FULL;
|
||||
case VM_REG_GUEST_PDPTE1:
|
||||
return VMX_GUEST_PDPTE1_FULL;
|
||||
case VM_REG_GUEST_PDPTE2:
|
||||
return VMX_GUEST_PDPTE2_FULL;
|
||||
case VM_REG_GUEST_PDPTE3:
|
||||
return VMX_GUEST_PDPTE3_FULL;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
static enum vm_cpu_mode get_vmx_cpu_mode(void)
|
||||
{
|
||||
uint32_t csar;
|
||||
|
||||
if (exec_vmread(VMX_GUEST_IA32_EFER_FULL) & EFER_LMA) {
|
||||
csar = exec_vmread(VMX_GUEST_CS_ATTR);
|
||||
if (csar & 0x2000)
|
||||
return CPU_MODE_64BIT; /* CS.L = 1 */
|
||||
else
|
||||
return CPU_MODE_COMPATIBILITY;
|
||||
} else if (exec_vmread(VMX_GUEST_CR0) & CR0_PE) {
|
||||
return CPU_MODE_PROTECTED;
|
||||
} else {
|
||||
return CPU_MODE_REAL;
|
||||
}
|
||||
}
|
||||
|
||||
static void get_guest_paging_info(struct vcpu *vcpu, struct emul_cnx *emul_cnx)
|
||||
{
|
||||
uint32_t cpl, csar;
|
||||
|
||||
ASSERT(emul_cnx != NULL && vcpu != NULL, "Error in input arguments");
|
||||
|
||||
csar = exec_vmread(VMX_GUEST_CS_ATTR);
|
||||
cpl = (csar >> 5) & 3;
|
||||
emul_cnx->paging.cr3 =
|
||||
vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3;
|
||||
emul_cnx->paging.cpl = cpl;
|
||||
emul_cnx->paging.cpu_mode = get_vmx_cpu_mode();
|
||||
emul_cnx->paging.paging_mode = PAGING_MODE_FLAT;/*maybe change later*/
|
||||
}
|
||||
|
||||
static int mmio_read(struct vcpu *vcpu, __unused uint64_t gpa, uint64_t *rval,
|
||||
__unused int size, __unused void *arg)
|
||||
{
|
||||
struct emul_cnx *emul_cnx;
|
||||
struct mem_io *mmio;
|
||||
|
||||
if (!vcpu)
|
||||
return -EINVAL;
|
||||
|
||||
emul_cnx = &per_cpu(g_inst_ctxt, vcpu->pcpu_id);
|
||||
mmio = emul_cnx->mmio;
|
||||
|
||||
ASSERT(mmio != NULL, "invalid mmio when reading");
|
||||
|
||||
*rval = mmio->value;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mmio_write(struct vcpu *vcpu, __unused uint64_t gpa, uint64_t wval,
|
||||
__unused int size, __unused void *arg)
|
||||
{
|
||||
struct emul_cnx *emul_cnx;
|
||||
struct mem_io *mmio;
|
||||
|
||||
if (!vcpu)
|
||||
return -EINVAL;
|
||||
|
||||
emul_cnx = &per_cpu(g_inst_ctxt, vcpu->pcpu_id);
|
||||
mmio = emul_cnx->mmio;
|
||||
|
||||
ASSERT(mmio != NULL, "invalid mmio when writing");
|
||||
|
||||
mmio->value = wval;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void vm_gva2gpa(struct vcpu *vcpu, uint64_t gva, uint64_t *gpa)
|
||||
{
|
||||
|
||||
ASSERT(gpa != NULL, "Error in input arguments");
|
||||
ASSERT(vcpu != NULL,
|
||||
"Invalid vcpu id when gva2gpa");
|
||||
|
||||
*gpa = gva2gpa(vcpu->vm,
|
||||
vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3, gva);
|
||||
}
|
||||
|
||||
int analyze_instruction(struct vcpu *vcpu, struct mem_io *mmio)
|
||||
{
|
||||
uint64_t guest_rip_gva, guest_rip_gpa;
|
||||
char *guest_rip_hva;
|
||||
struct emul_cnx *emul_cnx;
|
||||
uint32_t csar;
|
||||
int retval = 0;
|
||||
enum vm_cpu_mode cpu_mode;
|
||||
int i;
|
||||
|
||||
guest_rip_gva =
|
||||
vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].rip;
|
||||
|
||||
guest_rip_gpa = gva2gpa(vcpu->vm,
|
||||
vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3,
|
||||
guest_rip_gva);
|
||||
|
||||
guest_rip_hva = GPA2HVA(vcpu->vm, guest_rip_gpa);
|
||||
emul_cnx = &per_cpu(g_inst_ctxt, vcpu->pcpu_id);
|
||||
emul_cnx->mmio = mmio;
|
||||
emul_cnx->vcpu = vcpu;
|
||||
|
||||
/* for now, HVA <-> HPA is a 1:1 mapping, so using the HPA is OK */
|
||||
vie_init(&emul_cnx->vie, guest_rip_hva,
|
||||
vcpu->arch_vcpu.inst_len);
|
||||
|
||||
get_guest_paging_info(vcpu, emul_cnx);
|
||||
csar = exec_vmread(VMX_GUEST_CS_ATTR);
|
||||
cpu_mode = get_vmx_cpu_mode();
|
||||
|
||||
mmio->private_data = emul_cnx;
|
||||
|
||||
retval = vmm_decode_instruction(vcpu, guest_rip_gva,
|
||||
cpu_mode, SEG_DESC_DEF32(csar), &emul_cnx->vie);
|
||||
|
||||
mmio->access_size = emul_cnx->vie.opsize;
|
||||
|
||||
if (retval != 0) {
|
||||
/* dump the instruction when decoding failed */
|
||||
pr_err("decode following instruction failed @ 0x%016llx:",
|
||||
exec_vmread(VMX_GUEST_RIP));
|
||||
for (i = 0; i < emul_cnx->vie.num_valid; i++) {
|
||||
if (i >= VIE_INST_SIZE)
|
||||
break;
|
||||
|
||||
if (i == 0)
|
||||
pr_err("\n");
|
||||
pr_err("%d=%02hhx ",
|
||||
i, emul_cnx->vie.inst[i]);
|
||||
}
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
int emulate_instruction(struct vcpu *vcpu, struct mem_io *mmio)
|
||||
{
|
||||
struct emul_cnx *emul_cnx = (struct emul_cnx *)(mmio->private_data);
|
||||
struct vm_guest_paging *paging = &emul_cnx->paging;
|
||||
int i, retval = 0;
|
||||
uint64_t gpa = mmio->paddr;
|
||||
mem_region_read_t mread = mmio_read;
|
||||
mem_region_write_t mwrite = mmio_write;
|
||||
|
||||
retval = vmm_emulate_instruction(vcpu, gpa,
|
||||
&emul_cnx->vie, paging, mread, mwrite, &retval);
|
||||
|
||||
if (retval != 0) {
|
||||
/* dump the instruction when emulation failed */
|
||||
pr_err("emulate following instruction failed @ 0x%016llx:",
|
||||
exec_vmread(VMX_GUEST_RIP));
|
||||
for (i = 0; i < emul_cnx->vie.num_valid; i++) {
|
||||
if (i >= VIE_INST_SIZE)
|
||||
break;
|
||||
|
||||
if (i == 0)
|
||||
pr_err("\n");
|
||||
|
||||
pr_err("%d=%02hhx ",
|
||||
i, emul_cnx->vie.inst[i]);
|
||||
}
|
||||
}
|
||||
return retval;
|
||||
}
|
203
hypervisor/arch/x86/guest/instr_emul_wrapper.h
Normal file
203
hypervisor/arch/x86/guest/instr_emul_wrapper.h
Normal file
@@ -0,0 +1,203 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 NetApp, Inc.
|
||||
* Copyright (c) 2017 Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <cpu.h>
|
||||
|
||||
struct vie_op {
|
||||
uint8_t op_byte; /* actual opcode byte */
|
||||
uint8_t op_type; /* type of operation (e.g. MOV) */
|
||||
uint16_t op_flags;
|
||||
};
|
||||
|
||||
#define VIE_INST_SIZE 15
|
||||
struct vie {
|
||||
uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */
|
||||
uint8_t num_valid; /* size of the instruction */
|
||||
uint8_t num_processed;
|
||||
|
||||
uint8_t addrsize:4, opsize:4; /* address and operand sizes */
|
||||
uint8_t rex_w:1, /* REX prefix */
|
||||
rex_r:1,
|
||||
rex_x:1,
|
||||
rex_b:1,
|
||||
rex_present:1,
|
||||
repz_present:1, /* REP/REPE/REPZ prefix */
|
||||
repnz_present:1, /* REPNE/REPNZ prefix */
|
||||
opsize_override:1, /* Operand size override */
|
||||
addrsize_override:1, /* Address size override */
|
||||
segment_override:1; /* Segment override */
|
||||
|
||||
uint8_t mod:2, /* ModRM byte */
|
||||
reg:4,
|
||||
rm:4;
|
||||
|
||||
uint8_t ss:2, /* SIB byte */
|
||||
index:4,
|
||||
base:4;
|
||||
|
||||
uint8_t disp_bytes;
|
||||
uint8_t imm_bytes;
|
||||
|
||||
uint8_t scale;
|
||||
int base_register; /* VM_REG_GUEST_xyz */
|
||||
int index_register; /* VM_REG_GUEST_xyz */
|
||||
int segment_register; /* VM_REG_GUEST_xyz */
|
||||
|
||||
int64_t displacement; /* optional addr displacement */
|
||||
int64_t immediate; /* optional immediate operand */
|
||||
|
||||
uint8_t decoded; /* set to 1 if successfully decoded */
|
||||
|
||||
struct vie_op op; /* opcode description */
|
||||
};
|
||||
|
||||
#define PSL_C 0x00000001 /* carry bit */
|
||||
#define PSL_PF 0x00000004 /* parity bit */
|
||||
#define PSL_AF 0x00000010 /* bcd carry bit */
|
||||
#define PSL_Z 0x00000040 /* zero bit */
|
||||
#define PSL_N 0x00000080 /* negative bit */
|
||||
#define PSL_T 0x00000100 /* trace enable bit */
|
||||
#define PSL_I 0x00000200 /* interrupt enable bit */
|
||||
#define PSL_D 0x00000400 /* string instruction direction bit */
|
||||
#define PSL_V 0x00000800 /* overflow bit */
|
||||
#define PSL_IOPL 0x00003000 /* i/o privilege level */
|
||||
#define PSL_NT 0x00004000 /* nested task bit */
|
||||
#define PSL_RF 0x00010000 /* resume flag bit */
|
||||
#define PSL_VM 0x00020000 /* virtual 8086 mode bit */
|
||||
#define PSL_AC 0x00040000 /* alignment checking */
|
||||
#define PSL_VIF 0x00080000 /* virtual interrupt enable */
|
||||
#define PSL_VIP 0x00100000 /* virtual interrupt pending */
|
||||
#define PSL_ID 0x00200000 /* identification bit */
|
||||
|
||||
/*
|
||||
* The 'access' field has the format specified in Table 21-2 of the Intel
|
||||
* Architecture Manual vol 3b.
|
||||
*
|
||||
* XXX The contents of the 'access' field are architecturally defined except
|
||||
* bit 16 - Segment Unusable.
|
||||
*/
|
||||
struct seg_desc {
|
||||
uint64_t base;
|
||||
uint32_t limit;
|
||||
uint32_t access;
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Protections are chosen from these bits, or-ed together
|
||||
*/
|
||||
#define PROT_NONE 0x00 /* no permissions */
|
||||
#define PROT_READ 0x01 /* pages can be read */
|
||||
#define PROT_WRITE 0x02 /* pages can be written */
|
||||
#define PROT_EXEC 0x04 /* pages can be executed */
|
||||
|
||||
#define SEG_DESC_TYPE(access) ((access) & 0x001f)
|
||||
#define SEG_DESC_DPL(access) (((access) >> 5) & 0x3)
|
||||
#define SEG_DESC_PRESENT(access) (((access) & 0x0080) ? 1 : 0)
|
||||
#define SEG_DESC_DEF32(access) (((access) & 0x4000) ? 1 : 0)
|
||||
#define SEG_DESC_GRANULARITY(access) (((access) & 0x8000) ? 1 : 0)
|
||||
#define SEG_DESC_UNUSABLE(access) (((access) & 0x10000) ? 1 : 0)
|
||||
|
||||
enum vm_cpu_mode {
|
||||
CPU_MODE_REAL,
|
||||
CPU_MODE_PROTECTED,
|
||||
CPU_MODE_COMPATIBILITY, /* IA-32E mode (CS.L = 0) */
|
||||
CPU_MODE_64BIT, /* IA-32E mode (CS.L = 1) */
|
||||
};
|
||||
|
||||
enum vm_paging_mode {
|
||||
PAGING_MODE_FLAT,
|
||||
PAGING_MODE_32,
|
||||
PAGING_MODE_PAE,
|
||||
PAGING_MODE_64,
|
||||
};
|
||||
|
||||
struct vm_guest_paging {
|
||||
uint64_t cr3;
|
||||
int cpl;
|
||||
enum vm_cpu_mode cpu_mode;
|
||||
enum vm_paging_mode paging_mode;
|
||||
};
|
||||
|
||||
/*
|
||||
* Identifiers for architecturally defined registers.
|
||||
*/
|
||||
enum vm_reg_name {
|
||||
VM_REG_GUEST_RAX,
|
||||
VM_REG_GUEST_RBX,
|
||||
VM_REG_GUEST_RCX,
|
||||
VM_REG_GUEST_RDX,
|
||||
VM_REG_GUEST_RBP,
|
||||
VM_REG_GUEST_RSI,
|
||||
VM_REG_GUEST_R8,
|
||||
VM_REG_GUEST_R9,
|
||||
VM_REG_GUEST_R10,
|
||||
VM_REG_GUEST_R11,
|
||||
VM_REG_GUEST_R12,
|
||||
VM_REG_GUEST_R13,
|
||||
VM_REG_GUEST_R14,
|
||||
VM_REG_GUEST_R15,
|
||||
VM_REG_GUEST_RDI,
|
||||
VM_REG_GUEST_CR0,
|
||||
VM_REG_GUEST_CR3,
|
||||
VM_REG_GUEST_CR4,
|
||||
VM_REG_GUEST_DR7,
|
||||
VM_REG_GUEST_RSP,
|
||||
VM_REG_GUEST_RIP,
|
||||
VM_REG_GUEST_RFLAGS,
|
||||
VM_REG_GUEST_ES,
|
||||
VM_REG_GUEST_CS,
|
||||
VM_REG_GUEST_SS,
|
||||
VM_REG_GUEST_DS,
|
||||
VM_REG_GUEST_FS,
|
||||
VM_REG_GUEST_GS,
|
||||
VM_REG_GUEST_LDTR,
|
||||
VM_REG_GUEST_TR,
|
||||
VM_REG_GUEST_IDTR,
|
||||
VM_REG_GUEST_GDTR,
|
||||
VM_REG_GUEST_EFER,
|
||||
VM_REG_GUEST_CR2,
|
||||
VM_REG_GUEST_PDPTE0,
|
||||
VM_REG_GUEST_PDPTE1,
|
||||
VM_REG_GUEST_PDPTE2,
|
||||
VM_REG_GUEST_PDPTE3,
|
||||
VM_REG_GUEST_INTR_SHADOW,
|
||||
VM_REG_LAST
|
||||
};
|
||||
|
||||
typedef unsigned long u_long;
|
||||
|
||||
int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval);
|
||||
int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val);
|
||||
int vm_get_seg_desc(struct vcpu *vcpu, int reg,
|
||||
struct seg_desc *ret_desc);
|
||||
int vm_set_seg_desc(struct vcpu *vcpu, int reg,
|
||||
struct seg_desc *desc);
|
||||
int vm_restart_instruction(struct vcpu *vcpu);
|
||||
void vm_gva2gpa(struct vcpu *vcpu, uint64_t gla, uint64_t *gpa);
|
118
hypervisor/arch/x86/guest/time.h
Normal file
118
hypervisor/arch/x86/guest/time.h
Normal file
@@ -0,0 +1,118 @@
|
||||
/*-
|
||||
* Copyright (c) 1982, 1986, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
* Copyright (c) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)time.h 8.5 (Berkeley) 5/4/95
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _TIME_H_
|
||||
#define _TIME_H_
|
||||
|
||||
struct callout {
|
||||
void *c_arg; /* function argument */
|
||||
void (*c_func)(void *); /* function to call */
|
||||
short c_flags; /* User State */
|
||||
};
|
||||
|
||||
#define CALLOUT_ACTIVE 0x0002 /* callout is currently active */
|
||||
#define CALLOUT_PENDING 0x0004 /* callout is waiting for timeout */
|
||||
#define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE)
|
||||
#define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE)
|
||||
#define callout_pending(c) ((c)->c_flags & CALLOUT_PENDING)
|
||||
|
||||
typedef int64_t time_t;
|
||||
typedef int64_t sbintime_t;
|
||||
|
||||
struct bintime {
|
||||
time_t sec;
|
||||
uint64_t frac;
|
||||
};
|
||||
|
||||
static inline void
|
||||
bintime_add(struct bintime *_bt, const struct bintime *_bt2)
|
||||
{
|
||||
uint64_t _u;
|
||||
|
||||
_u = _bt->frac;
|
||||
_bt->frac += _bt2->frac;
|
||||
if (_u > _bt->frac)
|
||||
_bt->sec++;
|
||||
_bt->sec += _bt2->sec;
|
||||
}
|
||||
|
||||
static inline void
|
||||
bintime_sub(struct bintime *_bt, const struct bintime *_bt2)
|
||||
{
|
||||
uint64_t _u;
|
||||
|
||||
_u = _bt->frac;
|
||||
_bt->frac -= _bt2->frac;
|
||||
if (_u < _bt->frac)
|
||||
_bt->sec--;
|
||||
_bt->sec -= _bt2->sec;
|
||||
}
|
||||
|
||||
static inline void
|
||||
bintime_mul(struct bintime *_bt, uint32_t _x)
|
||||
{
|
||||
uint64_t _p1, _p2;
|
||||
|
||||
_p1 = (_bt->frac & 0xffffffffull) * _x;
|
||||
_p2 = (_bt->frac >> 32) * _x + (_p1 >> 32);
|
||||
_bt->sec *= _x;
|
||||
_bt->sec += (_p2 >> 32);
|
||||
_bt->frac = (_p2 << 32) | (_p1 & 0xffffffffull);
|
||||
}
|
||||
|
||||
#define bintime_cmp(a, b, cmp) \
|
||||
(((a)->sec == (b)->sec) ? \
|
||||
((a)->frac cmp(b)->frac) : \
|
||||
((a)->sec cmp(b)->sec))
|
||||
|
||||
#define SBT_1S ((sbintime_t)1 << 32)
|
||||
#define SBT_1US (SBT_1S / 1000000)
|
||||
|
||||
#define BT2FREQ(bt) \
|
||||
(((uint64_t)0x8000000000000000 + ((bt)->frac >> 2)) / \
|
||||
((bt)->frac >> 1))
|
||||
|
||||
#define FREQ2BT(freq, bt) \
|
||||
{ \
|
||||
(bt)->sec = 0; \
|
||||
(bt)->frac = ((uint64_t)0x8000000000000000 / (freq)) << 1; \
|
||||
}
|
||||
|
||||
static inline sbintime_t
|
||||
bttosbt(const struct bintime _bt)
|
||||
{
|
||||
|
||||
return (((sbintime_t)_bt.sec << 32) + (_bt.frac >> 32));
|
||||
}
|
||||
|
||||
#endif /* !_TIME_H_ */
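A minimal sketch (editor's illustration, not part of this header; the helper name and the caller-supplied frequency are assumptions) of the intended fixed-point usage: convert a timer frequency into a per-tick bintime with FREQ2BT, scale it by an elapsed tick count, and return the result as 32.32 fixed-point seconds.

/* illustrative only: accumulate 'ticks' periods of a 'freq' Hz timer */
static sbintime_t example_ticks_to_sbt(uint32_t freq, uint32_t ticks)
{
    struct bintime per_tick, total = { 0, 0 };

    FREQ2BT(freq, &per_tick);      /* per_tick = 1/freq seconds */
    bintime_mul(&per_tick, ticks); /* scale by the elapsed tick count */
    bintime_add(&total, &per_tick);

    return bttosbt(total);         /* 32.32 fixed-point seconds */
}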
|
357
hypervisor/arch/x86/guest/vcpu.c
Normal file
357
hypervisor/arch/x86/guest/vcpu.c
Normal file
@@ -0,0 +1,357 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <schedule.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
vm_sw_loader_t vm_sw_loader;
|
||||
|
||||
/***********************************************************************
|
||||
* vcpu_id/pcpu_id mapping table:
|
||||
*
|
||||
* if
|
||||
* VM0_CPUS[2] = {0, 2} , VM1_CPUS[2] = {3, 1};
|
||||
* then
|
||||
* for physical CPU 0 : vcpu->pcpu_id = 0, vcpu->vcpu_id = 0, vmid = 0;
|
||||
* for physical CPU 2 : vcpu->pcpu_id = 2, vcpu->vcpu_id = 1, vmid = 0;
|
||||
* for physical CPU 3 : vcpu->pcpu_id = 3, vcpu->vcpu_id = 0, vmid = 1;
|
||||
* for physical CPU 1 : vcpu->pcpu_id = 1, vcpu->vcpu_id = 1, vmid = 1;
|
||||
*
|
||||
***********************************************************************/
|
||||
int create_vcpu(int cpu_id, struct vm *vm, struct vcpu **rtn_vcpu_handle)
|
||||
{
|
||||
struct vcpu *vcpu;
|
||||
|
||||
ASSERT(vm != NULL, "");
|
||||
ASSERT(rtn_vcpu_handle != NULL, "");
|
||||
|
||||
pr_info("Creating VCPU %d", cpu_id);
|
||||
|
||||
/* Allocate memory for VCPU */
|
||||
vcpu = calloc(1, sizeof(struct vcpu));
|
||||
ASSERT(vcpu != NULL, "");
|
||||
|
||||
/* Initialize the physical CPU ID for this VCPU */
|
||||
vcpu->pcpu_id = cpu_id;
|
||||
|
||||
/* Initialize the parent VM reference */
|
||||
vcpu->vm = vm;
|
||||
|
||||
/* Initialize the virtual ID for this VCPU */
|
||||
/* FIXME:
* We assume that vcpus are always destroyed in one
* shot (e.g. when the vm is destroyed). If we need to support
* destroying a specific vcpu on the fly, this vcpu_id assignment
* needs to be revised.
*/
|
||||
|
||||
/*
|
||||
* vcpu->vcpu_id = vm->hw.created_vcpus;
|
||||
* vm->hw.created_vcpus++;
|
||||
*/
|
||||
vcpu->vcpu_id = atomic_xadd_int(&vm->hw.created_vcpus, 1);
|
||||
/* vm->hw.vcpu_array[vcpu->vcpu_id] = vcpu; */
|
||||
atomic_store_rel_64(
|
||||
(unsigned long *)&vm->hw.vcpu_array[vcpu->vcpu_id],
|
||||
(unsigned long)vcpu);
|
||||
|
||||
ASSERT(vcpu->vcpu_id < vm->hw.num_vcpus,
|
||||
"Allocated vcpu_id is out of range!");
|
||||
|
||||
per_cpu(vcpu, cpu_id) = vcpu;
|
||||
|
||||
pr_info("PCPU%d is working as VM%d VCPU%d, Role: %s",
|
||||
vcpu->pcpu_id, vcpu->vm->attr.id, vcpu->vcpu_id,
|
||||
is_vcpu_bsp(vcpu) ? "PRIMARY" : "SECONDARY");
|
||||
|
||||
/* Is this VCPU a VM BSP, create page hierarchy for this VM */
|
||||
if (is_vcpu_bsp(vcpu)) {
|
||||
/* Set up temporary guest page tables */
|
||||
vm->arch_vm.guest_pml4 = create_guest_paging(vm);
|
||||
pr_info("VM *d VCPU %d CR3: 0x%016llx ",
|
||||
vm->attr.id, vcpu->vcpu_id, vm->arch_vm.guest_pml4);
|
||||
}
|
||||
|
||||
/* Allocate VMCS region for this VCPU */
|
||||
vcpu->arch_vcpu.vmcs = alloc_page();
|
||||
ASSERT(vcpu->arch_vcpu.vmcs != NULL, "");
|
||||
|
||||
/* Memset VMCS region for this VCPU */
|
||||
memset(vcpu->arch_vcpu.vmcs, 0, CPU_PAGE_SIZE);
|
||||
|
||||
/* Initialize exception field in VCPU context */
|
||||
vcpu->arch_vcpu.exception_info.exception = -1;
|
||||
|
||||
/* Initialize cur context */
|
||||
vcpu->arch_vcpu.cur_context = NORMAL_WORLD;
|
||||
|
||||
/* Create per vcpu vlapic */
|
||||
vlapic_create(vcpu);
|
||||
|
||||
/* Populate the return handle */
|
||||
*rtn_vcpu_handle = vcpu;
|
||||
|
||||
vcpu->launched = false;
|
||||
vcpu->paused_cnt = 0;
|
||||
vcpu->running = 0;
|
||||
vcpu->ioreq_pending = 0;
|
||||
vcpu->arch_vcpu.nr_sipi = 0;
|
||||
vcpu->pending_pre_work = 0;
|
||||
vcpu->state = VCPU_INIT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int start_vcpu(struct vcpu *vcpu)
|
||||
{
|
||||
uint64_t rip, instlen;
|
||||
struct run_context *cur_context =
|
||||
&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
|
||||
int64_t status = 0;
|
||||
|
||||
ASSERT(vcpu != NULL, "Incorrect arguments");
|
||||
|
||||
/* If this VCPU is not already launched, launch it */
|
||||
if (!vcpu->launched) {
|
||||
pr_info("VM %d Starting VCPU %d",
|
||||
vcpu->vm->attr.id, vcpu->vcpu_id);
|
||||
|
||||
/* Set vcpu launched */
|
||||
vcpu->launched = true;
|
||||
|
||||
/* To avoid the recycled VMCS reusing stale RSB entries, set IBPB.
* NOTE: this should be done every time the VMCS is switched;
* currently there is no other place where a VMCS switch happens.
* Please add an IBPB write for future VMCS switch cases (like trusty).
*/
|
||||
if (ibrs_type == IBRS_RAW)
|
||||
msr_write(MSR_IA32_PRED_CMD, PRED_SET_IBPB);
|
||||
|
||||
/* Launch the VM */
|
||||
status = vmx_vmrun(cur_context, VM_LAUNCH, ibrs_type);
|
||||
|
||||
/* See if VM launched successfully */
|
||||
if (status == 0) {
|
||||
if (is_vcpu_bsp(vcpu)) {
|
||||
pr_info("VM %d VCPU %d successfully launched",
|
||||
vcpu->vm->attr.id, vcpu->vcpu_id);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* This VCPU was already launched, check if the last guest
|
||||
* instruction needs to be repeated and resume VCPU accordingly
|
||||
*/
|
||||
instlen = vcpu->arch_vcpu.inst_len;
|
||||
rip = cur_context->rip;
|
||||
exec_vmwrite(VMX_GUEST_RIP, ((rip + instlen) &
|
||||
0xFFFFFFFFFFFFFFFF));
|
||||
|
||||
/* Resume the VM */
|
||||
status = vmx_vmrun(cur_context, VM_RESUME, ibrs_type);
|
||||
}
|
||||
|
||||
/* Save guest CR3 register */
|
||||
cur_context->cr3 = exec_vmread(VMX_GUEST_CR3);
|
||||
|
||||
/* Obtain current VCPU instruction pointer and length */
|
||||
cur_context->rip = exec_vmread(VMX_GUEST_RIP);
|
||||
vcpu->arch_vcpu.inst_len = exec_vmread(VMX_EXIT_INSTR_LEN);
|
||||
|
||||
cur_context->rsp = exec_vmread(VMX_GUEST_RSP);
|
||||
cur_context->rflags = exec_vmread(VMX_GUEST_RFLAGS);
|
||||
|
||||
/* Obtain VM exit reason */
|
||||
vcpu->arch_vcpu.exit_reason = exec_vmread(VMX_EXIT_REASON);
|
||||
|
||||
if (status != 0) {
|
||||
/* refer to the Intel 64 and IA-32 SDM, Vol. 3, Section 24.9.1 */
|
||||
if (vcpu->arch_vcpu.exit_reason & VMX_VMENTRY_FAIL)
|
||||
pr_fatal("vmentry fail reason=%lx", vcpu->arch_vcpu.exit_reason);
|
||||
else
|
||||
pr_fatal("vmexit fail err_inst=%lx", exec_vmread(VMX_INSTR_ERROR));
|
||||
|
||||
ASSERT(status == 0, "vm fail");
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
int shutdown_vcpu(__unused struct vcpu *vcpu)
|
||||
{
|
||||
/* TODO : Implement VCPU shutdown sequence */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int destroy_vcpu(struct vcpu *vcpu)
|
||||
{
|
||||
ASSERT(vcpu != NULL, "Incorrect arguments");
|
||||
|
||||
/* vcpu->vm->hw.vcpu_array[vcpu->vcpu_id] = NULL; */
|
||||
atomic_store_rel_64(
|
||||
(unsigned long *)&vcpu->vm->hw.vcpu_array[vcpu->vcpu_id],
|
||||
(unsigned long)NULL);
|
||||
|
||||
atomic_subtract_int(&vcpu->vm->hw.created_vcpus, 1);
|
||||
|
||||
vlapic_free(vcpu);
|
||||
free(vcpu->arch_vcpu.vmcs);
|
||||
free(vcpu->guest_msrs);
|
||||
free_pcpu(vcpu->pcpu_id);
|
||||
free(vcpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* NOTE:
|
||||
* vcpu should be paused before calling this function.
|
||||
*/
|
||||
void reset_vcpu(struct vcpu *vcpu)
|
||||
{
|
||||
struct vlapic *vlapic;
|
||||
|
||||
pr_dbg("vcpu%d reset", vcpu->vcpu_id);
|
||||
ASSERT(vcpu->state != VCPU_RUNNING,
|
||||
"reset vcpu when it's running");
|
||||
|
||||
if (vcpu->state == VCPU_INIT)
|
||||
return;
|
||||
|
||||
vcpu->state = VCPU_INIT;
|
||||
|
||||
vcpu->launched = false;
|
||||
vcpu->paused_cnt = 0;
|
||||
vcpu->running = 0;
|
||||
vcpu->ioreq_pending = 0;
|
||||
vcpu->arch_vcpu.nr_sipi = 0;
|
||||
vcpu->pending_pre_work = 0;
|
||||
vlapic = vcpu->arch_vcpu.vlapic;
|
||||
vlapic_init(vlapic);
|
||||
}
|
||||
|
||||
void init_vcpu(struct vcpu *vcpu)
|
||||
{
|
||||
if (is_vcpu_bsp(vcpu))
|
||||
vcpu->arch_vcpu.cpu_mode = PAGE_PROTECTED_MODE;
|
||||
else
|
||||
vcpu->arch_vcpu.cpu_mode = REAL_MODE;
|
||||
/* init_vmcs is delayed to vcpu vmcs launch first time */
|
||||
}
|
||||
|
||||
void pause_vcpu(struct vcpu *vcpu, enum vcpu_state new_state)
|
||||
{
|
||||
int pcpu_id = get_cpu_id();
|
||||
|
||||
pr_dbg("vcpu%d paused, new state: %d",
|
||||
vcpu->vcpu_id, new_state);
|
||||
|
||||
vcpu->prev_state = vcpu->state;
|
||||
vcpu->state = new_state;
|
||||
|
||||
get_schedule_lock(pcpu_id);
|
||||
if (atomic_load_acq_32(&vcpu->running) == 1) {
|
||||
remove_vcpu_from_runqueue(vcpu);
|
||||
make_reschedule_request(vcpu);
|
||||
release_schedule_lock(pcpu_id);
|
||||
|
||||
if (vcpu->pcpu_id != pcpu_id) {
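/*
 * The vcpu is running on another pcpu: busy-wait until that pcpu
 * handles the reschedule request and clears vcpu->running.
 */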
|
||||
while (atomic_load_acq_32(&vcpu->running) == 1)
|
||||
__asm__ __volatile("pause" ::: "memory");
|
||||
}
|
||||
} else {
|
||||
remove_vcpu_from_runqueue(vcpu);
|
||||
release_schedule_lock(pcpu_id);
|
||||
}
|
||||
}
|
||||
|
||||
void resume_vcpu(struct vcpu *vcpu)
|
||||
{
|
||||
pr_dbg("vcpu%d resumed", vcpu->vcpu_id);
|
||||
|
||||
vcpu->state = vcpu->prev_state;
|
||||
|
||||
get_schedule_lock(vcpu->pcpu_id);
|
||||
if (vcpu->state == VCPU_RUNNING) {
|
||||
add_vcpu_to_runqueue(vcpu);
|
||||
make_reschedule_request(vcpu);
|
||||
}
|
||||
release_schedule_lock(vcpu->pcpu_id);
|
||||
}
|
||||
|
||||
void schedule_vcpu(struct vcpu *vcpu)
|
||||
{
|
||||
vcpu->state = VCPU_RUNNING;
|
||||
pr_dbg("vcpu%d scheduled", vcpu->vcpu_id);
|
||||
|
||||
get_schedule_lock(vcpu->pcpu_id);
|
||||
add_vcpu_to_runqueue(vcpu);
|
||||
make_reschedule_request(vcpu);
|
||||
release_schedule_lock(vcpu->pcpu_id);
|
||||
}
|
||||
|
||||
/* helper function for vcpu creation */
|
||||
int prepare_vcpu(struct vm *vm, int pcpu_id)
|
||||
{
|
||||
int ret = 0;
|
||||
struct vcpu *vcpu = NULL;
|
||||
|
||||
ret = create_vcpu(pcpu_id, vm, &vcpu);
|
||||
ASSERT(ret == 0, "vcpu create failed");
|
||||
|
||||
if (is_vcpu_bsp(vcpu)) {
|
||||
/* Load VM SW */
|
||||
if (!vm_sw_loader)
|
||||
vm_sw_loader = general_sw_loader;
|
||||
vm_sw_loader(vm, vcpu);
|
||||
vcpu->arch_vcpu.cpu_mode = PAGE_PROTECTED_MODE;
|
||||
} else {
|
||||
vcpu->arch_vcpu.cpu_mode = REAL_MODE;
|
||||
}
|
||||
|
||||
/* init_vmcs is delayed to vcpu vmcs launch first time */
|
||||
|
||||
/* initialize the vcpu tsc aux */
|
||||
vcpu->msr_tsc_aux_guest = vcpu->vcpu_id;
|
||||
|
||||
set_pcpu_used(pcpu_id);
|
||||
|
||||
INIT_LIST_HEAD(&vcpu->run_list);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void request_vcpu_pre_work(struct vcpu *vcpu, int pre_work_id)
|
||||
{
|
||||
bitmap_set(pre_work_id, &vcpu->pending_pre_work);
|
||||
}
|
662
hypervisor/arch/x86/guest/vioapic.c
Normal file
@@ -0,0 +1,662 @@
|
||||
/*-
|
||||
* Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
|
||||
* Copyright (c) 2013 Neel Natu <neel@freebsd.org>
|
||||
* Copyright (c) 2017 Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "vioapic: " fmt
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
#define IOREGSEL 0x00
|
||||
#define IOWIN 0x10
|
||||
#define IOEOI 0x40
|
||||
|
||||
#define REDIR_ENTRIES_HW 120 /* SOS: aligned with the native IOAPIC */
|
||||
#define REDIR_ENTRIES_UOS 24 /* UOS pins*/
|
||||
#define RTBL_RO_BITS ((uint64_t)(IOAPIC_RTE_REM_IRR | IOAPIC_RTE_DELIVS))
|
||||
|
||||
#define ACRN_DBG_IOAPIC 6
|
||||
|
||||
struct vioapic {
|
||||
struct vm *vm;
|
||||
spinlock_t mtx;
|
||||
uint32_t id;
|
||||
uint32_t ioregsel;
|
||||
struct {
|
||||
uint64_t reg;
|
||||
int acnt; /* sum of pin asserts (+1) and deasserts (-1) */
|
||||
} rtbl[REDIR_ENTRIES_HW];
|
||||
};
|
||||
|
||||
#define VIOAPIC_LOCK(vioapic) spinlock_obtain(&((vioapic)->mtx))
|
||||
#define VIOAPIC_UNLOCK(vioapic) spinlock_release(&((vioapic)->mtx))
|
||||
|
||||
static inline const char *pinstate_str(bool asserted)
|
||||
{
|
||||
return (asserted) ? "asserted" : "deasserted";
|
||||
}
|
||||
|
||||
struct vioapic *
|
||||
vm_ioapic(struct vm *vm)
|
||||
{
|
||||
return (struct vioapic *)vm->arch_vm.virt_ioapic;
|
||||
}
|
||||
|
||||
static void
|
||||
vioapic_send_intr(struct vioapic *vioapic, int pin)
|
||||
{
|
||||
int vector, delmode;
|
||||
uint32_t low, high, dest;
|
||||
bool level, phys;
|
||||
|
||||
if (pin < 0 || pin >= vioapic_pincount(vioapic->vm))
|
||||
pr_err("vioapic_send_intr: invalid pin number %d", pin);
|
||||
|
||||
low = vioapic->rtbl[pin].reg;
|
||||
high = vioapic->rtbl[pin].reg >> 32;
|
||||
|
||||
if ((low & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET) {
|
||||
dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: masked", pin);
|
||||
return;
|
||||
}
|
||||
|
||||
phys = ((low & IOAPIC_RTE_DESTMOD) == IOAPIC_RTE_DESTPHY);
|
||||
delmode = low & IOAPIC_RTE_DELMOD;
|
||||
level = low & IOAPIC_RTE_TRGRLVL ? true : false;
|
||||
if (level)
|
||||
vioapic->rtbl[pin].reg |= IOAPIC_RTE_REM_IRR;
|
||||
|
||||
vector = low & IOAPIC_RTE_INTVEC;
|
||||
dest = high >> APIC_ID_SHIFT;
|
||||
vlapic_deliver_intr(vioapic->vm, level, dest, phys, delmode, vector);
|
||||
}
|
||||
|
||||
static void
|
||||
vioapic_set_pinstate(struct vioapic *vioapic, int pin, bool newstate)
|
||||
{
|
||||
int oldcnt, newcnt;
|
||||
bool needintr;
|
||||
|
||||
if (pin < 0 || pin >= vioapic_pincount(vioapic->vm))
|
||||
pr_err("vioapic_set_pinstate: invalid pin number %d", pin);
|
||||
|
||||
oldcnt = vioapic->rtbl[pin].acnt;
|
||||
if (newstate)
|
||||
vioapic->rtbl[pin].acnt++;
|
||||
else
|
||||
vioapic->rtbl[pin].acnt--;
|
||||
newcnt = vioapic->rtbl[pin].acnt;
|
||||
|
||||
if (newcnt < 0) {
|
||||
pr_err("ioapic pin%d: bad acnt %d", pin, newcnt);
|
||||
}
|
||||
|
||||
needintr = false;
|
||||
if (oldcnt == 0 && newcnt == 1) {
|
||||
needintr = true;
|
||||
dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: asserted", pin);
|
||||
} else if (oldcnt == 1 && newcnt == 0) {
|
||||
dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: deasserted", pin);
|
||||
} else {
|
||||
dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: %s, ignored, acnt %d",
|
||||
pin, pinstate_str(newstate), newcnt);
|
||||
}
|
||||
|
||||
if (needintr)
|
||||
vioapic_send_intr(vioapic, pin);
|
||||
}
|
||||
|
||||
enum irqstate {
|
||||
IRQSTATE_ASSERT,
|
||||
IRQSTATE_DEASSERT,
|
||||
IRQSTATE_PULSE
|
||||
};
|
||||
|
||||
static int
|
||||
vioapic_set_irqstate(struct vm *vm, int irq, enum irqstate irqstate)
|
||||
{
|
||||
struct vioapic *vioapic;
|
||||
|
||||
if (irq < 0 || irq >= vioapic_pincount(vm))
|
||||
return -EINVAL;
|
||||
|
||||
vioapic = vm_ioapic(vm);
|
||||
|
||||
VIOAPIC_LOCK(vioapic);
|
||||
switch (irqstate) {
|
||||
case IRQSTATE_ASSERT:
|
||||
vioapic_set_pinstate(vioapic, irq, true);
|
||||
break;
|
||||
case IRQSTATE_DEASSERT:
|
||||
vioapic_set_pinstate(vioapic, irq, false);
|
||||
break;
|
||||
case IRQSTATE_PULSE:
|
||||
vioapic_set_pinstate(vioapic, irq, true);
|
||||
vioapic_set_pinstate(vioapic, irq, false);
|
||||
break;
|
||||
default:
|
||||
panic("vioapic_set_irqstate: invalid irqstate %d", irqstate);
|
||||
}
|
||||
VIOAPIC_UNLOCK(vioapic);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
vioapic_assert_irq(struct vm *vm, int irq)
|
||||
{
|
||||
return vioapic_set_irqstate(vm, irq, IRQSTATE_ASSERT);
|
||||
}
|
||||
|
||||
int
|
||||
vioapic_deassert_irq(struct vm *vm, int irq)
|
||||
{
|
||||
return vioapic_set_irqstate(vm, irq, IRQSTATE_DEASSERT);
|
||||
}
|
||||
|
||||
int
|
||||
vioapic_pulse_irq(struct vm *vm, int irq)
|
||||
{
|
||||
return vioapic_set_irqstate(vm, irq, IRQSTATE_PULSE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Reset the vlapic's trigger-mode register to reflect the ioapic pin
|
||||
* configuration.
|
||||
*/
|
||||
void
|
||||
vioapic_update_tmr(struct vcpu *vcpu)
|
||||
{
|
||||
struct vioapic *vioapic;
|
||||
struct vlapic *vlapic;
|
||||
uint32_t low;
|
||||
int delmode, pin, vector;
|
||||
bool level;
|
||||
|
||||
vlapic = vcpu->arch_vcpu.vlapic;
|
||||
vioapic = vm_ioapic(vcpu->vm);
|
||||
|
||||
VIOAPIC_LOCK(vioapic);
|
||||
for (pin = 0; pin < vioapic_pincount(vioapic->vm); pin++) {
|
||||
low = vioapic->rtbl[pin].reg;
|
||||
|
||||
level = low & IOAPIC_RTE_TRGRLVL ? true : false;
|
||||
|
||||
/*
|
||||
* For a level-triggered 'pin' let the vlapic figure out if
|
||||
* an assertion on this 'pin' would result in an interrupt
|
||||
* being delivered to it. If yes, then it will modify the
|
||||
* TMR bit associated with this vector to level-triggered.
|
||||
*/
|
||||
delmode = low & IOAPIC_RTE_DELMOD;
|
||||
vector = low & IOAPIC_RTE_INTVEC;
|
||||
vlapic_set_tmr_one_vec(vlapic, delmode, vector, level);
|
||||
}
|
||||
vlapic_apicv_batch_set_tmr(vlapic);
|
||||
VIOAPIC_UNLOCK(vioapic);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
vioapic_read(struct vioapic *vioapic, uint32_t addr)
|
||||
{
|
||||
int regnum, pin, rshift;
|
||||
|
||||
regnum = addr & 0xff;
|
||||
switch (regnum) {
|
||||
case IOAPIC_ID:
|
||||
return vioapic->id;
|
||||
case IOAPIC_VER:
|
||||
return ((vioapic_pincount(vioapic->vm) - 1) << MAX_RTE_SHIFT)
|
||||
| 0x11;
|
||||
case IOAPIC_ARB:
|
||||
return vioapic->id;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* redirection table entries */
|
||||
if (regnum >= IOAPIC_REDTBL &&
|
||||
regnum < IOAPIC_REDTBL + vioapic_pincount(vioapic->vm) * 2) {
|
||||
pin = (regnum - IOAPIC_REDTBL) / 2;
|
||||
if ((regnum - IOAPIC_REDTBL) % 2)
|
||||
rshift = 32;
|
||||
else
|
||||
rshift = 0;
|
||||
|
||||
return vioapic->rtbl[pin].reg >> rshift;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* An IOAPIC of version 0x20 or later has an EOI register; the CPU can write
* a vector to it to clear the corresponding Remote IRR bits.
|
||||
*/
|
||||
static void
|
||||
vioapic_write_eoi(struct vioapic *vioapic, int32_t vector)
|
||||
{
|
||||
struct vm *vm = vioapic->vm;
|
||||
int pin;
|
||||
|
||||
if (vector < VECTOR_FOR_INTR_START || vector > NR_MAX_VECTOR)
|
||||
pr_err("vioapic_process_eoi: invalid vector %d", vector);
|
||||
|
||||
VIOAPIC_LOCK(vioapic);
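/*
 * For every level-triggered pin latched with this vector, clear the
 * Remote IRR bit; if the pin is still asserted, re-deliver the
 * interrupt, mirroring hardware level-trigger behavior.
 */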
|
||||
for (pin = 0; pin < vioapic_pincount(vm); pin++) {
|
||||
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_REM_IRR) == 0)
|
||||
continue;
|
||||
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTVEC) !=
|
||||
(uint64_t)vector)
|
||||
continue;
|
||||
|
||||
vioapic->rtbl[pin].reg &= ~IOAPIC_RTE_REM_IRR;
|
||||
if (vioapic->rtbl[pin].acnt > 0) {
|
||||
dev_dbg(ACRN_DBG_IOAPIC,
|
||||
"ioapic pin%d: asserted at eoi, acnt %d",
|
||||
pin, vioapic->rtbl[pin].acnt);
|
||||
vioapic_send_intr(vioapic, pin);
|
||||
}
|
||||
}
|
||||
VIOAPIC_UNLOCK(vioapic);
|
||||
}
|
||||
|
||||
static void
|
||||
vioapic_write(struct vioapic *vioapic, uint32_t addr, uint32_t data)
|
||||
{
|
||||
uint64_t data64, mask64;
|
||||
uint64_t last, new, changed;
|
||||
int regnum, pin, lshift;
|
||||
|
||||
regnum = addr & 0xff;
|
||||
switch (regnum) {
|
||||
case IOAPIC_ID:
|
||||
vioapic->id = data & APIC_ID_MASK;
|
||||
break;
|
||||
case IOAPIC_VER:
|
||||
case IOAPIC_ARB:
|
||||
/* readonly */
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* redirection table entries */
|
||||
if (regnum >= IOAPIC_REDTBL &&
|
||||
regnum < IOAPIC_REDTBL + vioapic_pincount(vioapic->vm) * 2) {
|
||||
pin = (regnum - IOAPIC_REDTBL) / 2;
|
||||
if ((regnum - IOAPIC_REDTBL) % 2)
|
||||
lshift = 32;
|
||||
else
|
||||
lshift = 0;
|
||||
|
||||
last = new = vioapic->rtbl[pin].reg;
|
||||
|
||||
data64 = (uint64_t)data << lshift;
|
||||
mask64 = (uint64_t)0xffffffff << lshift;
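/*
 * Replace only the dword selected by lshift; the read-only bits
 * (Remote IRR, Delivery Status) in RTBL_RO_BITS keep their previous
 * value and cannot be set by the guest.
 */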
|
||||
new &= ~mask64 | RTBL_RO_BITS;
|
||||
new |= data64 & ~RTBL_RO_BITS;
|
||||
|
||||
changed = last ^ new;
|
||||
/* pin0 from vpic mask/unmask */
|
||||
if (pin == 0 && (changed & IOAPIC_RTE_INTMASK)) {
|
||||
/* mask -> unmask */
|
||||
if ((last & IOAPIC_RTE_INTMASK) &&
|
||||
((new & IOAPIC_RTE_INTMASK) == 0)) {
|
||||
if ((vioapic->vm->vpic_wire_mode
|
||||
== VPIC_WIRE_NULL) ||
|
||||
(vioapic->vm->vpic_wire_mode
|
||||
== VPIC_WIRE_INTR)) {
|
||||
atomic_set_int(
|
||||
&vioapic->vm->vpic_wire_mode,
|
||||
VPIC_WIRE_IOAPIC);
|
||||
dev_dbg(ACRN_DBG_IOAPIC,
|
||||
"vpic wire mode -> IOAPIC");
|
||||
} else {
|
||||
pr_err("WARNING: invalid vpic wire mode change");
|
||||
return;
|
||||
}
|
||||
/* unmask -> mask */
|
||||
} else if (((last & IOAPIC_RTE_INTMASK) == 0) &&
|
||||
(new & IOAPIC_RTE_INTMASK)) {
|
||||
if (vioapic->vm->vpic_wire_mode
|
||||
== VPIC_WIRE_IOAPIC) {
|
||||
atomic_set_int(
|
||||
&vioapic->vm->vpic_wire_mode,
|
||||
VPIC_WIRE_INTR);
|
||||
dev_dbg(ACRN_DBG_IOAPIC,
|
||||
"vpic wire mode -> INTR");
|
||||
}
|
||||
}
|
||||
}
|
||||
vioapic->rtbl[pin].reg = new;
|
||||
dev_dbg(ACRN_DBG_IOAPIC, "ioapic pin%d: redir table entry %#lx",
|
||||
pin, vioapic->rtbl[pin].reg);
|
||||
/*
|
||||
* If any fields in the redirection table entry (except mask
|
||||
* or polarity) have changed then rendezvous all the vcpus
|
||||
* to update their vlapic trigger-mode registers.
|
||||
*/
|
||||
if (changed & ~(IOAPIC_RTE_INTMASK | IOAPIC_RTE_INTPOL)) {
|
||||
int i;
|
||||
struct vcpu *vcpu;
|
||||
|
||||
dev_dbg(ACRN_DBG_IOAPIC,
|
||||
"ioapic pin%d: recalculate vlapic trigger-mode reg",
|
||||
pin);
|
||||
|
||||
VIOAPIC_UNLOCK(vioapic);
|
||||
|
||||
foreach_vcpu(i, vioapic->vm, vcpu) {
|
||||
vcpu_make_request(vcpu, ACRN_REQUEST_TMR_UPDATE);
|
||||
}
|
||||
VIOAPIC_LOCK(vioapic);
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate an interrupt if the following conditions are met:
|
||||
* - pin is not masked
|
||||
* - previous interrupt has been EOIed
|
||||
* - pin level is asserted
|
||||
*/
|
||||
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTMASK) ==
|
||||
IOAPIC_RTE_INTMCLR &&
|
||||
(vioapic->rtbl[pin].reg & IOAPIC_RTE_REM_IRR) == 0 &&
|
||||
(vioapic->rtbl[pin].acnt > 0)) {
|
||||
dev_dbg(ACRN_DBG_IOAPIC,
|
||||
"ioapic pin%d: asserted at rtbl write, acnt %d",
|
||||
pin, vioapic->rtbl[pin].acnt);
|
||||
vioapic_send_intr(vioapic, pin);
|
||||
}
|
||||
|
||||
/* remap for activation: interrupt mask -> unmask
* remap for deactivation: interrupt masked and vector set to 0
|
||||
*/
|
||||
data64 = vioapic->rtbl[pin].reg;
|
||||
if ((((data64 & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMCLR)
|
||||
&& ((last & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET))
|
||||
|| (((data64 & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET)
|
||||
&& ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTVEC) == 0))) {
|
||||
/* VM enable intr */
|
||||
struct ptdev_intx_info intx;
|
||||
|
||||
/* NOTE: only supports a maximum of 256 pins */
|
||||
intx.virt_pin = (uint8_t)pin;
|
||||
intx.vpin_src = PTDEV_VPIN_IOAPIC;
|
||||
ptdev_intx_pin_remap(vioapic->vm, &intx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
vioapic_mmio_rw(struct vioapic *vioapic, uint64_t gpa,
|
||||
uint64_t *data, int size, bool doread)
|
||||
{
|
||||
uint64_t offset;
|
||||
|
||||
offset = gpa - VIOAPIC_BASE;
|
||||
|
||||
/*
|
||||
* The IOAPIC specification allows 32-bit wide accesses to the
* IOREGSEL (offset 0) and IOWIN (offset 16) registers; the EOI
* register (offset 0x40) is accepted here as well.
*/
|
||||
if (size != 4 || (offset != IOREGSEL && offset != IOWIN &&
|
||||
offset != IOEOI)) {
|
||||
if (doread)
|
||||
*data = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
VIOAPIC_LOCK(vioapic);
|
||||
if (offset == IOREGSEL) {
|
||||
if (doread)
|
||||
*data = vioapic->ioregsel;
|
||||
else
|
||||
vioapic->ioregsel = *data;
|
||||
} else if (offset == IOEOI) {
|
||||
/* only need to handle write operation */
|
||||
if (!doread)
|
||||
vioapic_write_eoi(vioapic, *data);
|
||||
} else {
|
||||
if (doread) {
|
||||
*data = vioapic_read(vioapic, vioapic->ioregsel);
|
||||
} else {
|
||||
vioapic_write(vioapic, vioapic->ioregsel,
|
||||
*data);
|
||||
}
|
||||
}
|
||||
VIOAPIC_UNLOCK(vioapic);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
vioapic_mmio_read(void *vm, uint64_t gpa, uint64_t *rval,
|
||||
int size)
|
||||
{
|
||||
int error;
|
||||
struct vioapic *vioapic;
|
||||
|
||||
vioapic = vm_ioapic(vm);
|
||||
error = vioapic_mmio_rw(vioapic, gpa, rval, size, true);
|
||||
return error;
|
||||
}
|
||||
|
||||
int
|
||||
vioapic_mmio_write(void *vm, uint64_t gpa, uint64_t wval,
|
||||
int size)
|
||||
{
|
||||
int error;
|
||||
struct vioapic *vioapic;
|
||||
|
||||
vioapic = vm_ioapic(vm);
|
||||
error = vioapic_mmio_rw(vioapic, gpa, &wval, size, false);
|
||||
return error;
|
||||
}
|
||||
|
||||
void
|
||||
vioapic_process_eoi(struct vm *vm, int vector)
|
||||
{
|
||||
struct vioapic *vioapic;
|
||||
int pin;
|
||||
|
||||
if (vector < VECTOR_FOR_INTR_START || vector > NR_MAX_VECTOR)
|
||||
pr_err("vioapic_process_eoi: invalid vector %d", vector);
|
||||
|
||||
vioapic = vm_ioapic(vm);
|
||||
dev_dbg(ACRN_DBG_IOAPIC, "ioapic processing eoi for vector %d", vector);
|
||||
|
||||
/* notify device to ack if assigned pin */
|
||||
for (pin = 0; pin < vioapic_pincount(vm); pin++) {
|
||||
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_REM_IRR) == 0)
|
||||
continue;
|
||||
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTVEC) !=
|
||||
(uint64_t)vector)
|
||||
continue;
|
||||
ptdev_intx_ack(vm, pin, PTDEV_VPIN_IOAPIC);
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX keep track of the pins associated with this vector instead
|
||||
* of iterating on every single pin each time.
|
||||
*/
|
||||
VIOAPIC_LOCK(vioapic);
|
||||
for (pin = 0; pin < vioapic_pincount(vm); pin++) {
|
||||
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_REM_IRR) == 0)
|
||||
continue;
|
||||
if ((vioapic->rtbl[pin].reg & IOAPIC_RTE_INTVEC) !=
|
||||
(uint64_t)vector)
|
||||
continue;
|
||||
|
||||
vioapic->rtbl[pin].reg &= ~IOAPIC_RTE_REM_IRR;
|
||||
if (vioapic->rtbl[pin].acnt > 0) {
|
||||
dev_dbg(ACRN_DBG_IOAPIC,
|
||||
"ioapic pin%d: asserted at eoi, acnt %d",
|
||||
pin, vioapic->rtbl[pin].acnt);
|
||||
vioapic_send_intr(vioapic, pin);
|
||||
}
|
||||
}
|
||||
VIOAPIC_UNLOCK(vioapic);
|
||||
}
|
||||
|
||||
struct vioapic *
|
||||
vioapic_init(struct vm *vm)
|
||||
{
|
||||
int i;
|
||||
struct vioapic *vioapic;
|
||||
|
||||
vioapic = calloc(1, sizeof(struct vioapic));
|
||||
ASSERT(vioapic != NULL, "");
|
||||
|
||||
vioapic->vm = vm;
|
||||
spinlock_init(&vioapic->mtx);
|
||||
|
||||
/* Initialize all redirection entries to mask all interrupts */
|
||||
for (i = 0; i < vioapic_pincount(vioapic->vm); i++)
|
||||
vioapic->rtbl[i].reg = 0x0001000000010000UL;
|
||||
|
||||
register_mmio_emulation_handler(vm,
|
||||
vioapic_mmio_access_handler,
|
||||
(uint64_t)VIOAPIC_BASE,
|
||||
(uint64_t)VIOAPIC_BASE + VIOAPIC_SIZE,
|
||||
(void *) 0);
|
||||
|
||||
return vioapic;
|
||||
}
|
||||
|
||||
void
|
||||
vioapic_cleanup(struct vioapic *vioapic)
|
||||
{
|
||||
unregister_mmio_emulation_handler(vioapic->vm,
|
||||
(uint64_t)VIOAPIC_BASE,
|
||||
(uint64_t)VIOAPIC_BASE + VIOAPIC_SIZE);
|
||||
free(vioapic);
|
||||
}
|
||||
|
||||
int
|
||||
vioapic_pincount(struct vm *vm)
|
||||
{
|
||||
if (is_vm0(vm))
|
||||
return REDIR_ENTRIES_HW;
|
||||
else
|
||||
return REDIR_ENTRIES_UOS;
|
||||
}
|
||||
|
||||
int vioapic_mmio_access_handler(struct vcpu *vcpu, struct mem_io *mmio,
|
||||
void *handler_private_data)
|
||||
{
|
||||
struct vm *vm = vcpu->vm;
|
||||
uint64_t gpa = mmio->paddr;
|
||||
int ret = 0;
|
||||
|
||||
(void)handler_private_data;
|
||||
|
||||
/* Note all RW to IOAPIC are 32-Bit in size */
|
||||
ASSERT(mmio->access_size == 4,
|
||||
"All RW to LAPIC must be 32-bits in size");
|
||||
|
||||
if (mmio->read_write == HV_MEM_IO_READ) {
|
||||
ret = vioapic_mmio_read(vm,
|
||||
gpa,
|
||||
&mmio->value,
|
||||
mmio->access_size);
|
||||
mmio->mmio_status = MMIO_TRANS_VALID;
|
||||
|
||||
} else if (mmio->read_write == HV_MEM_IO_WRITE) {
|
||||
ret = vioapic_mmio_write(vm,
|
||||
gpa,
|
||||
mmio->value,
|
||||
mmio->access_size);
|
||||
|
||||
mmio->mmio_status = MMIO_TRANS_VALID;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool vioapic_get_rte(struct vm *vm, int pin, void *rte)
|
||||
{
|
||||
struct vioapic *vioapic;
|
||||
|
||||
vioapic = vm_ioapic(vm);
|
||||
if (vioapic && rte) {
|
||||
*(uint64_t *)rte = vioapic->rtbl[pin].reg;
|
||||
return true;
|
||||
} else
|
||||
return false;
|
||||
}
|
||||
|
||||
int get_vioapic_info(char *str, int str_max, int vmid)
|
||||
{
|
||||
int pin, len, size = str_max, vector, delmode;
|
||||
uint64_t rte;
|
||||
uint32_t low, high, dest;
|
||||
bool level, phys, remote_irr, mask;
|
||||
struct vm *vm = get_vm_from_vmid(vmid);
|
||||
|
||||
if (!vm) {
|
||||
len = snprintf(str, size,
|
||||
"\r\nvm is not exist for vmid %d", vmid);
|
||||
size -= len;
|
||||
str += len;
|
||||
goto END;
|
||||
}
|
||||
|
||||
len = snprintf(str, size,
|
||||
"\r\nPIN\tVEC\tDM\tDEST\tTM\tDELM\tIRR\tMASK");
|
||||
size -= len;
|
||||
str += len;
|
||||
|
||||
for (pin = 0 ; pin < vioapic_pincount(vm); pin++) {
|
||||
vioapic_get_rte(vm, pin, (void *)&rte);
|
||||
low = rte;
|
||||
high = rte >> 32;
|
||||
mask = ((low & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET);
|
||||
remote_irr = ((low & IOAPIC_RTE_REM_IRR) == IOAPIC_RTE_REM_IRR);
|
||||
phys = ((low & IOAPIC_RTE_DESTMOD) == IOAPIC_RTE_DESTPHY);
|
||||
delmode = low & IOAPIC_RTE_DELMOD;
|
||||
level = low & IOAPIC_RTE_TRGRLVL ? true : false;
|
||||
vector = low & IOAPIC_RTE_INTVEC;
|
||||
dest = high >> APIC_ID_SHIFT;
|
||||
|
||||
len = snprintf(str, size,
|
||||
"\r\n%d\t0x%X\t%s\t0x%X\t%s\t%d\t%d\t%d",
|
||||
pin, vector, phys ? "phys" : "logic",
|
||||
dest, level ? "level" : "edge",
|
||||
delmode >> 8, remote_irr, mask);
|
||||
size -= len;
|
||||
str += len;
|
||||
}
|
||||
END:
|
||||
snprintf(str, size, "\r\n");
|
||||
return 0;
|
||||
}
|
2398
hypervisor/arch/x86/guest/vlapic.c
Normal file
File diff suppressed because it is too large
Load Diff
153
hypervisor/arch/x86/guest/vlapic_priv.h
Normal file
@@ -0,0 +1,153 @@
|
||||
/*-
|
||||
* Copyright (c) 2013 Neel Natu <neel@freebsd.org>
|
||||
* Copyright (c) 2017 Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _VLAPIC_PRIV_H_
|
||||
#define _VLAPIC_PRIV_H_
|
||||
|
||||
/*
|
||||
* APIC Register: Offset Description
|
||||
*/
|
||||
#define APIC_OFFSET_ID 0x20 /* Local APIC ID */
|
||||
#define APIC_OFFSET_VER 0x30 /* Local APIC Version */
|
||||
#define APIC_OFFSET_TPR 0x80 /* Task Priority Register */
|
||||
#define APIC_OFFSET_APR 0x90 /* Arbitration Priority */
|
||||
#define APIC_OFFSET_PPR 0xA0 /* Processor Priority Register */
|
||||
#define APIC_OFFSET_EOI 0xB0 /* EOI Register */
|
||||
#define APIC_OFFSET_RRR 0xC0 /* Remote read */
|
||||
#define APIC_OFFSET_LDR 0xD0 /* Logical Destination */
|
||||
#define APIC_OFFSET_DFR 0xE0 /* Destination Format Register */
|
||||
#define APIC_OFFSET_SVR 0xF0 /* Spurious Vector Register */
|
||||
#define APIC_OFFSET_ISR0 0x100 /* In Service Register */
|
||||
#define APIC_OFFSET_ISR1 0x110
|
||||
#define APIC_OFFSET_ISR2 0x120
|
||||
#define APIC_OFFSET_ISR3 0x130
|
||||
#define APIC_OFFSET_ISR4 0x140
|
||||
#define APIC_OFFSET_ISR5 0x150
|
||||
#define APIC_OFFSET_ISR6 0x160
|
||||
#define APIC_OFFSET_ISR7 0x170
|
||||
#define APIC_OFFSET_TMR0 0x180 /* Trigger Mode Register */
|
||||
#define APIC_OFFSET_TMR1 0x190
|
||||
#define APIC_OFFSET_TMR2 0x1A0
|
||||
#define APIC_OFFSET_TMR3 0x1B0
|
||||
#define APIC_OFFSET_TMR4 0x1C0
|
||||
#define APIC_OFFSET_TMR5 0x1D0
|
||||
#define APIC_OFFSET_TMR6 0x1E0
|
||||
#define APIC_OFFSET_TMR7 0x1F0
|
||||
#define APIC_OFFSET_IRR0 0x200 /* Interrupt Request Register */
|
||||
#define APIC_OFFSET_IRR1 0x210
|
||||
#define APIC_OFFSET_IRR2 0x220
|
||||
#define APIC_OFFSET_IRR3 0x230
|
||||
#define APIC_OFFSET_IRR4 0x240
|
||||
#define APIC_OFFSET_IRR5 0x250
|
||||
#define APIC_OFFSET_IRR6 0x260
|
||||
#define APIC_OFFSET_IRR7 0x270
|
||||
#define APIC_OFFSET_ESR 0x280 /* Error Status Register */
|
||||
#define APIC_OFFSET_CMCI_LVT 0x2F0 /* Local Vector Table (CMCI) */
|
||||
#define APIC_OFFSET_ICR_LOW 0x300 /* Interrupt Command Register */
|
||||
#define APIC_OFFSET_ICR_HI 0x310
|
||||
#define APIC_OFFSET_TIMER_LVT 0x320 /* Local Vector Table (Timer) */
|
||||
#define APIC_OFFSET_THERM_LVT 0x330 /* Local Vector Table (Thermal) */
|
||||
#define APIC_OFFSET_PERF_LVT 0x340 /* Local Vector Table (PMC) */
|
||||
#define APIC_OFFSET_LINT0_LVT 0x350 /* Local Vector Table (LINT0) */
|
||||
#define APIC_OFFSET_LINT1_LVT 0x360 /* Local Vector Table (LINT1) */
|
||||
#define APIC_OFFSET_ERROR_LVT 0x370 /* Local Vector Table (ERROR) */
|
||||
#define APIC_OFFSET_TIMER_ICR 0x380 /* Timer's Initial Count */
|
||||
#define APIC_OFFSET_TIMER_CCR 0x390 /* Timer's Current Count */
|
||||
#define APIC_OFFSET_TIMER_DCR 0x3E0 /* Timer's Divide Configuration */
|
||||
#define APIC_OFFSET_SELF_IPI 0x3F0 /* Self IPI register */
|
||||
|
||||
/*
|
||||
* 16 priority levels with at most one vector injected per level.
|
||||
*/
|
||||
#define ISRVEC_STK_SIZE (16 + 1)
|
||||
|
||||
#define VLAPIC_MAXLVT_INDEX APIC_LVT_CMCI
|
||||
|
||||
struct vlapic;
|
||||
|
||||
struct pir_desc {
|
||||
uint64_t pir[4];
|
||||
uint64_t pending;
|
||||
uint64_t unused[3];
|
||||
} __aligned(64);
|
||||
|
||||
struct vlapic_ops {
|
||||
int (*apicv_set_intr_ready)
|
||||
(struct vlapic *vlapic, int vector, bool level);
|
||||
int (*apicv_pending_intr)(struct vlapic *vlapic, int *vecptr);
|
||||
void (*apicv_intr_accepted)(struct vlapic *vlapic, int vector);
|
||||
void (*apicv_post_intr)(struct vlapic *vlapic, int hostcpu);
|
||||
void (*apicv_set_tmr)(struct vlapic *vlapic, int vector, bool level);
|
||||
void (*apicv_batch_set_tmr)(struct vlapic *vlapic);
|
||||
void (*enable_x2apic_mode)(struct vlapic *vlapic);
|
||||
};
|
||||
|
||||
struct vlapic {
|
||||
struct vm *vm;
|
||||
struct vcpu *vcpu;
|
||||
struct lapic *apic_page;
|
||||
struct pir_desc *pir_desc;
|
||||
struct vlapic_ops ops;
|
||||
|
||||
uint32_t esr_pending;
|
||||
int esr_firing;
|
||||
|
||||
struct callout callout; /* vlapic timer */
|
||||
struct bintime timer_fire_bt; /* callout expiry time */
|
||||
struct bintime timer_freq_bt; /* timer frequency */
|
||||
struct bintime timer_period_bt; /* timer period */
|
||||
long last_timer; /* the last timer id */
|
||||
|
||||
spinlock_t timer_mtx;
|
||||
|
||||
/*
|
||||
* The 'isrvec_stk' is a stack of vectors injected by the local apic.
|
||||
* A vector is popped from the stack when the processor does an EOI.
|
||||
* The vector on the top of the stack is used to compute the
|
||||
* Processor Priority in conjunction with the TPR.
|
||||
*/
|
||||
uint8_t isrvec_stk[ISRVEC_STK_SIZE];
|
||||
int isrvec_stk_top;
|
||||
|
||||
uint64_t msr_apicbase;
|
||||
|
||||
/*
|
||||
* Copies of some registers in the virtual APIC page. We do this for
|
||||
* a couple of different reasons:
|
||||
* - to be able to detect what changed (e.g. svr_last)
|
||||
* - to maintain a coherent snapshot of the register (e.g. lvt_last)
|
||||
*/
|
||||
uint32_t svr_last;
|
||||
uint32_t lvt_last[VLAPIC_MAXLVT_INDEX + 1];
|
||||
struct pir_desc pir;
|
||||
};
|
||||
|
||||
void vlapic_cleanup(struct vlapic *vlapic);
|
||||
|
||||
#endif /* _VLAPIC_PRIV_H_ */
|
324
hypervisor/arch/x86/guest/vm.c
Normal file
@@ -0,0 +1,324 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <bsp_extern.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
/* Local variables */
|
||||
|
||||
/* VMs list */
|
||||
struct list_head vm_list = {
|
||||
.next = &vm_list,
|
||||
.prev = &vm_list,
|
||||
};
|
||||
|
||||
/* Lock for VMs list */
|
||||
spinlock_t vm_list_lock = {
|
||||
.head = 0,
|
||||
.tail = 0
|
||||
};
|
||||
|
||||
/* used for vmid allocation; this means the maximum number of VMs is 64 */
|
||||
static unsigned long vmid_bitmap;
|
||||
|
||||
static void init_vm(struct vm_description *vm_desc,
|
||||
struct vm *vm_handle)
|
||||
{
|
||||
/* Populate VM attributes from VM description */
|
||||
vm_handle->hw.num_vcpus = vm_desc->vm_hw_num_cores;
|
||||
vm_handle->state_info.privilege = vm_desc->vm_state_info_privilege;
|
||||
vm_handle->state_info.boot_count = 0;
|
||||
}
|
||||
|
||||
/* return a pointer to the virtual machine structure associated with
|
||||
* this VM ID
|
||||
*/
|
||||
struct vm *get_vm_from_vmid(int vm_id)
|
||||
{
|
||||
struct vm *vm = NULL;
|
||||
struct list_head *pos;
|
||||
|
||||
spinlock_obtain(&vm_list_lock);
|
||||
list_for_each(pos, &vm_list) {
|
||||
vm = list_entry(pos, struct vm, list);
|
||||
if (vm->attr.id == vm_id) {
|
||||
spinlock_release(&vm_list_lock);
|
||||
return vm;
|
||||
}
|
||||
}
|
||||
spinlock_release(&vm_list_lock);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int create_vm(struct vm_description *vm_desc, struct vm **rtn_vm)
|
||||
{
|
||||
unsigned int id;
|
||||
struct vm *vm;
|
||||
int status = 0;
|
||||
|
||||
if ((vm_desc == NULL) || (rtn_vm == NULL))
|
||||
status = -EINVAL;
|
||||
|
||||
if (status == 0) {
|
||||
/* Allocate memory for virtual machine */
|
||||
vm = calloc(1, sizeof(struct vm));
|
||||
ASSERT(vm != NULL, "vm allocation failed");
|
||||
|
||||
/*
|
||||
* Map Virtual Machine to its VM Description
|
||||
*/
|
||||
init_vm(vm_desc, vm);
|
||||
|
||||
|
||||
/* Init mmio list */
|
||||
INIT_LIST_HEAD(&vm->mmio_list);
|
||||
|
||||
if (vm->hw.num_vcpus == 0)
|
||||
vm->hw.num_vcpus = phy_cpu_num;
|
||||
|
||||
vm->hw.vcpu_array =
|
||||
calloc(1, sizeof(struct vcpu *) * vm->hw.num_vcpus);
|
||||
ASSERT(vm->hw.vcpu_array != NULL,
|
||||
"vcpu_array allocation failed");
|
||||
|
||||
for (id = 0; id < sizeof(long) * 8; id++)
|
||||
if (bitmap_test_and_set(id, &vmid_bitmap) == 0)
|
||||
break;
|
||||
vm->attr.id = vm->attr.boot_idx = id;
|
||||
snprintf(&vm->attr.name[0], MAX_VM_NAME_LEN, "vm_%d",
|
||||
vm->attr.id);
|
||||
|
||||
atomic_store_rel_int(&vm->hw.created_vcpus, 0);
|
||||
|
||||
/* gpa_lowtop is used for system start-up */
|
||||
vm->hw.gpa_lowtop = 0;
|
||||
/* Only for SOS: Configure VM software information */
|
||||
/* For UOS: this VM software information is configured in the DM */
|
||||
if (is_vm0(vm)) {
|
||||
prepare_vm0_memmap_and_e820(vm);
|
||||
#ifndef CONFIG_EFI_STUB
|
||||
status = init_vm0_boot_info(vm);
|
||||
#endif
|
||||
} else {
|
||||
/* populate UOS vm fields according to vm_desc */
|
||||
vm->secure_world_enabled =
|
||||
vm_desc->secure_world_enabled;
|
||||
memcpy_s(&vm->GUID[0], sizeof(vm->GUID),
|
||||
&vm_desc->GUID[0],
|
||||
sizeof(vm_desc->GUID));
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&vm->list);
|
||||
spinlock_obtain(&vm_list_lock);
|
||||
list_add(&vm->list, &vm_list);
|
||||
spinlock_release(&vm_list_lock);
|
||||
|
||||
/* Ensure VM software information obtained */
|
||||
if (status == 0) {
|
||||
|
||||
/* Set up IO bit-mask such that VM exit occurs on
|
||||
* selected IO ranges
|
||||
*/
|
||||
setup_io_bitmap(vm);
|
||||
|
||||
/* Create virtual uart */
|
||||
if (is_vm0(vm))
|
||||
vm->vuart = vuart_init(vm);
|
||||
|
||||
vm->vpic = vpic_init(vm);
|
||||
|
||||
/* vpic wire_mode default is INTR */
|
||||
vm->vpic_wire_mode = VPIC_WIRE_INTR;
|
||||
|
||||
/* Allocate full emulated vIOAPIC instance */
|
||||
vm->arch_vm.virt_ioapic = vioapic_init(vm);
|
||||
|
||||
/* Populate return VM handle */
|
||||
*rtn_vm = vm;
|
||||
ptdev_vm_init(vm);
|
||||
vm->sw.req_buf = 0;
|
||||
|
||||
vm->state = VM_CREATED;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Return status to caller */
|
||||
return status;
|
||||
}
|
||||
|
||||
int shutdown_vm(struct vm *vm)
|
||||
{
|
||||
int i, status = 0;
|
||||
struct vcpu *vcpu = NULL;
|
||||
|
||||
if (vm == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
pause_vm(vm);
|
||||
|
||||
/* Only allow shutdown paused vm */
|
||||
if (vm->state != VM_PAUSED)
|
||||
return -EINVAL;
|
||||
|
||||
foreach_vcpu(i, vm, vcpu) {
|
||||
reset_vcpu(vcpu);
|
||||
destroy_vcpu(vcpu);
|
||||
}
|
||||
|
||||
spinlock_obtain(&vm_list_lock);
|
||||
list_del_init(&vm->list);
|
||||
spinlock_release(&vm_list_lock);
|
||||
|
||||
ptdev_vm_deinit(vm);
|
||||
|
||||
/* cleanup and free vioapic */
|
||||
vioapic_cleanup(vm->arch_vm.virt_ioapic);
|
||||
|
||||
/* Free EPT allocated resources assigned to VM */
|
||||
destroy_ept(vm);
|
||||
|
||||
/* Free MSR bitmap */
|
||||
free(vm->arch_vm.msr_bitmap);
|
||||
|
||||
/* TODO: De-initialize I/O Emulation */
|
||||
free_io_emulation_resource(vm);
|
||||
|
||||
/* Free iommu_domain */
|
||||
if (vm->iommu_domain)
|
||||
destroy_iommu_domain(vm->iommu_domain);
|
||||
|
||||
bitmap_clr(vm->attr.id, &vmid_bitmap);
|
||||
|
||||
if (vm->vpic)
|
||||
vpic_cleanup(vm);
|
||||
|
||||
free(vm->hw.vcpu_array);
|
||||
|
||||
/* TODO: De-Configure HV-SW */
|
||||
/* Deallocate VM */
|
||||
free(vm);
|
||||
|
||||
/* Return status to caller */
|
||||
return status;
|
||||
}
|
||||
|
||||
int start_vm(struct vm *vm)
|
||||
{
|
||||
struct vcpu *vcpu = NULL;
|
||||
|
||||
vm->state = VM_STARTED;
|
||||
|
||||
/* Only start BSP (vid = 0) and let BSP start other APs */
|
||||
vcpu = vcpu_from_vid(vm, 0);
|
||||
ASSERT(vcpu != NULL, "vm%d, vcpu0", vm->attr.id);
|
||||
schedule_vcpu(vcpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The DM only pauses a VM for shutdown/reboot. If the DM needs to pause
* a VM for other purposes, this API should be extended.
|
||||
*/
|
||||
int pause_vm(struct vm *vm)
|
||||
{
|
||||
int i;
|
||||
struct vcpu *vcpu = NULL;
|
||||
|
||||
if (vm->state == VM_PAUSED)
|
||||
return 0;
|
||||
|
||||
vm->state = VM_PAUSED;
|
||||
|
||||
foreach_vcpu(i, vm, vcpu)
|
||||
pause_vcpu(vcpu, VCPU_ZOMBIE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vm_resume(struct vm *vm)
|
||||
{
|
||||
int i;
|
||||
struct vcpu *vcpu = NULL;
|
||||
|
||||
foreach_vcpu(i, vm, vcpu)
|
||||
resume_vcpu(vcpu);
|
||||
|
||||
vm->state = VM_STARTED;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Eventually, we will remove the array and only maintain the vm0 descriptor */
|
||||
struct vm_description *get_vm_desc(int idx)
|
||||
{
|
||||
struct vm_description_array *vm_desc_array;
|
||||
|
||||
/* Obtain base of user defined VM description array data
|
||||
* structure
|
||||
*/
|
||||
vm_desc_array = (struct vm_description_array *)get_vm_desc_base();
|
||||
/* Obtain VM description array base */
|
||||
if (idx >= vm_desc_array->num_vm_desc)
|
||||
return NULL;
|
||||
else
|
||||
return &vm_desc_array->vm_desc_array[idx];
|
||||
}
|
||||
|
||||
/* Create vm/vcpu for vm0 */
|
||||
int prepare_vm0(void)
|
||||
{
|
||||
int i, ret;
|
||||
struct vm *vm = NULL;
|
||||
struct vm_description *vm_desc = NULL;
|
||||
|
||||
vm_desc = get_vm_desc(0);
|
||||
ASSERT(vm_desc, "get vm desc failed");
|
||||
ret = create_vm(vm_desc, &vm);
|
||||
ASSERT(ret == 0, "VM creation failed!");
|
||||
|
||||
prepare_vcpu(vm, vm_desc->vm_hw_logical_core_ids[0]);
|
||||
|
||||
/* Prepare the AP for vm0 */
|
||||
for (i = 1; i < vm_desc->vm_hw_num_cores; i++)
|
||||
prepare_vcpu(vm, vm_desc->vm_hw_logical_core_ids[i]);
|
||||
|
||||
/* start vm0 BSP automatically */
|
||||
start_vm(vm);
|
||||
|
||||
pr_fatal("Start VM0");
|
||||
|
||||
return 0;
|
||||
}
|
148
hypervisor/arch/x86/guest/vmcall.c
Normal file
@@ -0,0 +1,148 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <hv_debug.h>
|
||||
#include <acrn_hv_defs.h>
|
||||
#include <hypercall.h>
|
||||
|
||||
int vmcall_handler(struct vcpu *vcpu)
|
||||
{
|
||||
int64_t ret = 0;
|
||||
struct vm *vm = vcpu->vm;
|
||||
struct run_context *cur_context =
|
||||
&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
|
||||
/* hypercall ID from guest*/
|
||||
uint64_t hypcall_id = cur_context->guest_cpu_regs.regs.r8;
|
||||
/* hypercall param1 from guest*/
|
||||
uint64_t param1 = cur_context->guest_cpu_regs.regs.rdi;
|
||||
/* hypercall param2 from guest*/
|
||||
uint64_t param2 = cur_context->guest_cpu_regs.regs.rsi;
|
||||
/* hypercall param3 from guest, reserved*/
|
||||
/* uint64_t param3 = cur_context->guest_cpu_regs.regs.rdx; */
|
||||
/* hypercall param4 from guest, reserved*/
|
||||
/* uint64_t param4 = cur_context->guest_cpu_regs.regs.rcx; */
|
||||
|
||||
/* Dispatch the hypercall handler */
|
||||
switch (hypcall_id) {
|
||||
case HC_GET_API_VERSION:
|
||||
ret = hcall_get_api_version(vm, param1);
|
||||
break;
|
||||
|
||||
case HC_CREATE_VM:
|
||||
ret = hcall_create_vm(vm, param1);
|
||||
break;
|
||||
|
||||
case HC_DESTROY_VM:
|
||||
ret = hcall_destroy_vm(param1);
|
||||
break;
|
||||
|
||||
case HC_START_VM:
|
||||
ret = hcall_resume_vm(param1);
|
||||
break;
|
||||
|
||||
case HC_PAUSE_VM:
|
||||
ret = hcall_pause_vm(param1);
|
||||
break;
|
||||
|
||||
case HC_CREATE_VCPU:
|
||||
ret = hcall_create_vcpu(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_ASSERT_IRQLINE:
|
||||
ret = hcall_assert_irqline(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_DEASSERT_IRQLINE:
|
||||
ret = hcall_deassert_irqline(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_PULSE_IRQLINE:
|
||||
ret = hcall_pulse_irqline(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_INJECT_MSI:
|
||||
ret = hcall_inject_msi(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_SET_IOREQ_BUFFER:
|
||||
ret = hcall_set_ioreq_buffer(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_NOTIFY_REQUEST_FINISH:
|
||||
ret = hcall_notify_req_finish(param1, param2);
|
||||
break;
|
||||
|
||||
case HC_VM_SET_MEMMAP:
|
||||
ret = hcall_set_vm_memmap(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_VM_PCI_MSIX_REMAP:
|
||||
ret = hcall_remap_pci_msix(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_VM_GPA2HPA:
|
||||
ret = hcall_gpa_to_hpa(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_ASSIGN_PTDEV:
|
||||
ret = hcall_assign_ptdev(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_DEASSIGN_PTDEV:
|
||||
ret = hcall_deassign_ptdev(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_SET_PTDEV_INTR_INFO:
|
||||
ret = hcall_set_ptdev_intr_info(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_RESET_PTDEV_INTR_INFO:
|
||||
ret = hcall_reset_ptdev_intr_info(vm, param1, param2);
|
||||
break;
|
||||
|
||||
case HC_SETUP_SBUF:
|
||||
ret = hcall_setup_sbuf(vm, param1);
|
||||
break;
|
||||
|
||||
default:
|
||||
pr_err("op %d: Invalid hypercall\n", hypcall_id);
|
||||
ret = -1;
|
||||
break;
|
||||
}
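/* The hypercall return value is passed back to the guest in RAX. */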
|
||||
|
||||
cur_context->guest_cpu_regs.regs.rax = ret;
|
||||
|
||||
TRACE_2L(TRC_VMEXIT_VMCALL, vm->attr.id, hypcall_id);
|
||||
|
||||
return 0;
|
||||
}
|
321
hypervisor/arch/x86/guest/vmsr.c
Normal file
@@ -0,0 +1,321 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
/* MSRs that need to be emulated; order entries in this array by frequency of access */
|
||||
static const uint32_t emulated_msrs[] = {
|
||||
MSR_IA32_TSC_DEADLINE, /* Enable TSC_DEADLINE VMEXIT */
|
||||
|
||||
/* following MSR not emulated now */
|
||||
/*
|
||||
* MSR_IA32_APIC_BASE,
|
||||
* MSR_IA32_SYSENTER_CS,
|
||||
* MSR_IA32_SYSENTER_ESP,
|
||||
* MSR_IA32_SYSENTER_EIP,
|
||||
* MSR_IA32_TSC_AUX,
|
||||
* MSR_IA32_TIME_STAMP_COUNTER,
|
||||
*/
|
||||
};
|
||||
|
||||
/* the indices match the emulated_msrs array */
|
||||
enum {
|
||||
IDX_TSC_DEADLINE,
|
||||
|
||||
IDX_MAX_MSR
|
||||
};
|
||||
|
||||
static void enable_msr_interception(uint8_t *bitmap, uint32_t msr)
|
||||
{
|
||||
uint8_t *read_map;
|
||||
uint8_t *write_map;
|
||||
uint8_t value;
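/*
 * VMX MSR-bitmap layout (one 4KB page): read bitmap for low MSRs at
 * offset 0, read bitmap for high MSRs at 1024, write bitmap for low
 * MSRs at 2048 and write bitmap for high MSRs at 3072; one bit per MSR.
 */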
|
||||
/* low MSR */
|
||||
if (msr <= 0x1FFF) {
|
||||
read_map = bitmap;
|
||||
write_map = bitmap + 2048;
|
||||
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
|
||||
read_map = bitmap + 1024;
|
||||
write_map = bitmap + 3072;
|
||||
} else {
|
||||
pr_err("Invalid MSR");
|
||||
return;
|
||||
}
|
||||
|
||||
msr &= 0x1FFF;
|
||||
value = read_map[(msr>>3)];
|
||||
value |= 1<<(msr%8);
|
||||
/* right now we trap for both r/w */
|
||||
read_map[(msr>>3)] = value;
|
||||
write_map[(msr>>3)] = value;
|
||||
}
|
||||
|
||||
/* Not used now; kept as an API for cases that may need it later */
|
||||
void disable_msr_interception(uint8_t *bitmap, uint32_t msr)
|
||||
{
|
||||
uint8_t *read_map;
|
||||
uint8_t *write_map;
|
||||
uint8_t value;
|
||||
/* low MSR */
|
||||
if (msr <= 0x1FFF) {
|
||||
read_map = bitmap;
|
||||
write_map = bitmap + 2048;
|
||||
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
|
||||
read_map = bitmap + 1024;
|
||||
write_map = bitmap + 3072;
|
||||
} else {
|
||||
pr_err("Invalid MSR");
|
||||
return;
|
||||
}
|
||||
|
||||
msr &= 0x1FFF;
|
||||
value = read_map[(msr>>3)];
|
||||
value &= ~(1<<(msr%8));
|
||||
/* right now we trap for both r/w */
|
||||
read_map[(msr>>3)] = value;
|
||||
write_map[(msr>>3)] = value;
|
||||
}
|
||||
|
||||
void init_msr_emulation(struct vcpu *vcpu)
|
||||
{
|
||||
uint32_t i = 0;
|
||||
uint32_t msrs_count = ARRAY_SIZE(emulated_msrs);
|
||||
void *msr_bitmap;
|
||||
uint64_t value64;
|
||||
|
||||
ASSERT(msrs_count == IDX_MAX_MSR,
|
||||
"MSR ID should be matched with emulated_msrs");
|
||||
|
||||
/* The MSR bitmap is allocated/initialized once and shared by all vcpus of the VM */
|
||||
if (is_vcpu_bsp(vcpu)) {
|
||||
|
||||
/* Allocate and initialize memory for MSR bitmap region*/
|
||||
vcpu->vm->arch_vm.msr_bitmap = alloc_page();
|
||||
ASSERT(vcpu->vm->arch_vm.msr_bitmap, "");
|
||||
memset(vcpu->vm->arch_vm.msr_bitmap, 0x0, CPU_PAGE_SIZE);
|
||||
|
||||
msr_bitmap = vcpu->vm->arch_vm.msr_bitmap;
|
||||
|
||||
for (i = 0; i < msrs_count; i++)
|
||||
enable_msr_interception(msr_bitmap, emulated_msrs[i]);
|
||||
|
||||
/* The MSRs below are protected from the guest OS; any access injects a #GP */
|
||||
enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_CAP);
|
||||
enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_DEF_TYPE);
|
||||
|
||||
for (i = MSR_IA32_MTRR_PHYSBASE_0;
|
||||
i <= MSR_IA32_MTRR_PHYSMASK_9; i++) {
|
||||
enable_msr_interception(msr_bitmap, i);
|
||||
}
|
||||
|
||||
enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_FIX64K_00000);
|
||||
enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_FIX16K_80000);
|
||||
enable_msr_interception(msr_bitmap, MSR_IA32_MTRR_FIX16K_A0000);
|
||||
|
||||
for (i = MSR_IA32_MTRR_FIX4K_C0000;
|
||||
i <= MSR_IA32_MTRR_FIX4K_F8000; i++) {
|
||||
enable_msr_interception(msr_bitmap, i);
|
||||
}
|
||||
}
|
||||
|
||||
/* Set up MSR bitmap - pg 2904 24.6.9 */
|
||||
value64 = (int64_t) vcpu->vm->arch_vm.msr_bitmap;
|
||||
exec_vmwrite64(VMX_MSR_BITMAP_FULL, value64);
|
||||
pr_dbg("VMX_MSR_BITMAP: 0x%016llx ", value64);
|
||||
|
||||
vcpu->guest_msrs = (uint64_t *)calloc(msrs_count, sizeof(uint64_t));
|
||||
|
||||
ASSERT(vcpu->guest_msrs != NULL, "");
|
||||
memset(vcpu->guest_msrs, 0, msrs_count * sizeof(uint64_t));
|
||||
}
|
||||
|
||||
int rdmsr_handler(struct vcpu *vcpu)
|
||||
{
|
||||
uint32_t msr;
|
||||
uint64_t v = 0;
|
||||
uint32_t id;
|
||||
int cur_context = vcpu->arch_vcpu.cur_context;
|
||||
|
||||
/* Read the MSR index from guest RCX */
|
||||
msr = vcpu->arch_vcpu.contexts[cur_context].guest_cpu_regs.regs.rcx;
|
||||
|
||||
/* Do the required processing for each msr case */
|
||||
switch (msr) {
|
||||
case MSR_IA32_TSC_DEADLINE:
|
||||
{
|
||||
v = vcpu->guest_msrs[IDX_TSC_DEADLINE];
|
||||
break;
|
||||
}
|
||||
|
||||
case MSR_IA32_MTRR_CAP:
|
||||
case MSR_IA32_MTRR_DEF_TYPE:
|
||||
case MSR_IA32_MTRR_PHYSBASE_0 ... MSR_IA32_MTRR_PHYSMASK_9:
|
||||
case MSR_IA32_MTRR_FIX64K_00000 ... MSR_IA32_MTRR_FIX4K_F8000:
|
||||
{
|
||||
vcpu_inject_gp(vcpu);
|
||||
break;
|
||||
}
|
||||
|
||||
/* The following MSRs are not emulated now; left for the future */
|
||||
case MSR_IA32_SYSENTER_CS:
|
||||
{
|
||||
v = exec_vmread(VMX_GUEST_IA32_SYSENTER_CS);
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_SYSENTER_ESP:
|
||||
{
|
||||
v = exec_vmread(VMX_GUEST_IA32_SYSENTER_ESP);
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_SYSENTER_EIP:
|
||||
{
|
||||
v = exec_vmread(VMX_GUEST_IA32_SYSENTER_EIP);
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_TSC_AUX:
|
||||
{
|
||||
v = vcpu->arch_vcpu.msr_tsc_aux;
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_TIME_STAMP_COUNTER:
|
||||
{
|
||||
/* Read the host TSC value */
|
||||
CPU_RDTSCP_EXECUTE(&v, &id);
|
||||
|
||||
/* Add the TSC_offset to host TSC and return the value */
|
||||
v += exec_vmread64(VMX_TSC_OFFSET_FULL);
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_APIC_BASE:
|
||||
{
|
||||
bool ret;
|
||||
/* Read APIC base */
|
||||
vlapic_rdmsr(vcpu, msr, &v, &ret);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
pr_warn("rdmsr: %lx should not come here!", msr);
|
||||
v = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Store the MSR contents in RAX and RDX */
|
||||
vcpu->arch_vcpu.contexts[cur_context].guest_cpu_regs.regs.rax =
|
||||
v & 0xffffffff;
|
||||
vcpu->arch_vcpu.contexts[cur_context].guest_cpu_regs.regs.rdx = v >> 32;
|
||||
|
||||
TRACE_2L(TRC_VMEXIT_RDMSR, msr, v);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int wrmsr_handler(struct vcpu *vcpu)
|
||||
{
|
||||
uint32_t msr;
|
||||
uint64_t v;
|
||||
struct run_context *cur_context =
|
||||
&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
|
||||
|
||||
/* Read the MSR ID */
|
||||
msr = cur_context->guest_cpu_regs.regs.rcx;
|
||||
|
||||
/* Get the MSR contents */
|
||||
v = (((uint64_t) cur_context->guest_cpu_regs.regs.rdx) << 32) |
|
||||
((uint64_t) cur_context->guest_cpu_regs.regs.rax);
|
||||
|
||||
/* Do the required processing for each msr case */
|
||||
switch (msr) {
|
||||
case MSR_IA32_TSC_DEADLINE:
|
||||
{
|
||||
bool ret;
|
||||
/* Forward the TSC-deadline write to the vlapic */
|
||||
vlapic_wrmsr(vcpu, msr, v, &ret);
|
||||
vcpu->guest_msrs[IDX_TSC_DEADLINE] = v;
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_MTRR_CAP:
|
||||
case MSR_IA32_MTRR_DEF_TYPE:
|
||||
case MSR_IA32_MTRR_PHYSBASE_0 ... MSR_IA32_MTRR_PHYSMASK_9:
|
||||
case MSR_IA32_MTRR_FIX64K_00000 ... MSR_IA32_MTRR_FIX4K_F8000:
|
||||
{
|
||||
vcpu_inject_gp(vcpu);
|
||||
break;
|
||||
}
|
||||
|
||||
/* The following MSRs are not emulated now; left for the future */
|
||||
case MSR_IA32_SYSENTER_CS:
|
||||
{
|
||||
exec_vmwrite(VMX_GUEST_IA32_SYSENTER_CS, v);
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_SYSENTER_ESP:
|
||||
{
|
||||
exec_vmwrite(VMX_GUEST_IA32_SYSENTER_ESP, v);
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_SYSENTER_EIP:
|
||||
{
|
||||
exec_vmwrite(VMX_GUEST_IA32_SYSENTER_EIP, v);
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_GS_BASE:
|
||||
{
|
||||
exec_vmwrite(VMX_GUEST_GS_BASE, v);
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_TSC_AUX:
|
||||
{
|
||||
vcpu->arch_vcpu.msr_tsc_aux = v;
|
||||
break;
|
||||
}
|
||||
case MSR_IA32_APIC_BASE:
|
||||
{
|
||||
bool ret;
|
||||
/* Write APIC base */
|
||||
vlapic_wrmsr(vcpu, msr, v, &ret);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
ASSERT(0, "wrmsr: %lx should not come here!", msr);
|
||||
msr_write(msr, v);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
TRACE_2L(TRC_VMEXIT_WRMSR, msr, v);
|
||||
|
||||
return 0;
|
||||
}
|
950
hypervisor/arch/x86/guest/vpic.c
Normal file
@@ -0,0 +1,950 @@
|
||||
/*-
|
||||
* Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
|
||||
* Copyright (c) 2017 Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "vpic: " fmt
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
#define VPIC_LOCK_INIT(vpic) spinlock_init(&((vpic)->lock))
|
||||
#define VPIC_LOCK(vpic) spinlock_obtain(&((vpic)->lock))
|
||||
#define VPIC_UNLOCK(vpic) spinlock_release(&((vpic)->lock))
|
||||
/* TODO: add spinlock_locked support? */
|
||||
/*#define VPIC_LOCKED(vpic) spinlock_locked(&((vpic)->lock))*/
|
||||
|
||||
#define vm_pic(vm) (vm->vpic)
|
||||
|
||||
#define true 1
|
||||
#define false 0
|
||||
|
||||
#define ACRN_DBG_PIC 6
|
||||
|
||||
enum irqstate {
|
||||
IRQSTATE_ASSERT,
|
||||
IRQSTATE_DEASSERT,
|
||||
IRQSTATE_PULSE
|
||||
};
|
||||
|
||||
struct pic {
|
||||
bool ready;
|
||||
int icw_num;
|
||||
int rd_cmd_reg;
|
||||
|
||||
bool aeoi;
|
||||
bool poll;
|
||||
bool rotate;
|
||||
bool sfn; /* special fully-nested mode */
|
||||
|
||||
int irq_base;
|
||||
uint8_t request; /* Interrupt Request Register (IRR) */
|
||||
uint8_t service; /* Interrupt Service Register (ISR) */
|
||||
uint8_t mask; /* Interrupt Mask Register (IMR) */
|
||||
uint8_t smm; /* special mask mode */
|
||||
|
||||
int acnt[8]; /* sum of pin asserts and deasserts */
|
||||
int lowprio; /* lowest priority irq */
|
||||
|
||||
bool intr_raised;
|
||||
uint8_t elc;
|
||||
};
|
||||
|
||||
struct vpic {
|
||||
struct vm *vm;
|
||||
spinlock_t lock;
|
||||
struct pic pic[2];
|
||||
};
|
||||
|
||||
/*
|
||||
* Loop over all the pins in priority order from highest to lowest.
|
||||
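 * The scan starts at (lowprio + 1) & 0x7 because the pin following the lowest-priority pin has the highest priority.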
*/
|
||||
#define PIC_PIN_FOREACH(pinvar, pic, tmpvar) \
|
||||
for (tmpvar = 0, pinvar = (pic->lowprio + 1) & 0x7; \
|
||||
tmpvar < 8; \
|
||||
tmpvar++, pinvar = (pinvar + 1) & 0x7)
|
||||
|
||||
static void vpic_set_pinstate(struct vpic *vpic, int pin, bool newstate);
|
||||
|
||||
static inline bool master_pic(struct vpic *vpic, struct pic *pic)
|
||||
{
|
||||
|
||||
if (pic == &vpic->pic[0])
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline int vpic_get_highest_isrpin(struct pic *pic)
|
||||
{
|
||||
int bit, pin;
|
||||
int i;
|
||||
|
||||
PIC_PIN_FOREACH(pin, pic, i) {
|
||||
bit = (1 << pin);
|
||||
|
||||
if (pic->service & bit) {
|
||||
/*
|
||||
* An IS bit that is masked by an IMR bit will not be
|
||||
* cleared by a non-specific EOI in Special Mask Mode.
|
||||
*/
|
||||
if (pic->smm && (pic->mask & bit) != 0)
|
||||
continue;
|
||||
else
|
||||
return pin;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static inline int vpic_get_highest_irrpin(struct pic *pic)
|
||||
{
|
||||
int serviced;
|
||||
int bit, pin, tmp;
|
||||
|
||||
/*
|
||||
* In 'Special Fully-Nested Mode' when an interrupt request from
|
||||
* a slave is in service, the slave is not locked out from the
|
||||
* master's priority logic.
|
||||
*/
|
||||
serviced = pic->service;
|
||||
if (pic->sfn)
|
||||
serviced &= ~(1 << 2);
|
||||
|
||||
/*
|
||||
* In 'Special Mask Mode', when a mask bit is set in OCW1 it inhibits
|
||||
* further interrupts at that level and enables interrupts from all
|
||||
* other levels that are not masked. In other words the ISR has no
|
||||
* bearing on the levels that can generate interrupts.
|
||||
*/
|
||||
if (pic->smm)
|
||||
serviced = 0;
|
||||
|
||||
PIC_PIN_FOREACH(pin, pic, tmp) {
|
||||
bit = 1 << pin;
|
||||
|
||||
/*
|
||||
* If there is already an interrupt in service at the same
|
||||
* or higher priority then bail.
|
||||
*/
|
||||
if ((serviced & bit) != 0)
|
||||
break;
|
||||
|
||||
/*
|
||||
* If an interrupt is asserted and not masked then return
|
||||
* the corresponding 'pin' to the caller.
|
||||
*/
|
||||
if ((pic->request & bit) != 0 && (pic->mask & bit) == 0)
|
||||
return pin;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void vpic_notify_intr(struct vpic *vpic)
|
||||
{
|
||||
struct pic *pic;
|
||||
int pin;
|
||||
|
||||
/*
|
||||
* First check the slave.
|
||||
*/
|
||||
pic = &vpic->pic[1];
|
||||
pin = vpic_get_highest_irrpin(pic);
|
||||
if (!pic->intr_raised && pin != -1) {
|
||||
dev_dbg(ACRN_DBG_PIC,
|
||||
"pic slave notify pin = %d (imr 0x%x irr 0x%x isr 0x%x)\n",
|
||||
pin, pic->mask, pic->request, pic->service);
|
||||
|
||||
/*
|
||||
* Cascade the request from the slave to the master.
|
||||
*/
|
||||
pic->intr_raised = true;
|
||||
vpic_set_pinstate(vpic, 2, true);
|
||||
vpic_set_pinstate(vpic, 2, false);
|
||||
} else {
|
||||
dev_dbg(ACRN_DBG_PIC,
|
||||
"pic slave no eligible interrupt (imr 0x%x irr 0x%x isr 0x%x)",
|
||||
pic->mask, pic->request, pic->service);
|
||||
}
|
||||
|
||||
/*
|
||||
* Then check the master.
|
||||
*/
|
||||
pic = &vpic->pic[0];
|
||||
pin = vpic_get_highest_irrpin(pic);
|
||||
if (!pic->intr_raised && pin != -1) {
|
||||
dev_dbg(ACRN_DBG_PIC,
|
||||
"pic master notify pin = %d (imr 0x%x irr 0x%x isr 0x%x)\n",
|
||||
pin, pic->mask, pic->request, pic->service);
|
||||
|
||||
/*
|
||||
* From Section 3.6.2, "Interrupt Modes", in the
|
||||
* MPtable Specification, Version 1.4
|
||||
*
|
||||
* PIC interrupts are routed to both the Local APIC
|
||||
* and the I/O APIC to support operation in 1 of 3
|
||||
* modes.
|
||||
*
|
||||
* 1. Legacy PIC Mode: the PIC effectively bypasses
|
||||
* all APIC components. In this mode the local APIC is
|
||||
* disabled and LINT0 is reconfigured as INTR to
|
||||
* deliver the PIC interrupt directly to the CPU.
|
||||
*
|
||||
* 2. Virtual Wire Mode: the APIC is treated as a
|
||||
* virtual wire which delivers interrupts from the PIC
|
||||
* to the CPU. In this mode LINT0 is programmed as
|
||||
* ExtINT to indicate that the PIC is the source of
|
||||
* the interrupt.
|
||||
*
|
||||
* 3. Virtual Wire Mode via I/O APIC: PIC interrupts are
|
||||
* fielded by the I/O APIC and delivered to the appropriate
|
||||
* CPU. In this mode the I/O APIC input 0 is programmed
|
||||
* as ExtINT to indicate that the PIC is the source of the
|
||||
* interrupt.
|
||||
*/
|
||||
pic->intr_raised = true;
|
||||
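/* With INTR wire mode the interrupt is injected into vcpu0 as an
 * ExtINT; otherwise it is delivered through the vLAPIC LINT0 and
 * vIOAPIC pin 0 (the virtual wire modes described above).
 */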
if (vpic->vm->vpic_wire_mode == VPIC_WIRE_INTR) {
|
||||
struct vcpu *vcpu = vcpu_from_vid(vpic->vm, 0);
|
||||
|
||||
ASSERT(vcpu != NULL, "vm%d, vcpu0", vpic->vm->attr.id);
|
||||
vcpu_inject_extint(vcpu);
|
||||
} else {
|
||||
vlapic_set_local_intr(vpic->vm, -1, APIC_LVT_LINT0);
|
||||
/* notify vioapic pin0 if it exists:
|
||||
* for vPIC + vIOAPIC mode, the vpic master output is connected
|
||||
* to vioapic pin0 (irq2)
|
||||
* per MP Specification Section 5.1
|
||||
*/
|
||||
vioapic_pulse_irq(vpic->vm, 0);
|
||||
}
|
||||
} else {
|
||||
dev_dbg(ACRN_DBG_PIC,
|
||||
"pic master no eligible interrupt (imr 0x%x irr 0x%x isr 0x%x)",
|
||||
pic->mask, pic->request, pic->service);
|
||||
}
|
||||
}
|
||||
|
||||
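/* ICW1 starts the initialization sequence: the PIC state is reset and
 * icw_num is armed so the following data-port writes are interpreted
 * as ICW2..ICW4.
 */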
static int vpic_icw1(__unused struct vpic *vpic, struct pic *pic, uint8_t val)
|
||||
{
|
||||
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic icw1 0x%x\n",
|
||||
vpic->vm, val);
|
||||
|
||||
pic->ready = false;
|
||||
|
||||
pic->icw_num = 1;
|
||||
pic->request = 0;
|
||||
pic->mask = 0;
|
||||
pic->lowprio = 7;
|
||||
pic->rd_cmd_reg = 0;
|
||||
pic->poll = 0;
|
||||
pic->smm = 0;
|
||||
|
||||
if ((val & ICW1_SNGL) != 0) {
|
||||
dev_dbg(ACRN_DBG_PIC, "vpic cascade mode required\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((val & ICW1_IC4) == 0) {
|
||||
dev_dbg(ACRN_DBG_PIC, "vpic icw4 required\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
pic->icw_num++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vpic_icw2(__unused struct vpic *vpic, struct pic *pic, uint8_t val)
|
||||
{
|
||||
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic icw2 0x%x\n",
|
||||
vpic->vm, val);
|
||||
|
||||
pic->irq_base = val & 0xf8;
|
||||
|
||||
pic->icw_num++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vpic_icw3(__unused struct vpic *vpic, struct pic *pic,
|
||||
__unused uint8_t val)
|
||||
{
|
||||
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic icw3 0x%x\n",
|
||||
vpic->vm, val);
|
||||
|
||||
pic->icw_num++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vpic_icw4(struct vpic *vpic, struct pic *pic, uint8_t val)
|
||||
{
|
||||
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic icw4 0x%x\n",
|
||||
vpic->vm, val);
|
||||
|
||||
if ((val & ICW4_8086) == 0) {
|
||||
dev_dbg(ACRN_DBG_PIC,
|
||||
"vpic microprocessor mode required\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((val & ICW4_AEOI) != 0)
|
||||
pic->aeoi = true;
|
||||
|
||||
if ((val & ICW4_SFNM) != 0) {
|
||||
if (master_pic(vpic, pic)) {
|
||||
pic->sfn = true;
|
||||
} else {
|
||||
dev_dbg(ACRN_DBG_PIC,
|
||||
"Ignoring special fully nested mode on slave pic: %#x",
|
||||
val);
|
||||
}
|
||||
}
|
||||
|
||||
pic->icw_num = 0;
|
||||
pic->ready = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool vpic_is_pin_mask(struct vpic *vpic, uint8_t virt_pin)
|
||||
{
|
||||
struct pic *pic;
|
||||
|
||||
if (virt_pin < 8)
|
||||
pic = &vpic->pic[0];
|
||||
else if (virt_pin < 16) {
|
||||
pic = &vpic->pic[1];
|
||||
virt_pin -= 8;
|
||||
} else
|
||||
return true;
|
||||
|
||||
if (pic->mask & (1 << virt_pin))
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
static int vpic_ocw1(struct vpic *vpic, struct pic *pic, uint8_t val)
|
||||
{
|
||||
int pin, i, bit;
|
||||
uint8_t old = pic->mask;
|
||||
|
||||
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic ocw1 0x%x\n",
|
||||
vpic->vm, val);
|
||||
|
||||
pic->mask = val & 0xff;
|
||||
|
||||
/* query and setup if pin/irq is for passthrough device */
|
||||
PIC_PIN_FOREACH(pin, pic, i) {
|
||||
bit = (1 << pin);
|
||||
|
||||
/* remap on activation: the interrupt changes from masked to unmasked;
|
||||
* remap on deactivation is handled when the vIOAPIC takes it over
|
||||
*/
|
||||
if (((pic->mask & bit) == 0) && (old & bit)) {
|
||||
struct ptdev_intx_info intx;
|
||||
|
||||
/* master pic pin2 is connected to the slave pic,
|
||||
* not to a device, so no passthrough remap is needed
|
||||
*/
|
||||
if ((pin == 2) && master_pic(vpic, pic))
|
||||
continue;
|
||||
|
||||
intx.virt_pin = pin;
|
||||
intx.vpin_src = PTDEV_VPIN_PIC;
|
||||
if (!master_pic(vpic, pic))
|
||||
intx.virt_pin += 8;
|
||||
ptdev_intx_pin_remap(vpic->vm, &intx);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
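/* OCW2 carries the EOI and priority-rotation commands: SL selects a
 * specific in-service level, EOI clears the in-service bit, and R
 * rotates the lowest-priority pin.
 */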
static int vpic_ocw2(struct vpic *vpic, struct pic *pic, uint8_t val)
|
||||
{
|
||||
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic ocw2 0x%x\n",
|
||||
vpic->vm, val);
|
||||
|
||||
pic->rotate = ((val & OCW2_R) != 0);
|
||||
|
||||
if ((val & OCW2_EOI) != 0) {
|
||||
int isr_bit;
|
||||
|
||||
if ((val & OCW2_SL) != 0) {
|
||||
/* specific EOI */
|
||||
isr_bit = val & 0x7;
|
||||
} else {
|
||||
/* non-specific EOI */
|
||||
isr_bit = vpic_get_highest_isrpin(pic);
|
||||
}
|
||||
|
||||
if (isr_bit != -1) {
|
||||
pic->service &= ~(1 << isr_bit);
|
||||
|
||||
if (pic->rotate)
|
||||
pic->lowprio = isr_bit;
|
||||
}
|
||||
|
||||
/* if level ack PTDEV */
|
||||
if (pic->elc & (1 << (isr_bit & 0x7))) {
|
||||
ptdev_intx_ack(vpic->vm,
|
||||
master_pic(vpic, pic) ? isr_bit : isr_bit + 8,
|
||||
PTDEV_VPIN_PIC);
|
||||
}
|
||||
} else if ((val & OCW2_SL) != 0 && pic->rotate == true) {
|
||||
/* specific priority */
|
||||
pic->lowprio = val & 0x7;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vpic_ocw3(__unused struct vpic *vpic, struct pic *pic, uint8_t val)
|
||||
{
|
||||
dev_dbg(ACRN_DBG_PIC, "vm 0x%x: pic ocw3 0x%x\n",
|
||||
vpic->vm, val);
|
||||
|
||||
if (val & OCW3_ESMM) {
|
||||
pic->smm = val & OCW3_SMM ? 1 : 0;
|
||||
dev_dbg(ACRN_DBG_PIC, "%s pic special mask mode %s\n",
|
||||
master_pic(vpic, pic) ? "master" : "slave",
|
||||
pic->smm ? "enabled" : "disabled");
|
||||
}
|
||||
|
||||
if (val & OCW3_RR) {
|
||||
/* read register command */
|
||||
pic->rd_cmd_reg = val & OCW3_RIS;
|
||||
|
||||
/* Polling mode */
|
||||
pic->poll = ((val & OCW3_P) != 0);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void vpic_set_pinstate(struct vpic *vpic, int pin, bool newstate)
|
||||
{
|
||||
struct pic *pic;
|
||||
int oldcnt, newcnt;
|
||||
bool level;
|
||||
|
||||
ASSERT(pin >= 0 && pin < 16,
|
||||
"vpic_set_pinstate: invalid pin number");
|
||||
|
||||
pic = &vpic->pic[pin >> 3];
|
||||
|
||||
oldcnt = pic->acnt[pin & 0x7];
|
||||
if (newstate)
|
||||
pic->acnt[pin & 0x7]++;
|
||||
else
|
||||
pic->acnt[pin & 0x7]--;
|
||||
newcnt = pic->acnt[pin & 0x7];
|
||||
|
||||
if (newcnt < 0) {
|
||||
pr_warn("pic pin%d: bad acnt %d\n", pin, newcnt);
|
||||
}
|
||||
|
||||
level = ((vpic->pic[pin >> 3].elc & (1 << (pin & 0x7))) != 0);
|
||||
|
||||
if ((oldcnt == 0 && newcnt == 1) || (newcnt > 0 && level == true)) {
|
||||
/* rising edge or level */
|
||||
dev_dbg(ACRN_DBG_PIC, "pic pin%d: asserted\n", pin);
|
||||
pic->request |= (1 << (pin & 0x7));
|
||||
} else if (oldcnt == 1 && newcnt == 0) {
|
||||
/* falling edge */
|
||||
dev_dbg(ACRN_DBG_PIC, "pic pin%d: deasserted\n", pin);
|
||||
if (level)
|
||||
pic->request &= ~(1 << (pin & 0x7));
|
||||
} else {
|
||||
dev_dbg(ACRN_DBG_PIC,
|
||||
"pic pin%d: %s, ignored, acnt %d\n",
|
||||
pin, newstate ? "asserted" : "deasserted", newcnt);
|
||||
}
|
||||
|
||||
vpic_notify_intr(vpic);
|
||||
}
|
||||
|
||||
static int vpic_set_irqstate(struct vm *vm, int irq, enum irqstate irqstate)
|
||||
{
|
||||
struct vpic *vpic;
|
||||
struct pic *pic;
|
||||
|
||||
if (irq < 0 || irq > 15)
|
||||
return -EINVAL;
|
||||
|
||||
vpic = vm_pic(vm);
|
||||
pic = &vpic->pic[irq >> 3];
|
||||
|
||||
if (pic->ready == false)
|
||||
return 0;
|
||||
|
||||
VPIC_LOCK(vpic);
|
||||
switch (irqstate) {
|
||||
case IRQSTATE_ASSERT:
|
||||
vpic_set_pinstate(vpic, irq, true);
|
||||
break;
|
||||
case IRQSTATE_DEASSERT:
|
||||
vpic_set_pinstate(vpic, irq, false);
|
||||
break;
|
||||
case IRQSTATE_PULSE:
|
||||
vpic_set_pinstate(vpic, irq, true);
|
||||
vpic_set_pinstate(vpic, irq, false);
|
||||
break;
|
||||
default:
|
||||
ASSERT(0, "vpic_set_irqstate: invalid irqstate");
|
||||
}
|
||||
VPIC_UNLOCK(vpic);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* hypervisor interface: assert/deassert/pulse irq */
|
||||
int vpic_assert_irq(struct vm *vm, int irq)
|
||||
{
|
||||
return vpic_set_irqstate(vm, irq, IRQSTATE_ASSERT);
|
||||
}
|
||||
|
||||
int vpic_deassert_irq(struct vm *vm, int irq)
|
||||
{
|
||||
return vpic_set_irqstate(vm, irq, IRQSTATE_DEASSERT);
|
||||
}
|
||||
|
||||
int vpic_pulse_irq(struct vm *vm, int irq)
|
||||
{
|
||||
return vpic_set_irqstate(vm, irq, IRQSTATE_PULSE);
|
||||
}
|
||||
|
||||
int vpic_set_irq_trigger(struct vm *vm, int irq, enum vpic_trigger trigger)
|
||||
{
|
||||
struct vpic *vpic;
|
||||
|
||||
if (irq < 0 || irq > 15)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* See comment in vpic_elc_handler. These IRQs must be
|
||||
* edge triggered.
|
||||
*/
|
||||
if (trigger == LEVEL_TRIGGER) {
|
||||
switch (irq) {
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
case 8:
|
||||
case 13:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
vpic = vm_pic(vm);
|
||||
|
||||
VPIC_LOCK(vpic);
|
||||
|
||||
if (trigger == LEVEL_TRIGGER)
|
||||
vpic->pic[irq >> 3].elc |= 1 << (irq & 0x7);
|
||||
else
|
||||
vpic->pic[irq >> 3].elc &= ~(1 << (irq & 0x7));
|
||||
|
||||
VPIC_UNLOCK(vpic);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vpic_get_irq_trigger(struct vm *vm, int irq, enum vpic_trigger *trigger)
|
||||
{
|
||||
struct vpic *vpic;
|
||||
|
||||
if (irq < 0 || irq > 15)
|
||||
return -EINVAL;
|
||||
|
||||
vpic = vm_pic(vm);
|
||||
if (!vpic)
|
||||
return -EINVAL;
|
||||
|
||||
if (vpic->pic[irq>>3].elc & (1 << (irq & 0x7)))
|
||||
*trigger = LEVEL_TRIGGER;
|
||||
else
|
||||
*trigger = EDGE_TRIGGER;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void vpic_pending_intr(struct vm *vm, int *vecptr)
|
||||
{
|
||||
struct vpic *vpic;
|
||||
struct pic *pic;
|
||||
int pin;
|
||||
|
||||
vpic = vm_pic(vm);
|
||||
|
||||
pic = &vpic->pic[0];
|
||||
|
||||
VPIC_LOCK(vpic);
|
||||
|
||||
pin = vpic_get_highest_irrpin(pic);
|
||||
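/* Pin 2 on the master is the cascade input, so the real source is a pin on the slave. */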
if (pin == 2) {
|
||||
pic = &vpic->pic[1];
|
||||
pin = vpic_get_highest_irrpin(pic);
|
||||
}
|
||||
|
||||
/*
|
||||
* If there are no pins active at this moment then return -1 so the
|
||||
* caller treats it as no pending interrupt.
|
||||
*/
|
||||
if (pin == -1) {
|
||||
*vecptr = -1;
|
||||
VPIC_UNLOCK(vpic);
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT(pin >= 0 && pin <= 7, "invalid pin");
|
||||
*vecptr = pic->irq_base + pin;
|
||||
|
||||
dev_dbg(ACRN_DBG_PIC, "Got pending vector 0x%x\n", *vecptr);
|
||||
|
||||
VPIC_UNLOCK(vpic);
|
||||
}
|
||||
|
||||
static void vpic_pin_accepted(struct pic *pic, int pin)
|
||||
{
|
||||
pic->intr_raised = false;
|
||||
|
||||
if ((pic->elc & (1 << pin)) == 0) {
|
||||
/* clear the request bit only for edge-triggered pins */
|
||||
pic->request &= ~(1 << pin);
|
||||
}
|
||||
|
||||
if (pic->aeoi == true) {
|
||||
if (pic->rotate == true)
|
||||
pic->lowprio = pin;
|
||||
} else {
|
||||
pic->service |= (1 << pin);
|
||||
}
|
||||
}
|
||||
|
||||
void vpic_intr_accepted(struct vm *vm, int vector)
|
||||
{
|
||||
struct vpic *vpic;
|
||||
int pin;
|
||||
|
||||
vpic = vm_pic(vm);
|
||||
|
||||
VPIC_LOCK(vpic);
|
||||
|
||||
pin = vector & 0x7;
|
||||
|
||||
if ((vector & ~0x7) == vpic->pic[1].irq_base) {
|
||||
vpic_pin_accepted(&vpic->pic[1], pin);
|
||||
/*
|
||||
* If this vector originated from the slave,
|
||||
* accept the cascaded interrupt too.
|
||||
*/
|
||||
vpic_pin_accepted(&vpic->pic[0], 2);
|
||||
} else {
|
||||
vpic_pin_accepted(&vpic->pic[0], pin);
|
||||
}
|
||||
|
||||
vpic_notify_intr(vpic);
|
||||
|
||||
VPIC_UNLOCK(vpic);
|
||||
}
|
||||
|
||||
static int vpic_read(struct vpic *vpic, struct pic *pic,
|
||||
int port, uint32_t *eax)
|
||||
{
|
||||
int pin;
|
||||
|
||||
VPIC_LOCK(vpic);
|
||||
|
||||
if (pic->poll) {
|
||||
pic->poll = 0;
|
||||
pin = vpic_get_highest_irrpin(pic);
|
||||
if (pin >= 0) {
|
||||
vpic_pin_accepted(pic, pin);
|
||||
*eax = 0x80 | pin;
|
||||
} else {
|
||||
*eax = 0;
|
||||
}
|
||||
} else {
|
||||
if (port & ICU_IMR_OFFSET) {
|
||||
/* read interrupt mask register */
|
||||
*eax = pic->mask;
|
||||
} else {
|
||||
if (pic->rd_cmd_reg == OCW3_RIS) {
|
||||
/* read interrupt service register */
|
||||
*eax = pic->service;
|
||||
} else {
|
||||
/* read interrupt request register */
|
||||
*eax = pic->request;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
VPIC_UNLOCK(vpic);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vpic_write(struct vpic *vpic, struct pic *pic,
|
||||
int port, uint32_t *eax)
|
||||
{
|
||||
int error;
|
||||
uint8_t val;
|
||||
|
||||
error = 0;
|
||||
val = *eax;
|
||||
|
||||
VPIC_LOCK(vpic);
|
||||
|
||||
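/* Data port (base + 1): ICW2..ICW4 while the init sequence is in
 * progress, otherwise OCW1 (the interrupt mask). Command port: bit 4
 * set means ICW1; once the PIC is ready, bit 3 selects OCW3 over OCW2.
 */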
if (port & ICU_IMR_OFFSET) {
|
||||
switch (pic->icw_num) {
|
||||
case 2:
|
||||
error = vpic_icw2(vpic, pic, val);
|
||||
break;
|
||||
case 3:
|
||||
error = vpic_icw3(vpic, pic, val);
|
||||
break;
|
||||
case 4:
|
||||
error = vpic_icw4(vpic, pic, val);
|
||||
break;
|
||||
default:
|
||||
error = vpic_ocw1(vpic, pic, val);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (val & (1 << 4))
|
||||
error = vpic_icw1(vpic, pic, val);
|
||||
|
||||
if (pic->ready) {
|
||||
if (val & (1 << 3))
|
||||
error = vpic_ocw3(vpic, pic, val);
|
||||
else
|
||||
error = vpic_ocw2(vpic, pic, val);
|
||||
}
|
||||
}
|
||||
|
||||
if (pic->ready)
|
||||
vpic_notify_intr(vpic);
|
||||
|
||||
VPIC_UNLOCK(vpic);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static int vpic_master_handler(struct vm *vm, bool in, int port, int bytes,
|
||||
uint32_t *eax)
|
||||
{
|
||||
struct vpic *vpic;
|
||||
struct pic *pic;
|
||||
|
||||
vpic = vm_pic(vm);
|
||||
pic = &vpic->pic[0];
|
||||
|
||||
if (bytes != 1)
|
||||
return -1;
|
||||
|
||||
if (in)
|
||||
return vpic_read(vpic, pic, port, eax);
|
||||
|
||||
return vpic_write(vpic, pic, port, eax);
|
||||
}
|
||||
|
||||
static uint32_t vpic_master_io_read(__unused struct vm_io_handler *hdlr,
|
||||
struct vm *vm, ioport_t addr, size_t width)
|
||||
{
|
||||
uint32_t val = 0;
|
||||
|
||||
if (vpic_master_handler(vm, true, (int)addr, (int)width, &val) < 0)
|
||||
pr_err("pic master read port 0x%x width=%d failed\n",
|
||||
addr, width);
|
||||
return val;
|
||||
}
|
||||
|
||||
static void vpic_master_io_write(__unused struct vm_io_handler *hdlr,
|
||||
struct vm *vm, ioport_t addr, size_t width, uint32_t v)
|
||||
{
|
||||
uint32_t val = v;
|
||||
|
||||
if (vpic_master_handler(vm, false, (int)addr, (int)width, &val) < 0)
|
||||
pr_err("%s: write port 0x%x width=%d value 0x%x failed\n",
|
||||
__func__, addr, width, val);
|
||||
}
|
||||
|
||||
static int vpic_slave_handler(struct vm *vm, bool in, int port, int bytes,
|
||||
uint32_t *eax)
|
||||
{
|
||||
struct vpic *vpic;
|
||||
struct pic *pic;
|
||||
|
||||
vpic = vm_pic(vm);
|
||||
pic = &vpic->pic[1];
|
||||
|
||||
if (bytes != 1)
|
||||
return -1;
|
||||
|
||||
if (in)
|
||||
return vpic_read(vpic, pic, port, eax);
|
||||
|
||||
return vpic_write(vpic, pic, port, eax);
|
||||
}
|
||||
|
||||
static uint32_t vpic_slave_io_read(__unused struct vm_io_handler *hdlr,
|
||||
struct vm *vm, ioport_t addr, size_t width)
|
||||
{
|
||||
uint32_t val = 0;
|
||||
|
||||
if (vpic_slave_handler(vm, true, (int)addr, (int)width, &val) < 0)
|
||||
pr_err("pic slave read port 0x%x width=%d failed\n",
|
||||
addr, width);
|
||||
return val;
|
||||
}
|
||||
|
||||
static void vpic_slave_io_write(__unused struct vm_io_handler *hdlr,
|
||||
struct vm *vm, ioport_t addr, size_t width, uint32_t v)
|
||||
{
|
||||
uint32_t val = v;
|
||||
|
||||
if (vpic_slave_handler(vm, false, (int)addr, (int)width, &val) < 0)
|
||||
pr_err("%s: write port 0x%x width=%d value 0x%x failed\n",
|
||||
__func__, addr, width, val);
|
||||
}
|
||||
|
||||
static int vpic_elc_handler(struct vm *vm, bool in, int port, int bytes,
|
||||
uint32_t *eax)
|
||||
{
|
||||
struct vpic *vpic;
|
||||
bool is_master;
|
||||
|
||||
vpic = vm_pic(vm);
|
||||
is_master = (port == IO_ELCR1);
|
||||
|
||||
if (bytes != 1)
|
||||
return -1;
|
||||
|
||||
VPIC_LOCK(vpic);
|
||||
|
||||
if (in) {
|
||||
if (is_master)
|
||||
*eax = vpic->pic[0].elc;
|
||||
else
|
||||
*eax = vpic->pic[1].elc;
|
||||
} else {
|
||||
/*
|
||||
* For the master PIC the cascade channel (IRQ2), the
|
||||
* heart beat timer (IRQ0), and the keyboard
|
||||
* controller (IRQ1) cannot be programmed for level
|
||||
* mode.
|
||||
*
|
||||
* For the slave PIC the real time clock (IRQ8) and
|
||||
* the floating point error interrupt (IRQ13) cannot
|
||||
* be programmed for level mode.
|
||||
*/
|
||||
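/* 0xf8 keeps IRQ0-IRQ2 edge-triggered on the master; 0xde keeps
 * IRQ8 (bit 0) and IRQ13 (bit 5) edge-triggered on the slave.
 */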
if (is_master)
|
||||
vpic->pic[0].elc = (*eax & 0xf8);
|
||||
else
|
||||
vpic->pic[1].elc = (*eax & 0xde);
|
||||
}
|
||||
|
||||
VPIC_UNLOCK(vpic);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static uint32_t vpic_elc_io_read(__unused struct vm_io_handler *hdlr,
|
||||
struct vm *vm, ioport_t addr, size_t width)
|
||||
{
|
||||
uint32_t val = 0;
|
||||
|
||||
if (vpic_elc_handler(vm, true, (int)addr, (int)width, &val) < 0)
|
||||
pr_err("pic elc read port 0x%x width=%d failed", addr, width);
|
||||
return val;
|
||||
}
|
||||
|
||||
static void vpic_elc_io_write(__unused struct vm_io_handler *hdlr,
|
||||
struct vm *vm, ioport_t addr, size_t width, uint32_t v)
|
||||
{
|
||||
uint32_t val = v;
|
||||
|
||||
if (vpic_elc_handler(vm, false, (int)addr, (int)width, &val) < 0)
|
||||
pr_err("%s: write port 0x%x width=%d value 0x%x failed\n",
|
||||
__func__, addr, width, val);
|
||||
}
|
||||
|
||||
void vpic_register_io_handler(struct vm *vm)
|
||||
{
|
||||
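/* Standard PC ports: 0x20-0x21 master PIC, 0xa0-0xa1 slave PIC,
 * 0x4d0-0x4d1 edge/level control registers.
 */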
struct vm_io_range master_range = {
|
||||
.flags = IO_ATTR_RW,
|
||||
.base = 0x20,
|
||||
.len = 2
|
||||
};
|
||||
struct vm_io_range slave_range = {
|
||||
.flags = IO_ATTR_RW,
|
||||
.base = 0xa0,
|
||||
.len = 2
|
||||
};
|
||||
struct vm_io_range elcr_range = {
|
||||
.flags = IO_ATTR_RW,
|
||||
.base = 0x4d0,
|
||||
.len = 2
|
||||
};
|
||||
|
||||
register_io_emulation_handler(vm, &master_range,
|
||||
&vpic_master_io_read, &vpic_master_io_write);
|
||||
register_io_emulation_handler(vm, &slave_range,
|
||||
&vpic_slave_io_read, &vpic_slave_io_write);
|
||||
register_io_emulation_handler(vm, &elcr_range,
|
||||
&vpic_elc_io_read, &vpic_elc_io_write);
|
||||
}
|
||||
|
||||
void *vpic_init(struct vm *vm)
|
||||
{
|
||||
struct vpic *vpic;
|
||||
|
||||
vpic_register_io_handler(vm);
|
||||
|
||||
vpic = malloc(sizeof(struct vpic));
|
||||
ASSERT(vpic != NULL, "");
|
||||
vpic->vm = vm;
|
||||
vpic->pic[0].mask = 0xff;
|
||||
vpic->pic[1].mask = 0xff;
|
||||
|
||||
VPIC_LOCK_INIT(vpic);
|
||||
|
||||
return vpic;
|
||||
}
|
||||
|
||||
void vpic_cleanup(struct vm *vm)
|
||||
{
|
||||
if (vm->vpic) {
|
||||
free(vm->vpic);
|
||||
vm->vpic = NULL;
|
||||
}
|
||||
}
|
441
hypervisor/arch/x86/idt.S
Normal file
@@ -0,0 +1,441 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <gdt.h>
|
||||
#include <idt.h>
|
||||
|
||||
.altmacro
|
||||
|
||||
.global HOST_IDT
|
||||
.global HOST_IDTR
|
||||
|
||||
.section .data
|
||||
.align 8
|
||||
.long 0
|
||||
.short 0
|
||||
HOST_IDTR:
|
||||
.short HOST_IDT_SIZE - 1
|
||||
.quad HOST_IDT
|
||||
|
||||
/*
|
||||
* We'll rearrange and fix up the descriptors at runtime
|
||||
*/
|
||||
.macro interrupt_descriptor entry, dpl=0, ist=0
|
||||
.long HOST_GDT_RING0_CODE_SEL << 16
|
||||
.long 0x00008e00 + (dpl << 13) + ist
|
||||
.quad entry
|
||||
.endm
|
||||
|
||||
.macro trap_descriptor entry, dpl=0, ist=0
|
||||
.long HOST_GDT_RING0_CODE_SEL << 16
|
||||
.long 0x00008f00 + (dpl << 13) + ist
|
||||
.quad entry
|
||||
.endm
|
||||
|
||||
|
||||
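/* Two-level macro: with .altmacro in effect, %vector evaluates the
 * counter to its numeric value so it can be pasted into the handler
 * symbol name.
 */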
.macro _external_interrupt_descriptor vector
|
||||
__external_interrupt_descriptor %vector
|
||||
.endm
|
||||
|
||||
|
||||
.macro __external_interrupt_descriptor vector
|
||||
interrupt_descriptor external_interrupt_\vector
|
||||
.endm
|
||||
|
||||
#define MACHINE_CHECK_IST (0x1)
|
||||
#define DOUBLE_FAULT_IST (0x2)
|
||||
#define STACK_FAULT_IST (0x3)
|
||||
|
||||
/*
|
||||
* We'll use interrupt gates. Change to trap or task only as needed.
|
||||
*/
|
||||
.section .rodata
|
||||
.align 16
|
||||
HOST_IDT:
|
||||
interrupt_descriptor excp_divide_error
|
||||
interrupt_descriptor excp_debug, 3
|
||||
interrupt_descriptor excp_nmi
|
||||
interrupt_descriptor excp_breakpoint, 3
|
||||
interrupt_descriptor excp_overflow, 3
|
||||
interrupt_descriptor excp_bounds_check
|
||||
interrupt_descriptor excp_illegal_opcode
|
||||
interrupt_descriptor excp_device_not_available
|
||||
interrupt_descriptor excp_double_fault, 0, DOUBLE_FAULT_IST
|
||||
interrupt_descriptor excp_rsvd_09
|
||||
interrupt_descriptor excp_invalid_tss
|
||||
interrupt_descriptor excp_segment_not_present
|
||||
interrupt_descriptor excp_stack_fault, 0, STACK_FAULT_IST
|
||||
interrupt_descriptor excp_general_protection
|
||||
interrupt_descriptor excp_page_fault
|
||||
interrupt_descriptor excp_rsvd_0f
|
||||
interrupt_descriptor excp_float_error
|
||||
interrupt_descriptor excp_alignment_check
|
||||
interrupt_descriptor expt_machine_check, 0, MACHINE_CHECK_IST
|
||||
interrupt_descriptor excp_simd_fp_error
|
||||
interrupt_descriptor excp_virtualization
|
||||
interrupt_descriptor excp_rsvd_21
|
||||
interrupt_descriptor excp_rsvd_22
|
||||
interrupt_descriptor excp_rsvd_23
|
||||
interrupt_descriptor excp_rsvd_24
|
||||
interrupt_descriptor excp_rsvd_25
|
||||
interrupt_descriptor excp_rsvd_26
|
||||
interrupt_descriptor excp_rsvd_27
|
||||
interrupt_descriptor excp_rsvd_28
|
||||
interrupt_descriptor excp_rsvd_29
|
||||
interrupt_descriptor excp_rsvd_30
|
||||
interrupt_descriptor excp_rsvd_31
|
||||
|
||||
vector = 0x20
|
||||
.rept (0x100 - 0x20)
|
||||
_external_interrupt_descriptor vector
|
||||
vector = vector + 1
|
||||
.endr
|
||||
|
||||
.section .text
|
||||
.align 16
|
||||
excp_divide_error:
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $0x00
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_debug:
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $0x01
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_nmi:
|
||||
|
||||
|
||||
|
||||
|
||||
.align 8
|
||||
excp_breakpoint:
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $0x03
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_overflow:
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $0x04
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_bounds_check:
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $0x05
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_illegal_opcode:
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $0x06
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_device_not_available:
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $0x07
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_double_fault:
|
||||
pushq $0x08
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_invalid_tss:
|
||||
pushq $0x0A
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_segment_not_present:
|
||||
pushq $0x0B
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_stack_fault:
|
||||
pushq $0x0C
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_general_protection:
|
||||
pushq $0x0D
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_page_fault:
|
||||
pushq $0x0E
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_float_error:
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $0x10
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_alignment_check:
|
||||
pushq $0x11
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
expt_machine_check:
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $0x12
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_simd_fp_error:
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $0x13
|
||||
jmp excp_save_frame
|
||||
|
||||
.align 8
|
||||
excp_virtualization:
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $0x14
|
||||
jmp excp_save_frame
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Macros for rsvd vectors. Vectors 0x09, 0x0F, 0x15 through 0x1F
|
||||
*/
|
||||
.macro _rsvd_vector vector
|
||||
__rsvd_vector %vector
|
||||
.endm
|
||||
|
||||
.macro __rsvd_vector vector
|
||||
.align 8
|
||||
excp_rsvd_\vector\():
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $\vector
|
||||
jmp excp_rsvd
|
||||
.endm
|
||||
|
||||
.align 8
|
||||
excp_rsvd_09:
|
||||
_rsvd_vector 0x09
|
||||
|
||||
.align 8
|
||||
excp_rsvd_0f:
|
||||
_rsvd_vector 0x0f
|
||||
|
||||
vector = 0x15
|
||||
.rept (0x20 - 0x15)
|
||||
_rsvd_vector vector
|
||||
vector = vector + 1
|
||||
.endr
|
||||
|
||||
|
||||
|
||||
/*
|
||||
 * Macros for external interrupts. Vectors 0x20 through 0xFF
|
||||
*/
|
||||
.macro _external_interrupt vector
|
||||
__external_interrupt %vector
|
||||
.endm
|
||||
|
||||
.macro __external_interrupt vector
|
||||
.align 8
|
||||
external_interrupt_\vector\():
|
||||
pushq $0x0 /* pseudo error code */
|
||||
pushq $\vector
|
||||
jmp external_interrupt_save_frame
|
||||
.endm
|
||||
|
||||
vector = 0x20
|
||||
.rept (0x100 - 0x20)
|
||||
_external_interrupt vector
|
||||
vector = vector + 1
|
||||
.endr
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Common entry point for defined exceptions
|
||||
*/
|
||||
.align 8
|
||||
excp_save_frame:
|
||||
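/* The vector and (pseudo) error code are already on the stack; save the
 * remaining general purpose registers and hand the frame to
 * dispatch_exception through %rdi.
 */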
pushq %r11
|
||||
pushq %r10
|
||||
pushq %r9
|
||||
pushq %r8
|
||||
pushq %rdi
|
||||
pushq %rsi
|
||||
pushq %rdx
|
||||
pushq %rcx
|
||||
pushq %rax
|
||||
pushq %rbp
|
||||
pushq %rbx
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r13
|
||||
pushq %r12
|
||||
|
||||
/* Put current stack pointer into 1st param register (rdi) */
|
||||
movq %rsp, %rdi
|
||||
|
||||
call dispatch_exception
|
||||
|
||||
popq %r12
|
||||
popq %r13
|
||||
popq %r14
|
||||
popq %r15
|
||||
popq %rbx
|
||||
popq %rbp
|
||||
popq %rax
|
||||
popq %rcx
|
||||
popq %rdx
|
||||
popq %rsi
|
||||
popq %rdi
|
||||
popq %r8
|
||||
popq %r9
|
||||
popq %r10
|
||||
popq %r11
|
||||
|
||||
/* Skip vector and error code*/
|
||||
add $16, %rsp
|
||||
|
||||
iretq
|
||||
|
||||
|
||||
/*
|
||||
* Common entry point for reserved exceptions.
|
||||
* These should never execute.
|
||||
* We put a handler on them anyway to highlight the unexpected.
|
||||
*/
|
||||
.align 8
|
||||
excp_rsvd:
|
||||
pushq %r11
|
||||
pushq %r10
|
||||
pushq %r9
|
||||
pushq %r8
|
||||
pushq %rdi
|
||||
pushq %rsi
|
||||
pushq %rdx
|
||||
pushq %rcx
|
||||
pushq %rax
|
||||
|
||||
|
||||
pushq %rbp
|
||||
pushq %rbx
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r13
|
||||
pushq %r12
|
||||
|
||||
/* Put current stack pointer into 1st param register (rdi) */
|
||||
movq %rsp, %rdi
|
||||
|
||||
call dispatch_exception
|
||||
|
||||
popq %r12
|
||||
popq %r13
|
||||
popq %r14
|
||||
popq %r15
|
||||
popq %rbx
|
||||
popq %rbp
|
||||
|
||||
popq %rax
|
||||
popq %rcx
|
||||
popq %rdx
|
||||
popq %rsi
|
||||
popq %rdi
|
||||
popq %r8
|
||||
popq %r9
|
||||
popq %r10
|
||||
popq %r11
|
||||
|
||||
/* Skip vector and error code*/
|
||||
add $16, %rsp
|
||||
|
||||
iretq
|
||||
|
||||
|
||||
/*
|
||||
* Common entry point for defined interrupts.
|
||||
* Vectors 0x20 through 0xFF
|
||||
*/
|
||||
.align 8
|
||||
external_interrupt_save_frame:
|
||||
pushq %r11
|
||||
pushq %r10
|
||||
pushq %r9
|
||||
pushq %r8
|
||||
pushq %rdi
|
||||
pushq %rsi
|
||||
pushq %rdx
|
||||
pushq %rcx
|
||||
pushq %rax
|
||||
|
||||
|
||||
pushq %rbp
|
||||
pushq %rbx
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r13
|
||||
pushq %r12
|
||||
|
||||
/* Put current stack pointer into 1st param register (rdi) */
|
||||
movq %rsp, %rdi
|
||||
|
||||
call dispatch_interrupt
|
||||
|
||||
/*
|
||||
* We skip the softirq path on interrupt IRET, since right now all IRQs
|
||||
* are for the guest and softirqs can be executed in the hv_main() loop
|
||||
*/
|
||||
|
||||
popq %r12
|
||||
popq %r13
|
||||
popq %r14
|
||||
popq %r15
|
||||
popq %rbx
|
||||
popq %rbp
|
||||
|
||||
popq %rax
|
||||
popq %rcx
|
||||
popq %rdx
|
||||
popq %rsi
|
||||
popq %rdi
|
||||
popq %r8
|
||||
popq %r9
|
||||
popq %r10
|
||||
popq %r11
|
||||
|
||||
/* Skip vector and error code*/
|
||||
add $16, %rsp
|
||||
|
||||
iretq
|
||||
|
431
hypervisor/arch/x86/interrupt.c
Normal file
@@ -0,0 +1,431 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
#define EXCEPTION_ERROR_CODE_VALID 8
|
||||
#define INTERRPUT_QUEUE_BUFF_SIZE 255
|
||||
|
||||
#define ACRN_DBG_INTR 6
|
||||
|
||||
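/* Exceptions 8 (#DF), 10-14 (#TS, #NP, #SS, #GP, #PF) and 17 (#AC)
 * deliver a hardware error code, hence the EXCEPTION_ERROR_CODE_VALID
 * flag on those entries.
 */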
static const uint16_t exception_type[] = {
|
||||
[0] = VMX_INT_TYPE_HW_EXP,
|
||||
[1] = VMX_INT_TYPE_HW_EXP,
|
||||
[2] = VMX_INT_TYPE_HW_EXP,
|
||||
[3] = VMX_INT_TYPE_HW_EXP,
|
||||
[4] = VMX_INT_TYPE_HW_EXP,
|
||||
[5] = VMX_INT_TYPE_HW_EXP,
|
||||
[6] = VMX_INT_TYPE_HW_EXP,
|
||||
[7] = VMX_INT_TYPE_HW_EXP,
|
||||
[8] = VMX_INT_TYPE_HW_EXP | EXCEPTION_ERROR_CODE_VALID,
|
||||
[9] = VMX_INT_TYPE_HW_EXP,
|
||||
[10] = VMX_INT_TYPE_HW_EXP | EXCEPTION_ERROR_CODE_VALID,
|
||||
[11] = VMX_INT_TYPE_HW_EXP | EXCEPTION_ERROR_CODE_VALID,
|
||||
[12] = VMX_INT_TYPE_HW_EXP | EXCEPTION_ERROR_CODE_VALID,
|
||||
[13] = VMX_INT_TYPE_HW_EXP | EXCEPTION_ERROR_CODE_VALID,
|
||||
[14] = VMX_INT_TYPE_HW_EXP | EXCEPTION_ERROR_CODE_VALID,
|
||||
[15] = VMX_INT_TYPE_HW_EXP,
|
||||
[16] = VMX_INT_TYPE_HW_EXP,
|
||||
[17] = VMX_INT_TYPE_HW_EXP | EXCEPTION_ERROR_CODE_VALID,
|
||||
[18] = VMX_INT_TYPE_HW_EXP,
|
||||
[19] = VMX_INT_TYPE_HW_EXP,
|
||||
[20] = VMX_INT_TYPE_HW_EXP,
|
||||
[21] = VMX_INT_TYPE_HW_EXP,
|
||||
[22] = VMX_INT_TYPE_HW_EXP,
|
||||
[23] = VMX_INT_TYPE_HW_EXP,
|
||||
[24] = VMX_INT_TYPE_HW_EXP,
|
||||
[25] = VMX_INT_TYPE_HW_EXP,
|
||||
[26] = VMX_INT_TYPE_HW_EXP,
|
||||
[27] = VMX_INT_TYPE_HW_EXP,
|
||||
[28] = VMX_INT_TYPE_HW_EXP,
|
||||
[29] = VMX_INT_TYPE_HW_EXP,
|
||||
[30] = VMX_INT_TYPE_HW_EXP,
|
||||
[31] = VMX_INT_TYPE_HW_EXP
|
||||
};
|
||||
|
||||
static int is_guest_irq_enabled(struct vcpu *vcpu)
|
||||
{
|
||||
struct run_context *cur_context =
|
||||
&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
|
||||
uint32_t guest_rflags, guest_state;
|
||||
int status = false;
|
||||
|
||||
/* Read the RFLAGS of the guest */
|
||||
guest_rflags = cur_context->rflags;
|
||||
/* Check the RFLAGS[IF] bit first */
|
||||
if (guest_rflags & HV_ARCH_VCPU_RFLAGS_IF) {
|
||||
/* Interrupts are allowed */
|
||||
/* Check for temporarily disabled interrupts */
|
||||
guest_state = exec_vmread(VMX_GUEST_INTERRUPTIBILITY_INFO);
|
||||
|
||||
if ((guest_state & (HV_ARCH_VCPU_BLOCKED_BY_STI |
|
||||
HV_ARCH_VCPU_BLOCKED_BY_MOVSS)) == 0) {
|
||||
status = true;
|
||||
}
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
static bool vcpu_pending_request(struct vcpu *vcpu)
|
||||
{
|
||||
struct vlapic *vlapic;
|
||||
int vector = 0;
|
||||
int ret = 0;
|
||||
|
||||
/* Query vLapic to get vector to inject */
|
||||
vlapic = vcpu->arch_vcpu.vlapic;
|
||||
ret = vlapic_pending_intr(vlapic, &vector);
|
||||
|
||||
/* we need to check and raise request if we have pending event
|
||||
* in LAPIC IRR
|
||||
*/
|
||||
if (ret != 0) {
|
||||
/* we have pending IRR */
|
||||
vcpu_make_request(vcpu, ACRN_REQUEST_EVENT);
|
||||
}
|
||||
|
||||
return vcpu->arch_vcpu.pending_intr != 0;
|
||||
}
|
||||
|
||||
int vcpu_make_request(struct vcpu *vcpu, int eventid)
|
||||
{
|
||||
bitmap_set(eventid, &vcpu->arch_vcpu.pending_intr);
|
||||
/*
|
||||
* if current hostcpu is not the target vcpu's hostcpu, we need
|
||||
* to invoke IPI to wake up target vcpu
|
||||
*
|
||||
* TODO: Here we just compare with cpuid, since cpuid currently is
|
||||
* global under the pCPU / vCPU 1:1 mapping. If vcpu scheduling is
|
||||
* enabled later, this needs to change to determine whether the target
|
||||
* vcpu is in VMX non-root or root mode
|
||||
*/
|
||||
if ((int)get_cpu_id() != vcpu->pcpu_id)
|
||||
send_single_ipi(vcpu->pcpu_id, VECTOR_NOTIFY_VCPU);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vcpu_do_pending_event(struct vcpu *vcpu)
|
||||
{
|
||||
struct vlapic *vlapic = vcpu->arch_vcpu.vlapic;
|
||||
int vector = 0;
|
||||
int ret = 0;
|
||||
|
||||
if (is_apicv_enabled()) {
|
||||
apicv_inject_pir(vlapic);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Query vLapic to get vector to inject */
|
||||
ret = vlapic_pending_intr(vlapic, &vector);
|
||||
|
||||
/*
|
||||
* From the Intel SDM, Volume 3, 6.3.2 Section "Maskable
|
||||
* Hardware Interrupts":
|
||||
* - maskable interrupt vectors [16,255] can be delivered
|
||||
* through the local APIC.
|
||||
*/
|
||||
if (ret == 0)
|
||||
return -1;
|
||||
|
||||
if (!(vector >= 16 && vector <= 255)) {
|
||||
dev_dbg(ACRN_DBG_INTR, "invalid vector %d from local APIC",
|
||||
vector);
|
||||
return -1;
|
||||
}
|
||||
|
||||
exec_vmwrite(VMX_ENTRY_INT_INFO_FIELD, VMX_INT_INFO_VALID |
|
||||
(vector & 0xFF));
|
||||
|
||||
vlapic_intr_accepted(vlapic, vector);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vcpu_do_pending_extint(struct vcpu *vcpu)
|
||||
{
|
||||
struct vm *vm;
|
||||
struct vcpu *primary;
|
||||
int vector;
|
||||
|
||||
vm = vcpu->vm;
|
||||
|
||||
/* check if there is valid interrupt from vPIC, if yes just inject it */
|
||||
/* PIC only connect with primary CPU */
|
||||
primary = get_primary_vcpu(vm);
|
||||
if (vm->vpic && vcpu == primary) {
|
||||
|
||||
vpic_pending_intr(vcpu->vm, &vector);
|
||||
if (vector > 0) {
|
||||
dev_dbg(ACRN_DBG_INTR, "VPIC: to inject PIC vector %d\n",
|
||||
vector & 0xFF);
|
||||
exec_vmwrite(VMX_ENTRY_INT_INFO_FIELD,
|
||||
VMX_INT_INFO_VALID |
|
||||
(vector & 0xFF));
|
||||
vpic_intr_accepted(vcpu->vm, vector);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vcpu_do_pending_gp(__unused struct vcpu *vcpu)
|
||||
{
|
||||
/* GP vector = 13 */
|
||||
exec_vmwrite(VMX_ENTRY_INT_INFO_FIELD,
|
||||
VMX_INT_INFO_VALID | 13);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* please keep this for interrupt debug:
|
||||
* 1. Timer alive or not
|
||||
* 2. native LAPIC interrupt pending/EOI status
|
||||
* 3. CPU stuck or not
|
||||
*/
|
||||
void dump_lapic(void)
|
||||
{
|
||||
dev_dbg(ACRN_DBG_INTR,
|
||||
"LAPIC: TIME %08x, init=0x%x cur=0x%x ISR=0x%x IRR=0x%x",
|
||||
mmio_read_long(0xFEE00000 + LAPIC_LVT_TIMER_REGISTER),
|
||||
mmio_read_long(0xFEE00000 + LAPIC_INITIAL_COUNT_REGISTER),
|
||||
mmio_read_long(0xFEE00000 + LAPIC_CURRENT_COUNT_REGISTER),
|
||||
mmio_read_long(0xFEE00000 + LAPIC_IN_SERVICE_REGISTER_7),
|
||||
mmio_read_long(0xFEE00000 + LAPIC_INT_REQUEST_REGISTER_7));
|
||||
}
|
||||
|
||||
int vcpu_inject_extint(struct vcpu *vcpu)
|
||||
{
|
||||
return vcpu_make_request(vcpu, ACRN_REQUEST_EXTINT);
|
||||
}
|
||||
|
||||
int vcpu_inject_nmi(struct vcpu *vcpu)
|
||||
{
|
||||
return vcpu_make_request(vcpu, ACRN_REQUEST_NMI);
|
||||
}
|
||||
|
||||
int vcpu_inject_gp(struct vcpu *vcpu)
|
||||
{
|
||||
return vcpu_make_request(vcpu, ACRN_REQUEST_GP);
|
||||
}
|
||||
|
||||
int interrupt_win_exiting_handler(struct vcpu *vcpu)
|
||||
{
|
||||
int value32;
|
||||
|
||||
TRACE_2L(TRC_VMEXIT_INTERRUPT_WINDOW, 0, 0);
|
||||
|
||||
if (!vcpu)
|
||||
return -1;
|
||||
|
||||
if (vcpu_pending_request(vcpu)) {
|
||||
/* Do nothing
|
||||
* acrn_do_intr_process will continue for this vcpu
|
||||
*/
|
||||
} else {
|
||||
/* No interrupts to inject.
|
||||
* Disable the interrupt window exiting
|
||||
*/
|
||||
vcpu->arch_vcpu.irq_window_enabled = 0;
|
||||
value32 = exec_vmread(VMX_PROC_VM_EXEC_CONTROLS);
|
||||
value32 &= ~(VMX_PROCBASED_CTLS_IRQ_WIN);
|
||||
exec_vmwrite(VMX_PROC_VM_EXEC_CONTROLS, value32);
|
||||
}
|
||||
|
||||
VCPU_RETAIN_RIP(vcpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int external_interrupt_handler(struct vcpu *vcpu)
|
||||
{
|
||||
int vector = exec_vmread(VMX_EXIT_INT_INFO) & 0xFF;
|
||||
struct intr_ctx ctx;
|
||||
|
||||
ctx.vector = vector;
|
||||
/* do not RETAIN RIP for spurious interrupt */
|
||||
if (dispatch_interrupt(&ctx) == 0)
|
||||
VCPU_RETAIN_RIP(vcpu);
|
||||
|
||||
TRACE_2L(TRC_VMEXIT_EXTERNAL_INTERRUPT, vector, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
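/* Injection order below: a previously failed injection is retried
 * first, then exception, NMI, ExtINT, vLAPIC event and finally #GP;
 * if something is still pending afterwards, interrupt-window exiting
 * is enabled so injection is retried on the next VM exit.
 */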
int acrn_do_intr_process(struct vcpu *vcpu)
|
||||
{
|
||||
int ret = 0;
|
||||
int vector;
|
||||
int tmp;
|
||||
bool intr_pending = false;
|
||||
uint64_t *pending_intr_bits = &vcpu->arch_vcpu.pending_intr;
|
||||
|
||||
if (bitmap_test_and_clear(ACRN_REQUEST_TLB_FLUSH, pending_intr_bits))
|
||||
mmu_invept(vcpu);
|
||||
|
||||
if (bitmap_test_and_clear(ACRN_REQUEST_TMR_UPDATE, pending_intr_bits))
|
||||
vioapic_update_tmr(vcpu);
|
||||
|
||||
/* handling pending vector injection:
|
||||
* the previous injection can fail for many reasons, so re-inject it here
|
||||
*/
|
||||
if (vcpu->arch_vcpu.exit_interrupt_info & VMX_INT_INFO_VALID) {
|
||||
exec_vmwrite(VMX_ENTRY_INT_INFO_FIELD,
|
||||
vcpu->arch_vcpu.exit_interrupt_info);
|
||||
goto INTR_WIN;
|
||||
}
|
||||
|
||||
/* handling exception request */
|
||||
vector = vcpu->arch_vcpu.exception_info.exception;
|
||||
|
||||
/* If there is a valid exception, inject exception to guest */
|
||||
if (vector >= 0) {
|
||||
if (exception_type[vector] &
|
||||
EXCEPTION_ERROR_CODE_VALID) {
|
||||
exec_vmwrite(VMX_ENTRY_EXCEPTION_EC,
|
||||
vcpu->arch_vcpu.exception_info.error);
|
||||
}
|
||||
|
||||
exec_vmwrite(VMX_ENTRY_INT_INFO_FIELD,
|
||||
VMX_INT_INFO_VALID |
|
||||
((exception_type[vector] & 15) << 8)
|
||||
| (vector & 0xFF));
|
||||
|
||||
vcpu->arch_vcpu.exception_info.exception = -1;
|
||||
|
||||
goto INTR_WIN;
|
||||
}
|
||||
|
||||
/* Process pending interrupt requests */
|
||||
/* TODO: check the NMI interrupt window before injecting */
|
||||
if (bitmap_test_and_clear(ACRN_REQUEST_NMI, pending_intr_bits)) {
|
||||
/* Inject NMI vector = 2 */
|
||||
exec_vmwrite(VMX_ENTRY_INT_INFO_FIELD,
|
||||
VMX_INT_INFO_VALID | (VMX_INT_TYPE_NMI << 8) | 2);
|
||||
|
||||
/* Intel SDM 10.8.1
|
||||
* NMI, SMI, INIT, ExtINT, or SIPI directly deliver to CPU
|
||||
* do not need EOI to LAPIC
|
||||
* However, ExtINT need EOI to PIC
|
||||
*/
|
||||
goto INTR_WIN;
|
||||
}
|
||||
|
||||
/* Guest interruptable or not */
|
||||
if (!is_guest_irq_enabled(vcpu)) {
|
||||
/* interrupt window unavailable */
|
||||
goto INTR_WIN;
|
||||
}
|
||||
|
||||
/* Inject external interrupt first */
|
||||
if (bitmap_test_and_clear(ACRN_REQUEST_EXTINT, pending_intr_bits)) {
|
||||
/* has pending external interrupts */
|
||||
ret = vcpu_do_pending_extint(vcpu);
|
||||
goto INTR_WIN;
|
||||
}
|
||||
|
||||
/* Inject vLAPIC vectors */
|
||||
if (bitmap_test_and_clear(ACRN_REQUEST_EVENT, pending_intr_bits)) {
|
||||
/* has pending vLAPIC interrupts */
|
||||
ret = vcpu_do_pending_event(vcpu);
|
||||
goto INTR_WIN;
|
||||
}
|
||||
|
||||
/* Inject GP event */
|
||||
if (bitmap_test_and_clear(ACRN_REQUEST_GP, pending_intr_bits)) {
|
||||
/* has pending GP interrupts */
|
||||
ret = vcpu_do_pending_gp(vcpu);
|
||||
goto INTR_WIN;
|
||||
}
|
||||
|
||||
INTR_WIN:
|
||||
/* check if we have new interrupt pending for next VMExit */
|
||||
intr_pending = vcpu_pending_request(vcpu);
|
||||
|
||||
/* Enable interrupt window exiting if pending */
|
||||
if (intr_pending && vcpu->arch_vcpu.irq_window_enabled == 0) {
|
||||
vcpu->arch_vcpu.irq_window_enabled = 1;
|
||||
tmp = exec_vmread(VMX_PROC_VM_EXEC_CONTROLS);
|
||||
tmp |= (VMX_PROCBASED_CTLS_IRQ_WIN);
|
||||
exec_vmwrite(VMX_PROC_VM_EXEC_CONTROLS, tmp);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int exception_handler(struct vcpu *vcpu)
|
||||
{
|
||||
uint32_t intinfo, int_err_code;
|
||||
uint32_t exception_vector;
|
||||
uint32_t cpl;
|
||||
int status = 0;
|
||||
|
||||
if (vcpu == NULL) {
|
||||
TRACE_4I(TRC_VMEXIT_EXCEPTION_OR_NMI, 0, 0, 0, 0);
|
||||
status = -EINVAL;
|
||||
}
|
||||
|
||||
if (status != 0)
|
||||
return status;
|
||||
|
||||
pr_dbg(" Handling guest exception");
|
||||
|
||||
/* Obtain VM-Exit information field pg 2912 */
|
||||
intinfo = exec_vmread(VMX_EXIT_INT_INFO);
|
||||
exception_vector = intinfo & 0xFF;
|
||||
/* Check if the exception caused by the guest is a HW exception. If the
|
||||
* exit occurred due to a HW exception, obtain the error code to be
|
||||
* conveyed to the guest via the stack
|
||||
*/
|
||||
if (intinfo & VMX_INT_INFO_ERR_CODE_VALID) {
|
||||
int_err_code = exec_vmread(VMX_EXIT_INT_EC);
|
||||
|
||||
/* get current privilege level and fault address */
|
||||
cpl = exec_vmread(VMX_GUEST_CS_ATTR);
|
||||
cpl = (cpl >> 5) & 3;
|
||||
|
||||
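/* Force bit 2 of the error code (the user/supervisor bit of a
 * page-fault error code) to reflect the guest privilege level.
 */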
if (cpl < 3)
|
||||
int_err_code &= ~4;
|
||||
else
|
||||
int_err_code |= 4;
|
||||
} else {
|
||||
int_err_code = 0;
|
||||
}
|
||||
|
||||
/* Handle all other exceptions */
|
||||
VCPU_RETAIN_RIP(vcpu);
|
||||
vcpu->arch_vcpu.exception_info.exception = exception_vector;
|
||||
vcpu->arch_vcpu.exception_info.error = int_err_code;
|
||||
|
||||
TRACE_4I(TRC_VMEXIT_EXCEPTION_OR_NMI,
|
||||
exception_vector, int_err_code, 2, 0);
|
||||
|
||||
return status;
|
||||
}
|
418
hypervisor/arch/x86/intr_lapic.c
Normal file
@@ -0,0 +1,418 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <bsp_extern.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
/* Rate range 1 to 1000 or 1uSec to 1mSec */
|
||||
#define APIC_TIMER_MAX 0xffffffff
|
||||
#define HYPE_PERIOD_MAX 1000
|
||||
#define APIC_DIVIDE_BY_ONE 0x0b
|
||||
#define PIT_TARGET 0x3FFF
|
||||
|
||||
/* xAPIC/x2APIC Interrupt Command Register (ICR) structure */
|
||||
union apic_icr {
|
||||
uint64_t value;
|
||||
struct {
|
||||
uint32_t lo_32;
|
||||
uint32_t hi_32;
|
||||
} value_32;
|
||||
struct {
|
||||
uint64_t vector:8;
|
||||
uint64_t delivery_mode:3;
|
||||
uint64_t destination_mode:1;
|
||||
uint64_t delivery_status:1;
|
||||
uint64_t rsvd_1:1;
|
||||
uint64_t level:1;
|
||||
uint64_t trigger_mode:1;
|
||||
uint64_t rsvd_2:2;
|
||||
uint64_t shorthand:2;
|
||||
uint64_t rsvd_3:12;
|
||||
uint64_t rsvd_4:32;
|
||||
} bits;
|
||||
struct {
|
||||
uint64_t rsvd_1:32;
|
||||
uint64_t rsvd_2:24;
|
||||
uint64_t dest_field:8;
|
||||
} x_bits;
|
||||
struct {
|
||||
uint64_t rsvd_1:32;
|
||||
uint64_t dest_field:32;
|
||||
} x2_bits;
|
||||
};
|
||||
|
||||
/* xAPIC/x2APIC Local Vector Table (LVT) register structure */
|
||||
union apic_lvt {
|
||||
uint32_t value;
|
||||
union {
|
||||
struct {
|
||||
uint32_t vector:8;
|
||||
uint32_t rsvd_1:4;
|
||||
uint32_t delivery_status:1;
|
||||
uint32_t rsvd_2:3;
|
||||
uint32_t mask:1;
|
||||
uint32_t mode:2;
|
||||
uint32_t rsvd_3:13;
|
||||
} timer;
|
||||
struct {
|
||||
uint32_t vector:8;
|
||||
uint32_t delivery_mode:3;
|
||||
uint32_t rsvd_1:1;
|
||||
uint32_t delivery_status:1;
|
||||
uint32_t rsvd_2:3;
|
||||
uint32_t mask:1;
|
||||
uint32_t rsvd_3:15;
|
||||
} cmci;
|
||||
struct {
|
||||
uint32_t vector:8;
|
||||
uint32_t delivery_mode:3;
|
||||
uint32_t rsvd_1:1;
|
||||
uint32_t delivery_status:1;
|
||||
uint32_t polarity:1;
|
||||
uint32_t remote_irr:1;
|
||||
uint32_t trigger_mode:1;
|
||||
uint32_t mask:1;
|
||||
uint32_t rsvd_2:15;
|
||||
} lint;
|
||||
struct {
|
||||
uint32_t vector:8;
|
||||
uint32_t rsvd_1:4;
|
||||
uint32_t delivery_status:1;
|
||||
uint32_t rsvd_2:3;
|
||||
uint32_t mask:1;
|
||||
uint32_t rsvd_3:15;
|
||||
} error;
|
||||
struct {
|
||||
uint32_t vector:8;
|
||||
uint32_t delivery_mode:3;
|
||||
uint32_t rsvd_1:1;
|
||||
uint32_t delivery_status:1;
|
||||
uint32_t rsvd_2:3;
|
||||
uint32_t mask:1;
|
||||
uint32_t rsvd_3:15;
|
||||
} pmc;
|
||||
struct {
|
||||
uint32_t vector:8;
|
||||
uint32_t delivery_mode:3;
|
||||
uint32_t rsvd_1:1;
|
||||
uint32_t delivery_status:1;
|
||||
uint32_t rsvd_2:3;
|
||||
uint32_t mask:1;
|
||||
uint32_t rsvd_3:15;
|
||||
} thermal;
|
||||
struct {
|
||||
uint32_t vector:8;
|
||||
uint32_t rsvd_1:4;
|
||||
uint32_t delivery_status:1;
|
||||
uint32_t rsvd_2:3;
|
||||
uint32_t mask:1;
|
||||
uint32_t rsvd_3:15;
|
||||
} common;
|
||||
} bits;
|
||||
};
|
||||
|
||||
union lapic_base_msr {
	uint64_t value;
	struct {
		uint64_t rsvd_1:8;
		uint64_t bsp:1;
		uint64_t rsvd_2:1;
		uint64_t x2APIC_enable:1;
		uint64_t xAPIC_enable:1;
		uint64_t lapic_paddr:24;
		uint64_t rsvd_3:28;
	} fields;
};

struct lapic_info {
	int init_status;
	struct {
		paddr_t paddr;
		vaddr_t vaddr;
	} xapic;
};

static struct lapic_info lapic_info;

static uint32_t read_lapic_reg32(uint32_t offset)
{
	ASSERT((offset >= 0x020) && (offset <= 0x3FF), "");
	return mmio_read_long(lapic_info.xapic.vaddr + offset);
}

static void write_lapic_reg32(uint32_t offset, uint32_t value)
{
	ASSERT((offset >= 0x020) && (offset <= 0x3FF), "");
	mmio_write_long(value, lapic_info.xapic.vaddr + offset);
}

static void clear_lapic_isr(void)
{
	uint64_t isr_reg = LAPIC_IN_SERVICE_REGISTER_0;

	/* This is an Intel recommended procedure and assures that the
	 * processor does not get hung up due to already set "in-service"
	 * interrupts left over from the boot loader environment. This
	 * actually occurs in real life, therefore we will ensure all the
	 * in-service bits are clear.
	 */
	do {
		if (read_lapic_reg32(isr_reg)) {
			write_lapic_reg32(LAPIC_EOI_REGISTER, 0);
			continue;
		}
		isr_reg += 0x10;
	} while (isr_reg <= LAPIC_IN_SERVICE_REGISTER_7);
}

static void map_lapic(void)
{
	/* At some point we may need to translate this paddr to a vaddr.
	 * 1:1 mapping for now.
	 */
	lapic_info.xapic.vaddr = lapic_info.xapic.paddr;
}

int early_init_lapic(void)
{
	union lapic_base_msr lapic_base_msr;

	/* Get local APIC base address */
	lapic_base_msr.value = msr_read(MSR_IA32_APIC_BASE);

	/* Initialize globals only 1 time */
	if (lapic_info.init_status == false) {
		/* Get Local APIC physical address. */
		lapic_info.xapic.paddr = LAPIC_BASE;

		/* Map in the local xAPIC */
		map_lapic();

		lapic_info.init_status = true;
	}

	/* Check if xAPIC mode enabled */
	if (lapic_base_msr.fields.xAPIC_enable == 0) {
		/* Ensure in xAPIC mode */
		lapic_base_msr.fields.xAPIC_enable = 1;
		lapic_base_msr.fields.x2APIC_enable = 0;
		msr_write(MSR_IA32_APIC_BASE, lapic_base_msr.value);
	} else {
		/* Check if x2apic is disabled */
		ASSERT(lapic_base_msr.fields.x2APIC_enable == 0,
			"Disable X2APIC in BIOS");
	}

	return 0;
}

int init_lapic(uint32_t cpu_id)
{
	/* Set the Logical Destination Register */
	write_lapic_reg32(LAPIC_LOGICAL_DESTINATION_REGISTER,
		(1 << cpu_id) << 24);

	/* Set the Destination Format Register */
	write_lapic_reg32(LAPIC_DESTINATION_FORMAT_REGISTER, 0xf << 28);

	/* Mask all LAPIC LVT entries before enabling the local APIC */
	write_lapic_reg32(LAPIC_LVT_CMCI_REGISTER, LAPIC_LVT_MASK);
	write_lapic_reg32(LAPIC_LVT_TIMER_REGISTER, LAPIC_LVT_MASK);
	write_lapic_reg32(LAPIC_LVT_THERMAL_SENSOR_REGISTER, LAPIC_LVT_MASK);
	write_lapic_reg32(LAPIC_LVT_PMC_REGISTER, LAPIC_LVT_MASK);
	write_lapic_reg32(LAPIC_LVT_LINT0_REGISTER, LAPIC_LVT_MASK);
	write_lapic_reg32(LAPIC_LVT_LINT1_REGISTER, LAPIC_LVT_MASK);
	write_lapic_reg32(LAPIC_LVT_ERROR_REGISTER, LAPIC_LVT_MASK);

	/* Enable Local APIC */
	/* TODO: add spurious-interrupt handler */
	write_lapic_reg32(LAPIC_SPURIOUS_VECTOR_REGISTER,
		LAPIC_SVR_APIC_ENABLE_MASK | LAPIC_SVR_VECTOR);

	/* Ensure there are no ISR bits set. */
	clear_lapic_isr();

	return 0;
}

int send_lapic_eoi(void)
{
	write_lapic_reg32(LAPIC_EOI_REGISTER, 0);
	return 0;
}

static void wait_for_delivery(void)
{
	union apic_icr tmp;

	do {
		tmp.value_32.lo_32 =
			read_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_0);
	} while (tmp.bits.delivery_status);
}

uint32_t get_cur_lapic_id(void)
{
	uint32_t lapic_id;

	lapic_id = read_lapic_reg32(LAPIC_ID_REGISTER);
	lapic_id = (lapic_id >> 24);

	return lapic_id;
}

int
send_startup_ipi(enum intr_cpu_startup_shorthand cpu_startup_shorthand,
	uint32_t cpu_startup_dest, paddr_t cpu_startup_start_address)
{
	union apic_icr icr;
	uint8_t shorthand;
	int status = 0;
	uint32_t eax, ebx, ecx, edx;
	uint32_t family;

	if (cpu_startup_shorthand >= INTR_CPU_STARTUP_UNKNOWN)
		status = -EINVAL;

	ASSERT(status == 0, "Incorrect arguments");

	icr.value = 0;
	icr.bits.destination_mode = INTR_LAPIC_ICR_PHYSICAL;

	if (cpu_startup_shorthand == INTR_CPU_STARTUP_USE_DEST) {
		shorthand = INTR_LAPIC_ICR_USE_DEST_ARRAY;
		icr.x_bits.dest_field = per_cpu(lapic_id, cpu_startup_dest);
	} else { /* Use destination shorthand */
		shorthand = INTR_LAPIC_ICR_ALL_EX_SELF;
		icr.value_32.hi_32 = 0;
	}

	/*
	 * family calculation from SDM Vol. 2A
	 * CPUID with INPUT EAX=01h: Returns Model, Family, Stepping Information
	 */
	cpuid(CPUID_FEATURES, &eax, &ebx, &ecx, &edx);
	family = (eax >> 8) & 0xff;
	if (family == 0xF)
		family += (eax >> 20) & 0xff;

	/* Assert INIT IPI */
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_1, icr.value_32.hi_32);
	icr.bits.shorthand = shorthand;
	icr.bits.delivery_mode = INTR_LAPIC_ICR_INIT;
	icr.bits.level = INTR_LAPIC_ICR_ASSERT;
	icr.bits.trigger_mode = INTR_LAPIC_ICR_LEVEL;
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_0, icr.value_32.lo_32);
	wait_for_delivery();

	/* Give 10ms for INIT sequence to complete for old processors.
	 * Modern processors (family == 6) don't need to wait here.
	 */
	if (family != 6)
		mdelay(10);

	/* De-assert INIT IPI */
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_1, icr.value_32.hi_32);
	icr.bits.level = INTR_LAPIC_ICR_DEASSERT;
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_0, icr.value_32.lo_32);
	wait_for_delivery();

	/* Send Start IPI with page number of secondary reset code */
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_1, icr.value_32.hi_32);
	icr.value_32.lo_32 = 0;
	icr.bits.shorthand = shorthand;
	icr.bits.delivery_mode = INTR_LAPIC_ICR_STARTUP;
	icr.bits.vector = ((paddr_t) cpu_startup_start_address) >> 12;
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_0, icr.value_32.lo_32);
	wait_for_delivery();

	if (family == 6) /* 10us is enough for modern processors */
		udelay(10);
	else /* 200us for old processors */
		udelay(200);

	/* Send another start IPI as per the Intel Arch specification */
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_1, icr.value_32.hi_32);
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_0, icr.value_32.lo_32);
	wait_for_delivery();

	return status;
}

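/*
 * Hypothetical caller sketch (not part of the original file): a BSP-side
 * bring-up loop would typically start one AP at a time, passing the
 * physical address of the secondary startup trampoline; ap_id and
 * startup_paddr are placeholder names here.
 *
 *	send_startup_ipi(INTR_CPU_STARTUP_USE_DEST, ap_id, startup_paddr);
 *
 * Any other valid shorthand value falls through to the all-excluding-self
 * shorthand handled above.
 */
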
void send_single_ipi(uint32_t pcpu_id, uint32_t vector)
{
	uint32_t dest_lapic_id, hi_32, lo_32;

	/* Get the lapic ID of the destination processor. */
	dest_lapic_id = per_cpu(lapic_id, pcpu_id);

	/* Set the target processor. */
	hi_32 = dest_lapic_id << 24;

	/* Set the vector ID. */
	lo_32 = vector;

	/* Set the destination field to the target processor. */
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_1, hi_32);

	/* Write the vector ID to ICR. */
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_0, lo_32);

	wait_for_delivery();
}

int send_shorthand_ipi(uint8_t vector,
	enum intr_lapic_icr_shorthand shorthand,
	enum intr_lapic_icr_delivery_mode delivery_mode)
{
	union apic_icr icr;
	int status = 0;

	if ((shorthand < INTR_LAPIC_ICR_SELF)
		|| (shorthand > INTR_LAPIC_ICR_ALL_EX_SELF)
		|| (delivery_mode > INTR_LAPIC_ICR_NMI))
		status = -EINVAL;

	ASSERT(status == 0, "Incorrect arguments");

	icr.value = 0;
	icr.bits.shorthand = shorthand;
	icr.bits.delivery_mode = delivery_mode;
	icr.bits.vector = vector;
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_1, icr.value_32.hi_32);
	write_lapic_reg32(LAPIC_INT_COMMAND_REGISTER_0, icr.value_32.lo_32);
	wait_for_delivery();

	return status;
}
57
hypervisor/arch/x86/intr_main.c
Normal file
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
#include <hv_lib.h>
#include <acrn_common.h>
#include <hv_arch.h>
#include <hv_debug.h>

int interrupt_init(uint32_t cpu_id)
{
	struct host_idt_descriptor *idtd = &HOST_IDTR;
	int status;

	set_idt(idtd);

	status = init_lapic(cpu_id);
	ASSERT(status == 0, "lapic init failed");
	if (status != 0)
		return -ENODEV;

	status = init_default_irqs(cpu_id);
	ASSERT(status == 0, "irqs init failed");
	if (status != 0)
		return -ENODEV;

	CPU_IRQ_ENABLE();

	return status;
}
292
hypervisor/arch/x86/io.c
Normal file
@@ -0,0 +1,292 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <hv_debug.h>
|
||||
#include <hypercall.h>
|
||||
|
||||
int dm_emulate_pio_post(struct vcpu *vcpu)
|
||||
{
|
||||
int cur = vcpu->vcpu_id;
|
||||
int cur_context = vcpu->arch_vcpu.cur_context;
|
||||
struct vhm_request_buffer *req_buf =
|
||||
(void *)HPA2HVA(vcpu->vm->sw.req_buf);
|
||||
uint32_t mask =
|
||||
0xFFFFFFFFul >> (32 - 8 * vcpu->req.reqs.pio_request.size);
|
||||
uint64_t *rax;
|
||||
|
||||
	ASSERT(cur_context == 0, "pio emulation only happens in normal world");
|
||||
|
||||
rax = &vcpu->arch_vcpu.contexts[cur_context].guest_cpu_regs.regs.rax;
|
||||
vcpu->req.reqs.pio_request.value =
|
||||
req_buf->req_queue[cur].reqs.pio_request.value;
|
||||
|
||||
	/* VHM emulation data already copied to req; mark the slot free now */
|
||||
req_buf->req_queue[cur].valid = false;
|
||||
|
||||
if (req_buf->req_queue[cur].processed != REQ_STATE_SUCCESS)
|
||||
return -1;
|
||||
|
||||
if (vcpu->req.reqs.pio_request.direction == REQUEST_READ)
|
||||
*rax = ((*rax) & ~mask) |
|
||||
(vcpu->req.reqs.pio_request.value & mask);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void dm_emulate_pio_pre(struct vcpu *vcpu, uint64_t exit_qual,
|
||||
uint32_t sz, uint64_t req_value)
|
||||
{
|
||||
vcpu->req.type = REQ_PORTIO;
|
||||
if (VM_EXIT_IO_INSTRUCTION_ACCESS_DIRECTION(exit_qual))
|
||||
vcpu->req.reqs.pio_request.direction = REQUEST_READ;
|
||||
else
|
||||
vcpu->req.reqs.pio_request.direction = REQUEST_WRITE;
|
||||
|
||||
vcpu->req.reqs.pio_request.address =
|
||||
VM_EXIT_IO_INSTRUCTION_PORT_NUMBER(exit_qual);
|
||||
vcpu->req.reqs.pio_request.size = sz;
|
||||
vcpu->req.reqs.pio_request.value = req_value;
|
||||
}
|
||||
|
||||
int io_instr_handler(struct vcpu *vcpu)
|
||||
{
|
||||
uint32_t sz;
|
||||
uint32_t mask;
|
||||
uint32_t port;
|
||||
int8_t direction;
|
||||
struct vm_io_handler *handler;
|
||||
uint64_t exit_qual;
|
||||
struct vm *vm = vcpu->vm;
|
||||
int cur_context_idx = vcpu->arch_vcpu.cur_context;
|
||||
struct run_context *cur_context;
|
||||
int status = -EINVAL;
|
||||
|
||||
ASSERT(cur_context_idx == 0,
|
||||
"pio emulation only happen in normal wrold");
|
||||
|
||||
cur_context = &vcpu->arch_vcpu.contexts[cur_context_idx];
|
||||
exit_qual = vcpu->arch_vcpu.exit_qualification;
|
||||
|
||||
sz = VM_EXIT_IO_INSTRUCTION_SIZE(exit_qual) + 1;
|
||||
port = VM_EXIT_IO_INSTRUCTION_PORT_NUMBER(exit_qual);
|
||||
direction = VM_EXIT_IO_INSTRUCTION_ACCESS_DIRECTION(exit_qual);
|
||||
mask = 0xfffffffful >> (32 - 8 * sz);
|
||||
|
||||
memset(&vcpu->req, 0, sizeof(struct vhm_request));
|
||||
|
||||
TRACE_4I(TRC_VMEXIT_IO_INSTRUCTION, port, direction, sz,
|
||||
cur_context_idx);
|
||||
|
||||
for (handler = vm->arch_vm.io_handler;
|
||||
handler; handler = handler->next) {
|
||||
|
||||
if ((port >= handler->desc.addr + handler->desc.len) ||
|
||||
(port + sz <= handler->desc.addr))
|
||||
continue;
|
||||
|
||||
		/* Dom0 does not require IO emulation */
|
||||
if (is_vm0(vm))
|
||||
status = 0;
|
||||
|
||||
if (direction == 0) {
|
||||
if (handler->desc.io_write == NULL)
|
||||
continue;
|
||||
|
||||
handler->desc.io_write(handler, vm, port, sz,
|
||||
cur_context->guest_cpu_regs.regs.rax);
|
||||
|
||||
pr_dbg("IO write on port %04x, data %08x", port,
|
||||
cur_context->guest_cpu_regs.regs.rax & mask);
|
||||
|
||||
status = 0;
|
||||
break;
|
||||
} else if (handler->desc.io_read) {
|
||||
uint32_t data = handler->desc.io_read(handler, vm,
|
||||
port, sz);
|
||||
|
||||
cur_context->guest_cpu_regs.regs.rax &= ~mask;
|
||||
cur_context->guest_cpu_regs.regs.rax |= data & mask;
|
||||
|
||||
pr_dbg("IO read on port %04x, data %08x", port, data);
|
||||
|
||||
status = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Go for VHM */
|
||||
if (status != 0) {
|
||||
uint64_t *rax = &cur_context->guest_cpu_regs.regs.rax;
|
||||
|
||||
dm_emulate_pio_pre(vcpu, exit_qual, sz, *rax);
|
||||
status = acrn_insert_request_wait(vcpu, &vcpu->req);
|
||||
}
|
||||
|
||||
if (status != 0) {
|
||||
pr_fatal("IO %s access to port 0x%04x, size=%u",
|
||||
direction ? "read" : "write", port, sz);
|
||||
|
||||
}
|
||||
|
||||
/* Catch any problems */
|
||||
ASSERT(status == 0, "Invalid IO access");
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static void register_io_handler(struct vm *vm, struct vm_io_handler *hdlr)
|
||||
{
|
||||
if (vm->arch_vm.io_handler)
|
||||
hdlr->next = vm->arch_vm.io_handler;
|
||||
|
||||
vm->arch_vm.io_handler = hdlr;
|
||||
}
|
||||
|
||||
static void empty_io_handler_list(struct vm *vm)
|
||||
{
|
||||
struct vm_io_handler *handler = vm->arch_vm.io_handler;
|
||||
struct vm_io_handler *tmp;
|
||||
|
||||
while (handler) {
|
||||
tmp = handler;
|
||||
handler = tmp->next;
|
||||
free(tmp);
|
||||
}
|
||||
vm->arch_vm.io_handler = NULL;
|
||||
}
|
||||
|
||||
void free_io_emulation_resource(struct vm *vm)
|
||||
{
|
||||
empty_io_handler_list(vm);
|
||||
|
||||
/* Free I/O emulation bitmaps */
|
||||
free(vm->arch_vm.iobitmap[0]);
|
||||
free(vm->arch_vm.iobitmap[1]);
|
||||
}
|
||||
|
||||
static void deny_guest_io_access(struct vm *vm, uint32_t address, uint32_t nbytes)
|
||||
{
|
||||
uint32_t *b;
|
||||
uint32_t i;
|
||||
uint32_t a;
|
||||
|
||||
for (i = 0; i < nbytes; i++) {
|
||||
b = vm->arch_vm.iobitmap[0];
|
||||
if (address & 0x8000)
|
||||
b = vm->arch_vm.iobitmap[1];
|
||||
a = address & 0x7fff;
|
||||
b[a >> 5] |= (1 << (a & 0x1f));
|
||||
address++;
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
default_io_read(__unused struct vm_io_handler *hdlr, __unused struct vm *vm,
|
||||
ioport_t address, size_t width)
|
||||
{
|
||||
uint32_t v = io_read(address, width);
|
||||
return v;
|
||||
}
|
||||
|
||||
static void default_io_write(__unused struct vm_io_handler *hdlr,
|
||||
__unused struct vm *vm, ioport_t addr,
|
||||
size_t width, uint32_t v)
|
||||
{
|
||||
io_write(v, addr, width);
|
||||
}
|
||||
|
||||
static struct vm_io_handler *create_io_handler(uint32_t port, uint32_t len,
|
||||
io_read_fn_t io_read_fn_ptr,
|
||||
io_write_fn_t io_write_fn_ptr)
|
||||
{
|
||||
|
||||
struct vm_io_handler *handler;
|
||||
|
||||
handler = calloc(1, sizeof(struct vm_io_handler));
|
||||
|
||||
if (handler != NULL) {
|
||||
handler->desc.addr = port;
|
||||
handler->desc.len = len;
|
||||
handler->desc.io_read = io_read_fn_ptr;
|
||||
handler->desc.io_write = io_write_fn_ptr;
|
||||
} else {
|
||||
pr_err("Error: out of memory");
|
||||
}
|
||||
|
||||
return handler;
|
||||
}
|
||||
|
||||
void setup_io_bitmap(struct vm *vm)
|
||||
{
|
||||
/* Allocate VM architecture state and IO bitmaps A and B */
|
||||
vm->arch_vm.iobitmap[0] = alloc_page();
|
||||
vm->arch_vm.iobitmap[1] = alloc_page();
|
||||
|
||||
ASSERT(vm->arch_vm.iobitmap[0] && vm->arch_vm.iobitmap[1], "");
|
||||
|
||||
if (is_vm0(vm)) {
|
||||
memset(vm->arch_vm.iobitmap[0], 0x00, CPU_PAGE_SIZE);
|
||||
memset(vm->arch_vm.iobitmap[1], 0x00, CPU_PAGE_SIZE);
|
||||
} else {
|
||||
/* block all IO port access from Guest */
|
||||
memset(vm->arch_vm.iobitmap[0], 0xFF, CPU_PAGE_SIZE);
|
||||
memset(vm->arch_vm.iobitmap[1], 0xFF, CPU_PAGE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
void register_io_emulation_handler(struct vm *vm, struct vm_io_range *range,
|
||||
io_read_fn_t io_read_fn_ptr,
|
||||
io_write_fn_t io_write_fn_ptr)
|
||||
{
|
||||
struct vm_io_handler *handler = NULL;
|
||||
io_read_fn_t io_read_fn = &default_io_read;
|
||||
io_write_fn_t io_write_fn = &default_io_write;
|
||||
|
||||
if (range->flags == IO_ATTR_RW && io_read_fn_ptr && io_write_fn_ptr) {
|
||||
io_read_fn = io_read_fn_ptr;
|
||||
io_write_fn = io_write_fn_ptr;
|
||||
} else if (range->flags == IO_ATTR_R) {
|
||||
if (io_read_fn_ptr)
|
||||
io_read_fn = io_read_fn_ptr;
|
||||
io_write_fn = NULL;
|
||||
}
|
||||
|
||||
if (is_vm0(vm))
|
||||
deny_guest_io_access(vm, range->base, range->len);
|
||||
|
||||
handler = create_io_handler(range->base,
|
||||
range->len, io_read_fn, io_write_fn);
|
||||
|
||||
register_io_handler(vm, handler);
|
||||
}
|
439
hypervisor/arch/x86/ioapic.c
Normal file
@@ -0,0 +1,439 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
/* Register offsets */
|
||||
#define IOAPIC_REGSEL_OFFSET 0
|
||||
#define IOAPIC_WINSWL_OFFSET 0x10
|
||||
|
||||
/* IOAPIC Redirection Table (RTE) Entry structure */
|
||||
struct ioapic_rte {
|
||||
uint32_t lo_32;
|
||||
uint32_t hi_32;
|
||||
} ioapic_rte;
|
||||
|
||||
struct gsi_table {
|
||||
uint8_t ioapic_id;
|
||||
uint8_t pin;
|
||||
uint64_t addr;
|
||||
};
|
||||
static struct gsi_table gsi_table[NR_MAX_GSI];
|
||||
static int nr_gsi;
|
||||
static spinlock_t ioapic_lock;
|
||||
|
||||
/*
|
||||
 * the irq-to-ioapic-pin mapping should be extracted from the ACPI MADT table;
 * it is hardcoded here for now
|
||||
*/
|
||||
uint16_t legacy_irq_to_pin[NR_LEGACY_IRQ] = {
|
||||
2, /* IRQ0*/
|
||||
1, /* IRQ1*/
|
||||
0, /* IRQ2 connected to Pin0 (ExtInt source of PIC) if existing */
|
||||
3, /* IRQ3*/
|
||||
4, /* IRQ4*/
|
||||
5, /* IRQ5*/
|
||||
6, /* IRQ6*/
|
||||
7, /* IRQ7*/
|
||||
8, /* IRQ8*/
|
||||
9 | IOAPIC_RTE_TRGRLVL, /* IRQ9*/
|
||||
10, /* IRQ10*/
|
||||
11, /* IRQ11*/
|
||||
12, /* IRQ12*/
|
||||
13, /* IRQ13*/
|
||||
14, /* IRQ14*/
|
||||
15, /* IRQ15*/
|
||||
};
|
||||
|
||||
static uint64_t map_ioapic(
|
||||
uint64_t ioapic_paddr)
|
||||
{
|
||||
/* At some point we may need to translate this paddr to a vaddr.
|
||||
* 1:1 mapping for now.
|
||||
*/
|
||||
return (vaddr_t) ioapic_paddr;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
ioapic_read_reg32(const uint64_t ioapic_base, const uint8_t offset)
|
||||
{
|
||||
uint32_t v;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
spinlock_irqsave_obtain(&ioapic_lock);
|
||||
|
||||
/* Write IOREGSEL */
|
||||
*(uint32_t *)(ioapic_base) = offset;
|
||||
/* Read IOWIN */
|
||||
v = *(uint32_t *)(ioapic_base + IOAPIC_WINSWL_OFFSET);
|
||||
|
||||
spinlock_irqrestore_release(&ioapic_lock);
|
||||
return v;
|
||||
}
|
||||
|
||||
static inline void
|
||||
ioapic_write_reg32(const uint64_t ioapic_base,
|
||||
const uint8_t offset, const uint32_t value)
|
||||
{
|
||||
spinlock_rflags;
|
||||
|
||||
spinlock_irqsave_obtain(&ioapic_lock);
|
||||
|
||||
/* Write IOREGSEL */
|
||||
*(uint32_t *)(ioapic_base) = offset;
|
||||
/* Write IOWIN */
|
||||
*(uint32_t *)(ioapic_base + IOAPIC_WINSWL_OFFSET) = value;
|
||||
|
||||
spinlock_irqrestore_release(&ioapic_lock);
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
get_ioapic_base(int apic_id)
|
||||
{
|
||||
uint64_t addr = -1UL;
|
||||
|
||||
	/* should extract the next ioapic base from the ACPI MADT table */
|
||||
if (apic_id == 0)
|
||||
addr = DEFAULT_IO_APIC_BASE;
|
||||
else if (apic_id == 1)
|
||||
addr = 0xfec3f000;
|
||||
else if (apic_id == 2)
|
||||
addr = 0xfec7f000;
|
||||
else
|
||||
ASSERT(apic_id <= 2, "ACPI MADT table missing");
|
||||
return addr;
|
||||
}
|
||||
|
||||
|
||||
static inline void
|
||||
ioapic_get_rte_entry(uint64_t ioapic_addr,
|
||||
int pin, struct ioapic_rte *rte)
|
||||
{
|
||||
rte->lo_32 = ioapic_read_reg32(ioapic_addr, pin*2 + 0x10);
|
||||
rte->hi_32 = ioapic_read_reg32(ioapic_addr, pin*2 + 0x11);
|
||||
}
|
||||
|
||||
static inline void
|
||||
ioapic_set_rte_entry(uint64_t ioapic_addr,
|
||||
int pin, struct ioapic_rte *rte)
|
||||
{
|
||||
ioapic_write_reg32(ioapic_addr, pin*2 + 0x10, rte->lo_32);
|
||||
ioapic_write_reg32(ioapic_addr, pin*2 + 0x11, rte->hi_32);
|
||||
}
|
||||
|
||||
static inline struct ioapic_rte
|
||||
create_rte_for_legacy_irq(int irq, int vr)
|
||||
{
|
||||
struct ioapic_rte rte = {0, 0};
|
||||
|
||||
	/* Legacy IRQs 0-15 are set up masked by default. Their routing is
	 * actually defined in either the MPTable or the ACPI MADT table;
	 * until the HV can parse ACPI tables we use a common hardcoded mapping.
|
||||
*/
|
||||
|
||||
rte.lo_32 |= IOAPIC_RTE_INTMSET;
|
||||
rte.lo_32 |= (legacy_irq_to_pin[irq] & IOAPIC_RTE_TRGRLVL);
|
||||
rte.lo_32 |= DEFAULT_DEST_MODE;
|
||||
rte.lo_32 |= DEFAULT_DELIVERY_MODE;
|
||||
rte.lo_32 |= (IOAPIC_RTE_INTVEC & vr);
|
||||
|
||||
/* FIXME: Fixed to active Low? */
|
||||
rte.lo_32 |= IOAPIC_RTE_INTALO;
|
||||
|
||||
/* Dest field: legacy irq fixed to CPU0 */
|
||||
rte.hi_32 |= 1 << 24;
|
||||
|
||||
return rte;
|
||||
}
|
||||
|
||||
static inline struct ioapic_rte
|
||||
create_rte_for_gsi_irq(int irq, int vr)
|
||||
{
|
||||
struct ioapic_rte rte = {0, 0};
|
||||
|
||||
if (irq < NR_LEGACY_IRQ)
|
||||
return create_rte_for_legacy_irq(irq, vr);
|
||||
|
||||
/* irq default masked, level trig */
|
||||
rte.lo_32 |= IOAPIC_RTE_INTMSET;
|
||||
rte.lo_32 |= IOAPIC_RTE_TRGRLVL;
|
||||
rte.lo_32 |= DEFAULT_DEST_MODE;
|
||||
rte.lo_32 |= DEFAULT_DELIVERY_MODE;
|
||||
rte.lo_32 |= (IOAPIC_RTE_INTVEC & vr);
|
||||
|
||||
/* FIXME: Fixed to active Low? */
|
||||
rte.lo_32 |= IOAPIC_RTE_INTALO;
|
||||
|
||||
/* Dest field */
|
||||
rte.hi_32 |= ALL_CPUS_MASK << 24;
|
||||
|
||||
return rte;
|
||||
}
|
||||
|
||||
static void ioapic_set_routing(int gsi, int vr)
|
||||
{
|
||||
uint64_t addr;
|
||||
struct ioapic_rte rte;
|
||||
|
||||
addr = gsi_table[gsi].addr;
|
||||
rte = create_rte_for_gsi_irq(gsi, vr);
|
||||
ioapic_set_rte_entry(addr, gsi_table[gsi].pin, &rte);
|
||||
|
||||
if (rte.lo_32 & IOAPIC_RTE_TRGRMOD)
|
||||
update_irq_handler(gsi, handle_level_interrupt_common);
|
||||
else
|
||||
update_irq_handler(gsi, common_handler_edge);
|
||||
|
||||
dev_dbg(ACRN_DBG_IRQ, "GSI: irq:%d pin:%d rte:%x",
|
||||
gsi, gsi_table[gsi].pin,
|
||||
rte.lo_32);
|
||||
}
|
||||
|
||||
void ioapic_get_rte(int irq, uint64_t *rte)
|
||||
{
|
||||
uint64_t addr;
|
||||
struct ioapic_rte _rte;
|
||||
|
||||
if (!irq_is_gsi(irq))
|
||||
return;
|
||||
|
||||
addr = gsi_table[irq].addr;
|
||||
ioapic_get_rte_entry(addr, gsi_table[irq].pin, &_rte);
|
||||
|
||||
*rte = _rte.hi_32;
|
||||
*rte = *rte << 32 | _rte.lo_32;
|
||||
}
|
||||
|
||||
void ioapic_set_rte(int irq, uint64_t raw_rte)
|
||||
{
|
||||
uint64_t addr;
|
||||
struct ioapic_rte rte;
|
||||
|
||||
if (!irq_is_gsi(irq))
|
||||
return;
|
||||
|
||||
addr = gsi_table[irq].addr;
|
||||
rte.lo_32 = raw_rte;
|
||||
rte.hi_32 = raw_rte >> 32;
|
||||
ioapic_set_rte_entry(addr, gsi_table[irq].pin, &rte);
|
||||
|
||||
dev_dbg(ACRN_DBG_IRQ, "GSI: irq:%d pin:%d rte:%x",
|
||||
irq, gsi_table[irq].pin,
|
||||
rte.lo_32);
|
||||
}
|
||||
|
||||
int irq_gsi_num(void)
|
||||
{
|
||||
return nr_gsi;
|
||||
}
|
||||
|
||||
bool irq_is_gsi(int irq)
|
||||
{
|
||||
return irq < nr_gsi;
|
||||
}
|
||||
|
||||
int irq_to_pin(int irq)
|
||||
{
|
||||
if (irq_is_gsi(irq))
|
||||
return gsi_table[irq].pin;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
|
||||
int pin_to_irq(int pin)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (pin < 0)
|
||||
return IRQ_INVALID;
|
||||
|
||||
for (i = 0; i < nr_gsi; i++) {
|
||||
if (gsi_table[i].pin == (uint8_t) pin)
|
||||
return i;
|
||||
}
|
||||
return IRQ_INVALID;
|
||||
}
|
||||
|
||||
void
|
||||
irq_gsi_mask_unmask(int irq, bool mask)
|
||||
{
|
||||
uint64_t addr = gsi_table[irq].addr;
|
||||
int pin = gsi_table[irq].pin;
|
||||
struct ioapic_rte rte;
|
||||
|
||||
if (!irq_is_gsi(irq))
|
||||
return;
|
||||
|
||||
ioapic_get_rte_entry(addr, pin, &rte);
|
||||
if (mask)
|
||||
rte.lo_32 |= IOAPIC_RTE_INTMSET;
|
||||
else
|
||||
rte.lo_32 &= ~IOAPIC_RTE_INTMASK;
|
||||
ioapic_set_rte_entry(addr, pin, &rte);
|
||||
dev_dbg(ACRN_DBG_PTIRQ, "update: irq:%d pin:%d rte:%x",
|
||||
irq, pin, rte.lo_32);
|
||||
}
|
||||
|
||||
void setup_ioapic_irq(void)
|
||||
{
|
||||
int ioapic_id;
|
||||
int gsi;
|
||||
int vr;
|
||||
|
||||
spinlock_init(&ioapic_lock);
|
||||
|
||||
for (ioapic_id = 0, gsi = 0; ioapic_id < NR_IOAPICS; ioapic_id++) {
|
||||
int pin;
|
||||
int max_pins;
|
||||
int version;
|
||||
uint64_t addr;
|
||||
|
||||
addr = map_ioapic(get_ioapic_base(ioapic_id));
|
||||
version = ioapic_read_reg32(addr, IOAPIC_VER);
|
||||
max_pins = (version & IOAPIC_MAX_RTE_MASK) >> MAX_RTE_SHIFT;
|
||||
dev_dbg(ACRN_DBG_IRQ, "IOAPIC version: %x", version);
|
||||
ASSERT(max_pins > NR_LEGACY_IRQ,
|
||||
"Legacy IRQ num > total GSI");
|
||||
|
||||
for (pin = 0; pin < max_pins; pin++) {
|
||||
gsi_table[gsi].ioapic_id = ioapic_id;
|
||||
gsi_table[gsi].addr = addr;
|
||||
|
||||
if (gsi < NR_LEGACY_IRQ)
|
||||
gsi_table[gsi].pin =
|
||||
legacy_irq_to_pin[gsi] & 0xff;
|
||||
else
|
||||
gsi_table[gsi].pin = pin;
|
||||
|
||||
			/* pin the irq before using it */
|
||||
if (irq_mark_used(gsi) < 0) {
|
||||
pr_err("failed to alloc IRQ[%d]", gsi);
|
||||
gsi++;
|
||||
continue;
|
||||
}
|
||||
|
||||
			/* assign a vector for this GSI;
			 * for legacy irqs the vector is reserved and never freed
|
||||
*/
|
||||
if (gsi < NR_LEGACY_IRQ) {
|
||||
vr = irq_desc_alloc_vector(gsi, false);
|
||||
if (vr < 0) {
|
||||
pr_err("failed to alloc VR");
|
||||
gsi++;
|
||||
continue;
|
||||
}
|
||||
} else
|
||||
vr = 0; /* not to allocate VR right now */
|
||||
|
||||
ioapic_set_routing(gsi, vr);
|
||||
gsi++;
|
||||
}
|
||||
}
|
||||
|
||||
/* system max gsi numbers */
|
||||
nr_gsi = gsi;
|
||||
ASSERT(nr_gsi < NR_MAX_GSI, "GSI table overflow");
|
||||
}
|
||||
|
||||
void dump_ioapic(void)
|
||||
{
|
||||
int irq;
|
||||
|
||||
for (irq = 0; irq < nr_gsi; irq++) {
|
||||
uint64_t addr = gsi_table[irq].addr;
|
||||
int pin = gsi_table[irq].pin;
|
||||
struct ioapic_rte rte;
|
||||
|
||||
ioapic_get_rte_entry(addr, pin, &rte);
|
||||
dev_dbg(ACRN_DBG_IRQ, "DUMP: irq:%d pin:%d rte:%x",
|
||||
irq, pin, rte.lo_32);
|
||||
}
|
||||
}
|
||||
|
||||
void get_rte_info(struct ioapic_rte *rte, bool *mask, bool *irr,
|
||||
bool *phys, int *delmode, bool *level, int *vector, uint32_t *dest)
|
||||
{
|
||||
*mask = ((rte->lo_32 & IOAPIC_RTE_INTMASK) == IOAPIC_RTE_INTMSET);
|
||||
*irr = ((rte->lo_32 & IOAPIC_RTE_REM_IRR) == IOAPIC_RTE_REM_IRR);
|
||||
*phys = ((rte->lo_32 & IOAPIC_RTE_DESTMOD) == IOAPIC_RTE_DESTPHY);
|
||||
*delmode = rte->lo_32 & IOAPIC_RTE_DELMOD;
|
||||
*level = rte->lo_32 & IOAPIC_RTE_TRGRLVL ? true : false;
|
||||
*vector = rte->lo_32 & IOAPIC_RTE_INTVEC;
|
||||
*dest = rte->hi_32 >> APIC_ID_SHIFT;
|
||||
}
|
||||
|
||||
int get_ioapic_info(char *str, int str_max_len)
|
||||
{
|
||||
int irq, len, size = str_max_len;
|
||||
|
||||
len = snprintf(str, size,
|
||||
"\r\nIRQ\tPIN\tRTE.HI32\tRTE.LO32\tVEC\tDST\tDM\tTM\tDELM\tIRR\tMASK");
|
||||
size -= len;
|
||||
str += len;
|
||||
|
||||
for (irq = 0; irq < nr_gsi; irq++) {
|
||||
uint64_t addr = gsi_table[irq].addr;
|
||||
int pin = gsi_table[irq].pin;
|
||||
struct ioapic_rte rte;
|
||||
|
||||
bool irr, phys, level, mask;
|
||||
int delmode, vector;
|
||||
uint32_t dest;
|
||||
|
||||
ioapic_get_rte_entry(addr, pin, &rte);
|
||||
|
||||
get_rte_info(&rte, &mask, &irr, &phys, &delmode, &level,
|
||||
&vector, &dest);
|
||||
|
||||
len = snprintf(str, size, "\r\n%03d\t%03d\t0x%08X\t0x%08X\t",
|
||||
irq, pin, rte.hi_32, rte.lo_32);
|
||||
size -= len;
|
||||
str += len;
|
||||
|
||||
len = snprintf(str, size, "0x%02X\t0x%02X\t%s\t%s\t%d\t%d\t%d",
|
||||
vector, dest, phys ? "phys" : "logic",
|
||||
level ? "level" : "edge", delmode >> 8, irr, mask);
|
||||
size -= len;
|
||||
str += len;
|
||||
|
||||
if (size < 2) {
|
||||
pr_err("\r\nsmall buffer for ioapic dump");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
snprintf(str, size, "\r\n");
|
||||
return 0;
|
||||
}
|
761
hypervisor/arch/x86/irq.c
Normal file
@@ -0,0 +1,761 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
static spinlock_t exception_spinlock = { .head = 0, .tail = 0, };
|
||||
|
||||
struct irq_request_info {
|
||||
/* vector set to 0xE0 ~ 0xFF for pri_register_handler
|
||||
* and set to -1 for normal_register_handler
|
||||
*/
|
||||
int vector;
|
||||
dev_handler_t func;
|
||||
void *dev_data;
|
||||
bool share;
|
||||
bool lowpri;
|
||||
char *name;
|
||||
};
|
||||
|
||||
/* any field change in below required irq_lock protection with irqsave */
|
||||
struct irq_desc {
|
||||
int irq; /* index to irq_desc_base */
|
||||
	enum irq_state used; /* this irq has been assigned to a device */
|
||||
enum irq_desc_state state; /* irq_desc status */
|
||||
int vector; /* assigned vector */
|
||||
void *handler_data; /* irq_handler private data */
|
||||
int (*irq_handler)(struct irq_desc *irq_desc, void *handler_data);
|
||||
struct dev_handler_node *dev_list;
|
||||
spinlock_t irq_lock;
|
||||
	uint64_t *irq_cnt; /* per-CPU counts of how often this irq fired */
|
||||
uint64_t irq_lost_cnt;
|
||||
};
|
||||
|
||||
static struct irq_desc *irq_desc_base;
|
||||
static int vector_to_irq[NR_MAX_VECTOR + 1];
|
||||
|
||||
static DEFINE_CPU_DATA(uint64_t[NR_MAX_IRQS], irq_count);
|
||||
static DEFINE_CPU_DATA(uint64_t, spurious);
|
||||
|
||||
spurious_handler_t spurious_handler;
|
||||
|
||||
static void init_irq_desc(void)
|
||||
{
|
||||
int i, page_num = 0;
|
||||
int desc_size = NR_MAX_IRQS * sizeof(struct irq_desc);
|
||||
|
||||
page_num = (desc_size + CPU_PAGE_SIZE-1) >> CPU_PAGE_SHIFT;
|
||||
|
||||
irq_desc_base = alloc_pages(page_num);
|
||||
|
||||
ASSERT(irq_desc_base, "page alloc failed!");
|
||||
memset(irq_desc_base, 0, page_num * CPU_PAGE_SIZE);
|
||||
|
||||
for (i = 0; i < NR_MAX_IRQS; i++) {
|
||||
irq_desc_base[i].irq = i;
|
||||
irq_desc_base[i].vector = VECTOR_INVALID;
|
||||
spinlock_init(&irq_desc_base[i].irq_lock);
|
||||
}
|
||||
|
||||
for (i = 0; i <= NR_MAX_VECTOR; i++)
|
||||
vector_to_irq[i] = IRQ_INVALID;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* alloc vector 0x20-0xDF for irq
|
||||
* lowpri: 0x20-0x7F
|
||||
* highpri: 0x80-0xDF
|
||||
*/
|
||||
static int find_available_vector(bool lowpri)
|
||||
{
|
||||
int i, start, end;
|
||||
|
||||
if (lowpri) {
|
||||
start = VECTOR_FOR_NOR_LOWPRI_START;
|
||||
end = VECTOR_FOR_NOR_LOWPRI_END;
|
||||
} else {
|
||||
start = VECTOR_FOR_NOR_HIGHPRI_START;
|
||||
end = VECTOR_FOR_NOR_HIGHPRI_END;
|
||||
}
|
||||
|
||||
/* TODO: vector lock required */
|
||||
for (i = start; i < end; i++) {
|
||||
if (vector_to_irq[i] == IRQ_INVALID)
|
||||
return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* check and set irq to be assigned
|
||||
* return: -1 if irq already assigned otherwise return irq
|
||||
*/
|
||||
int irq_mark_used(int irq)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
if (irq < 0)
|
||||
return -1;
|
||||
|
||||
desc = irq_desc_base + irq;
|
||||
spinlock_irqsave_obtain(&desc->irq_lock);
|
||||
if (desc->used == IRQ_NOT_ASSIGNED)
|
||||
desc->used = IRQ_ASSIGNED_NOSHARE;
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
return irq;
|
||||
}
|
||||
|
||||
/*
|
||||
 * find an available irq in the system and mark it assigned
 * return: the irq, or -1 if none was found
|
||||
*/
|
||||
static int alloc_irq(void)
|
||||
{
|
||||
int i;
|
||||
struct irq_desc *desc;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
for (i = irq_gsi_num(); i < NR_MAX_IRQS; i++) {
|
||||
desc = irq_desc_base + i;
|
||||
spinlock_irqsave_obtain(&desc->irq_lock);
|
||||
if (desc->used == IRQ_NOT_ASSIGNED) {
|
||||
desc->used = IRQ_ASSIGNED_NOSHARE;
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
break;
|
||||
}
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
}
|
||||
return (i == NR_MAX_IRQS) ? -1:i;
|
||||
}
|
||||
|
||||
/* need irq_lock protection before use */
|
||||
static void _irq_desc_set_vector(int irq, int vr)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
|
||||
desc = irq_desc_base + irq;
|
||||
vector_to_irq[vr] = irq;
|
||||
desc->vector = vr;
|
||||
}
|
||||
|
||||
/* lock version of set vector */
|
||||
static void irq_desc_set_vector(int irq, int vr)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
desc = irq_desc_base + irq;
|
||||
spinlock_irqsave_obtain(&desc->irq_lock);
|
||||
vector_to_irq[vr] = irq;
|
||||
desc->vector = vr;
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
}
|
||||
|
||||
/* used with holding irq_lock outside */
|
||||
static void _irq_desc_free_vector(int irq)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
int vr;
|
||||
|
||||
if (irq > NR_MAX_IRQS || irq < 0)
|
||||
return;
|
||||
|
||||
desc = irq_desc_base + irq;
|
||||
|
||||
vr = desc->vector;
|
||||
desc->used = IRQ_NOT_ASSIGNED;
|
||||
desc->state = IRQ_DESC_PENDING;
|
||||
desc->vector = VECTOR_INVALID;
|
||||
|
||||
vr &= NR_MAX_VECTOR;
|
||||
if (vector_to_irq[vr] == irq)
|
||||
vector_to_irq[vr] = IRQ_INVALID;
|
||||
}
|
||||
|
||||
static void disable_pic_irq(void)
|
||||
{
|
||||
io_write_byte(0xff, 0xA1);
|
||||
io_write_byte(0xff, 0x21);
|
||||
}
|
||||
|
||||
static bool
|
||||
irq_desc_append_dev(struct irq_desc *desc, void *node, bool share)
|
||||
{
|
||||
struct dev_handler_node *dev_list;
|
||||
bool added = true;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
spinlock_irqsave_obtain(&desc->irq_lock);
|
||||
dev_list = desc->dev_list;
|
||||
|
||||
/* assign if first node */
|
||||
if (dev_list == NULL) {
|
||||
desc->dev_list = node;
|
||||
desc->used = (share)?IRQ_ASSIGNED_SHARED:IRQ_ASSIGNED_NOSHARE;
|
||||
|
||||
		/* Only a GSI can be level-triggered, and it was already
		 * initialized during ioapic setup.
		 * The caller can later update it with update_irq_handler()
|
||||
*/
|
||||
if (!desc->irq_handler)
|
||||
desc->irq_handler = common_handler_edge;
|
||||
} else if (!share || desc->used == IRQ_ASSIGNED_NOSHARE) {
|
||||
/* dev node added failed */
|
||||
added = false;
|
||||
} else {
|
||||
/* dev_list point to last valid node */
|
||||
while (dev_list->next)
|
||||
dev_list = dev_list->next;
|
||||
/* add node */
|
||||
dev_list->next = node;
|
||||
}
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
|
||||
return added;
|
||||
}
|
||||
|
||||
static struct dev_handler_node*
|
||||
common_register_handler(int irq,
|
||||
struct irq_request_info *info)
|
||||
{
|
||||
struct dev_handler_node *node = NULL;
|
||||
struct irq_desc *desc;
|
||||
bool added = false;
|
||||
|
||||
	/* ======================================================
	 * This is the low level ISR handler registering function.
	 *
	 * case: irq = -1
	 *   The caller does not know which irq to use and wants the system
	 *   to allocate an available irq for it. These irqs are in the
	 *   range nr_gsi ~ NR_MAX_IRQS. An irq will be allocated and a
	 *   vector will be assigned to this irq automatically.
	 *
	 * case: irq >= 0 and irq < nr_gsi
	 *   The caller wants to add a device ISR handler onto an ioapic pin.
	 *   Two kinds of devices: legacy devices and PCI devices with INTx.
	 *   A vector will be assigned automatically.
	 *
	 * case: irq with a special type (not from IOAPIC/MSI)
	 *   These irq values are pre-defined for Timer, IPI, Spurious, etc.
	 *   Their vectors are pre-defined as well.
	 *
	 * return value: pinned irq and assigned vector for this irq.
	 * The caller can use this irq to enable/disable/mask/unmask the
	 * interrupt, and if this irq is for:
	 *   GSI legacy: nothing to do, legacy irqs are already initialized
	 *   GSI other: need to program PCI INTx to match this irq pin
	 *   MSI: the caller needs to program the vector into the PCI device
	 * =====================================================
	 */
|
||||
ASSERT(info != NULL, "Invalid param");
|
||||
|
||||
	/* HV selects an irq for the device if irq < 0;
	 * this vector/irq matches the ACPI DSDT or PCI INTx/MSI
|
||||
*/
|
||||
if (irq < 0)
|
||||
irq = alloc_irq();
|
||||
else
|
||||
irq = irq_mark_used(irq);
|
||||
|
||||
if (irq < 0) {
|
||||
pr_err("failed to assign IRQ");
|
||||
goto OUT;
|
||||
}
|
||||
|
||||
node = calloc(1, sizeof(struct dev_handler_node));
|
||||
if (node == NULL) {
|
||||
pr_err("failed to alloc node");
|
||||
irq_desc_try_free_vector(irq);
|
||||
goto OUT;
|
||||
}
|
||||
|
||||
desc = irq_desc_base + irq;
|
||||
added = irq_desc_append_dev(desc, node, info->share);
|
||||
if (!added) {
|
||||
free(node);
|
||||
node = NULL;
|
||||
pr_err("failed to add node to non-shared irq");
|
||||
}
|
||||
OUT:
|
||||
if (added) {
|
||||
/* it is safe to call irq_desc_alloc_vector multiple times*/
|
||||
if (info->vector >= VECTOR_FOR_PRI_START &&
|
||||
info->vector <= VECTOR_FOR_PRI_END)
|
||||
irq_desc_set_vector(irq, info->vector);
|
||||
else if (info->vector < 0)
|
||||
irq_desc_alloc_vector(irq, info->lowpri);
|
||||
else {
|
||||
pr_err("the input vector is not correct");
|
||||
free(node);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
node->dev_handler = info->func;
|
||||
node->dev_data = info->dev_data;
|
||||
node->desc = desc;
|
||||
|
||||
/* we are okay using strcpy_s here even with spinlock
|
||||
* since no #PG in HV right now
|
||||
*/
|
||||
strcpy_s(node->name, 32, info->name);
|
||||
dev_dbg(ACRN_DBG_IRQ, "[%s] %s irq%d vr:0x%x",
|
||||
__func__, node->name, irq, desc->vector);
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
/* it is safe to call irq_desc_alloc_vector multiple times*/
|
||||
int irq_desc_alloc_vector(int irq, bool lowpri)
|
||||
{
|
||||
int vr = -1;
|
||||
struct irq_desc *desc;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
/* irq should be always available at this time */
|
||||
if (irq > NR_MAX_IRQS || irq < 0)
|
||||
return false;
|
||||
|
||||
desc = irq_desc_base + irq;
|
||||
spinlock_irqsave_obtain(&desc->irq_lock);
|
||||
if (desc->vector != VECTOR_INVALID) {
|
||||
/* already allocated a vector */
|
||||
goto OUT;
|
||||
}
|
||||
|
||||
	/* FLAT mode: an irq is connected to the same vector on every cpu */
|
||||
vr = find_available_vector(lowpri);
|
||||
if (vr < 0) {
|
||||
pr_err("no vector found for irq[%d]", irq);
|
||||
goto OUT;
|
||||
}
|
||||
_irq_desc_set_vector(irq, vr);
|
||||
OUT:
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
return vr;
|
||||
}
|
||||
|
||||
void irq_desc_try_free_vector(int irq)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
/* legacy irq's vector is reserved and should not be freed */
|
||||
if (irq > NR_MAX_IRQS || irq < NR_LEGACY_IRQ)
|
||||
return;
|
||||
|
||||
desc = irq_desc_base + irq;
|
||||
spinlock_irqsave_obtain(&desc->irq_lock);
|
||||
if (desc->dev_list == NULL)
|
||||
_irq_desc_free_vector(irq);
|
||||
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
|
||||
}
|
||||
|
||||
int irq_to_vector(int irq)
|
||||
{
|
||||
if (irq < NR_MAX_IRQS)
|
||||
return irq_desc_base[irq].vector;
|
||||
else
|
||||
return VECTOR_INVALID;
|
||||
}
|
||||
|
||||
int dev_to_irq(struct dev_handler_node *node)
|
||||
{
|
||||
return node->desc->irq;
|
||||
}
|
||||
|
||||
int dev_to_vector(struct dev_handler_node *node)
|
||||
{
|
||||
return node->desc->vector;
|
||||
}
|
||||
|
||||
int init_default_irqs(unsigned int cpu_id)
|
||||
{
|
||||
if (cpu_id > 0)
|
||||
return 0;
|
||||
|
||||
init_irq_desc();
|
||||
|
||||
/* we use ioapic only, disable legacy PIC */
|
||||
disable_pic_irq();
|
||||
setup_ioapic_irq();
|
||||
init_softirq();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void dispatch_exception(struct intr_ctx *ctx)
|
||||
{
|
||||
unsigned int cpu_id = get_cpu_id();
|
||||
|
||||
/* Obtain lock to ensure exception dump doesn't get corrupted */
|
||||
spinlock_obtain(&exception_spinlock);
|
||||
|
||||
dump_exception(ctx, cpu_id);
|
||||
|
||||
/* Release lock to let other CPUs handle exception */
|
||||
spinlock_release(&exception_spinlock);
|
||||
|
||||
/* Halt the CPU */
|
||||
cpu_halt(cpu_id);
|
||||
}
|
||||
|
||||
int handle_spurious_interrupt(int vector)
|
||||
{
|
||||
send_lapic_eoi();
|
||||
|
||||
get_cpu_var(spurious)++;
|
||||
|
||||
pr_warn("Spurious vector: 0x%x.", vector);
|
||||
|
||||
if (spurious_handler)
|
||||
return spurious_handler(vector);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* do_IRQ() */
|
||||
int dispatch_interrupt(struct intr_ctx *ctx)
|
||||
{
|
||||
int vr = ctx->vector;
|
||||
int irq = vector_to_irq[vr];
|
||||
struct irq_desc *desc;
|
||||
|
||||
if (irq == IRQ_INVALID)
|
||||
goto ERR;
|
||||
|
||||
desc = irq_desc_base + irq;
|
||||
per_cpu(irq_count, get_cpu_id())[irq]++;
|
||||
|
||||
if (vr != desc->vector)
|
||||
goto ERR;
|
||||
|
||||
if (desc->used == IRQ_NOT_ASSIGNED || !desc->irq_handler) {
|
||||
/* mask irq if possible */
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
desc->irq_handler(desc, desc->handler_data);
|
||||
return 0;
|
||||
ERR:
|
||||
return handle_spurious_interrupt(vr);
|
||||
}
|
||||
|
||||
int handle_level_interrupt_common(struct irq_desc *desc,
|
||||
__unused void *handler_data)
|
||||
{
|
||||
struct dev_handler_node *dev = desc->dev_list;
|
||||
spinlock_rflags;
|
||||
|
||||
/*
|
||||
	 * give another core a chance: return without taking irq_lock
	 * and record the irq_lost count here
|
||||
*/
|
||||
if (desc->state != IRQ_DESC_PENDING) {
|
||||
send_lapic_eoi();
|
||||
desc->irq_lost_cnt++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
spinlock_irqsave_obtain(&desc->irq_lock);
|
||||
desc->state = IRQ_DESC_IN_PROCESS;
|
||||
|
||||
	/* mask ioapic pin */
|
||||
if (irq_is_gsi(desc->irq))
|
||||
GSI_MASK_IRQ(desc->irq);
|
||||
|
||||
/* Send EOI to LAPIC/IOAPIC IRR */
|
||||
send_lapic_eoi();
|
||||
|
||||
while (dev) {
|
||||
if (dev->dev_handler)
|
||||
dev->dev_handler(desc->irq, dev->dev_data);
|
||||
dev = dev->next;
|
||||
}
|
||||
|
||||
if (irq_is_gsi(desc->irq))
|
||||
GSI_UNMASK_IRQ(desc->irq);
|
||||
|
||||
desc->state = IRQ_DESC_PENDING;
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int common_handler_edge(struct irq_desc *desc, __unused void *handler_data)
|
||||
{
|
||||
struct dev_handler_node *dev = desc->dev_list;
|
||||
spinlock_rflags;
|
||||
|
||||
/*
|
||||
	 * give another core a chance: return without taking irq_lock
	 * and record the irq_lost count here
|
||||
*/
|
||||
if (desc->state != IRQ_DESC_PENDING) {
|
||||
send_lapic_eoi();
|
||||
desc->irq_lost_cnt++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
spinlock_irqsave_obtain(&desc->irq_lock);
|
||||
desc->state = IRQ_DESC_IN_PROCESS;
|
||||
|
||||
/* Send EOI to LAPIC/IOAPIC IRR */
|
||||
send_lapic_eoi();
|
||||
|
||||
while (dev) {
|
||||
if (dev->dev_handler)
|
||||
dev->dev_handler(desc->irq, dev->dev_data);
|
||||
dev = dev->next;
|
||||
}
|
||||
|
||||
desc->state = IRQ_DESC_PENDING;
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int common_dev_handler_level(struct irq_desc *desc, __unused void *handler_data)
|
||||
{
|
||||
struct dev_handler_node *dev = desc->dev_list;
|
||||
spinlock_rflags;
|
||||
|
||||
/*
|
||||
	 * give another core a chance: return without taking irq_lock
	 * and record the irq_lost count here
|
||||
*/
|
||||
if (desc->state != IRQ_DESC_PENDING) {
|
||||
send_lapic_eoi();
|
||||
desc->irq_lost_cnt++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
spinlock_irqsave_obtain(&desc->irq_lock);
|
||||
desc->state = IRQ_DESC_IN_PROCESS;
|
||||
|
||||
	/* mask ioapic pin */
|
||||
if (irq_is_gsi(desc->irq))
|
||||
GSI_MASK_IRQ(desc->irq);
|
||||
|
||||
/* Send EOI to LAPIC/IOAPIC IRR */
|
||||
send_lapic_eoi();
|
||||
|
||||
while (dev) {
|
||||
if (dev->dev_handler)
|
||||
dev->dev_handler(desc->irq, dev->dev_data);
|
||||
dev = dev->next;
|
||||
}
|
||||
|
||||
desc->state = IRQ_DESC_PENDING;
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
|
||||
	/* we do not unmask the irq until the guest EOIs the vector */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* no desc->irq_lock for quick handling local interrupt like lapic timer */
|
||||
int quick_handler_nolock(struct irq_desc *desc, __unused void *handler_data)
|
||||
{
|
||||
struct dev_handler_node *dev = desc->dev_list;
|
||||
|
||||
/* Send EOI to LAPIC/IOAPIC IRR */
|
||||
send_lapic_eoi();
|
||||
|
||||
while (dev) {
|
||||
if (dev->dev_handler)
|
||||
dev->dev_handler(desc->irq, dev->dev_data);
|
||||
dev = dev->next;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void update_irq_handler(int irq, irq_handler_t func)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
if (irq >= NR_MAX_IRQS)
|
||||
return;
|
||||
|
||||
desc = irq_desc_base + irq;
|
||||
spinlock_irqsave_obtain(&desc->irq_lock);
|
||||
desc->irq_handler = func;
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
}
|
||||
|
||||
void unregister_handler_common(struct dev_handler_node *node)
|
||||
{
|
||||
struct dev_handler_node *head;
|
||||
struct irq_desc *desc;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
if (node == NULL)
|
||||
return;
|
||||
|
||||
dev_dbg(ACRN_DBG_IRQ, "[%s] %s irq%d vr:0x%x",
|
||||
__func__, node->name,
|
||||
dev_to_irq(node),
|
||||
dev_to_vector(node));
|
||||
|
||||
desc = node->desc;
|
||||
spinlock_irqsave_obtain(&desc->irq_lock);
|
||||
|
||||
head = desc->dev_list;
|
||||
if (head == node) {
|
||||
desc->dev_list = NULL;
|
||||
goto UNLOCK_EXIT;
|
||||
}
|
||||
|
||||
while (head->next) {
|
||||
if (head->next == node)
|
||||
break;
|
||||
head = head->next;
|
||||
}
|
||||
|
||||
head->next = node->next;
|
||||
|
||||
UNLOCK_EXIT:
|
||||
spinlock_irqrestore_release(&desc->irq_lock);
|
||||
irq_desc_try_free_vector(desc->irq);
|
||||
free(node);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate IRQ with Vector from 0x20 ~ 0xDF
|
||||
*/
|
||||
struct dev_handler_node*
|
||||
normal_register_handler(int irq,
|
||||
dev_handler_t func,
|
||||
void *dev_data,
|
||||
bool share,
|
||||
bool lowpri,
|
||||
const char *name)
|
||||
{
|
||||
struct irq_request_info info;
|
||||
|
||||
info.vector = -1;
|
||||
info.lowpri = lowpri;
|
||||
info.func = func;
|
||||
info.dev_data = dev_data;
|
||||
info.share = share;
|
||||
info.name = (char *)name;
|
||||
|
||||
return common_register_handler(irq, &info);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate IRQ with vector from 0xE0 ~ 0xFF
|
||||
 * Allocate an IRQ and install the ISR on that specific cpu.
 * Users can install the same irq/isr on different CPUs by calling this
 * function multiple times
|
||||
*/
|
||||
struct dev_handler_node*
|
||||
pri_register_handler(int irq,
|
||||
int vector,
|
||||
dev_handler_t func,
|
||||
void *dev_data,
|
||||
const char *name)
|
||||
{
|
||||
struct irq_request_info info;
|
||||
|
||||
if (vector < VECTOR_FOR_PRI_START || vector > VECTOR_FOR_PRI_END)
|
||||
return NULL;
|
||||
|
||||
info.vector = vector;
|
||||
info.lowpri = false;
|
||||
info.func = func;
|
||||
info.dev_data = dev_data;
|
||||
info.share = true;
|
||||
info.name = (char *)name;
|
||||
|
||||
return common_register_handler(irq, &info);
|
||||
}
|
||||
|
||||
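/*
 * Hypothetical usage sketch (not part of the original file): a device
 * driver could hook a shared, low-priority handler onto a GSI with
 * normal_register_handler(); my_dev_isr, my_dev_data and the GSI number
 * are placeholders, and dev_handler_t is assumed to take the irq number
 * and the registered dev_data, as the dispatch code above suggests.
 *
 *	struct dev_handler_node *node =
 *		normal_register_handler(20, my_dev_isr, my_dev_data,
 *					true, true, "my_dev");
 *	if (node == NULL)
 *		pr_err("failed to register my_dev handler");
 *	...
 *	unregister_handler_common(node);
 */
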
int get_cpu_interrupt_info(char *str, int str_max)
|
||||
{
|
||||
int irq, vector, pcpu_id, len, size = str_max;
|
||||
struct irq_desc *desc;
|
||||
|
||||
len = snprintf(str, size, "\r\nIRQ\tVECTOR");
|
||||
size -= len;
|
||||
str += len;
|
||||
for (pcpu_id = 0; pcpu_id < phy_cpu_num; pcpu_id++) {
|
||||
len = snprintf(str, size, "\tCPU%d", pcpu_id);
|
||||
size -= len;
|
||||
str += len;
|
||||
}
|
||||
len = snprintf(str, size, "\tLOST\tSHARE");
|
||||
size -= len;
|
||||
str += len;
|
||||
|
||||
for (irq = 0; irq < NR_MAX_IRQS; irq++) {
|
||||
desc = irq_desc_base + irq;
|
||||
vector = irq_to_vector(irq);
|
||||
if (desc->used != IRQ_NOT_ASSIGNED &&
|
||||
vector != VECTOR_INVALID) {
|
||||
len = snprintf(str, size, "\r\n%d\t0x%X", irq, vector);
|
||||
size -= len;
|
||||
str += len;
|
||||
for (pcpu_id = 0; pcpu_id < phy_cpu_num; pcpu_id++) {
|
||||
len = snprintf(str, size, "\t%d",
per_cpu(irq_count, pcpu_id)[irq]);
|
||||
size -= len;
|
||||
str += len;
|
||||
}
|
||||
len = snprintf(str, size, "\t%d\t%s",
|
||||
desc->irq_lost_cnt,
|
||||
desc->used == IRQ_ASSIGNED_SHARED ?
|
||||
"shared" : "no-shared");
|
||||
size -= len;
|
||||
str += len;
|
||||
}
|
||||
}
|
||||
snprintf(str, size, "\r\n");
|
||||
return 0;
|
||||
}
|
932
hypervisor/arch/x86/mmu.c
Normal file
@@ -0,0 +1,932 @@
|
||||
/*-
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* Copyright (c) 2017 Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <bsp_extern.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
static void *mmu_pml4_addr;
|
||||
|
||||
enum mem_map_request_type {
|
||||
PAGING_REQUEST_TYPE_MAP = 0, /* Creates a new mapping. */
|
||||
PAGING_REQUEST_TYPE_UNMAP = 1, /* Removes a pre-existing entry */
|
||||
PAGING_REQUEST_TYPE_MODIFY = 2,
|
||||
/* Modifies a pre-existing entry's attributes. */
|
||||
PAGING_REQUEST_TYPE_UNKNOWN,
|
||||
};
|
||||
|
||||
struct mm_capability {
|
||||
/* EPT and MMU 1-GByte page supported flag */
|
||||
bool ept_1gb_page_supported;
|
||||
bool invept_supported;
|
||||
bool invept_single_context_supported;
|
||||
bool invept_global_context_supported;
|
||||
bool invvpid_supported;
|
||||
bool invvpid_single_context_supported;
|
||||
bool invvpid_global_context_supported;
|
||||
bool mmu_1gb_page_supported;
|
||||
};
|
||||
static struct mm_capability mm_caps;
|
||||
|
||||
#define INVEPT_TYPE_SINGLE_CONTEXT 1UL
|
||||
#define INVEPT_TYPE_ALL_CONTEXTS 2UL
|
||||
#define INVEPT_SET_ERROR_CODE \
|
||||
" jnc 1f\n" \
|
||||
" mov $1, %0\n" /* CF: error = 1 */ \
|
||||
" jmp 3f\n" \
|
||||
"1: jnz 2f\n" \
|
||||
" mov $2, %0\n" /* ZF: error = 2 */ \
|
||||
" jmp 3f\n" \
|
||||
"2: mov $0, %0\n" \
|
||||
"3:"
|
||||
|
||||
struct invept_desc {
|
||||
uint64_t eptp;
|
||||
uint64_t _res;
|
||||
};
|
||||
|
||||
static inline void _invept(uint64_t type, struct invept_desc desc)
|
||||
{
|
||||
int error = 0;
|
||||
|
||||
asm volatile ("invept %1, %2\n"
|
||||
INVEPT_SET_ERROR_CODE
|
||||
: "=r" (error)
|
||||
: "m" (desc), "r" (type)
|
||||
: "memory");
|
||||
|
||||
ASSERT(error == 0, "invept error");
|
||||
}
|
||||
|
||||
static void check_mmu_capability(void)
|
||||
{
|
||||
uint64_t val;
|
||||
uint32_t eax, ebx, ecx, edx;
|
||||
|
||||
memset(&mm_caps, 0, sizeof(struct mm_capability));
|
||||
|
||||
/* Read the MSR register of EPT and VPID Capability - SDM A.10 */
|
||||
val = msr_read(MSR_IA32_VMX_EPT_VPID_CAP);
|
||||
mm_caps.ept_1gb_page_supported = (val & MSR_VMX_EPT_VPID_CAP_1GB)
|
||||
? (true) : (false);
|
||||
mm_caps.invept_supported =
|
||||
(val & MSR_VMX_INVEPT) ? (true) : (false);
|
||||
mm_caps.invept_single_context_supported =
|
||||
(val & MSR_VMX_INVEPT_SINGLE_CONTEXT) ? (true) : (false);
|
||||
mm_caps.invept_global_context_supported =
|
||||
(val & MSR_VMX_INVEPT_GLOBAL_CONTEXT) ? (true) : (false);
|
||||
mm_caps.invvpid_supported =
|
||||
(val & MSR_VMX_INVVPID) ? (true) : (false);
|
||||
mm_caps.invvpid_single_context_supported =
|
||||
(val & MSR_VMX_INVVPID_SINGLE_CONTEXT) ? (true) : (false);
|
||||
mm_caps.invvpid_global_context_supported =
|
||||
(val & MSR_VMX_INVVPID_GLOBAL_CONTEXT) ? (true) : (false);
|
||||
|
||||
/* Read CPUID to check if PAGE1GB is supported
|
||||
* SDM 4.1.4 If CPUID.80000001H:EDX.Page1GB[bit26]=1,
|
||||
* 1-GByte pages are supported with 4-level paging
|
||||
*/
|
||||
cpuid(CPUID_EXTEND_FUNCTION_1, &eax, &ebx, &ecx, &edx);
|
||||
mm_caps.mmu_1gb_page_supported = (edx & CPUID_EDX_PAGE1GB) ?
|
||||
(true) : (false);
|
||||
}
|
||||
|
||||
static inline bool check_invept_single_support(void)
|
||||
{
|
||||
return mm_caps.invept_supported &&
|
||||
mm_caps.invept_single_context_supported;
|
||||
}
|
||||
|
||||
static inline bool check_invept_global_support(void)
|
||||
{
|
||||
return mm_caps.invept_supported &&
|
||||
mm_caps.invept_global_context_supported;
|
||||
}
|
||||
|
||||
void mmu_invept(struct vcpu *vcpu)
|
||||
{
|
||||
struct invept_desc desc = {0};
|
||||
|
||||
if (check_invept_single_support()) {
|
||||
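/* EPTP format: bits 2:0 = EPT memory type (6 = write-back),
 * bits 5:3 = EPT page-walk length minus 1 (3 => 4 levels).
 */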
desc.eptp = (uint64_t) vcpu->vm->arch_vm.ept | (3 << 3) | 6;
|
||||
_invept(INVEPT_TYPE_SINGLE_CONTEXT, desc);
|
||||
} else if (check_invept_global_support())
|
||||
_invept(INVEPT_TYPE_ALL_CONTEXTS, desc);
|
||||
}
|
||||
|
||||
static bool check_mmu_1gb_support(struct map_params *map_params)
|
||||
{
|
||||
bool status = false;
|
||||
|
||||
if (map_params->page_table_type == PT_EPT)
|
||||
status = mm_caps.ept_1gb_page_supported;
|
||||
else
|
||||
status = mm_caps.mmu_1gb_page_supported;
|
||||
return status;
|
||||
}
|
||||
|
||||
static uint32_t map_mem_region(void *vaddr, void *paddr,
|
||||
void *table_base, uint64_t attr, uint32_t table_level,
|
||||
int ept_entry, enum mem_map_request_type request_type)
|
||||
{
|
||||
uint64_t table_entry;
|
||||
uint64_t table_present;
|
||||
uint32_t table_offset;
|
||||
uint32_t mapped_size;
|
||||
|
||||
if (table_base == NULL || table_level >= IA32E_UNKNOWN
|
||||
|| request_type >= PAGING_REQUEST_TYPE_UNKNOWN) {
|
||||
/* Shouldn't go here */
|
||||
ASSERT(false, "Incorrect Arguments. Failed to map region");
|
||||
}
|
||||
|
||||
/* switch based on level of table */
|
||||
switch (table_level) {
|
||||
case IA32E_PDPT:
|
||||
|
||||
/* Get offset to the entry in the PDPT for this address */
|
||||
table_offset = IA32E_PDPTE_INDEX_CALC(vaddr);
|
||||
|
||||
/* PS bit must be set for these entries to be mapped */
|
||||
attr |= IA32E_PDPTE_PS_BIT;
|
||||
|
||||
/* Set mapped size to 1 GB */
|
||||
mapped_size = MEM_1G;
|
||||
|
||||
break;
|
||||
|
||||
case IA32E_PD:
|
||||
|
||||
/* Get offset to the entry in the PD for this address */
|
||||
table_offset = IA32E_PDE_INDEX_CALC(vaddr);
|
||||
|
||||
/* PS bit must be set for these entries to be mapped */
|
||||
attr |= IA32E_PDE_PS_BIT;
|
||||
|
||||
/* Set mapped size to 2 MB */
|
||||
mapped_size = MEM_2M;
|
||||
|
||||
break;
|
||||
|
||||
case IA32E_PT:
|
||||
|
||||
/* Get offset to the entry in the PT for this address */
|
||||
table_offset = IA32E_PTE_INDEX_CALC(vaddr);
|
||||
|
||||
/* NOTE: No PS bit in page table entries */
|
||||
|
||||
/* Set mapped size to 4 KB */
|
||||
mapped_size = MEM_4K;
|
||||
|
||||
/* If not an EPT entry, see if the PAT bit is set for the PDPT entry
|
||||
*/
|
||||
if ((!ept_entry) && (attr & IA32E_PDPTE_PAT_BIT)) {
|
||||
/* The PAT bit is set; Clear it and set the page table
|
||||
* PAT bit instead
|
||||
*/
|
||||
attr &= (uint64_t) (~((uint64_t) IA32E_PDPTE_PAT_BIT));
|
||||
attr |= IA32E_PTE_PAT_BIT;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case IA32E_PML4:
|
||||
default:
|
||||
|
||||
/* Set mapping size to 0 - can't map memory in PML4 */
|
||||
mapped_size = 0;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
/* Check to see if mapping should occur */
|
||||
if (mapped_size != 0) {
|
||||
/* Get current table entry */
|
||||
uint64_t tmp = MEM_READ64(table_base + table_offset);
|
||||
|
||||
/* Check if EPT entry */
|
||||
if (ept_entry) {
|
||||
/* Use read/write/execute bits to determine presence of
|
||||
* entry
|
||||
*/
|
||||
table_present = (IA32E_EPT_R_BIT |
|
||||
IA32E_EPT_W_BIT | IA32E_EPT_X_BIT);
|
||||
} else {
|
||||
/* Use the P bit to determine if an entry is present */
|
||||
table_present = IA32E_COMM_P_BIT;
|
||||
}
|
||||
|
||||
switch (request_type) {
|
||||
case PAGING_REQUEST_TYPE_MAP:
|
||||
{
|
||||
/* No need to confirm that the current table entry
 * isn't already present;
 * this supports map --> remap.
 */
|
||||
table_entry = (ept_entry
|
||||
? attr
|
||||
: (attr | IA32E_COMM_P_BIT));
|
||||
|
||||
table_entry |= (uint64_t)paddr;
|
||||
|
||||
/* Write the table entry to map this memory */
|
||||
MEM_WRITE64(table_base + table_offset, table_entry);
|
||||
break;
|
||||
}
|
||||
case PAGING_REQUEST_TYPE_UNMAP:
|
||||
{
|
||||
if (tmp & table_present) {
|
||||
/* Table is present.
|
||||
* Write the table entry to map this memory
|
||||
*/
|
||||
MEM_WRITE64(table_base + table_offset, 0);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PAGING_REQUEST_TYPE_MODIFY:
|
||||
{
|
||||
/* Allow mapping or modification as requested. */
|
||||
table_entry = (ept_entry
|
||||
? attr : (attr | IA32E_COMM_P_BIT));
|
||||
|
||||
table_entry |= (uint64_t) paddr;
|
||||
|
||||
/* Write the table entry to map this memory */
|
||||
MEM_WRITE64(table_base + table_offset, table_entry);
|
||||
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ASSERT("Bad memory map request type" == 0, "");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return mapped size to caller */
|
||||
return mapped_size;
|
||||
}
|
||||
|
||||
static uint32_t fetch_page_table_offset(void *addr, uint32_t table_level)
|
||||
{
|
||||
uint32_t table_offset;
|
||||
|
||||
/* Switch based on level of table */
|
||||
switch (table_level) {
|
||||
case IA32E_PML4:
|
||||
|
||||
/* Get offset to the entry in the PML4
|
||||
* for this address
|
||||
*/
|
||||
table_offset = IA32E_PML4E_INDEX_CALC(addr);
|
||||
break;
|
||||
|
||||
case IA32E_PDPT:
|
||||
|
||||
/* Get offset to the entry in the PDPT
|
||||
* for this address
|
||||
*/
|
||||
table_offset = IA32E_PDPTE_INDEX_CALC(addr);
|
||||
break;
|
||||
|
||||
case IA32E_PD:
|
||||
|
||||
/* Get offset to the entry in the PD
|
||||
* for this address
|
||||
*/
|
||||
table_offset = IA32E_PDE_INDEX_CALC(addr);
|
||||
break;
|
||||
|
||||
case IA32E_PT:
|
||||
table_offset = IA32E_PTE_INDEX_CALC(addr);
|
||||
break;
|
||||
|
||||
default:
|
||||
pr_err("Wrong page table level = 0x%lx", table_level);
|
||||
ASSERT(false, "Wrong page table level");
|
||||
break;
|
||||
}
|
||||
|
||||
return table_offset;
|
||||
}
|
||||
|
||||
static inline uint32_t check_page_table_present(struct map_params *map_params,
|
||||
uint64_t table_entry)
|
||||
{
|
||||
if (map_params->page_table_type == PT_EPT) {
|
||||
table_entry &= (IA32E_EPT_R_BIT | IA32E_EPT_W_BIT |
|
||||
IA32E_EPT_X_BIT);
|
||||
} else {
|
||||
table_entry &= (IA32E_COMM_P_BIT);
|
||||
}
|
||||
|
||||
return (table_entry) ? PT_PRESENT : PT_NOT_PRESENT;
|
||||
}
|
||||
|
||||
static uint64_t get_table_entry(struct map_params *map_params, void *addr,
|
||||
void *table_base, uint32_t table_level)
|
||||
{
|
||||
uint32_t table_offset;
|
||||
uint64_t table_entry;
|
||||
int status = 0;
|
||||
|
||||
if (table_base == NULL
|
||||
|| table_level >= IA32E_UNKNOWN
|
||||
|| map_params == NULL) {
|
||||
status = -EINVAL;
|
||||
}
|
||||
ASSERT(status == 0, "Incorrect Arguments");
|
||||
|
||||
table_offset = fetch_page_table_offset(addr, table_level);
|
||||
|
||||
/* Read the table entry */
|
||||
table_entry = MEM_READ64(table_base + table_offset);
|
||||
|
||||
/* Return the next table in the walk */
|
||||
return table_entry;
|
||||
}
|
||||
|
||||
static void *walk_paging_struct(void *addr, void *table_base,
|
||||
uint32_t table_level, struct map_params *map_params)
|
||||
{
|
||||
uint32_t table_offset;
|
||||
uint64_t table_entry;
|
||||
uint64_t table_present;
|
||||
/* If table_level == IA32E_PT, just return the same address;
 * we can't walk down any further.
 */
|
||||
void *sub_table_addr = ((table_level == IA32E_PT) ? table_base:NULL);
|
||||
int status = 0;
|
||||
|
||||
if (table_base == NULL || table_level >= IA32E_UNKNOWN
|
||||
|| map_params == NULL) {
|
||||
status = -EINVAL;
|
||||
}
|
||||
ASSERT(status == 0, "Incorrect Arguments");
|
||||
|
||||
table_offset = fetch_page_table_offset(addr, table_level);
|
||||
|
||||
/* See if we can skip the rest */
|
||||
if (sub_table_addr != table_base) {
|
||||
/* Read the table entry */
|
||||
table_entry = MEM_READ64(table_base + table_offset);
|
||||
|
||||
/* Check if EPT entry being created */
|
||||
if (map_params->page_table_type == PT_EPT) {
|
||||
/* Set table present bits to any of the
|
||||
* read/write/execute bits
|
||||
*/
|
||||
table_present = (IA32E_EPT_R_BIT | IA32E_EPT_W_BIT |
|
||||
IA32E_EPT_X_BIT);
|
||||
} else {
|
||||
/* Set table present bits to the P bit or the R/W bit */
|
||||
table_present = (IA32E_COMM_P_BIT | IA32E_COMM_RW_BIT);
|
||||
}
|
||||
|
||||
/* Determine if a valid entry exists */
|
||||
if ((table_entry & table_present) == 0) {
|
||||
/* No entry present - need to allocate a new table */
|
||||
sub_table_addr =
|
||||
alloc_paging_struct();
|
||||
/* Check to ensure memory available for this structure*/
|
||||
if (sub_table_addr == 0) {
|
||||
/* Error: Unable to find table memory necessary
|
||||
* to map memory
|
||||
*/
|
||||
ASSERT(false,
"Failed to find table memory "
"for mapping memory");
|
||||
|
||||
return sub_table_addr;
|
||||
}
|
||||
|
||||
/* Write entry to current table to reference the new
|
||||
* sub-table
|
||||
*/
|
||||
MEM_WRITE64(table_base + table_offset,
|
||||
(uint64_t) sub_table_addr | table_present);
|
||||
} else {
|
||||
/* Get address of the sub-table */
|
||||
sub_table_addr = (void *)(table_entry & IA32E_REF_MASK);
|
||||
}
|
||||
}
|
||||
|
||||
/* Return the next table in the walk */
|
||||
return sub_table_addr;
|
||||
}
|
||||
|
||||
void *get_paging_pml4(void)
|
||||
{
|
||||
/* Return address to caller */
|
||||
return mmu_pml4_addr;
|
||||
}
|
||||
|
||||
void enable_paging(void *pml4_base_addr)
|
||||
{
|
||||
CPU_CR_WRITE(cr3, (unsigned long)pml4_base_addr);
|
||||
}
|
||||
|
||||
void init_paging(void)
|
||||
{
|
||||
struct map_params map_params;
|
||||
struct e820_entry *entry;
|
||||
uint32_t i;
|
||||
int attr_wb = (MMU_MEM_ATTR_READ |
|
||||
MMU_MEM_ATTR_WRITE |
|
||||
MMU_MEM_ATTR_EXECUTE |
|
||||
MMU_MEM_ATTR_WB_CACHE);
|
||||
int attr_uc = (MMU_MEM_ATTR_READ |
|
||||
MMU_MEM_ATTR_WRITE |
|
||||
MMU_MEM_ATTR_EXECUTE |
|
||||
MMU_MEM_ATTR_UNCACHED);
|
||||
|
||||
pr_dbg("HV MMU Initialization");
|
||||
|
||||
check_mmu_capability();
|
||||
|
||||
/* Allocate memory for Hypervisor PML4 table */
|
||||
mmu_pml4_addr = alloc_paging_struct();
|
||||
|
||||
init_e820();
|
||||
obtain_e820_mem_info();
|
||||
|
||||
/* Loop through all memory regions in the e820 table */
|
||||
map_params.page_table_type = PT_HOST;
|
||||
map_params.pml4_base = mmu_pml4_addr;
|
||||
|
||||
/* Map all memory regions to UC attribute */
|
||||
map_mem(&map_params, (void *)e820_mem.mem_bottom,
|
||||
(void *)e820_mem.mem_bottom,
|
||||
(e820_mem.mem_top - e820_mem.mem_bottom),
|
||||
attr_uc);
|
||||
|
||||
/* Modify WB attribute for E820_TYPE_RAM */
|
||||
for (i = 0, entry = &e820[0];
|
||||
i < e820_entries;
|
||||
i++, entry = &e820[i]) {
|
||||
if (entry->type == E820_TYPE_RAM) {
|
||||
modify_mem(&map_params, (void *)entry->baseaddr,
|
||||
(void *)entry->baseaddr,
|
||||
entry->length, attr_wb);
|
||||
}
|
||||
}
|
||||
|
||||
pr_dbg("Enabling MMU ");
|
||||
|
||||
/* Enable paging */
|
||||
enable_paging(mmu_pml4_addr);
|
||||
}
|
||||
|
||||
void *alloc_paging_struct(void)
|
||||
{
|
||||
void *ptr = NULL;
|
||||
|
||||
/* Allocate a page from Hypervisor heap */
|
||||
ptr = alloc_page();
|
||||
|
||||
ASSERT(ptr, "page alloc failed!");
|
||||
memset(ptr, 0, CPU_PAGE_SIZE);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
uint64_t config_page_table_attr(struct map_params *map_params, uint32_t flags)
|
||||
{
|
||||
int ept_entry = map_params->page_table_type;
|
||||
uint64_t attr = 0;
|
||||
|
||||
/* Convert generic memory flags to architecture specific attributes */
|
||||
/* Check if read access */
|
||||
if (flags & MMU_MEM_ATTR_READ) {
|
||||
/* Configure for read access */
|
||||
attr |=
|
||||
(ept_entry ? IA32E_EPT_R_BIT : MMU_MEM_ATTR_BIT_READ_WRITE);
|
||||
}
|
||||
|
||||
/* Check for write access */
|
||||
if (flags & MMU_MEM_ATTR_WRITE) {
|
||||
/* Configure for write access */
|
||||
attr |=
|
||||
(ept_entry ? IA32E_EPT_W_BIT : MMU_MEM_ATTR_BIT_READ_WRITE);
|
||||
}
|
||||
|
||||
/* Check for execute access */
|
||||
if (flags & MMU_MEM_ATTR_EXECUTE) {
|
||||
/* Configure for execute (EPT only) */
|
||||
attr |= (ept_entry ? IA32E_EPT_X_BIT : 0);
|
||||
}
|
||||
|
||||
/* EPT & VT-d share the same page tables; set the SNP bit
 * to force snooping of PCIe devices if the page
 * is cacheable.
 */
|
||||
if ((flags & MMU_MEM_ATTR_UNCACHED) != MMU_MEM_ATTR_UNCACHED
|
||||
&& ept_entry == PT_EPT) {
|
||||
attr |= IA32E_EPT_SNOOP_CTRL;
|
||||
}
|
||||
|
||||
/* Check for cache / memory types */
|
||||
if (flags & MMU_MEM_ATTR_WB_CACHE) {
|
||||
/* Configure for write back cache */
|
||||
attr |=
|
||||
(ept_entry ? IA32E_EPT_WB : MMU_MEM_ATTR_TYPE_CACHED_WB);
|
||||
} else if (flags & MMU_MEM_ATTR_WT_CACHE) {
|
||||
/* Configure for write through cache */
|
||||
attr |=
|
||||
(ept_entry ? IA32E_EPT_WT : MMU_MEM_ATTR_TYPE_CACHED_WT);
|
||||
} else if (flags & MMU_MEM_ATTR_UNCACHED) {
|
||||
/* Configure for uncached */
|
||||
attr |=
|
||||
(ept_entry ? IA32E_EPT_UNCACHED : MMU_MEM_ATTR_TYPE_UNCACHED);
|
||||
} else if (flags & MMU_MEM_ATTR_WC) {
|
||||
/* Configure for write combining */
|
||||
attr |=
|
||||
(ept_entry ? IA32E_EPT_WC : MMU_MEM_ATTR_TYPE_WRITE_COMBINED);
|
||||
} else {
|
||||
/* Configure for write protected */
|
||||
attr |=
|
||||
(ept_entry ? IA32E_EPT_WP : MMU_MEM_ATTR_TYPE_WRITE_PROTECTED);
|
||||
}
|
||||
return attr;
|
||||
|
||||
}
|
||||
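/*
 * Illustrative sketch (an assumption, not part of the original file): for
 * ordinary write-back RAM the attribute produced above combines as follows.
 */
static inline uint64_t example_wb_ram_attr(struct map_params *map_params)
{
	/* read + write + execute, write-back cacheable */
	uint32_t flags = MMU_MEM_ATTR_READ | MMU_MEM_ATTR_WRITE |
			MMU_MEM_ATTR_EXECUTE | MMU_MEM_ATTR_WB_CACHE;

	/* For PT_EPT this yields R|W|X, the WB memory type and the snoop
	 * control bit; for PT_HOST it yields the R/W bit and the WB type
	 * (the present bit is added later in map_mem_region()).
	 */
	return config_page_table_attr(map_params, flags);
}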
|
||||
void obtain_last_page_table_entry(struct map_params *map_params,
|
||||
struct entry_params *entry, void *addr, bool direct)
|
||||
{
|
||||
uint64_t table_entry;
|
||||
uint32_t table_present = 0;
|
||||
/* Obtain the PML4 address */
|
||||
void *table_addr = direct ? (map_params->pml4_base)
|
||||
: (map_params->pml4_inverted);
|
||||
|
||||
/* Obtain page table entry from PML4 table*/
|
||||
table_entry = get_table_entry(map_params, addr,
|
||||
table_addr, IA32E_PML4);
|
||||
table_present = check_page_table_present(map_params, table_entry);
|
||||
if (table_present == PT_NOT_PRESENT) {
|
||||
/* PML4E not present, return PML4 base address */
|
||||
entry->entry_level = IA32E_PML4;
|
||||
entry->entry_base = (uint64_t)table_addr;
|
||||
entry->entry_present = PT_NOT_PRESENT;
|
||||
entry->page_size = check_mmu_1gb_support(map_params) ?
|
||||
(PAGE_SIZE_1G) : (PAGE_SIZE_2M);
|
||||
entry->entry_off = fetch_page_table_offset(addr, IA32E_PML4);
|
||||
entry->entry_val = table_entry;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Obtain page table entry from PDPT table*/
|
||||
table_addr = (void *)(table_entry & IA32E_REF_MASK);
|
||||
table_entry = get_table_entry(map_params, addr,
|
||||
table_addr, IA32E_PDPT);
|
||||
table_present = check_page_table_present(map_params, table_entry);
|
||||
if (table_present == PT_NOT_PRESENT) {
|
||||
/* PDPTE not present, return PDPT base address */
|
||||
entry->entry_level = IA32E_PDPT;
|
||||
entry->entry_base = (uint64_t)table_addr;
|
||||
entry->entry_present = PT_NOT_PRESENT;
|
||||
entry->page_size = check_mmu_1gb_support(map_params) ?
|
||||
(PAGE_SIZE_1G) : (PAGE_SIZE_2M);
|
||||
entry->entry_off = fetch_page_table_offset(addr, IA32E_PDPT);
|
||||
entry->entry_val = table_entry;
|
||||
return;
|
||||
}
|
||||
if (table_entry & IA32E_PDPTE_PS_BIT) {
|
||||
/* 1GB page size, return the base addr of the pg entry*/
|
||||
entry->entry_level = IA32E_PDPT;
|
||||
entry->entry_base = (uint64_t)table_addr;
|
||||
entry->page_size = check_mmu_1gb_support(map_params) ?
|
||||
(PAGE_SIZE_1G) : (PAGE_SIZE_2M);
|
||||
entry->entry_present = PT_PRESENT;
|
||||
entry->entry_off = fetch_page_table_offset(addr, IA32E_PDPT);
|
||||
entry->entry_val = table_entry;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Obtain page table entry from PD table*/
|
||||
table_addr = (void *)(table_entry&IA32E_REF_MASK);
|
||||
table_entry = get_table_entry(map_params, addr,
|
||||
table_addr, IA32E_PD);
|
||||
table_present = check_page_table_present(map_params, table_entry);
|
||||
if (table_present == PT_NOT_PRESENT) {
|
||||
/* PDE not present, return PDE base address */
|
||||
entry->entry_level = IA32E_PD;
|
||||
entry->entry_base = (uint64_t)table_addr;
|
||||
entry->entry_present = PT_NOT_PRESENT;
|
||||
entry->page_size = PAGE_SIZE_2M;
|
||||
entry->entry_off = fetch_page_table_offset(addr, IA32E_PD);
|
||||
entry->entry_val = table_entry;
|
||||
return;
|
||||
|
||||
}
|
||||
if (table_entry & IA32E_PDE_PS_BIT) {
|
||||
/* 2MB page size, return the base addr of the pg entry*/
|
||||
entry->entry_level = IA32E_PD;
|
||||
entry->entry_base = (uint64_t)table_addr;
|
||||
entry->entry_present = PT_PRESENT;
|
||||
entry->page_size = PAGE_SIZE_2M;
|
||||
entry->entry_off = fetch_page_table_offset(addr, IA32E_PD);
|
||||
entry->entry_val = table_entry;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Obtain page table entry from PT table*/
|
||||
table_addr = (void *)(table_entry&IA32E_REF_MASK);
|
||||
table_entry = get_table_entry(map_params, addr,
|
||||
table_addr, IA32E_PT);
|
||||
table_present = check_page_table_present(map_params, table_entry);
|
||||
entry->entry_present = ((table_present == PT_PRESENT)
|
||||
? (PT_PRESENT):(PT_NOT_PRESENT));
|
||||
entry->entry_level = IA32E_PT;
|
||||
entry->entry_base = (uint64_t)table_addr;
|
||||
entry->page_size = PAGE_SIZE_4K;
|
||||
entry->entry_off = fetch_page_table_offset(addr, IA32E_PT);
|
||||
entry->entry_val = table_entry;
|
||||
}
|
||||
|
||||
static uint64_t update_page_table_entry(struct map_params *map_params,
|
||||
void *paddr, void *vaddr, uint64_t size, uint64_t attr,
|
||||
enum mem_map_request_type request_type, bool direct)
|
||||
{
|
||||
uint64_t remaining_size = size;
|
||||
uint32_t adjustment_size;
|
||||
int ept_entry = map_params->page_table_type;
|
||||
/* Obtain the PML4 address */
|
||||
void *table_addr = direct ? (map_params->pml4_base)
|
||||
: (map_params->pml4_inverted);
|
||||
|
||||
/* Walk from the PML4 table to the PDPT table */
|
||||
table_addr = walk_paging_struct(vaddr, table_addr, IA32E_PML4,
|
||||
map_params);
|
||||
|
||||
if ((remaining_size >= MEM_1G)
|
||||
&& (MEM_ALIGNED_CHECK(vaddr, MEM_1G))
|
||||
&& (MEM_ALIGNED_CHECK(paddr, MEM_1G))
|
||||
&& check_mmu_1gb_support(map_params)) {
|
||||
/* Map this 1 GByte memory region */
|
||||
adjustment_size = map_mem_region(vaddr, paddr,
|
||||
table_addr, attr, IA32E_PDPT,
|
||||
ept_entry, request_type);
|
||||
} else if ((remaining_size >= MEM_2M)
|
||||
&& (MEM_ALIGNED_CHECK(vaddr, MEM_2M))
|
||||
&& (MEM_ALIGNED_CHECK(paddr, MEM_2M))) {
|
||||
/* Walk from the PDPT table to the PD table */
|
||||
table_addr = walk_paging_struct(vaddr, table_addr,
|
||||
IA32E_PDPT, map_params);
|
||||
/* Map this 2 MByte memory region */
|
||||
adjustment_size = map_mem_region(vaddr, paddr,
|
||||
table_addr, attr, IA32E_PD, ept_entry,
|
||||
request_type);
|
||||
} else {
|
||||
/* Walk from the PDPT table to the PD table */
|
||||
table_addr = walk_paging_struct(vaddr,
|
||||
table_addr, IA32E_PDPT, map_params);
|
||||
/* Walk from the PD table to the page table */
|
||||
table_addr = walk_paging_struct(vaddr,
|
||||
table_addr, IA32E_PD, map_params);
|
||||
/* Map this 4 KByte memory region */
|
||||
adjustment_size = map_mem_region(vaddr, paddr,
|
||||
table_addr, attr, IA32E_PT,
|
||||
ept_entry, request_type);
|
||||
}
|
||||
|
||||
return adjustment_size;
|
||||
|
||||
}
|
||||
|
||||
static uint64_t break_page_table(struct map_params *map_params, void *paddr,
|
||||
void *vaddr, uint64_t page_size, bool direct)
|
||||
{
|
||||
uint32_t i = 0;
|
||||
uint64_t pa;
|
||||
uint64_t attr = 0x00;
|
||||
uint64_t next_page_size = 0x00;
|
||||
void *sub_tab_addr = NULL;
|
||||
struct entry_params entry;
|
||||
|
||||
switch (page_size) {
|
||||
/* Breaking 1GB page to 2MB page*/
|
||||
case PAGE_SIZE_1G:
|
||||
next_page_size = PAGE_SIZE_2M;
|
||||
attr |= IA32E_PDE_PS_BIT;
|
||||
pr_info("%s, Breaking 1GB -->2MB vaddr=0x%llx",
|
||||
__func__, vaddr);
|
||||
break;
|
||||
|
||||
/* Breaking 2MB page to 4KB page*/
|
||||
case PAGE_SIZE_2M:
|
||||
next_page_size = PAGE_SIZE_4K;
|
||||
pr_info("%s, Breaking 2MB -->4KB vaddr=0x%llx",
|
||||
__func__, vaddr);
|
||||
break;
|
||||
|
||||
/* 4KB page, No action*/
|
||||
case PAGE_SIZE_4K:
|
||||
default:
|
||||
next_page_size = PAGE_SIZE_4K;
|
||||
pr_info("%s, Breaking 4KB no action vaddr=0x%llx",
|
||||
__func__, vaddr);
|
||||
break;
|
||||
}
|
||||
|
||||
if (page_size != next_page_size) {
|
||||
obtain_last_page_table_entry(map_params, &entry, vaddr, direct);
|
||||
|
||||
/* Need to allocate a new sub-table to hold the broken-down entries */
|
||||
sub_tab_addr = alloc_paging_struct();
|
||||
/* Check to ensure memory available for this structure */
|
||||
if (sub_tab_addr == 0) {
|
||||
/* Error:
|
||||
* Unable to find table memory necessary to map memory
|
||||
*/
|
||||
pr_err("Fail to find table memory for map memory");
|
||||
ASSERT(sub_tab_addr == 0, "");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The physical address may not be aligned to the
 * current page size; obtain the starting physical address
 * aligned down to the current page size.
 */
|
||||
pa = ((((uint64_t)paddr) / page_size) * page_size);
|
||||
if (map_params->page_table_type == PT_EPT) {
|
||||
/* Keep original attribute(here &0x3f)
|
||||
* bit 0(R) bit1(W) bit2(X) bit3~5 MT
|
||||
*/
|
||||
attr |= (entry.entry_val & 0x3f);
|
||||
} else {
|
||||
/* Keep original attribute(here &0x7f) */
|
||||
attr |= (entry.entry_val & 0x7f);
|
||||
}
|
||||
/* write all entries and keep original attr*/
|
||||
for (i = 0; i < IA32E_NUM_ENTRIES; i++) {
|
||||
MEM_WRITE64(sub_tab_addr + (i * IA32E_COMM_ENTRY_SIZE),
|
||||
(attr | (pa + (i * next_page_size))));
|
||||
}
|
||||
if (map_params->page_table_type == PT_EPT) {
|
||||
/* Write the table entry to map this memory,
|
||||
* SDM chapter28 figure 28-1
|
||||
* bit 0(R) bit1(W) bit2(X) bit3~5 MUST be reserved
|
||||
* (here &0x07)
|
||||
*/
|
||||
MEM_WRITE64(entry.entry_base + entry.entry_off,
|
||||
((entry.entry_val & 0x07) |
|
||||
((uint64_t)sub_tab_addr)));
|
||||
} else {
|
||||
/* Write the table entry to map this memory,
|
||||
* SDM chapter4 figure 4-11
|
||||
* bit0(P) bit1(RW) bit2(U/S) bit3(PWT) bit4(PCD)
|
||||
* bit5(A) bit6(D or Ignore)
|
||||
*/
|
||||
MEM_WRITE64(entry.entry_base + entry.entry_off,
|
||||
((entry.entry_val & 0x7f) |
|
||||
((uint64_t)sub_tab_addr)));
|
||||
}
|
||||
}
|
||||
|
||||
return next_page_size;
|
||||
}
|
||||
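/*
 * Worked example (illustrative): to change the attributes of a single 4 KB
 * page that currently sits inside a present 2 MB mapping, modify_paging()
 * below notices that the request is smaller than entry.page_size and calls
 * break_page_table(). That allocates a page table, fills all
 * IA32E_NUM_ENTRIES entries with 4 KB mappings inheriting the original
 * attributes, rewrites the PD entry to point at the new table, and returns
 * PAGE_SIZE_4K so the loop can retry and touch only the requested page.
 */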
|
||||
static void modify_paging(struct map_params *map_params, void *paddr,
|
||||
void *vaddr, uint64_t size, uint32_t flags,
|
||||
enum mem_map_request_type request_type, bool direct)
|
||||
{
|
||||
int64_t remaining_size;
|
||||
uint64_t adjust_size;
|
||||
uint64_t attr;
|
||||
int status = 0;
|
||||
struct entry_params entry;
|
||||
uint64_t page_size;
|
||||
uint64_t vaddr_end = ((uint64_t)vaddr) + size;
|
||||
|
||||
/* if the address is not PAGE aligned, will drop
|
||||
* the unaligned part
|
||||
*/
|
||||
paddr = (void *)ROUND_PAGE_UP((uint64_t)paddr);
|
||||
vaddr = (void *)ROUND_PAGE_UP((uint64_t)vaddr);
|
||||
vaddr_end = ROUND_PAGE_DOWN(vaddr_end);
|
||||
remaining_size = vaddr_end - (uint64_t)vaddr;
|
||||
|
||||
if ((request_type >= PAGING_REQUEST_TYPE_UNKNOWN)
|
||||
|| (map_params == NULL)) {
|
||||
pr_err("%s: vaddr=0x%llx size=0x%llx req_type=0x%lx",
|
||||
__func__, vaddr, size, request_type);
|
||||
status = -EINVAL;
|
||||
}
|
||||
ASSERT(status == 0, "Incorrect Arguments");
|
||||
|
||||
attr = config_page_table_attr(map_params, flags);
|
||||
/* Loop until the entire block of memory is appropriately
|
||||
* MAP/UNMAP/MODIFY
|
||||
*/
|
||||
while (remaining_size > 0) {
|
||||
obtain_last_page_table_entry(map_params, &entry, vaddr, direct);
|
||||
/* filter the unmap request, no action in this case*/
|
||||
page_size = entry.page_size;
|
||||
if ((request_type == PAGING_REQUEST_TYPE_UNMAP)
|
||||
&& (entry.entry_present == PT_NOT_PRESENT)) {
|
||||
adjust_size =
|
||||
page_size - ((uint64_t)(vaddr) % page_size);
|
||||
vaddr += adjust_size;
|
||||
paddr += adjust_size;
|
||||
remaining_size -= adjust_size;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* if the address is NOT aligned of current page size,
|
||||
* or required memory size < page size
|
||||
* need to break page firstly
|
||||
*/
|
||||
if (entry.entry_present == PT_PRESENT) {
|
||||
/* May need to break the page recursively in this case,
 * e.g. 1GB -> 2MB -> 4KB
 */
|
||||
while ((uint64_t)remaining_size < page_size
|
||||
|| (!MEM_ALIGNED_CHECK(vaddr, page_size))
|
||||
|| (!MEM_ALIGNED_CHECK(paddr, page_size))) {
|
||||
/* The breaking function return the page size
|
||||
* of next level page table
|
||||
*/
|
||||
page_size = break_page_table(map_params,
|
||||
paddr, vaddr, page_size, direct);
|
||||
}
|
||||
} else {
|
||||
page_size = ((uint64_t)remaining_size < page_size)
|
||||
? ((uint64_t)remaining_size) : (page_size);
|
||||
}
|
||||
/* The function return the memory size that one entry can map */
|
||||
adjust_size = update_page_table_entry(map_params, paddr, vaddr,
|
||||
page_size, attr, request_type, direct);
|
||||
vaddr += adjust_size;
|
||||
paddr += adjust_size;
|
||||
remaining_size -= adjust_size;
|
||||
}
|
||||
}
|
||||
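/*
 * Worked example (illustrative, assuming 1 GB pages are supported): mapping
 * 0x40201000 bytes (1 GB + 2 MB + 4 KB) at 1 GB-aligned vaddr/paddr takes
 * three passes through the loop above. update_page_table_entry() maps 1 GB
 * at the PDPT level, then 2 MB at the PD level, then 4 KB at the PT level,
 * because each call returns the size it consumed and the loop advances
 * vaddr/paddr until remaining_size reaches zero.
 */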
|
||||
void map_mem(struct map_params *map_params, void *paddr, void *vaddr,
|
||||
uint64_t size, uint32_t flags)
|
||||
{
|
||||
/* used for MMU and EPT*/
|
||||
modify_paging(map_params, paddr, vaddr, size, flags,
|
||||
PAGING_REQUEST_TYPE_MAP, true);
|
||||
/* only for EPT */
|
||||
if (map_params->page_table_type == PT_EPT) {
|
||||
modify_paging(map_params, vaddr, paddr, size, flags,
|
||||
PAGING_REQUEST_TYPE_MAP, false);
|
||||
}
|
||||
}
|
||||
|
||||
void unmap_mem(struct map_params *map_params, void *paddr, void *vaddr,
|
||||
uint64_t size, uint32_t flags)
|
||||
{
|
||||
/* used for MMU and EPT */
|
||||
modify_paging(map_params, paddr, vaddr, size, flags,
|
||||
PAGING_REQUEST_TYPE_UNMAP, true);
|
||||
/* only for EPT */
|
||||
if (map_params->page_table_type == PT_EPT) {
|
||||
modify_paging(map_params, vaddr, paddr, size, flags,
|
||||
PAGING_REQUEST_TYPE_UNMAP, false);
|
||||
}
|
||||
}
|
||||
|
||||
void modify_mem(struct map_params *map_params, void *paddr, void *vaddr,
|
||||
uint64_t size, uint32_t flags)
|
||||
{
|
||||
/* used for MMU and EPT*/
|
||||
modify_paging(map_params, paddr, vaddr, size, flags,
|
||||
PAGING_REQUEST_TYPE_MODIFY, true);
|
||||
/* only for EPT */
|
||||
if (map_params->page_table_type == PT_EPT) {
|
||||
modify_paging(map_params, vaddr, paddr, size, flags,
|
||||
PAGING_REQUEST_TYPE_MODIFY, false);
|
||||
}
|
||||
}
|
98
hypervisor/arch/x86/notify.c
Normal file
@@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <hv_debug.h>
|
||||
#include <irq.h>
|
||||
|
||||
static struct dev_handler_node *notification_node;
|
||||
|
||||
/* run in interrupt context */
|
||||
static int kick_notification(__unused int irq, __unused void *data)
|
||||
{
|
||||
/* The notification vector does not require handling here; it's just used
 * to kick the target cpu out of non-root mode.
 */
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int request_notification_irq(dev_handler_t func, void *data,
|
||||
const char *name)
|
||||
{
|
||||
int irq = -1; /* system allocate */
|
||||
struct dev_handler_node *node = NULL;
|
||||
|
||||
if (notification_node != NULL) {
|
||||
pr_info("%s, Notification vector already allocated on this CPU",
|
||||
__func__);
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
/* all cpu register the same notification vector */
|
||||
node = pri_register_handler(irq, VECTOR_NOTIFY_VCPU, func, data, name);
|
||||
if (node == NULL) {
|
||||
pr_err("Failed to add notify isr");
|
||||
return -1;
|
||||
}
|
||||
update_irq_handler(dev_to_irq(node), quick_handler_nolock);
|
||||
notification_node = node;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void setup_notification(void)
|
||||
{
|
||||
int cpu;
|
||||
char name[32] = {0};
|
||||
|
||||
cpu = get_cpu_id();
|
||||
if (cpu > 0)
|
||||
return;
|
||||
|
||||
/* support IPI notification; the BSP registers once and all CPUs share the same notification vector */
|
||||
snprintf(name, 32, "NOTIFY_ISR%d", cpu);
|
||||
if (request_notification_irq(kick_notification, NULL, name) < 0) {
|
||||
pr_err("Failed to setup notification");
|
||||
return;
|
||||
}
|
||||
|
||||
dev_dbg(ACRN_DBG_PTIRQ, "NOTIFY: irq[%d] setup vector %x",
|
||||
dev_to_irq(notification_node),
|
||||
dev_to_vector(notification_node));
|
||||
}
|
||||
|
||||
void cleanup_notification(void)
|
||||
{
|
||||
if (notification_node)
|
||||
unregister_handler_common(notification_node);
|
||||
notification_node = NULL;
|
||||
}
|
117
hypervisor/arch/x86/softirq.c
Normal file
@@ -0,0 +1,117 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
|
||||
static DEFINE_CPU_DATA(uint64_t, softirq_pending);
|
||||
|
||||
void disable_softirq(int cpu_id)
|
||||
{
|
||||
bitmap_clr(SOFTIRQ_ATOMIC, &per_cpu(softirq_pending, cpu_id));
|
||||
}
|
||||
|
||||
void enable_softirq(int cpu_id)
|
||||
{
|
||||
bitmap_set(SOFTIRQ_ATOMIC, &per_cpu(softirq_pending, cpu_id));
|
||||
}
|
||||
|
||||
void init_softirq(void)
|
||||
{
|
||||
int cpu_id;
|
||||
|
||||
for (cpu_id = 0; cpu_id < phy_cpu_num; cpu_id++) {
|
||||
per_cpu(softirq_pending, cpu_id) = 0;
|
||||
bitmap_set(SOFTIRQ_ATOMIC, &per_cpu(softirq_pending, cpu_id));
|
||||
}
|
||||
}
|
||||
|
||||
void raise_softirq(int softirq_id)
|
||||
{
|
||||
int cpu_id = get_cpu_id();
|
||||
uint64_t *bitmap = &per_cpu(softirq_pending, cpu_id);
|
||||
|
||||
if (cpu_id >= phy_cpu_num)
|
||||
return;
|
||||
|
||||
bitmap_set(softirq_id, bitmap);
|
||||
}
|
||||
|
||||
void exec_softirq(void)
|
||||
{
|
||||
int cpu_id = get_cpu_id();
|
||||
uint64_t *bitmap = &per_cpu(softirq_pending, cpu_id);
|
||||
|
||||
uint64_t rflag;
|
||||
int softirq_id;
|
||||
|
||||
if (cpu_id >= phy_cpu_num)
|
||||
return;
|
||||
|
||||
/* Disable softirq handling:
 * SOFTIRQ_ATOMIC bit = 0 means a softirq pass is already in progress
 */
|
||||
if (!bitmap_test_and_clear(SOFTIRQ_ATOMIC, bitmap))
|
||||
return;
|
||||
|
||||
if (((*bitmap) & SOFTIRQ_MASK) == 0UL)
|
||||
goto ENABLE_AND_EXIT;
|
||||
|
||||
/* only handle softirqs when interrupts are enabled (RFLAGS.IF set) */
|
||||
CPU_RFLAGS_SAVE(&rflag);
|
||||
if (!(rflag & (1<<9)))
|
||||
goto ENABLE_AND_EXIT;
|
||||
|
||||
while (1) {
|
||||
softirq_id = bitmap_ffs(bitmap);
|
||||
if ((softirq_id < 0) || (softirq_id >= SOFTIRQ_MAX))
|
||||
break;
|
||||
|
||||
bitmap_clr(softirq_id, bitmap);
|
||||
|
||||
switch (softirq_id) {
|
||||
case SOFTIRQ_TIMER:
|
||||
timer_softirq(cpu_id);
|
||||
break;
|
||||
case SOFTIRQ_DEV_ASSIGN:
|
||||
ptdev_softirq(cpu_id);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
ENABLE_AND_EXIT:
|
||||
enable_softirq(cpu_id);
|
||||
}
|
||||
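/*
 * Usage sketch (the calling context is an assumption, not part of this
 * file): an interrupt handler defers work by raising a softirq, and the
 * interrupt exit path later drains it.
 */
static int example_isr(__unused int irq, __unused void *data)
{
	raise_softirq(SOFTIRQ_TIMER);	/* defer the real work */
	return 0;
}

static void example_irq_exit(void)
{
	/* runs timer_softirq()/ptdev_softirq() for any pending bits */
	exec_softirq();
}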
|
561
hypervisor/arch/x86/timer.c
Normal file
@@ -0,0 +1,561 @@
|
||||
/*
|
||||
* Copyright (C) 2018 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hypervisor.h>
|
||||
#include <hv_lib.h>
|
||||
#include <acrn_common.h>
|
||||
#include <hv_arch.h>
|
||||
#include <hv_debug.h>
|
||||
|
||||
#define MAX_TIMER_ACTIONS 32
|
||||
|
||||
struct timer_statistics {
|
||||
struct {
|
||||
uint64_t pickup_id;
|
||||
uint64_t pickup_time;
|
||||
uint64_t pickup_deadline;
|
||||
uint64_t added_id;
|
||||
uint64_t added_time;
|
||||
uint64_t added_deadline;
|
||||
} last;
|
||||
uint64_t total_pickup_cnt;
|
||||
uint64_t total_added_cnt;
|
||||
uint64_t irq_cnt;
|
||||
long pending_cnt;
|
||||
};
|
||||
|
||||
struct timer {
|
||||
timer_handle_t func; /* callback if time reached */
|
||||
uint64_t priv_data; /* func private data */
|
||||
uint64_t deadline; /* tsc deadline to interrupt */
|
||||
long handle; /* unique handle for user */
|
||||
int cpu_id; /* armed on which CPU */
|
||||
int id; /* timer ID, used by release */
|
||||
struct list_head node; /* link all timers */
|
||||
};
|
||||
|
||||
struct per_cpu_timers {
|
||||
struct timer *timers_pool; /* timer pool used for allocation */
|
||||
uint64_t free_bitmap;
|
||||
struct list_head timer_list; /* runtime list of active timers */
|
||||
spinlock_t lock;
|
||||
int cpu_id;
|
||||
struct timer_statistics stat;
|
||||
};
|
||||
|
||||
static DEFINE_CPU_DATA(struct per_cpu_timers, cpu_timers);
|
||||
|
||||
#define TIMER_IRQ (NR_MAX_IRQS - 1)
|
||||
|
||||
DEFINE_CPU_DATA(struct dev_handler_node *, timer_node);
|
||||
|
||||
static struct timer*
|
||||
find_expired_timer(struct per_cpu_timers *cpu_timer, uint64_t tsc_now);
|
||||
|
||||
static struct timer *alloc_timer(int cpu_id)
|
||||
{
|
||||
int idx;
|
||||
struct per_cpu_timers *cpu_timer;
|
||||
struct timer *timer;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
cpu_timer = &per_cpu(cpu_timers, cpu_id);
|
||||
spinlock_irqsave_obtain(&cpu_timer->lock);
|
||||
idx = bitmap_ffs(&cpu_timer->free_bitmap);
|
||||
if (idx < 0) {
|
||||
spinlock_irqrestore_release(&cpu_timer->lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bitmap_clr(idx, &cpu_timer->free_bitmap);
|
||||
cpu_timer->stat.total_added_cnt++;
|
||||
cpu_timer->stat.pending_cnt++;
|
||||
|
||||
/* assign unique handle and never duplicate */
|
||||
timer = cpu_timer->timers_pool + idx;
|
||||
timer->handle = cpu_timer->stat.total_added_cnt;
|
||||
spinlock_irqrestore_release(&cpu_timer->lock);
|
||||
|
||||
ASSERT((timer->cpu_id == cpu_id),
"timer cpu_id did not match");
|
||||
return timer;
|
||||
}
|
||||
|
||||
static void release_timer(struct timer *timer)
|
||||
{
|
||||
struct per_cpu_timers *cpu_timer;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
cpu_timer = &per_cpu(cpu_timers, timer->cpu_id);
|
||||
timer->priv_data = 0;
|
||||
timer->func = NULL;
|
||||
timer->deadline = 0;
|
||||
spinlock_irqsave_obtain(&cpu_timer->lock);
|
||||
bitmap_set(timer->id, &cpu_timer->free_bitmap);
|
||||
cpu_timer->stat.pending_cnt--;
|
||||
spinlock_irqrestore_release(&cpu_timer->lock);
|
||||
}
|
||||
|
||||
static int get_target_cpu(void)
|
||||
{
|
||||
/* we should search idle CPU to balance timer service */
|
||||
return get_cpu_id();
|
||||
}
|
||||
|
||||
static struct timer*
|
||||
find_expired_timer(struct per_cpu_timers *cpu_timer, uint64_t tsc_now)
|
||||
{
|
||||
struct timer *timer;
|
||||
struct list_head *pos;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
spinlock_irqsave_obtain(&cpu_timer->lock);
|
||||
list_for_each(pos, &cpu_timer->timer_list) {
|
||||
timer = list_entry(pos, struct timer, node);
|
||||
if (timer->deadline <= tsc_now)
|
||||
goto UNLOCK;
|
||||
}
|
||||
timer = NULL;
|
||||
UNLOCK:
|
||||
spinlock_irqrestore_release(&cpu_timer->lock);
|
||||
return timer;
|
||||
}
|
||||
|
||||
/* caller must hold cpu_timer->lock */
|
||||
static struct timer*
|
||||
_search_nearest_timer(struct per_cpu_timers *cpu_timer)
|
||||
{
|
||||
struct timer *timer;
|
||||
struct timer *target = NULL;
|
||||
struct list_head *pos;
|
||||
|
||||
list_for_each(pos, &cpu_timer->timer_list) {
|
||||
timer = list_entry(pos, struct timer, node);
|
||||
if (target == NULL)
|
||||
target = timer;
|
||||
else if (timer->deadline < target->deadline)
|
||||
target = timer;
|
||||
}
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
/* caller must hold cpu_timer->lock */
|
||||
static struct timer*
|
||||
_search_timer_by_handle(struct per_cpu_timers *cpu_timer, long handle)
|
||||
{
|
||||
struct timer *timer;
|
||||
struct list_head *pos;
|
||||
|
||||
list_for_each(pos, &cpu_timer->timer_list) {
|
||||
timer = list_entry(pos, struct timer, node);
|
||||
if (timer->handle == handle)
|
||||
goto FOUND;
|
||||
}
|
||||
timer = NULL;
|
||||
FOUND:
|
||||
return timer;
|
||||
}
|
||||
|
||||
static void
|
||||
run_timer(struct per_cpu_timers *cpu_timer, struct timer *timer)
|
||||
{
|
||||
spinlock_rflags;
|
||||
|
||||
/* remove from list first */
|
||||
spinlock_irqsave_obtain(&cpu_timer->lock);
|
||||
list_del(&timer->node);
|
||||
spinlock_irqrestore_release(&cpu_timer->lock);
|
||||
|
||||
/* deadline = 0 means stop timer, we should skip */
|
||||
if (timer->func && timer->deadline != 0UL)
|
||||
timer->func(timer->priv_data);
|
||||
|
||||
cpu_timer->stat.last.pickup_id = timer->id;
|
||||
cpu_timer->stat.last.pickup_deadline = timer->deadline;
|
||||
cpu_timer->stat.last.pickup_time = rdtsc();
|
||||
cpu_timer->stat.total_pickup_cnt++;
|
||||
|
||||
TRACE_4I(TRACE_TIMER_ACTION_PCKUP, timer->id, timer->deadline,
|
||||
timer->deadline >> 32, cpu_timer->stat.total_pickup_cnt);
|
||||
}
|
||||
|
||||
/* run in interrupt context */
|
||||
static int tsc_deadline_handler(__unused int irq, __unused void *data)
|
||||
{
|
||||
raise_softirq(SOFTIRQ_TIMER);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void schedule_next_timer(int cpu)
|
||||
{
|
||||
struct timer *timer;
|
||||
struct per_cpu_timers *cpu_timer = &per_cpu(cpu_timers, cpu);
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
spinlock_irqsave_obtain(&cpu_timer->lock);
|
||||
timer = _search_nearest_timer(cpu_timer);
|
||||
if (timer) {
|
||||
/* it is okay to program an already-expired time */
|
||||
msr_write(MSR_IA32_TSC_DEADLINE, timer->deadline);
|
||||
}
|
||||
spinlock_irqrestore_release(&cpu_timer->lock);
|
||||
}
|
||||
|
||||
int request_timer_irq(int cpu, dev_handler_t func, void *data, const char *name)
|
||||
{
|
||||
struct dev_handler_node *node = NULL;
|
||||
|
||||
if (cpu >= phy_cpu_num)
|
||||
return -1;
|
||||
|
||||
if (per_cpu(timer_node, cpu)) {
|
||||
pr_err("CPU%d timer isr already added", cpu);
|
||||
unregister_handler_common(per_cpu(timer_node, cpu));
|
||||
}
|
||||
|
||||
node = pri_register_handler(TIMER_IRQ, VECTOR_TIMER, func, data, name);
|
||||
if (node != NULL) {
|
||||
per_cpu(timer_node, cpu) = node;
|
||||
update_irq_handler(TIMER_IRQ, quick_handler_nolock);
|
||||
} else {
|
||||
pr_err("Failed to add timer isr");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*TODO: init in separate cpu */
|
||||
static void init_timer_pool(void)
|
||||
{
|
||||
int i, j;
|
||||
struct per_cpu_timers *cpu_timer;
|
||||
struct timer *timers_pool;
|
||||
|
||||
/* Make sure we only initialize once */
|
||||
if (get_cpu_id() > 0)
|
||||
return;
|
||||
|
||||
for (i = 0; i < phy_cpu_num; i++) {
|
||||
cpu_timer = &per_cpu(cpu_timers, i);
|
||||
cpu_timer->cpu_id = i;
|
||||
timers_pool =
|
||||
calloc(MAX_TIMER_ACTIONS, sizeof(struct timer));
|
||||
ASSERT(timers_pool, "Create timers pool failed");
|
||||
|
||||
cpu_timer->timers_pool = timers_pool;
|
||||
cpu_timer->free_bitmap = (1UL<<MAX_TIMER_ACTIONS)-1;
|
||||
|
||||
INIT_LIST_HEAD(&cpu_timer->timer_list);
|
||||
spinlock_init(&cpu_timer->lock);
|
||||
for (j = 0; j < MAX_TIMER_ACTIONS; j++) {
|
||||
timers_pool[j].id = j;
|
||||
timers_pool[j].cpu_id = i;
|
||||
timers_pool[j].priv_data = 0;
|
||||
timers_pool[j].func = NULL;
|
||||
timers_pool[j].deadline = 0;
|
||||
timers_pool[j].handle = -1UL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void init_tsc_deadline_timer(void)
|
||||
{
|
||||
uint32_t val;
|
||||
|
||||
val = VECTOR_TIMER;
|
||||
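/* Bit 18 of the LVT timer register selects TSC-deadline mode; leaving
 * bit 16 clear keeps the timer interrupt unmasked.
 */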
val |= 0x40000; /* TSC deadline and unmask */
|
||||
mmio_write_long(val, LAPIC_BASE + LAPIC_LVT_TIMER_REGISTER);
|
||||
asm volatile("mfence" : : : "memory");
|
||||
/* disarm timer */
|
||||
msr_write(MSR_IA32_TSC_DEADLINE, 0UL);
|
||||
}
|
||||
|
||||
void timer_init(void)
|
||||
{
|
||||
char name[32] = {0};
|
||||
int cpu = get_cpu_id();
|
||||
|
||||
snprintf(name, 32, "timer_tick[%d]", cpu);
|
||||
if (request_timer_irq(cpu, tsc_deadline_handler, NULL, name) < 0) {
|
||||
pr_err("Timer setup failed");
|
||||
return;
|
||||
}
|
||||
|
||||
init_tsc_deadline_timer();
|
||||
init_timer_pool();
|
||||
}
|
||||
|
||||
void timer_cleanup(void)
|
||||
{
|
||||
int cpu = get_cpu_id();
|
||||
|
||||
if (per_cpu(timer_node, cpu))
|
||||
unregister_handler_common(per_cpu(timer_node, cpu));
|
||||
|
||||
per_cpu(timer_node, cpu) = NULL;
|
||||
}
|
||||
|
||||
int timer_softirq(int cpu_id)
|
||||
{
|
||||
struct per_cpu_timers *cpu_timer;
|
||||
struct timer *timer;
|
||||
int max = MAX_TIMER_ACTIONS;
|
||||
|
||||
/* handle passed timer */
|
||||
cpu_timer = &per_cpu(cpu_timers, cpu_id);
|
||||
cpu_timer->stat.irq_cnt++;
|
||||
|
||||
/* This makes sure we are not blocked by delays inside func():
 * force an exit from the irq handler after servicing >31 timers.
 * Callers often add_timer() inside timer->func(); if there were a delay
 * inside func(), we would loop here forever because the newly added timer
 * has already expired due to the previous func()'s delay.
 */
|
||||
timer = find_expired_timer(cpu_timer, rdtsc());
|
||||
while (timer && --max > 0) {
|
||||
run_timer(cpu_timer, timer);
|
||||
/* put back to timer pool */
|
||||
release_timer(timer);
|
||||
/* search next one */
|
||||
timer = find_expired_timer(cpu_timer, rdtsc());
|
||||
}
|
||||
|
||||
/* update nearest timer */
|
||||
schedule_next_timer(cpu_id);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * add_timer accepts an already-passed deadline, but not 0.
 * return: a unique handle that can be used to find this timer again.
 * The handle becomes invalid after the timer expires.
 */
|
||||
long add_timer(timer_handle_t func, uint64_t data, uint64_t deadline)
|
||||
{
|
||||
struct timer *timer;
|
||||
struct per_cpu_timers *cpu_timer;
|
||||
int cpu_id = get_target_cpu();
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
if (deadline == 0 || func == NULL)
|
||||
return -1;
|
||||
|
||||
/* possibly in interrupt context; avoid memory allocation here */
|
||||
timer = alloc_timer(cpu_id);
|
||||
if (timer == NULL)
|
||||
return -1;
|
||||
|
||||
timer->func = func;
|
||||
timer->priv_data = data;
|
||||
timer->deadline = deadline;
|
||||
timer->cpu_id = get_target_cpu();
|
||||
|
||||
cpu_timer = &per_cpu(cpu_timers, timer->cpu_id);
|
||||
|
||||
/* We need irqsave here even softirq enabled to protect timer_list */
|
||||
spinlock_irqsave_obtain(&cpu_timer->lock);
|
||||
list_add_tail(&timer->node, &cpu_timer->timer_list);
|
||||
cpu_timer->stat.last.added_id = timer->id;
|
||||
cpu_timer->stat.last.added_time = rdtsc();
|
||||
cpu_timer->stat.last.added_deadline = timer->deadline;
|
||||
spinlock_irqrestore_release(&cpu_timer->lock);
|
||||
TRACE_4I(TRACE_TIMER_ACTION_ADDED, timer->id, timer->deadline,
|
||||
timer->deadline >> 32, cpu_timer->stat.total_added_cnt);
|
||||
|
||||
schedule_next_timer(cpu_id);
|
||||
return timer->handle;
|
||||
}
|
||||
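/*
 * Usage sketch (illustrative): the callback prototype is assumed to match
 * timer_handle_t, and the deadline offset is an arbitrary example value.
 */
static int example_timer_cb(uint64_t data)
{
	pr_info("timer fired, data=0x%llx", data);
	return 0;
}

static void example_arm_timer(void)
{
	long handle = add_timer(example_timer_cb, 0UL,
			rdtsc() + 1000000UL);	/* deadline in TSC cycles */

	if (handle < 0)
		pr_err("failed to allocate a timer");
	else if (!cancel_timer(handle, get_cpu_id()))
		pr_info("timer already expired or not found");
}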
|
||||
/*
 * update_timer updates an existing timer; if not found, a new timer is added.
 */
|
||||
long
|
||||
update_timer(long handle, timer_handle_t func, uint64_t data,
|
||||
uint64_t deadline)
|
||||
{
|
||||
struct timer *timer;
|
||||
struct per_cpu_timers *cpu_timer;
|
||||
int cpu_id = get_target_cpu();
|
||||
|
||||
spinlock_rflags;
|
||||
bool ret = false;
|
||||
|
||||
if (deadline == 0)
|
||||
return -1;
|
||||
|
||||
cpu_timer = &per_cpu(cpu_timers, cpu_id);
|
||||
spinlock_irqsave_obtain(&cpu_timer->lock);
|
||||
timer = _search_timer_by_handle(cpu_timer, handle);
|
||||
if (timer) {
|
||||
/* update deadline and re-sort */
|
||||
timer->deadline = deadline;
|
||||
timer->func = func;
|
||||
timer->priv_data = data;
|
||||
TRACE_4I(TRACE_TIMER_ACTION_UPDAT, timer->id,
|
||||
timer->deadline, timer->deadline >> 32,
|
||||
cpu_timer->stat.total_added_cnt);
|
||||
ret = true;
|
||||
}
|
||||
spinlock_irqrestore_release(&cpu_timer->lock);
|
||||
|
||||
if (ret)
|
||||
schedule_next_timer(cpu_id);
|
||||
else {
|
||||
/* if the update failed, add a new timer and return its handle */
|
||||
/* TODO: the correct behavior should be return failure here */
|
||||
handle = add_timer(func, data, deadline);
|
||||
}
|
||||
|
||||
return handle;
|
||||
}
|
||||
|
||||
/* NOTE: cpu_id referred to physical cpu id here */
|
||||
bool cancel_timer(long handle, int cpu_id)
|
||||
{
|
||||
struct timer *timer;
|
||||
struct per_cpu_timers *cpu_timer;
|
||||
|
||||
spinlock_rflags;
|
||||
bool ret = false;
|
||||
|
||||
cpu_timer = &per_cpu(cpu_timers, cpu_id);
|
||||
spinlock_irqsave_obtain(&cpu_timer->lock);
|
||||
timer = _search_timer_by_handle(cpu_timer, handle);
|
||||
if (timer) {
|
||||
/* NOTE: we cannot directly release the timer here.
 * Instead we set the deadline to expired and clear func.
 * This timer will be reclaimed at the next timer softirq.
 */
|
||||
timer->deadline = 0;
|
||||
timer->func = NULL;
|
||||
ret = true;
|
||||
}
|
||||
spinlock_irqrestore_release(&cpu_timer->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void dump_timer_pool_info(int cpu_id)
|
||||
{
|
||||
struct per_cpu_timers *cpu_timer =
|
||||
&per_cpu(cpu_timers, cpu_id);
|
||||
struct list_head *pos;
|
||||
int cn = 0;
|
||||
|
||||
spinlock_rflags;
|
||||
|
||||
if (cpu_id >= phy_cpu_num)
|
||||
return;
|
||||
|
||||
pr_info("Timer%d statistics: Pending: %d\n\t"
|
||||
"total_pickup: %lld total_added: %lld total_irq: %lld",
|
||||
cpu_id,
|
||||
cpu_timer->stat.pending_cnt,
|
||||
cpu_timer->stat.total_pickup_cnt,
|
||||
cpu_timer->stat.total_added_cnt,
|
||||
cpu_timer->stat.irq_cnt);
|
||||
|
||||
pr_info("LAST pickup[%d] time: 0x%llx deadline: 0x%llx",
|
||||
cpu_timer->stat.last.pickup_id,
|
||||
cpu_timer->stat.last.pickup_time,
|
||||
cpu_timer->stat.last.pickup_deadline);
|
||||
|
||||
pr_info("LAST added[%d] time: 0x%llx deadline: 0x%llx",
|
||||
cpu_timer->stat.last.added_id,
|
||||
cpu_timer->stat.last.added_time,
|
||||
cpu_timer->stat.last.added_deadline);
|
||||
|
||||
spinlock_irqsave_obtain(&cpu_timer->lock);
|
||||
list_for_each(pos, &cpu_timer->timer_list) {
|
||||
cn++;
|
||||
pr_info("-->pending: %d trigger: 0x%llx", cn,
|
||||
list_entry(pos, struct timer, node)->deadline);
|
||||
}
|
||||
spinlock_irqrestore_release(&cpu_timer->lock);
|
||||
}
|
||||
|
||||
void check_tsc(void)
|
||||
{
|
||||
uint64_t temp64;
|
||||
|
||||
/* Ensure time-stamp timer is turned on for each CPU */
|
||||
CPU_CR_READ(cr4, &temp64);
|
||||
CPU_CR_WRITE(cr4, (temp64 & ~CR4_TSD));
|
||||
}

uint64_t tsc_cycles_in_period(uint16_t timer_period_in_us)
{
	uint16_t initial_pit;
	uint16_t current_pit;
	uint32_t current_tsc;
#define PIT_TARGET 0x3FFF

	if (timer_period_in_us < 1000)
		pr_warn("Bad timer_period_in_us: %d\n",
			timer_period_in_us);

	/* Assume the 8254 delivers 18.2 ticks per second when 16 bits fully
	 * wrap. This is about 1.193MHz or a clock period of 0.8384uSec
	 */
	initial_pit = (uint16_t)(timer_period_in_us*1193000UL/1000000);
	initial_pit += PIT_TARGET;

	/* Port 0x43 ==> Control word write; Data 0x30 ==> Select Counter 0,
	 * Read/Write least significant byte first, mode 0, 16 bits.
	 */

	io_write_byte(0x30, 0x43);
	io_write_byte(initial_pit & 0x00ff, 0x40);	/* Write LSB */
	io_write_byte(initial_pit >> 8, 0x40);		/* Write MSB */

	current_tsc = rdtsc();

	do {
		/* Port 0x43 ==> Control word write; 0x00 ==> Select
		 * Counter 0, Counter Latch Command, Mode 0; 16 bits
		 */
		io_write_byte(0x00, 0x43);

		current_pit = io_read_byte(0x40);	/* Read LSB */
		current_pit |= io_read_byte(0x40) << 8;	/* Read MSB */
		/* Let the counter count down to PIT_TARGET */
	} while (current_pit > PIT_TARGET);

	current_tsc = rdtsc() - current_tsc;

	return (uint64_t) current_tsc;
}
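
/*
 * Illustrative sketch, not part of the original source file: one plausible
 * way to turn the PIT-referenced measurement above into a TSC frequency
 * estimate.  The calibrate_tsc_khz() name and the 20ms sampling window are
 * assumptions, not the hypervisor's actual calibration policy.
 */
static uint64_t calibrate_tsc_khz(void)
{
	/* TSC cycles elapsed while the 8254 counts down for ~20000us */
	uint64_t cycles = tsc_cycles_in_period(20000);

	/* cycles per 20ms, scaled to cycles per second, then to kHz */
	return (cycles * 50) / 1000;
}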
494
hypervisor/arch/x86/vmexit.c
Normal file
@@ -0,0 +1,494 @@
/*
 * Copyright (C) 2018 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <hypervisor.h>
#include <hv_lib.h>
#include <acrn_common.h>
#include <hv_arch.h>
#include <hv_debug.h>

static int rdtscp_handler(struct vcpu *vcpu);
static int unhandled_vmexit_handler(struct vcpu *vcpu);
static int rdtsc_handler(struct vcpu *vcpu);

/* VM Dispatch table for Exit condition handling */
static const struct vm_exit_dispatch dispatch_table[] = {
	[VMX_EXIT_REASON_EXCEPTION_OR_NMI] = {
		.handler = exception_handler},
	[VMX_EXIT_REASON_EXTERNAL_INTERRUPT] = {
		.handler = external_interrupt_handler},
	[VMX_EXIT_REASON_TRIPLE_FAULT] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_INIT_SIGNAL] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_STARTUP_IPI] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_IO_SMI] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_OTHER_SMI] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_INTERRUPT_WINDOW] = {
		.handler = interrupt_win_exiting_handler},
	[VMX_EXIT_REASON_NMI_WINDOW] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_TASK_SWITCH] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_CPUID] = {
		.handler = cpuid_handler},
	[VMX_EXIT_REASON_GETSEC] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_HLT] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_INVD] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_INVLPG] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_RDPMC] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_RDTSC] = {
		.handler = rdtsc_handler},
	[VMX_EXIT_REASON_RSM] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_VMCALL] = {
		.handler = vmcall_handler},
	[VMX_EXIT_REASON_VMCLEAR] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_VMLAUNCH] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_VMPTRLD] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_VMPTRST] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_VMREAD] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_VMRESUME] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_VMWRITE] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_VMXOFF] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_VMXON] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_CR_ACCESS] = {
		.handler = cr_access_handler,
		.need_exit_qualification = 1},
	[VMX_EXIT_REASON_DR_ACCESS] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_IO_INSTRUCTION] = {
		.handler = io_instr_handler,
		.need_exit_qualification = 1},
	[VMX_EXIT_REASON_RDMSR] = {
		.handler = rdmsr_handler},
	[VMX_EXIT_REASON_WRMSR] = {
		.handler = wrmsr_handler},
	[VMX_EXIT_REASON_ENTRY_FAILURE_INVALID_GUEST_STATE] = {
		.handler = unhandled_vmexit_handler,
		.need_exit_qualification = 1},
	[VMX_EXIT_REASON_ENTRY_FAILURE_MSR_LOADING] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_MWAIT] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_MONITOR_TRAP] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_MONITOR] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_PAUSE] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_ENTRY_FAILURE_MACHINE_CHECK] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_TPR_BELOW_THRESHOLD] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_APIC_ACCESS] = {
		.handler = apicv_access_exit_handler},
	[VMX_EXIT_REASON_VIRTUALIZED_EOI] = {
		.handler = apicv_virtualized_eoi_exit_handler},
	[VMX_EXIT_REASON_GDTR_IDTR_ACCESS] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_LDTR_TR_ACCESS] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_EPT_VIOLATION] = {
		.handler = ept_violation_handler,
		.need_exit_qualification = 1},
	[VMX_EXIT_REASON_EPT_MISCONFIGURATION] = {
		.handler = ept_misconfig_handler,
		.need_exit_qualification = 1},
	[VMX_EXIT_REASON_INVEPT] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_RDTSCP] = {
		.handler = rdtscp_handler},
	[VMX_EXIT_REASON_VMX_PREEMPTION_TIMER_EXPIRED] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_INVVPID] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_WBINVD] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_XSETBV] = {
		.handler = unhandled_vmexit_handler},
	[VMX_EXIT_REASON_APIC_WRITE] = {
		.handler = apicv_write_exit_handler}
};

struct vm_exit_dispatch *vmexit_handler(struct vcpu *vcpu)
{
	struct vm_exit_dispatch *dispatch = HV_NULL;
	uint16_t basic_exit_reason;

	/* Obtain interrupt info */
	vcpu->arch_vcpu.exit_interrupt_info =
		exec_vmread(VMX_IDT_VEC_INFO_FIELD);

	/* Calculate basic exit reason (low 16-bits) */
	basic_exit_reason = vcpu->arch_vcpu.exit_reason & 0xFFFF;

	/* Log details for exit */
	pr_dbg("Exit Reason: 0x%016llx ", vcpu->arch_vcpu.exit_reason);

	/* Ensure exit reason is within dispatch table */
	if (basic_exit_reason < ARRAY_SIZE(dispatch_table)) {
		/* Calculate dispatch table entry */
		dispatch = (struct vm_exit_dispatch *)
			(dispatch_table + basic_exit_reason);

		/* See if an exit qualification is necessary for this exit
		 * handler
		 */
		if (dispatch->need_exit_qualification) {
			/* Get exit qualification */
			vcpu->arch_vcpu.exit_qualification =
				exec_vmread(VMX_EXIT_QUALIFICATION);
		}
	}

	/* Update current vcpu in VM that caused vm exit */
	vcpu->vm->current_vcpu = vcpu;

	/* Return pointer to exit dispatch entry */
	return dispatch;
}
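
/*
 * Illustrative sketch, not part of the original source file: how a vcpu run
 * loop might consume the dispatch entry returned above.  The wrapper name
 * hv_handle_vmexit() and its error handling are assumptions; only
 * vmexit_handler() and the .handler callback come from this file.
 */
static int hv_handle_vmexit(struct vcpu *vcpu)
{
	struct vm_exit_dispatch *dispatch = vmexit_handler(vcpu);

	if (dispatch == HV_NULL || dispatch->handler == HV_NULL)
		return -EINVAL;

	/* the exit qualification, if needed, was already cached above */
	return dispatch->handler(vcpu);
}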

static int unhandled_vmexit_handler(struct vcpu *vcpu)
{
	pr_fatal("Error: Unhandled VM exit condition from guest at 0x%016llx ",
		exec_vmread(VMX_GUEST_RIP));

	pr_fatal("Exit Reason: 0x%016llx ", vcpu->arch_vcpu.exit_reason);

	pr_err("Exit qualification: 0x%016llx ",
		exec_vmread(VMX_EXIT_QUALIFICATION));

	/* while(1); */

	TRACE_2L(TRC_VMEXIT_UNHANDLED, vcpu->arch_vcpu.exit_reason, 0);

	return 0;
}

static int write_cr0(struct vcpu *vcpu, uint64_t value)
{
	uint32_t value32;
	uint64_t value64;

	pr_dbg("VMM: Guest trying to write 0x%08x to CR0", value);

	/* Read host mask value */
	value64 = exec_vmread(VMX_CR0_MASK);

	/* Clear all bits being written by guest that are owned by host */
	value &= ~value64;

	/* Update CR0 in guest state */
	vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr0 |= value;
	exec_vmwrite(VMX_GUEST_CR0,
		vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr0);
	pr_dbg("VMM: Guest allowed to write 0x%08x to CR0",
		vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr0);

	/* If guest is trying to transition vcpu from unpaged real mode to page
	 * protected mode make necessary changes to VMCS structure to reflect
	 * transition from real mode to paged-protected mode
	 */
	if (!is_vcpu_bsp(vcpu) &&
		(vcpu->arch_vcpu.cpu_mode == REAL_MODE) &&
		(value & CR0_PG) && (value & CR0_PE)) {
		/* Enable protected mode */
		value32 = exec_vmread(VMX_ENTRY_CONTROLS);
		value32 |= (VMX_ENTRY_CTLS_IA32E_MODE |
			VMX_ENTRY_CTLS_LOAD_PAT |
			VMX_ENTRY_CTLS_LOAD_EFER);
		exec_vmwrite(VMX_ENTRY_CONTROLS, value32);
		pr_dbg("VMX_ENTRY_CONTROLS: 0x%x ", value32);

		/* Disable unrestricted mode */
		value32 = exec_vmread(VMX_PROC_VM_EXEC_CONTROLS2);
		value32 |= (VMX_PROCBASED_CTLS2_EPT |
			VMX_PROCBASED_CTLS2_RDTSCP);
		exec_vmwrite(VMX_PROC_VM_EXEC_CONTROLS2, value32);
		pr_dbg("VMX_PROC_VM_EXEC_CONTROLS2: 0x%x ", value32);

		/* Set up EFER */
		value64 = exec_vmread64(VMX_GUEST_IA32_EFER_FULL);
		value64 |= (MSR_IA32_EFER_SCE_BIT |
			MSR_IA32_EFER_LME_BIT |
			MSR_IA32_EFER_LMA_BIT | MSR_IA32_EFER_NXE_BIT);
		exec_vmwrite64(VMX_GUEST_IA32_EFER_FULL, value64);
		pr_dbg("VMX_GUEST_IA32_EFER: 0x%016llx ", value64);
	}

	return 0;
}
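
/*
 * Illustrative sketch, not part of the original source file: the masking
 * rule applied by write_cr0() above (and write_cr4() below), factored out
 * as a pure helper.  Guest-written bits that fall inside the host-owned
 * mask are dropped; everything else is OR-ed into the current shadow value.
 * The helper name is an assumption.
 */
static inline uint64_t apply_guest_cr_write(uint64_t cur_cr,
		uint64_t host_owned_mask, uint64_t guest_value)
{
	/* keep only guest-owned bits of the write, preserve current bits */
	return cur_cr | (guest_value & ~host_owned_mask);
}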

static int write_cr3(struct vcpu *vcpu, uint64_t value)
{
	/* Write to guest's CR3 */
	vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3 = value;

	/* Commit new value to VMCS */
	exec_vmwrite(VMX_GUEST_CR3,
		vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3);

	return 0;
}

static int write_cr4(struct vcpu *vcpu, uint64_t value)
{
	uint64_t temp64;

	pr_dbg("VMM: Guest trying to write 0x%08x to CR4", value);

	/* Read host mask value */
	temp64 = exec_vmread(VMX_CR4_MASK);

	/* Clear all bits being written by guest that are owned by host */
	value &= ~temp64;

	/* Write updated CR4 (bitwise OR of allowed guest bits and CR4 host
	 * value)
	 */
	vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr4 |= value;
	exec_vmwrite(VMX_GUEST_CR4,
		vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr4);
	pr_dbg("VMM: Guest allowed to write 0x%08x to CR4",
		vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr4);

	return 0;
}

static int read_cr3(struct vcpu *vcpu, uint64_t *value)
{
	*value = vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context].cr3;

	pr_dbg("VMM: reading 0x%08x from CR3", *value);

	return 0;
}

int cpuid_handler(struct vcpu *vcpu)
{
	struct run_context *cur_context =
		&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];

	emulate_cpuid(vcpu, (uint32_t)cur_context->guest_cpu_regs.regs.rax,
		(uint32_t *)&cur_context->guest_cpu_regs.regs.rax,
		(uint32_t *)&cur_context->guest_cpu_regs.regs.rbx,
		(uint32_t *)&cur_context->guest_cpu_regs.regs.rcx,
		(uint32_t *)&cur_context->guest_cpu_regs.regs.rdx);

	TRACE_2L(TRC_VMEXIT_CPUID, vcpu->vcpu_id, 0);

	return 0;
}
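
/*
 * Illustrative sketch, not part of the original source file: a raw CPUID
 * wrapper of the kind a pass-through path inside emulate_cpuid() might use
 * for leaves that need no filtering.  The wrapper name is an assumption;
 * the real emulate_cpuid() applies per-VM adjustments that are not shown.
 */
static inline void raw_cpuid(uint32_t leaf, uint32_t subleaf, uint32_t *eax,
		uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
	asm volatile ("cpuid"
		: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
		: "a" (leaf), "c" (subleaf));
}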

int cr_access_handler(struct vcpu *vcpu)
{
	uint64_t *regptr;
	struct run_context *cur_context =
		&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
	static const int reg_trans_tab[] = {
		[0] = VMX_MACHINE_T_GUEST_RAX_INDEX,
		[1] = VMX_MACHINE_T_GUEST_RCX_INDEX,
		[2] = VMX_MACHINE_T_GUEST_RDX_INDEX,
		[3] = VMX_MACHINE_T_GUEST_RBX_INDEX,
		[4] = 0xFF, /* for sp reg, should not be used, just for init */
		[5] = VMX_MACHINE_T_GUEST_RBP_INDEX,
		[6] = VMX_MACHINE_T_GUEST_RSI_INDEX,
		[7] = VMX_MACHINE_T_GUEST_RDI_INDEX,
		[8] = VMX_MACHINE_T_GUEST_R8_INDEX,
		[9] = VMX_MACHINE_T_GUEST_R9_INDEX,
		[10] = VMX_MACHINE_T_GUEST_R10_INDEX,
		[11] = VMX_MACHINE_T_GUEST_R11_INDEX,
		[12] = VMX_MACHINE_T_GUEST_R12_INDEX,
		[13] = VMX_MACHINE_T_GUEST_R13_INDEX,
		[14] = VMX_MACHINE_T_GUEST_R14_INDEX,
		[15] = VMX_MACHINE_T_GUEST_R15_INDEX
	};
	int idx = VM_EXIT_CR_ACCESS_REG_IDX(vcpu->arch_vcpu.exit_qualification);

	ASSERT(idx != 4, "index should not be 4 (target SP)");
	regptr = cur_context->guest_cpu_regs.longs + reg_trans_tab[idx];

	switch ((VM_EXIT_CR_ACCESS_ACCESS_TYPE
		 (vcpu->arch_vcpu.exit_qualification) << 4) |
		VM_EXIT_CR_ACCESS_CR_NUM(vcpu->arch_vcpu.exit_qualification)) {
	case 0x00:
		/* mov to cr0 */
		write_cr0(vcpu, *regptr);
		break;

	case 0x03:
		/* mov to cr3 */
		write_cr3(vcpu, *regptr);
		break;

	case 0x04:
		/* mov to cr4 */
		write_cr4(vcpu, *regptr);
		break;

	case 0x13:
		/* mov from cr3 */
		read_cr3(vcpu, regptr);
		break;
#if 0
	case 0x14:
		/* mov from cr4 (this should not happen) */
	case 0x10:
		/* mov from cr0 (this should not happen) */
#endif
	case 0x08:
		/* mov to cr8 */
		vlapic_set_cr8(vcpu->arch_vcpu.vlapic, *regptr);
		break;
	case 0x18:
		/* mov from cr8 */
		*regptr = vlapic_get_cr8(vcpu->arch_vcpu.vlapic);
		break;
	default:
		panic("Unhandled CR access");
		return -EINVAL;
	}

	TRACE_2L(TRC_VMEXIT_CR_ACCESS,
		VM_EXIT_CR_ACCESS_ACCESS_TYPE
			(vcpu->arch_vcpu.exit_qualification),
		VM_EXIT_CR_ACCESS_CR_NUM
			(vcpu->arch_vcpu.exit_qualification));

	return 0;
}
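
/*
 * Illustrative sketch, not part of the original source file: the CR-access
 * exit-qualification layout from the Intel SDM, which is what the
 * VM_EXIT_CR_ACCESS_* macros above extract.  A qualification with access
 * type 0 (MOV to CR) on CR8 produces the switch key 0x08 handled above,
 * while access type 1 (MOV from CR) on CR8 produces 0x18.  The helper name
 * is an assumption.
 */
static inline void decode_cr_access_qual(uint64_t qual, int *cr_num,
		int *access_type, int *gpr_idx)
{
	*cr_num = (int)(qual & 0xfUL);		 /* bits 3:0 - control register number */
	*access_type = (int)((qual >> 4) & 0x3UL); /* bits 5:4 - 0=mov to, 1=mov from */
	*gpr_idx = (int)((qual >> 8) & 0xfUL);	 /* bits 11:8 - GPR operand index */
}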

#if 0
/*
 * VMX_PROCBASED_CTLS_INVLPG is not enabled in the VM-execution
 * controls, therefore we don't need its handler.
 *
 * INVLPG: this instruction invalidates translation lookaside buffer entries.
 */
int invlpg_handler(__unused struct vcpu *vcpu)
{
	pr_fatal("INVLPG executed");

	return 0;
}

/*
 * The XSETBV instruction sets XCR0, which tells the processor for which
 * components state can be saved on a context switch using XSAVE.
 *
 * We don't handle this right now because we are on a platform that does not
 * support the XSAVE/XRSTORE feature, as reflected by CPUID.
 *
 * To make sure this never gets called until we support it, we can prevent
 * the guest from reading this bit in the CPUID VMEXIT.
 *
 * Linux checks this in CPUID: cpufeature.h: #define cpu_has_xsave
 */
static int xsetbv_instr_handler(__unused struct vcpu *vcpu)
{
	ASSERT("Not Supported" == 0, "XSETBV executed");

	return 0;
}
#endif

static int rdtsc_handler(struct vcpu *vcpu)
{
	uint64_t host_tsc, guest_tsc, tsc_offset;
	uint32_t id;
	struct run_context *cur_context =
		&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];

	/* Read the host TSC value */
	CPU_RDTSCP_EXECUTE(&host_tsc, &id);

	/* Get the guest TSC offset value from VMCS */
	tsc_offset =
		exec_vmread64(VMX_TSC_OFFSET_FULL);

	/* Update the guest TSC value by following:
	 * TSC_guest = TSC_host + TSC_guest_Offset
	 */
	guest_tsc = host_tsc + tsc_offset;

	/* Return the TSC_guest in rax:rdx */
	cur_context->guest_cpu_regs.regs.rax = (uint32_t) guest_tsc;
	cur_context->guest_cpu_regs.regs.rdx = (uint32_t) (guest_tsc >> 32);

	TRACE_2L(TRC_VMEXIT_RDTSC, host_tsc, tsc_offset);

	return 0;
}

static int rdtscp_handler(struct vcpu *vcpu)
{
	uint64_t host_tsc, guest_tsc, tsc_offset;
	uint32_t id;
	struct run_context *cur_context =
		&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];

	/* Read the host TSC value */
	CPU_RDTSCP_EXECUTE(&host_tsc, &id);

	/* Get the guest TSC offset value from VMCS */
	tsc_offset =
		exec_vmread64(VMX_TSC_OFFSET_FULL);

	/* Update the guest TSC value by following:
	 * TSC_guest = TSC_host + TSC_guest_Offset
	 */
	guest_tsc = host_tsc + tsc_offset;

	/* Return the TSC_guest in rax:rdx and IA32_TSC_AUX in rcx */
	cur_context->guest_cpu_regs.regs.rax = (uint32_t) guest_tsc;
	cur_context->guest_cpu_regs.regs.rdx = (uint32_t) (guest_tsc >> 32);
	cur_context->guest_cpu_regs.regs.rcx = vcpu->arch_vcpu.msr_tsc_aux;

	TRACE_2L(TRC_VMEXIT_RDTSCP, guest_tsc, vcpu->arch_vcpu.msr_tsc_aux);

	return 0;
}
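
/*
 * Illustrative sketch, not part of the original source file: the inverse of
 * the calculation used by rdtsc_handler()/rdtscp_handler() above.  If the
 * hypervisor wanted the guest to observe a particular TSC value, the VMCS
 * TSC offset to program would simply be the difference.  The helper name is
 * an assumption.
 */
static inline uint64_t tsc_offset_for_guest(uint64_t desired_guest_tsc,
		uint64_t current_host_tsc)
{
	/* guest_tsc = host_tsc + offset  =>  offset = guest_tsc - host_tsc */
	return desired_guest_tsc - current_host_tsc;
}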
1346
hypervisor/arch/x86/vmx.c
Normal file
File diff suppressed because it is too large
Load Diff
245
hypervisor/arch/x86/vmx_asm.S
Normal file
@@ -0,0 +1,245 @@
/*
 * Copyright (C) 2018 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <vmx.h>
#include <msr.h>
#include <guest.h>
#include <vcpu.h>
#include <cpu.h>
#include <types.h>

	.text

/* int vmx_vmrun(struct run_context *context, int launch, int ibrs_type) */
	.code64
	.align 8
	.global vmx_vmrun
vmx_vmrun:

	/* Save all host GPRs that must be preserved across function calls
	   per System V ABI */
	push %rdx
	push %rbx
	push %rbp
	push %r12
	push %r13
	push %r14
	push %r15

	/* Save RDI on top of host stack for easy access to VCPU pointer
	   on return from guest context */
	push %rdi

	/* rdx = ibrs_type */
	/* if ibrs_type != IBRS_NONE, means IBRS feature is supported,
	 * restore MSR SPEC_CTRL to guest
	 */
	cmp $IBRS_NONE,%rdx
	je next

	movl $MSR_IA32_SPEC_CTRL,%ecx
	mov VMX_MACHINE_T_GUEST_SPEC_CTRL_OFFSET(%rdi),%rax
	movl $0,%edx
	wrmsr

next:

	/* Load VMCS_HOST_RSP_FIELD field value */
	mov $VMX_HOST_RSP,%rdx

	/* Write the current stack pointer to the VMCS_HOST_RSP_FIELD */
	vmwrite %rsp,%rdx

	/* Error occurred - handle error */
	jbe vm_eval_error

	/* Compare the launch flag to see if launching (1) or resuming (0) */
	cmp $VM_LAUNCH, %rsi

	mov VMX_MACHINE_T_GUEST_CR2_OFFSET(%rdi),%rax
	mov %rax,%cr2

	mov VMX_MACHINE_T_GUEST_RAX_OFFSET(%rdi),%rax
	mov VMX_MACHINE_T_GUEST_RBX_OFFSET(%rdi),%rbx
	mov VMX_MACHINE_T_GUEST_RCX_OFFSET(%rdi),%rcx
	mov VMX_MACHINE_T_GUEST_RDX_OFFSET(%rdi),%rdx
	mov VMX_MACHINE_T_GUEST_RBP_OFFSET(%rdi),%rbp
	mov VMX_MACHINE_T_GUEST_RSI_OFFSET(%rdi),%rsi
	mov VMX_MACHINE_T_GUEST_R8_OFFSET(%rdi),%r8
	mov VMX_MACHINE_T_GUEST_R9_OFFSET(%rdi),%r9
	mov VMX_MACHINE_T_GUEST_R10_OFFSET(%rdi),%r10
	mov VMX_MACHINE_T_GUEST_R11_OFFSET(%rdi),%r11
	mov VMX_MACHINE_T_GUEST_R12_OFFSET(%rdi),%r12
	mov VMX_MACHINE_T_GUEST_R13_OFFSET(%rdi),%r13
	mov VMX_MACHINE_T_GUEST_R14_OFFSET(%rdi),%r14
	mov VMX_MACHINE_T_GUEST_R15_OFFSET(%rdi),%r15

	mov VMX_MACHINE_T_GUEST_RDI_OFFSET(%rdi),%rdi

	/* Execute appropriate VMX instruction */
	je vm_launch

	/* Execute a VM resume */
	vmresume

vm_launch:

	/* Execute a VM launch */
	vmlaunch

	.global vm_exit
vm_exit:

	/* Get VCPU data structure pointer from top of host stack and
	   save guest RDI in its place */
	xchg 0(%rsp),%rdi

	/* Save current GPRs to guest state area */
	mov %rax,VMX_MACHINE_T_GUEST_RAX_OFFSET(%rdi)

	mov %cr2,%rax
	mov %rax,VMX_MACHINE_T_GUEST_CR2_OFFSET(%rdi)

	mov %rbx,VMX_MACHINE_T_GUEST_RBX_OFFSET(%rdi)
	mov %rcx,VMX_MACHINE_T_GUEST_RCX_OFFSET(%rdi)
	mov %rdx,VMX_MACHINE_T_GUEST_RDX_OFFSET(%rdi)
	mov %rbp,VMX_MACHINE_T_GUEST_RBP_OFFSET(%rdi)
	mov %rsi,VMX_MACHINE_T_GUEST_RSI_OFFSET(%rdi)
	mov %r8,VMX_MACHINE_T_GUEST_R8_OFFSET(%rdi)
	mov %r9,VMX_MACHINE_T_GUEST_R9_OFFSET(%rdi)
	mov %r10,VMX_MACHINE_T_GUEST_R10_OFFSET(%rdi)
	mov %r11,VMX_MACHINE_T_GUEST_R11_OFFSET(%rdi)
	mov %r12,VMX_MACHINE_T_GUEST_R12_OFFSET(%rdi)
	mov %r13,VMX_MACHINE_T_GUEST_R13_OFFSET(%rdi)
	mov %r14,VMX_MACHINE_T_GUEST_R14_OFFSET(%rdi)
	mov %r15,VMX_MACHINE_T_GUEST_R15_OFFSET(%rdi)

	/* Load guest RDI off host stack and into RDX */
	mov 0(%rsp),%rdx

	/* Save guest RDI to guest state area */
	mov %rdx,VMX_MACHINE_T_GUEST_RDI_OFFSET(%rdi)

	/* Save RDI to RSI for later SPEC_CTRL save */
	mov %rdi,%rsi

vm_eval_error:

	/* Restore host GPR System V required registers */
	pop %rdi
	pop %r15
	pop %r14
	pop %r13
	pop %r12
	pop %rbp
	pop %rbx
	pop %rdx


	/* Check vm fail, refer to 64-ia32 spec section 26.2 in volume#3 */
	mov $VM_FAIL,%rax
	jc vm_return
	jz vm_return

	/* Clear host registers to prevent speculative use */
	xor %rcx,%rcx
	xor %r8,%r8
	xor %r9,%r9
	xor %r10,%r10
	xor %r11,%r11

	/* rdx = ibrs_type */
	/* IBRS_NONE: no ibrs setting, just flush rsb
	 * IBRS_RAW: set IBRS then flush rsb
	 * IBRS_OPT: set STIBP & IBPB then flush rsb
	 */
	cmp $IBRS_NONE,%rdx
	je stuff_rsb

	cmp $IBRS_OPT,%rdx
	je ibrs_opt

	/* Save guest MSR SPEC_CTRL, low 32 bit is enough */
	movl $MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	mov %rax,VMX_MACHINE_T_GUEST_SPEC_CTRL_OFFSET(%rsi)
	movl $SPEC_ENABLE_IBRS,%eax
	movl $0,%edx
	wrmsr

	jmp stuff_rsb

ibrs_opt:

	movl $MSR_IA32_PRED_CMD,%ecx
	movl $PRED_SET_IBPB,%eax
	movl $0,%edx
	wrmsr

	/* Save guest MSR SPEC_CTRL, low 32 bit is enough */
	movl $MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	mov %rax,VMX_MACHINE_T_GUEST_SPEC_CTRL_OFFSET(%rsi)
	movl $SPEC_ENABLE_STIBP,%eax
	movl $0,%edx
	wrmsr

/* Stuff the RSB with 32 CALLs. Make sure no "ret" is executed before this
 * RSB stuffing; take care if code is ever inserted ahead of this point in a
 * future update.
 */
stuff_rsb:

	/* stuff 32 RSB, rax = 32/2 */
	mov $16,%rax
	.align 16
3:
	call 4f
33:
	pause
	jmp 33b
	.align 16
4:
	call 5f
44:
	pause
	jmp 44b
	.align 16
5:	dec %rax
	jnz 3b
	/* stuff 32 RSB, rsp += 8*32 */
	add $(8*32),%rsp

	mov $VM_SUCCESS,%rax

vm_return:
	/* Return to caller */
	ret
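
/*
 * Illustrative sketch, not part of the original vmx_asm.S: how the C side
 * lines up with the assembly above.  Per the System V AMD64 ABI the three
 * arguments arrive in %rdi (run_context pointer), %rsi (launch flag, 1 to
 * launch / 0 to resume) and %rdx (ibrs_type), which is exactly how the code
 * above consumes them.  The run_vcpu_once() wrapper and the "launched"
 * bookkeeping field are assumptions.
 */
int vmx_vmrun(struct run_context *context, int launch, int ibrs_type);

static int run_vcpu_once(struct vcpu *vcpu, int ibrs_type)
{
	struct run_context *ctx =
		&vcpu->arch_vcpu.contexts[vcpu->arch_vcpu.cur_context];
	int launch = vcpu->launched ? 0 : VM_LAUNCH;
	int status = vmx_vmrun(ctx, launch, ibrs_type);

	if (status == VM_SUCCESS)
		vcpu->launched = 1;

	return status;
}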
1162
hypervisor/arch/x86/vtd.c
Normal file
File diff suppressed because it is too large
Load Diff