hv: nested: support for VMPTRLD emulation

This patch emulates the VMPTRLD instruction. The L0 hypervisor (ACRN) caches
the VMCS12 that L1 passes down through VMPTRLD, and merges it with VMCS01 to
create the VMCS02 used to run the nested VM.

- Currently ACRN can't cache multiple VMCS12s on one vCPU, so it needs to
  flush any active-but-not-current VMCS12 back to L1 guest memory.
- ACRN creates the VMCS02 that runs the nested VM from VMCS12 as follows
  (see the sketch below):
  1) copy VMCS12 from guest memory into the per-vCPU cache VMCS12
  2) initialize the VMCS02 revision ID and host-state area
  3) load the shadow fields from the cache VMCS12 into VMCS02
  4) enable VMCS shadowing before L1 VM entry
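
Condensed from vmptrld_vmexit_handler() in this patch (validation and error
paths omitted), the emulation flow is roughly:

  /* a previously loaded VMCS12 must first be flushed back to L1 */
  if (nested->current_vmcs12_ptr != INVALID_GPA) {
          disable_vmcs_shadowing();        /* unlink vmcs02 from vmcs01 */
          clear_va_vmcs(nested->vmcs02);   /* flush shadow VMCS to memory */
          load_va_vmcs(nested->vmcs02);
          flush_current_vmcs12(vcpu);      /* old VMCS12 -> L1 guest memory */
          clear_va_vmcs(nested->vmcs02);
  }
  /* steps 1)-4): build vmcs02 from the new VMCS12 */
  (void)memcpy_s(nested->vmcs02, 4U, (void *)&vmx_basic, 4U); /* 2) revision ID */
  set_vmcs02_shadow_indicator(vcpu);
  load_va_vmcs(nested->vmcs02);
  init_host_state();                       /* 2) host-state area */
  (void)copy_from_gpa(vcpu->vm, (void *)&nested->vmcs12, vmcs12_gpa,
          sizeof(struct acrn_vmcs12));     /* 1) cache the new VMCS12 */
  sync_vmcs12_to_vmcs02(vcpu);             /* 3) shadow fields */
  clear_va_vmcs(nested->vmcs02);
  load_va_vmcs(vcpu->arch.vmcs);           /* switch back to vmcs01 */
  enable_vmcs_shadowing(vcpu);             /* 4) */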

Tracked-On: #5923
Signed-off-by: Sainath Grandhi <sainath.grandhi@intel.com>
Signed-off-by: Zide Chen <zide.chen@intel.com>
Zide Chen, 2021-05-08 20:45:28 -07:00 (committed by wenlingz)
parent 0a1ac2f4a0, commit f5744174b5
8 changed files with 262 additions and 3 deletions


@@ -11,8 +11,12 @@
 #include <asm/guest/ept.h>
 #include <asm/guest/vcpu.h>
 #include <asm/guest/vm.h>
+#include <asm/guest/vmcs.h>
 #include <asm/guest/nested.h>
 
+/* Cache the content of MSR_IA32_VMX_BASIC */
+static uint32_t vmx_basic;
+
 /* The only purpose of this array is to serve the is_vmx_msr() function */
 static const uint32_t vmx_msrs[NUM_VMX_MSRS] = {
 	LIST_OF_VMX_MSRS
@@ -224,6 +228,9 @@ int32_t read_vmx_msr(struct acrn_vcpu *vcpu, uint32_t msr, uint64_t *val)
 	return err;
 }
 
+/* to be shared by all vCPUs for all nested guests */
+static uint64_t vmcs_shadowing_bitmap[PAGE_SIZE / sizeof(uint64_t)] __aligned(PAGE_SIZE);
+
 void nested_vmx_result(enum VMXResult result, int error_number)
 {
 	uint64_t rflags = exec_vmread(VMX_GUEST_RFLAGS);
@@ -430,6 +437,7 @@ int32_t vmxon_vmexit_handler(struct acrn_vcpu *vcpu)
 	} else {
 		vcpu->arch.nested.vmxon = true;
 		vcpu->arch.nested.vmxon_ptr = vmptr_gpa;
+		vcpu->arch.nested.current_vmcs12_ptr = INVALID_GPA;
 
 		nested_vmx_result(VMsucceed, 0);
 	}
@@ -471,9 +479,210 @@ int32_t vmxoff_vmexit_handler(struct acrn_vcpu *vcpu)
 {
 	if (check_vmx_permission(vcpu)) {
 		vcpu->arch.nested.vmxon = false;
+		vcpu->arch.nested.current_vmcs12_ptr = INVALID_GPA;
+		(void)memset(vcpu->arch.nested.vmcs02, 0U, PAGE_SIZE);
+		(void)memset(&vcpu->arch.nested.vmcs12, 0U, sizeof(struct acrn_vmcs12));
 
 		nested_vmx_result(VMsucceed, 0);
 	}
 
 	return 0;
 }
+
+/**
+ * @brief Sync shadow fields from vmcs02 to cache VMCS12
+ *
+ * @pre vcpu != NULL
+ * @pre vmcs02 is current
+ */
+static void sync_vmcs02_to_vmcs12(__unused struct acrn_vcpu *vcpu)
+{
+	/* Implemented in next patch */
+}
+
+/**
+ * @brief Sync shadow fields from cache VMCS12 to vmcs02
+ *
+ * @pre vcpu != NULL
+ * @pre vmcs02 is current
+ */
+static void sync_vmcs12_to_vmcs02(__unused struct acrn_vcpu *vcpu)
+{
+	/* Implemented in next patch */
+}
+
+/*
+ * @pre vcpu != NULL
+ * @pre vmcs02 is current
+ */
+static void flush_current_vmcs12(struct acrn_vcpu *vcpu)
+{
+	/*
+	 * Since we have one cache VMCS12 and one active VMCS02 per vCPU,
+	 * at the time of VMCLEARing the current VMCS12, or VMPTRLDing a new
+	 * VMCS12 on this vCPU, we need to sync the shadow fields from VMCS02
+	 * to the cache VMCS12, and save the cache VMCS12 to guest memory.
+	 */
+	sync_vmcs02_to_vmcs12(vcpu);
+
+	/* flush the cache VMCS12 back to L1 guest memory */
+	(void)copy_to_gpa(vcpu->vm, (void *)&vcpu->arch.nested.vmcs12,
+		vcpu->arch.nested.current_vmcs12_ptr, sizeof(struct acrn_vmcs12));
+}
+
+/*
+ * @pre vcpu != NULL
+ */
+static void set_vmcs02_shadow_indicator(struct acrn_vcpu *vcpu)
+{
+	/* vmcs02 is shadowing */
+	*((uint32_t *)vcpu->arch.nested.vmcs02) |= VMCS_SHADOW_BIT_INDICATOR;
+}
+
+/*
+ * @pre vcpu != NULL
+ * @pre vmcs01 is current
+ */
+static void enable_vmcs_shadowing(struct acrn_vcpu *vcpu)
+{
+	uint32_t val32;
+
+	/*
+	 * This method of using the same bitmap for VMREAD and VMWRITE is not
+	 * typical. Here we assume the L1 hypervisor will not erroneously write
+	 * to read-only fields.
+	 * TODO: may use a separate bitmap to exclude read-only fields from the
+	 * VMWRITE bitmap.
+	 */
+	exec_vmwrite(VMX_VMREAD_BITMAP_FULL, hva2hpa(vmcs_shadowing_bitmap));
+	exec_vmwrite(VMX_VMWRITE_BITMAP_FULL, hva2hpa(vmcs_shadowing_bitmap));
+
+	/* Set VMCS shadowing bit in Secondary Proc Exec Controls */
+	val32 = exec_vmread(VMX_PROC_VM_EXEC_CONTROLS2);
+	val32 |= VMX_PROCBASED_CTLS2_VMCS_SHADW;
+	exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS2, val32);
+
+	/* Set VMCS Link pointer */
+	exec_vmwrite(VMX_VMS_LINK_PTR_FULL, hva2hpa(vcpu->arch.nested.vmcs02));
+}
+
+/*
+ * @pre vmcs01 is current
+ */
+static void disable_vmcs_shadowing(void)
+{
+	uint32_t val32;
+
+	/* clear VMCS shadowing bit in Secondary Proc Exec Controls */
+	val32 = exec_vmread(VMX_PROC_VM_EXEC_CONTROLS2);
+	val32 &= ~VMX_PROCBASED_CTLS2_VMCS_SHADW;
+	exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS2, val32);
+
+	exec_vmwrite(VMX_VMS_LINK_PTR_FULL, ~0UL);
+}
+
+/*
+ * @pre vcpu != NULL
+ */
+int32_t vmptrld_vmexit_handler(struct acrn_vcpu *vcpu)
+{
+	struct acrn_nested *nested = &vcpu->arch.nested;
+	uint64_t vmcs12_gpa;
+
+	if (check_vmx_permission(vcpu)) {
+		vmcs12_gpa = get_vmptr_gpa(vcpu);
+
+		if (!validate_vmptr_gpa(vmcs12_gpa)) {
+			nested_vmx_result(VMfailValid, VMXERR_VMPTRLD_INVALID_ADDRESS);
+		} else if (vmcs12_gpa == nested->vmxon_ptr) {
+			nested_vmx_result(VMfailValid, VMXERR_VMPTRLD_VMXON_POINTER);
+		} else if (!validate_vmcs_revision_id(vcpu, vmcs12_gpa)) {
+			nested_vmx_result(VMfailValid, VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
+		} else if (nested->current_vmcs12_ptr == vmcs12_gpa) {
+			/* VMPTRLD of the current VMCS12: nothing to do */
+			nested_vmx_result(VMsucceed, 0);
+		} else {
+			if (nested->current_vmcs12_ptr != INVALID_GPA) {
+				/*
+				 * The L1 hypervisor VMPTRLDs a new VMCS12, or VMPTRLDs a
+				 * VMCLEARed VMCS12. The current VMCS12 remains active, but
+				 * ACRN needs to sync its content to guest memory so that
+				 * the new VMCS12 can be loaded into the cache VMCS12.
+				 */
+
+				/*
+				 * Now VMCS02 is active and being used as a shadow VMCS.
+				 * Disable VMCS shadowing so that VMCS02 cannot be loaded by
+				 * VMPTRLD and referenced by VMCS01 as a shadow VMCS at the
+				 * same time.
+				 */
+				disable_vmcs_shadowing();
+
+				/* Flush the shadow VMCS to memory */
+				clear_va_vmcs(nested->vmcs02);
+
+				/* VMPTRLD the shadow VMCS so that we are able to sync it to VMCS12 */
+				load_va_vmcs(nested->vmcs02);
+
+				/* Sync the shadow VMCS to the cache VMCS12, and copy the cache VMCS12 to the L1 guest */
+				flush_current_vmcs12(vcpu);
+
+				/*
+				 * The current VMCS12 has been flushed out, so the active
+				 * VMCS02 needs to be VMCLEARed as well
+				 */
+				clear_va_vmcs(nested->vmcs02);
+			}
+
+			/* Create the VMCS02 based on this new VMCS12 */
+
+			/*
+			 * Initialize VMCS02: the VMCS revision ID must equal what is
+			 * reported by the IA32_VMX_BASIC MSR
+			 */
+			(void)memcpy_s(nested->vmcs02, 4U, (void *)&vmx_basic, 4U);
+
+			/*
+			 * VMCS02 is not active at this point; set the shadow-VMCS indicator.
+			 * At L1 VM entry, VMCS02 will be referenced as a shadow VMCS.
+			 */
+			set_vmcs02_shadow_indicator(vcpu);
+
+			/* VMPTRLD VMCS02 so that we can VMWRITE to it */
+			load_va_vmcs(nested->vmcs02);
+			init_host_state();
+
+			/* Load VMCS12 from L1 guest memory */
+			(void)copy_from_gpa(vcpu->vm, (void *)&nested->vmcs12, vmcs12_gpa,
+				sizeof(struct acrn_vmcs12));
+
+			/* Load the shadow fields from this new VMCS12 into VMCS02 */
+			sync_vmcs12_to_vmcs02(vcpu);
+
+			/* VMCLEAR VMCS02 before it is used as a shadow VMCS */
+			clear_va_vmcs(nested->vmcs02);
+
+			/* Switch back to vmcs01 */
+			load_va_vmcs(vcpu->arch.vmcs);
+
+			/* VMCS02 is referenced by the VMCS01 Link Pointer */
+			enable_vmcs_shadowing(vcpu);
+
+			nested->current_vmcs12_ptr = vmcs12_gpa;
+			nested_vmx_result(VMsucceed, 0);
+		}
+	}
+
+	return 0;
+}
+
+void init_nested_vmx(__unused struct acrn_vm *vm)
+{
+	static bool initialized = false;
+
+	if (!initialized) {
+		initialized = true;
+
+		/* Cache the value of physical MSR_IA32_VMX_BASIC */
+		vmx_basic = (uint32_t)msr_read(MSR_IA32_VMX_BASIC);
+	}
+}
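
For reference (Intel SDM Vol. 3, VMCS region format): the first 32-bit word of
a VMCS region holds the revision ID in bits 30:0 and the shadow-VMCS indicator
in bit 31. Bit 31 of IA32_VMX_BASIC is always 0, so the memcpy_s() of
vmx_basic above can never set the indicator by accident;
set_vmcs02_shadow_indicator() then sets it explicitly. A minimal sketch of the
resulting header word:

  /* illustrative only: first 32-bit word of the vmcs02 region */
  uint32_t header = vmx_basic;          /* bits 30:0 - VMCS revision ID; bit 31 is 0 */
  header |= VMCS_SHADOW_BIT_INDICATOR;  /* bit 31    - mark the region as a shadow VMCS */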


@@ -567,6 +567,10 @@ int32_t create_vm(uint16_t vm_id, uint64_t pcpu_bitmap, struct acrn_vm_config *v
 	init_guest_pm(vm);
 
+	if (is_nvmx_configured(vm)) {
+		init_nested_vmx(vm);
+	}
+
 	if (!is_lapic_pt_configured(vm)) {
 		vpic_init(vm);
 	}


@@ -80,7 +80,7 @@ static void init_guest_state(struct acrn_vcpu *vcpu)
 		ctx->run_ctx.cr4 & ~(CR4_VMXE | CR4_SMXE | CR4_MCE));
 }
 
-static void init_host_state(void)
+void init_host_state(void)
 {
 	uint16_t value16;
 	uint64_t value64;


@@ -82,8 +82,6 @@ static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = {
 		.handler = undefined_vmexit_handler},
 	[VMX_EXIT_REASON_VMLAUNCH] = {
 		.handler = undefined_vmexit_handler},
-	[VMX_EXIT_REASON_VMPTRLD] = {
-		.handler = undefined_vmexit_handler},
 	[VMX_EXIT_REASON_VMPTRST] = {
 		.handler = undefined_vmexit_handler},
 	[VMX_EXIT_REASON_VMREAD] = {
@@ -93,11 +91,16 @@ static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = {
 	[VMX_EXIT_REASON_VMWRITE] = {
 		.handler = undefined_vmexit_handler},
 #ifndef CONFIG_NVMX_ENABLED
+	[VMX_EXIT_REASON_VMPTRLD] = {
+		.handler = undefined_vmexit_handler},
 	[VMX_EXIT_REASON_VMXOFF] = {
 		.handler = undefined_vmexit_handler},
 	[VMX_EXIT_REASON_VMXON] = {
 		.handler = undefined_vmexit_handler},
 #else
+	[VMX_EXIT_REASON_VMPTRLD] = {
+		.handler = vmptrld_vmexit_handler,
+		.need_exit_qualification = 1},
 	[VMX_EXIT_REASON_VMXOFF] = {
 		.handler = vmxoff_vmexit_handler},
 	[VMX_EXIT_REASON_VMXON] = {
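
With .need_exit_qualification set, ACRN's generic VM-exit path reads the exit
qualification before invoking the handler, so vmptrld_vmexit_handler() can
decode the instruction operands. Roughly (paraphrased sketch of the dispatch
path, not part of this diff):

  /* sketch: how a dispatch_table entry is consumed on VM exit */
  dispatch = &dispatch_table[basic_exit_reason];
  if (dispatch->need_exit_qualification != 0U) {
          vcpu->arch.exit_qualification = exec_vmread(VMX_EXIT_QUALIFICATION);
  }
  ret = dispatch->handler(vcpu);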


@@ -109,6 +109,28 @@ void exec_vmptrld(void *addr)
 		: "cc", "memory");
 }
 
+/*
+ * @pre vmcs_va != NULL
+ */
+void load_va_vmcs(const uint8_t *vmcs_va)
+{
+	uint64_t vmcs_pa;
+
+	vmcs_pa = hva2hpa(vmcs_va);
+	exec_vmptrld((void *)&vmcs_pa);
+}
+
+/*
+ * @pre vmcs_va != NULL
+ */
+void clear_va_vmcs(const uint8_t *vmcs_va)
+{
+	uint64_t vmcs_pa;
+
+	vmcs_pa = hva2hpa(vmcs_va);
+	exec_vmclear((void *)&vmcs_pa);
+}
+
 /**
  * only run on current pcpu
  */
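
Both helpers take the host virtual address of a VMCS region and hand its
physical address to VMPTRLD/VMCLEAR. A minimal usage sketch (the buffer name
is illustrative; VMPTRLD and VMCLEAR require a 4-KByte-aligned region whose
first word carries the revision ID):

  uint8_t vmcs_page[PAGE_SIZE] __aligned(PAGE_SIZE);      /* hypothetical buffer */

  (void)memcpy_s(vmcs_page, 4U, (void *)&vmx_basic, 4U);  /* revision ID first */
  load_va_vmcs(vmcs_page);   /* VMPTRLD: make it current so VMWRITEs target it */
  /* ... exec_vmwrite() calls ... */
  clear_va_vmcs(vmcs_page);  /* VMCLEAR: flush to memory and deactivate */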


@@ -66,7 +66,12 @@ union value_64 {
 #define VMX_II_BASE_REG_VALID(v)	((((v) >> 27U) & 0x1U) == 0U)
 #define VMX_II_REG2(v)			(((v) >> 28U) & 0xfU)
 
+#define VMCS_SHADOW_BIT_INDICATOR	(1U << 31U)
+
 /* refer to ISDM: Table 30-1. VM-Instruction Error Numbers */
+#define VMXERR_VMPTRLD_INVALID_ADDRESS			(9)
+#define VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID	(10)
+#define VMXERR_VMPTRLD_VMXON_POINTER			(11)
 #define VMXERR_VMXON_IN_VMX_ROOT_OPERATION		(15)
 
 /*
@@ -75,6 +80,9 @@ union value_64 {
  */
 #define VMCS12_REVISION_ID	0x15407E12U
 
+/* Implemented in next patch */
+struct acrn_vmcs12 {};
+
 enum VMXResult {
 	VMsucceed,
 	VMfailValid,
@@ -83,19 +91,25 @@ enum VMXResult {
 void nested_vmx_result(enum VMXResult, int error_number);
 int32_t vmxon_vmexit_handler(struct acrn_vcpu *vcpu);
 int32_t vmxoff_vmexit_handler(struct acrn_vcpu *vcpu);
+int32_t vmptrld_vmexit_handler(struct acrn_vcpu *vcpu);
 
 #ifdef CONFIG_NVMX_ENABLED
 struct acrn_nested {
+	uint8_t vmcs02[PAGE_SIZE];	/* VMCS to run L2 and as Link Pointer in VMCS01 */
+	struct acrn_vmcs12 vmcs12;	/* To cache L1's VMCS12 */
+	uint64_t current_vmcs12_ptr;	/* GPA */
 	uint64_t vmxon_ptr;		/* GPA */
 	bool vmxon;			/* To indicate if vCPU entered VMX operation */
 } __aligned(PAGE_SIZE);
 
+void init_nested_vmx(__unused struct acrn_vm *vm);
 bool is_vmx_msr(uint32_t msr);
 void init_vmx_msrs(struct acrn_vcpu *vcpu);
 int32_t read_vmx_msr(__unused struct acrn_vcpu *vcpu, uint32_t msr, uint64_t *val);
 #else
 struct acrn_nested {};
+static inline void init_nested_vmx(__unused struct acrn_vm *vm) {}
 static inline bool is_vmx_msr(__unused uint32_t msr)
 {
 	/*
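
One detail worth noting: VMPTRLD and VMCLEAR require a 4-KByte-aligned
physical address, which holds here because vmcs02 is the first member of the
page-aligned struct acrn_nested. A hypothetical compile-time check (not part
of this patch) could pin that down:

  #include <stddef.h>

  _Static_assert(offsetof(struct acrn_nested, vmcs02) == 0,
  	"vmcs02 must start at the page-aligned base of struct acrn_nested");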


@@ -42,6 +42,7 @@ static inline uint64_t apic_access_offset(uint64_t qual)
 }
 
 void init_vmcs(struct acrn_vcpu *vcpu);
 void load_vmcs(const struct acrn_vcpu *vcpu);
+void init_host_state(void);
 void switch_apicv_mode_x2apic(struct acrn_vcpu *vcpu);
 
 #endif /* ASSEMBLER */


@@ -61,6 +61,10 @@
 #define VMX_EOI_EXIT2_HIGH		0x00002021U
 #define VMX_EOI_EXIT3_FULL		0x00002022U
 #define VMX_EOI_EXIT3_HIGH		0x00002023U
+#define VMX_VMREAD_BITMAP_FULL		0x00002026U
+#define VMX_VMREAD_BITMAP_HIGH		0x00002027U
+#define VMX_VMWRITE_BITMAP_FULL		0x00002028U
+#define VMX_VMWRITE_BITMAP_HIGH		0x00002029U
 #define VMX_XSS_EXITING_BITMAP_FULL	0x0000202CU
 #define VMX_XSS_EXITING_BITMAP_HIGH	0x0000202DU
 
@@ -443,6 +447,8 @@ void exec_vmwrite64(uint32_t field_full, uint64_t value);
 void exec_vmclear(void *addr);
 void exec_vmptrld(void *addr);
+void clear_va_vmcs(const uint8_t *vmcs_va);
+void load_va_vmcs(const uint8_t *vmcs_va);
 void init_cr0_cr4_flexible_bits(void);
 bool is_valid_cr0_cr4(uint64_t cr0, uint64_t cr4);
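
The VMREAD/VMWRITE bitmaps these encodings point at are indexed by bits 14:0
of a VMCS field encoding: a set bit forces a VM exit on the access, a clear
bit lets it hit the shadow VMCS. The all-zero vmcs_shadowing_bitmap therefore
shadows every field. A sketch of the lookup, with set_bitmap_vmexit() as a
hypothetical helper (not part of this patch):

  /* hypothetical helper: force VM exits on VMREAD/VMWRITE of one field */
  static inline void set_bitmap_vmexit(uint64_t *bitmap, uint32_t field_encoding)
  {
  	uint32_t n = field_encoding & 0x7fffU;		/* bits 14:0 select bit n */

  	bitmap[n >> 6U] |= (1UL << (n & 0x3fU));	/* 1 = VM exit, 0 = shadow access */
  }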