diff --git a/hypervisor/arch/x86/guest/nested.c b/hypervisor/arch/x86/guest/nested.c
index f78f4556c..a9b061f01 100644
--- a/hypervisor/arch/x86/guest/nested.c
+++ b/hypervisor/arch/x86/guest/nested.c
@@ -11,8 +11,12 @@
 #include
 #include
 #include
+#include
 #include
 
+/* Cache the content of MSR_IA32_VMX_BASIC */
+static uint32_t vmx_basic;
+
 /* The only purpose of this array is to serve the is_vmx_msr() function */
 static const uint32_t vmx_msrs[NUM_VMX_MSRS] = {
 	LIST_OF_VMX_MSRS
@@ -224,6 +228,9 @@ int32_t read_vmx_msr(struct acrn_vcpu *vcpu, uint32_t msr, uint64_t *val)
 	return err;
 }
 
+/* to be shared by all vCPUs for all nested guests */
+static uint64_t vmcs_shadowing_bitmap[PAGE_SIZE / sizeof(uint64_t)] __aligned(PAGE_SIZE);
+
 void nested_vmx_result(enum VMXResult result, int error_number)
 {
 	uint64_t rflags = exec_vmread(VMX_GUEST_RFLAGS);
@@ -430,6 +437,7 @@ int32_t vmxon_vmexit_handler(struct acrn_vcpu *vcpu)
 	} else {
 		vcpu->arch.nested.vmxon = true;
 		vcpu->arch.nested.vmxon_ptr = vmptr_gpa;
+		vcpu->arch.nested.current_vmcs12_ptr = INVALID_GPA;
 		nested_vmx_result(VMsucceed, 0);
 	}
 
@@ -471,9 +479,210 @@ int32_t vmxoff_vmexit_handler(struct acrn_vcpu *vcpu)
 {
 	if (check_vmx_permission(vcpu)) {
 		vcpu->arch.nested.vmxon = false;
+		vcpu->arch.nested.current_vmcs12_ptr = INVALID_GPA;
+
+		(void)memset(vcpu->arch.nested.vmcs02, 0U, PAGE_SIZE);
+		(void)memset(&vcpu->arch.nested.vmcs12, 0U, sizeof(struct acrn_vmcs12));
+
 		nested_vmx_result(VMsucceed, 0);
 	}
 
 	return 0;
 }
+
+/**
+ * @brief Sync shadow fields from vmcs02 to the cached VMCS12
+ *
+ * @pre vcpu != NULL
+ * @pre vmcs02 is current
+ */
+static void sync_vmcs02_to_vmcs12(__unused struct acrn_vcpu *vcpu)
+{
+	/* Implemented in next patch */
+	return;
+}
+
+/**
+ * @brief Sync shadow fields from the cached VMCS12 to vmcs02
+ *
+ * @pre vcpu != NULL
+ * @pre vmcs02 is current
+ */
+static void sync_vmcs12_to_vmcs02(__unused struct acrn_vcpu *vcpu)
+{
+	/* Implemented in next patch */
+	return;
+}
+
+/*
+ * @pre vcpu != NULL
+ * @pre vmcs02 is current
+ */
+static void flush_current_vmcs12(struct acrn_vcpu *vcpu)
+{
+	/*
+	 * Since we have one cached VMCS12 and one active VMCS02 per vCPU,
+	 * at the time of a VMCLEAR of the current VMCS12, or a VMPTRLD of
+	 * a new VMCS12 on this vCPU, we need to sync the shadow fields from
+	 * VMCS02 to the cached VMCS12, and save the cached VMCS12 to guest
+	 * memory.
+	 */
+	sync_vmcs02_to_vmcs12(vcpu);
+
+	/* flush the cached VMCS12 back to L1 guest memory */
+	(void)copy_to_gpa(vcpu->vm, (void *)&vcpu->arch.nested.vmcs12,
+		vcpu->arch.nested.current_vmcs12_ptr, sizeof(struct acrn_vmcs12));
+}
+
+/*
+ * @pre vcpu != NULL
+ */
+static void set_vmcs02_shadow_indicator(struct acrn_vcpu *vcpu)
+{
+	/* vmcs02 is shadowing */
+	*((uint32_t *)vcpu->arch.nested.vmcs02) |= VMCS_SHADOW_BIT_INDICATOR;
+}
+
+/*
+ * @pre vcpu != NULL
+ * @pre vmcs01 is current
+ */
+static void enable_vmcs_shadowing(struct acrn_vcpu *vcpu)
+{
+	uint32_t val32;
+
+	/*
+	 * Using the same bitmap for VMREAD and VMWRITE is not typical.
+	 * Here we assume the L1 hypervisor will not erroneously write to read-only fields.
+	 * TODO: may use a separate bitmap to exclude read-only fields from the VMWRITE bitmap.
+	 */
+	exec_vmwrite(VMX_VMREAD_BITMAP_FULL, hva2hpa(vmcs_shadowing_bitmap));
+	exec_vmwrite(VMX_VMWRITE_BITMAP_FULL, hva2hpa(vmcs_shadowing_bitmap));
+
+	/* Set the VMCS-shadowing bit in the secondary processor-based VM-execution controls */
+	val32 = exec_vmread(VMX_PROC_VM_EXEC_CONTROLS2);
+	val32 |= VMX_PROCBASED_CTLS2_VMCS_SHADW;
+	exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS2, val32);
+
+	/* Set the VMCS link pointer */
+	exec_vmwrite(VMX_VMS_LINK_PTR_FULL, hva2hpa(vcpu->arch.nested.vmcs02));
+}
+
+/*
+ * @pre vmcs01 is current
+ */
+static void disable_vmcs_shadowing(void)
+{
+	uint32_t val32;
+
+	/* clear the VMCS-shadowing bit in the secondary processor-based VM-execution controls */
+	val32 = exec_vmread(VMX_PROC_VM_EXEC_CONTROLS2);
+	val32 &= ~VMX_PROCBASED_CTLS2_VMCS_SHADW;
+	exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS2, val32);
+
+	exec_vmwrite(VMX_VMS_LINK_PTR_FULL, ~0UL);
+}
+
+/*
+ * @pre vcpu != NULL
+ */
+int32_t vmptrld_vmexit_handler(struct acrn_vcpu *vcpu)
+{
+	struct acrn_nested *nested = &vcpu->arch.nested;
+	uint64_t vmcs12_gpa;
+
+	if (check_vmx_permission(vcpu)) {
+		vmcs12_gpa = get_vmptr_gpa(vcpu);
+
+		if (!validate_vmptr_gpa(vmcs12_gpa)) {
+			nested_vmx_result(VMfailValid, VMXERR_VMPTRLD_INVALID_ADDRESS);
+		} else if (vmcs12_gpa == nested->vmxon_ptr) {
+			nested_vmx_result(VMfailValid, VMXERR_VMPTRLD_VMXON_POINTER);
+		} else if (!validate_vmcs_revision_id(vcpu, vmcs12_gpa)) {
+			nested_vmx_result(VMfailValid, VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
+		} else if (nested->current_vmcs12_ptr == vmcs12_gpa) {
+			/* VMPTRLD of the current VMCS12: nothing to do */
+			nested_vmx_result(VMsucceed, 0);
+		} else {
+			if (nested->current_vmcs12_ptr != INVALID_GPA) {
+				/*
+				 * The L1 hypervisor VMPTRLDs a new VMCS12, or VMPTRLDs a VMCLEARed VMCS12.
+				 * The current VMCS12 remains active, but ACRN needs to sync its content
+				 * to guest memory so that the new VMCS12 can be loaded into the cached VMCS12.
+				 */
+
+				/*
+				 * VMCS02 is currently active and being used as a shadow VMCS.
+				 * Disable VMCS shadowing to avoid VMCS02 being loaded by VMPTRLD
+				 * and referenced by VMCS01 as a shadow VMCS simultaneously.
+				 */
+				disable_vmcs_shadowing();
+
+				/* Flush the shadow VMCS to memory */
+				clear_va_vmcs(nested->vmcs02);
+
+				/* VMPTRLD the shadow VMCS so that we are able to sync it to VMCS12 */
+				load_va_vmcs(nested->vmcs02);
+
+				/* Sync the shadow VMCS to the cached VMCS12, and copy it to the L1 guest */
+				flush_current_vmcs12(vcpu);
+
+				/*
+				 * The current VMCS12 has been flushed out, so the active VMCS02
+				 * needs to be VMCLEARed as well
+				 */
+				clear_va_vmcs(nested->vmcs02);
+			}
+
+			/* Create the VMCS02 based on this new VMCS12 */
+
+			/*
+			 * Initialize VMCS02. Its VMCS revision ID must equal
+			 * the value reported by the IA32_VMX_BASIC MSR.
+			 */
+			(void)memcpy_s(nested->vmcs02, 4U, (void *)&vmx_basic, 4U);
+
+			/*
+			 * VMCS02 is not active at this point; set its shadow-VMCS indicator.
+			 * At L1 VM entry, VMCS02 will be referenced as a shadow VMCS.
+			 */
+			set_vmcs02_shadow_indicator(vcpu);
+
+			/* VMPTRLD VMCS02 so that we can VMWRITE to it */
+			load_va_vmcs(nested->vmcs02);
+			init_host_state();
+
+			/* Load VMCS12 from L1 guest memory */
+			(void)copy_from_gpa(vcpu->vm, (void *)&nested->vmcs12, vmcs12_gpa,
+				sizeof(struct acrn_vmcs12));
+
+			/* Load the shadow fields from this new VMCS12 into VMCS02 */
+			sync_vmcs12_to_vmcs02(vcpu);
+
+			/* VMCLEAR VMCS02 before it is used as a shadow VMCS */
+			clear_va_vmcs(nested->vmcs02);
+
+			/* Switch back to vmcs01 */
+			load_va_vmcs(vcpu->arch.vmcs);
+
+			/* VMCS02 is referenced by the VMCS01 link pointer */
+			enable_vmcs_shadowing(vcpu);
+
+			nested->current_vmcs12_ptr = vmcs12_gpa;
+			nested_vmx_result(VMsucceed, 0);
+		}
+	}
+
+	return 0;
+}
+
+void init_nested_vmx(__unused struct acrn_vm *vm)
+{
+	static bool initialized = false;
+
+	if (!initialized) {
+		initialized = true;
+
+		/* Cache the value of the physical MSR_IA32_VMX_BASIC */
+		vmx_basic = (uint32_t)msr_read(MSR_IA32_VMX_BASIC);
+	}
+}
diff --git a/hypervisor/arch/x86/guest/vm.c b/hypervisor/arch/x86/guest/vm.c
index 3a79fc447..7a13fe5d7 100644
--- a/hypervisor/arch/x86/guest/vm.c
+++ b/hypervisor/arch/x86/guest/vm.c
@@ -567,6 +567,10 @@ int32_t create_vm(uint16_t vm_id, uint64_t pcpu_bitmap, struct acrn_vm_config *v
 
 	init_guest_pm(vm);
 
+	if (is_nvmx_configured(vm)) {
+		init_nested_vmx(vm);
+	}
+
 	if (!is_lapic_pt_configured(vm)) {
 		vpic_init(vm);
 	}
diff --git a/hypervisor/arch/x86/guest/vmcs.c b/hypervisor/arch/x86/guest/vmcs.c
index d5311a4bc..c548f4f8d 100644
--- a/hypervisor/arch/x86/guest/vmcs.c
+++ b/hypervisor/arch/x86/guest/vmcs.c
@@ -80,7 +80,7 @@ static void init_guest_state(struct acrn_vcpu *vcpu)
 		ctx->run_ctx.cr4 & ~(CR4_VMXE | CR4_SMXE | CR4_MCE));
 }
 
-static void init_host_state(void)
+void init_host_state(void)
 {
 	uint16_t value16;
 	uint64_t value64;
diff --git a/hypervisor/arch/x86/guest/vmexit.c b/hypervisor/arch/x86/guest/vmexit.c
index 8fa25f5ee..fc01838ad 100644
--- a/hypervisor/arch/x86/guest/vmexit.c
+++ b/hypervisor/arch/x86/guest/vmexit.c
@@ -82,8 +82,6 @@ static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = {
 		.handler = undefined_vmexit_handler},
 	[VMX_EXIT_REASON_VMLAUNCH] = {
 		.handler = undefined_vmexit_handler},
-	[VMX_EXIT_REASON_VMPTRLD] = {
-		.handler = undefined_vmexit_handler},
 	[VMX_EXIT_REASON_VMPTRST] = {
 		.handler = undefined_vmexit_handler},
 	[VMX_EXIT_REASON_VMREAD] = {
@@ -93,11 +91,16 @@ static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = {
 	[VMX_EXIT_REASON_VMWRITE] = {
 		.handler = undefined_vmexit_handler},
 #ifndef CONFIG_NVMX_ENABLED
+	[VMX_EXIT_REASON_VMPTRLD] = {
+		.handler = undefined_vmexit_handler},
 	[VMX_EXIT_REASON_VMXOFF] = {
 		.handler = undefined_vmexit_handler},
 	[VMX_EXIT_REASON_VMXON] = {
 		.handler = undefined_vmexit_handler},
 #else
+	[VMX_EXIT_REASON_VMPTRLD] = {
+		.handler = vmptrld_vmexit_handler,
+		.need_exit_qualification = 1},
 	[VMX_EXIT_REASON_VMXOFF] = {
 		.handler = vmxoff_vmexit_handler},
 	[VMX_EXIT_REASON_VMXON] = {
diff --git a/hypervisor/arch/x86/vmx.c b/hypervisor/arch/x86/vmx.c
index c37e9d90f..6379ca65d 100644
--- a/hypervisor/arch/x86/vmx.c
+++ b/hypervisor/arch/x86/vmx.c
@@ -109,6 +109,28 @@ void exec_vmptrld(void *addr)
 		: "cc", "memory");
 }
 
+/*
+ * @pre vmcs_va != NULL
+ */
+void load_va_vmcs(const uint8_t *vmcs_va)
+{
+	uint64_t vmcs_pa;
+
+	vmcs_pa = hva2hpa(vmcs_va);
+	exec_vmptrld((void *)&vmcs_pa);
+}
+
+/*
+ * @pre vmcs_va != NULL
+ */
+void clear_va_vmcs(const uint8_t *vmcs_va)
+{
+	uint64_t vmcs_pa;
+
+	vmcs_pa = hva2hpa(vmcs_va);
+	exec_vmclear((void *)&vmcs_pa);
+}
+
 /**
  * only run on current pcpu
  */
diff --git a/hypervisor/include/arch/x86/asm/guest/nested.h b/hypervisor/include/arch/x86/asm/guest/nested.h
index d8b30f4c4..d5a2bbe4a 100644
--- a/hypervisor/include/arch/x86/asm/guest/nested.h
+++ b/hypervisor/include/arch/x86/asm/guest/nested.h
@@ -66,7 +66,12 @@ union value_64 {
 #define VMX_II_BASE_REG_VALID(v)	((((v) >> 27U) & 0x1U) == 0U)
 #define VMX_II_REG2(v)	(((v) >> 28U) & 0xfU)
 
+#define VMCS_SHADOW_BIT_INDICATOR	(1U << 31U)
+
 /* refer to ISDM: Table 30-1. VM-Instruction Error Numbers */
+#define VMXERR_VMPTRLD_INVALID_ADDRESS	(9)
+#define VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID	(10)
+#define VMXERR_VMPTRLD_VMXON_POINTER	(11)
 #define VMXERR_VMXON_IN_VMX_ROOT_OPERATION	(15)
 
 /*
@@ -75,6 +80,9 @@ union value_64 {
  */
 #define VMCS12_REVISION_ID	0x15407E12U
 
+/* Implemented in next patch */
+struct acrn_vmcs12 {};
+
 enum VMXResult {
 	VMsucceed,
 	VMfailValid,
@@ -83,19 +91,25 @@ enum VMXResult {
 void nested_vmx_result(enum VMXResult, int error_number);
 int32_t vmxon_vmexit_handler(struct acrn_vcpu *vcpu);
 int32_t vmxoff_vmexit_handler(struct acrn_vcpu *vcpu);
+int32_t vmptrld_vmexit_handler(struct acrn_vcpu *vcpu);
 
 #ifdef CONFIG_NVMX_ENABLED
 struct acrn_nested {
+	uint8_t vmcs02[PAGE_SIZE];	/* VMCS to run L2, and the link pointer in VMCS01 */
+	struct acrn_vmcs12 vmcs12;	/* To cache L1's VMCS12 */
+	uint64_t current_vmcs12_ptr;	/* GPA */
 	uint64_t vmxon_ptr;	/* GPA */
 	bool vmxon;	/* To indicate if vCPU entered VMX operation */
 } __aligned(PAGE_SIZE);
 
+void init_nested_vmx(__unused struct acrn_vm *vm);
 bool is_vmx_msr(uint32_t msr);
 void init_vmx_msrs(struct acrn_vcpu *vcpu);
 int32_t read_vmx_msr(__unused struct acrn_vcpu *vcpu, uint32_t msr, uint64_t *val);
 #else
 struct acrn_nested {};
 
+static inline void init_nested_vmx(__unused struct acrn_vm *vm) {}
 static inline bool is_vmx_msr(__unused uint32_t msr)
 {
 	/*
diff --git a/hypervisor/include/arch/x86/asm/guest/vmcs.h b/hypervisor/include/arch/x86/asm/guest/vmcs.h
index b121ca6c0..5391686fa 100644
--- a/hypervisor/include/arch/x86/asm/guest/vmcs.h
+++ b/hypervisor/include/arch/x86/asm/guest/vmcs.h
@@ -42,6 +42,7 @@ static inline uint64_t apic_access_offset(uint64_t qual)
 }
 void init_vmcs(struct acrn_vcpu *vcpu);
 void load_vmcs(const struct acrn_vcpu *vcpu);
+void init_host_state(void);
 void switch_apicv_mode_x2apic(struct acrn_vcpu *vcpu);
 
 #endif /* ASSEMBLER */
diff --git a/hypervisor/include/arch/x86/asm/vmx.h b/hypervisor/include/arch/x86/asm/vmx.h
index 2050a72f7..87c3022ca 100644
--- a/hypervisor/include/arch/x86/asm/vmx.h
+++ b/hypervisor/include/arch/x86/asm/vmx.h
@@ -61,6 +61,10 @@
 #define VMX_EOI_EXIT2_HIGH	0x00002021U
 #define VMX_EOI_EXIT3_FULL	0x00002022U
 #define VMX_EOI_EXIT3_HIGH	0x00002023U
+#define VMX_VMREAD_BITMAP_FULL	0x00002026U
+#define VMX_VMREAD_BITMAP_HIGH	0x00002027U
+#define VMX_VMWRITE_BITMAP_FULL	0x00002028U
+#define VMX_VMWRITE_BITMAP_HIGH	0x00002029U
 #define VMX_XSS_EXITING_BITMAP_FULL	0x0000202CU
 #define VMX_XSS_EXITING_BITMAP_HIGH	0x0000202DU
 
@@ -443,6 +447,8 @@ void exec_vmwrite64(uint32_t field_full, uint64_t value);
 void exec_vmclear(void *addr);
 void exec_vmptrld(void *addr);
 
+void clear_va_vmcs(const uint8_t *vmcs_va);
+void load_va_vmcs(const uint8_t *vmcs_va);
 void init_cr0_cr4_flexible_bits(void);
 bool is_valid_cr0_cr4(uint64_t cr0, uint64_t cr4);
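
Note on the all-zero vmcs_shadowing_bitmap: with VMCS shadowing enabled, an L1 VMREAD or VMWRITE consults bit n of the 4-KByte bitmap, where n is bits 14:0 of the VMCS-field encoding; a set bit forces a VM exit, while a clear bit lets the access go straight to the shadow VMCS. Because the bitmap in this patch is static and therefore zero-initialized, every L1 field access is satisfied by VMCS02 without an exit. The standalone sketch below only illustrates the indexing math; the helper trap_vmcs_field() is hypothetical and not part of this patch.

#include <stdint.h>
#include <stdio.h>

#define BITMAP_BYTES	4096U
#define BITMAP_QWORDS	(BITMAP_BYTES / sizeof(uint64_t))

/* zero-initialized: no VMREAD/VMWRITE exits, all fields are shadowed */
static uint64_t shadowing_bitmap[BITMAP_QWORDS];

/* hypothetical helper: make L1 accesses to one field cause a VM exit */
static void trap_vmcs_field(uint64_t *bm, uint32_t field_encoding)
{
	uint32_t n = field_encoding & 0x7fffU;	/* bits 14:0 select bit n */

	bm[n / 64U] |= (1ULL << (n % 64U));
}

int main(void)
{
	/* e.g. trap accesses to the VMCS link pointer, encoding 0x2800 */
	trap_vmcs_field(shadowing_bitmap, 0x2800U);
	printf("bit %u of qword %u is set\n", 0x2800U % 64U, 0x2800U / 64U);
	return 0;
}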
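
Likewise, the memcpy_s() plus set_vmcs02_shadow_indicator() pair in vmptrld_vmexit_handler() builds the 32-bit word at offset 0 of VMCS02: bits 30:0 carry the VMCS revision identifier reported by IA32_VMX_BASIC, and bit 31 is the shadow-VMCS indicator. A minimal sketch of that layout, using a placeholder value instead of the real MSR read:

#include <stdint.h>
#include <stdio.h>

#define VMCS_SHADOW_BIT_INDICATOR	(1U << 31U)

int main(void)
{
	/* placeholder; the patch caches this from MSR_IA32_VMX_BASIC */
	uint32_t vmx_basic = 0x00000004U;
	uint32_t vmcs_header = vmx_basic;	/* ordinary VMCS header */

	vmcs_header |= VMCS_SHADOW_BIT_INDICATOR;	/* mark as shadow VMCS, as for vmcs02 */
	printf("vmcs02 header: 0x%08x\n", vmcs_header);
	return 0;
}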