acrn-hypervisor/hypervisor/arch/x86/guest/nested.c
Zide Chen 3fdad3c6d1 hv: nested: check prerequisites to enter VMX operation
According to VMXON Instruction Reference, do the following checks in the
virtual hardware environment: vCPU CPL, guest CR0, CR4, revision ID
in VMXON region, etc.

Currently ACRN doesn't support a 32-bit L1 hypervisor, and injects a #UD
exception if the L1 hypervisor is not running in 64-bit mode.

Tracked-On: #5923
Signed-off-by: Zide Chen <zide.chen@intel.com>
Acked-by: Eddie Dong <eddie.dong@Intel.com>
2021-05-24 10:34:01 +08:00

/*
 * Copyright (C) 2021 Intel Corporation.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <types.h>
#include <logmsg.h>
#include <asm/guest/virq.h>
#include <asm/guest/vcpu.h>
#include <asm/guest/vm.h>
#include <asm/guest/nested.h>

/* The only purpose of this array is to serve the is_vmx_msr() function */
static const uint32_t vmx_msrs[NUM_VMX_MSRS] = {
	LIST_OF_VMX_MSRS
};

bool is_vmx_msr(uint32_t msr)
{
	bool found = false;
	uint32_t i;

	for (i = 0U; i < NUM_VMX_MSRS; i++) {
		if (msr == vmx_msrs[i]) {
			found = true;
			break;
		}
	}

	return found;
}

static uint64_t adjust_vmx_ctrls(uint32_t msr, uint64_t request_bits)
{
	union value_64 val64, msr_val;

	/*
	 * ISDM Appendix A.3, A.4, A.5:
	 * - Bits 31:0 indicate the allowed 0-settings of these controls.
	 *   Bit X of the corresponding VM-execution controls field is allowed to be 0
	 *   if bit X in the MSR is cleared to 0.
	 * - Bits 63:32 indicate the allowed 1-settings of these controls.
	 *   VM entry allows control X to be 1 if bit 32+X in the MSR is set to 1.
	 */
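	/*
	 * Hypothetical worked example of the encoding above (the values are made
	 * up, not read from any real capability MSR): if this MSR read
	 * 0x00000016_00000006, bits 1 and 2 would have to be 1 (allowed
	 * 0-settings) and bits 1, 2 and 4 could be 1 (allowed 1-settings). With
	 * request_bits covering bits 4 and 9, the value built below would be
	 * lo_32 = 0x6 and hi_32 = 0x6 | (0x16 & 0x210) = 0x16: bit 4 is granted,
	 * bit 9 is dropped because hardware does not support it.
	 */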
	msr_val.full = msr_read(msr);

	/*
	 * The reserved bits in the VMCS control fields could be 0 or 1, as determined by the
	 * corresponding capability MSR, so they need to be read from the physical MSR.
	 *
	 * We consider the bits that are set in the allowed 0-settings group as the
	 * minimal set of bits that need to be set from the physical processor's perspective.
	 * Since we shadow this control field, we pass through the allowed 0-settings bits.
	 */
	val64.u.lo_32 = msr_val.u.lo_32;

	/* the allowed 1-settings include those bits that are NOT allowed to be 0 */
	val64.u.hi_32 = msr_val.u.lo_32;

	/* make sure the requested features are supported by hardware */
	val64.u.hi_32 |= (msr_val.u.hi_32 & request_bits);

	return val64.full;
}

/*
 * @pre vcpu != NULL
 */
void init_vmx_msrs(struct acrn_vcpu *vcpu)
{
	union value_64 val64;
	uint64_t request_bits, msr_value;

	if (is_nvmx_configured(vcpu->vm)) {
		/* MSR_IA32_VMX_BASIC */
		val64.full = VMCS12_REVISION_ID	/* Bits 30:0 - VMCS revision ID */
			| (4096UL << 32U)	/* Bits 44:32 - size of VMXON region and VMCS region */
			| (6UL << 50U)		/* Bits 53:50 - memory type for VMCS etc. (6: Write Back) */
			| (1UL << 54U)		/* Bit 54: VM-exit instruction-information for INS and OUTS */
			| (1UL << 55U);		/* Bit 55: VMX controls that default to 1 may be cleared to 0 */
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_BASIC, val64.full);

		/* MSR_IA32_VMX_MISC */
		/*
		 * Some bits need to be read from the physical MSR. For example, bits 4:0 report
		 * the relationship between the rate of the VMX-preemption timer and that of the
		 * timestamp counter (TSC).
		 */
		val64.full = msr_read(MSR_IA32_VMX_MISC);
		val64.u.hi_32 = 0U;

		/* Don't support Intel® Processor Trace (Intel PT) in VMX operation */
		val64.u.lo_32 &= ~(1U << 14U);

		/* Don't support SMM in VMX operation */
		val64.u.lo_32 &= ~((1U << 15U) | (1U << 28U));
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_MISC, val64.full);

		/*
		 * TODO: These emulated VMX control MSRs work for Tiger Lake and Kaby Lake;
		 * they may potentially have problems when run on other platforms.
		 *
		 * We have not yet made a serious effort to enable as many features as possible.
		 */

		/* MSR_IA32_VMX_PINBASED_CTLS */
		request_bits = VMX_PINBASED_CTLS_IRQ_EXIT
			| VMX_PINBASED_CTLS_NMI_EXIT
			| VMX_PINBASED_CTLS_ENABLE_PTMR;
		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_PINBASED_CTLS, request_bits);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS, msr_value);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_PINBASED_CTLS, msr_value);

		/* MSR_IA32_VMX_PROCBASED_CTLS */
		request_bits = VMX_PROCBASED_CTLS_IRQ_WIN | VMX_PROCBASED_CTLS_TSC_OFF
			| VMX_PROCBASED_CTLS_HLT | VMX_PROCBASED_CTLS_INVLPG
			| VMX_PROCBASED_CTLS_MWAIT | VMX_PROCBASED_CTLS_RDPMC
			| VMX_PROCBASED_CTLS_RDTSC | VMX_PROCBASED_CTLS_CR3_LOAD
			| VMX_PROCBASED_CTLS_CR3_STORE | VMX_PROCBASED_CTLS_CR8_LOAD
			| VMX_PROCBASED_CTLS_CR8_STORE | VMX_PROCBASED_CTLS_NMI_WINEXIT
			| VMX_PROCBASED_CTLS_MOV_DR | VMX_PROCBASED_CTLS_UNCOND_IO
			| VMX_PROCBASED_CTLS_MSR_BITMAP | VMX_PROCBASED_CTLS_MONITOR
			| VMX_PROCBASED_CTLS_PAUSE | VMX_PROCBASED_CTLS_SECONDARY;
		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_PROCBASED_CTLS, request_bits);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_PROCBASED_CTLS, msr_value);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS, msr_value);

		/* MSR_IA32_VMX_PROCBASED_CTLS2 */
		request_bits = VMX_PROCBASED_CTLS2_EPT | VMX_PROCBASED_CTLS2_RDTSCP
			| VMX_PROCBASED_CTLS2_VPID | VMX_PROCBASED_CTLS2_WBINVD
			| VMX_PROCBASED_CTLS2_UNRESTRICT | VMX_PROCBASED_CTLS2_PAUSE_LOOP
			| VMX_PROCBASED_CTLS2_RDRAND | VMX_PROCBASED_CTLS2_INVPCID
			| VMX_PROCBASED_CTLS2_RDSEED | VMX_PROCBASED_CTLS2_XSVE_XRSTR
			| VMX_PROCBASED_CTLS2_PT_USE_GPA | VMX_PROCBASED_CTLS2_TSC_SCALING;
		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_PROCBASED_CTLS2, request_bits);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2, msr_value);

		/* MSR_IA32_VMX_EXIT_CTLS */
		request_bits = VMX_EXIT_CTLS_SAVE_DBG | VMX_EXIT_CTLS_HOST_ADDR64
			| VMX_EXIT_CTLS_ACK_IRQ | VMX_EXIT_CTLS_LOAD_PAT
			| VMX_EXIT_CTLS_LOAD_EFER;
		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_EXIT_CTLS, request_bits);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_EXIT_CTLS, msr_value);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_TRUE_EXIT_CTLS, msr_value);

		/* MSR_IA32_VMX_ENTRY_CTLS */
		request_bits = VMX_ENTRY_CTLS_LOAD_DBG | VMX_ENTRY_CTLS_IA32E_MODE
			| VMX_ENTRY_CTLS_LOAD_PERF | VMX_ENTRY_CTLS_LOAD_PAT
			| VMX_ENTRY_CTLS_LOAD_EFER;
		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_ENTRY_CTLS, request_bits);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_ENTRY_CTLS, msr_value);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS, msr_value);

		/* For now passthru the value from physical MSR to L1 guest */
		msr_value = msr_read(MSR_IA32_VMX_EPT_VPID_CAP);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_EPT_VPID_CAP, msr_value);

		msr_value = msr_read(MSR_IA32_VMX_CR0_FIXED0);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_CR0_FIXED0, msr_value);
		msr_value = msr_read(MSR_IA32_VMX_CR0_FIXED1);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_CR0_FIXED1, msr_value);

		msr_value = msr_read(MSR_IA32_VMX_CR4_FIXED0);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_CR4_FIXED0, msr_value);
		msr_value = msr_read(MSR_IA32_VMX_CR4_FIXED1);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_CR4_FIXED1, msr_value);

		msr_value = msr_read(MSR_IA32_VMX_VMCS_ENUM);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, msr_value);
	}
}

/*
 * @pre vcpu != NULL
 */
int32_t read_vmx_msr(struct acrn_vcpu *vcpu, uint32_t msr, uint64_t *val)
{
	uint64_t v = 0UL;
	int32_t err = 0;

	if (is_nvmx_configured(vcpu->vm)) {
		switch (msr) {
		case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
		case MSR_IA32_VMX_PINBASED_CTLS:
		case MSR_IA32_VMX_PROCBASED_CTLS:
		case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
		case MSR_IA32_VMX_PROCBASED_CTLS2:
		case MSR_IA32_VMX_EXIT_CTLS:
		case MSR_IA32_VMX_TRUE_EXIT_CTLS:
		case MSR_IA32_VMX_ENTRY_CTLS:
		case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
		case MSR_IA32_VMX_BASIC:
		case MSR_IA32_VMX_MISC:
		case MSR_IA32_VMX_EPT_VPID_CAP:
		case MSR_IA32_VMX_CR0_FIXED0:
		case MSR_IA32_VMX_CR0_FIXED1:
		case MSR_IA32_VMX_CR4_FIXED0:
		case MSR_IA32_VMX_CR4_FIXED1:
		case MSR_IA32_VMX_VMCS_ENUM:
		{
			v = vcpu_get_guest_msr(vcpu, msr);
			break;
		}
		/* Don't support these MSRs yet */
		case MSR_IA32_SMBASE:
		case MSR_IA32_VMX_PROCBASED_CTLS3:
		case MSR_IA32_VMX_VMFUNC:
		default:
			err = -EACCES;
			break;
		}
	} else {
		err = -EACCES;
	}

	*val = v;
	return err;
}

void nested_vmx_result(enum VMXResult result, int error_number)
{
	uint64_t rflags = exec_vmread(VMX_GUEST_RFLAGS);

	/* ISDM: section 30.2 CONVENTIONS */
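	/*
	 * Per those conventions: VMsucceed clears CF, PF, AF, ZF, SF and OF;
	 * VMfailInvalid sets CF and clears the other five; VMfailValid sets ZF,
	 * clears the other five, and reports error_number through the
	 * VM-instruction error field.
	 */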
	rflags &= ~(RFLAGS_C | RFLAGS_P | RFLAGS_A | RFLAGS_Z | RFLAGS_S | RFLAGS_O);

	if (result == VMfailValid) {
		rflags |= RFLAGS_Z;
		exec_vmwrite(VMX_INSTR_ERROR, error_number);
	} else if (result == VMfailInvalid) {
		rflags |= RFLAGS_C;
	} else {
		/* VMsucceed, do nothing */
	}

	if (result != VMsucceed) {
		pr_err("VMX failed: %d/%d", result, error_number);
	}

	exec_vmwrite(VMX_GUEST_RFLAGS, rflags);
}

/**
 * @brief get the memory-address operand of a vmx instruction
 *
 * @pre vcpu != NULL
 */
static uint64_t get_vmx_memory_operand(struct acrn_vcpu *vcpu, uint32_t instr_info)
{
	uint64_t gva, gpa, seg_base = 0UL;
	uint32_t seg, err_code = 0U;
	uint64_t offset;

	/*
	 * According to ISDM 3B, Basic VM-Exit Information: for INVEPT, INVPCID, INVVPID, LGDT,
	 * LIDT, LLDT, LTR, SGDT, SIDT, SLDT, STR, VMCLEAR, VMPTRLD, VMPTRST, VMREAD, VMWRITE,
	 * VMXON, XRSTORS, and XSAVES, the exit qualification receives the value of the
	 * instruction's displacement field, which is sign-extended to 64 bits.
	 */
	offset = vcpu->arch.exit_qualification;

	/* TODO: should we consider the cases where the address size (bits 9:7 in instr_info) is 16 or 32? */

	/*
	 * Refer to ISDM Vol.1-3-24 Operand addressing on how to calculate an effective address:
	 *   offset = base + [index * scale] + displacement
	 *   address = segment_base + offset
	 */
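	/*
	 * Illustrative example (hypothetical instruction, not taken from a trace):
	 * for "vmptrld [rax + rcx*8 + 0x20]" with a flat data segment, the exit
	 * qualification holds the displacement 0x20, the base register field
	 * selects RAX, the index register field selects RCX with a scaling of 3,
	 * and the resulting GVA is RAX + (RCX << 3) + 0x20, which is then
	 * translated to a GPA by gva2gpa() below.
	 */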
	if (VMX_II_BASE_REG_VALID(instr_info)) {
		offset += vcpu_get_gpreg(vcpu, VMX_II_BASE_REG(instr_info));
	}

	if (VMX_II_IDX_REG_VALID(instr_info)) {
		uint64_t val64 = vcpu_get_gpreg(vcpu, VMX_II_IDX_REG(instr_info));
		offset += (val64 << VMX_II_SCALING(instr_info));
	}

	/*
	 * In 64-bit mode, the processor treats the segment base of CS, DS, ES, SS as zero,
	 * creating a linear address that is equal to the effective address.
	 * The exceptions are the FS and GS segments, whose segment registers can be used as
	 * additional base registers in some linear address calculations.
	 */
	seg = VMX_II_SEG_REG(instr_info);
	if (seg == 4U) {
		seg_base = exec_vmread(VMX_GUEST_FS_BASE);
	}

	if (seg == 5U) {
		seg_base = exec_vmread(VMX_GUEST_GS_BASE);
	}

	gva = seg_base + offset;

	(void)gva2gpa(vcpu, gva, &gpa, &err_code);

	return gpa;
}

/*
 * @pre vcpu != NULL
 */
static uint64_t get_vmptr_gpa(struct acrn_vcpu *vcpu)
{
	uint64_t gpa, vmptr;

	/* get VMX pointer, which points to the VMCS or VMXON region GPA */
	gpa = get_vmx_memory_operand(vcpu, exec_vmread(VMX_INSTR_INFO));

	/* get the address (GPA) of the VMCS for VMPTRLD/VMCLEAR, or VMXON region for VMXON */
	(void)copy_from_gpa(vcpu->vm, (void *)&vmptr, gpa, sizeof(uint64_t));

	return vmptr;
}

static bool validate_vmptr_gpa(uint64_t vmptr_gpa)
{
	/* We don't emulate CPUID.80000008H for guests, so check against the host physical address width */
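	/*
	 * For instance (illustrative numbers only): with phys_bits = 39, a
	 * vmptr_gpa of 0x8000001000 is rejected because bit 39 is set, while
	 * 0x1000 is 4-KByte aligned and below 1 << 39, so it passes both checks.
	 */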
	struct cpuinfo_x86 *cpu_info = get_pcpu_info();

	return (mem_aligned_check(vmptr_gpa, PAGE_SIZE) && ((vmptr_gpa >> cpu_info->phys_bits) == 0UL));
}

/**
 * @pre vcpu != NULL
 */
static bool validate_vmcs_revision_id(struct acrn_vcpu *vcpu, uint64_t vmptr_gpa)
{
	uint32_t revision_id;

	(void)copy_from_gpa(vcpu->vm, (void *)&revision_id, vmptr_gpa, sizeof(uint32_t));

	/*
	 * The VMCS revision ID must match the one reported by the emulated IA32_VMX_BASIC MSR.
	 * Bit 31 of VMCS12_REVISION_ID is always 0, so the following comparison implicitly
	 * validates that revision_id[31] is clear as well.
	 */
	return (revision_id == VMCS12_REVISION_ID);
}

int32_t get_guest_cpl(void)
{
	/*
	 * We get the CPL from SS.DPL because:
	 *
	 * CS.DPL may not equal the CPL for conforming code segments. ISDM 5.5 PRIVILEGE LEVELS:
	 * conforming code segments can be accessed from any privilege level that is equal to or
	 * numerically greater (less privileged) than the DPL of the conforming code segment.
	 *
	 * ISDM 24.4.1 Guest Register State: the value of the DPL field for SS is always
	 * equal to the logical processor's current privilege level (CPL).
	 */
	uint32_t ar = exec_vmread32(VMX_GUEST_SS_ATTR);

	/* DPL occupies bits 6:5 of the segment access-rights field */
	return ((ar >> 5) & 3);
}
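
/*
 * Illustrative example for the FIXED0/FIXED1 checks below (hypothetical MSR
 * values, not read from real hardware): if IA32_VMX_CR0_FIXED0 were 0x80000021
 * (PG, NE and PE fixed to 1) and IA32_VMX_CR0_FIXED1 were 0xFFFFFFFF (no bit
 * fixed to 0), a guest CR0 of 0x80000031 would pass both checks, while
 * 0x00000031 would fail the first one because PG is clear.
 */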
static bool validate_nvmx_cr0_cr4(uint64_t cr0_4, uint64_t fixed0, uint64_t fixed1)
{
	bool valid = true;

	/* If bit X is 1 in IA32_VMX_CR0/4_FIXED0, then that bit of CR0/4 is fixed to 1 in VMX operation */
	if ((cr0_4 & fixed0) != fixed0) {
		valid = false;
	}

	/*
	 * If bit X is 0 in IA32_VMX_CR0/4_FIXED1, then that bit of CR0/4 is fixed to 0 in VMX operation.
	 * Bits 63:32 of CR0 and CR4 are reserved and must be written with zeros.
	 */
	if ((uint32_t)(~cr0_4 & ~fixed1) != (uint32_t)~fixed1) {
		valid = false;
	}

	return valid;
}

/*
 * @pre vcpu != NULL
 */
static bool validate_nvmx_cr0(struct acrn_vcpu *vcpu)
{
	return validate_nvmx_cr0_cr4(vcpu_get_cr0(vcpu), msr_read(MSR_IA32_VMX_CR0_FIXED0),
		msr_read(MSR_IA32_VMX_CR0_FIXED1));
}

/*
 * @pre vcpu != NULL
 */
static bool validate_nvmx_cr4(struct acrn_vcpu *vcpu)
{
	return validate_nvmx_cr0_cr4(vcpu_get_cr4(vcpu), msr_read(MSR_IA32_VMX_CR4_FIXED0),
		msr_read(MSR_IA32_VMX_CR4_FIXED1));
}

/*
 * @pre vcpu != NULL
 */
int32_t vmxon_vmexit_handler(struct acrn_vcpu *vcpu)
{
	const uint64_t features = MSR_IA32_FEATURE_CONTROL_LOCK | MSR_IA32_FEATURE_CONTROL_VMX_NO_SMX;
	uint32_t ar = exec_vmread32(VMX_GUEST_CS_ATTR);

	if (is_nvmx_configured(vcpu->vm)) {
		if (((vcpu_get_cr0(vcpu) & CR0_PE) == 0UL)
			|| ((vcpu_get_cr4(vcpu) & CR4_VMXE) == 0UL)
			|| ((vcpu_get_rflags(vcpu) & RFLAGS_VM) != 0U)) {
			vcpu_inject_ud(vcpu);
		} else if (((vcpu_get_efer(vcpu) & MSR_IA32_EFER_LMA_BIT) == 0U)
			|| ((ar & (1U << 13U)) == 0U)) {
			/* ACRN currently doesn't support a 32-bit L1 hypervisor */
			vcpu_inject_ud(vcpu);
		} else if ((get_guest_cpl() != 0)
			|| !validate_nvmx_cr0(vcpu)
			|| !validate_nvmx_cr4(vcpu)
			|| ((vcpu_get_guest_msr(vcpu, MSR_IA32_FEATURE_CONTROL) & features) != features)) {
			vcpu_inject_gp(vcpu, 0U);
		} else if (vcpu->arch.nested.vmxon == true) {
			nested_vmx_result(VMfailValid, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
		} else {
			uint64_t vmptr_gpa = get_vmptr_gpa(vcpu);

			if (!validate_vmptr_gpa(vmptr_gpa)) {
				nested_vmx_result(VMfailInvalid, 0);
			} else if (!validate_vmcs_revision_id(vcpu, vmptr_gpa)) {
				nested_vmx_result(VMfailInvalid, 0);
			} else {
				vcpu->arch.nested.vmxon = true;
				vcpu->arch.nested.vmxon_ptr = vmptr_gpa;
				nested_vmx_result(VMsucceed, 0);
			}
		}
	} else {
		vcpu_inject_ud(vcpu);
	}

	return 0;
}