According to the VMXON Instruction Reference, perform the following checks in the virtual hardware environment: vCPU CPL, guest CR0, CR4, the revision ID in the VMXON region, etc.

Currently ACRN doesn't support a 32-bit L1 hypervisor, and injects an #UD exception if the L1 hypervisor is not running in 64-bit mode.

Tracked-On: #5923
Signed-off-by: Zide Chen <zide.chen@intel.com>
Acked-by: Eddie Dong <eddie.dong@Intel.com>
/*
 * Copyright (C) 2021 Intel Corporation.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <types.h>
#include <logmsg.h>
#include <asm/guest/virq.h>
#include <asm/guest/vcpu.h>
#include <asm/guest/vm.h>
#include <asm/guest/nested.h>

/* The only purpose of this array is to serve the is_vmx_msr() function */
static const uint32_t vmx_msrs[NUM_VMX_MSRS] = {
	LIST_OF_VMX_MSRS
};

bool is_vmx_msr(uint32_t msr)
{
	bool found = false;
	uint32_t i;

	for (i = 0U; i < NUM_VMX_MSRS; i++) {
		if (msr == vmx_msrs[i]) {
			found = true;
			break;
		}
	}

	return found;
}

static uint64_t adjust_vmx_ctrls(uint32_t msr, uint64_t request_bits)
{
	union value_64 val64, msr_val;

	/*
	 * ISDM Appendix A.3, A.4, A.5:
	 * - Bits 31:0 indicate the allowed 0-settings of these controls:
	 *   bit X of the corresponding VM-execution controls field is allowed to be 0
	 *   if bit X in the MSR is cleared to 0.
	 * - Bits 63:32 indicate the allowed 1-settings of these controls:
	 *   VM entry allows control X to be 1 if bit 32+X in the MSR is set to 1.
	 */
	msr_val.full = msr_read(msr);

	/*
	 * The reserved bits in VMCS control fields could be 0 or 1, determined by the
	 * corresponding capability MSR, so we need to read them from the physical MSR.
	 *
	 * We consider the bits that are set in the allowed 0-settings group as the
	 * minimal set of bits that need to be set from the physical processor's perspective.
	 * Since we shadow this control field, we passthru the allowed 0-settings bits.
	 */
	val64.u.lo_32 = msr_val.u.lo_32;

	/* the allowed 1-settings include those bits that are NOT allowed to be 0 */
	val64.u.hi_32 = msr_val.u.lo_32;

	/* make sure the requested features are supported by hardware */
	val64.u.hi_32 |= (msr_val.u.hi_32 & request_bits);

	return val64.full;
}
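
/*
 * Illustrative example (hypothetical MSR contents, not read from real hardware):
 * assume msr_read(MSR_IA32_VMX_PINBASED_CTLS) returns 0x0000007f00000016UL.
 *  - lo_32 = 0x16: the controls at these bit positions are not allowed to be 0, so
 *    they show up in both the allowed 0-settings and the allowed 1-settings of the
 *    emulated MSR.
 *  - hi_32 = 0x7f: these controls may be set to 1 on this processor, but only the
 *    ones also present in request_bits are exposed to the L1 guest.
 * With request_bits = 0x49, adjust_vmx_ctrls() returns lo_32 = 0x16 and
 * hi_32 = 0x16 | (0x7f & 0x49) = 0x5f, i.e. 0x0000005f00000016UL.
 */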

/*
 * @pre vcpu != NULL
 */
void init_vmx_msrs(struct acrn_vcpu *vcpu)
{
	union value_64 val64;
	uint64_t request_bits, msr_value;

	if (is_nvmx_configured(vcpu->vm)) {
		/* MSR_IA32_VMX_BASIC */
		val64.full = VMCS12_REVISION_ID	/* Bits 30:0 - VMCS revision ID */
			| (4096UL << 32U)	/* Bits 44:32 - size of VMXON region and VMCS region */
			| (6UL << 50U)		/* Bits 53:50 - memory type for VMCS etc. (6: Write Back) */
			| (1UL << 54U)		/* Bit 54: VM-exit instruction-information for INS and OUTS */
			| (1UL << 55U);		/* Bit 55: VMX controls that default to 1 may be cleared to 0 */
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_BASIC, val64.full);
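		/*
		 * Note on the encoding above: 4096 is (1UL << 12U), so (4096UL << 32U) sets bit 44,
		 * which encodes a 4-KByte VMXON/VMCS region size in the bits 44:32 field, and
		 * (6UL << 50U) writes the value 6 (write-back) into the bits 53:50 memory-type field.
		 */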

		/* MSR_IA32_VMX_MISC */

		/*
		 * Some bits need to be read from the physical MSR. For example, bits 4:0 report the
		 * relationship between the rate of the VMX-preemption timer and that of the
		 * timestamp counter (TSC).
		 */
		val64.full = msr_read(MSR_IA32_VMX_MISC);
		val64.u.hi_32 = 0U;

		/* Don't support Intel® Processor Trace (Intel PT) in VMX operation */
		val64.u.lo_32 &= ~(1U << 14U);

		/* Don't support SMM in VMX operation */
		val64.u.lo_32 &= ~((1U << 15U) | (1U << 28U));

		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_MISC, val64.full);

		/*
		 * TODO: These emulated VMX control MSRs work for Tiger Lake and Kaby Lake,
		 * but they may have problems if run on other platforms.
		 *
		 * We have not yet made a best effort to enable as many features as possible.
		 */

		/* MSR_IA32_VMX_PINBASED_CTLS */
		request_bits = VMX_PINBASED_CTLS_IRQ_EXIT
			| VMX_PINBASED_CTLS_NMI_EXIT
			| VMX_PINBASED_CTLS_ENABLE_PTMR;
		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_PINBASED_CTLS, request_bits);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS, msr_value);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_PINBASED_CTLS, msr_value);
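		/*
		 * ISDM Appendix A.3.1: the IA32_VMX_TRUE_*_CTLS MSRs are exposed because bit 55 of
		 * the emulated IA32_VMX_BASIC is set above; they additionally report which
		 * default1-class controls may be cleared to 0. The same adjusted value is written
		 * to both the TRUE and the non-TRUE variants so that an L1 hypervisor sees
		 * consistent capabilities from whichever MSR it reads.
		 */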

		/* MSR_IA32_VMX_PROCBASED_CTLS */
		request_bits = VMX_PROCBASED_CTLS_IRQ_WIN | VMX_PROCBASED_CTLS_TSC_OFF
			| VMX_PROCBASED_CTLS_HLT | VMX_PROCBASED_CTLS_INVLPG
			| VMX_PROCBASED_CTLS_MWAIT | VMX_PROCBASED_CTLS_RDPMC
			| VMX_PROCBASED_CTLS_RDTSC | VMX_PROCBASED_CTLS_CR3_LOAD
			| VMX_PROCBASED_CTLS_CR3_STORE | VMX_PROCBASED_CTLS_CR8_LOAD
			| VMX_PROCBASED_CTLS_CR8_STORE | VMX_PROCBASED_CTLS_NMI_WINEXIT
			| VMX_PROCBASED_CTLS_MOV_DR | VMX_PROCBASED_CTLS_UNCOND_IO
			| VMX_PROCBASED_CTLS_MSR_BITMAP | VMX_PROCBASED_CTLS_MONITOR
			| VMX_PROCBASED_CTLS_PAUSE | VMX_PROCBASED_CTLS_SECONDARY;
		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_PROCBASED_CTLS, request_bits);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_PROCBASED_CTLS, msr_value);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS, msr_value);

		/* MSR_IA32_VMX_PROCBASED_CTLS2 */
		request_bits = VMX_PROCBASED_CTLS2_EPT | VMX_PROCBASED_CTLS2_RDTSCP
			| VMX_PROCBASED_CTLS2_VPID | VMX_PROCBASED_CTLS2_WBINVD
			| VMX_PROCBASED_CTLS2_UNRESTRICT | VMX_PROCBASED_CTLS2_PAUSE_LOOP
			| VMX_PROCBASED_CTLS2_RDRAND | VMX_PROCBASED_CTLS2_INVPCID
			| VMX_PROCBASED_CTLS2_RDSEED | VMX_PROCBASED_CTLS2_XSVE_XRSTR
			| VMX_PROCBASED_CTLS2_PT_USE_GPA | VMX_PROCBASED_CTLS2_TSC_SCALING;
		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_PROCBASED_CTLS2, request_bits);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2, msr_value);

		/* MSR_IA32_VMX_EXIT_CTLS */
		request_bits = VMX_EXIT_CTLS_SAVE_DBG | VMX_EXIT_CTLS_HOST_ADDR64
			| VMX_EXIT_CTLS_ACK_IRQ | VMX_EXIT_CTLS_LOAD_PAT
			| VMX_EXIT_CTLS_LOAD_EFER;
		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_EXIT_CTLS, request_bits);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_EXIT_CTLS, msr_value);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_TRUE_EXIT_CTLS, msr_value);

		/* MSR_IA32_VMX_ENTRY_CTLS */
		request_bits = VMX_ENTRY_CTLS_LOAD_DBG | VMX_ENTRY_CTLS_IA32E_MODE
			| VMX_ENTRY_CTLS_LOAD_PERF | VMX_ENTRY_CTLS_LOAD_PAT
			| VMX_ENTRY_CTLS_LOAD_EFER;
		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_ENTRY_CTLS, request_bits);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_ENTRY_CTLS, msr_value);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS, msr_value);

		/* For now passthru the value from physical MSR to L1 guest */
		msr_value = msr_read(MSR_IA32_VMX_EPT_VPID_CAP);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_EPT_VPID_CAP, msr_value);

		msr_value = msr_read(MSR_IA32_VMX_CR0_FIXED0);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_CR0_FIXED0, msr_value);

		msr_value = msr_read(MSR_IA32_VMX_CR0_FIXED1);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_CR0_FIXED1, msr_value);

		msr_value = msr_read(MSR_IA32_VMX_CR4_FIXED0);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_CR4_FIXED0, msr_value);

		msr_value = msr_read(MSR_IA32_VMX_CR4_FIXED1);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_CR4_FIXED1, msr_value);

		msr_value = msr_read(MSR_IA32_VMX_VMCS_ENUM);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, msr_value);
	}
}

/*
 * @pre vcpu != NULL
 */
int32_t read_vmx_msr(struct acrn_vcpu *vcpu, uint32_t msr, uint64_t *val)
{
	uint64_t v = 0UL;
	int32_t err = 0;

	if (is_nvmx_configured(vcpu->vm)) {
		switch (msr) {
		case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
		case MSR_IA32_VMX_PINBASED_CTLS:
		case MSR_IA32_VMX_PROCBASED_CTLS:
		case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
		case MSR_IA32_VMX_PROCBASED_CTLS2:
		case MSR_IA32_VMX_EXIT_CTLS:
		case MSR_IA32_VMX_TRUE_EXIT_CTLS:
		case MSR_IA32_VMX_ENTRY_CTLS:
		case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
		case MSR_IA32_VMX_BASIC:
		case MSR_IA32_VMX_MISC:
		case MSR_IA32_VMX_EPT_VPID_CAP:
		case MSR_IA32_VMX_CR0_FIXED0:
		case MSR_IA32_VMX_CR0_FIXED1:
		case MSR_IA32_VMX_CR4_FIXED0:
		case MSR_IA32_VMX_CR4_FIXED1:
		case MSR_IA32_VMX_VMCS_ENUM:
		{
			v = vcpu_get_guest_msr(vcpu, msr);
			break;
		}
		/* Don't support these MSRs yet */
		case MSR_IA32_SMBASE:
		case MSR_IA32_VMX_PROCBASED_CTLS3:
		case MSR_IA32_VMX_VMFUNC:
		default:
			err = -EACCES;
			break;
		}
	} else {
		err = -EACCES;
	}

	*val = v;
	return err;
}

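/*
 * Usage sketch (hypothetical caller, not the actual ACRN RDMSR dispatch path), showing
 * how MSR emulation code might consume is_vmx_msr()/read_vmx_msr():
 *
 *	uint64_t v;
 *
 *	if (is_vmx_msr(msr) && (read_vmx_msr(vcpu, msr, &v) == 0)) {
 *		vcpu_set_gpreg(vcpu, CPU_REG_RAX, v & 0xFFFFFFFFUL);
 *		vcpu_set_gpreg(vcpu, CPU_REG_RDX, v >> 32U);
 *	} else {
 *		vcpu_inject_gp(vcpu, 0U);
 *	}
 */
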
void nested_vmx_result(enum VMXResult result, int error_number)
{
	uint64_t rflags = exec_vmread(VMX_GUEST_RFLAGS);

	/* ISDM: section 30.2 CONVENTIONS */
	rflags &= ~(RFLAGS_C | RFLAGS_P | RFLAGS_A | RFLAGS_Z | RFLAGS_S | RFLAGS_O);
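	/*
	 * ISDM 30.2 conventions, for reference:
	 *  - VMsucceed: CF, PF, AF, ZF, SF and OF are all cleared;
	 *  - VMfailInvalid: CF is set and the other five flags are cleared (used when
	 *    there is no current VMCS, e.g. an invalid VMXON pointer);
	 *  - VMfailValid: ZF is set, the other five flags are cleared, and an error number
	 *    is written to the VM-instruction error field of the current VMCS.
	 */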

	if (result == VMfailValid) {
		rflags |= RFLAGS_Z;
		exec_vmwrite(VMX_INSTR_ERROR, error_number);
	} else if (result == VMfailInvalid) {
		rflags |= RFLAGS_C;
	} else {
		/* VMsucceed, do nothing */
	}

	if (result != VMsucceed) {
		pr_err("VMX failed: %d/%d", result, error_number);
	}

	exec_vmwrite(VMX_GUEST_RFLAGS, rflags);
}

/**
 * @brief get the memory-address operand of a vmx instruction
 *
 * @pre vcpu != NULL
 */
static uint64_t get_vmx_memory_operand(struct acrn_vcpu *vcpu, uint32_t instr_info)
{
	uint64_t gva, gpa, seg_base = 0UL;
	uint32_t seg, err_code = 0U;
	uint64_t offset;

	/*
	 * According to ISDM 3B, Basic VM-Exit Information: for INVEPT, INVPCID, INVVPID, LGDT,
	 * LIDT, LLDT, LTR, SGDT, SIDT, SLDT, STR, VMCLEAR, VMPTRLD, VMPTRST, VMREAD, VMWRITE,
	 * VMXON, XRSTORS, and XSAVES, the exit qualification receives the value of the
	 * instruction's displacement field, which is sign-extended to 64 bits.
	 */
	offset = vcpu->arch.exit_qualification;

	/* TODO: should we consider the cases where the address size (bits 9:7 in instr_info) is 16 or 32 bits? */

	/*
	 * Refer to ISDM Vol.1 3-24, Operand Addressing, on how to calculate an effective address:
	 * offset = base + [index * scale] + displacement
	 * address = segment_base + offset
	 */
	if (VMX_II_BASE_REG_VALID(instr_info)) {
		offset += vcpu_get_gpreg(vcpu, VMX_II_BASE_REG(instr_info));
	}

	if (VMX_II_IDX_REG_VALID(instr_info)) {
		uint64_t val64 = vcpu_get_gpreg(vcpu, VMX_II_IDX_REG(instr_info));
		offset += (val64 << VMX_II_SCALING(instr_info));
	}

	/*
	 * In 64-bit mode, the processor treats the segment base of CS, DS, ES, SS as zero,
	 * creating a linear address that is equal to the effective address.
	 * The exceptions are the FS and GS segments, whose segment registers can be used as
	 * additional base registers in some linear address calculations.
	 */
	seg = VMX_II_SEG_REG(instr_info);
	if (seg == 4U) {
		seg_base = exec_vmread(VMX_GUEST_FS_BASE);
	}

	if (seg == 5U) {
		seg_base = exec_vmread(VMX_GUEST_GS_BASE);
	}

	gva = seg_base + offset;
	(void)gva2gpa(vcpu, gva, &gpa, &err_code);

	return gpa;
}
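
/*
 * Worked example (hypothetical guest state): for an L1 instruction such as
 * "vmptrld 0x10(%rax,%rbx,4)" executed in 64-bit mode with the default DS segment:
 *  - the exit qualification holds the sign-extended displacement 0x10,
 *  - the base register field selects RAX, the index register field selects RBX,
 *    and the scaling field is 2 (scale factor 4),
 *  - DS has a base of zero in 64-bit mode,
 * so gva = RAX + (RBX << 2) + 0x10, which gva2gpa() then translates to a GPA.
 */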

/*
 * @pre vcpu != NULL
 */
static uint64_t get_vmptr_gpa(struct acrn_vcpu *vcpu)
{
	uint64_t gpa, vmptr;

	/* get VMX pointer, which points to the VMCS or VMXON region GPA */
	gpa = get_vmx_memory_operand(vcpu, exec_vmread(VMX_INSTR_INFO));

	/* get the address (GPA) of the VMCS for VMPTRLD/VMCLEAR, or VMXON region for VMXON */
	(void)copy_from_gpa(vcpu->vm, (void *)&vmptr, gpa, sizeof(uint64_t));

	return vmptr;
}

static bool validate_vmptr_gpa(uint64_t vmptr_gpa)
{
	/* We don't emulate CPUID.80000008H for guests, so check with physical address width */
	struct cpuinfo_x86 *cpu_info = get_pcpu_info();

	return (mem_aligned_check(vmptr_gpa, PAGE_SIZE) && ((vmptr_gpa >> cpu_info->phys_bits) == 0UL));
}

/**
 * @pre vm != NULL
 */
static bool validate_vmcs_revision_id(struct acrn_vcpu *vcpu, uint64_t vmptr_gpa)
{
	uint32_t revision_id;

	(void)copy_from_gpa(vcpu->vm, (void *)&revision_id, vmptr_gpa, sizeof(uint32_t));

	/*
	 * The VMCS revision ID must equal what is reported by the emulated IA32_VMX_BASIC MSR.
	 * The most significant set bit of VMCS12_REVISION_ID is below bit 31, so the comparison
	 * below implicitly validates that revision_id[31] is clear as well.
	 */
	return (revision_id == VMCS12_REVISION_ID);
}

int32_t get_guest_cpl(void)
{
	/*
	 * We get the CPL from SS.DPL because:
	 *
	 * CS.DPL may not equal the CPL for conforming code segments. ISDM 5.5 PRIVILEGE LEVELS:
	 * Conforming code segments can be accessed from any privilege level that is equal to or
	 * numerically greater (less privileged) than the DPL of the conforming code segment.
	 *
	 * ISDM 24.4.1 Guest Register State: The value of the DPL field for SS is always
	 * equal to the logical processor's current privilege level (CPL).
	 */
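	/* In the VMCS access-rights format, DPL occupies bits 6:5, hence the shift and mask below. */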
	uint32_t ar = exec_vmread32(VMX_GUEST_SS_ATTR);
	return ((ar >> 5) & 3);
}

static bool validate_nvmx_cr0_cr4(uint64_t cr0_4, uint64_t fixed0, uint64_t fixed1)
{
	bool valid = true;

	/* If bit X is 1 in IA32_VMX_CR0/4_FIXED0, then that bit of CR0/4 is fixed to 1 in VMX operation */
	if ((cr0_4 & fixed0) != fixed0) {
		valid = false;
	}

	/* If bit X is 0 in IA32_VMX_CR0/4_FIXED1, then that bit of CR0/4 is fixed to 0 in VMX operation */
	/* Bits 63:32 of CR0 and CR4 are reserved and must be written with zeros */
	if ((uint32_t)(~cr0_4 & ~fixed1) != (uint32_t)~fixed1) {
		valid = false;
	}

	return valid;
}
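
/*
 * Illustrative example with typical (but not guaranteed) register values: on many
 * processors IA32_VMX_CR0_FIXED0 reads 0x80000021 (PG, NE and PE must be 1) and
 * IA32_VMX_CR0_FIXED1 reads 0xFFFFFFFF (no bit fixed to 0). Then CR0 = 0x80000031
 * passes both tests above, while a CR0 with PG cleared fails the first test because
 * (cr0 & fixed0) != fixed0.
 */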

/*
 * @pre vcpu != NULL
 */
static bool validate_nvmx_cr0(struct acrn_vcpu *vcpu)
{
	return validate_nvmx_cr0_cr4(vcpu_get_cr0(vcpu), msr_read(MSR_IA32_VMX_CR0_FIXED0),
		msr_read(MSR_IA32_VMX_CR0_FIXED1));
}

/*
 * @pre vcpu != NULL
 */
static bool validate_nvmx_cr4(struct acrn_vcpu *vcpu)
{
	return validate_nvmx_cr0_cr4(vcpu_get_cr4(vcpu), msr_read(MSR_IA32_VMX_CR4_FIXED0),
		msr_read(MSR_IA32_VMX_CR4_FIXED1));
}

/*
 * @pre vcpu != NULL
 */
int32_t vmxon_vmexit_handler(struct acrn_vcpu *vcpu)
{
	const uint64_t features = MSR_IA32_FEATURE_CONTROL_LOCK | MSR_IA32_FEATURE_CONTROL_VMX_NO_SMX;
	uint32_t ar = exec_vmread32(VMX_GUEST_CS_ATTR);

	if (is_nvmx_configured(vcpu->vm)) {
		if (((vcpu_get_cr0(vcpu) & CR0_PE) == 0UL)
			|| ((vcpu_get_cr4(vcpu) & CR4_VMXE) == 0UL)
			|| ((vcpu_get_rflags(vcpu) & RFLAGS_VM) != 0U)) {
			vcpu_inject_ud(vcpu);
		} else if (((vcpu_get_efer(vcpu) & MSR_IA32_EFER_LMA_BIT) == 0U)
			|| ((ar & (1U << 13U)) == 0U)) {
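			/*
			 * Bit 13 of the CS access rights is the L flag: it is set only when CS is a
			 * 64-bit code segment, so this test also catches compatibility mode.
			 */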
			/* ACRN currently doesn't support a 32-bit L1 hypervisor */
			vcpu_inject_ud(vcpu);
		} else if ((get_guest_cpl() != 0)
			|| !validate_nvmx_cr0(vcpu)
			|| !validate_nvmx_cr4(vcpu)
			|| ((vcpu_get_guest_msr(vcpu, MSR_IA32_FEATURE_CONTROL) & features) != features)) {
			vcpu_inject_gp(vcpu, 0U);
		} else if (vcpu->arch.nested.vmxon == true) {
			nested_vmx_result(VMfailValid, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
		} else {
			uint64_t vmptr_gpa = get_vmptr_gpa(vcpu);

			if (!validate_vmptr_gpa(vmptr_gpa)) {
				nested_vmx_result(VMfailInvalid, 0);
			} else if (!validate_vmcs_revision_id(vcpu, vmptr_gpa)) {
				nested_vmx_result(VMfailInvalid, 0);
			} else {
				vcpu->arch.nested.vmxon = true;
				vcpu->arch.nested.vmxon_ptr = vmptr_gpa;

				nested_vmx_result(VMsucceed, 0);
			}
		}
	} else {
		vcpu_inject_ud(vcpu);
	}

	return 0;
}