From c9982e8c7e4151cebe6d4632f44dcc2cf5b57846 Mon Sep 17 00:00:00 2001
From: Zide Chen
Date: Wed, 12 May 2021 20:51:55 -0700
Subject: [PATCH] hv: nested: setup emulated VMX MSRs

Emulate the following MSRs:

- MSR_IA32_VMX_PINBASED_CTLS
- MSR_IA32_VMX_PROCBASED_CTLS
- MSR_IA32_VMX_PROCBASED_CTLS2
- MSR_IA32_VMX_EXIT_CTLS
- MSR_IA32_VMX_ENTRY_CTLS
- MSR_IA32_VMX_BASIC: emulate the VMCS revision ID, etc.
- MSR_IA32_VMX_MISC

For the following MSRs, pass the physical values through to L1 guests:

- MSR_IA32_VMX_EPT_VPID_CAP
- MSR_IA32_VMX_VMCS_ENUM
- MSR_IA32_VMX_CR0_FIXED0
- MSR_IA32_VMX_CR0_FIXED1
- MSR_IA32_VMX_CR4_FIXED0
- MSR_IA32_VMX_CR4_FIXED1

Tracked-On: #5923
Signed-off-by: Zide Chen
Signed-off-by: Sainath Grandhi
Acked-by: Eddie Dong
---
 hypervisor/arch/x86/guest/nested.c            | 179 +++++++++++++++++-
 .../include/arch/x86/asm/guest/nested.h       |  15 ++
 hypervisor/include/arch/x86/asm/vmx.h         |   5 +
 3 files changed, 192 insertions(+), 7 deletions(-)

diff --git a/hypervisor/arch/x86/guest/nested.c b/hypervisor/arch/x86/guest/nested.c
index 98e288579..8a1d23e3a 100644
--- a/hypervisor/arch/x86/guest/nested.c
+++ b/hypervisor/arch/x86/guest/nested.c
@@ -30,24 +30,189 @@ bool is_vmx_msr(uint32_t msr)
 	return found;
 }
 
-/*
- * @pre vcpu != NULL
- */
-void init_vmx_msrs(__unused struct acrn_vcpu *vcpu)
+static uint64_t adjust_vmx_ctrls(uint32_t msr, uint64_t request_bits)
 {
-	/* implemented in next patch */
+	union value_64 val64, msr_val;
+
+	/*
+	 * ISDM Appendix A.3, A.4, A.5:
+	 * - Bits 31:0 indicate the allowed 0-settings of these controls:
+	 *   bit X of the corresponding VM-execution controls field is allowed to be 0
+	 *   if bit X in the MSR is cleared to 0.
+	 * - Bits 63:32 indicate the allowed 1-settings of these controls:
+	 *   VM entry allows control X to be 1 if bit 32+X in the MSR is set to 1.
+	 */
+	msr_val.full = msr_read(msr);
+
+	/*
+	 * The reserved bits in VMCS control fields could be 0 or 1, determined by the
+	 * corresponding capability MSR, so we need to read them from the physical MSR.
+	 *
+	 * We consider the bits that are set in the allowed 0-settings group as the
+	 * minimal set of bits that need to be set from the physical processor's
+	 * perspective. Since we shadow this control field, we pass through the
+	 * allowed 0-settings bits.
+	 */
+	val64.u.lo_32 = msr_val.u.lo_32;
+
+	/* the allowed 1-settings include those bits that are NOT allowed to be 0 */
+	val64.u.hi_32 = msr_val.u.lo_32;
+
+	/* make sure the requested features are supported by hardware */
+	val64.u.hi_32 |= (msr_val.u.hi_32 & request_bits);
+
+	return val64.full;
 }
 
 /*
  * @pre vcpu != NULL
  */
-int32_t read_vmx_msr(struct acrn_vcpu *vcpu, __unused uint32_t msr, uint64_t *val)
+void init_vmx_msrs(struct acrn_vcpu *vcpu)
+{
+	union value_64 val64;
+	uint64_t request_bits, msr_value;
+
+	if (is_nvmx_configured(vcpu->vm)) {
+		/* MSR_IA32_VMX_BASIC */
+		val64.full = VMCS12_REVISION_ID	/* Bits 30:0 - VMCS revision ID */
+			| (4096UL << 32U)	/* Bits 44:32 - size of VMXON region and VMCS region */
+			| (6UL << 50U)		/* Bits 53:50 - memory type for VMCS etc. (6: Write Back) */
+			| (1UL << 54U)		/* Bit 54: VM-exit instruction-information for INS and OUTS */
+			| (1UL << 55U);		/* Bit 55: VMX controls that default to 1 may be cleared to 0 */
+		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_BASIC, val64.full);
+
+		/* MSR_IA32_VMX_MISC */
+
+		/*
+		 * Some bits need to be read from the physical MSR. For example, bits 4:0
+		 * report the relationship between the rate of the VMX-preemption timer
+		 * and that of the timestamp counter (TSC).
+		 */
+		val64.full = msr_read(MSR_IA32_VMX_MISC);
+		val64.u.hi_32 = 0U;
+
+		/* Don't support Intel® Processor Trace (Intel PT) in VMX operation */
+		val64.u.lo_32 &= ~(1U << 14U);
+
+		/* Don't support SMM in VMX operation */
+		val64.u.lo_32 &= ~((1U << 15U) | (1U << 28U));
+
+		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_MISC, val64.full);
+
+		/*
+		 * TODO: these emulated VMX control MSRs work on Tiger Lake and Kaby Lake,
+		 * but they may have problems on other platforms.
+		 *
+		 * We have not yet tried to enable as many features as possible.
+		 */
+
+		/* MSR_IA32_VMX_PINBASED_CTLS */
+		request_bits = VMX_PINBASED_CTLS_IRQ_EXIT
+			| VMX_PINBASED_CTLS_NMI_EXIT
+			| VMX_PINBASED_CTLS_ENABLE_PTMR;
+		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_PINBASED_CTLS, request_bits);
+		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS, msr_value);
+		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_PINBASED_CTLS, msr_value);
+
+		/* MSR_IA32_VMX_PROCBASED_CTLS */
+		request_bits = VMX_PROCBASED_CTLS_IRQ_WIN | VMX_PROCBASED_CTLS_TSC_OFF
+			| VMX_PROCBASED_CTLS_HLT | VMX_PROCBASED_CTLS_INVLPG
+			| VMX_PROCBASED_CTLS_MWAIT | VMX_PROCBASED_CTLS_RDPMC
+			| VMX_PROCBASED_CTLS_RDTSC | VMX_PROCBASED_CTLS_CR3_LOAD
+			| VMX_PROCBASED_CTLS_CR3_STORE | VMX_PROCBASED_CTLS_CR8_LOAD
+			| VMX_PROCBASED_CTLS_CR8_STORE | VMX_PROCBASED_CTLS_NMI_WINEXIT
+			| VMX_PROCBASED_CTLS_MOV_DR | VMX_PROCBASED_CTLS_UNCOND_IO
+			| VMX_PROCBASED_CTLS_MSR_BITMAP | VMX_PROCBASED_CTLS_MONITOR
+			| VMX_PROCBASED_CTLS_PAUSE | VMX_PROCBASED_CTLS_SECONDARY;
+		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_PROCBASED_CTLS, request_bits);
+		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_PROCBASED_CTLS, msr_value);
+		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS, msr_value);
+
+		/* MSR_IA32_VMX_PROCBASED_CTLS2 */
+		request_bits = VMX_PROCBASED_CTLS2_EPT | VMX_PROCBASED_CTLS2_RDTSCP
+			| VMX_PROCBASED_CTLS2_VPID | VMX_PROCBASED_CTLS2_WBINVD
+			| VMX_PROCBASED_CTLS2_UNRESTRICT | VMX_PROCBASED_CTLS2_PAUSE_LOOP
+			| VMX_PROCBASED_CTLS2_RDRAND | VMX_PROCBASED_CTLS2_INVPCID
+			| VMX_PROCBASED_CTLS2_RDSEED | VMX_PROCBASED_CTLS2_XSVE_XRSTR
+			| VMX_PROCBASED_CTLS2_PT_USE_GPA | VMX_PROCBASED_CTLS2_TSC_SCALING;
+		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_PROCBASED_CTLS2, request_bits);
+		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2, msr_value);
+
+		/* MSR_IA32_VMX_EXIT_CTLS */
+		request_bits = VMX_EXIT_CTLS_SAVE_DBG | VMX_EXIT_CTLS_HOST_ADDR64
+			| VMX_EXIT_CTLS_ACK_IRQ | VMX_EXIT_CTLS_LOAD_PAT
+			| VMX_EXIT_CTLS_LOAD_EFER;
+		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_EXIT_CTLS, request_bits);
+		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_EXIT_CTLS, msr_value);
+		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_TRUE_EXIT_CTLS, msr_value);
+
+		/* MSR_IA32_VMX_ENTRY_CTLS */
+		request_bits = VMX_ENTRY_CTLS_LOAD_DBG | VMX_ENTRY_CTLS_IA32E_MODE
+			| VMX_ENTRY_CTLS_LOAD_PERF | VMX_ENTRY_CTLS_LOAD_PAT
+			| VMX_ENTRY_CTLS_LOAD_EFER;
+		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_ENTRY_CTLS, request_bits);
+		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_ENTRY_CTLS, msr_value);
+		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS, msr_value);
+
+		/* For now pass the physical MSR values through to the L1 guest */
+		msr_value = msr_read(MSR_IA32_VMX_EPT_VPID_CAP);
+		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_EPT_VPID_CAP, msr_value);
+
+		msr_value = msr_read(MSR_IA32_VMX_CR0_FIXED0);
+		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_CR0_FIXED0, msr_value);
+
+		msr_value = msr_read(MSR_IA32_VMX_CR0_FIXED1);
+		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_CR0_FIXED1, msr_value);
+
+		msr_value = msr_read(MSR_IA32_VMX_CR4_FIXED0);
+		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_CR4_FIXED0, msr_value);
+
+		msr_value = msr_read(MSR_IA32_VMX_CR4_FIXED1);
+		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_CR4_FIXED1, msr_value);
+
+		msr_value = msr_read(MSR_IA32_VMX_VMCS_ENUM);
+		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, msr_value);
+	}
+}
+
+/*
+ * @pre vcpu != NULL
+ */
+int32_t read_vmx_msr(struct acrn_vcpu *vcpu, uint32_t msr, uint64_t *val)
 {
 	uint64_t v = 0UL;
 	int32_t err = 0;
 
 	if (is_nvmx_configured(vcpu->vm)) {
-		/* implemented in next patch */
+		switch (msr) {
+		case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
+		case MSR_IA32_VMX_PINBASED_CTLS:
+		case MSR_IA32_VMX_PROCBASED_CTLS:
+		case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
+		case MSR_IA32_VMX_PROCBASED_CTLS2:
+		case MSR_IA32_VMX_EXIT_CTLS:
+		case MSR_IA32_VMX_TRUE_EXIT_CTLS:
+		case MSR_IA32_VMX_ENTRY_CTLS:
+		case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
+		case MSR_IA32_VMX_BASIC:
+		case MSR_IA32_VMX_MISC:
+		case MSR_IA32_VMX_EPT_VPID_CAP:
+		case MSR_IA32_VMX_CR0_FIXED0:
+		case MSR_IA32_VMX_CR0_FIXED1:
+		case MSR_IA32_VMX_CR4_FIXED0:
+		case MSR_IA32_VMX_CR4_FIXED1:
+		case MSR_IA32_VMX_VMCS_ENUM:
+		{
+			v = vcpu_get_guest_msr(vcpu, msr);
+			break;
+		}
+		/* Don't support these MSRs yet */
+		case MSR_IA32_SMBASE:
+		case MSR_IA32_VMX_PROCBASED_CTLS3:
+		case MSR_IA32_VMX_VMFUNC:
+		default:
+			err = -EACCES;
+			break;
+		}
 	} else {
 		err = -EACCES;
 	}
diff --git a/hypervisor/include/arch/x86/asm/guest/nested.h b/hypervisor/include/arch/x86/asm/guest/nested.h
index ccc470490..930ba99cc 100644
--- a/hypervisor/include/arch/x86/asm/guest/nested.h
+++ b/hypervisor/include/arch/x86/asm/guest/nested.h
@@ -8,6 +8,15 @@
 
 #include
 
+/* helper data structure to make VMX capability MSR manipulation easier */
+union value_64 {
+	uint64_t full;
+	struct {
+		uint32_t lo_32;
+		uint32_t hi_32;
+	} u;
+};
+
 /*
  * Following MSRs are supported if nested virtualization is enabled
  * - If CONFIG_NVMX_ENABLED is set, these MSRs are included in emulated_guest_msrs[]
@@ -36,6 +45,12 @@
 	MSR_IA32_VMX_VMFUNC,			\
 	MSR_IA32_VMX_PROCBASED_CTLS3
 
+/*
+ * This VMCS12 revision ID is chosen arbitrarily.
+ * The emulated MSR_IA32_VMX_BASIC returns this ID in bits 30:0.
+ */
+#define VMCS12_REVISION_ID	0x15407E12U
+
 #ifdef CONFIG_NVMX_ENABLED
 bool is_vmx_msr(uint32_t msr);
 void init_vmx_msrs(struct acrn_vcpu *vcpu);
diff --git a/hypervisor/include/arch/x86/asm/vmx.h b/hypervisor/include/arch/x86/asm/vmx.h
index ab19babd1..2050a72f7 100644
--- a/hypervisor/include/arch/x86/asm/vmx.h
+++ b/hypervisor/include/arch/x86/asm/vmx.h
@@ -314,10 +314,15 @@
 #define VMX_PROCBASED_CTLS2_INVPCID	(1U<<12U)
 #define VMX_PROCBASED_CTLS2_VM_FUNCS	(1U<<13U)
 #define VMX_PROCBASED_CTLS2_VMCS_SHADW	(1U<<14U)
+#define VMX_PROCBASED_CTLS2_ENCLS_EXIT	(1U<<15U)
 #define VMX_PROCBASED_CTLS2_RDSEED	(1U<<16U)
+#define VMX_PROCBASED_CTLS2_PML	(1U<<17U)
 #define VMX_PROCBASED_CTLS2_EPT_VE	(1U<<18U)
 #define VMX_PROCBASED_CTLS2_XSVE_XRSTR	(1U<<20U)
+#define VMX_PROCBASED_CTLS2_PT_USE_GPA	(1U<<22U)
+#define VMX_PROCBASED_CTLS2_TSC_SCALING	(1U<<25U)
 #define VMX_PROCBASED_CTLS2_UWAIT_PAUSE	(1U<<26U)
+#define VMX_PROCBASED_CTLS2_ENCLV_EXIT	(1U<<28U)
 #define VMX_PROCBASED_CTLS3_LOADIWKEY	(1U<<0U)
 
 /* MSR_IA32_VMX_EPT_VPID_CAP: EPT and VPID capability bits */
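
For reference, the following user-space sketch illustrates the allowed-0/allowed-1 composition that adjust_vmx_ctrls() performs. It is not part of the patch: the capability MSR value and the request mask are made-up examples, and msr_read() is replaced by a stub (stub_msr_read) so the program can run outside the hypervisor.

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

/* same helper union as the patch adds to nested.h */
union value_64 {
	uint64_t full;
	struct {
		uint32_t lo_32;
		uint32_t hi_32;
	} u;
};

/*
 * Stand-in for msr_read(): a hypothetical capability MSR whose low half
 * (allowed 0-settings) says bits 1, 2 and 4 must be 1, and whose high half
 * (allowed 1-settings) says only bits 5:0 may be 1 at all.
 */
static uint64_t stub_msr_read(void)
{
	return 0x0000003F00000016UL;
}

static uint64_t adjust_vmx_ctrls_demo(uint64_t request_bits)
{
	union value_64 val64, msr_val;

	msr_val.full = stub_msr_read();

	/* bits that must be 1 are passed through unchanged ... */
	val64.u.lo_32 = msr_val.u.lo_32;

	/* ... and those same bits are, by definition, also allowed to be 1 ... */
	val64.u.hi_32 = msr_val.u.lo_32;

	/* ... plus any requested bit that the hardware actually supports */
	val64.u.hi_32 |= (msr_val.u.hi_32 & request_bits);

	return val64.full;
}

int main(void)
{
	/* request bits 3 and 5: both are within the allowed 1-settings, so both
	 * survive; bits 1, 2 and 4 stay mandatory in the low half */
	uint64_t v = adjust_vmx_ctrls_demo((1UL << 3) | (1UL << 5));

	printf("emulated control MSR = 0x%016" PRIx64 "\n", v);	/* prints 0x0000003E00000016 */
	return 0;
}

Note that the patch stores the same adjusted value into both the TRUE_ and non-TRUE variants of each control MSR, so the L1 guest sees identical allowed settings in both.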