acrn-hypervisor/hypervisor/arch/x86/cpu_caps.c
Conghui Chen 821c65b40c hv: fix possible SSE region mismatch issue
During context switch in hypervisor, xsave/xrstore are used to
save/resotre the XSAVE area according to the XCR0 and XSS. The legacy
region in XSAVE area include FPU and SSE, we should make sure the
legacy region be saved during contex switch. FPU in XCR0 is always
enabled according to SDM.
For SSE, we enable it in XCR0 during context switch.

Tracked-On: #5062
Signed-off-by: Conghui Chen <conghui.chen@intel.com>
Acked-by: Eddie Dong <eddie.dong@intel.com>
2020-07-22 14:19:21 +08:00

501 lines
14 KiB
C

/*
* Copyright (C) 2018 Intel Corporation. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include <types.h>
#include <msr.h>
#include <page.h>
#include <cpufeatures.h>
#include <cpuid.h>
#include <cpu.h>
#include <per_cpu.h>
#include <vmx.h>
#include <cpu_caps.h>
#include <errno.h>
#include <logmsg.h>
#include <vmcs.h>
/* TODO: add more capability per requirement */
/* APICv features */
#define VAPIC_FEATURE_VIRT_ACCESS (1U << 0U)
#define VAPIC_FEATURE_VIRT_REG (1U << 1U)
#define VAPIC_FEATURE_INTR_DELIVERY (1U << 2U)
#define VAPIC_FEATURE_TPR_SHADOW (1U << 3U)
#define VAPIC_FEATURE_POST_INTR (1U << 4U)
#define VAPIC_FEATURE_VX2APIC_MODE (1U << 5U)
/* BASIC features: must supported by the physical platform and will enabled by default */
#define APICV_BASIC_FEATURE (VAPIC_FEATURE_TPR_SHADOW | VAPIC_FEATURE_VIRT_ACCESS | VAPIC_FEATURE_VX2APIC_MODE)
/* ADVANCED features: enable them by default if the physical platform support them all, otherwise, disable them all */
#define APICV_ADVANCED_FEATURE (VAPIC_FEATURE_VIRT_REG | VAPIC_FEATURE_INTR_DELIVERY | VAPIC_FEATURE_POST_INTR)
static struct cpu_capability {
uint8_t apicv_features;
uint8_t ept_features;
uint32_t vmx_ept;
uint32_t vmx_vpid;
uint32_t core_caps; /* value of MSR_IA32_CORE_CAPABLITIES */
} cpu_caps;
static struct cpuinfo_x86 boot_cpu_data;
bool pcpu_has_cap(uint32_t bit)
{
uint32_t feat_idx = bit >> 5U;
uint32_t feat_bit = bit & 0x1fU;
bool ret;
if (feat_idx >= FEATURE_WORDS) {
ret = false;
} else {
ret = ((boot_cpu_data.cpuid_leaves[feat_idx] & (1U << feat_bit)) != 0U);
}
return ret;
}
bool monitor_cap_buggy(void)
{
bool buggy = false;
if ((boot_cpu_data.family == 0x6U) && (boot_cpu_data.model == 0x5cU)) {
buggy = true;
}
return buggy;
}
bool has_monitor_cap(void)
{
bool ret = false;
if (pcpu_has_cap(X86_FEATURE_MONITOR)) {
/* don't use monitor for CPU (family: 0x6 model: 0x5c)
* in hypervisor, but still expose it to the guests and
* let them handle it correctly
*/
if (!monitor_cap_buggy()) {
ret = true;
}
}
return ret;
}
static inline bool is_fast_string_erms_supported_and_enabled(void)
{
bool ret = false;
uint64_t misc_enable = msr_read(MSR_IA32_MISC_ENABLE);
if ((misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) == 0UL) {
pr_fatal("%s, fast string is not enabled\n", __func__);
} else {
if (!pcpu_has_cap(X86_FEATURE_ERMS)) {
pr_fatal("%s, enhanced rep movsb/stosb not supported\n", __func__);
} else {
ret = true;
}
}
return ret;
}
/*check allowed ONEs setting in vmx control*/
static bool is_ctrl_setting_allowed(uint64_t msr_val, uint32_t ctrl)
{
/*
* Intel SDM Appendix A.3
* - bitX in ctrl can be set 1
* only if bit 32+X in msr_val is 1
*/
return ((((uint32_t)(msr_val >> 32UL)) & ctrl) == ctrl);
}
bool is_apl_platform(void)
{
bool ret = false;
if ((boot_cpu_data.family == 0x6U) && (boot_cpu_data.model == 0x92U)) {
ret = true;
}
return ret;
}
bool has_core_cap(uint32_t bit_mask)
{
return ((cpu_caps.core_caps & bit_mask) != 0U);
}
static void detect_ept_cap(void)
{
uint64_t msr_val;
cpu_caps.ept_features = 0U;
/* Read primary processor based VM control. */
msr_val = msr_read(MSR_IA32_VMX_PROCBASED_CTLS);
/*
* According to SDM A.3.2 Primary Processor-Based VM-Execution Controls:
* The IA32_VMX_PROCBASED_CTLS MSR (index 482H) reports on the allowed
* settings of most of the primary processor-based VM-execution controls
* (see Section 24.6.2):
* Bits 63:32 indicate the allowed 1-settings of these controls.
* VM entry allows control X to be 1 if bit 32+X in the MSR is set to 1;
* if bit 32+X in the MSR is cleared to 0, VM entry fails if control X
* is 1.
*/
msr_val = msr_val >> 32U;
/* Check if secondary processor based VM control is available. */
if ((msr_val & VMX_PROCBASED_CTLS_SECONDARY) != 0UL) {
/* Read secondary processor based VM control. */
msr_val = msr_read(MSR_IA32_VMX_PROCBASED_CTLS2);
if (is_ctrl_setting_allowed(msr_val, VMX_PROCBASED_CTLS2_EPT)) {
cpu_caps.ept_features = 1U;
}
}
}
static void detect_apicv_cap(void)
{
uint8_t features = 0U;
uint64_t msr_val;
msr_val = msr_read(MSR_IA32_VMX_PROCBASED_CTLS);
if (is_ctrl_setting_allowed(msr_val, VMX_PROCBASED_CTLS_TPR_SHADOW)) {
features |= VAPIC_FEATURE_TPR_SHADOW;
}
msr_val = msr_read(MSR_IA32_VMX_PROCBASED_CTLS2);
if (is_ctrl_setting_allowed(msr_val, VMX_PROCBASED_CTLS2_VAPIC)) {
features |= VAPIC_FEATURE_VIRT_ACCESS;
}
if (is_ctrl_setting_allowed(msr_val, VMX_PROCBASED_CTLS2_VX2APIC)) {
features |= VAPIC_FEATURE_VX2APIC_MODE;
}
if (is_ctrl_setting_allowed(msr_val, VMX_PROCBASED_CTLS2_VAPIC_REGS)) {
features |= VAPIC_FEATURE_VIRT_REG;
}
if (is_ctrl_setting_allowed(msr_val, VMX_PROCBASED_CTLS2_VIRQ)) {
features |= VAPIC_FEATURE_INTR_DELIVERY;
}
msr_val = msr_read(MSR_IA32_VMX_PINBASED_CTLS);
if (is_ctrl_setting_allowed(msr_val, VMX_PINBASED_CTLS_POST_IRQ)) {
features |= VAPIC_FEATURE_POST_INTR;
}
cpu_caps.apicv_features = features;
vlapic_set_apicv_ops();
}
static void detect_vmx_mmu_cap(void)
{
uint64_t val;
/* Read the MSR register of EPT and VPID Capability - SDM A.10 */
val = msr_read(MSR_IA32_VMX_EPT_VPID_CAP);
cpu_caps.vmx_ept = (uint32_t) val;
cpu_caps.vmx_vpid = (uint32_t) (val >> 32U);
}
static bool pcpu_vmx_set_32bit_addr_width(void)
{
return ((msr_read(MSR_IA32_VMX_BASIC) & MSR_IA32_VMX_BASIC_ADDR_WIDTH) != 0UL);
}
static void detect_xsave_cap(void)
{
uint32_t unused;
cpuid_subleaf(CPUID_XSAVE_FEATURES, 0x0U,
&boot_cpu_data.cpuid_leaves[FEAT_D_0_EAX],
&unused,
&unused,
&boot_cpu_data.cpuid_leaves[FEAT_D_0_EDX]);
cpuid_subleaf(CPUID_XSAVE_FEATURES, 1U,
&boot_cpu_data.cpuid_leaves[FEAT_D_1_EAX],
&unused,
&boot_cpu_data.cpuid_leaves[FEAT_D_1_ECX],
&boot_cpu_data.cpuid_leaves[FEAT_D_1_EDX]);
}
static void detect_core_caps(void)
{
if (pcpu_has_cap(X86_FEATURE_CORE_CAP)) {
cpu_caps.core_caps = (uint32_t)msr_read(MSR_IA32_CORE_CAPABILITIES);
}
}
static void detect_pcpu_cap(void)
{
detect_apicv_cap();
detect_ept_cap();
detect_vmx_mmu_cap();
detect_xsave_cap();
detect_core_caps();
}
static uint64_t get_address_mask(uint8_t limit)
{
return ((1UL << limit) - 1UL) & PAGE_MASK;
}
void init_pcpu_capabilities(void)
{
uint32_t eax, unused;
uint32_t family, model;
cpuid_subleaf(CPUID_VENDORSTRING, 0x0U,
&boot_cpu_data.cpuid_level,
&unused, &unused, &unused);
cpuid_subleaf(CPUID_FEATURES, 0x0U, &eax, &unused,
&boot_cpu_data.cpuid_leaves[FEAT_1_ECX],
&boot_cpu_data.cpuid_leaves[FEAT_1_EDX]);
family = (eax >> 8U) & 0xfU;
if (family == 0xFU) {
family += ((eax >> 20U) & 0xffU) << 4U;
}
boot_cpu_data.family = (uint8_t)family;
model = (eax >> 4U) & 0xfU;
if (family == 0x06U || family == 0xFU) {
model += ((eax >> 16U) & 0xfU) << 4U;
}
boot_cpu_data.model = (uint8_t)model;
cpuid_subleaf(CPUID_EXTEND_FEATURE, 0x0U, &unused,
&boot_cpu_data.cpuid_leaves[FEAT_7_0_EBX],
&boot_cpu_data.cpuid_leaves[FEAT_7_0_ECX],
&boot_cpu_data.cpuid_leaves[FEAT_7_0_EDX]);
cpuid_subleaf(CPUID_MAX_EXTENDED_FUNCTION, 0x0U,
&boot_cpu_data.extended_cpuid_level,
&unused, &unused, &unused);
if (boot_cpu_data.extended_cpuid_level >= CPUID_EXTEND_FUNCTION_1) {
cpuid_subleaf(CPUID_EXTEND_FUNCTION_1, 0x0U, &unused, &unused,
&boot_cpu_data.cpuid_leaves[FEAT_8000_0001_ECX],
&boot_cpu_data.cpuid_leaves[FEAT_8000_0001_EDX]);
}
if (boot_cpu_data.extended_cpuid_level >= CPUID_EXTEND_INVA_TSC) {
cpuid_subleaf(CPUID_EXTEND_INVA_TSC, 0x0U, &eax, &unused, &unused,
&boot_cpu_data.cpuid_leaves[FEAT_8000_0007_EDX]);
}
if (boot_cpu_data.extended_cpuid_level >= CPUID_EXTEND_ADDRESS_SIZE) {
cpuid_subleaf(CPUID_EXTEND_ADDRESS_SIZE, 0x0U, &eax,
&boot_cpu_data.cpuid_leaves[FEAT_8000_0008_EBX],
&unused, &unused);
/* EAX bits 07-00: #Physical Address Bits
* bits 15-08: #Linear Address Bits
*/
boot_cpu_data.virt_bits = (uint8_t)((eax >> 8U) & 0xffU);
boot_cpu_data.phys_bits = (uint8_t)(eax & 0xffU);
boot_cpu_data.physical_address_mask =
get_address_mask(boot_cpu_data.phys_bits);
}
detect_pcpu_cap();
}
static bool is_ept_supported(void)
{
return (cpu_caps.ept_features != 0U);
}
static inline bool is_apicv_basic_feature_supported(void)
{
return ((cpu_caps.apicv_features & APICV_BASIC_FEATURE) == APICV_BASIC_FEATURE);
}
bool is_apicv_advanced_feature_supported(void)
{
return ((cpu_caps.apicv_features & APICV_ADVANCED_FEATURE) == APICV_ADVANCED_FEATURE);
}
bool pcpu_has_vmx_ept_cap(uint32_t bit_mask)
{
return ((cpu_caps.vmx_ept & bit_mask) != 0U);
}
bool pcpu_has_vmx_vpid_cap(uint32_t bit_mask)
{
return ((cpu_caps.vmx_vpid & bit_mask) != 0U);
}
void init_pcpu_model_name(void)
{
cpuid_subleaf(CPUID_EXTEND_FUNCTION_2, 0x0U,
(uint32_t *)(boot_cpu_data.model_name),
(uint32_t *)(&boot_cpu_data.model_name[4]),
(uint32_t *)(&boot_cpu_data.model_name[8]),
(uint32_t *)(&boot_cpu_data.model_name[12]));
cpuid_subleaf(CPUID_EXTEND_FUNCTION_3, 0x0U,
(uint32_t *)(&boot_cpu_data.model_name[16]),
(uint32_t *)(&boot_cpu_data.model_name[20]),
(uint32_t *)(&boot_cpu_data.model_name[24]),
(uint32_t *)(&boot_cpu_data.model_name[28]));
cpuid_subleaf(CPUID_EXTEND_FUNCTION_4, 0x0U,
(uint32_t *)(&boot_cpu_data.model_name[32]),
(uint32_t *)(&boot_cpu_data.model_name[36]),
(uint32_t *)(&boot_cpu_data.model_name[40]),
(uint32_t *)(&boot_cpu_data.model_name[44]));
boot_cpu_data.model_name[48] = '\0';
}
static inline bool is_vmx_disabled(void)
{
uint64_t msr_val;
bool ret = false;
/* Read Feature ControL MSR */
msr_val = msr_read(MSR_IA32_FEATURE_CONTROL);
/* Check if feature control is locked and vmx cannot be enabled */
if (((msr_val & MSR_IA32_FEATURE_CONTROL_LOCK) != 0U) &&
((msr_val & MSR_IA32_FEATURE_CONTROL_VMX_NO_SMX) == 0U)) {
ret = true;
}
return ret;
}
static inline bool pcpu_has_vmx_unrestricted_guest_cap(void)
{
return ((msr_read(MSR_IA32_VMX_MISC) & MSR_IA32_MISC_UNRESTRICTED_GUEST) != 0UL);
}
static int32_t check_vmx_mmu_cap(void)
{
int32_t ret = 0;
if (!pcpu_has_vmx_ept_cap(VMX_EPT_INVEPT)) {
printf("%s, invept not supported\n", __func__);
ret = -ENODEV;
} else if (!pcpu_has_vmx_vpid_cap(VMX_VPID_INVVPID) ||
!pcpu_has_vmx_vpid_cap(VMX_VPID_INVVPID_SINGLE_CONTEXT) ||
!pcpu_has_vmx_vpid_cap(VMX_VPID_INVVPID_GLOBAL_CONTEXT)) {
printf("%s, invvpid not supported\n", __func__);
ret = -ENODEV;
} else if (!pcpu_has_vmx_ept_cap(VMX_EPT_2MB_PAGE)) {
printf("%s, ept not support 2MB large page\n", __func__);
ret = -ENODEV;
} else if (pcpu_has_vmx_ept_cap(VMX_EPT_1GB_PAGE) !=
pcpu_has_cap(X86_FEATURE_PAGE1GB)) {
/* This just for simple large_page_support in arch/x86/page.c */
ret = -ENODEV;
printf("%s ept support 1GB large page while mmu is not or opposite\n", __func__);
} else {
/* No other state currently, do nothing */
}
return ret;
}
/*
* basic hardware capability check
* we should supplement which feature/capability we must support
* here later.
*/
int32_t detect_hardware_support(void)
{
int32_t ret;
/* Long Mode (x86-64, 64-bit support) */
if (!pcpu_has_cap(X86_FEATURE_LM)) {
printf("%s, LM not supported\n", __func__);
ret = -ENODEV;
} else if ((boot_cpu_data.phys_bits == 0U) ||
(boot_cpu_data.virt_bits == 0U)) {
printf("%s, can't detect Linear/Physical Address size\n", __func__);
ret = -ENODEV;
} else if (!pcpu_has_cap(X86_FEATURE_INVA_TSC)) {
/* check invariant TSC */
printf("%s, invariant TSC not supported\n", __func__);
ret = -ENODEV;
} else if (!pcpu_has_cap(X86_FEATURE_TSC_DEADLINE)) {
/* lapic TSC deadline timer */
printf("%s, TSC deadline not supported\n", __func__);
ret = -ENODEV;
} else if (!pcpu_has_cap(X86_FEATURE_NX)) {
/* Execute Disable */
printf("%s, NX not supported\n", __func__);
ret = -ENODEV;
} else if (!pcpu_has_cap(X86_FEATURE_SMEP)) {
/* Supervisor-Mode Execution Prevention */
printf("%s, SMEP not supported\n", __func__);
ret = -ENODEV;
} else if (!pcpu_has_cap(X86_FEATURE_SMAP)) {
/* Supervisor-Mode Access Prevention */
printf("%s, SMAP not supported\n", __func__);
ret = -ENODEV;
} else if (!pcpu_has_cap(X86_FEATURE_MTRR)) {
printf("%s, MTRR not supported\n", __func__);
ret = -ENODEV;
} else if (!pcpu_has_cap(X86_FEATURE_CLFLUSHOPT)) {
printf("%s, CLFLUSHOPT not supported\n", __func__);
ret = -ENODEV;
} else if (!pcpu_has_cap(X86_FEATURE_VMX)) {
printf("%s, vmx not supported\n", __func__);
ret = -ENODEV;
} else if (!is_fast_string_erms_supported_and_enabled()) {
ret = -ENODEV;
} else if (!pcpu_has_vmx_unrestricted_guest_cap()) {
printf("%s, unrestricted guest not supported\n", __func__);
ret = -ENODEV;
} else if (!is_ept_supported()) {
printf("%s, EPT not supported\n", __func__);
ret = -ENODEV;
} else if (!is_apicv_basic_feature_supported()) {
printf("%s, APICV not supported\n", __func__);
ret = -ENODEV;
} else if (boot_cpu_data.cpuid_level < 0x15U) {
printf("%s, required CPU feature not supported\n", __func__);
ret = -ENODEV;
} else if (is_vmx_disabled()) {
printf("%s, VMX can not be enabled\n", __func__);
ret = -ENODEV;
} else if (pcpu_vmx_set_32bit_addr_width()) {
printf("%s, Only support Intel 64 architecture.\n", __func__);
ret = -ENODEV;
} else if (!pcpu_has_cap(X86_FEATURE_X2APIC)) {
printf("%s, x2APIC not supported\n", __func__);
ret = -ENODEV;
} else if (!pcpu_has_cap(X86_FEATURE_POPCNT)) {
printf("%s, popcnt instruction not supported\n", __func__);
ret = -ENODEV;
} else if (!pcpu_has_cap(X86_FEATURE_XSAVES)) {
printf("%s, XSAVES not supported\n", __func__);
ret = -ENODEV;
} else if (!pcpu_has_cap(X86_FEATURE_SSE)) {
printf("%s, SSE not supported\n", __func__);
ret = -ENODEV;
} else if (!pcpu_has_cap(X86_FEATURE_COMPACTION_EXT)) {
printf("%s, Compaction extensions in XSAVE is not supported\n", __func__);
ret = -ENODEV;
} else {
ret = check_vmx_mmu_cap();
}
return ret;
}
struct cpuinfo_x86 *get_pcpu_info(void)
{
return &boot_cpu_data;
}