hv: support xsave in context switch

XSAVE area layout:
    legacy region:    512 bytes
    xsave header:      64 bytes
    extended region: < 3K bytes

So, pre-allocate a 4K area for XSAVE, and use the XSAVES/XRSTORS
instructions to save and restore it according to the hardware XSAVE
feature set.

Tracked-On: #4166
Signed-off-by: Conghui Chen <conghui.chen@intel.com>
Reviewed-by: Anthony Xu <anthony.xu@intel.com>
Acked-by: Eddie Dong <eddie.dong@intel.com>
Conghui Chen authored on 2019-11-25 18:57:36 +00:00, committed by wenlingz
parent 8ba203a165
commit e61412981d
10 changed files with 153 additions and 37 deletions
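A quick size check of the layout described in the commit message: 512 bytes (legacy region) + 64 bytes (xsave header) = 576 bytes of fixed overhead, so the pre-allocated 4K area leaves 4096 - 576 = 3520 bytes for the extended region, comfortably more than the "< 3K bytes" of extended state it has to hold. The XSAVE_*_AREA_SIZE macros added in the diff below encode exactly this split.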


@@ -476,20 +476,39 @@ static void init_pcpu_xsave(void)
 {
     uint64_t val64;
     struct cpuinfo_x86 *cpu_info;
+    uint64_t xcr0, xss;
+    uint32_t eax, ecx, unused, xsave_area_size;

-    if (pcpu_has_cap(X86_FEATURE_XSAVE)) {
-        CPU_CR_READ(cr4, &val64);
-        val64 |= CR4_OSXSAVE;
-        CPU_CR_WRITE(cr4, val64);
+    CPU_CR_READ(cr4, &val64);
+    val64 |= CR4_OSXSAVE;
+    CPU_CR_WRITE(cr4, val64);

-        if (get_pcpu_id() == BOOT_CPU_ID) {
-            uint32_t ecx, unused;
-            cpuid(CPUID_FEATURES, &unused, &unused, &ecx, &unused);
+    if (get_pcpu_id() == BOOT_CPU_ID) {
+        cpuid(CPUID_FEATURES, &unused, &unused, &ecx, &unused);

-            /* if set, update it */
-            if ((ecx & CPUID_ECX_OSXSAVE) != 0U) {
-                cpu_info = get_pcpu_info();
-                cpu_info->cpuid_leaves[FEAT_1_ECX] |= CPUID_ECX_OSXSAVE;
+        /* if set, update it */
+        if ((ecx & CPUID_ECX_OSXSAVE) != 0U) {
+            cpu_info = get_pcpu_info();
+            cpu_info->cpuid_leaves[FEAT_1_ECX] |= CPUID_ECX_OSXSAVE;
+
+            /* set xcr0 and xss with the components bitmap obtained from cpuid */
+            xcr0 = ((uint64_t)cpu_info->cpuid_leaves[FEAT_D_0_EDX] << 32U)
+                + cpu_info->cpuid_leaves[FEAT_D_0_EAX];
+            xss = ((uint64_t)cpu_info->cpuid_leaves[FEAT_D_1_EDX] << 32U)
+                + cpu_info->cpuid_leaves[FEAT_D_1_ECX];
+            write_xcr(0, xcr0);
+            msr_write(MSR_IA32_XSS, xss);
+
+            /* get the xsave area size, containing all the state components
+             * corresponding to bits currently set in XCR0 | IA32_XSS */
+            cpuid_subleaf(CPUID_XSAVE_FEATURES, 1U,
+                &eax,
+                &xsave_area_size,
+                &ecx,
+                &unused);
+            if (xsave_area_size > XSAVE_STATE_AREA_SIZE) {
+                panic("XSAVE area (%d bytes) exceeds the pre-allocated 4K region\n",
+                    xsave_area_size);
+            }
             }
         }
     }


@@ -189,11 +189,28 @@ static void detect_vmx_mmu_cap(void)
     cpu_caps.vmx_vpid = (uint32_t) (val >> 32U);
 }

+static void detect_xsave_cap(void)
+{
+    uint32_t unused;
+    cpuid_subleaf(CPUID_XSAVE_FEATURES, 0U,
+        &boot_cpu_data.cpuid_leaves[FEAT_D_0_EAX],
+        &unused,
+        &unused,
+        &boot_cpu_data.cpuid_leaves[FEAT_D_0_EDX]);
+
+    cpuid_subleaf(CPUID_XSAVE_FEATURES, 1U,
+        &boot_cpu_data.cpuid_leaves[FEAT_D_1_EAX],
+        &unused,
+        &boot_cpu_data.cpuid_leaves[FEAT_D_1_ECX],
+        &boot_cpu_data.cpuid_leaves[FEAT_D_1_EDX]);
+}
+
 static void detect_pcpu_cap(void)
 {
     detect_apicv_cap();
     detect_ept_cap();
     detect_vmx_mmu_cap();
+    detect_xsave_cap();
 }

 static uint64_t get_address_mask(uint8_t limit)
@@ -426,6 +443,12 @@ int32_t detect_hardware_support(void)
     } else if (!pcpu_has_cap(X86_FEATURE_POPCNT)) {
         printf("%s, popcnt instruction not supported\n", __func__);
         ret = -ENODEV;
+    } else if (!pcpu_has_cap(X86_FEATURE_XSAVES)) {
+        printf("%s, XSAVES not supported\n", __func__);
+        ret = -ENODEV;
+    } else if (!pcpu_has_cap(X86_FEATURE_COMPACTION_EXT)) {
+        printf("%s, Compaction extensions in XSAVE are not supported\n", __func__);
+        ret = -ENODEV;
     } else {
         ret = check_vmx_mmu_cap();
     }
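For background (a hedged sketch, not part of this commit): the two new capability checks map to CPUID.(EAX=0DH,ECX=1):EAX, where bit 1 advertises the compaction extensions (XSAVEC) and bit 3 advertises XSAVES/XRSTORS and IA32_XSS. The helper names below (cpuid2, xsaves_usable) are made up for illustration; ACRN's own cpuid_subleaf()/pcpu_has_cap() do the equivalent work.

    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical raw CPUID helper, standing in for ACRN's cpuid_subleaf() */
    static inline void cpuid2(uint32_t leaf, uint32_t subleaf,
                              uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
    {
        asm volatile("cpuid"
                     : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d)
                     : "a"(leaf), "c"(subleaf));
    }

    /* Equivalent of requiring both X86_FEATURE_COMPACTION_EXT and X86_FEATURE_XSAVES */
    static bool xsaves_usable(void)
    {
        uint32_t eax, ebx, ecx, edx;

        cpuid2(0xDU, 1U, &eax, &ebx, &ecx, &edx);
        return ((eax & (1U << 1U)) != 0U) &&   /* bit 1: compaction extensions (XSAVEC) */
               ((eax & (1U << 3U)) != 0U);     /* bit 3: XSAVES/XRSTORS */
    }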


@@ -193,8 +193,8 @@ static void save_world_ctx(struct acrn_vcpu *vcpu, struct ext_context *ext_ctx)
     ext_ctx->ia32_fmask = msr_read(MSR_IA32_FMASK);
     ext_ctx->ia32_kernel_gs_base = msr_read(MSR_IA32_KERNEL_GS_BASE);

-    /* FX area */
-    save_fxstore_guest_area(ext_ctx);
+    /* XSAVE area */
+    save_xsave_area(ext_ctx);

     /* For MSRs need isolation between worlds */
     for (i = 0U; i < NUM_WORLD_MSRS; i++) {
@@ -245,8 +245,8 @@ static void load_world_ctx(struct acrn_vcpu *vcpu, const struct ext_context *ext
     msr_write(MSR_IA32_FMASK, ext_ctx->ia32_fmask);
     msr_write(MSR_IA32_KERNEL_GS_BASE, ext_ctx->ia32_kernel_gs_base);

-    /* FX area */
-    rstor_fxstore_guest_area(ext_ctx);
+    /* XSAVE area */
+    rstore_xsave_area(ext_ctx);

     /* For MSRs need isolation between worlds */
     for (i = 0U; i < NUM_WORLD_MSRS; i++) {


@@ -227,6 +227,26 @@ static void set_vcpu_mode(struct acrn_vcpu *vcpu, uint32_t cs_attr, uint64_t ia3
     }
 }

+static void init_xsave(struct acrn_vcpu *vcpu)
+{
+    struct cpuinfo_x86 *cpu_info = get_pcpu_info();
+    struct ext_context *ectx = &(vcpu->arch.contexts[vcpu->arch.cur_context].ext_ctx);
+
+    /* Get user state components */
+    ectx->xcr0 = ((uint64_t)cpu_info->cpuid_leaves[FEAT_D_0_EDX] << 32U)
+        + cpu_info->cpuid_leaves[FEAT_D_0_EAX];
+
+    /* Get supervisor state components */
+    ectx->xss = ((uint64_t)cpu_info->cpuid_leaves[FEAT_D_1_EDX] << 32U)
+        + cpu_info->cpuid_leaves[FEAT_D_1_ECX];
+
+    /* xsaves only supports the compacted format, so set it in xcomp_bv[63] and
+     * keep the rest of the header area as zero.
+     * With this config, the first time a vcpu is scheduled in, it will
+     * initialize all the xsave components. */
+    ectx->xs_area.xsave_hdr.hdr.xcomp_bv |= XSAVE_COMPACTED_FORMAT;
+}
+
 void set_vcpu_regs(struct acrn_vcpu *vcpu, struct acrn_vcpu_regs *vcpu_regs)
 {
     struct ext_context *ectx;
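Why setting only xcomp_bv[63] is enough (background from the XRSTORS init semantics, not something the diff spells out): with xstate_bv left all zero, the first xrstors executed for this vCPU finds no saved data for any component and loads each component's architectural init state, which is exactly the behaviour the comment in init_xsave() describes. A standalone sketch of the same header setup; the struct and helper names here are illustrative, not ACRN's:

    #include <stdint.h>
    #include <string.h>

    #define XSAVE_COMPACTED_FORMAT (1UL << 63U)

    /* Illustrative stand-in for ACRN's union xsave_header */
    struct xsave_hdr_sketch {
        uint64_t xstate_bv;   /* which components contain saved state */
        uint64_t xcomp_bv;    /* bit 63 selects the compacted format */
        uint64_t reserved[6];
    };

    static void init_xsave_hdr(struct xsave_hdr_sketch *hdr)
    {
        /* all-zero header, then request the compacted format only */
        (void)memset(hdr, 0, sizeof(*hdr));
        hdr->xcomp_bv |= XSAVE_COMPACTED_FORMAT;
    }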
@@ -446,6 +466,7 @@ int32_t create_vcpu(uint16_t pcpu_id, struct acrn_vm *vm, struct acrn_vcpu **rtn
     vcpu->arch.nr_sipi = 0U;
     vcpu->state = VCPU_INIT;

+    init_xsave(vcpu);
     reset_vcpu_regs(vcpu);
     (void)memset((void *)&vcpu->req, 0U, sizeof(struct io_request));
     vm->hw.created_vcpus++;
@@ -699,6 +720,28 @@ void resume_vcpu(struct acrn_vcpu *vcpu)
     }
 }

+void save_xsave_area(struct ext_context *ectx)
+{
+    ectx->xcr0 = read_xcr(0);
+    ectx->xss = msr_read(MSR_IA32_XSS);
+    asm volatile("xsaves %0"
+            : : "m" (ectx->xs_area),
+            "d" (UINT32_MAX),
+            "a" (UINT32_MAX)
+            : "memory");
+}
+
+void rstore_xsave_area(const struct ext_context *ectx)
+{
+    write_xcr(0, ectx->xcr0);
+    msr_write(MSR_IA32_XSS, ectx->xss);
+    asm volatile("xrstors %0"
+            : : "m" (ectx->xs_area),
+            "d" (UINT32_MAX),
+            "a" (UINT32_MAX)
+            : "memory");
+}
+
 /* TODO:
  * Now we have switch_out and switch_in callbacks for each thread_object, and schedule
  * will call them every thread switch. We can implement lazy context switch, which
@@ -715,7 +758,7 @@ static void context_switch_out(struct thread_object *prev)
     ectx->ia32_fmask = msr_read(MSR_IA32_FMASK);
     ectx->ia32_kernel_gs_base = msr_read(MSR_IA32_KERNEL_GS_BASE);

-    save_fxstore_guest_area(ectx);
+    save_xsave_area(ectx);

     vcpu->running = false;
 }
@@ -732,7 +775,7 @@ static void context_switch_in(struct thread_object *next)
     msr_write(MSR_IA32_FMASK, ectx->ia32_fmask);
     msr_write(MSR_IA32_KERNEL_GS_BASE, ectx->ia32_kernel_gs_base);

-    rstor_fxstore_guest_area(ectx);
+    rstore_xsave_area(ectx);

     vcpu->running = true;
 }
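A note on the EDX:EAX operands used by save_xsave_area()/rstore_xsave_area() above (instruction semantics, not something introduced by this diff): xsaves and xrstors only act on the components selected by RFBM = (XCR0 | IA32_XSS) & EDX:EAX, so passing UINT32_MAX in both registers means "everything currently enabled for this context". A small illustrative helper, not ACRN code, that makes the mask explicit:

    #include <stdint.h>

    /* Requested-feature bitmap actually processed by xsaves/xrstors,
     * given the enabled user (XCR0) and supervisor (IA32_XSS) components
     * and the EDX:EAX mask passed to the instruction. */
    static inline uint64_t xsave_rfbm(uint64_t xcr0, uint64_t xss, uint32_t eax, uint32_t edx)
    {
        uint64_t instr_mask = ((uint64_t)edx << 32U) | (uint64_t)eax;

        return (xcr0 | xss) & instr_mask;   /* all-ones mask => XCR0 | IA32_XSS */
    }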


@@ -150,7 +150,14 @@
 /* Number of GPRs saved / restored for guest in VCPU structure */
 #define NUM_GPRS 16U

-#define GUEST_STATE_AREA_SIZE 512
+#define XSAVE_STATE_AREA_SIZE   4096U
+#define XSAVE_LEGACY_AREA_SIZE  512U
+#define XSAVE_HEADER_AREA_SIZE  64U
+#define XSAVE_EXTEND_AREA_SIZE  (XSAVE_STATE_AREA_SIZE - \
+                                 XSAVE_HEADER_AREA_SIZE - \
+                                 XSAVE_LEGACY_AREA_SIZE)
+#define XSAVE_COMPACTED_FORMAT  (1UL << 63U)

 #define CPU_CONTEXT_OFFSET_RAX 0U
 #define CPU_CONTEXT_OFFSET_RCX 8U
@@ -180,9 +187,6 @@
 #define CPU_CONTEXT_OFFSET_IDTR 192U
 #define CPU_CONTEXT_OFFSET_LDTR 216U

-/*sizes of various registers within the VCPU data structure */
-#define VMX_CPU_S_FXSAVE_GUEST_AREA_SIZE GUEST_STATE_AREA_SIZE
-
 #ifndef ASSEMBLER

 #define AP_MASK (((1UL << get_pcpu_nums()) - 1UL) & ~(1UL << 0U))
@@ -344,6 +348,21 @@ struct run_context {
     uint64_t ia32_efer;
 };

+union xsave_header {
+    uint64_t value[XSAVE_HEADER_AREA_SIZE / sizeof(uint64_t)];
+    struct {
+        /* bytes 7:0 */
+        uint64_t xstate_bv;
+        /* bytes 15:8 */
+        uint64_t xcomp_bv;
+    } hdr;
+};
+
+struct xsave_area {
+    uint64_t legacy_region[XSAVE_LEGACY_AREA_SIZE / sizeof(uint64_t)];
+    union xsave_header xsave_hdr;
+    uint64_t extend_region[XSAVE_EXTEND_AREA_SIZE / sizeof(uint64_t)];
+} __aligned(64);
+
 /*
  * extended context does not save/restore during vm exit/entry, it's mainly
  * used in trusty world switch
@@ -377,10 +396,9 @@ struct ext_context {
     uint64_t dr7;
     uint64_t tsc_offset;

-    /* The 512 bytes area to save the FPU/MMX/SSE states for the guest */
-    uint64_t
-        fxstore_guest_area[VMX_CPU_S_FXSAVE_GUEST_AREA_SIZE / sizeof(uint64_t)]
-        __aligned(16);
+    struct xsave_area xs_area;
+    uint64_t xcr0;
+    uint64_t xss;
 };

 struct cpu_context {
@@ -607,6 +625,13 @@ static inline void write_xcr(int32_t reg, uint64_t val)
     asm volatile("xsetbv" : : "c" (reg), "a" ((uint32_t)val), "d" ((uint32_t)(val >> 32U)));
 }

+static inline uint64_t read_xcr(int32_t reg)
+{
+    uint32_t xcrl, xcrh;
+
+    asm volatile("xgetbv" : "=a"(xcrl), "=d"(xcrh) : "c" (reg));
+    return (((uint64_t)xcrh << 32U) | xcrl);
+}
+
 /*
  * stac/clac pair is used to access guest's memory protected by SMAP,
  * following below flow:
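A build-time check that the new layout really fits the pre-allocated region (an illustrative addition, not in the commit; it assumes the struct and XSAVE_* macros above are in scope and a C11 toolchain):

    #include <assert.h>

    /* legacy (512) + header (64) + extended (3520) = 4096, and __aligned(64)
     * keeps the area suitable for xsaves/xrstors, which require 64-byte alignment */
    static_assert(sizeof(struct xsave_area) == XSAVE_STATE_AREA_SIZE,
                  "struct xsave_area must be exactly 4K");
    static_assert((XSAVE_LEGACY_AREA_SIZE + XSAVE_HEADER_AREA_SIZE +
                   XSAVE_EXTEND_AREA_SIZE) == XSAVE_STATE_AREA_SIZE,
                  "the three regions must exactly fill the pre-allocated area");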


@@ -24,7 +24,12 @@
 #define FEAT_8000_0001_EDX  6U  /* CPUID[8000_0001].EDX */
 #define FEAT_8000_0007_EDX  7U  /* CPUID[8000_0007].EDX */
 #define FEAT_8000_0008_EBX  8U  /* CPUID[8000_0008].EBX */
-#define FEATURE_WORDS       9U
+#define FEAT_D_0_EAX        9U  /* CPUID[D][0].EAX */
+#define FEAT_D_0_EDX        10U /* CPUID[D][0].EDX */
+#define FEAT_D_1_EAX        11U /* CPUID[D][1].EAX */
+#define FEAT_D_1_ECX        13U /* CPUID[D][1].ECX */
+#define FEAT_D_1_EDX        14U /* CPUID[D][1].EDX */
+#define FEATURE_WORDS       15U

 struct cpuinfo_x86 {
     uint8_t family, model;


@@ -94,4 +94,8 @@
 /* Intel-defined CPU features, CPUID level 0x80000007 (EDX) */
 #define X86_FEATURE_INVA_TSC        ((FEAT_8000_0007_EDX << 5U) + 8U)

+/* Intel-defined CPU features, CPUID level 0x0000000D, sub-leaf 0x1 (EAX) */
+#define X86_FEATURE_COMPACTION_EXT  ((FEAT_D_1_EAX << 5U) + 1U)
+#define X86_FEATURE_XSAVES          ((FEAT_D_1_EAX << 5U) + 3U)
+
 #endif /* CPUFEATURES_H */
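For readers new to this encoding: each X86_FEATURE_* value packs a feature-word index and a bit position as (word << 5U) + bit, so X86_FEATURE_XSAVES above means bit 3 of cpuid_leaves[FEAT_D_1_EAX]. A rough sketch of how a capability test presumably decodes it (pcpu_has_cap()'s real body is not shown in this diff; has_cap and the file-scope cpuid_leaves array here are illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    #define FEATURE_WORDS 15U

    static uint32_t cpuid_leaves[FEATURE_WORDS];   /* filled during CPUID detection */

    static bool has_cap(uint32_t feature)
    {
        uint32_t word = feature >> 5U;    /* which feature word, e.g. FEAT_D_1_EAX */
        uint32_t bit = feature & 0x1fU;   /* bit position inside that word */

        return (word < FEATURE_WORDS) && ((cpuid_leaves[word] & (1U << bit)) != 0U);
    }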


@@ -113,6 +113,7 @@
 #define CPUID_TLB                    2U
 #define CPUID_SERIALNUM              3U
 #define CPUID_EXTEND_FEATURE         7U
+#define CPUID_XSAVE_FEATURES         0xDU
 #define CPUID_RSD_ALLOCATION         0x10U
 #define CPUID_MAX_EXTENDED_FUNCTION  0x80000000U
 #define CPUID_EXTEND_FUNCTION_1      0x80000001U


@@ -544,20 +544,12 @@ static inline bool is_pae(struct acrn_vcpu *vcpu)
     return (vcpu_get_cr4(vcpu) & CR4_PAE) != 0UL;
 }

-static inline void save_fxstore_guest_area(struct ext_context *ext_ctx)
-{
-    asm volatile("fxsave (%0)"
-            : : "r" (ext_ctx->fxstore_guest_area) : "memory");
-}
-
-static inline void rstor_fxstore_guest_area(const struct ext_context *ext_ctx)
-{
-    asm volatile("fxrstor (%0)" : : "r" (ext_ctx->fxstore_guest_area));
-}
-
 struct acrn_vcpu *get_running_vcpu(uint16_t pcpu_id);
 struct acrn_vcpu* get_ever_run_vcpu(uint16_t pcpu_id);

+void save_xsave_area(struct ext_context *ectx);
+void rstore_xsave_area(const struct ext_context *ectx);
+
 /**
  * @brief create a vcpu for the target vm
  *


@@ -43,6 +43,10 @@ typedef _Bool bool;
 #define UINT64_MAX (0xffffffffffffffffUL)
 #endif

+#ifndef UINT32_MAX
+#define UINT32_MAX (0xffffffffU)
+#endif
+
 #endif /* ASSEMBLER */

 #endif /* INCLUDE_TYPES_H defined */