From e61412981dd47b69e099dce74bb9048270aee8b7 Mon Sep 17 00:00:00 2001
From: Conghui Chen
Date: Mon, 25 Nov 2019 18:57:36 +0000
Subject: [PATCH] hv: support xsave in context switch

The xsave area consists of:
  legacy region:   512 bytes
  xsave header:     64 bytes
  extended region: < 3K bytes

So, pre-allocate a 4K area for xsave. Use the appropriate instructions
(XSAVES/XRSTORS) to save and restore the area according to the hardware
XSAVE feature set.

Tracked-On: #4166
Signed-off-by: Conghui Chen
Reviewed-by: Anthony Xu
Acked-by: Eddie Dong
---
 hypervisor/arch/x86/cpu.c                 | 41 ++++++++++++++------
 hypervisor/arch/x86/cpu_caps.c            | 23 +++++++++++
 hypervisor/arch/x86/guest/trusty.c        |  8 ++--
 hypervisor/arch/x86/guest/vcpu.c          | 47 ++++++++++++++++++++++-
 hypervisor/include/arch/x86/cpu.h         | 41 ++++++++++++++----
 hypervisor/include/arch/x86/cpu_caps.h    |  7 +++-
 hypervisor/include/arch/x86/cpufeatures.h |  4 ++
 hypervisor/include/arch/x86/cpuid.h       |  1 +
 hypervisor/include/arch/x86/guest/vcpu.h  | 14 ++-----
 hypervisor/include/lib/types.h            |  4 ++
 10 files changed, 153 insertions(+), 37 deletions(-)

diff --git a/hypervisor/arch/x86/cpu.c b/hypervisor/arch/x86/cpu.c
index 403cc4920..ecbdc8a83 100644
--- a/hypervisor/arch/x86/cpu.c
+++ b/hypervisor/arch/x86/cpu.c
@@ -476,20 +476,39 @@ static void init_pcpu_xsave(void)
 {
 	uint64_t val64;
 	struct cpuinfo_x86 *cpu_info;
+	uint64_t xcr0, xss;
+	uint32_t eax, ecx, unused, xsave_area_size;
 
-	if (pcpu_has_cap(X86_FEATURE_XSAVE)) {
-		CPU_CR_READ(cr4, &val64);
-		val64 |= CR4_OSXSAVE;
-		CPU_CR_WRITE(cr4, val64);
+	CPU_CR_READ(cr4, &val64);
+	val64 |= CR4_OSXSAVE;
+	CPU_CR_WRITE(cr4, val64);
 
-		if (get_pcpu_id() == BOOT_CPU_ID) {
-			uint32_t ecx, unused;
-			cpuid(CPUID_FEATURES, &unused, &unused, &ecx, &unused);
+	if (get_pcpu_id() == BOOT_CPU_ID) {
+		cpuid(CPUID_FEATURES, &unused, &unused, &ecx, &unused);
 
-			/* if set, update it */
-			if ((ecx & CPUID_ECX_OSXSAVE) != 0U) {
-				cpu_info = get_pcpu_info();
-				cpu_info->cpuid_leaves[FEAT_1_ECX] |= CPUID_ECX_OSXSAVE;
+		/* if set, update it */
+		if ((ecx & CPUID_ECX_OSXSAVE) != 0U) {
+			cpu_info = get_pcpu_info();
+			cpu_info->cpuid_leaves[FEAT_1_ECX] |= CPUID_ECX_OSXSAVE;
+
+			/* set xcr0 and xss with the component bitmaps obtained from cpuid */
+			xcr0 = ((uint64_t)cpu_info->cpuid_leaves[FEAT_D_0_EDX] << 32U) +
+				cpu_info->cpuid_leaves[FEAT_D_0_EAX];
+			xss = ((uint64_t)cpu_info->cpuid_leaves[FEAT_D_1_EDX] << 32U) +
+				cpu_info->cpuid_leaves[FEAT_D_1_ECX];
+			write_xcr(0, xcr0);
+			msr_write(MSR_IA32_XSS, xss);
+
+			/* get xsave area size, containing all the state components
+			 * corresponding to bits currently set in XCR0 | IA32_XSS */
+			cpuid_subleaf(CPUID_XSAVE_FEATURES, 1U,
+				&eax,
+				&xsave_area_size,
+				&ecx,
+				&unused);
+			if (xsave_area_size > XSAVE_STATE_AREA_SIZE) {
+				panic("XSAVE area (%d bytes) exceeds the pre-allocated 4K region\n",
+					xsave_area_size);
 			}
 		}
 	}
diff --git a/hypervisor/arch/x86/cpu_caps.c b/hypervisor/arch/x86/cpu_caps.c
index 8627a774b..93b2b67e9 100644
--- a/hypervisor/arch/x86/cpu_caps.c
+++ b/hypervisor/arch/x86/cpu_caps.c
@@ -189,11 +189,28 @@ static void detect_vmx_mmu_cap(void)
 	cpu_caps.vmx_vpid = (uint32_t) (val >> 32U);
 }
 
+static void detect_xsave_cap(void)
+{
+	uint32_t unused;
+
+	cpuid_subleaf(CPUID_XSAVE_FEATURES, 0U,
+		&boot_cpu_data.cpuid_leaves[FEAT_D_0_EAX],
+		&unused,
+		&unused,
+		&boot_cpu_data.cpuid_leaves[FEAT_D_0_EDX]);
+	cpuid_subleaf(CPUID_XSAVE_FEATURES, 1U,
+		&boot_cpu_data.cpuid_leaves[FEAT_D_1_EAX],
+		&unused,
+		&boot_cpu_data.cpuid_leaves[FEAT_D_1_ECX],
+		&boot_cpu_data.cpuid_leaves[FEAT_D_1_EDX]);
+}
+
 static void detect_pcpu_cap(void)
 {
detect_apicv_cap(); detect_ept_cap(); detect_vmx_mmu_cap(); + detect_xsave_cap(); } static uint64_t get_address_mask(uint8_t limit) @@ -426,6 +443,12 @@ int32_t detect_hardware_support(void) } else if (!pcpu_has_cap(X86_FEATURE_POPCNT)) { printf("%s, popcnt instruction not supported\n", __func__); ret = -ENODEV; + } else if (!pcpu_has_cap(X86_FEATURE_XSAVES)) { + printf("%s, XSAVES not supported\n", __func__); + ret = -ENODEV; + } else if (!pcpu_has_cap(X86_FEATURE_COMPACTION_EXT)) { + printf("%s, Compaction extensions in XSAVE is not supported\n", __func__); + ret = -ENODEV; } else { ret = check_vmx_mmu_cap(); } diff --git a/hypervisor/arch/x86/guest/trusty.c b/hypervisor/arch/x86/guest/trusty.c index 7432001c5..8b4d3ee1c 100644 --- a/hypervisor/arch/x86/guest/trusty.c +++ b/hypervisor/arch/x86/guest/trusty.c @@ -193,8 +193,8 @@ static void save_world_ctx(struct acrn_vcpu *vcpu, struct ext_context *ext_ctx) ext_ctx->ia32_fmask = msr_read(MSR_IA32_FMASK); ext_ctx->ia32_kernel_gs_base = msr_read(MSR_IA32_KERNEL_GS_BASE); - /* FX area */ - save_fxstore_guest_area(ext_ctx); + /* XSAVE area */ + save_xsave_area(ext_ctx); /* For MSRs need isolation between worlds */ for (i = 0U; i < NUM_WORLD_MSRS; i++) { @@ -245,8 +245,8 @@ static void load_world_ctx(struct acrn_vcpu *vcpu, const struct ext_context *ext msr_write(MSR_IA32_FMASK, ext_ctx->ia32_fmask); msr_write(MSR_IA32_KERNEL_GS_BASE, ext_ctx->ia32_kernel_gs_base); - /* FX area */ - rstor_fxstore_guest_area(ext_ctx); + /* XSAVE area */ + rstore_xsave_area(ext_ctx); /* For MSRs need isolation between worlds */ for (i = 0U; i < NUM_WORLD_MSRS; i++) { diff --git a/hypervisor/arch/x86/guest/vcpu.c b/hypervisor/arch/x86/guest/vcpu.c index b207b1bd0..42aa69902 100644 --- a/hypervisor/arch/x86/guest/vcpu.c +++ b/hypervisor/arch/x86/guest/vcpu.c @@ -227,6 +227,26 @@ static void set_vcpu_mode(struct acrn_vcpu *vcpu, uint32_t cs_attr, uint64_t ia3 } } +static void init_xsave(struct acrn_vcpu *vcpu) +{ + struct cpuinfo_x86 *cpu_info = get_pcpu_info(); + struct ext_context *ectx = &(vcpu->arch.contexts[vcpu->arch.cur_context].ext_ctx); + + /* Get user state components */ + ectx->xcr0 = ((uint64_t)cpu_info->cpuid_leaves[FEAT_D_0_EDX] << 32U) + + cpu_info->cpuid_leaves[FEAT_D_0_EAX]; + + /* Get supervisor state components */ + ectx->xss = ((uint64_t)cpu_info->cpuid_leaves[FEAT_D_1_EDX] << 32U) + + cpu_info->cpuid_leaves[FEAT_D_1_ECX]; + + /* xsaves only support compacted format, so set it in xcomp_bv[63], + * keep the reset area in header area as zero. 
+ * With this config, the first time a vcpu is scheduled in, it will + * initiate all the xsave componets */ + ectx->xs_area.xsave_hdr.hdr.xcomp_bv |= XSAVE_COMPACTED_FORMAT; + +} void set_vcpu_regs(struct acrn_vcpu *vcpu, struct acrn_vcpu_regs *vcpu_regs) { struct ext_context *ectx; @@ -446,6 +466,7 @@ int32_t create_vcpu(uint16_t pcpu_id, struct acrn_vm *vm, struct acrn_vcpu **rtn vcpu->arch.nr_sipi = 0U; vcpu->state = VCPU_INIT; + init_xsave(vcpu); reset_vcpu_regs(vcpu); (void)memset((void *)&vcpu->req, 0U, sizeof(struct io_request)); vm->hw.created_vcpus++; @@ -699,6 +720,28 @@ void resume_vcpu(struct acrn_vcpu *vcpu) } } +void save_xsave_area(struct ext_context *ectx) +{ + ectx->xcr0 = read_xcr(0); + ectx->xss = msr_read(MSR_IA32_XSS); + asm volatile("xsaves %0" + : : "m" (ectx->xs_area), + "d" (UINT32_MAX), + "a" (UINT32_MAX): + "memory"); +} + +void rstore_xsave_area(const struct ext_context *ectx) +{ + write_xcr(0, ectx->xcr0); + msr_write(MSR_IA32_XSS, ectx->xss); + asm volatile("xrstors %0" + : : "m" (ectx->xs_area), + "d" (UINT32_MAX), + "a" (UINT32_MAX): + "memory"); +} + /* TODO: * Now we have switch_out and switch_in callbacks for each thread_object, and schedule * will call them every thread switch. We can implement lazy context swtich , which @@ -715,7 +758,7 @@ static void context_switch_out(struct thread_object *prev) ectx->ia32_fmask = msr_read(MSR_IA32_FMASK); ectx->ia32_kernel_gs_base = msr_read(MSR_IA32_KERNEL_GS_BASE); - save_fxstore_guest_area(ectx); + save_xsave_area(ectx); vcpu->running = false; } @@ -732,7 +775,7 @@ static void context_switch_in(struct thread_object *next) msr_write(MSR_IA32_FMASK, ectx->ia32_fmask); msr_write(MSR_IA32_KERNEL_GS_BASE, ectx->ia32_kernel_gs_base); - rstor_fxstore_guest_area(ectx); + rstore_xsave_area(ectx); vcpu->running = true; } diff --git a/hypervisor/include/arch/x86/cpu.h b/hypervisor/include/arch/x86/cpu.h index 1735a28d5..259c759b4 100644 --- a/hypervisor/include/arch/x86/cpu.h +++ b/hypervisor/include/arch/x86/cpu.h @@ -150,7 +150,14 @@ /* Number of GPRs saved / restored for guest in VCPU structure */ #define NUM_GPRS 16U -#define GUEST_STATE_AREA_SIZE 512 + +#define XSAVE_STATE_AREA_SIZE 4096U +#define XSAVE_LEGACY_AREA_SIZE 512U +#define XSAVE_HEADER_AREA_SIZE 64U +#define XSAVE_EXTEND_AREA_SIZE (XSAVE_STATE_AREA_SIZE - \ + XSAVE_HEADER_AREA_SIZE - \ + XSAVE_LEGACY_AREA_SIZE) +#define XSAVE_COMPACTED_FORMAT (1UL << 63U) #define CPU_CONTEXT_OFFSET_RAX 0U #define CPU_CONTEXT_OFFSET_RCX 8U @@ -180,9 +187,6 @@ #define CPU_CONTEXT_OFFSET_IDTR 192U #define CPU_CONTEXT_OFFSET_LDTR 216U -/*sizes of various registers within the VCPU data structure */ -#define VMX_CPU_S_FXSAVE_GUEST_AREA_SIZE GUEST_STATE_AREA_SIZE - #ifndef ASSEMBLER #define AP_MASK (((1UL << get_pcpu_nums()) - 1UL) & ~(1UL << 0U)) @@ -344,6 +348,21 @@ struct run_context { uint64_t ia32_efer; }; +union xsave_header { + uint64_t value[XSAVE_HEADER_AREA_SIZE / sizeof(uint64_t)]; + struct { + /* bytes 7:0 */ + uint64_t xstate_bv; + /* bytes 15:8 */ + uint64_t xcomp_bv; + } hdr; +}; + +struct xsave_area { + uint64_t legacy_region[XSAVE_LEGACY_AREA_SIZE / sizeof(uint64_t)]; + union xsave_header xsave_hdr; + uint64_t extend_region[XSAVE_EXTEND_AREA_SIZE / sizeof(uint64_t)]; +} __aligned(64); /* * extended context does not save/restore during vm exit/entry, it's mainly * used in trusty world switch @@ -377,10 +396,9 @@ struct ext_context { uint64_t dr7; uint64_t tsc_offset; - /* The 512 bytes area to save the FPU/MMX/SSE states for the guest */ - uint64_t - 
fxstore_guest_area[VMX_CPU_S_FXSAVE_GUEST_AREA_SIZE / sizeof(uint64_t)] - __aligned(16); + struct xsave_area xs_area; + uint64_t xcr0; + uint64_t xss; }; struct cpu_context { @@ -607,6 +625,13 @@ static inline void write_xcr(int32_t reg, uint64_t val) asm volatile("xsetbv" : : "c" (reg), "a" ((uint32_t)val), "d" ((uint32_t)(val >> 32U))); } +static inline uint64_t read_xcr(int32_t reg) +{ + uint32_t xcrl, xcrh; + + asm volatile ("xgetbv ": "=a"(xcrl), "=d"(xcrh) : "c" (reg)); + return (((uint64_t)xcrh << 32U) | xcrl); +} /* * stac/clac pair is used to access guest's memory protected by SMAP, * following below flow: diff --git a/hypervisor/include/arch/x86/cpu_caps.h b/hypervisor/include/arch/x86/cpu_caps.h index 513e45538..49bd321b5 100644 --- a/hypervisor/include/arch/x86/cpu_caps.h +++ b/hypervisor/include/arch/x86/cpu_caps.h @@ -24,7 +24,12 @@ #define FEAT_8000_0001_EDX 6U /* CPUID[8000_0001].EDX */ #define FEAT_8000_0007_EDX 7U /* CPUID[8000_0007].EDX */ #define FEAT_8000_0008_EBX 8U /* CPUID[8000_0008].EBX */ -#define FEATURE_WORDS 9U +#define FEAT_D_0_EAX 9U /* CPUID[D][0].EAX */ +#define FEAT_D_0_EDX 10U /* CPUID[D][0].EDX */ +#define FEAT_D_1_EAX 11U /* CPUID[D][1].EAX */ +#define FEAT_D_1_ECX 13U /* CPUID[D][1].ECX */ +#define FEAT_D_1_EDX 14U /* CPUID[D][1].EDX */ +#define FEATURE_WORDS 15U struct cpuinfo_x86 { uint8_t family, model; diff --git a/hypervisor/include/arch/x86/cpufeatures.h b/hypervisor/include/arch/x86/cpufeatures.h index dea1efc86..3cffd4225 100644 --- a/hypervisor/include/arch/x86/cpufeatures.h +++ b/hypervisor/include/arch/x86/cpufeatures.h @@ -94,4 +94,8 @@ /* Intel-defined CPU features, CPUID level 0x80000007 (EDX)*/ #define X86_FEATURE_INVA_TSC ((FEAT_8000_0007_EDX << 5U) + 8U) +/* Intel-defined CPU features, CPUID level 0x0000000D, sub 0x1 */ +#define X86_FEATURE_COMPACTION_EXT ((FEAT_D_1_EAX << 5U) + 1U) +#define X86_FEATURE_XSAVES ((FEAT_D_1_EAX << 5U) + 3U) + #endif /* CPUFEATURES_H */ diff --git a/hypervisor/include/arch/x86/cpuid.h b/hypervisor/include/arch/x86/cpuid.h index 973b1e7c9..329b05c40 100644 --- a/hypervisor/include/arch/x86/cpuid.h +++ b/hypervisor/include/arch/x86/cpuid.h @@ -113,6 +113,7 @@ #define CPUID_TLB 2U #define CPUID_SERIALNUM 3U #define CPUID_EXTEND_FEATURE 7U +#define CPUID_XSAVE_FEATURES 0xDU #define CPUID_RSD_ALLOCATION 0x10U #define CPUID_MAX_EXTENDED_FUNCTION 0x80000000U #define CPUID_EXTEND_FUNCTION_1 0x80000001U diff --git a/hypervisor/include/arch/x86/guest/vcpu.h b/hypervisor/include/arch/x86/guest/vcpu.h index 4e726e85a..af04985d4 100644 --- a/hypervisor/include/arch/x86/guest/vcpu.h +++ b/hypervisor/include/arch/x86/guest/vcpu.h @@ -544,20 +544,12 @@ static inline bool is_pae(struct acrn_vcpu *vcpu) return (vcpu_get_cr4(vcpu) & CR4_PAE) != 0UL; } -static inline void save_fxstore_guest_area(struct ext_context *ext_ctx) -{ - asm volatile("fxsave (%0)" - : : "r" (ext_ctx->fxstore_guest_area) : "memory"); -} - -static inline void rstor_fxstore_guest_area(const struct ext_context *ext_ctx) -{ - asm volatile("fxrstor (%0)" : : "r" (ext_ctx->fxstore_guest_area)); -} - struct acrn_vcpu *get_running_vcpu(uint16_t pcpu_id); struct acrn_vcpu* get_ever_run_vcpu(uint16_t pcpu_id); +void save_xsave_area(struct ext_context *ectx); +void rstore_xsave_area(const struct ext_context *ectx); + /** * @brief create a vcpu for the target vm * diff --git a/hypervisor/include/lib/types.h b/hypervisor/include/lib/types.h index 32b789cae..9d5f16aaa 100644 --- a/hypervisor/include/lib/types.h +++ b/hypervisor/include/lib/types.h @@ -43,6 +43,10 @@ 
typedef _Bool bool; #define UINT64_MAX (0xffffffffffffffffUL) #endif +#ifndef UINT32_MAX +#define UINT32_MAX (0xffffffffU) +#endif + #endif /* ASSEMBLER */ #endif /* INCLUDE_TYPES_H defined */
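
Editor's note (not part of the commit): the init_pcpu_xsave() hunk above derives the XCR0/IA32_XSS component bitmaps from CPUID leaf 0xD and then checks the area size reported by sub-leaf 1 against the 4K pre-allocation. The stand-alone sketch below shows the same derivation from user space, assuming a GCC/Clang toolchain with <cpuid.h>; XSAVE_STATE_AREA_SIZE is copied from the patch, while the file name, main() wrapper and printf reporting are illustrative only. Outside the hypervisor, the reported size reflects whatever XCR0/IA32_XSS the host OS has enabled, not the full bitmaps the patch programs.

/* xsave_size_check.c - illustrative only; not part of the ACRN patch. */
#include <stdio.h>
#include <stdint.h>
#include <cpuid.h>

#define XSAVE_STATE_AREA_SIZE	4096U	/* pre-allocated budget, as in the patch */

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID.(EAX=0DH, ECX=0): EDX:EAX = supported user state components (XCR0 bitmap) */
	if (__get_cpuid_count(0xDU, 0U, &eax, &ebx, &ecx, &edx) == 0) {
		fprintf(stderr, "CPUID leaf 0xD not supported\n");
		return 1;
	}
	uint64_t xcr0_components = ((uint64_t)edx << 32U) | eax;

	/* CPUID.(EAX=0DH, ECX=1): EDX:ECX = supported supervisor components (IA32_XSS bitmap),
	 * EBX = size of the compacted XSAVES area for everything currently enabled in
	 * XCR0 | IA32_XSS - the same value init_pcpu_xsave() checks against 4K. */
	__get_cpuid_count(0xDU, 1U, &eax, &ebx, &ecx, &edx);
	uint64_t xss_components = ((uint64_t)edx << 32U) | ecx;
	unsigned int enabled_area_size = ebx;

	printf("user (XCR0) components:         0x%llx\n", (unsigned long long)xcr0_components);
	printf("supervisor (XSS) components:    0x%llx\n", (unsigned long long)xss_components);
	printf("compacted area for enabled set: %u bytes (budget %u)\n",
			enabled_area_size, XSAVE_STATE_AREA_SIZE);

	return (enabled_area_size > XSAVE_STATE_AREA_SIZE) ? 1 : 0;
}

Built with a plain cc invocation on an x86 host, this reports the size the patch guards with panic(); sub-leaf 1 is the right sub-leaf here because the context-switch path uses XSAVES/XRSTORS, whose compacted area also covers the supervisor states enabled in IA32_XSS.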
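Editor's note (not part of the commit): the cpu.h hunk lays out the 4K xsave_area (512-byte legacy region, 64-byte header, extended region for the remainder), and init_xsave() marks the area as compacted-format by setting bit 63 of xcomp_bv while leaving xstate_bv zero. The following compile-time sketch of that layout assumes a C11 compiler; the struct and macro names mirror the patch, the static asserts and main() are illustrative only.

/* xsave_layout_check.c - illustrative only; not part of the ACRN patch. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define XSAVE_STATE_AREA_SIZE	4096U
#define XSAVE_LEGACY_AREA_SIZE	512U
#define XSAVE_HEADER_AREA_SIZE	64U
#define XSAVE_EXTEND_AREA_SIZE	(XSAVE_STATE_AREA_SIZE - \
				 XSAVE_HEADER_AREA_SIZE - \
				 XSAVE_LEGACY_AREA_SIZE)
#define XSAVE_COMPACTED_FORMAT	(1ULL << 63U)

union xsave_header {
	uint64_t value[XSAVE_HEADER_AREA_SIZE / sizeof(uint64_t)];
	struct {
		uint64_t xstate_bv;	/* bytes 7:0  - components with live state in the area */
		uint64_t xcomp_bv;	/* bytes 15:8 - bit 63 selects the compacted format */
	} hdr;
};

/* XSAVES/XRSTORS require a 64-byte aligned area; the header starts at byte 512. */
struct xsave_area {
	uint64_t legacy_region[XSAVE_LEGACY_AREA_SIZE / sizeof(uint64_t)];
	union xsave_header xsave_hdr;
	uint64_t extend_region[XSAVE_EXTEND_AREA_SIZE / sizeof(uint64_t)];
} __attribute__((aligned(64)));

static_assert(sizeof(struct xsave_area) == XSAVE_STATE_AREA_SIZE, "area must be exactly 4K");
static_assert(offsetof(struct xsave_area, xsave_hdr) == 512U, "header follows the legacy region");
static_assert(offsetof(struct xsave_area, extend_region) == 576U, "extended region follows the header");

int main(void)
{
	/* Zero-initialized, like the vcpu's ext_context at creation time.  With
	 * xstate_bv == 0 and only the compacted-format bit set in xcomp_bv, the
	 * first XRSTORS of this area loads the initial value of every enabled
	 * component - which is why init_xsave() only needs to set this one bit. */
	static struct xsave_area area;

	area.xsave_hdr.hdr.xcomp_bv |= XSAVE_COMPACTED_FORMAT;
	return ((area.xsave_hdr.hdr.xcomp_bv >> 63U) != 0U) ? 0 : 1;
}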