From 38cd5b481d52a3a47560983d38a31e0d9004e7dc Mon Sep 17 00:00:00 2001
From: Shuo A Liu
Date: Mon, 1 Feb 2021 18:15:20 +0800
Subject: [PATCH] hv: keylocker: host keylocker iwkey context switch

Different vCPUs may have different IWKeys. The hypervisor needs to do
the IWKey context switch.

This patch introduces a load_iwkey() function to do that. It switches
the host IWKey when the switched-in vCPU satisfies:
  1) the KeyLocker feature is enabled;
  2) it differs from the one currently loaded.

There are two opportunities to do the load_iwkey():
  1) the guest enables the CR4.KL bit;
  2) vCPU thread context switch.

load_iwkey() costs ~600 cycles when it actually loads the IWKey.

Tracked-On: #5695
Signed-off-by: Shuo A Liu
Acked-by: Eddie Dong
---
 hypervisor/arch/x86/guest/vcpu.c         | 31 ++++++++++++++++++++++++
 hypervisor/arch/x86/guest/virtual_cr.c   |  1 +
 hypervisor/arch/x86/guest/vmexit.c       |  3 ++-
 hypervisor/include/arch/x86/cpu.h        |  2 +-
 hypervisor/include/arch/x86/guest/vcpu.h |  1 +
 hypervisor/include/arch/x86/per_cpu.h    |  1 +
 6 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/hypervisor/arch/x86/guest/vcpu.c b/hypervisor/arch/x86/guest/vcpu.c
index 8f75eda80..56412ea77 100644
--- a/hypervisor/arch/x86/guest/vcpu.c
+++ b/hypervisor/arch/x86/guest/vcpu.c
@@ -205,6 +205,35 @@ static void init_iwkey(struct acrn_vcpu *vcpu)
 		vcpu->arch.IWKey.encryption_key[1] = get_random_value();
 		vcpu->arch.IWKey.encryption_key[2] = get_random_value();
 		vcpu->arch.IWKey.encryption_key[3] = get_random_value();
+		/* It's always safe to clear whose_iwkey */
+		per_cpu(whose_iwkey, pcpuid_from_vcpu(vcpu)) = NULL;
 	}
 }
 
+void load_iwkey(struct acrn_vcpu *vcpu)
+{
+	uint64_t xmm_save[6];
+
+	/* Only load IWKey with vCPU CR4 keylocker bit enabled */
+	if (pcpu_has_cap(X86_FEATURE_KEYLOCKER) && vcpu->arch.cr4_kl_enabled &&
+	    (get_cpu_var(whose_iwkey) != vcpu)) {
+		/* Save/restore xmm0/xmm1/xmm2 during the process */
+		asm volatile (	"movdqu %%xmm0, %0\n"
+				"movdqu %%xmm1, %1\n"
+				"movdqu %%xmm2, %2\n"
+				"movdqu %3, %%xmm0\n"
+				"movdqu %4, %%xmm1\n"
+				"movdqu %5, %%xmm2\n"
+				: "=m"(xmm_save[0]), "=m"(xmm_save[2]), "=m"(xmm_save[4])
+				: "m"(vcpu->arch.IWKey.integrity_key[0]),
+				  "m"(vcpu->arch.IWKey.encryption_key[0]),
+				  "m"(vcpu->arch.IWKey.encryption_key[2]));
+		asm_loadiwkey(0);
+		asm volatile (	"movdqu %2, %%xmm2\n"
+				"movdqu %1, %%xmm1\n"
+				"movdqu %0, %%xmm0\n"
+				: : "m"(xmm_save[0]), "m"(xmm_save[2]), "m"(xmm_save[4]));
+		get_cpu_var(whose_iwkey) = vcpu;
+	}
+}
+
@@ -826,6 +855,8 @@ static void context_switch_in(struct thread_object *next)
 	msr_write(MSR_IA32_FMASK, ectx->ia32_fmask);
 	msr_write(MSR_IA32_KERNEL_GS_BASE, ectx->ia32_kernel_gs_base);
 
+	load_iwkey(vcpu);
+
 	rstore_xsave_area(vcpu, ectx);
 }
 
diff --git a/hypervisor/arch/x86/guest/virtual_cr.c b/hypervisor/arch/x86/guest/virtual_cr.c
index 40d734203..d5e9e8dc3 100644
--- a/hypervisor/arch/x86/guest/virtual_cr.c
+++ b/hypervisor/arch/x86/guest/virtual_cr.c
@@ -391,6 +391,7 @@ static void vmx_write_cr4(struct acrn_vcpu *vcpu, uint64_t cr4)
 	if (!err_found && ((cr4_changed_bits & CR4_KL) != 0UL)) {
 		if ((cr4 & CR4_KL) != 0UL) {
 			vcpu->arch.cr4_kl_enabled = true;
+			load_iwkey(vcpu);
 		} else {
 			vcpu->arch.cr4_kl_enabled = false;
 		}
diff --git a/hypervisor/arch/x86/guest/vmexit.c b/hypervisor/arch/x86/guest/vmexit.c
index 0344b950f..0091a9d43 100644
--- a/hypervisor/arch/x86/guest/vmexit.c
+++ b/hypervisor/arch/x86/guest/vmexit.c
@@ -453,7 +453,8 @@ static int32_t loadiwkey_vmexit_handler(struct acrn_vcpu *vcpu)
 		vcpu->arch.IWKey.integrity_key[0] = xmm[0];
 		vcpu->arch.IWKey.integrity_key[1] = xmm[1];
 
-		loadiwkey(0);
+		asm_loadiwkey(0);
+		get_cpu_var(whose_iwkey) = vcpu;
 	}
 
 	return 0;
diff --git a/hypervisor/include/arch/x86/cpu.h b/hypervisor/include/arch/x86/cpu.h
index 19244196b..cf8de386f 100644
--- a/hypervisor/include/arch/x86/cpu.h
+++ b/hypervisor/include/arch/x86/cpu.h
@@ -659,7 +659,7 @@ static inline void xrstors(const struct xsave_area *region_addr, uint64_t mask)
 			 "memory");
 }
 
-static inline void loadiwkey(uint32_t eax)
+static inline void asm_loadiwkey(uint32_t eax)
 {
 	asm volatile(".byte 0xf3, 0x0f, 0x38, 0xdc, 0xd1;": : "a" (eax));
 }
diff --git a/hypervisor/include/arch/x86/guest/vcpu.h b/hypervisor/include/arch/x86/guest/vcpu.h
index 45c4337a9..40108b8ff 100644
--- a/hypervisor/include/arch/x86/guest/vcpu.h
+++ b/hypervisor/include/arch/x86/guest/vcpu.h
@@ -602,6 +602,7 @@ struct acrn_vcpu *get_ever_run_vcpu(uint16_t pcpu_id);
 
 void save_xsave_area(struct acrn_vcpu *vcpu, struct ext_context *ectx);
 void rstore_xsave_area(const struct acrn_vcpu *vcpu, const struct ext_context *ectx);
+void load_iwkey(struct acrn_vcpu *vcpu);
 
 /**
  * @brief create a vcpu for the target vm
diff --git a/hypervisor/include/arch/x86/per_cpu.h b/hypervisor/include/arch/x86/per_cpu.h
index e584a37fe..3abf117ab 100644
--- a/hypervisor/include/arch/x86/per_cpu.h
+++ b/hypervisor/include/arch/x86/per_cpu.h
@@ -59,6 +59,7 @@ struct per_cpu_region {
 #endif
 	uint64_t shutdown_vm_bitmap;
 	uint64_t tsc_suspend;
+	struct acrn_vcpu *whose_iwkey;
 	/*
 	 * We maintain a per-pCPU array of vCPUs. vCPUs of a VM won't
 	 * share same pCPU. So the maximum possible # of vCPUs that can
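
Note: the per-cpu whose_iwkey pointer is what keeps the context switch
cheap: the ~600-cycle save/LOADIWKEY/restore sequence only runs when a
different vCPU's key is programmed on the pCPU. Below is a minimal
user-space sketch of just that caching decision, not part of the patch:
struct vcpu is reduced to the one flag that matters here, the
pcpu_has_cap() check is dropped for brevity, and program_iwkey() /
iwkey_loads are hypothetical stand-ins for asm_loadiwkey() and the
per_cpu() machinery.

#include <stdio.h>

struct vcpu {
	int kl_enabled;		/* mirrors vcpu->arch.cr4_kl_enabled */
};

/* stand-in for per_cpu(whose_iwkey, ...): owner of the key on this pCPU */
static struct vcpu *whose_iwkey;
static int iwkey_loads;

/* stand-in for the xmm save / asm_loadiwkey(0) / xmm restore sequence */
static void program_iwkey(struct vcpu *v)
{
	(void)v;
	iwkey_loads++;
}

/* same decision logic as load_iwkey(): skip if this vCPU's key is current */
static void load_iwkey(struct vcpu *v)
{
	if ((v->kl_enabled != 0) && (whose_iwkey != v)) {
		program_iwkey(v);
		whose_iwkey = v;
	}
}

int main(void)
{
	struct vcpu a = { 1 }, b = { 1 };

	load_iwkey(&a);		/* a's key not loaded yet -> programs it */
	load_iwkey(&a);		/* a is already the owner -> skipped     */
	load_iwkey(&b);		/* b differs from owner   -> programs it */
	load_iwkey(&a);		/* a differs again        -> programs it */
	printf("IWKey loads: %d\n", iwkey_loads);	/* prints 3 */
	return 0;
}

Clearing whose_iwkey in init_iwkey() fits the same model: it invalidates
the per-cpu cache so the next load_iwkey() on that pCPU reprograms the
key unconditionally, which is why the comment calls the clear "always
safe".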