/*- * Copyright (c) 2011 NetApp, Inc. * Copyright (c) 2017 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include static void *mmu_pml4_addr; static struct vmx_capability { uint32_t ept; uint32_t vpid; } vmx_caps; /* * If the logical processor is in VMX non-root operation and * the "enable VPID" VM-execution control is 1, the current VPID * is the value of the VPID VM-execution control field in the VMCS. * (VM entry ensures that this value is never 0000H). */ static uint16_t vmx_vpid_nr = VMX_MIN_NR_VPID; #define INVEPT_TYPE_SINGLE_CONTEXT 1UL #define INVEPT_TYPE_ALL_CONTEXTS 2UL #define VMFAIL_INVALID_EPT_VPID \ " jnc 1f\n" \ " mov $1, %0\n" /* CF: error = 1 */ \ " jmp 3f\n" \ "1: jnz 2f\n" \ " mov $2, %0\n" /* ZF: error = 2 */ \ " jmp 3f\n" \ "2: mov $0, %0\n" \ "3:" struct invept_desc { uint64_t eptp; uint64_t _res; }; static inline void local_invvpid(uint64_t type, uint16_t vpid, uint64_t gva) { int error = 0; struct { uint32_t vpid : 16; uint32_t rsvd1 : 16; uint32_t rsvd2 : 32; uint64_t gva; } operand = { vpid, 0U, 0U, gva }; asm volatile ("invvpid %1, %2\n" VMFAIL_INVALID_EPT_VPID : "=r" (error) : "m" (operand), "r" (type) : "memory"); ASSERT(error == 0, "invvpid error"); } static inline void local_invept(uint64_t type, struct invept_desc desc) { int error = 0; asm volatile ("invept %1, %2\n" VMFAIL_INVALID_EPT_VPID : "=r" (error) : "m" (desc), "r" (type) : "memory"); ASSERT(error == 0, "invept error"); } static inline bool cpu_has_vmx_ept_cap(uint32_t bit_mask) { return ((vmx_caps.ept & bit_mask) != 0U); } static inline bool cpu_has_vmx_vpid_cap(uint32_t bit_mask) { return ((vmx_caps.vpid & bit_mask) != 0U); } int check_vmx_mmu_cap(void) { uint64_t val; /* Read the MSR register of EPT and VPID Capability - SDM A.10 */ val = msr_read(MSR_IA32_VMX_EPT_VPID_CAP); vmx_caps.ept = (uint32_t) val; vmx_caps.vpid = (uint32_t) (val >> 32); if (!cpu_has_vmx_ept_cap(VMX_EPT_INVEPT)) { pr_fatal("%s, invept not supported\n", __func__); return -ENODEV; } if (!cpu_has_vmx_vpid_cap(VMX_VPID_INVVPID) || !cpu_has_vmx_vpid_cap(VMX_VPID_INVVPID_SINGLE_CONTEXT) || !cpu_has_vmx_vpid_cap(VMX_VPID_INVVPID_GLOBAL_CONTEXT)) { pr_fatal("%s, invvpid not supported\n", __func__); return -ENODEV; } if (!cpu_has_vmx_ept_cap(VMX_EPT_1GB_PAGE)) { pr_fatal("%s, ept not support 1GB large page\n", __func__); return -ENODEV; } return 0; } uint16_t allocate_vpid(void) { uint16_t vpid = atomic_xadd16(&vmx_vpid_nr, 1U); /* TODO: vpid overflow */ if (vpid >= VMX_MAX_NR_VPID) { pr_err("%s, vpid overflow\n", __func__); /* * set vmx_vpid_nr to VMX_MAX_NR_VPID to disable vpid * since next atomic_xadd16 will always large than * VMX_MAX_NR_VPID. */ vmx_vpid_nr = VMX_MAX_NR_VPID; vpid = 0U; } return vpid; } void flush_vpid_single(uint16_t vpid) { if (vpid == 0U) { return; } local_invvpid(VMX_VPID_TYPE_SINGLE_CONTEXT, vpid, 0UL); } void flush_vpid_global(void) { local_invvpid(VMX_VPID_TYPE_ALL_CONTEXT, 0U, 0UL); } void invept(struct vcpu *vcpu) { struct invept_desc desc = {0}; if (cpu_has_vmx_ept_cap(VMX_EPT_INVEPT_SINGLE_CONTEXT)) { desc.eptp = hva2hpa(vcpu->vm->arch_vm.nworld_eptp) | (3UL << 3U) | 6UL; local_invept(INVEPT_TYPE_SINGLE_CONTEXT, desc); if (vcpu->vm->sworld_control.flag.active != 0UL) { desc.eptp = hva2hpa(vcpu->vm->arch_vm.sworld_eptp) | (3UL << 3U) | 6UL; local_invept(INVEPT_TYPE_SINGLE_CONTEXT, desc); } } else if (cpu_has_vmx_ept_cap(VMX_EPT_INVEPT_GLOBAL_CONTEXT)) { local_invept(INVEPT_TYPE_ALL_CONTEXTS, desc); } else { /* Neither type of INVEPT is supported. Skip. */ } } uint64_t get_paging_pml4(void) { /* Return address to caller */ return hva2hpa(mmu_pml4_addr); } void enable_paging(uint64_t pml4_base_addr) { uint64_t tmp64 = 0UL; /* Enable Write Protect, inhibiting writing to read-only pages */ CPU_CR_READ(cr0, &tmp64); CPU_CR_WRITE(cr0, tmp64 | CR0_WP); CPU_CR_WRITE(cr3, pml4_base_addr); } void enable_smep(void) { uint64_t val64 = 0UL; /* Enable CR4.SMEP*/ CPU_CR_READ(cr4, &val64); CPU_CR_WRITE(cr4, val64 | CR4_SMEP); } void init_paging(void) { struct e820_entry *entry; uint64_t hv_hpa; uint32_t i; uint64_t attr_uc = (PAGE_TABLE | PAGE_CACHE_UC); pr_dbg("HV MMU Initialization"); /* Allocate memory for Hypervisor PML4 table */ mmu_pml4_addr = alloc_paging_struct(); init_e820(); obtain_e820_mem_info(); /* Map all memory regions to UC attribute */ mmu_add((uint64_t *)mmu_pml4_addr, e820_mem.mem_bottom, e820_mem.mem_bottom, e820_mem.mem_top - e820_mem.mem_bottom, attr_uc, PTT_PRIMARY); /* Modify WB attribute for E820_TYPE_RAM */ for (i = 0U; i < e820_entries; i++) { entry = &e820[i]; if (entry->type == E820_TYPE_RAM) { mmu_modify_or_del((uint64_t *)mmu_pml4_addr, entry->baseaddr, entry->length, PAGE_CACHE_WB, PAGE_CACHE_MASK, PTT_PRIMARY, MR_MODIFY); } } /* set the paging-structure entries' U/S flag * to supervisor-mode for hypervisor owned memroy. */ hv_hpa = get_hv_image_base(); mmu_modify_or_del((uint64_t *)mmu_pml4_addr, hv_hpa, CONFIG_RAM_SIZE, PAGE_CACHE_WB, PAGE_CACHE_MASK | PAGE_USER, PTT_PRIMARY, MR_MODIFY); /* Enable paging */ enable_paging(hva2hpa(mmu_pml4_addr)); } void *alloc_paging_struct(void) { void *ptr = NULL; /* Allocate a page from Hypervisor heap */ ptr = alloc_page(); ASSERT(ptr != NULL, "page alloc failed!"); (void)memset(ptr, 0U, CPU_PAGE_SIZE); return ptr; } void free_paging_struct(void *ptr) { if (ptr != NULL) { (void)memset(ptr, 0U, CPU_PAGE_SIZE); free(ptr); } } bool check_continuous_hpa(struct vm *vm, uint64_t gpa_arg, uint64_t size_arg) { uint64_t curr_hpa = 0UL; uint64_t next_hpa = 0UL; uint64_t gpa = gpa_arg; uint64_t size = size_arg; /* if size <= PAGE_SIZE_4K, it is continuous,no need check * if size > PAGE_SIZE_4K, need to fetch next page */ while (size > PAGE_SIZE_4K) { curr_hpa = gpa2hpa(vm, gpa); gpa += PAGE_SIZE_4K; next_hpa = gpa2hpa(vm, gpa); if (next_hpa != (curr_hpa + PAGE_SIZE_4K)) { return false; } size -= PAGE_SIZE_4K; } return true; }