acrn-hypervisor/hypervisor/arch/x86/mmu.c
Huihuang Shi 59771ff461 HV:treewide:fix "Reference parameter to procedure is reassigned"
A parameter of pointer type should not be reassigned within the
function's scope; assign its value to a local variable instead to
fix this.

Signed-off-by: Huihuang Shi <huihuang.shi@intel.com>
Reviewed-by: Junjie Mao <junjie.mao@intel.com>
Acked-by: Eddie Dong <eddie.dong@intel.com>
2018-07-27 12:17:12 +08:00


/*-
* Copyright (c) 2011 NetApp, Inc.
* Copyright (c) 2017 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <hypervisor.h>
#include <reloc.h>
static void *mmu_pml4_addr;
static struct vmx_capability {
uint32_t ept;
uint32_t vpid;
} vmx_caps;
/*
* If the logical processor is in VMX non-root operation and
* the "enable VPID" VM-execution control is 1, the current VPID
* is the value of the VPID VM-execution control field in the VMCS.
* (VM entry ensures that this value is never 0000H).
*/
static uint16_t vmx_vpid_nr = VMX_MIN_NR_VPID;
#define INVEPT_TYPE_SINGLE_CONTEXT 1UL
#define INVEPT_TYPE_ALL_CONTEXTS 2UL
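/* Per the SDM, a failed INVEPT/INVVPID reports VMfailInvalid by setting CF
 * and VMfailValid by setting ZF; the assembly fragment below translates
 * those flags into distinct non-zero error codes.
 */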
#define VMFAIL_INVALID_EPT_VPID \
" jnc 1f\n" \
" mov $1, %0\n" /* CF: error = 1 */ \
" jmp 3f\n" \
"1: jnz 2f\n" \
" mov $2, %0\n" /* ZF: error = 2 */ \
" jmp 3f\n" \
"2: mov $0, %0\n" \
"3:"
struct invept_desc {
uint64_t eptp;
uint64_t _res;
};
static inline void _invvpid(uint64_t type, uint16_t vpid, uint64_t gva)
{
int error = 0;
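/* INVVPID descriptor (SDM): VPID in bits 15:0, bits 63:16 reserved,
 * guest linear address in bits 127:64.
 */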
struct {
uint32_t vpid : 16;
uint32_t rsvd1 : 16;
uint32_t rsvd2 : 32;
uint64_t gva;
} operand = { vpid, 0U, 0U, gva };
asm volatile ("invvpid %1, %2\n"
VMFAIL_INVALID_EPT_VPID
: "=r" (error)
: "m" (operand), "r" (type)
: "memory");
ASSERT(error == 0, "invvpid error");
}
static inline void _invept(uint64_t type, struct invept_desc desc)
{
int error = 0;
asm volatile ("invept %1, %2\n"
VMFAIL_INVALID_EPT_VPID
: "=r" (error)
: "m" (desc), "r" (type)
: "memory");
ASSERT(error == 0, "invept error");
}
static inline void inv_tlb_one_page(void *addr)
{
asm volatile ("invlpg (%0)" : : "r" (addr) : "memory");
}
static inline bool cpu_has_vmx_ept_cap(uint32_t bit_mask)
{
return ((vmx_caps.ept & bit_mask) != 0U);
}
static inline bool cpu_has_vmx_vpid_cap(uint32_t bit_mask)
{
return ((vmx_caps.vpid & bit_mask) != 0U);
}
int check_vmx_mmu_cap(void)
{
uint64_t val;
/* Read the MSR register of EPT and VPID Capability - SDM A.10 */
val = msr_read(MSR_IA32_VMX_EPT_VPID_CAP);
vmx_caps.ept = (uint32_t) val;
vmx_caps.vpid = (uint32_t) (val >> 32);
if (!cpu_has_vmx_ept_cap(VMX_EPT_INVEPT)) {
pr_fatal("%s, invept not supported\n", __func__);
return -ENODEV;
}
if (!cpu_has_vmx_vpid_cap(VMX_VPID_INVVPID) ||
!cpu_has_vmx_vpid_cap(VMX_VPID_INVVPID_SINGLE_CONTEXT) ||
!cpu_has_vmx_vpid_cap(VMX_VPID_INVVPID_GLOBAL_CONTEXT)) {
pr_fatal("%s, invvpid not supported\n", __func__);
return -ENODEV;
}
return 0;
}
uint16_t allocate_vpid(void)
{
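/* atomic_xadd16() follows xadd semantics and returns the value of
 * vmx_vpid_nr before the addition, so each caller gets a unique VPID
 * starting from VMX_MIN_NR_VPID.
 */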
uint16_t vpid = atomic_xadd16(&vmx_vpid_nr, 1U);
/* TODO: vpid overflow */
if (vpid >= VMX_MAX_NR_VPID) {
pr_err("%s, vpid overflow\n", __func__);
/*
* set vmx_vpid_nr to VMX_MAX_NR_VPID to disable vpid allocation,
* since the next atomic_xadd16 will always return a value of at
* least VMX_MAX_NR_VPID.
*/
vmx_vpid_nr = VMX_MAX_NR_VPID;
vpid = 0U;
}
return vpid;
}
void flush_vpid_single(uint16_t vpid)
{
if (vpid == 0U) {
return;
}
_invvpid(VMX_VPID_TYPE_SINGLE_CONTEXT, vpid, 0UL);
}
void flush_vpid_global(void)
{
_invvpid(VMX_VPID_TYPE_ALL_CONTEXT, 0U, 0UL);
}
void invept(struct vcpu *vcpu)
{
struct invept_desc desc = {0};
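/* EPTP format: bits 2:0 = EPT paging-structure memory type (6 = WB),
 * bits 5:3 = EPT page-walk length minus one (3 = 4-level), so the
 * (3UL << 3U) | 6UL below builds a 4-level, write-back EPTP.
 */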
if (cpu_has_vmx_ept_cap(VMX_EPT_INVEPT_SINGLE_CONTEXT)) {
desc.eptp = HVA2HPA(vcpu->vm->arch_vm.nworld_eptp) |
(3UL << 3U) | 6UL;
_invept(INVEPT_TYPE_SINGLE_CONTEXT, desc);
if (vcpu->vm->sworld_control.sworld_enabled &&
vcpu->vm->arch_vm.sworld_eptp != NULL) {
desc.eptp = HVA2HPA(vcpu->vm->arch_vm.sworld_eptp)
| (3UL << 3U) | 6UL;
_invept(INVEPT_TYPE_SINGLE_CONTEXT, desc);
}
} else if (cpu_has_vmx_ept_cap(VMX_EPT_INVEPT_GLOBAL_CONTEXT)) {
_invept(INVEPT_TYPE_ALL_CONTEXTS, desc);
} else {
/* Neither type of INVEPT is supported. Skip. */
}
}
bool check_mmu_1gb_support(enum _page_table_type page_table_type)
{
bool status = false;
if (page_table_type == PTT_EPT) {
status = cpu_has_vmx_ept_cap(VMX_EPT_1GB_PAGE);
} else {
status = cpu_has_cap(X86_FEATURE_PAGE1GB);
}
return status;
}
static inline uint32_t
check_page_table_present(enum _page_table_type page_table_type,
uint64_t table_entry_arg)
{
uint64_t table_entry = table_entry_arg;
if (page_table_type == PTT_EPT) {
table_entry &= (IA32E_EPT_R_BIT | IA32E_EPT_W_BIT |
IA32E_EPT_X_BIT);
/* RWX misconfiguration for:
* - write-only
* - write-execute
* - execute-only (if the execute-only capability is not supported)
* no check for reserved bits
*/
if ((table_entry == IA32E_EPT_W_BIT) ||
(table_entry == (IA32E_EPT_W_BIT | IA32E_EPT_X_BIT)) ||
((table_entry == IA32E_EPT_X_BIT) &&
!cpu_has_vmx_ept_cap(VMX_EPT_EXECUTE_ONLY))) {
return PT_MISCFG_PRESENT;
}
} else {
table_entry &= (IA32E_COMM_P_BIT);
}
return (table_entry != 0U) ? PT_PRESENT : PT_NOT_PRESENT;
}
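/* Write a single leaf entry for vaddr->paddr at the given table level
 * (PDPT/PD/PT) and return the size it maps (1 GB, 2 MB or 4 KB);
 * returns 0 on error or if the level cannot hold a leaf mapping.
 */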
static uint32_t map_mem_region(void *vaddr, void *paddr,
void *table_base, uint64_t attr_arg, uint32_t table_level,
enum _page_table_type table_type)
{
uint64_t table_entry;
uint64_t attr = attr_arg;
uint32_t table_offset;
uint32_t mapped_size;
if (table_base == NULL || table_level >= IA32E_UNKNOWN) {
/* Shouldn't go here */
ASSERT(false, "Incorrect Arguments. Failed to map region");
return 0;
}
/* switch based on the level of the table */
switch (table_level) {
case IA32E_PDPT:
/* Get offset to the entry in the PDPT for this address */
table_offset = IA32E_PDPTE_INDEX_CALC(vaddr);
/* PS bit must be set for these entries to be mapped */
attr |= IA32E_PDPTE_PS_BIT;
/* Set mapped size to 1 GB */
mapped_size = MEM_1G;
break;
case IA32E_PD:
/* Get offset to the entry in the PD for this address */
table_offset = IA32E_PDE_INDEX_CALC(vaddr);
/* PS bit must be set for these entries to be mapped */
attr |= IA32E_PDE_PS_BIT;
/* Set mapped size to 2 MB */
mapped_size = MEM_2M;
break;
case IA32E_PT:
/* Get offset to the entry in the PT for this address */
table_offset = IA32E_PTE_INDEX_CALC(vaddr);
/* NOTE: No PS bit in page table entries */
/* Set mapped size to 4 KB */
mapped_size = MEM_4K;
/* If not an EPT entry, see if the PAT bit is set in the PDPT entry
*/
if ((table_type == PTT_HOST) && (attr & IA32E_PDPTE_PAT_BIT) != 0U) {
/* The PAT bit is set; Clear it and set the page table
* PAT bit instead
*/
attr &= (uint64_t) (~((uint64_t) IA32E_PDPTE_PAT_BIT));
attr |= IA32E_PTE_PAT_BIT;
}
break;
case IA32E_PML4:
default:
/* Set mapping size to 0 - can't map memory in PML4 */
mapped_size = 0U;
break;
}
/* Check to see if mapping should occur */
if (mapped_size != 0U) {
/* Get current table entry */
uint64_t entry = mem_read64(table_base + table_offset);
bool prev_entry_present = false;
switch (check_page_table_present(table_type, entry)) {
case PT_PRESENT:
prev_entry_present = true;
break;
case PT_NOT_PRESENT:
prev_entry_present = false;
break;
case PT_MISCFG_PRESENT:
default:
ASSERT(false, "entry misconfigurated present bits");
return 0;
}
/* No need to confirm that the current table entry is not
 * already present; remapping over an existing mapping
 * (map-->remap) is supported
*/
table_entry = ((table_type == PTT_EPT)
? attr
: (attr | IA32E_COMM_P_BIT));
table_entry |= (uint64_t)paddr;
/* Write the table entry to map this memory */
mem_write64(table_base + table_offset, table_entry);
/* Invalidate the TLB and paging-structure caches; if this is
 * the first mapping there is no need to invalidate the TLB
*/
if ((table_type == PTT_HOST) && prev_entry_present) {
/* currently, all native MMU updates are done on the BSP;
 * the assumption is that there are no MMU updates after
 * the APs start, so we can avoid TLB-shootdown issues on
 * MP systems.
 * For an invlpg needed after AP start, just panic here.
*
* TODO: add shootdown APs operation if MMU will be
* modified after AP start in the future.
*/
if ((phys_cpu_num != 0U) &&
((pcpu_active_bitmap &
((1UL << phys_cpu_num) - 1))
!= (1UL << BOOT_CPU_ID))) {
panic("need shootdown for invlpg");
}
inv_tlb_one_page(vaddr);
}
}
/* Return mapped size to caller */
return mapped_size;
}
static uint32_t fetch_page_table_offset(void *addr, uint32_t table_level)
{
uint32_t table_offset;
/* Switch based on level of table */
switch (table_level) {
case IA32E_PML4:
/* Get offset to the entry in the PML4
* for this address
*/
table_offset = IA32E_PML4E_INDEX_CALC(addr);
break;
case IA32E_PDPT:
/* Get offset to the entry in the PDPT
* for this address
*/
table_offset = IA32E_PDPTE_INDEX_CALC(addr);
break;
case IA32E_PD:
/* Get offset to the entry in the PD
* for this address
*/
table_offset = IA32E_PDE_INDEX_CALC(addr);
break;
case IA32E_PT:
table_offset = IA32E_PTE_INDEX_CALC(addr);
break;
default:
/* all callers should already ensure that execution never
 * reaches here
*/
panic("Wrong page table level");
break;
}
return table_offset;
}
static int get_table_entry(void *addr, void *table_base,
uint32_t table_level, uint64_t *table_entry)
{
uint32_t table_offset;
if (table_base == NULL || table_level >= IA32E_UNKNOWN) {
ASSERT(false, "Incorrect Arguments");
return -EINVAL;
}
table_offset = fetch_page_table_offset(addr, table_level);
/* Read the table entry */
*table_entry = mem_read64(table_base + table_offset);
return 0;
}
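/* Return the next-level paging structure referenced by the entry for
 * addr at table_level, allocating and linking a new sub-table if the
 * entry is not present; returns table_base itself for IA32E_PT.
 */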
static void *walk_paging_struct(void *addr, void *table_base,
uint32_t table_level, struct map_params *map_params,
uint64_t attr)
{
uint32_t table_offset;
uint64_t table_entry;
uint64_t entry_present;
/* if table_level == IA32E_PT, just return the same address;
 * can't walk down any further
*/
void *sub_table_addr = (table_level == IA32E_PT) ? table_base : NULL;
if (table_base == NULL || table_level >= IA32E_UNKNOWN
|| map_params == NULL) {
ASSERT(false, "Incorrect Arguments");
return NULL;
}
table_offset = fetch_page_table_offset(addr, table_level);
/* See if we can skip the rest */
if (sub_table_addr != table_base) {
/* Read the table entry */
table_entry = mem_read64(table_base + table_offset);
/* Check if EPT entry being created */
if (map_params->page_table_type == PTT_EPT) {
/* Set table present bits to any of the
* read/write/execute bits
*/
entry_present = (IA32E_EPT_R_BIT | IA32E_EPT_W_BIT |
IA32E_EPT_X_BIT);
} else {
/* Set table present bits to the P bit or r/w bit */
entry_present = IA32E_COMM_P_BIT;
}
/* Determine if a valid entry exists */
if ((table_entry & entry_present) == 0UL) {
/* No entry present - need to allocate a new table */
sub_table_addr = alloc_paging_struct();
/* Check to ensure memory is available for this structure */
if (sub_table_addr == NULL) {
/* Error: unable to allocate the table memory
 * needed to map this memory
 */
ASSERT(false, "Failed to alloc table memory for mapping");
return NULL;
}
/* Write entry to current table to reference the new
* sub-table
*/
if (map_params->page_table_type == PTT_HOST) {
entry_present |= attr;
}
mem_write64(table_base + table_offset,
HVA2HPA(sub_table_addr) | entry_present);
} else {
/* Get address of the sub-table */
sub_table_addr = HPA2HVA(table_entry & IA32E_REF_MASK);
}
}
/* Return the next table in the walk */
return sub_table_addr;
}
uint64_t get_paging_pml4(void)
{
/* Return address to caller */
return HVA2HPA(mmu_pml4_addr);
}
void enable_paging(uint64_t pml4_base_addr)
{
uint64_t tmp64 = 0UL;
/* Enable Write Protect, inhibiting writing to read-only pages */
CPU_CR_READ(cr0, &tmp64);
CPU_CR_WRITE(cr0, tmp64 | CR0_WP);
CPU_CR_WRITE(cr3, pml4_base_addr);
}
void enable_smep(void)
{
uint64_t val64 = 0UL;
/* Enable CR4.SMEP*/
CPU_CR_READ(cr4, &val64);
CPU_CR_WRITE(cr4, val64 | CR4_SMEP);
}
void init_paging(void)
{
struct e820_entry *entry;
uint64_t hv_hpa;
uint32_t i;
uint64_t attr_uc = (PAGE_TABLE | PAGE_CACHE_UC);
pr_dbg("HV MMU Initialization");
/* Allocate memory for Hypervisor PML4 table */
mmu_pml4_addr = alloc_paging_struct();
init_e820();
obtain_e820_mem_info();
/* Map all memory regions to UC attribute */
mmu_add((uint64_t *)mmu_pml4_addr, e820_mem.mem_bottom,
e820_mem.mem_bottom, e820_mem.mem_top - e820_mem.mem_bottom,
attr_uc, PTT_HOST);
/* Modify WB attribute for E820_TYPE_RAM */
for (i = 0U; i < e820_entries; i++) {
entry = &e820[i];
if (entry->type == E820_TYPE_RAM) {
mmu_modify_or_del((uint64_t *)mmu_pml4_addr,
entry->baseaddr, entry->length,
PAGE_CACHE_WB, PAGE_CACHE_MASK,
PTT_HOST, MR_MODIFY);
}
}
/* set the paging-structure entries' U/S flag
* to supervisor-mode for hypervisor-owned memory.
*/
hv_hpa = get_hv_image_base();
mmu_modify_or_del((uint64_t *)mmu_pml4_addr, hv_hpa, CONFIG_RAM_SIZE,
PAGE_CACHE_WB, PAGE_CACHE_MASK | PAGE_USER,
PTT_HOST, MR_MODIFY);
/* Enable paging */
enable_paging(HVA2HPA(mmu_pml4_addr));
}
void *alloc_paging_struct(void)
{
void *ptr = NULL;
/* Allocate a page from Hypervisor heap */
ptr = alloc_page();
ASSERT(ptr != NULL, "page alloc failed!");
(void)memset(ptr, 0, CPU_PAGE_SIZE);
return ptr;
}
void free_paging_struct(void *ptr)
{
if (ptr != NULL) {
(void)memset(ptr, 0, CPU_PAGE_SIZE);
free(ptr);
}
}
bool check_continuous_hpa(struct vm *vm, uint64_t gpa_arg, uint64_t size_arg)
{
uint64_t curr_hpa = 0UL;
uint64_t next_hpa = 0UL;
uint64_t gpa = gpa_arg;
uint64_t size = size_arg;
/* if size <= PAGE_SIZE_4K, the region is trivially contiguous and
 * no check is needed; if size > PAGE_SIZE_4K, fetch the next page
 * and verify that it immediately follows the current one
*/
while (size > PAGE_SIZE_4K) {
curr_hpa = gpa2hpa(vm, gpa);
gpa += PAGE_SIZE_4K;
next_hpa = gpa2hpa(vm, gpa);
if (next_hpa != (curr_hpa + PAGE_SIZE_4K)) {
return false;
}
size -= PAGE_SIZE_4K;
}
return true;
}
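/* Walk PML4 -> PDPT -> PD -> PT for addr and fill *entry with the
 * lowest-level entry reached, stopping early at a non-present entry
 * or at a 1 GB / 2 MB large-page mapping.
 */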
int obtain_last_page_table_entry(struct map_params *map_params,
struct entry_params *entry, void *addr, bool direct)
{
uint64_t table_entry;
uint32_t entry_present = 0U;
int ret = 0;
/* Obtain the PML4 address */
void *table_addr = direct ? (map_params->pml4_base)
: (map_params->pml4_inverted);
/* Obtain page table entry from PML4 table*/
ret = get_table_entry(addr, table_addr, IA32E_PML4, &table_entry);
if (ret < 0) {
return ret;
}
entry_present = check_page_table_present(map_params->page_table_type,
table_entry);
if (entry_present == PT_MISCFG_PRESENT) {
pr_err("Present bits misconfigurated");
return -EINVAL;
} else if (entry_present == PT_NOT_PRESENT) {
/* PML4E not present, return PML4 base address */
entry->entry_level = IA32E_PML4;
entry->entry_base = table_addr;
entry->entry_present = PT_NOT_PRESENT;
entry->page_size =
check_mmu_1gb_support(map_params->page_table_type) ?
(PAGE_SIZE_1G) : (PAGE_SIZE_2M);
entry->entry_off = fetch_page_table_offset(addr, IA32E_PML4);
entry->entry_val = table_entry;
return 0;
}
/* Obtain page table entry from PDPT table*/
table_addr = HPA2HVA(table_entry & IA32E_REF_MASK);
ret = get_table_entry(addr, table_addr, IA32E_PDPT, &table_entry);
if (ret < 0) {
return ret;
}
entry_present = check_page_table_present(map_params->page_table_type,
table_entry);
if (entry_present == PT_MISCFG_PRESENT) {
pr_err("Present bits misconfigurated");
return -EINVAL;
} else if (entry_present == PT_NOT_PRESENT) {
/* PDPTE not present, return PDPT base address */
entry->entry_level = IA32E_PDPT;
entry->entry_base = table_addr;
entry->entry_present = PT_NOT_PRESENT;
entry->page_size =
check_mmu_1gb_support(map_params->page_table_type) ?
(PAGE_SIZE_1G) : (PAGE_SIZE_2M);
entry->entry_off = fetch_page_table_offset(addr, IA32E_PDPT);
entry->entry_val = table_entry;
return 0;
}
if ((table_entry & IA32E_PDPTE_PS_BIT) != 0U) {
/* 1GB page size, return the base addr of the pg entry*/
entry->entry_level = IA32E_PDPT;
entry->entry_base = table_addr;
entry->page_size =
check_mmu_1gb_support(map_params->page_table_type) ?
(PAGE_SIZE_1G) : (PAGE_SIZE_2M);
entry->entry_present = PT_PRESENT;
entry->entry_off = fetch_page_table_offset(addr, IA32E_PDPT);
entry->entry_val = table_entry;
return 0;
}
/* Obtain page table entry from PD table*/
table_addr = HPA2HVA(table_entry & IA32E_REF_MASK);
ret = get_table_entry(addr, table_addr, IA32E_PD, &table_entry);
if (ret < 0) {
return ret;
}
entry_present = check_page_table_present(map_params->page_table_type,
table_entry);
if (entry_present == PT_MISCFG_PRESENT) {
pr_err("Present bits misconfigurated");
return -EINVAL;
} else if (entry_present == PT_NOT_PRESENT) {
/* PDE not present, return PDE base address */
entry->entry_level = IA32E_PD;
entry->entry_base = table_addr;
entry->entry_present = PT_NOT_PRESENT;
entry->page_size = PAGE_SIZE_2M;
entry->entry_off = fetch_page_table_offset(addr, IA32E_PD);
entry->entry_val = table_entry;
return 0;
}
if ((table_entry & IA32E_PDE_PS_BIT) != 0U) {
/* 2MB page size, return the base addr of the pg entry*/
entry->entry_level = IA32E_PD;
entry->entry_base = table_addr;
entry->entry_present = PT_PRESENT;
entry->page_size = PAGE_SIZE_2M;
entry->entry_off = fetch_page_table_offset(addr, IA32E_PD);
entry->entry_val = table_entry;
return 0;
}
/* Obtain page table entry from PT table*/
table_addr = HPA2HVA(table_entry & IA32E_REF_MASK);
ret = get_table_entry(addr, table_addr, IA32E_PT, &table_entry);
if (ret < 0) {
return ret;
}
entry_present = check_page_table_present(map_params->page_table_type,
table_entry);
if (entry_present == PT_MISCFG_PRESENT) {
pr_err("Present bits misconfigurated");
return -EINVAL;
}
entry->entry_present = ((entry_present == PT_PRESENT)
? (PT_PRESENT):(PT_NOT_PRESENT));
entry->entry_level = IA32E_PT;
entry->entry_base = table_addr;
entry->page_size = PAGE_SIZE_4K;
entry->entry_off = fetch_page_table_offset(addr, IA32E_PT);
entry->entry_val = table_entry;
return 0;
}
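/* Install a single mapping for vaddr->paddr using the largest page size
 * (1 GB, 2 MB or 4 KB) allowed by the remaining size, the alignment of
 * both addresses and the 1 GB-page capability; returns the size mapped.
 */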
static uint64_t update_page_table_entry(struct map_params *map_params,
void *paddr, void *vaddr, uint64_t size, uint64_t attr,
bool direct)
{
uint64_t remaining_size = size;
uint32_t adjustment_size;
int table_type = map_params->page_table_type;
/* Obtain the PML4 address */
void *table_addr = direct ? (map_params->pml4_base)
: (map_params->pml4_inverted);
/* Walk from the PML4 table to the PDPT table */
table_addr = walk_paging_struct(vaddr, table_addr, IA32E_PML4,
map_params, attr);
if (table_addr == NULL) {
return 0;
}
if ((remaining_size >= MEM_1G)
&& (MEM_ALIGNED_CHECK(vaddr, MEM_1G))
&& (MEM_ALIGNED_CHECK(paddr, MEM_1G))
&& check_mmu_1gb_support(map_params->page_table_type)) {
/* Map this 1 GByte memory region */
adjustment_size = map_mem_region(vaddr, paddr,
table_addr, attr, IA32E_PDPT,
table_type);
} else if ((remaining_size >= MEM_2M)
&& (MEM_ALIGNED_CHECK(vaddr, MEM_2M))
&& (MEM_ALIGNED_CHECK(paddr, MEM_2M))) {
/* Walk from the PDPT table to the PD table */
table_addr = walk_paging_struct(vaddr, table_addr,
IA32E_PDPT, map_params, attr);
if (table_addr == NULL) {
return 0;
}
/* Map this 2 MByte memory region */
adjustment_size = map_mem_region(vaddr, paddr,
table_addr, attr, IA32E_PD, table_type);
} else {
/* Walk from the PDPT table to the PD table */
table_addr = walk_paging_struct(vaddr,
table_addr, IA32E_PDPT, map_params, attr);
if (table_addr == NULL) {
return 0;
}
/* Walk from the PD table to the page table */
table_addr = walk_paging_struct(vaddr,
table_addr, IA32E_PD, map_params, attr);
if (table_addr == NULL) {
return 0;
}
/* Map this 4 KByte memory region */
adjustment_size = map_mem_region(vaddr, paddr,
table_addr, attr, IA32E_PT,
table_type);
}
return adjustment_size;
}
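/* Split the large page that currently maps vaddr into pages of the next
 * smaller size (1 GB -> 2 MB, 2 MB -> 4 KB), preserving the original
 * attributes, and return the new page size (0 on failure).
 */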
static uint64_t break_page_table(struct map_params *map_params, void *paddr,
void *vaddr, uint64_t page_size, bool direct)
{
uint32_t i = 0U;
uint64_t pa;
uint64_t attr = 0x0UL;
uint64_t next_page_size = 0x0UL;
void *sub_tab_addr = NULL;
struct entry_params entry;
switch (page_size) {
/* Breaking 1GB page to 2MB page*/
case PAGE_SIZE_1G:
next_page_size = PAGE_SIZE_2M;
attr |= IA32E_PDE_PS_BIT;
pr_info("%s, Breaking 1GB -->2MB vaddr=0x%llx",
__func__, vaddr);
break;
/* Breaking 2MB page to 4KB page*/
case PAGE_SIZE_2M:
next_page_size = PAGE_SIZE_4K;
pr_info("%s, Breaking 2MB -->4KB vaddr=0x%llx",
__func__, vaddr);
break;
/* 4KB page, No action*/
case PAGE_SIZE_4K:
default:
next_page_size = PAGE_SIZE_4K;
pr_info("%s, Breaking 4KB no action vaddr=0x%llx",
__func__, vaddr);
break;
}
if (page_size != next_page_size) {
if (obtain_last_page_table_entry(map_params, &entry, vaddr,
direct) < 0) {
pr_err("Fail to obtain last page table entry");
return 0;
}
/* Need to allocate a new sub-table to hold the smaller pages */
sub_tab_addr = alloc_paging_struct();
/* Check to ensure memory available for this structure */
if (sub_tab_addr == NULL) {
/* Error:
 * unable to allocate the table memory needed for the mapping
 */
pr_err("Failed to alloc table memory for mapping");
ASSERT(false, "failed to alloc table memory for mapping");
return 0;
}
/* the physical address may not be aligned to the
 * current page size; obtain the starting physical address
 * aligned to the current page size
*/
pa = ((uint64_t)paddr) & ~(page_size - 1);
if (map_params->page_table_type == PTT_EPT) {
/* Keep original attribute(here &0x3f)
* bit 0(R) bit1(W) bit2(X) bit3~5 MT
*/
attr |= (entry.entry_val & 0x3fUL);
} else {
/* Keep original attribute(here &0x7f) */
attr |= (entry.entry_val & 0x7fUL);
}
/* write all entries and keep original attr*/
for (i = 0U; i < IA32E_NUM_ENTRIES; i++) {
mem_write64(sub_tab_addr + (i * IA32E_COMM_ENTRY_SIZE),
(attr | (pa + (i * next_page_size))));
}
if (map_params->page_table_type == PTT_EPT) {
/* Write the table entry to map this memory,
* SDM chapter28 figure 28-1
* bit 0(R) bit1(W) bit2(X) bit3~5 MUST be reserved
* (here &0x07)
*/
mem_write64(entry.entry_base + entry.entry_off,
(entry.entry_val & 0x07UL) |
HVA2HPA(sub_tab_addr));
} else {
/* Write the table entry to map this memory,
* SDM chapter4 figure 4-11
* bit0(P) bit1(RW) bit2(U/S) bit3(PWT) bit4(PCD)
* bit5(A) bit6(D or Ignore)
*/
mem_write64(entry.entry_base + entry.entry_off,
(entry.entry_val & 0x7fUL) |
HVA2HPA(sub_tab_addr));
}
}
return next_page_size;
}
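/* Map or modify the page-table entries covering [vaddr, vaddr + size)
 * so that they point at paddr with the given attributes, breaking
 * existing large pages whenever the range or alignment requires it.
 */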
static int modify_paging(struct map_params *map_params, void *paddr_arg,
void *vaddr_arg, uint64_t size, uint32_t flags, bool direct)
{
void *vaddr = vaddr_arg;
void *paddr = paddr_arg;
int64_t remaining_size;
uint64_t adjust_size;
uint64_t attr = flags;
struct entry_params entry;
uint64_t page_size;
uint64_t vaddr_end = ((uint64_t)vaddr) + size;
/* if the address is not page-aligned, the unaligned
 * part will be dropped
*/
paddr = (void *)ROUND_PAGE_UP((uint64_t)paddr);
vaddr = (void *)ROUND_PAGE_UP((uint64_t)vaddr);
vaddr_end = ROUND_PAGE_DOWN(vaddr_end);
remaining_size = vaddr_end - (uint64_t)vaddr;
if (map_params == NULL) {
pr_err("%s: vaddr=0x%llx size=0x%llx",
__func__, vaddr, size);
ASSERT(false, "Incorrect Arguments");
return -EINVAL;
}
/* Check for EPT misconfigurations; RWX is misconfigured in
 * the following cases:
 * - write-only
 * - write-execute
 * - execute-only (if the capability is not supported)
 * here attr & 0x7 covers the RWX bits 0:2
*/
ASSERT(!((map_params->page_table_type == PTT_EPT) &&
(((attr & 0x7UL) == IA32E_EPT_W_BIT) ||
((attr & 0x7UL) == (IA32E_EPT_W_BIT | IA32E_EPT_X_BIT)) ||
(((attr & 0x7UL) == IA32E_EPT_X_BIT) &&
!cpu_has_vmx_ept_cap(VMX_EPT_EXECUTE_ONLY)))),
"incorrect memory attribute set!\n");
/* Loop until the entire block of memory is appropriately
* MAP/UNMAP/MODIFY
*/
while (remaining_size > 0) {
if (obtain_last_page_table_entry(map_params, &entry, vaddr,
direct) < 0) {
return -EINVAL;
}
page_size = entry.page_size;
/* if the address is NOT aligned to the current page size,
 * or the required memory size < page size,
 * the page needs to be broken down first
*/
if (entry.entry_present == PT_PRESENT) {
/* May need recursive breaking in this case,
 * e.g. 1GB->2MB->4KB
*/
while ((uint64_t)remaining_size < page_size
|| (!MEM_ALIGNED_CHECK(vaddr, page_size))
|| (!MEM_ALIGNED_CHECK(paddr, page_size))) {
/* The breaking function returns the page size
 * of the next-level page table
*/
page_size = break_page_table(map_params,
paddr, vaddr, page_size, direct);
if (page_size == 0UL) {
return -EINVAL;
}
}
} else {
page_size = ((uint64_t)remaining_size < page_size)
? ((uint64_t)remaining_size) : (page_size);
}
/* The function returns the memory size that one entry can map */
adjust_size = update_page_table_entry(map_params, paddr, vaddr,
page_size, attr, direct);
if (adjust_size == 0UL) {
return -EINVAL;
}
vaddr += adjust_size;
paddr += adjust_size;
remaining_size -= adjust_size;
}
return 0;
}
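/* For the host MMU only the direct table is updated. For EPT the mapping
 * is installed twice: gpa->hpa in pml4_base (direct) and hpa->gpa in
 * pml4_inverted (inverted), by swapping paddr and vaddr in the second call.
 */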
int map_mem(struct map_params *map_params, void *paddr, void *vaddr,
uint64_t size, uint32_t flags)
{
int ret = 0;
/* used for both MMU and EPT */
ret = modify_paging(map_params, paddr, vaddr, size, flags, true);
if (ret < 0) {
return ret;
}
/* only for EPT */
if (map_params->page_table_type == PTT_EPT) {
ret = modify_paging(map_params, vaddr, paddr, size, flags,
false);
}
return ret;
}