acrn-hypervisor/hypervisor/arch/x86/pagetable.c
Li Fei1 e5ae37eb69 hv: mmu: minor fix about add_pte
In commit 127c73c3, we removed the strict check for adding a page table mapping. However,
we only replaced the ASSERT with a pr_fatal in add_pte, which is not enough. We should still
advance the virtual address by 4K when the page table mapping already exists, and check whether
the virtual address has gone past the virtual address region for this mapping. Otherwise, the
complaint would be repeated up to 512 times.

Tracked-On: #3475
Signed-off-by: Li Fei1 <fei1.li@intel.com>
2020-03-09 10:03:01 +08:00
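
The core of the change, shown as a condensed sketch of the resulting loop in add_pte() (the full function is in the file below): an already-present entry now only triggers a log message, and vaddr/paddr still advance by 4K on every iteration so the walk terminates once vaddr reaches vaddr_end.

    for (; index < PTRS_PER_PTE; index++) {
        uint64_t *pte = pt_page + index;
        if (mem_ops->pgentry_present(*pte) != 0UL) {
            /* complain, but keep walking instead of asserting */
            pr_fatal("%s, pte 0x%lx is already present!\n", __func__, vaddr);
        } else {
            set_pgentry(pte, paddr | prot, mem_ops);
        }
        /* always advance by 4K and stop at the end of the requested region */
        paddr += PTE_SIZE;
        vaddr += PTE_SIZE;
        if (vaddr >= vaddr_end) {
            break;
        }
    }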

448 lines
13 KiB
C

/*
* Copyright (C) 2018 Intel Corporation. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include <types.h>
#include <util.h>
#include <acrn_hv_defs.h>
#include <page.h>
#include <mmu.h>
#include <logmsg.h>
#define DBG_LEVEL_MMU 6U
/*
* Split a large page mapping into the next level page table.
*
* @pre: level can only be IA32E_PDPT or IA32E_PD
*/
static void split_large_page(uint64_t *pte, enum _page_table_level level,
uint64_t vaddr, const struct memory_ops *mem_ops)
{
uint64_t *pbase;
uint64_t ref_paddr, paddr, paddrinc;
uint64_t i, ref_prot;
switch (level) {
case IA32E_PDPT:
ref_paddr = (*pte) & PDPTE_PFN_MASK;
paddrinc = PDE_SIZE;
ref_prot = (*pte) & ~PDPTE_PFN_MASK;
pbase = (uint64_t *)mem_ops->get_pd_page(mem_ops->info, vaddr);
break;
default: /* IA32E_PD */
ref_paddr = (*pte) & PDE_PFN_MASK;
paddrinc = PTE_SIZE;
ref_prot = (*pte) & ~PDE_PFN_MASK;
ref_prot &= ~PAGE_PSE;
mem_ops->recover_exe_right(&ref_prot);
pbase = (uint64_t *)mem_ops->get_pt_page(mem_ops->info, vaddr);
break;
}
dev_dbg(DBG_LEVEL_MMU, "%s, paddr: 0x%lx, pbase: 0x%lx\n", __func__, ref_paddr, pbase);
paddr = ref_paddr;
for (i = 0UL; i < PTRS_PER_PTE; i++) {
set_pgentry(pbase + i, paddr | ref_prot, mem_ops);
paddr += paddrinc;
}
ref_prot = mem_ops->get_default_access_right();
set_pgentry(pte, hva2hpa((void *)pbase) | ref_prot, mem_ops);
/* TODO: flush the TLB */
}
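/*
 * Update a single paging-structure entry in place: for MR_MODIFY, clear the prot_clr bits and
 * set the prot_set bits; for any other type (MR_DEL), reset the entry to its sanitized,
 * non-present state.
 */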
static inline void local_modify_or_del_pte(uint64_t *pte,
uint64_t prot_set, uint64_t prot_clr, uint32_t type, const struct memory_ops *mem_ops)
{
if (type == MR_MODIFY) {
uint64_t new_pte = *pte;
new_pte &= ~prot_clr;
new_pte |= prot_set;
set_pgentry(pte, new_pte, mem_ops);
} else {
sanitize_pte_entry(pte, mem_ops);
}
}
/*
* pgentry may refer to a pml4e, pdpte or pde
*/
static inline void construct_pgentry(uint64_t *pde, void *pd_page, uint64_t prot, const struct memory_ops *mem_ops)
{
sanitize_pte((uint64_t *)pd_page, mem_ops);
set_pgentry(pde, hva2hpa(pd_page) | prot, mem_ops);
}
/*
* In PT level,
* type: MR_MODIFY
* modify [vaddr_start, vaddr_end) memory type or page access rights.
* type: MR_DEL
* delete the [vaddr_start, vaddr_end) page table mapping.
*/
static void modify_or_del_pte(const uint64_t *pde, uint64_t vaddr_start, uint64_t vaddr_end,
uint64_t prot_set, uint64_t prot_clr, const struct memory_ops *mem_ops, uint32_t type)
{
uint64_t *pt_page = pde_page_vaddr(*pde);
uint64_t vaddr = vaddr_start;
uint64_t index = pte_index(vaddr);
dev_dbg(DBG_LEVEL_MMU, "%s, vaddr: [0x%lx - 0x%lx]\n", __func__, vaddr, vaddr_end);
for (; index < PTRS_PER_PTE; index++) {
uint64_t *pte = pt_page + index;
if ((mem_ops->pgentry_present(*pte) == 0UL)) {
/* Suppress the warning message for low memory (< 1 MB), as the Service VM
* will update MTRR attributes for this region by default whether it
* is present or not.
*/
if ((type == MR_MODIFY) && (vaddr >= MEM_1M)) {
pr_warn("%s, vaddr: 0x%lx pte is not present.\n", __func__, vaddr);
}
} else {
local_modify_or_del_pte(pte, prot_set, prot_clr, type, mem_ops);
}
vaddr += PTE_SIZE;
if (vaddr >= vaddr_end) {
break;
}
}
}
/*
* In PD level,
* type: MR_MODIFY
* modify [vaddr_start, vaddr_end) memory type or page access rights.
* type: MR_DEL
* delete the [vaddr_start, vaddr_end) page table mapping.
*/
static void modify_or_del_pde(const uint64_t *pdpte, uint64_t vaddr_start, uint64_t vaddr_end,
uint64_t prot_set, uint64_t prot_clr, const struct memory_ops *mem_ops, uint32_t type)
{
uint64_t *pd_page = pdpte_page_vaddr(*pdpte);
uint64_t vaddr = vaddr_start;
uint64_t index = pde_index(vaddr);
dev_dbg(DBG_LEVEL_MMU, "%s, vaddr: [0x%lx - 0x%lx]\n", __func__, vaddr, vaddr_end);
for (; index < PTRS_PER_PDE; index++) {
uint64_t *pde = pd_page + index;
uint64_t vaddr_next = (vaddr & PDE_MASK) + PDE_SIZE;
if (mem_ops->pgentry_present(*pde) == 0UL) {
if (type == MR_MODIFY) {
pr_warn("%s, addr: 0x%lx pde is not present.\n", __func__, vaddr);
}
} else {
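/* A present 2 MB mapping must be split into 4 KB entries when the request covers only part
 * of it or starts at an unaligned address; otherwise the 2 MB entry is handled as a whole. */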
if (pde_large(*pde) != 0UL) {
if ((vaddr_next > vaddr_end) || (!mem_aligned_check(vaddr, PDE_SIZE))) {
split_large_page(pde, IA32E_PD, vaddr, mem_ops);
} else {
local_modify_or_del_pte(pde, prot_set, prot_clr, type, mem_ops);
if (vaddr_next < vaddr_end) {
vaddr = vaddr_next;
continue;
}
break; /* done */
}
}
modify_or_del_pte(pde, vaddr, vaddr_end, prot_set, prot_clr, mem_ops, type);
}
if (vaddr_next >= vaddr_end) {
break; /* done */
}
vaddr = vaddr_next;
}
}
/*
* In PDPT level,
* type: MR_MODIFY
* modify [vaddr_start, vaddr_end) memory type or page access rights.
* type: MR_DEL
* delete the [vaddr_start, vaddr_end) page table mapping.
*/
static void modify_or_del_pdpte(const uint64_t *pml4e, uint64_t vaddr_start, uint64_t vaddr_end,
uint64_t prot_set, uint64_t prot_clr, const struct memory_ops *mem_ops, uint32_t type)
{
uint64_t *pdpt_page = pml4e_page_vaddr(*pml4e);
uint64_t vaddr = vaddr_start;
uint64_t index = pdpte_index(vaddr);
dev_dbg(DBG_LEVEL_MMU, "%s, vaddr: [0x%lx - 0x%lx]\n", __func__, vaddr, vaddr_end);
for (; index < PTRS_PER_PDPTE; index++) {
uint64_t *pdpte = pdpt_page + index;
uint64_t vaddr_next = (vaddr & PDPTE_MASK) + PDPTE_SIZE;
if (mem_ops->pgentry_present(*pdpte) == 0UL) {
if (type == MR_MODIFY) {
pr_warn("%s, vaddr: 0x%lx pdpte is not present.\n", __func__, vaddr);
}
} else {
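/* Likewise at this level: split a present 1 GB mapping when the request covers only part of it
 * or starts at an unaligned address; otherwise handle the 1 GB entry as a whole. */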
if (pdpte_large(*pdpte) != 0UL) {
if ((vaddr_next > vaddr_end) ||
(!mem_aligned_check(vaddr, PDPTE_SIZE))) {
split_large_page(pdpte, IA32E_PDPT, vaddr, mem_ops);
} else {
local_modify_or_del_pte(pdpte, prot_set, prot_clr, type, mem_ops);
if (vaddr_next < vaddr_end) {
vaddr = vaddr_next;
continue;
}
break; /* done */
}
}
modify_or_del_pde(pdpte, vaddr, vaddr_end, prot_set, prot_clr, mem_ops, type);
}
if (vaddr_next >= vaddr_end) {
break; /* done */
}
vaddr = vaddr_next;
}
}
/*
* type: MR_MODIFY
* modify [vaddr, vaddr + size) memory type or page access rights.
* prot_clr - memory type or page access rights to be cleared
* prot_set - memory type or page access rights to be set
* @pre: prot_set and prot_clr should be set before calling this function.
* If you just want to modify access rights, set prot_set to the rights you
* want to set and prot_clr to the rights you want to clear. But if you
* want to modify the memory type, set prot_set to the memory type you want
* to set and prot_clr to the memory type mask.
* type: MR_DEL
* delete [vaddr_base, vaddr_base + size) memory region page table mapping.
*/
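/*
 * Illustrative use only; ppt_mmu_pml4_addr, ppt_mem_ops and the PAGE_CACHE_* flags below are
 * assumptions about the surrounding hypervisor code, not defined in this file. Remapping a
 * 2 MB range of the hypervisor's primary page tables as write-back might look like:
 *   mmu_modify_or_del(ppt_mmu_pml4_addr, base_hva, 0x200000UL,
 *                     PAGE_CACHE_WB, PAGE_CACHE_MASK, &ppt_mem_ops, MR_MODIFY);
 * and deleting the same mapping like:
 *   mmu_modify_or_del(ppt_mmu_pml4_addr, base_hva, 0x200000UL, 0UL, 0UL, &ppt_mem_ops, MR_DEL);
 */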
void mmu_modify_or_del(uint64_t *pml4_page, uint64_t vaddr_base, uint64_t size,
uint64_t prot_set, uint64_t prot_clr, const struct memory_ops *mem_ops, uint32_t type)
{
uint64_t vaddr = round_page_up(vaddr_base);
uint64_t vaddr_next, vaddr_end;
uint64_t *pml4e;
vaddr_end = vaddr + round_page_down(size);
dev_dbg(DBG_LEVEL_MMU, "%s, vaddr: 0x%lx, size: 0x%lx\n",
__func__, vaddr, size);
while (vaddr < vaddr_end) {
vaddr_next = (vaddr & PML4E_MASK) + PML4E_SIZE;
pml4e = pml4e_offset(pml4_page, vaddr);
if ((mem_ops->pgentry_present(*pml4e) == 0UL) && (type == MR_MODIFY)) {
ASSERT(false, "invalid op, pml4e not present");
} else {
modify_or_del_pdpte(pml4e, vaddr, vaddr_end, prot_set, prot_clr, mem_ops, type);
vaddr = vaddr_next;
}
}
}
/*
* In PT level,
* map [vaddr_start, vaddr_end) to [paddr_base, ...) in the page tables
*/
static void add_pte(const uint64_t *pde, uint64_t paddr_start, uint64_t vaddr_start, uint64_t vaddr_end,
uint64_t prot, const struct memory_ops *mem_ops)
{
uint64_t *pt_page = pde_page_vaddr(*pde);
uint64_t vaddr = vaddr_start;
uint64_t paddr = paddr_start;
uint64_t index = pte_index(vaddr);
dev_dbg(DBG_LEVEL_MMU, "%s, paddr: 0x%lx, vaddr: [0x%lx - 0x%lx]\n",
__func__, paddr, vaddr_start, vaddr_end);
for (; index < PTRS_PER_PTE; index++) {
uint64_t *pte = pt_page + index;
if (mem_ops->pgentry_present(*pte) != 0UL) {
pr_fatal("%s, pte 0x%lx is already present!\n", __func__, vaddr);
} else {
set_pgentry(pte, paddr | prot, mem_ops);
}
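/* Whether or not the entry was already present, keep advancing by 4K and stop once the end of
 * the requested region is reached instead of walking all 512 entries. */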
paddr += PTE_SIZE;
vaddr += PTE_SIZE;
if (vaddr >= vaddr_end) {
break; /* done */
}
}
}
/*
* In PD level,
* map [vaddr_start, vaddr_end) to [paddr_base, ...) in the page tables
*/
static void add_pde(const uint64_t *pdpte, uint64_t paddr_start, uint64_t vaddr_start, uint64_t vaddr_end,
uint64_t prot, const struct memory_ops *mem_ops)
{
uint64_t *pd_page = pdpte_page_vaddr(*pdpte);
uint64_t vaddr = vaddr_start;
uint64_t paddr = paddr_start;
uint64_t index = pde_index(vaddr);
dev_dbg(DBG_LEVEL_MMU, "%s, paddr: 0x%lx, vaddr: [0x%lx - 0x%lx]\n",
__func__, paddr, vaddr, vaddr_end);
for (; index < PTRS_PER_PDE; index++) {
uint64_t *pde = pd_page + index;
uint64_t vaddr_next = (vaddr & PDE_MASK) + PDE_SIZE;
if (pde_large(*pde) != 0UL) {
pr_fatal("%s, pde 0x%lx is already present!\n", __func__, vaddr);
} else {
if (mem_ops->pgentry_present(*pde) == 0UL) {
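/* Use a 2 MB mapping directly when large pages are enabled, paddr and vaddr are both 2 MB
 * aligned and the remaining range covers the whole 2 MB; otherwise allocate a page table and
 * fall through to 4 KB mappings via add_pte(). */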
if (mem_ops->large_page_enabled &&
mem_aligned_check(paddr, PDE_SIZE) &&
mem_aligned_check(vaddr, PDE_SIZE) &&
(vaddr_next <= vaddr_end)) {
mem_ops->tweak_exe_right(&prot);
set_pgentry(pde, paddr | (prot | PAGE_PSE), mem_ops);
if (vaddr_next < vaddr_end) {
paddr += (vaddr_next - vaddr);
vaddr = vaddr_next;
continue;
}
break; /* done */
} else {
void *pt_page = mem_ops->get_pt_page(mem_ops->info, vaddr);
construct_pgentry(pde, pt_page, mem_ops->get_default_access_right(), mem_ops);
}
}
add_pte(pde, paddr, vaddr, vaddr_end, prot, mem_ops);
}
if (vaddr_next >= vaddr_end) {
break; /* done */
}
paddr += (vaddr_next - vaddr);
vaddr = vaddr_next;
}
}
/*
* In PDPT level,
* map [vaddr_start, vaddr_end) to [paddr_base, ...) in the page tables
*/
static void add_pdpte(const uint64_t *pml4e, uint64_t paddr_start, uint64_t vaddr_start, uint64_t vaddr_end,
uint64_t prot, const struct memory_ops *mem_ops)
{
uint64_t *pdpt_page = pml4e_page_vaddr(*pml4e);
uint64_t vaddr = vaddr_start;
uint64_t paddr = paddr_start;
uint64_t index = pdpte_index(vaddr);
dev_dbg(DBG_LEVEL_MMU, "%s, paddr: 0x%lx, vaddr: [0x%lx - 0x%lx]\n", __func__, paddr, vaddr, vaddr_end);
for (; index < PTRS_PER_PDPTE; index++) {
uint64_t *pdpte = pdpt_page + index;
uint64_t vaddr_next = (vaddr & PDPTE_MASK) + PDPTE_SIZE;
if (pdpte_large(*pdpte) != 0UL) {
pr_fatal("%s, pdpte 0x%lx is already present!\n", __func__, vaddr);
} else {
if (mem_ops->pgentry_present(*pdpte) == 0UL) {
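/* Use a 1 GB mapping directly when large pages are enabled, paddr and vaddr are both 1 GB
 * aligned and the remaining range covers the whole 1 GB; otherwise allocate a page directory
 * and fall through to add_pde(). */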
if (mem_ops->large_page_enabled &&
mem_aligned_check(paddr, PDPTE_SIZE) &&
mem_aligned_check(vaddr, PDPTE_SIZE) &&
(vaddr_next <= vaddr_end)) {
mem_ops->tweak_exe_right(&prot);
set_pgentry(pdpte, paddr | (prot | PAGE_PSE), mem_ops);
if (vaddr_next < vaddr_end) {
paddr += (vaddr_next - vaddr);
vaddr = vaddr_next;
continue;
}
break; /* done */
} else {
void *pd_page = mem_ops->get_pd_page(mem_ops->info, vaddr);
construct_pgentry(pdpte, pd_page, mem_ops->get_default_access_right(), mem_ops);
}
}
add_pde(pdpte, paddr, vaddr, vaddr_end, prot, mem_ops);
}
if (vaddr_next >= vaddr_end) {
break; /* done */
}
paddr += (vaddr_next - vaddr);
vaddr = vaddr_next;
}
}
/*
* action: MR_ADD
* add a page table mapping for the [vaddr_base, vaddr_base + size) memory region.
* @pre: prot should be set before calling this function.
*/
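/*
 * Illustrative use only; ppt_mmu_pml4_addr, ppt_mem_ops and the PAGE_* flags below are
 * assumptions about the surrounding hypervisor code, not defined in this file. Identity-mapping
 * a region into the hypervisor's primary page tables might look like:
 *   mmu_add(ppt_mmu_pml4_addr, region_hpa, region_hpa, region_size,
 *           PAGE_PRESENT | PAGE_RW | PAGE_CACHE_WB, &ppt_mem_ops);
 */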
void mmu_add(uint64_t *pml4_page, uint64_t paddr_base, uint64_t vaddr_base, uint64_t size, uint64_t prot,
const struct memory_ops *mem_ops)
{
uint64_t vaddr, vaddr_next, vaddr_end;
uint64_t paddr;
uint64_t *pml4e;
dev_dbg(DBG_LEVEL_MMU, "%s, paddr 0x%lx, vaddr 0x%lx, size 0x%lx\n", __func__, paddr_base, vaddr_base, size);
/* align address to page size*/
vaddr = round_page_up(vaddr_base);
paddr = round_page_up(paddr_base);
vaddr_end = vaddr + round_page_down(size);
while (vaddr < vaddr_end) {
vaddr_next = (vaddr & PML4E_MASK) + PML4E_SIZE;
pml4e = pml4e_offset(pml4_page, vaddr);
if (mem_ops->pgentry_present(*pml4e) == 0UL) {
void *pdpt_page = mem_ops->get_pdpt_page(mem_ops->info, vaddr);
construct_pgentry(pml4e, pdpt_page, mem_ops->get_default_access_right(), mem_ops);
}
add_pdpte(pml4e, paddr, vaddr, vaddr_end, prot, mem_ops);
paddr += (vaddr_next - vaddr);
vaddr = vaddr_next;
}
}
/**
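* Look up the paging-structure entry that maps addr in the page tables rooted at pml4_page.
* On success, return a pointer to the terminal entry (a 1 GB PDPTE, a 2 MB PDE or a 4 KB PTE)
* and store the mapping size in *pg_size; return NULL when addr is not mapped.
*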
* @pre (pml4_page != NULL) && (pg_size != NULL)
*/
const uint64_t *lookup_address(uint64_t *pml4_page, uint64_t addr, uint64_t *pg_size, const struct memory_ops *mem_ops)
{
const uint64_t *pret = NULL;
bool present = true;
uint64_t *pml4e, *pdpte, *pde, *pte;
pml4e = pml4e_offset(pml4_page, addr);
present = (mem_ops->pgentry_present(*pml4e) != 0UL);
if (present) {
pdpte = pdpte_offset(pml4e, addr);
present = (mem_ops->pgentry_present(*pdpte) != 0UL);
if (present) {
if (pdpte_large(*pdpte) != 0UL) {
*pg_size = PDPTE_SIZE;
pret = pdpte;
} else {
pde = pde_offset(pdpte, addr);
present = (mem_ops->pgentry_present(*pde) != 0UL);
if (present) {
if (pde_large(*pde) != 0UL) {
*pg_size = PDE_SIZE;
pret = pde;
} else {
pte = pte_offset(pde, addr);
present = (mem_ops->pgentry_present(*pte) != 0UL);
if (present) {
*pg_size = PTE_SIZE;
pret = pte;
}
}
}
}
}
}
return pret;
}