hv: page: use dynamic page allocation for pagetable mapping

For the FuSa case, we remove all dynamic memory allocation from the ACRN HV and use static memory allocation or embedded data structures instead. For pagetable pages, we prefer to use an index (hva for MMU, gpa for EPT) to get a page from a dedicated page pool, as the ppt_get_*_page helpers in the diff below illustrate. The pool must be big enough to cover every possible index. This is not a big problem as long as we don't support 64-bit MMIO: without 64-bit MMIO support, the index only needs to cover addresses no larger than DRAM_SIZE + 4G.

However, if ACRN is to support 64-bit MMIO in the SOS, static memory allocation no longer works: there is a huge hole between the top DRAM address and the bottom 64-bit MMIO address, and since the CPU physical address width may be very large, we cannot afford to reserve that many pages for pagetable mapping.

This patch switches pagetable mapping to dynamic page allocation, still backed by a big enough page pool that is reserved up front. For the HV MMU, we don't use 4K-granularity page table mappings, so we reserve PML4, PDPT and PD pages according to the maximum physical address space (PPT va and pa are an identity mapping). For each VM's EPT, we likewise reserve PML4, PDPT and PD pages according to the maximum physical address space (the EPT address space cannot go beyond the physical address space), and we reserve PT pages based on the real use cases: DRAM, low MMIO and high MMIO.
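To make the reservation arithmetic concrete, the per-level page counts simply scale with the covered address space. A minimal sketch of such count helpers, using hypothetical SKETCH_* names (the real PML4_PAGE_NUM/PDPT_PAGE_NUM/PD_PAGE_NUM/PT_PAGE_NUM macros live in the hypervisor's page.h and may differ in detail):

/* one PML4 table covers the whole space; one PDPT page per 512G of
 * address space, one PD page per 1G, one PT page per 2M */
#define SKETCH_PML4_PAGE_NUM(size) 1UL
#define SKETCH_PDPT_PAGE_NUM(size) (((size) + (1UL << 39U) - 1UL) >> 39U)
#define SKETCH_PD_PAGE_NUM(size)   (((size) + (1UL << 30U) - 1UL) >> 30U)
#define SKETCH_PT_PAGE_NUM(size)   (((size) + (1UL << 21U) - 1UL) >> 21U)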

Signed-off-by: Li Fei1 <fei1.li@intel.com>
Tracked-On: #5788
Li Fei1 authored on 2021-02-19 14:09:58 +08:00; committed by wenlingz
parent 312702f2ec
commit 04a104e856
14 changed files with 161 additions and 253 deletions

@@ -13,22 +13,66 @@
#include <vtd.h>
#include <security.h>
#include <vm.h>
#include <logmsg.h>
#define LINEAR_ADDRESS_SPACE_48_BIT (1UL << 48U)
static struct page ppt_pml4_pages[PML4_PAGE_NUM(LINEAR_ADDRESS_SPACE_48_BIT)];
static struct page ppt_pdpt_pages[PDPT_PAGE_NUM(LINEAR_ADDRESS_SPACE_48_BIT)];
static struct page ppt_pd_pages[PD_PAGE_NUM(CONFIG_PLATFORM_RAM_SIZE + PLATFORM_LO_MMIO_SIZE)];
#define MAX_PHY_ADDRESS_SPACE (1UL << MAXIMUM_PA_WIDTH)
/* ppt: primary page table */
static union pgtable_pages_info ppt_pages_info = {
.ppt = {
.pml4_base = ppt_pml4_pages,
.pdpt_base = ppt_pdpt_pages,
.pd_base = ppt_pd_pages,
}
/* PPT VA and PA are an identity mapping */
#define PPT_PML4_PAGE_NUM PML4_PAGE_NUM(MAX_PHY_ADDRESS_SPACE)
#define PPT_PDPT_PAGE_NUM PDPT_PAGE_NUM(MAX_PHY_ADDRESS_SPACE)
#define PPT_PD_PAGE_NUM PD_PAGE_NUM(MAX_PHY_ADDRESS_SPACE)
#define PPT_PT_PAGE_NUM 0UL /* 4K-granularity page mapping is not supported */
/* must be a multiple of 64 */
#define PPT_PAGE_NUM (roundup((PPT_PML4_PAGE_NUM + PPT_PDPT_PAGE_NUM + \
PPT_PD_PAGE_NUM + PPT_PT_PAGE_NUM), 64U))
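/* Worked example, assuming a hypothetical 39-bit physical address width
 * (illustration only): MAX_PHY_ADDRESS_SPACE = 512G, so
 * PPT_PML4_PAGE_NUM = 1, PPT_PDPT_PAGE_NUM = 1, PPT_PD_PAGE_NUM = 512,
 * PPT_PAGE_NUM = roundup(514, 64) = 576 pages (~2.25M for the PPT pool)
 */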
static struct page ppt_pages[PPT_PAGE_NUM];
static uint64_t ppt_page_bitmap[PPT_PAGE_NUM / 64];
/* ppt: primary page pool */
static struct page_pool ppt_page_pool = {
.start_page = ppt_pages,
.bitmap_size = PPT_PAGE_NUM / 64,
.bitmap = ppt_page_bitmap,
.last_hint_id = 0UL,
.dummy_page = NULL,
};
struct page *alloc_page(struct page_pool *pool)
{
struct page *page = NULL;
uint64_t loop_idx, idx, bit;
spinlock_obtain(&pool->lock);
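/* Scan the allocation bitmap one 64-bit word at a time, starting from the
 * word that satisfied the previous request (last_hint_id) and wrapping
 * around; each bit tracks one page in start_page[]. */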
for (loop_idx = pool->last_hint_id;
loop_idx < pool->last_hint_id + pool->bitmap_size; loop_idx++) {
idx = loop_idx % pool->bitmap_size;
if (*(pool->bitmap + idx) != ~0UL) {
bit = ffz64(*(pool->bitmap + idx));
bitmap_set_nolock(bit, pool->bitmap + idx);
page = pool->start_page + ((idx << 6U) + bit);
pool->last_hint_id = idx;
break;
}
}
spinlock_release(&pool->lock);
ASSERT(page != NULL, "no page available!");
page = (page != NULL) ? page : pool->dummy_page;
if (page == NULL) {
/* For HV MMU pagetable mapping, we don't fall back to a dummy page when
* no page is available in the page pool. This is because we only build
* MMU pagetable mappings at early boot time and reserve enough pages for
* them; no MMU pagetable mapping is done after that. So we let the
* system fail to boot when page allocation fails.
*/
panic("no dummy page available!");
}
(void)memset(page, 0U, PAGE_SIZE);
return page;
}
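/* The matching release path is not shown in this hunk. A minimal sketch,
 * assuming the same bitmap layout and a bitmap_clear_nolock() counterpart
 * to bitmap_set_nolock() above (hypothetical here; verify against bits.h):
 *
 * void free_page(struct page_pool *pool, struct page *page)
 * {
 *     uint64_t i = (uint64_t)(page - pool->start_page);
 *
 *     spinlock_obtain(&pool->lock);
 *     bitmap_clear_nolock(i & 0x3FUL, pool->bitmap + (i >> 6U));
 *     spinlock_release(&pool->lock);
 * }
 */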
/* @pre: The PPT and EPT have the same page granularity */
static inline bool large_page_support(enum _page_table_level level)
{
@@ -59,95 +103,82 @@ static inline uint64_t ppt_pgentry_present(uint64_t pte)
return pte & PAGE_PRESENT;
}
static inline struct page *ppt_get_pml4_page(const union pgtable_pages_info *info)
{
struct page *pml4_page = info->ppt.pml4_base;
(void)memset(pml4_page, 0U, PAGE_SIZE);
return pml4_page;
}
static inline struct page *ppt_get_pdpt_page(const union pgtable_pages_info *info, uint64_t gpa)
{
struct page *pdpt_page = info->ppt.pdpt_base + (gpa >> PML4E_SHIFT);
(void)memset(pdpt_page, 0U, PAGE_SIZE);
return pdpt_page;
}
static inline struct page *ppt_get_pd_page(const union pgtable_pages_info *info, uint64_t gpa)
{
struct page *pd_page = info->ppt.pd_base + (gpa >> PDPTE_SHIFT);
(void)memset(pd_page, 0U, PAGE_SIZE);
return pd_page;
}
static inline void nop_tweak_exe_right(uint64_t *entry __attribute__((unused))) {}
static inline void nop_recover_exe_right(uint64_t *entry __attribute__((unused))) {}
const struct memory_ops ppt_mem_ops = {
.info = &ppt_pages_info,
.pool = &ppt_page_pool,
.large_page_support = large_page_support,
.get_default_access_right = ppt_get_default_access_right,
.pgentry_present = ppt_pgentry_present,
.get_pml4_page = ppt_get_pml4_page,
.get_pdpt_page = ppt_get_pdpt_page,
.get_pd_page = ppt_get_pd_page,
.clflush_pagewalk = ppt_clflush_pagewalk,
.tweak_exe_right = nop_tweak_exe_right,
.recover_exe_right = nop_recover_exe_right,
};
static struct page sos_vm_pml4_pages[SOS_VM_NUM][PML4_PAGE_NUM(EPT_ADDRESS_SPACE(CONFIG_SOS_RAM_SIZE))];
static struct page sos_vm_pdpt_pages[SOS_VM_NUM][PDPT_PAGE_NUM(EPT_ADDRESS_SPACE(CONFIG_SOS_RAM_SIZE))];
static struct page sos_vm_pd_pages[SOS_VM_NUM][PD_PAGE_NUM(EPT_ADDRESS_SPACE(CONFIG_SOS_RAM_SIZE))];
/* pre_uos_nworld_pml4_pages */
static struct page pre_uos_nworld_pml4_pages[PRE_VM_NUM][PML4_PAGE_NUM(PRE_VM_EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE))];
static struct page pre_uos_nworld_pdpt_pages[PRE_VM_NUM][PDPT_PAGE_NUM(PRE_VM_EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE))];
static struct page pre_uos_nworld_pd_pages[PRE_VM_NUM][PD_PAGE_NUM(PRE_VM_EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE))];
/* The EPT address space will not go beyond the platform physical address space */
#define EPT_PML4_PAGE_NUM PML4_PAGE_NUM(MAX_PHY_ADDRESS_SPACE)
#define EPT_PDPT_PAGE_NUM PDPT_PAGE_NUM(MAX_PHY_ADDRESS_SPACE)
#define EPT_PD_PAGE_NUM PD_PAGE_NUM(MAX_PHY_ADDRESS_SPACE)
/* post_uos_nworld_pml4_pages */
static struct page post_uos_nworld_pml4_pages[MAX_POST_VM_NUM][PML4_PAGE_NUM(EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE))];
static struct page post_uos_nworld_pdpt_pages[MAX_POST_VM_NUM][PDPT_PAGE_NUM(EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE))];
static struct page post_uos_nworld_pd_pages[MAX_POST_VM_NUM][PD_PAGE_NUM(EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE))];
/* EPT_PT_PAGE_NUM consists of three parts:
* 1) DRAM - contiguous with low MMIO (we can assume this because the ve820 table
* was built by us), CONFIG_PLATFORM_RAM_SIZE at most
* 2) low MMIO - contiguous with DRAM, (MEM_1G << 2U) at most
* 3) high MMIO - only PCI BARs sit in high MMIO (we don't build the high MMIO
* EPT mapping except when writing PCI 64-bit BARs)
*
* The first two parts need at most PT_PAGE_NUM(CONFIG_PLATFORM_RAM_SIZE + (MEM_1G << 2U))
* PT pages to build their EPT mapping;
* the high MMIO part needs at most (CONFIG_MAX_PCI_DEV_NUM * 6U) PT pages, because:
* (a) each 64-bit MMIO BAR needs at most one PT page for its EPT mapping:
* a BAR size must be a power of two and at least 16 bytes, and a BAR base
* address must be aligned to the BAR size. So if a BAR is smaller than 2M,
* one PT page covers its EPT mapping; if it is 2M or larger, it is a multiple
* of 2M and can be mapped with large pages. The single exception is filtering
* the MSI-X structure part out of the MSI-X table BAR, which also costs one PT page.
* (b) each PCI device may have six 64-bit MMIO BARs (three regular BARs plus
* three VF BARs)
* (c) the maximum number of PCI devices for ACRN and the maximum number of
* virtual PCI devices per VM are both CONFIG_MAX_PCI_DEV_NUM
* (see the worked example after EPT_PT_PAGE_NUM below)
*/
#define EPT_PT_PAGE_NUM (PT_PAGE_NUM(CONFIG_PLATFORM_RAM_SIZE + (MEM_1G << 2U)) + \
CONFIG_MAX_PCI_DEV_NUM * 6U)
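/* Worked example with hypothetical config values (illustration only):
 * with CONFIG_PLATFORM_RAM_SIZE = 16G and CONFIG_MAX_PCI_DEV_NUM = 96,
 * PT_PAGE_NUM(16G + (MEM_1G << 2U)) = 20G / 2M = 10240 PT pages (DRAM + low MMIO)
 * CONFIG_MAX_PCI_DEV_NUM * 6U       =            576 PT pages (high MMIO BARs)
 * EPT_PT_PAGE_NUM                   =          10816 PT pages, i.e. ~42M per VM
 */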
/* must be a multiple of 64 */
#define EPT_PAGE_NUM (roundup((EPT_PML4_PAGE_NUM + EPT_PDPT_PAGE_NUM + \
EPT_PD_PAGE_NUM + EPT_PT_PAGE_NUM), 64U))
#define TOTAL_EPT_4K_PAGES_SIZE (CONFIG_MAX_VM_NUM * (EPT_PAGE_NUM) * PAGE_SIZE)
static struct page *ept_pages[CONFIG_MAX_VM_NUM];
static uint64_t ept_page_bitmap[CONFIG_MAX_VM_NUM][EPT_PAGE_NUM / 64];
static struct page ept_dummy_pages[CONFIG_MAX_VM_NUM];
/* ept: extended page pool */
static struct page_pool ept_page_pool[CONFIG_MAX_VM_NUM];
static struct page post_uos_sworld_pgtable_pages[MAX_POST_VM_NUM][TRUSTY_PGTABLE_PAGE_NUM(TRUSTY_RAM_SIZE)];
/* pre-assumption: TRUSTY_RAM_SIZE is 2M aligned */
static struct page post_uos_sworld_memory[MAX_POST_VM_NUM][TRUSTY_RAM_SIZE >> PAGE_SHIFT] __aligned(MEM_2M);
/* ept: extended page table */
static union pgtable_pages_info ept_pages_info[CONFIG_MAX_VM_NUM];
#ifdef CONFIG_LAST_LEVEL_EPT_AT_BOOT
/* Array of address space sizes, one entry per VM load order */
static const uint64_t vm_address_space_size[MAX_LOAD_ORDER] = {
PRE_VM_EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE), /* for Pre-Launched VM */
EPT_ADDRESS_SPACE(CONFIG_SOS_RAM_SIZE), /* for SOS VM */
EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE), /* for Post-Launched VM */
};
/*
* @brief Reserve space for EPT 4K pages from the platform E820 table
*/
void reserve_buffer_for_ept_pages(void)
{
uint64_t pt_base;
uint64_t page_base;
uint16_t vm_id;
uint32_t offset = 0U;
struct acrn_vm_config *vm_config;
pt_base = e820_alloc_memory(TOTAL_EPT_4K_PAGES_SIZE, ~0UL);
ppt_clear_user_bit(pt_base, TOTAL_EPT_4K_PAGES_SIZE);
page_base = e820_alloc_memory(TOTAL_EPT_4K_PAGES_SIZE, ~0UL);
ppt_clear_user_bit(page_base, TOTAL_EPT_4K_PAGES_SIZE);
for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
vm_config = get_vm_config(vm_id);
ept_pages_info[vm_id].ept.nworld_pt_base = (struct page *)(void *)(pt_base + offset);
offset += PT_PAGE_NUM(vm_address_space_size[vm_config->load_order])*MEM_4K;
ept_pages[vm_id] = (struct page *)(void *)(page_base + offset);
/* assume each VM has the same amount of EPT pages */
offset += EPT_PAGE_NUM * PAGE_SIZE;
}
}
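/* With CONFIG_LAST_LEVEL_EPT_AT_BOOT, the per-VM EPT page-pool backing store
 * comes from this E820-reserved buffer; otherwise the static per-VM PT page
 * arrays below are used for the last-level mappings. */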
#else
static struct page sos_vm_pt_pages[SOS_VM_NUM][PT_PAGE_NUM(EPT_ADDRESS_SPACE(CONFIG_SOS_RAM_SIZE))];
static struct page pre_uos_nworld_pt_pages[PRE_VM_NUM][PT_PAGE_NUM(PRE_VM_EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE))];
static struct page post_uos_nworld_pt_pages[MAX_POST_VM_NUM][PT_PAGE_NUM(EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE))];
#endif
void *get_reserve_sworld_memory_base(void)
{
@@ -174,47 +205,6 @@ static inline void ept_clflush_pagewalk(const void* etry)
iommu_flush_cache(etry, sizeof(uint64_t));
}
static inline struct page *ept_get_pml4_page(const union pgtable_pages_info *info)
{
struct page *pml4_page = info->ept.nworld_pml4_base;
(void)memset(pml4_page, 0U, PAGE_SIZE);
return pml4_page;
}
static inline struct page *ept_get_pdpt_page(const union pgtable_pages_info *info, uint64_t gpa)
{
struct page *pdpt_page = info->ept.nworld_pdpt_base + (gpa >> PML4E_SHIFT);
(void)memset(pdpt_page, 0U, PAGE_SIZE);
return pdpt_page;
}
static inline struct page *ept_get_pd_page(const union pgtable_pages_info *info, uint64_t gpa)
{
struct page *pd_page;
if (gpa < TRUSTY_EPT_REBASE_GPA) {
pd_page = info->ept.nworld_pd_base + (gpa >> PDPTE_SHIFT);
} else {
pd_page = info->ept.sworld_pgtable_base + TRUSTY_PML4_PAGE_NUM(TRUSTY_EPT_REBASE_GPA) +
TRUSTY_PDPT_PAGE_NUM(TRUSTY_EPT_REBASE_GPA) + ((gpa - TRUSTY_EPT_REBASE_GPA) >> PDPTE_SHIFT);
}
(void)memset(pd_page, 0U, PAGE_SIZE);
return pd_page;
}
static inline struct page *ept_get_pt_page(const union pgtable_pages_info *info, uint64_t gpa)
{
struct page *pt_page;
if (gpa < TRUSTY_EPT_REBASE_GPA) {
pt_page = info->ept.nworld_pt_base + (gpa >> PDE_SHIFT);
} else {
pt_page = info->ept.sworld_pgtable_base + TRUSTY_PML4_PAGE_NUM(TRUSTY_EPT_REBASE_GPA) +
TRUSTY_PDPT_PAGE_NUM(TRUSTY_EPT_REBASE_GPA) + TRUSTY_PD_PAGE_NUM(TRUSTY_EPT_REBASE_GPA) +
((gpa - TRUSTY_EPT_REBASE_GPA) >> PDE_SHIFT);
}
(void)memset(pt_page, 0U, PAGE_SIZE);
return pt_page;
}
/* The function is used to disable the execute right for (2MB / 1GB) large pages in EPT */
static inline void ept_tweak_exe_right(uint64_t *entry)
{
@@ -233,43 +223,25 @@ void init_ept_mem_ops(struct memory_ops *mem_ops, uint16_t vm_id)
{
struct acrn_vm *vm = get_vm_from_vmid(vm_id);
if (is_sos_vm(vm)) {
ept_pages_info[vm_id].ept.top_address_space = EPT_ADDRESS_SPACE(CONFIG_SOS_RAM_SIZE);
ept_pages_info[vm_id].ept.nworld_pml4_base = sos_vm_pml4_pages[0U];
ept_pages_info[vm_id].ept.nworld_pdpt_base = sos_vm_pdpt_pages[0U];
ept_pages_info[vm_id].ept.nworld_pd_base = sos_vm_pd_pages[0U];
#ifndef CONFIG_LAST_LEVEL_EPT_AT_BOOT
ept_pages_info[vm_id].ept.nworld_pt_base = sos_vm_pt_pages[0U];
#endif
} else if (is_prelaunched_vm(vm)) {
ept_pages_info[vm_id].ept.top_address_space = PRE_VM_EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE);
ept_pages_info[vm_id].ept.nworld_pml4_base = pre_uos_nworld_pml4_pages[vm_id];
ept_pages_info[vm_id].ept.nworld_pdpt_base = pre_uos_nworld_pdpt_pages[vm_id];
ept_pages_info[vm_id].ept.nworld_pd_base = pre_uos_nworld_pd_pages[vm_id];
#ifndef CONFIG_LAST_LEVEL_EPT_AT_BOOT
ept_pages_info[vm_id].ept.nworld_pt_base = pre_uos_nworld_pt_pages[vm_id];
#endif
} else {
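/* VMs other than the SOS and pre-launched VMs draw pagetable pages from the
 * per-VM dynamic pool: set up its backing pages, bitmap, lock and allocation
 * hint before the first alloc_page() call. */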
ept_page_pool[vm_id].start_page = ept_pages[vm_id];
ept_page_pool[vm_id].bitmap_size = EPT_PAGE_NUM / 64;
ept_page_pool[vm_id].bitmap = ept_page_bitmap[vm_id];
ept_page_pool[vm_id].dummy_page = &ept_dummy_pages[vm_id];
spinlock_init(&ept_page_pool[vm_id].lock);
memset((void *)ept_page_pool[vm_id].bitmap, 0, ept_page_pool[vm_id].bitmap_size * sizeof(uint64_t));
ept_page_pool[vm_id].last_hint_id = 0UL;
if (is_postlaunched_vm(vm)) {
uint16_t sos_vm_id = (get_sos_vm())->vm_id;
uint16_t page_idx = vmid_2_rel_vmid(sos_vm_id, vm_id) - 1U;
ept_pages_info[vm_id].ept.top_address_space = EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE);
ept_pages_info[vm_id].ept.nworld_pml4_base = post_uos_nworld_pml4_pages[page_idx];
ept_pages_info[vm_id].ept.nworld_pdpt_base = post_uos_nworld_pdpt_pages[page_idx];
ept_pages_info[vm_id].ept.nworld_pd_base = post_uos_nworld_pd_pages[page_idx];
#ifndef CONFIG_LAST_LEVEL_EPT_AT_BOOT
ept_pages_info[vm_id].ept.nworld_pt_base = post_uos_nworld_pt_pages[page_idx];
#endif
ept_pages_info[vm_id].ept.sworld_pgtable_base = post_uos_sworld_pgtable_pages[page_idx];
vm->arch_vm.sworld_memory_base_hva = post_uos_sworld_memory[page_idx];
}
mem_ops->info = &ept_pages_info[vm_id];
mem_ops->pool = &ept_page_pool[vm_id];
mem_ops->get_default_access_right = ept_get_default_access_right;
mem_ops->pgentry_present = ept_pgentry_present;
mem_ops->get_pml4_page = ept_get_pml4_page;
mem_ops->get_pdpt_page = ept_get_pdpt_page;
mem_ops->get_pd_page = ept_get_pd_page;
mem_ops->get_pt_page = ept_get_pt_page;
mem_ops->clflush_pagewalk = ept_clflush_pagewalk;
mem_ops->large_page_support = large_page_support;