mirror of https://github.com/projectacrn/acrn-hypervisor.git
synced 2025-06-03 04:39:50 +00:00
For the FuSa case, we remove all dynamic memory allocation in the ACRN HV and use static allocation or embedded data structures instead. For pagetable pages, we prefer to use an index (HVA for MMU, GPA for EPT) to get a page from a dedicated page pool. That pool has to be big enough to cover every possible index. This is not a big problem as long as we don't support 64-bit MMIO: without it, the index only needs to cover addresses up to DRAM_SIZE + 4G.

However, if ACRN plans to support 64-bit MMIO in the SOS, static allocation no longer works. There is a huge hole between the top DRAM address and the bottom 64-bit MMIO address, and we cannot reserve that many pages for pagetable mapping since the CPU physical address width may be very large.

This patch uses dynamic page allocation for pagetable mapping, backed by a page pool that we still reserve up front and size to be big enough. For the HV MMU, which does not use 4K-granularity page table mapping, we reserve PML4, PDPT and PD pages according to the maximum physical address space (PPT VA and PA are identity-mapped). For each VM's EPT, we likewise reserve PML4, PDPT and PD pages according to the maximum physical address space (the EPT address space can't go beyond the physical address space), and we reserve PT pages based on the real use of DRAM, low MMIO and high MMIO.

Signed-off-by: Li Fei1 <fei1.li@intel.com>
Tracked-On: #5788
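As a rough sizing illustration (not part of the patch; the platform numbers here are assumed, and the usual 4-level paging coverage of one PDPT page per 512 GiB, one PD page per 1 GiB and one PT page per 2 MiB is taken for granted), a platform with MAXIMUM_PA_WIDTH = 39, i.e. a 512 GiB physical address space, would reserve for the hypervisor page table (PPT) pool roughly:

        PPT_PML4_PAGE_NUM = 1
        PPT_PDPT_PAGE_NUM = 512 GiB / 512 GiB = 1
        PPT_PD_PAGE_NUM   = 512 GiB / 1 GiB   = 512
        PPT_PT_PAGE_NUM   = 0                   (no 4K mappings in the PPT)
        PPT_PAGE_NUM      = roundup(514, 64) = 576 pages, about 2.25 MiB

So the up-front pool stays small even though it is sized from the maximum physical address width rather than from installed DRAM. The per-VM EPT arithmetic is sketched next to the EPT_PAGE_NUM definition in the file below.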
/*
 * Copyright (C) 2018 Intel Corporation. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */
#include <types.h>
#include <rtl.h>
#include <cpufeatures.h>
#include <pgtable.h>
#include <page.h>
#include <mmu.h>
#include <trusty.h>
#include <vtd.h>
#include <security.h>
#include <vm.h>
#include <logmsg.h>

#define MAX_PHY_ADDRESS_SPACE   (1UL << MAXIMUM_PA_WIDTH)

/* PPT VA and PA are identical mapping */
#define PPT_PML4_PAGE_NUM       PML4_PAGE_NUM(MAX_PHY_ADDRESS_SPACE)
#define PPT_PDPT_PAGE_NUM       PDPT_PAGE_NUM(MAX_PHY_ADDRESS_SPACE)
#define PPT_PD_PAGE_NUM         PD_PAGE_NUM(MAX_PHY_ADDRESS_SPACE)
#define PPT_PT_PAGE_NUM         0UL     /* not support 4K granularity page mapping */
/* must be a multiple of 64 */
#define PPT_PAGE_NUM    (roundup((PPT_PML4_PAGE_NUM + PPT_PDPT_PAGE_NUM + \
                        PPT_PD_PAGE_NUM + PPT_PT_PAGE_NUM), 64U))
static struct page ppt_pages[PPT_PAGE_NUM];
static uint64_t ppt_page_bitmap[PPT_PAGE_NUM / 64];

/* ppt: primary page pool */
static struct page_pool ppt_page_pool = {
        .start_page = ppt_pages,
        .bitmap_size = PPT_PAGE_NUM / 64,
        .bitmap = ppt_page_bitmap,
        .last_hint_id = 0UL,
        .dummy_page = NULL,
};

struct page *alloc_page(struct page_pool *pool)
{
        struct page *page = NULL;
        uint64_t loop_idx, idx, bit;

        spinlock_obtain(&pool->lock);
        /* Scan the bitmap for a free bit, starting from the last allocation hint. */
        for (loop_idx = pool->last_hint_id;
                loop_idx < pool->last_hint_id + pool->bitmap_size; loop_idx++) {
                idx = loop_idx % pool->bitmap_size;
                if (*(pool->bitmap + idx) != ~0UL) {
                        bit = ffz64(*(pool->bitmap + idx));
                        bitmap_set_nolock(bit, pool->bitmap + idx);
                        page = pool->start_page + ((idx << 6U) + bit);

                        pool->last_hint_id = idx;
                        break;
                }
        }
        spinlock_release(&pool->lock);

        ASSERT(page != NULL, "no page available!");
        page = (page != NULL) ? page : pool->dummy_page;
        if (page == NULL) {
                /* For HV MMU pagetable mapping, we don't use a dummy page when no page is
                 * available in the page pool. This is because we only do MMU pagetable mapping
                 * at early boot time and we reserve enough pages for it; after that we do no
                 * further MMU pagetable mapping. We let the system boot fail when page
                 * allocation fails.
                 */
                panic("no dummy page available!");
        }
        (void)memset(page, 0U, PAGE_SIZE);
        return page;
}

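/*
 * Illustrative usage (not part of the original file): a pagetable walker that needs a new
 * paging-structure page pulls it from the pool attached to its memory_ops, e.g.
 *
 *      struct page *pd_page = alloc_page(&ppt_page_pool);
 *
 * The returned page is already zeroed. For the PPT pool (dummy_page == NULL) exhaustion is
 * fatal at boot, while each EPT pool falls back to its per-VM dummy page so the caller still
 * gets a valid pointer.
 */
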
/* @pre: The PPT and EPT have same page granularity */
static inline bool large_page_support(enum _page_table_level level)
{
        bool support;

        if (level == IA32E_PD) {
                support = true;
        } else if (level == IA32E_PDPT) {
                support = pcpu_has_vmx_ept_cap(VMX_EPT_1GB_PAGE);
        } else {
                support = false;
        }

        return support;
}

static inline uint64_t ppt_get_default_access_right(void)
{
        return (PAGE_PRESENT | PAGE_RW | PAGE_USER);
}

static inline void ppt_clflush_pagewalk(const void* entry __attribute__((unused)))
{
}

static inline uint64_t ppt_pgentry_present(uint64_t pte)
{
        return pte & PAGE_PRESENT;
}

static inline void nop_tweak_exe_right(uint64_t *entry __attribute__((unused))) {}
static inline void nop_recover_exe_right(uint64_t *entry __attribute__((unused))) {}

const struct memory_ops ppt_mem_ops = {
        .pool = &ppt_page_pool,
        .large_page_support = large_page_support,
        .get_default_access_right = ppt_get_default_access_right,
        .pgentry_present = ppt_pgentry_present,
        .clflush_pagewalk = ppt_clflush_pagewalk,
        .tweak_exe_right = nop_tweak_exe_right,
        .recover_exe_right = nop_recover_exe_right,
};

/* EPT address space will not go beyond the platform physical address space */
#define EPT_PML4_PAGE_NUM       PML4_PAGE_NUM(MAX_PHY_ADDRESS_SPACE)
#define EPT_PDPT_PAGE_NUM       PDPT_PAGE_NUM(MAX_PHY_ADDRESS_SPACE)
#define EPT_PD_PAGE_NUM         PD_PAGE_NUM(MAX_PHY_ADDRESS_SPACE)

/* EPT_PT_PAGE_NUM consists of three parts:
 * 1) DRAM - and low MMIO are contiguous (we could assume this because ve820 was built by us),
 *    CONFIG_MAX_VM_NUM at most
 * 2) low MMIO - and DRAM are contiguous, (MEM_1G << 2U) at most
 * 3) high MMIO - only PCI BARs are high MMIO (we don't build the high MMIO EPT mapping
 *    except when writing PCI 64 bits BARs)
 *
 * The first two parts may use PT_PAGE_NUM(CONFIG_PLATFORM_RAM_SIZE + (MEM_1G << 2U)) PT pages
 * to build EPT mapping at most;
 * The high MMIO may use (CONFIG_MAX_PCI_DEV_NUM * 6U) PT pages to build EPT mapping at most:
 * this is because: (a) each 64 bits MMIO BAR may spend one PT page at most to build its EPT
 * mapping, since an MMIO BAR size must be a power of 2 starting from 16 bytes and its base
 * address must be aligned with its size.
 * So if the MMIO BAR size is less than 2M, one PT page is enough to cover its EPT mapping;
 * if the MMIO BAR size is larger than 2M, it must be a multiple of 2M and we could use large
 * pages to build its EPT mapping. The single exception is filtering the MSI-X structure part
 * out of the MSI-X table BAR; in this case, it will also spend one PT page.
 * (b) each PCI device may have six 64 bits MMIO BARs (three general BARs plus three VF BARs);
 * (c) the maximum number of PCI devices for ACRN and the maximum number of virtual PCI devices
 *     for a VM are both CONFIG_MAX_PCI_DEV_NUM.
 */
#define EPT_PT_PAGE_NUM (PT_PAGE_NUM(CONFIG_PLATFORM_RAM_SIZE + (MEM_1G << 2U)) + \
                        CONFIG_MAX_PCI_DEV_NUM * 6U)

/* must be a multiple of 64 */
#define EPT_PAGE_NUM    (roundup((EPT_PML4_PAGE_NUM + EPT_PDPT_PAGE_NUM + \
                        EPT_PD_PAGE_NUM + EPT_PT_PAGE_NUM), 64U))
#define TOTAL_EPT_4K_PAGES_SIZE (CONFIG_MAX_VM_NUM * (EPT_PAGE_NUM) * PAGE_SIZE)

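/*
 * Sizing sketch (illustrative, not part of the original file; the platform numbers are
 * assumptions): with MAXIMUM_PA_WIDTH = 39 (a 512 GiB physical address space), 32 GiB of
 * DRAM for CONFIG_PLATFORM_RAM_SIZE and CONFIG_MAX_PCI_DEV_NUM = 96, and assuming the usual
 * coverage of one PDPT page per 512 GiB, one PD page per 1 GiB and one PT page per 2 MiB:
 *
 *      EPT_PML4_PAGE_NUM = 1
 *      EPT_PDPT_PAGE_NUM = 1
 *      EPT_PD_PAGE_NUM   = 512
 *      EPT_PT_PAGE_NUM   = (32 GiB + 4 GiB) / 2 MiB + 96 * 6 = 18432 + 576 = 19008
 *      EPT_PAGE_NUM      = roundup(19522, 64) = 19584 pages, about 76.5 MiB per VM
 *
 * and TOTAL_EPT_4K_PAGES_SIZE multiplies that by CONFIG_MAX_VM_NUM.
 */
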
static struct page *ept_pages[CONFIG_MAX_VM_NUM];
static uint64_t ept_page_bitmap[CONFIG_MAX_VM_NUM][EPT_PAGE_NUM / 64];
static struct page ept_dummy_pages[CONFIG_MAX_VM_NUM];

/* ept: extended page pool */
static struct page_pool ept_page_pool[CONFIG_MAX_VM_NUM];

/* pre-assumption: TRUSTY_RAM_SIZE is 2M aligned */
static struct page post_uos_sworld_memory[MAX_POST_VM_NUM][TRUSTY_RAM_SIZE >> PAGE_SHIFT] __aligned(MEM_2M);

/*
 * @brief Reserve space for EPT 4K pages from platform E820 table
 */
void reserve_buffer_for_ept_pages(void)
{
        uint64_t page_base;
        uint16_t vm_id;
        uint32_t offset = 0U;

        page_base = e820_alloc_memory(TOTAL_EPT_4K_PAGES_SIZE, ~0UL);
        ppt_clear_user_bit(page_base, TOTAL_EPT_4K_PAGES_SIZE);
        for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
                ept_pages[vm_id] = (struct page *)(void *)(page_base + offset);
                /* assume each VM has same amount of EPT pages */
                offset += EPT_PAGE_NUM * PAGE_SIZE;
        }
}

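/*
 * Layout note (illustrative, not part of the original file): the buffer allocated above is
 * carved into equal per-VM slices of EPT_PAGE_NUM pages, so for example VM 2 gets
 *
 *      ept_pages[2] = (struct page *)(page_base + 2U * EPT_PAGE_NUM * PAGE_SIZE);
 *
 * which is exactly what the loop computes with the running offset.
 */
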
void *get_reserve_sworld_memory_base(void)
{
        return post_uos_sworld_memory;
}

static inline bool large_page_not_support(__unused enum _page_table_level level)
{
        return false;
}

static inline uint64_t ept_get_default_access_right(void)
{
        return EPT_RWX;
}

static inline uint64_t ept_pgentry_present(uint64_t pte)
{
        return pte & EPT_RWX;
}

static inline void ept_clflush_pagewalk(const void* entry)
{
        iommu_flush_cache(entry, sizeof(uint64_t));
}

/* The function is used to disable execute right for (2MB / 1GB) large pages in EPT */
static inline void ept_tweak_exe_right(uint64_t *entry)
{
        *entry &= ~EPT_EXE;
}

/* The function is used to recover the execute right when large pages are broken into 4KB pages.
 * The hypervisor doesn't control execute rights for guest memory, so it recovers the execute
 * right by default.
 */
static inline void ept_recover_exe_right(uint64_t *entry)
{
        *entry |= EPT_EXE;
}

void init_ept_mem_ops(struct memory_ops *mem_ops, uint16_t vm_id)
{
        struct acrn_vm *vm = get_vm_from_vmid(vm_id);

        ept_page_pool[vm_id].start_page = ept_pages[vm_id];
        ept_page_pool[vm_id].bitmap_size = EPT_PAGE_NUM / 64;
        ept_page_pool[vm_id].bitmap = ept_page_bitmap[vm_id];
        ept_page_pool[vm_id].dummy_page = &ept_dummy_pages[vm_id];

        spinlock_init(&ept_page_pool[vm_id].lock);
        memset((void *)ept_page_pool[vm_id].bitmap, 0, ept_page_pool[vm_id].bitmap_size * sizeof(uint64_t));
        ept_page_pool[vm_id].last_hint_id = 0UL;

        if (is_postlaunched_vm(vm)) {
                uint16_t sos_vm_id = (get_sos_vm())->vm_id;
                uint16_t page_idx = vmid_2_rel_vmid(sos_vm_id, vm_id) - 1U;

                vm->arch_vm.sworld_memory_base_hva = post_uos_sworld_memory[page_idx];
        }

        mem_ops->pool = &ept_page_pool[vm_id];
        mem_ops->get_default_access_right = ept_get_default_access_right;
        mem_ops->pgentry_present = ept_pgentry_present;
        mem_ops->clflush_pagewalk = ept_clflush_pagewalk;
        mem_ops->large_page_support = large_page_support;

        /* Mitigation for issue "Machine Check Error on Page Size Change" */
        if (is_ept_force_4k_ipage()) {
                mem_ops->tweak_exe_right = ept_tweak_exe_right;
                mem_ops->recover_exe_right = ept_recover_exe_right;
                /* For RTVM, build 4KB page mapping in EPT */
                if (is_rt_vm(vm)) {
                        mem_ops->large_page_support = large_page_not_support;
                }
        } else {
                mem_ops->tweak_exe_right = nop_tweak_exe_right;
                mem_ops->recover_exe_right = nop_recover_exe_right;
        }
}
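
/*
 * Illustrative call sequence (not part of the original file): a VM-creation path could set up
 * the per-VM EPT memory_ops and then draw paging-structure pages from that VM's pool on
 * demand, along the lines of
 *
 *      struct memory_ops ept_mem_ops;
 *      struct page *pml4_page;
 *
 *      init_ept_mem_ops(&ept_mem_ops, vm_id);
 *      pml4_page = alloc_page(ept_mem_ops.pool);
 *
 * Both functions are defined in this file; the surrounding VM-creation code is assumed here.
 */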