diff --git a/hypervisor/Makefile b/hypervisor/Makefile index 85667896c..ee909cca6 100644 --- a/hypervisor/Makefile +++ b/hypervisor/Makefile @@ -336,7 +336,6 @@ SYS_INIT_C_OBJS := $(patsubst %.c,$(HV_OBJDIR)/%.o,$(SYS_INIT_C_SRCS)) ifneq ($(CONFIG_RELEASE),y) CFLAGS += -DHV_DEBUG -DPROFILING_ON -fno-omit-frame-pointer endif -CFLAGS += -DCONFIG_LAST_LEVEL_EPT_AT_BOOT MODULES += $(LIB_MOD) MODULES += $(BOOT_MOD) diff --git a/hypervisor/arch/x86/cpu.c b/hypervisor/arch/x86/cpu.c index 67ae352bd..ac9049826 100644 --- a/hypervisor/arch/x86/cpu.c +++ b/hypervisor/arch/x86/cpu.c @@ -255,9 +255,8 @@ void init_pcpu_post(uint16_t pcpu_id) /* * Reserve memory from platform E820 for EPT 4K pages for all VMs */ -#ifdef CONFIG_LAST_LEVEL_EPT_AT_BOOT reserve_buffer_for_ept_pages(); -#endif + /* Start all secondary cores */ startup_paddr = prepare_trampoline(); if (!start_pcpus(AP_MASK)) { diff --git a/hypervisor/arch/x86/cpu_caps.c b/hypervisor/arch/x86/cpu_caps.c index 8853150dd..52ea851db 100644 --- a/hypervisor/arch/x86/cpu_caps.c +++ b/hypervisor/arch/x86/cpu_caps.c @@ -427,6 +427,10 @@ int32_t detect_hardware_support(void) (boot_cpu_data.virt_bits == 0U)) { printf("%s, can't detect Linear/Physical Address size\n", __func__); ret = -ENODEV; + } else if (boot_cpu_data.phys_bits > MAXIMUM_PA_WIDTH) { + printf("%s, physical-address width (%d) over maximum physical-address width (%d)\n", + __func__, boot_cpu_data.phys_bits, MAXIMUM_PA_WIDTH); + ret = -ENODEV; } else if (!pcpu_has_cap(X86_FEATURE_INVA_TSC)) { /* check invariant TSC */ printf("%s, invariant TSC not supported\n", __func__); diff --git a/hypervisor/arch/x86/guest/ept.c b/hypervisor/arch/x86/guest/ept.c index 4643d12b3..ecaa5b1a7 100644 --- a/hypervisor/arch/x86/guest/ept.c +++ b/hypervisor/arch/x86/guest/ept.c @@ -19,22 +19,6 @@ #define DBG_LEVEL_EPT 6U -/* - * to be deprecated, don't use - * Check whether pagetable pages is reserved enough for the GPA range or not. 
- */ -bool ept_is_mr_valid(const struct acrn_vm *vm, uint64_t base, uint64_t size) -{ - bool valid = true; - uint64_t end = base + size; - uint64_t top_address_space = vm->arch_vm.ept_mem_ops.info->ept.top_address_space; - if ((end <= base) || (end > top_address_space)) { - valid = false; - } - - return valid; -} - /* * To enable the identical map and support of legacy devices/ACPI method in SOS, * ACRN presents the entire host 0-4GB memory region to SOS, except the memory @@ -326,3 +310,8 @@ void walk_ept_table(struct acrn_vm *vm, pge_handler cb) } } } + +struct page *alloc_ept_page(struct acrn_vm *vm) +{ + return alloc_page(vm->arch_vm.ept_mem_ops.pool); +} diff --git a/hypervisor/arch/x86/guest/trusty.c b/hypervisor/arch/x86/guest/trusty.c index 348ca9df2..cbd481776 100644 --- a/hypervisor/arch/x86/guest/trusty.c +++ b/hypervisor/arch/x86/guest/trusty.c @@ -76,17 +76,14 @@ static void create_secure_world_ept(struct acrn_vm *vm, uint64_t gpa_orig, * Normal World.PD/PT are shared in both Secure world's EPT * and Normal World's EPT */ - pml4_base = vm->arch_vm.ept_mem_ops.info->ept.sworld_pgtable_base; - (void)memset(pml4_base, 0U, PAGE_SIZE); + pml4_base = alloc_ept_page(vm); vm->arch_vm.sworld_eptp = pml4_base; sanitize_pte((uint64_t *)vm->arch_vm.sworld_eptp, &vm->arch_vm.ept_mem_ops); /* The trusty memory is remapped to guest physical address * of gpa_rebased to gpa_rebased + size */ - sub_table_addr = vm->arch_vm.ept_mem_ops.info->ept.sworld_pgtable_base + - TRUSTY_PML4_PAGE_NUM(TRUSTY_EPT_REBASE_GPA); - (void)memset(sub_table_addr, 0U, PAGE_SIZE); + sub_table_addr = alloc_ept_page(vm); sworld_pml4e = hva2hpa(sub_table_addr) | table_present; set_pgentry((uint64_t *)pml4_base, sworld_pml4e, &vm->arch_vm.ept_mem_ops); diff --git a/hypervisor/arch/x86/guest/vm.c b/hypervisor/arch/x86/guest/vm.c index 635c960bf..2df788764 100644 --- a/hypervisor/arch/x86/guest/vm.c +++ b/hypervisor/arch/x86/guest/vm.c @@ -370,10 +370,6 @@ static void prepare_sos_vm_memmap(struct acrn_vm *vm) pr_dbg("sos_vm: bottom memory - 0x%lx, top memory - 0x%lx\n", p_mem_range_info->mem_bottom, p_mem_range_info->mem_top); - if (p_mem_range_info->mem_top > EPT_ADDRESS_SPACE(CONFIG_SOS_RAM_SIZE)) { - panic("Please configure SOS_VM_ADDRESS_SPACE correctly!\n"); - } - /* create real ept map for all ranges with UC */ ept_add_mr(vm, pml4_page, p_mem_range_info->mem_bottom, p_mem_range_info->mem_bottom, (p_mem_range_info->mem_top - p_mem_range_info->mem_bottom), attr_uc); @@ -497,7 +493,7 @@ int32_t create_vm(uint16_t vm_id, uint64_t pcpu_bitmap, struct acrn_vm_config *v vm->hw.created_vcpus = 0U; init_ept_mem_ops(&vm->arch_vm.ept_mem_ops, vm->vm_id); - vm->arch_vm.nworld_eptp = vm->arch_vm.ept_mem_ops.get_pml4_page(vm->arch_vm.ept_mem_ops.info); + vm->arch_vm.nworld_eptp = alloc_ept_page(vm); sanitize_pte((uint64_t *)vm->arch_vm.nworld_eptp, &vm->arch_vm.ept_mem_ops); (void)memcpy_s(&vm->uuid[0], sizeof(vm->uuid), diff --git a/hypervisor/arch/x86/mmu.c b/hypervisor/arch/x86/mmu.c index 439ee0cd6..96aad5b2d 100644 --- a/hypervisor/arch/x86/mmu.c +++ b/hypervisor/arch/x86/mmu.c @@ -250,7 +250,7 @@ void init_paging(void) } /* Allocate memory for Hypervisor PML4 table */ - ppt_mmu_pml4_addr = ppt_mem_ops.get_pml4_page(ppt_mem_ops.info); + ppt_mmu_pml4_addr = alloc_page(ppt_mem_ops.pool); /* Map all memory regions to UC attribute */ mmu_add((uint64_t *)ppt_mmu_pml4_addr, 0UL, 0UL, high64_max_ram - 0UL, attr_uc, &ppt_mem_ops); diff --git a/hypervisor/arch/x86/page.c b/hypervisor/arch/x86/page.c index 446605414..37974a75a 100644 
--- a/hypervisor/arch/x86/page.c +++ b/hypervisor/arch/x86/page.c @@ -13,22 +13,66 @@ #include #include #include +#include -#define LINEAR_ADDRESS_SPACE_48_BIT (1UL << 48U) -static struct page ppt_pml4_pages[PML4_PAGE_NUM(LINEAR_ADDRESS_SPACE_48_BIT)]; -static struct page ppt_pdpt_pages[PDPT_PAGE_NUM(LINEAR_ADDRESS_SPACE_48_BIT)]; -static struct page ppt_pd_pages[PD_PAGE_NUM(CONFIG_PLATFORM_RAM_SIZE + PLATFORM_LO_MMIO_SIZE)]; +#define MAX_PHY_ADDRESS_SPACE (1UL << MAXIMUM_PA_WIDTH) -/* ppt: pripary page table */ -static union pgtable_pages_info ppt_pages_info = { - .ppt = { - .pml4_base = ppt_pml4_pages, - .pdpt_base = ppt_pdpt_pages, - .pd_base = ppt_pd_pages, - } +/* PPT VA and PA are identity mapped */ +#define PPT_PML4_PAGE_NUM PML4_PAGE_NUM(MAX_PHY_ADDRESS_SPACE) +#define PPT_PDPT_PAGE_NUM PDPT_PAGE_NUM(MAX_PHY_ADDRESS_SPACE) +#define PPT_PD_PAGE_NUM PD_PAGE_NUM(MAX_PHY_ADDRESS_SPACE) +#define PPT_PT_PAGE_NUM 0UL /* 4K granularity page mapping is not supported */ +/* must be a multiple of 64 */ +#define PPT_PAGE_NUM (roundup((PPT_PML4_PAGE_NUM + PPT_PDPT_PAGE_NUM + \ + PPT_PD_PAGE_NUM + PPT_PT_PAGE_NUM), 64U)) +static struct page ppt_pages[PPT_PAGE_NUM]; +static uint64_t ppt_page_bitmap[PPT_PAGE_NUM / 64]; + +/* ppt: primary page pool */ +static struct page_pool ppt_page_pool = { + .start_page = ppt_pages, + .bitmap_size = PPT_PAGE_NUM / 64, + .bitmap = ppt_page_bitmap, + .last_hint_id = 0UL, + .dummy_page = NULL, }; +struct page *alloc_page(struct page_pool *pool) +{ + struct page *page = NULL; + uint64_t loop_idx, idx, bit; + + spinlock_obtain(&pool->lock); + for (loop_idx = pool->last_hint_id; + loop_idx < pool->last_hint_id + pool->bitmap_size; loop_idx++) { + idx = loop_idx % pool->bitmap_size; + if (*(pool->bitmap + idx) != ~0UL) { + bit = ffz64(*(pool->bitmap + idx)); + bitmap_set_nolock(bit, pool->bitmap + idx); + page = pool->start_page + ((idx << 6U) + bit); + + pool->last_hint_id = idx; + break; + } + } + spinlock_release(&pool->lock); + + ASSERT(page != NULL, "no page available!"); + page = (page != NULL) ? page : pool->dummy_page; + if (page == NULL) { + /* For HV MMU pagetable mapping, we don't use a dummy page when there's no page + * available in the page pool. This is because we only do MMU pagetable mapping at + * early boot time and we reserve enough pages for it. After that, we will not do + * any MMU pagetable mapping. We let the system boot fail when page + * allocation fails. 
+ */ + panic("no dummy page available!"); + } + (void)memset(page, 0U, PAGE_SIZE); + return page; +} + /* @pre: The PPT and EPT have same page granularity */ static inline bool large_page_support(enum _page_table_level level) { @@ -59,95 +103,82 @@ static inline uint64_t ppt_pgentry_present(uint64_t pte) return pte & PAGE_PRESENT; } -static inline struct page *ppt_get_pml4_page(const union pgtable_pages_info *info) -{ - struct page *pml4_page = info->ppt.pml4_base; - (void)memset(pml4_page, 0U, PAGE_SIZE); - return pml4_page; -} - -static inline struct page *ppt_get_pdpt_page(const union pgtable_pages_info *info, uint64_t gpa) -{ - struct page *pdpt_page = info->ppt.pdpt_base + (gpa >> PML4E_SHIFT); - (void)memset(pdpt_page, 0U, PAGE_SIZE); - return pdpt_page; -} - -static inline struct page *ppt_get_pd_page(const union pgtable_pages_info *info, uint64_t gpa) -{ - struct page *pd_page = info->ppt.pd_base + (gpa >> PDPTE_SHIFT); - (void)memset(pd_page, 0U, PAGE_SIZE); - return pd_page; -} - static inline void nop_tweak_exe_right(uint64_t *entry __attribute__((unused))) {} static inline void nop_recover_exe_right(uint64_t *entry __attribute__((unused))) {} const struct memory_ops ppt_mem_ops = { - .info = &ppt_pages_info, + .pool = &ppt_page_pool, .large_page_support = large_page_support, .get_default_access_right = ppt_get_default_access_right, .pgentry_present = ppt_pgentry_present, - .get_pml4_page = ppt_get_pml4_page, - .get_pdpt_page = ppt_get_pdpt_page, - .get_pd_page = ppt_get_pd_page, .clflush_pagewalk = ppt_clflush_pagewalk, .tweak_exe_right = nop_tweak_exe_right, .recover_exe_right = nop_recover_exe_right, }; -static struct page sos_vm_pml4_pages[SOS_VM_NUM][PML4_PAGE_NUM(EPT_ADDRESS_SPACE(CONFIG_SOS_RAM_SIZE))]; -static struct page sos_vm_pdpt_pages[SOS_VM_NUM][PDPT_PAGE_NUM(EPT_ADDRESS_SPACE(CONFIG_SOS_RAM_SIZE))]; -static struct page sos_vm_pd_pages[SOS_VM_NUM][PD_PAGE_NUM(EPT_ADDRESS_SPACE(CONFIG_SOS_RAM_SIZE))]; -/* pre_uos_nworld_pml4_pages */ -static struct page pre_uos_nworld_pml4_pages[PRE_VM_NUM][PML4_PAGE_NUM(PRE_VM_EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE))]; -static struct page pre_uos_nworld_pdpt_pages[PRE_VM_NUM][PDPT_PAGE_NUM(PRE_VM_EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE))]; -static struct page pre_uos_nworld_pd_pages[PRE_VM_NUM][PD_PAGE_NUM(PRE_VM_EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE))]; +/* EPT address space will not go beyond the platform physical address space */ +#define EPT_PML4_PAGE_NUM PML4_PAGE_NUM(MAX_PHY_ADDRESS_SPACE) +#define EPT_PDPT_PAGE_NUM PDPT_PAGE_NUM(MAX_PHY_ADDRESS_SPACE) +#define EPT_PD_PAGE_NUM PD_PAGE_NUM(MAX_PHY_ADDRESS_SPACE) -/* post_uos_nworld_pml4_pages */ -static struct page post_uos_nworld_pml4_pages[MAX_POST_VM_NUM][PML4_PAGE_NUM(EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE))]; -static struct page post_uos_nworld_pdpt_pages[MAX_POST_VM_NUM][PDPT_PAGE_NUM(EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE))]; -static struct page post_uos_nworld_pd_pages[MAX_POST_VM_NUM][PD_PAGE_NUM(EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE))]; +/* EPT_PT_PAGE_NUM consists of three parts: + * 1) DRAM - and low MMIO are contiguous (we could assume this because ve820 was built by us), + * CONFIG_PLATFORM_RAM_SIZE at most + * 2) low MMIO - and DRAM are contiguous, (MEM_1G << 2U) at most + * 3) high MMIO - only PCI BARs are high MMIO (we don't build the high MMIO EPT mapping + * except when writing PCI 64-bit BARs) + * + * The first two parts may use PT_PAGE_NUM(CONFIG_PLATFORM_RAM_SIZE + (MEM_1G << 2U)) PT pages + * to build EPT mapping at most; + * The high MMIO may use (CONFIG_MAX_PCI_DEV_NUM * 6U) PT 
pages to build EPT mapping at most: + * this is because: (a) each 64-bit MMIO BAR may spend one PT page at most to build EPT mapping, + * MMIO BAR size must be a power of 2, starting from 16 bytes; + * MMIO BAR base address must be aligned to its size; + * so if the MMIO BAR size is less than 2M, one PT page is enough to cover its EPT mapping; + * if the MMIO BAR size is larger than 2M, it must be a multiple of 2M and we could use large pages + * to build EPT mapping for it. The single exception is filtering the MSI-X structure part + * from the MSI-X table BAR. In this case, it will also spend one PT page. + * (b) each PCI device may have six 64-bit MMIO BARs (three general BARs plus three VF BARs) + * (c) the maximum number of PCI devices for ACRN and the maximum number of virtual PCI devices + * for a VM are both CONFIG_MAX_PCI_DEV_NUM + */ +#define EPT_PT_PAGE_NUM (PT_PAGE_NUM(CONFIG_PLATFORM_RAM_SIZE + (MEM_1G << 2U)) + \ + CONFIG_MAX_PCI_DEV_NUM * 6U) + +/* must be a multiple of 64 */ +#define EPT_PAGE_NUM (roundup((EPT_PML4_PAGE_NUM + EPT_PDPT_PAGE_NUM + \ + EPT_PD_PAGE_NUM + EPT_PT_PAGE_NUM), 64U)) +#define TOTAL_EPT_4K_PAGES_SIZE (CONFIG_MAX_VM_NUM * (EPT_PAGE_NUM) * PAGE_SIZE) + +static struct page *ept_pages[CONFIG_MAX_VM_NUM]; +static uint64_t ept_page_bitmap[CONFIG_MAX_VM_NUM][EPT_PAGE_NUM / 64]; +static struct page ept_dummy_pages[CONFIG_MAX_VM_NUM]; + +/* ept: extended page table page pool */ +static struct page_pool ept_page_pool[CONFIG_MAX_VM_NUM]; -static struct page post_uos_sworld_pgtable_pages[MAX_POST_VM_NUM][TRUSTY_PGTABLE_PAGE_NUM(TRUSTY_RAM_SIZE)]; /* pre-assumption: TRUSTY_RAM_SIZE is 2M aligned */ static struct page post_uos_sworld_memory[MAX_POST_VM_NUM][TRUSTY_RAM_SIZE >> PAGE_SHIFT] __aligned(MEM_2M); -/* ept: extended page table*/ -static union pgtable_pages_info ept_pages_info[CONFIG_MAX_VM_NUM]; -#ifdef CONFIG_LAST_LEVEL_EPT_AT_BOOT -/* Array with address space size for each type of load order of VM */ -static const uint64_t vm_address_space_size[MAX_LOAD_ORDER] = { - PRE_VM_EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE), /* for Pre-Launched VM */ - EPT_ADDRESS_SPACE(CONFIG_SOS_RAM_SIZE), /* for SOS VM */ - EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE), /* for Post-Launched VM */ -}; - /* * @brief Reserve space for EPT 4K pages from platform E820 table */ void reserve_buffer_for_ept_pages(void) { - uint64_t pt_base; + uint64_t page_base; uint16_t vm_id; uint32_t offset = 0U; - struct acrn_vm_config *vm_config; - pt_base = e820_alloc_memory(TOTAL_EPT_4K_PAGES_SIZE, ~0UL); - ppt_clear_user_bit(pt_base, TOTAL_EPT_4K_PAGES_SIZE); + page_base = e820_alloc_memory(TOTAL_EPT_4K_PAGES_SIZE, ~0UL); + ppt_clear_user_bit(page_base, TOTAL_EPT_4K_PAGES_SIZE); for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) { - vm_config = get_vm_config(vm_id); - ept_pages_info[vm_id].ept.nworld_pt_base = (struct page *)(void *)(pt_base + offset); - offset += PT_PAGE_NUM(vm_address_space_size[vm_config->load_order])*MEM_4K; + ept_pages[vm_id] = (struct page *)(void *)(page_base + offset); + /* assume each VM has the same amount of EPT pages */ + offset += EPT_PAGE_NUM * PAGE_SIZE; } } -#else -static struct page sos_vm_pt_pages[SOS_VM_NUM][PT_PAGE_NUM(EPT_ADDRESS_SPACE(CONFIG_SOS_RAM_SIZE))]; -static struct page pre_uos_nworld_pt_pages[PRE_VM_NUM][PT_PAGE_NUM(PRE_VM_EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE))]; -static struct page post_uos_nworld_pt_pages[MAX_POST_VM_NUM][PT_PAGE_NUM(EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE))]; -#endif void *get_reserve_sworld_memory_base(void) { @@ -174,47 +205,6 @@ static inline 
void ept_clflush_pagewalk(const void* etry) iommu_flush_cache(etry, sizeof(uint64_t)); } -static inline struct page *ept_get_pml4_page(const union pgtable_pages_info *info) -{ - struct page *pml4_page = info->ept.nworld_pml4_base; - (void)memset(pml4_page, 0U, PAGE_SIZE); - return pml4_page; -} - -static inline struct page *ept_get_pdpt_page(const union pgtable_pages_info *info, uint64_t gpa) -{ - struct page *pdpt_page = info->ept.nworld_pdpt_base + (gpa >> PML4E_SHIFT); - (void)memset(pdpt_page, 0U, PAGE_SIZE); - return pdpt_page; -} - -static inline struct page *ept_get_pd_page(const union pgtable_pages_info *info, uint64_t gpa) -{ - struct page *pd_page; - if (gpa < TRUSTY_EPT_REBASE_GPA) { - pd_page = info->ept.nworld_pd_base + (gpa >> PDPTE_SHIFT); - } else { - pd_page = info->ept.sworld_pgtable_base + TRUSTY_PML4_PAGE_NUM(TRUSTY_EPT_REBASE_GPA) + - TRUSTY_PDPT_PAGE_NUM(TRUSTY_EPT_REBASE_GPA) + ((gpa - TRUSTY_EPT_REBASE_GPA) >> PDPTE_SHIFT); - } - (void)memset(pd_page, 0U, PAGE_SIZE); - return pd_page; -} - -static inline struct page *ept_get_pt_page(const union pgtable_pages_info *info, uint64_t gpa) -{ - struct page *pt_page; - if (gpa < TRUSTY_EPT_REBASE_GPA) { - pt_page = info->ept.nworld_pt_base + (gpa >> PDE_SHIFT); - } else { - pt_page = info->ept.sworld_pgtable_base + TRUSTY_PML4_PAGE_NUM(TRUSTY_EPT_REBASE_GPA) + - TRUSTY_PDPT_PAGE_NUM(TRUSTY_EPT_REBASE_GPA) + TRUSTY_PD_PAGE_NUM(TRUSTY_EPT_REBASE_GPA) + - ((gpa - TRUSTY_EPT_REBASE_GPA) >> PDE_SHIFT); - } - (void)memset(pt_page, 0U, PAGE_SIZE); - return pt_page; -} - /* The function is used to disable execute right for (2MB / 1GB)large pages in EPT */ static inline void ept_tweak_exe_right(uint64_t *entry) { @@ -233,43 +223,25 @@ void init_ept_mem_ops(struct memory_ops *mem_ops, uint16_t vm_id) { struct acrn_vm *vm = get_vm_from_vmid(vm_id); - if (is_sos_vm(vm)) { - ept_pages_info[vm_id].ept.top_address_space = EPT_ADDRESS_SPACE(CONFIG_SOS_RAM_SIZE); - ept_pages_info[vm_id].ept.nworld_pml4_base = sos_vm_pml4_pages[0U]; - ept_pages_info[vm_id].ept.nworld_pdpt_base = sos_vm_pdpt_pages[0U]; - ept_pages_info[vm_id].ept.nworld_pd_base = sos_vm_pd_pages[0U]; -#ifndef CONFIG_LAST_LEVEL_EPT_AT_BOOT - ept_pages_info[vm_id].ept.nworld_pt_base = sos_vm_pt_pages[0U]; -#endif - } else if (is_prelaunched_vm(vm)) { - ept_pages_info[vm_id].ept.top_address_space = PRE_VM_EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE); - ept_pages_info[vm_id].ept.nworld_pml4_base = pre_uos_nworld_pml4_pages[vm_id]; - ept_pages_info[vm_id].ept.nworld_pdpt_base = pre_uos_nworld_pdpt_pages[vm_id]; - ept_pages_info[vm_id].ept.nworld_pd_base = pre_uos_nworld_pd_pages[vm_id]; -#ifndef CONFIG_LAST_LEVEL_EPT_AT_BOOT - ept_pages_info[vm_id].ept.nworld_pt_base = pre_uos_nworld_pt_pages[vm_id]; -#endif - } else { + ept_page_pool[vm_id].start_page = ept_pages[vm_id]; + ept_page_pool[vm_id].bitmap_size = EPT_PAGE_NUM / 64; + ept_page_pool[vm_id].bitmap = ept_page_bitmap[vm_id]; + ept_page_pool[vm_id].dummy_page = &ept_dummy_pages[vm_id]; + + spinlock_init(&ept_page_pool[vm_id].lock); + memset((void *)ept_page_pool[vm_id].bitmap, 0, ept_page_pool[vm_id].bitmap_size * sizeof(uint64_t)); + ept_page_pool[vm_id].last_hint_id = 0UL; + + if (is_postlaunched_vm(vm)) { uint16_t sos_vm_id = (get_sos_vm())->vm_id; uint16_t page_idx = vmid_2_rel_vmid(sos_vm_id, vm_id) - 1U; - ept_pages_info[vm_id].ept.top_address_space = EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE); - ept_pages_info[vm_id].ept.nworld_pml4_base = post_uos_nworld_pml4_pages[page_idx]; - ept_pages_info[vm_id].ept.nworld_pdpt_base = 
post_uos_nworld_pdpt_pages[page_idx]; - ept_pages_info[vm_id].ept.nworld_pd_base = post_uos_nworld_pd_pages[page_idx]; -#ifndef CONFIG_LAST_LEVEL_EPT_AT_BOOT - ept_pages_info[vm_id].ept.nworld_pt_base = post_uos_nworld_pt_pages[page_idx]; -#endif - ept_pages_info[vm_id].ept.sworld_pgtable_base = post_uos_sworld_pgtable_pages[page_idx]; vm->arch_vm.sworld_memory_base_hva = post_uos_sworld_memory[page_idx]; } - mem_ops->info = &ept_pages_info[vm_id]; + + mem_ops->pool = &ept_page_pool[vm_id]; mem_ops->get_default_access_right = ept_get_default_access_right; mem_ops->pgentry_present = ept_pgentry_present; - mem_ops->get_pml4_page = ept_get_pml4_page; - mem_ops->get_pdpt_page = ept_get_pdpt_page; - mem_ops->get_pd_page = ept_get_pd_page; - mem_ops->get_pt_page = ept_get_pt_page; mem_ops->clflush_pagewalk = ept_clflush_pagewalk; mem_ops->large_page_support = large_page_support; diff --git a/hypervisor/arch/x86/pagetable.c b/hypervisor/arch/x86/pagetable.c index be5ffc894..a459ed112 100644 --- a/hypervisor/arch/x86/pagetable.c +++ b/hypervisor/arch/x86/pagetable.c @@ -19,7 +19,7 @@ * @pre: level could only IA32E_PDPT or IA32E_PD */ static void split_large_page(uint64_t *pte, enum _page_table_level level, - uint64_t vaddr, const struct memory_ops *mem_ops) + __unused uint64_t vaddr, const struct memory_ops *mem_ops) { uint64_t *pbase; uint64_t ref_paddr, paddr, paddrinc; @@ -30,7 +30,6 @@ static void split_large_page(uint64_t *pte, enum _page_table_level level, ref_paddr = (*pte) & PDPTE_PFN_MASK; paddrinc = PDE_SIZE; ref_prot = (*pte) & ~PDPTE_PFN_MASK; - pbase = (uint64_t *)mem_ops->get_pd_page(mem_ops->info, vaddr); break; default: /* IA32E_PD */ ref_paddr = (*pte) & PDE_PFN_MASK; @@ -38,10 +37,10 @@ static void split_large_page(uint64_t *pte, enum _page_table_level level, ref_prot = (*pte) & ~PDE_PFN_MASK; ref_prot &= ~PAGE_PSE; mem_ops->recover_exe_right(&ref_prot); - pbase = (uint64_t *)mem_ops->get_pt_page(mem_ops->info, vaddr); break; } + pbase = (uint64_t *)alloc_page(mem_ops->pool); dev_dbg(DBG_LEVEL_MMU, "%s, paddr: 0x%lx, pbase: 0x%lx\n", __func__, ref_paddr, pbase); paddr = ref_paddr; @@ -309,7 +308,7 @@ static void add_pde(const uint64_t *pdpte, uint64_t paddr_start, uint64_t vaddr_ } break; /* done */ } else { - void *pt_page = mem_ops->get_pt_page(mem_ops->info, vaddr); + void *pt_page = alloc_page(mem_ops->pool); construct_pgentry(pde, pt_page, mem_ops->get_default_access_right(), mem_ops); } } @@ -357,7 +356,7 @@ static void add_pdpte(const uint64_t *pml4e, uint64_t paddr_start, uint64_t vadd } break; /* done */ } else { - void *pd_page = mem_ops->get_pd_page(mem_ops->info, vaddr); + void *pd_page = alloc_page(mem_ops->pool); construct_pgentry(pdpte, pd_page, mem_ops->get_default_access_right(), mem_ops); } } @@ -394,7 +393,7 @@ void mmu_add(uint64_t *pml4_page, uint64_t paddr_base, uint64_t vaddr_base, uint vaddr_next = (vaddr & PML4E_MASK) + PML4E_SIZE; pml4e = pml4e_offset(pml4_page, vaddr); if (mem_ops->pgentry_present(*pml4e) == 0UL) { - void *pdpt_page = mem_ops->get_pdpt_page(mem_ops->info, vaddr); + void *pdpt_page = alloc_page(mem_ops->pool); construct_pgentry(pml4e, pdpt_page, mem_ops->get_default_access_right(), mem_ops); } add_pdpte(pml4e, paddr, vaddr, vaddr_end, prot, mem_ops); diff --git a/hypervisor/common/hypercall.c b/hypervisor/common/hypercall.c index 5c4e7dfa1..13e818165 100644 --- a/hypervisor/common/hypercall.c +++ b/hypervisor/common/hypercall.c @@ -601,11 +601,9 @@ static int32_t set_vm_memory_region(struct acrn_vm *vm, if (region->type == MR_ADD) { /* 
if the GPA range is SOS valid GPA or not */ if (ept_is_valid_mr(vm, region->sos_vm_gpa, region->size)) { - /* if pagetable pages is reserved enougn for the GPA range */ - if (ept_is_mr_valid(target_vm, region->gpa, region->size)) { - add_vm_memory_region(vm, target_vm, region, pml4_page); - ret = 0; - } + /* FIXME: how to filter the alias mapping? */ + add_vm_memory_region(vm, target_vm, region, pml4_page); + ret = 0; } } else { if (ept_is_valid_mr(target_vm, region->gpa, region->size)) { @@ -616,10 +614,9 @@ static int32_t set_vm_memory_region(struct acrn_vm *vm, } dev_dbg((ret == 0) ? DBG_LEVEL_HYCALL : LOG_ERROR, - "[vm%d] type=%d gpa=0x%x sos_gpa=0x%x sz=0x%x, top_addr:0x%lx", + "[vm%d] type=%d gpa=0x%x sos_gpa=0x%x sz=0x%x", target_vm->vm_id, region->type, region->gpa, - region->sos_vm_gpa, region->size, - target_vm->arch_vm.ept_mem_ops.info->ept.top_address_space); + region->sos_vm_gpa, region->size); return ret; } diff --git a/hypervisor/dm/vpci/vdev.c b/hypervisor/dm/vpci/vdev.c index 99927e381..03b223ca0 100644 --- a/hypervisor/dm/vpci/vdev.c +++ b/hypervisor/dm/vpci/vdev.c @@ -135,11 +135,7 @@ static void pci_vdev_update_vbar_base(struct pci_vdev *vdev, uint32_t idx) base &= 0xffffUL; } - if (is_pci_mem_bar(vbar) && (base != 0UL) && !ept_is_mr_valid(vpci2vm(vdev->vpci), base, vdev->vbars[idx].size)) { - pr_warn("%s, %x:%x.%x set invalid bar[%d] base: 0x%lx, size: 0x%lx\n", __func__, - vdev->bdf.bits.b, vdev->bdf.bits.d, vdev->bdf.bits.f, idx, base, vdev->vbars[idx].size); - base = 0UL; /* 0UL means invalid GPA, so that EPT won't map */ - } + /* TODO: 1. check whether the address locates in the MMIO windows; 2. base must be aligned with its size */ vdev->vbars[idx].base_gpa = base; } diff --git a/hypervisor/include/arch/x86/guest/ept.h b/hypervisor/include/arch/x86/guest/ept.h index 574a91632..ac2504b04 100644 --- a/hypervisor/include/arch/x86/guest/ept.h +++ b/hypervisor/include/arch/x86/guest/ept.h @@ -18,18 +18,6 @@ typedef void (*pge_handler)(uint64_t *pgentry, uint64_t size); #define INVALID_HPA (0x1UL << 52U) #define INVALID_GPA (0x1UL << 52U) /* External Interfaces */ -/** - * @brief Check whether pagetable pages is reserved enough for the GPA range or not - * - * @param[in] vm the pointer that points to VM data structure - * @param[in] base The specified start guest physical address of guest - * physical memory region - * @param[in] size The size of guest physical memory region - * - * @retval true if pagetable pages is reserved enough for the GPA range, false otherwise. - */ -bool ept_is_mr_valid(const struct acrn_vm *vm, uint64_t base, uint64_t size); - /** * @brief Check if the GPA range is guest valid GPA or not * @@ -173,4 +161,13 @@ void walk_ept_table(struct acrn_vm *vm, pge_handler cb); */ int32_t ept_misconfig_vmexit_handler(__unused struct acrn_vcpu *vcpu); +/** + * @brief allocate a page from the VM's EPT pagetable page pool + * + * @param[in] vm the pointer that points to VM data structure + * + * @retval a page pointer if there are free pages available in the VM's EPT + * pagetable page pool, null otherwise. 
+ */ +struct page *alloc_ept_page(struct acrn_vm *vm); #endif /* EPT_H */ diff --git a/hypervisor/include/arch/x86/page.h b/hypervisor/include/arch/x86/page.h index d9e9e43e6..cfd7e42db 100644 --- a/hypervisor/include/arch/x86/page.h +++ b/hypervisor/include/arch/x86/page.h @@ -7,12 +7,15 @@ #ifndef PAGE_H #define PAGE_H +#include #include #define PAGE_SHIFT 12U #define PAGE_SIZE (1U << PAGE_SHIFT) #define PAGE_MASK 0xFFFFFFFFFFFFF000UL +#define MAXIMUM_PA_WIDTH 39U /* maximum physical-address width */ + /* size of the low MMIO address space: 2GB */ #define PLATFORM_LO_MMIO_SIZE 0x80000000UL @@ -24,32 +27,6 @@ #define PD_PAGE_NUM(size) (((size) + PDPTE_SIZE - 1UL) >> PDPTE_SHIFT) #define PT_PAGE_NUM(size) (((size) + PDE_SIZE - 1UL) >> PDE_SHIFT) -/* - * The size of the guest physical address space, covered by the EPT page table of a VM. - * With the assumptions: - * - The GPA of DRAM & MMIO are contiguous. - * - Guest OS won't re-program device MMIO bars to the address not covered by - * this EPT_ADDRESS_SPACE. - */ -#define EPT_ADDRESS_SPACE(size) (((size) > MEM_2G) ? \ - ((size) + PLATFORM_LO_MMIO_SIZE + PLATFORM_HI_MMIO_SIZE) \ - : (MEM_2G + PLATFORM_LO_MMIO_SIZE + PLATFORM_HI_MMIO_SIZE)) - -#define PTDEV_HI_MMIO_START ((CONFIG_UOS_RAM_SIZE > MEM_2G) ? \ - (CONFIG_UOS_RAM_SIZE + PLATFORM_LO_MMIO_SIZE) : (MEM_2G + PLATFORM_LO_MMIO_SIZE)) - -#define PRE_VM_EPT_ADDRESS_SPACE(size) (PTDEV_HI_MMIO_START + HI_MMIO_SIZE) - -#define TOTAL_EPT_4K_PAGES_SIZE (PRE_VM_NUM*(PT_PAGE_NUM(PRE_VM_EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE))*MEM_4K)) + \ - (SOS_VM_NUM*(PT_PAGE_NUM(EPT_ADDRESS_SPACE(CONFIG_SOS_RAM_SIZE))*MEM_4K)) + \ - (MAX_POST_VM_NUM*(PT_PAGE_NUM(EPT_ADDRESS_SPACE(CONFIG_UOS_RAM_SIZE))*MEM_4K)) - -#define TRUSTY_PML4_PAGE_NUM(size) (1UL) -#define TRUSTY_PDPT_PAGE_NUM(size) (1UL) -#define TRUSTY_PD_PAGE_NUM(size) (PD_PAGE_NUM(size)) -#define TRUSTY_PT_PAGE_NUM(size) (PT_PAGE_NUM(size)) -#define TRUSTY_PGTABLE_PAGE_NUM(size) \ -(TRUSTY_PML4_PAGE_NUM(size) + TRUSTY_PDPT_PAGE_NUM(size) + TRUSTY_PD_PAGE_NUM(size) + TRUSTY_PT_PAGE_NUM(size)) /** * @brief Page tables level in IA32 paging mode @@ -79,32 +56,21 @@ struct page { uint8_t contents[PAGE_SIZE]; } __aligned(PAGE_SIZE); -union pgtable_pages_info { - struct { - struct page *pml4_base; - struct page *pdpt_base; - struct page *pd_base; - struct page *pt_base; - } ppt; - struct { - uint64_t top_address_space; - struct page *nworld_pml4_base; - struct page *nworld_pdpt_base; - struct page *nworld_pd_base; - struct page *nworld_pt_base; - struct page *sworld_pgtable_base; - } ept; +struct page_pool { + struct page *start_page; + spinlock_t lock; + uint64_t bitmap_size; + uint64_t *bitmap; + uint64_t last_hint_id; + + struct page *dummy_page; }; struct memory_ops { - union pgtable_pages_info *info; + struct page_pool *pool; bool (*large_page_support)(enum _page_table_level level); uint64_t (*get_default_access_right)(void); uint64_t (*pgentry_present)(uint64_t pte); - struct page *(*get_pml4_page)(const union pgtable_pages_info *info); - struct page *(*get_pdpt_page)(const union pgtable_pages_info *info, uint64_t gpa); - struct page *(*get_pd_page)(const union pgtable_pages_info *info, uint64_t gpa); - struct page *(*get_pt_page)(const union pgtable_pages_info *info, uint64_t gpa); void (*clflush_pagewalk)(const void *p); void (*tweak_exe_right)(uint64_t *entry); void (*recover_exe_right)(uint64_t *entry); @@ -112,9 +78,7 @@ struct memory_ops { extern const struct memory_ops ppt_mem_ops; void init_ept_mem_ops(struct memory_ops *mem_ops, uint16_t 
vm_id); +struct page *alloc_page(struct page_pool *pool); void *get_reserve_sworld_memory_base(void); - -#ifdef CONFIG_LAST_LEVEL_EPT_AT_BOOT void reserve_buffer_for_ept_pages(void); -#endif #endif /* PAGE_H */
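
Editor's note (not part of the patch): the heart of this change is the bitmap-backed page pool that alloc_page() scans in page.c above, replacing the precomputed per-level page arrays. Below is a minimal, standalone sketch of that allocation scheme, assuming plain libc in place of the hypervisor's spinlock/ASSERT/ffz64/bitmap_set_nolock primitives; names such as demo_page_pool, demo_alloc_page and DEMO_PAGE_NUM are illustrative only and do not exist in ACRN.

/* Standalone illustration of the bitmap page-pool scheme introduced by the patch.
 * NOT ACRN code: locking, panic/dummy-page handling and the hypervisor bit helpers
 * are intentionally left out so this compiles and runs on its own. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define DEMO_PAGE_SIZE	4096U
#define DEMO_PAGE_NUM	128U			/* must be a multiple of 64 */
#define BITMAP_WORDS	(DEMO_PAGE_NUM / 64U)

struct demo_page {
	uint8_t contents[DEMO_PAGE_SIZE];
};

struct demo_page_pool {
	struct demo_page *start_page;
	uint64_t bitmap[BITMAP_WORDS];		/* one bit per page, 1 = in use */
	uint64_t last_hint_id;			/* bitmap word that served the last allocation */
};

static struct demo_page demo_pages[DEMO_PAGE_NUM];
static struct demo_page_pool demo_pool = { .start_page = demo_pages };

/* find the first zero bit in a 64-bit word; stands in for ACRN's ffz64() */
static uint64_t find_first_zero(uint64_t word)
{
	uint64_t bit;

	for (bit = 0U; bit < 64U; bit++) {
		if ((word & (1ULL << bit)) == 0ULL) {
			break;
		}
	}
	return bit;
}

static struct demo_page *demo_alloc_page(struct demo_page_pool *pool)
{
	struct demo_page *page = NULL;
	uint64_t loop_idx, idx, bit;

	/* scan the bitmap, starting from the word that served the previous allocation */
	for (loop_idx = pool->last_hint_id;
		loop_idx < pool->last_hint_id + BITMAP_WORDS; loop_idx++) {
		idx = loop_idx % BITMAP_WORDS;
		if (pool->bitmap[idx] != ~0ULL) {		/* this word still has a free page */
			bit = find_first_zero(pool->bitmap[idx]);
			pool->bitmap[idx] |= (1ULL << bit);	/* mark the page as used */
			page = pool->start_page + ((idx << 6U) + bit);
			pool->last_hint_id = idx;
			break;
		}
	}
	if (page != NULL) {
		(void)memset(page, 0, sizeof(*page));		/* always hand out a zeroed page */
	}
	return page;
}

int main(void)
{
	struct demo_page *p1 = demo_alloc_page(&demo_pool);
	struct demo_page *p2 = demo_alloc_page(&demo_pool);

	printf("page 0 at %p, page 1 at %p, pool base %p\n",
		(void *)p1, (void *)p2, (void *)demo_pages);
	return 0;
}

The last_hint_id word index simply lets the next allocation resume its scan where the previous one succeeded, so the common case stays cheap while the bitmap leaves room for a future free/reuse path; any page-table level (PML4, PDPT, PD or PT) can now be served from the same per-VM pool instead of a dedicated static array.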
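Editor's note (not part of the patch): a back-of-the-envelope check of the memory now reserved by reserve_buffer_for_ept_pages(). It assumes MAXIMUM_PA_WIDTH = 39 as defined above, that PML4_PAGE_NUM()/PDPT_PAGE_NUM() follow the same per-level pattern as the PD_PAGE_NUM()/PT_PAGE_NUM() macros visible in page.h, and hypothetical values of 32 GiB platform RAM, 96 PCI devices and 8 VMs; none of these figures come from a real board configuration.

/* Rough sizing of the per-VM EPT page budget, mirroring the macro shapes from the patch.
 * RAM size, PCI device count and VM count below are hypothetical, not ACRN config values. */
#include <stdint.h>
#include <stdio.h>

#define GIB		(1ULL << 30)
#define PAGE_SZ		4096ULL
#define ROUND_UP(x, a)	((((x) + (a) - 1ULL) / (a)) * (a))

int main(void)
{
	uint64_t max_pa_space = 1ULL << 39;		/* MAXIMUM_PA_WIDTH = 39 -> 512 GiB */
	uint64_t platform_ram = 32ULL * GIB;		/* hypothetical CONFIG_PLATFORM_RAM_SIZE */
	uint64_t max_pci_dev  = 96ULL;			/* hypothetical CONFIG_MAX_PCI_DEV_NUM */
	uint64_t max_vm_num   = 8ULL;			/* hypothetical CONFIG_MAX_VM_NUM */

	uint64_t pml4_pages = 1ULL;			/* assumed: one PML4 page covers everything */
	uint64_t pdpt_pages = max_pa_space / (512ULL * GIB);	/* one PDPT page maps 512 GiB */
	uint64_t pd_pages   = max_pa_space / GIB;		/* one PD page maps 1 GiB */
	uint64_t pt_pages   = (platform_ram + 4ULL * GIB) / (2ULL << 20)	/* one PT page maps 2 MiB */
				+ max_pci_dev * 6ULL;		/* high MMIO: 6 BARs per device */

	uint64_t ept_pages = ROUND_UP(pml4_pages + pdpt_pages + pd_pages + pt_pages, 64ULL);

	printf("EPT pages per VM : %llu (~%llu MiB)\n",
		(unsigned long long)ept_pages,
		(unsigned long long)(ept_pages * PAGE_SZ / (1024 * 1024)));
	printf("Reserved for %llu VMs: ~%llu MiB of E820 memory\n",
		(unsigned long long)max_vm_num,
		(unsigned long long)(max_vm_num * ept_pages * PAGE_SZ / (1024 * 1024)));
	return 0;
}

Under those assumptions each VM's pool rounds up to 19584 pages (about 76 MiB), i.e. roughly 612 MiB carved out of E820 for 8 VMs, which is the kind of sanity check worth doing when picking CONFIG_PLATFORM_RAM_SIZE, CONFIG_MAX_PCI_DEV_NUM and CONFIG_MAX_VM_NUM for a board.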