diff --git a/hypervisor/arch/x86/mmu.c b/hypervisor/arch/x86/mmu.c index 41ac617fc..cec807bae 100644 --- a/hypervisor/arch/x86/mmu.c +++ b/hypervisor/arch/x86/mmu.c @@ -199,300 +199,6 @@ bool check_mmu_1gb_support(enum _page_table_type page_table_type) return status; } -static inline uint32_t -check_page_table_present(enum _page_table_type page_table_type, - uint64_t table_entry_arg) -{ - uint64_t table_entry = table_entry_arg; - if (page_table_type == PTT_EPT) { - table_entry &= (IA32E_EPT_R_BIT | IA32E_EPT_W_BIT | - IA32E_EPT_X_BIT); - /* RWX misconfiguration for: - * - write-only - * - write-execute - * - execute-only (if cap not support) - * no check for reserved bits - */ - if ((table_entry == IA32E_EPT_W_BIT) || - (table_entry == (IA32E_EPT_W_BIT | IA32E_EPT_X_BIT)) || - ((table_entry == IA32E_EPT_X_BIT) && - !cpu_has_vmx_ept_cap(VMX_EPT_EXECUTE_ONLY))) { - return PT_MISCFG_PRESENT; - } - } else { - table_entry &= (IA32E_COMM_P_BIT); - } - - return (table_entry != 0U) ? PT_PRESENT : PT_NOT_PRESENT; -} - -static uint32_t map_mem_region(void *vaddr, void *paddr, - void *table_base, uint64_t attr_arg, uint32_t table_level, - enum _page_table_type table_type) -{ - uint64_t table_entry; - uint64_t attr = attr_arg; - uint32_t table_offset; - uint32_t mapped_size; - - if ((table_base == NULL) || (table_level >= IA32E_UNKNOWN)) { - /* Shouldn't go here */ - ASSERT(false, "Incorrect Arguments. Failed to map region"); - return 0; - } - - /* switch based on of table */ - switch (table_level) { - case IA32E_PDPT: - - /* Get offset to the entry in the PDPT for this address */ - table_offset = IA32E_PDPTE_INDEX_CALC(vaddr); - - /* PS bit must be set for these entries to be mapped */ - attr |= IA32E_PDPTE_PS_BIT; - - /* Set mapped size to 1 GB */ - mapped_size = MEM_1G; - - break; - - case IA32E_PD: - - /* Get offset to the entry in the PD for this address */ - table_offset = IA32E_PDE_INDEX_CALC(vaddr); - - /* PS bit must be set for these entries to be mapped */ - attr |= IA32E_PDE_PS_BIT; - - /* Set mapped size to 2 MB */ - mapped_size = MEM_2M; - - break; - - case IA32E_PT: - - /* Get offset to the entry in the PT for this address */ - table_offset = IA32E_PTE_INDEX_CALC(vaddr); - - /* NOTE: No PS bit in page table entries */ - - /* Set mapped size to 4 KB */ - mapped_size = MEM_4K; - - /* If not a EPT entry, see if the PAT bit is set for PDPT entry - */ - if ((table_type == PTT_HOST) && ((attr & IA32E_PDPTE_PAT_BIT) != 0U)) { - /* The PAT bit is set; Clear it and set the page table - * PAT bit instead - */ - attr &= (uint64_t) (~((uint64_t) IA32E_PDPTE_PAT_BIT)); - attr |= IA32E_PTE_PAT_BIT; - } - - break; - - case IA32E_PML4: - default: - - /* Set mapping size to 0 - can't map memory in PML4 */ - mapped_size = 0U; - - break; - } - - /* Check to see if mapping should occur */ - if (mapped_size != 0U) { - /* Get current table entry */ - uint64_t entry = mem_read64(table_base + table_offset); - bool prev_entry_present = false; - - switch(check_page_table_present(table_type, entry)) { - case PT_PRESENT: - prev_entry_present = true; - break; - case PT_NOT_PRESENT: - prev_entry_present = false; - break; - case PT_MISCFG_PRESENT: - default: - ASSERT(false, "entry misconfigurated present bits"); - return 0; - } - - /* No need to confirm current table entry - * isn't already present - * support map-->remap - */ - table_entry = ((table_type == PTT_EPT) - ? 
attr - : (attr | IA32E_COMM_P_BIT)); - - table_entry |= (uint64_t)paddr; - - /* Write the table entry to map this memory */ - mem_write64(table_base + table_offset, table_entry); - - /* Invalidate TLB and page-structure cache, - * if it is the first mapping no need to invalidate TLB - */ - if ((table_type == PTT_HOST) && prev_entry_present) { - /* currently, all native mmu update is done at BSP, - * the assumption is that after AP start, there - * is no mmu update - so we can avoid shootdown issue - * for MP system. - * For invlpg after AP start, just panic here. - * - * TODO: add shootdown APs operation if MMU will be - * modified after AP start in the future. - */ - if ((phys_cpu_num != 0U) && - ((pcpu_active_bitmap & - ((1UL << phys_cpu_num) - 1)) - != (1UL << BOOT_CPU_ID))) { - panic("need shootdown for invlpg"); - } - inv_tlb_one_page(vaddr); - } - } - - /* Return mapped size to caller */ - return mapped_size; -} - -static uint32_t fetch_page_table_offset(void *addr, uint32_t table_level) -{ - uint32_t table_offset; - - /* Switch based on level of table */ - switch (table_level) { - case IA32E_PML4: - - /* Get offset to the entry in the PML4 - * for this address - */ - table_offset = IA32E_PML4E_INDEX_CALC(addr); - break; - - case IA32E_PDPT: - - /* Get offset to the entry in the PDPT - * for this address - */ - table_offset = IA32E_PDPTE_INDEX_CALC(addr); - break; - - case IA32E_PD: - - /* Get offset to the entry in the PD - * for this address - */ - table_offset = IA32E_PDE_INDEX_CALC(addr); - break; - - case IA32E_PT: - table_offset = IA32E_PTE_INDEX_CALC(addr); - break; - - default: - /* all callers should already make sure it will not come - * to here - */ - panic("Wrong page table level"); - break; - } - - return table_offset; -} - -static int get_table_entry(void *addr, void *table_base, - uint32_t table_level, uint64_t *table_entry) -{ - uint32_t table_offset; - - if ((table_base == NULL) || (table_level >= IA32E_UNKNOWN)) { - ASSERT(false, "Incorrect Arguments"); - return -EINVAL; - } - - table_offset = fetch_page_table_offset(addr, table_level); - - /* Read the table entry */ - *table_entry = mem_read64(table_base + table_offset); - - return 0; -} - -static void *walk_paging_struct(void *addr, void *table_base, - uint32_t table_level, struct mem_map_params *map_params, - uint64_t attr) -{ - uint32_t table_offset; - uint64_t table_entry; - uint64_t entry_present; - /* if table_level == IA32E_PT Just return the same address - * can't walk down any further - */ - void *sub_table_addr = (table_level == IA32E_PT) ? 
table_base : NULL; - - if ((table_base == NULL) || (table_level >= IA32E_UNKNOWN) - || (map_params == NULL)) { - ASSERT(false, "Incorrect Arguments"); - return NULL; - } - - table_offset = fetch_page_table_offset(addr, table_level); - - /* See if we can skip the rest */ - if (sub_table_addr != table_base) { - /* Read the table entry */ - table_entry = mem_read64(table_base + table_offset); - - /* Check if EPT entry being created */ - if (map_params->page_table_type == PTT_EPT) { - /* Set table present bits to any of the - * read/write/execute bits - */ - entry_present = (IA32E_EPT_R_BIT | IA32E_EPT_W_BIT | - IA32E_EPT_X_BIT); - } else { - /* Set table preset bits to P bit or r/w bit */ - entry_present = IA32E_COMM_P_BIT; - } - - /* Determine if a valid entry exists */ - if ((table_entry & entry_present) == 0UL) { - /* No entry present - need to allocate a new table */ - sub_table_addr = alloc_paging_struct(); - /* Check to ensure memory available for this structure*/ - if (sub_table_addr == NULL) { - /* Error: Unable to find table memory necessary - * to map memory - */ - ASSERT(false, "Fail to alloc table memory " - "for map memory"); - - return NULL; - } - - /* Write entry to current table to reference the new - * sub-table - */ - if (map_params->page_table_type == PTT_HOST) { - entry_present |= attr; - } - - mem_write64(table_base + table_offset, - HVA2HPA(sub_table_addr) | entry_present); - } else { - /* Get address of the sub-table */ - sub_table_addr = HPA2HVA(table_entry & IA32E_REF_MASK); - } - } - - /* Return the next table in the walk */ - return sub_table_addr; -} - uint64_t get_paging_pml4(void) { /* Return address to caller */ @@ -606,391 +312,3 @@ bool check_continuous_hpa(struct vm *vm, uint64_t gpa_arg, uint64_t size_arg) return true; } - -int obtain_last_page_table_entry(struct mem_map_params *map_params, - struct entry_params *entry, void *addr, bool direct) -{ - uint64_t table_entry; - uint32_t entry_present = 0U; - int ret = 0; - /* Obtain the PML4 address */ - void *table_addr = direct ? (map_params->pml4_base) - : (map_params->pml4_inverted); - - /* Obtain page table entry from PML4 table*/ - ret = get_table_entry(addr, table_addr, IA32E_PML4, &table_entry); - if (ret < 0) { - return ret; - } - entry_present = check_page_table_present(map_params->page_table_type, - table_entry); - if (entry_present == PT_MISCFG_PRESENT) { - pr_err("Present bits misconfigurated"); - return -EINVAL; - } else if (entry_present == PT_NOT_PRESENT) { - /* PML4E not present, return PML4 base address */ - entry->entry_level = IA32E_PML4; - entry->entry_base = table_addr; - entry->entry_present = PT_NOT_PRESENT; - entry->page_size = - check_mmu_1gb_support(map_params->page_table_type) ? 
- (PAGE_SIZE_1G) : (PAGE_SIZE_2M); - entry->entry_off = fetch_page_table_offset(addr, IA32E_PML4); - entry->entry_val = table_entry; - return 0; - } - - /* Obtain page table entry from PDPT table*/ - table_addr = HPA2HVA(table_entry & IA32E_REF_MASK); - ret = get_table_entry(addr, table_addr, IA32E_PDPT, &table_entry); - if (ret < 0) { - return ret; - } - entry_present = check_page_table_present(map_params->page_table_type, - table_entry); - if (entry_present == PT_MISCFG_PRESENT) { - pr_err("Present bits misconfigurated"); - return -EINVAL; - } else if (entry_present == PT_NOT_PRESENT) { - /* PDPTE not present, return PDPT base address */ - entry->entry_level = IA32E_PDPT; - entry->entry_base = table_addr; - entry->entry_present = PT_NOT_PRESENT; - entry->page_size = - check_mmu_1gb_support(map_params->page_table_type) ? - (PAGE_SIZE_1G) : (PAGE_SIZE_2M); - entry->entry_off = fetch_page_table_offset(addr, IA32E_PDPT); - entry->entry_val = table_entry; - return 0; - } - if ((table_entry & IA32E_PDPTE_PS_BIT) != 0U) { - /* 1GB page size, return the base addr of the pg entry*/ - entry->entry_level = IA32E_PDPT; - entry->entry_base = table_addr; - entry->page_size = - check_mmu_1gb_support(map_params->page_table_type) ? - (PAGE_SIZE_1G) : (PAGE_SIZE_2M); - entry->entry_present = PT_PRESENT; - entry->entry_off = fetch_page_table_offset(addr, IA32E_PDPT); - entry->entry_val = table_entry; - return 0; - } - - /* Obtain page table entry from PD table*/ - table_addr = HPA2HVA(table_entry & IA32E_REF_MASK); - ret = get_table_entry(addr, table_addr, IA32E_PD, &table_entry); - if (ret < 0) { - return ret; - } - entry_present = check_page_table_present(map_params->page_table_type, - table_entry); - if (entry_present == PT_MISCFG_PRESENT) { - pr_err("Present bits misconfigurated"); - return -EINVAL; - } else if (entry_present == PT_NOT_PRESENT) { - /* PDE not present, return PDE base address */ - entry->entry_level = IA32E_PD; - entry->entry_base = table_addr; - entry->entry_present = PT_NOT_PRESENT; - entry->page_size = PAGE_SIZE_2M; - entry->entry_off = fetch_page_table_offset(addr, IA32E_PD); - entry->entry_val = table_entry; - return 0; - } - if ((table_entry & IA32E_PDE_PS_BIT) != 0U) { - /* 2MB page size, return the base addr of the pg entry*/ - entry->entry_level = IA32E_PD; - entry->entry_base = table_addr; - entry->entry_present = PT_PRESENT; - entry->page_size = PAGE_SIZE_2M; - entry->entry_off = fetch_page_table_offset(addr, IA32E_PD); - entry->entry_val = table_entry; - return 0; - } - - /* Obtain page table entry from PT table*/ - table_addr = HPA2HVA(table_entry & IA32E_REF_MASK); - ret = get_table_entry(addr, table_addr, IA32E_PT, &table_entry); - if (ret < 0) { - return ret; - } - entry_present = check_page_table_present(map_params->page_table_type, - table_entry); - if (entry_present == PT_MISCFG_PRESENT) { - pr_err("Present bits misconfigurated"); - return -EINVAL; - } - entry->entry_present = ((entry_present == PT_PRESENT) - ? 
(PT_PRESENT):(PT_NOT_PRESENT)); - entry->entry_level = IA32E_PT; - entry->entry_base = table_addr; - entry->page_size = PAGE_SIZE_4K; - entry->entry_off = fetch_page_table_offset(addr, IA32E_PT); - entry->entry_val = table_entry; - - return 0; -} - -static uint64_t update_page_table_entry(struct mem_map_params *map_params, - void *paddr, void *vaddr, uint64_t size, uint64_t attr, - bool direct) -{ - uint64_t remaining_size = size; - uint32_t adjustment_size; - int table_type = map_params->page_table_type; - /* Obtain the PML4 address */ - void *table_addr = direct ? (map_params->pml4_base) - : (map_params->pml4_inverted); - - /* Walk from the PML4 table to the PDPT table */ - table_addr = walk_paging_struct(vaddr, table_addr, IA32E_PML4, - map_params, attr); - if (table_addr == NULL) { - return 0; - } - - if ((remaining_size >= MEM_1G) - && (MEM_ALIGNED_CHECK(vaddr, MEM_1G)) - && (MEM_ALIGNED_CHECK(paddr, MEM_1G)) - && check_mmu_1gb_support(map_params->page_table_type)) { - /* Map this 1 GByte memory region */ - adjustment_size = map_mem_region(vaddr, paddr, - table_addr, attr, IA32E_PDPT, - table_type); - } else if ((remaining_size >= MEM_2M) - && (MEM_ALIGNED_CHECK(vaddr, MEM_2M)) - && (MEM_ALIGNED_CHECK(paddr, MEM_2M))) { - /* Walk from the PDPT table to the PD table */ - table_addr = walk_paging_struct(vaddr, table_addr, - IA32E_PDPT, map_params, attr); - if (table_addr == NULL) { - return 0; - } - /* Map this 2 MByte memory region */ - adjustment_size = map_mem_region(vaddr, paddr, - table_addr, attr, IA32E_PD, table_type); - } else { - /* Walk from the PDPT table to the PD table */ - table_addr = walk_paging_struct(vaddr, - table_addr, IA32E_PDPT, map_params, attr); - if (table_addr == NULL) { - return 0; - } - /* Walk from the PD table to the page table */ - table_addr = walk_paging_struct(vaddr, - table_addr, IA32E_PD, map_params, attr); - if (table_addr == NULL) { - return 0; - } - /* Map this 4 KByte memory region */ - adjustment_size = map_mem_region(vaddr, paddr, - table_addr, attr, IA32E_PT, - table_type); - } - - return adjustment_size; -} - -static uint64_t break_page_table(struct mem_map_params *map_params, void *paddr, - void *vaddr, uint64_t page_size, bool direct) -{ - uint32_t i = 0U; - uint64_t pa; - uint64_t attr = 0x0UL; - uint64_t next_page_size = 0x0UL; - void *sub_tab_addr = NULL; - struct entry_params entry; - - switch (page_size) { - /* Breaking 1GB page to 2MB page*/ - case PAGE_SIZE_1G: - next_page_size = PAGE_SIZE_2M; - attr |= IA32E_PDE_PS_BIT; - pr_info("%s, Breaking 1GB -->2MB vaddr=0x%llx", - __func__, vaddr); - break; - - /* Breaking 2MB page to 4KB page*/ - case PAGE_SIZE_2M: - next_page_size = PAGE_SIZE_4K; - pr_info("%s, Breaking 2MB -->4KB vaddr=0x%llx", - __func__, vaddr); - break; - - /* 4KB page, No action*/ - case PAGE_SIZE_4K: - default: - next_page_size = PAGE_SIZE_4K; - pr_info("%s, Breaking 4KB no action vaddr=0x%llx", - __func__, vaddr); - break; - } - - if (page_size != next_page_size) { - if (obtain_last_page_table_entry(map_params, &entry, vaddr, - direct) < 0) { - pr_err("Fail to obtain last page table entry"); - return 0; - } - - /* New entry present - need to allocate a new table */ - sub_tab_addr = alloc_paging_struct(); - /* Check to ensure memory available for this structure */ - if (sub_tab_addr == NULL) { - /* Error: - * Unable to find table memory necessary to map memory - */ - pr_err("Fail to find table memory for map memory"); - ASSERT(false, "fail to alloc table memory for map memory"); - return 0; - } - - /* the physical 
address maybe be not aligned of - * current page size, obtain the starting physical address - * aligned of current page size - */ - pa = ((uint64_t)paddr) & ~(page_size - 1); - if (map_params->page_table_type == PTT_EPT) { - /* Keep original attribute(here &0x3f) - * bit 0(R) bit1(W) bit2(X) bit3~5 MT - */ - attr |= (entry.entry_val & 0x3fUL); - } else { - /* Keep original attribute(here &0x7f) */ - attr |= (entry.entry_val & 0x7fUL); - } - /* write all entries and keep original attr*/ - for (i = 0U; i < IA32E_NUM_ENTRIES; i++) { - mem_write64(sub_tab_addr + (i * IA32E_COMM_ENTRY_SIZE), - (attr | (pa + (i * next_page_size)))); - } - if (map_params->page_table_type == PTT_EPT) { - /* Write the table entry to map this memory, - * SDM chapter28 figure 28-1 - * bit 0(R) bit1(W) bit2(X) bit3~5 MUST be reserved - * (here &0x07) - */ - mem_write64(entry.entry_base + entry.entry_off, - (entry.entry_val & 0x07UL) | - HVA2HPA(sub_tab_addr)); - } else { - /* Write the table entry to map this memory, - * SDM chapter4 figure 4-11 - * bit0(P) bit1(RW) bit2(U/S) bit3(PWT) bit4(PCD) - * bit5(A) bit6(D or Ignore) - */ - mem_write64(entry.entry_base + entry.entry_off, - (entry.entry_val & 0x7fUL) | - HVA2HPA(sub_tab_addr)); - } - } - - return next_page_size; -} - -static int modify_paging(struct mem_map_params *map_params, void *paddr_arg, - void *vaddr_arg, uint64_t size, uint32_t flags, bool direct) -{ - void *vaddr = vaddr_arg; - void *paddr = paddr_arg; - int64_t remaining_size; - uint64_t adjust_size; - uint64_t attr = flags; - struct entry_params entry; - uint64_t page_size; - uint64_t vaddr_end = ((uint64_t)vaddr) + size; - - /* if the address is not PAGE aligned, will drop - * the unaligned part - */ - paddr = (void *)ROUND_PAGE_UP((uint64_t)paddr); - vaddr = (void *)ROUND_PAGE_UP((uint64_t)vaddr); - vaddr_end = ROUND_PAGE_DOWN(vaddr_end); - remaining_size = vaddr_end - (uint64_t)vaddr; - - if (map_params == NULL) { - pr_err("%s: vaddr=0x%llx size=0x%llx", - __func__, vaddr, size); - ASSERT(false, "Incorrect Arguments"); - return -EINVAL; - } - - /* Check ept misconfigurations, - * rwx misconfiguration in the following conditions: - * - write-only - * - write-execute - * - execute-only(if capability not support) - * here attr & 0x7, rwx bit0:2 - */ - ASSERT(!((map_params->page_table_type == PTT_EPT) && - (((attr & 0x7UL) == IA32E_EPT_W_BIT) || - ((attr & 0x7UL) == (IA32E_EPT_W_BIT | IA32E_EPT_X_BIT)) || - (((attr & 0x7UL) == IA32E_EPT_X_BIT) && - !cpu_has_vmx_ept_cap(VMX_EPT_EXECUTE_ONLY)))), - "incorrect memory attribute set!\n"); - /* Loop until the entire block of memory is appropriately - * MAP/UNMAP/MODIFY - */ - while (remaining_size > 0) { - if (obtain_last_page_table_entry(map_params, &entry, vaddr, - direct) < 0) { - return -EINVAL; - } - - page_size = entry.page_size; - - /* if the address is NOT aligned of current page size, - * or required memory size < page size - * need to break page firstly - */ - if (entry.entry_present == PT_PRESENT) { - /* Maybe need to recursive breaking in this case - * e.g. 1GB->2MB->4KB - */ - while (((uint64_t)remaining_size < page_size) - || (!MEM_ALIGNED_CHECK(vaddr, page_size)) - || (!MEM_ALIGNED_CHECK(paddr, page_size))) { - /* The breaking function return the page size - * of next level page table - */ - page_size = break_page_table(map_params, - paddr, vaddr, page_size, direct); - if (page_size == 0UL) { - return -EINVAL; - } - } - } else { - page_size = ((uint64_t)remaining_size < page_size) - ? 
((uint64_t)remaining_size) : (page_size); - } - /* The function return the memory size that one entry can map */ - adjust_size = update_page_table_entry(map_params, paddr, vaddr, - page_size, attr, direct); - if (adjust_size == 0UL) { - return -EINVAL; - } - vaddr += adjust_size; - paddr += adjust_size; - remaining_size -= adjust_size; - } - - return 0; -} - -int map_mem(struct mem_map_params *map_params, void *paddr, void *vaddr, - uint64_t size, uint32_t flags) -{ - int ret = 0; - - /* used for MMU and EPT*/ - ret = modify_paging(map_params, paddr, vaddr, size, flags, true); - if (ret < 0) { - return ret; - } - /* only for EPT */ - if (map_params->page_table_type == PTT_EPT) { - ret = modify_paging(map_params, vaddr, paddr, size, flags, - false); - } - return ret; -} diff --git a/hypervisor/include/arch/x86/mmu.h b/hypervisor/include/arch/x86/mmu.h index cb633462e..84af7d2e4 100644 --- a/hypervisor/include/arch/x86/mmu.h +++ b/hypervisor/include/arch/x86/mmu.h @@ -220,23 +220,6 @@ enum _page_table_type { PAGETABLE_TYPE_UNKNOWN, }; -struct mem_map_params { - /* enum _page_table_type: HOST or EPT*/ - enum _page_table_type page_table_type; - /* used HVA->HPA for HOST, used GPA->HPA for EPT */ - void *pml4_base; - /* used HPA->HVA for HOST, used HPA->GPA for EPT */ - void *pml4_inverted; -}; -struct entry_params { - uint32_t entry_level; - uint32_t entry_present; - void *entry_base; - uint64_t entry_off; - uint64_t entry_val; - uint64_t page_size; -}; - /* Represent the 4 levels of translation tables in IA-32e paging mode */ enum _page_table_level { IA32E_PML4 = 0, @@ -310,8 +293,6 @@ void free_paging_struct(void *ptr); void enable_paging(uint64_t pml4_base_addr); void enable_smep(void); void init_paging(void); -int map_mem(struct mem_map_params *map_params, void *paddr, void *vaddr, - uint64_t size, uint32_t flags); int mmu_add(uint64_t *pml4_page, uint64_t paddr_base, uint64_t vaddr_base, uint64_t size, uint64_t prot, enum _page_table_type ptt); @@ -325,8 +306,6 @@ void flush_vpid_single(uint16_t vpid); void flush_vpid_global(void); void invept(struct vcpu *vcpu); bool check_continuous_hpa(struct vm *vm, uint64_t gpa_arg, uint64_t size_arg); -int obtain_last_page_table_entry(struct mem_map_params *map_params, - struct entry_params *entry, void *addr, bool direct); uint64_t *lookup_address(uint64_t *pml4_page, uint64_t addr, uint64_t *pg_size, enum _page_table_type ptt);
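
For reference, with map_mem() and struct mem_map_params removed, a caller
would go through the retained mmu_add() interface visible in the mmu.h hunk
above. Below is a minimal, hypothetical sketch of such a call -- the wrapper
function and the address/size constants are illustrative assumptions, not
code from this patch; only mmu_add(), PTT_EPT, MEM_2M, and the
IA32E_EPT_*_BIT attribute macros are taken from the diff itself:

/* Hypothetical usage sketch; not part of this patch. */
static int example_ept_map(uint64_t *pml4_page)
{
	/* Illustrative guest-physical and host-physical base addresses. */
	uint64_t gpa = 0x100000000UL;
	uint64_t hpa = 0x200000000UL;
	/* Read/write/execute attributes. This combination avoids the EPT
	 * misconfigurations the removed check_page_table_present()
	 * rejected: write-only, write-execute, and execute-only without
	 * the VMX_EPT_EXECUTE_ONLY capability.
	 */
	uint64_t prot = IA32E_EPT_R_BIT | IA32E_EPT_W_BIT | IA32E_EPT_X_BIT;

	/* paddr_base (HPA) precedes vaddr_base (GPA) in the retained
	 * prototype: mmu_add(pml4_page, paddr_base, vaddr_base, size,
	 * prot, ptt).
	 */
	return mmu_add(pml4_page, hpa, gpa, MEM_2M, prot, PTT_EPT);
}

One behavioral difference worth noting: the removed map_mem() also kept an
inverted (HPA->GPA) walk up to date via pml4_inverted, while the retained
prototype takes a single pml4_page, so an inverted mapping, if still
required, would presumably be a second mmu_add() call against a separate
root table.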