diff --git a/hypervisor/Makefile b/hypervisor/Makefile
index 2167da77b..e8a1f624c 100644
--- a/hypervisor/Makefile
+++ b/hypervisor/Makefile
@@ -118,6 +118,7 @@ C_SRCS += arch/x86/cpu.c
 C_SRCS += arch/x86/softirq.c
 C_SRCS += arch/x86/cpuid.c
 C_SRCS += arch/x86/mmu.c
+C_SRCS += arch/x86/pagetable.c
 C_SRCS += arch/x86/notify.c
 C_SRCS += arch/x86/vtd.c
 C_SRCS += arch/x86/gdt.c
diff --git a/hypervisor/arch/x86/pagetable.c b/hypervisor/arch/x86/pagetable.c
new file mode 100644
index 000000000..685953d96
--- /dev/null
+++ b/hypervisor/arch/x86/pagetable.c
@@ -0,0 +1,244 @@
+/*
+ * Copyright (C) 2018 Intel Corporation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+#include <hypervisor.h>
+
+#define ACRN_DBG_MMU 6U
+
+/*
+ * Split a large page (1GB PDPTE or 2MB PDE) into a next-level page table.
+ */
+static int split_large_page(uint64_t *pte,
+		enum _page_table_level level,
+		enum _page_table_type ptt)
+{
+	int ret = -EINVAL;
+	uint64_t *pbase;
+	uint64_t ref_paddr, paddr, paddrinc;
+	uint64_t i, ref_prot;
+
+	switch (level) {
+	case IA32E_PDPT:
+		ref_paddr = (*pte) & PDPTE_PFN_MASK;
+		paddrinc = PDE_SIZE;
+		ref_prot = (*pte) & ~PDPTE_PFN_MASK;
+		break;
+	case IA32E_PD:
+		ref_paddr = (*pte) & PDE_PFN_MASK;
+		paddrinc = PTE_SIZE;
+		ref_prot = (*pte) & ~PDE_PFN_MASK;
+		ref_prot &= ~PAGE_PSE;
+		break;
+	default:
+		return ret;
+	}
+
+	dev_dbg(ACRN_DBG_MMU, "%s, paddr: 0x%llx\n", __func__, ref_paddr);
+
+	pbase = (uint64_t *)alloc_paging_struct();
+	if (pbase == NULL) {
+		return -ENOMEM;
+	}
+
+	paddr = ref_paddr;
+	for (i = 0UL; i < PTRS_PER_PTE; i++) {
+		set_pte(pbase + i, paddr | ref_prot);
+		paddr += paddrinc;
+	}
+
+	ref_prot = (ptt == PTT_HOST) ? PAGE_TABLE : EPT_RWX;
+	set_pte(pte, HVA2HPA((void *)pbase) | ref_prot);
+
+	/* TODO: flush the TLB */
+
+	return 0;
+}
+
+/*
+ * At the PT level, modify the page-table attributes of the memory
+ * region [vaddr_start, vaddr_end).
+ */
+static int modify_pte(uint64_t *pde,
+		uint64_t vaddr_start, uint64_t vaddr_end,
+		uint64_t prot_set, uint64_t prot_clr,
+		enum _page_table_type ptt)
+{
+	uint64_t *pd_page = pde_page_vaddr(*pde);
+	uint64_t vaddr = vaddr_start;
+	uint64_t index = pte_index(vaddr);
+
+	dev_dbg(ACRN_DBG_MMU, "%s, vaddr: [0x%llx - 0x%llx]\n",
+		__func__, vaddr, vaddr_end);
+	for (; index < PTRS_PER_PTE; index++) {
+		uint64_t new_pte, *pte = pd_page + index;
+		uint64_t vaddr_next = (vaddr & PTE_MASK) + PTE_SIZE;
+
+		if (pgentry_present(ptt, *pte) == 0UL) {
+			pr_err("%s, invalid op, pte not present\n", __func__);
+			return -EFAULT;
+		}
+
+		new_pte = *pte;
+		new_pte &= ~prot_clr;
+		new_pte |= prot_set;
+		set_pte(pte, new_pte);
+
+		if (vaddr_next >= vaddr_end) {
+			break;
+		}
+		vaddr = vaddr_next;
+	}
+
+	return 0;
+}
+
+/*
+ * At the PD level, modify the page-table attributes of the memory
+ * region [vaddr_start, vaddr_end).
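+ *
+ * A large (2MB) PDE that extends past vaddr_end is first split into
+ * 512 4K PTEs via split_large_page() so that only the overlapping part
+ * of the region is modified; a large PDE that ends within the region
+ * has its attributes modified in place.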
+ */
+static int modify_pde(uint64_t *pdpte,
+		uint64_t vaddr_start, uint64_t vaddr_end,
+		uint64_t prot_set, uint64_t prot_clr,
+		enum _page_table_type ptt)
+{
+	int ret = 0;
+	uint64_t *pdpt_page = pdpte_page_vaddr(*pdpte);
+	uint64_t vaddr = vaddr_start;
+	uint64_t index = pde_index(vaddr);
+
+	dev_dbg(ACRN_DBG_MMU, "%s, vaddr: [0x%llx - 0x%llx]\n",
+		__func__, vaddr, vaddr_end);
+	for (; index < PTRS_PER_PDE; index++) {
+		uint64_t *pde = pdpt_page + index;
+		uint64_t vaddr_next = (vaddr & PDE_MASK) + PDE_SIZE;
+
+		if (pgentry_present(ptt, *pde) == 0UL) {
+			pr_err("%s, invalid op, pde not present\n", __func__);
+			return -EFAULT;
+		}
+		if (pde_large(*pde) != 0UL) {
+			if (vaddr_next > vaddr_end) {
+				ret = split_large_page(pde, IA32E_PD, ptt);
+				if (ret != 0) {
+					return ret;
+				}
+			} else {
+				uint64_t new_pde = *pde;
+				new_pde &= ~prot_clr;
+				new_pde |= prot_set;
+				set_pte(pde, new_pde);
+				if (vaddr_next < vaddr_end) {
+					vaddr = vaddr_next;
+					continue;
+				}
+				return 0;
+			}
+		}
+		ret = modify_pte(pde, vaddr, vaddr_end,
+				prot_set, prot_clr, ptt);
+		if (ret != 0 || (vaddr_next >= vaddr_end)) {
+			return ret;
+		}
+		vaddr = vaddr_next;
+	}
+
+	return ret;
+}
+
+/*
+ * At the PDPT level, modify the page-table attributes of the memory
+ * region [vaddr_start, vaddr_end).
+ */
+static int modify_pdpte(uint64_t *pml4e,
+		uint64_t vaddr_start, uint64_t vaddr_end,
+		uint64_t prot_set, uint64_t prot_clr,
+		enum _page_table_type ptt)
+{
+	int ret = 0;
+	uint64_t *pml4_page = pml4e_page_vaddr(*pml4e);
+	uint64_t vaddr = vaddr_start;
+	uint64_t index = pdpte_index(vaddr);
+
+	dev_dbg(ACRN_DBG_MMU, "%s, vaddr: [0x%llx - 0x%llx]\n",
+		__func__, vaddr, vaddr_end);
+	for (; index < PTRS_PER_PDPTE; index++) {
+		uint64_t *pdpte = pml4_page + index;
+		uint64_t vaddr_next = (vaddr & PDPTE_MASK) + PDPTE_SIZE;
+
+		if (pgentry_present(ptt, *pdpte) == 0UL) {
+			pr_err("%s, invalid op, pdpte not present\n", __func__);
+			return -EFAULT;
+		}
+		if (pdpte_large(*pdpte) != 0UL) {
+			if (vaddr_next > vaddr_end) {
+				ret = split_large_page(pdpte, IA32E_PDPT, ptt);
+				if (ret != 0) {
+					return ret;
+				}
+			} else {
+				uint64_t new_pdpte = *pdpte;
+				new_pdpte &= ~prot_clr;
+				new_pdpte |= prot_set;
+				set_pte(pdpte, new_pdpte);
+				if (vaddr_next < vaddr_end) {
+					vaddr = vaddr_next;
+					continue;
+				}
+				return 0;
+			}
+		}
+		ret = modify_pde(pdpte, vaddr, vaddr_end,
+				prot_set, prot_clr, ptt);
+		if (ret != 0 || (vaddr_next >= vaddr_end)) {
+			return ret;
+		}
+		vaddr = vaddr_next;
+	}
+
+	return ret;
+}
+
+/*
+ * Modify the page-table attributes of the memory region
+ * [vaddr, vaddr + size).
+ * prot_set - attribute bits to set
+ * prot_clr - attribute bits to clear
+ * @pre: prot_set and prot_clr must be prepared before calling this
+ * function. To modify access rights only, set prot_set to the bits you
+ * want to set and prot_clr to the bits you want to clear. To modify the
+ * memory type (MT), set prot_set to the desired MT bits and prot_clr to
+ * the MT mask.
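+ *
+ * Usage sketch (illustrative only: "host_pml4"/"ept_pml4" stand for the
+ * caller's page-table roots, and PTT_EPT is the assumed name of the
+ * non-host page table type):
+ *
+ *   mmu_modify(host_pml4, vaddr, size, 0UL, PAGE_RW, PTT_HOST);
+ *     -- clear PAGE_RW, i.e. make the range read-only
+ *
+ *   mmu_modify(ept_pml4, gpa, size, EPT_WB, EPT_MT_MASK, PTT_EPT);
+ *     -- replace the range's EPT memory type with write-back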
+ */
+int mmu_modify(uint64_t *pml4_page,
+		uint64_t vaddr_base, uint64_t size,
+		uint64_t prot_set, uint64_t prot_clr,
+		enum _page_table_type ptt)
+{
+	uint64_t vaddr = vaddr_base;
+	uint64_t vaddr_next, vaddr_end;
+	uint64_t *pml4e;
+	int ret;
+
+	if (!MEM_ALIGNED_CHECK(vaddr, PAGE_SIZE_4K) ||
+		!MEM_ALIGNED_CHECK(size, PAGE_SIZE_4K)) {
+		pr_err("%s, invalid parameters!\n", __func__);
+		return -EINVAL;
+	}
+
+	dev_dbg(ACRN_DBG_MMU, "%s, vaddr: 0x%llx, size: 0x%llx\n",
+		__func__, vaddr, size);
+	vaddr_end = vaddr + size;
+	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
+		vaddr_next = (vaddr & PML4E_MASK) + PML4E_SIZE;
+		pml4e = pml4e_offset(pml4_page, vaddr);
+		if (pgentry_present(ptt, *pml4e) == 0UL) {
+			pr_err("%s, invalid op, pml4e not present\n", __func__);
+			return -EFAULT;
+		}
+		ret = modify_pdpte(pml4e, vaddr, vaddr_end,
+				prot_set, prot_clr, ptt);
+		if (ret != 0) {
+			return ret;
+		}
+	}
+
+	return 0;
+}
diff --git a/hypervisor/include/arch/x86/hv_arch.h b/hypervisor/include/arch/x86/hv_arch.h
index 81bece780..58bcf6c91 100644
--- a/hypervisor/include/arch/x86/hv_arch.h
+++ b/hypervisor/include/arch/x86/hv_arch.h
@@ -23,6 +23,8 @@
 #include
 #include
 #include
+#include <pgtable_types.h>
+#include <pgtable.h>
 #include
 #include
 #include
diff --git a/hypervisor/include/arch/x86/mmu.h b/hypervisor/include/arch/x86/mmu.h
index be80f6cc1..552153653 100644
--- a/hypervisor/include/arch/x86/mmu.h
+++ b/hypervisor/include/arch/x86/mmu.h
@@ -329,6 +329,10 @@ int modify_mem(struct map_params *map_params, void *paddr, void *vaddr,
 		uint64_t size, uint32_t flags);
 int modify_mem_mt(struct map_params *map_params, void *paddr, void *vaddr,
 		uint64_t size, uint32_t flags);
+int mmu_modify(uint64_t *pml4_page,
+		uint64_t vaddr_base, uint64_t size,
+		uint64_t prot_set, uint64_t prot_clr,
+		enum _page_table_type ptt);
 int check_vmx_mmu_cap(void);
 uint16_t allocate_vpid(void);
 void flush_vpid_single(uint16_t vpid);
diff --git a/hypervisor/include/arch/x86/pgtable.h b/hypervisor/include/arch/x86/pgtable.h
new file mode 100644
index 000000000..ec174f50b
--- /dev/null
+++ b/hypervisor/include/arch/x86/pgtable.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2018 Intel Corporation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef PGTABLE_H
+#define PGTABLE_H
+
+#include <pgtable_types.h>
+
+/* hpa <--> hva, now it is 1:1 mapping */
+#define HPA2HVA(x) ((void *)(x))
+#define HVA2HPA(x) ((uint64_t)(x))
+
+static inline uint64_t pml4e_index(uint64_t address)
+{
+	return (address >> PML4E_SHIFT) & (PTRS_PER_PML4E - 1UL);
+}
+
+static inline uint64_t pdpte_index(uint64_t address)
+{
+	return (address >> PDPTE_SHIFT) & (PTRS_PER_PDPTE - 1UL);
+}
+
+static inline uint64_t pde_index(uint64_t address)
+{
+	return (address >> PDE_SHIFT) & (PTRS_PER_PDE - 1UL);
+}
+
+static inline uint64_t pte_index(uint64_t address)
+{
+	return (address >> PTE_SHIFT) & (PTRS_PER_PTE - 1UL);
+}
+
+static inline uint64_t *pml4e_page_vaddr(uint64_t pml4e)
+{
+	return HPA2HVA(pml4e & PML4E_PFN_MASK);
+}
+
+static inline uint64_t *pdpte_page_vaddr(uint64_t pdpte)
+{
+	return HPA2HVA(pdpte & PDPTE_PFN_MASK);
+}
+
+static inline uint64_t *pde_page_vaddr(uint64_t pde)
+{
+	return HPA2HVA(pde & PDE_PFN_MASK);
+}
+
+static inline uint64_t *pml4e_offset(uint64_t *pml4_page, uint64_t addr)
+{
+	return pml4_page + pml4e_index(addr);
+}
+
+static inline uint64_t *pdpte_offset(uint64_t *pml4e, uint64_t addr)
+{
+	return pml4e_page_vaddr(*pml4e) + pdpte_index(addr);
+}
+
+static inline uint64_t *pde_offset(uint64_t *pdpte, uint64_t addr)
+{
+	return pdpte_page_vaddr(*pdpte) + pde_index(addr);
+}
+
+static inline uint64_t *pte_offset(uint64_t *pde, uint64_t addr)
+{
+	return pde_page_vaddr(*pde) + pte_index(addr);
+}
+
+static inline uint64_t get_pte(uint64_t *pte)
+{
+	return *pte;
+}
+
+static inline void set_pte(uint64_t *ptep, uint64_t pte)
+{
+	*ptep = pte;
+}
+
+static inline uint64_t pde_large(uint64_t pde)
+{
+	return pde & PAGE_PSE;
+}
+
+static inline uint64_t pdpte_large(uint64_t pdpte)
+{
+	return pdpte & PAGE_PSE;
+}
+
+static inline uint64_t pgentry_present(enum _page_table_type ptt, uint64_t pte)
+{
+	return (ptt == PTT_HOST) ? (pte & PAGE_PRESENT) : (pte & EPT_RWX);
+}
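+
+/*
+ * Illustrative helper (not part of the original patch): walk all four
+ * levels with the offset helpers above to reach the PTE that maps the
+ * 4K page containing 'addr'. A real caller would check each entry with
+ * pgentry_present() and pde_large()/pdpte_large() before descending.
+ */
+static inline uint64_t *pte_walk_sketch(uint64_t *pml4_page, uint64_t addr)
+{
+	uint64_t *pml4e = pml4e_offset(pml4_page, addr);
+	uint64_t *pdpte = pdpte_offset(pml4e, addr);
+	uint64_t *pde = pde_offset(pdpte, addr);
+
+	return pte_offset(pde, addr);
+}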
+
+#endif /* PGTABLE_H */
diff --git a/hypervisor/include/arch/x86/pgtable_types.h b/hypervisor/include/arch/x86/pgtable_types.h
new file mode 100644
index 000000000..9ab18e981
--- /dev/null
+++ b/hypervisor/include/arch/x86/pgtable_types.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2018 Intel Corporation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef PGTABLE_TYPES_H
+#define PGTABLE_TYPES_H
+
+#define PAGE_PRESENT (1UL << 0U)
+#define PAGE_RW (1UL << 1U)
+#define PAGE_USER (1UL << 2U)
+#define PAGE_PWT (1UL << 3U)
+#define PAGE_PCD (1UL << 4U)
+#define PAGE_ACCESSED (1UL << 5U)
+#define PAGE_DIRTY (1UL << 6U)
+#define PAGE_PSE (1UL << 7U)
+#define PAGE_GLOBAL (1UL << 8U)
+#define PAGE_PAT_LARGE (1UL << 12U)
+#define PAGE_NX (1UL << 63U)
+
+#define PAGE_CACHE_MASK (PAGE_PCD | PAGE_PWT)
+#define PAGE_CACHE_WB 0UL
+#define PAGE_CACHE_WT PAGE_PWT
+#define PAGE_CACHE_UC_MINUS PAGE_PCD
+#define PAGE_CACHE_UC (PAGE_PCD | PAGE_PWT)
+
+#define PAGE_TABLE (PAGE_PRESENT | PAGE_RW | PAGE_USER)
+
+
+#define EPT_RD (1UL << 0U)
+#define EPT_WR (1UL << 1U)
+#define EPT_EXE (1UL << 2U)
+#define EPT_MT_SHIFT 3U
+#define EPT_UNCACHED (0UL << EPT_MT_SHIFT)
+#define EPT_WC (1UL << EPT_MT_SHIFT)
+#define EPT_WT (4UL << EPT_MT_SHIFT)
+#define EPT_WP (5UL << EPT_MT_SHIFT)
+#define EPT_WB (6UL << EPT_MT_SHIFT)
+#define EPT_MT_MASK (7UL << EPT_MT_SHIFT)
+#define EPT_SNOOP_CTRL (1UL << 11U)
+#define EPT_VE (1UL << 63U)
+
+#define EPT_RWX (EPT_RD | EPT_WR | EPT_EXE)
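+
+/*
+ * Illustrative note (not part of the original patch): a complete EPT
+ * leaf attribute combines access rights with a memory type, e.g.
+ * (EPT_RWX | EPT_WB) for a normal write-back mapping, while
+ * (EPT_RD | EPT_UNCACHED) would map a range read-only and uncached.
+ */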
+
+#define PML4E_SHIFT 39U
+#define PTRS_PER_PML4E 512UL
+#define PML4E_SIZE (1UL << PML4E_SHIFT)
+#define PML4E_MASK (~(PML4E_SIZE - 1UL))
+
+#define PDPTE_SHIFT 30U
+#define PTRS_PER_PDPTE 512UL
+#define PDPTE_SIZE (1UL << PDPTE_SHIFT)
+#define PDPTE_MASK (~(PDPTE_SIZE - 1UL))
+
+#define PDE_SHIFT 21U
+#define PTRS_PER_PDE 512UL
+#define PDE_SIZE (1UL << PDE_SHIFT)
+#define PDE_MASK (~(PDE_SIZE - 1UL))
+
+#define PTE_SHIFT 12U
+#define PTRS_PER_PTE 512UL
+#define PTE_SIZE (1UL << PTE_SHIFT)
+#define PTE_MASK (~(PTE_SIZE - 1UL))
+
+/* TODO: PAGE_MASK & PHYSICAL_MASK */
+#define PML4E_PFN_MASK 0x0000FFFFFFFFF000UL
+#define PDPTE_PFN_MASK 0x0000FFFFFFFFF000UL
+#define PDE_PFN_MASK 0x0000FFFFFFFFF000UL
+
+#endif /* PGTABLE_TYPES_H */
diff --git a/hypervisor/include/hypervisor.h b/hypervisor/include/hypervisor.h
index 6a1c3d768..5c5f74d1f 100644
--- a/hypervisor/include/hypervisor.h
+++ b/hypervisor/include/hypervisor.h
@@ -29,9 +29,6 @@
 #include
 
 #ifndef ASSEMBLER
-/* hpa <--> hva, now it is 1:1 mapping */
-#define HPA2HVA(x) ((void *)(x))
-#define HVA2HPA(x) ((uint64_t)(x))
 /* gpa --> hpa -->hva */
 #define GPA2HVA(vm, x) HPA2HVA(gpa2hpa(vm, x))
 #define HVA2GPA(vm, x) hpa2gpa(vm, HVA2HPA(x))
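
For reference, the attribute-update rule that modify_pte(), modify_pde() and
modify_pdpte() all apply to a present entry is new = (old & ~prot_clr) |
prot_set. A minimal, self-contained sketch of that rule (editor's
illustration, not part of the patch; the flag values are copied from
pgtable_types.h above):

#include <stdint.h>
#include <stdio.h>

/* Flag values copied from pgtable_types.h. */
#define PAGE_PRESENT (1UL << 0U)
#define PAGE_RW (1UL << 1U)
#define PAGE_USER (1UL << 2U)
#define PAGE_PSE (1UL << 7U)
#define PAGE_TABLE (PAGE_PRESENT | PAGE_RW | PAGE_USER)

int main(void)
{
	uint64_t old_entry = PAGE_TABLE | PAGE_PSE; /* writable 2MB mapping */
	uint64_t prot_set = 0UL;                    /* no bits to set */
	uint64_t prot_clr = PAGE_RW;                /* clear write access */
	uint64_t new_entry = (old_entry & ~prot_clr) | prot_set;

	/* Prints 0x85: PAGE_PRESENT | PAGE_USER | PAGE_PSE (read-only). */
	printf("0x%lx\n", (unsigned long)new_entry);
	return 0;
}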