In the current hypervisor hypercall flow, all vCPUs are initially allocated to the SOS, and each vCPU's vm field is initialized to vm0. When a vCPU is offlined, it is paused and the DM sets its vm field to NULL; when a UOS is created, the vm field is set to the UOS's vm. By the time vmcall_vmexit_handler is invoked, the invoking vCPU's vm field is therefore always non-NULL, and vmcall_vmexit_handler itself verifies that the invoking vm is vm0. So the individual hypercall handler functions (everything except vmcall_vmexit_handler itself) do not need to check whether the invoking vm is NULL, nor whether it is vm0. Remove the related vm error checking from the hypercall handlers.

V1 --> V2: Add pre-condition for hypercall in the header file.
V2 --> V3: Add pre-condition for copy_from_gpa and copy_to_gpa.
V3 --> V4: Add pre-condition both in the header file and source file.

Tracked-On: #1258
Signed-off-by: Xiangyang Wu <xiangyang.wu@linux.intel.com>
Acked-by: Eddie Dong <eddie.dong@intel.com>
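A minimal sketch of the single checkpoint this change relies on (illustrative only; the exact dispatch logic and error code in the upstream vmcall_vmexit_handler may differ):

	/* illustrative sketch: the one vm check at the vmcall exit */
	int vmcall_vmexit_handler(struct vcpu *vcpu)
	{
		struct vm *vm = vcpu->vm;

		/* vm is always non-NULL here; only vm0 may issue hypercalls */
		if (!is_vm0(vm)) {
			pr_err("hypercall is only allowed from VM0!");
			return -EACCES;	/* assumed error code for this sketch */
		}
		/* ... dispatch to the hcall_* handlers, which can rely on
		 * vm != NULL and is_vm0(vm) without re-checking ...
		 */
		return 0;
	}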
/*
 * Copyright (C) 2018 Intel Corporation. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <hypervisor.h>
#include <bsp_extern.h>
#include <multiboot.h>
#include <reloc.h>

#define ACRN_DBG_GUEST	6U

/* for VM0 e820 */
uint32_t e820_entries;
struct e820_entry e820[E820_MAX_ENTRIES];
struct e820_mem_params e820_mem;

struct page_walk_info {
	uint64_t top_entry;	/* Top level paging structure entry */
	uint32_t level;
	uint32_t width;
	bool is_user_mode_access;
	bool is_write_access;
	bool is_inst_fetch;
	bool pse;	/* CR4.PSE for 32bit paging,
			 * true for PAE/4-level paging */
	bool wp;	/* CR0.WP */
	bool nxe;	/* MSR_IA32_EFER_NXE_BIT */

	bool is_smap_on;
	bool is_smep_on;
};

uint64_t vcpumask2pcpumask(struct vm *vm, uint64_t vdmask)
{
	uint16_t vcpu_id;
	uint64_t dmask = 0UL;
	struct vcpu *vcpu;

	for (vcpu_id = ffs64(vdmask); vcpu_id != INVALID_BIT_INDEX;
			vcpu_id = ffs64(vdmask)) {
		bitmap_clear_lock(vcpu_id, &vdmask);
		vcpu = vcpu_from_vid(vm, vcpu_id);
		ASSERT(vcpu != NULL, "vcpu_from_vid failed");
		bitmap_set_lock(vcpu->pcpu_id, &dmask);
	}

	return dmask;
}
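
/*
 * Example (illustrative sketch): convert a guest's vcpu bitmap into the
 * corresponding pcpu bitmap before notifying those cores. The helper
 * "notify_pcpus" is hypothetical; the bitmaps and vcpumask2pcpumask()
 * are from this file.
 *
 *	uint64_t vdmask = (1UL << 0U) | (1UL << 2U);	// vcpu0 and vcpu2
 *	uint64_t pdmask = vcpumask2pcpumask(vm, vdmask);
 *	notify_pcpus(pdmask);	// hypothetical notification helper
 */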

enum vm_paging_mode get_vcpu_paging_mode(struct vcpu *vcpu)
{
	enum vm_cpu_mode cpu_mode;

	cpu_mode = get_vcpu_mode(vcpu);

	if (cpu_mode == CPU_MODE_REAL) {
		return PAGING_MODE_0_LEVEL;
	} else if (cpu_mode == CPU_MODE_PROTECTED) {
		if ((vcpu_get_cr4(vcpu) & CR4_PAE) != 0U) {
			return PAGING_MODE_3_LEVEL;
		} else if ((vcpu_get_cr0(vcpu) & CR0_PG) != 0U) {
			return PAGING_MODE_2_LEVEL;
		}
		return PAGING_MODE_0_LEVEL;
	} else {	/* compatibility or 64bit mode */
		return PAGING_MODE_4_LEVEL;
	}
}

/* TODO: Add code to check for Reserved bits, SMAP and PKE when doing the
 * translation during the page walk */
static int local_gva2gpa_common(struct vcpu *vcpu, struct page_walk_info *pw_info,
	uint64_t gva, uint64_t *gpa, uint32_t *err_code)
{
	uint32_t i;
	uint64_t index;
	uint32_t shift;
	void *base;
	uint64_t entry;
	uint64_t addr, page_size;
	int ret = 0;
	int fault = 0;
	bool is_user_mode_addr = true;
	bool is_page_rw_flags_on = true;

	if (pw_info->level < 1U) {
		return -EINVAL;
	}

	addr = pw_info->top_entry;
	i = pw_info->level;
	while (i != 0U) {
		i--;

		addr = addr & IA32E_REF_MASK;
		base = gpa2hva(vcpu->vm, addr);
		if (base == NULL) {
			ret = -EFAULT;
			goto out;
		}

		shift = (i * pw_info->width) + 12U;
		index = (gva >> shift) & ((1UL << pw_info->width) - 1UL);
		page_size = 1UL << shift;

		if (pw_info->width == 10U) {
			uint32_t *base32 = (uint32_t *)base;
			/* 32bit entry */
			entry = (uint64_t)(*(base32 + index));
		} else {
			uint64_t *base64 = (uint64_t *)base;
			entry = *(base64 + index);
		}

		/* check if the entry is present */
		if ((entry & PAGE_PRESENT) == 0U) {
			ret = -EFAULT;
			goto out;
		}

		/* check for R/W */
		if ((entry & PAGE_RW) == 0U) {
			if (pw_info->is_write_access) {
				/* Case 1: supervisor-mode access with CR0.WP = 1
				 * Case 2: user-mode access */
				if (pw_info->is_user_mode_access ||
						pw_info->wp) {
					fault = 1;
					goto out;
				}
			}
			is_page_rw_flags_on = false;
		}

		/* check for NX; since the XD bit is reserved (0) for 32-bit
		 * paging, the same logic as PAE/4-level paging applies */
		if (pw_info->is_inst_fetch && pw_info->nxe &&
				((entry & PAGE_NX) != 0U)) {
			fault = 1;
			goto out;
		}

		/* check for U/S */
		if ((entry & PAGE_USER) == 0U) {
			is_user_mode_addr = false;

			if (pw_info->is_user_mode_access) {
				fault = 1;
				goto out;
			}
		}

		if (pw_info->pse && ((i > 0U) && ((entry & PAGE_PSE) != 0U))) {
			break;
		}
		addr = entry;
	}

	/* When SMAP/SMEP is on, the checks below only need to be applied when
	 * the address is a user-mode address; SMAP/SMEP only impact
	 * supervisor-mode accesses.
	 */
	/* if SMAP is enabled and this is a supervisor-mode access */
	if (pw_info->is_smap_on && !pw_info->is_user_mode_access &&
			is_user_mode_addr) {
		bool rflags_ac = ((vcpu_get_rflags(vcpu) & RFLAGS_AC) != 0UL);

		/* read from user mode address, eflags.ac = 0 */
		if (!pw_info->is_write_access && !rflags_ac) {
			fault = 1;
			goto out;
		}

		/* write to user mode address */
		if (pw_info->is_write_access) {
			/* cr0.wp = 0, eflags.ac = 0 */
			if (!pw_info->wp && !rflags_ac) {
				fault = 1;
				goto out;
			}

			/* cr0.wp = 1, eflags.ac = 1, r/w flag is 0
			 * on any paging structure entry
			 */
			if (pw_info->wp && rflags_ac && !is_page_rw_flags_on) {
				fault = 1;
				goto out;
			}

			/* cr0.wp = 1, eflags.ac = 0 */
			if (pw_info->wp && !rflags_ac) {
				fault = 1;
				goto out;
			}
		}
	}

	/* instruction fetch from user-mode address, SMEP on */
	if (pw_info->is_smep_on && !pw_info->is_user_mode_access &&
			is_user_mode_addr && pw_info->is_inst_fetch) {
		fault = 1;
		goto out;
	}

	entry >>= shift;
	/* shift left by (shift + 12) and back by 12 to clear the low bits
	 * and the XD/Protection Key/Ignored bits */
	entry <<= (shift + 12U);
	entry >>= 12U;
	*gpa = entry | (gva & (page_size - 1UL));
out:

	if (fault != 0) {
		ret = -EFAULT;
		*err_code |= PAGE_FAULT_P_FLAG;
	}
	return ret;
}
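
/*
 * Worked example of the index math above (4-level paging, width = 9,
 * level = 4): on the first iteration i = 3, so shift = 3*9 + 12 = 39 and
 * the PML4 index is gva[47:39]; the walk then proceeds through shifts 30,
 * 21 and 12. A 1GB page hit at i = 2 (PAGE_PSE set) leaves shift = 30, so
 * page_size = 1GB and the low 30 bits of gva become the page offset.
 */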

static int local_gva2gpa_pae(struct vcpu *vcpu, struct page_walk_info *pw_info,
	uint64_t gva, uint64_t *gpa, uint32_t *err_code)
{
	int index;
	uint64_t *base;
	uint64_t entry;
	uint64_t addr;
	int ret;

	addr = pw_info->top_entry & 0xFFFFFFF0U;
	base = gpa2hva(vcpu->vm, addr);
	if (base == NULL) {
		ret = -EFAULT;
		goto out;
	}

	index = (gva >> 30) & 0x3UL;
	entry = base[index];

	if ((entry & PAGE_PRESENT) == 0U) {
		ret = -EFAULT;
		goto out;
	}

	pw_info->level = 2U;
	pw_info->top_entry = entry;
	ret = local_gva2gpa_common(vcpu, pw_info, gva, gpa, err_code);

out:
	return ret;
}

/* Refer to SDM Vol.3A 6-39 section 6.15 for the format of the page fault
 * error code.
 *
 * The caller should set the content of err_code properly, according to how
 * the address will be used, before calling this function:
 * - If it is an address for write, set PAGE_FAULT_WR_FLAG in err_code.
 * - If it is an address for instruction fetch, set PAGE_FAULT_ID_FLAG in
 *   err_code.
 * The caller should check the return value to confirm whether the function
 * succeeded.
 * If a protection violation is detected during the page walk, this function
 * still provides the translated gpa; it is up to the caller to decide
 * whether to inject a #PF.
 * - Return 0 for success.
 * - Return -EINVAL for invalid parameter.
 * - Return -EFAULT for page fault; refer to err_code for the page fault
 *   error code.
 */
int gva2gpa(struct vcpu *vcpu, uint64_t gva, uint64_t *gpa,
	uint32_t *err_code)
{
	enum vm_paging_mode pm = get_vcpu_paging_mode(vcpu);
	struct page_walk_info pw_info;
	int ret = 0;

	if ((gpa == NULL) || (err_code == NULL)) {
		return -EINVAL;
	}
	*gpa = 0UL;

	pw_info.top_entry = exec_vmread(VMX_GUEST_CR3);
	pw_info.level = pm;
	pw_info.is_write_access = ((*err_code & PAGE_FAULT_WR_FLAG) != 0U);
	pw_info.is_inst_fetch = ((*err_code & PAGE_FAULT_ID_FLAG) != 0U);

	/* SDM vol3 27.3.2
	 * If the segment register was unusable, the base, selector and some
	 * bits of the access rights are undefined, with the exception of the
	 * DPL of SS (and a few others).
	 * So we use the DPL from the SS access rights field as the guest DPL.
	 */
	pw_info.is_user_mode_access =
		(((exec_vmread32(VMX_GUEST_SS_ATTR) >> 5) & 0x3U) == 3U);
	pw_info.pse = true;
	pw_info.nxe = ((vcpu_get_efer(vcpu) & MSR_IA32_EFER_NXE_BIT) != 0UL);
	pw_info.wp = ((vcpu_get_cr0(vcpu) & CR0_WP) != 0UL);
	pw_info.is_smap_on = ((vcpu_get_cr4(vcpu) & CR4_SMAP) != 0UL);
	pw_info.is_smep_on = ((vcpu_get_cr4(vcpu) & CR4_SMEP) != 0UL);

	*err_code &= ~PAGE_FAULT_P_FLAG;

	if (pm == PAGING_MODE_4_LEVEL) {
		pw_info.width = 9U;
		ret = local_gva2gpa_common(vcpu, &pw_info, gva, gpa, err_code);
	} else if (pm == PAGING_MODE_3_LEVEL) {
		pw_info.width = 9U;
		ret = local_gva2gpa_pae(vcpu, &pw_info, gva, gpa, err_code);
	} else if (pm == PAGING_MODE_2_LEVEL) {
		pw_info.width = 10U;
		pw_info.pse = ((vcpu_get_cr4(vcpu) & CR4_PSE) != 0UL);
		pw_info.nxe = false;
		ret = local_gva2gpa_common(vcpu, &pw_info, gva, gpa, err_code);
	} else {
		*gpa = gva;
	}

	if (ret == -EFAULT) {
		if (pw_info.is_user_mode_access) {
			*err_code |= PAGE_FAULT_US_FLAG;
		}
	}

	return ret;
}
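
/*
 * Example (illustrative sketch): translate a guest address that will be
 * written, then decide about #PF injection per the contract above.
 * "vcpu_inject_pf" stands in for the caller's injection helper and is
 * hypothetical in this sketch.
 *
 *	uint64_t gpa = 0UL;
 *	uint32_t err_code = PAGE_FAULT_WR_FLAG;	// address used for write
 *
 *	if (gva2gpa(vcpu, gva, &gpa, &err_code) == -EFAULT) {
 *		vcpu_inject_pf(vcpu, gva, err_code);	// hypothetical helper
 *	}
 */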

static inline uint32_t local_copy_gpa(const struct vm *vm, void *h_ptr, uint64_t gpa,
	uint32_t size, uint32_t fix_pg_size, bool cp_from_vm)
{
	uint64_t hpa;
	uint32_t offset_in_pg, len, pg_size;
	void *g_ptr;

	hpa = local_gpa2hpa(vm, gpa, &pg_size);
	if (pg_size == 0U) {
		pr_err("GPA2HPA not found");
		return 0U;
	}

	if (fix_pg_size != 0U) {
		pg_size = fix_pg_size;
	}

	offset_in_pg = (uint32_t)gpa & (pg_size - 1U);
	len = (size > (pg_size - offset_in_pg)) ?
		(pg_size - offset_in_pg) : size;

	g_ptr = hpa2hva(hpa);

	if (cp_from_vm) {
		(void)memcpy_s(h_ptr, len, g_ptr, len);
	} else {
		(void)memcpy_s(g_ptr, len, h_ptr, len);
	}

	return len;
}

static inline int copy_gpa(const struct vm *vm, void *h_ptr_arg, uint64_t gpa_arg,
	uint32_t size_arg, bool cp_from_vm)
{
	void *h_ptr = h_ptr_arg;
	uint32_t len;
	uint64_t gpa = gpa_arg;
	uint32_t size = size_arg;

	while (size > 0U) {
		len = local_copy_gpa(vm, h_ptr, gpa, size, 0U, cp_from_vm);
		if (len == 0U) {
			return -EINVAL;
		}

		gpa += len;
		h_ptr += len;
		size -= len;
	}

	return 0;
}
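
/*
 * Example of how copy_gpa() splits a page-crossing copy with 4K mappings:
 * copying 0x1800 bytes starting at gpa 0x1000F00 proceeds as
 *   0x100 bytes (up to the end of the first page),
 *   0x1000 bytes (one full page),
 *   0x700 bytes (the remainder),
 * with each chunk translated separately by local_gpa2hpa().
 */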

static inline int copy_gva(struct vcpu *vcpu, void *h_ptr_arg, uint64_t gva_arg,
	uint32_t size_arg, uint32_t *err_code, uint64_t *fault_addr,
	bool cp_from_vm)
{
	void *h_ptr = h_ptr_arg;
	uint64_t gpa = 0UL;
	int32_t ret;
	uint32_t len;
	uint64_t gva = gva_arg;
	uint32_t size = size_arg;

	if (vcpu == NULL) {
		pr_err("guest virt addr copy needs a vcpu param");
		return -EINVAL;
	}
	if (err_code == NULL) {
		pr_err("guest virt addr copy needs an err_code param");
		return -EINVAL;
	}

	while (size > 0U) {
		ret = gva2gpa(vcpu, gva, &gpa, err_code);
		if (ret < 0) {
			*fault_addr = gva;
			pr_err("error[%d] in GVA2GPA, err_code=0x%x",
				ret, *err_code);
			return ret;
		}

		len = local_copy_gpa(vcpu->vm, h_ptr, gpa, size,
			PAGE_SIZE_4K, cp_from_vm);

		if (len == 0U) {
			return -EINVAL;
		}

		gva += len;
		h_ptr += len;
		size -= len;
	}

	return 0;
}

/* @pre The caller (guest) should make sure the gpa range is continuous.
 * - A gpa from a hypercall input that comes from the kernel stack is
 *   continuous; kernel stacks allocated from vmap are not supported.
 * - For other gpas from hypercall parameters, VHM should make sure they
 *   are continuous.
 * @pre Pointer vm is non-NULL
 */
int copy_from_gpa(const struct vm *vm, void *h_ptr, uint64_t gpa, uint32_t size)
{
	return copy_gpa(vm, h_ptr, gpa, size, true);
}

/* @pre The caller (guest) should make sure the gpa range is continuous.
 * - A gpa from a hypercall input that comes from the kernel stack is
 *   continuous; kernel stacks allocated from vmap are not supported.
 * - For other gpas from hypercall parameters, VHM should make sure they
 *   are continuous.
 * @pre Pointer vm is non-NULL
 */
int copy_to_gpa(const struct vm *vm, void *h_ptr, uint64_t gpa, uint32_t size)
{
	return copy_gpa(vm, h_ptr, gpa, size, false);
}
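
/*
 * Example (illustrative sketch): a hypercall handler copying a parameter
 * block out of guest memory. Per the preconditions above, vm is non-NULL
 * and the gpa range is continuous; the parameter struct name here is just
 * a representative placeholder.
 *
 *	struct hc_param_block param_blk;	// placeholder struct
 *
 *	if (copy_from_gpa(vm, &param_blk, param_gpa,
 *			sizeof(param_blk)) != 0) {
 *		pr_err("%s: unable to copy param from vm", __func__);
 *		return -1;
 *	}
 */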

int copy_from_gva(struct vcpu *vcpu, void *h_ptr, uint64_t gva,
	uint32_t size, uint32_t *err_code, uint64_t *fault_addr)
{
	return copy_gva(vcpu, h_ptr, gva, size, err_code, fault_addr, true);
}

int copy_to_gva(struct vcpu *vcpu, void *h_ptr, uint64_t gva,
	uint32_t size, uint32_t *err_code, uint64_t *fault_addr)
{
	return copy_gva(vcpu, h_ptr, gva, size, err_code, fault_addr, false);
}
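
/*
 * Example (illustrative sketch): fetching guest instruction bytes during
 * emulation. Setting PAGE_FAULT_ID_FLAG up front marks the page walk as an
 * instruction fetch so the NX/SMEP checks apply; on failure, fault_addr
 * and err_code carry what a #PF injection would need.
 *
 *	uint32_t err_code = PAGE_FAULT_ID_FLAG;
 *	uint64_t fault_addr = 0UL;
 *	uint8_t inst[16];
 *
 *	if (copy_from_gva(vcpu, inst, guest_rip, sizeof(inst),
 *			&err_code, &fault_addr) < 0) {
 *		// caller decides whether to inject #PF using
 *		// fault_addr/err_code
 *	}
 */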

void init_e820(void)
{
	uint32_t i;

	if (boot_regs[0] == MULTIBOOT_INFO_MAGIC) {
		struct multiboot_info *mbi = (struct multiboot_info *)
			(hpa2hva((uint64_t)boot_regs[1]));

		pr_info("Multiboot info detected\n");
		if ((mbi->mi_flags & MULTIBOOT_INFO_HAS_MMAP) != 0U) {
			struct multiboot_mmap *mmap =
				(struct multiboot_mmap *)
				hpa2hva((uint64_t)mbi->mi_mmap_addr);
			e820_entries = mbi->mi_mmap_length /
				sizeof(struct multiboot_mmap);
			if (e820_entries > E820_MAX_ENTRIES) {
				pr_err("Too many E820 entries %d\n",
					e820_entries);
				e820_entries = E820_MAX_ENTRIES;
			}
			dev_dbg(ACRN_DBG_GUEST,
				"mmap length 0x%x addr 0x%x entries %d\n",
				mbi->mi_mmap_length, mbi->mi_mmap_addr,
				e820_entries);
			for (i = 0U; i < e820_entries; i++) {
				e820[i].baseaddr = mmap[i].baseaddr;
				e820[i].length = mmap[i].length;
				e820[i].type = mmap[i].type;

				dev_dbg(ACRN_DBG_GUEST,
					"mmap table: %d type: 0x%x\n",
					i, mmap[i].type);
				dev_dbg(ACRN_DBG_GUEST,
					"Base: 0x%016llx length: 0x%016llx",
					mmap[i].baseaddr, mmap[i].length);
			}
		}
	} else {
		ASSERT(false, "no multiboot info found");
	}
}

void obtain_e820_mem_info(void)
{
	uint32_t i;
	struct e820_entry *entry;

	e820_mem.mem_bottom = UINT64_MAX;
	e820_mem.mem_top = 0x0UL;
	e820_mem.total_mem_size = 0UL;
	e820_mem.max_ram_blk_base = 0UL;
	e820_mem.max_ram_blk_size = 0UL;

	for (i = 0U; i < e820_entries; i++) {
		entry = &e820[i];
		if (e820_mem.mem_bottom > entry->baseaddr) {
			e820_mem.mem_bottom = entry->baseaddr;
		}

		if ((entry->baseaddr + entry->length)
				> e820_mem.mem_top) {
			e820_mem.mem_top = entry->baseaddr
				+ entry->length;
		}

		if (entry->type == E820_TYPE_RAM) {
			e820_mem.total_mem_size += entry->length;
			if (entry->baseaddr == UOS_DEFAULT_START_ADDR) {
				e820_mem.max_ram_blk_base =
					entry->baseaddr;
				e820_mem.max_ram_blk_size = entry->length;
			}
		}
	}
}

static void rebuild_vm0_e820(void)
{
	uint32_t i;
	uint64_t entry_start;
	uint64_t entry_end;
	uint64_t hv_start = get_hv_image_base();
	uint64_t hv_end = hv_start + CONFIG_RAM_SIZE;
	struct e820_entry *entry, new_entry = {0};

	/* hypervisor memory needs to be filtered out of the e820 table:
	 * the hv image itself plus other hv-reserved memory (vgt etc.)
	 */
	for (i = 0U; i < e820_entries; i++) {
		entry = &e820[i];
		entry_start = entry->baseaddr;
		entry_end = entry->baseaddr + entry->length;

		/* No need to handle these cases */
		if ((entry->type != E820_TYPE_RAM) || (entry_end <= hv_start)
				|| (entry_start >= hv_end)) {
			continue;
		}

		/* filter out hv mem and adjust the length of this entry */
		if ((entry_start < hv_start) && (entry_end <= hv_end)) {
			entry->length = hv_start - entry_start;
			continue;
		}
		/* filter out hv mem and create a new entry for the tail */
		if ((entry_start < hv_start) && (entry_end > hv_end)) {
			entry->length = hv_start - entry_start;
			new_entry.baseaddr = hv_end;
			new_entry.length = entry_end - hv_end;
			new_entry.type = E820_TYPE_RAM;
			continue;
		}
		/* this entry lies entirely within the hv mem range;
		 * change it to E820_TYPE_RESERVED
		 */
		if ((entry_start >= hv_start) && (entry_end <= hv_end)) {
			entry->type = E820_TYPE_RESERVED;
			continue;
		}

		if ((entry_start >= hv_start) && (entry_start < hv_end)
				&& (entry_end > hv_end)) {
			entry->baseaddr = hv_end;
			entry->length = entry_end - hv_end;
			continue;
		}

	}

	if (new_entry.length > 0UL) {
		e820_entries++;
		ASSERT(e820_entries <= E820_MAX_ENTRIES,
			"e820 entry overflow");
		entry = &e820[e820_entries - 1];
		entry->baseaddr = new_entry.baseaddr;
		entry->length = new_entry.length;
		entry->type = new_entry.type;
	}

	e820_mem.total_mem_size -= CONFIG_RAM_SIZE;
}

/**
 * @param[inout] vm pointer to a vm descriptor
 *
 * @return 0 - on success
 *
 * @pre vm != NULL
 * @pre is_vm0(vm) == true
 */
int prepare_vm0_memmap_and_e820(struct vm *vm)
{
	uint32_t i;
	uint64_t attr_uc = (EPT_RWX | EPT_UNCACHED);
	struct e820_entry *entry;
	uint64_t hv_hpa;
	uint64_t *pml4_page = (uint64_t *)vm->arch_vm.nworld_eptp;

	rebuild_vm0_e820();
	dev_dbg(ACRN_DBG_GUEST,
		"vm0: bottom memory - 0x%llx, top memory - 0x%llx\n",
		e820_mem.mem_bottom, e820_mem.mem_top);

	/* create the real EPT map for all ranges with the UC attribute */
	ept_mr_add(vm, pml4_page,
		e820_mem.mem_bottom, e820_mem.mem_bottom,
		(e820_mem.mem_top - e820_mem.mem_bottom),
		attr_uc);

	/* update RAM entries to the WB attribute */
	for (i = 0U; i < e820_entries; i++) {
		entry = &e820[i];
		if (entry->type == E820_TYPE_RAM) {
			ept_mr_modify(vm, pml4_page,
				entry->baseaddr, entry->length,
				EPT_WB, EPT_MT_MASK);
		}
	}

	dev_dbg(ACRN_DBG_GUEST, "VM0 e820 layout:\n");
	for (i = 0U; i < e820_entries; i++) {
		entry = &e820[i];
		dev_dbg(ACRN_DBG_GUEST,
			"e820 table: %d type: 0x%x", i, entry->type);
		dev_dbg(ACRN_DBG_GUEST,
			"BaseAddress: 0x%016llx length: 0x%016llx\n",
			entry->baseaddr, entry->length);
	}

	/* unmap the hypervisor itself for safety; any access to hv memory
	 * from the SOS will then cause an EPT violation
	 */
	hv_hpa = get_hv_image_base();
	ept_mr_del(vm, pml4_page, hv_hpa, CONFIG_RAM_SIZE);
	return 0;
}

uint64_t e820_alloc_low_memory(uint32_t size_arg)
{
	uint32_t i;
	uint32_t size = size_arg;
	struct e820_entry *entry, *new_entry;

	/* We want memory on a page boundary and an integral multiple of pages */
	size = (((size + CPU_PAGE_SIZE) - 1U) >> CPU_PAGE_SHIFT)
		<< CPU_PAGE_SHIFT;

	for (i = 0U; i < e820_entries; i++) {
		uint64_t start, end, length;

		entry = &e820[i];
		start = round_page_up(entry->baseaddr);
		end = round_page_down(entry->baseaddr + entry->length);
		length = (end > start) ? (end - start) : 0;

		/* Search for available low memory */
		if ((entry->type != E820_TYPE_RAM)
				|| (length < size)
				|| ((start + size) > MEM_1M)) {
			continue;
		}

		/* found an e820 entry of exactly the requested size */
		if (length == size) {
			entry->type = E820_TYPE_RESERVED;
			e820_mem.total_mem_size -= size;
			return start;
		}

		/*
		 * found an entry with more available memory than requested;
		 * allocate from the end of this entry at a page boundary
		 */
		new_entry = &e820[e820_entries];
		new_entry->type = E820_TYPE_RESERVED;
		new_entry->baseaddr = end - size;
		new_entry->length = (entry->baseaddr +
			entry->length) - new_entry->baseaddr;

		/* Shrink the existing entry and the total available memory */
		entry->length -= new_entry->length;
		e820_mem.total_mem_size -= new_entry->length;
		e820_entries++;

		return new_entry->baseaddr;
	}

	pr_fatal("Can't allocate memory under 1M from E820\n");
	return ACRN_INVALID_HPA;
}
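
/*
 * Example (illustrative sketch): reserve one page under 1MB, e.g. for
 * real-mode bootstrap data. The return value must be checked against
 * ACRN_INVALID_HPA because low memory may be exhausted.
 *
 *	uint64_t low_pg = e820_alloc_low_memory(CPU_PAGE_SIZE);
 *
 *	if (low_pg == ACRN_INVALID_HPA) {
 *		// handle the allocation failure
 *	}
 */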

/*******************************************************************
 * GUEST initial GDT table
 *
 * If the guest starts in protected mode, the HV needs to prepare
 * the guest GDT.
 ******************************************************************/

#define GUEST_INIT_GDT_SKIP_SIZE	0x8000UL
#define GUEST_INIT_GDT_START	(trampoline_start16_paddr +	\
					GUEST_INIT_GDT_SKIP_SIZE)

/* The GDT defined below is compatible with the Linux kernel */
#define GUEST_INIT_GDT_DESC_0	(0x0)
#define GUEST_INIT_GDT_DESC_1	(0x0)
#define GUEST_INIT_GDT_DESC_2	(0x00CF9B000000FFFFUL) /* Linear Code */
#define GUEST_INIT_GDT_DESC_3	(0x00CF93000000FFFFUL) /* Linear Data */

static const uint64_t guest_init_gdt[] = {
	GUEST_INIT_GDT_DESC_0,
	GUEST_INIT_GDT_DESC_1,
	GUEST_INIT_GDT_DESC_2,
	GUEST_INIT_GDT_DESC_3,
};

uint64_t create_guest_init_gdt(struct vm *vm, uint32_t *limit)
{
	void *gdt_addr = gpa2hva(vm, GUEST_INIT_GDT_START);

	*limit = sizeof(guest_init_gdt) - 1U;
	(void)memcpy_s(gdt_addr, 64U, guest_init_gdt, sizeof(guest_init_gdt));

	return GUEST_INIT_GDT_START;
}
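
/*
 * Example (illustrative sketch): a protected-mode boot path installing the
 * prepared GDT into the guest state. Treat the exact VMCS write helpers
 * and field names here as assumptions of this sketch.
 *
 *	uint32_t limit;
 *	uint64_t base = create_guest_init_gdt(vm, &limit);
 *
 *	exec_vmwrite(VMX_GUEST_GDTR_BASE, base);
 *	exec_vmwrite32(VMX_GUEST_GDTR_LIMIT, limit);
 */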