From 1b3a75c9843c86d172ea0a3aa8ff4cbc3dd1b8e7 Mon Sep 17 00:00:00 2001 From: Victor Sun Date: Tue, 1 Jun 2021 14:17:18 +0800 Subject: [PATCH] HV: place kernel and ramdisk by find_space_from_ve820() We should not hardcode the VM ramdisk load address to be right after the kernel load address, for two reasons: 1. Per the Linux kernel boot protocol, the kernel needs a contiguous memory region (whose size is given by the init_size field in the zeropage) starting from its load address in order to boot, so that region would overlap with the ramdisk; 2. The hardcoded address cannot be guaranteed to be a valid address in the guest e820 table, especially with a huge ramdisk; Also, we should not hardcode the VM kernel load address to its pref_address, which works for a non-relocatable kernel only. A relocatable kernel can run from any valid address that the bootloader loads it to. This patch sets the VM kernel and ramdisk load addresses by scanning the guest e820 table with the find_space_from_ve820() API: 1. For the SOS VM, the ramdisk has already been loaded by the multiboot bootloader, so its load address is set to the module source address; the relocatable kernel is relocated to an appropriate address outside the space occupied by the hypervisor and boot modules, to avoid corruption during guest memory copy; 2. 
For pre-launched VM, the kernel would be loaded to pref_address first, then ramdisk will be put to a appropriate address out space of kernel according to guest memory layout and maximum ramdisk address limit under 4GB; Tracked-On: #5879 Signed-off-by: Victor Sun Reviewed-by: Jason Chen CJ --- hypervisor/boot/boot.c | 21 ++++ hypervisor/boot/guest/vboot_info.c | 138 +++++++++++++++++++-- hypervisor/boot/include/boot.h | 2 + hypervisor/common/vm_load.c | 6 +- hypervisor/include/arch/x86/asm/zeropage.h | 6 +- 5 files changed, 161 insertions(+), 12 deletions(-) diff --git a/hypervisor/boot/boot.c b/hypervisor/boot/boot.c index fc23fbda7..1b9fa0313 100644 --- a/hypervisor/boot/boot.c +++ b/hypervisor/boot/boot.c @@ -13,6 +13,27 @@ static struct acrn_boot_info acrn_bi = { 0U }; +/** + * @pre (p_start != NULL) && (p_end != NULL) + */ +void get_boot_mods_range(uint64_t *p_start, uint64_t *p_end) +{ + uint32_t i; + uint64_t start = ~0UL, end = 0UL; + struct acrn_boot_info *abi = get_acrn_boot_info(); + + for (i = 0; i < abi->mods_count; i++) { + if (hva2hpa(abi->mods[i].start) < start) { + start = hva2hpa(abi->mods[i].start); + } + if (hva2hpa(abi->mods[i].start + abi->mods[i].size) > end) { + end = hva2hpa(abi->mods[i].start + abi->mods[i].size); + } + } + *p_start = start; + *p_end = end; +} + void init_acrn_boot_info(uint32_t *registers) { (void)init_multiboot_info(registers); diff --git a/hypervisor/boot/guest/vboot_info.c b/hypervisor/boot/guest/vboot_info.c index e08211885..3713cbe69 100644 --- a/hypervisor/boot/guest/vboot_info.c +++ b/hypervisor/boot/guest/vboot_info.c @@ -15,23 +15,103 @@ #include #include #include +#include +#include #include #include #include #define DBG_LEVEL_BOOT 6U +/* TODO: + * The value is referenced from Linux boot protocal for old kernels, + * but this should be configurable for different OS. 
*/ +#define DEFAULT_RAMDISK_GPA_MAX 0x37ffffffUL + +#define PRE_VM_MAX_RAM_ADDR_BELOW_4GB (VIRT_ACPI_DATA_ADDR - 1U) + /** * @pre vm != NULL && mod != NULL */ static void init_vm_ramdisk_info(struct acrn_vm *vm, const struct abi_module *mod) { + uint64_t ramdisk_load_gpa = INVALID_GPA; + uint64_t ramdisk_gpa_max = DEFAULT_RAMDISK_GPA_MAX; + uint64_t kernel_start = (uint64_t)vm->sw.kernel_info.kernel_load_addr; + uint64_t kernel_end = kernel_start + vm->sw.kernel_info.kernel_size; + struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id); + if (mod->start != NULL) { vm->sw.ramdisk_info.src_addr = mod->start; - vm->sw.ramdisk_info.load_addr = vm->sw.kernel_info.kernel_load_addr + vm->sw.kernel_info.kernel_size; - vm->sw.ramdisk_info.load_addr = (void *)round_page_up((uint64_t)vm->sw.ramdisk_info.load_addr); vm->sw.ramdisk_info.size = mod->size; } + + /* Per Linux boot protocol, the Kernel need a size of contiguous + * memory(i.e. init_size field in zeropage) from its extract address to boot, + * and initrd_addr_max field specifies the maximum address of the ramdisk. + * Per kernel src head_64.S, decompressed kernel start at 2M aligned to the + * compressed kernel load address. + */ + if (vm->sw.kernel_type == KERNEL_BZIMAGE) { + struct zero_page *zeropage = (struct zero_page *)vm->sw.kernel_info.kernel_src_addr; + uint32_t kernel_init_size = zeropage->hdr.init_size; + uint32_t initrd_addr_max = zeropage->hdr.initrd_addr_max; + + kernel_end = kernel_start + MEM_2M + kernel_init_size; + if (initrd_addr_max != 0U) { + ramdisk_gpa_max = initrd_addr_max; + } + } + + if (is_sos_vm(vm)) { + + if (vm->sw.ramdisk_info.src_addr != NULL) { + ramdisk_load_gpa = sos_vm_hpa2gpa((uint64_t)vm->sw.ramdisk_info.src_addr); + } + + /* For SOS VM, the ramdisk has been loaded by bootloader, so in most cases + * there is no need to do gpa copy again. But in the case that the ramdisk is + * loaded by bootloader at a address higher than its limit, we should do gpa + * copy then. 
+ */ + if ((ramdisk_load_gpa + vm->sw.ramdisk_info.size) > ramdisk_gpa_max) { + ramdisk_load_gpa = find_space_from_ve820(vm, vm->sw.ramdisk_info.size, + MEM_1M, kernel_start); + if (ramdisk_load_gpa == INVALID_GPA) { + ramdisk_load_gpa = find_space_from_ve820(vm, vm->sw.ramdisk_info.size, + kernel_end, ramdisk_gpa_max); + } + } + } else { + /* For pre-launched VM, the ramdisk would be put by searching ve820 table. + */ + ramdisk_gpa_max = min(PRE_VM_MAX_RAM_ADDR_BELOW_4GB, ramdisk_gpa_max); + + if (kernel_end > ramdisk_gpa_max) { + ramdisk_load_gpa = find_space_from_ve820(vm, vm->sw.ramdisk_info.size, + MEM_1M, min(kernel_start, ramdisk_gpa_max)); + } else { + ramdisk_load_gpa = find_space_from_ve820(vm, vm->sw.ramdisk_info.size, + kernel_end, ramdisk_gpa_max); + } + } + + if (ramdisk_load_gpa == INVALID_GPA) { + pr_err("no space in guest memory to load VM %d ramdisk", vm->vm_id); + vm->sw.ramdisk_info.size = 0U; + } + + /* Use customer specified ramdisk load addr if it is configured in VM configuration, + * otherwise use allocated address calculated by HV. + */ + if (vm_config->os_config.kernel_ramdisk_addr != 0UL) { + vm->sw.ramdisk_info.load_addr = (void *)vm_config->os_config.kernel_ramdisk_addr; + } else { + vm->sw.ramdisk_info.load_addr = (void *)ramdisk_load_gpa; + } + + dev_dbg(DBG_LEVEL_BOOT, "ramdisk mod start=0x%x, size=0x%x", (uint64_t)mod->start, mod->size); + dev_dbg(DBG_LEVEL_BOOT, "ramdisk load addr = 0x%lx", ramdisk_load_gpa); } /** @@ -60,15 +140,55 @@ static void *get_kernel_load_addr(struct acrn_vm *vm) * in Documentation/x86/boot.txt, a relocating * bootloader should attempt to load kernel at pref_address * if possible. A non-relocatable kernel will unconditionally - * move itself and to run at this address, so no need to copy - * kernel to perf_address by bootloader, if kernel is - * non-relocatable. + * move itself and to run at this address. 
*/ zeropage = (struct zero_page *)sw_info->kernel_info.kernel_src_addr; - if (zeropage->hdr.relocatable_kernel != 0U) { - zeropage = (struct zero_page *)zeropage->hdr.pref_addr; + + if ((is_sos_vm(vm)) && (zeropage->hdr.relocatable_kernel != 0U)) { + uint64_t hv_start, hv_end, mods_start, mods_end; + uint64_t kernel_load_gpa = INVALID_GPA; + uint32_t kernel_align = zeropage->hdr.kernel_alignment; + uint32_t kernel_init_size = zeropage->hdr.init_size; + /* Because the kernel load address need to be up aligned to kernel_align size + * whereas find_space_from_ve820() can only return page aligned address, + * we enlarge the needed size to (kernel_init_size + 2 * kernel_align). + */ + uint32_t kernel_size = kernel_init_size + 2 * kernel_align; + + hv_start = sos_vm_hpa2gpa(get_hv_image_base()); + hv_end = hv_start + CONFIG_HV_RAM_SIZE; + get_boot_mods_range(&mods_start, &mods_end); + mods_start = sos_vm_hpa2gpa(mods_start); + mods_end = sos_vm_hpa2gpa(mods_end); + + if (hv_end < mods_start) { + kernel_load_gpa = find_space_from_ve820(vm, kernel_size, hv_end, mods_start); + } + + if ((kernel_load_gpa == INVALID_GPA) && (max(mods_end, hv_end) < MEM_4G)) { + kernel_load_gpa = find_space_from_ve820(vm, kernel_size, + max(mods_end, hv_end), MEM_4G); + } + + if ((kernel_load_gpa == INVALID_GPA) && (mods_end < hv_start)) { + kernel_load_gpa = find_space_from_ve820(vm, kernel_size, mods_end, hv_start); + } + + if ((kernel_load_gpa == INVALID_GPA) && (min(mods_start, hv_start) > MEM_1M)) { + kernel_load_gpa = find_space_from_ve820(vm, kernel_size, + MEM_1M, min(mods_start, hv_start)); + } + + if (kernel_load_gpa != INVALID_GPA) { + load_addr = (void *)roundup((uint64_t)kernel_load_gpa, kernel_align); + } + } else { + load_addr = (void *)zeropage->hdr.pref_addr; + if (is_sos_vm(vm)) { + /* The non-relocatable SOS kernel might overlap with boot modules. 
*/ + pr_err("Non-relocatable kernel found, risk to boot!"); + } } - load_addr = (void *)zeropage; break; case KERNEL_ZEPHYR: load_addr = (void *)vm_config->os_config.kernel_load_addr; @@ -80,6 +200,8 @@ static void *get_kernel_load_addr(struct acrn_vm *vm) if (load_addr == NULL) { pr_err("Could not get kernel load addr of VM %d .", vm->vm_id); } + + dev_dbg(DBG_LEVEL_BOOT, "VM%d kernel load_addr: 0x%lx", vm->vm_id, load_addr); return load_addr; } diff --git a/hypervisor/boot/include/boot.h b/hypervisor/boot/include/boot.h index a2f464438..1a0652c49 100644 --- a/hypervisor/boot/include/boot.h +++ b/hypervisor/boot/include/boot.h @@ -65,6 +65,8 @@ static inline bool boot_from_uefi(struct acrn_boot_info *abi) return !((abi->uefi_info.systab == 0U) && (abi->uefi_info.systab_hi == 0U)); } +void get_boot_mods_range(uint64_t *p_start, uint64_t *p_end); + int32_t init_multiboot_info(uint32_t *registers); void init_acrn_boot_info(uint32_t *registers); diff --git a/hypervisor/common/vm_load.c b/hypervisor/common/vm_load.c index 48bdd089a..eda4b00a4 100644 --- a/hypervisor/common/vm_load.c +++ b/hypervisor/common/vm_load.c @@ -250,8 +250,10 @@ static void load_sw_modules(struct acrn_vm *vm, uint64_t load_params_gpa) (uint64_t)sw_kernel->kernel_load_addr, sw_kernel->kernel_size); if (vm->sw.kernel_type == KERNEL_BZIMAGE) { - - load_sw_module(vm, ramdisk_info); + /* Don't need to load ramdisk if src_addr and load_addr are pointed to same place. 
*/ + if (gpa2hva(vm, (uint64_t)ramdisk_info->load_addr) != ramdisk_info->src_addr) { + load_sw_module(vm, ramdisk_info); + } bootargs_info->load_addr = (void *)BZIMG_CMDLINE_GPA(load_params_gpa); diff --git a/hypervisor/include/arch/x86/asm/zeropage.h b/hypervisor/include/arch/x86/asm/zeropage.h index cc2b047d3..8b4aac16f 100644 --- a/hypervisor/include/arch/x86/asm/zeropage.h +++ b/hypervisor/include/arch/x86/asm/zeropage.h @@ -28,14 +28,16 @@ struct zero_page { uint32_t ramdisk_size; /* 0x21c */ uint8_t hdr_pad3[0x8]; /* 0x220 */ uint32_t bootargs_addr; /* 0x228 */ - uint8_t hdr_pad4[0x8]; /* 0x22c */ + uint32_t initrd_addr_max; /* 0x22c */ + uint32_t kernel_alignment; /* 0x230 */ uint8_t relocatable_kernel; /* 0x234 */ uint8_t hdr_pad5[0x13]; /* 0x235 */ uint32_t payload_offset;/* 0x248 */ uint32_t payload_length;/* 0x24c */ uint8_t hdr_pad6[0x8]; /* 0x250 */ uint64_t pref_addr; /* 0x258 */ - uint8_t hdr_pad7[8]; /* 0x260 */ + uint32_t init_size; /* 0x260 */ + uint8_t hdr_pad7[4]; /* 0x264 */ } __packed hdr; uint8_t pad3[0x68]; /* 0x268 */