HV: place kernel and ramdisk by find_space_from_ve820()

We should not hardcode the VM ramdisk load address right after the kernel
load address, for two reasons:
	1. Per the Linux kernel boot protocol, the kernel needs a block of
	   contiguous memory (i.e. the init_size field in the zeropage) starting
	   at its load address in order to boot, so that region would overlap
	   with the ramdisk;
	2. The hardcoded address cannot be guaranteed to be a valid address
	   in the guest e820 table, especially with a huge ramdisk.

Also, we should not hardcode the VM kernel load address to its pref_address,
which works for a non-relocatable kernel only. A relocatable kernel
can run from any valid address that the bootloader loads it to.

This patch sets the VM kernel and ramdisk load addresses by scanning the
guest e820 table with the find_space_from_ve820() API:
	1. For the SOS VM, the ramdisk has already been loaded by the multiboot
	   bootloader, so its load address is set to the module source address;
	   the relocatable kernel is relocated to an appropriate address
	   outside the space of the hypervisor and boot modules, to avoid
	   corruption during the guest memory copy;
	2. For a pre-launched VM, the kernel is loaded to pref_address
	   first, then the ramdisk is placed at an appropriate address outside
	   the space of the kernel, according to the guest memory layout and
	   the maximum ramdisk address limit below 4GB.

Tracked-On: #5879

Signed-off-by: Victor Sun <victor.sun@intel.com>
Reviewed-by: Jason Chen CJ <jason.cj.chen@intel.com>
This commit is contained in:
Victor Sun 2021-06-01 14:17:18 +08:00 committed by wenlingz
parent eca245760a
commit 1b3a75c984
5 changed files with 161 additions and 12 deletions

View File

@ -13,6 +13,27 @@
static struct acrn_boot_info acrn_bi = { 0U }; static struct acrn_boot_info acrn_bi = { 0U };
/**
 * @brief Report the overall address range covered by the boot modules.
 *
 * Iterates over every module recorded in the ACRN boot info and tracks the
 * smallest hva2hpa()-translated module start and the largest
 * hva2hpa()-translated module end (start + size).
 *
 * When no module is recorded, *p_start is set to ~0UL and *p_end to 0UL.
 *
 * @param[out] p_start receives the lowest translated module start address
 * @param[out] p_end   receives the highest translated module end address
 *
 * @pre (p_start != NULL) && (p_end != NULL)
 */
void get_boot_mods_range(uint64_t *p_start, uint64_t *p_end)
{
	struct acrn_boot_info *abi = get_acrn_boot_info();
	uint64_t lowest = ~0UL;
	uint64_t highest = 0UL;
	uint32_t idx = 0U;

	while (idx < abi->mods_count) {
		/* Translate each boundary once and reuse the result below. */
		uint64_t mod_begin = hva2hpa(abi->mods[idx].start);
		uint64_t mod_limit = hva2hpa(abi->mods[idx].start + abi->mods[idx].size);

		if (mod_begin < lowest) {
			lowest = mod_begin;
		}

		if (mod_limit > highest) {
			highest = mod_limit;
		}

		idx++;
	}

	*p_start = lowest;
	*p_end = highest;
}
void init_acrn_boot_info(uint32_t *registers) void init_acrn_boot_info(uint32_t *registers)
{ {
(void)init_multiboot_info(registers); (void)init_multiboot_info(registers);

View File

@ -15,23 +15,103 @@
#include <asm/seed.h> #include <asm/seed.h>
#include <asm/mmu.h> #include <asm/mmu.h>
#include <asm/guest/vm.h> #include <asm/guest/vm.h>
#include <asm/guest/ept.h>
#include <reloc.h>
#include <logmsg.h> #include <logmsg.h>
#include <vboot_info.h> #include <vboot_info.h>
#include <vacpi.h> #include <vacpi.h>
#define DBG_LEVEL_BOOT 6U #define DBG_LEVEL_BOOT 6U
/* TODO:
* The value is taken from the Linux boot protocol for old kernels,
* but this should be configurable for different OSes. */
#define DEFAULT_RAMDISK_GPA_MAX 0x37ffffffUL
#define PRE_VM_MAX_RAM_ADDR_BELOW_4GB (VIRT_ACPI_DATA_ADDR - 1U)
/** /**
* @pre vm != NULL && mod != NULL * @pre vm != NULL && mod != NULL
*/ */
static void init_vm_ramdisk_info(struct acrn_vm *vm, const struct abi_module *mod) static void init_vm_ramdisk_info(struct acrn_vm *vm, const struct abi_module *mod)
{ {
uint64_t ramdisk_load_gpa = INVALID_GPA;
uint64_t ramdisk_gpa_max = DEFAULT_RAMDISK_GPA_MAX;
uint64_t kernel_start = (uint64_t)vm->sw.kernel_info.kernel_load_addr;
uint64_t kernel_end = kernel_start + vm->sw.kernel_info.kernel_size;
struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
if (mod->start != NULL) { if (mod->start != NULL) {
vm->sw.ramdisk_info.src_addr = mod->start; vm->sw.ramdisk_info.src_addr = mod->start;
vm->sw.ramdisk_info.load_addr = vm->sw.kernel_info.kernel_load_addr + vm->sw.kernel_info.kernel_size;
vm->sw.ramdisk_info.load_addr = (void *)round_page_up((uint64_t)vm->sw.ramdisk_info.load_addr);
vm->sw.ramdisk_info.size = mod->size; vm->sw.ramdisk_info.size = mod->size;
} }
/* Per Linux boot protocol, the Kernel need a size of contiguous
* memory(i.e. init_size field in zeropage) from its extract address to boot,
* and initrd_addr_max field specifies the maximum address of the ramdisk.
* Per kernel src head_64.S, decompressed kernel start at 2M aligned to the
* compressed kernel load address.
*/
if (vm->sw.kernel_type == KERNEL_BZIMAGE) {
struct zero_page *zeropage = (struct zero_page *)vm->sw.kernel_info.kernel_src_addr;
uint32_t kernel_init_size = zeropage->hdr.init_size;
uint32_t initrd_addr_max = zeropage->hdr.initrd_addr_max;
kernel_end = kernel_start + MEM_2M + kernel_init_size;
if (initrd_addr_max != 0U) {
ramdisk_gpa_max = initrd_addr_max;
}
}
if (is_sos_vm(vm)) {
if (vm->sw.ramdisk_info.src_addr != NULL) {
ramdisk_load_gpa = sos_vm_hpa2gpa((uint64_t)vm->sw.ramdisk_info.src_addr);
}
/* For SOS VM, the ramdisk has been loaded by bootloader, so in most cases
* there is no need to do gpa copy again. But in the case that the ramdisk is
* loaded by bootloader at a address higher than its limit, we should do gpa
* copy then.
*/
if ((ramdisk_load_gpa + vm->sw.ramdisk_info.size) > ramdisk_gpa_max) {
ramdisk_load_gpa = find_space_from_ve820(vm, vm->sw.ramdisk_info.size,
MEM_1M, kernel_start);
if (ramdisk_load_gpa == INVALID_GPA) {
ramdisk_load_gpa = find_space_from_ve820(vm, vm->sw.ramdisk_info.size,
kernel_end, ramdisk_gpa_max);
}
}
} else {
/* For pre-launched VM, the ramdisk would be put by searching ve820 table.
*/
ramdisk_gpa_max = min(PRE_VM_MAX_RAM_ADDR_BELOW_4GB, ramdisk_gpa_max);
if (kernel_end > ramdisk_gpa_max) {
ramdisk_load_gpa = find_space_from_ve820(vm, vm->sw.ramdisk_info.size,
MEM_1M, min(kernel_start, ramdisk_gpa_max));
} else {
ramdisk_load_gpa = find_space_from_ve820(vm, vm->sw.ramdisk_info.size,
kernel_end, ramdisk_gpa_max);
}
}
if (ramdisk_load_gpa == INVALID_GPA) {
pr_err("no space in guest memory to load VM %d ramdisk", vm->vm_id);
vm->sw.ramdisk_info.size = 0U;
}
/* Use customer specified ramdisk load addr if it is configured in VM configuration,
* otherwise use allocated address calculated by HV.
*/
if (vm_config->os_config.kernel_ramdisk_addr != 0UL) {
vm->sw.ramdisk_info.load_addr = (void *)vm_config->os_config.kernel_ramdisk_addr;
} else {
vm->sw.ramdisk_info.load_addr = (void *)ramdisk_load_gpa;
}
dev_dbg(DBG_LEVEL_BOOT, "ramdisk mod start=0x%x, size=0x%x", (uint64_t)mod->start, mod->size);
dev_dbg(DBG_LEVEL_BOOT, "ramdisk load addr = 0x%lx", ramdisk_load_gpa);
} }
/** /**
@ -60,15 +140,55 @@ static void *get_kernel_load_addr(struct acrn_vm *vm)
* in Documentation/x86/boot.txt, a relocating * in Documentation/x86/boot.txt, a relocating
* bootloader should attempt to load kernel at pref_address * bootloader should attempt to load kernel at pref_address
* if possible. A non-relocatable kernel will unconditionally * if possible. A non-relocatable kernel will unconditionally
* move itself and to run at this address, so no need to copy * move itself and to run at this address.
* kernel to perf_address by bootloader, if kernel is
* non-relocatable.
*/ */
zeropage = (struct zero_page *)sw_info->kernel_info.kernel_src_addr; zeropage = (struct zero_page *)sw_info->kernel_info.kernel_src_addr;
if (zeropage->hdr.relocatable_kernel != 0U) {
zeropage = (struct zero_page *)zeropage->hdr.pref_addr; if ((is_sos_vm(vm)) && (zeropage->hdr.relocatable_kernel != 0U)) {
uint64_t hv_start, hv_end, mods_start, mods_end;
uint64_t kernel_load_gpa = INVALID_GPA;
uint32_t kernel_align = zeropage->hdr.kernel_alignment;
uint32_t kernel_init_size = zeropage->hdr.init_size;
/* Because the kernel load address need to be up aligned to kernel_align size
* whereas find_space_from_ve820() can only return page aligned address,
* we enlarge the needed size to (kernel_init_size + 2 * kernel_align).
*/
uint32_t kernel_size = kernel_init_size + 2 * kernel_align;
hv_start = sos_vm_hpa2gpa(get_hv_image_base());
hv_end = hv_start + CONFIG_HV_RAM_SIZE;
get_boot_mods_range(&mods_start, &mods_end);
mods_start = sos_vm_hpa2gpa(mods_start);
mods_end = sos_vm_hpa2gpa(mods_end);
if (hv_end < mods_start) {
kernel_load_gpa = find_space_from_ve820(vm, kernel_size, hv_end, mods_start);
}
if ((kernel_load_gpa == INVALID_GPA) && (max(mods_end, hv_end) < MEM_4G)) {
kernel_load_gpa = find_space_from_ve820(vm, kernel_size,
max(mods_end, hv_end), MEM_4G);
}
if ((kernel_load_gpa == INVALID_GPA) && (mods_end < hv_start)) {
kernel_load_gpa = find_space_from_ve820(vm, kernel_size, mods_end, hv_start);
}
if ((kernel_load_gpa == INVALID_GPA) && (min(mods_start, hv_start) > MEM_1M)) {
kernel_load_gpa = find_space_from_ve820(vm, kernel_size,
MEM_1M, min(mods_start, hv_start));
}
if (kernel_load_gpa != INVALID_GPA) {
load_addr = (void *)roundup((uint64_t)kernel_load_gpa, kernel_align);
}
} else {
load_addr = (void *)zeropage->hdr.pref_addr;
if (is_sos_vm(vm)) {
/* The non-relocatable SOS kernel might overlap with boot modules. */
pr_err("Non-relocatable kernel found, risk to boot!");
}
} }
load_addr = (void *)zeropage;
break; break;
case KERNEL_ZEPHYR: case KERNEL_ZEPHYR:
load_addr = (void *)vm_config->os_config.kernel_load_addr; load_addr = (void *)vm_config->os_config.kernel_load_addr;
@ -80,6 +200,8 @@ static void *get_kernel_load_addr(struct acrn_vm *vm)
if (load_addr == NULL) { if (load_addr == NULL) {
pr_err("Could not get kernel load addr of VM %d .", vm->vm_id); pr_err("Could not get kernel load addr of VM %d .", vm->vm_id);
} }
dev_dbg(DBG_LEVEL_BOOT, "VM%d kernel load_addr: 0x%lx", vm->vm_id, load_addr);
return load_addr; return load_addr;
} }

View File

@ -65,6 +65,8 @@ static inline bool boot_from_uefi(struct acrn_boot_info *abi)
return !((abi->uefi_info.systab == 0U) && (abi->uefi_info.systab_hi == 0U)); return !((abi->uefi_info.systab == 0U) && (abi->uefi_info.systab_hi == 0U));
} }
void get_boot_mods_range(uint64_t *p_start, uint64_t *p_end);
int32_t init_multiboot_info(uint32_t *registers); int32_t init_multiboot_info(uint32_t *registers);
void init_acrn_boot_info(uint32_t *registers); void init_acrn_boot_info(uint32_t *registers);

View File

@ -250,8 +250,10 @@ static void load_sw_modules(struct acrn_vm *vm, uint64_t load_params_gpa)
(uint64_t)sw_kernel->kernel_load_addr, sw_kernel->kernel_size); (uint64_t)sw_kernel->kernel_load_addr, sw_kernel->kernel_size);
if (vm->sw.kernel_type == KERNEL_BZIMAGE) { if (vm->sw.kernel_type == KERNEL_BZIMAGE) {
/* Don't need to load ramdisk if src_addr and load_addr are pointed to same place. */
load_sw_module(vm, ramdisk_info); if (gpa2hva(vm, (uint64_t)ramdisk_info->load_addr) != ramdisk_info->src_addr) {
load_sw_module(vm, ramdisk_info);
}
bootargs_info->load_addr = (void *)BZIMG_CMDLINE_GPA(load_params_gpa); bootargs_info->load_addr = (void *)BZIMG_CMDLINE_GPA(load_params_gpa);

View File

@ -28,14 +28,16 @@ struct zero_page {
uint32_t ramdisk_size; /* 0x21c */ uint32_t ramdisk_size; /* 0x21c */
uint8_t hdr_pad3[0x8]; /* 0x220 */ uint8_t hdr_pad3[0x8]; /* 0x220 */
uint32_t bootargs_addr; /* 0x228 */ uint32_t bootargs_addr; /* 0x228 */
uint8_t hdr_pad4[0x8]; /* 0x22c */ uint32_t initrd_addr_max; /* 0x22c */
uint32_t kernel_alignment; /* 0x230 */
uint8_t relocatable_kernel; /* 0x234 */ uint8_t relocatable_kernel; /* 0x234 */
uint8_t hdr_pad5[0x13]; /* 0x235 */ uint8_t hdr_pad5[0x13]; /* 0x235 */
uint32_t payload_offset;/* 0x248 */ uint32_t payload_offset;/* 0x248 */
uint32_t payload_length;/* 0x24c */ uint32_t payload_length;/* 0x24c */
uint8_t hdr_pad6[0x8]; /* 0x250 */ uint8_t hdr_pad6[0x8]; /* 0x250 */
uint64_t pref_addr; /* 0x258 */ uint64_t pref_addr; /* 0x258 */
uint8_t hdr_pad7[8]; /* 0x260 */ uint32_t init_size; /* 0x260 */
uint8_t hdr_pad7[4]; /* 0x264 */
} __packed hdr; } __packed hdr;
uint8_t pad3[0x68]; /* 0x268 */ uint8_t pad3[0x68]; /* 0x268 */