diff --git a/hypervisor/Makefile b/hypervisor/Makefile
index 1e70bb5e0..f02fe375e 100644
--- a/hypervisor/Makefile
+++ b/hypervisor/Makefile
@@ -283,6 +283,12 @@ VP_BASE_C_SRCS += arch/x86/guest/nested.c
 VP_BASE_C_SRCS += arch/x86/guest/vept.c
 endif
 VP_BASE_C_SRCS += boot/guest/vboot_info.c
+ifeq ($(CONFIG_GUEST_KERNEL_BZIMAGE),y)
+VP_BASE_C_SRCS += boot/guest/bzimage_loader.c
+endif
+ifeq ($(CONFIG_GUEST_KERNEL_RAWIMAGE),y)
+VP_BASE_C_SRCS += boot/guest/rawimage_loader.c
+endif
 VP_BASE_C_SRCS += common/hv_main.c
 VP_BASE_C_SRCS += common/vm_load.c
 VP_BASE_C_SRCS += arch/x86/configs/pci_dev.c
diff --git a/hypervisor/boot/guest/bzimage_loader.c b/hypervisor/boot/guest/bzimage_loader.c
new file mode 100644
index 000000000..1a472640f
--- /dev/null
+++ b/hypervisor/boot/guest/bzimage_loader.c
@@ -0,0 +1,430 @@
+/*
+ * Copyright (C) 2021 Intel Corporation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define DBG_LEVEL_VM_BZIMAGE	6U
+
+/* Define a 32KB memory block to store LaaG VM load params in guest address space
+ * The params include:
+ * Init GDT entries : 1KB (must be 8-byte aligned)
+ * Linux Zeropage : 4KB
+ * Boot cmdline : 2KB
+ * EFI memory map : 12KB
+ * Reserved region for trampoline code : 8KB
+ * Each param should stay 8-byte aligned and the total region size should be less than 32KB
+ * so that it can be put below MEM_1M.
+ * Please note that in a Linux VM the last 8KB below MEM_1M is reserved for trampoline code,
+ * so the block must be able to accommodate it in order to avoid corrupting the trampoline.
+ */
+#define BZIMG_LOAD_PARAMS_SIZE (MEM_4K * 8)
+#define BZIMG_INITGDT_GPA(load_params_gpa) (load_params_gpa + 0UL)
+#define BZIMG_ZEROPAGE_GPA(load_params_gpa) (load_params_gpa + MEM_1K)
+#define BZIMG_CMDLINE_GPA(load_params_gpa) (load_params_gpa + MEM_1K + MEM_4K)
+#define BZIMG_EFIMMAP_GPA(load_params_gpa) (load_params_gpa + MEM_1K + MEM_4K + MEM_2K)
+
+/* TODO:
+ * The value is referenced from the Linux boot protocol for old kernels,
+ * but this should be configurable for different OSes. */
+#define DEFAULT_RAMDISK_GPA_MAX 0x37ffffffUL
+
+#define PRE_VM_MAX_RAM_ADDR_BELOW_4GB (VIRT_ACPI_DATA_ADDR - 1U)
+
+static void *get_initrd_load_addr(struct acrn_vm *vm, uint64_t kernel_start)
+{
+	uint64_t ramdisk_load_gpa = INVALID_GPA;
+	uint64_t ramdisk_gpa_max = DEFAULT_RAMDISK_GPA_MAX;
+	struct zero_page *zeropage = (struct zero_page *)vm->sw.kernel_info.kernel_src_addr;
+	uint32_t kernel_init_size, kernel_align, initrd_addr_max;
+	uint64_t kernel_end;
+
+	/* Per the Linux boot protocol, the kernel needs a chunk of contiguous
+	 * memory (i.e. the init_size field in zeropage) from its extract address in
+	 * order to boot, and the initrd_addr_max field specifies the maximum address
+	 * of the ramdisk. Per kernel source head_64.S, the decompressed kernel starts
+	 * at the compressed kernel load address aligned up to 2M.
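+	 *
+	 * A worked example with illustrative numbers (not taken from this patch):
+	 * with kernel_start = 0x5aa000, kernel_alignment = 0x200000 (2M) and
+	 * init_size = 0x1a00000, the computation below gives
+	 * kernel_end = roundup(0x5aa000, 0x200000) + 0x1a00000
+	 *            = 0x600000 + 0x1a00000 = 0x2000000.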
+	 */
+	stac();
+	kernel_init_size = zeropage->hdr.init_size;
+	kernel_align = zeropage->hdr.kernel_alignment;
+	initrd_addr_max = zeropage->hdr.initrd_addr_max;
+	clac();
+	kernel_end = roundup(kernel_start, kernel_align) + kernel_init_size;
+
+	if (initrd_addr_max != 0U) {
+		ramdisk_gpa_max = initrd_addr_max;
+	}
+
+	if (is_sos_vm(vm)) {
+		uint64_t mods_start, mods_end;
+
+		get_boot_mods_range(&mods_start, &mods_end);
+		mods_start = sos_vm_hpa2gpa(mods_start);
+		mods_end = sos_vm_hpa2gpa(mods_end);
+
+		if (vm->sw.ramdisk_info.src_addr != NULL) {
+			ramdisk_load_gpa = sos_vm_hpa2gpa((uint64_t)vm->sw.ramdisk_info.src_addr);
+		}
+
+		/* For the SOS VM, the ramdisk has been loaded by the bootloader, so in most
+		 * cases there is no need to do a gpa copy again. But in the case that the
+		 * ramdisk is loaded by the bootloader at an address higher than its limit,
+		 * we should do a gpa copy then.
+		 */
+		if ((ramdisk_load_gpa + vm->sw.ramdisk_info.size) > ramdisk_gpa_max) {
+			/* In this case, mods_end must be higher than ramdisk_gpa_max,
+			 * so try to locate the ramdisk between MEM_1M and mods_start/kernel_start,
+			 * or try the range between kernel_end and mods_start;
+			 */
+			ramdisk_load_gpa = find_space_from_ve820(vm, vm->sw.ramdisk_info.size,
+					MEM_1M, min(min(mods_start, kernel_start), ramdisk_gpa_max));
+			if ((ramdisk_load_gpa == INVALID_GPA) && (kernel_end < min(mods_start, ramdisk_gpa_max))) {
+				ramdisk_load_gpa = find_space_from_ve820(vm, vm->sw.ramdisk_info.size,
+						kernel_end, min(mods_start, ramdisk_gpa_max));
+			}
+		}
+	} else {
+		/* For a pre-launched VM, the ramdisk is placed by searching the ve820 table.
+		 */
+		ramdisk_gpa_max = min(PRE_VM_MAX_RAM_ADDR_BELOW_4GB, ramdisk_gpa_max);
+
+		if (kernel_end < ramdisk_gpa_max) {
+			ramdisk_load_gpa = find_space_from_ve820(vm, vm->sw.ramdisk_info.size,
+					kernel_end, ramdisk_gpa_max);
+		}
+		if (ramdisk_load_gpa == INVALID_GPA) {
+			ramdisk_load_gpa = find_space_from_ve820(vm, vm->sw.ramdisk_info.size,
+					MEM_1M, min(kernel_start, ramdisk_gpa_max));
+		}
+	}
+
+	if (ramdisk_load_gpa == INVALID_GPA) {
+		pr_err("no space in guest memory to load VM %d ramdisk", vm->vm_id);
+		vm->sw.ramdisk_info.size = 0U;
+	}
+	dev_dbg(DBG_LEVEL_VM_BZIMAGE, "VM%d ramdisk load_addr: 0x%lx", vm->vm_id, ramdisk_load_gpa);
+
+	return (ramdisk_load_gpa == INVALID_GPA) ? NULL : (void *)ramdisk_load_gpa;
+}
+
+/**
+ * @pre vm != NULL
+ */
+static void *get_bzimage_kernel_load_addr(struct acrn_vm *vm)
+{
+	void *load_addr = NULL;
+	struct vm_sw_info *sw_info = &vm->sw;
+	struct zero_page *zeropage;
+
+	/* According to the explanation of pref_address
+	 * in Documentation/x86/boot.txt, a relocating
+	 * bootloader should attempt to load the kernel at pref_address
+	 * if possible. A non-relocatable kernel will unconditionally
+	 * move itself to run at this address.
+	 */
+	zeropage = (struct zero_page *)sw_info->kernel_info.kernel_src_addr;
+
+	stac();
+	if ((is_sos_vm(vm)) && (zeropage->hdr.relocatable_kernel != 0U)) {
+		uint64_t mods_start, mods_end;
+		uint64_t kernel_load_gpa = INVALID_GPA;
+		uint32_t kernel_align = zeropage->hdr.kernel_alignment;
+		uint32_t kernel_init_size = zeropage->hdr.init_size;
+		/* Because the kernel load address needs to be aligned up to kernel_align,
+		 * whereas find_space_from_ve820() can only return a page aligned address,
+		 * we enlarge the needed size to (kernel_init_size + kernel_align).
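+		 *
+		 * Worst case sketch (illustrative numbers): find_space_from_ve820() may
+		 * return a 4K-aligned base such as 0x1201000; rounding it up to a 2M
+		 * kernel_align moves the start to 0x1400000, i.e. up to
+		 * (kernel_align - PAGE_SIZE) bytes are lost to alignment. Reserving
+		 * (kernel_init_size + kernel_align) keeps init_size bytes available
+		 * after the round-up in any case.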
+		 */
+		uint32_t kernel_size = kernel_init_size + kernel_align;
+
+		get_boot_mods_range(&mods_start, &mods_end);
+		mods_start = sos_vm_hpa2gpa(mods_start);
+		mods_end = sos_vm_hpa2gpa(mods_end);
+
+		/* TODO: support loading the kernel when modules are beyond the 4GB space. */
+		if (mods_end < MEM_4G) {
+			kernel_load_gpa = find_space_from_ve820(vm, kernel_size, MEM_1M, mods_start);
+
+			if (kernel_load_gpa == INVALID_GPA) {
+				kernel_load_gpa = find_space_from_ve820(vm, kernel_size, mods_end, MEM_4G);
+			}
+		}
+
+		if (kernel_load_gpa != INVALID_GPA) {
+			load_addr = (void *)roundup((uint64_t)kernel_load_gpa, kernel_align);
+		}
+	} else {
+		load_addr = (void *)zeropage->hdr.pref_addr;
+		if (is_sos_vm(vm)) {
+			/* The non-relocatable SOS kernel might overlap with boot modules. */
+			pr_err("Non-relocatable kernel found, risk to boot!");
+		}
+	}
+	clac();
+
+	if (load_addr == NULL) {
+		pr_err("Could not get kernel load addr of VM %d.", vm->vm_id);
+	}
+
+	dev_dbg(DBG_LEVEL_VM_BZIMAGE, "VM%d kernel load_addr: 0x%lx", vm->vm_id, load_addr);
+	return load_addr;
+}
+
+/**
+ * @pre vm != NULL && efi_mmap_desc != NULL
+ */
+static uint16_t create_sos_vm_efi_mmap_desc(struct acrn_vm *vm, struct efi_memory_desc *efi_mmap_desc)
+{
+	uint16_t i, desc_idx = 0U;
+	const struct efi_memory_desc *hv_efi_mmap_desc = get_efi_mmap_entry();
+
+	for (i = 0U; i < get_efi_mmap_entries_count(); i++) {
+		/* The efi mmap desc types listed below should be kept as in native for the SOS VM */
+		if ((hv_efi_mmap_desc[i].type == EFI_RESERVED_MEMORYTYPE)
+				|| (hv_efi_mmap_desc[i].type == EFI_UNUSABLE_MEMORY)
+				|| (hv_efi_mmap_desc[i].type == EFI_ACPI_RECLAIM_MEMORY)
+				|| (hv_efi_mmap_desc[i].type == EFI_ACPI_MEMORY_NVS)
+				|| (hv_efi_mmap_desc[i].type == EFI_BOOT_SERVICES_CODE)
+				|| (hv_efi_mmap_desc[i].type == EFI_BOOT_SERVICES_DATA)
+				|| (hv_efi_mmap_desc[i].type == EFI_RUNTIME_SERVICES_CODE)
+				|| (hv_efi_mmap_desc[i].type == EFI_RUNTIME_SERVICES_DATA)
+				|| (hv_efi_mmap_desc[i].type == EFI_MEMORYMAPPED_IO)
+				|| (hv_efi_mmap_desc[i].type == EFI_MEMORYMAPPED_IOPORTSPACE)
+				|| (hv_efi_mmap_desc[i].type == EFI_PALCODE)
+				|| (hv_efi_mmap_desc[i].type == EFI_PERSISTENT_MEMORY)) {
+
+			efi_mmap_desc[desc_idx] = hv_efi_mmap_desc[i];
+			desc_idx++;
+		}
+	}
+
+	for (i = 0U; i < vm->e820_entry_num; i++) {
+		/* A memory region with e820 type RAM can act as EFI_CONVENTIONAL_MEMORY
+		 * for the SOS VM; the regions occupied by the HV and pre-launched VMs have
+		 * been filtered out already, so this is safe for the SOS VM.
+		 * As the SOS VM starts to run after the EFI call ExitBootServices(), the
+		 * types EFI_LOADER_CODE and EFI_LOADER_DATA, which have been mapped to
+		 * E820_TYPE_RAM, are not needed.
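+		 *
+		 * For illustration (hypothetical entry, not from a real map): an e820 RAM
+		 * entry with baseaddr 0x100000 and length 0x3ff00000 becomes a descriptor
+		 * with phys_addr/virt_addr 0x100000, num_pages = 0x3ff00000 / PAGE_SIZE
+		 * = 0x3ff00 and attribute EFI_MEMORY_WB in the conversion below.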
+		 */
+		if (vm->e820_entries[i].type == E820_TYPE_RAM) {
+			efi_mmap_desc[desc_idx].type = EFI_CONVENTIONAL_MEMORY;
+			efi_mmap_desc[desc_idx].phys_addr = vm->e820_entries[i].baseaddr;
+			efi_mmap_desc[desc_idx].virt_addr = vm->e820_entries[i].baseaddr;
+			efi_mmap_desc[desc_idx].num_pages = vm->e820_entries[i].length / PAGE_SIZE;
+			efi_mmap_desc[desc_idx].attribute = EFI_MEMORY_WB;
+			desc_idx++;
+		}
+	}
+
+	for (i = 0U; i < desc_idx; i++) {
+		pr_dbg("SOS VM efi mmap desc[%d]: addr: 0x%lx, len: 0x%lx, type: %d", i,
+				efi_mmap_desc[i].phys_addr, efi_mmap_desc[i].num_pages * PAGE_SIZE, efi_mmap_desc[i].type);
+	}
+
+	return desc_idx;
+}
+
+/**
+ * @pre zp != NULL && vm != NULL
+ */
+static uint32_t create_zeropage_e820(struct zero_page *zp, const struct acrn_vm *vm)
+{
+	uint32_t entry_num = vm->e820_entry_num;
+	struct e820_entry *zp_e820 = zp->entries;
+	const struct e820_entry *vm_e820 = vm->e820_entries;
+
+	if ((zp_e820 == NULL) || (vm_e820 == NULL) || (entry_num == 0U) || (entry_num > E820_MAX_ENTRIES)) {
+		pr_err("e820 create error");
+		entry_num = 0U;
+	} else {
+		(void)memcpy_s((void *)zp_e820, entry_num * sizeof(struct e820_entry),
+				(void *)vm_e820, entry_num * sizeof(struct e820_entry));
+	}
+	return entry_num;
+}
+
+/**
+ * @pre vm != NULL
+ * @pre (vm->min_mem_addr <= kernel_load_addr) && (kernel_load_addr < vm->max_mem_addr)
+ */
+static uint64_t create_zero_page(struct acrn_vm *vm, uint64_t load_params_gpa)
+{
+	struct zero_page *zeropage, *hva;
+	struct sw_kernel_info *sw_kernel = &(vm->sw.kernel_info);
+	struct sw_module_info *bootargs_info = &(vm->sw.bootargs_info);
+	struct sw_module_info *ramdisk_info = &(vm->sw.ramdisk_info);
+	uint64_t gpa, addr;
+
+	gpa = BZIMG_ZEROPAGE_GPA(load_params_gpa);
+	hva = (struct zero_page *)gpa2hva(vm, gpa);
+	zeropage = hva;
+
+	stac();
+	/* clear the zeropage */
+	(void)memset(zeropage, 0U, MEM_2K);
+
+#ifdef CONFIG_MULTIBOOT2
+	if (is_sos_vm(vm)) {
+		struct acrn_boot_info *abi = get_acrn_boot_info();
+
+		if (boot_from_uefi(abi)) {
+			struct efi_info *sos_efi_info = &zeropage->boot_efi_info;
+			uint64_t efi_mmap_gpa = BZIMG_EFIMMAP_GPA(load_params_gpa);
+			struct efi_memory_desc *efi_mmap_desc = (struct efi_memory_desc *)gpa2hva(vm, efi_mmap_gpa);
+			uint16_t efi_mmap_desc_nr = create_sos_vm_efi_mmap_desc(vm, efi_mmap_desc);
+
+			sos_efi_info->loader_signature = 0x34364c45; /* "EL64" */
+			sos_efi_info->memdesc_version = abi->uefi_info.memdesc_version;
+			sos_efi_info->memdesc_size = sizeof(struct efi_memory_desc);
+			sos_efi_info->memmap_size = efi_mmap_desc_nr * sizeof(struct efi_memory_desc);
+			sos_efi_info->memmap = (uint32_t)efi_mmap_gpa;
+			sos_efi_info->memmap_hi = (uint32_t)(efi_mmap_gpa >> 32U);
+			sos_efi_info->systab = abi->uefi_info.systab;
+			sos_efi_info->systab_hi = abi->uefi_info.systab_hi;
+		}
+	}
+#endif
+	/* copy part of the header into the zero page */
+	hva = (struct zero_page *)sw_kernel->kernel_src_addr;
+	(void)memcpy_s(&(zeropage->hdr), sizeof(zeropage->hdr),
+			&(hva->hdr), sizeof(hva->hdr));
+
+	/* See if the kernel has a RAM disk */
+	if (ramdisk_info->src_addr != NULL) {
+		/* Copy the ramdisk load_addr and size into the zeropage header structure
+		 */
+		addr = (uint64_t)ramdisk_info->load_addr;
+		zeropage->hdr.ramdisk_addr = (uint32_t)addr;
+		zeropage->hdr.ramdisk_size = (uint32_t)ramdisk_info->size;
+	}
+
+	/* Copy the bootargs load_addr into the zeropage header structure */
+	addr = (uint64_t)bootargs_info->load_addr;
+	zeropage->hdr.bootargs_addr = (uint32_t)addr;
+
+	/* set constant arguments in the zero page */
+	zeropage->hdr.loader_type = 0xffU;
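+	/* Per the Linux boot protocol, loader_type 0xff means an "undefined" bootloader,
+	 * and bit 5 of load_flags is QUIET_FLAG, which asks the kernel to suppress
+	 * early boot messages; that is the bit set below.
+	 */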
+	zeropage->hdr.load_flags |= (1U << 5U);	/* quiet */
+
+	/* Create/add e820 table entries in the zeropage */
+	zeropage->e820_nentries = (uint8_t)create_zeropage_e820(zeropage, vm);
+	clac();
+
+	/* Return the physical base address of the zeropage */
+	return gpa;
+}
+
+/**
+ * @pre vm != NULL
+ */
+static void prepare_loading_bzimage(struct acrn_vm *vm, struct acrn_vcpu *vcpu,
+		uint64_t load_params_gpa, uint64_t kernel_load_gpa)
+{
+	uint32_t i;
+	uint32_t prot_code_offset, prot_code_size, kernel_entry_offset;
+	uint8_t setup_sectors;
+	const struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
+	struct sw_kernel_info *sw_kernel = &(vm->sw.kernel_info);
+	struct sw_module_info *bootargs_info = &(vm->sw.bootargs_info);
+	struct sw_module_info *ramdisk_info = &(vm->sw.ramdisk_info);
+	struct sw_module_info *acpi_info = &(vm->sw.acpi_info);
+	struct zero_page *zeropage = (struct zero_page *)sw_kernel->kernel_src_addr;
+
+	/* The bzImage file consists of three parts:
+	 * boot_params (i.e. zero page) + real mode setup code + compressed protected mode code
+	 * The compressed protected mode code starts at offset (setup_sectors + 1U) * 512U of the bzImage.
+	 * Only the protected mode code needs to be loaded.
+	 */
+	stac();
+	setup_sectors = (zeropage->hdr.setup_sects == 0U) ? 4U : zeropage->hdr.setup_sects;
+	clac();
+	prot_code_offset = (uint32_t)(setup_sectors + 1U) * 512U;
+	prot_code_size = (sw_kernel->kernel_size > prot_code_offset) ?
+			(sw_kernel->kernel_size - prot_code_offset) : 0U;
+
+	/* Copy the protected mode part of the kernel code to its run-time location */
+	(void)copy_to_gpa(vm, (sw_kernel->kernel_src_addr + prot_code_offset), kernel_load_gpa, prot_code_size);
+
+	if (vm->sw.ramdisk_info.size > 0U) {
+		/* Use the customer-specified ramdisk load addr if it is configured in the VM
+		 * configuration, otherwise use the address allocated by the HV.
+		 */
+		if (vm_config->os_config.kernel_ramdisk_addr != 0UL) {
+			vm->sw.ramdisk_info.load_addr = (void *)vm_config->os_config.kernel_ramdisk_addr;
+		} else {
+			vm->sw.ramdisk_info.load_addr = (void *)get_initrd_load_addr(vm, kernel_load_gpa);
+			if (vm->sw.ramdisk_info.load_addr == NULL) {
+				pr_err("failed to load initrd for VM%d!", vm->vm_id);
+			}
+		}
+
+		/* No need to load the ramdisk if src_addr and load_addr point to the same place.
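+		 *
+		 * This is typically the SOS VM case above: ramdisk_load_gpa was derived
+		 * from src_addr via sos_vm_hpa2gpa() and never relocated, so gpa2hva()
+		 * resolves load_addr straight back to src_addr and the copy would be
+		 * a no-op.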
+		 */
+		if (gpa2hva(vm, (uint64_t)ramdisk_info->load_addr) != ramdisk_info->src_addr) {
+			load_sw_module(vm, ramdisk_info);
+		}
+	}
+
+	bootargs_info->load_addr = (void *)BZIMG_CMDLINE_GPA(load_params_gpa);
+
+	load_sw_module(vm, bootargs_info);
+
+	/* Copy the guest OS ACPI to its load location */
+	load_sw_module(vm, acpi_info);
+
+	/* The 32bit kernel entry is where the protected mode code was loaded */
+	kernel_entry_offset = 0U;
+	if (vcpu->arch.cpu_mode == CPU_MODE_64BIT) {
+		/* The 64bit entry is 512 bytes after the start */
+		kernel_entry_offset += 512U;
+	}
+
+	sw_kernel->kernel_entry_addr = (void *)(kernel_load_gpa + kernel_entry_offset);
+
+	/* Documentation states: ebx=0, edi=0, ebp=0, esi=ptr to
+	 * zeropage
+	 */
+	for (i = 0U; i < NUM_GPRS; i++) {
+		vcpu_set_gpreg(vcpu, i, 0UL);
+	}
+
+	/* Create the zeropage and copy its physical base address
+	 * into RSI
+	 */
+	vcpu_set_gpreg(vcpu, CPU_REG_RSI, create_zero_page(vm, load_params_gpa));
+	pr_info("%s, RSI pointing to zero page for VM %d at GPA %X",
+			__func__, vm->vm_id, vcpu_get_gpreg(vcpu, CPU_REG_RSI));
+}
+
+int32_t vm_bzimage_loader(struct acrn_vm *vm)
+{
+	int32_t ret = -ENOMEM;
+	/* get the primary vcpu */
+	struct acrn_vcpu *vcpu = vcpu_from_vid(vm, BSP_CPU_ID);
+	uint64_t load_params_gpa = find_space_from_ve820(vm, BZIMG_LOAD_PARAMS_SIZE, MEM_4K, MEM_1M);
+
+	if (load_params_gpa != INVALID_GPA) {
+		uint64_t kernel_load_gpa = (uint64_t)get_bzimage_kernel_load_addr(vm);
+
+		if (kernel_load_gpa != 0UL) {
+			/* We boot the bzImage from protected mode directly */
+			init_vcpu_protect_mode_regs(vcpu, BZIMG_INITGDT_GPA(load_params_gpa));
+
+			prepare_loading_bzimage(vm, vcpu, load_params_gpa, kernel_load_gpa);
+
+			ret = 0;
+		}
+	}
+
+	return ret;
+}
diff --git a/hypervisor/boot/guest/rawimage_loader.c b/hypervisor/boot/guest/rawimage_loader.c
new file mode 100644
index 000000000..4dd240669
--- /dev/null
+++ b/hypervisor/boot/guest/rawimage_loader.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2021 Intel Corporation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include
+#include
+
+/**
+ * @pre vm != NULL
+ */
+static void prepare_loading_rawimage(struct acrn_vm *vm)
+{
+	struct sw_kernel_info *sw_kernel = &(vm->sw.kernel_info);
+	struct sw_module_info *acpi_info = &(vm->sw.acpi_info);
+	const struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
+	uint64_t kernel_load_gpa;
+
+	/* TODO: GPA 0 load support */
+	kernel_load_gpa = vm_config->os_config.kernel_load_addr;
+
+	/* Copy the guest kernel image to its run-time location */
+	(void)copy_to_gpa(vm, sw_kernel->kernel_src_addr, kernel_load_gpa, sw_kernel->kernel_size);
+
+	/* Copy the guest OS ACPI to its load location */
+	load_sw_module(vm, acpi_info);
+
+	sw_kernel->kernel_entry_addr = (void *)vm_config->os_config.kernel_entry_addr;
+}
+
+int32_t vm_rawimage_loader(struct acrn_vm *vm)
+{
+	int32_t ret = 0;
+	uint64_t vgdt_gpa = 0x800;
+
+	/*
+	 * TODO:
+	 * - We need to initialize the guest BSP registers according to the
+	 *   guest boot mode (real mode vs. protected mode).
+	 * - The memory layout usage is unclear; only the GDT might be needed as a boot param.
+	 *   Currently we only support Zephyr, which has no need for cmdline/e820/efimmap/etc.;
+	 *   hardcode the vGDT GPA to 0x800, which is not used by Zephyr so far;
+	 */
+	init_vcpu_protect_mode_regs(vcpu_from_vid(vm, BSP_CPU_ID), vgdt_gpa);
+
+	prepare_loading_rawimage(vm);
+
+	return ret;
+}
diff --git a/hypervisor/boot/include/guest/vboot_info.h b/hypervisor/boot/include/guest/vboot_info.h
index 7178c53f6..442703bde 100644
--- a/hypervisor/boot/include/guest/vboot_info.h
+++ b/hypervisor/boot/include/guest/vboot_info.h
@@ -9,5 +9,14 @@
 #define VBOOT_INFO_H
 
 int32_t init_vm_boot_info(struct acrn_vm *vm);
+void load_sw_module(struct acrn_vm *vm, struct sw_module_info *sw_module);
+
+#ifdef CONFIG_GUEST_KERNEL_BZIMAGE
+int32_t vm_bzimage_loader(struct acrn_vm *vm);
+#endif
+#ifdef CONFIG_GUEST_KERNEL_RAWIMAGE
+int32_t vm_rawimage_loader(struct acrn_vm *vm);
+#endif
+
 #endif /* end of include guard: VBOOT_INFO_H */
diff --git a/hypervisor/common/vm_load.c b/hypervisor/common/vm_load.c
index 9e8104997..dea34c955 100644
--- a/hypervisor/common/vm_load.c
+++ b/hypervisor/common/vm_load.c
@@ -5,481 +5,20 @@
  */
 
 #include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
+#include
 #include
 #include
 
-#define DBG_LEVEL_VMLOAD	6U
-
-/* Define a 32KB memory block to store LaaG VM load params in guest address space
- * The params including:
- * Init GDT entries : 1KB (must be 8byte aligned)
- * Linux Zeropage : 4KB
- * Boot cmdline : 2KB
- * EFI memory map : 12KB
- * Reserved region for trampoline code : 8KB
- * Each param should keep 8byte aligned and the total region size should be less than 32KB
- * so that it could be put below MEM_1M.
- * Please note in Linux VM, the last 8KB space below MEM_1M is for trampoline code. The block
- * should be able to accommodate it and so that avoid the trampoline corruption.
- */
-#define BZIMG_LOAD_PARAMS_SIZE (MEM_4K * 8)
-#define BZIMG_INITGDT_GPA(load_params_gpa) (load_params_gpa + 0UL)
-#define BZIMG_ZEROPAGE_GPA(load_params_gpa) (load_params_gpa + MEM_1K)
-#define BZIMG_CMDLINE_GPA(load_params_gpa) (load_params_gpa + MEM_1K + MEM_4K)
-#define BZIMG_EFIMMAP_GPA(load_params_gpa) (load_params_gpa + MEM_1K + MEM_4K + MEM_2K)
-
-/* TODO:
- * The value is referenced from Linux boot protocal for old kernels,
- * but this should be configurable for different OS. */
-#define DEFAULT_RAMDISK_GPA_MAX 0x37ffffffUL
-
-#define PRE_VM_MAX_RAM_ADDR_BELOW_4GB (VIRT_ACPI_DATA_ADDR - 1U)
-
-static void *get_initrd_load_addr(struct acrn_vm *vm, uint64_t kernel_start)
-{
-	uint64_t ramdisk_load_gpa = INVALID_GPA;
-	uint64_t ramdisk_gpa_max = DEFAULT_RAMDISK_GPA_MAX;
-	struct zero_page *zeropage = (struct zero_page *)vm->sw.kernel_info.kernel_src_addr;
-	uint32_t kernel_init_size, kernel_align, initrd_addr_max;
-	uint64_t kernel_end;
-
-	/* Per Linux boot protocol, the Kernel need a size of contiguous
-	 * memory(i.e. init_size field in zeropage) from its extract address to boot,
-	 * and initrd_addr_max field specifies the maximum address of the ramdisk.
-	 * Per kernel src head_64.S, decompressed kernel start at 2M aligned to the
-	 * compressed kernel load address.
-	 */
-	stac();
-	kernel_init_size = zeropage->hdr.init_size;
-	kernel_align = zeropage->hdr.kernel_alignment;
-	initrd_addr_max = zeropage->hdr.initrd_addr_max;
-	stac();
-	kernel_end = roundup(kernel_start, kernel_align) + kernel_init_size;
-
-	if (initrd_addr_max != 0U) {
-		ramdisk_gpa_max = initrd_addr_max;
-	}
-
-	if (is_sos_vm(vm)) {
-		uint64_t mods_start, mods_end;
-
-		get_boot_mods_range(&mods_start, &mods_end);
-		mods_start = sos_vm_hpa2gpa(mods_start);
-		mods_end = sos_vm_hpa2gpa(mods_end);
-
-		if (vm->sw.ramdisk_info.src_addr != NULL) {
-			ramdisk_load_gpa = sos_vm_hpa2gpa((uint64_t)vm->sw.ramdisk_info.src_addr);
-		}
-
-		/* For SOS VM, the ramdisk has been loaded by bootloader, so in most cases
-		 * there is no need to do gpa copy again. But in the case that the ramdisk is
-		 * loaded by bootloader at a address higher than its limit, we should do gpa
-		 * copy then.
-		 */
-		if ((ramdisk_load_gpa + vm->sw.ramdisk_info.size) > ramdisk_gpa_max) {
-			/* In this case, mods_end must be higher than ramdisk_gpa_max,
-			 * so try to locate ramdisk between MEM_1M and mods_start/kernel_start,
-			 * or try the range between kernel_end and mods_start;
-			 */
-			ramdisk_load_gpa = find_space_from_ve820(vm, vm->sw.ramdisk_info.size,
-					MEM_1M, min(min(mods_start, kernel_start), ramdisk_gpa_max));
-			if ((ramdisk_load_gpa == INVALID_GPA) && (kernel_end < min(mods_start, ramdisk_gpa_max))) {
-				ramdisk_load_gpa = find_space_from_ve820(vm, vm->sw.ramdisk_info.size,
-						kernel_end, min(mods_start, ramdisk_gpa_max));
-			}
-		}
-	} else {
-		/* For pre-launched VM, the ramdisk would be put by searching ve820 table.
-		 */
-		ramdisk_gpa_max = min(PRE_VM_MAX_RAM_ADDR_BELOW_4GB, ramdisk_gpa_max);
-
-		if (kernel_end < ramdisk_gpa_max) {
-			ramdisk_load_gpa = find_space_from_ve820(vm, vm->sw.ramdisk_info.size,
-					kernel_end, ramdisk_gpa_max);
-		}
-		if (ramdisk_load_gpa == INVALID_GPA) {
-			ramdisk_load_gpa = find_space_from_ve820(vm, vm->sw.ramdisk_info.size,
-					MEM_1M, min(kernel_start, ramdisk_gpa_max));
-		}
-	}
-
-	if (ramdisk_load_gpa == INVALID_GPA) {
-		pr_err("no space in guest memory to load VM %d ramdisk", vm->vm_id);
-		vm->sw.ramdisk_info.size = 0U;
-	}
-	dev_dbg(DBG_LEVEL_VMLOAD, "VM%d ramdisk load_addr: 0x%lx", vm->vm_id, ramdisk_load_gpa);
-
-	return (ramdisk_load_gpa == INVALID_GPA) ? NULL : (void *)ramdisk_load_gpa;
-}
-
-/**
- * @pre vm != NULL
- */
-static void *get_bzimage_kernel_load_addr(struct acrn_vm *vm)
-{
-	void *load_addr = NULL;
-	struct vm_sw_info *sw_info = &vm->sw;
-	struct zero_page *zeropage;
-
-	/* According to the explaination for pref_address
-	 * in Documentation/x86/boot.txt, a relocating
-	 * bootloader should attempt to load kernel at pref_address
-	 * if possible. A non-relocatable kernel will unconditionally
-	 * move itself and to run at this address.
-	 */
-	zeropage = (struct zero_page *)sw_info->kernel_info.kernel_src_addr;
-
-	stac();
-	if ((is_sos_vm(vm)) && (zeropage->hdr.relocatable_kernel != 0U)) {
-		uint64_t mods_start, mods_end;
-		uint64_t kernel_load_gpa = INVALID_GPA;
-		uint32_t kernel_align = zeropage->hdr.kernel_alignment;
-		uint32_t kernel_init_size = zeropage->hdr.init_size;
-		/* Because the kernel load address need to be up aligned to kernel_align size
-		 * whereas find_space_from_ve820() can only return page aligned address,
-		 * we enlarge the needed size to (kernel_init_size + kernel_align).
-		 */
-		uint32_t kernel_size = kernel_init_size + kernel_align;
-
-		get_boot_mods_range(&mods_start, &mods_end);
-		mods_start = sos_vm_hpa2gpa(mods_start);
-		mods_end = sos_vm_hpa2gpa(mods_end);
-
-		/* TODO: support load kernel when modules are beyond 4GB space. */
-		if (mods_end < MEM_4G) {
-			kernel_load_gpa = find_space_from_ve820(vm, kernel_size, MEM_1M, mods_start);
-
-			if (kernel_load_gpa == INVALID_GPA) {
-				kernel_load_gpa = find_space_from_ve820(vm, kernel_size, mods_end, MEM_4G);
-			}
-		}
-
-		if (kernel_load_gpa != INVALID_GPA) {
-			load_addr = (void *)roundup((uint64_t)kernel_load_gpa, kernel_align);
-		}
-	} else {
-		load_addr = (void *)zeropage->hdr.pref_addr;
-		if (is_sos_vm(vm)) {
-			/* The non-relocatable SOS kernel might overlap with boot modules. */
-			pr_err("Non-relocatable kernel found, risk to boot!");
-		}
-	}
-	clac();
-
-	if (load_addr == NULL) {
-		pr_err("Could not get kernel load addr of VM %d .", vm->vm_id);
-	}
-
-	dev_dbg(DBG_LEVEL_VMLOAD, "VM%d kernel load_addr: 0x%lx", vm->vm_id, load_addr);
-	return load_addr;
-}
-
-/**
- * @pre vm != NULL && efi_mmap_desc != NULL
- */
-static uint16_t create_sos_vm_efi_mmap_desc(struct acrn_vm *vm, struct efi_memory_desc *efi_mmap_desc)
-{
-	uint16_t i, desc_idx = 0U;
-	const struct efi_memory_desc *hv_efi_mmap_desc = get_efi_mmap_entry();
-
-	for (i = 0U; i < get_efi_mmap_entries_count(); i++) {
-		/* Below efi mmap desc types in native should be kept as original for SOS VM */
-		if ((hv_efi_mmap_desc[i].type == EFI_RESERVED_MEMORYTYPE)
-				|| (hv_efi_mmap_desc[i].type == EFI_UNUSABLE_MEMORY)
-				|| (hv_efi_mmap_desc[i].type == EFI_ACPI_RECLAIM_MEMORY)
-				|| (hv_efi_mmap_desc[i].type == EFI_ACPI_MEMORY_NVS)
-				|| (hv_efi_mmap_desc[i].type == EFI_BOOT_SERVICES_CODE)
-				|| (hv_efi_mmap_desc[i].type == EFI_BOOT_SERVICES_DATA)
-				|| (hv_efi_mmap_desc[i].type == EFI_RUNTIME_SERVICES_CODE)
-				|| (hv_efi_mmap_desc[i].type == EFI_RUNTIME_SERVICES_DATA)
-				|| (hv_efi_mmap_desc[i].type == EFI_MEMORYMAPPED_IO)
-				|| (hv_efi_mmap_desc[i].type == EFI_MEMORYMAPPED_IOPORTSPACE)
-				|| (hv_efi_mmap_desc[i].type == EFI_PALCODE)
-				|| (hv_efi_mmap_desc[i].type == EFI_PERSISTENT_MEMORY)) {
-
-			efi_mmap_desc[desc_idx] = hv_efi_mmap_desc[i];
-			desc_idx++;
-		}
-	}
-
-	for (i = 0U; i < vm->e820_entry_num; i++) {
-		/* The memory region with e820 type of RAM could be acted as EFI_CONVENTIONAL_MEMORY
-		 * for SOS VM, the region which occupied by HV and pre-launched VM has been filtered
-		 * already, so it is safe for SOS VM.
-		 * As SOS VM start to run after efi call ExitBootService(), the type of EFI_LOADER_CODE
-		 * and EFI_LOADER_DATA which have been mapped to E820_TYPE_RAM are not needed.
-		 */
-		if (vm->e820_entries[i].type == E820_TYPE_RAM) {
-			efi_mmap_desc[desc_idx].type = EFI_CONVENTIONAL_MEMORY;
-			efi_mmap_desc[desc_idx].phys_addr = vm->e820_entries[i].baseaddr;
-			efi_mmap_desc[desc_idx].virt_addr = vm->e820_entries[i].baseaddr;
-			efi_mmap_desc[desc_idx].num_pages = vm->e820_entries[i].length / PAGE_SIZE;
-			efi_mmap_desc[desc_idx].attribute = EFI_MEMORY_WB;
-			desc_idx++;
-		}
-	}
-
-	for (i = 0U; i < desc_idx; i++) {
-		pr_dbg("SOS VM efi mmap desc[%d]: addr: 0x%lx, len: 0x%lx, type: %d", i,
-				efi_mmap_desc[i].phys_addr, efi_mmap_desc[i].num_pages * PAGE_SIZE, efi_mmap_desc[i].type);
-	}
-
-	return desc_idx;
-}
-
-/**
- * @pre zp != NULL && vm != NULL
- */
-static uint32_t create_zeropage_e820(struct zero_page *zp, const struct acrn_vm *vm)
-{
-	uint32_t entry_num = vm->e820_entry_num;
-	struct e820_entry *zp_e820 = zp->entries;
-	const struct e820_entry *vm_e820 = vm->e820_entries;
-
-	if ((zp_e820 == NULL) || (vm_e820 == NULL) || (entry_num == 0U) || (entry_num > E820_MAX_ENTRIES)) {
-		pr_err("e820 create error");
-		entry_num = 0U;
-	} else {
-		(void)memcpy_s((void *)zp_e820, entry_num * sizeof(struct e820_entry),
-				(void *)vm_e820, entry_num * sizeof(struct e820_entry));
-	}
-	return entry_num;
-}
-
-/**
- * @pre vm != NULL
- * @pre (vm->min_mem_addr <= kernel_load_addr) && (kernel_load_addr < vm->max_mem_addr)
- */
-static uint64_t create_zero_page(struct acrn_vm *vm, uint64_t load_params_gpa)
-{
-	struct zero_page *zeropage, *hva;
-	struct sw_kernel_info *sw_kernel = &(vm->sw.kernel_info);
-	struct sw_module_info *bootargs_info = &(vm->sw.bootargs_info);
-	struct sw_module_info *ramdisk_info = &(vm->sw.ramdisk_info);
-	uint64_t gpa, addr;
-
-	gpa = BZIMG_ZEROPAGE_GPA(load_params_gpa);
-	hva = (struct zero_page *)gpa2hva(vm, gpa);
-	zeropage = hva;
-
-	stac();
-	/* clear the zeropage */
-	(void)memset(zeropage, 0U, MEM_2K);
-
-#ifdef CONFIG_MULTIBOOT2
-	if (is_sos_vm(vm)) {
-		struct acrn_boot_info *abi = get_acrn_boot_info();
-
-		if (boot_from_uefi(abi)) {
-			struct efi_info *sos_efi_info = &zeropage->boot_efi_info;
-			uint64_t efi_mmap_gpa = BZIMG_EFIMMAP_GPA(load_params_gpa);
-			struct efi_memory_desc *efi_mmap_desc = (struct efi_memory_desc *)gpa2hva(vm, efi_mmap_gpa);
-			uint16_t efi_mmap_desc_nr = create_sos_vm_efi_mmap_desc(vm, efi_mmap_desc);
-
-			sos_efi_info->loader_signature = 0x34364c45; /* "EL64" */
-			sos_efi_info->memdesc_version = abi->uefi_info.memdesc_version;
-			sos_efi_info->memdesc_size = sizeof(struct efi_memory_desc);
-			sos_efi_info->memmap_size = efi_mmap_desc_nr * sizeof(struct efi_memory_desc);
-			sos_efi_info->memmap = (uint32_t)efi_mmap_gpa;
-			sos_efi_info->memmap_hi = (uint32_t)(efi_mmap_gpa >> 32U);
-			sos_efi_info->systab = abi->uefi_info.systab;
-			sos_efi_info->systab_hi = abi->uefi_info.systab_hi;
-		}
-	}
-#endif
-	/* copy part of the header into the zero page */
-	hva = (struct zero_page *)sw_kernel->kernel_src_addr;
-	(void)memcpy_s(&(zeropage->hdr), sizeof(zeropage->hdr),
-			&(hva->hdr), sizeof(hva->hdr));
-
-	/* See if kernel has a RAM disk */
-	if (ramdisk_info->src_addr != NULL) {
-		/* Copy ramdisk load_addr and size in zeropage header structure
-		 */
-		addr = (uint64_t)ramdisk_info->load_addr;
-		zeropage->hdr.ramdisk_addr = (uint32_t)addr;
-		zeropage->hdr.ramdisk_size = (uint32_t)ramdisk_info->size;
-	}
-
-	/* Copy bootargs load_addr in zeropage header structure */
-	addr = (uint64_t)bootargs_info->load_addr;
-	zeropage->hdr.bootargs_addr = (uint32_t)addr;
-
-	/* set constant arguments in zero page */
-	zeropage->hdr.loader_type = 0xffU;
-	zeropage->hdr.load_flags |= (1U << 5U);	/* quiet */
-
-	/* Create/add e820 table entries in zeropage */
-	zeropage->e820_nentries = (uint8_t)create_zeropage_e820(zeropage, vm);
-	clac();
-
-	/* Return Physical Base Address of zeropage */
-	return gpa;
-}
-
 /**
  * @pre sw_module != NULL
  */
-static void load_sw_module(struct acrn_vm *vm, struct sw_module_info *sw_module)
+void load_sw_module(struct acrn_vm *vm, struct sw_module_info *sw_module)
 {
 	if ((sw_module->size != 0) && (sw_module->load_addr != NULL)) {
 		(void)copy_to_gpa(vm, sw_module->src_addr, (uint64_t)sw_module->load_addr, sw_module->size);
 	}
 }
 
-/**
- * @pre vm != NULL
- */
-static void prepare_loading_bzimage(struct acrn_vm *vm, struct acrn_vcpu *vcpu,
-		uint64_t load_params_gpa, uint64_t kernel_load_gpa)
-{
-	uint32_t i;
-	uint32_t prot_code_offset, prot_code_size, kernel_entry_offset;
-	uint8_t setup_sectors;
-	const struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
-	struct sw_kernel_info *sw_kernel = &(vm->sw.kernel_info);
-	struct sw_module_info *bootargs_info = &(vm->sw.bootargs_info);
-	struct sw_module_info *ramdisk_info = &(vm->sw.ramdisk_info);
-	struct sw_module_info *acpi_info = &(vm->sw.acpi_info);
-	struct zero_page *zeropage = (struct zero_page *)sw_kernel->kernel_src_addr;
-
-	/* The bzImage file consists of three parts:
-	 * boot_params(i.e. zero page) + real mode setup code + compressed protected mode code
-	 * The compressed proteced mode code start at offset (setup_sectors + 1U) * 512U of bzImage.
-	 * Only protected mode code need to be loaded.
-	 */
-	stac();
-	setup_sectors = (zeropage->hdr.setup_sects == 0U) ? 4U : zeropage->hdr.setup_sects;
-	clac();
-	prot_code_offset = (uint32_t)(setup_sectors + 1U) * 512U;
-	prot_code_size = (sw_kernel->kernel_size > prot_code_offset) ?
-			(sw_kernel->kernel_size - prot_code_offset) : 0U;
-
-	/* Copy the protected mode part kernel code to its run-time location */
-	(void)copy_to_gpa(vm, (sw_kernel->kernel_src_addr + prot_code_offset), kernel_load_gpa, prot_code_size);
-
-	if (vm->sw.ramdisk_info.size > 0U) {
-		/* Use customer specified ramdisk load addr if it is configured in VM configuration,
-		 * otherwise use allocated address calculated by HV.
-		 */
-		if (vm_config->os_config.kernel_ramdisk_addr != 0UL) {
-			vm->sw.ramdisk_info.load_addr = (void *)vm_config->os_config.kernel_ramdisk_addr;
-		} else {
-			vm->sw.ramdisk_info.load_addr = (void *)get_initrd_load_addr(vm, kernel_load_gpa);
-			if (vm->sw.ramdisk_info.load_addr == NULL) {
-				pr_err("failed to load initrd for VM%d !", vm->vm_id);
-			}
-		}
-
-		/* Don't need to load ramdisk if src_addr and load_addr are pointed to same place. */
-		if (gpa2hva(vm, (uint64_t)ramdisk_info->load_addr) != ramdisk_info->src_addr) {
-			load_sw_module(vm, ramdisk_info);
-		}
-	}
-
-	bootargs_info->load_addr = (void *)BZIMG_CMDLINE_GPA(load_params_gpa);
-
-	load_sw_module(vm, bootargs_info);
-
-	/* Copy Guest OS ACPI to its load location */
-	load_sw_module(vm, acpi_info);
-
-	/* 32bit kernel entry is at where protected mode code loaded */
-	kernel_entry_offset = 0U;
-	if (vcpu->arch.cpu_mode == CPU_MODE_64BIT) {
-		/* 64bit entry is the 512bytes after the start */
-		kernel_entry_offset += 512U;
-	}
-
-	sw_kernel->kernel_entry_addr = (void *)(kernel_load_gpa + kernel_entry_offset);
-
-	/* Documentation states: ebx=0, edi=0, ebp=0, esi=ptr to
-	 * zeropage
-	 */
-	for (i = 0U; i < NUM_GPRS; i++) {
-		vcpu_set_gpreg(vcpu, i, 0UL);
-	}
-
-	/* Create Zeropage and copy Physical Base Address of Zeropage
-	 * in RSI
-	 */
-	vcpu_set_gpreg(vcpu, CPU_REG_RSI, create_zero_page(vm, load_params_gpa));
-	pr_info("%s, RSI pointing to zero page for VM %d at GPA %X",
-			__func__, vm->vm_id, vcpu_get_gpreg(vcpu, CPU_REG_RSI));
-}
-
-/**
- * @pre vm != NULL
- */
-static void prepare_loading_rawimage(struct acrn_vm *vm)
-{
-	struct sw_kernel_info *sw_kernel = &(vm->sw.kernel_info);
-	struct sw_module_info *acpi_info = &(vm->sw.acpi_info);
-	const struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
-	uint64_t kernel_load_gpa;
-
-	/* TODO: GPA 0 load support */
-	kernel_load_gpa = vm_config->os_config.kernel_load_addr;
-
-	/* Copy the guest kernel image to its run-time location */
-	(void)copy_to_gpa(vm, sw_kernel->kernel_src_addr, kernel_load_gpa, sw_kernel->kernel_size);
-
-	/* Copy Guest OS ACPI to its load location */
-	load_sw_module(vm, acpi_info);
-
-	sw_kernel->kernel_entry_addr = (void *)vm_config->os_config.kernel_entry_addr;
-}
-
-static int32_t vm_bzimage_loader(struct acrn_vm *vm)
-{
-	int32_t ret = -ENOMEM;
-	/* get primary vcpu */
-	struct acrn_vcpu *vcpu = vcpu_from_vid(vm, BSP_CPU_ID);
-	uint64_t load_params_gpa = find_space_from_ve820(vm, BZIMG_LOAD_PARAMS_SIZE, MEM_4K, MEM_1M);
-
-	if (load_params_gpa != INVALID_GPA) {
-		uint64_t kernel_load_gpa = (uint64_t)get_bzimage_kernel_load_addr(vm);
-
-		if (kernel_load_gpa != 0UL) {
-			/* We boot bzImage from protected mode directly */
-			init_vcpu_protect_mode_regs(vcpu, BZIMG_INITGDT_GPA(load_params_gpa));
-
-			prepare_loading_bzimage(vm, vcpu, load_params_gpa, kernel_load_gpa);
-
-			ret = 0;
-		}
-	}
-
-	return ret;
-}
-
-static int32_t vm_rawimage_loader(struct acrn_vm *vm)
-{
-	int32_t ret = 0;
-	uint64_t vgdt_gpa = 0x800;
-
-	/*
-	 * TODO:
-	 * - We need to initialize the guest bsp registers according to
-	 *   guest boot mode (real mode vs protect mode)
-	 * - The memory layout usage is unclear, only GDT might be needed as its boot param.
-	 *   currently we only support Zephyr which has no needs on cmdline/e820/efimmap/etc.
-	 *   hardcode the vGDT GPA to 0x800 where is not used by Zephyr so far;
-	 */
-	init_vcpu_protect_mode_regs(vcpu_from_vid(vm, BSP_CPU_ID), vgdt_gpa);
-
-	prepare_loading_rawimage(vm);
-
-	return ret;
-}
-
 /**
  * @pre vm != NULL
  */
@@ -490,12 +29,16 @@ int32_t vm_sw_loader(struct acrn_vm *vm)
 	struct acrn_vcpu *vcpu = vcpu_from_vid(vm, BSP_CPU_ID);
 
 	switch (vm->sw.kernel_type) {
+#ifdef CONFIG_GUEST_KERNEL_BZIMAGE
 	case KERNEL_BZIMAGE:
 		ret = vm_bzimage_loader(vm);
 		break;
+#endif
+#ifdef CONFIG_GUEST_KERNEL_RAWIMAGE
 	case KERNEL_RAWIMAGE:
 		ret = vm_rawimage_loader(vm);
 		break;
+#endif
 	default:
 		ret = -EINVAL;
 		break;