acrn-hypervisor/hypervisor/arch/x86/guest/vm.c
Yifan Liu 61afe9e51a hv: vm_load: move vcpu register init to arch_vm_prepare_bsp
For Service and pre-launched VMs, the image loaders in ACRN
function the same way as an in-guest bootloader such as GRUB:
they allow ACRN to load a guest image and start directly from
it, skipping the firmware initialization stage.

To reuse image loader code as much as possible, the image loader
logic is split into two stages: the loading stage and the
environmental preparation stage. Most of the loading stage is
common logic, while the environmental preparation stage is
completely arch-specific.

The best place for stage two loader logic is arch_vm_prepare_bsp,
which prepares vcpu register states based on the loaded image
(entry point, load address, etc.).

This commit refactors only the rawimage loader. Other loaders
are left for future improvement.

Tracked-On: #8830
Signed-off-by: Yifan Liu <yifan1.liu@intel.com>
Reviewed-by: Fei Li <fei1.li@intel.com>
Acked-by: Wang Yu1 <yu1.wang@intel.com>
2025-10-30 13:30:32 +08:00
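As a rough illustration of the two-stage split described in the commit message, the sketch below shows how a stage-one loader could hand off to the stage-two logic. This is not the actual ACRN call chain: load_rawimage_stage1() is a hypothetical name, and the kernel_load_addr field is assumed from ACRN's software-load structures (only kernel_entry_addr is referenced in this file, by arch_vm_prepare_bsp()).

/* Sketch only: stage 1 is the common loading logic, stage 2 is arch_vm_prepare_bsp(). */
static void load_rawimage_stage1(struct acrn_vm *vm, uint64_t load_gpa, uint64_t entry_gpa)
{
/* Stage 1 (common): copy the raw image into guest memory at load_gpa, then
 * record where it was placed and where execution should start.
 */
vm->sw.kernel_info.kernel_load_addr = (void *)load_gpa; /* assumed field name */
vm->sw.kernel_info.kernel_entry_addr = (void *)entry_gpa; /* consumed by arch_vm_prepare_bsp() */
}
/* Stage 2 (arch-specific): arch_vm_prepare_bsp(), defined later in this file,
 * reads kernel_entry_addr, sets the BSP's RIP (and, for KERNEL_RAWIMAGE,
 * protected-mode registers), then requests VMCS initialization.
 */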


/*
* Copyright (C) 2018-2025 Intel Corporation.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include <types.h>
#include <errno.h>
#include <sprintf.h>
#include <per_cpu.h>
#include <vm.h>
#include <asm/lapic.h>
#include <asm/guest/vm_reset.h>
#include <asm/guest/virq.h>
#include <bits.h>
#include <asm/e820.h>
#include <boot.h>
#include <asm/vtd.h>
#include <reloc.h>
#include <asm/guest/ept.h>
#include <asm/guest/guest_pm.h>
#include <console.h>
#include <ptdev.h>
#include <asm/guest/vmcs.h>
#include <pgtable.h>
#include <asm/mmu.h>
#include <logmsg.h>
#include <vboot.h>
#include <asm/board.h>
#include <asm/sgx.h>
#include <sbuf.h>
#include <asm/pci_dev.h>
#include <vacpi.h>
#include <asm/platform_caps.h>
#include <mmio_dev.h>
#include <asm/trampoline.h>
#include <asm/guest/assign.h>
#include <vgpio.h>
#include <asm/rtcm.h>
#include <asm/irq.h>
#include <uart16550.h>
#include <vcpu.h>
#ifdef CONFIG_SECURITY_VM_FIXUP
#include <quirks/security_vm_fixup.h>
#endif
#include <asm/boot/ld_sym.h>
#include <asm/guest/optee.h>
/* Local variables */
/* pre-assumption: TRUSTY_RAM_SIZE is 2M aligned */
static struct page post_user_vm_sworld_memory[MAX_TRUSTY_VM_NUM][TRUSTY_RAM_SIZE >> PAGE_SHIFT] __aligned(MEM_2M);
void *get_sworld_memory_base(void)
{
return post_user_vm_sworld_memory;
}
/**
* @pre vm != NULL && vm_config != NULL && vm->vm_id < CONFIG_MAX_VM_NUM
*/
bool is_lapic_pt_configured(const struct acrn_vm *vm)
{
struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
return ((vm_config->guest_flags & GUEST_FLAG_LAPIC_PASSTHROUGH) != 0U);
}
/**
* @pre vm != NULL && vm_config != NULL && vm->vm_id < CONFIG_MAX_VM_NUM
*/
bool is_pmu_pt_configured(const struct acrn_vm *vm)
{
struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
return ((vm_config->guest_flags & GUEST_FLAG_PMU_PASSTHROUGH) != 0U);
}
/**
* @pre vm != NULL && vm_config != NULL && vm->vm_id < CONFIG_MAX_VM_NUM
*/
bool is_nvmx_configured(const struct acrn_vm *vm)
{
struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
return ((vm_config->guest_flags & GUEST_FLAG_NVMX_ENABLED) != 0U);
}
/**
* @pre vm != NULL && vm_config != NULL && vm->vm_id < CONFIG_MAX_VM_NUM
*/
bool is_vcat_configured(const struct acrn_vm *vm)
{
struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
return ((vm_config->guest_flags & GUEST_FLAG_VCAT_ENABLED) != 0U);
}
/**
* @pre vm != NULL && vm_config != NULL && vm->vm_id < CONFIG_MAX_VM_NUM
*/
bool is_vhwp_configured(const struct acrn_vm *vm)
{
struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
return ((vm_config->guest_flags & GUEST_FLAG_VHWP) != 0U);
}
/**
* @pre vm != NULL && vm_config != NULL && vm->vm_id < CONFIG_MAX_VM_NUM
*/
bool is_vtm_configured(const struct acrn_vm *vm)
{
struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
return ((vm_config->guest_flags & GUEST_FLAG_VTM) != 0U);
}
/**
* @brief VT-d PI posted mode can possibly be used for PTDEVs assigned
* to this VM if platform supports VT-d PI AND lapic passthru is not configured
* for this VM.
* However, since we can only post single-destination IRQs, meeting these 2 conditions
* does not necessarily mean posted mode will be used for all PTDEVs belonging
* to the VM; it is only used when the IRQ is single-destination for the specific PTDEV
* @pre vm != NULL
*/
bool is_pi_capable(const struct acrn_vm *vm)
{
return (platform_caps.pi && (!is_lapic_pt_configured(vm)));
}
/**
* @pre vm != NULL && vm_config != NULL && vm->vm_id < CONFIG_MAX_VM_NUM
*/
bool vm_hide_mtrr(const struct acrn_vm *vm)
{
struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
return ((vm_config->guest_flags & GUEST_FLAG_HIDE_MTRR) != 0U);
}
/**
* @brief Initialize the I/O bitmap for \p vm
*
* @param vm The VM whose I/O bitmap is to be initialized
*/
static void setup_io_bitmap(struct acrn_vm *vm)
{
if (is_service_vm(vm)) {
(void)memset(vm->arch_vm.io_bitmap, 0x00U, PAGE_SIZE * 2U);
} else {
/* block all IO port access from Guest */
(void)memset(vm->arch_vm.io_bitmap, 0xFFU, PAGE_SIZE * 2U);
}
}
/**
* @pre vm != NULL && vm_config != NULL
*/
static void prepare_prelaunched_vm_memmap(struct acrn_vm *vm, const struct acrn_vm_config *vm_config)
{
uint64_t base_hpa;
uint64_t base_gpa;
uint64_t remaining_entry_size;
uint32_t hpa_index;
uint64_t base_size;
uint32_t i;
struct vm_hpa_regions tmp_vm_hpa;
const struct e820_entry *entry;
hpa_index = 0U;
tmp_vm_hpa = vm_config->memory.host_regions[0];
for (i = 0U; i < vm->arch_vm.e820_entry_num; i++) {
entry = &(vm->arch_vm.e820_entries[i]);
if (entry->length == 0UL) {
continue;
} else {
if (is_software_sram_enabled() && (entry->baseaddr == PRE_RTVM_SW_SRAM_BASE_GPA) &&
((vm_config->guest_flags & GUEST_FLAG_RT) != 0U)){
/* pass through Software SRAM to pre-RTVM */
ept_add_mr(vm, (uint64_t *)vm->root_stg2ptp,
get_software_sram_base(), PRE_RTVM_SW_SRAM_BASE_GPA,
get_software_sram_size(), EPT_RWX | EPT_WB);
continue;
}
}
if ((entry->type == E820_TYPE_RESERVED) && (entry->baseaddr > MEM_1M)) {
continue;
}
base_gpa = entry->baseaddr;
remaining_entry_size = entry->length;
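/* Back this guest e820 entry with one or more configured host HPA regions:
* consume host regions from low to high, splitting a region when it is larger
* than what remains of the current e820 entry.
*/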
while ((hpa_index < vm_config->memory.region_num) && (remaining_entry_size > 0)) {
base_hpa = tmp_vm_hpa.start_hpa;
base_size = min(remaining_entry_size, tmp_vm_hpa.size_hpa);
if (tmp_vm_hpa.size_hpa > remaining_entry_size) {
/* from low to high */
tmp_vm_hpa.start_hpa += base_size;
tmp_vm_hpa.size_hpa -= base_size;
} else {
hpa_index++;
if (hpa_index < vm_config->memory.region_num) {
tmp_vm_hpa = vm_config->memory.host_regions[hpa_index];
}
}
if (entry->type != E820_TYPE_RESERVED) {
ept_add_mr(vm, (uint64_t *)vm->root_stg2ptp, base_hpa, base_gpa,
base_size, EPT_RWX | EPT_WB);
} else {
/* GPAs under 1MB are always backed by physical memory */
ept_add_mr(vm, (uint64_t *)vm->root_stg2ptp, base_hpa, base_gpa,
base_size, EPT_RWX | EPT_UNCACHED);
}
remaining_entry_size -= base_size;
base_gpa += base_size;
}
}
for (i = 0U; i < MAX_MMIO_DEV_NUM; i++) {
/* Now we include the TPM2 event log region in ACPI NVS, so we need to
* delete this potential mapping first.
*/
(void)deassign_mmio_dev(vm, &vm_config->mmiodevs[i]);
(void)assign_mmio_dev(vm, &vm_config->mmiodevs[i]);
#ifdef P2SB_VGPIO_DM_ENABLED
if ((vm_config->pt_p2sb_bar) && (vm_config->mmiodevs[i].res[0].host_pa == P2SB_BAR_ADDR)) {
register_vgpio_handler(vm, &vm_config->mmiodevs[i].res[0]);
}
#endif
}
}
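/* Remove the Service VM's access to a physical device's BARs: PIO BARs are
* blocked via the guest I/O bitmap, MMIO BARs by deleting their EPT mappings.
*/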
static void deny_pci_bar_access(struct acrn_vm *service_vm, const struct pci_pdev *pdev)
{
uint32_t idx;
struct pci_vbar vbar = {};
uint64_t base = 0UL, size = 0UL, mask;
uint64_t *pml4_page;
pml4_page = (uint64_t *)service_vm->root_stg2ptp;
for (idx = 0U; idx < pdev->nr_bars; idx++) {
vbar.bar_type.bits = pdev->bars[idx].phy_bar;
if (!is_pci_reserved_bar(&vbar)) {
base = pdev->bars[idx].phy_bar;
size = pdev->bars[idx].size_mask;
if (is_pci_mem64lo_bar(&vbar)) {
idx++;
base |= (((uint64_t)pdev->bars[idx].phy_bar) << 32UL);
size |= (((uint64_t)pdev->bars[idx].size_mask) << 32UL);
}
mask = (is_pci_io_bar(&vbar)) ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK;
base &= mask;
size &= mask;
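/* Standard PCI BAR sizing decode: the lowest set bit of the masked sizing value is the BAR size */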
size = size & ~(size - 1UL);
if ((base != 0UL)) {
if (is_pci_io_bar(&vbar)) {
base &= 0xffffU;
deny_guest_pio_access(service_vm, base, size);
} else {
/* For a passthrough device, the MMIO BAR base must be 4K aligned. */
ASSERT((base & (PAGE_SIZE - 1UL)) == 0UL, "%02x:%02x.%d bar[%d] 0x%lx, is not 4K aligned!",
pdev->bdf.bits.b, pdev->bdf.bits.d, pdev->bdf.bits.f, idx, base);
size = round_page_up(size);
ept_del_mr(service_vm, pml4_page, base, size);
}
}
}
}
}
static void deny_pdevs(struct acrn_vm *service_vm, struct acrn_vm_pci_dev_config *pci_devs, uint16_t pci_dev_num)
{
uint16_t i;
for (i = 0; i < pci_dev_num; i++) {
if (pci_devs[i].pdev != NULL) {
deny_pci_bar_access(service_vm, pci_devs[i].pdev);
}
}
}
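/* Hide hypervisor-owned PCI devices and the PIO debug UART from the Service VM */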
static void deny_hv_owned_devices(struct acrn_vm *service_vm)
{
uint16_t pio_address;
uint32_t nbytes, i;
const struct pci_pdev **hv_owned = get_hv_owned_pdevs();
for (i = 0U; i < get_hv_owned_pdev_num(); i++) {
deny_pci_bar_access(service_vm, hv_owned[i]);
}
if (get_pio_dbg_uart_cfg(&pio_address, &nbytes)) {
deny_guest_pio_access(service_vm, pio_address, nbytes);
}
}
/**
* @param[inout] vm pointer to a vm descriptor
*
* @pre vm != NULL
* @pre is_service_vm(vm) == true
*/
static void prepare_service_vm_memmap(struct acrn_vm *vm)
{
uint16_t vm_id;
uint32_t i;
uint64_t hv_hpa;
uint64_t service_vm_high64_max_ram = MEM_4G;
struct acrn_vm_config *vm_config;
uint64_t *pml4_page = (uint64_t *)vm->root_stg2ptp;
struct epc_section* epc_secs;
const struct e820_entry *entry;
uint32_t entries_count = vm->arch_vm.e820_entry_num;
const struct e820_entry *p_e820 = vm->arch_vm.e820_entries;
struct pci_mmcfg_region *pci_mmcfg;
uint64_t trampoline_memory_size = round_page_up((uint64_t)(&ld_trampoline_end - &ld_trampoline_start));
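/* Overall strategy: identity-map [0, highest e820 address) as UC, upgrade RAM
* entries to WB, then remove everything the Service VM must not touch (EPC
* sections, the hypervisor image, pre-launched VM memory, the AP trampoline,
* PCIe MMCONFIG, IOMMU registers and, depending on configuration, Software SRAM).
*/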
pr_dbg("Service VM e820 layout:\n");
for (i = 0U; i < entries_count; i++) {
entry = p_e820 + i;
pr_dbg("e820 table: %d type: 0x%x", i, entry->type);
pr_dbg("BaseAddress: 0x%016lx length: 0x%016lx\n", entry->baseaddr, entry->length);
service_vm_high64_max_ram = max((entry->baseaddr + entry->length), service_vm_high64_max_ram);
}
/* create real ept map for [0, service_vm_high64_max_ram) with UC */
ept_add_mr(vm, pml4_page, 0UL, 0UL, service_vm_high64_max_ram, EPT_RWX | EPT_UNCACHED);
/* update ram entries to WB attr */
for (i = 0U; i < entries_count; i++) {
entry = p_e820 + i;
if (entry->type == E820_TYPE_RAM) {
ept_modify_mr(vm, pml4_page, entry->baseaddr, entry->length, EPT_WB, EPT_MT_MASK);
}
}
/* Unmap all platform EPC resources from the Service VM.
* These regions have already been marked as reserved by the BIOS in E820;
* accessing EPC resources from the Service VM will cause an EPT violation.
*/
epc_secs = get_phys_epc();
for (i = 0U; (i < MAX_EPC_SECTIONS) && (epc_secs[i].size != 0UL); i++) {
ept_del_mr(vm, pml4_page, epc_secs[i].base, epc_secs[i].size);
}
/* Unmap the hypervisor itself for safety;
* accessing hypervisor memory from the Service VM will cause an EPT violation.
*/
hv_hpa = hva2hpa((void *)(get_hv_image_base()));
ept_del_mr(vm, pml4_page, hv_hpa, get_hv_image_size());
/* unmap pre-launched VM memory */
for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
vm_config = get_vm_config(vm_id);
if (vm_config->load_order == PRE_LAUNCHED_VM) {
for (i = 0; i < vm_config->memory.region_num; i++){
ept_del_mr(vm, pml4_page, vm_config->memory.host_regions[i].start_hpa, vm_config->memory.host_regions[i].size_hpa);
}
/* Remove MMIO/IO bars of pre-launched VM's ptdev */
deny_pdevs(vm, vm_config->pci_devs, vm_config->pci_dev_num);
}
for (i = 0U; i < MAX_MMIO_DEV_NUM; i++) {
(void)deassign_mmio_dev(vm, &vm_config->mmiodevs[i]);
}
}
/* unmap AP trampoline code for security
* This buffer is guaranteed to be page aligned.
*/
ept_del_mr(vm, pml4_page, get_trampoline_start16_paddr(), trampoline_memory_size);
/* unmap PCIe MMCONFIG region since it's owned by hypervisor */
pci_mmcfg = get_mmcfg_region();
ept_del_mr(vm, (uint64_t *)vm->root_stg2ptp, pci_mmcfg->address, get_pci_mmcfg_size(pci_mmcfg));
if (is_software_sram_enabled()) {
/*
* Native Software SRAM resources shall be assigned to either Pre-launched RTVM
* or Service VM. Software SRAM support for Post-launched RTVM is virtualized
* in Service VM.
*
* 1) Native Software SRAM resources are assigned to Pre-launched RTVM:
* - Remove Software SRAM regions from Service VM EPT, to prevent
* Service VM from using clflush to flush the Software SRAM cache.
* - PRE_RTVM_SW_SRAM_MAX_SIZE is the size of Software SRAM that
* Pre-launched RTVM uses, presumed to be starting from Software SRAM base.
* For other cases, PRE_RTVM_SW_SRAM_MAX_SIZE should be defined as 0,
* and no region will be removed from Service VM EPT.
*
* 2) Native Software SRAM resources are assigned to Service VM:
* - Software SRAM regions are added to EPT of Service VM by default
* with memory type UC.
* - However, the Service VM needs to access Software SRAM regions
* when virtualizing them for Post-launched RTVM.
* - So memory type of Software SRAM regions in EPT shall be updated to EPT_WB.
*/
#if (PRE_RTVM_SW_SRAM_MAX_SIZE > 0U)
ept_del_mr(vm, pml4_page, service_vm_hpa2gpa(get_software_sram_base()), PRE_RTVM_SW_SRAM_MAX_SIZE);
#else
ept_modify_mr(vm, pml4_page, service_vm_hpa2gpa(get_software_sram_base()),
get_software_sram_size(), EPT_WB, EPT_MT_MASK);
#endif
}
/* Unmap Intel IOMMU register pages for the following reason:
* the Service VM can detect the IOMMU capability in its ACPI tables and may therefore access
* IOMMU hardware resources, which is not expected, as the IOMMU hardware is owned by the hypervisor.
*/
for (i = 0U; i < plat_dmar_info.drhd_count; i++) {
ept_del_mr(vm, pml4_page, plat_dmar_info.drhd_units[i].reg_base_addr, PAGE_SIZE);
}
}
/* Add EPT mappings of the EPC resources for the VM */
static void prepare_epc_vm_memmap(struct acrn_vm *vm)
{
struct epc_map* vm_epc_maps;
uint32_t i;
if (is_vsgx_supported(vm->vm_id)) {
vm_epc_maps = get_epc_mapping(vm->vm_id);
for (i = 0U; (i < MAX_EPC_SECTIONS) && (vm_epc_maps[i].size != 0UL); i++) {
ept_add_mr(vm, (uint64_t *)vm->root_stg2ptp, vm_epc_maps[i].hpa,
vm_epc_maps[i].gpa, vm_epc_maps[i].size, EPT_RWX | EPT_WB);
}
}
}
/**
* @brief get bitmap of pCPUs whose vCPUs have LAPIC PT enabled
*
* @param[in] vm pointer to vm data structure
* @pre vm != NULL
*
* @return pCPU bitmap
*/
static uint64_t lapic_pt_enabled_pcpu_bitmap(struct acrn_vm *vm)
{
uint16_t i;
struct acrn_vcpu *vcpu;
uint64_t bitmap = 0UL;
if (is_lapic_pt_configured(vm)) {
foreach_vcpu(i, vm, vcpu) {
if (is_x2apic_enabled(vcpu_vlapic(vcpu))) {
bitmap_set_non_atomic(pcpuid_from_vcpu(vcpu), &bitmap);
}
}
}
return bitmap;
}
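/* Identity-map (GPA == HPA) every e820 entry of the given type into the VM's
* EPT with the given memory attributes.
*/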
void prepare_vm_identical_memmap(struct acrn_vm *vm, uint16_t e820_entry_type, uint64_t prot_orig)
{
const struct e820_entry *entry;
const struct e820_entry *p_e820 = vm->arch_vm.e820_entries;
uint32_t entries_count = vm->arch_vm.e820_entry_num;
uint64_t *pml4_page = (uint64_t *)vm->root_stg2ptp;
uint32_t i;
for (i = 0U; i < entries_count; i++) {
entry = p_e820 + i;
if (entry->type == e820_entry_type) {
ept_add_mr(vm, pml4_page, entry->baseaddr,
entry->baseaddr, entry->length,
prot_orig);
}
}
}
/**
* @pre vm_id < CONFIG_MAX_VM_NUM and the VM should be a Trusty-enabled post-launched VM
*/
int32_t get_sworld_vm_index(uint16_t vm_id)
{
int16_t i;
int32_t vm_idx = MAX_TRUSTY_VM_NUM;
struct acrn_vm_config *vm_config = get_vm_config(vm_id);
if ((vm_config->guest_flags & GUEST_FLAG_SECURE_WORLD_ENABLED) != 0U) {
vm_idx = 0;
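/* The secure-world memory slot index equals the number of lower-ID VMs that also enable secure world */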
for (i = 0; i < vm_id; i++) {
vm_config = get_vm_config(i);
if ((vm_config->guest_flags & GUEST_FLAG_SECURE_WORLD_ENABLED) != 0U) {
vm_idx += 1;
}
}
}
if (vm_idx >= (int32_t)MAX_TRUSTY_VM_NUM) {
pr_err("Can't find sworld memory for vm id: %d", vm_id);
vm_idx = -EINVAL;
}
return vm_idx;
}
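/* Arch-specific VM initialization: create the stage-2 (EPT) page table and the
* guest memory map, set up the I/O bitmap, guest PM, virtual interrupt
* controllers (vPIC/vIOAPIC), vPCI, vUARTs, vRTC and INTx remappings.
*/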
int32_t arch_init_vm(struct acrn_vm *vm, struct acrn_vm_config *vm_config)
{
int32_t status = 0;
uint16_t vm_id = vm->vm_id;
#ifdef CONFIG_SECURITY_VM_FIXUP
security_vm_fixup(vm_id);
#endif
/* TODO: Move this to common when guest memory is implemented */
init_ept_pgtable(&vm->stg2_pgtable, vm->vm_id);
vm->root_stg2ptp = pgtable_create_root(&vm->stg2_pgtable);
if (is_service_vm(vm)) {
/* Only for Service VM */
create_service_vm_e820(vm);
prepare_service_vm_memmap(vm);
status = init_vm_boot_info(vm);
} else {
/* For PRE_LAUNCHED_VM and POST_LAUNCHED_VM */
if ((vm_config->guest_flags & GUEST_FLAG_SECURE_WORLD_ENABLED) != 0U) {
vm->arch_vm.sworld_control.flag.supported = 1U;
}
if (vm->arch_vm.sworld_control.flag.supported != 0UL) {
int32_t vm_idx = get_sworld_vm_index(vm_id);
if (vm_idx >= 0) {
ept_add_mr(vm, (uint64_t *)vm->root_stg2ptp,
hva2hpa(post_user_vm_sworld_memory[vm_idx]),
TRUSTY_EPT_REBASE_GPA, TRUSTY_RAM_SIZE, EPT_WB | EPT_RWX);
} else {
status = -EINVAL;
}
}
if (status == 0) {
if (vm_config->load_order == PRE_LAUNCHED_VM) {
/*
* If a pre-launched VM has the flag GUEST_FLAG_TEE set, then it
* is a special pre-launched VM called the TEE VM, which needs a special
* memmap, e.g. mapping the REE VM into its space. Otherwise,
* just use the standard pre-launched VM memmap.
*/
if ((vm_config->guest_flags & GUEST_FLAG_TEE) != 0U) {
prepare_tee_vm_memmap(vm, vm_config);
} else {
create_prelaunched_vm_e820(vm);
prepare_prelaunched_vm_memmap(vm, vm_config);
}
status = init_vm_boot_info(vm);
}
}
}
if (status == 0) {
prepare_epc_vm_memmap(vm);
spinlock_init(&vm->arch_vm.vlapic_mode_lock);
spinlock_init(&vm->arch_vm.iwkey_backup_lock);
vm->arch_vm.vlapic_mode = VM_VLAPIC_XAPIC;
vm->arch_vm.intr_inject_delay_delta = 0UL;
vm->arch_vm.vcpuid_entry_nr = 0U;
/* Set up IO bit-mask such that VM exit occurs on
* selected IO ranges
*/
setup_io_bitmap(vm);
init_guest_pm(vm);
if (is_nvmx_configured(vm)) {
init_nested_vmx(vm);
}
if (!is_lapic_pt_configured(vm)) {
vpic_init(vm);
}
if (is_rt_vm(vm) || !is_postlaunched_vm(vm)) {
vrtc_init(vm);
}
if (is_service_vm(vm)) {
deny_hv_owned_devices(vm);
}
#ifdef CONFIG_SECURITY_VM_FIXUP
passthrough_smbios(vm, get_acrn_boot_info());
#endif
status = init_vpci(vm);
if (status == 0) {
enable_iommu();
/* Create virtual uart;*/
init_legacy_vuarts(vm, vm_config->vuart);
register_reset_port_handler(vm);
/* vpic wire_mode default is INTR */
vm->arch_vm.wire_mode = VPIC_WIRE_INTR;
/* Init full emulated vIOAPIC instance:
* Present a virtual IOAPIC to guest, as a placeholder interrupt controller,
* even if the guest uses a passthrough LAPIC. This satisfies guest OSes in
* some cases, even though the vIOAPIC functionality does not actually work.
*/
vioapic_init(vm);
status = set_vcpuid_entries(vm);
}
}
if (status == 0) {
uint16_t i;
for (i = 0U; i < vm_config->pt_intx_num; i++) {
status = ptirq_add_intx_remapping(vm, vm_config->pt_intx[i].virt_gsi,
vm_config->pt_intx[i].phys_gsi, false);
if (status != 0) {
ptirq_remove_configured_intx_remappings(vm);
break;
}
}
}
if ((status != 0) && (vm->root_stg2ptp != NULL)) {
(void)memset(vm->root_stg2ptp, 0U, PAGE_SIZE);
}
if (is_prelaunched_vm(vm)) {
build_vrsdp(vm);
}
return status;
}
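/* Offline and then restart the pCPUs in pcpu_mask so that they are
* re-initialized after running LAPIC-passthrough vCPUs. The calling pCPU is
* excluded from the wait/restart: if the VM is powered off it only marks
* itself for offlining in the idle thread, otherwise an error is reported.
*/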
static int32_t offline_lapic_pt_enabled_pcpus(const struct acrn_vm *vm, uint64_t pcpu_mask)
{
int32_t ret = 0;
uint16_t i;
uint64_t mask = pcpu_mask;
const struct acrn_vcpu *vcpu = NULL;
uint16_t this_pcpu_id = get_pcpu_id();
if (bitmap_test(this_pcpu_id, &mask)) {
bitmap_clear_non_atomic(this_pcpu_id, &mask);
if (vm->state == VM_POWERED_OFF) {
/*
* If the current pcpu needs to offline itself,
* it will be done after shutdown_vm() completes
* in the idle thread.
*/
make_pcpu_offline(this_pcpu_id);
} else {
/*
* The current pcpu can't reset itself
*/
pr_warn("%s: cannot offline self(%u)",
__func__, this_pcpu_id);
ret = -EINVAL;
}
}
foreach_vcpu(i, vm, vcpu) {
if (bitmap_test(pcpuid_from_vcpu(vcpu), &mask)) {
make_pcpu_offline(pcpuid_from_vcpu(vcpu));
}
}
wait_pcpus_offline(mask);
if (!start_pcpus(mask)) {
pr_fatal("Failed to start all cpus in mask(0x%lx)", mask);
ret = -ETIMEDOUT;
}
return ret;
}
/*
* @pre vm != NULL
* @pre vm->state == VM_PAUSED
*/
int32_t arch_deinit_vm(struct acrn_vm *vm)
{
uint64_t mask;
int32_t ret = 0;
ptirq_remove_configured_intx_remappings(vm);
deinit_legacy_vuarts(vm);
deinit_vpci(vm);
deinit_emul_io(vm);
/* Free EPT allocated resources assigned to VM */
destroy_ept(vm);
mask = lapic_pt_enabled_pcpu_bitmap(vm);
if (mask != 0UL) {
ret = offline_lapic_pt_enabled_pcpus(vm, mask);
}
#ifdef CONFIG_HYPERV_ENABLED
hyperv_page_destory(vm);
#endif
/* Return status to caller */
return ret;
}
/**
* @pre bsp != NULL
* @pre vm->state == VM_CREATED
*/
void arch_vm_prepare_bsp(struct acrn_vcpu *bsp)
{
struct acrn_vm *vm = bsp->vm;
uint64_t vgdt_gpa;
if (!is_postlaunched_vm(vm)) {
vcpu_set_rip(bsp, (uint64_t)bsp->vm->sw.kernel_info.kernel_entry_addr);
if (vm->sw.kernel_type == KERNEL_RAWIMAGE) {
vgdt_gpa = 0x800;
/*
* TODO:
* - We need to initialize the guest BSP (bootstrap processor) registers according to
* the guest boot mode (real mode vs. protected mode).
* - The memory layout usage is unclear; only a GDT might be needed as a boot parameter.
* Currently we only support Zephyr, which has no need for cmdline/e820/efimmap/etc.,
* so the vGDT GPA is hard-coded to 0x800, which is not used by Zephyr so far.
*/
init_vcpu_protect_mode_regs(bsp, vgdt_gpa);
}
/* TODO: Move vcpu state setting operations from
* other loaders (i.e., bzimage loaders, elf loaders) here */
}
/* For post-launched VMs, vBSP init is done through a separate hypercall */
vcpu_make_request(bsp, ACRN_REQUEST_INIT_VMCS);
}
/**
* @pre vm != NULL
* @pre vm->state == VM_PAUSED
*/
int32_t arch_reset_vm(struct acrn_vm *vm)
{
uint16_t i;
uint64_t mask;
struct acrn_vcpu *vcpu = NULL;
int32_t ret = 0;
mask = lapic_pt_enabled_pcpu_bitmap(vm);
if (mask != 0UL) {
ret = offline_lapic_pt_enabled_pcpus(vm, mask);
}
foreach_vcpu(i, vm, vcpu) {
reset_vcpu(vcpu);
}
/*
* Set VM vLAPIC state to VM_VLAPIC_XAPIC
*/
vm->arch_vm.vlapic_mode = VM_VLAPIC_XAPIC;
reset_vm_ioreqs(vm);
reset_vioapics(vm);
destroy_secure_world(vm, false);
vm->arch_vm.sworld_control.flag.active = 0UL;
vm->arch_vm.iwkey_backup_status = 0UL;
return ret;
}
/**
* @brief Resume vm from S3 state
*
* To resume vm after guest enter S3 state:
* - reset BSP
* - BSP will be put to real mode with entry set as wakeup_vec
* - init_vmcs BSP. We could call init_vmcs here because we know current
* pcpu is mapped to BSP of vm.
*
* @param[in] vm pointer to vm data structure
* @param[in] wakeup_vec The resume address of vm
*
* @pre vm != NULL
* @pre is_service_vm(vm) && vm->state == VM_PAUSED
*/
void resume_vm_from_s3(struct acrn_vm *vm, uint32_t wakeup_vec)
{
struct acrn_vcpu *bsp = vcpu_from_vid(vm, BSP_CPU_ID);
reset_vm(vm);
/* When Service VM resume from S3, it will return to real mode
* with entry set to wakeup_vec.
*/
set_vcpu_startup_entry(bsp, wakeup_vec);
vm->state = VM_RUNNING;
init_vmcs(bsp);
launch_vcpu(bsp);
}
/*
* @brief Update state of vLAPICs of a VM
* vLAPICs of VM switch between modes in an asynchronous fashion. This API
* captures the "transition" state triggered when one vLAPIC switches mode.
* When the VM is created, the state is set to "xAPIC" as all vLAPICs are set up
* in xAPIC mode.
*
* Upon reset, all LAPICs switch to xAPIC mode according to SDM 10.12.5.
* Considering the VM uses x2APIC mode for its vLAPICs, in the reset or shutdown flow the vLAPIC state
* moves to "xAPIC" directly without going through "transition".
*
* VM_VLAPIC_X2APIC - All the online vCPUs/vLAPICs of this VM use x2APIC mode
* VM_VLAPIC_XAPIC - All the online vCPUs/vLAPICs of this VM use xAPIC mode
* VM_VLAPIC_DISABLED - All the online vCPUs/vLAPICs of this VM are in Disabled mode
* VM_VLAPIC_TRANSITION - Online vCPUs/vLAPICs of this VM are in transition between modes
*
* @pre vm != NULL
*/
void update_vm_vlapic_state(struct acrn_vm *vm)
{
uint16_t i;
struct acrn_vcpu *vcpu;
uint16_t vcpus_in_x2apic, vcpus_in_xapic;
enum vm_vlapic_mode vlapic_mode = VM_VLAPIC_XAPIC;
vcpus_in_x2apic = 0U;
vcpus_in_xapic = 0U;
spinlock_obtain(&vm->arch_vm.vlapic_mode_lock);
foreach_vcpu(i, vm, vcpu) {
/* Skip vCPU in state outside of VCPU_RUNNING as it may be offline. */
if (vcpu->state == VCPU_RUNNING) {
if (is_x2apic_enabled(vcpu_vlapic(vcpu))) {
vcpus_in_x2apic++;
} else if (is_xapic_enabled(vcpu_vlapic(vcpu))) {
vcpus_in_xapic++;
} else {
/*
* vCPU is using vLAPIC in Disabled mode
*/
}
}
}
if ((vcpus_in_x2apic == 0U) && (vcpus_in_xapic == 0U)) {
/*
* Check if the counts vcpus_in_x2apic and vcpus_in_xapic are zero
* VM_VLAPIC_DISABLED
*/
vlapic_mode = VM_VLAPIC_DISABLED;
} else if ((vcpus_in_x2apic != 0U) && (vcpus_in_xapic != 0U)) {
/*
* Check if the counts vcpus_in_x2apic and vcpus_in_xapic are non-zero
* VM_VLAPIC_TRANSITION
*/
vlapic_mode = VM_VLAPIC_TRANSITION;
} else if (vcpus_in_x2apic != 0U) {
/*
* Check if the counts vcpus_in_x2apic is non-zero
* VM_VLAPIC_X2APIC
*/
vlapic_mode = VM_VLAPIC_X2APIC;
} else {
/*
* Count vcpus_in_xapic is non-zero
* VM_VLAPIC_XAPIC
*/
vlapic_mode = VM_VLAPIC_XAPIC;
}
vm->arch_vm.vlapic_mode = vlapic_mode;
spinlock_release(&vm->arch_vm.vlapic_mode_lock);
}
/*
* @brief Check mode of vLAPICs of a VM
*
* @pre vm != NULL
*/
enum vm_vlapic_mode check_vm_vlapic_mode(const struct acrn_vm *vm)
{
enum vm_vlapic_mode vlapic_mode;
vlapic_mode = vm->arch_vm.vlapic_mode;
return vlapic_mode;
}
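/* Assert or deassert the level-triggered interrupt line irq on both the vPIC
* and the vIOAPIC, honoring the polarity configured in the vIOAPIC RTE.
*/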
void arch_trigger_level_intr(struct acrn_vm *vm, uint32_t irq, bool assert)
{
union ioapic_rte rte;
uint32_t operation;
vioapic_get_rte(vm, irq, &rte);
/* TODO:
* We should assert the vUART IRQ according to the vCOM1_IRQ polarity.
* The best way is to get the polarity info from the ACPI table;
* here we just get the info from the vIOAPIC configuration.
* Because of this, an IRQ storm is still possible while the guest
* modifies the vIOAPIC setting. As this only affects the debug UART,
* we treat it as a known issue.
*/
if (rte.bits.intr_polarity == IOAPIC_RTE_INTPOL_ALO) {
operation = assert ? GSI_SET_LOW : GSI_SET_HIGH;
} else {
operation = assert ? GSI_SET_HIGH : GSI_SET_LOW;
}
vpic_set_irqline(vm_pic(vm), irq, operation);
vioapic_set_irqline_lock(vm, irq, operation);
}