Files
acrn-hypervisor/hypervisor/arch/x86/guest/vm.c
Yifan Liu 127c2074a0 hv: vm: Move some common helpers to common scope
No logic change. Only movement.

Tracked-On: #8830
Signed-off-by: Yifan Liu <yifan1.liu@intel.com>
Reviewed-by: Fei Li <fei1.li@intel.com>
Acked-by: Wang Yu1 <yu1.wang@intel.com>
2025-10-30 13:30:32 +08:00

911 lines
25 KiB
C

/*
* Copyright (C) 2018-2025 Intel Corporation.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include <types.h>
#include <errno.h>
#include <sprintf.h>
#include <per_cpu.h>
#include <vm.h>
#include <asm/lapic.h>
#include <asm/guest/vm_reset.h>
#include <asm/guest/virq.h>
#include <bits.h>
#include <asm/e820.h>
#include <boot.h>
#include <asm/vtd.h>
#include <reloc.h>
#include <asm/guest/ept.h>
#include <asm/guest/guest_pm.h>
#include <console.h>
#include <ptdev.h>
#include <asm/guest/vmcs.h>
#include <pgtable.h>
#include <asm/mmu.h>
#include <logmsg.h>
#include <vboot.h>
#include <asm/board.h>
#include <asm/sgx.h>
#include <sbuf.h>
#include <asm/pci_dev.h>
#include <vacpi.h>
#include <asm/platform_caps.h>
#include <mmio_dev.h>
#include <asm/trampoline.h>
#include <asm/guest/assign.h>
#include <vgpio.h>
#include <asm/rtcm.h>
#include <asm/irq.h>
#include <uart16550.h>
#include <vcpu.h>
#ifdef CONFIG_SECURITY_VM_FIXUP
#include <quirks/security_vm_fixup.h>
#endif
#include <asm/boot/ld_sym.h>
#include <asm/guest/optee.h>
/* Local variables */
/* pre-assumption: TRUSTY_RAM_SIZE is 2M aligned */
static struct page post_user_vm_sworld_memory[MAX_TRUSTY_VM_NUM][TRUSTY_RAM_SIZE >> PAGE_SHIFT] __aligned(MEM_2M);
void *get_sworld_memory_base(void)
{
return post_user_vm_sworld_memory;
}
/**
* @pre vm != NULL && vm_config != NULL && vm->vmid < CONFIG_MAX_VM_NUM
*/
bool is_lapic_pt_configured(const struct acrn_vm *vm)
{
struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
return ((vm_config->guest_flags & GUEST_FLAG_LAPIC_PASSTHROUGH) != 0U);
}
/**
* @pre vm != NULL && vm_config != NULL && vm->vmid < CONFIG_MAX_VM_NUM
*/
bool is_pmu_pt_configured(const struct acrn_vm *vm)
{
struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
return ((vm_config->guest_flags & GUEST_FLAG_PMU_PASSTHROUGH) != 0U);
}
/**
* @pre vm != NULL && vm_config != NULL && vm->vmid < CONFIG_MAX_VM_NUM
*/
bool is_nvmx_configured(const struct acrn_vm *vm)
{
struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
return ((vm_config->guest_flags & GUEST_FLAG_NVMX_ENABLED) != 0U);
}
/**
* @pre vm != NULL && vm_config != NULL && vm->vmid < CONFIG_MAX_VM_NUM
*/
bool is_vcat_configured(const struct acrn_vm *vm)
{
struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
return ((vm_config->guest_flags & GUEST_FLAG_VCAT_ENABLED) != 0U);
}
/**
* @pre vm != NULL && vm_config != NULL && vm->vmid < CONFIG_MAX_VM_NUM
*/
bool is_vhwp_configured(const struct acrn_vm *vm)
{
struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
return ((vm_config->guest_flags & GUEST_FLAG_VHWP) != 0U);
}
/**
* @pre vm != NULL && vm_config != NULL && vm->vmid < CONFIG_MAX_VM_NUM
*/
bool is_vtm_configured(const struct acrn_vm *vm)
{
struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
return ((vm_config->guest_flags & GUEST_FLAG_VTM) != 0U);
}
/**
* @brief VT-d PI posted mode can possibly be used for PTDEVs assigned
* to this VM if platform supports VT-d PI AND lapic passthru is not configured
* for this VM.
* However, as we can only post single destination IRQ, so meeting these 2 conditions
* does not necessarily mean posted mode will be used for all PTDEVs belonging
* to the VM, unless the IRQ is single-destination for the specific PTDEV
* @pre vm != NULL
*/
bool is_pi_capable(const struct acrn_vm *vm)
{
return (platform_caps.pi && (!is_lapic_pt_configured(vm)));
}
/**
* @pre vm != NULL && vm_config != NULL && vm->vmid < CONFIG_MAX_VM_NUM
*/
bool vm_hide_mtrr(const struct acrn_vm *vm)
{
struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
return ((vm_config->guest_flags & GUEST_FLAG_HIDE_MTRR) != 0U);
}
/**
* @brief Initialize the I/O bitmap for \p vm
*
* @param vm The VM whose I/O bitmap is to be initialized
*/
static void setup_io_bitmap(struct acrn_vm *vm)
{
if (is_service_vm(vm)) {
(void)memset(vm->arch_vm.io_bitmap, 0x00U, PAGE_SIZE * 2U);
} else {
/* block all IO port access from Guest */
(void)memset(vm->arch_vm.io_bitmap, 0xFFU, PAGE_SIZE * 2U);
}
}
/**
* @pre vm != NULL && vm_config != NULL
*/
static void prepare_prelaunched_vm_memmap(struct acrn_vm *vm, const struct acrn_vm_config *vm_config)
{
uint64_t base_hpa;
uint64_t base_gpa;
uint64_t remaining_entry_size;
uint32_t hpa_index;
uint64_t base_size;
uint32_t i;
struct vm_hpa_regions tmp_vm_hpa;
const struct e820_entry *entry;
hpa_index = 0U;
tmp_vm_hpa = vm_config->memory.host_regions[0];
for (i = 0U; i < vm->arch_vm.e820_entry_num; i++) {
entry = &(vm->arch_vm.e820_entries[i]);
if (entry->length == 0UL) {
continue;
} else {
if (is_software_sram_enabled() && (entry->baseaddr == PRE_RTVM_SW_SRAM_BASE_GPA) &&
((vm_config->guest_flags & GUEST_FLAG_RT) != 0U)){
/* pass through Software SRAM to pre-RTVM */
ept_add_mr(vm, (uint64_t *)vm->root_stg2ptp,
get_software_sram_base(), PRE_RTVM_SW_SRAM_BASE_GPA,
get_software_sram_size(), EPT_RWX | EPT_WB);
continue;
}
}
if ((entry->type == E820_TYPE_RESERVED) && (entry->baseaddr > MEM_1M)) {
continue;
}
base_gpa = entry->baseaddr;
remaining_entry_size = entry->length;
while ((hpa_index < vm_config->memory.region_num) && (remaining_entry_size > 0)) {
base_hpa = tmp_vm_hpa.start_hpa;
base_size = min(remaining_entry_size, tmp_vm_hpa.size_hpa);
if (tmp_vm_hpa.size_hpa > remaining_entry_size) {
/* from low to high */
tmp_vm_hpa.start_hpa += base_size;
tmp_vm_hpa.size_hpa -= base_size;
} else {
hpa_index++;
if (hpa_index < vm_config->memory.region_num) {
tmp_vm_hpa = vm_config->memory.host_regions[hpa_index];
}
}
if (entry->type != E820_TYPE_RESERVED) {
ept_add_mr(vm, (uint64_t *)vm->root_stg2ptp, base_hpa, base_gpa,
base_size, EPT_RWX | EPT_WB);
} else {
/* GPAs under 1MB are always backed by physical memory */
ept_add_mr(vm, (uint64_t *)vm->root_stg2ptp, base_hpa, base_gpa,
base_size, EPT_RWX | EPT_UNCACHED);
}
remaining_entry_size -= base_size;
base_gpa += base_size;
}
}
for (i = 0U; i < MAX_MMIO_DEV_NUM; i++) {
/* Now we include the TPM2 event log region in ACPI NVS, so we need to
* delete this potential mapping first.
*/
(void)deassign_mmio_dev(vm, &vm_config->mmiodevs[i]);
(void)assign_mmio_dev(vm, &vm_config->mmiodevs[i]);
#ifdef P2SB_VGPIO_DM_ENABLED
if ((vm_config->pt_p2sb_bar) && (vm_config->mmiodevs[i].res[0].host_pa == P2SB_BAR_ADDR)) {
register_vgpio_handler(vm, &vm_config->mmiodevs[i].res[0]);
}
#endif
}
}
static void deny_pci_bar_access(struct acrn_vm *service_vm, const struct pci_pdev *pdev)
{
uint32_t idx;
struct pci_vbar vbar = {};
uint64_t base = 0UL, size = 0UL, mask;
uint64_t *pml4_page;
pml4_page = (uint64_t *)service_vm->root_stg2ptp;
for ( idx= 0; idx < pdev->nr_bars; idx++) {
vbar.bar_type.bits = pdev->bars[idx].phy_bar;
if (!is_pci_reserved_bar(&vbar)) {
base = pdev->bars[idx].phy_bar;
size = pdev->bars[idx].size_mask;
if (is_pci_mem64lo_bar(&vbar)) {
idx++;
base |= (((uint64_t)pdev->bars[idx].phy_bar) << 32UL);
size |= (((uint64_t)pdev->bars[idx].size_mask) << 32UL);
}
mask = (is_pci_io_bar(&vbar)) ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK;
base &= mask;
size &= mask;
size = size & ~(size - 1UL);
if ((base != 0UL)) {
if (is_pci_io_bar(&vbar)) {
base &= 0xffffU;
deny_guest_pio_access(service_vm, base, size);
} else {
/*for passthru device MMIO BAR base must be 4K aligned. This is the requirement of passthru devices.*/
ASSERT((base & PAGE_MASK) != 0U, "%02x:%02x.%d bar[%d] 0x%lx, is not 4K aligned!",
pdev->bdf.bits.b, pdev->bdf.bits.d, pdev->bdf.bits.f, idx, base);
size = round_page_up(size);
ept_del_mr(service_vm, pml4_page, base, size);
}
}
}
}
}
static void deny_pdevs(struct acrn_vm *service_vm, struct acrn_vm_pci_dev_config *pci_devs, uint16_t pci_dev_num)
{
uint16_t i;
for (i = 0; i < pci_dev_num; i++) {
if ( pci_devs[i].pdev != NULL) {
deny_pci_bar_access(service_vm, pci_devs[i].pdev);
}
}
}
static void deny_hv_owned_devices(struct acrn_vm *service_vm)
{
uint16_t pio_address;
uint32_t nbytes, i;
const struct pci_pdev **hv_owned = get_hv_owned_pdevs();
for (i = 0U; i < get_hv_owned_pdev_num(); i++) {
deny_pci_bar_access(service_vm, hv_owned[i]);
}
if (get_pio_dbg_uart_cfg(&pio_address, &nbytes)) {
deny_guest_pio_access(service_vm, pio_address, nbytes);
}
}
/**
* @param[inout] vm pointer to a vm descriptor
*
* @retval 0 on success
*
* @pre vm != NULL
* @pre is_service_vm(vm) == true
*/
static void prepare_service_vm_memmap(struct acrn_vm *vm)
{
uint16_t vm_id;
uint32_t i;
uint64_t hv_hpa;
uint64_t service_vm_high64_max_ram = MEM_4G;
struct acrn_vm_config *vm_config;
uint64_t *pml4_page = (uint64_t *)vm->root_stg2ptp;
struct epc_section* epc_secs;
const struct e820_entry *entry;
uint32_t entries_count = vm->arch_vm.e820_entry_num;
const struct e820_entry *p_e820 = vm->arch_vm.e820_entries;
struct pci_mmcfg_region *pci_mmcfg;
uint64_t trampoline_memory_size = round_page_up((uint64_t)(&ld_trampoline_end - &ld_trampoline_start));
pr_dbg("Service VM e820 layout:\n");
for (i = 0U; i < entries_count; i++) {
entry = p_e820 + i;
pr_dbg("e820 table: %d type: 0x%x", i, entry->type);
pr_dbg("BaseAddress: 0x%016lx length: 0x%016lx\n", entry->baseaddr, entry->length);
service_vm_high64_max_ram = max((entry->baseaddr + entry->length), service_vm_high64_max_ram);
}
/* create real ept map for [0, service_vm_high64_max_ram) with UC */
ept_add_mr(vm, pml4_page, 0UL, 0UL, service_vm_high64_max_ram, EPT_RWX | EPT_UNCACHED);
/* update ram entries to WB attr */
for (i = 0U; i < entries_count; i++) {
entry = p_e820 + i;
if (entry->type == E820_TYPE_RAM) {
ept_modify_mr(vm, pml4_page, entry->baseaddr, entry->length, EPT_WB, EPT_MT_MASK);
}
}
/* Unmap all platform EPC resource from Service VM.
* This part has already been marked as reserved by BIOS in E820
* will cause EPT violation if Service VM accesses EPC resource.
*/
epc_secs = get_phys_epc();
for (i = 0U; (i < MAX_EPC_SECTIONS) && (epc_secs[i].size != 0UL); i++) {
ept_del_mr(vm, pml4_page, epc_secs[i].base, epc_secs[i].size);
}
/* unmap hypervisor itself for safety
* will cause EPT violation if Service VM accesses hv memory
*/
hv_hpa = hva2hpa((void *)(get_hv_image_base()));
ept_del_mr(vm, pml4_page, hv_hpa, get_hv_image_size());
/* unmap prelaunch VM memory */
for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
vm_config = get_vm_config(vm_id);
if (vm_config->load_order == PRE_LAUNCHED_VM) {
for (i = 0; i < vm_config->memory.region_num; i++){
ept_del_mr(vm, pml4_page, vm_config->memory.host_regions[i].start_hpa, vm_config->memory.host_regions[i].size_hpa);
}
/* Remove MMIO/IO bars of pre-launched VM's ptdev */
deny_pdevs(vm, vm_config->pci_devs, vm_config->pci_dev_num);
}
for (i = 0U; i < MAX_MMIO_DEV_NUM; i++) {
(void)deassign_mmio_dev(vm, &vm_config->mmiodevs[i]);
}
}
/* unmap AP trampoline code for security
* This buffer is guaranteed to be page aligned.
*/
ept_del_mr(vm, pml4_page, get_trampoline_start16_paddr(), trampoline_memory_size);
/* unmap PCIe MMCONFIG region since it's owned by hypervisor */
pci_mmcfg = get_mmcfg_region();
ept_del_mr(vm, (uint64_t *)vm->root_stg2ptp, pci_mmcfg->address, get_pci_mmcfg_size(pci_mmcfg));
if (is_software_sram_enabled()) {
/*
* Native Software SRAM resources shall be assigned to either Pre-launched RTVM
* or Service VM. Software SRAM support for Post-launched RTVM is virtualized
* in Service VM.
*
* 1) Native Software SRAM resources are assigned to Pre-launched RTVM:
* - Remove Software SRAM regions from Service VM EPT, to prevent
* Service VM from using clflush to flush the Software SRAM cache.
* - PRE_RTVM_SW_SRAM_MAX_SIZE is the size of Software SRAM that
* Pre-launched RTVM uses, presumed to be starting from Software SRAM base.
* For other cases, PRE_RTVM_SW_SRAM_MAX_SIZE should be defined as 0,
* and no region will be removed from Service VM EPT.
*
* 2) Native Software SRAM resources are assigned to Service VM:
* - Software SRAM regions are added to EPT of Service VM by default
* with memory type UC.
* - But, Service VM need to access Software SRAM regions
* when virtualizing them for Post-launched RTVM.
* - So memory type of Software SRAM regions in EPT shall be updated to EPT_WB.
*/
#if (PRE_RTVM_SW_SRAM_MAX_SIZE > 0U)
ept_del_mr(vm, pml4_page, service_vm_hpa2gpa(get_software_sram_base()), PRE_RTVM_SW_SRAM_MAX_SIZE);
#else
ept_modify_mr(vm, pml4_page, service_vm_hpa2gpa(get_software_sram_base()),
get_software_sram_size(), EPT_WB, EPT_MT_MASK);
#endif
}
/* unmap Intel IOMMU register pages for below reason:
* Service VM can detect IOMMU capability in its ACPI table hence it may access
* IOMMU hardware resources, which is not expected, as IOMMU hardware is owned by hypervisor.
*/
for (i = 0U; i < plat_dmar_info.drhd_count; i++) {
ept_del_mr(vm, pml4_page, plat_dmar_info.drhd_units[i].reg_base_addr, PAGE_SIZE);
}
}
/* Add EPT mapping of EPC reource for the VM */
static void prepare_epc_vm_memmap(struct acrn_vm *vm)
{
struct epc_map* vm_epc_maps;
uint32_t i;
if (is_vsgx_supported(vm->vm_id)) {
vm_epc_maps = get_epc_mapping(vm->vm_id);
for (i = 0U; (i < MAX_EPC_SECTIONS) && (vm_epc_maps[i].size != 0UL); i++) {
ept_add_mr(vm, (uint64_t *)vm->root_stg2ptp, vm_epc_maps[i].hpa,
vm_epc_maps[i].gpa, vm_epc_maps[i].size, EPT_RWX | EPT_WB);
}
}
}
/**
* @brief get bitmap of pCPUs whose vCPUs have LAPIC PT enabled
*
* @param[in] vm pointer to vm data structure
* @pre vm != NULL
*
* @return pCPU bitmap
*/
static uint64_t lapic_pt_enabled_pcpu_bitmap(struct acrn_vm *vm)
{
uint16_t i;
struct acrn_vcpu *vcpu;
uint64_t bitmap = 0UL;
if (is_lapic_pt_configured(vm)) {
foreach_vcpu(i, vm, vcpu) {
if (is_x2apic_enabled(vcpu_vlapic(vcpu))) {
bitmap_set_non_atomic(pcpuid_from_vcpu(vcpu), &bitmap);
}
}
}
return bitmap;
}
void prepare_vm_identical_memmap(struct acrn_vm *vm, uint16_t e820_entry_type, uint64_t prot_orig)
{
const struct e820_entry *entry;
const struct e820_entry *p_e820 = vm->arch_vm.e820_entries;
uint32_t entries_count = vm->arch_vm.e820_entry_num;
uint64_t *pml4_page = (uint64_t *)vm->root_stg2ptp;
uint32_t i;
for (i = 0U; i < entries_count; i++) {
entry = p_e820 + i;
if (entry->type == e820_entry_type) {
ept_add_mr(vm, pml4_page, entry->baseaddr,
entry->baseaddr, entry->length,
prot_orig);
}
}
}
/**
* @pre vm_id < CONFIG_MAX_VM_NUM and should be trusty post-launched VM
*/
int32_t get_sworld_vm_index(uint16_t vm_id)
{
int16_t i;
int32_t vm_idx = MAX_TRUSTY_VM_NUM;
struct acrn_vm_config *vm_config = get_vm_config(vm_id);
if ((vm_config->guest_flags & GUEST_FLAG_SECURE_WORLD_ENABLED) != 0U) {
vm_idx = 0;
for (i = 0; i < vm_id; i++) {
vm_config = get_vm_config(i);
if ((vm_config->guest_flags & GUEST_FLAG_SECURE_WORLD_ENABLED) != 0U) {
vm_idx += 1;
}
}
}
if (vm_idx >= (int32_t)MAX_TRUSTY_VM_NUM) {
pr_err("Can't find sworld memory for vm id: %d", vm_id);
vm_idx = -EINVAL;
}
return vm_idx;
}
int32_t arch_init_vm(struct acrn_vm *vm, struct acrn_vm_config *vm_config)
{
int32_t status = 0;
uint16_t vm_id = vm->vm_id;
#ifdef CONFIG_SECURITY_VM_FIXUP
security_vm_fixup(vm_id);
#endif
/* TODO: Move this to common when guest memory is implemented */
init_ept_pgtable(&vm->stg2_pgtable, vm->vm_id);
vm->root_stg2ptp = pgtable_create_root(&vm->stg2_pgtable);
if (is_service_vm(vm)) {
/* Only for Service VM */
create_service_vm_e820(vm);
prepare_service_vm_memmap(vm);
status = init_vm_boot_info(vm);
} else {
/* For PRE_LAUNCHED_VM and POST_LAUNCHED_VM */
if ((vm_config->guest_flags & GUEST_FLAG_SECURE_WORLD_ENABLED) != 0U) {
vm->arch_vm.sworld_control.flag.supported = 1U;
}
if (vm->arch_vm.sworld_control.flag.supported != 0UL) {
int32_t vm_idx = get_sworld_vm_index(vm_id);
if (vm_idx >= 0)
{
ept_add_mr(vm, (uint64_t *)vm->root_stg2ptp,
hva2hpa(post_user_vm_sworld_memory[vm_idx]),
TRUSTY_EPT_REBASE_GPA, TRUSTY_RAM_SIZE, EPT_WB | EPT_RWX);
} else {
status = -EINVAL;
}
}
if (status == 0) {
if (vm_config->load_order == PRE_LAUNCHED_VM) {
/*
* If a prelaunched VM has the flag GUEST_FLAG_TEE set then it
* is a special prelaunched VM called TEE VM which need special
* memmap, e.g. mapping the REE VM into its space. Otherwise,
* just use the standard preplaunched VM memmap.
*/
if ((vm_config->guest_flags & GUEST_FLAG_TEE) != 0U) {
prepare_tee_vm_memmap(vm, vm_config);
} else {
create_prelaunched_vm_e820(vm);
prepare_prelaunched_vm_memmap(vm, vm_config);
}
status = init_vm_boot_info(vm);
}
}
}
if (status == 0) {
prepare_epc_vm_memmap(vm);
spinlock_init(&vm->arch_vm.vlapic_mode_lock);
spinlock_init(&vm->arch_vm.iwkey_backup_lock);
vm->arch_vm.vlapic_mode = VM_VLAPIC_XAPIC;
vm->arch_vm.intr_inject_delay_delta = 0UL;
vm->arch_vm.vcpuid_entry_nr = 0U;
/* Set up IO bit-mask such that VM exit occurs on
* selected IO ranges
*/
setup_io_bitmap(vm);
init_guest_pm(vm);
if (is_nvmx_configured(vm)) {
init_nested_vmx(vm);
}
if (!is_lapic_pt_configured(vm)) {
vpic_init(vm);
}
if (is_rt_vm(vm) || !is_postlaunched_vm(vm)) {
vrtc_init(vm);
}
if (is_service_vm(vm)) {
deny_hv_owned_devices(vm);
}
#ifdef CONFIG_SECURITY_VM_FIXUP
passthrough_smbios(vm, get_acrn_boot_info());
#endif
status = init_vpci(vm);
if (status == 0) {
enable_iommu();
/* Create virtual uart;*/
init_legacy_vuarts(vm, vm_config->vuart);
register_reset_port_handler(vm);
/* vpic wire_mode default is INTR */
vm->arch_vm.wire_mode = VPIC_WIRE_INTR;
/* Init full emulated vIOAPIC instance:
* Present a virtual IOAPIC to guest, as a placeholder interrupt controller,
* even if the guest uses PT LAPIC. This is to satisfy the guest OSes,
* in some cases, though the functionality of vIOAPIC doesn't work.
*/
vioapic_init(vm);
status = set_vcpuid_entries(vm);
}
}
if (status == 0) {
uint16_t i;
for (i = 0U; i < vm_config->pt_intx_num; i++) {
status = ptirq_add_intx_remapping(vm, vm_config->pt_intx[i].virt_gsi,
vm_config->pt_intx[i].phys_gsi, false);
if (status != 0) {
ptirq_remove_configured_intx_remappings(vm);
break;
}
}
}
if ((status != 0) && (vm->root_stg2ptp != NULL)) {
(void)memset(vm->root_stg2ptp, 0U, PAGE_SIZE);
}
if (is_prelaunched_vm(vm)) {
build_vrsdp(vm);
}
return status;
}
static int32_t offline_lapic_pt_enabled_pcpus(const struct acrn_vm *vm, uint64_t pcpu_mask)
{
int32_t ret = 0;
uint16_t i;
uint64_t mask = pcpu_mask;
const struct acrn_vcpu *vcpu = NULL;
uint16_t this_pcpu_id = get_pcpu_id();
if (bitmap_test(this_pcpu_id, &mask)) {
bitmap_clear_non_atomic(this_pcpu_id, &mask);
if (vm->state == VM_POWERED_OFF) {
/*
* If the current pcpu needs to offline itself,
* it will be done after shutdown_vm() completes
* in the idle thread.
*/
make_pcpu_offline(this_pcpu_id);
} else {
/*
* The current pcpu can't reset itself
*/
pr_warn("%s: cannot offline self(%u)",
__func__, this_pcpu_id);
ret = -EINVAL;
}
}
foreach_vcpu(i, vm, vcpu) {
if (bitmap_test(pcpuid_from_vcpu(vcpu), &mask)) {
make_pcpu_offline(pcpuid_from_vcpu(vcpu));
}
}
wait_pcpus_offline(mask);
if (!start_pcpus(mask)) {
pr_fatal("Failed to start all cpus in mask(0x%lx)", mask);
ret = -ETIMEDOUT;
}
return ret;
}
/*
* @pre vm != NULL
* @pre vm->state == VM_PAUSED
*/
int32_t arch_deinit_vm(struct acrn_vm *vm)
{
uint64_t mask;
int32_t ret = 0;
ptirq_remove_configured_intx_remappings(vm);
deinit_legacy_vuarts(vm);
deinit_vpci(vm);
deinit_emul_io(vm);
/* Free EPT allocated resources assigned to VM */
destroy_ept(vm);
mask = lapic_pt_enabled_pcpu_bitmap(vm);
if (mask != 0UL) {
ret = offline_lapic_pt_enabled_pcpus(vm, mask);
}
#ifdef CONFIG_HYPERV_ENABLED
hyperv_page_destory(vm);
#endif
/* Return status to caller */
return ret;
}
/**
* @pre bsp != NULL
* @pre vm->state == VM_CREATED
*/
void arch_vm_prepare_bsp(struct acrn_vcpu *bsp)
{
vcpu_make_request(bsp, ACRN_REQUEST_INIT_VMCS);
}
/**
* @pre vm != NULL
* @pre vm->state == VM_PAUSED
*/
int32_t arch_reset_vm(struct acrn_vm *vm)
{
uint16_t i;
uint64_t mask;
struct acrn_vcpu *vcpu = NULL;
int32_t ret = 0;
mask = lapic_pt_enabled_pcpu_bitmap(vm);
if (mask != 0UL) {
ret = offline_lapic_pt_enabled_pcpus(vm, mask);
}
foreach_vcpu(i, vm, vcpu) {
reset_vcpu(vcpu);
}
/*
* Set VM vLAPIC state to VM_VLAPIC_XAPIC
*/
vm->arch_vm.vlapic_mode = VM_VLAPIC_XAPIC;
reset_vm_ioreqs(vm);
reset_vioapics(vm);
destroy_secure_world(vm, false);
vm->arch_vm.sworld_control.flag.active = 0UL;
vm->arch_vm.iwkey_backup_status = 0UL;
return ret;
}
/**
* @brief Resume vm from S3 state
*
* To resume vm after guest enter S3 state:
* - reset BSP
* - BSP will be put to real mode with entry set as wakeup_vec
* - init_vmcs BSP. We could call init_vmcs here because we know current
* pcpu is mapped to BSP of vm.
*
* @vm[in] vm pointer to vm data structure
* @wakeup_vec[in] The resume address of vm
*
* @pre vm != NULL
* @pre is_service_vm(vm) && vm->state == VM_PAUSED
*/
void resume_vm_from_s3(struct acrn_vm *vm, uint32_t wakeup_vec)
{
struct acrn_vcpu *bsp = vcpu_from_vid(vm, BSP_CPU_ID);
reset_vm(vm);
/* When Service VM resume from S3, it will return to real mode
* with entry set to wakeup_vec.
*/
set_vcpu_startup_entry(bsp, wakeup_vec);
start_vm(vm);
}
/*
* @brief Update state of vLAPICs of a VM
* vLAPICs of VM switch between modes in an asynchronous fashion. This API
* captures the "transition" state triggered when one vLAPIC switches mode.
* When the VM is created, the state is set to "xAPIC" as all vLAPICs are setup
* in xAPIC mode.
*
* Upon reset, all LAPICs switch to xAPIC mode accroding to SDM 10.12.5
* Considering VM uses x2apic mode for vLAPIC, in reset or shutdown flow, vLAPIC state
* moves to "xAPIC" directly without going thru "transition".
*
* VM_VLAPIC_X2APIC - All the online vCPUs/vLAPICs of this VM use x2APIC mode
* VM_VLAPIC_XAPIC - All the online vCPUs/vLAPICs of this VM use xAPIC mode
* VM_VLAPIC_DISABLED - All the online vCPUs/vLAPICs of this VM are in Disabled mode
* VM_VLAPIC_TRANSITION - Online vCPUs/vLAPICs of this VM are in between transistion
*
* @pre vm != NULL
*/
void update_vm_vlapic_state(struct acrn_vm *vm)
{
uint16_t i;
struct acrn_vcpu *vcpu;
uint16_t vcpus_in_x2apic, vcpus_in_xapic;
enum vm_vlapic_mode vlapic_mode = VM_VLAPIC_XAPIC;
vcpus_in_x2apic = 0U;
vcpus_in_xapic = 0U;
spinlock_obtain(&vm->arch_vm.vlapic_mode_lock);
foreach_vcpu(i, vm, vcpu) {
/* Skip vCPU in state outside of VCPU_RUNNING as it may be offline. */
if (vcpu->state == VCPU_RUNNING) {
if (is_x2apic_enabled(vcpu_vlapic(vcpu))) {
vcpus_in_x2apic++;
} else if (is_xapic_enabled(vcpu_vlapic(vcpu))) {
vcpus_in_xapic++;
} else {
/*
* vCPU is using vLAPIC in Disabled mode
*/
}
}
}
if ((vcpus_in_x2apic == 0U) && (vcpus_in_xapic == 0U)) {
/*
* Check if the counts vcpus_in_x2apic and vcpus_in_xapic are zero
* VM_VLAPIC_DISABLED
*/
vlapic_mode = VM_VLAPIC_DISABLED;
} else if ((vcpus_in_x2apic != 0U) && (vcpus_in_xapic != 0U)) {
/*
* Check if the counts vcpus_in_x2apic and vcpus_in_xapic are non-zero
* VM_VLAPIC_TRANSITION
*/
vlapic_mode = VM_VLAPIC_TRANSITION;
} else if (vcpus_in_x2apic != 0U) {
/*
* Check if the counts vcpus_in_x2apic is non-zero
* VM_VLAPIC_X2APIC
*/
vlapic_mode = VM_VLAPIC_X2APIC;
} else {
/*
* Count vcpus_in_xapic is non-zero
* VM_VLAPIC_XAPIC
*/
vlapic_mode = VM_VLAPIC_XAPIC;
}
vm->arch_vm.vlapic_mode = vlapic_mode;
spinlock_release(&vm->arch_vm.vlapic_mode_lock);
}
/*
* @brief Check mode of vLAPICs of a VM
*
* @pre vm != NULL
*/
enum vm_vlapic_mode check_vm_vlapic_mode(const struct acrn_vm *vm)
{
enum vm_vlapic_mode vlapic_mode;
vlapic_mode = vm->arch_vm.vlapic_mode;
return vlapic_mode;
}
void arch_trigger_level_intr(struct acrn_vm *vm, uint32_t irq, bool assert)
{
union ioapic_rte rte;
uint32_t operation;
vioapic_get_rte(vm, irq, &rte);
/* TODO:
* Here should assert vuart irq according to vCOM1_IRQ polarity.
* The best way is to get the polarity info from ACPI table.
* Here we just get the info from vioapic configuration.
* based on this, we can still have irq storm during guest
* modify the vioapic setting, as it's only for debug uart,
* we want to make it as an known issue.
*/
if (rte.bits.intr_polarity == IOAPIC_RTE_INTPOL_ALO) {
operation = assert ? GSI_SET_LOW : GSI_SET_HIGH;
} else {
operation = assert ? GSI_SET_HIGH : GSI_SET_LOW;
}
vpic_set_irqline(vm_pic(vm), irq, operation);
vioapic_set_irqline_lock(vm, irq, operation);
}