hv: add vector remapping support for pre-launched VMs

For pre-launched VMs, MSI/MSI-X configuration writes are not intercepted by ACRN: the
config space is passed through, the device is programmed with the guest's values, and
when an interrupt lands in ACRN the guest vector is injected into the VM's vLAPIC as-is.
With this patch, ACRN intercepts MSI/MSI-X config writes and takes the same code path to
remap the interrupt vector/APIC ID as it does for SOS/UOS.

Tracked-On: #2879
Signed-off-by: Sainath Grandhi <sainath.grandhi@intel.com>
Reviewed-by: Eddie Dong <eddie.dong@intel.com>
Sainath Grandhi 2019-04-02 22:51:32 -07:00 committed by Eddie Dong
parent c4ec7ac358
commit 5b795a3312
7 changed files with 235 additions and 160 deletions
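To illustrate what "remap the interrupt vector/APIC ID" means here: the guest programs an MSI address/data pair with its own vector and virtual destination, and the hypervisor rewrites those fields with the host-allocated vector and physical APIC ID before programming the device. The following is a minimal sketch only; the field layout is the standard x86 MSI format, and the struct and function names are illustrative, not the ACRN ptirq API.

/* Illustrative sketch -- hypothetical names, not ACRN's ptirq interface. */
#include <stdint.h>

#define MSI_ADDR_BASE        0xFEE00000U                        /* fixed MSI address window */
#define MSI_ADDR_DESTID(id)  (((uint32_t)(id) & 0xFFU) << 12U)  /* destination APIC ID, bits 19:12 */
#define MSI_DATA_VECTOR(v)   ((uint32_t)(v) & 0xFFU)            /* vector, bits 7:0 */

struct msi_regs {
	uint32_t addr;	/* low 32 bits of the MSI address register */
	uint32_t data;	/* MSI data register */
};

/* Rebuild the physical MSI from the host-allocated vector and physical APIC ID,
 * keeping the guest-chosen delivery/trigger attributes in the data register. */
static void remap_msi(const struct msi_regs *guest, struct msi_regs *phys,
		uint8_t host_vector, uint8_t dest_apic_id)
{
	phys->addr = MSI_ADDR_BASE | MSI_ADDR_DESTID(dest_apic_id);
	phys->data = (guest->data & ~0xFFU) | MSI_DATA_VECTOR(host_vector);
}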


@@ -592,7 +592,8 @@ void register_pio_emulation_handler(struct acrn_vm *vm, uint32_t pio_idx,
/**
* @brief Register a MMIO handler
*
* This API registers a MMIO handler to \p vm before it is launched.
* This API registers a MMIO handler to \p vm before it is started.
* For pre-launched VMs, this API can also be called after the VM has started.
*
* @param vm The VM to which the MMIO handler is registered
* @param read_write The handler for emulating accesses to the given range
@@ -610,9 +611,7 @@ int32_t register_mmio_emulation_handler(struct acrn_vm *vm,
int32_t status = -EINVAL;
struct mem_io_node *mmio_node;
if ((vm->hw.created_vcpus > 0U) && (vm->hw.vcpu_array[0].launched)) {
pr_err("register mmio handler after vm launched");
} else {
if (is_prelaunched_vm(vm) || (vm->state != VM_STARTED)) {
/* Ensure both a read/write handler and range check function exist */
if ((read_write != NULL) && (end > start)) {
if (vm->emul_mmio_regions >= CONFIG_MAX_EMULATED_MMIO_REGIONS) {
@@ -640,6 +639,8 @@ int32_t register_mmio_emulation_handler(struct acrn_vm *vm,
status = 0;
}
}
} else {
pr_err("register mmio handler after VM is Started");
}
/* Return status to caller */
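With the relaxed check above, a handler for a pre-launched VM may be registered even after the VM has started, for example when the guest re-programs a BAR at runtime. A hedged usage sketch follows; the handler body and the address range are placeholders, only the register_mmio_emulation_handler() signature is taken from the code above.

static int32_t my_mmio_handler(struct io_request *io_req, void *handler_private_data)
{
	/* emulate the access recorded in io_req->reqs.mmio ... */
	(void)io_req;
	(void)handler_private_data;
	return 0;
}

static void intercept_range(struct acrn_vm *vm, uint64_t gpa_lo, uint64_t gpa_hi, void *data)
{
	/* For a pre-launched VM this is now legal even when vm->state == VM_STARTED. */
	if (register_mmio_emulation_handler(vm, my_mmio_handler, gpa_lo, gpa_hi, data) != 0) {
		pr_err("failed to register MMIO handler");
	}
}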


@@ -68,6 +68,18 @@ bool is_sos_vm(const struct acrn_vm *vm)
return (vm != NULL) && (get_vm_config(vm->vm_id)->type == SOS_VM);
}
/**
* @pre vm != NULL
* @pre vm->vmid < CONFIG_MAX_VM_NUM
*/
bool is_prelaunched_vm(const struct acrn_vm *vm)
{
struct acrn_vm_config *vm_config;
vm_config = get_vm_config(vm->vm_id);
return (vm_config->type == PRE_LAUNCHED_VM);
}
/**
* @pre vm != NULL && vm_config != NULL && vm->vmid < CONFIG_MAX_VM_NUM
*/


@@ -31,7 +31,6 @@
#include <vm.h>
#include <errno.h>
#include <vtd.h>
#include <ept.h>
#include <mmu.h>
#include <logmsg.h>
@@ -42,81 +41,6 @@ static inline uint32_t pci_bar_base(uint32_t bar)
return bar & PCIM_BAR_MEM_BASE;
}
#if defined(HV_DEBUG)
/**
* @pre vdev != NULL
*/
static int32_t validate(const struct pci_vdev *vdev)
{
uint32_t idx;
int32_t ret = 0;
for (idx = 0U; idx < PCI_BAR_COUNT; idx++) {
if ((vdev->bar[idx].base != 0x0UL)
|| ((vdev->bar[idx].size & 0xFFFUL) != 0x0UL)
|| ((vdev->bar[idx].type != PCIBAR_MEM32)
&& (vdev->bar[idx].type != PCIBAR_NONE))) {
ret = -EINVAL;
break;
}
}
return ret;
}
#endif
/**
* @pre vdev != NULL
* @pre vdev->vpci != NULL
* @pre vdev->vpci->vm != NULL
*/
void vdev_pt_init(const struct pci_vdev *vdev)
{
int32_t ret;
struct acrn_vm *vm = vdev->vpci->vm;
uint16_t pci_command;
ASSERT(validate(vdev) == 0, "Error, invalid bar defined");
/* Create an iommu domain for target VM if not created */
if (vm->iommu == NULL) {
if (vm->arch_vm.nworld_eptp == 0UL) {
vm->arch_vm.nworld_eptp = vm->arch_vm.ept_mem_ops.get_pml4_page(vm->arch_vm.ept_mem_ops.info);
sanitize_pte((uint64_t *)vm->arch_vm.nworld_eptp);
}
vm->iommu = create_iommu_domain(vm->vm_id,
hva2hpa(vm->arch_vm.nworld_eptp), 48U);
}
ret = assign_iommu_device(vm->iommu, (uint8_t)vdev->pdev->bdf.bits.b,
(uint8_t)(vdev->pdev->bdf.value & 0xFFU));
if (ret != 0) {
panic("failed to assign iommu device!");
}
pci_command = (uint16_t)pci_pdev_read_cfg(vdev->pdev->bdf, PCIR_COMMAND, 2U);
/* Disable INTX */
pci_command |= 0x400U;
pci_pdev_write_cfg(vdev->pdev->bdf, PCIR_COMMAND, 2U, pci_command);
}
/**
* @pre vdev != NULL
* @pre vdev->vpci != NULL
* @pre vdev->vpci->vm != NULL
*/
void vdev_pt_deinit(const struct pci_vdev *vdev)
{
int32_t ret;
struct acrn_vm *vm = vdev->vpci->vm;
ret = unassign_iommu_device(vm->iommu, (uint8_t)vdev->pdev->bdf.bits.b,
(uint8_t)(vdev->pdev->bdf.value & 0xFFU));
if (ret != 0) {
panic("failed to unassign iommu device!");
}
}
/**
* @pre vdev != NULL
*/
@@ -135,11 +59,123 @@ int32_t vdev_pt_cfgread(const struct pci_vdev *vdev, uint32_t offset,
}
/**
* @pre vdev != NULL
* @pre vdev->pdev != NULL
* @pre vdev->pdev->msix.table_bar < (PCI_BAR_COUNT - 1U)
*/
void vdev_pt_remap_msix_table_bar(struct pci_vdev *vdev)
{
uint32_t i;
uint64_t addr_hi, addr_lo;
struct pci_msix *msix = &vdev->msix;
struct pci_pdev *pdev = vdev->pdev;
struct pci_bar *bar;
struct acrn_vm *vm = vdev->vpci->vm;
struct acrn_vm_config *vm_config;
vm_config = get_vm_config(vm->vm_id);
ASSERT(vdev->pdev->msix.table_bar < (PCI_BAR_COUNT - 1U), "msix->table_bar out of range");
/* Mask all table entries */
for (i = 0U; i < msix->table_count; i++) {
msix->tables[i].vector_control = PCIM_MSIX_VCTRL_MASK;
msix->tables[i].addr = 0U;
msix->tables[i].data = 0U;
}
bar = &pdev->bar[msix->table_bar];
if (bar != NULL) {
vdev->msix.mmio_hpa = bar->base;
if (vm_config->type == PRE_LAUNCHED_VM) {
vdev->msix.mmio_gpa = vdev->bar[msix->table_bar].base;
} else {
vdev->msix.mmio_gpa = sos_vm_hpa2gpa(bar->base);
}
vdev->msix.mmio_size = bar->size;
}
/*
* For SOS:
* --------
* MSI-X Table BAR Contains:
* Other Info + Tables + PBA Other info already mapped into EPT (since SOS)
* Tables are handled by HV MMIO handler (4k adjusted up and down)
* and remaps interrupts
* PBA already mapped into EPT (since SOS)
*
* Other Info + Tables Other info already mapped into EPT (since SOS)
* Tables are handled by HV MMIO handler (4k adjusted up and down)
* and remaps interrupts
*
* Tables Tables are handled by HV MMIO handler (4k adjusted up and down)
* and remaps interrupts
*
* For UOS (launched by DM):
* -------------------------
* MSI-X Table BAR Contains:
* Other Info + Tables + PBA Other info mapped into EPT (4k adjusted) by DM
* Tables are handled by DM MMIO handler (4k adjusted up and down) and SOS writes to tables,
* intercepted by HV MMIO handler and HV remaps interrupts
* PBA already mapped into EPT by DM
*
* Other Info + Tables Other info mapped into EPT by DM
* Tables are handled by DM MMIO handler (4k adjusted up and down) and SOS writes to tables,
* intercepted by HV MMIO handler and HV remaps interrupts.
*
* Tables Tables are handled by DM MMIO handler (4k adjusted up and down) and SOS writes to tables,
* intercepted by HV MMIO handler and HV remaps interrupts.
*
* For Pre-launched VMs (no SOS/DM):
* --------------------------------
* MSI-X Table BAR Contains:
* All 3 cases: Writes to MMIO region in MSI-X Table BAR handled by HV MMIO handler
* If the offset falls within the MSI-X table [offset, offset+tables_size), HV remaps
* interrupts.
* Else, HV writes/reads to/from the corresponding HPA
*/
if (msix->mmio_gpa != 0U) {
if (vm_config->type == PRE_LAUNCHED_VM) {
addr_hi = vdev->msix.mmio_gpa + vdev->msix.mmio_size;
addr_lo = vdev->msix.mmio_gpa;
} else {
/*
* PCI Spec: a BAR may also map other usable address space that is not associated
* with MSI-X structures, but it must not share any naturally aligned 4 KB
* address range with one where either MSI-X structure resides.
* The MSI-X Table and MSI-X PBA are permitted to co-reside within a naturally
* aligned 4 KB address range.
*
* If the PBA or other data resides in the same BAR as the MSI-X table, the device model
* can emulate them and map those memory ranges at a 4KB boundary. Here, we should make
* sure we only intercept the minimum number of 4KB pages needed for the MSI-X table.
*/
/* The higher boundary of the 4KB aligned address range for MSI-X table */
addr_hi = msix->mmio_gpa + msix->table_offset + (msix->table_count * MSIX_TABLE_ENTRY_SIZE);
addr_hi = round_page_up(addr_hi);
/* The lower boundary of the 4KB aligned address range for MSI-X table */
addr_lo = round_page_down(msix->mmio_gpa + msix->table_offset);
}
(void)register_mmio_emulation_handler(vdev->vpci->vm, vmsix_table_mmio_access_handler,
addr_lo, addr_hi, vdev);
}
}
/**
* @brief Remaps guest BARs other than the MSI-X table BAR
* This API is invoked when the guest re-programs a PCI BAR with an MMIO region
* @pre vdev != NULL
* @pre vdev->vpci != NULL
* @pre vdev->vpci->vm != NULL
*/
static void vdev_pt_remap_bar(const struct pci_vdev *vdev, uint32_t idx,
static void vdev_pt_remap_generic_bar(const struct pci_vdev *vdev, uint32_t idx,
uint32_t new_base)
{
struct acrn_vm *vm = vdev->vpci->vm;
@@ -169,6 +205,7 @@ static void vdev_pt_cfgwrite_bar(struct pci_vdev *vdev, uint32_t offset,
uint32_t idx;
uint32_t new_bar, mask;
bool bar_update_normal;
bool is_msix_table_bar;
if ((bytes != 4U) || ((offset & 0x3U) != 0U)) {
return;
@@ -185,12 +222,18 @@ static void vdev_pt_cfgwrite_bar(struct pci_vdev *vdev, uint32_t offset,
case PCIBAR_MEM32:
bar_update_normal = (new_bar_uos != (uint32_t)~0U);
is_msix_table_bar = (has_msix_cap(vdev) && (idx == vdev->msix.table_bar));
new_bar = new_bar_uos & mask;
if (bar_update_normal) {
vdev_pt_remap_bar(vdev, idx,
pci_bar_base(new_bar));
if (is_msix_table_bar) {
vdev->bar[idx].base = pci_bar_base(new_bar);
vdev_pt_remap_msix_table_bar(vdev);
} else {
vdev_pt_remap_generic_bar(vdev, idx,
pci_bar_base(new_bar));
vdev->bar[idx].base = pci_bar_base(new_bar);
vdev->bar[idx].base = pci_bar_base(new_bar);
}
}
break;
@@ -218,4 +261,3 @@ int32_t vdev_pt_cfgwrite(struct pci_vdev *vdev, uint32_t offset,
return ret;
}
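For the pre-launched case described in the comment block above, the MMIO handler essentially needs to decide whether a BAR access hits the MSI-X table or not. The following is a minimal hedged sketch of that range check only, assuming the pci_msix fields (mmio_gpa, mmio_hpa, table_offset, table_count) and MSIX_TABLE_ENTRY_SIZE from vpci_priv.h; the surrounding table emulation and HPA pass-through are left out.

/* Sketch only: returns true when a guest access at 'gpa' falls inside the MSI-X
 * table window of the BAR, i.e. [table_offset, table_offset + table_size). */
static bool access_hits_msix_table(const struct pci_vdev *vdev, uint64_t gpa)
{
	const struct pci_msix *msix = &vdev->msix;
	uint64_t offset = gpa - msix->mmio_gpa;
	uint64_t table_size = (uint64_t)msix->table_count * MSIX_TABLE_ENTRY_SIZE;

	return (offset >= msix->table_offset) && (offset < (msix->table_offset + table_size));
}

/* When this returns true the hypervisor emulates the table entry and remaps the
 * interrupt; otherwise the access is forwarded to msix->mmio_hpa + offset. */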


@@ -38,13 +38,6 @@
#include <logmsg.h>
#include "vpci_priv.h"
/**
* @pre vdev != NULL
*/
static inline bool has_msix_cap(const struct pci_vdev *vdev)
{
return (vdev->msix.capoff != 0U);
}
/**
* @pre vdev != NULL
@@ -287,7 +280,7 @@ static void vmsix_table_rw(const struct pci_vdev *vdev, struct mmio_request *mmi
* @pre io_req != NULL
* @pre handler_private_data != NULL
*/
static int32_t vmsix_table_mmio_access_handler(struct io_request *io_req, void *handler_private_data)
int32_t vmsix_table_mmio_access_handler(struct io_request *io_req, void *handler_private_data)
{
struct mmio_request *mmio = &io_req->reqs.mmio;
struct pci_vdev *vdev;
@@ -334,65 +327,6 @@ static int32_t vmsix_table_mmio_access_handler(struct io_request *io_req, void *
return ret;
}
/**
* @pre vdev != NULL
* @pre vdev->pdev != NULL
* @pre vdev->pdev->msix.table_bar < (PCI_BAR_COUNT - 1U)
*/
static void vmsix_init_helper(struct pci_vdev *vdev)
{
uint32_t i;
uint64_t addr_hi, addr_lo;
struct pci_msix *msix = &vdev->msix;
struct pci_pdev *pdev = vdev->pdev;
struct pci_bar *bar;
ASSERT(vdev->pdev->msix.table_bar < (PCI_BAR_COUNT - 1U), "msix->table_bar out of range");
msix->table_bar = pdev->msix.table_bar;
msix->table_offset = pdev->msix.table_offset;
msix->table_count = pdev->msix.table_count;
/* Mask all table entries */
for (i = 0U; i < msix->table_count; i++) {
msix->tables[i].vector_control = PCIM_MSIX_VCTRL_MASK;
msix->tables[i].addr = 0U;
msix->tables[i].data = 0U;
}
bar = &pdev->bar[msix->table_bar];
if (bar != NULL) {
vdev->msix.mmio_hpa = bar->base;
vdev->msix.mmio_gpa = sos_vm_hpa2gpa(bar->base);
vdev->msix.mmio_size = bar->size;
}
if (msix->mmio_gpa != 0U) {
/*
* PCI Spec: a BAR may also map other usable address space that is not associated
* with MSI-X structures, but it must not share any naturally aligned 4 KB
* address range with one where either MSI-X structure resides.
* The MSI-X Table and MSI-X PBA are permitted to co-reside within a naturally
* aligned 4 KB address range.
*
* If PBA or others reside in the same BAR with MSI-X Table, devicemodel could
* emulate them and maps these memory range at the 4KB boundary. Here, we should
* make sure only intercept the minimum number of 4K pages needed for MSI-X table.
*/
/* The higher boundary of the 4KB aligned address range for MSI-X table */
addr_hi = msix->mmio_gpa + msix->table_offset + (msix->table_count * MSIX_TABLE_ENTRY_SIZE);
addr_hi = round_page_up(addr_hi);
/* The lower boundary of the 4KB aligned address range for MSI-X table */
addr_lo = round_page_down(msix->mmio_gpa + msix->table_offset);
(void)register_mmio_emulation_handler(vdev->vpci->vm, vmsix_table_mmio_access_handler,
addr_lo, addr_hi, vdev);
}
}
/**
* @pre vdev != NULL
*/
@@ -402,12 +336,13 @@ void vmsix_init(struct pci_vdev *vdev)
vdev->msix.capoff = pdev->msix.capoff;
vdev->msix.caplen = pdev->msix.caplen;
vdev->msix.table_bar = pdev->msix.table_bar;
vdev->msix.table_offset = pdev->msix.table_offset;
vdev->msix.table_count = pdev->msix.table_count;
if (has_msix_cap(vdev)) {
(void)memcpy_s((void *)&vdev->cfgdata.data_8[pdev->msix.capoff], pdev->msix.caplen,
(void *)&pdev->msix.cap[0U], pdev->msix.caplen);
vmsix_init_helper(vdev);
}
}
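Since vmsix_table_mmio_access_handler() is now registered for pre-launched VMs as well, it is worth noting how such a handler can locate the table entry and field being touched. The sketch below is illustrative only: the helper name is hypothetical, the field offsets 0/4/8/12 follow the standard PCI MSI-X entry layout, and the pci_msix fields are taken from the code above.

static void locate_msix_access(const struct pci_vdev *vdev, uint64_t gpa,
		uint32_t *entry, uint32_t *field)
{
	const struct pci_msix *msix = &vdev->msix;
	uint64_t off_in_table = (gpa - msix->mmio_gpa) - msix->table_offset;

	*entry = (uint32_t)(off_in_table / MSIX_TABLE_ENTRY_SIZE);  /* index into msix->tables[] */
	*field = (uint32_t)(off_in_table % MSIX_TABLE_ENTRY_SIZE);  /* 0 = addr_lo, 4 = addr_hi, 8 = data, 12 = vector_control */
}

/* The real handler (vmsix_table_rw) then reads or updates msix->tables[*entry] and
 * triggers interrupt remapping once the entry's contents take effect. */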


@@ -28,6 +28,8 @@
*/
#include <vm.h>
#include <vtd.h>
#include <mmu.h>
#include <errno.h>
#include <logmsg.h>
#include "vpci_priv.h"
@@ -287,6 +289,56 @@ static inline bool is_valid_bar(const struct pci_bar *bar)
return (is_valid_bar_type(bar) && is_valid_bar_size(bar));
}
/**
* @pre vdev != NULL
* @pre vdev->vpci != NULL
* @pre vdev->vpci->vm != NULL
*/
static void assign_vdev_pt_iommu_domain(const struct pci_vdev *vdev)
{
int32_t ret;
struct acrn_vm *vm = vdev->vpci->vm;
/* Create an iommu domain for target VM if not created */
if (vm->iommu == NULL) {
if (vm->arch_vm.nworld_eptp == 0UL) {
vm->arch_vm.nworld_eptp = vm->arch_vm.ept_mem_ops.get_pml4_page(vm->arch_vm.ept_mem_ops.info);
sanitize_pte((uint64_t *)vm->arch_vm.nworld_eptp);
}
vm->iommu = create_iommu_domain(vm->vm_id,
hva2hpa(vm->arch_vm.nworld_eptp), 48U);
}
ret = assign_iommu_device(vm->iommu, (uint8_t)vdev->pdev->bdf.bits.b,
(uint8_t)(vdev->pdev->bdf.value & 0xFFU));
if (ret != 0) {
panic("failed to assign iommu device!");
}
}
/**
* @pre vdev != NULL
* @pre vdev->vpci != NULL
* @pre vdev->vpci->vm != NULL
*/
static void remove_vdev_pt_iommu_domain(const struct pci_vdev *vdev)
{
int32_t ret;
struct acrn_vm *vm = vdev->vpci->vm;
ret = unassign_iommu_device(vm->iommu, (uint8_t)vdev->pdev->bdf.bits.b,
(uint8_t)(vdev->pdev->bdf.value & 0xFFU));
if (ret != 0) {
/*
*TODO
* panic needs to be removed here
* Currently unassign_iommu_device can fail for multiple reasons
* Once all the reasons and methods to avoid them can be made sure
* panic here is not necessary.
*/
panic("failed to unassign iommu device!");
}
}
/**
* @pre vdev != NULL
*/
@@ -295,6 +347,7 @@ static void partition_mode_pdev_init(struct pci_vdev *vdev, union pci_bdf pbdf)
struct pci_pdev *pdev;
uint32_t idx;
struct pci_bar *pbar, *vbar;
uint16_t pci_command;
pdev = find_pci_pdev(pbdf);
ASSERT(pdev != NULL, "pdev is NULL");
@@ -316,7 +369,12 @@ static void partition_mode_pdev_init(struct pci_vdev *vdev, union pci_bdf pbdf)
}
}
vdev_pt_init(vdev);
pci_command = (uint16_t)pci_pdev_read_cfg(vdev->pdev->bdf, PCIR_COMMAND, 2U);
/* Disable INTX */
pci_command |= 0x400U;
pci_pdev_write_cfg(vdev->pdev->bdf, PCIR_COMMAND, 2U, pci_command);
assign_vdev_pt_iommu_domain(vdev);
}
/**
@@ -343,6 +401,10 @@ int32_t partition_mode_vpci_init(const struct acrn_vm *vm)
vdev_hostbridge_init(vdev);
} else {
partition_mode_pdev_init(vdev, ptdev_config->pbdf);
vmsi_init(vdev);
vmsix_init(vdev);
}
}
@@ -364,7 +426,11 @@ void partition_mode_vpci_deinit(const struct acrn_vm *vm)
if (is_hostbridge(vdev)) {
vdev_hostbridge_deinit(vdev);
} else {
vdev_pt_deinit(vdev);
remove_vdev_pt_iommu_domain(vdev);
vmsi_deinit(vdev);
vmsix_deinit(vdev);
}
}
}
@@ -381,7 +447,10 @@ void partition_mode_cfgread(const struct acrn_vpci *vpci, union pci_bdf vbdf,
if (is_hostbridge(vdev)) {
(void)vdev_hostbridge_cfgread(vdev, offset, bytes, val);
} else {
if (vdev_pt_cfgread(vdev, offset, bytes, val) != 0) {
if ((vdev_pt_cfgread(vdev, offset, bytes, val) != 0)
&& (vmsi_cfgread(vdev, offset, bytes, val) != 0)
&& (vmsix_cfgread(vdev, offset, bytes, val) != 0)
) {
/* Not handled by any handlers, passthru to physical device */
*val = pci_pdev_read_cfg(vdev->pdev->bdf, offset, bytes);
}
@@ -401,7 +470,10 @@ void partition_mode_cfgwrite(const struct acrn_vpci *vpci, union pci_bdf vbdf,
if (is_hostbridge(vdev)) {
(void)vdev_hostbridge_cfgwrite(vdev, offset, bytes, val);
} else {
if (vdev_pt_cfgwrite(vdev, offset, bytes, val) != 0){
if ((vdev_pt_cfgwrite(vdev, offset, bytes, val) != 0)
&& (vmsi_cfgwrite(vdev, offset, bytes, val) != 0)
&& (vmsix_cfgwrite(vdev, offset, bytes, val) != 0)
) {
/* Not handled by any handlers, passthru to physical device */
pci_pdev_write_cfg(vdev->pdev->bdf, offset, bytes, val);
}
@@ -479,6 +551,10 @@ static void init_vdev_for_pdev(struct pci_pdev *pdev, const void *vm)
vmsi_init(vdev);
vmsix_init(vdev);
if (has_msix_cap(vdev)) {
vdev_pt_remap_msix_table_bar(vdev);
}
}
}
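The cfgread/cfgwrite paths above chain the pass-through BAR, MSI, and MSI-X handlers: a handler returns 0 when it claims the access and non-zero to let the next one (and finally the physical config space) see it. The following is a generic hedged sketch of that convention with placeholder names; only pci_pdev_read_cfg() is taken from the code above.

typedef int32_t (*cfg_read_fn)(const struct pci_vdev *vdev, uint32_t offset,
		uint32_t bytes, uint32_t *val);

static void chained_cfgread(const struct pci_vdev *vdev, uint32_t offset,
		uint32_t bytes, uint32_t *val, const cfg_read_fn *chain, uint32_t n)
{
	uint32_t i;

	for (i = 0U; i < n; i++) {
		if (chain[i](vdev, offset, bytes, val) == 0) {
			return;	/* claimed by this layer */
		}
	}
	/* Not claimed by any layer: pass through to the physical device. */
	*val = pci_pdev_read_cfg(vdev->pdev->bdf, offset, bytes);
}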


@@ -67,21 +67,29 @@ static inline void pci_vdev_write_cfg_u32(struct pci_vdev *vdev, uint32_t offset
vdev->cfgdata.data_32[offset >> 2U] = val;
}
/**
* @pre vdev != NULL
*/
static inline bool has_msix_cap(const struct pci_vdev *vdev)
{
return (vdev->msix.capoff != 0U);
}
void vdev_hostbridge_init(struct pci_vdev *vdev);
int32_t vdev_hostbridge_cfgread(const struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t *val);
int32_t vdev_hostbridge_cfgwrite(struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t val);
void vdev_hostbridge_deinit(__unused const struct pci_vdev *vdev);
void vdev_pt_init(const struct pci_vdev *vdev);
int32_t vdev_pt_cfgread(const struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t *val);
int32_t vdev_pt_cfgwrite(struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t val);
void vdev_pt_deinit(const struct pci_vdev *vdev);
void vmsi_init(struct pci_vdev *vdev);
int32_t vmsi_cfgread(const struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t *val);
int32_t vmsi_cfgwrite(struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t val);
void vmsi_deinit(const struct pci_vdev *vdev);
void vmsix_init(struct pci_vdev *vdev);
void vdev_pt_remap_msix_table_bar(struct pci_vdev *vdev);
int32_t vmsix_table_mmio_access_handler(struct io_request *io_req, void *handler_private_data);
int32_t vmsix_cfgread(const struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t *val);
int32_t vmsix_cfgwrite(struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t val);
void vmsix_deinit(const struct pci_vdev *vdev);


@@ -211,6 +211,7 @@ void prepare_vm(uint16_t vm_id, struct acrn_vm_config *vm_config);
void launch_vms(uint16_t pcpu_id);
bool is_valid_vm(const struct acrn_vm *vm);
bool is_sos_vm(const struct acrn_vm *vm);
bool is_prelaunched_vm(const struct acrn_vm *vm);
uint16_t find_free_vm_id(void);
struct acrn_vm *get_vm_from_vmid(uint16_t vm_id);
struct acrn_vm *get_sos_vm(void);