mirror of
https://github.com/projectacrn/acrn-hypervisor.git
synced 2025-06-24 14:33:38 +00:00
hv: vmsi: add vmsix on msi emulation support
Some passthrough devices require multiple MSI vectors, but don't support MSI-X. Meanwhile, the Linux kernel doesn't support contiguous vector allocation. On a native platform, this issue can be mitigated by the IOMMU via interrupt remapping. However, on ACRN, there is no vIOMMU. vMSI-X on MSI emulation is one solution to mitigate this problem on ACRN. This patch adds MSI-X emulation on top of the MSI capability. For a device that needs MSI-X emulation, the HV will hide the MSI capability and present an MSI-X capability to the guest. The guest driver may need to be modified to request MSI-X vectors. For example: ret = pci_alloc_irq_vectors(pdev, 1, STMMAC_MSI_VEC_MAX, - PCI_IRQ_MSI); + PCI_IRQ_MSI | PCI_IRQ_MSIX); To enable MSI-X emulation, the following conditions must be met: - 1. The device should be in the vmsix_on_msi_devs array. - 2. The device should support MSI, but not MSI-X. - 3. The MSI capability should support per-vector masking. - 4. The device should have an unused BAR. - 5. The device driver should not rely on PBA for functionality. Tracked-On: #4831 Signed-off-by: Binbin Wu <binbin.wu@intel.com> Acked-by: Eddie Dong <eddie.dong@intel.com>
This commit is contained in:
parent
da1788c9a3
commit
6be27cdcab
@ -301,6 +301,7 @@ VP_DM_C_SRCS += dm/vpci/vpci_bridge.c
|
||||
VP_DM_C_SRCS += dm/vpci/pci_pt.c
|
||||
VP_DM_C_SRCS += dm/vpci/vmsi.c
|
||||
VP_DM_C_SRCS += dm/vpci/vmsix.c
|
||||
VP_DM_C_SRCS += dm/vpci/vmsix_on_msi.c
|
||||
VP_DM_C_SRCS += dm/vpci/vsriov.c
|
||||
VP_DM_C_SRCS += arch/x86/guest/vlapic.c
|
||||
VP_DM_C_SRCS += arch/x86/guest/pm.c
|
||||
|
@ -374,6 +374,7 @@ void init_vdev_pt(struct pci_vdev *vdev, bool is_pf_vdev)
|
||||
/* Initialize the vdev BARs except SRIOV VF, VF BARs are initialized directly from create_vf function */
|
||||
if (vdev->phyfun == NULL) {
|
||||
init_bars(vdev, is_pf_vdev);
|
||||
init_vmsix_on_msi(vdev);
|
||||
if (is_prelaunched_vm(vpci2vm(vdev->vpci)) && (!is_pf_vdev)) {
|
||||
pci_command = (uint16_t)pci_pdev_read_cfg(vdev->pdev->bdf, PCIR_COMMAND, 2U);
|
||||
|
||||
|
@ -174,7 +174,11 @@ static void rw_vmsix_table(struct pci_vdev *vdev, struct mmio_request *mmio, uin
|
||||
/* Write to pci_vdev */
|
||||
(void)memcpy_s((void *)entry + entry_offset, (size_t)mmio->size,
|
||||
&mmio->value, (size_t)mmio->size);
|
||||
if (vdev->msix.is_vmsix_on_msi) {
|
||||
remap_one_vmsix_entry_on_msi(vdev, index);
|
||||
} else {
|
||||
remap_one_vmsix_entry(vdev, index);
|
||||
}
|
||||
} else {
|
||||
pr_err("%s, Only DWORD and QWORD are permitted", __func__);
|
||||
}
|
||||
@ -205,6 +209,15 @@ int32_t vmsix_handle_table_mmio_access(struct io_request *io_req, void *handler_
|
||||
|
||||
if (msixtable_access(vdev, (uint32_t)offset)) {
|
||||
rw_vmsix_table(vdev, mmio, (uint32_t)offset);
|
||||
} else if (vdev->msix.is_vmsix_on_msi) {
|
||||
/* According to PCI spec, PBA is read-only.
|
||||
* Don't emulate PBA according to the device status, just return 0.
|
||||
*/
|
||||
if (mmio->direction == REQUEST_READ) {
|
||||
mmio->value = 0UL;
|
||||
} else {
|
||||
ret = -EINVAL;
|
||||
}
|
||||
} else {
|
||||
hva = hpa2hva(vdev->msix.mmio_hpa + offset);
|
||||
|
||||
|
204
hypervisor/dm/vpci/vmsix_on_msi.c
Normal file
204
hypervisor/dm/vpci/vmsix_on_msi.c
Normal file
@ -0,0 +1,204 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
#include <vm.h>
|
||||
#include <ptdev.h>
|
||||
#include <assign.h>
|
||||
#include <vpci.h>
|
||||
#include <vtd.h>
|
||||
#include <board.h>
|
||||
#include "vpci_priv.h"
|
||||
|
||||
#define PER_VECTOR_MASK_CAP 0x0100U
|
||||
|
||||
/* Pre-assumptions for vMSI-x on MSI emulation:
|
||||
* 1. The device is in vmsix_on_msi_devs array.
|
||||
* 2. The device should support MSI capability as well as per-vector mask
|
||||
* 3. The device doesn't support MSI-x capability.
|
||||
* 4. The device should have an unused BAR (this condition is checked inside init_vmsix_on_msi).
|
||||
* 5. HV doesn't emulate PBA according to physical device status, so the device driver should not rely on PBA
|
||||
* for functionality.
|
||||
*/
|
||||
static bool need_vmsix_on_msi_emulation(__unused struct pci_pdev *pdev, __unused uint16_t *vector_count)
|
||||
{
|
||||
bool ret = false;
|
||||
#if (MAX_VMSIX_ON_MSI_PDEVS_NUM > 0)
|
||||
uint16_t msgctrl;
|
||||
uint32_t i;
|
||||
|
||||
for(i = 0U; i < MAX_VMSIX_ON_MSI_PDEVS_NUM; i++) {
|
||||
if (pdev->bdf.value == vmsix_on_msi_devs[i].bdf.value) {
|
||||
if ((pdev->msi_capoff != 0U) && (pdev->msix.capoff == 0U)) {
|
||||
msgctrl = (uint16_t)pci_pdev_read_cfg(pdev->bdf, pdev->msi_capoff + PCIR_MSI_CTRL, 2U);
|
||||
*vector_count = 1U << ((msgctrl & PCIM_MSICTRL_MMC_MASK) >> 1U);
|
||||
if ((*vector_count > 1U) && ((msgctrl & PER_VECTOR_MASK_CAP) != 0U)) {
|
||||
ret = true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void reserve_vmsix_on_msi_irtes(struct pci_pdev *pdev)
|
||||
{
|
||||
struct intr_source intr_src;
|
||||
uint16_t count = 0;
|
||||
int32_t ret;
|
||||
|
||||
if (need_vmsix_on_msi_emulation(pdev, &count)) {
|
||||
intr_src.is_msi = true;
|
||||
intr_src.src.msi.value = pdev->bdf.value;
|
||||
ret = dmar_reserve_irte(&intr_src, count, &pdev->irte_start);
|
||||
if ((ret == 0) && (pdev->irte_start != INVALID_IRTE_ID)) {
|
||||
pdev->irte_count = count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline uint32_t get_mask_bits_offset(const struct pci_vdev *vdev)
|
||||
{
|
||||
return vdev->msi.is_64bit ? (vdev->msix.capoff + 0x10U) : (vdev->msix.capoff + 0xCU);
|
||||
}
|
||||
|
||||
/**
|
||||
* @pre vdev != NULL
|
||||
* @pre vdev->pdev != NULL
|
||||
*/
|
||||
void init_vmsix_on_msi(struct pci_vdev *vdev)
|
||||
{
|
||||
struct pci_pdev *pdev = vdev->pdev;
|
||||
uint32_t i;
|
||||
|
||||
/* irte_count > 1 only when the device needs vMSI-x on MSI emulation and IRTEs are reserved successfully */
|
||||
if (pdev->irte_count > 1U) {
|
||||
/* find an unused BAR */
|
||||
for (i = 0U; i < vdev->nr_bars; i++) {
|
||||
if (vdev->vbars[i].base_hpa == 0UL){
|
||||
break;
|
||||
}
|
||||
if (vdev->vbars[i].type == PCIBAR_MEM64) {
|
||||
i++;
|
||||
}
|
||||
}
|
||||
if (i < vdev->nr_bars) {
|
||||
vdev->msix.capoff = pdev->msi_capoff;
|
||||
vdev->msi.capoff = 0U;
|
||||
vdev->msix.is_vmsix_on_msi = true;
|
||||
/* For a device support MSI with per-vector mask, the length of MSI cap is at least 20 bytes */
|
||||
vdev->msix.caplen = MSIX_CAPLEN;
|
||||
vdev->msix.table_bar = i;
|
||||
vdev->msix.table_offset = 0U;
|
||||
vdev->msix.table_count = pdev->irte_count;
|
||||
|
||||
/* capability ID */
|
||||
pci_vdev_write_vcfg(vdev, vdev->msix.capoff, 1U, 0x11U);
|
||||
/* message control, MSI-X Diabled, Function unamsked */
|
||||
pci_vdev_write_vcfg(vdev, vdev->msix.capoff + 2U, 2U, pdev->irte_count - 1U);
|
||||
/* Init MSIX table vBAR, offset is 0 */
|
||||
pci_vdev_write_vcfg(vdev, vdev->msix.capoff + 4U, 4U, i);
|
||||
/* Init PBA table vBAR, offset is 2048 */
|
||||
pci_vdev_write_vcfg(vdev, vdev->msix.capoff + 8U, 4U, 2048U + i);
|
||||
|
||||
vdev->vbars[i].type = PCIBAR_MEM32;
|
||||
vdev->vbars[i].size = 4096U;
|
||||
vdev->vbars[i].base_hpa = 0x0UL;
|
||||
vdev->vbars[i].mask = 0xFFFFF000U & PCI_BASE_ADDRESS_MEM_MASK;
|
||||
/* fixed for memory, 32bit, non-prefetchable */
|
||||
vdev->vbars[i].fixed = 0U;
|
||||
|
||||
/* About MSI-x bar GPA:
|
||||
* - For Service VM: when first time init, it is programmed as 0, then OS will program
|
||||
* the value later and the value is stored in vdev->vbars[MSI-X_BAR_ID].base_gpa.
|
||||
* When the device is assigned to UOS and then assgined back to SOS, the stored base
|
||||
* GPA will be used.
|
||||
* - For Post-launched VM: The GPA is assigned by device model.
|
||||
* - For Pre-launched VM: Not supported yet.
|
||||
*/
|
||||
vdev->msix.mmio_gpa = vdev->vbars[i].base_gpa;
|
||||
vdev_pt_write_vbar(vdev, i, (uint32_t)(vdev->vbars[i].base_gpa & 0xFFFFFFFFUL));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void write_vmsix_cap_reg_on_msi(struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t val)
|
||||
{
|
||||
uint16_t old_msgctrl, msgctrl;
|
||||
uint16_t msi_msgctrl;
|
||||
|
||||
old_msgctrl = (uint16_t)pci_vdev_read_vcfg(vdev, vdev->msix.capoff + PCIR_MSIX_CTRL, 2U);
|
||||
/* Write to vdev */
|
||||
pci_vdev_write_vcfg(vdev, offset, bytes, val);
|
||||
msgctrl = (uint16_t)pci_vdev_read_vcfg(vdev, vdev->msix.capoff + PCIR_MSIX_CTRL, 2U);
|
||||
|
||||
if (((old_msgctrl ^ msgctrl) & (PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK)) != 0U) {
|
||||
msi_msgctrl = (uint16_t)pci_pdev_read_cfg(vdev->pdev->bdf, offset, 2U);
|
||||
|
||||
msi_msgctrl = msi_msgctrl & (~PCIM_MSICTRL_MME_MASK);
|
||||
msi_msgctrl &= ~ PCIM_MSICTRL_MSI_ENABLE;
|
||||
|
||||
/* If MSI Enable is being set, make sure INTxDIS bit is set */
|
||||
if ((msgctrl & PCIM_MSIXCTRL_MSIX_ENABLE) != 0U) {
|
||||
enable_disable_pci_intx(vdev->pdev->bdf, false);
|
||||
msi_msgctrl |= (msi_msgctrl & PCIM_MSICTRL_MMC_MASK) << 3U;
|
||||
msi_msgctrl |= PCIM_MSICTRL_MSI_ENABLE;
|
||||
}
|
||||
pci_pdev_write_cfg(vdev->pdev->bdf, offset, 2U, msi_msgctrl);
|
||||
|
||||
if ((msgctrl & PCIM_MSIXCTRL_FUNCTION_MASK) != 0U) {
|
||||
pci_pdev_write_cfg(vdev->pdev->bdf, get_mask_bits_offset(vdev), 4U, 0xFFFFFFFFU);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void remap_one_vmsix_entry_on_msi(struct pci_vdev *vdev, uint32_t index)
|
||||
{
|
||||
const struct msix_table_entry *ventry;
|
||||
uint32_t mask_bits;
|
||||
uint32_t vector_mask = 1U << index;
|
||||
struct msi_info info = {};
|
||||
union pci_bdf pbdf = vdev->pdev->bdf;
|
||||
union irte_index ir_index;
|
||||
int32_t ret = 0;
|
||||
uint32_t capoff = vdev->msix.capoff;
|
||||
|
||||
mask_bits = pci_pdev_read_cfg(pbdf, get_mask_bits_offset(vdev), 4U);
|
||||
mask_bits |= vector_mask;
|
||||
pci_pdev_write_cfg(pbdf, get_mask_bits_offset(vdev), 4U, mask_bits);
|
||||
|
||||
ventry = &vdev->msix.table_entries[index];
|
||||
if ((ventry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0U) {
|
||||
info.addr.full = vdev->msix.table_entries[index].addr;
|
||||
info.data.full = vdev->msix.table_entries[index].data;
|
||||
|
||||
ret = ptirq_prepare_msix_remap(vpci2vm(vdev->vpci), vdev->bdf.value, pbdf.value,
|
||||
(uint16_t)index, &info, vdev->pdev->irte_start + (uint16_t)index);
|
||||
if (ret == 0) {
|
||||
if (!vdev->msix.is_vmsix_on_msi_programmed) {
|
||||
ir_index.index = vdev->pdev->irte_start;
|
||||
info.addr.ir_bits.shv = 1U;
|
||||
info.addr.ir_bits.intr_index_high = ir_index.bits.index_high;
|
||||
info.addr.ir_bits.intr_index_low = ir_index.bits.index_low;
|
||||
pci_pdev_write_cfg(pbdf, capoff + PCIR_MSI_ADDR, 0x4U, (uint32_t)info.addr.full);
|
||||
if (vdev->msi.is_64bit) {
|
||||
pci_pdev_write_cfg(pbdf, capoff + PCIR_MSI_ADDR_HIGH, 0x4U,
|
||||
(uint32_t)(info.addr.full >> 32U));
|
||||
pci_pdev_write_cfg(pbdf, capoff + PCIR_MSI_DATA_64BIT, 0x2U,
|
||||
(uint16_t)info.data.full);
|
||||
} else {
|
||||
pci_pdev_write_cfg(pbdf, capoff + PCIR_MSI_DATA, 0x2U,
|
||||
(uint16_t)info.data.full);
|
||||
}
|
||||
vdev->msix.is_vmsix_on_msi_programmed = true;
|
||||
}
|
||||
mask_bits &= ~vector_mask;
|
||||
}
|
||||
}
|
||||
pci_pdev_write_cfg(pbdf, get_mask_bits_offset(vdev), 4U, mask_bits);
|
||||
}
|
@ -500,7 +500,11 @@ static int32_t write_pt_dev_cfg(struct pci_vdev *vdev, uint32_t offset,
|
||||
} else if (msicap_access(vdev, offset)) {
|
||||
write_vmsi_cap_reg(vdev, offset, bytes, val);
|
||||
} else if (msixcap_access(vdev, offset)) {
|
||||
if (vdev->msix.is_vmsix_on_msi) {
|
||||
write_vmsix_cap_reg_on_msi(vdev, offset, bytes, val);
|
||||
} else {
|
||||
write_vmsix_cap_reg(vdev, offset, bytes, val);
|
||||
}
|
||||
} else if (sriovcap_access(vdev, offset)) {
|
||||
write_sriov_cap_reg(vdev, offset, bytes, val);
|
||||
} else {
|
||||
|
@ -128,6 +128,10 @@ void read_vmsix_cap_reg(const struct pci_vdev *vdev, uint32_t offset, uint32_t b
|
||||
void write_vmsix_cap_reg(struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t val);
|
||||
void deinit_vmsix(struct pci_vdev *vdev);
|
||||
|
||||
void init_vmsix_on_msi(struct pci_vdev *vdev);
|
||||
void write_vmsix_cap_reg_on_msi(struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t val);
|
||||
void remap_one_vmsix_entry_on_msi(struct pci_vdev *vdev, uint32_t index);
|
||||
|
||||
void init_vsriov(struct pci_vdev *vdev);
|
||||
void read_sriov_cap_reg(const struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t *val);
|
||||
void write_sriov_cap_reg(struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t val);
|
||||
|
@ -767,6 +767,7 @@ struct pci_pdev *init_pdev(uint16_t pbdf, uint32_t drhd_index)
|
||||
|
||||
pdev->drhd_index = drhd_index;
|
||||
num_pci_pdev++;
|
||||
reserve_vmsix_on_msi_irtes(pdev);
|
||||
} else {
|
||||
pr_err("%s, %x:%x.%x unsupported headed type: 0x%x\n",
|
||||
__func__, bdf.bits.b, bdf.bits.d, bdf.bits.f, hdr_type);
|
||||
|
@ -67,6 +67,8 @@ struct pci_msix {
|
||||
uint32_t table_bar;
|
||||
uint32_t table_offset;
|
||||
uint32_t table_count;
|
||||
bool is_vmsix_on_msi;
|
||||
bool is_vmsix_on_msi_programmed;
|
||||
};
|
||||
|
||||
/* SRIOV capability structure */
|
||||
|
@ -228,6 +228,9 @@ struct pci_pdev {
|
||||
|
||||
/* IOMMU responsible for DMA and Interrupt Remapping for this device */
|
||||
uint32_t drhd_index;
|
||||
/* Used for vMSI-x on MSI emulation */
|
||||
uint16_t irte_start;
|
||||
uint16_t irte_count;
|
||||
|
||||
/* The bar info of the physical PCI device. */
|
||||
uint32_t nr_bars; /* 6 for normal device, 2 for bridge, 1 for cardbus */
|
||||
@ -359,4 +362,5 @@ bool is_plat_hidden_pdev(union pci_bdf bdf);
|
||||
bool pdev_need_bar_restore(const struct pci_pdev *pdev);
|
||||
void pdev_restore_bar(const struct pci_pdev *pdev);
|
||||
void pci_switch_to_mmio_cfg_ops(void);
|
||||
void reserve_vmsix_on_msi_irtes(struct pci_pdev *pdev);
|
||||
#endif /* PCI_H_ */
|
||||
|
Loading…
Reference in New Issue
Block a user