From dcebdb8e98982601a9b8409b9948bd6a2c462f4c Mon Sep 17 00:00:00 2001
From: Zide Chen
Date: Mon, 24 Sep 2018 12:29:01 -0700
Subject: [PATCH] hv: implement msi.c to handle MSI remapping for vm0

Emulate the MSI Capability structure for vm0 in sharing mode:

- it intercepts I/O requests to the MSI Capability structure, emulates
  the Message Control word, and passes all other I/O requests straight
  through to the physical device.
- criteria to trigger MSI remapping: the MSI Enable bit is being
  changed, or Message Data/Addr is being changed while MSI Enable is
  set.

Tracked-On: #1568
Signed-off-by: dongshen
Signed-off-by: Zide Chen
Reviewed-by: Zhao Yakui
Acked-by: Anthony Xu
---
 hypervisor/Makefile           |   6 +-
 hypervisor/dm/hw/pci.c        |  18 +++
 hypervisor/dm/vpci/msi.c      | 229 ++++++++++++++++++++++++++++++++++
 hypervisor/dm/vpci/pci_priv.h |   8 ++
 hypervisor/include/dm/pci.h   |   1 +
 hypervisor/include/dm/vpci.h  |  10 ++
 6 files changed, 271 insertions(+), 1 deletion(-)
 create mode 100644 hypervisor/dm/vpci/msi.c

diff --git a/hypervisor/Makefile b/hypervisor/Makefile
index 0a9e8d389..f66248c4e 100644
--- a/hypervisor/Makefile
+++ b/hypervisor/Makefile
@@ -178,9 +178,13 @@ endif
 C_SRCS += dm/vpic.c
 C_SRCS += dm/vioapic.c
 ifeq ($(CONFIG_PARTITION_MODE),y)
-C_SRCS += $(wildcard dm/vpci/*.c)
 C_SRCS += $(wildcard partition/*.c)
 C_SRCS += dm/hw/pci.c
+C_SRCS += dm/vpci/core.c
+C_SRCS += dm/vpci/vpci.c
+C_SRCS += dm/vpci/partition_mode.c
+C_SRCS += dm/vpci/hostbridge.c
+C_SRCS += dm/vpci/pci_pt.c
 C_SRCS += dm/vrtc.c
 endif
 
diff --git a/hypervisor/dm/hw/pci.c b/hypervisor/dm/hw/pci.c
index ed41d77dc..e565a21a2 100644
--- a/hypervisor/dm/hw/pci.c
+++ b/hypervisor/dm/hw/pci.c
@@ -99,6 +99,24 @@ void pci_pdev_write_cfg(union pci_bdf bdf, uint32_t offset, uint32_t bytes, uint
 	spinlock_release(&pci_device_lock);
 }
 
+/* enable: 1: enable INTx; 0: Disable INTx */
+void enable_disable_pci_intx(union pci_bdf bdf, bool enable)
+{
+	uint32_t cmd, new_cmd;
+
+	/* Set or clear the INTXDIS bit in COMMAND register */
+	cmd = pci_pdev_read_cfg(bdf, PCIR_COMMAND, 2U);
+	if (enable) {
+		new_cmd = cmd & ~PCIM_CMD_INTxDIS;
+	} else {
+		new_cmd = cmd | PCIM_CMD_INTxDIS;
+	}
+
+	if ((cmd ^ new_cmd) != 0U) {
+		pci_pdev_write_cfg(bdf, PCIR_COMMAND, 0x2U, new_cmd);
+	}
+}
+
 #define BUS_SCAN_SKIP		0U
 #define BUS_SCAN_PENDING	1U
 #define BUS_SCAN_COMPLETE	2U
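
The helper added above exists because a function that has MSI enabled must not also signal interrupts through its INTx# pin; vmsi_remap() later in this patch calls enable_disable_pci_intx(pbdf, false) whenever it sets MSI Enable. For reference, here is a minimal standalone sketch of the same read-modify-write on the PCI Command register (offset 0x04, Interrupt Disable is bit 10). The cfg_read16()/cfg_write16() accessors are hypothetical placeholders, not the hypervisor's pci_pdev_* API.

#include <stdint.h>
#include <stdbool.h>

#define PCI_CMD_REG        0x04U   /* Command register, 16-bit */
#define PCI_CMD_INTX_DIS   0x0400U /* bit 10: INTx# disable */

uint16_t cfg_read16(uint32_t bdf, uint32_t offset);           /* assumed accessor */
void cfg_write16(uint32_t bdf, uint32_t offset, uint16_t v);  /* assumed accessor */

static void set_intx_disabled(uint32_t bdf, bool disable)
{
        uint16_t cmd = cfg_read16(bdf, PCI_CMD_REG);
        uint16_t new_cmd = disable ? (uint16_t)(cmd | PCI_CMD_INTX_DIS)
                                   : (uint16_t)(cmd & (uint16_t)~PCI_CMD_INTX_DIS);

        /* Only touch config space when the bit actually flips */
        if (new_cmd != cmd) {
                cfg_write16(bdf, PCI_CMD_REG, new_cmd);
        }
}

The change-detection check mirrors the patch: the Command register is written only when the bit value really changes, avoiding a needless config-space access.
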
diff --git a/hypervisor/dm/vpci/msi.c b/hypervisor/dm/vpci/msi.c
new file mode 100644
index 000000000..5b390cd36
--- /dev/null
+++ b/hypervisor/dm/vpci/msi.c
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2011 NetApp, Inc.
+ * Copyright (c) 2018 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include
+#include "pci_priv.h"
+
+static inline bool msicap_access(struct pci_vdev *vdev, uint32_t offset)
+{
+	if (vdev->msi.capoff == 0U) {
+		return 0;
+	}
+
+	return in_range(offset, vdev->msi.capoff, vdev->msi.caplen);
+}
+
+static int vmsi_remap(struct pci_vdev *vdev, bool enable)
+{
+	struct ptdev_msi_info info;
+	union pci_bdf pbdf = vdev->pdev.bdf;
+	struct vm *vm = vdev->vpci->vm;
+	uint32_t capoff = vdev->msi.capoff;
+	uint32_t msgctrl, msgdata;
+	uint32_t addrlo, addrhi;
+	int ret;
+
+	/* Disable MSI during configuration */
+	msgctrl = pci_vdev_read_cfg(vdev, capoff + PCIR_MSI_CTRL, 2U);
+	if ((msgctrl & PCIM_MSICTRL_MSI_ENABLE) == PCIM_MSICTRL_MSI_ENABLE) {
+		pci_pdev_write_cfg(pbdf, capoff + PCIR_MSI_CTRL, 2U, msgctrl & ~PCIM_MSICTRL_MSI_ENABLE);
+	}
+
+	/* Read the MSI capability structure from virtual device */
+	addrlo = pci_vdev_read_cfg_u32(vdev, capoff + PCIR_MSI_ADDR);
+	if (msgctrl & PCIM_MSICTRL_64BIT) {
+		msgdata = pci_vdev_read_cfg_u16(vdev, capoff + PCIR_MSI_DATA_64BIT);
+		addrhi = pci_vdev_read_cfg_u32(vdev, capoff + PCIR_MSI_ADDR_HIGH);
+	} else {
+		msgdata = pci_vdev_read_cfg_u16(vdev, capoff + PCIR_MSI_DATA);
+		addrhi = 0U;
+	}
+
+	info.is_msix = 0;
+	info.vmsi_addr = (uint64_t)addrlo | ((uint64_t)addrhi << 32U);
+
+	/* MSI is being enabled or disabled */
+	if (enable) {
+		info.vmsi_data = msgdata;
+	} else {
+		info.vmsi_data = 0U;
+	}
+
+	ret = ptdev_msix_remap(vm, vdev->vbdf.value, 0U, &info);
+	if (ret != 0) {
+		return ret;
+	}
+
+	/* Update MSI Capability structure to physical device */
+	pci_pdev_write_cfg(pbdf, capoff + PCIR_MSI_ADDR, 0x4U, (uint32_t)info.pmsi_addr);
+	if (msgctrl & PCIM_MSICTRL_64BIT) {
+		pci_pdev_write_cfg(pbdf, capoff + PCIR_MSI_ADDR_HIGH, 0x4U, (uint32_t)(info.pmsi_addr >> 32U));
+		pci_pdev_write_cfg(pbdf, capoff + PCIR_MSI_DATA_64BIT, 0x2U, (uint16_t)info.pmsi_data);
+	} else {
+		pci_pdev_write_cfg(pbdf, capoff + PCIR_MSI_DATA, 0x2U, (uint16_t)info.pmsi_data);
+	}
+
+	/* If MSI Enable is being set, make sure INTxDIS bit is set */
+	if (enable) {
+		enable_disable_pci_intx(pbdf, false);
+		pci_pdev_write_cfg(pbdf, capoff + PCIR_MSI_CTRL, 2U, msgctrl | PCIM_MSICTRL_MSI_ENABLE);
+	}
+
+	return ret;
+}
+
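
For readers less familiar with the capability layout that vmsi_remap() walks: a 64-bit-capable function places Message Data at capability offset 0xC (after the 64-bit address), while a 32-bit-only function places it at offset 0x8, which is why the code selects PCIR_MSI_DATA_64BIT vs. PCIR_MSI_DATA based on PCIM_MSICTRL_64BIT. The sketch below is illustrative only (not the hypervisor API); it parses a byte copy of the capability, assuming a little-endian host, as on x86.

#include <stdint.h>
#include <stdbool.h>
#include <string.h>

#define MSI_CTRL        0x2U    /* Message Control (16-bit) */
#define MSI_ADDR        0x4U    /* Message Address, low 32 bits */
#define MSI_ADDR_HIGH   0x8U    /* Message Address, high 32 bits (64-bit capable only) */
#define MSI_DATA_32     0x8U    /* Message Data when 64-bit addressing is not supported */
#define MSI_DATA_64     0xCU    /* Message Data when 64-bit addressing is supported */
#define MSI_CTRL_64BIT  0x80U   /* Message Control bit 7: 64-bit address capable */

struct msi_regs {
        uint64_t addr;
        uint16_t data;
};

/* 'cap' points to a byte copy of the MSI capability, e.g. the vdev's config space */
static struct msi_regs parse_msi_cap(const uint8_t *cap)
{
        struct msi_regs r = { 0U, 0U };
        uint16_t ctrl;
        uint32_t lo = 0U, hi = 0U;
        bool is64;

        (void)memcpy(&ctrl, &cap[MSI_CTRL], sizeof(ctrl));
        is64 = ((ctrl & MSI_CTRL_64BIT) != 0U);

        (void)memcpy(&lo, &cap[MSI_ADDR], sizeof(lo));
        if (is64) {
                (void)memcpy(&hi, &cap[MSI_ADDR_HIGH], sizeof(hi));
        }

        /* Message Data moves depending on whether the upper address word exists */
        (void)memcpy(&r.data, &cap[is64 ? MSI_DATA_64 : MSI_DATA_32], sizeof(r.data));

        r.addr = (uint64_t)lo | ((uint64_t)hi << 32U);
        return r;
}
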
+static int vmsi_cfgread(struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t *val)
+{
+	/* For PIO access, we emulate Capability Structures only */
+	if (msicap_access(vdev, offset)) {
+		*val = pci_vdev_read_cfg(vdev, offset, bytes);
+		return 0;
+	}
+
+	return -ENODEV;
+}
+
+static int vmsi_cfgwrite(struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t val)
+{
+	bool message_changed = false;
+	bool enable;
+	uint32_t msgctrl;
+
+	/* Writing MSI Capability Structure */
+	if (msicap_access(vdev, offset)) {
+
+		/* Save msgctrl for comparison */
+		msgctrl = pci_vdev_read_cfg(vdev, vdev->msi.capoff + PCIR_MSI_CTRL, 2U);
+
+		/* Either Message Data or message Addr is being changed */
+		if (((offset - vdev->msi.capoff) >= PCIR_MSI_ADDR) && (val != pci_vdev_read_cfg(vdev, offset, bytes))) {
+			message_changed = true;
+		}
+
+		/* Write to vdev */
+		pci_vdev_write_cfg(vdev, offset, bytes, val);
+
+		/* Do remap if MSI Enable bit is being changed */
+		if (((offset - vdev->msi.capoff) == PCIR_MSI_CTRL) && ((msgctrl ^ val) & PCIM_MSICTRL_MSI_ENABLE)) {
+			enable = ((val & PCIM_MSICTRL_MSI_ENABLE) != 0U);
+			(void)vmsi_remap(vdev, enable);
+		} else {
+			if (message_changed && ((msgctrl & PCIM_MSICTRL_MSI_ENABLE) != 0U)) {
+				(void)vmsi_remap(vdev, true);
+			}
+		}
+
+		return 0;
+	}
+
+	return -ENODEV;
+}
+
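
The write handler above implements the two remap criteria stated in the commit message. Restated as a standalone predicate (a clarification, not code from the patch), where cap_rel is the write offset relative to the start of the MSI capability and the constants match the standard layout used by the code:

#include <stdint.h>
#include <stdbool.h>

#define MSI_CTRL_OFF     0x2U   /* Message Control word */
#define MSI_ADDR_OFF     0x4U   /* Message Address and everything after it */
#define MSI_ENABLE_BIT   0x1U   /* Message Control bit 0: MSI Enable */

static bool msi_write_triggers_remap(uint32_t cap_rel, uint32_t old_ctrl,
                                     uint32_t new_val, bool value_changed)
{
        /* Criterion 1: the write toggles the MSI Enable bit in Message Control */
        if ((cap_rel == MSI_CTRL_OFF) && (((old_ctrl ^ new_val) & MSI_ENABLE_BIT) != 0U)) {
                return true;
        }

        /* Criterion 2: Message Address/Data change while MSI is already enabled */
        if ((cap_rel >= MSI_ADDR_OFF) && value_changed && ((old_ctrl & MSI_ENABLE_BIT) != 0U)) {
                return true;
        }

        return false;
}

Writes that rewrite the same address/data values, or writes made while MSI is disabled that do not toggle the enable bit, fall through both tests and only update the virtual config space.
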
+void populate_msi_struct(struct pci_vdev *vdev)
+{
+	uint8_t ptr, cap;
+	uint32_t msgctrl;
+	uint32_t len, bytes, offset, val;
+	union pci_bdf pbdf = vdev->pdev.bdf;
+
+	/* Has new Capabilities list? */
+	if ((pci_pdev_read_cfg(pbdf, PCIR_STATUS, 2U) & PCIM_STATUS_CAPPRESENT) == 0U) {
+		return;
+	}
+
+	ptr = (uint8_t)pci_pdev_read_cfg(pbdf, PCIR_CAP_PTR, 1U);
+	while ((ptr != 0U) && (ptr != 0xFFU)) {
+		cap = (uint8_t)pci_pdev_read_cfg(pbdf, ptr + PCICAP_ID, 1U);
+
+		/* Ignore all other Capability IDs for now */
+		if ((cap == PCIY_MSI) || (cap == PCIY_MSIX)) {
+			offset = ptr;
+			if (cap == PCIY_MSI) {
+				vdev->msi.capoff = offset;
+				msgctrl = pci_pdev_read_cfg(pbdf, offset + PCIR_MSI_CTRL, 2U);
+
+				/*
+				 * Ignore the 'mask' and 'pending' bits in the MSI capability
+				 * (msgctrl & PCIM_MSICTRL_VECTOR).
+				 * We'll let the guest manipulate them directly.
+				 */
+				len = (msgctrl & PCIM_MSICTRL_64BIT) ? 14U : 10U;
+				vdev->msi.caplen = len;
+
+				/* Assign MSI handler for configuration read and write */
+				add_vdev_handler(vdev, &pci_ops_vdev_msi);
+			} else {
+				vdev->msix.capoff = offset;
+				vdev->msix.caplen = MSIX_CAPLEN;
+				len = vdev->msix.caplen;
+
+				/* Assign MSI-X handler for configuration read and write */
+				add_vdev_handler(vdev, &pci_ops_vdev_msix);
+			}
+
+			/* Copy MSI/MSI-X capability struct into virtual device */
+			while (len > 0U) {
+				bytes = (len >= 4U) ? 4U : len;
+				val = pci_pdev_read_cfg(pbdf, offset, bytes);
+
+				if ((cap == PCIY_MSI) && (offset == vdev->msi.capoff)) {
+					/*
+					 * Don't support multiple vector for now,
+					 * Force Multiple Message Enable and Multiple Message
+					 * Capable to 0
+					 */
+					val &= ~((uint32_t)PCIM_MSICTRL_MMC_MASK << 16U);
+					val &= ~((uint32_t)PCIM_MSICTRL_MME_MASK << 16U);
+				}
+
+				pci_vdev_write_cfg(vdev, offset, bytes, val);
+				len -= bytes;
+				offset += bytes;
+			}
+		}
+
+		ptr = (uint8_t)pci_pdev_read_cfg(pbdf, ptr + PCICAP_NEXTPTR, 1U);
+	}
+}
+
+static int vmsi_deinit(struct pci_vdev *vdev)
+{
+	if (vdev->msi.capoff != 0U) {
+		ptdev_remove_msix_remapping(vdev->vpci->vm, vdev->vbdf.value, 1);
+	}
+
+	return 0;
+}
+
+struct pci_vdev_ops pci_ops_vdev_msi = {
+	.init = NULL,
+	.deinit = vmsi_deinit,
+	.cfgwrite = vmsi_cfgwrite,
+	.cfgread = vmsi_cfgread,
+};
diff --git a/hypervisor/dm/vpci/pci_priv.h b/hypervisor/dm/vpci/pci_priv.h
index 030eff203..9fb000620 100644
--- a/hypervisor/dm/vpci/pci_priv.h
+++ b/hypervisor/dm/vpci/pci_priv.h
@@ -32,6 +32,11 @@
 
 #include
 
+static inline bool in_range(uint32_t value, uint32_t lower, uint32_t len)
+{
+	return ((value >= lower) && (value < (lower + len)));
+}
+
 static inline uint8_t
 pci_vdev_read_cfg_u8(struct pci_vdev *vdev, uint32_t offset)
 {
@@ -69,8 +74,11 @@ pci_vdev_write_cfg_u32(struct pci_vdev *vdev, uint32_t offset, uint32_t val)
 }
 
 extern struct vpci_ops partition_mode_vpci_ops;
+extern struct pci_vdev_ops pci_ops_vdev_msi;
 
 uint32_t pci_vdev_read_cfg(struct pci_vdev *vdev, uint32_t offset, uint32_t bytes);
 void pci_vdev_write_cfg(struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t val);
 
+void populate_msi_struct(struct pci_vdev *vdev);
+
 #endif /* PCI_PRIV_H_ */
diff --git a/hypervisor/include/dm/pci.h b/hypervisor/include/dm/pci.h
index 44b189e5b..cc1658f92 100644
--- a/hypervisor/include/dm/pci.h
+++ b/hypervisor/include/dm/pci.h
@@ -161,6 +161,7 @@ static inline bool pci_bar_access(uint32_t offset)
 
 uint32_t pci_pdev_read_cfg(union pci_bdf bdf, uint32_t offset, uint32_t bytes);
 void pci_pdev_write_cfg(union pci_bdf bdf, uint32_t offset, uint32_t bytes, uint32_t val);
+void enable_disable_pci_intx(union pci_bdf bdf, bool enable);
 
 void pci_scan_bus(pci_enumeration_cb cb, void *data);
diff --git a/hypervisor/include/dm/vpci.h b/hypervisor/include/dm/vpci.h
index ba7e105db..3cace9393 100644
--- a/hypervisor/include/dm/vpci.h
+++ b/hypervisor/include/dm/vpci.h
@@ -59,6 +59,12 @@ struct pci_pdev {
 	union pci_bdf bdf;
 };
 
+/* MSI capability structure */
+struct msi {
+	uint32_t capoff;
+	uint32_t caplen;
+};
+
 struct pci_vdev {
 	struct pci_vdev_ops *ops;
 	struct vpci *vpci;
@@ -71,6 +77,10 @@ struct pci_vdev {
 
 	/* The bar info of the virtual PCI device. */
 	struct pci_bar bar[PCI_BAR_COUNT];
+
+#ifndef CONFIG_PARTITION_MODE
+	struct msi msi;
+#endif
 };
 
 struct pci_addr_info {
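
populate_msi_struct() in this patch follows the standard capability-list walk: check the Capabilities List bit in the Status register, start at the pointer held in config offset 0x34, and follow the chain in which each capability stores its ID at byte 0 and the next pointer at byte 1. A generic sketch of that walk, assuming hypothetical cfg_read8()/cfg_read16() accessors rather than pci_pdev_read_cfg():

#include <stdint.h>

#define PCI_STATUS_REG      0x06U   /* Status register */
#define PCI_STATUS_CAPLIST  0x10U   /* bit 4: capability list present */
#define PCI_CAP_PTR_REG     0x34U   /* pointer to the first capability */
#define PCI_CAP_ID_MSI      0x05U
#define PCI_CAP_ID_MSIX     0x11U

uint8_t cfg_read8(uint32_t bdf, uint32_t offset);   /* assumed accessor */
uint16_t cfg_read16(uint32_t bdf, uint32_t offset); /* assumed accessor */

/* Returns the config-space offset of the requested capability, or 0 if absent */
static uint8_t find_capability(uint32_t bdf, uint8_t cap_id)
{
        uint8_t ptr;

        if ((cfg_read16(bdf, PCI_STATUS_REG) & PCI_STATUS_CAPLIST) == 0U) {
                return 0U;
        }

        ptr = cfg_read8(bdf, PCI_CAP_PTR_REG);
        while ((ptr != 0U) && (ptr != 0xFFU)) {
                if (cfg_read8(bdf, ptr) == cap_id) {    /* byte 0: capability ID */
                        return ptr;
                }
                ptr = cfg_read8(bdf, ptr + 1U);         /* byte 1: next pointer */
        }

        return 0U;
}

populate_msi_struct() performs the same walk but, instead of returning the offset, copies the MSI/MSI-X capability into the virtual config space (clearing the Multiple Message Capable/Enable fields) and registers the matching cfgread/cfgwrite handler as it goes.
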