From 523184972b282cd9ca17a76f6ca4742394856818 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 2 Oct 2017 12:39:09 -0600 Subject: vfio/pci: Virtualize Maximum Payload Size With virtual PCI-Express chipsets, we now see userspace/guest drivers trying to match the physical MPS setting to a virtual downstream port. Of course a lone physical device surrounded by virtual interconnects cannot make a correct decision for a proper MPS setting. Instead, let's virtualize the MPS control register so that writes through to hardware are disallowed. Userspace drivers like QEMU assume they can write anything to the device and we'll filter out anything dangerous. Since mismatched MPS can lead to AER and other faults, let's add it to the kernel side rather than relying on userspace virtualization to handle it. Signed-off-by: Alex Williamson Reviewed-by: Eric Auger --- drivers/vfio/pci/vfio_pci_config.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 5628fe1..91335e6 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -849,11 +849,13 @@ static int __init init_pci_cap_exp_perm(struct perm_bits *perm) /* * Allow writes to device control fields, except devctl_phantom, - * which could confuse IOMMU, and the ARI bit in devctl2, which + * which could confuse IOMMU, MPS, which can break communication + * with other physical devices, and the ARI bit in devctl2, which * is set at probe time. FLR gets virtualized via our writefn. 
*/ p_setw(perm, PCI_EXP_DEVCTL, - PCI_EXP_DEVCTL_BCR_FLR, ~PCI_EXP_DEVCTL_PHANTOM); + PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_PAYLOAD, + ~PCI_EXP_DEVCTL_PHANTOM); p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI); return 0; } -- cgit v1.1 From cf0d53ba4947aad6e471491d5b20a567cbe92e56 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 2 Oct 2017 12:39:10 -0600 Subject: vfio/pci: Virtualize Maximum Read Request Size MRRS defines the maximum read request size a device is allowed to make. Drivers will often increase this to allow more data transfer with a single request. Completions to this request are bound by the MPS setting for the bus. Aside from device quirks (none known), it doesn't seem to make sense to set an MRRS value less than MPS, yet this is a likely scenario given that user drivers do not have a system-wide view of the PCI topology. Virtualize MRRS such that the user can set MRRS >= MPS, but use MPS as the floor value that we'll write to hardware. Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_config.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 91335e6..115a36f 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -808,6 +808,7 @@ static int vfio_exp_config_write(struct vfio_pci_device *vdev, int pos, { __le16 *ctrl = (__le16 *)(vdev->vconfig + pos - offset + PCI_EXP_DEVCTL); + int readrq = le16_to_cpu(*ctrl) & PCI_EXP_DEVCTL_READRQ; count = vfio_default_config_write(vdev, pos, count, perm, offset, val); if (count < 0) @@ -833,6 +834,27 @@ static int vfio_exp_config_write(struct vfio_pci_device *vdev, int pos, pci_try_reset_function(vdev->pdev); } + /* + * MPS is virtualized to the user, writes do not change the physical + * register since determining a proper MPS value requires a system wide + * device view. 
The MRRS is largely independent of MPS, but since the + * user does not have that system-wide view, they might set a safe, but + * inefficiently low value. Here we allow writes through to hardware, + * but we set the floor to the physical device MPS setting, so that + * we can at least use full TLPs, as defined by the MPS value. + * + * NB, if any devices actually depend on an artificially low MRRS + * setting, this will need to be revisited, perhaps with a quirk + * through pcie_set_readrq(). + */ + if (readrq != (le16_to_cpu(*ctrl) & PCI_EXP_DEVCTL_READRQ)) { + readrq = 128 << + ((le16_to_cpu(*ctrl) & PCI_EXP_DEVCTL_READRQ) >> 12); + readrq = max(readrq, pcie_get_mps(vdev->pdev)); + + pcie_set_readrq(vdev->pdev, readrq); + } + return count; } @@ -851,11 +873,12 @@ static int __init init_pci_cap_exp_perm(struct perm_bits *perm) * Allow writes to device control fields, except devctl_phantom, * which could confuse IOMMU, MPS, which can break communication * with other physical devices, and the ARI bit in devctl2, which - * is set at probe time. FLR gets virtualized via our writefn. + * is set at probe time. FLR and MRRS get virtualized via our + * writefn. */ p_setw(perm, PCI_EXP_DEVCTL, - PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_PAYLOAD, - ~PCI_EXP_DEVCTL_PHANTOM); + PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_PAYLOAD | + PCI_EXP_DEVCTL_READRQ, ~PCI_EXP_DEVCTL_PHANTOM); p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI); return 0; } -- cgit v1.1 From 5c2fefd882d3666f11447e3e0fe663a8a7f64cc9 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 2 Oct 2017 12:39:11 -0600 Subject: vfio/spapr: Add cond_resched() for huge updates Clearing very big IOMMU tables can trigger soft lockups. This adds cond_resched() to allow the scheduler to do context switching when it decides to.
Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_spapr_tce.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 63112c3..759a5bd 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -507,6 +507,8 @@ static int tce_iommu_clear(struct tce_container *container, enum dma_data_direction direction; for ( ; pages; --pages, ++entry) { + cond_resched(); + direction = DMA_NONE; oldhpa = 0; ret = iommu_tce_xchg(tbl, entry, &oldhpa, &direction); -- cgit v1.1 From 71a7d3d78e3ca51ea688ae88c389867d948377cd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 20 Oct 2017 11:41:56 -0600 Subject: vfio/type1: silence integer overflow warning I get a static checker warning about the potential integer overflow if we add "unmap->iova + unmap->size". The integer overflow isn't really harmful, but we may as well fix it. Also unmap->size gets truncated to size_t when we pass it to vfio_find_dma() so we could check for too high values of that as well. 
Signed-off-by: Dan Carpenter Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 92155cc..e30e29a 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -767,6 +767,9 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, return -EINVAL; if (!unmap->size || unmap->size & mask) return -EINVAL; + if (unmap->iova + unmap->size < unmap->iova || + unmap->size > SIZE_MAX) + return -EINVAL; WARN_ON(mask & PAGE_MASK); again: -- cgit v1.1 From e25883411e80f314f0f76c8eeacd1b47d7e4805b Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Fri, 20 Oct 2017 11:43:24 -0600 Subject: vfio: platform: reset: Add Broadcom FlexRM reset module This patch adds Broadcom FlexRM low-level reset for VFIO platform. It will do the following: 1. Disable/Deactivate each FlexRM ring 2. Flush each FlexRM ring The cleanup sequence for FlexRM rings is adapted from Broadcom FlexRM mailbox driver. Signed-off-by: Anup Patel Reviewed-by: Oza Oza Reviewed-by: Scott Branden Reviewed-by: Eric Auger Signed-off-by: Alex Williamson --- drivers/vfio/platform/reset/Kconfig | 9 ++ drivers/vfio/platform/reset/Makefile | 1 + .../vfio/platform/reset/vfio_platform_bcmflexrm.c | 113 +++++++++++++++++++++ 3 files changed, 123 insertions(+) create mode 100644 drivers/vfio/platform/reset/vfio_platform_bcmflexrm.c (limited to 'drivers/vfio') diff --git a/drivers/vfio/platform/reset/Kconfig b/drivers/vfio/platform/reset/Kconfig index 70cccc5..392e3c0 100644 --- a/drivers/vfio/platform/reset/Kconfig +++ b/drivers/vfio/platform/reset/Kconfig @@ -13,3 +13,12 @@ config VFIO_PLATFORM_AMDXGBE_RESET Enables the VFIO platform driver to handle reset for AMD XGBE If you don't know what to do here, say N. 
+ +config VFIO_PLATFORM_BCMFLEXRM_RESET + tristate "VFIO support for Broadcom FlexRM reset" + depends on VFIO_PLATFORM && (ARCH_BCM_IPROC || COMPILE_TEST) + default ARCH_BCM_IPROC + help + Enables the VFIO platform driver to handle reset for Broadcom FlexRM + + If you don't know what to do here, say N. diff --git a/drivers/vfio/platform/reset/Makefile b/drivers/vfio/platform/reset/Makefile index 93f4e23..8d9874b 100644 --- a/drivers/vfio/platform/reset/Makefile +++ b/drivers/vfio/platform/reset/Makefile @@ -5,3 +5,4 @@ ccflags-y += -Idrivers/vfio/platform obj-$(CONFIG_VFIO_PLATFORM_CALXEDAXGMAC_RESET) += vfio-platform-calxedaxgmac.o obj-$(CONFIG_VFIO_PLATFORM_AMDXGBE_RESET) += vfio-platform-amdxgbe.o +obj-$(CONFIG_VFIO_PLATFORM_BCMFLEXRM_RESET) += vfio_platform_bcmflexrm.o diff --git a/drivers/vfio/platform/reset/vfio_platform_bcmflexrm.c b/drivers/vfio/platform/reset/vfio_platform_bcmflexrm.c new file mode 100644 index 0000000..d45c3be --- /dev/null +++ b/drivers/vfio/platform/reset/vfio_platform_bcmflexrm.c @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2017 Broadcom + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* + * This driver provides reset support for Broadcom FlexRM ring manager + * to VFIO platform. 
+ */ +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include "vfio_platform_private.h" + +/* FlexRM configuration */ +#define RING_REGS_SIZE 0x10000 +#define RING_VER_MAGIC 0x76303031 + +/* Per-Ring register offsets */ +#define RING_VER 0x000 +#define RING_CONTROL 0x034 +#define RING_FLUSH_DONE 0x038 + +/* Register RING_CONTROL fields */ +#define CONTROL_FLUSH_SHIFT 5 + +/* Register RING_FLUSH_DONE fields */ +#define FLUSH_DONE_MASK 0x1 + +static int vfio_platform_bcmflexrm_shutdown(void __iomem *ring) +{ + unsigned int timeout; + + /* Disable/inactivate ring */ + writel_relaxed(0x0, ring + RING_CONTROL); + + /* Set ring flush state */ + timeout = 1000; /* timeout of 1s */ + writel_relaxed(BIT(CONTROL_FLUSH_SHIFT), ring + RING_CONTROL); + do { + if (readl_relaxed(ring + RING_FLUSH_DONE) & + FLUSH_DONE_MASK) + break; + mdelay(1); + } while (--timeout); + if (!timeout) + return -ETIMEDOUT; + + /* Clear ring flush state */ + timeout = 1000; /* timeout of 1s */ + writel_relaxed(0x0, ring + RING_CONTROL); + do { + if (!(readl_relaxed(ring + RING_FLUSH_DONE) & + FLUSH_DONE_MASK)) + break; + mdelay(1); + } while (--timeout); + if (!timeout) + return -ETIMEDOUT; + + return 0; +} + +static int vfio_platform_bcmflexrm_reset(struct vfio_platform_device *vdev) +{ + void __iomem *ring; + int rc = 0, ret = 0, ring_num = 0; + struct vfio_platform_region *reg = &vdev->regions[0]; + + /* Map FlexRM ring registers if not mapped */ + if (!reg->ioaddr) { + reg->ioaddr = ioremap_nocache(reg->addr, reg->size); + if (!reg->ioaddr) + return -ENOMEM; + } + + /* Discover and shutdown each FlexRM ring */ + for (ring = reg->ioaddr; + ring < (reg->ioaddr + reg->size); ring += RING_REGS_SIZE) { + if (readl_relaxed(ring + RING_VER) == RING_VER_MAGIC) { + rc = vfio_platform_bcmflexrm_shutdown(ring); + if (rc) { + dev_warn(vdev->device, + "FlexRM ring%d shutdown error %d\n", + ring_num, rc); + ret |= rc; + } + ring_num++; + } + } + + return ret; +} +
+module_vfio_reset_handler("brcm,iproc-flexrm-mbox", + vfio_platform_bcmflexrm_reset); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Anup Patel "); +MODULE_DESCRIPTION("Reset support for Broadcom FlexRM VFIO platform device"); -- cgit v1.1