summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms/powernv/pci-ioda.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/powernv/pci-ioda.c')
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c102
1 files changed, 53 insertions, 49 deletions
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index bcb3bfc..7ecc186 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -25,6 +25,7 @@
#include <linux/iommu.h>
#include <linux/rculist.h>
#include <linux/sizes.h>
+#include <linux/vmalloc.h>
#include <asm/sections.h>
#include <asm/io.h>
@@ -1088,6 +1089,9 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
pe->pbus = NULL;
pe->mve_number = -1;
pe->rid = dev->bus->number << 8 | pdn->devfn;
+ pe->tces = NULL;
+ pe->tce_tracker = NULL;
+ pe->tce_bitmap = NULL;
pe_info(pe, "Associated device to PE\n");
@@ -1569,6 +1573,9 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
pe->mve_number = -1;
pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) |
pci_iov_virtfn_devfn(pdev, vf_index);
+ pe->tces = NULL;
+ pe->tce_tracker = NULL;
+ pe->tce_bitmap = NULL;
pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n",
hose->global_number, pdev->bus->number,
@@ -1774,43 +1781,40 @@ static bool pnv_pci_ioda_pe_single_vendor(struct pnv_ioda_pe *pe)
return true;
}
-/*
- * Reconfigure TVE#0 to be usable as 64-bit DMA space.
- *
- * The first 4GB of virtual memory for a PE is reserved for 32-bit accesses.
- * Devices can only access more than that if bit 59 of the PCI address is set
- * by hardware, which indicates TVE#1 should be used instead of TVE#0.
- * Many PCI devices are not capable of addressing that many bits, and as a
- * result are limited to the 4GB of virtual memory made available to 32-bit
- * devices in TVE#0.
- *
- * In order to work around this, reconfigure TVE#0 to be suitable for 64-bit
- * devices by configuring the virtual memory past the first 4GB inaccessible
- * by 64-bit DMAs. This should only be used by devices that want more than
- * 4GB, and only on PEs that have no 32-bit devices.
- *
- * Currently this will only work on PHB3 (POWER8).
- */
-static int pnv_pci_ioda_dma_64bit_bypass(struct pnv_ioda_pe *pe)
+static int pnv_pci_pseudo_bypass_setup(struct pnv_ioda_pe *pe)
{
- u64 window_size, table_size, tce_count, addr;
+ u64 tce_count, table_size, window_size;
+ struct pnv_phb *p = pe->phb;
struct page *table_pages;
- u64 tce_order = 28; /* 256MB TCEs */
__be64 *tces;
- s64 rc;
+ int rc = -ENOMEM;
+ int bitmap_size, tracker_entries;
+
+ /*
+ * XXX These are factors for scaling the size of the TCE table, and
+ * the table that tracks these allocations. These should eventually
+ * be kernel command line options with defaults above 1, for situations
+ * where your memory expands after the machine has booted.
+ */
+ int tce_size_factor = 1;
+ int tracking_table_factor = 1;
/*
- * Window size needs to be a power of two, but needs to account for
- * shifting memory by the 4GB offset required to skip 32bit space.
+ * The window size covers all of memory (and optionally more), with
+ * enough tracker entries to cover them all being allocated. So we
+ * create enough TCEs to cover all of memory at once.
*/
- window_size = roundup_pow_of_two(memory_hotplug_max() + (1ULL << 32));
- tce_count = window_size >> tce_order;
+ window_size = roundup_pow_of_two(tce_size_factor * memory_hotplug_max());
+ tracker_entries = (tracking_table_factor * memory_hotplug_max()) >>
+ p->ioda.max_tce_order;
+ tce_count = window_size >> p->ioda.max_tce_order;
+ bitmap_size = BITS_TO_LONGS(tce_count) * sizeof(unsigned long);
table_size = tce_count << 3;
if (table_size < PAGE_SIZE)
table_size = PAGE_SIZE;
- table_pages = alloc_pages_node(pe->phb->hose->node, GFP_KERNEL,
+ table_pages = alloc_pages_node(p->hose->node, GFP_KERNEL,
get_order(table_size));
if (!table_pages)
goto err;
@@ -1821,26 +1825,33 @@ static int pnv_pci_ioda_dma_64bit_bypass(struct pnv_ioda_pe *pe)
memset(tces, 0, table_size);
- for (addr = 0; addr < memory_hotplug_max(); addr += (1 << tce_order)) {
- tces[(addr + (1ULL << 32)) >> tce_order] =
- cpu_to_be64(addr | TCE_PCI_READ | TCE_PCI_WRITE);
- }
+ pe->tces = tces;
+ pe->tce_count = tce_count;
+ pe->tce_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+ /* The tracking table has two u64s per TCE */
+ pe->tce_tracker = vzalloc(sizeof(u64) * 2 * tracker_entries);
+ spin_lock_init(&pe->tce_alloc_lock);
+
+ /* mark the first 4GB as reserved so this can still be used for 32bit */
+ bitmap_set(pe->tce_bitmap, 0, 1ULL << (32 - p->ioda.max_tce_order));
+
+ pe_info(pe, "pseudo-bypass sizes: tracker %d bitmap %d TCEs %lld\n",
+ tracker_entries, bitmap_size, tce_count);
rc = opal_pci_map_pe_dma_window(pe->phb->opal_id,
pe->pe_number,
- /* reconfigure window 0 */
(pe->pe_number << 1) + 0,
1,
__pa(tces),
table_size,
- 1 << tce_order);
+ 1 << p->ioda.max_tce_order);
if (rc == OPAL_SUCCESS) {
- pe_info(pe, "Using 64-bit DMA iommu bypass (through TVE#0)\n");
+ pe_info(pe, "TCE tables configured for pseudo-bypass\n");
return 0;
}
err:
- pe_err(pe, "Error configuring 64-bit DMA bypass\n");
- return -EIO;
+ pe_err(pe, "error configuring pseudo-bypass\n");
+ return rc;
}
static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
@@ -1851,7 +1862,6 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
struct pnv_ioda_pe *pe;
uint64_t top;
bool bypass = false;
- s64 rc;
if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
return -ENODEV;
@@ -1868,21 +1878,15 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
} else {
/*
* If the device can't set the TCE bypass bit but still wants
- * to access 4GB or more, on PHB3 we can reconfigure TVE#0 to
- * bypass the 32-bit region and be usable for 64-bit DMAs.
- * The device needs to be able to address all of this space.
+ * to access 4GB or more, we need to use a different set of DMA
+ * operations with an indirect mapping.
*/
if (dma_mask >> 32 &&
- dma_mask > (memory_hotplug_max() + (1ULL << 32)) &&
- pnv_pci_ioda_pe_single_vendor(pe) &&
- phb->model == PNV_PHB_MODEL_PHB3) {
- /* Configure the bypass mode */
- rc = pnv_pci_ioda_dma_64bit_bypass(pe);
- if (rc)
- return rc;
- /* 4GB offset bypasses 32-bit space */
- set_dma_offset(&pdev->dev, (1ULL << 32));
- set_dma_ops(&pdev->dev, &dma_nommu_ops);
+ phb->model != PNV_PHB_MODEL_P7IOC &&
+ pnv_pci_ioda_pe_single_vendor(pe)) {
+ if (!pe->tces)
+ pnv_pci_pseudo_bypass_setup(pe);
+ set_dma_ops(&pdev->dev, &dma_pseudo_bypass_ops);
} else if (dma_mask >> 32 && dma_mask != DMA_BIT_MASK(64)) {
/*
* Fail the request if a DMA mask between 32 and 64 bits
OpenPOWER on IntegriCloud