summary | refs | log | tree | commit | diff | stats
path: root/arch/powerpc/platforms
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2016-05-20 10:12:41 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-05-20 10:12:41 -0700
commit: c04a5880299eab3da8c10547db96ea9cdffd44a6 (patch)
tree: 8708b60e410780ce8ea2074335033b016cab4c4f /arch/powerpc/platforms
parent: a1c28b75a95808161cacbb3531c418abe248994e (diff)
parent: 138a076496e61c68ebc1dcccc088705826bbe26d (diff)
download: op-kernel-dev-c04a5880299eab3da8c10547db96ea9cdffd44a6.zip
download: op-kernel-dev-c04a5880299eab3da8c10547db96ea9cdffd44a6.tar.gz
Merge tag 'powerpc-4.7-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
 "Highlights:
   - Support for Power ISA 3.0 (Power9) Radix Tree MMU from Aneesh Kumar K.V
   - Live patching support for ppc64le (also merged via livepatching.git)

  Various cleanups & minor fixes from:
   - Aaro Koskinen, Alexey Kardashevskiy, Andrew Donnellan, Aneesh Kumar K.V, Chris Smart, Daniel Axtens, Frederic Barrat, Gavin Shan, Ian Munsie, Lennart Sorensen, Madhavan Srinivasan, Mahesh Salgaonkar, Markus Elfring, Michael Ellerman, Oliver O'Halloran, Paul Gortmaker, Paul Mackerras, Rashmica Gupta, Russell Currey, Suraj Jitindar Singh, Thiago Jung Bauermann, Valentin Rothberg, Vipin K Parashar.

  General:
   - Update LMB associativity index during DLPAR add/remove from Nathan Fontenot
   - Fix branching to OOL handlers in relocatable kernel from Hari Bathini
   - Add support for userspace Power9 copy/paste from Chris Smart
   - Always use STRICT_MM_TYPECHECKS from Michael Ellerman
   - Add mask of possible MMU features from Michael Ellerman

  PCI:
   - Enable pass through of NVLink to guests from Alexey Kardashevskiy
   - Cleanups in preparation for powernv PCI hotplug from Gavin Shan
   - Don't report error in eeh_pe_reset_and_recover() from Gavin Shan
   - Restore initial state in eeh_pe_reset_and_recover() from Gavin Shan
   - Revert "powerpc/eeh: Fix crash in eeh_add_device_early() on Cell" from Guilherme G Piccoli
   - Remove the dependency on EEH struct in DDW mechanism from Guilherme G Piccoli

  selftests:
   - Test cp_abort during context switch from Chris Smart
   - Add several tests for transactional memory support from Rashmica Gupta

  perf:
   - Add support for sampling interrupt register state from Anju T
   - Add support for unwinding perf-stackdump from Chandan Kumar

  cxl:
   - Configure the PSL for two CAPI ports on POWER8NVL from Philippe Bergheaud
   - Allow initialization on timebase sync failures from Frederic Barrat
   - Increase timeout for detection of AFU mmio hang from Frederic Barrat
   - Handle num_of_processes larger than can fit in the SPA from Ian Munsie
   - Ensure PSL interrupt is configured for contexts with no AFU IRQs from Ian Munsie
   - Add kernel API to allow a context to operate with relocate disabled from Ian Munsie
   - Check periodically the coherent platform function's state from Christophe Lombard

  Freescale:
   - Updates from Scott: "Contains 86xx fixes, minor device tree fixes, an erratum workaround, and a kconfig dependency fix."

* tag 'powerpc-4.7-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (192 commits)
  powerpc/86xx: Fix PCI interrupt map definition
  powerpc/86xx: Move pci1 definition to the include file
  powerpc/fsl: Fix build of the dtb embedded kernel images
  powerpc/fsl: Fix rcpm compatible string
  powerpc/fsl: Remove FSL_SOC dependency from FSL_LBC
  powerpc/fsl-pci: Add a workaround for PCI 5 errata
  powerpc/fsl: Fix SPI compatible on t208xrdb and t1040rdb
  powerpc/powernv/npu: Add PE to PHB's list
  powerpc/powernv: Fix insufficient memory allocation
  powerpc/iommu: Remove the dependency on EEH struct in DDW mechanism
  Revert "powerpc/eeh: Fix crash in eeh_add_device_early() on Cell"
  powerpc/eeh: Drop unnecessary label in eeh_pe_change_owner()
  powerpc/eeh: Ignore handlers in eeh_pe_reset_and_recover()
  powerpc/eeh: Restore initial state in eeh_pe_reset_and_recover()
  powerpc/eeh: Don't report error in eeh_pe_reset_and_recover()
  Revert "powerpc/powernv: Exclude root bus in pnv_pci_reset_secondary_bus()"
  powerpc/powernv/npu: Enable NVLink pass through
  powerpc/powernv/npu: Rework TCE Kill handling
  powerpc/powernv/npu: Add set/unset window helpers
  powerpc/powernv/ioda2: Export debug helper pe_level_printk()
  ...
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r-- arch/powerpc/platforms/Kconfig.cputype 11
-rw-r--r-- arch/powerpc/platforms/cell/spu_base.c 9
-rw-r--r-- arch/powerpc/platforms/cell/spufs/fault.c 4
-rw-r--r-- arch/powerpc/platforms/powernv/eeh-powernv.c 69
-rw-r--r-- arch/powerpc/platforms/powernv/npu-dma.c 283
-rw-r--r-- arch/powerpc/platforms/powernv/opal-hmi.c 8
-rw-r--r-- arch/powerpc/platforms/powernv/pci-ioda.c 953
-rw-r--r-- arch/powerpc/platforms/powernv/pci.c 19
-rw-r--r-- arch/powerpc/platforms/powernv/pci.h 72
-rw-r--r-- arch/powerpc/platforms/powernv/setup.c 5
-rw-r--r-- arch/powerpc/platforms/ps3/htab.c 2
-rw-r--r-- arch/powerpc/platforms/ps3/spu.c 4
-rw-r--r-- arch/powerpc/platforms/pseries/hotplug-memory.c 225
-rw-r--r-- arch/powerpc/platforms/pseries/iommu.c 24
-rw-r--r-- arch/powerpc/platforms/pseries/lpar.c 20
-rw-r--r-- arch/powerpc/platforms/pseries/lparcfg.c 3
-rw-r--r-- arch/powerpc/platforms/pseries/mobility.c 4
-rw-r--r-- arch/powerpc/platforms/pseries/msi.c 4
-rw-r--r-- arch/powerpc/platforms/pseries/pci_dlpar.c 32
-rw-r--r-- arch/powerpc/platforms/pseries/reconfig.c 5
-rw-r--r-- arch/powerpc/platforms/pseries/setup.c 4
21 files changed, 1031 insertions, 729 deletions
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 142dff5..77e9b8d 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -72,7 +72,7 @@ config PPC_BOOK3S_64
select PPC_FPU
select PPC_HAVE_PMU_SUPPORT
select SYS_SUPPORTS_HUGETLBFS
- select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select ARCH_SUPPORTS_NUMA_BALANCING
select IRQ_WORK
@@ -331,6 +331,15 @@ config PPC_STD_MMU_64
def_bool y
depends on PPC_STD_MMU && PPC64
+config PPC_RADIX_MMU
+ bool "Radix MMU Support"
+ depends on PPC_BOOK3S_64
+ default y
+ help
+ Enable support for the Power ISA 3.0 Radix style MMU. Currently this
+ is only implemented by IBM Power9 CPUs, if you don't have one of them
+ you can probably disable this.
+
config PPC_MMU_NOHASH
def_bool y
depends on !PPC_STD_MMU
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index f7af74f..3cbe38f 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -24,7 +24,7 @@
#include <linux/interrupt.h>
#include <linux/list.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/wait.h>
@@ -197,7 +197,7 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
(REGION_ID(ea) != USER_REGION_ID)) {
spin_unlock(&spu->register_lock);
- ret = hash_page(ea, _PAGE_PRESENT, 0x300, dsisr);
+ ret = hash_page(ea, _PAGE_PRESENT | _PAGE_READ, 0x300, dsisr);
spin_lock(&spu->register_lock);
if (!ret) {
@@ -805,7 +805,4 @@ static int __init init_spu_base(void)
out:
return ret;
}
-module_init(init_spu_base);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>");
+device_initcall(init_spu_base);
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
index d98f845..e29e4d5 100644
--- a/arch/powerpc/platforms/cell/spufs/fault.c
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -141,8 +141,8 @@ int spufs_handle_class1(struct spu_context *ctx)
/* we must not hold the lock when entering copro_handle_mm_fault */
spu_release(ctx);
- access = (_PAGE_PRESENT | _PAGE_USER);
- access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
+ access = (_PAGE_PRESENT | _PAGE_READ);
+ access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_WRITE : 0UL;
local_irq_save(flags);
ret = hash_page(ea, access, 0x300, dsisr);
local_irq_restore(flags);
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 950b3e5..9226df1 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -75,7 +75,7 @@ static int pnv_eeh_init(void)
* and P7IOC separately. So we should regard
* PE#0 as valid for PHB3 and P7IOC.
*/
- if (phb->ioda.reserved_pe != 0)
+ if (phb->ioda.reserved_pe_idx != 0)
eeh_add_flag(EEH_VALID_PE_ZERO);
break;
@@ -1009,8 +1009,9 @@ static int pnv_eeh_reset_vf_pe(struct eeh_pe *pe, int option)
static int pnv_eeh_reset(struct eeh_pe *pe, int option)
{
struct pci_controller *hose = pe->phb;
+ struct pnv_phb *phb;
struct pci_bus *bus;
- int ret;
+ int64_t rc;
/*
* For PHB reset, we always have complete reset. For those PEs whose
@@ -1026,45 +1027,39 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option)
* reset. The side effect is that EEH core has to clear the frozen
* state explicitly after BAR restore.
*/
- if (pe->type & EEH_PE_PHB) {
- ret = pnv_eeh_phb_reset(hose, option);
- } else {
- struct pnv_phb *phb;
- s64 rc;
+ if (pe->type & EEH_PE_PHB)
+ return pnv_eeh_phb_reset(hose, option);
- /*
- * The frozen PE might be caused by PAPR error injection
- * registers, which are expected to be cleared after hitting
- * frozen PE as stated in the hardware spec. Unfortunately,
- * that's not true on P7IOC. So we have to clear it manually
- * to avoid recursive EEH errors during recovery.
- */
- phb = hose->private_data;
- if (phb->model == PNV_PHB_MODEL_P7IOC &&
- (option == EEH_RESET_HOT ||
- option == EEH_RESET_FUNDAMENTAL)) {
- rc = opal_pci_reset(phb->opal_id,
- OPAL_RESET_PHB_ERROR,
- OPAL_ASSERT_RESET);
- if (rc != OPAL_SUCCESS) {
- pr_warn("%s: Failure %lld clearing "
- "error injection registers\n",
- __func__, rc);
- return -EIO;
- }
+ /*
+ * The frozen PE might be caused by PAPR error injection
+ * registers, which are expected to be cleared after hitting
+ * frozen PE as stated in the hardware spec. Unfortunately,
+ * that's not true on P7IOC. So we have to clear it manually
+ * to avoid recursive EEH errors during recovery.
+ */
+ phb = hose->private_data;
+ if (phb->model == PNV_PHB_MODEL_P7IOC &&
+ (option == EEH_RESET_HOT ||
+ option == EEH_RESET_FUNDAMENTAL)) {
+ rc = opal_pci_reset(phb->opal_id,
+ OPAL_RESET_PHB_ERROR,
+ OPAL_ASSERT_RESET);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn("%s: Failure %lld clearing error injection registers\n",
+ __func__, rc);
+ return -EIO;
}
-
- bus = eeh_pe_bus_get(pe);
- if (pe->type & EEH_PE_VF)
- ret = pnv_eeh_reset_vf_pe(pe, option);
- else if (pci_is_root_bus(bus) ||
- pci_is_root_bus(bus->parent))
- ret = pnv_eeh_root_reset(hose, option);
- else
- ret = pnv_eeh_bridge_reset(bus->self, option);
}
- return ret;
+ bus = eeh_pe_bus_get(pe);
+ if (pe->type & EEH_PE_VF)
+ return pnv_eeh_reset_vf_pe(pe, option);
+
+ if (pci_is_root_bus(bus) ||
+ pci_is_root_bus(bus->parent))
+ return pnv_eeh_root_reset(hose, option);
+
+ return pnv_eeh_bridge_reset(bus->self, option);
}
/**
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 7229acd..0459e10 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -12,6 +12,7 @@
#include <linux/export.h>
#include <linux/pci.h>
#include <linux/memblock.h>
+#include <linux/iommu.h>
#include <asm/iommu.h>
#include <asm/pnv-pci.h>
@@ -25,8 +26,6 @@
* Other types of TCE cache invalidation are not functional in the
* hardware.
*/
-#define TCE_KILL_INVAL_ALL PPC_BIT(0)
-
static struct pci_dev *get_pci_dev(struct device_node *dn)
{
return PCI_DN(dn)->pcidev;
@@ -138,22 +137,17 @@ static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe,
struct pnv_ioda_pe *pe;
struct pci_dn *pdn;
- if (npe->flags & PNV_IODA_PE_PEER) {
- pe = npe->peers[0];
- pdev = pe->pdev;
- } else {
- pdev = pnv_pci_get_gpu_dev(npe->pdev);
- if (!pdev)
- return NULL;
+ pdev = pnv_pci_get_gpu_dev(npe->pdev);
+ if (!pdev)
+ return NULL;
- pdn = pci_get_pdn(pdev);
- if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
- return NULL;
+ pdn = pci_get_pdn(pdev);
+ if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
+ return NULL;
- hose = pci_bus_to_host(pdev->bus);
- phb = hose->private_data;
- pe = &phb->ioda.pe_array[pdn->pe_number];
- }
+ hose = pci_bus_to_host(pdev->bus);
+ phb = hose->private_data;
+ pe = &phb->ioda.pe_array[pdn->pe_number];
if (gpdev)
*gpdev = pdev;
@@ -161,92 +155,70 @@ static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe,
return pe;
}
-void pnv_npu_tce_invalidate_entire(struct pnv_ioda_pe *npe)
+long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num,
+ struct iommu_table *tbl)
{
struct pnv_phb *phb = npe->phb;
+ int64_t rc;
+ const unsigned long size = tbl->it_indirect_levels ?
+ tbl->it_level_size : tbl->it_size;
+ const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
+ const __u64 win_size = tbl->it_size << tbl->it_page_shift;
+
+ pe_info(npe, "Setting up window %llx..%llx pg=%lx\n",
+ start_addr, start_addr + win_size - 1,
+ IOMMU_PAGE_SIZE(tbl));
+
+ rc = opal_pci_map_pe_dma_window(phb->opal_id,
+ npe->pe_number,
+ npe->pe_number,
+ tbl->it_indirect_levels + 1,
+ __pa(tbl->it_base),
+ size << 3,
+ IOMMU_PAGE_SIZE(tbl));
+ if (rc) {
+ pe_err(npe, "Failed to configure TCE table, err %lld\n", rc);
+ return rc;
+ }
+ pnv_pci_ioda2_tce_invalidate_entire(phb, false);
- if (WARN_ON(phb->type != PNV_PHB_NPU ||
- !phb->ioda.tce_inval_reg ||
- !(npe->flags & PNV_IODA_PE_DEV)))
- return;
+ /* Add the table to the list so its TCE cache will get invalidated */
+ pnv_pci_link_table_and_group(phb->hose->node, num,
+ tbl, &npe->table_group);
- mb(); /* Ensure previous TCE table stores are visible */
- __raw_writeq(cpu_to_be64(TCE_KILL_INVAL_ALL),
- phb->ioda.tce_inval_reg);
+ return 0;
}
-void pnv_npu_tce_invalidate(struct pnv_ioda_pe *npe,
- struct iommu_table *tbl,
- unsigned long index,
- unsigned long npages,
- bool rm)
+long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num)
{
struct pnv_phb *phb = npe->phb;
+ int64_t rc;
- /* We can only invalidate the whole cache on NPU */
- unsigned long val = TCE_KILL_INVAL_ALL;
-
- if (WARN_ON(phb->type != PNV_PHB_NPU ||
- !phb->ioda.tce_inval_reg ||
- !(npe->flags & PNV_IODA_PE_DEV)))
- return;
-
- mb(); /* Ensure previous TCE table stores are visible */
- if (rm)
- __raw_rm_writeq(cpu_to_be64(val),
- (__be64 __iomem *) phb->ioda.tce_inval_reg_phys);
- else
- __raw_writeq(cpu_to_be64(val),
- phb->ioda.tce_inval_reg);
-}
-
-void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe)
-{
- struct pnv_ioda_pe *gpe;
- struct pci_dev *gpdev;
- int i, avail = -1;
-
- if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV))
- return;
-
- gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
- if (!gpe)
- return;
-
- for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
- /* Nothing to do if the PE is already connected. */
- if (gpe->peers[i] == npe)
- return;
+ pe_info(npe, "Removing DMA window\n");
- if (!gpe->peers[i])
- avail = i;
+ rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
+ npe->pe_number,
+ 0/* levels */, 0/* table address */,
+ 0/* table size */, 0/* page size */);
+ if (rc) {
+ pe_err(npe, "Unmapping failed, ret = %lld\n", rc);
+ return rc;
}
+ pnv_pci_ioda2_tce_invalidate_entire(phb, false);
- if (WARN_ON(avail < 0))
- return;
-
- gpe->peers[avail] = npe;
- gpe->flags |= PNV_IODA_PE_PEER;
+ pnv_pci_unlink_table_and_group(npe->table_group.tables[num],
+ &npe->table_group);
- /*
- * We assume that the NPU devices only have a single peer PE
- * (the GPU PCIe device PE).
- */
- npe->peers[0] = gpe;
- npe->flags |= PNV_IODA_PE_PEER;
+ return 0;
}
/*
- * For the NPU we want to point the TCE table at the same table as the
- * real PCI device.
+ * Enables 32 bit DMA on NPU.
*/
-static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe)
+static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe)
{
- struct pnv_phb *phb = npe->phb;
struct pci_dev *gpdev;
struct pnv_ioda_pe *gpe;
- void *addr;
- unsigned int size;
int64_t rc;
/*
@@ -260,14 +232,7 @@ static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe)
if (!gpe)
return;
- addr = (void *)gpe->table_group.tables[0]->it_base;
- size = gpe->table_group.tables[0]->it_size << 3;
- rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
- npe->pe_number, 1, __pa(addr),
- size, 0x1000);
- if (rc != OPAL_SUCCESS)
- pr_warn("%s: Error %lld setting DMA window on PHB#%d-PE#%d\n",
- __func__, rc, phb->hose->global_number, npe->pe_number);
+ rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]);
/*
* We don't initialise npu_pe->tce32_table as we always use
@@ -277,72 +242,120 @@ static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe)
}
/*
- * Enable/disable bypass mode on the NPU. The NPU only supports one
+ * Enables bypass mode on the NPU. The NPU only supports one
* window per link, so bypass needs to be explicitly enabled or
* disabled. Unlike for a PHB3 bypass and non-bypass modes can't be
* active at the same time.
*/
-int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe, bool enable)
+static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe)
{
struct pnv_phb *phb = npe->phb;
int64_t rc = 0;
+ phys_addr_t top = memblock_end_of_DRAM();
if (phb->type != PNV_PHB_NPU || !npe->pdev)
return -EINVAL;
- if (enable) {
- /* Enable the bypass window */
- phys_addr_t top = memblock_end_of_DRAM();
-
- npe->tce_bypass_base = 0;
- top = roundup_pow_of_two(top);
- dev_info(&npe->pdev->dev, "Enabling bypass for PE %d\n",
- npe->pe_number);
- rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
- npe->pe_number, npe->pe_number,
- npe->tce_bypass_base, top);
- } else {
- /*
- * Disable the bypass window by replacing it with the
- * TCE32 window.
- */
- pnv_npu_disable_bypass(npe);
- }
+ rc = pnv_npu_unset_window(npe, 0);
+ if (rc != OPAL_SUCCESS)
+ return rc;
+
+ /* Enable the bypass window */
+
+ top = roundup_pow_of_two(top);
+ dev_info(&npe->pdev->dev, "Enabling bypass for PE %d\n",
+ npe->pe_number);
+ rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
+ npe->pe_number, npe->pe_number,
+ 0 /* bypass base */, top);
+
+ if (rc == OPAL_SUCCESS)
+ pnv_pci_ioda2_tce_invalidate_entire(phb, false);
return rc;
}
-int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask)
+void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass)
{
- struct pci_controller *hose = pci_bus_to_host(npdev->bus);
- struct pnv_phb *phb = hose->private_data;
- struct pci_dn *pdn = pci_get_pdn(npdev);
- struct pnv_ioda_pe *npe, *gpe;
- struct pci_dev *gpdev;
- uint64_t top;
- bool bypass = false;
+ int i;
+ struct pnv_phb *phb;
+ struct pci_dn *pdn;
+ struct pnv_ioda_pe *npe;
+ struct pci_dev *npdev;
- if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
- return -ENXIO;
+ for (i = 0; ; ++i) {
+ npdev = pnv_pci_get_npu_dev(gpdev, i);
- /* We only do bypass if it's enabled on the linked device */
- npe = &phb->ioda.pe_array[pdn->pe_number];
- gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
- if (!gpe)
- return -ENODEV;
+ if (!npdev)
+ break;
+
+ pdn = pci_get_pdn(npdev);
+ if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
+ return;
+
+ phb = pci_bus_to_host(npdev->bus)->private_data;
+
+ /* We only do bypass if it's enabled on the linked device */
+ npe = &phb->ioda.pe_array[pdn->pe_number];
+
+ if (bypass) {
+ dev_info(&npdev->dev,
+ "Using 64-bit DMA iommu bypass\n");
+ pnv_npu_dma_set_bypass(npe);
+ } else {
+ dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n");
+ pnv_npu_dma_set_32(npe);
+ }
+ }
+}
- if (gpe->tce_bypass_enabled) {
- top = gpe->tce_bypass_base + memblock_end_of_DRAM() - 1;
- bypass = (dma_mask >= top);
+/* Switch ownership from platform code to external user (e.g. VFIO) */
+void pnv_npu_take_ownership(struct pnv_ioda_pe *npe)
+{
+ struct pnv_phb *phb = npe->phb;
+ int64_t rc;
+
+ /*
+ * Note: NPU has just a single TVE in the hardware which means that
+ * while used by the kernel, it can have either 32bit window or
+ * DMA bypass but never both. So we deconfigure 32bit window only
+ * if it was enabled at the moment of ownership change.
+ */
+ if (npe->table_group.tables[0]) {
+ pnv_npu_unset_window(npe, 0);
+ return;
}
- if (bypass)
- dev_info(&npdev->dev, "Using 64-bit DMA iommu bypass\n");
- else
- dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n");
+ /* Disable bypass */
+ rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
+ npe->pe_number, npe->pe_number,
+ 0 /* bypass base */, 0);
+ if (rc) {
+ pe_err(npe, "Failed to disable bypass, err %lld\n", rc);
+ return;
+ }
+ pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false);
+}
- pnv_npu_dma_set_bypass(npe, bypass);
- *npdev->dev.dma_mask = dma_mask;
+struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe)
+{
+ struct pnv_phb *phb = npe->phb;
+ struct pci_bus *pbus = phb->hose->bus;
+ struct pci_dev *npdev, *gpdev = NULL, *gptmp;
+ struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
- return 0;
+ if (!gpe || !gpdev)
+ return NULL;
+
+ list_for_each_entry(npdev, &pbus->devices, bus_list) {
+ gptmp = pnv_pci_get_gpu_dev(npdev);
+
+ if (gptmp != gpdev)
+ continue;
+
+ pe_info(gpe, "Attached NPU %s\n", dev_name(&npdev->dev));
+ iommu_group_add_device(gpe->table_group.group, &npdev->dev);
+ }
+
+ return gpe;
}
diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
index d000f4e..c0a8201 100644
--- a/arch/powerpc/platforms/powernv/opal-hmi.c
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -150,15 +150,17 @@ static void print_nx_checkstop_reason(const char *level,
static void print_checkstop_reason(const char *level,
struct OpalHMIEvent *hmi_evt)
{
- switch (hmi_evt->u.xstop_error.xstop_type) {
+ uint8_t type = hmi_evt->u.xstop_error.xstop_type;
+ switch (type) {
case CHECKSTOP_TYPE_CORE:
print_core_checkstop_reason(level, hmi_evt);
break;
case CHECKSTOP_TYPE_NX:
print_nx_checkstop_reason(level, hmi_evt);
break;
- case CHECKSTOP_TYPE_UNKNOWN:
- printk("%s Unknown Malfunction Alert.\n", level);
+ default:
+ printk("%s Unknown Malfunction Alert of type %d\n",
+ level, type);
break;
}
}
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index c5baaf3..3a5ea82 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -48,15 +48,16 @@
#include "powernv.h"
#include "pci.h"
-/* 256M DMA window, 4K TCE pages, 8 bytes TCE */
-#define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8)
+#define PNV_IODA1_M64_NUM 16 /* Number of M64 BARs */
+#define PNV_IODA1_M64_SEGS 8 /* Segments per M64 BAR */
+#define PNV_IODA1_DMA32_SEGSIZE 0x10000000
#define POWERNV_IOMMU_DEFAULT_LEVELS 1
#define POWERNV_IOMMU_MAX_LEVELS 5
static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
-static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
+void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
const char *fmt, ...)
{
struct va_format vaf;
@@ -87,13 +88,6 @@ static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
va_end(args);
}
-#define pe_err(pe, fmt, ...) \
- pe_level_printk(pe, KERN_ERR, fmt, ##__VA_ARGS__)
-#define pe_warn(pe, fmt, ...) \
- pe_level_printk(pe, KERN_WARNING, fmt, ##__VA_ARGS__)
-#define pe_info(pe, fmt, ...) \
- pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__)
-
static bool pnv_iommu_bypass_disabled __read_mostly;
static int __init iommu_setup(char *str)
@@ -122,9 +116,17 @@ static inline bool pnv_pci_is_mem_pref_64(unsigned long flags)
(IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
}
+static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
+{
+ phb->ioda.pe_array[pe_no].phb = phb;
+ phb->ioda.pe_array[pe_no].pe_number = pe_no;
+
+ return &phb->ioda.pe_array[pe_no];
+}
+
static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
{
- if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe)) {
+ if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe_num)) {
pr_warn("%s: Invalid PE %d on PHB#%x\n",
__func__, pe_no, phb->hose->global_number);
return;
@@ -134,32 +136,31 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
pr_debug("%s: PE %d was reserved on PHB#%x\n",
__func__, pe_no, phb->hose->global_number);
- phb->ioda.pe_array[pe_no].phb = phb;
- phb->ioda.pe_array[pe_no].pe_number = pe_no;
+ pnv_ioda_init_pe(phb, pe_no);
}
-static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
+static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
{
unsigned long pe;
do {
pe = find_next_zero_bit(phb->ioda.pe_alloc,
- phb->ioda.total_pe, 0);
- if (pe >= phb->ioda.total_pe)
- return IODA_INVALID_PE;
+ phb->ioda.total_pe_num, 0);
+ if (pe >= phb->ioda.total_pe_num)
+ return NULL;
} while(test_and_set_bit(pe, phb->ioda.pe_alloc));
- phb->ioda.pe_array[pe].phb = phb;
- phb->ioda.pe_array[pe].pe_number = pe;
- return pe;
+ return pnv_ioda_init_pe(phb, pe);
}
-static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
+static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
{
- WARN_ON(phb->ioda.pe_array[pe].pdev);
+ struct pnv_phb *phb = pe->phb;
+
+ WARN_ON(pe->pdev);
- memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
- clear_bit(pe, phb->ioda.pe_alloc);
+ memset(pe, 0, sizeof(struct pnv_ioda_pe));
+ clear_bit(pe->pe_number, phb->ioda.pe_alloc);
}
/* The default M64 BAR is shared by all PEs */
@@ -199,13 +200,13 @@ static int pnv_ioda2_init_m64(struct pnv_phb *phb)
* expected to be 0 or last one of PE capabicity.
*/
r = &phb->hose->mem_resources[1];
- if (phb->ioda.reserved_pe == 0)
+ if (phb->ioda.reserved_pe_idx == 0)
r->start += phb->ioda.m64_segsize;
- else if (phb->ioda.reserved_pe == (phb->ioda.total_pe - 1))
+ else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1))
r->end -= phb->ioda.m64_segsize;
else
pr_warn(" Cannot strip M64 segment for reserved PE#%d\n",
- phb->ioda.reserved_pe);
+ phb->ioda.reserved_pe_idx);
return 0;
@@ -219,7 +220,7 @@ fail:
return -EIO;
}
-static void pnv_ioda2_reserve_dev_m64_pe(struct pci_dev *pdev,
+static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev,
unsigned long *pe_bitmap)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
@@ -246,22 +247,80 @@ static void pnv_ioda2_reserve_dev_m64_pe(struct pci_dev *pdev,
}
}
-static void pnv_ioda2_reserve_m64_pe(struct pci_bus *bus,
- unsigned long *pe_bitmap,
- bool all)
+static int pnv_ioda1_init_m64(struct pnv_phb *phb)
+{
+ struct resource *r;
+ int index;
+
+ /*
+ * There are 16 M64 BARs, each of which has 8 segments. So
+ * there are as many M64 segments as the maximum number of
+ * PEs, which is 128.
+ */
+ for (index = 0; index < PNV_IODA1_M64_NUM; index++) {
+ unsigned long base, segsz = phb->ioda.m64_segsize;
+ int64_t rc;
+
+ base = phb->ioda.m64_base +
+ index * PNV_IODA1_M64_SEGS * segsz;
+ rc = opal_pci_set_phb_mem_window(phb->opal_id,
+ OPAL_M64_WINDOW_TYPE, index, base, 0,
+ PNV_IODA1_M64_SEGS * segsz);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn(" Error %lld setting M64 PHB#%d-BAR#%d\n",
+ rc, phb->hose->global_number, index);
+ goto fail;
+ }
+
+ rc = opal_pci_phb_mmio_enable(phb->opal_id,
+ OPAL_M64_WINDOW_TYPE, index,
+ OPAL_ENABLE_M64_SPLIT);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn(" Error %lld enabling M64 PHB#%d-BAR#%d\n",
+ rc, phb->hose->global_number, index);
+ goto fail;
+ }
+ }
+
+ /*
+ * Exclude the segment used by the reserved PE, which
+ * is expected to be 0 or last supported PE#.
+ */
+ r = &phb->hose->mem_resources[1];
+ if (phb->ioda.reserved_pe_idx == 0)
+ r->start += phb->ioda.m64_segsize;
+ else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1))
+ r->end -= phb->ioda.m64_segsize;
+ else
+ WARN(1, "Wrong reserved PE#%d on PHB#%d\n",
+ phb->ioda.reserved_pe_idx, phb->hose->global_number);
+
+ return 0;
+
+fail:
+ for ( ; index >= 0; index--)
+ opal_pci_phb_mmio_enable(phb->opal_id,
+ OPAL_M64_WINDOW_TYPE, index, OPAL_DISABLE_M64);
+
+ return -EIO;
+}
+
+static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus,
+ unsigned long *pe_bitmap,
+ bool all)
{
struct pci_dev *pdev;
list_for_each_entry(pdev, &bus->devices, bus_list) {
- pnv_ioda2_reserve_dev_m64_pe(pdev, pe_bitmap);
+ pnv_ioda_reserve_dev_m64_pe(pdev, pe_bitmap);
if (all && pdev->subordinate)
- pnv_ioda2_reserve_m64_pe(pdev->subordinate,
- pe_bitmap, all);
+ pnv_ioda_reserve_m64_pe(pdev->subordinate,
+ pe_bitmap, all);
}
}
-static int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all)
+static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all)
{
struct pci_controller *hose = pci_bus_to_host(bus);
struct pnv_phb *phb = hose->private_data;
@@ -271,28 +330,28 @@ static int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all)
/* Root bus shouldn't use M64 */
if (pci_is_root_bus(bus))
- return IODA_INVALID_PE;
+ return NULL;
/* Allocate bitmap */
- size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
+ size = _ALIGN_UP(phb->ioda.total_pe_num / 8, sizeof(unsigned long));
pe_alloc = kzalloc(size, GFP_KERNEL);
if (!pe_alloc) {
pr_warn("%s: Out of memory !\n",
__func__);
- return IODA_INVALID_PE;
+ return NULL;
}
/* Figure out reserved PE numbers by the PE */
- pnv_ioda2_reserve_m64_pe(bus, pe_alloc, all);
+ pnv_ioda_reserve_m64_pe(bus, pe_alloc, all);
/*
* the current bus might not own M64 window and that's all
* contributed by its child buses. For the case, we needn't
* pick M64 dependent PE#.
*/
- if (bitmap_empty(pe_alloc, phb->ioda.total_pe)) {
+ if (bitmap_empty(pe_alloc, phb->ioda.total_pe_num)) {
kfree(pe_alloc);
- return IODA_INVALID_PE;
+ return NULL;
}
/*
@@ -301,10 +360,11 @@ static int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all)
*/
master_pe = NULL;
i = -1;
- while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) <
- phb->ioda.total_pe) {
+ while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe_num, i + 1)) <
+ phb->ioda.total_pe_num) {
pe = &phb->ioda.pe_array[i];
+ phb->ioda.m64_segmap[pe->pe_number] = pe->pe_number;
if (!master_pe) {
pe->flags |= PNV_IODA_PE_MASTER;
INIT_LIST_HEAD(&pe->slaves);
@@ -314,10 +374,30 @@ static int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all)
pe->master = master_pe;
list_add_tail(&pe->list, &master_pe->slaves);
}
+
+ /*
+ * P7IOC supports M64DT, which helps mapping M64 segment
+ * to one particular PE#. However, PHB3 has fixed mapping
+ * between M64 segment and PE#. In order to have same logic
+ * for P7IOC and PHB3, we enforce fixed mapping between M64
+ * segment and PE# on P7IOC.
+ */
+ if (phb->type == PNV_PHB_IODA1) {
+ int64_t rc;
+
+ rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+ pe->pe_number, OPAL_M64_WINDOW_TYPE,
+ pe->pe_number / PNV_IODA1_M64_SEGS,
+ pe->pe_number % PNV_IODA1_M64_SEGS);
+ if (rc != OPAL_SUCCESS)
+ pr_warn("%s: Error %lld mapping M64 for PHB#%d-PE#%d\n",
+ __func__, rc, phb->hose->global_number,
+ pe->pe_number);
+ }
}
kfree(pe_alloc);
- return master_pe->pe_number;
+ return master_pe;
}
static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
@@ -328,8 +408,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
const u32 *r;
u64 pci_addr;
- /* FIXME: Support M64 for P7IOC */
- if (phb->type != PNV_PHB_IODA2) {
+ if (phb->type != PNV_PHB_IODA1 && phb->type != PNV_PHB_IODA2) {
pr_info(" Not support M64 window\n");
return;
}
@@ -355,7 +434,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
hose->mem_offset[1] = res->start - pci_addr;
phb->ioda.m64_size = resource_size(res);
- phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe;
+ phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe_num;
phb->ioda.m64_base = pci_addr;
pr_info(" MEM64 0x%016llx..0x%016llx -> 0x%016llx\n",
@@ -363,9 +442,12 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
/* Use last M64 BAR to cover M64 window */
phb->ioda.m64_bar_idx = 15;
- phb->init_m64 = pnv_ioda2_init_m64;
- phb->reserve_m64_pe = pnv_ioda2_reserve_m64_pe;
- phb->pick_m64_pe = pnv_ioda2_pick_m64_pe;
+ if (phb->type == PNV_PHB_IODA1)
+ phb->init_m64 = pnv_ioda1_init_m64;
+ else
+ phb->init_m64 = pnv_ioda2_init_m64;
+ phb->reserve_m64_pe = pnv_ioda_reserve_m64_pe;
+ phb->pick_m64_pe = pnv_ioda_pick_m64_pe;
}
static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no)
@@ -456,7 +538,7 @@ static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no)
s64 rc;
/* Sanity check on PE number */
- if (pe_no < 0 || pe_no >= phb->ioda.total_pe)
+ if (pe_no < 0 || pe_no >= phb->ioda.total_pe_num)
return OPAL_EEH_STOPPED_PERM_UNAVAIL;
/*
@@ -808,44 +890,6 @@ out:
return 0;
}
-static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
- struct pnv_ioda_pe *pe)
-{
- struct pnv_ioda_pe *lpe;
-
- list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) {
- if (lpe->dma_weight < pe->dma_weight) {
- list_add_tail(&pe->dma_link, &lpe->dma_link);
- return;
- }
- }
- list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list);
-}
-
-static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
-{
- /* This is quite simplistic. The "base" weight of a device
- * is 10. 0 means no DMA is to be accounted for it.
- */
-
- /* If it's a bridge, no DMA */
- if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
- return 0;
-
- /* Reduce the weight of slow USB controllers */
- if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
- dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
- dev->class == PCI_CLASS_SERIAL_USB_EHCI)
- return 3;
-
- /* Increase the weight of RAID (includes Obsidian) */
- if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
- return 15;
-
- /* Default */
- return 10;
-}
-
#ifdef CONFIG_PCI_IOV
static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
{
@@ -919,7 +963,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
struct pnv_phb *phb = hose->private_data;
struct pci_dn *pdn = pci_get_pdn(dev);
struct pnv_ioda_pe *pe;
- int pe_num;
if (!pdn) {
pr_err("%s: Device tree node not associated properly\n",
@@ -929,8 +972,8 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
if (pdn->pe_number != IODA_INVALID_PE)
return NULL;
- pe_num = pnv_ioda_alloc_pe(phb);
- if (pe_num == IODA_INVALID_PE) {
+ pe = pnv_ioda_alloc_pe(phb);
+ if (!pe) {
pr_warning("%s: Not enough PE# available, disabling device\n",
pci_name(dev));
return NULL;
@@ -943,14 +986,12 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
*
* At some point we want to remove the PDN completely anyways
*/
- pe = &phb->ioda.pe_array[pe_num];
pci_dev_get(dev);
pdn->pcidev = dev;
- pdn->pe_number = pe_num;
+ pdn->pe_number = pe->pe_number;
pe->flags = PNV_IODA_PE_DEV;
pe->pdev = dev;
pe->pbus = NULL;
- pe->tce32_seg = -1;
pe->mve_number = -1;
pe->rid = dev->bus->number << 8 | pdn->devfn;
@@ -958,23 +999,15 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
if (pnv_ioda_configure_pe(phb, pe)) {
/* XXX What do we do here ? */
- if (pe_num)
- pnv_ioda_free_pe(phb, pe_num);
+ pnv_ioda_free_pe(pe);
pdn->pe_number = IODA_INVALID_PE;
pe->pdev = NULL;
pci_dev_put(dev);
return NULL;
}
- /* Assign a DMA weight to the device */
- pe->dma_weight = pnv_ioda_dma_weight(dev);
- if (pe->dma_weight != 0) {
- phb->ioda.dma_weight += pe->dma_weight;
- phb->ioda.dma_pe_count++;
- }
-
- /* Link the PE */
- pnv_ioda_link_pe_by_weight(phb, pe);
+ /* Put PE to the list */
+ list_add_tail(&pe->list, &phb->ioda.pe_list);
return pe;
}
@@ -993,7 +1026,6 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
}
pdn->pcidev = dev;
pdn->pe_number = pe->pe_number;
- pe->dma_weight += pnv_ioda_dma_weight(dev);
if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
pnv_ioda_setup_same_PE(dev->subordinate, pe);
}
@@ -1005,49 +1037,44 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
* subordinate PCI devices and buses. The second type of PE is normally
* originated by PCIe-to-PCI bridge or PLX switch downstream ports.
*/
-static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
+static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
{
struct pci_controller *hose = pci_bus_to_host(bus);
struct pnv_phb *phb = hose->private_data;
- struct pnv_ioda_pe *pe;
- int pe_num = IODA_INVALID_PE;
+ struct pnv_ioda_pe *pe = NULL;
/* Check if PE is determined by M64 */
if (phb->pick_m64_pe)
- pe_num = phb->pick_m64_pe(bus, all);
+ pe = phb->pick_m64_pe(bus, all);
/* The PE number isn't pinned by M64 */
- if (pe_num == IODA_INVALID_PE)
- pe_num = pnv_ioda_alloc_pe(phb);
+ if (!pe)
+ pe = pnv_ioda_alloc_pe(phb);
- if (pe_num == IODA_INVALID_PE) {
+ if (!pe) {
pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
__func__, pci_domain_nr(bus), bus->number);
- return;
+ return NULL;
}
- pe = &phb->ioda.pe_array[pe_num];
pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
pe->pbus = bus;
pe->pdev = NULL;
- pe->tce32_seg = -1;
pe->mve_number = -1;
pe->rid = bus->busn_res.start << 8;
- pe->dma_weight = 0;
if (all)
pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
- bus->busn_res.start, bus->busn_res.end, pe_num);
+ bus->busn_res.start, bus->busn_res.end, pe->pe_number);
else
pe_info(pe, "Secondary bus %d associated with PE#%d\n",
- bus->busn_res.start, pe_num);
+ bus->busn_res.start, pe->pe_number);
if (pnv_ioda_configure_pe(phb, pe)) {
/* XXX What do we do here ? */
- if (pe_num)
- pnv_ioda_free_pe(phb, pe_num);
+ pnv_ioda_free_pe(pe);
pe->pbus = NULL;
- return;
+ return NULL;
}
/* Associate it with all child devices */
@@ -1056,16 +1083,7 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
/* Put PE to the list */
list_add_tail(&pe->list, &phb->ioda.pe_list);
- /* Account for one DMA PE if at least one DMA capable device exist
- * below the bridge
- */
- if (pe->dma_weight != 0) {
- phb->ioda.dma_weight += pe->dma_weight;
- phb->ioda.dma_pe_count++;
- }
-
- /* Link the PE */
- pnv_ioda_link_pe_by_weight(phb, pe);
+ return pe;
}
static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
@@ -1088,7 +1106,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
* same GPU get assigned the same PE.
*/
gpu_pdev = pnv_pci_get_gpu_dev(npu_pdev);
- for (pe_num = 0; pe_num < phb->ioda.total_pe; pe_num++) {
+ for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) {
pe = &phb->ioda.pe_array[pe_num];
if (!pe->pdev)
continue;
@@ -1106,7 +1124,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
rid = npu_pdev->bus->number << 8 | npu_pdn->devfn;
npu_pdn->pcidev = npu_pdev;
npu_pdn->pe_number = pe_num;
- pe->dma_weight += pnv_ioda_dma_weight(npu_pdev);
phb->ioda.pe_rmap[rid] = pe->pe_number;
/* Map the PE to this link */
@@ -1378,7 +1395,7 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
pnv_ioda_deconfigure_pe(phb, pe);
- pnv_ioda_free_pe(phb, pe->pe_number);
+ pnv_ioda_free_pe(pe);
}
}
@@ -1387,6 +1404,7 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
struct pci_bus *bus;
struct pci_controller *hose;
struct pnv_phb *phb;
+ struct pnv_ioda_pe *pe;
struct pci_dn *pdn;
struct pci_sriov *iov;
u16 num_vfs, i;
@@ -1411,8 +1429,11 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
/* Release PE numbers */
if (pdn->m64_single_mode) {
for (i = 0; i < num_vfs; i++) {
- if (pdn->pe_num_map[i] != IODA_INVALID_PE)
- pnv_ioda_free_pe(phb, pdn->pe_num_map[i]);
+ if (pdn->pe_num_map[i] == IODA_INVALID_PE)
+ continue;
+
+ pe = &phb->ioda.pe_array[pdn->pe_num_map[i]];
+ pnv_ioda_free_pe(pe);
}
} else
bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
@@ -1454,7 +1475,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
pe->flags = PNV_IODA_PE_VF;
pe->pbus = NULL;
pe->parent_dev = pdev;
- pe->tce32_seg = -1;
pe->mve_number = -1;
pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) |
pci_iov_virtfn_devfn(pdev, vf_index);
@@ -1466,8 +1486,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
if (pnv_ioda_configure_pe(phb, pe)) {
/* XXX What do we do here ? */
- if (pe_num)
- pnv_ioda_free_pe(phb, pe_num);
+ pnv_ioda_free_pe(pe);
pe->pdev = NULL;
continue;
}
@@ -1486,6 +1505,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
struct pci_bus *bus;
struct pci_controller *hose;
struct pnv_phb *phb;
+ struct pnv_ioda_pe *pe;
struct pci_dn *pdn;
int ret;
u16 i;
@@ -1528,18 +1548,20 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
/* Calculate available PE for required VFs */
if (pdn->m64_single_mode) {
for (i = 0; i < num_vfs; i++) {
- pdn->pe_num_map[i] = pnv_ioda_alloc_pe(phb);
- if (pdn->pe_num_map[i] == IODA_INVALID_PE) {
+ pe = pnv_ioda_alloc_pe(phb);
+ if (!pe) {
ret = -EBUSY;
goto m64_failed;
}
+
+ pdn->pe_num_map[i] = pe->pe_number;
}
} else {
mutex_lock(&phb->ioda.pe_alloc_mutex);
*pdn->pe_num_map = bitmap_find_next_zero_area(
- phb->ioda.pe_alloc, phb->ioda.total_pe,
+ phb->ioda.pe_alloc, phb->ioda.total_pe_num,
0, num_vfs, 0);
- if (*pdn->pe_num_map >= phb->ioda.total_pe) {
+ if (*pdn->pe_num_map >= phb->ioda.total_pe_num) {
mutex_unlock(&phb->ioda.pe_alloc_mutex);
dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
kfree(pdn->pe_num_map);
@@ -1577,8 +1599,11 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
m64_failed:
if (pdn->m64_single_mode) {
for (i = 0; i < num_vfs; i++) {
- if (pdn->pe_num_map[i] != IODA_INVALID_PE)
- pnv_ioda_free_pe(phb, pdn->pe_num_map[i]);
+ if (pdn->pe_num_map[i] == IODA_INVALID_PE)
+ continue;
+
+ pe = &phb->ioda.pe_array[pdn->pe_num_map[i]];
+ pnv_ioda_free_pe(pe);
}
} else
bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
@@ -1640,8 +1665,6 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
struct pnv_ioda_pe *pe;
uint64_t top;
bool bypass = false;
- struct pci_dev *linked_npu_dev;
- int i;
if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
return -ENODEV;;
@@ -1662,15 +1685,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
*pdev->dev.dma_mask = dma_mask;
/* Update peer npu devices */
- if (pe->flags & PNV_IODA_PE_PEER)
- for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
- if (!pe->peers[i])
- continue;
-
- linked_npu_dev = pe->peers[i]->pdev;
- if (dma_get_mask(&linked_npu_dev->dev) != dma_mask)
- dma_set_mask(&linked_npu_dev->dev, dma_mask);
- }
+ pnv_npu_try_dma_set_bypass(pdev, bypass);
return 0;
}
@@ -1811,28 +1826,34 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = {
.get = pnv_tce_get,
};
-static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe)
+#define TCE_KILL_INVAL_ALL PPC_BIT(0) /* command bit: invalidate the entire TCE cache */
+#define TCE_KILL_INVAL_PE PPC_BIT(1) /* command bit: invalidate TCEs matching one PE#; replaces the literal 0x4ull << 60 */
+#define TCE_KILL_INVAL_TCE PPC_BIT(2) /* command bit: invalidate individual TCEs; replaces the literal 0x2ull << 60 */
+
+void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm)
+{ /* Write the invalidate-all command to the TCE invalidation register of phb.
+   * NOTE(review): unlike pnv_pci_ioda2_tce_invalidate_pe() there is no check
+   * that the register is set up - confirm that callers guarantee it. */
+ const unsigned long val = TCE_KILL_INVAL_ALL;
+
+ mb(); /* Ensure previous TCE table stores are visible */
+ if (rm) /* rm (presumably real mode - confirm) selects the physical register address */
+ __raw_rm_writeq(cpu_to_be64(val),
+ (__be64 __iomem *)
+ phb->ioda.tce_inval_reg_phys);
+ else
+ __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg);
+}
+
+static inline void pnv_pci_ioda2_tce_invalidate_pe(struct pnv_ioda_pe *pe)
{
/* 01xb - invalidate TCEs that match the specified PE# */
- unsigned long val = (0x4ull << 60) | (pe->pe_number & 0xFF);
+ unsigned long val = TCE_KILL_INVAL_PE | (pe->pe_number & 0xFF);
struct pnv_phb *phb = pe->phb;
- struct pnv_ioda_pe *npe;
- int i;
if (!phb->ioda.tce_inval_reg)
return;
mb(); /* Ensure above stores are visible */
__raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg);
-
- if (pe->flags & PNV_IODA_PE_PEER)
- for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
- npe = pe->peers[i];
- if (!npe || npe->phb->type != PNV_PHB_NPU)
- continue;
-
- pnv_npu_tce_invalidate_entire(npe);
- }
}
static void pnv_pci_ioda2_do_tce_invalidate(unsigned pe_number, bool rm,
@@ -1842,7 +1863,7 @@ static void pnv_pci_ioda2_do_tce_invalidate(unsigned pe_number, bool rm,
unsigned long start, end, inc;
/* We'll invalidate DMA address in PE scope */
- start = 0x2ull << 60;
+ start = TCE_KILL_INVAL_TCE;
start |= (pe_number & 0xFF);
end = start;
@@ -1867,28 +1888,24 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
struct iommu_table_group_link *tgl;
list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
- struct pnv_ioda_pe *npe;
struct pnv_ioda_pe *pe = container_of(tgl->table_group,
struct pnv_ioda_pe, table_group);
__be64 __iomem *invalidate = rm ?
(__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys :
pe->phb->ioda.tce_inval_reg;
- int i;
+ if (pe->phb->type == PNV_PHB_NPU) {
+ /*
+ * The NVLink hardware does not support TCE kill
+ * per TCE entry so we have to invalidate
+ * the entire cache for it.
+ */
+ pnv_pci_ioda2_tce_invalidate_entire(pe->phb, rm);
+ continue;
+ }
pnv_pci_ioda2_do_tce_invalidate(pe->pe_number, rm,
invalidate, tbl->it_page_shift,
index, npages);
-
- if (pe->flags & PNV_IODA_PE_PEER)
- /* Invalidate PEs using the same TCE table */
- for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
- npe = pe->peers[i];
- if (!npe || npe->phb->type != PNV_PHB_NPU)
- continue;
-
- pnv_npu_tce_invalidate(npe, tbl, index,
- npages, rm);
- }
}
}
@@ -1945,56 +1962,140 @@ static struct iommu_table_ops pnv_ioda2_iommu_ops = {
.free = pnv_ioda2_table_free,
};
-static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
- struct pnv_ioda_pe *pe, unsigned int base,
- unsigned int segs)
+static int pnv_pci_ioda_dev_dma_weight(struct pci_dev *dev, void *data)
+{ /* Add the DMA weight of one device to the accumulator that data points to. */
+ unsigned int *weight = (unsigned int *)data; /* running total owned by the caller */
+
+ /* This is quite simplistic. The "base" weight of a device
+ * is 10. 0 means no DMA is to be accounted for it.
+ *
+ * The function has the (dev, data) shape that pci_walk_bus()
+ * takes as a callback and is also called directly. It always
+ * returns 0; the result is delivered only through *weight.
+ */
+ if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
+ return 0; /* bridges and other non-normal headers add nothing */
+
+ if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
+ dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
+ dev->class == PCI_CLASS_SERIAL_USB_EHCI)
+ *weight += 3; /* reduced weight for slow USB controllers */
+ else if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
+ *weight += 15; /* increased weight for RAID controllers */
+ else
+ *weight += 10; /* default base weight */
+
+ return 0;
+}
+
+static unsigned int pnv_pci_ioda_pe_dma_weight(struct pnv_ioda_pe *pe)
+{ /* Return the DMA weight of a PE: the sum of pnv_pci_ioda_dev_dma_weight() over the devices selected by the PE flags, or 0 when no case below applies. */
+ unsigned int weight = 0;
+
+ /* An SR-IOV VF PE takes the DMA32 weight of its parent device (the PF) */
+#ifdef CONFIG_PCI_IOV
+ if ((pe->flags & PNV_IODA_PE_VF) && pe->parent_dev) {
+ pnv_pci_ioda_dev_dma_weight(pe->parent_dev, &weight);
+ return weight;
+ }
+#endif
+
+ if ((pe->flags & PNV_IODA_PE_DEV) && pe->pdev) {
+ pnv_pci_ioda_dev_dma_weight(pe->pdev, &weight); /* single-device PE */
+ } else if ((pe->flags & PNV_IODA_PE_BUS) && pe->pbus) {
+ struct pci_dev *pdev;
+
+ list_for_each_entry(pdev, &pe->pbus->devices, bus_list) /* only the devices listed directly on pbus */
+ pnv_pci_ioda_dev_dma_weight(pdev, &weight);
+ } else if ((pe->flags & PNV_IODA_PE_BUS_ALL) && pe->pbus) {
+ pci_walk_bus(pe->pbus, pnv_pci_ioda_dev_dma_weight, &weight); /* presumably also visits subordinate buses - confirm against pci_walk_bus() */
+ }
+
+ return weight;
+}
+
+static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
+ struct pnv_ioda_pe *pe)
{
struct page *tce_mem = NULL;
struct iommu_table *tbl;
- unsigned int i;
+ unsigned int weight, total_weight = 0;
+ unsigned int tce32_segsz, base, segs, avail, i;
int64_t rc;
void *addr;
/* XXX FIXME: Handle 64-bit only DMA devices */
/* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
/* XXX FIXME: Allocate multi-level tables on PHB3 */
+ weight = pnv_pci_ioda_pe_dma_weight(pe);
+ if (!weight)
+ return;
- /* We shouldn't already have a 32-bit DMA associated */
- if (WARN_ON(pe->tce32_seg >= 0))
+ pci_walk_bus(phb->hose->bus, pnv_pci_ioda_dev_dma_weight,
+ &total_weight);
+ segs = (weight * phb->ioda.dma32_count) / total_weight;
+ if (!segs)
+ segs = 1;
+
+ /*
+ * Allocate contiguous DMA32 segments. We begin with the expected
+ * number of segments. After each failed attempt, the number of
+ * DMA32 segments requested is decreased by one, until a free
+ * contiguous range is found or no segments are left.
+ */
+ do {
+ for (base = 0; base <= phb->ioda.dma32_count - segs; base++) {
+ for (avail = 0, i = base; i < base + segs; i++) {
+ if (phb->ioda.dma32_segmap[i] ==
+ IODA_INVALID_PE)
+ avail++;
+ }
+
+ if (avail == segs)
+ goto found;
+ }
+ } while (--segs);
+
+ if (!segs) {
+ pe_warn(pe, "No available DMA32 segments\n");
return;
+ }
+found:
tbl = pnv_pci_table_alloc(phb->hose->node);
iommu_register_group(&pe->table_group, phb->hose->global_number,
pe->pe_number);
pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group);
/* Grab a 32-bit TCE table */
- pe->tce32_seg = base;
+ pe_info(pe, "DMA weight %d (%d), assigned (%d) %d DMA32 segments\n",
+ weight, total_weight, base, segs);
pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
- (base << 28), ((base + segs) << 28) - 1);
+ base * PNV_IODA1_DMA32_SEGSIZE,
+ (base + segs) * PNV_IODA1_DMA32_SEGSIZE - 1);
/* XXX Currently, we allocate one big contiguous table for the
* TCEs. We only really need one chunk per 256M of TCE space
* (ie per segment) but that's an optimization for later, it
* requires some added smarts with our get/put_tce implementation
+ *
+ * Each TCE page is 4KB in size and each TCE entry occupies 8
+ * bytes
*/
+ tce32_segsz = PNV_IODA1_DMA32_SEGSIZE >> (IOMMU_PAGE_SHIFT_4K - 3);
tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
- get_order(TCE32_TABLE_SIZE * segs));
+ get_order(tce32_segsz * segs));
if (!tce_mem) {
pe_err(pe, " Failed to allocate a 32-bit TCE memory\n");
goto fail;
}
addr = page_address(tce_mem);
- memset(addr, 0, TCE32_TABLE_SIZE * segs);
+ memset(addr, 0, tce32_segsz * segs);
/* Configure HW */
for (i = 0; i < segs; i++) {
rc = opal_pci_map_pe_dma_window(phb->opal_id,
pe->pe_number,
base + i, 1,
- __pa(addr) + TCE32_TABLE_SIZE * i,
- TCE32_TABLE_SIZE, 0x1000);
+ __pa(addr) + tce32_segsz * i,
+ tce32_segsz, IOMMU_PAGE_SIZE_4K);
if (rc) {
pe_err(pe, " Failed to configure 32-bit TCE table,"
" err %ld\n", rc);
@@ -2002,9 +2103,14 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
}
}
+ /* Setup DMA32 segment mapping */
+ for (i = base; i < base + segs; i++)
+ phb->ioda.dma32_segmap[i] = pe->pe_number;
+
/* Setup linux iommu table */
- pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
- base << 28, IOMMU_PAGE_SHIFT_4K);
+ pnv_pci_setup_iommu_table(tbl, addr, tce32_segsz * segs,
+ base * PNV_IODA1_DMA32_SEGSIZE,
+ IOMMU_PAGE_SHIFT_4K);
/* OPAL variant of P7IOC SW invalidated TCEs */
if (phb->ioda.tce_inval_reg)
@@ -2031,10 +2137,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
return;
fail:
/* XXX Failure: Try to fallback to 64-bit only ? */
- if (pe->tce32_seg >= 0)
- pe->tce32_seg = -1;
if (tce_mem)
- __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
+ __free_pages(tce_mem, get_order(tce32_segsz * segs));
if (tbl) {
pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
iommu_free_table(tbl, "pnv");
@@ -2075,7 +2179,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
pnv_pci_link_table_and_group(phb->hose->node, num,
tbl, &pe->table_group);
- pnv_pci_ioda2_tce_invalidate_entire(pe);
+ pnv_pci_ioda2_tce_invalidate_pe(pe);
return 0;
}
@@ -2219,7 +2323,7 @@ static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
if (ret)
pe_warn(pe, "Unmapping failed, ret = %ld\n", ret);
else
- pnv_pci_ioda2_tce_invalidate_entire(pe);
+ pnv_pci_ioda2_tce_invalidate_pe(pe);
pnv_pci_unlink_table_and_group(table_group->tables[num], table_group);
@@ -2288,6 +2392,116 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
.take_ownership = pnv_ioda2_take_ownership,
.release_ownership = pnv_ioda2_release_ownership,
};
+
+static int gpe_table_group_to_npe_cb(struct device *dev, void *opaque)
+{ /* Per-device callback handed to iommu_group_for_each_dev(): report the PE of the first device found behind an NPU PHB. */
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
+ struct pnv_ioda_pe **ptmppe = opaque; /* out-parameter: where the matching PE pointer is stored */
+ struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+
+ if (!pdn || pdn->pe_number == IODA_INVALID_PE)
+ return 0; /* no PE assigned to this device: not a match */
+
+ hose = pci_bus_to_host(pdev->bus);
+ phb = hose->private_data;
+ if (phb->type != PNV_PHB_NPU)
+ return 0; /* device is not behind an NPU PHB: not a match */
+
+ *ptmppe = &phb->ioda.pe_array[pdn->pe_number];
+
+ return 1; /* match found; presumably a non-zero return ends the iteration - confirm against iommu_group_for_each_dev() */
+}
+
+/*
+ * Return the PE of the NPU associated with the given table group.
+ *
+ * This assumes that the NPU is in the same IOMMU group as the GPU and
+ * that there are no other PEs in that group. The devices of the group
+ * are searched with gpe_table_group_to_npe_cb(). The lookup is expected
+ * to succeed: BUG_ON() fires if the iteration returned 0 or left the
+ * PE pointer unset.
+ */
+static struct pnv_ioda_pe *gpe_table_group_to_npe(
+ struct iommu_table_group *table_group)
+{
+ struct pnv_ioda_pe *npe = NULL;
+ int ret = iommu_group_for_each_dev(table_group->group, &npe,
+ gpe_table_group_to_npe_cb);
+
+ BUG_ON(!ret || !npe);
+
+ return npe;
+}
+
+static long pnv_pci_ioda2_npu_set_window(struct iommu_table_group *table_group,
+ int num, struct iommu_table *tbl)
+{ /* set_window handler of pnv_pci_ioda2_npu_ops: program window num with tbl on the PE of the table group first, then on the associated NPU PE. Returns 0 on success or the first non-zero error. */
+ long ret = pnv_pci_ioda2_set_window(table_group, num, tbl);
+
+ if (ret)
+ return ret; /* nothing was set up, so the NPU is not touched */
+
+ ret = pnv_npu_set_window(gpe_table_group_to_npe(table_group), num, tbl);
+ if (ret)
+ pnv_pci_ioda2_unset_window(table_group, num); /* roll back the first step so both sides stay consistent */
+
+ return ret;
+}
+
+static long pnv_pci_ioda2_npu_unset_window(
+ struct iommu_table_group *table_group,
+ int num)
+{ /* unset_window handler of pnv_pci_ioda2_npu_ops: remove window num from the PE of the table group, then from the associated NPU PE. */
+ long ret = pnv_pci_ioda2_unset_window(table_group, num);
+
+ if (ret)
+ return ret; /* first step failed: the NPU window is left in place */
+
+ return pnv_npu_unset_window(gpe_table_group_to_npe(table_group), num);
+}
+
+static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group)
+{
+ /*
+ * take_ownership handler of pnv_pci_ioda2_npu_ops; the order of
+ * the two calls below matters.
+ *
+ * Detach NPU first as pnv_ioda2_take_ownership() will destroy
+ * the iommu_table if 32bit DMA is enabled.
+ */
+ pnv_npu_take_ownership(gpe_table_group_to_npe(table_group));
+ pnv_ioda2_take_ownership(table_group);
+}
+
+static struct iommu_table_group_ops pnv_pci_ioda2_npu_ops = { /* table group ops installed by pnv_pci_ioda_setup_iommu_api() on the PEs returned by pnv_pci_npu_setup_iommu() */
+ .get_table_size = pnv_pci_ioda2_get_table_size,
+ .create_table = pnv_pci_ioda2_create_table,
+ .set_window = pnv_pci_ioda2_npu_set_window, /* also programs the associated NPU PE */
+ .unset_window = pnv_pci_ioda2_npu_unset_window, /* also clears the associated NPU PE */
+ .take_ownership = pnv_ioda2_npu_take_ownership, /* detaches the NPU before the regular handler runs */
+ .release_ownership = pnv_ioda2_release_ownership, /* same handler as in pnv_pci_ioda2_ops; no NPU-specific step */
+};
+
+static void pnv_pci_ioda_setup_iommu_api(void)
+{
+ struct pci_controller *hose, *tmp;
+ struct pnv_phb *phb;
+ struct pnv_ioda_pe *pe, *gpe;
+
+ /*
+ * Now we have all PHBs discovered, time to add NPU devices to
+ * the corresponding IOMMU groups.
+ *
+ * Only PHBs of type PNV_PHB_NPU are visited. For each PE on such
+ * a PHB, pnv_pci_npu_setup_iommu() is called; when it returns a
+ * PE (gpe, presumably the PE of the linked GPU - confirm), the
+ * table group ops of that PE are switched to
+ * pnv_pci_ioda2_npu_ops.
+ */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ phb = hose->private_data;
+
+ if (phb->type != PNV_PHB_NPU)
+ continue;
+
+ list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+ gpe = pnv_pci_npu_setup_iommu(pe);
+ if (gpe)
+ gpe->table_group.ops = &pnv_pci_ioda2_npu_ops;
+ }
+ }
+}
+#else /* !CONFIG_IOMMU_API */
+/* No-op stub for the !CONFIG_IOMMU_API side of the conditional, so callers need no #ifdef. */
+static void pnv_pci_ioda_setup_iommu_api(void) { }
#endif
static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb)
@@ -2443,10 +2657,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
{
int64_t rc;
- /* We shouldn't already have a 32-bit DMA associated */
- if (WARN_ON(pe->tce32_seg >= 0))
- return;
-
/* TVE #1 is selected by PCI address bit 59 */
pe->tce_bypass_base = 1ull << 59;
@@ -2454,7 +2664,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
pe->pe_number);
/* The PE will reserve all possible 32-bits space */
- pe->tce32_seg = 0;
pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
phb->ioda.m32_pci_base);
@@ -2470,11 +2679,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
#endif
rc = pnv_pci_ioda2_setup_default_config(pe);
- if (rc) {
- if (pe->tce32_seg >= 0)
- pe->tce32_seg = -1;
+ if (rc)
return;
- }
if (pe->flags & PNV_IODA_PE_DEV)
iommu_add_device(&pe->pdev->dev);
@@ -2485,47 +2691,24 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
static void pnv_ioda_setup_dma(struct pnv_phb *phb)
{
struct pci_controller *hose = phb->hose;
- unsigned int residual, remaining, segs, tw, base;
struct pnv_ioda_pe *pe;
+ unsigned int weight;
/* If we have more PE# than segments available, hand out one
* per PE until we run out and let the rest fail. If not,
* then we assign at least one segment per PE, plus more based
* on the amount of devices under that PE
*/
- if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
- residual = 0;
- else
- residual = phb->ioda.tce32_count -
- phb->ioda.dma_pe_count;
-
- pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
- hose->global_number, phb->ioda.tce32_count);
- pr_info("PCI: %d PE# for a total weight of %d\n",
- phb->ioda.dma_pe_count, phb->ioda.dma_weight);
+ pr_info("PCI: Domain %04x has %d available 32-bit DMA segments\n",
+ hose->global_number, phb->ioda.dma32_count);
pnv_pci_ioda_setup_opal_tce_kill(phb);
- /* Walk our PE list and configure their DMA segments, hand them
- * out one base segment plus any residual segments based on
- * weight
- */
- remaining = phb->ioda.tce32_count;
- tw = phb->ioda.dma_weight;
- base = 0;
- list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
- if (!pe->dma_weight)
- continue;
- if (!remaining) {
- pe_warn(pe, "No DMA32 resources available\n");
+ /* Walk our PE list and configure their DMA segments */
+ list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+ weight = pnv_pci_ioda_pe_dma_weight(pe);
+ if (!weight)
continue;
- }
- segs = 1;
- if (residual) {
- segs += ((pe->dma_weight * residual) + (tw / 2)) / tw;
- if (segs > remaining)
- segs = remaining;
- }
/*
* For IODA2 compliant PHB3, we needn't care about the weight.
@@ -2533,12 +2716,9 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
* the specific PE.
*/
if (phb->type == PNV_PHB_IODA1) {
- pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
- pe->dma_weight, segs);
- pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
+ pnv_pci_ioda1_setup_dma_pe(phb, pe);
} else if (phb->type == PNV_PHB_IODA2) {
pe_info(pe, "Assign DMA32 space\n");
- segs = 0;
pnv_pci_ioda2_setup_dma_pe(phb, pe);
} else if (phb->type == PNV_PHB_NPU) {
/*
@@ -2548,9 +2728,6 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
* as the PHB3 TVT.
*/
}
-
- remaining -= segs;
- base += segs;
}
}
@@ -2858,7 +3035,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
pdn->m64_single_mode = false;
total_vfs = pci_sriov_get_totalvfs(pdev);
- mul = phb->ioda.total_pe;
+ mul = phb->ioda.total_pe_num;
total_vf_bar_sz = 0;
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
@@ -2929,19 +3106,72 @@ truncate_iov:
}
#endif /* CONFIG_PCI_IOV */
+/*
+ * Map every I/O or 32-bit MMIO segment covered by one resource to a PE.
+ *
+ * @pe:  PE that will own the segments; the PHB is taken from pe->phb
+ * @res: resource to map; NULL, flag-less or empty (start > end)
+ *       resources are ignored
+ *
+ * I/O resources are mapped with OPAL_IO_WINDOW_TYPE and recorded in
+ * io_segmap. Memory resources that are not 64-bit prefetchable are
+ * mapped with OPAL_M32_WINDOW_TYPE and recorded in m32_segmap. Any
+ * other resource is left untouched. Mapping of a resource stops at the
+ * first OPAL error, which is logged; nothing is returned to the caller.
+ */
+static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
+				  struct resource *res)
+{
+	struct pnv_phb *phb = pe->phb;
+	struct pci_bus_region region;
+	int index;
+	int64_t rc;
+
+	if (!res || !res->flags || res->start > res->end)
+		return;
+
+	if (res->flags & IORESOURCE_IO) {
+		/* Translate to an offset inside the PHB I/O window */
+		region.start = res->start - phb->ioda.io_pci_base;
+		region.end   = res->end - phb->ioda.io_pci_base;
+		index = region.start / phb->ioda.io_segsize;
+
+		/* Walk segment by segment until the resource is covered */
+		while (index < phb->ioda.total_pe_num &&
+		       region.start <= region.end) {
+			phb->ioda.io_segmap[index] = pe->pe_number;
+			rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+				pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
+			if (rc != OPAL_SUCCESS) {
+				pr_err("%s: Error %lld mapping IO segment#%d to PE#%d\n",
+				       __func__, rc, index, pe->pe_number);
+				break;
+			}
+
+			region.start += phb->ioda.io_segsize;
+			index++;
+		}
+	} else if ((res->flags & IORESOURCE_MEM) &&
+		   !pnv_pci_is_mem_pref_64(res->flags)) {
+		/* Translate to an offset inside the PHB M32 window */
+		region.start = res->start -
+			       phb->hose->mem_offset[0] -
+			       phb->ioda.m32_pci_base;
+		region.end   = res->end -
+			       phb->hose->mem_offset[0] -
+			       phb->ioda.m32_pci_base;
+		index = region.start / phb->ioda.m32_segsize;
+
+		while (index < phb->ioda.total_pe_num &&
+		       region.start <= region.end) {
+			phb->ioda.m32_segmap[index] = pe->pe_number;
+			rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+				pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
+			if (rc != OPAL_SUCCESS) {
+				/* Terminate the message with a newline, like the IO case */
+				pr_err("%s: Error %lld mapping M32 segment#%d to PE#%d\n",
+				       __func__, rc, index, pe->pe_number);
+				break;
+			}
+
+			region.start += phb->ioda.m32_segsize;
+			index++;
+		}
+	}
+}
+
/*
* This function is supposed to be called on a per-PE basis, from top
* to bottom, so that the I/O or MMIO segments assigned to a parent
* PE can be overridden by its child PEs if necessary.
*/
-static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
- struct pnv_ioda_pe *pe)
+static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe)
{
- struct pnv_phb *phb = hose->private_data;
- struct pci_bus_region region;
- struct resource *res;
- int i, index;
- int rc;
+ struct pci_dev *pdev;
+ int i;
/*
* NOTE: We only care PCI bus based PE for now. For PCI
@@ -2950,57 +3180,20 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
*/
BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
- pci_bus_for_each_resource(pe->pbus, res, i) {
- if (!res || !res->flags ||
- res->start > res->end)
- continue;
+ list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
+ for (i = 0; i <= PCI_ROM_RESOURCE; i++)
+ pnv_ioda_setup_pe_res(pe, &pdev->resource[i]);
- if (res->flags & IORESOURCE_IO) {
- region.start = res->start - phb->ioda.io_pci_base;
- region.end = res->end - phb->ioda.io_pci_base;
- index = region.start / phb->ioda.io_segsize;
-
- while (index < phb->ioda.total_pe &&
- region.start <= region.end) {
- phb->ioda.io_segmap[index] = pe->pe_number;
- rc = opal_pci_map_pe_mmio_window(phb->opal_id,
- pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
- if (rc != OPAL_SUCCESS) {
- pr_err("%s: OPAL error %d when mapping IO "
- "segment #%d to PE#%d\n",
- __func__, rc, index, pe->pe_number);
- break;
- }
-
- region.start += phb->ioda.io_segsize;
- index++;
- }
- } else if ((res->flags & IORESOURCE_MEM) &&
- !pnv_pci_is_mem_pref_64(res->flags)) {
- region.start = res->start -
- hose->mem_offset[0] -
- phb->ioda.m32_pci_base;
- region.end = res->end -
- hose->mem_offset[0] -
- phb->ioda.m32_pci_base;
- index = region.start / phb->ioda.m32_segsize;
-
- while (index < phb->ioda.total_pe &&
- region.start <= region.end) {
- phb->ioda.m32_segmap[index] = pe->pe_number;
- rc = opal_pci_map_pe_mmio_window(phb->opal_id,
- pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
- if (rc != OPAL_SUCCESS) {
- pr_err("%s: OPAL error %d when mapping M32 "
- "segment#%d to PE#%d",
- __func__, rc, index, pe->pe_number);
- break;
- }
-
- region.start += phb->ioda.m32_segsize;
- index++;
- }
- }
+ /*
+ * If the PE contains all subordinate PCI buses, the
+ * windows of the child bridges should be mapped to
+ * the PE as well.
+ */
+ if (!(pe->flags & PNV_IODA_PE_BUS_ALL) || !pci_is_bridge(pdev))
+ continue;
+ for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++)
+ pnv_ioda_setup_pe_res(pe,
+ &pdev->resource[PCI_BRIDGE_RESOURCES + i]);
}
}
@@ -3018,7 +3211,7 @@ static void pnv_pci_ioda_setup_seg(void)
continue;
list_for_each_entry(pe, &phb->ioda.pe_list, list) {
- pnv_ioda_setup_pe_seg(hose, pe);
+ pnv_ioda_setup_pe_seg(pe);
}
}
}
@@ -3035,6 +3228,8 @@ static void pnv_pci_ioda_setup_DMA(void)
phb = hose->private_data;
phb->initialized = 1;
}
+
+ pnv_pci_ioda_setup_iommu_api();
}
static void pnv_pci_ioda_create_dbgfs(void)
@@ -3056,27 +3251,6 @@ static void pnv_pci_ioda_create_dbgfs(void)
#endif /* CONFIG_DEBUG_FS */
}
-static void pnv_npu_ioda_fixup(void)
-{
- bool enable_bypass;
- struct pci_controller *hose, *tmp;
- struct pnv_phb *phb;
- struct pnv_ioda_pe *pe;
-
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
- phb = hose->private_data;
- if (phb->type != PNV_PHB_NPU)
- continue;
-
- list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
- enable_bypass = dma_get_mask(&pe->pdev->dev) ==
- DMA_BIT_MASK(64);
- pnv_npu_init_dma_pe(pe);
- pnv_npu_dma_set_bypass(pe, enable_bypass);
- }
- }
-}
-
static void pnv_pci_ioda_fixup(void)
{
pnv_pci_ioda_setup_PEs();
@@ -3089,9 +3263,6 @@ static void pnv_pci_ioda_fixup(void)
eeh_init();
eeh_addr_cache_build();
#endif
-
- /* Link NPU IODA tables to their PCI devices. */
- pnv_npu_ioda_fixup();
}
/*
@@ -3195,12 +3366,6 @@ static bool pnv_pci_enable_device_hook(struct pci_dev *dev)
return true;
}
-static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
- u32 devfn)
-{
- return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
-}
-
static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
{
struct pnv_phb *phb = hose->private_data;
@@ -3210,31 +3375,39 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
}
static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
- .dma_dev_setup = pnv_pci_dma_dev_setup,
- .dma_bus_setup = pnv_pci_dma_bus_setup,
+ .dma_dev_setup = pnv_pci_dma_dev_setup,
+ .dma_bus_setup = pnv_pci_dma_bus_setup,
#ifdef CONFIG_PCI_MSI
- .setup_msi_irqs = pnv_setup_msi_irqs,
- .teardown_msi_irqs = pnv_teardown_msi_irqs,
+ .setup_msi_irqs = pnv_setup_msi_irqs,
+ .teardown_msi_irqs = pnv_teardown_msi_irqs,
#endif
- .enable_device_hook = pnv_pci_enable_device_hook,
- .window_alignment = pnv_pci_window_alignment,
- .reset_secondary_bus = pnv_pci_reset_secondary_bus,
- .dma_set_mask = pnv_pci_ioda_dma_set_mask,
- .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask,
- .shutdown = pnv_pci_ioda_shutdown,
+ .enable_device_hook = pnv_pci_enable_device_hook,
+ .window_alignment = pnv_pci_window_alignment,
+ .reset_secondary_bus = pnv_pci_reset_secondary_bus,
+ .dma_set_mask = pnv_pci_ioda_dma_set_mask,
+ .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask,
+ .shutdown = pnv_pci_ioda_shutdown,
};
+static int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask)
+{
+ dev_err_once(&npdev->dev,
+ "%s operation unsupported for NVLink devices\n",
+ __func__);
+ return -EPERM;
+}
+
static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
- .dma_dev_setup = pnv_pci_dma_dev_setup,
+ .dma_dev_setup = pnv_pci_dma_dev_setup,
#ifdef CONFIG_PCI_MSI
- .setup_msi_irqs = pnv_setup_msi_irqs,
- .teardown_msi_irqs = pnv_teardown_msi_irqs,
+ .setup_msi_irqs = pnv_setup_msi_irqs,
+ .teardown_msi_irqs = pnv_teardown_msi_irqs,
#endif
- .enable_device_hook = pnv_pci_enable_device_hook,
- .window_alignment = pnv_pci_window_alignment,
- .reset_secondary_bus = pnv_pci_reset_secondary_bus,
- .dma_set_mask = pnv_npu_dma_set_mask,
- .shutdown = pnv_pci_ioda_shutdown,
+ .enable_device_hook = pnv_pci_enable_device_hook,
+ .window_alignment = pnv_pci_window_alignment,
+ .reset_secondary_bus = pnv_pci_reset_secondary_bus,
+ .dma_set_mask = pnv_npu_dma_set_mask,
+ .shutdown = pnv_pci_ioda_shutdown,
};
static void __init pnv_pci_init_ioda_phb(struct device_node *np,
@@ -3242,10 +3415,12 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
{
struct pci_controller *hose;
struct pnv_phb *phb;
- unsigned long size, m32map_off, pemap_off, iomap_off = 0;
+ unsigned long size, m64map_off, m32map_off, pemap_off;
+ unsigned long iomap_off = 0, dma32map_off = 0;
const __be64 *prop64;
const __be32 *prop32;
int len;
+ unsigned int segno;
u64 phb_id;
void *aux;
long rc;
@@ -3306,13 +3481,13 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
pr_err(" Failed to map registers !\n");
/* Initialize more IODA stuff */
- phb->ioda.total_pe = 1;
+ phb->ioda.total_pe_num = 1;
prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
if (prop32)
- phb->ioda.total_pe = be32_to_cpup(prop32);
+ phb->ioda.total_pe_num = be32_to_cpup(prop32);
prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL);
if (prop32)
- phb->ioda.reserved_pe = be32_to_cpup(prop32);
+ phb->ioda.reserved_pe_idx = be32_to_cpup(prop32);
/* Parse 64-bit MMIO range */
pnv_ioda_parse_m64_window(phb);
@@ -3321,36 +3496,58 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
/* FW Has already off top 64k of M32 space (MSI space) */
phb->ioda.m32_size += 0x10000;
- phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
+ phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe_num;
phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0];
phb->ioda.io_size = hose->pci_io_size;
- phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
+ phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe_num;
phb->ioda.io_pci_base = 0; /* XXX calculate this ? */
+ /* Calculate how many 32-bit TCE segments we have */
+ phb->ioda.dma32_count = phb->ioda.m32_pci_base /
+ PNV_IODA1_DMA32_SEGSIZE;
+
/* Allocate aux data & arrays. We don't have IO ports on PHB3 */
- size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
+ size = _ALIGN_UP(max_t(unsigned, phb->ioda.total_pe_num, 8) / 8,
+ sizeof(unsigned long));
+ m64map_off = size;
+ size += phb->ioda.total_pe_num * sizeof(phb->ioda.m64_segmap[0]);
m32map_off = size;
- size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
+ size += phb->ioda.total_pe_num * sizeof(phb->ioda.m32_segmap[0]);
if (phb->type == PNV_PHB_IODA1) {
iomap_off = size;
- size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
+ size += phb->ioda.total_pe_num * sizeof(phb->ioda.io_segmap[0]);
+ dma32map_off = size;
+ size += phb->ioda.dma32_count *
+ sizeof(phb->ioda.dma32_segmap[0]);
}
pemap_off = size;
- size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
+ size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe);
aux = memblock_virt_alloc(size, 0);
phb->ioda.pe_alloc = aux;
+ phb->ioda.m64_segmap = aux + m64map_off;
phb->ioda.m32_segmap = aux + m32map_off;
- if (phb->type == PNV_PHB_IODA1)
+ for (segno = 0; segno < phb->ioda.total_pe_num; segno++) {
+ phb->ioda.m64_segmap[segno] = IODA_INVALID_PE;
+ phb->ioda.m32_segmap[segno] = IODA_INVALID_PE;
+ }
+ if (phb->type == PNV_PHB_IODA1) {
phb->ioda.io_segmap = aux + iomap_off;
+ for (segno = 0; segno < phb->ioda.total_pe_num; segno++)
+ phb->ioda.io_segmap[segno] = IODA_INVALID_PE;
+
+ phb->ioda.dma32_segmap = aux + dma32map_off;
+ for (segno = 0; segno < phb->ioda.dma32_count; segno++)
+ phb->ioda.dma32_segmap[segno] = IODA_INVALID_PE;
+ }
phb->ioda.pe_array = aux + pemap_off;
- set_bit(phb->ioda.reserved_pe, phb->ioda.pe_alloc);
+ set_bit(phb->ioda.reserved_pe_idx, phb->ioda.pe_alloc);
- INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
INIT_LIST_HEAD(&phb->ioda.pe_list);
mutex_init(&phb->ioda.pe_list_mutex);
/* Calculate how many 32-bit TCE segments we have */
- phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
+ phb->ioda.dma32_count = phb->ioda.m32_pci_base /
+ PNV_IODA1_DMA32_SEGSIZE;
#if 0 /* We should really do that ... */
rc = opal_pci_set_phb_mem_window(opal->phb_id,
@@ -3362,7 +3559,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
#endif
pr_info(" %03d (%03d) PE's M32: 0x%x [segment=0x%x]\n",
- phb->ioda.total_pe, phb->ioda.reserved_pe,
+ phb->ioda.total_pe_num, phb->ioda.reserved_pe_idx,
phb->ioda.m32_size, phb->ioda.m32_segsize);
if (phb->ioda.m64_size)
pr_info(" M64: 0x%lx [segment=0x%lx]\n",
@@ -3377,12 +3574,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
phb->freeze_pe = pnv_ioda_freeze_pe;
phb->unfreeze_pe = pnv_ioda_unfreeze_pe;
- /* Setup RID -> PE mapping function */
- phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;
-
- /* Setup TCEs */
- phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
-
/* Setup MSI support */
pnv_pci_init_ioda_msis(phb);
@@ -3395,10 +3586,12 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
*/
ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
- if (phb->type == PNV_PHB_NPU)
+ if (phb->type == PNV_PHB_NPU) {
hose->controller_ops = pnv_npu_ioda_controller_ops;
- else
+ } else {
+ phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
hose->controller_ops = pnv_pci_ioda_controller_ops;
+ }
#ifdef CONFIG_PCI_IOV
ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources;
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 73c8dc2..1d92bd9 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -39,9 +39,6 @@
/* Delay in usec */
#define PCI_RESET_DELAY_US 3000000
-#define cfg_dbg(fmt...) do { } while(0)
-//#define cfg_dbg(fmt...) printk(fmt)
-
#ifdef CONFIG_PCI_MSI
int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
{
@@ -370,7 +367,7 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
struct pnv_phb *phb = pdn->phb->private_data;
u8 fstate;
__be16 pcierr;
- int pe_no;
+ unsigned int pe_no;
s64 rc;
/*
@@ -380,7 +377,7 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
*/
pe_no = pdn->pe_number;
if (pe_no == IODA_INVALID_PE) {
- pe_no = phb->ioda.reserved_pe;
+ pe_no = phb->ioda.reserved_pe_idx;
}
/*
@@ -402,8 +399,8 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
}
}
- cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
- (pdn->busno << 8) | (pdn->devfn), pe_no, fstate);
+ pr_devel(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
+ (pdn->busno << 8) | (pdn->devfn), pe_no, fstate);
/* Clear the frozen state if applicable */
if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE ||
@@ -451,8 +448,8 @@ int pnv_pci_cfg_read(struct pci_dn *pdn,
return PCIBIOS_FUNC_NOT_SUPPORTED;
}
- cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
- __func__, pdn->busno, pdn->devfn, where, size, *val);
+ pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
+ __func__, pdn->busno, pdn->devfn, where, size, *val);
return PCIBIOS_SUCCESSFUL;
}
@@ -462,8 +459,8 @@ int pnv_pci_cfg_write(struct pci_dn *pdn,
struct pnv_phb *phb = pdn->phb->private_data;
u32 bdfn = (pdn->busno << 8) | pdn->devfn;
- cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
- pdn->busno, pdn->devfn, where, size, val);
+ pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
+ __func__, pdn->busno, pdn->devfn, where, size, val);
switch (size) {
case 1:
opal_pci_config_write_byte(phb->opal_id, bdfn, where, val);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 3f814f3..7dee25e 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -24,7 +24,6 @@ enum pnv_phb_model {
#define PNV_IODA_PE_MASTER (1 << 3) /* Master PE in compound case */
#define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */
#define PNV_IODA_PE_VF (1 << 5) /* PE for one VF */
-#define PNV_IODA_PE_PEER (1 << 6) /* PE has peers */
/* Data associated with a PE, including IOMMU tracking etc.. */
struct pnv_phb;
@@ -32,9 +31,6 @@ struct pnv_ioda_pe {
unsigned long flags;
struct pnv_phb *phb;
-#define PNV_IODA_MAX_PEER_PES 8
- struct pnv_ioda_pe *peers[PNV_IODA_MAX_PEER_PES];
-
/* A PE can be associated with a single device or an
* entire bus (& children). In the former case, pdev
* is populated, in the later case, pbus is.
@@ -53,14 +49,7 @@ struct pnv_ioda_pe {
/* PE number */
unsigned int pe_number;
- /* "Weight" assigned to the PE for the sake of DMA resource
- * allocations
- */
- unsigned int dma_weight;
-
/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
- int tce32_seg;
- int tce32_segcount;
struct iommu_table_group table_group;
/* 64-bit TCE bypass region */
@@ -78,7 +67,6 @@ struct pnv_ioda_pe {
struct list_head slaves;
/* Link in list of PE#s */
- struct list_head dma_link;
struct list_head list;
};
@@ -110,19 +98,18 @@ struct pnv_phb {
unsigned int is_64, struct msi_msg *msg);
void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
void (*fixup_phb)(struct pci_controller *hose);
- u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
int (*init_m64)(struct pnv_phb *phb);
void (*reserve_m64_pe)(struct pci_bus *bus,
unsigned long *pe_bitmap, bool all);
- int (*pick_m64_pe)(struct pci_bus *bus, bool all);
+ struct pnv_ioda_pe *(*pick_m64_pe)(struct pci_bus *bus, bool all);
int (*get_pe_state)(struct pnv_phb *phb, int pe_no);
void (*freeze_pe)(struct pnv_phb *phb, int pe_no);
int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt);
struct {
/* Global bridge info */
- unsigned int total_pe;
- unsigned int reserved_pe;
+ unsigned int total_pe_num;
+ unsigned int reserved_pe_idx;
/* 32-bit MMIO window */
unsigned int m32_size;
@@ -141,15 +128,19 @@ struct pnv_phb {
unsigned int io_segsize;
unsigned int io_pci_base;
- /* PE allocation bitmap */
- unsigned long *pe_alloc;
- /* PE allocation mutex */
+ /* PE allocation */
struct mutex pe_alloc_mutex;
+ unsigned long *pe_alloc;
+ struct pnv_ioda_pe *pe_array;
/* M32 & IO segment maps */
+ unsigned int *m64_segmap;
unsigned int *m32_segmap;
unsigned int *io_segmap;
- struct pnv_ioda_pe *pe_array;
+
+ /* DMA32 segment maps - IODA1 only */
+ unsigned int dma32_count;
+ unsigned int *dma32_segmap;
/* IRQ chip */
int irq_chip_init;
@@ -167,20 +158,6 @@ struct pnv_phb {
*/
unsigned char pe_rmap[0x10000];
- /* 32-bit TCE tables allocation */
- unsigned long tce32_count;
-
- /* Total "weight" for the sake of DMA resources
- * allocation
- */
- unsigned int dma_weight;
- unsigned int dma_pe_count;
-
- /* Sorted list of used PE's, sorted at
- * boot for resource allocation purposes
- */
- struct list_head pe_dma_list;
-
/* TCE cache invalidate registers (physical and
* remapped)
*/
@@ -236,16 +213,23 @@ extern void pnv_pci_dma_bus_setup(struct pci_bus *bus);
extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
+extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
+ const char *fmt, ...);
+#define pe_err(pe, fmt, ...) \
+ pe_level_printk(pe, KERN_ERR, fmt, ##__VA_ARGS__)
+#define pe_warn(pe, fmt, ...) \
+ pe_level_printk(pe, KERN_WARNING, fmt, ##__VA_ARGS__)
+#define pe_info(pe, fmt, ...) \
+ pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__)
+
/* Nvlink functions */
-extern void pnv_npu_tce_invalidate_entire(struct pnv_ioda_pe *npe);
-extern void pnv_npu_tce_invalidate(struct pnv_ioda_pe *npe,
- struct iommu_table *tbl,
- unsigned long index,
- unsigned long npages,
- bool rm);
-extern void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe);
-extern void pnv_npu_setup_dma_pe(struct pnv_ioda_pe *npe);
-extern int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe, bool enabled);
-extern int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask);
+extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass);
+extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm);
+extern struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe);
+extern long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num,
+ struct iommu_table *tbl);
+extern long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num);
+extern void pnv_npu_take_ownership(struct pnv_ioda_pe *npe);
+extern void pnv_npu_release_ownership(struct pnv_ioda_pe *npe);
#endif /* __POWERNV_PCI_H */
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 1acb0c7..ee6430b 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -273,7 +273,10 @@ static int __init pnv_probe(void)
if (!of_flat_dt_is_compatible(root, "ibm,powernv"))
return 0;
- hpte_init_native();
+ if (IS_ENABLED(CONFIG_PPC_RADIX_MMU) && radix_enabled())
+ radix_init_native();
+ else if (IS_ENABLED(CONFIG_PPC_STD_MMU_64))
+ hpte_init_native();
if (firmware_has_feature(FW_FEATURE_OPAL))
pnv_setup_machdep_opal();
diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
index 2f95d33..c9a3e67 100644
--- a/arch/powerpc/platforms/ps3/htab.c
+++ b/arch/powerpc/platforms/ps3/htab.c
@@ -63,7 +63,7 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn,
vflags &= ~HPTE_V_SECONDARY;
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
- hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize) | rflags;
+ hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize, ssize) | rflags;
spin_lock_irqsave(&ps3_htab_lock, flags);
diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c
index a0bca05..492b257 100644
--- a/arch/powerpc/platforms/ps3/spu.c
+++ b/arch/powerpc/platforms/ps3/spu.c
@@ -205,7 +205,7 @@ static void spu_unmap(struct spu *spu)
static int __init setup_areas(struct spu *spu)
{
struct table {char* name; unsigned long addr; unsigned long size;};
- static const unsigned long shadow_flags = _PAGE_NO_CACHE | 3;
+ unsigned long shadow_flags = pgprot_val(pgprot_noncached_wc(PAGE_KERNEL_RO));
spu_pdata(spu)->shadow = __ioremap(spu_pdata(spu)->shadow_addr,
sizeof(struct spe_shadow),
@@ -216,7 +216,7 @@ static int __init setup_areas(struct spu *spu)
}
spu->local_store = (__force void *)ioremap_prot(spu->local_store_phys,
- LS_SIZE, _PAGE_NO_CACHE);
+ LS_SIZE, pgprot_val(pgprot_noncached_wc(__pgprot(0))));
if (!spu->local_store) {
pr_debug("%s:%d: ioremap local_store failed\n",
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index e9ff44c..2ce1385 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -116,6 +116,155 @@ static struct property *dlpar_clone_drconf_property(struct device_node *dn)
return new_prop;
}
+static void dlpar_update_drconf_property(struct device_node *dn,
+ struct property *prop)
+{
+ struct of_drconf_cell *lmbs;
+ u32 num_lmbs, *p;
+ int i;
+
+ /* Convert the property back to BE */
+ p = prop->value;
+ num_lmbs = *p;
+ *p = cpu_to_be32(*p);
+ p++;
+
+ lmbs = (struct of_drconf_cell *)p;
+ for (i = 0; i < num_lmbs; i++) {
+ lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr);
+ lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index);
+ lmbs[i].flags = cpu_to_be32(lmbs[i].flags);
+ }
+
+ rtas_hp_event = true;
+ of_update_property(dn, prop);
+ rtas_hp_event = false;
+}
+
+static int dlpar_update_device_tree_lmb(struct of_drconf_cell *lmb)
+{
+ struct device_node *dn;
+ struct property *prop;
+ struct of_drconf_cell *lmbs;
+ u32 *p, num_lmbs;
+ int i;
+
+ dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!dn)
+ return -ENODEV;
+
+ prop = dlpar_clone_drconf_property(dn);
+ if (!prop) {
+ of_node_put(dn);
+ return -ENODEV;
+ }
+
+ p = prop->value;
+ num_lmbs = *p++;
+ lmbs = (struct of_drconf_cell *)p;
+
+ for (i = 0; i < num_lmbs; i++) {
+ if (lmbs[i].drc_index == lmb->drc_index) {
+ lmbs[i].flags = lmb->flags;
+ lmbs[i].aa_index = lmb->aa_index;
+
+ dlpar_update_drconf_property(dn, prop);
+ break;
+ }
+ }
+
+ of_node_put(dn);
+ return 0;
+}
+
+static u32 lookup_lmb_associativity_index(struct of_drconf_cell *lmb)
+{
+ struct device_node *parent, *lmb_node, *dr_node;
+ const u32 *lmb_assoc;
+ const u32 *assoc_arrays;
+ u32 aa_index;
+ int aa_arrays, aa_array_entries, aa_array_sz;
+ int i;
+
+ parent = of_find_node_by_path("/");
+ if (!parent)
+ return -ENODEV;
+
+ lmb_node = dlpar_configure_connector(cpu_to_be32(lmb->drc_index),
+ parent);
+ of_node_put(parent);
+ if (!lmb_node)
+ return -EINVAL;
+
+ lmb_assoc = of_get_property(lmb_node, "ibm,associativity", NULL);
+ if (!lmb_assoc) {
+ dlpar_free_cc_nodes(lmb_node);
+ return -ENODEV;
+ }
+
+ dr_node = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!dr_node) {
+ dlpar_free_cc_nodes(lmb_node);
+ return -ENODEV;
+ }
+
+ assoc_arrays = of_get_property(dr_node,
+ "ibm,associativity-lookup-arrays",
+ NULL);
+ of_node_put(dr_node);
+ if (!assoc_arrays) {
+ dlpar_free_cc_nodes(lmb_node);
+ return -ENODEV;
+ }
+
+ /* The ibm,associativity-lookup-arrays property is defined to be
+ * a 32-bit value specifying the number of associativity arrays
+ * followed by a 32-bit value specifying the number of entries per
+ * array, followed by the associativity arrays.
+ */
+ aa_arrays = be32_to_cpu(assoc_arrays[0]);
+ aa_array_entries = be32_to_cpu(assoc_arrays[1]);
+ aa_array_sz = aa_array_entries * sizeof(u32);
+
+ aa_index = -1;
+ for (i = 0; i < aa_arrays; i++) {
+ int indx = (i * aa_array_entries) + 2;
+
+ if (memcmp(&assoc_arrays[indx], &lmb_assoc[1], aa_array_sz))
+ continue;
+
+ aa_index = i;
+ break;
+ }
+
+ dlpar_free_cc_nodes(lmb_node);
+ return aa_index;
+}
+
+static int dlpar_add_device_tree_lmb(struct of_drconf_cell *lmb)
+{
+ int aa_index;
+
+ lmb->flags |= DRCONF_MEM_ASSIGNED;
+
+ aa_index = lookup_lmb_associativity_index(lmb);
+ if (aa_index < 0) {
+ pr_err("Couldn't find associativity index for drc index %x\n",
+ lmb->drc_index);
+ return aa_index;
+ }
+
+ lmb->aa_index = aa_index;
+ return dlpar_update_device_tree_lmb(lmb);
+}
+
+static int dlpar_remove_device_tree_lmb(struct of_drconf_cell *lmb)
+{
+ lmb->flags &= ~DRCONF_MEM_ASSIGNED;
+ lmb->aa_index = 0xffffffff;
+ return dlpar_update_device_tree_lmb(lmb);
+}
+
static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
{
unsigned long section_nr;
@@ -243,8 +392,8 @@ static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
memblock_remove(lmb->base_addr, block_sz);
dlpar_release_drc(lmb->drc_index);
+ dlpar_remove_device_tree_lmb(lmb);
- lmb->flags &= ~DRCONF_MEM_ASSIGNED;
return 0;
}
@@ -384,43 +533,32 @@ static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop)
#endif /* CONFIG_MEMORY_HOTREMOVE */
-static int dlpar_add_lmb(struct of_drconf_cell *lmb)
+static int dlpar_add_lmb_memory(struct of_drconf_cell *lmb)
{
struct memory_block *mem_block;
unsigned long block_sz;
int nid, rc;
- if (lmb->flags & DRCONF_MEM_ASSIGNED)
- return -EINVAL;
-
block_sz = memory_block_size_bytes();
- rc = dlpar_acquire_drc(lmb->drc_index);
- if (rc)
- return rc;
-
/* Find the node id for this address */
nid = memory_add_physaddr_to_nid(lmb->base_addr);
/* Add the memory */
rc = add_memory(nid, lmb->base_addr, block_sz);
- if (rc) {
- dlpar_release_drc(lmb->drc_index);
+ if (rc)
return rc;
- }
/* Register this block of memory */
rc = memblock_add(lmb->base_addr, block_sz);
if (rc) {
remove_memory(nid, lmb->base_addr, block_sz);
- dlpar_release_drc(lmb->drc_index);
return rc;
}
mem_block = lmb_to_memblock(lmb);
if (!mem_block) {
remove_memory(nid, lmb->base_addr, block_sz);
- dlpar_release_drc(lmb->drc_index);
return -EINVAL;
}
@@ -428,7 +566,6 @@ static int dlpar_add_lmb(struct of_drconf_cell *lmb)
put_device(&mem_block->dev);
if (rc) {
remove_memory(nid, lmb->base_addr, block_sz);
- dlpar_release_drc(lmb->drc_index);
return rc;
}
@@ -436,6 +573,34 @@ static int dlpar_add_lmb(struct of_drconf_cell *lmb)
return 0;
}
+static int dlpar_add_lmb(struct of_drconf_cell *lmb)
+{
+ int rc;
+
+ if (lmb->flags & DRCONF_MEM_ASSIGNED)
+ return -EINVAL;
+
+ rc = dlpar_acquire_drc(lmb->drc_index);
+ if (rc)
+ return rc;
+
+ rc = dlpar_add_device_tree_lmb(lmb);
+ if (rc) {
+ pr_err("Couldn't update device tree for drc index %x\n",
+ lmb->drc_index);
+ dlpar_release_drc(lmb->drc_index);
+ return rc;
+ }
+
+ rc = dlpar_add_lmb_memory(lmb);
+ if (rc) {
+ dlpar_remove_device_tree_lmb(lmb);
+ dlpar_release_drc(lmb->drc_index);
+ }
+
+ return rc;
+}
+
static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop)
{
struct of_drconf_cell *lmbs;
@@ -536,31 +701,6 @@ static int dlpar_memory_add_by_index(u32 drc_index, struct property *prop)
return rc;
}
-static void dlpar_update_drconf_property(struct device_node *dn,
- struct property *prop)
-{
- struct of_drconf_cell *lmbs;
- u32 num_lmbs, *p;
- int i;
-
- /* Convert the property back to BE */
- p = prop->value;
- num_lmbs = *p;
- *p = cpu_to_be32(*p);
- p++;
-
- lmbs = (struct of_drconf_cell *)p;
- for (i = 0; i < num_lmbs; i++) {
- lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr);
- lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index);
- lmbs[i].flags = cpu_to_be32(lmbs[i].flags);
- }
-
- rtas_hp_event = true;
- of_update_property(dn, prop);
- rtas_hp_event = false;
-}
-
int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
{
struct device_node *dn;
@@ -608,10 +748,7 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
break;
}
- if (rc)
- dlpar_free_drconf_property(prop);
- else
- dlpar_update_drconf_property(dn, prop);
+ dlpar_free_drconf_property(prop);
dlpar_memory_out:
of_node_put(dn);
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index bd98ce2..b7dfc13 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -912,7 +912,8 @@ machine_arch_initcall(pseries, find_existing_ddw_windows);
static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
struct ddw_query_response *query)
{
- struct eeh_dev *edev;
+ struct device_node *dn;
+ struct pci_dn *pdn;
u32 cfg_addr;
u64 buid;
int ret;
@@ -923,11 +924,10 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
* Retrieve them from the pci device, not the node with the
* dma-window property
*/
- edev = pci_dev_to_eeh_dev(dev);
- cfg_addr = edev->config_addr;
- if (edev->pe_config_addr)
- cfg_addr = edev->pe_config_addr;
- buid = edev->phb->buid;
+ dn = pci_device_to_OF_node(dev);
+ pdn = PCI_DN(dn);
+ buid = pdn->phb->buid;
+ cfg_addr = (pdn->busno << 8) | pdn->devfn;
ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query,
cfg_addr, BUID_HI(buid), BUID_LO(buid));
@@ -941,7 +941,8 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
struct ddw_create_response *create, int page_shift,
int window_shift)
{
- struct eeh_dev *edev;
+ struct device_node *dn;
+ struct pci_dn *pdn;
u32 cfg_addr;
u64 buid;
int ret;
@@ -952,11 +953,10 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
* Retrieve them from the pci device, not the node with the
* dma-window property
*/
- edev = pci_dev_to_eeh_dev(dev);
- cfg_addr = edev->config_addr;
- if (edev->pe_config_addr)
- cfg_addr = edev->pe_config_addr;
- buid = edev->phb->buid;
+ dn = pci_device_to_OF_node(dev);
+ pdn = PCI_DN(dn);
+ buid = pdn->phb->buid;
+ cfg_addr = (pdn->busno << 8) | pdn->devfn;
do {
/* extra outputs are LIOBN and dma-addr (hi, lo) */
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 2415a0d..7f6100d 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -89,18 +89,21 @@ void vpa_init(int cpu)
"%lx failed with %ld\n", cpu, hwcpu, addr, ret);
return;
}
+
+#ifdef CONFIG_PPC_STD_MMU_64
/*
* PAPR says this feature is SLB-Buffer but firmware never
* reports that. All SPLPAR support SLB shadow buffer.
*/
- addr = __pa(paca[cpu].slb_shadow_ptr);
- if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ if (!radix_enabled() && firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ addr = __pa(paca[cpu].slb_shadow_ptr);
ret = register_slb_shadow(hwcpu, addr);
if (ret)
pr_err("WARNING: SLB shadow buffer registration for "
"cpu %d (hw %d) of area %lx failed with %ld\n",
cpu, hwcpu, addr, ret);
}
+#endif /* CONFIG_PPC_STD_MMU_64 */
/*
* Register dispatch trace log, if one has been allocated.
@@ -123,6 +126,8 @@ void vpa_init(int cpu)
}
}
+#ifdef CONFIG_PPC_STD_MMU_64
+
static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
unsigned long vpn, unsigned long pa,
unsigned long rflags, unsigned long vflags,
@@ -139,7 +144,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
hpte_group, vpn, pa, rflags, vflags, psize);
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
- hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
+ hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags;
if (!(vflags & HPTE_V_BOLTED))
pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
@@ -152,10 +157,6 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
/* Exact = 0 */
flags = 0;
- /* Make pHyp happy */
- if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU))
- hpte_r &= ~HPTE_R_M;
-
if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
flags |= H_COALESCE_CAND;
@@ -659,6 +660,8 @@ static void pSeries_set_page_state(struct page *page, int order,
void arch_free_page(struct page *page, int order)
{
+ if (radix_enabled())
+ return;
if (!cmo_free_hint_flag || !firmware_has_feature(FW_FEATURE_CMO))
return;
@@ -666,7 +669,8 @@ void arch_free_page(struct page *page, int order)
}
EXPORT_SYMBOL(arch_free_page);
-#endif
+#endif /* CONFIG_PPC_SMLPAR */
+#endif /* CONFIG_PPC_STD_MMU_64 */
#ifdef CONFIG_TRACEPOINTS
#ifdef HAVE_JUMP_LABEL
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index c9fecf0..afa05a2 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -484,8 +484,9 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
seq_printf(m, "shared_processor_mode=%d\n",
lppaca_shared_proc(get_lppaca()));
+#ifdef CONFIG_PPC_STD_MMU_64
seq_printf(m, "slb_size=%d\n", mmu_slb_size);
-
+#endif
parse_em_data(m);
return 0;
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index ceb18d3..a560a98 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -191,8 +191,8 @@ static int update_dt_node(__be32 phandle, s32 scope)
break;
case 0x80000000:
- prop = of_find_property(dn, prop_name, NULL);
- of_remove_property(dn, prop);
+ of_remove_property(dn, of_find_property(dn,
+ prop_name, NULL));
prop = NULL;
break;
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 272e9ec..543a638 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -305,7 +305,7 @@ static int msi_quota_for_device(struct pci_dev *dev, int request)
memset(&counts, 0, sizeof(struct msi_counts));
/* Work out how many devices we have below this PE */
- traverse_pci_devices(pe_dn, count_non_bridge_devices, &counts);
+ pci_traverse_device_nodes(pe_dn, count_non_bridge_devices, &counts);
if (counts.num_devices == 0) {
pr_err("rtas_msi: found 0 devices under PE for %s\n",
@@ -320,7 +320,7 @@ static int msi_quota_for_device(struct pci_dev *dev, int request)
/* else, we have some more calculating to do */
counts.requestor = pci_device_to_OF_node(dev);
counts.request = request;
- traverse_pci_devices(pe_dn, count_spare_msis, &counts);
+ pci_traverse_device_nodes(pe_dn, count_spare_msis, &counts);
/* If the quota isn't an integer multiple of the total, we can
* use the remainder as spare MSIs for anyone that wants them. */
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 5d4a3df..906dbaa 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -34,38 +34,6 @@
#include "pseries.h"
-static struct pci_bus *
-find_bus_among_children(struct pci_bus *bus,
- struct device_node *dn)
-{
- struct pci_bus *child = NULL;
- struct pci_bus *tmp;
- struct device_node *busdn;
-
- busdn = pci_bus_to_OF_node(bus);
- if (busdn == dn)
- return bus;
-
- list_for_each_entry(tmp, &bus->children, node) {
- child = find_bus_among_children(tmp, dn);
- if (child)
- break;
- };
- return child;
-}
-
-struct pci_bus *
-pcibios_find_pci_bus(struct device_node *dn)
-{
- struct pci_dn *pdn = dn->data;
-
- if (!pdn || !pdn->phb || !pdn->phb->bus)
- return NULL;
-
- return find_bus_among_children(pdn->phb->bus, dn);
-}
-EXPORT_SYMBOL_GPL(pcibios_find_pci_bus);
-
struct pci_controller *init_phb_dynamic(struct device_node *dn)
{
struct pci_controller *phb;
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 7c7fcc0..cc66c49 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -303,7 +303,6 @@ static int do_remove_property(char *buf, size_t bufsize)
{
struct device_node *np;
char *tmp;
- struct property *prop;
buf = parse_node(buf, bufsize, &np);
if (!np)
@@ -316,9 +315,7 @@ static int do_remove_property(char *buf, size_t bufsize)
if (strlen(buf) == 0)
return -EINVAL;
- prop = of_find_property(np, buf, NULL);
-
- return of_remove_property(np, prop);
+ return of_remove_property(np, of_find_property(np, buf, NULL));
}
static int do_update_property(char *buf, size_t bufsize)
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 6e944fc..9883bc7 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -235,6 +235,8 @@ static void __init pseries_discover_pic(void)
for_each_node_by_name(np, "interrupt-controller") {
typep = of_get_property(np, "compatible", NULL);
+ if (!typep)
+ continue;
if (strstr(typep, "open-pic")) {
pSeries_mpic_node = of_node_get(np);
ppc_md.init_IRQ = pseries_mpic_init_IRQ;
@@ -265,7 +267,7 @@ static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long act
pdn = parent ? PCI_DN(parent) : NULL;
if (pdn) {
/* Create pdn and EEH device */
- update_dn_pci_info(np, pdn->phb);
+ pci_add_device_node_info(pdn->phb, np);
eeh_dev_init(PCI_DN(np), pdn->phb);
}
OpenPOWER on IntegriCloud