From 4a4bdfea7cb75b5c8b222932796f64a14027d512 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Wed, 10 Feb 2016 11:10:44 -0600 Subject: powerpc/pseries: Refactor dlpar_add_lmb() code Re-factor dlpar_lmb_add() routine by moving the validation of the lmb flags and the acquireing of the DRC to a wrapper around the work to add the memory to the system. This is done to make handling of errors during the addition of the memory easier and to facilitate the upcoming addition of updating the lmb's affinity prior to adding the memory. Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/hotplug-memory.c | 34 +++++++++++++++---------- 1 file changed, 20 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index e9ff44c..294acfd 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -384,43 +384,32 @@ static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop) #endif /* CONFIG_MEMORY_HOTREMOVE */ -static int dlpar_add_lmb(struct of_drconf_cell *lmb) +static int dlpar_add_lmb_memory(struct of_drconf_cell *lmb) { struct memory_block *mem_block; unsigned long block_sz; int nid, rc; - if (lmb->flags & DRCONF_MEM_ASSIGNED) - return -EINVAL; - block_sz = memory_block_size_bytes(); - rc = dlpar_acquire_drc(lmb->drc_index); - if (rc) - return rc; - /* Find the node id for this address */ nid = memory_add_physaddr_to_nid(lmb->base_addr); /* Add the memory */ rc = add_memory(nid, lmb->base_addr, block_sz); - if (rc) { - dlpar_release_drc(lmb->drc_index); + if (rc) return rc; - } /* Register this block of memory */ rc = memblock_add(lmb->base_addr, block_sz); if (rc) { remove_memory(nid, lmb->base_addr, block_sz); - dlpar_release_drc(lmb->drc_index); return rc; } mem_block = lmb_to_memblock(lmb); if (!mem_block) { remove_memory(nid, lmb->base_addr, block_sz); - dlpar_release_drc(lmb->drc_index); return -EINVAL; } @@ -428,7 +417,6 @@ static int dlpar_add_lmb(struct of_drconf_cell *lmb) put_device(&mem_block->dev); if (rc) { remove_memory(nid, lmb->base_addr, block_sz); - dlpar_release_drc(lmb->drc_index); return rc; } @@ -436,6 +424,24 @@ static int dlpar_add_lmb(struct of_drconf_cell *lmb) return 0; } +static int dlpar_add_lmb(struct of_drconf_cell *lmb) +{ + int rc; + + if (lmb->flags & DRCONF_MEM_ASSIGNED) + return -EINVAL; + + rc = dlpar_acquire_drc(lmb->drc_index); + if (rc) + return rc; + + rc = dlpar_add_lmb_memory(lmb); + if (rc) + dlpar_release_drc(lmb->drc_index); + + return rc; +} + static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop) { struct of_drconf_cell *lmbs; -- cgit v1.1 From bdf5fc6338047cfeba98d2ab629997f3013e610f Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Wed, 10 Feb 2016 11:12:13 -0600 Subject: powerpc/pseries: Update LMB associativity index during DLPAR add/remove The associativity array index specified for a LMB in the device tree, /ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory, needs to be updated prior to DLPAR adding a LMB and after DLPAR removing a LMB. Without doing this step in the DLPAR add process a LMB could be configured with the incorrect affinity. For a LMB that was not present at boot the affinity index is set to 0xffffffff, which defaults to adding the LMB to the first online node since the index is not a valid value. Or, the affinity index could contain a stale value if the LMB was present at boot but later DLPAR removed and is being DLPAR added back to the system. This patch adds a step in the DLPAR add flow to look up the associativity index for a LMB prior to adding a LMB and setting the associativity to 0xffffffff when a LMB is removed. This patch also modifies the DLPAR add/remove flow to no longer do a single update of the device tree property after all of the requested DLPAR operations are complete and now does a property update during the add or remove of each LMB. Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/hotplug-memory.c | 193 ++++++++++++++++++++---- 1 file changed, 162 insertions(+), 31 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 294acfd..2ce1385 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -116,6 +116,155 @@ static struct property *dlpar_clone_drconf_property(struct device_node *dn) return new_prop; } +static void dlpar_update_drconf_property(struct device_node *dn, + struct property *prop) +{ + struct of_drconf_cell *lmbs; + u32 num_lmbs, *p; + int i; + + /* Convert the property back to BE */ + p = prop->value; + num_lmbs = *p; + *p = cpu_to_be32(*p); + p++; + + lmbs = (struct of_drconf_cell *)p; + for (i = 0; i < num_lmbs; i++) { + lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr); + lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index); + lmbs[i].flags = cpu_to_be32(lmbs[i].flags); + } + + rtas_hp_event = true; + of_update_property(dn, prop); + rtas_hp_event = false; +} + +static int dlpar_update_device_tree_lmb(struct of_drconf_cell *lmb) +{ + struct device_node *dn; + struct property *prop; + struct of_drconf_cell *lmbs; + u32 *p, num_lmbs; + int i; + + dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); + if (!dn) + return -ENODEV; + + prop = dlpar_clone_drconf_property(dn); + if (!prop) { + of_node_put(dn); + return -ENODEV; + } + + p = prop->value; + num_lmbs = *p++; + lmbs = (struct of_drconf_cell *)p; + + for (i = 0; i < num_lmbs; i++) { + if (lmbs[i].drc_index == lmb->drc_index) { + lmbs[i].flags = lmb->flags; + lmbs[i].aa_index = lmb->aa_index; + + dlpar_update_drconf_property(dn, prop); + break; + } + } + + of_node_put(dn); + return 0; +} + +static u32 lookup_lmb_associativity_index(struct of_drconf_cell *lmb) +{ + struct device_node *parent, *lmb_node, *dr_node; + const u32 *lmb_assoc; + const u32 *assoc_arrays; + u32 aa_index; + int aa_arrays, aa_array_entries, aa_array_sz; + int i; + + parent = of_find_node_by_path("/"); + if (!parent) + return -ENODEV; + + lmb_node = dlpar_configure_connector(cpu_to_be32(lmb->drc_index), + parent); + of_node_put(parent); + if (!lmb_node) + return -EINVAL; + + lmb_assoc = of_get_property(lmb_node, "ibm,associativity", NULL); + if (!lmb_assoc) { + dlpar_free_cc_nodes(lmb_node); + return -ENODEV; + } + + dr_node = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); + if (!dr_node) { + dlpar_free_cc_nodes(lmb_node); + return -ENODEV; + } + + assoc_arrays = of_get_property(dr_node, + "ibm,associativity-lookup-arrays", + NULL); + of_node_put(dr_node); + if (!assoc_arrays) { + dlpar_free_cc_nodes(lmb_node); + return -ENODEV; + } + + /* The ibm,associativity-lookup-arrays property is defined to be + * a 32-bit value specifying the number of associativity arrays + * followed by a 32-bitvalue specifying the number of entries per + * array, followed by the associativity arrays. + */ + aa_arrays = be32_to_cpu(assoc_arrays[0]); + aa_array_entries = be32_to_cpu(assoc_arrays[1]); + aa_array_sz = aa_array_entries * sizeof(u32); + + aa_index = -1; + for (i = 0; i < aa_arrays; i++) { + int indx = (i * aa_array_entries) + 2; + + if (memcmp(&assoc_arrays[indx], &lmb_assoc[1], aa_array_sz)) + continue; + + aa_index = i; + break; + } + + dlpar_free_cc_nodes(lmb_node); + return aa_index; +} + +static int dlpar_add_device_tree_lmb(struct of_drconf_cell *lmb) +{ + int aa_index; + + lmb->flags |= DRCONF_MEM_ASSIGNED; + + aa_index = lookup_lmb_associativity_index(lmb); + if (aa_index < 0) { + pr_err("Couldn't find associativity index for drc index %x\n", + lmb->drc_index); + return aa_index; + } + + lmb->aa_index = aa_index; + return dlpar_update_device_tree_lmb(lmb); +} + +static int dlpar_remove_device_tree_lmb(struct of_drconf_cell *lmb) +{ + lmb->flags &= ~DRCONF_MEM_ASSIGNED; + lmb->aa_index = 0xffffffff; + return dlpar_update_device_tree_lmb(lmb); +} + static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb) { unsigned long section_nr; @@ -243,8 +392,8 @@ static int dlpar_remove_lmb(struct of_drconf_cell *lmb) memblock_remove(lmb->base_addr, block_sz); dlpar_release_drc(lmb->drc_index); + dlpar_remove_device_tree_lmb(lmb); - lmb->flags &= ~DRCONF_MEM_ASSIGNED; return 0; } @@ -435,9 +584,19 @@ static int dlpar_add_lmb(struct of_drconf_cell *lmb) if (rc) return rc; + rc = dlpar_add_device_tree_lmb(lmb); + if (rc) { + pr_err("Couldn't update device tree for drc index %x\n", + lmb->drc_index); + dlpar_release_drc(lmb->drc_index); + return rc; + } + rc = dlpar_add_lmb_memory(lmb); - if (rc) + if (rc) { + dlpar_remove_device_tree_lmb(lmb); dlpar_release_drc(lmb->drc_index); + } return rc; } @@ -542,31 +701,6 @@ static int dlpar_memory_add_by_index(u32 drc_index, struct property *prop) return rc; } -static void dlpar_update_drconf_property(struct device_node *dn, - struct property *prop) -{ - struct of_drconf_cell *lmbs; - u32 num_lmbs, *p; - int i; - - /* Convert the property back to BE */ - p = prop->value; - num_lmbs = *p; - *p = cpu_to_be32(*p); - p++; - - lmbs = (struct of_drconf_cell *)p; - for (i = 0; i < num_lmbs; i++) { - lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr); - lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index); - lmbs[i].flags = cpu_to_be32(lmbs[i].flags); - } - - rtas_hp_event = true; - of_update_property(dn, prop); - rtas_hp_event = false; -} - int dlpar_memory(struct pseries_hp_errorlog *hp_elog) { struct device_node *dn; @@ -614,10 +748,7 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) break; } - if (rc) - dlpar_free_drconf_property(prop); - else - dlpar_update_drconf_property(dn, prop); + dlpar_free_drconf_property(prop); dlpar_memory_out: of_node_put(dn); -- cgit v1.1 From f8a25db47ebc11fe228735d916e480d4b2ebe611 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Tue, 15 Mar 2016 21:14:12 +1100 Subject: powerpc/powernv: Use the "unknown" checkstop type as a fallback The HMI code knows about three types of errors: CORE, NX and UNKNOWN. If OPAL were to add a new type, it would not be handled at all since there is no fallback case. Instead of explicitly checking for UNKNOWN, treat any checkstop type without a handler as unknown. Signed-off-by: Russell Currey Reviewed-by: Daniel Axtens Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-hmi.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c index d000f4e..c0a8201 100644 --- a/arch/powerpc/platforms/powernv/opal-hmi.c +++ b/arch/powerpc/platforms/powernv/opal-hmi.c @@ -150,15 +150,17 @@ static void print_nx_checkstop_reason(const char *level, static void print_checkstop_reason(const char *level, struct OpalHMIEvent *hmi_evt) { - switch (hmi_evt->u.xstop_error.xstop_type) { + uint8_t type = hmi_evt->u.xstop_error.xstop_type; + switch (type) { case CHECKSTOP_TYPE_CORE: print_core_checkstop_reason(level, hmi_evt); break; case CHECKSTOP_TYPE_NX: print_nx_checkstop_reason(level, hmi_evt); break; - case CHECKSTOP_TYPE_UNKNOWN: - printk("%s Unknown Malfunction Alert.\n", level); + default: + printk("%s Unknown Malfunction Alert of type %d\n", + level, type); break; } } -- cgit v1.1 From 8038665fb54f1e54785c63be111647abebfe9f20 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Sun, 27 Mar 2016 18:08:15 -0400 Subject: powerpc/cell: Make spu_base.c explicitly non-modular The Kconfig currently controlling compilation of this code is: arch/powerpc/platforms/cell/Kconfig:config SPU_BASE arch/powerpc/platforms/cell/Kconfig: bool ...meaning that it currently is not being built as a module by anyone. Lets remove the modular code that is essentially orphaned, so that when reading the driver there is no doubt it is builtin-only. Since module_init translates to device_initcall in the non-modular case, the init ordering remains unchanged with this commit. We also delete the MODULE_LICENSE tag etc. since all that information is already contained at the top of the file in the comments. Cc: Arnd Bergmann Signed-off-by: Paul Gortmaker Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/cell/spu_base.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index f7af74f..3ede04f 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include #include @@ -805,7 +805,4 @@ static int __init init_spu_base(void) out: return ret; } -module_init(init_spu_base); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arnd Bergmann "); +device_initcall(init_spu_base); -- cgit v1.1 From c7d54842deb1fa357cff75b988275a1c9f259140 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:25:30 +1000 Subject: powerpc/mm: Use _PAGE_READ to indicate Read access This splits the _PAGE_RW bit into _PAGE_READ and _PAGE_WRITE. It also removes the dependency on _PAGE_USER for implying read only. Few things to note here is that, we have read implied with write and execute permission. Hence we should always find _PAGE_READ set on hash pte fault. We still can't switch PROT_NONE to !(_PAGE_RWX). Auto numa depends on marking a prot none pte _PAGE_WRITE. (For more details look at b191f9b106ea "mm: numa: preserve PTE write permissions across a NUMA hinting fault") Cc: Arnd Bergmann Cc: Jeremy Kerr Cc: Frederic Barrat Acked-by: Ian Munsie Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/cell/spu_base.c | 2 +- arch/powerpc/platforms/cell/spufs/fault.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 3ede04f..3cbe38f 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -197,7 +197,7 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) (REGION_ID(ea) != USER_REGION_ID)) { spin_unlock(&spu->register_lock); - ret = hash_page(ea, _PAGE_PRESENT, 0x300, dsisr); + ret = hash_page(ea, _PAGE_PRESENT | _PAGE_READ, 0x300, dsisr); spin_lock(&spu->register_lock); if (!ret) { diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index d98f845..c3a3bf1 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -141,8 +141,8 @@ int spufs_handle_class1(struct spu_context *ctx) /* we must not hold the lock when entering copro_handle_mm_fault */ spu_release(ctx); - access = (_PAGE_PRESENT | _PAGE_USER); - access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL; + access = (_PAGE_PRESENT | _PAGE_READ | _PAGE_USER); + access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_WRITE : 0UL; local_irq_save(flags); ret = hash_page(ea, access, 0x300, dsisr); local_irq_restore(flags); -- cgit v1.1 From ac29c64089b74d107edb90879e63a2f7a03cd66b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:25:34 +1000 Subject: powerpc/mm: Replace _PAGE_USER with _PAGE_PRIVILEGED _PAGE_PRIVILEGED means the page can be accessed only by the kernel. This is done to keep pte bits similar to PowerISA 3.0 Radix PTE format. User pages are now marked by clearing _PAGE_PRIVILEGED bit. Previously we allowed the kernel to have a privileged page in the lower address range (USER_REGION). With this patch such access is denied. We also prevent a kernel access to a non-privileged page in higher address range (ie, REGION_ID != 0). Both the above access scenarios should never happen. Cc: Arnd Bergmann Cc: Jeremy Kerr Cc: Frederic Barrat Acked-by: Ian Munsie Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/cell/spufs/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index c3a3bf1..e29e4d5 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -141,7 +141,7 @@ int spufs_handle_class1(struct spu_context *ctx) /* we must not hold the lock when entering copro_handle_mm_fault */ spu_release(ctx); - access = (_PAGE_PRESENT | _PAGE_READ | _PAGE_USER); + access = (_PAGE_PRESENT | _PAGE_READ); access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_WRITE : 0UL; local_irq_save(flags); ret = hash_page(ea, access, 0x300, dsisr); -- cgit v1.1 From e58e87adc8bf92e9c6abb1dc1f4af2500aa07ff3 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:25:36 +1000 Subject: powerpc/mm: Update _PAGE_KERNEL_RO PS3 had used a PPP bit hack to implement a read only mapping in the kernel area. Since we are bolting the ioremap area, it used the pte flags _PAGE_PRESENT | _PAGE_USER to get a PPP value of 0x3 there by resulting in a read only mapping. This means the area can be accessed by user space, but kernel will never return such an address to user space. But we can do better by implementing a read only kernel mapping using PPP bits 0b110. This also allows us to do read only kernel mapping for radix in later patches. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/ps3/spu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c index a0bca05..5e8a40f 100644 --- a/arch/powerpc/platforms/ps3/spu.c +++ b/arch/powerpc/platforms/ps3/spu.c @@ -205,7 +205,7 @@ static void spu_unmap(struct spu *spu) static int __init setup_areas(struct spu *spu) { struct table {char* name; unsigned long addr; unsigned long size;}; - static const unsigned long shadow_flags = _PAGE_NO_CACHE | 3; + unsigned long shadow_flags = pgprot_val(pgprot_noncached_wc(PAGE_KERNEL_RO)); spu_pdata(spu)->shadow = __ioremap(spu_pdata(spu)->shadow_addr, sizeof(struct spe_shadow), -- cgit v1.1 From 72176dd0ad36c6d8e13515d085f7a229a55a2985 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:25:37 +1000 Subject: powerpc/mm: Use a helper for finding pte bits mapping I/O area Use a helper instead of open coding with constants. A later patch will drop the WIMG bits and use PowerISA 3.0 defines. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/ps3/spu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c index 5e8a40f..492b257 100644 --- a/arch/powerpc/platforms/ps3/spu.c +++ b/arch/powerpc/platforms/ps3/spu.c @@ -216,7 +216,7 @@ static int __init setup_areas(struct spu *spu) } spu->local_store = (__force void *)ioremap_prot(spu->local_store_phys, - LS_SIZE, _PAGE_NO_CACHE); + LS_SIZE, pgprot_val(pgprot_noncached_wc(__pgprot(0)))); if (!spu->local_store) { pr_debug("%s:%d: ioremap local_store failed\n", -- cgit v1.1 From 30bda41aba4efb2370c97e2cbe7385de93ccc372 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:25:38 +1000 Subject: powerpc/mm: Drop WIMG in favour of new constants PowerISA 3.0 introduces two pte bits with the below meaning for radix: 00 -> Normal Memory 01 -> Strong Access Order (SAO) 10 -> Non idempotent I/O (Cache inhibited and guarded) 11 -> Tolerant I/O (Cache inhibited) We drop the existing WIMG bits in the Linux page table in favour of the above constants. We loose _PAGE_WRITETHRU with this conversion. We only use writethru via pgprot_cached_wthru() which is used by fbdev/controlfb.c which is Apple control display and also PPC32. With respect to _PAGE_COHERENCE, we have been marking hpte always coherent for some time now. htab_convert_pte_flags() always added HPTE_R_M. NOTE: KVM changes need closer review. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/lpar.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 2415a0d..0d46089 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -152,10 +152,6 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, /* Exact = 0 */ flags = 0; - /* Make pHyp happy */ - if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU)) - hpte_r &= ~HPTE_R_M; - if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N)) flags |= H_COALESCE_CAND; -- cgit v1.1 From 50de596de8be6de75401a2190b90a822e8a1bab2 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:25:43 +1000 Subject: powerpc/mm/hash: Add support for Power9 Hash PowerISA 3.0 adds a parition table indexed by LPID. Parition table allows us to specify the MMU model that will be used for guest and host translation. This patch adds support with SLB based hash model (UPRT = 0). What is required with this model is to support the new hash page table entry format and also setup partition table such that we use hash table for address translation. We don't have segment table support yet. In order to make sure we don't load KVM module on Power9 (since we don't have kvm support yet) this patch also disables KVM on Power9. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/ps3/htab.c | 2 +- arch/powerpc/platforms/pseries/lpar.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index 2f95d33..c9a3e67 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -63,7 +63,7 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn, vflags &= ~HPTE_V_SECONDARY; hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize) | rflags; + hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize, ssize) | rflags; spin_lock_irqsave(&ps3_htab_lock, flags); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 0d46089..e4ceba2 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -139,7 +139,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, hpte_group, vpn, pa, rflags, vflags, psize); hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; + hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags; if (!(vflags & HPTE_V_BOLTED)) pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); -- cgit v1.1 From 566ca99af026d43c7cf7ab72f78463dbcc7e6ac2 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:25:53 +1000 Subject: powerpc/mm/radix: Add dummy radix_enabled() In this patch we add the radix Kconfig and conditional check. radix_enabled() is written to always return 0 here. Once we have all needed radix changes added, we will update this to an mmu_feature check. We need to add this early so that we can get it all build in the early stage. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/Kconfig.cputype | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 142dff5..8d2cfb8 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -331,6 +331,15 @@ config PPC_STD_MMU_64 def_bool y depends on PPC_STD_MMU && PPC64 +config PPC_RADIX_MMU + bool "Radix MMU Support" + depends on PPC_BOOK3S_64 + default y + help + Enable support for the Power ISA 3.0 Radix style MMU. Currently this + is only implemented by IBM Power9 CPUs, if you don't have one of them + you can probably disable this. + config PPC_MMU_NOHASH def_bool y depends on !PPC_STD_MMU -- cgit v1.1 From 2bfd65e45e877fb5704730244da67c748d28a1b8 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:25:58 +1000 Subject: powerpc/mm/radix: Add radix callbacks for early init routines This adds routines for early setup for radix. We use device tree property "ibm,processor-radix-AP-encodings" to find supported page sizes. If we don't find the above we consider 64K and 4K as supported page sizes. We do map vmemap using 2M page size if we can. The linear mapping is done such that we use required page size for that range. For example memory of 3.5G is mapped such that we use 1G mapping till 3G range and use 2M mapping for the rest. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/setup.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 1acb0c7..ee6430b 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -273,7 +273,10 @@ static int __init pnv_probe(void) if (!of_flat_dt_is_compatible(root, "ibm,powernv")) return 0; - hpte_init_native(); + if (IS_ENABLED(CONFIG_PPC_RADIX_MMU) && radix_enabled()) + radix_init_native(); + else if (IS_ENABLED(CONFIG_PPC_STD_MMU_64)) + hpte_init_native(); if (firmware_has_feature(FW_FEATURE_OPAL)) pnv_setup_machdep_opal(); -- cgit v1.1 From d8c476eeb697c9aac46b390e9cbd7af7032814f1 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:26:08 +1000 Subject: powerpc/mm/radix: Isolate hash table function from pseries guest code Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/lpar.c | 14 +++++++++++--- arch/powerpc/platforms/pseries/lparcfg.c | 3 ++- 2 files changed, 13 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index e4ceba2..7f6100d 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -89,18 +89,21 @@ void vpa_init(int cpu) "%lx failed with %ld\n", cpu, hwcpu, addr, ret); return; } + +#ifdef CONFIG_PPC_STD_MMU_64 /* * PAPR says this feature is SLB-Buffer but firmware never * reports that. All SPLPAR support SLB shadow buffer. */ - addr = __pa(paca[cpu].slb_shadow_ptr); - if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + if (!radix_enabled() && firmware_has_feature(FW_FEATURE_SPLPAR)) { + addr = __pa(paca[cpu].slb_shadow_ptr); ret = register_slb_shadow(hwcpu, addr); if (ret) pr_err("WARNING: SLB shadow buffer registration for " "cpu %d (hw %d) of area %lx failed with %ld\n", cpu, hwcpu, addr, ret); } +#endif /* CONFIG_PPC_STD_MMU_64 */ /* * Register dispatch trace log, if one has been allocated. @@ -123,6 +126,8 @@ void vpa_init(int cpu) } } +#ifdef CONFIG_PPC_STD_MMU_64 + static long pSeries_lpar_hpte_insert(unsigned long hpte_group, unsigned long vpn, unsigned long pa, unsigned long rflags, unsigned long vflags, @@ -655,6 +660,8 @@ static void pSeries_set_page_state(struct page *page, int order, void arch_free_page(struct page *page, int order) { + if (radix_enabled()) + return; if (!cmo_free_hint_flag || !firmware_has_feature(FW_FEATURE_CMO)) return; @@ -662,7 +669,8 @@ void arch_free_page(struct page *page, int order) } EXPORT_SYMBOL(arch_free_page); -#endif +#endif /* CONFIG_PPC_SMLPAR */ +#endif /* CONFIG_PPC_STD_MMU_64 */ #ifdef CONFIG_TRACEPOINTS #ifdef HAVE_JUMP_LABEL diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index c9fecf0..afa05a2 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -484,8 +484,9 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) seq_printf(m, "shared_processor_mode=%d\n", lppaca_shared_proc(get_lppaca())); +#ifdef CONFIG_PPC_STD_MMU_64 seq_printf(m, "slb_size=%d\n", mmu_slb_size); - +#endif parse_em_data(m); return 0; -- cgit v1.1 From ab62476240d59a9d7d78ca5ff32499dd50db2069 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:26:31 +1000 Subject: powerpc/mm/radix: Add THP support for 4K linux page size This adds THP support for 4K Linux page size config with radix. We still don't do THP with 4K Linux page size and hash page table. Hash page table needs a 16MB hugepage and we can't do THP with 16MM hugepage and 4K Linux page size. We add missing functions to 4K hash config to get it to build and hash__has_transparent_hugepage() makes sure we don't enable THP for 4K hash config. To catch wrong usage of THP related with 4K config, we add BUG() in those dummy functions we added to get it compile. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/Kconfig.cputype | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 8d2cfb8..77e9b8d 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -72,7 +72,7 @@ config PPC_BOOK3S_64 select PPC_FPU select PPC_HAVE_PMU_SUPPORT select SYS_SUPPORTS_HUGETLBFS - select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES + select HAVE_ARCH_TRANSPARENT_HUGEPAGE select ARCH_SUPPORTS_NUMA_BALANCING select IRQ_WORK -- cgit v1.1 From 9e44754755528f78dbc3a5e64cd48dadf4773777 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 2 May 2016 17:06:12 +1000 Subject: powerpc/powernv/pci: Fix cfg_dbg() & replace with pr_devel() When cfg_dbg() is enabled (i.e. mapped to printk()), gcc produces errors as the __func__ parameter is missing (pnv_pci_cfg_read() has one); this adds the missing parameter. cfg_dbg() is just an inferior version of pr_devel() so use the latter instead. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 73c8dc2..0db20ae 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -39,9 +39,6 @@ /* Delay in usec */ #define PCI_RESET_DELAY_US 3000000 -#define cfg_dbg(fmt...) do { } while(0) -//#define cfg_dbg(fmt...) printk(fmt) - #ifdef CONFIG_PCI_MSI int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { @@ -402,8 +399,8 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn) } } - cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n", - (pdn->busno << 8) | (pdn->devfn), pe_no, fstate); + pr_devel(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n", + (pdn->busno << 8) | (pdn->devfn), pe_no, fstate); /* Clear the frozen state if applicable */ if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE || @@ -451,8 +448,8 @@ int pnv_pci_cfg_read(struct pci_dn *pdn, return PCIBIOS_FUNC_NOT_SUPPORTED; } - cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n", - __func__, pdn->busno, pdn->devfn, where, size, *val); + pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n", + __func__, pdn->busno, pdn->devfn, where, size, *val); return PCIBIOS_SUCCESSFUL; } @@ -462,8 +459,8 @@ int pnv_pci_cfg_write(struct pci_dn *pdn, struct pnv_phb *phb = pdn->phb->private_data; u32 bdfn = (pdn->busno << 8) | pdn->devfn; - cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n", - pdn->busno, pdn->devfn, where, size, val); + pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n", + __func__, pdn->busno, pdn->devfn, where, size, val); switch (size) { case 1: opal_pci_config_write_byte(phb->opal_id, bdfn, where, val); -- cgit v1.1 From b2ed059642875a699a195cb136b2e57e7e07cedf Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Thu, 28 Apr 2016 15:53:45 +1000 Subject: powerpc/pseries: Add null property check to pseries_discover_pic() The return value of of_get_property() isn't checked before it is passed to the strstr() function, if it happens that the return value is null then this will result in a null pointer being dereferenced. Add a check to see if the return value of of_get_property() is null and if it is continue straight on to the next node. Signed-off-by: Suraj Jitindar Singh Reviewed-by: Chris Smart Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/setup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 6e944fc..fa73494 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -235,6 +235,8 @@ static void __init pseries_discover_pic(void) for_each_node_by_name(np, "interrupt-controller") { typep = of_get_property(np, "compatible", NULL); + if (!typep) + continue; if (strstr(typep, "open-pic")) { pSeries_mpic_node = of_node_get(np); ppc_md.init_IRQ = pseries_mpic_init_IRQ; -- cgit v1.1 From 925e2d1ded80fcc92210c9a9e5009887553dd83f Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Thu, 28 Apr 2016 15:34:55 +1000 Subject: powerpc: Update of_remove_property() call sites to remove null checking After obtaining a property from of_find_property() and before calling of_remove_property() most code checks to ensure that the property returned from of_find_property() is not null. The previous patch moved this check to the start of the function of_remove_property() in order to avoid the case where this check isn't done and a null value is passed. This ensures the check is always conducted before taking locks and attempting to remove the property. Thus it is no longer necessary to perform a check for null values before invoking of_remove_property(). Update of_remove_property() call sites in order to remove redundant checking for null property value as check is now performed within the of_remove_property function(). Signed-off-by: Suraj Jitindar Singh [mpe: Unbreak some lines which are just >80 chars for readability] Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/mobility.c | 4 ++-- arch/powerpc/platforms/pseries/reconfig.c | 5 +---- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index ceb18d3..a560a98 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -191,8 +191,8 @@ static int update_dt_node(__be32 phandle, s32 scope) break; case 0x80000000: - prop = of_find_property(dn, prop_name, NULL); - of_remove_property(dn, prop); + of_remove_property(dn, of_find_property(dn, + prop_name, NULL)); prop = NULL; break; diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index 7c7fcc0..cc66c49 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -303,7 +303,6 @@ static int do_remove_property(char *buf, size_t bufsize) { struct device_node *np; char *tmp; - struct property *prop; buf = parse_node(buf, bufsize, &np); if (!np) @@ -316,9 +315,7 @@ static int do_remove_property(char *buf, size_t bufsize) if (strlen(buf) == 0) return -EINVAL; - prop = of_find_property(np, buf, NULL); - - return of_remove_property(np, prop); + return of_remove_property(np, of_find_property(np, buf, NULL)); } static int do_update_property(char *buf, size_t bufsize) -- cgit v1.1 From cb4224c501802940c41e2f46ae57de08fd0ca119 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 May 2016 15:41:21 +1000 Subject: powerpc/powernv: Cleanup on pci_controller_ops instances This cleans up on below data struct instances to use tab instead of space indent of statement to avoid complains from scripts/checkpatch.pl. No logical changes introduced. @pnv_pci_ioda_controller_ops @pnv_npu_ioda_controller_ops Signed-off-by: Gavin Shan Reviewed-by: Daniel Axtens Reviewed-by: Andrew Donnellan Acked-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 36 +++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index c5baaf3..524c9c7 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3210,31 +3210,31 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose) } static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { - .dma_dev_setup = pnv_pci_dma_dev_setup, - .dma_bus_setup = pnv_pci_dma_bus_setup, + .dma_dev_setup = pnv_pci_dma_dev_setup, + .dma_bus_setup = pnv_pci_dma_bus_setup, #ifdef CONFIG_PCI_MSI - .setup_msi_irqs = pnv_setup_msi_irqs, - .teardown_msi_irqs = pnv_teardown_msi_irqs, + .setup_msi_irqs = pnv_setup_msi_irqs, + .teardown_msi_irqs = pnv_teardown_msi_irqs, #endif - .enable_device_hook = pnv_pci_enable_device_hook, - .window_alignment = pnv_pci_window_alignment, - .reset_secondary_bus = pnv_pci_reset_secondary_bus, - .dma_set_mask = pnv_pci_ioda_dma_set_mask, - .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask, - .shutdown = pnv_pci_ioda_shutdown, + .enable_device_hook = pnv_pci_enable_device_hook, + .window_alignment = pnv_pci_window_alignment, + .reset_secondary_bus = pnv_pci_reset_secondary_bus, + .dma_set_mask = pnv_pci_ioda_dma_set_mask, + .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask, + .shutdown = pnv_pci_ioda_shutdown, }; static const struct pci_controller_ops pnv_npu_ioda_controller_ops = { - .dma_dev_setup = pnv_pci_dma_dev_setup, + .dma_dev_setup = pnv_pci_dma_dev_setup, #ifdef CONFIG_PCI_MSI - .setup_msi_irqs = pnv_setup_msi_irqs, - .teardown_msi_irqs = pnv_teardown_msi_irqs, + .setup_msi_irqs = pnv_setup_msi_irqs, + .teardown_msi_irqs = pnv_teardown_msi_irqs, #endif - .enable_device_hook = pnv_pci_enable_device_hook, - .window_alignment = pnv_pci_window_alignment, - .reset_secondary_bus = pnv_pci_reset_secondary_bus, - .dma_set_mask = pnv_npu_dma_set_mask, - .shutdown = pnv_pci_ioda_shutdown, + .enable_device_hook = pnv_pci_enable_device_hook, + .window_alignment = pnv_pci_window_alignment, + .reset_secondary_bus = pnv_pci_reset_secondary_bus, + .dma_set_mask = pnv_npu_dma_set_mask, + .shutdown = pnv_pci_ioda_shutdown, }; static void __init pnv_pci_init_ioda_phb(struct device_node *np, -- cgit v1.1 From 475d92c27f3b32ea31f5f7c4ed7009a7b895b9e7 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 May 2016 15:41:22 +1000 Subject: powerpc/powernv: Drop phb->bdfn_to_pe() The last usage of pnv_phb::bdfn_to_pe() was removed in ff57b454ddb9 ("powerpc/eeh: Do probe on pci_dn"), so drop it. Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 9 --------- arch/powerpc/platforms/powernv/pci.h | 1 - 2 files changed, 10 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 524c9c7..10ecd97 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3195,12 +3195,6 @@ static bool pnv_pci_enable_device_hook(struct pci_dev *dev) return true; } -static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus, - u32 devfn) -{ - return phb->ioda.pe_rmap[(bus->number << 8) | devfn]; -} - static void pnv_pci_ioda_shutdown(struct pci_controller *hose) { struct pnv_phb *phb = hose->private_data; @@ -3377,9 +3371,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb->freeze_pe = pnv_ioda_freeze_pe; phb->unfreeze_pe = pnv_ioda_unfreeze_pe; - /* Setup RID -> PE mapping function */ - phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe; - /* Setup TCEs */ phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 3f814f3..78f035e 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -110,7 +110,6 @@ struct pnv_phb { unsigned int is_64, struct msi_msg *msg); void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev); void (*fixup_phb)(struct pci_controller *hose); - u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn); int (*init_m64)(struct pnv_phb *phb); void (*reserve_m64_pe)(struct pci_bus *bus, unsigned long *pe_bitmap, bool all); -- cgit v1.1 From 13ce7598b6c0d9b3734fb070242de21b05aedd0d Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 May 2016 15:41:23 +1000 Subject: powerpc/powernv: Reorder fields in struct pnv_phb This moves those fields in struct pnv_phb that are related to PE allocation around. No logical change. Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 78f035e..f2a1452 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -140,15 +140,14 @@ struct pnv_phb { unsigned int io_segsize; unsigned int io_pci_base; - /* PE allocation bitmap */ - unsigned long *pe_alloc; - /* PE allocation mutex */ + /* PE allocation */ struct mutex pe_alloc_mutex; + unsigned long *pe_alloc; + struct pnv_ioda_pe *pe_array; /* M32 & IO segment maps */ unsigned int *m32_segmap; unsigned int *io_segmap; - struct pnv_ioda_pe *pe_array; /* IRQ chip */ int irq_chip_init; -- cgit v1.1 From 92b8f137b3620b0a07c71b1bd191f9a22d0cf0ab Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 May 2016 15:41:24 +1000 Subject: powerpc/powernv: Rename PE# fields in struct pnv_phb This renames the fields related to PE number in "struct pnv_phb" for better reflecting of their usages as Alexey suggested. No logical changes introduced. Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/eeh-powernv.c | 2 +- arch/powerpc/platforms/powernv/pci-ioda.c | 58 ++++++++++++++-------------- arch/powerpc/platforms/powernv/pci.c | 2 +- arch/powerpc/platforms/powernv/pci.h | 4 +- 4 files changed, 33 insertions(+), 33 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 950b3e5..69e41ce 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -75,7 +75,7 @@ static int pnv_eeh_init(void) * and P7IOC separately. So we should regard * PE#0 as valid for PHB3 and P7IOC. */ - if (phb->ioda.reserved_pe != 0) + if (phb->ioda.reserved_pe_idx != 0) eeh_add_flag(EEH_VALID_PE_ZERO); break; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 10ecd97..1d2514f 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -124,7 +124,7 @@ static inline bool pnv_pci_is_mem_pref_64(unsigned long flags) static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) { - if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe)) { + if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe_num)) { pr_warn("%s: Invalid PE %d on PHB#%x\n", __func__, pe_no, phb->hose->global_number); return; @@ -144,8 +144,8 @@ static int pnv_ioda_alloc_pe(struct pnv_phb *phb) do { pe = find_next_zero_bit(phb->ioda.pe_alloc, - phb->ioda.total_pe, 0); - if (pe >= phb->ioda.total_pe) + phb->ioda.total_pe_num, 0); + if (pe >= phb->ioda.total_pe_num) return IODA_INVALID_PE; } while(test_and_set_bit(pe, phb->ioda.pe_alloc)); @@ -199,13 +199,13 @@ static int pnv_ioda2_init_m64(struct pnv_phb *phb) * expected to be 0 or last one of PE capabicity. */ r = &phb->hose->mem_resources[1]; - if (phb->ioda.reserved_pe == 0) + if (phb->ioda.reserved_pe_idx == 0) r->start += phb->ioda.m64_segsize; - else if (phb->ioda.reserved_pe == (phb->ioda.total_pe - 1)) + else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1)) r->end -= phb->ioda.m64_segsize; else pr_warn(" Cannot strip M64 segment for reserved PE#%d\n", - phb->ioda.reserved_pe); + phb->ioda.reserved_pe_idx); return 0; @@ -274,7 +274,7 @@ static int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all) return IODA_INVALID_PE; /* Allocate bitmap */ - size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long)); + size = _ALIGN_UP(phb->ioda.total_pe_num / 8, sizeof(unsigned long)); pe_alloc = kzalloc(size, GFP_KERNEL); if (!pe_alloc) { pr_warn("%s: Out of memory !\n", @@ -290,7 +290,7 @@ static int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all) * contributed by its child buses. For the case, we needn't * pick M64 dependent PE#. */ - if (bitmap_empty(pe_alloc, phb->ioda.total_pe)) { + if (bitmap_empty(pe_alloc, phb->ioda.total_pe_num)) { kfree(pe_alloc); return IODA_INVALID_PE; } @@ -301,8 +301,8 @@ static int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all) */ master_pe = NULL; i = -1; - while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) < - phb->ioda.total_pe) { + while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe_num, i + 1)) < + phb->ioda.total_pe_num) { pe = &phb->ioda.pe_array[i]; if (!master_pe) { @@ -355,7 +355,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) hose->mem_offset[1] = res->start - pci_addr; phb->ioda.m64_size = resource_size(res); - phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe; + phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe_num; phb->ioda.m64_base = pci_addr; pr_info(" MEM64 0x%016llx..0x%016llx -> 0x%016llx\n", @@ -456,7 +456,7 @@ static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no) s64 rc; /* Sanity check on PE number */ - if (pe_no < 0 || pe_no >= phb->ioda.total_pe) + if (pe_no < 0 || pe_no >= phb->ioda.total_pe_num) return OPAL_EEH_STOPPED_PERM_UNAVAIL; /* @@ -1088,7 +1088,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) * same GPU get assigned the same PE. */ gpu_pdev = pnv_pci_get_gpu_dev(npu_pdev); - for (pe_num = 0; pe_num < phb->ioda.total_pe; pe_num++) { + for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) { pe = &phb->ioda.pe_array[pe_num]; if (!pe->pdev) continue; @@ -1537,9 +1537,9 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) } else { mutex_lock(&phb->ioda.pe_alloc_mutex); *pdn->pe_num_map = bitmap_find_next_zero_area( - phb->ioda.pe_alloc, phb->ioda.total_pe, + phb->ioda.pe_alloc, phb->ioda.total_pe_num, 0, num_vfs, 0); - if (*pdn->pe_num_map >= phb->ioda.total_pe) { + if (*pdn->pe_num_map >= phb->ioda.total_pe_num) { mutex_unlock(&phb->ioda.pe_alloc_mutex); dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs); kfree(pdn->pe_num_map); @@ -2858,7 +2858,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) pdn->m64_single_mode = false; total_vfs = pci_sriov_get_totalvfs(pdev); - mul = phb->ioda.total_pe; + mul = phb->ioda.total_pe_num; total_vf_bar_sz = 0; for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { @@ -2960,7 +2960,7 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, region.end = res->end - phb->ioda.io_pci_base; index = region.start / phb->ioda.io_segsize; - while (index < phb->ioda.total_pe && + while (index < phb->ioda.total_pe_num && region.start <= region.end) { phb->ioda.io_segmap[index] = pe->pe_number; rc = opal_pci_map_pe_mmio_window(phb->opal_id, @@ -2985,7 +2985,7 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, phb->ioda.m32_pci_base; index = region.start / phb->ioda.m32_segsize; - while (index < phb->ioda.total_pe && + while (index < phb->ioda.total_pe_num && region.start <= region.end) { phb->ioda.m32_segmap[index] = pe->pe_number; rc = opal_pci_map_pe_mmio_window(phb->opal_id, @@ -3300,13 +3300,13 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, pr_err(" Failed to map registers !\n"); /* Initialize more IODA stuff */ - phb->ioda.total_pe = 1; + phb->ioda.total_pe_num = 1; prop32 = of_get_property(np, "ibm,opal-num-pes", NULL); if (prop32) - phb->ioda.total_pe = be32_to_cpup(prop32); + phb->ioda.total_pe_num = be32_to_cpup(prop32); prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL); if (prop32) - phb->ioda.reserved_pe = be32_to_cpup(prop32); + phb->ioda.reserved_pe_idx = be32_to_cpup(prop32); /* Parse 64-bit MMIO range */ pnv_ioda_parse_m64_window(phb); @@ -3315,29 +3315,29 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, /* FW Has already off top 64k of M32 space (MSI space) */ phb->ioda.m32_size += 0x10000; - phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe; + phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe_num; phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0]; phb->ioda.io_size = hose->pci_io_size; - phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe; + phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe_num; phb->ioda.io_pci_base = 0; /* XXX calculate this ? */ /* Allocate aux data & arrays. We don't have IO ports on PHB3 */ - size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long)); + size = _ALIGN_UP(phb->ioda.total_pe_num / 8, sizeof(unsigned long)); m32map_off = size; - size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]); + size += phb->ioda.total_pe_num * sizeof(phb->ioda.m32_segmap[0]); if (phb->type == PNV_PHB_IODA1) { iomap_off = size; - size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]); + size += phb->ioda.total_pe_num * sizeof(phb->ioda.io_segmap[0]); } pemap_off = size; - size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe); + size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe); aux = memblock_virt_alloc(size, 0); phb->ioda.pe_alloc = aux; phb->ioda.m32_segmap = aux + m32map_off; if (phb->type == PNV_PHB_IODA1) phb->ioda.io_segmap = aux + iomap_off; phb->ioda.pe_array = aux + pemap_off; - set_bit(phb->ioda.reserved_pe, phb->ioda.pe_alloc); + set_bit(phb->ioda.reserved_pe_idx, phb->ioda.pe_alloc); INIT_LIST_HEAD(&phb->ioda.pe_dma_list); INIT_LIST_HEAD(&phb->ioda.pe_list); @@ -3356,7 +3356,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, #endif pr_info(" %03d (%03d) PE's M32: 0x%x [segment=0x%x]\n", - phb->ioda.total_pe, phb->ioda.reserved_pe, + phb->ioda.total_pe_num, phb->ioda.reserved_pe_idx, phb->ioda.m32_size, phb->ioda.m32_segsize); if (phb->ioda.m64_size) pr_info(" M64: 0x%lx [segment=0x%lx]\n", diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 0db20ae..88255eb 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -377,7 +377,7 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn) */ pe_no = pdn->pe_number; if (pe_no == IODA_INVALID_PE) { - pe_no = phb->ioda.reserved_pe; + pe_no = phb->ioda.reserved_pe_idx; } /* diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index f2a1452..784882a 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -120,8 +120,8 @@ struct pnv_phb { struct { /* Global bridge info */ - unsigned int total_pe; - unsigned int reserved_pe; + unsigned int total_pe_num; + unsigned int reserved_pe_idx; /* 32-bit MMIO window */ unsigned int m32_size; -- cgit v1.1 From 689ee8c95f399998f52eda827849b7771629a53c Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 May 2016 15:41:25 +1000 Subject: powerpc/powernv: Data type unsigned int for PE number This changes the data type of PE number from "int" to "unsigned int" in order to match the fact PE number is never negative: * The number of PE to which the specified PCI device is attached. * The PE number map for SRIOV VFs. * The returned PE number from pnv_ioda_alloc_pe(). * The returned PE number from pnv_ioda2_pick_m64_pe(). Suggested-by: Alexey Kardashevskiy Signed-off-by: Gavin Shan Reviewed-By: Alistair Popple Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 8 ++++---- arch/powerpc/platforms/powernv/pci.c | 2 +- arch/powerpc/platforms/powernv/pci.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 1d2514f..4aa6cdf 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -138,7 +138,7 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) phb->ioda.pe_array[pe_no].pe_number = pe_no; } -static int pnv_ioda_alloc_pe(struct pnv_phb *phb) +static unsigned int pnv_ioda_alloc_pe(struct pnv_phb *phb) { unsigned long pe; @@ -261,7 +261,7 @@ static void pnv_ioda2_reserve_m64_pe(struct pci_bus *bus, } } -static int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all) +static unsigned int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all) { struct pci_controller *hose = pci_bus_to_host(bus); struct pnv_phb *phb = hose->private_data; @@ -919,7 +919,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) struct pnv_phb *phb = hose->private_data; struct pci_dn *pdn = pci_get_pdn(dev); struct pnv_ioda_pe *pe; - int pe_num; + unsigned int pe_num; if (!pdn) { pr_err("%s: Device tree node not associated properly\n", @@ -1010,7 +1010,7 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) struct pci_controller *hose = pci_bus_to_host(bus); struct pnv_phb *phb = hose->private_data; struct pnv_ioda_pe *pe; - int pe_num = IODA_INVALID_PE; + unsigned int pe_num = IODA_INVALID_PE; /* Check if PE is determined by M64 */ if (phb->pick_m64_pe) diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 88255eb..1d92bd9 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -367,7 +367,7 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn) struct pnv_phb *phb = pdn->phb->private_data; u8 fstate; __be16 pcierr; - int pe_no; + unsigned int pe_no; s64 rc; /* diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 784882a..66f2569 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -113,7 +113,7 @@ struct pnv_phb { int (*init_m64)(struct pnv_phb *phb); void (*reserve_m64_pe)(struct pci_bus *bus, unsigned long *pe_bitmap, bool all); - int (*pick_m64_pe)(struct pci_bus *bus, bool all); + unsigned int (*pick_m64_pe)(struct pci_bus *bus, bool all); int (*get_pe_state)(struct pnv_phb *phb, int pe_no); void (*freeze_pe)(struct pnv_phb *phb, int pe_no); int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt); -- cgit v1.1 From 3fa23ff8ff3f0fa4f187bc46f81c69b1fdeecd9a Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 May 2016 15:41:26 +1000 Subject: powerpc/powernv: Fix initial IO and M32 segmap There are two arrays for IO and M32 segment maps on every PHB. The index of the arrays are segment number and the value stored in the corresponding element is PE number, indicating the segment is assigned to the PE. Initially, all elements in those two arrays are zeroes, meaning all segments are assigned to PE#0. It's wrong. This fixes the initial values in the elements of those two arrays to IODA_INVALID_PE, meaning all segments aren't assigned to any PE. Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 4aa6cdf..59b20e5 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3240,6 +3240,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, const __be64 *prop64; const __be32 *prop32; int len; + unsigned int segno; u64 phb_id; void *aux; long rc; @@ -3334,8 +3335,13 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, aux = memblock_virt_alloc(size, 0); phb->ioda.pe_alloc = aux; phb->ioda.m32_segmap = aux + m32map_off; - if (phb->type == PNV_PHB_IODA1) + for (segno = 0; segno < phb->ioda.total_pe_num; segno++) + phb->ioda.m32_segmap[segno] = IODA_INVALID_PE; + if (phb->type == PNV_PHB_IODA1) { phb->ioda.io_segmap = aux + iomap_off; + for (segno = 0; segno < phb->ioda.total_pe_num; segno++) + phb->ioda.io_segmap[segno] = IODA_INVALID_PE; + } phb->ioda.pe_array = aux + pemap_off; set_bit(phb->ioda.reserved_pe_idx, phb->ioda.pe_alloc); -- cgit v1.1 From 23e79425fe7caaa93ddcee18ac4c57069f0fb4b6 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 May 2016 15:41:27 +1000 Subject: powerpc/powernv: Simplify pnv_ioda_setup_pe_seg() pnv_ioda_setup_pe_seg() associates the IO and M32 segments with the owner PE. The code mapping segments should be fixed and immune from logic changes introduced to pnv_ioda_setup_pe_seg(). This moves the code mapping segments to helper pnv_ioda_setup_pe_res(). The data type for @rc is changed to "int64_t". Also, argument @hose is removed from pnv_ioda_setup_pe() as it can be got from @pe. No functional changes introduced. Signed-off-by: Gavin Shan Reviewed-By: Alistair Popple Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 121 +++++++++++++++--------------- 1 file changed, 62 insertions(+), 59 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 59b20e5..b954fbc 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2929,19 +2929,72 @@ truncate_iov: } #endif /* CONFIG_PCI_IOV */ +static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, + struct resource *res) +{ + struct pnv_phb *phb = pe->phb; + struct pci_bus_region region; + int index; + int64_t rc; + + if (!res || !res->flags || res->start > res->end) + return; + + if (res->flags & IORESOURCE_IO) { + region.start = res->start - phb->ioda.io_pci_base; + region.end = res->end - phb->ioda.io_pci_base; + index = region.start / phb->ioda.io_segsize; + + while (index < phb->ioda.total_pe_num && + region.start <= region.end) { + phb->ioda.io_segmap[index] = pe->pe_number; + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index); + if (rc != OPAL_SUCCESS) { + pr_err("%s: Error %lld mapping IO segment#%d to PE#%d\n", + __func__, rc, index, pe->pe_number); + break; + } + + region.start += phb->ioda.io_segsize; + index++; + } + } else if ((res->flags & IORESOURCE_MEM) && + !pnv_pci_is_mem_pref_64(res->flags)) { + region.start = res->start - + phb->hose->mem_offset[0] - + phb->ioda.m32_pci_base; + region.end = res->end - + phb->hose->mem_offset[0] - + phb->ioda.m32_pci_base; + index = region.start / phb->ioda.m32_segsize; + + while (index < phb->ioda.total_pe_num && + region.start <= region.end) { + phb->ioda.m32_segmap[index] = pe->pe_number; + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index); + if (rc != OPAL_SUCCESS) { + pr_err("%s: Error %lld mapping M32 segment#%d to PE#%d", + __func__, rc, index, pe->pe_number); + break; + } + + region.start += phb->ioda.m32_segsize; + index++; + } + } +} + /* * This function is supposed to be called on basis of PE from top * to bottom style. So the the I/O or MMIO segment assigned to * parent PE could be overrided by its child PEs if necessary. */ -static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, - struct pnv_ioda_pe *pe) +static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe) { - struct pnv_phb *phb = hose->private_data; - struct pci_bus_region region; struct resource *res; - int i, index; - int rc; + int i; /* * NOTE: We only care PCI bus based PE for now. For PCI @@ -2950,58 +3003,8 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, */ BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))); - pci_bus_for_each_resource(pe->pbus, res, i) { - if (!res || !res->flags || - res->start > res->end) - continue; - - if (res->flags & IORESOURCE_IO) { - region.start = res->start - phb->ioda.io_pci_base; - region.end = res->end - phb->ioda.io_pci_base; - index = region.start / phb->ioda.io_segsize; - - while (index < phb->ioda.total_pe_num && - region.start <= region.end) { - phb->ioda.io_segmap[index] = pe->pe_number; - rc = opal_pci_map_pe_mmio_window(phb->opal_id, - pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index); - if (rc != OPAL_SUCCESS) { - pr_err("%s: OPAL error %d when mapping IO " - "segment #%d to PE#%d\n", - __func__, rc, index, pe->pe_number); - break; - } - - region.start += phb->ioda.io_segsize; - index++; - } - } else if ((res->flags & IORESOURCE_MEM) && - !pnv_pci_is_mem_pref_64(res->flags)) { - region.start = res->start - - hose->mem_offset[0] - - phb->ioda.m32_pci_base; - region.end = res->end - - hose->mem_offset[0] - - phb->ioda.m32_pci_base; - index = region.start / phb->ioda.m32_segsize; - - while (index < phb->ioda.total_pe_num && - region.start <= region.end) { - phb->ioda.m32_segmap[index] = pe->pe_number; - rc = opal_pci_map_pe_mmio_window(phb->opal_id, - pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index); - if (rc != OPAL_SUCCESS) { - pr_err("%s: OPAL error %d when mapping M32 " - "segment#%d to PE#%d", - __func__, rc, index, pe->pe_number); - break; - } - - region.start += phb->ioda.m32_segsize; - index++; - } - } - } + pci_bus_for_each_resource(pe->pbus, res, i) + pnv_ioda_setup_pe_res(pe, res); } static void pnv_pci_ioda_setup_seg(void) @@ -3018,7 +3021,7 @@ static void pnv_pci_ioda_setup_seg(void) continue; list_for_each_entry(pe, &phb->ioda.pe_list, list) { - pnv_ioda_setup_pe_seg(hose, pe); + pnv_ioda_setup_pe_seg(pe); } } } -- cgit v1.1 From 69d733e72ddae1680ffd3b5c648ff31cb46d9cc9 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 May 2016 15:41:28 +1000 Subject: powerpc/powernv: IO and M32 mapping based on PCI device resources Currently, the IO and M32 segments are mapped to the corresponding PE based on the windows of the parent bridge of PE's primary bus. It's not going to work when the windows of root port or upstream port of the PCIe switch behind root port are extended to PHB's apertures in order to support hotplug in subsequent patch. This fixes the issue by mapping IO and M32 segments based on the resources of the PCI devices included in the PE, instead of the windows of the parent bridge of the PE's primary bus. Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index b954fbc..904790b 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2993,7 +2993,7 @@ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, */ static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe) { - struct resource *res; + struct pci_dev *pdev; int i; /* @@ -3003,8 +3003,21 @@ static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe) */ BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))); - pci_bus_for_each_resource(pe->pbus, res, i) - pnv_ioda_setup_pe_res(pe, res); + list_for_each_entry(pdev, &pe->pbus->devices, bus_list) { + for (i = 0; i <= PCI_ROM_RESOURCE; i++) + pnv_ioda_setup_pe_res(pe, &pdev->resource[i]); + + /* + * If the PE contains all subordinate PCI buses, the + * windows of the child bridges should be mapped to + * the PE as well. + */ + if (!(pe->flags & PNV_IODA_PE_BUS_ALL) || !pci_is_bridge(pdev)) + continue; + for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) + pnv_ioda_setup_pe_res(pe, + &pdev->resource[PCI_BRIDGE_RESOURCES + i]); + } } static void pnv_pci_ioda_setup_seg(void) -- cgit v1.1 From 93289d8c08dc038a3efccf6b7fa563e1683cd868 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 May 2016 15:41:29 +1000 Subject: powerpc/powernv: Track M64 segment consumption When unplugging PCI devices, their parent PEs might be offline. The consumed M64 resource by the PEs should be released at that time. As we track M32 segment consumption, this introduces an array to the PHB to track the mapping between M64 segment and PE number. Note: M64 mapping isn't covered by pnv_ioda_setup_pe_seg() as IODA2 doesn't support the mapping explicitly while it's supported on IODA1. Until now, no M64 is supported on IODA1 in software. Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 10 ++++++++-- arch/powerpc/platforms/powernv/pci.h | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 904790b..832b430 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -305,6 +305,7 @@ static unsigned int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all) phb->ioda.total_pe_num) { pe = &phb->ioda.pe_array[i]; + phb->ioda.m64_segmap[pe->pe_number] = pe->pe_number; if (!master_pe) { pe->flags |= PNV_IODA_PE_MASTER; INIT_LIST_HEAD(&pe->slaves); @@ -3252,7 +3253,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, { struct pci_controller *hose; struct pnv_phb *phb; - unsigned long size, m32map_off, pemap_off, iomap_off = 0; + unsigned long size, m64map_off, m32map_off, pemap_off, iomap_off = 0; const __be64 *prop64; const __be32 *prop32; int len; @@ -3340,6 +3341,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, /* Allocate aux data & arrays. We don't have IO ports on PHB3 */ size = _ALIGN_UP(phb->ioda.total_pe_num / 8, sizeof(unsigned long)); + m64map_off = size; + size += phb->ioda.total_pe_num * sizeof(phb->ioda.m64_segmap[0]); m32map_off = size; size += phb->ioda.total_pe_num * sizeof(phb->ioda.m32_segmap[0]); if (phb->type == PNV_PHB_IODA1) { @@ -3350,9 +3353,12 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe); aux = memblock_virt_alloc(size, 0); phb->ioda.pe_alloc = aux; + phb->ioda.m64_segmap = aux + m64map_off; phb->ioda.m32_segmap = aux + m32map_off; - for (segno = 0; segno < phb->ioda.total_pe_num; segno++) + for (segno = 0; segno < phb->ioda.total_pe_num; segno++) { + phb->ioda.m64_segmap[segno] = IODA_INVALID_PE; phb->ioda.m32_segmap[segno] = IODA_INVALID_PE; + } if (phb->type == PNV_PHB_IODA1) { phb->ioda.io_segmap = aux + iomap_off; for (segno = 0; segno < phb->ioda.total_pe_num; segno++) diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 66f2569..a409165 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -146,6 +146,7 @@ struct pnv_phb { struct pnv_ioda_pe *pe_array; /* M32 & IO segment maps */ + unsigned int *m64_segmap; unsigned int *m32_segmap; unsigned int *io_segmap; -- cgit v1.1 From c430670ad11aa8c5a9f9706bf6f7376310af7f4b Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 May 2016 15:41:30 +1000 Subject: powerpc/powernv: Rename M64 related functions This renames those functions picking PE number based on consumed M64 segments, mapping M64 segments to PEs as those functions are going to be shared by IODA1/IODA2 in next patch. No logical changes introduced. Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 832b430..37f22b0 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -219,7 +219,7 @@ fail: return -EIO; } -static void pnv_ioda2_reserve_dev_m64_pe(struct pci_dev *pdev, +static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev, unsigned long *pe_bitmap) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); @@ -246,22 +246,22 @@ static void pnv_ioda2_reserve_dev_m64_pe(struct pci_dev *pdev, } } -static void pnv_ioda2_reserve_m64_pe(struct pci_bus *bus, - unsigned long *pe_bitmap, - bool all) +static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus, + unsigned long *pe_bitmap, + bool all) { struct pci_dev *pdev; list_for_each_entry(pdev, &bus->devices, bus_list) { - pnv_ioda2_reserve_dev_m64_pe(pdev, pe_bitmap); + pnv_ioda_reserve_dev_m64_pe(pdev, pe_bitmap); if (all && pdev->subordinate) - pnv_ioda2_reserve_m64_pe(pdev->subordinate, - pe_bitmap, all); + pnv_ioda_reserve_m64_pe(pdev->subordinate, + pe_bitmap, all); } } -static unsigned int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all) +static unsigned int pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) { struct pci_controller *hose = pci_bus_to_host(bus); struct pnv_phb *phb = hose->private_data; @@ -283,7 +283,7 @@ static unsigned int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all) } /* Figure out reserved PE numbers by the PE */ - pnv_ioda2_reserve_m64_pe(bus, pe_alloc, all); + pnv_ioda_reserve_m64_pe(bus, pe_alloc, all); /* * the current bus might not own M64 window and that's all @@ -365,8 +365,8 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) /* Use last M64 BAR to cover M64 window */ phb->ioda.m64_bar_idx = 15; phb->init_m64 = pnv_ioda2_init_m64; - phb->reserve_m64_pe = pnv_ioda2_reserve_m64_pe; - phb->pick_m64_pe = pnv_ioda2_pick_m64_pe; + phb->reserve_m64_pe = pnv_ioda_reserve_m64_pe; + phb->pick_m64_pe = pnv_ioda_pick_m64_pe; } static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no) -- cgit v1.1 From 9945155143926c1191bc0a1178ac2bb290467370 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 5 May 2016 12:02:13 +1000 Subject: powerpc/powernv/ioda1: M64 support on P7IOC This enables M64 window on P7IOC, which has been enabled on PHB3. Different from PHB3 where 16 M64 BARs are supported and each of them can be owned by one particular PE# exclusively or divided evenly to 256 segments, every P7IOC PHB has 16 M64 BARs and each of them are divided to 8 segments. So every P7IOC PHB supports 128 M64 segments in total. P7IOC has M64DT, which helps mapping one particular M64 segment# to arbitrary PE#. PHB3 doesn't have M64DT, indicating that one M64 segment can only be pinned to the fixed PE#. In order to unified M64 support M64 on P7IOC and PHB3, we just provide 128 M64 segments on every P7IOC PHB and each of them is pinned to the fixed PE# by bypassing the function of M64DT. In turn, we just need different phb->init_m64() for P7IOC and PHB3 and maps M64 segment in pnv_ioda_reserve_m64_pe() for P7IOC, most of the code are shared by them. Signed-off-by: Gavin Shan Reviewed-by: Alistair Popple Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 89 +++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 37f22b0..30a6d79 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -48,6 +48,9 @@ #include "powernv.h" #include "pci.h" +#define PNV_IODA1_M64_NUM 16 /* Number of M64 BARs */ +#define PNV_IODA1_M64_SEGS 8 /* Segments per M64 BAR */ + /* 256M DMA window, 4K TCE pages, 8 bytes TCE */ #define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8) @@ -246,6 +249,64 @@ static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev, } } +static int pnv_ioda1_init_m64(struct pnv_phb *phb) +{ + struct resource *r; + int index; + + /* + * There are 16 M64 BARs, each of which has 8 segments. So + * there are as many M64 segments as the maximum number of + * PEs, which is 128. + */ + for (index = 0; index < PNV_IODA1_M64_NUM; index++) { + unsigned long base, segsz = phb->ioda.m64_segsize; + int64_t rc; + + base = phb->ioda.m64_base + + index * PNV_IODA1_M64_SEGS * segsz; + rc = opal_pci_set_phb_mem_window(phb->opal_id, + OPAL_M64_WINDOW_TYPE, index, base, 0, + PNV_IODA1_M64_SEGS * segsz); + if (rc != OPAL_SUCCESS) { + pr_warn(" Error %lld setting M64 PHB#%d-BAR#%d\n", + rc, phb->hose->global_number, index); + goto fail; + } + + rc = opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, index, + OPAL_ENABLE_M64_SPLIT); + if (rc != OPAL_SUCCESS) { + pr_warn(" Error %lld enabling M64 PHB#%d-BAR#%d\n", + rc, phb->hose->global_number, index); + goto fail; + } + } + + /* + * Exclude the segment used by the reserved PE, which + * is expected to be 0 or last supported PE#. + */ + r = &phb->hose->mem_resources[1]; + if (phb->ioda.reserved_pe_idx == 0) + r->start += phb->ioda.m64_segsize; + else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1)) + r->end -= phb->ioda.m64_segsize; + else + WARN(1, "Wrong reserved PE#%d on PHB#%d\n", + phb->ioda.reserved_pe_idx, phb->hose->global_number); + + return 0; + +fail: + for ( ; index >= 0; index--) + opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, index, OPAL_DISABLE_M64); + + return -EIO; +} + static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus, unsigned long *pe_bitmap, bool all) @@ -315,6 +376,26 @@ static unsigned int pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) pe->master = master_pe; list_add_tail(&pe->list, &master_pe->slaves); } + + /* + * P7IOC supports M64DT, which helps mapping M64 segment + * to one particular PE#. However, PHB3 has fixed mapping + * between M64 segment and PE#. In order to have same logic + * for P7IOC and PHB3, we enforce fixed mapping between M64 + * segment and PE# on P7IOC. + */ + if (phb->type == PNV_PHB_IODA1) { + int64_t rc; + + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + pe->pe_number, OPAL_M64_WINDOW_TYPE, + pe->pe_number / PNV_IODA1_M64_SEGS, + pe->pe_number % PNV_IODA1_M64_SEGS); + if (rc != OPAL_SUCCESS) + pr_warn("%s: Error %lld mapping M64 for PHB#%d-PE#%d\n", + __func__, rc, phb->hose->global_number, + pe->pe_number); + } } kfree(pe_alloc); @@ -329,8 +410,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) const u32 *r; u64 pci_addr; - /* FIXME: Support M64 for P7IOC */ - if (phb->type != PNV_PHB_IODA2) { + if (phb->type != PNV_PHB_IODA1 && phb->type != PNV_PHB_IODA2) { pr_info(" Not support M64 window\n"); return; } @@ -364,7 +444,10 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) /* Use last M64 BAR to cover M64 window */ phb->ioda.m64_bar_idx = 15; - phb->init_m64 = pnv_ioda2_init_m64; + if (phb->type == PNV_PHB_IODA1) + phb->init_m64 = pnv_ioda1_init_m64; + else + phb->init_m64 = pnv_ioda2_init_m64; phb->reserve_m64_pe = pnv_ioda_reserve_m64_pe; phb->pick_m64_pe = pnv_ioda_pick_m64_pe; } -- cgit v1.1 From b30d936f6f3ee357e48b7f50754b0855d87f1cce Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 May 2016 15:41:32 +1000 Subject: powerpc/powernv/ioda1: Rename pnv_pci_ioda_setup_dma_pe() This renames pnv_pci_ioda_setup_dma_pe() to pnv_pci_ioda1_setup_dma_pe() as it's the counter-part of IODA2's pnv_pci_ioda2_setup_dma_pe(). No logical changes introduced. Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 30a6d79..faee623 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2029,9 +2029,10 @@ static struct iommu_table_ops pnv_ioda2_iommu_ops = { .free = pnv_ioda2_table_free, }; -static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, - struct pnv_ioda_pe *pe, unsigned int base, - unsigned int segs) +static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, + struct pnv_ioda_pe *pe, + unsigned int base, + unsigned int segs) { struct page *tce_mem = NULL; @@ -2619,7 +2620,7 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb) if (phb->type == PNV_PHB_IODA1) { pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n", pe->dma_weight, segs); - pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs); + pnv_pci_ioda1_setup_dma_pe(phb, pe, base, segs); } else if (phb->type == PNV_PHB_IODA2) { pe_info(pe, "Assign DMA32 space\n"); segs = 0; -- cgit v1.1 From acce971c0e72c63cc2247e2412375e13a0be1ee1 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 May 2016 15:41:33 +1000 Subject: powerpc/powernv/ioda1: Introduce PNV_IODA1_DMA32_SEGSIZE Currently, there is one macro (TCE32_TABLE_SIZE) representing the TCE table size for one DMA32 segment. The constant representing the DMA32 segment size (1 << 28) is still used in the code. This defines PNV_IODA1_DMA32_SEGSIZE representing one DMA32 segment size. the TCE table size can be calcualted when the page has fixed 4KB size. So all the related calculation depends on one macro (PNV_IODA1_DMA32_SEGSIZE). No logical changes introduced. Signed-off-by: Gavin Shan Reviewed-By: Alistair Popple Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index faee623..4b08cef 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -50,9 +50,7 @@ #define PNV_IODA1_M64_NUM 16 /* Number of M64 BARs */ #define PNV_IODA1_M64_SEGS 8 /* Segments per M64 BAR */ - -/* 256M DMA window, 4K TCE pages, 8 bytes TCE */ -#define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8) +#define PNV_IODA1_DMA32_SEGSIZE 0x10000000 #define POWERNV_IOMMU_DEFAULT_LEVELS 1 #define POWERNV_IOMMU_MAX_LEVELS 5 @@ -2037,7 +2035,7 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, struct page *tce_mem = NULL; struct iommu_table *tbl; - unsigned int i; + unsigned int tce32_segsz, i; int64_t rc; void *addr; @@ -2057,29 +2055,34 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, /* Grab a 32-bit TCE table */ pe->tce32_seg = base; pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n", - (base << 28), ((base + segs) << 28) - 1); + base * PNV_IODA1_DMA32_SEGSIZE, + (base + segs) * PNV_IODA1_DMA32_SEGSIZE - 1); /* XXX Currently, we allocate one big contiguous table for the * TCEs. We only really need one chunk per 256M of TCE space * (ie per segment) but that's an optimization for later, it * requires some added smarts with our get/put_tce implementation + * + * Each TCE page is 4KB in size and each TCE entry occupies 8 + * bytes */ + tce32_segsz = PNV_IODA1_DMA32_SEGSIZE >> (IOMMU_PAGE_SHIFT_4K - 3); tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL, - get_order(TCE32_TABLE_SIZE * segs)); + get_order(tce32_segsz * segs)); if (!tce_mem) { pe_err(pe, " Failed to allocate a 32-bit TCE memory\n"); goto fail; } addr = page_address(tce_mem); - memset(addr, 0, TCE32_TABLE_SIZE * segs); + memset(addr, 0, tce32_segsz * segs); /* Configure HW */ for (i = 0; i < segs; i++) { rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, base + i, 1, - __pa(addr) + TCE32_TABLE_SIZE * i, - TCE32_TABLE_SIZE, 0x1000); + __pa(addr) + tce32_segsz * i, + tce32_segsz, IOMMU_PAGE_SIZE_4K); if (rc) { pe_err(pe, " Failed to configure 32-bit TCE table," " err %ld\n", rc); @@ -2088,8 +2091,9 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, } /* Setup linux iommu table */ - pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs, - base << 28, IOMMU_PAGE_SHIFT_4K); + pnv_pci_setup_iommu_table(tbl, addr, tce32_segsz * segs, + base * PNV_IODA1_DMA32_SEGSIZE, + IOMMU_PAGE_SHIFT_4K); /* OPAL variant of P7IOC SW invalidated TCEs */ if (phb->ioda.tce_inval_reg) @@ -2119,7 +2123,7 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, if (pe->tce32_seg >= 0) pe->tce32_seg = -1; if (tce_mem) - __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs)); + __free_pages(tce_mem, get_order(tce32_segsz * segs)); if (tbl) { pnv_pci_unlink_table_and_group(tbl, &pe->table_group); iommu_free_table(tbl, "pnv"); @@ -3456,7 +3460,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, mutex_init(&phb->ioda.pe_list_mutex); /* Calculate how many 32-bit TCE segments we have */ - phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28; + phb->ioda.tce32_count = phb->ioda.m32_pci_base / + PNV_IODA1_DMA32_SEGSIZE; #if 0 /* We should really do that ... */ rc = opal_pci_set_phb_mem_window(opal->phb_id, -- cgit v1.1 From 801846d1deff943114f6f468b21386247b008321 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 May 2016 15:41:34 +1000 Subject: powerpc/powernv: Remove DMA32 PE list PEs are put into PHB DMA32 list (phb->ioda.pe_dma_list) according to their DMA32 weight. The PEs on the list are iterated to setup their TCE32 tables at system booting time. The list is used for once at boot time and no need to keep it. This moves the logic calculating DMA32 weight of PHB and PE to pnv_ioda_setup_dma() to drop PHB's DMA32 list. Also, every PE traces the consumed DMA32 segment by @tce32_seg and @tce32_segcount are useless and they're removed. Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 171 ++++++++++++++---------------- arch/powerpc/platforms/powernv/pci.h | 19 ---- 2 files changed, 78 insertions(+), 112 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 4b08cef..30c825a 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -890,44 +890,6 @@ out: return 0; } -static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb, - struct pnv_ioda_pe *pe) -{ - struct pnv_ioda_pe *lpe; - - list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) { - if (lpe->dma_weight < pe->dma_weight) { - list_add_tail(&pe->dma_link, &lpe->dma_link); - return; - } - } - list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list); -} - -static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) -{ - /* This is quite simplistic. The "base" weight of a device - * is 10. 0 means no DMA is to be accounted for it. - */ - - /* If it's a bridge, no DMA */ - if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) - return 0; - - /* Reduce the weight of slow USB controllers */ - if (dev->class == PCI_CLASS_SERIAL_USB_UHCI || - dev->class == PCI_CLASS_SERIAL_USB_OHCI || - dev->class == PCI_CLASS_SERIAL_USB_EHCI) - return 3; - - /* Increase the weight of RAID (includes Obsidian) */ - if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID) - return 15; - - /* Default */ - return 10; -} - #ifdef CONFIG_PCI_IOV static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) { @@ -1032,7 +994,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) pe->flags = PNV_IODA_PE_DEV; pe->pdev = dev; pe->pbus = NULL; - pe->tce32_seg = -1; pe->mve_number = -1; pe->rid = dev->bus->number << 8 | pdn->devfn; @@ -1048,16 +1009,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) return NULL; } - /* Assign a DMA weight to the device */ - pe->dma_weight = pnv_ioda_dma_weight(dev); - if (pe->dma_weight != 0) { - phb->ioda.dma_weight += pe->dma_weight; - phb->ioda.dma_pe_count++; - } - - /* Link the PE */ - pnv_ioda_link_pe_by_weight(phb, pe); - return pe; } @@ -1075,7 +1026,6 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) } pdn->pcidev = dev; pdn->pe_number = pe->pe_number; - pe->dma_weight += pnv_ioda_dma_weight(dev); if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) pnv_ioda_setup_same_PE(dev->subordinate, pe); } @@ -1112,10 +1062,8 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS); pe->pbus = bus; pe->pdev = NULL; - pe->tce32_seg = -1; pe->mve_number = -1; pe->rid = bus->busn_res.start << 8; - pe->dma_weight = 0; if (all) pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n", @@ -1137,17 +1085,6 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) /* Put PE to the list */ list_add_tail(&pe->list, &phb->ioda.pe_list); - - /* Account for one DMA PE if at least one DMA capable device exist - * below the bridge - */ - if (pe->dma_weight != 0) { - phb->ioda.dma_weight += pe->dma_weight; - phb->ioda.dma_pe_count++; - } - - /* Link the PE */ - pnv_ioda_link_pe_by_weight(phb, pe); } static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) @@ -1188,7 +1125,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) rid = npu_pdev->bus->number << 8 | npu_pdn->devfn; npu_pdn->pcidev = npu_pdev; npu_pdn->pe_number = pe_num; - pe->dma_weight += pnv_ioda_dma_weight(npu_pdev); phb->ioda.pe_rmap[rid] = pe->pe_number; /* Map the PE to this link */ @@ -1536,7 +1472,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) pe->flags = PNV_IODA_PE_VF; pe->pbus = NULL; pe->parent_dev = pdev; - pe->tce32_seg = -1; pe->mve_number = -1; pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) | pci_iov_virtfn_devfn(pdev, vf_index); @@ -2027,6 +1962,54 @@ static struct iommu_table_ops pnv_ioda2_iommu_ops = { .free = pnv_ioda2_table_free, }; +static int pnv_pci_ioda_dev_dma_weight(struct pci_dev *dev, void *data) +{ + unsigned int *weight = (unsigned int *)data; + + /* This is quite simplistic. The "base" weight of a device + * is 10. 0 means no DMA is to be accounted for it. + */ + if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) + return 0; + + if (dev->class == PCI_CLASS_SERIAL_USB_UHCI || + dev->class == PCI_CLASS_SERIAL_USB_OHCI || + dev->class == PCI_CLASS_SERIAL_USB_EHCI) + *weight += 3; + else if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID) + *weight += 15; + else + *weight += 10; + + return 0; +} + +static unsigned int pnv_pci_ioda_pe_dma_weight(struct pnv_ioda_pe *pe) +{ + unsigned int weight = 0; + + /* SRIOV VF has same DMA32 weight as its PF */ +#ifdef CONFIG_PCI_IOV + if ((pe->flags & PNV_IODA_PE_VF) && pe->parent_dev) { + pnv_pci_ioda_dev_dma_weight(pe->parent_dev, &weight); + return weight; + } +#endif + + if ((pe->flags & PNV_IODA_PE_DEV) && pe->pdev) { + pnv_pci_ioda_dev_dma_weight(pe->pdev, &weight); + } else if ((pe->flags & PNV_IODA_PE_BUS) && pe->pbus) { + struct pci_dev *pdev; + + list_for_each_entry(pdev, &pe->pbus->devices, bus_list) + pnv_pci_ioda_dev_dma_weight(pdev, &weight); + } else if ((pe->flags & PNV_IODA_PE_BUS_ALL) && pe->pbus) { + pci_walk_bus(pe->pbus, pnv_pci_ioda_dev_dma_weight, &weight); + } + + return weight; +} + static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe, unsigned int base, @@ -2043,17 +2026,12 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */ /* XXX FIXME: Allocate multi-level tables on PHB3 */ - /* We shouldn't already have a 32-bit DMA associated */ - if (WARN_ON(pe->tce32_seg >= 0)) - return; - tbl = pnv_pci_table_alloc(phb->hose->node); iommu_register_group(&pe->table_group, phb->hose->global_number, pe->pe_number); pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group); /* Grab a 32-bit TCE table */ - pe->tce32_seg = base; pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n", base * PNV_IODA1_DMA32_SEGSIZE, (base + segs) * PNV_IODA1_DMA32_SEGSIZE - 1); @@ -2120,8 +2098,6 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, return; fail: /* XXX Failure: Try to fallback to 64-bit only ? */ - if (pe->tce32_seg >= 0) - pe->tce32_seg = -1; if (tce_mem) __free_pages(tce_mem, get_order(tce32_segsz * segs)); if (tbl) { @@ -2532,10 +2508,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, { int64_t rc; - /* We shouldn't already have a 32-bit DMA associated */ - if (WARN_ON(pe->tce32_seg >= 0)) - return; - /* TVE #1 is selected by PCI address bit 59 */ pe->tce_bypass_base = 1ull << 59; @@ -2543,7 +2515,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, pe->pe_number); /* The PE will reserve all possible 32-bits space */ - pe->tce32_seg = 0; pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n", phb->ioda.m32_pci_base); @@ -2559,11 +2530,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, #endif rc = pnv_pci_ioda2_setup_default_config(pe); - if (rc) { - if (pe->tce32_seg >= 0) - pe->tce32_seg = -1; + if (rc) return; - } if (pe->flags & PNV_IODA_PE_DEV) iommu_add_device(&pe->pdev->dev); @@ -2574,24 +2542,35 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, static void pnv_ioda_setup_dma(struct pnv_phb *phb) { struct pci_controller *hose = phb->hose; - unsigned int residual, remaining, segs, tw, base; + unsigned int weight, total_weight, dma_pe_count; + unsigned int residual, remaining, segs, base; struct pnv_ioda_pe *pe; + total_weight = 0; + pci_walk_bus(phb->hose->bus, pnv_pci_ioda_dev_dma_weight, + &total_weight); + + dma_pe_count = 0; + list_for_each_entry(pe, &phb->ioda.pe_list, list) { + weight = pnv_pci_ioda_pe_dma_weight(pe); + if (weight > 0) + dma_pe_count++; + } + /* If we have more PE# than segments available, hand out one * per PE until we run out and let the rest fail. If not, * then we assign at least one segment per PE, plus more based * on the amount of devices under that PE */ - if (phb->ioda.dma_pe_count > phb->ioda.tce32_count) + if (dma_pe_count > phb->ioda.tce32_count) residual = 0; else - residual = phb->ioda.tce32_count - - phb->ioda.dma_pe_count; + residual = phb->ioda.tce32_count - dma_pe_count; pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n", hose->global_number, phb->ioda.tce32_count); pr_info("PCI: %d PE# for a total weight of %d\n", - phb->ioda.dma_pe_count, phb->ioda.dma_weight); + dma_pe_count, total_weight); pnv_pci_ioda_setup_opal_tce_kill(phb); @@ -2600,18 +2579,20 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb) * weight */ remaining = phb->ioda.tce32_count; - tw = phb->ioda.dma_weight; base = 0; - list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) { - if (!pe->dma_weight) + list_for_each_entry(pe, &phb->ioda.pe_list, list) { + weight = pnv_pci_ioda_pe_dma_weight(pe); + if (!weight) continue; + if (!remaining) { pe_warn(pe, "No DMA32 resources available\n"); continue; } segs = 1; if (residual) { - segs += ((pe->dma_weight * residual) + (tw / 2)) / tw; + segs += ((weight * residual) + (total_weight / 2)) / + total_weight; if (segs > remaining) segs = remaining; } @@ -2623,7 +2604,7 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb) */ if (phb->type == PNV_PHB_IODA1) { pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n", - pe->dma_weight, segs); + weight, segs); pnv_pci_ioda1_setup_dma_pe(phb, pe, base, segs); } else if (phb->type == PNV_PHB_IODA2) { pe_info(pe, "Assign DMA32 space\n"); @@ -3167,13 +3148,18 @@ static void pnv_npu_ioda_fixup(void) struct pci_controller *hose, *tmp; struct pnv_phb *phb; struct pnv_ioda_pe *pe; + unsigned int weight; list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { phb = hose->private_data; if (phb->type != PNV_PHB_NPU) continue; - list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) { + list_for_each_entry(pe, &phb->ioda.pe_list, list) { + weight = pnv_pci_ioda_pe_dma_weight(pe); + if (WARN_ON(!weight)) + continue; + enable_bypass = dma_get_mask(&pe->pdev->dev) == DMA_BIT_MASK(64); pnv_npu_init_dma_pe(pe); @@ -3455,7 +3441,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb->ioda.pe_array = aux + pemap_off; set_bit(phb->ioda.reserved_pe_idx, phb->ioda.pe_alloc); - INIT_LIST_HEAD(&phb->ioda.pe_dma_list); INIT_LIST_HEAD(&phb->ioda.pe_list); mutex_init(&phb->ioda.pe_list_mutex); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index a409165..117cfcd 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -53,14 +53,7 @@ struct pnv_ioda_pe { /* PE number */ unsigned int pe_number; - /* "Weight" assigned to the PE for the sake of DMA resource - * allocations - */ - unsigned int dma_weight; - /* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */ - int tce32_seg; - int tce32_segcount; struct iommu_table_group table_group; /* 64-bit TCE bypass region */ @@ -78,7 +71,6 @@ struct pnv_ioda_pe { struct list_head slaves; /* Link in list of PE#s */ - struct list_head dma_link; struct list_head list; }; @@ -169,17 +161,6 @@ struct pnv_phb { /* 32-bit TCE tables allocation */ unsigned long tce32_count; - /* Total "weight" for the sake of DMA resources - * allocation - */ - unsigned int dma_weight; - unsigned int dma_pe_count; - - /* Sorted list of used PE's, sorted at - * boot for resource allocation purposes - */ - struct list_head pe_dma_list; - /* TCE cache invalidate registers (physical and * remapped) */ -- cgit v1.1 From 2b923ed1bd96cc471bb0f0eddfed6ae4753d717b Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 5 May 2016 12:04:16 +1000 Subject: powerpc/powernv/ioda1: Improve DMA32 segment track In current implementation, the DMA32 segments required by one specific PE isn't calculated with the information hold in the PE independently. It conflicts with the PCI hotplug design: PE centralized, meaning the PE's DMA32 segments should be calculated from the information hold in the PE independently. This introduces an array (@dma32_segmap) for every PHB to track the DMA32 segmeng usage. Besides, this moves the logic calculating PE's consumed DMA32 segments to pnv_pci_ioda1_setup_dma_pe() so that PE's DMA32 segments are calculated/allocated from the information hold in the PE (DMA32 weight). Also the logic is improved: we try to allocate as much DMA32 segments as we can. It's acceptable that number of DMA32 segments less than the expected number are allocated. Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 115 ++++++++++++++++-------------- arch/powerpc/platforms/powernv/pci.h | 7 +- 2 files changed, 66 insertions(+), 56 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 30c825a..98879a0 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2011,27 +2011,62 @@ static unsigned int pnv_pci_ioda_pe_dma_weight(struct pnv_ioda_pe *pe) } static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, - struct pnv_ioda_pe *pe, - unsigned int base, - unsigned int segs) + struct pnv_ioda_pe *pe) { struct page *tce_mem = NULL; struct iommu_table *tbl; - unsigned int tce32_segsz, i; + unsigned int weight, total_weight = 0; + unsigned int tce32_segsz, base, segs, avail, i; int64_t rc; void *addr; /* XXX FIXME: Handle 64-bit only DMA devices */ /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */ /* XXX FIXME: Allocate multi-level tables on PHB3 */ + weight = pnv_pci_ioda_pe_dma_weight(pe); + if (!weight) + return; + + pci_walk_bus(phb->hose->bus, pnv_pci_ioda_dev_dma_weight, + &total_weight); + segs = (weight * phb->ioda.dma32_count) / total_weight; + if (!segs) + segs = 1; + /* + * Allocate contiguous DMA32 segments. We begin with the expected + * number of segments. With one more attempt, the number of DMA32 + * segments to be allocated is decreased by one until one segment + * is allocated successfully. + */ + do { + for (base = 0; base <= phb->ioda.dma32_count - segs; base++) { + for (avail = 0, i = base; i < base + segs; i++) { + if (phb->ioda.dma32_segmap[i] == + IODA_INVALID_PE) + avail++; + } + + if (avail == segs) + goto found; + } + } while (--segs); + + if (!segs) { + pe_warn(pe, "No available DMA32 segments\n"); + return; + } + +found: tbl = pnv_pci_table_alloc(phb->hose->node); iommu_register_group(&pe->table_group, phb->hose->global_number, pe->pe_number); pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group); /* Grab a 32-bit TCE table */ + pe_info(pe, "DMA weight %d (%d), assigned (%d) %d DMA32 segments\n", + weight, total_weight, base, segs); pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n", base * PNV_IODA1_DMA32_SEGSIZE, (base + segs) * PNV_IODA1_DMA32_SEGSIZE - 1); @@ -2068,6 +2103,10 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, } } + /* Setup DMA32 segment mapping */ + for (i = base; i < base + segs; i++) + phb->ioda.dma32_segmap[i] = pe->pe_number; + /* Setup linux iommu table */ pnv_pci_setup_iommu_table(tbl, addr, tce32_segsz * segs, base * PNV_IODA1_DMA32_SEGSIZE, @@ -2542,73 +2581,34 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, static void pnv_ioda_setup_dma(struct pnv_phb *phb) { struct pci_controller *hose = phb->hose; - unsigned int weight, total_weight, dma_pe_count; - unsigned int residual, remaining, segs, base; struct pnv_ioda_pe *pe; - - total_weight = 0; - pci_walk_bus(phb->hose->bus, pnv_pci_ioda_dev_dma_weight, - &total_weight); - - dma_pe_count = 0; - list_for_each_entry(pe, &phb->ioda.pe_list, list) { - weight = pnv_pci_ioda_pe_dma_weight(pe); - if (weight > 0) - dma_pe_count++; - } + unsigned int weight; /* If we have more PE# than segments available, hand out one * per PE until we run out and let the rest fail. If not, * then we assign at least one segment per PE, plus more based * on the amount of devices under that PE */ - if (dma_pe_count > phb->ioda.tce32_count) - residual = 0; - else - residual = phb->ioda.tce32_count - dma_pe_count; - - pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n", - hose->global_number, phb->ioda.tce32_count); - pr_info("PCI: %d PE# for a total weight of %d\n", - dma_pe_count, total_weight); + pr_info("PCI: Domain %04x has %d available 32-bit DMA segments\n", + hose->global_number, phb->ioda.dma32_count); pnv_pci_ioda_setup_opal_tce_kill(phb); - /* Walk our PE list and configure their DMA segments, hand them - * out one base segment plus any residual segments based on - * weight - */ - remaining = phb->ioda.tce32_count; - base = 0; + /* Walk our PE list and configure their DMA segments */ list_for_each_entry(pe, &phb->ioda.pe_list, list) { weight = pnv_pci_ioda_pe_dma_weight(pe); if (!weight) continue; - if (!remaining) { - pe_warn(pe, "No DMA32 resources available\n"); - continue; - } - segs = 1; - if (residual) { - segs += ((weight * residual) + (total_weight / 2)) / - total_weight; - if (segs > remaining) - segs = remaining; - } - /* * For IODA2 compliant PHB3, we needn't care about the weight. * The all available 32-bits DMA space will be assigned to * the specific PE. */ if (phb->type == PNV_PHB_IODA1) { - pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n", - weight, segs); - pnv_pci_ioda1_setup_dma_pe(phb, pe, base, segs); + pnv_pci_ioda1_setup_dma_pe(phb, pe); } else if (phb->type == PNV_PHB_IODA2) { pe_info(pe, "Assign DMA32 space\n"); - segs = 0; pnv_pci_ioda2_setup_dma_pe(phb, pe); } else if (phb->type == PNV_PHB_NPU) { /* @@ -2618,9 +2618,6 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb) * as the PHB3 TVT. */ } - - remaining -= segs; - base += segs; } } @@ -3327,7 +3324,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, { struct pci_controller *hose; struct pnv_phb *phb; - unsigned long size, m64map_off, m32map_off, pemap_off, iomap_off = 0; + unsigned long size, m64map_off, m32map_off, pemap_off; + unsigned long iomap_off = 0, dma32map_off = 0; const __be64 *prop64; const __be32 *prop32; int len; @@ -3413,6 +3411,10 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe_num; phb->ioda.io_pci_base = 0; /* XXX calculate this ? */ + /* Calculate how many 32-bit TCE segments we have */ + phb->ioda.dma32_count = phb->ioda.m32_pci_base / + PNV_IODA1_DMA32_SEGSIZE; + /* Allocate aux data & arrays. We don't have IO ports on PHB3 */ size = _ALIGN_UP(phb->ioda.total_pe_num / 8, sizeof(unsigned long)); m64map_off = size; @@ -3422,6 +3424,9 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, if (phb->type == PNV_PHB_IODA1) { iomap_off = size; size += phb->ioda.total_pe_num * sizeof(phb->ioda.io_segmap[0]); + dma32map_off = size; + size += phb->ioda.dma32_count * + sizeof(phb->ioda.dma32_segmap[0]); } pemap_off = size; size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe); @@ -3437,6 +3442,10 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb->ioda.io_segmap = aux + iomap_off; for (segno = 0; segno < phb->ioda.total_pe_num; segno++) phb->ioda.io_segmap[segno] = IODA_INVALID_PE; + + phb->ioda.dma32_segmap = aux + dma32map_off; + for (segno = 0; segno < phb->ioda.dma32_count; segno++) + phb->ioda.dma32_segmap[segno] = IODA_INVALID_PE; } phb->ioda.pe_array = aux + pemap_off; set_bit(phb->ioda.reserved_pe_idx, phb->ioda.pe_alloc); @@ -3445,7 +3454,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, mutex_init(&phb->ioda.pe_list_mutex); /* Calculate how many 32-bit TCE segments we have */ - phb->ioda.tce32_count = phb->ioda.m32_pci_base / + phb->ioda.dma32_count = phb->ioda.m32_pci_base / PNV_IODA1_DMA32_SEGSIZE; #if 0 /* We should really do that ... */ diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 117cfcd..14d9391 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -142,6 +142,10 @@ struct pnv_phb { unsigned int *m32_segmap; unsigned int *io_segmap; + /* DMA32 segment maps - IODA1 only */ + unsigned int dma32_count; + unsigned int *dma32_segmap; + /* IRQ chip */ int irq_chip_init; struct irq_chip irq_chip; @@ -158,9 +162,6 @@ struct pnv_phb { */ unsigned char pe_rmap[0x10000]; - /* 32-bit TCE tables allocation */ - unsigned long tce32_count; - /* TCE cache invalidate registers (physical and * remapped) */ -- cgit v1.1 From 1e9167726c41b8d2126327379f4f82acbd548ba8 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 May 2016 15:41:36 +1000 Subject: powerpc/powernv: Use PE instead of number during setup and release In current implementation, the PEs that are allocated or picked from the reserved list are identified by PE number. The PE instance has to be picked according to the PE number eventually. We have same issue when PE is released. For pnv_ioda_pick_m64_pe() and pnv_ioda_alloc_pe(), this returns PE instance so that pnv_ioda_setup_bus_PE() can use the allocated or reserved PE instance directly. Also, pnv_ioda_setup_bus_PE() returns the reserved/allocated PE instance to be used in subsequent patches. On the other hand, pnv_ioda_free_pe() uses PE instance (not number) as its argument. No logical changes introduced. Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 104 +++++++++++++++++------------- arch/powerpc/platforms/powernv/pci.h | 2 +- 2 files changed, 59 insertions(+), 47 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 98879a0..c762f38 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -123,6 +123,14 @@ static inline bool pnv_pci_is_mem_pref_64(unsigned long flags) (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)); } +static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no) +{ + phb->ioda.pe_array[pe_no].phb = phb; + phb->ioda.pe_array[pe_no].pe_number = pe_no; + + return &phb->ioda.pe_array[pe_no]; +} + static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) { if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe_num)) { @@ -135,11 +143,10 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) pr_debug("%s: PE %d was reserved on PHB#%x\n", __func__, pe_no, phb->hose->global_number); - phb->ioda.pe_array[pe_no].phb = phb; - phb->ioda.pe_array[pe_no].pe_number = pe_no; + pnv_ioda_init_pe(phb, pe_no); } -static unsigned int pnv_ioda_alloc_pe(struct pnv_phb *phb) +static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb) { unsigned long pe; @@ -147,20 +154,20 @@ static unsigned int pnv_ioda_alloc_pe(struct pnv_phb *phb) pe = find_next_zero_bit(phb->ioda.pe_alloc, phb->ioda.total_pe_num, 0); if (pe >= phb->ioda.total_pe_num) - return IODA_INVALID_PE; + return NULL; } while(test_and_set_bit(pe, phb->ioda.pe_alloc)); - phb->ioda.pe_array[pe].phb = phb; - phb->ioda.pe_array[pe].pe_number = pe; - return pe; + return pnv_ioda_init_pe(phb, pe); } -static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe) +static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) { - WARN_ON(phb->ioda.pe_array[pe].pdev); + struct pnv_phb *phb = pe->phb; - memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe)); - clear_bit(pe, phb->ioda.pe_alloc); + WARN_ON(pe->pdev); + + memset(pe, 0, sizeof(struct pnv_ioda_pe)); + clear_bit(pe->pe_number, phb->ioda.pe_alloc); } /* The default M64 BAR is shared by all PEs */ @@ -320,7 +327,7 @@ static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus, } } -static unsigned int pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) +static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) { struct pci_controller *hose = pci_bus_to_host(bus); struct pnv_phb *phb = hose->private_data; @@ -330,7 +337,7 @@ static unsigned int pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) /* Root bus shouldn't use M64 */ if (pci_is_root_bus(bus)) - return IODA_INVALID_PE; + return NULL; /* Allocate bitmap */ size = _ALIGN_UP(phb->ioda.total_pe_num / 8, sizeof(unsigned long)); @@ -338,7 +345,7 @@ static unsigned int pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) if (!pe_alloc) { pr_warn("%s: Out of memory !\n", __func__); - return IODA_INVALID_PE; + return NULL; } /* Figure out reserved PE numbers by the PE */ @@ -351,7 +358,7 @@ static unsigned int pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) */ if (bitmap_empty(pe_alloc, phb->ioda.total_pe_num)) { kfree(pe_alloc); - return IODA_INVALID_PE; + return NULL; } /* @@ -397,7 +404,7 @@ static unsigned int pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) } kfree(pe_alloc); - return master_pe->pe_number; + return master_pe; } static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) @@ -963,7 +970,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) struct pnv_phb *phb = hose->private_data; struct pci_dn *pdn = pci_get_pdn(dev); struct pnv_ioda_pe *pe; - unsigned int pe_num; if (!pdn) { pr_err("%s: Device tree node not associated properly\n", @@ -973,8 +979,8 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) if (pdn->pe_number != IODA_INVALID_PE) return NULL; - pe_num = pnv_ioda_alloc_pe(phb); - if (pe_num == IODA_INVALID_PE) { + pe = pnv_ioda_alloc_pe(phb); + if (!pe) { pr_warning("%s: Not enough PE# available, disabling device\n", pci_name(dev)); return NULL; @@ -987,10 +993,9 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) * * At some point we want to remove the PDN completely anyways */ - pe = &phb->ioda.pe_array[pe_num]; pci_dev_get(dev); pdn->pcidev = dev; - pdn->pe_number = pe_num; + pdn->pe_number = pe->pe_number; pe->flags = PNV_IODA_PE_DEV; pe->pdev = dev; pe->pbus = NULL; @@ -1001,8 +1006,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) if (pnv_ioda_configure_pe(phb, pe)) { /* XXX What do we do here ? */ - if (pe_num) - pnv_ioda_free_pe(phb, pe_num); + pnv_ioda_free_pe(pe); pdn->pe_number = IODA_INVALID_PE; pe->pdev = NULL; pci_dev_put(dev); @@ -1037,28 +1041,26 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) * subordinate PCI devices and buses. The second type of PE is normally * orgiriated by PCIe-to-PCI bridge or PLX switch downstream ports. */ -static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) +static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) { struct pci_controller *hose = pci_bus_to_host(bus); struct pnv_phb *phb = hose->private_data; - struct pnv_ioda_pe *pe; - unsigned int pe_num = IODA_INVALID_PE; + struct pnv_ioda_pe *pe = NULL; /* Check if PE is determined by M64 */ if (phb->pick_m64_pe) - pe_num = phb->pick_m64_pe(bus, all); + pe = phb->pick_m64_pe(bus, all); /* The PE number isn't pinned by M64 */ - if (pe_num == IODA_INVALID_PE) - pe_num = pnv_ioda_alloc_pe(phb); + if (!pe) + pe = pnv_ioda_alloc_pe(phb); - if (pe_num == IODA_INVALID_PE) { + if (!pe) { pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n", __func__, pci_domain_nr(bus), bus->number); - return; + return NULL; } - pe = &phb->ioda.pe_array[pe_num]; pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS); pe->pbus = bus; pe->pdev = NULL; @@ -1067,17 +1069,16 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) if (all) pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n", - bus->busn_res.start, bus->busn_res.end, pe_num); + bus->busn_res.start, bus->busn_res.end, pe->pe_number); else pe_info(pe, "Secondary bus %d associated with PE#%d\n", - bus->busn_res.start, pe_num); + bus->busn_res.start, pe->pe_number); if (pnv_ioda_configure_pe(phb, pe)) { /* XXX What do we do here ? */ - if (pe_num) - pnv_ioda_free_pe(phb, pe_num); + pnv_ioda_free_pe(pe); pe->pbus = NULL; - return; + return NULL; } /* Associate it with all child devices */ @@ -1085,6 +1086,8 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) /* Put PE to the list */ list_add_tail(&pe->list, &phb->ioda.pe_list); + + return pe; } static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) @@ -1396,7 +1399,7 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev) pnv_ioda_deconfigure_pe(phb, pe); - pnv_ioda_free_pe(phb, pe->pe_number); + pnv_ioda_free_pe(pe); } } @@ -1405,6 +1408,7 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev) struct pci_bus *bus; struct pci_controller *hose; struct pnv_phb *phb; + struct pnv_ioda_pe *pe; struct pci_dn *pdn; struct pci_sriov *iov; u16 num_vfs, i; @@ -1429,8 +1433,11 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev) /* Release PE numbers */ if (pdn->m64_single_mode) { for (i = 0; i < num_vfs; i++) { - if (pdn->pe_num_map[i] != IODA_INVALID_PE) - pnv_ioda_free_pe(phb, pdn->pe_num_map[i]); + if (pdn->pe_num_map[i] == IODA_INVALID_PE) + continue; + + pe = &phb->ioda.pe_array[pdn->pe_num_map[i]]; + pnv_ioda_free_pe(pe); } } else bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); @@ -1483,8 +1490,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) if (pnv_ioda_configure_pe(phb, pe)) { /* XXX What do we do here ? */ - if (pe_num) - pnv_ioda_free_pe(phb, pe_num); + pnv_ioda_free_pe(pe); pe->pdev = NULL; continue; } @@ -1503,6 +1509,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) struct pci_bus *bus; struct pci_controller *hose; struct pnv_phb *phb; + struct pnv_ioda_pe *pe; struct pci_dn *pdn; int ret; u16 i; @@ -1545,11 +1552,13 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) /* Calculate available PE for required VFs */ if (pdn->m64_single_mode) { for (i = 0; i < num_vfs; i++) { - pdn->pe_num_map[i] = pnv_ioda_alloc_pe(phb); - if (pdn->pe_num_map[i] == IODA_INVALID_PE) { + pe = pnv_ioda_alloc_pe(phb); + if (!pe) { ret = -EBUSY; goto m64_failed; } + + pdn->pe_num_map[i] = pe->pe_number; } } else { mutex_lock(&phb->ioda.pe_alloc_mutex); @@ -1594,8 +1603,11 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) m64_failed: if (pdn->m64_single_mode) { for (i = 0; i < num_vfs; i++) { - if (pdn->pe_num_map[i] != IODA_INVALID_PE) - pnv_ioda_free_pe(phb, pdn->pe_num_map[i]); + if (pdn->pe_num_map[i] == IODA_INVALID_PE) + continue; + + pe = &phb->ioda.pe_array[pdn->pe_num_map[i]]; + pnv_ioda_free_pe(pe); } } else bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 14d9391..904f60b 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -105,7 +105,7 @@ struct pnv_phb { int (*init_m64)(struct pnv_phb *phb); void (*reserve_m64_pe)(struct pci_bus *bus, unsigned long *pe_bitmap, bool all); - unsigned int (*pick_m64_pe)(struct pci_bus *bus, bool all); + struct pnv_ioda_pe *(*pick_m64_pe)(struct pci_bus *bus, bool all); int (*get_pe_state)(struct pnv_phb *phb, int pe_no); void (*freeze_pe)(struct pnv_phb *phb, int pe_no); int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt); -- cgit v1.1 From 3773dd258e91e1e14d95cdf17b9d83a1a7b27af1 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 May 2016 15:41:38 +1000 Subject: powerpc/pci: Rename pcibios_find_pci_bus() This renames pcibios_find_pci_bus() to pci_find_bus_by_node() to avoid conflicts with those PCI subsystem weak function names, which have prefix "pcibios". No logical changes introduced. Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/pci_dlpar.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 5d4a3df..aee22b4 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -54,8 +54,7 @@ find_bus_among_children(struct pci_bus *bus, return child; } -struct pci_bus * -pcibios_find_pci_bus(struct device_node *dn) +struct pci_bus *pci_find_bus_by_node(struct device_node *dn) { struct pci_dn *pdn = dn->data; @@ -64,7 +63,7 @@ pcibios_find_pci_bus(struct device_node *dn) return find_bus_among_children(pdn->phb->bus, dn); } -EXPORT_SYMBOL_GPL(pcibios_find_pci_bus); +EXPORT_SYMBOL_GPL(pci_find_bus_by_node); struct pci_controller *init_phb_dynamic(struct device_node *dn) { -- cgit v1.1 From 6384d97780c123d2e167242862ed4ea9051210e4 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 May 2016 15:41:39 +1000 Subject: powerpc/pci: Move pci_find_bus_by_node() around This moves pci_find_bus_by_node() from arch/powerpc/platforms/ pseries/pci_dlpar.c to arch/powerpc/kernel/pci-hotplug.c so that the function can be used by pSeries and PowerNV platform at the same time. Also, below cleanup applied. No functional changes introduced. * Remove variable "busdn" in find_bus_among_children() * Use PCI_DN() to convert device node to pci_dn Signed-off-by: Gavin Shan Acked-by: Benjamin Herrenschmidt Reviewed-by: Andrew Donnellan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/pci_dlpar.c | 31 ------------------------------ 1 file changed, 31 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index aee22b4..906dbaa 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -34,37 +34,6 @@ #include "pseries.h" -static struct pci_bus * -find_bus_among_children(struct pci_bus *bus, - struct device_node *dn) -{ - struct pci_bus *child = NULL; - struct pci_bus *tmp; - struct device_node *busdn; - - busdn = pci_bus_to_OF_node(bus); - if (busdn == dn) - return bus; - - list_for_each_entry(tmp, &bus->children, node) { - child = find_bus_among_children(tmp, dn); - if (child) - break; - }; - return child; -} - -struct pci_bus *pci_find_bus_by_node(struct device_node *dn) -{ - struct pci_dn *pdn = dn->data; - - if (!pdn || !pdn->phb || !pdn->phb->bus) - return NULL; - - return find_bus_among_children(pdn->phb->bus, dn); -} -EXPORT_SYMBOL_GPL(pci_find_bus_by_node); - struct pci_controller *init_phb_dynamic(struct device_node *dn) { struct pci_controller *phb; -- cgit v1.1 From d8f66f411e2dc1c27413f0ba73f49bccfa998f1b Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 May 2016 15:41:40 +1000 Subject: powerpc/pci: Export pci_add_device_node_info() This renames update_dn_pci_info() to pci_add_device_node_info() with corresponding adjustment on the parameter type and exports it. The function is used to create pdn (struct pci_dn) for the indicated device node. Another function add_pdn(), almost wrapper of pci_add_device_node_info(), to be used in traverse_pci_devices(). No logical changes introduced. Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index fa73494..9883bc7 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -267,7 +267,7 @@ static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long act pdn = parent ? PCI_DN(parent) : NULL; if (pdn) { /* Create pdn and EEH device */ - update_dn_pci_info(np, pdn->phb); + pci_add_device_node_info(pdn->phb, np); eeh_dev_init(PCI_DN(np), pdn->phb); } -- cgit v1.1 From cdddc577d956d55a62581ba3681f8ba66eca3873 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 May 2016 15:41:42 +1000 Subject: powerpc/pci: Export pci_traverse_device_nodes() This renames traverse_pci_devices() to pci_traverse_device_nodes(). The function traverses all subordinate device nodes of the specified one. Also, below cleanup applied to the function. No logical changes introduced. * Rename "pre" to "fn". * Avoid assignment in if condition reported from checkpatch.pl. Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/msi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 272e9ec..543a638 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -305,7 +305,7 @@ static int msi_quota_for_device(struct pci_dev *dev, int request) memset(&counts, 0, sizeof(struct msi_counts)); /* Work out how many devices we have below this PE */ - traverse_pci_devices(pe_dn, count_non_bridge_devices, &counts); + pci_traverse_device_nodes(pe_dn, count_non_bridge_devices, &counts); if (counts.num_devices == 0) { pr_err("rtas_msi: found 0 devices under PE for %s\n", @@ -320,7 +320,7 @@ static int msi_quota_for_device(struct pci_dev *dev, int request) /* else, we have some more calculating to do */ counts.requestor = pci_device_to_OF_node(dev); counts.request = request; - traverse_pci_devices(pe_dn, count_spare_msis, &counts); + pci_traverse_device_nodes(pe_dn, count_spare_msis, &counts); /* If the quota isn't an integer multiple of the total, we can * use the remainder as spare MSIs for anyone that wants them. */ -- cgit v1.1 From 4fad494321351f0ac412945c6a464109ad96734a Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 May 2016 15:41:44 +1000 Subject: powerpc/powernv: Simplify pnv_eeh_reset() This drops unnecessary nested if statements in pnv_eeh_reset() to improve the code readability. After the changes, the unused local variable "ret" is dropped as well. No logical changes introduced. Signed-off-by: Gavin Shan Reviewed-by: Andrew Donnellan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/eeh-powernv.c | 67 +++++++++++++--------------- 1 file changed, 31 insertions(+), 36 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 69e41ce..9226df1 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1009,8 +1009,9 @@ static int pnv_eeh_reset_vf_pe(struct eeh_pe *pe, int option) static int pnv_eeh_reset(struct eeh_pe *pe, int option) { struct pci_controller *hose = pe->phb; + struct pnv_phb *phb; struct pci_bus *bus; - int ret; + int64_t rc; /* * For PHB reset, we always have complete reset. For those PEs whose @@ -1026,45 +1027,39 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option) * reset. The side effect is that EEH core has to clear the frozen * state explicitly after BAR restore. */ - if (pe->type & EEH_PE_PHB) { - ret = pnv_eeh_phb_reset(hose, option); - } else { - struct pnv_phb *phb; - s64 rc; + if (pe->type & EEH_PE_PHB) + return pnv_eeh_phb_reset(hose, option); - /* - * The frozen PE might be caused by PAPR error injection - * registers, which are expected to be cleared after hitting - * frozen PE as stated in the hardware spec. Unfortunately, - * that's not true on P7IOC. So we have to clear it manually - * to avoid recursive EEH errors during recovery. - */ - phb = hose->private_data; - if (phb->model == PNV_PHB_MODEL_P7IOC && - (option == EEH_RESET_HOT || - option == EEH_RESET_FUNDAMENTAL)) { - rc = opal_pci_reset(phb->opal_id, - OPAL_RESET_PHB_ERROR, - OPAL_ASSERT_RESET); - if (rc != OPAL_SUCCESS) { - pr_warn("%s: Failure %lld clearing " - "error injection registers\n", - __func__, rc); - return -EIO; - } + /* + * The frozen PE might be caused by PAPR error injection + * registers, which are expected to be cleared after hitting + * frozen PE as stated in the hardware spec. Unfortunately, + * that's not true on P7IOC. So we have to clear it manually + * to avoid recursive EEH errors during recovery. + */ + phb = hose->private_data; + if (phb->model == PNV_PHB_MODEL_P7IOC && + (option == EEH_RESET_HOT || + option == EEH_RESET_FUNDAMENTAL)) { + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PHB_ERROR, + OPAL_ASSERT_RESET); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld clearing error injection registers\n", + __func__, rc); + return -EIO; } - - bus = eeh_pe_bus_get(pe); - if (pe->type & EEH_PE_VF) - ret = pnv_eeh_reset_vf_pe(pe, option); - else if (pci_is_root_bus(bus) || - pci_is_root_bus(bus->parent)) - ret = pnv_eeh_root_reset(hose, option); - else - ret = pnv_eeh_bridge_reset(bus->self, option); } - return ret; + bus = eeh_pe_bus_get(pe); + if (pe->type & EEH_PE_VF) + return pnv_eeh_reset_vf_pe(pe, option); + + if (pci_is_root_bus(bus) || + pci_is_root_bus(bus->parent)) + return pnv_eeh_root_reset(hose, option); + + return pnv_eeh_bridge_reset(bus->self, option); } /** -- cgit v1.1 From c8ceacc22bce95d3a9cff198c9c27a30105a16b8 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 May 2016 15:41:45 +1000 Subject: powerpc/powernv: Exclude root bus in pnv_pci_reset_secondary_bus() The function pnv_pci_reset_secondary_bus() is called like below. It's impossible for call the function on root bus. So it's safe to remove the root bus case in the function. No functional changes introduced. pci_parent_bus_reset() / pci_bus_reset() / pci_try_reset_bus() pci_reset_bridge_secondary_bus() pcibios_reset_secondary_bus() pnv_pci_reset_secondary_bus() Signed-off-by: Gavin Shan Reviewed-by: Daniel Axtens Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/eeh-powernv.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 9226df1..593b8dc 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -868,16 +868,8 @@ static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option) void pnv_pci_reset_secondary_bus(struct pci_dev *dev) { - struct pci_controller *hose; - - if (pci_is_root_bus(dev->bus)) { - hose = pci_bus_to_host(dev->bus); - pnv_eeh_root_reset(hose, EEH_RESET_HOT); - pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE); - } else { - pnv_eeh_bridge_reset(dev, EEH_RESET_HOT); - pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE); - } + pnv_eeh_bridge_reset(dev, EEH_RESET_HOT); + pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE); } static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type, -- cgit v1.1 From a7cf13caad8bf368a28f31dbb4807d61bc9ace18 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 29 Apr 2016 18:55:16 +1000 Subject: powerpc/powernv: Rename pnv_pci_ioda2_tce_invalidate_entire As in fact pnv_pci_ioda2_tce_invalidate_entire() invalidates TCEs for the specific PE rather than the entire cache, rename it to pnv_pci_ioda2_tce_invalidate_pe(). In later patches we will add a proper pnv_pci_ioda2_tce_invalidate_entire(). Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index c762f38..acce430 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1840,7 +1840,7 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = { .get = pnv_tce_get, }; -static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe) +static inline void pnv_pci_ioda2_tce_invalidate_pe(struct pnv_ioda_pe *pe) { /* 01xb - invalidate TCEs that match the specified PE# */ unsigned long val = (0x4ull << 60) | (pe->pe_number & 0xFF); @@ -2191,7 +2191,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, pnv_pci_link_table_and_group(phb->hose->node, num, tbl, &pe->table_group); - pnv_pci_ioda2_tce_invalidate_entire(pe); + pnv_pci_ioda2_tce_invalidate_pe(pe); return 0; } @@ -2335,7 +2335,7 @@ static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, if (ret) pe_warn(pe, "Unmapping failed, ret = %ld\n", ret); else - pnv_pci_ioda2_tce_invalidate_entire(pe); + pnv_pci_ioda2_tce_invalidate_pe(pe); pnv_pci_unlink_table_and_group(table_group->tables[num], table_group); -- cgit v1.1 From bef9253f551189febf0052a06dc079eb34137a58 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 29 Apr 2016 18:55:17 +1000 Subject: powerpc/powernv: Define TCE Kill flags This replaces magic constants for TCE Kill IODA2 register with macros. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index acce430..d7fbbf1 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1840,10 +1840,13 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = { .get = pnv_tce_get, }; +#define TCE_KILL_INVAL_PE PPC_BIT(1) +#define TCE_KILL_INVAL_TCE PPC_BIT(2) + static inline void pnv_pci_ioda2_tce_invalidate_pe(struct pnv_ioda_pe *pe) { /* 01xb - invalidate TCEs that match the specified PE# */ - unsigned long val = (0x4ull << 60) | (pe->pe_number & 0xFF); + unsigned long val = TCE_KILL_INVAL_PE | (pe->pe_number & 0xFF); struct pnv_phb *phb = pe->phb; struct pnv_ioda_pe *npe; int i; @@ -1871,7 +1874,7 @@ static void pnv_pci_ioda2_do_tce_invalidate(unsigned pe_number, bool rm, unsigned long start, end, inc; /* We'll invalidate DMA address in PE scope */ - start = 0x2ull << 60; + start = TCE_KILL_INVAL_TCE; start |= (pe_number & 0xFF); end = start; -- cgit v1.1 From 0bbcdb437da0c4acba582058d3d1a84b93bc081d Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 29 Apr 2016 18:55:18 +1000 Subject: powerpc/powernv/npu: TCE Kill helpers cleanup NPU PHB TCE Kill register is exactly the same as in the rest of POWER8 so let's reuse the existing code for NPU. The only bit missing is a helper to reset the entire TCE cache so this moves such a helper from NPU code and renames it. Since pnv_npu_tce_invalidate() does really invalidate the entire cache, this uses pnv_pci_ioda2_tce_invalidate_entire() directly for NPU. This adds an explicit comment for workaround for invalidating NPU TCE cache. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/npu-dma.c | 41 ------------------------------- arch/powerpc/platforms/powernv/pci-ioda.c | 29 ++++++++++++++++++---- arch/powerpc/platforms/powernv/pci.h | 7 +----- 3 files changed, 25 insertions(+), 52 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 7229acd..778570c 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -25,8 +25,6 @@ * Other types of TCE cache invalidation are not functional in the * hardware. */ -#define TCE_KILL_INVAL_ALL PPC_BIT(0) - static struct pci_dev *get_pci_dev(struct device_node *dn) { return PCI_DN(dn)->pcidev; @@ -161,45 +159,6 @@ static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe, return pe; } -void pnv_npu_tce_invalidate_entire(struct pnv_ioda_pe *npe) -{ - struct pnv_phb *phb = npe->phb; - - if (WARN_ON(phb->type != PNV_PHB_NPU || - !phb->ioda.tce_inval_reg || - !(npe->flags & PNV_IODA_PE_DEV))) - return; - - mb(); /* Ensure previous TCE table stores are visible */ - __raw_writeq(cpu_to_be64(TCE_KILL_INVAL_ALL), - phb->ioda.tce_inval_reg); -} - -void pnv_npu_tce_invalidate(struct pnv_ioda_pe *npe, - struct iommu_table *tbl, - unsigned long index, - unsigned long npages, - bool rm) -{ - struct pnv_phb *phb = npe->phb; - - /* We can only invalidate the whole cache on NPU */ - unsigned long val = TCE_KILL_INVAL_ALL; - - if (WARN_ON(phb->type != PNV_PHB_NPU || - !phb->ioda.tce_inval_reg || - !(npe->flags & PNV_IODA_PE_DEV))) - return; - - mb(); /* Ensure previous TCE table stores are visible */ - if (rm) - __raw_rm_writeq(cpu_to_be64(val), - (__be64 __iomem *) phb->ioda.tce_inval_reg_phys); - else - __raw_writeq(cpu_to_be64(val), - phb->ioda.tce_inval_reg); -} - void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe) { struct pnv_ioda_pe *gpe; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index d7fbbf1..9b99aa1 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1840,9 +1840,23 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = { .get = pnv_tce_get, }; +#define TCE_KILL_INVAL_ALL PPC_BIT(0) #define TCE_KILL_INVAL_PE PPC_BIT(1) #define TCE_KILL_INVAL_TCE PPC_BIT(2) +void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm) +{ + const unsigned long val = TCE_KILL_INVAL_ALL; + + mb(); /* Ensure previous TCE table stores are visible */ + if (rm) + __raw_rm_writeq(cpu_to_be64(val), + (__be64 __iomem *) + phb->ioda.tce_inval_reg_phys); + else + __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg); +} + static inline void pnv_pci_ioda2_tce_invalidate_pe(struct pnv_ioda_pe *pe) { /* 01xb - invalidate TCEs that match the specified PE# */ @@ -1863,7 +1877,7 @@ static inline void pnv_pci_ioda2_tce_invalidate_pe(struct pnv_ioda_pe *pe) if (!npe || npe->phb->type != PNV_PHB_NPU) continue; - pnv_npu_tce_invalidate_entire(npe); + pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false); } } @@ -1912,14 +1926,19 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, index, npages); if (pe->flags & PNV_IODA_PE_PEER) - /* Invalidate PEs using the same TCE table */ + /* + * The NVLink hardware does not support TCE kill + * per TCE entry so we have to invalidate + * the entire cache for it. + */ for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) { npe = pe->peers[i]; - if (!npe || npe->phb->type != PNV_PHB_NPU) + if (!npe || npe->phb->type != PNV_PHB_NPU || + !npe->phb->ioda.tce_inval_reg) continue; - pnv_npu_tce_invalidate(npe, tbl, index, - npages, rm); + pnv_pci_ioda2_tce_invalidate_entire(npe->phb, + rm); } } } diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 904f60b..85436e3 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -218,15 +218,10 @@ extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); /* Nvlink functions */ -extern void pnv_npu_tce_invalidate_entire(struct pnv_ioda_pe *npe); -extern void pnv_npu_tce_invalidate(struct pnv_ioda_pe *npe, - struct iommu_table *tbl, - unsigned long index, - unsigned long npages, - bool rm); extern void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe); extern void pnv_npu_setup_dma_pe(struct pnv_ioda_pe *npe); extern int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe, bool enabled); extern int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask); +extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm); #endif /* __POWERNV_PCI_H */ -- cgit v1.1 From 6969af73521a9042d91c9efd41f22cf7be7f1ba9 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 29 Apr 2016 18:55:19 +1000 Subject: powerpc/powernv/npu: Use the correct IOMMU page size This uses the page size from iommu_table instead of hard-coded 4K. This should cause no change in behavior. While we are here, move bits around to prepare for further rework which will define and use iommu_table_group_ops. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/npu-dma.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 778570c..5bd5fee 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -204,8 +204,7 @@ static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe) struct pnv_phb *phb = npe->phb; struct pci_dev *gpdev; struct pnv_ioda_pe *gpe; - void *addr; - unsigned int size; + struct iommu_table *tbl; int64_t rc; /* @@ -219,11 +218,11 @@ static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe) if (!gpe) return; - addr = (void *)gpe->table_group.tables[0]->it_base; - size = gpe->table_group.tables[0]->it_size << 3; + tbl = gpe->table_group.tables[0]; rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number, - npe->pe_number, 1, __pa(addr), - size, 0x1000); + npe->pe_number, 1, __pa(tbl->it_base), + tbl->it_size << 3, + IOMMU_PAGE_SIZE(tbl)); if (rc != OPAL_SUCCESS) pr_warn("%s: Error %lld setting DMA window on PHB#%d-PE#%d\n", __func__, rc, phb->hose->global_number, npe->pe_number); -- cgit v1.1 From f9f834567432e4d21f26debe10e1ca0ab52446e5 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 29 Apr 2016 18:55:20 +1000 Subject: powerpc/powernv/npu: Simplify DMA setup NPU devices are emulated in firmware and mainly used for NPU NVLink training; one NPU device is per a hardware link. Their DMA/TCE setup must match the GPU which is connected via PCIe and NVLink so any changes to the DMA/TCE setup on the GPU PCIe device need to be propagated to the NVLink device as this is what device drivers expect and it doesn't make much sense to do anything else. This makes NPU DMA setup explicit. pnv_npu_ioda_controller_ops::pnv_npu_dma_set_mask is moved to pci-ioda, made static and prints warning as dma_set_mask() should never be called on this function as in any case it will not configure GPU; so we make this explicit. Instead of using PNV_IODA_PE_PEER and peers[] (which the next patch will remove), we test every PCI device if there are corresponding NVLink devices. If there are any, we propagate bypass mode to just found NPU devices by calling the setup helper directly (which takes @bypass) and avoid guessing (i.e. calculating from DMA mask) whether we need bypass or not on NPU devices. Since DMA setup happens in very rare occasion, this will not slow down booting or VFIO start/stop much. This renames pnv_npu_disable_bypass to pnv_npu_dma_set_32 to make it more clear what the function really does which is programming 32bit table address to the TVT ("disabling bypass" means writing zeroes to the TVT). This removes pnv_npu_dma_set_bypass() from pnv_npu_ioda_fixup() as the DMA configuration on NPU does not matter until dma_set_mask() is called on GPU and that will do the NPU DMA configuration. This removes phb->dma_dev_setup initialization for NPU as pnv_pci_ioda_dma_dev_setup is no-op for it anyway. This stops using npe->tce_bypass_base as it never changes and values other than zero are not supported. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/npu-dma.c | 87 ++++++++++++++----------------- arch/powerpc/platforms/powernv/pci-ioda.c | 30 +++++------ arch/powerpc/platforms/powernv/pci.h | 3 +- 3 files changed, 52 insertions(+), 68 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 5bd5fee..bec9267 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -196,10 +196,9 @@ void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe) } /* - * For the NPU we want to point the TCE table at the same table as the - * real PCI device. + * Enables 32 bit DMA on NPU. */ -static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe) +static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe) { struct pnv_phb *phb = npe->phb; struct pci_dev *gpdev; @@ -235,72 +234,62 @@ static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe) } /* - * Enable/disable bypass mode on the NPU. The NPU only supports one + * Enables bypass mode on the NPU. The NPU only supports one * window per link, so bypass needs to be explicitly enabled or * disabled. Unlike for a PHB3 bypass and non-bypass modes can't be * active at the same time. */ -int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe, bool enable) +static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe) { struct pnv_phb *phb = npe->phb; int64_t rc = 0; + phys_addr_t top = memblock_end_of_DRAM(); if (phb->type != PNV_PHB_NPU || !npe->pdev) return -EINVAL; - if (enable) { - /* Enable the bypass window */ - phys_addr_t top = memblock_end_of_DRAM(); - - npe->tce_bypass_base = 0; - top = roundup_pow_of_two(top); - dev_info(&npe->pdev->dev, "Enabling bypass for PE %d\n", - npe->pe_number); - rc = opal_pci_map_pe_dma_window_real(phb->opal_id, - npe->pe_number, npe->pe_number, - npe->tce_bypass_base, top); - } else { - /* - * Disable the bypass window by replacing it with the - * TCE32 window. - */ - pnv_npu_disable_bypass(npe); - } + /* Enable the bypass window */ + + top = roundup_pow_of_two(top); + dev_info(&npe->pdev->dev, "Enabling bypass for PE %d\n", + npe->pe_number); + rc = opal_pci_map_pe_dma_window_real(phb->opal_id, + npe->pe_number, npe->pe_number, + 0 /* bypass base */, top); return rc; } -int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask) +void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass) { - struct pci_controller *hose = pci_bus_to_host(npdev->bus); - struct pnv_phb *phb = hose->private_data; - struct pci_dn *pdn = pci_get_pdn(npdev); - struct pnv_ioda_pe *npe, *gpe; - struct pci_dev *gpdev; - uint64_t top; - bool bypass = false; + int i; + struct pnv_phb *phb; + struct pci_dn *pdn; + struct pnv_ioda_pe *npe; + struct pci_dev *npdev; - if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) - return -ENXIO; + for (i = 0; ; ++i) { + npdev = pnv_pci_get_npu_dev(gpdev, i); - /* We only do bypass if it's enabled on the linked device */ - npe = &phb->ioda.pe_array[pdn->pe_number]; - gpe = get_gpu_pci_dev_and_pe(npe, &gpdev); - if (!gpe) - return -ENODEV; + if (!npdev) + break; - if (gpe->tce_bypass_enabled) { - top = gpe->tce_bypass_base + memblock_end_of_DRAM() - 1; - bypass = (dma_mask >= top); - } + pdn = pci_get_pdn(npdev); + if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) + return; - if (bypass) - dev_info(&npdev->dev, "Using 64-bit DMA iommu bypass\n"); - else - dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n"); + phb = pci_bus_to_host(npdev->bus)->private_data; - pnv_npu_dma_set_bypass(npe, bypass); - *npdev->dev.dma_mask = dma_mask; + /* We only do bypass if it's enabled on the linked device */ + npe = &phb->ioda.pe_array[pdn->pe_number]; - return 0; + if (bypass) { + dev_info(&npdev->dev, + "Using 64-bit DMA iommu bypass\n"); + pnv_npu_dma_set_bypass(npe); + } else { + dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n"); + pnv_npu_dma_set_32(npe); + } + } } diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 9b99aa1..c94ee1b 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1669,8 +1669,6 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) struct pnv_ioda_pe *pe; uint64_t top; bool bypass = false; - struct pci_dev *linked_npu_dev; - int i; if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) return -ENODEV;; @@ -1691,15 +1689,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) *pdev->dev.dma_mask = dma_mask; /* Update peer npu devices */ - if (pe->flags & PNV_IODA_PE_PEER) - for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) { - if (!pe->peers[i]) - continue; - - linked_npu_dev = pe->peers[i]->pdev; - if (dma_get_mask(&linked_npu_dev->dev) != dma_mask) - dma_set_mask(&linked_npu_dev->dev, dma_mask); - } + pnv_npu_try_dma_set_bypass(pdev, bypass); return 0; } @@ -3194,7 +3184,6 @@ static void pnv_npu_ioda_fixup(void) enable_bypass = dma_get_mask(&pe->pdev->dev) == DMA_BIT_MASK(64); pnv_npu_init_dma_pe(pe); - pnv_npu_dma_set_bypass(pe, enable_bypass); } } } @@ -3340,6 +3329,14 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { .shutdown = pnv_pci_ioda_shutdown, }; +static int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask) +{ + dev_err_once(&npdev->dev, + "%s operation unsupported for NVLink devices\n", + __func__); + return -EPERM; +} + static const struct pci_controller_ops pnv_npu_ioda_controller_ops = { .dma_dev_setup = pnv_pci_dma_dev_setup, #ifdef CONFIG_PCI_MSI @@ -3516,9 +3513,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb->freeze_pe = pnv_ioda_freeze_pe; phb->unfreeze_pe = pnv_ioda_unfreeze_pe; - /* Setup TCEs */ - phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; - /* Setup MSI support */ pnv_pci_init_ioda_msis(phb); @@ -3531,10 +3525,12 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, */ ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; - if (phb->type == PNV_PHB_NPU) + if (phb->type == PNV_PHB_NPU) { hose->controller_ops = pnv_npu_ioda_controller_ops; - else + } else { + phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; hose->controller_ops = pnv_pci_ioda_controller_ops; + } #ifdef CONFIG_PCI_IOV ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 85436e3..a38349a 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -220,8 +220,7 @@ extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); /* Nvlink functions */ extern void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe); extern void pnv_npu_setup_dma_pe(struct pnv_ioda_pe *npe); -extern int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe, bool enabled); -extern int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask); +extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass); extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm); #endif /* __POWERNV_PCI_H */ -- cgit v1.1 From 7d623e42560ea69eb248d48a1b1b105f28807e6e Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 29 Apr 2016 18:55:21 +1000 Subject: powerpc/powernv/ioda2: Export debug helper pe_level_printk() This exports debugging helper pe_level_printk() and corresponding macroses so they can be used in npu-dma.c. Signed-off-by: Alexey Kardashevskiy Reviewed-By: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 9 +-------- arch/powerpc/platforms/powernv/pci.h | 9 +++++++++ 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index c94ee1b..b2831c0 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -57,7 +57,7 @@ static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl); -static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, +void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, const char *fmt, ...) { struct va_format vaf; @@ -88,13 +88,6 @@ static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, va_end(args); } -#define pe_err(pe, fmt, ...) \ - pe_level_printk(pe, KERN_ERR, fmt, ##__VA_ARGS__) -#define pe_warn(pe, fmt, ...) \ - pe_level_printk(pe, KERN_WARNING, fmt, ##__VA_ARGS__) -#define pe_info(pe, fmt, ...) \ - pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__) - static bool pnv_iommu_bypass_disabled __read_mostly; static int __init iommu_setup(char *str) diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index a38349a..b426614 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -217,6 +217,15 @@ extern void pnv_pci_dma_bus_setup(struct pci_bus *bus); extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); +extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, + const char *fmt, ...); +#define pe_err(pe, fmt, ...) \ + pe_level_printk(pe, KERN_ERR, fmt, ##__VA_ARGS__) +#define pe_warn(pe, fmt, ...) \ + pe_level_printk(pe, KERN_WARNING, fmt, ##__VA_ARGS__) +#define pe_info(pe, fmt, ...) \ + pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__) + /* Nvlink functions */ extern void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe); extern void pnv_npu_setup_dma_pe(struct pnv_ioda_pe *npe); -- cgit v1.1 From b575c731fe58c1ecb1247c6a334ba6c7b4c0b480 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 29 Apr 2016 18:55:22 +1000 Subject: powerpc/powernv/npu: Add set/unset window helpers The upcoming NVLink passthrough support will require NPU code to cope with two DMA windows. This adds a pnv_npu_set_window() helper which programs 32bit window to the hardware. This also adds multilevel TCE support. This adds a pnv_npu_unset_window() helper which removes the DMA window from the hardware. This does not make difference now as the caller - pnv_npu_dma_set_bypass() - enables bypass in the hardware but the next patch will use it to manage TCE table lists for TCE Kill handling. Signed-off-by: Alexey Kardashevskiy Reviewed-By: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/npu-dma.c | 65 +++++++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index bec9267..800d70f 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -159,6 +159,56 @@ static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe, return pe; } +static long pnv_npu_set_window(struct pnv_ioda_pe *npe, + struct iommu_table *tbl) +{ + struct pnv_phb *phb = npe->phb; + int64_t rc; + const unsigned long size = tbl->it_indirect_levels ? + tbl->it_level_size : tbl->it_size; + const __u64 start_addr = tbl->it_offset << tbl->it_page_shift; + const __u64 win_size = tbl->it_size << tbl->it_page_shift; + + pe_info(npe, "Setting up window %llx..%llx pg=%lx\n", + start_addr, start_addr + win_size - 1, + IOMMU_PAGE_SIZE(tbl)); + + rc = opal_pci_map_pe_dma_window(phb->opal_id, + npe->pe_number, + npe->pe_number, + tbl->it_indirect_levels + 1, + __pa(tbl->it_base), + size << 3, + IOMMU_PAGE_SIZE(tbl)); + if (rc) { + pe_err(npe, "Failed to configure TCE table, err %lld\n", rc); + return rc; + } + pnv_pci_ioda2_tce_invalidate_entire(phb, false); + + return 0; +} + +static long pnv_npu_unset_window(struct pnv_ioda_pe *npe) +{ + struct pnv_phb *phb = npe->phb; + int64_t rc; + + pe_info(npe, "Removing DMA window\n"); + + rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number, + npe->pe_number, + 0/* levels */, 0/* table address */, + 0/* table size */, 0/* page size */); + if (rc) { + pe_err(npe, "Unmapping failed, ret = %lld\n", rc); + return rc; + } + pnv_pci_ioda2_tce_invalidate_entire(phb, false); + + return 0; +} + void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe) { struct pnv_ioda_pe *gpe; @@ -200,10 +250,8 @@ void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe) */ static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe) { - struct pnv_phb *phb = npe->phb; struct pci_dev *gpdev; struct pnv_ioda_pe *gpe; - struct iommu_table *tbl; int64_t rc; /* @@ -217,14 +265,7 @@ static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe) if (!gpe) return; - tbl = gpe->table_group.tables[0]; - rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number, - npe->pe_number, 1, __pa(tbl->it_base), - tbl->it_size << 3, - IOMMU_PAGE_SIZE(tbl)); - if (rc != OPAL_SUCCESS) - pr_warn("%s: Error %lld setting DMA window on PHB#%d-PE#%d\n", - __func__, rc, phb->hose->global_number, npe->pe_number); + rc = pnv_npu_set_window(npe, gpe->table_group.tables[0]); /* * We don't initialise npu_pe->tce32_table as we always use @@ -248,6 +289,10 @@ static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe) if (phb->type != PNV_PHB_NPU || !npe->pdev) return -EINVAL; + rc = pnv_npu_unset_window(npe); + if (rc != OPAL_SUCCESS) + return rc; + /* Enable the bypass window */ top = roundup_pow_of_two(top); -- cgit v1.1 From 85674868cecebdf6eb7239ecf9c32b6273208d03 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 29 Apr 2016 18:55:23 +1000 Subject: powerpc/powernv/npu: Rework TCE Kill handling The pnv_ioda_pe struct keeps an array of peers. At the moment it is only used to link GPU and NPU for 2 purposes: 1. Access NPU quickly when configuring DMA for GPU - this was addressed in the previos patch by removing use of it as DMA setup is not what the kernel would constantly do. 2. Invalidate TCE cache for NPU when it is invalidated for GPU. GPU and NPU are in different PE. There is already a mechanism to attach multiple iommu_table_group to the same iommu_table (used for VFIO), we can reuse it here so does this patch. This gets rid of peers[] array and PNV_IODA_PE_PEER flag as they are not needed anymore. While we are here, add TCE cache invalidation after enabling bypass. Signed-off-by: Alexey Kardashevskiy Reviewed-By: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/npu-dma.c | 71 +++++++++---------------------- arch/powerpc/platforms/powernv/pci-ioda.c | 62 +++------------------------ arch/powerpc/platforms/powernv/pci.h | 6 --- 3 files changed, 27 insertions(+), 112 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 800d70f..cb2d1da 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -136,22 +136,17 @@ static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe, struct pnv_ioda_pe *pe; struct pci_dn *pdn; - if (npe->flags & PNV_IODA_PE_PEER) { - pe = npe->peers[0]; - pdev = pe->pdev; - } else { - pdev = pnv_pci_get_gpu_dev(npe->pdev); - if (!pdev) - return NULL; - - pdn = pci_get_pdn(pdev); - if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) - return NULL; + pdev = pnv_pci_get_gpu_dev(npe->pdev); + if (!pdev) + return NULL; - hose = pci_bus_to_host(pdev->bus); - phb = hose->private_data; - pe = &phb->ioda.pe_array[pdn->pe_number]; - } + pdn = pci_get_pdn(pdev); + if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) + return NULL; + + hose = pci_bus_to_host(pdev->bus); + phb = hose->private_data; + pe = &phb->ioda.pe_array[pdn->pe_number]; if (gpdev) *gpdev = pdev; @@ -186,6 +181,10 @@ static long pnv_npu_set_window(struct pnv_ioda_pe *npe, } pnv_pci_ioda2_tce_invalidate_entire(phb, false); + /* Add the table to the list so its TCE cache will get invalidated */ + pnv_pci_link_table_and_group(phb->hose->node, 0, + tbl, &npe->table_group); + return 0; } @@ -206,43 +205,10 @@ static long pnv_npu_unset_window(struct pnv_ioda_pe *npe) } pnv_pci_ioda2_tce_invalidate_entire(phb, false); - return 0; -} - -void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe) -{ - struct pnv_ioda_pe *gpe; - struct pci_dev *gpdev; - int i, avail = -1; - - if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV)) - return; - - gpe = get_gpu_pci_dev_and_pe(npe, &gpdev); - if (!gpe) - return; - - for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) { - /* Nothing to do if the PE is already connected. */ - if (gpe->peers[i] == npe) - return; - - if (!gpe->peers[i]) - avail = i; - } + pnv_pci_unlink_table_and_group(npe->table_group.tables[0], + &npe->table_group); - if (WARN_ON(avail < 0)) - return; - - gpe->peers[avail] = npe; - gpe->flags |= PNV_IODA_PE_PEER; - - /* - * We assume that the NPU devices only have a single peer PE - * (the GPU PCIe device PE). - */ - npe->peers[0] = gpe; - npe->flags |= PNV_IODA_PE_PEER; + return 0; } /* @@ -302,6 +268,9 @@ static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe) npe->pe_number, npe->pe_number, 0 /* bypass base */, top); + if (rc == OPAL_SUCCESS) + pnv_pci_ioda2_tce_invalidate_entire(phb, false); + return rc; } diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index b2831c0..ef31218 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1845,23 +1845,12 @@ static inline void pnv_pci_ioda2_tce_invalidate_pe(struct pnv_ioda_pe *pe) /* 01xb - invalidate TCEs that match the specified PE# */ unsigned long val = TCE_KILL_INVAL_PE | (pe->pe_number & 0xFF); struct pnv_phb *phb = pe->phb; - struct pnv_ioda_pe *npe; - int i; if (!phb->ioda.tce_inval_reg) return; mb(); /* Ensure above stores are visible */ __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg); - - if (pe->flags & PNV_IODA_PE_PEER) - for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) { - npe = pe->peers[i]; - if (!npe || npe->phb->type != PNV_PHB_NPU) - continue; - - pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false); - } } static void pnv_pci_ioda2_do_tce_invalidate(unsigned pe_number, bool rm, @@ -1896,33 +1885,24 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, struct iommu_table_group_link *tgl; list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { - struct pnv_ioda_pe *npe; struct pnv_ioda_pe *pe = container_of(tgl->table_group, struct pnv_ioda_pe, table_group); __be64 __iomem *invalidate = rm ? (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys : pe->phb->ioda.tce_inval_reg; - int i; - - pnv_pci_ioda2_do_tce_invalidate(pe->pe_number, rm, - invalidate, tbl->it_page_shift, - index, npages); - if (pe->flags & PNV_IODA_PE_PEER) + if (pe->phb->type == PNV_PHB_NPU) { /* * The NVLink hardware does not support TCE kill * per TCE entry so we have to invalidate * the entire cache for it. */ - for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) { - npe = pe->peers[i]; - if (!npe || npe->phb->type != PNV_PHB_NPU || - !npe->phb->ioda.tce_inval_reg) - continue; - - pnv_pci_ioda2_tce_invalidate_entire(npe->phb, - rm); - } + pnv_pci_ioda2_tce_invalidate_entire(pe->phb, rm); + continue; + } + pnv_pci_ioda2_do_tce_invalidate(pe->pe_number, rm, + invalidate, tbl->it_page_shift, + index, npages); } } @@ -3156,31 +3136,6 @@ static void pnv_pci_ioda_create_dbgfs(void) #endif /* CONFIG_DEBUG_FS */ } -static void pnv_npu_ioda_fixup(void) -{ - bool enable_bypass; - struct pci_controller *hose, *tmp; - struct pnv_phb *phb; - struct pnv_ioda_pe *pe; - unsigned int weight; - - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { - phb = hose->private_data; - if (phb->type != PNV_PHB_NPU) - continue; - - list_for_each_entry(pe, &phb->ioda.pe_list, list) { - weight = pnv_pci_ioda_pe_dma_weight(pe); - if (WARN_ON(!weight)) - continue; - - enable_bypass = dma_get_mask(&pe->pdev->dev) == - DMA_BIT_MASK(64); - pnv_npu_init_dma_pe(pe); - } - } -} - static void pnv_pci_ioda_fixup(void) { pnv_pci_ioda_setup_PEs(); @@ -3193,9 +3148,6 @@ static void pnv_pci_ioda_fixup(void) eeh_init(); eeh_addr_cache_build(); #endif - - /* Link NPU IODA tables to their PCI devices. */ - pnv_npu_ioda_fixup(); } /* diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index b426614..2b35606 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -24,7 +24,6 @@ enum pnv_phb_model { #define PNV_IODA_PE_MASTER (1 << 3) /* Master PE in compound case */ #define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */ #define PNV_IODA_PE_VF (1 << 5) /* PE for one VF */ -#define PNV_IODA_PE_PEER (1 << 6) /* PE has peers */ /* Data associated with a PE, including IOMMU tracking etc.. */ struct pnv_phb; @@ -32,9 +31,6 @@ struct pnv_ioda_pe { unsigned long flags; struct pnv_phb *phb; -#define PNV_IODA_MAX_PEER_PES 8 - struct pnv_ioda_pe *peers[PNV_IODA_MAX_PEER_PES]; - /* A PE can be associated with a single device or an * entire bus (& children). In the former case, pdev * is populated, in the later case, pbus is. @@ -227,8 +223,6 @@ extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__) /* Nvlink functions */ -extern void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe); -extern void pnv_npu_setup_dma_pe(struct pnv_ioda_pe *npe); extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass); extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm); -- cgit v1.1 From b5cb9ab1a00b112fcb96164c814f1f111deeafba Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 29 Apr 2016 18:55:24 +1000 Subject: powerpc/powernv/npu: Enable NVLink pass through IBM POWER8 NVlink systems come with Tesla K40-ish GPUs each of which also has a couple of fast speed links (NVLink). The interface to links is exposed as an emulated PCI bridge which is included into the same IOMMU group as the corresponding GPU. In the kernel, NPUs get a separate PHB of the PNV_PHB_NPU type and a PE which behave pretty much as the standard IODA2 PHB except NPU PHB has just a single TVE in the hardware which means it can have either 32bit window or 64bit window or DMA bypass but never two of these. In order to make these links work when GPU is passed to the guest, these bridges need to be passed as well; otherwise performance will degrade. This implements and exports API to manage NPU state in regard to VFIO; it replicates iommu_table_group_ops. This defines a new pnv_pci_ioda2_npu_ops which is assigned to the IODA2 bridge if there are NPUs for a GPU on the bridge. The new callbacks call the default IODA2 callbacks plus new NPU API. This adds a gpe_table_group_to_npe() helper to find NPU PE for the IODA2 table_group, it is not expected to fail as the helper is only called from the pnv_pci_ioda2_npu_ops. This does not define NPU-specific .release_ownership() so after VFIO is finished, DMA on NPU is disabled which is ok as the nvidia driver sets DMA mask when probing which enable 32 or 64bit DMA on NPU. This adds a pnv_pci_npu_setup_iommu() helper which adds NPUs to the GPU group if any found. The helper uses helpers to look for the "ibm,gpu" property in the device tree which is a phandle of the corresponding GPU. This adds an additional loop over PEs in pnv_ioda_setup_dma() as the main loop skips NPU PEs as they do not have 32bit DMA segments. As pnv_npu_set_window() and pnv_npu_unset_window() are started being used by the new IODA2-NPU IOMMU group, this makes the helpers public and adds the DMA window number parameter. Signed-off-by: Alexey Kardashevskiy Reviewed-By: Alistair Popple [mpe: Add pnv_pci_ioda_setup_iommu_api() to fix build with IOMMU_API=n] Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/npu-dma.c | 64 +++++++++++++++-- arch/powerpc/platforms/powernv/pci-ioda.c | 112 ++++++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/pci.h | 6 ++ 3 files changed, 176 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index cb2d1da..0459e10 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -154,7 +155,7 @@ static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe, return pe; } -static long pnv_npu_set_window(struct pnv_ioda_pe *npe, +long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num, struct iommu_table *tbl) { struct pnv_phb *phb = npe->phb; @@ -182,13 +183,13 @@ static long pnv_npu_set_window(struct pnv_ioda_pe *npe, pnv_pci_ioda2_tce_invalidate_entire(phb, false); /* Add the table to the list so its TCE cache will get invalidated */ - pnv_pci_link_table_and_group(phb->hose->node, 0, + pnv_pci_link_table_and_group(phb->hose->node, num, tbl, &npe->table_group); return 0; } -static long pnv_npu_unset_window(struct pnv_ioda_pe *npe) +long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num) { struct pnv_phb *phb = npe->phb; int64_t rc; @@ -205,7 +206,7 @@ static long pnv_npu_unset_window(struct pnv_ioda_pe *npe) } pnv_pci_ioda2_tce_invalidate_entire(phb, false); - pnv_pci_unlink_table_and_group(npe->table_group.tables[0], + pnv_pci_unlink_table_and_group(npe->table_group.tables[num], &npe->table_group); return 0; @@ -231,7 +232,7 @@ static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe) if (!gpe) return; - rc = pnv_npu_set_window(npe, gpe->table_group.tables[0]); + rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]); /* * We don't initialise npu_pe->tce32_table as we always use @@ -255,7 +256,7 @@ static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe) if (phb->type != PNV_PHB_NPU || !npe->pdev) return -EINVAL; - rc = pnv_npu_unset_window(npe); + rc = pnv_npu_unset_window(npe, 0); if (rc != OPAL_SUCCESS) return rc; @@ -307,3 +308,54 @@ void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass) } } } + +/* Switch ownership from platform code to external user (e.g. VFIO) */ +void pnv_npu_take_ownership(struct pnv_ioda_pe *npe) +{ + struct pnv_phb *phb = npe->phb; + int64_t rc; + + /* + * Note: NPU has just a single TVE in the hardware which means that + * while used by the kernel, it can have either 32bit window or + * DMA bypass but never both. So we deconfigure 32bit window only + * if it was enabled at the moment of ownership change. + */ + if (npe->table_group.tables[0]) { + pnv_npu_unset_window(npe, 0); + return; + } + + /* Disable bypass */ + rc = opal_pci_map_pe_dma_window_real(phb->opal_id, + npe->pe_number, npe->pe_number, + 0 /* bypass base */, 0); + if (rc) { + pe_err(npe, "Failed to disable bypass, err %lld\n", rc); + return; + } + pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false); +} + +struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe) +{ + struct pnv_phb *phb = npe->phb; + struct pci_bus *pbus = phb->hose->bus; + struct pci_dev *npdev, *gpdev = NULL, *gptmp; + struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(npe, &gpdev); + + if (!gpe || !gpdev) + return NULL; + + list_for_each_entry(npdev, &pbus->devices, bus_list) { + gptmp = pnv_pci_get_gpu_dev(npdev); + + if (gptmp != gpdev) + continue; + + pe_info(gpe, "Attached NPU %s\n", dev_name(&npdev->dev)); + iommu_group_add_device(gpe->table_group.group, &npdev->dev); + } + + return gpe; +} diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index ef31218..4cfe704 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2389,6 +2389,116 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = { .take_ownership = pnv_ioda2_take_ownership, .release_ownership = pnv_ioda2_release_ownership, }; + +static int gpe_table_group_to_npe_cb(struct device *dev, void *opaque) +{ + struct pci_controller *hose; + struct pnv_phb *phb; + struct pnv_ioda_pe **ptmppe = opaque; + struct pci_dev *pdev = container_of(dev, struct pci_dev, dev); + struct pci_dn *pdn = pci_get_pdn(pdev); + + if (!pdn || pdn->pe_number == IODA_INVALID_PE) + return 0; + + hose = pci_bus_to_host(pdev->bus); + phb = hose->private_data; + if (phb->type != PNV_PHB_NPU) + return 0; + + *ptmppe = &phb->ioda.pe_array[pdn->pe_number]; + + return 1; +} + +/* + * This returns PE of associated NPU. + * This assumes that NPU is in the same IOMMU group with GPU and there is + * no other PEs. + */ +static struct pnv_ioda_pe *gpe_table_group_to_npe( + struct iommu_table_group *table_group) +{ + struct pnv_ioda_pe *npe = NULL; + int ret = iommu_group_for_each_dev(table_group->group, &npe, + gpe_table_group_to_npe_cb); + + BUG_ON(!ret || !npe); + + return npe; +} + +static long pnv_pci_ioda2_npu_set_window(struct iommu_table_group *table_group, + int num, struct iommu_table *tbl) +{ + long ret = pnv_pci_ioda2_set_window(table_group, num, tbl); + + if (ret) + return ret; + + ret = pnv_npu_set_window(gpe_table_group_to_npe(table_group), num, tbl); + if (ret) + pnv_pci_ioda2_unset_window(table_group, num); + + return ret; +} + +static long pnv_pci_ioda2_npu_unset_window( + struct iommu_table_group *table_group, + int num) +{ + long ret = pnv_pci_ioda2_unset_window(table_group, num); + + if (ret) + return ret; + + return pnv_npu_unset_window(gpe_table_group_to_npe(table_group), num); +} + +static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group) +{ + /* + * Detach NPU first as pnv_ioda2_take_ownership() will destroy + * the iommu_table if 32bit DMA is enabled. + */ + pnv_npu_take_ownership(gpe_table_group_to_npe(table_group)); + pnv_ioda2_take_ownership(table_group); +} + +static struct iommu_table_group_ops pnv_pci_ioda2_npu_ops = { + .get_table_size = pnv_pci_ioda2_get_table_size, + .create_table = pnv_pci_ioda2_create_table, + .set_window = pnv_pci_ioda2_npu_set_window, + .unset_window = pnv_pci_ioda2_npu_unset_window, + .take_ownership = pnv_ioda2_npu_take_ownership, + .release_ownership = pnv_ioda2_release_ownership, +}; + +static void pnv_pci_ioda_setup_iommu_api(void) +{ + struct pci_controller *hose, *tmp; + struct pnv_phb *phb; + struct pnv_ioda_pe *pe, *gpe; + + /* + * Now we have all PHBs discovered, time to add NPU devices to + * the corresponding IOMMU groups. + */ + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + phb = hose->private_data; + + if (phb->type != PNV_PHB_NPU) + continue; + + list_for_each_entry(pe, &phb->ioda.pe_list, list) { + gpe = pnv_pci_npu_setup_iommu(pe); + if (gpe) + gpe->table_group.ops = &pnv_pci_ioda2_npu_ops; + } + } +} +#else /* !CONFIG_IOMMU_API */ +static void pnv_pci_ioda_setup_iommu_api(void) { }; #endif static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb) @@ -3115,6 +3225,8 @@ static void pnv_pci_ioda_setup_DMA(void) phb = hose->private_data; phb->initialized = 1; } + + pnv_pci_ioda_setup_iommu_api(); } static void pnv_pci_ioda_create_dbgfs(void) diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 2b35606..7dee25e 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -225,5 +225,11 @@ extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, /* Nvlink functions */ extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass); extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm); +extern struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe); +extern long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num, + struct iommu_table *tbl); +extern long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num); +extern void pnv_npu_take_ownership(struct pnv_ioda_pe *npe); +extern void pnv_npu_release_ownership(struct pnv_ioda_pe *npe); #endif /* __POWERNV_PCI_H */ -- cgit v1.1 From 848912e547c4569445a61203a7df402646a88c25 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 12 May 2016 19:43:37 +1000 Subject: Revert "powerpc/powernv: Exclude root bus in pnv_pci_reset_secondary_bus()" This reverts commit c8ceacc22bce95d3a9cff198c9c27a30105a16b8. Gavin says: I missed the fact that it affects the PCI passthrou path as reported by Alexey: When passing GPU (0003:01:00.0) which seats behind the root port, the reset request is routed to skiboot in original code. In skiboot, the link bouncing events are masked during the reset. So we don't see EEH (freeze all) error even link bouncing happens. With the changes included, the reset is done by kernel and the link bouncing events aren't masked by altering content of PHB3 (or P7IOC) specific hardware registers which are invisible to kernel (skiboot hides the hardware specific). It means the link bouncing is seen by the root port and it causes a EEH (freeze all) error. The PCI passthrough on GPU device cannot work. Requested-by: Alexey Kardashevskiy Requested-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/eeh-powernv.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 593b8dc..9226df1 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -868,8 +868,16 @@ static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option) void pnv_pci_reset_secondary_bus(struct pci_dev *dev) { - pnv_eeh_bridge_reset(dev, EEH_RESET_HOT); - pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE); + struct pci_controller *hose; + + if (pci_is_root_bus(dev->bus)) { + hose = pci_bus_to_host(dev->bus); + pnv_eeh_root_reset(hose, EEH_RESET_HOT); + pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE); + } else { + pnv_eeh_bridge_reset(dev, EEH_RESET_HOT); + pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE); + } } static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type, -- cgit v1.1 From 8445a87f7092bc8336ea1305be9306f26b846d93 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Mon, 11 Apr 2016 16:17:23 -0300 Subject: powerpc/iommu: Remove the dependency on EEH struct in DDW mechanism Commit 39baadbf36ce ("powerpc/eeh: Remove eeh information from pci_dn") changed the pci_dn struct by removing its EEH-related members. As part of this clean-up, DDW mechanism was modified to read the device configuration address from eeh_dev struct. As a consequence, now if we disable EEH mechanism on kernel command-line for example, the DDW mechanism will fail, generating a kernel oops by dereferencing a NULL pointer (which turns to be the eeh_dev pointer). This patch just changes the configuration address calculation on DDW functions to a manual calculation based on pci_dn members instead of using eeh_dev-based address. No functional changes were made. This was tested on pSeries, both in PHyp and qemu guest. Fixes: 39baadbf36ce ("powerpc/eeh: Remove eeh information from pci_dn") Cc: stable@vger.kernel.org # v3.4+ Reviewed-by: Gavin Shan Signed-off-by: Guilherme G. Piccoli Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/iommu.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index bd98ce2..b7dfc13 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -912,7 +912,8 @@ machine_arch_initcall(pseries, find_existing_ddw_windows); static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, struct ddw_query_response *query) { - struct eeh_dev *edev; + struct device_node *dn; + struct pci_dn *pdn; u32 cfg_addr; u64 buid; int ret; @@ -923,11 +924,10 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, * Retrieve them from the pci device, not the node with the * dma-window property */ - edev = pci_dev_to_eeh_dev(dev); - cfg_addr = edev->config_addr; - if (edev->pe_config_addr) - cfg_addr = edev->pe_config_addr; - buid = edev->phb->buid; + dn = pci_device_to_OF_node(dev); + pdn = PCI_DN(dn); + buid = pdn->phb->buid; + cfg_addr = (pdn->busno << 8) | pdn->devfn; ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query, cfg_addr, BUID_HI(buid), BUID_LO(buid)); @@ -941,7 +941,8 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, struct ddw_create_response *create, int page_shift, int window_shift) { - struct eeh_dev *edev; + struct device_node *dn; + struct pci_dn *pdn; u32 cfg_addr; u64 buid; int ret; @@ -952,11 +953,10 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, * Retrieve them from the pci device, not the node with the * dma-window property */ - edev = pci_dev_to_eeh_dev(dev); - cfg_addr = edev->config_addr; - if (edev->pe_config_addr) - cfg_addr = edev->pe_config_addr; - buid = edev->phb->buid; + dn = pci_device_to_OF_node(dev); + pdn = PCI_DN(dn); + buid = pdn->phb->buid; + cfg_addr = (pdn->busno << 8) | pdn->devfn; do { /* extra outputs are LIOBN and dma-addr (hi, lo) */ -- cgit v1.1 From 92a86756904b127a3450262b33c0b9f8e99f6b36 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 12 May 2016 15:47:09 +1000 Subject: powerpc/powernv: Fix insufficient memory allocation The pnv_pci_init_ioda_phb() helper allocates a blob to store auxilary data such PE and M32/M64 segment allocation maps; this single blob has few partitions, size of each is derived from the PE number - phb->ioda.total_pe_num. It was assumed that the minimum PE number is 8, however it is 4 for NPU so the pe_alloc part was missing in the allocated blob. It was invisible till recently as we were not tracking used M64 segments and NPUs do not use M32 segments so the phb->ioda.m32_segmap (which was pointing to the same address as phb->ioda.pe_alloc) has never been written to leaving the pe_alloc memory intact. After commit 401203ac2d "powerpc/powernv: Track M64 segment consumption" the pe_alloc gets corrupted and PE allocation cannot work. This fixes the issue by enforcing the minimum PE number to 8. Fixes: 401203ac2d15 ("powerpc/powernv: Track M64 segment consumption") Signed-off-by: Alexey Kardashevskiy Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 4cfe704..7e1e4f1 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3504,7 +3504,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, PNV_IODA1_DMA32_SEGSIZE; /* Allocate aux data & arrays. We don't have IO ports on PHB3 */ - size = _ALIGN_UP(phb->ioda.total_pe_num / 8, sizeof(unsigned long)); + size = _ALIGN_UP(max_t(unsigned, phb->ioda.total_pe_num, 8) / 8, + sizeof(unsigned long)); m64map_off = size; size += phb->ioda.total_pe_num * sizeof(phb->ioda.m64_segmap[0]); m32map_off = size; -- cgit v1.1 From 1d4e89cf0a4e819fbde428e9e5286e7141a02e06 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 12 May 2016 15:47:10 +1000 Subject: powerpc/powernv/npu: Add PE to PHB's list Before commit 3e68dc57 "powerpc/powernv: Remove DMA32 PE list", NPU PEs were linked to the NPU PHB via phb->ioda.pe_dma_list; after that fix, the phb->ioda.pe_list is used. During the pe_dma_list removal, list_add_tail(&phb->ioda.pe_dma_list) was removed, however no list_add() was added so does this patch. Fixes: 3e68dc57219a ("powerpc/powernv: Remove DMA32 PE list") Signed-off-by: Alexey Kardashevskiy Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 7e1e4f1..3a5ea82 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1006,6 +1006,9 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) return NULL; } + /* Put PE to the list */ + list_add_tail(&pe->list, &phb->ioda.pe_list); + return pe; } -- cgit v1.1