summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms/powernv
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/powernv')
-rw-r--r--arch/powerpc/platforms/powernv/Makefile3
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c4
-rw-r--r--arch/powerpc/platforms/powernv/idle.c2
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c348
-rw-r--r--arch/powerpc/platforms/powernv/opal-kmsg.c75
-rw-r--r--arch/powerpc/platforms/powernv/opal-prd.c1
-rw-r--r--arch/powerpc/platforms/powernv/opal-rtc.c5
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S1
-rw-r--r--arch/powerpc/platforms/powernv/opal-xscom.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal.c41
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c216
-rw-r--r--arch/powerpc/platforms/powernv/pci.c6
-rw-r--r--arch/powerpc/platforms/powernv/pci.h19
-rw-r--r--arch/powerpc/platforms/powernv/setup.c12
-rw-r--r--arch/powerpc/platforms/powernv/smp.c74
15 files changed, 700 insertions, 109 deletions
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 1c8cdb6..f1516b5 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -2,9 +2,10 @@ obj-y += setup.o opal-wrappers.o opal.o opal-async.o idle.o
obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
+obj-y += opal-kmsg.o
obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
-obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o
+obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o npu-dma.o
obj-$(CONFIG_EEH) += eeh-powernv.o
obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index e1c9072..5f152b9 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -48,8 +48,8 @@ static int pnv_eeh_init(void)
struct pci_controller *hose;
struct pnv_phb *phb;
- if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
- pr_warn("%s: OPALv3 is required !\n",
+ if (!firmware_has_feature(FW_FEATURE_OPAL)) {
+ pr_warn("%s: OPAL is required !\n",
__func__);
return -EINVAL;
}
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 59d735d..15bfbcd 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -242,7 +242,7 @@ static int __init pnv_init_idle_states(void)
if (cpuidle_disable != IDLE_NO_OVERRIDE)
goto out;
- if (!firmware_has_feature(FW_FEATURE_OPALv3))
+ if (!firmware_has_feature(FW_FEATURE_OPAL))
goto out;
power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
new file mode 100644
index 0000000..e85aa900
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -0,0 +1,348 @@
+/*
+ * This file implements the DMA operations for NVLink devices. The NPU
+ * devices all point to the same iommu table as the parent PCI device.
+ *
+ * Copyright Alistair Popple, IBM Corporation 2015.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <linux/memblock.h>
+
+#include <asm/iommu.h>
+#include <asm/pnv-pci.h>
+#include <asm/msi_bitmap.h>
+#include <asm/opal.h>
+
+#include "powernv.h"
+#include "pci.h"
+
+/*
+ * Other types of TCE cache invalidation are not functional in the
+ * hardware.
+ */
+#define TCE_KILL_INVAL_ALL PPC_BIT(0)
+
+/*
+ * Return the pci_dev pointer stored in the PCI_DN() data of device node
+ * @dn. The PCI_DN() result is dereferenced without a NULL check.
+ */
+static struct pci_dev *get_pci_dev(struct device_node *dn)
+{
+	struct pci_dn *pdn = PCI_DN(dn);
+
+	return pdn->pcidev;
+}
+
+/*
+ * Given an NPU device, return the PCI device referenced by the first
+ * "ibm,gpu" phandle of its device-tree node, or NULL when that phandle
+ * cannot be resolved.
+ */
+struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev)
+{
+	struct pci_dev *gpu = NULL;
+	struct device_node *gpu_node;
+
+	/* Get associated PCI device */
+	gpu_node = of_parse_phandle(npdev->dev.of_node, "ibm,gpu", 0);
+	if (gpu_node) {
+		gpu = get_pci_dev(gpu_node);
+		/* The node is only needed for the lookup above */
+		of_node_put(gpu_node);
+	}
+
+	return gpu;
+}
+EXPORT_SYMBOL(pnv_pci_get_gpu_dev);
+
+/*
+ * Given the real PCI device, return the NPU device referenced by entry
+ * @index of the "ibm,npu" phandle list of its device-tree node, or NULL
+ * when that entry cannot be resolved.
+ */
+struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
+{
+	struct pci_dev *npu = NULL;
+	struct device_node *npu_node;
+
+	/* Get associated NPU device */
+	npu_node = of_parse_phandle(gpdev->dev.of_node, "ibm,npu", index);
+	if (npu_node) {
+		npu = get_pci_dev(npu_node);
+		/* The node is only needed for the lookup above */
+		of_node_put(npu_node);
+	}
+
+	return npu;
+}
+EXPORT_SYMBOL(pnv_pci_get_npu_dev);
+
+/*
+ * Log, once per call site, that the calling DMA operation is not
+ * supported for NVLink devices. Expands to a dev_err_once() on a
+ * variable named "dev", so it can only be used inside functions that
+ * have a "struct device *dev" in scope.
+ */
+#define NPU_DMA_OP_UNSUPPORTED() \
+ dev_err_once(dev, "%s operation unsupported for NVLink devices\n", \
+ __func__)
+
+/* Stub: coherent allocation is not supported; logs and returns NULL. */
+static void *dma_npu_alloc(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag,
+ struct dma_attrs *attrs)
+{
+ NPU_DMA_OP_UNSUPPORTED();
+ return NULL;
+}
+
+/* Stub: nothing can have been allocated, so only the error is logged. */
+static void dma_npu_free(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
+{
+ NPU_DMA_OP_UNSUPPORTED();
+}
+
+/*
+ * Stub: page mapping is not supported; logs and returns 0.
+ * NOTE(review): 0 is returned as the DMA handle - confirm callers treat
+ * this value as a mapping failure on this platform.
+ */
+static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ NPU_DMA_OP_UNSUPPORTED();
+ return 0;
+}
+
+/* Stub: scatterlist mapping is not supported; logs and returns 0. */
+static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ NPU_DMA_OP_UNSUPPORTED();
+ return 0;
+}
+
+/* Stub: logs and returns 0 for every mask. */
+static int dma_npu_dma_supported(struct device *dev, u64 mask)
+{
+ NPU_DMA_OP_UNSUPPORTED();
+ return 0;
+}
+
+/* Stub: logs and returns 0 as the required mask. */
+static u64 dma_npu_get_required_mask(struct device *dev)
+{
+ NPU_DMA_OP_UNSUPPORTED();
+ return 0;
+}
+
+/*
+ * DMA operations table installed on NPU devices by
+ * pnv_npu_disable_bypass(). Every callback is one of the stubs above.
+ */
+struct dma_map_ops dma_npu_ops = {
+ .map_page = dma_npu_map_page,
+ .map_sg = dma_npu_map_sg,
+ .alloc = dma_npu_alloc,
+ .free = dma_npu_free,
+ .dma_supported = dma_npu_dma_supported,
+ .get_required_mask = dma_npu_get_required_mask,
+};
+
+/*
+ * Returns the PE associated with the PCI device of the given NPU PE,
+ * or NULL if it cannot be determined. When @gpdev is not NULL the
+ * linked PCI device is stored through it on success.
+ *
+ * If @npe is already flagged as having peers, peers[0] is used as the
+ * answer; otherwise the GPU is looked up from the NPU device and its PE
+ * is taken from the GPU's own PHB.
+ */
+static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe,
+						  struct pci_dev **gpdev)
+{
+	struct pnv_ioda_pe *gpu_pe;
+	struct pci_dev *gpu_dev;
+
+	if (npe->flags & PNV_IODA_PE_PEER) {
+		gpu_pe = npe->peers[0];
+		gpu_dev = gpu_pe->pdev;
+	} else {
+		struct pci_controller *gpu_hose;
+		struct pnv_phb *gpu_phb;
+		struct pci_dn *gpu_pdn;
+
+		gpu_dev = pnv_pci_get_gpu_dev(npe->pdev);
+		if (!gpu_dev)
+			return NULL;
+
+		gpu_pdn = pci_get_pdn(gpu_dev);
+		if (WARN_ON(!gpu_pdn || gpu_pdn->pe_number == IODA_INVALID_PE))
+			return NULL;
+
+		gpu_hose = pci_bus_to_host(gpu_dev->bus);
+		gpu_phb = gpu_hose->private_data;
+		gpu_pe = &gpu_phb->ioda.pe_array[gpu_pdn->pe_number];
+	}
+
+	if (gpdev)
+		*gpdev = gpu_dev;
+
+	return gpu_pe;
+}
+
+/*
+ * Write TCE_KILL_INVAL_ALL to the TCE invalidate register of the PHB
+ * owning @npe. Warns and does nothing unless the PHB is an NPU PHB with
+ * an invalidate register and @npe is a device PE.
+ */
+void pnv_npu_tce_invalidate_entire(struct pnv_ioda_pe *npe)
+{
+	struct pnv_phb *phb = npe->phb;
+	bool unusable;
+
+	unusable = phb->type != PNV_PHB_NPU ||
+		   !phb->ioda.tce_inval_reg ||
+		   !(npe->flags & PNV_IODA_PE_DEV);
+	if (WARN_ON(unusable))
+		return;
+
+	mb(); /* Ensure previous TCE table stores are visible */
+	__raw_writeq(cpu_to_be64(TCE_KILL_INVAL_ALL),
+		     phb->ioda.tce_inval_reg);
+}
+
+/*
+ * Invalidate NPU TCE cache entries for @npe.
+ *
+ * @tbl, @index and @npages are accepted for symmetry with the other
+ * invalidate helpers but are not used: the whole cache is always
+ * invalidated. @rm selects the __raw_rm_writeq() path, which targets the
+ * physical address of the invalidate register, instead of the regular
+ * __raw_writeq() on the mapped register.
+ */
+void pnv_npu_tce_invalidate(struct pnv_ioda_pe *npe,
+			    struct iommu_table *tbl,
+			    unsigned long index,
+			    unsigned long npages,
+			    bool rm)
+{
+	/* We can only invalidate the whole cache on NPU */
+	const unsigned long kill_all = TCE_KILL_INVAL_ALL;
+	struct pnv_phb *phb = npe->phb;
+
+	if (WARN_ON(phb->type != PNV_PHB_NPU ||
+		    !phb->ioda.tce_inval_reg ||
+		    !(npe->flags & PNV_IODA_PE_DEV)))
+		return;
+
+	mb(); /* Ensure previous TCE table stores are visible */
+	if (!rm) {
+		__raw_writeq(cpu_to_be64(kill_all),
+			     phb->ioda.tce_inval_reg);
+		return;
+	}
+
+	__raw_rm_writeq(cpu_to_be64(kill_all),
+			(__be64 __iomem *) phb->ioda.tce_inval_reg_phys);
+}
+
+/*
+ * Cross-link the NPU PE @npe with the PE of its GPU: @npe is stored in
+ * a free slot of the GPU PE's peers[] array, the GPU PE becomes
+ * peers[0] of @npe, and both are flagged PNV_IODA_PE_PEER.
+ *
+ * Does nothing if @npe is not a device PE, if no GPU PE can be found,
+ * or if the two are already linked. Warns if the GPU PE has no free
+ * peer slot.
+ */
+void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe)
+{
+	struct pnv_ioda_pe *gpu_pe;
+	int slot, free_slot = -1;
+
+	if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV))
+		return;
+
+	gpu_pe = get_gpu_pci_dev_and_pe(npe, NULL);
+	if (!gpu_pe)
+		return;
+
+	for (slot = 0; slot < PNV_IODA_MAX_PEER_PES; slot++) {
+		struct pnv_ioda_pe *peer = gpu_pe->peers[slot];
+
+		/* Nothing to do if the PE is already connected. */
+		if (peer == npe)
+			return;
+
+		/* Remember the last empty slot seen */
+		if (!peer)
+			free_slot = slot;
+	}
+
+	if (WARN_ON(free_slot < 0))
+		return;
+
+	gpu_pe->peers[free_slot] = npe;
+	gpu_pe->flags |= PNV_IODA_PE_PEER;
+
+	/*
+	 * We assume that the NPU devices only have a single peer PE
+	 * (the GPU PCIe device PE).
+	 */
+	npe->peers[0] = gpu_pe;
+	npe->flags |= PNV_IODA_PE_PEER;
+}
+
+/*
+ * For the NPU we want to point the TCE table at the same table as the
+ * real PCI device.
+ *
+ * Maps the first table of the GPU PE's table group as the DMA window of
+ * @npe through OPAL, then installs dma_npu_ops on the NPU device. An
+ * OPAL failure is only logged; the DMA ops are installed regardless.
+ */
+static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe)
+{
+	struct pnv_phb *phb = npe->phb;
+	struct pnv_ioda_pe *gpu_pe;
+	unsigned int tbl_bytes;
+	void *tbl_base;
+	int64_t rc;
+
+	/*
+	 * Find the associated PCI devices and get the dma window
+	 * information from there.
+	 */
+	if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV))
+		return;
+
+	gpu_pe = get_gpu_pci_dev_and_pe(npe, NULL);
+	if (!gpu_pe)
+		return;
+
+	tbl_base = (void *)gpu_pe->table_group.tables[0]->it_base;
+	tbl_bytes = gpu_pe->table_group.tables[0]->it_size << 3;
+
+	rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
+					npe->pe_number, 1, __pa(tbl_base),
+					tbl_bytes, 0x1000);
+	if (rc != OPAL_SUCCESS)
+		pr_warn("%s: Error %lld setting DMA window on PHB#%d-PE#%d\n",
+			__func__, rc, phb->hose->global_number, npe->pe_number);
+
+	/*
+	 * We don't initialise npu_pe->tce32_table as we always use
+	 * dma_npu_ops which are nops.
+	 */
+	set_dma_ops(&npe->pdev->dev, &dma_npu_ops);
+}
+
+/*
+ * Enable/disable bypass mode on the NPU. The NPU only supports one
+ * window per link, so bypass needs to be explicitly enabled or
+ * disabled. Unlike for a PHB3 bypass and non-bypass modes can't be
+ * active at the same time.
+ *
+ * Returns -EINVAL if @npe is not a device PE on an NPU PHB, the OPAL
+ * return code when enabling, and 0 when disabling.
+ */
+int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe, bool enable)
+{
+	struct pnv_phb *phb = npe->phb;
+	phys_addr_t window_top;
+	int64_t rc;
+
+	if (phb->type != PNV_PHB_NPU || !npe->pdev)
+		return -EINVAL;
+
+	if (!enable) {
+		/*
+		 * Disable the bypass window by replacing it with the
+		 * TCE32 window.
+		 */
+		pnv_npu_disable_bypass(npe);
+		return 0;
+	}
+
+	/* Enable the bypass window */
+	window_top = memblock_end_of_DRAM();
+	window_top = roundup_pow_of_two(window_top);
+	npe->tce_bypass_base = 0;
+
+	dev_info(&npe->pdev->dev, "Enabling bypass for PE %d\n",
+		 npe->pe_number);
+	rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
+					     npe->pe_number, npe->pe_number,
+					     npe->tce_bypass_base, window_top);
+
+	return rc;
+}
+
+/*
+ * Set the DMA mask of the NPU device @npdev to @dma_mask and switch its
+ * PE between bypass and non-bypass mode accordingly.
+ *
+ * Bypass is only chosen when it is enabled on the linked GPU PE and
+ * @dma_mask covers the GPU's bypass base plus all of DRAM. The result
+ * of pnv_npu_dma_set_bypass() is not checked.
+ *
+ * Returns 0 on success, -ENXIO if @npdev has no valid PE number and
+ * -ENODEV if no linked GPU PE is found.
+ */
+int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask)
+{
+	struct pci_controller *hose = pci_bus_to_host(npdev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	struct pci_dn *pdn = pci_get_pdn(npdev);
+	struct pnv_ioda_pe *npu_pe, *gpu_pe;
+	uint64_t bypass_top;
+	bool use_bypass = false;
+
+	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
+		return -ENXIO;
+
+	/* We only do bypass if it's enabled on the linked device */
+	npu_pe = &phb->ioda.pe_array[pdn->pe_number];
+	gpu_pe = get_gpu_pci_dev_and_pe(npu_pe, NULL);
+	if (!gpu_pe)
+		return -ENODEV;
+
+	if (gpu_pe->tce_bypass_enabled) {
+		bypass_top = gpu_pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
+		use_bypass = (dma_mask >= bypass_top);
+	}
+
+	if (!use_bypass)
+		dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n");
+	else
+		dev_info(&npdev->dev, "Using 64-bit DMA iommu bypass\n");
+
+	pnv_npu_dma_set_bypass(npu_pe, use_bypass);
+	*npdev->dev.dma_mask = dma_mask;
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/powernv/opal-kmsg.c b/arch/powerpc/platforms/powernv/opal-kmsg.c
new file mode 100644
index 0000000..6f1214d
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-kmsg.c
@@ -0,0 +1,75 @@
+/*
+ * kmsg dumper that ensures the OPAL console fully flushes panic messages
+ *
+ * Author: Russell Currey <ruscur@russell.cc>
+ *
+ * Copyright 2015 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/kmsg_dump.h>
+
+#include <asm/opal.h>
+#include <asm/opal-api.h>
+
+/*
+ * Console output is controlled by OPAL firmware. The kernel regularly calls
+ * OPAL_POLL_EVENTS, which flushes some console output. In a panic state,
+ * however, the kernel no longer calls OPAL_POLL_EVENTS and the panic message
+ * may not be completely printed. This function does not actually dump the
+ * message, it just ensures that OPAL completely flushes the console buffer.
+ *
+ * @dumper: the registered dumper (unused).
+ * @reason: why the dump was triggered; anything other than
+ *          KMSG_DUMP_PANIC is ignored.
+ */
+static void force_opal_console_flush(struct kmsg_dumper *dumper,
+				     enum kmsg_dump_reason reason)
+{
+	int i;
+	int64_t ret;
+
+	/*
+	 * Outside of a panic context the pollers will continue to run,
+	 * so we don't need to do any special flushing.
+	 */
+	if (reason != KMSG_DUMP_PANIC)
+		return;
+
+	if (opal_check_token(OPAL_CONSOLE_FLUSH)) {
+		/*
+		 * Incrementally flush, but only keep going while firmware
+		 * reports that there is more to do or that it is busy.
+		 * Any other status - success, or an error such as
+		 * OPAL_UNSUPPORTED or OPAL_PARAMETER - ends the loop, so a
+		 * console that keeps failing cannot hang the panic path.
+		 */
+		do {
+			ret = opal_console_flush(0);
+			if (ret == OPAL_BUSY_EVENT)
+				opal_poll_events(NULL);
+		} while (ret == OPAL_PARTIAL || ret == OPAL_BUSY ||
+			 ret == OPAL_BUSY_EVENT);
+	} else {
+		/*
+		 * If OPAL_CONSOLE_FLUSH is not implemented in the firmware,
+		 * the console can still be flushed by calling the polling
+		 * function enough times to flush the buffer. We don't know
+		 * how much output still needs to be flushed, but we can be
+		 * generous since the kernel is in panic and doesn't need
+		 * to do much else.
+		 */
+		printk(KERN_NOTICE "opal: OPAL_CONSOLE_FLUSH missing.\n");
+		for (i = 0; i < 1024; i++) {
+			opal_poll_events(NULL);
+		}
+	}
+}
+
+/* kmsg dumper whose callback flushes the OPAL console on panic */
+static struct kmsg_dumper opal_kmsg_dumper = {
+	.dump = force_opal_console_flush,
+};
+
+/*
+ * Register the console-flushing dumper with the kmsg dump machinery.
+ * A registration failure is logged and otherwise ignored.
+ */
+void __init opal_kmsg_init(void)
+{
+	int err;
+
+	/* Add our dumper to the list */
+	err = kmsg_dump_register(&opal_kmsg_dumper);
+	if (err)
+		pr_err("opal: kmsg_dump_register failed; returned %d\n", err);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
index 4ece8e4..e315e70 100644
--- a/arch/powerpc/platforms/powernv/opal-prd.c
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -434,7 +434,6 @@ static const struct of_device_id opal_prd_match[] = {
static struct platform_driver opal_prd_driver = {
.driver = {
.name = "opal-prd",
- .owner = THIS_MODULE,
.of_match_table = opal_prd_match,
},
.probe = opal_prd_probe,
diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c
index 37dbee1..f886886 100644
--- a/arch/powerpc/platforms/powernv/opal-rtc.c
+++ b/arch/powerpc/platforms/powernv/opal-rtc.c
@@ -31,8 +31,7 @@ static void opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm)
tm->tm_hour = bcd2bin((h_m_s_ms >> 56) & 0xff);
tm->tm_min = bcd2bin((h_m_s_ms >> 48) & 0xff);
tm->tm_sec = bcd2bin((h_m_s_ms >> 40) & 0xff);
-
- GregorianDay(tm);
+ tm->tm_wday = -1;
}
unsigned long __init opal_get_boot_time(void)
@@ -51,7 +50,7 @@ unsigned long __init opal_get_boot_time(void)
rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
if (rc == OPAL_BUSY_EVENT)
opal_poll_events(NULL);
- else
+ else if (rc == OPAL_BUSY)
mdelay(10);
}
if (rc != OPAL_SUCCESS)
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index b7a464f..e45b88a 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -301,3 +301,4 @@ OPAL_CALL(opal_flash_erase, OPAL_FLASH_ERASE);
OPAL_CALL(opal_prd_msg, OPAL_PRD_MSG);
OPAL_CALL(opal_leds_get_ind, OPAL_LEDS_GET_INDICATOR);
OPAL_CALL(opal_leds_set_ind, OPAL_LEDS_SET_INDICATOR);
+OPAL_CALL(opal_console_flush, OPAL_CONSOLE_FLUSH);
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
index 7634d1c..d0ac535 100644
--- a/arch/powerpc/platforms/powernv/opal-xscom.c
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -126,7 +126,7 @@ static const struct scom_controller opal_scom_controller = {
static int opal_xscom_init(void)
{
- if (firmware_has_feature(FW_FEATURE_OPALv3))
+ if (firmware_has_feature(FW_FEATURE_OPAL))
scom_init(&opal_scom_controller);
return 0;
}
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 57cffb8..4e0da5a 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -98,16 +98,11 @@ int __init early_init_dt_scan_opal(unsigned long node,
pr_debug("OPAL Entry = 0x%llx (sizep=%p runtimesz=%d)\n",
opal.size, sizep, runtimesz);
- powerpc_firmware_features |= FW_FEATURE_OPAL;
if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
- powerpc_firmware_features |= FW_FEATURE_OPALv2;
- powerpc_firmware_features |= FW_FEATURE_OPALv3;
- pr_info("OPAL V3 detected !\n");
- } else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) {
- powerpc_firmware_features |= FW_FEATURE_OPALv2;
- pr_info("OPAL V2 detected !\n");
+ powerpc_firmware_features |= FW_FEATURE_OPAL;
+ pr_info("OPAL detected !\n");
} else {
- pr_info("OPAL V1 detected !\n");
+ panic("OPAL != V3 detected, no longer supported.\n");
}
/* Reinit all cores with the right endian */
@@ -352,17 +347,15 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
* enough room and be done with it
*/
spin_lock_irqsave(&opal_write_lock, flags);
- if (firmware_has_feature(FW_FEATURE_OPALv2)) {
- rc = opal_console_write_buffer_space(vtermno, &olen);
- len = be64_to_cpu(olen);
- if (rc || len < total_len) {
- spin_unlock_irqrestore(&opal_write_lock, flags);
- /* Closed -> drop characters */
- if (rc)
- return total_len;
- opal_poll_events(NULL);
- return -EAGAIN;
- }
+ rc = opal_console_write_buffer_space(vtermno, &olen);
+ len = be64_to_cpu(olen);
+ if (rc || len < total_len) {
+ spin_unlock_irqrestore(&opal_write_lock, flags);
+ /* Closed -> drop characters */
+ if (rc)
+ return total_len;
+ opal_poll_events(NULL);
+ return -EAGAIN;
}
/* We still try to handle partial completions, though they
@@ -555,7 +548,7 @@ bool opal_mce_check_early_recovery(struct pt_regs *regs)
goto out;
if ((regs->nip >= opal.base) &&
- (regs->nip <= (opal.base + opal.size)))
+ (regs->nip < (opal.base + opal.size)))
recover_addr = find_recovery_address(regs->nip);
/*
@@ -696,10 +689,7 @@ static int __init opal_init(void)
}
/* Register OPAL consoles if any ports */
- if (firmware_has_feature(FW_FEATURE_OPALv2))
- consoles = of_find_node_by_path("/ibm,opal/consoles");
- else
- consoles = of_node_get(opal_node);
+ consoles = of_find_node_by_path("/ibm,opal/consoles");
if (consoles) {
for_each_child_of_node(consoles, np) {
if (strcmp(np->name, "serial"))
@@ -758,6 +748,9 @@ static int __init opal_init(void)
opal_pdev_init(opal_node, "ibm,opal-flash");
opal_pdev_init(opal_node, "ibm,opal-prd");
+ /* Initialise OPAL kmsg dumper for flushing console on panic */
+ opal_kmsg_init();
+
return 0;
}
machine_subsys_initcall(powernv, opal_init);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 414fd1a..573ae19 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -116,16 +116,6 @@ static int __init iommu_setup(char *str)
}
early_param("iommu", iommu_setup);
-/*
- * stdcix is only supposed to be used in hypervisor real mode as per
- * the architecture spec
- */
-static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
-{
- __asm__ __volatile__("stdcix %0,0,%1"
- : : "r" (val), "r" (paddr) : "memory");
-}
-
static inline bool pnv_pci_is_mem_pref_64(unsigned long flags)
{
return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) ==
@@ -344,7 +334,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
return;
}
- if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
+ if (!firmware_has_feature(FW_FEATURE_OPAL)) {
pr_info(" Firmware too old to support M64 window\n");
return;
}
@@ -357,6 +347,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
}
res = &hose->mem_resources[1];
+ res->name = dn->full_name;
res->start = of_translate_address(dn, r + 2);
res->end = res->start + of_read_number(r + 4, 2) - 1;
res->flags = (IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);
@@ -780,8 +771,12 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
return -ENXIO;
}
- /* Configure PELTV */
- pnv_ioda_set_peltv(phb, pe, true);
+ /*
+ * Configure PELTV. NPUs don't have a PELTV table so skip
+ * configuration on them.
+ */
+ if (phb->type != PNV_PHB_NPU)
+ pnv_ioda_set_peltv(phb, pe, true);
/* Setup reverse map */
for (rid = pe->rid; rid < rid_end; rid++)
@@ -924,7 +919,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
}
#endif /* CONFIG_PCI_IOV */
-#if 0
static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
{
struct pci_controller *hose = pci_bus_to_host(dev->bus);
@@ -941,11 +935,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
if (pdn->pe_number != IODA_INVALID_PE)
return NULL;
- /* PE#0 has been pre-set */
- if (dev->bus->number == 0)
- pe_num = 0;
- else
- pe_num = pnv_ioda_alloc_pe(phb);
+ pe_num = pnv_ioda_alloc_pe(phb);
if (pe_num == IODA_INVALID_PE) {
pr_warning("%s: Not enough PE# available, disabling device\n",
pci_name(dev));
@@ -963,6 +953,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
pci_dev_get(dev);
pdn->pcidev = dev;
pdn->pe_number = pe_num;
+ pe->flags = PNV_IODA_PE_DEV;
pe->pdev = dev;
pe->pbus = NULL;
pe->tce32_seg = -1;
@@ -993,7 +984,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
return pe;
}
-#endif /* Useful for SRIOV case */
static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
{
@@ -1007,6 +997,7 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
pci_name(dev));
continue;
}
+ pdn->pcidev = dev;
pdn->pe_number = pe->pe_number;
pe->dma_weight += pnv_ioda_dma_weight(dev);
if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
@@ -1083,6 +1074,77 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
pnv_ioda_link_pe_by_weight(phb, pe);
}
+/*
+ * Assign a PE to the NPU device @npu_pdev.
+ *
+ * If an existing PE on the same PHB already belongs to a device linked
+ * to the same GPU, @npu_pdev is added to that PE and it is returned.
+ * Otherwise a new device PE is set up via pnv_ioda_setup_dev_PE() and
+ * its result is returned.
+ */
+static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
+{
+	struct pci_controller *hose = pci_bus_to_host(npu_pdev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	struct pci_dev *gpu_pdev;
+	struct pci_dn *npu_pdn;
+	struct pnv_ioda_pe *pe;
+	int pe_num, rc;
+	long rid;
+
+	/*
+	 * Due to a hardware erratum PE#0 on the NPU is reserved for
+	 * error handling. This means we only have three PEs remaining
+	 * which need to be assigned to four links, implying some
+	 * links must share PEs.
+	 *
+	 * To achieve this we assign PEs such that NPUs linking the
+	 * same GPU get assigned the same PE.
+	 */
+	gpu_pdev = pnv_pci_get_gpu_dev(npu_pdev);
+	for (pe_num = 0; pe_num < phb->ioda.total_pe; pe_num++) {
+		pe = &phb->ioda.pe_array[pe_num];
+		if (!pe->pdev)
+			continue;
+
+		if (pnv_pci_get_gpu_dev(pe->pdev) != gpu_pdev)
+			continue;
+
+		/*
+		 * This device has the same peer GPU so should
+		 * be assigned the same PE as the existing
+		 * peer NPU.
+		 */
+		dev_info(&npu_pdev->dev,
+			 "Associating to existing PE %d\n", pe_num);
+		pci_dev_get(npu_pdev);
+		npu_pdn = pci_get_pdn(npu_pdev);
+		rid = npu_pdev->bus->number << 8 | npu_pdn->devfn;
+		npu_pdn->pcidev = npu_pdev;
+		npu_pdn->pe_number = pe_num;
+		pe->dma_weight += pnv_ioda_dma_weight(npu_pdev);
+		phb->ioda.pe_rmap[rid] = pe->pe_number;
+
+		/* Map the PE to this link */
+		rc = opal_pci_set_pe(phb->opal_id, pe_num, rid,
+				     OpalPciBusAll,
+				     OPAL_COMPARE_RID_DEVICE_NUMBER,
+				     OPAL_COMPARE_RID_FUNCTION_NUMBER,
+				     OPAL_MAP_PE);
+		WARN_ON(rc != OPAL_SUCCESS);
+
+		return pe;
+	}
+
+	/*
+	 * Could not find an existing PE so allocate a new
+	 * one.
+	 */
+	return pnv_ioda_setup_dev_PE(npu_pdev);
+}
+
+/* Assign an NPU PE to every device on @bus. */
+static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus)
+{
+	struct pci_dev *npu_dev;
+
+	list_for_each_entry(npu_dev, &bus->devices, bus_list) {
+		pnv_ioda_setup_npu_PE(npu_dev);
+	}
+}
+
static void pnv_ioda_setup_PEs(struct pci_bus *bus)
{
struct pci_dev *dev;
@@ -1119,7 +1181,17 @@ static void pnv_pci_ioda_setup_PEs(void)
if (phb->reserve_m64_pe)
phb->reserve_m64_pe(hose->bus, NULL, true);
- pnv_ioda_setup_PEs(hose->bus);
+ /*
+ * On NPU PHB, we expect separate PEs for individual PCI
+ * functions. PCI bus dependent PEs are required for the
+ * remaining types of PHBs.
+ */
+ if (phb->type == PNV_PHB_NPU) {
+ /* PE#0 is needed for error reporting */
+ pnv_ioda_reserve_pe(phb, 0);
+ pnv_ioda_setup_npu_PEs(hose->bus);
+ } else
+ pnv_ioda_setup_PEs(hose->bus);
}
}
@@ -1578,6 +1650,8 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
struct pnv_ioda_pe *pe;
uint64_t top;
bool bypass = false;
+ struct pci_dev *linked_npu_dev;
+ int i;
if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
return -ENODEV;;
@@ -1596,6 +1670,18 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
set_dma_ops(&pdev->dev, &dma_iommu_ops);
}
*pdev->dev.dma_mask = dma_mask;
+
+ /* Update peer npu devices */
+ if (pe->flags & PNV_IODA_PE_PEER)
+ for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
+ if (!pe->peers[i])
+ continue;
+
+ linked_npu_dev = pe->peers[i]->pdev;
+ if (dma_get_mask(&linked_npu_dev->dev) != dma_mask)
+ dma_set_mask(&linked_npu_dev->dev, dma_mask);
+ }
+
return 0;
}
@@ -1740,12 +1826,23 @@ static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe)
/* 01xb - invalidate TCEs that match the specified PE# */
unsigned long val = (0x4ull << 60) | (pe->pe_number & 0xFF);
struct pnv_phb *phb = pe->phb;
+ struct pnv_ioda_pe *npe;
+ int i;
if (!phb->ioda.tce_inval_reg)
return;
mb(); /* Ensure above stores are visible */
__raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg);
+
+ if (pe->flags & PNV_IODA_PE_PEER)
+ for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
+ npe = pe->peers[i];
+ if (!npe || npe->phb->type != PNV_PHB_NPU)
+ continue;
+
+ pnv_npu_tce_invalidate_entire(npe);
+ }
}
static void pnv_pci_ioda2_do_tce_invalidate(unsigned pe_number, bool rm,
@@ -1780,15 +1877,28 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
struct iommu_table_group_link *tgl;
list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
+ struct pnv_ioda_pe *npe;
struct pnv_ioda_pe *pe = container_of(tgl->table_group,
struct pnv_ioda_pe, table_group);
__be64 __iomem *invalidate = rm ?
(__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys :
pe->phb->ioda.tce_inval_reg;
+ int i;
pnv_pci_ioda2_do_tce_invalidate(pe->pe_number, rm,
invalidate, tbl->it_page_shift,
index, npages);
+
+ if (pe->flags & PNV_IODA_PE_PEER)
+ /* Invalidate PEs using the same TCE table */
+ for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
+ npe = pe->peers[i];
+ if (!npe || npe->phb->type != PNV_PHB_NPU)
+ continue;
+
+ pnv_npu_tce_invalidate(npe, tbl, index,
+ npages, rm);
+ }
}
}
@@ -2436,10 +2546,17 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
pe->dma_weight, segs);
pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
- } else {
+ } else if (phb->type == PNV_PHB_IODA2) {
pe_info(pe, "Assign DMA32 space\n");
segs = 0;
pnv_pci_ioda2_setup_dma_pe(phb, pe);
+ } else if (phb->type == PNV_PHB_NPU) {
+ /*
+ * We initialise the DMA space for an NPU PHB
+ * after setup of the PHB is complete as we
+ * point the NPU TVT to the same location
+ * as the PHB3 TVT.
+ */
}
remaining -= segs;
@@ -2881,6 +2998,11 @@ static void pnv_pci_ioda_setup_seg(void)
list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
phb = hose->private_data;
+
+ /* NPU PHB does not support IO or MMIO segmentation */
+ if (phb->type == PNV_PHB_NPU)
+ continue;
+
list_for_each_entry(pe, &phb->ioda.pe_list, list) {
pnv_ioda_setup_pe_seg(hose, pe);
}
@@ -2920,6 +3042,27 @@ static void pnv_pci_ioda_create_dbgfs(void)
#endif /* CONFIG_DEBUG_FS */
}
+/*
+ * Walk every NPU PHB and, for each PE on its DMA list, link the PE to
+ * its GPU PE and then enable or disable bypass depending on whether the
+ * PE's device currently has a full 64-bit DMA mask.
+ */
+static void pnv_npu_ioda_fixup(void)
+{
+	struct pci_controller *hose, *next;
+	struct pnv_ioda_pe *npe;
+	struct pnv_phb *phb;
+
+	list_for_each_entry_safe(hose, next, &hose_list, list_node) {
+		phb = hose->private_data;
+		if (phb->type == PNV_PHB_NPU) {
+			list_for_each_entry(npe, &phb->ioda.pe_dma_list,
+					    dma_link) {
+				bool want_bypass;
+
+				want_bypass = dma_get_mask(&npe->pdev->dev) ==
+					      DMA_BIT_MASK(64);
+				pnv_npu_init_dma_pe(npe);
+				pnv_npu_dma_set_bypass(npe, want_bypass);
+			}
+		}
+	}
+}
+
static void pnv_pci_ioda_fixup(void)
{
pnv_pci_ioda_setup_PEs();
@@ -2932,6 +3075,9 @@ static void pnv_pci_ioda_fixup(void)
eeh_init();
eeh_addr_cache_build();
#endif
+
+ /* Link NPU IODA tables to their PCI devices. */
+ pnv_npu_ioda_fixup();
}
/*
@@ -3046,6 +3192,19 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
.shutdown = pnv_pci_ioda_shutdown,
};
+/*
+ * Controller operations installed on NPU PHBs by
+ * pnv_pci_init_ioda_phb(). All callbacks are the common powernv ones
+ * except .dma_set_mask, which uses the NPU-specific
+ * pnv_npu_dma_set_mask().
+ */
+static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
+ .dma_dev_setup = pnv_pci_dma_dev_setup,
+#ifdef CONFIG_PCI_MSI
+ .setup_msi_irqs = pnv_setup_msi_irqs,
+ .teardown_msi_irqs = pnv_teardown_msi_irqs,
+#endif
+ .enable_device_hook = pnv_pci_enable_device_hook,
+ .window_alignment = pnv_pci_window_alignment,
+ .reset_secondary_bus = pnv_pci_reset_secondary_bus,
+ .dma_set_mask = pnv_npu_dma_set_mask,
+ .shutdown = pnv_pci_ioda_shutdown,
+};
+
static void __init pnv_pci_init_ioda_phb(struct device_node *np,
u64 hub_id, int ioda_type)
{
@@ -3101,6 +3260,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
phb->model = PNV_PHB_MODEL_P7IOC;
else if (of_device_is_compatible(np, "ibm,power8-pciex"))
phb->model = PNV_PHB_MODEL_PHB3;
+ else if (of_device_is_compatible(np, "ibm,power8-npu-pciex"))
+ phb->model = PNV_PHB_MODEL_NPU;
else
phb->model = PNV_PHB_MODEL_UNKNOWN;
@@ -3201,7 +3362,11 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
* the child P2P bridges) can form individual PE.
*/
ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
- hose->controller_ops = pnv_pci_ioda_controller_ops;
+
+ if (phb->type == PNV_PHB_NPU)
+ hose->controller_ops = pnv_npu_ioda_controller_ops;
+ else
+ hose->controller_ops = pnv_pci_ioda_controller_ops;
#ifdef CONFIG_PCI_IOV
ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources;
@@ -3236,6 +3401,11 @@ void __init pnv_pci_init_ioda2_phb(struct device_node *np)
pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
}
+/*
+ * Initialise the PHB described by device node @np as an NPU PHB by
+ * calling the common IODA PHB initialiser with hub id 0 and type
+ * PNV_PHB_NPU.
+ */
+void __init pnv_pci_init_npu_phb(struct device_node *np)
+{
+ pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU);
+}
+
void __init pnv_pci_init_ioda_hub(struct device_node *np)
{
struct device_node *phbn;
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index f2dd772..2f55c86 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -1,8 +1,6 @@
/*
* Support PCI/PCIe on PowerNV platforms
*
- * Currently supports only P5IOC2
- *
* Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
*
* This program is free software; you can redistribute it and/or
@@ -807,6 +805,10 @@ void __init pnv_pci_init(void)
for_each_compatible_node(np, NULL, "ibm,ioda2-phb")
pnv_pci_init_ioda2_phb(np);
+ /* Look for NPU PHBs */
+ for_each_compatible_node(np, NULL, "ibm,ioda2-npu-phb")
+ pnv_pci_init_npu_phb(np);
+
/* Setup the linkage between OF nodes and PHBs */
pci_devs_phb_init();
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index c8ff50e..7f56313 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -7,6 +7,7 @@ enum pnv_phb_type {
PNV_PHB_P5IOC2 = 0,
PNV_PHB_IODA1 = 1,
PNV_PHB_IODA2 = 2,
+ PNV_PHB_NPU = 3,
};
/* Precise PHB model for error management */
@@ -15,6 +16,7 @@ enum pnv_phb_model {
PNV_PHB_MODEL_P5IOC2,
PNV_PHB_MODEL_P7IOC,
PNV_PHB_MODEL_PHB3,
+ PNV_PHB_MODEL_NPU,
};
#define PNV_PCI_DIAG_BUF_SIZE 8192
@@ -24,6 +26,7 @@ enum pnv_phb_model {
#define PNV_IODA_PE_MASTER (1 << 3) /* Master PE in compound case */
#define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */
#define PNV_IODA_PE_VF (1 << 5) /* PE for one VF */
+#define PNV_IODA_PE_PEER (1 << 6) /* PE has peers */
/* Data associated with a PE, including IOMMU tracking etc.. */
struct pnv_phb;
@@ -31,6 +34,9 @@ struct pnv_ioda_pe {
unsigned long flags;
struct pnv_phb *phb;
+#define PNV_IODA_MAX_PEER_PES 8
+ struct pnv_ioda_pe *peers[PNV_IODA_MAX_PEER_PES];
+
/* A PE can be associated with a single device or an
* entire bus (& children). In the former case, pdev
* is populated, in the later case, pbus is.
@@ -229,6 +235,7 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
extern void pnv_pci_init_p5ioc2_hub(struct device_node *np);
extern void pnv_pci_init_ioda_hub(struct device_node *np);
extern void pnv_pci_init_ioda2_phb(struct device_node *np);
+extern void pnv_pci_init_npu_phb(struct device_node *np);
extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
__be64 *startp, __be64 *endp, bool rm);
extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
@@ -238,4 +245,16 @@ extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev);
extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
+/* Nvlink functions */
+extern void pnv_npu_tce_invalidate_entire(struct pnv_ioda_pe *npe);
+extern void pnv_npu_tce_invalidate(struct pnv_ioda_pe *npe,
+ struct iommu_table *tbl,
+ unsigned long index,
+ unsigned long npages,
+ bool rm);
+extern void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe);
+extern void pnv_npu_setup_dma_pe(struct pnv_ioda_pe *npe);
+extern int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe, bool enabled);
+extern int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask);
+
#endif /* __POWERNV_PCI_H */
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index a9a8fa3..1acb0c7 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -90,12 +90,8 @@ static void pnv_show_cpuinfo(struct seq_file *m)
if (root)
model = of_get_property(root, "model", NULL);
seq_printf(m, "machine\t\t: PowerNV %s\n", model);
- if (firmware_has_feature(FW_FEATURE_OPALv3))
- seq_printf(m, "firmware\t: OPAL v3\n");
- else if (firmware_has_feature(FW_FEATURE_OPALv2))
- seq_printf(m, "firmware\t: OPAL v2\n");
- else if (firmware_has_feature(FW_FEATURE_OPAL))
- seq_printf(m, "firmware\t: OPAL v1\n");
+ if (firmware_has_feature(FW_FEATURE_OPAL))
+ seq_printf(m, "firmware\t: OPAL\n");
else
seq_printf(m, "firmware\t: BML\n");
of_node_put(root);
@@ -224,9 +220,9 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
{
xics_kexec_teardown_cpu(secondary);
- /* On OPAL v3, we return all CPUs to firmware */
+ /* On OPAL, we return all CPUs to firmware */
- if (!firmware_has_feature(FW_FEATURE_OPALv3))
+ if (!firmware_has_feature(FW_FEATURE_OPAL))
return;
if (secondary) {
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index ca26483..ad7b1a3 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -61,14 +61,15 @@ static int pnv_smp_kick_cpu(int nr)
unsigned long start_here =
__pa(ppc_function_entry(generic_secondary_smp_init));
long rc;
+ uint8_t status;
BUG_ON(nr < 0 || nr >= NR_CPUS);
/*
- * If we already started or OPALv2 is not supported, we just
+ * If we already started or OPAL is not supported, we just
* kick the CPU via the PACA
*/
- if (paca[nr].cpu_start || !firmware_has_feature(FW_FEATURE_OPALv2))
+ if (paca[nr].cpu_start || !firmware_has_feature(FW_FEATURE_OPAL))
goto kick;
/*
@@ -77,55 +78,42 @@ static int pnv_smp_kick_cpu(int nr)
* first time. OPAL v3 allows us to query OPAL to know if it
* has the CPUs, so we do that
*/
- if (firmware_has_feature(FW_FEATURE_OPALv3)) {
- uint8_t status;
-
- rc = opal_query_cpu_status(pcpu, &status);
- if (rc != OPAL_SUCCESS) {
- pr_warn("OPAL Error %ld querying CPU %d state\n",
- rc, nr);
- return -ENODEV;
- }
+ rc = opal_query_cpu_status(pcpu, &status);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn("OPAL Error %ld querying CPU %d state\n", rc, nr);
+ return -ENODEV;
+ }
- /*
- * Already started, just kick it, probably coming from
- * kexec and spinning
- */
- if (status == OPAL_THREAD_STARTED)
- goto kick;
+ /*
+ * Already started, just kick it, probably coming from
+ * kexec and spinning
+ */
+ if (status == OPAL_THREAD_STARTED)
+ goto kick;
- /*
- * Available/inactive, let's kick it
- */
- if (status == OPAL_THREAD_INACTIVE) {
- pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n",
- nr, pcpu);
- rc = opal_start_cpu(pcpu, start_here);
- if (rc != OPAL_SUCCESS) {
- pr_warn("OPAL Error %ld starting CPU %d\n",
- rc, nr);
- return -ENODEV;
- }
- } else {
- /*
- * An unavailable CPU (or any other unknown status)
- * shouldn't be started. It should also
- * not be in the possible map but currently it can
- * happen
- */
- pr_devel("OPAL: CPU %d (HW 0x%x) is unavailable"
- " (status %d)...\n", nr, pcpu, status);
+ /*
+ * Available/inactive, let's kick it
+ */
+ if (status == OPAL_THREAD_INACTIVE) {
+ pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n", nr, pcpu);
+ rc = opal_start_cpu(pcpu, start_here);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn("OPAL Error %ld starting CPU %d\n", rc, nr);
return -ENODEV;
}
} else {
/*
- * On OPAL v2, we just kick it and hope for the best,
- * we must not test the error from opal_start_cpu() or
- * we would fail to get CPUs from kexec.
+ * An unavailable CPU (or any other unknown status)
+ * shouldn't be started. It should also
+ * not be in the possible map but currently it can
+ * happen
*/
- opal_start_cpu(pcpu, start_here);
+ pr_devel("OPAL: CPU %d (HW 0x%x) is unavailable"
+ " (status %d)...\n", nr, pcpu, status);
+ return -ENODEV;
}
- kick:
+
+kick:
return smp_generic_kick_cpu(nr);
}
OpenPOWER on IntegriCloud