summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms/pseries
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/pseries')
-rw-r--r--arch/powerpc/platforms/pseries/cmm.c14
-rw-r--r--arch/powerpc/platforms/pseries/dlpar.c2
-rw-r--r--arch/powerpc/platforms/pseries/eeh.c4
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c2
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c66
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c589
-rw-r--r--arch/powerpc/platforms/pseries/msi.c18
-rw-r--r--arch/powerpc/platforms/pseries/nvram.c279
-rw-r--r--arch/powerpc/platforms/pseries/offline_states.h2
-rw-r--r--arch/powerpc/platforms/pseries/pci_dlpar.c2
-rw-r--r--arch/powerpc/platforms/pseries/setup.c21
-rw-r--r--arch/powerpc/platforms/pseries/smp.c21
-rw-r--r--arch/powerpc/platforms/pseries/xics.c140
13 files changed, 984 insertions, 176 deletions
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index f480386..3cafc30 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -508,12 +508,7 @@ static int cmm_memory_isolate_cb(struct notifier_block *self,
if (action == MEM_ISOLATE_COUNT)
ret = cmm_count_pages(arg);
- if (ret)
- ret = notifier_from_errno(ret);
- else
- ret = NOTIFY_OK;
-
- return ret;
+ return notifier_from_errno(ret);
}
static struct notifier_block cmm_mem_isolate_nb = {
@@ -635,12 +630,7 @@ static int cmm_memory_cb(struct notifier_block *self,
break;
}
- if (ret)
- ret = notifier_from_errno(ret);
- else
- ret = NOTIFY_OK;
-
- return ret;
+ return notifier_from_errno(ret);
}
static struct notifier_block cmm_mem_nb = {
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index b74a923..57ceb92 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -74,7 +74,7 @@ static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa)
return NULL;
/* The configure connector reported name does not contain a
- * preceeding '/', so we allocate a buffer large enough to
+ * preceding '/', so we allocate a buffer large enough to
* prepend this to the full_name.
*/
name = (char *)ccwa + ccwa->name_offset;
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 17a11c8..8964917 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -65,7 +65,7 @@
* with EEH.
*
* Ideally, a PCI device driver, when suspecting that an isolation
- * event has occured (e.g. by reading 0xff's), will then ask EEH
+ * event has occurred (e.g. by reading 0xff's), will then ask EEH
* whether this is the case, and then take appropriate steps to
* reset the PCI slot, the PCI device, and then resume operations.
* However, until that day, the checking is done here, with the
@@ -876,7 +876,7 @@ void eeh_restore_bars(struct pci_dn *pdn)
*
* Save the values of the device bars. Unlike the restore
* routine, this routine is *not* recursive. This is because
- * PCI devices are added individuallly; but, for the restore,
+ * PCI devices are added individually; but, for the restore,
* an entire slot is reset at a time.
*/
static void eeh_save_bars(struct pci_dn *pdn)
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index fd50ccd..ef8c454 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -216,7 +216,7 @@ static void pseries_cpu_die(unsigned int cpu)
cpu, pcpu, cpu_status);
}
- /* Isolation and deallocation are definatly done by
+ /* Isolation and deallocation are definitely done by
* drslot_chrp_cpu. If they were not they would be
* done here. Change isolate state to Isolate and
* change allocation-state to Unusable.
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index bc88036..33867ec 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -17,6 +17,54 @@
#include <asm/pSeries_reconfig.h>
#include <asm/sparsemem.h>
+static unsigned long get_memblock_size(void)
+{
+ struct device_node *np;
+ unsigned int memblock_size = 0;
+
+ np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (np) {
+ const unsigned long *size;
+
+ size = of_get_property(np, "ibm,lmb-size", NULL);
+ memblock_size = size ? *size : 0;
+
+ of_node_put(np);
+ } else {
+ unsigned int memzero_size = 0;
+ const unsigned int *regs;
+
+ np = of_find_node_by_path("/memory@0");
+ if (np) {
+ regs = of_get_property(np, "reg", NULL);
+ memzero_size = regs ? regs[3] : 0;
+ of_node_put(np);
+ }
+
+ if (memzero_size) {
+ /* We now know the size of memory@0, use this to find
+ * the first memoryblock and get its size.
+ */
+ char buf[64];
+
+ sprintf(buf, "/memory@%x", memzero_size);
+ np = of_find_node_by_path(buf);
+ if (np) {
+ regs = of_get_property(np, "reg", NULL);
+ memblock_size = regs ? regs[3] : 0;
+ of_node_put(np);
+ }
+ }
+ }
+
+ return memblock_size;
+}
+
+unsigned long memory_block_size_bytes(void)
+{
+ return get_memblock_size();
+}
+
static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
{
unsigned long start, start_pfn;
@@ -127,30 +175,22 @@ static int pseries_add_memory(struct device_node *np)
static int pseries_drconf_memory(unsigned long *base, unsigned int action)
{
- struct device_node *np;
- const unsigned long *lmb_size;
+ unsigned long memblock_size;
int rc;
- np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
- if (!np)
+ memblock_size = get_memblock_size();
+ if (!memblock_size)
return -EINVAL;
- lmb_size = of_get_property(np, "ibm,lmb-size", NULL);
- if (!lmb_size) {
- of_node_put(np);
- return -EINVAL;
- }
-
if (action == PSERIES_DRCONF_MEM_ADD) {
- rc = memblock_add(*base, *lmb_size);
+ rc = memblock_add(*base, memblock_size);
rc = (rc < 0) ? -EINVAL : 0;
} else if (action == PSERIES_DRCONF_MEM_REMOVE) {
- rc = pseries_remove_memblock(*base, *lmb_size);
+ rc = pseries_remove_memblock(*base, memblock_size);
} else {
rc = -EINVAL;
}
- of_node_put(np);
return rc;
}
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index edea60b..6d5412a 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -33,6 +33,7 @@
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/crash_dump.h>
+#include <linux/memory.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/rtas.h>
@@ -45,6 +46,7 @@
#include <asm/tce.h>
#include <asm/ppc-pci.h>
#include <asm/udbg.h>
+#include <asm/mmzone.h>
#include "plpar_wrappers.h"
@@ -270,6 +272,152 @@ static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum)
return tce_ret;
}
+/* this is compatible with cells for the device tree property */
+struct dynamic_dma_window_prop {
+ __be32 liobn; /* tce table number */
+ __be64 dma_base; /* address hi,lo */
+ __be32 tce_shift; /* ilog2(tce_page_size) */
+ __be32 window_shift; /* ilog2(tce_window_size) */
+};
+
+struct direct_window {
+ struct device_node *device;
+ const struct dynamic_dma_window_prop *prop;
+ struct list_head list;
+};
+
+/* Dynamic DMA Window support */
+struct ddw_query_response {
+ u32 windows_available;
+ u32 largest_available_block;
+ u32 page_size;
+ u32 migration_capable;
+};
+
+struct ddw_create_response {
+ u32 liobn;
+ u32 addr_hi;
+ u32 addr_lo;
+};
+
+static LIST_HEAD(direct_window_list);
+/* prevents races between memory on/offline and window creation */
+static DEFINE_SPINLOCK(direct_window_list_lock);
+/* protects initializing window twice for same device */
+static DEFINE_MUTEX(direct_window_init_mutex);
+#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
+
+static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
+ unsigned long num_pfn, const void *arg)
+{
+ const struct dynamic_dma_window_prop *maprange = arg;
+ int rc;
+ u64 tce_size, num_tce, dma_offset, next;
+ u32 tce_shift;
+ long limit;
+
+ tce_shift = be32_to_cpu(maprange->tce_shift);
+ tce_size = 1ULL << tce_shift;
+ next = start_pfn << PAGE_SHIFT;
+ num_tce = num_pfn << PAGE_SHIFT;
+
+ /* round back to the beginning of the tce page size */
+ num_tce += next & (tce_size - 1);
+ next &= ~(tce_size - 1);
+
+ /* covert to number of tces */
+ num_tce |= tce_size - 1;
+ num_tce >>= tce_shift;
+
+ do {
+ /*
+ * Set up the page with TCE data, looping through and setting
+ * the values.
+ */
+ limit = min_t(long, num_tce, 512);
+ dma_offset = next + be64_to_cpu(maprange->dma_base);
+
+ rc = plpar_tce_stuff((u64)be32_to_cpu(maprange->liobn),
+ dma_offset,
+ 0, limit);
+ num_tce -= limit;
+ } while (num_tce > 0 && !rc);
+
+ return rc;
+}
+
+static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
+ unsigned long num_pfn, const void *arg)
+{
+ const struct dynamic_dma_window_prop *maprange = arg;
+ u64 *tcep, tce_size, num_tce, dma_offset, next, proto_tce, liobn;
+ u32 tce_shift;
+ u64 rc = 0;
+ long l, limit;
+
+ local_irq_disable(); /* to protect tcep and the page behind it */
+ tcep = __get_cpu_var(tce_page);
+
+ if (!tcep) {
+ tcep = (u64 *)__get_free_page(GFP_ATOMIC);
+ if (!tcep) {
+ local_irq_enable();
+ return -ENOMEM;
+ }
+ __get_cpu_var(tce_page) = tcep;
+ }
+
+ proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
+
+ liobn = (u64)be32_to_cpu(maprange->liobn);
+ tce_shift = be32_to_cpu(maprange->tce_shift);
+ tce_size = 1ULL << tce_shift;
+ next = start_pfn << PAGE_SHIFT;
+ num_tce = num_pfn << PAGE_SHIFT;
+
+ /* round back to the beginning of the tce page size */
+ num_tce += next & (tce_size - 1);
+ next &= ~(tce_size - 1);
+
+ /* covert to number of tces */
+ num_tce |= tce_size - 1;
+ num_tce >>= tce_shift;
+
+ /* We can map max one pageful of TCEs at a time */
+ do {
+ /*
+ * Set up the page with TCE data, looping through and setting
+ * the values.
+ */
+ limit = min_t(long, num_tce, 4096/TCE_ENTRY_SIZE);
+ dma_offset = next + be64_to_cpu(maprange->dma_base);
+
+ for (l = 0; l < limit; l++) {
+ tcep[l] = proto_tce | next;
+ next += tce_size;
+ }
+
+ rc = plpar_tce_put_indirect(liobn,
+ dma_offset,
+ (u64)virt_to_abs(tcep),
+ limit);
+
+ num_tce -= limit;
+ } while (num_tce > 0 && !rc);
+
+ /* error cleanup: caller will clear whole range */
+
+ local_irq_enable();
+ return rc;
+}
+
+static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn,
+ unsigned long num_pfn, void *arg)
+{
+ return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg);
+}
+
+
#ifdef CONFIG_PCI
static void iommu_table_setparms(struct pci_controller *phb,
struct device_node *dn,
@@ -495,6 +643,329 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
pci_name(dev));
}
+static int __read_mostly disable_ddw;
+
+static int __init disable_ddw_setup(char *str)
+{
+ disable_ddw = 1;
+ printk(KERN_INFO "ppc iommu: disabling ddw.\n");
+
+ return 0;
+}
+
+early_param("disable_ddw", disable_ddw_setup);
+
+static void remove_ddw(struct device_node *np)
+{
+ struct dynamic_dma_window_prop *dwp;
+ struct property *win64;
+ const u32 *ddr_avail;
+ u64 liobn;
+ int len, ret;
+
+ ddr_avail = of_get_property(np, "ibm,ddw-applicable", &len);
+ win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
+ if (!win64 || !ddr_avail || len < 3 * sizeof(u32))
+ return;
+
+ dwp = win64->value;
+ liobn = (u64)be32_to_cpu(dwp->liobn);
+
+ /* clear the whole window, note the arg is in kernel pages */
+ ret = tce_clearrange_multi_pSeriesLP(0,
+ 1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp);
+ if (ret)
+ pr_warning("%s failed to clear tces in window.\n",
+ np->full_name);
+ else
+ pr_debug("%s successfully cleared tces in window.\n",
+ np->full_name);
+
+ ret = rtas_call(ddr_avail[2], 1, 1, NULL, liobn);
+ if (ret)
+ pr_warning("%s: failed to remove direct window: rtas returned "
+ "%d to ibm,remove-pe-dma-window(%x) %llx\n",
+ np->full_name, ret, ddr_avail[2], liobn);
+ else
+ pr_debug("%s: successfully removed direct window: rtas returned "
+ "%d to ibm,remove-pe-dma-window(%x) %llx\n",
+ np->full_name, ret, ddr_avail[2], liobn);
+}
+
+
+static int dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node *pdn)
+{
+ struct device_node *dn;
+ struct pci_dn *pcidn;
+ struct direct_window *window;
+ const struct dynamic_dma_window_prop *direct64;
+ u64 dma_addr = 0;
+
+ dn = pci_device_to_OF_node(dev);
+ pcidn = PCI_DN(dn);
+ spin_lock(&direct_window_list_lock);
+ /* check if we already created a window and dupe that config if so */
+ list_for_each_entry(window, &direct_window_list, list) {
+ if (window->device == pdn) {
+ direct64 = window->prop;
+ dma_addr = direct64->dma_base;
+ break;
+ }
+ }
+ spin_unlock(&direct_window_list_lock);
+
+ return dma_addr;
+}
+
+static u64 dupe_ddw_if_kexec(struct pci_dev *dev, struct device_node *pdn)
+{
+ struct device_node *dn;
+ struct pci_dn *pcidn;
+ int len;
+ struct direct_window *window;
+ const struct dynamic_dma_window_prop *direct64;
+ u64 dma_addr = 0;
+
+ dn = pci_device_to_OF_node(dev);
+ pcidn = PCI_DN(dn);
+ direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
+ if (direct64) {
+ window = kzalloc(sizeof(*window), GFP_KERNEL);
+ if (!window) {
+ remove_ddw(pdn);
+ } else {
+ window->device = pdn;
+ window->prop = direct64;
+ spin_lock(&direct_window_list_lock);
+ list_add(&window->list, &direct_window_list);
+ spin_unlock(&direct_window_list_lock);
+ dma_addr = direct64->dma_base;
+ }
+ }
+
+ return dma_addr;
+}
+
+static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail,
+ struct ddw_query_response *query)
+{
+ struct device_node *dn;
+ struct pci_dn *pcidn;
+ u32 cfg_addr;
+ u64 buid;
+ int ret;
+
+ /*
+ * Get the config address and phb buid of the PE window.
+ * Rely on eeh to retrieve this for us.
+ * Retrieve them from the pci device, not the node with the
+ * dma-window property
+ */
+ dn = pci_device_to_OF_node(dev);
+ pcidn = PCI_DN(dn);
+ cfg_addr = pcidn->eeh_config_addr;
+ if (pcidn->eeh_pe_config_addr)
+ cfg_addr = pcidn->eeh_pe_config_addr;
+ buid = pcidn->phb->buid;
+ ret = rtas_call(ddr_avail[0], 3, 5, (u32 *)query,
+ cfg_addr, BUID_HI(buid), BUID_LO(buid));
+ dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x"
+ " returned %d\n", ddr_avail[0], cfg_addr, BUID_HI(buid),
+ BUID_LO(buid), ret);
+ return ret;
+}
+
+static int create_ddw(struct pci_dev *dev, const u32 *ddr_avail,
+ struct ddw_create_response *create, int page_shift,
+ int window_shift)
+{
+ struct device_node *dn;
+ struct pci_dn *pcidn;
+ u32 cfg_addr;
+ u64 buid;
+ int ret;
+
+ /*
+ * Get the config address and phb buid of the PE window.
+ * Rely on eeh to retrieve this for us.
+ * Retrieve them from the pci device, not the node with the
+ * dma-window property
+ */
+ dn = pci_device_to_OF_node(dev);
+ pcidn = PCI_DN(dn);
+ cfg_addr = pcidn->eeh_config_addr;
+ if (pcidn->eeh_pe_config_addr)
+ cfg_addr = pcidn->eeh_pe_config_addr;
+ buid = pcidn->phb->buid;
+
+ do {
+ /* extra outputs are LIOBN and dma-addr (hi, lo) */
+ ret = rtas_call(ddr_avail[1], 5, 4, (u32 *)create, cfg_addr,
+ BUID_HI(buid), BUID_LO(buid), page_shift, window_shift);
+ } while (rtas_busy_delay(ret));
+ dev_info(&dev->dev,
+ "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
+ "(liobn = 0x%x starting addr = %x %x)\n", ddr_avail[1],
+ cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift,
+ window_shift, ret, create->liobn, create->addr_hi, create->addr_lo);
+
+ return ret;
+}
+
+/*
+ * If the PE supports dynamic dma windows, and there is space for a table
+ * that can map all pages in a linear offset, then setup such a table,
+ * and record the dma-offset in the struct device.
+ *
+ * dev: the pci device we are checking
+ * pdn: the parent pe node with the ibm,dma_window property
+ * Future: also check if we can remap the base window for our base page size
+ *
+ * returns the dma offset for use by dma_set_mask
+ */
+static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
+{
+ int len, ret;
+ struct ddw_query_response query;
+ struct ddw_create_response create;
+ int page_shift;
+ u64 dma_addr, max_addr;
+ struct device_node *dn;
+ const u32 *uninitialized_var(ddr_avail);
+ struct direct_window *window;
+ struct property *uninitialized_var(win64);
+ struct dynamic_dma_window_prop *ddwprop;
+
+ mutex_lock(&direct_window_init_mutex);
+
+ dma_addr = dupe_ddw_if_already_created(dev, pdn);
+ if (dma_addr != 0)
+ goto out_unlock;
+
+ dma_addr = dupe_ddw_if_kexec(dev, pdn);
+ if (dma_addr != 0)
+ goto out_unlock;
+
+ /*
+ * the ibm,ddw-applicable property holds the tokens for:
+ * ibm,query-pe-dma-window
+ * ibm,create-pe-dma-window
+ * ibm,remove-pe-dma-window
+ * for the given node in that order.
+ * the property is actually in the parent, not the PE
+ */
+ ddr_avail = of_get_property(pdn, "ibm,ddw-applicable", &len);
+ if (!ddr_avail || len < 3 * sizeof(u32))
+ goto out_unlock;
+
+ /*
+ * Query if there is a second window of size to map the
+ * whole partition. Query returns number of windows, largest
+ * block assigned to PE (partition endpoint), and two bitmasks
+ * of page sizes: supported and supported for migrate-dma.
+ */
+ dn = pci_device_to_OF_node(dev);
+ ret = query_ddw(dev, ddr_avail, &query);
+ if (ret != 0)
+ goto out_unlock;
+
+ if (query.windows_available == 0) {
+ /*
+ * no additional windows are available for this device.
+ * We might be able to reallocate the existing window,
+ * trading in for a larger page size.
+ */
+ dev_dbg(&dev->dev, "no free dynamic windows");
+ goto out_unlock;
+ }
+ if (query.page_size & 4) {
+ page_shift = 24; /* 16MB */
+ } else if (query.page_size & 2) {
+ page_shift = 16; /* 64kB */
+ } else if (query.page_size & 1) {
+ page_shift = 12; /* 4kB */
+ } else {
+ dev_dbg(&dev->dev, "no supported direct page size in mask %x",
+ query.page_size);
+ goto out_unlock;
+ }
+ /* verify the window * number of ptes will map the partition */
+ /* check largest block * page size > max memory hotplug addr */
+ max_addr = memory_hotplug_max();
+ if (query.largest_available_block < (max_addr >> page_shift)) {
+ dev_dbg(&dev->dev, "can't map partiton max 0x%llx with %u "
+ "%llu-sized pages\n", max_addr, query.largest_available_block,
+ 1ULL << page_shift);
+ goto out_unlock;
+ }
+ len = order_base_2(max_addr);
+ win64 = kzalloc(sizeof(struct property), GFP_KERNEL);
+ if (!win64) {
+ dev_info(&dev->dev,
+ "couldn't allocate property for 64bit dma window\n");
+ goto out_unlock;
+ }
+ win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL);
+ win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL);
+ if (!win64->name || !win64->value) {
+ dev_info(&dev->dev,
+ "couldn't allocate property name and value\n");
+ goto out_free_prop;
+ }
+
+ ret = create_ddw(dev, ddr_avail, &create, page_shift, len);
+ if (ret != 0)
+ goto out_free_prop;
+
+ ddwprop->liobn = cpu_to_be32(create.liobn);
+ ddwprop->dma_base = cpu_to_be64(of_read_number(&create.addr_hi, 2));
+ ddwprop->tce_shift = cpu_to_be32(page_shift);
+ ddwprop->window_shift = cpu_to_be32(len);
+
+ dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %s\n",
+ create.liobn, dn->full_name);
+
+ window = kzalloc(sizeof(*window), GFP_KERNEL);
+ if (!window)
+ goto out_clear_window;
+
+ ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
+ win64->value, tce_setrange_multi_pSeriesLP_walk);
+ if (ret) {
+ dev_info(&dev->dev, "failed to map direct window for %s: %d\n",
+ dn->full_name, ret);
+ goto out_clear_window;
+ }
+
+ ret = prom_add_property(pdn, win64);
+ if (ret) {
+ dev_err(&dev->dev, "unable to add dma window property for %s: %d",
+ pdn->full_name, ret);
+ goto out_clear_window;
+ }
+
+ window->device = pdn;
+ window->prop = ddwprop;
+ spin_lock(&direct_window_list_lock);
+ list_add(&window->list, &direct_window_list);
+ spin_unlock(&direct_window_list_lock);
+
+ dma_addr = of_read_number(&create.addr_hi, 2);
+ goto out_unlock;
+
+out_clear_window:
+ remove_ddw(pdn);
+
+out_free_prop:
+ kfree(win64->name);
+ kfree(win64->value);
+ kfree(win64);
+
+out_unlock:
+ mutex_unlock(&direct_window_init_mutex);
+ return dma_addr;
+}
+
static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
{
struct device_node *pdn, *dn;
@@ -505,7 +976,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev));
/* dev setup for LPAR is a little tricky, since the device tree might
- * contain the dma-window properties per-device and not neccesarily
+ * contain the dma-window properties per-device and not necessarily
* for the bus. So we need to search upwards in the tree until we
* either hit a dma-window property, OR find a parent with a table
* already allocated.
@@ -541,23 +1012,137 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
set_iommu_table_base(&dev->dev, pci->iommu_table);
}
+
+static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
+{
+ bool ddw_enabled = false;
+ struct device_node *pdn, *dn;
+ struct pci_dev *pdev;
+ const void *dma_window = NULL;
+ u64 dma_offset;
+
+ if (!dev->dma_mask || !dma_supported(dev, dma_mask))
+ return -EIO;
+
+ /* only attempt to use a new window if 64-bit DMA is requested */
+ if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) {
+ pdev = to_pci_dev(dev);
+
+ dn = pci_device_to_OF_node(pdev);
+ dev_dbg(dev, "node is %s\n", dn->full_name);
+
+ /*
+ * the device tree might contain the dma-window properties
+ * per-device and not necessarily for the bus. So we need to
+ * search upwards in the tree until we either hit a dma-window
+ * property, OR find a parent with a table already allocated.
+ */
+ for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table;
+ pdn = pdn->parent) {
+ dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
+ if (dma_window)
+ break;
+ }
+ if (pdn && PCI_DN(pdn)) {
+ dma_offset = enable_ddw(pdev, pdn);
+ if (dma_offset != 0) {
+ dev_info(dev, "Using 64-bit direct DMA at offset %llx\n", dma_offset);
+ set_dma_offset(dev, dma_offset);
+ set_dma_ops(dev, &dma_direct_ops);
+ ddw_enabled = true;
+ }
+ }
+ }
+
+ /* fall-through to iommu ops */
+ if (!ddw_enabled) {
+ dev_info(dev, "Using 32-bit DMA via iommu\n");
+ set_dma_ops(dev, &dma_iommu_ops);
+ }
+
+ *dev->dma_mask = dma_mask;
+ return 0;
+}
+
#else /* CONFIG_PCI */
#define pci_dma_bus_setup_pSeries NULL
#define pci_dma_dev_setup_pSeries NULL
#define pci_dma_bus_setup_pSeriesLP NULL
#define pci_dma_dev_setup_pSeriesLP NULL
+#define dma_set_mask_pSeriesLP NULL
#endif /* !CONFIG_PCI */
+static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
+ void *data)
+{
+ struct direct_window *window;
+ struct memory_notify *arg = data;
+ int ret = 0;
+
+ switch (action) {
+ case MEM_GOING_ONLINE:
+ spin_lock(&direct_window_list_lock);
+ list_for_each_entry(window, &direct_window_list, list) {
+ ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn,
+ arg->nr_pages, window->prop);
+ /* XXX log error */
+ }
+ spin_unlock(&direct_window_list_lock);
+ break;
+ case MEM_CANCEL_ONLINE:
+ case MEM_OFFLINE:
+ spin_lock(&direct_window_list_lock);
+ list_for_each_entry(window, &direct_window_list, list) {
+ ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn,
+ arg->nr_pages, window->prop);
+ /* XXX log error */
+ }
+ spin_unlock(&direct_window_list_lock);
+ break;
+ default:
+ break;
+ }
+ if (ret && action != MEM_CANCEL_ONLINE)
+ return NOTIFY_BAD;
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block iommu_mem_nb = {
+ .notifier_call = iommu_mem_notifier,
+};
+
static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node)
{
int err = NOTIFY_OK;
struct device_node *np = node;
struct pci_dn *pci = PCI_DN(np);
+ struct direct_window *window;
switch (action) {
case PSERIES_RECONFIG_REMOVE:
if (pci && pci->iommu_table)
iommu_free_table(pci->iommu_table, np->full_name);
+
+ spin_lock(&direct_window_list_lock);
+ list_for_each_entry(window, &direct_window_list, list) {
+ if (window->device == np) {
+ list_del(&window->list);
+ kfree(window);
+ break;
+ }
+ }
+ spin_unlock(&direct_window_list_lock);
+
+ /*
+ * Because the notifier runs after isolation of the
+ * slot, we are guaranteed any DMA window has already
+ * been revoked and the TCEs have been marked invalid,
+ * so we don't need a call to remove_ddw(np). However,
+ * if an additional notifier action is added before the
+ * isolate call, we should update this code for
+ * completeness with such a call.
+ */
break;
default:
err = NOTIFY_DONE;
@@ -587,6 +1172,7 @@ void iommu_init_early_pSeries(void)
ppc_md.tce_get = tce_get_pSeriesLP;
ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_pSeriesLP;
ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_pSeriesLP;
+ ppc_md.dma_set_mask = dma_set_mask_pSeriesLP;
} else {
ppc_md.tce_build = tce_build_pSeries;
ppc_md.tce_free = tce_free_pSeries;
@@ -597,6 +1183,7 @@ void iommu_init_early_pSeries(void)
pSeries_reconfig_notifier_register(&iommu_reconfig_nb);
+ register_memory_notifier(&iommu_mem_nb);
set_pci_dma_ops(&dma_iommu_ops);
}
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 1164c34..38d24e7 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -93,8 +93,18 @@ static void rtas_disable_msi(struct pci_dev *pdev)
if (!pdn)
return;
- if (rtas_change_msi(pdn, RTAS_CHANGE_FN, 0) != 0)
- pr_debug("rtas_msi: Setting MSIs to 0 failed!\n");
+ /*
+ * disabling MSI with the explicit interface also disables MSI-X
+ */
+ if (rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, 0) != 0) {
+ /*
+ * may have failed because explicit interface is not
+ * present
+ */
+ if (rtas_change_msi(pdn, RTAS_CHANGE_FN, 0) != 0) {
+ pr_debug("rtas_msi: Setting MSIs to 0 failed!\n");
+ }
+ }
}
static int rtas_query_irq_number(struct pci_dn *pdn, int offset)
@@ -127,7 +137,7 @@ static void rtas_teardown_msi_irqs(struct pci_dev *pdev)
if (entry->irq == NO_IRQ)
continue;
- set_irq_msi(entry->irq, NULL);
+ irq_set_msi_desc(entry->irq, NULL);
irq_dispose_mapping(entry->irq);
}
@@ -427,7 +437,7 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
}
dev_dbg(&pdev->dev, "rtas_msi: allocated virq %d\n", virq);
- set_irq_msi(virq, entry);
+ irq_set_msi_desc(virq, entry);
/* Read config space back so we can restore after reset */
read_msi_msg(virq, &msg);
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 7e828ba..00cc3a0 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -16,6 +16,8 @@
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/kmsg_dump.h>
#include <asm/uaccess.h>
#include <asm/nvram.h>
#include <asm/rtas.h>
@@ -30,17 +32,54 @@ static int nvram_fetch, nvram_store;
static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */
static DEFINE_SPINLOCK(nvram_lock);
-static long nvram_error_log_index = -1;
-static long nvram_error_log_size = 0;
-
struct err_log_info {
int error_type;
unsigned int seq_num;
};
-#define NVRAM_MAX_REQ 2079
-#define NVRAM_MIN_REQ 1055
-#define NVRAM_LOG_PART_NAME "ibm,rtas-log"
+struct nvram_os_partition {
+ const char *name;
+ int req_size; /* desired size, in bytes */
+ int min_size; /* minimum acceptable size (0 means req_size) */
+ long size; /* size of data portion (excluding err_log_info) */
+ long index; /* offset of data portion of partition */
+};
+
+static struct nvram_os_partition rtas_log_partition = {
+ .name = "ibm,rtas-log",
+ .req_size = 2079,
+ .min_size = 1055,
+ .index = -1
+};
+
+static struct nvram_os_partition oops_log_partition = {
+ .name = "lnx,oops-log",
+ .req_size = 4000,
+ .min_size = 2000,
+ .index = -1
+};
+
+static const char *pseries_nvram_os_partitions[] = {
+ "ibm,rtas-log",
+ "lnx,oops-log",
+ NULL
+};
+
+static void oops_to_nvram(struct kmsg_dumper *dumper,
+ enum kmsg_dump_reason reason,
+ const char *old_msgs, unsigned long old_len,
+ const char *new_msgs, unsigned long new_len);
+
+static struct kmsg_dumper nvram_kmsg_dumper = {
+ .dump = oops_to_nvram
+};
+
+/* See clobbering_unread_rtas_event() */
+#define NVRAM_RTAS_READ_TIMEOUT 5 /* seconds */
+static unsigned long last_unread_rtas_event; /* timestamp */
+
+/* We preallocate oops_buf during init to avoid kmalloc during oops/panic. */
+static char *oops_buf;
static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index)
{
@@ -134,7 +173,7 @@ static ssize_t pSeries_nvram_get_size(void)
}
-/* nvram_write_error_log
+/* nvram_write_os_partition, nvram_write_error_log
*
* We need to buffer the error logs into nvram to ensure that we have
* the failure information to decode. If we have a severe error there
@@ -156,48 +195,58 @@ static ssize_t pSeries_nvram_get_size(void)
* The 'data' section would look like (in bytes):
* +--------------+------------+-----------------------------------+
* | event_logged | sequence # | error log |
- * |0 3|4 7|8 nvram_error_log_size-1|
+ * |0 3|4 7|8 error_log_size-1|
* +--------------+------------+-----------------------------------+
*
* event_logged: 0 if event has not been logged to syslog, 1 if it has
* sequence #: The unique sequence # for each event. (until it wraps)
* error log: The error log from event_scan
*/
-int nvram_write_error_log(char * buff, int length,
- unsigned int err_type, unsigned int error_log_cnt)
+int nvram_write_os_partition(struct nvram_os_partition *part, char * buff,
+ int length, unsigned int err_type, unsigned int error_log_cnt)
{
int rc;
loff_t tmp_index;
struct err_log_info info;
- if (nvram_error_log_index == -1) {
+ if (part->index == -1) {
return -ESPIPE;
}
- if (length > nvram_error_log_size) {
- length = nvram_error_log_size;
+ if (length > part->size) {
+ length = part->size;
}
info.error_type = err_type;
info.seq_num = error_log_cnt;
- tmp_index = nvram_error_log_index;
+ tmp_index = part->index;
rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index);
if (rc <= 0) {
- printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc);
+ pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc);
return rc;
}
rc = ppc_md.nvram_write(buff, length, &tmp_index);
if (rc <= 0) {
- printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc);
+ pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc);
return rc;
}
return 0;
}
+int nvram_write_error_log(char * buff, int length,
+ unsigned int err_type, unsigned int error_log_cnt)
+{
+ int rc = nvram_write_os_partition(&rtas_log_partition, buff, length,
+ err_type, error_log_cnt);
+ if (!rc)
+ last_unread_rtas_event = get_seconds();
+ return rc;
+}
+
/* nvram_read_error_log
*
* Reads nvram for error log for at most 'length'
@@ -209,13 +258,13 @@ int nvram_read_error_log(char * buff, int length,
loff_t tmp_index;
struct err_log_info info;
- if (nvram_error_log_index == -1)
+ if (rtas_log_partition.index == -1)
return -1;
- if (length > nvram_error_log_size)
- length = nvram_error_log_size;
+ if (length > rtas_log_partition.size)
+ length = rtas_log_partition.size;
- tmp_index = nvram_error_log_index;
+ tmp_index = rtas_log_partition.index;
rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index);
if (rc <= 0) {
@@ -244,37 +293,40 @@ int nvram_clear_error_log(void)
int clear_word = ERR_FLAG_ALREADY_LOGGED;
int rc;
- if (nvram_error_log_index == -1)
+ if (rtas_log_partition.index == -1)
return -1;
- tmp_index = nvram_error_log_index;
+ tmp_index = rtas_log_partition.index;
rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index);
if (rc <= 0) {
printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc);
return rc;
}
+ last_unread_rtas_event = 0;
return 0;
}
-/* pseries_nvram_init_log_partition
+/* pseries_nvram_init_os_partition
*
- * This will setup the partition we need for buffering the
- * error logs and cleanup partitions if needed.
+ * This sets up a partition with an "OS" signature.
*
* The general strategy is the following:
- * 1.) If there is log partition large enough then use it.
- * 2.) If there is none large enough, search
- * for a free partition that is large enough.
- * 3.) If there is not a free partition large enough remove
- * _all_ OS partitions and consolidate the space.
- * 4.) Will first try getting a chunk that will satisfy the maximum
- * error log size (NVRAM_MAX_REQ).
- * 5.) If the max chunk cannot be allocated then try finding a chunk
- * that will satisfy the minum needed (NVRAM_MIN_REQ).
+ * 1.) If a partition with the indicated name already exists...
+ * - If it's large enough, use it.
+ * - Otherwise, recycle it and keep going.
+ * 2.) Search for a free partition that is large enough.
+ * 3.) If there's not a free partition large enough, recycle any obsolete
+ * OS partitions and try again.
+ * 4.) Will first try getting a chunk that will satisfy the requested size.
+ * 5.) If a chunk of the requested size cannot be allocated, then try finding
+ * a chunk that will satisfy the minum needed.
+ *
+ * Returns 0 on success, else -1.
*/
-static int __init pseries_nvram_init_log_partition(void)
+static int __init pseries_nvram_init_os_partition(struct nvram_os_partition
+ *part)
{
loff_t p;
int size;
@@ -282,47 +334,76 @@ static int __init pseries_nvram_init_log_partition(void)
/* Scan nvram for partitions */
nvram_scan_partitions();
- /* Lookg for ours */
- p = nvram_find_partition(NVRAM_LOG_PART_NAME, NVRAM_SIG_OS, &size);
+ /* Look for ours */
+ p = nvram_find_partition(part->name, NVRAM_SIG_OS, &size);
/* Found one but too small, remove it */
- if (p && size < NVRAM_MIN_REQ) {
- pr_info("nvram: Found too small "NVRAM_LOG_PART_NAME" partition"
- ",removing it...");
- nvram_remove_partition(NVRAM_LOG_PART_NAME, NVRAM_SIG_OS);
+ if (p && size < part->min_size) {
+ pr_info("nvram: Found too small %s partition,"
+ " removing it...\n", part->name);
+ nvram_remove_partition(part->name, NVRAM_SIG_OS, NULL);
p = 0;
}
/* Create one if we didn't find */
if (!p) {
- p = nvram_create_partition(NVRAM_LOG_PART_NAME, NVRAM_SIG_OS,
- NVRAM_MAX_REQ, NVRAM_MIN_REQ);
- /* No room for it, try to get rid of any OS partition
- * and try again
- */
+ p = nvram_create_partition(part->name, NVRAM_SIG_OS,
+ part->req_size, part->min_size);
if (p == -ENOSPC) {
- pr_info("nvram: No room to create "NVRAM_LOG_PART_NAME
- " partition, deleting all OS partitions...");
- nvram_remove_partition(NULL, NVRAM_SIG_OS);
- p = nvram_create_partition(NVRAM_LOG_PART_NAME,
- NVRAM_SIG_OS, NVRAM_MAX_REQ,
- NVRAM_MIN_REQ);
+ pr_info("nvram: No room to create %s partition, "
+ "deleting any obsolete OS partitions...\n",
+ part->name);
+ nvram_remove_partition(NULL, NVRAM_SIG_OS,
+ pseries_nvram_os_partitions);
+ p = nvram_create_partition(part->name, NVRAM_SIG_OS,
+ part->req_size, part->min_size);
}
}
if (p <= 0) {
- pr_err("nvram: Failed to find or create "NVRAM_LOG_PART_NAME
- " partition, err %d\n", (int)p);
- return 0;
+ pr_err("nvram: Failed to find or create %s"
+ " partition, err %d\n", part->name, (int)p);
+ return -1;
}
- nvram_error_log_index = p;
- nvram_error_log_size = nvram_get_partition_size(p) -
- sizeof(struct err_log_info);
+ part->index = p;
+ part->size = nvram_get_partition_size(p) - sizeof(struct err_log_info);
return 0;
}
-machine_arch_initcall(pseries, pseries_nvram_init_log_partition);
+
+static void __init nvram_init_oops_partition(int rtas_partition_exists)
+{
+ int rc;
+
+ rc = pseries_nvram_init_os_partition(&oops_log_partition);
+ if (rc != 0) {
+ if (!rtas_partition_exists)
+ return;
+ pr_notice("nvram: Using %s partition to log both"
+ " RTAS errors and oops/panic reports\n",
+ rtas_log_partition.name);
+ memcpy(&oops_log_partition, &rtas_log_partition,
+ sizeof(rtas_log_partition));
+ }
+ oops_buf = kmalloc(oops_log_partition.size, GFP_KERNEL);
+ rc = kmsg_dump_register(&nvram_kmsg_dumper);
+ if (rc != 0) {
+ pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc);
+ kfree(oops_buf);
+ return;
+ }
+}
+
+static int __init pseries_nvram_init_log_partitions(void)
+{
+ int rc;
+
+ rc = pseries_nvram_init_os_partition(&rtas_log_partition);
+ nvram_init_oops_partition(rc == 0);
+ return 0;
+}
+machine_arch_initcall(pseries, pseries_nvram_init_log_partitions);
int __init pSeries_nvram_init(void)
{
@@ -353,3 +434,83 @@ int __init pSeries_nvram_init(void)
return 0;
}
+
+/*
+ * Try to capture the last capture_len bytes of the printk buffer. Return
+ * the amount actually captured.
+ */
+static size_t capture_last_msgs(const char *old_msgs, size_t old_len,
+ const char *new_msgs, size_t new_len,
+ char *captured, size_t capture_len)
+{
+ if (new_len >= capture_len) {
+ memcpy(captured, new_msgs + (new_len - capture_len),
+ capture_len);
+ return capture_len;
+ } else {
+ /* Grab the end of old_msgs. */
+ size_t old_tail_len = min(old_len, capture_len - new_len);
+ memcpy(captured, old_msgs + (old_len - old_tail_len),
+ old_tail_len);
+ memcpy(captured + old_tail_len, new_msgs, new_len);
+ return old_tail_len + new_len;
+ }
+}
+
+/*
+ * Are we using the ibm,rtas-log for oops/panic reports? And if so,
+ * would logging this oops/panic overwrite an RTAS event that rtas_errd
+ * hasn't had a chance to read and process? Return 1 if so, else 0.
+ *
+ * We assume that if rtas_errd hasn't read the RTAS event in
+ * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to.
+ */
+static int clobbering_unread_rtas_event(void)
+{
+ return (oops_log_partition.index == rtas_log_partition.index
+ && last_unread_rtas_event
+ && get_seconds() - last_unread_rtas_event <=
+ NVRAM_RTAS_READ_TIMEOUT);
+}
+
+/* our kmsg_dump callback */
+static void oops_to_nvram(struct kmsg_dumper *dumper,
+ enum kmsg_dump_reason reason,
+ const char *old_msgs, unsigned long old_len,
+ const char *new_msgs, unsigned long new_len)
+{
+ static unsigned int oops_count = 0;
+ static bool panicking = false;
+ size_t text_len;
+
+ switch (reason) {
+ case KMSG_DUMP_RESTART:
+ case KMSG_DUMP_HALT:
+ case KMSG_DUMP_POWEROFF:
+ /* These are almost always orderly shutdowns. */
+ return;
+ case KMSG_DUMP_OOPS:
+ case KMSG_DUMP_KEXEC:
+ break;
+ case KMSG_DUMP_PANIC:
+ panicking = true;
+ break;
+ case KMSG_DUMP_EMERG:
+ if (panicking)
+ /* Panic report already captured. */
+ return;
+ break;
+ default:
+ pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n",
+ __FUNCTION__, (int) reason);
+ return;
+ }
+
+ if (clobbering_unread_rtas_event())
+ return;
+
+ text_len = capture_last_msgs(old_msgs, old_len, new_msgs, new_len,
+ oops_buf, oops_log_partition.size);
+ (void) nvram_write_os_partition(&oops_log_partition, oops_buf,
+ (int) text_len, ERR_TYPE_KERNEL_PANIC, ++oops_count);
+}
diff --git a/arch/powerpc/platforms/pseries/offline_states.h b/arch/powerpc/platforms/pseries/offline_states.h
index 75a6f48..08672d9 100644
--- a/arch/powerpc/platforms/pseries/offline_states.h
+++ b/arch/powerpc/platforms/pseries/offline_states.h
@@ -34,6 +34,4 @@ static inline void set_default_offline_state(int cpu)
#endif
extern enum cpu_state_vals get_preferred_offline_state(int cpu);
-extern int start_secondary(void);
-extern void start_secondary_resume(void);
#endif
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 5fcc92a..3bf4488 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -149,7 +149,7 @@ struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn)
if (dn->child)
eeh_add_device_tree_early(dn);
- pcibios_scan_phb(phb, dn);
+ pcibios_scan_phb(phb);
pcibios_finish_adding_to_bus(phb->bus);
return phb;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index d345bfd..6c42cfd 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -114,10 +114,13 @@ static void __init fwnmi_init(void)
static void pseries_8259_cascade(unsigned int irq, struct irq_desc *desc)
{
+ struct irq_chip *chip = irq_desc_get_chip(desc);
unsigned int cascade_irq = i8259_irq();
+
if (cascade_irq != NO_IRQ)
generic_handle_irq(cascade_irq);
- desc->chip->eoi(irq);
+
+ chip->irq_eoi(&desc->irq_data);
}
static void __init pseries_setup_i8259_cascade(void)
@@ -166,7 +169,7 @@ static void __init pseries_setup_i8259_cascade(void)
printk(KERN_DEBUG "pic: PCI 8259 intack at 0x%016lx\n", intack);
i8259_init(found, intack);
of_node_put(found);
- set_irq_chained_handler(cascade, pseries_8259_cascade);
+ irq_set_chained_handler(cascade, pseries_8259_cascade);
}
static void __init pseries_mpic_init_IRQ(void)
@@ -284,14 +287,22 @@ static int alloc_dispatch_logs(void)
int cpu, ret;
struct paca_struct *pp;
struct dtl_entry *dtl;
+ struct kmem_cache *dtl_cache;
if (!firmware_has_feature(FW_FEATURE_SPLPAR))
return 0;
+ dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
+ DISPATCH_LOG_BYTES, 0, NULL);
+ if (!dtl_cache) {
+ pr_warn("Failed to create dispatch trace log buffer cache\n");
+ pr_warn("Stolen time statistics will be unreliable\n");
+ return 0;
+ }
+
for_each_possible_cpu(cpu) {
pp = &paca[cpu];
- dtl = kmalloc_node(DISPATCH_LOG_BYTES, GFP_KERNEL,
- cpu_to_node(cpu));
+ dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
if (!dtl) {
pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
cpu);
@@ -375,7 +386,7 @@ static int __init pSeries_init_panel(void)
return 0;
}
-arch_initcall(pSeries_init_panel);
+machine_arch_initcall(pseries, pSeries_init_panel);
static int pseries_set_dabr(unsigned long dabr)
{
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 0317cce..a509c52 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -64,8 +64,8 @@ int smp_query_cpu_stopped(unsigned int pcpu)
int qcss_tok = rtas_token("query-cpu-stopped-state");
if (qcss_tok == RTAS_UNKNOWN_SERVICE) {
- printk(KERN_INFO "Firmware doesn't support "
- "query-cpu-stopped-state\n");
+ printk_once(KERN_INFO
+ "Firmware doesn't support query-cpu-stopped-state\n");
return QCSS_HARDWARE_ERROR;
}
@@ -112,10 +112,10 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu)
/* Fixup atomic count: it exited inside IRQ handler. */
task_thread_info(paca[lcpu].__current)->preempt_count = 0;
-
+#ifdef CONFIG_HOTPLUG_CPU
if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE)
goto out;
-
+#endif
/*
* If the RTAS start-cpu token does not exist then presume the
* cpu is already spinning.
@@ -130,7 +130,9 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu)
return 0;
}
+#ifdef CONFIG_HOTPLUG_CPU
out:
+#endif
return 1;
}
@@ -144,16 +146,15 @@ static void __devinit smp_xics_setup_cpu(int cpu)
vpa_init(cpu);
cpumask_clear_cpu(cpu, of_spin_mask);
+#ifdef CONFIG_HOTPLUG_CPU
set_cpu_current_state(cpu, CPU_STATE_ONLINE);
set_default_offline_state(cpu);
-
+#endif
}
#endif /* CONFIG_XICS */
static void __devinit smp_pSeries_kick_cpu(int nr)
{
- long rc;
- unsigned long hcpuid;
BUG_ON(nr < 0 || nr >= NR_CPUS);
if (!smp_startup_cpu(nr))
@@ -165,16 +166,20 @@ static void __devinit smp_pSeries_kick_cpu(int nr)
* the processor will continue on to secondary_start
*/
paca[nr].cpu_start = 1;
-
+#ifdef CONFIG_HOTPLUG_CPU
set_preferred_offline_state(nr, CPU_STATE_ONLINE);
if (get_cpu_current_state(nr) == CPU_STATE_INACTIVE) {
+ long rc;
+ unsigned long hcpuid;
+
hcpuid = get_hard_smp_processor_id(nr);
rc = plpar_hcall_norets(H_PROD, hcpuid);
if (rc != H_SUCCESS)
printk(KERN_ERR "Error: Prod to wake up processor %d "
"Ret= %ld\n", nr, rc);
}
+#endif
}
static int smp_pSeries_cpu_bootable(unsigned int nr)
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index 7b96e5a..d690133 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -202,88 +202,88 @@ static int get_irq_server(unsigned int virq, const struct cpumask *cpumask,
#define get_irq_server(virq, cpumask, strict_check) (default_server)
#endif
-static void xics_unmask_irq(unsigned int virq)
+static void xics_unmask_irq(struct irq_data *d)
{
- unsigned int irq;
+ unsigned int hwirq;
int call_status;
int server;
- pr_devel("xics: unmask virq %d\n", virq);
+ pr_devel("xics: unmask virq %d\n", d->irq);
- irq = (unsigned int)irq_map[virq].hwirq;
- pr_devel(" -> map to hwirq 0x%x\n", irq);
- if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
+ hwirq = (unsigned int)irq_map[d->irq].hwirq;
+ pr_devel(" -> map to hwirq 0x%x\n", hwirq);
+ if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
return;
- server = get_irq_server(virq, irq_to_desc(virq)->affinity, 0);
+ server = get_irq_server(d->irq, d->affinity, 0);
- call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server,
+ call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hwirq, server,
DEFAULT_PRIORITY);
if (call_status != 0) {
printk(KERN_ERR
"%s: ibm_set_xive irq %u server %x returned %d\n",
- __func__, irq, server, call_status);
+ __func__, hwirq, server, call_status);
return;
}
/* Now unmask the interrupt (often a no-op) */
- call_status = rtas_call(ibm_int_on, 1, 1, NULL, irq);
+ call_status = rtas_call(ibm_int_on, 1, 1, NULL, hwirq);
if (call_status != 0) {
printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n",
- __func__, irq, call_status);
+ __func__, hwirq, call_status);
return;
}
}
-static unsigned int xics_startup(unsigned int virq)
+static unsigned int xics_startup(struct irq_data *d)
{
/*
* The generic MSI code returns with the interrupt disabled on the
* card, using the MSI mask bits. Firmware doesn't appear to unmask
* at that level, so we do it here by hand.
*/
- if (irq_to_desc(virq)->msi_desc)
- unmask_msi_irq(irq_get_irq_data(virq));
+ if (d->msi_desc)
+ unmask_msi_irq(d);
/* unmask it */
- xics_unmask_irq(virq);
+ xics_unmask_irq(d);
return 0;
}
-static void xics_mask_real_irq(unsigned int irq)
+static void xics_mask_real_irq(unsigned int hwirq)
{
int call_status;
- if (irq == XICS_IPI)
+ if (hwirq == XICS_IPI)
return;
- call_status = rtas_call(ibm_int_off, 1, 1, NULL, irq);
+ call_status = rtas_call(ibm_int_off, 1, 1, NULL, hwirq);
if (call_status != 0) {
printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n",
- __func__, irq, call_status);
+ __func__, hwirq, call_status);
return;
}
/* Have to set XIVE to 0xff to be able to remove a slot */
- call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq,
+ call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hwirq,
default_server, 0xff);
if (call_status != 0) {
printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n",
- __func__, irq, call_status);
+ __func__, hwirq, call_status);
return;
}
}
-static void xics_mask_irq(unsigned int virq)
+static void xics_mask_irq(struct irq_data *d)
{
- unsigned int irq;
+ unsigned int hwirq;
- pr_devel("xics: mask virq %d\n", virq);
+ pr_devel("xics: mask virq %d\n", d->irq);
- irq = (unsigned int)irq_map[virq].hwirq;
- if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
+ hwirq = (unsigned int)irq_map[d->irq].hwirq;
+ if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
return;
- xics_mask_real_irq(irq);
+ xics_mask_real_irq(hwirq);
}
static void xics_mask_unknown_vec(unsigned int vec)
@@ -371,57 +371,58 @@ static unsigned char pop_cppr(void)
return os_cppr->stack[--os_cppr->index];
}
-static void xics_eoi_direct(unsigned int virq)
+static void xics_eoi_direct(struct irq_data *d)
{
- unsigned int irq = (unsigned int)irq_map[virq].hwirq;
+ unsigned int hwirq = (unsigned int)irq_map[d->irq].hwirq;
iosync();
- direct_xirr_info_set((pop_cppr() << 24) | irq);
+ direct_xirr_info_set((pop_cppr() << 24) | hwirq);
}
-static void xics_eoi_lpar(unsigned int virq)
+static void xics_eoi_lpar(struct irq_data *d)
{
- unsigned int irq = (unsigned int)irq_map[virq].hwirq;
+ unsigned int hwirq = (unsigned int)irq_map[d->irq].hwirq;
iosync();
- lpar_xirr_info_set((pop_cppr() << 24) | irq);
+ lpar_xirr_info_set((pop_cppr() << 24) | hwirq);
}
-static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
+static int
+xics_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool force)
{
- unsigned int irq;
+ unsigned int hwirq;
int status;
int xics_status[2];
int irq_server;
- irq = (unsigned int)irq_map[virq].hwirq;
- if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
+ hwirq = (unsigned int)irq_map[d->irq].hwirq;
+ if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
return -1;
- status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq);
+ status = rtas_call(ibm_get_xive, 1, 3, xics_status, hwirq);
if (status) {
printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
- __func__, irq, status);
+ __func__, hwirq, status);
return -1;
}
- irq_server = get_irq_server(virq, cpumask, 1);
+ irq_server = get_irq_server(d->irq, cpumask, 1);
if (irq_server == -1) {
char cpulist[128];
cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
printk(KERN_WARNING
"%s: No online cpus in the mask %s for irq %d\n",
- __func__, cpulist, virq);
+ __func__, cpulist, d->irq);
return -1;
}
status = rtas_call(ibm_set_xive, 3, 1, NULL,
- irq, irq_server, xics_status[1]);
+ hwirq, irq_server, xics_status[1]);
if (status) {
printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
- __func__, irq, status);
+ __func__, hwirq, status);
return -1;
}
@@ -430,20 +431,20 @@ static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
static struct irq_chip xics_pic_direct = {
.name = "XICS",
- .startup = xics_startup,
- .mask = xics_mask_irq,
- .unmask = xics_unmask_irq,
- .eoi = xics_eoi_direct,
- .set_affinity = xics_set_affinity
+ .irq_startup = xics_startup,
+ .irq_mask = xics_mask_irq,
+ .irq_unmask = xics_unmask_irq,
+ .irq_eoi = xics_eoi_direct,
+ .irq_set_affinity = xics_set_affinity
};
static struct irq_chip xics_pic_lpar = {
.name = "XICS",
- .startup = xics_startup,
- .mask = xics_mask_irq,
- .unmask = xics_unmask_irq,
- .eoi = xics_eoi_lpar,
- .set_affinity = xics_set_affinity
+ .irq_startup = xics_startup,
+ .irq_mask = xics_mask_irq,
+ .irq_unmask = xics_unmask_irq,
+ .irq_eoi = xics_eoi_lpar,
+ .irq_set_affinity = xics_set_affinity
};
@@ -469,8 +470,8 @@ static int xics_host_map(struct irq_host *h, unsigned int virq,
/* Insert the interrupt mapping into the radix tree for fast lookup */
irq_radix_revmap_insert(xics_host, virq, hw);
- irq_to_desc(virq)->status |= IRQ_LEVEL;
- set_irq_chip_and_handler(virq, xics_irq_chip, handle_fasteoi_irq);
+ irq_set_status_flags(virq, IRQ_LEVEL);
+ irq_set_chip_and_handler(virq, xics_irq_chip, handle_fasteoi_irq);
return 0;
}
@@ -599,7 +600,7 @@ static void xics_request_ipi(void)
* IPIs are marked IRQF_DISABLED as they must run with irqs
* disabled
*/
- set_irq_handler(ipi, handle_percpu_irq);
+ irq_set_handler(ipi, handle_percpu_irq);
if (firmware_has_feature(FW_FEATURE_LPAR))
rc = request_irq(ipi, xics_ipi_action_lpar,
IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL);
@@ -873,7 +874,7 @@ void xics_kexec_teardown_cpu(int secondary)
void xics_migrate_irqs_away(void)
{
int cpu = smp_processor_id(), hw_cpu = hard_smp_processor_id();
- unsigned int irq, virq;
+ int virq;
/* If we used to be the default server, move to the new "boot_cpuid" */
if (hw_cpu == default_server)
@@ -890,33 +891,38 @@ void xics_migrate_irqs_away(void)
for_each_irq(virq) {
struct irq_desc *desc;
+ struct irq_chip *chip;
+ unsigned int hwirq;
int xics_status[2];
int status;
unsigned long flags;
- /* We cant set affinity on ISA interrupts */
+ /* We can't set affinity on ISA interrupts */
if (virq < NUM_ISA_INTERRUPTS)
continue;
if (irq_map[virq].host != xics_host)
continue;
- irq = (unsigned int)irq_map[virq].hwirq;
+ hwirq = (unsigned int)irq_map[virq].hwirq;
/* We need to get IPIs still. */
- if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
+ if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
continue;
+
desc = irq_to_desc(virq);
/* We only need to migrate enabled IRQS */
- if (desc == NULL || desc->chip == NULL
- || desc->action == NULL
- || desc->chip->set_affinity == NULL)
+ if (desc == NULL || desc->action == NULL)
+ continue;
+
+ chip = irq_desc_get_chip(desc);
+ if (chip == NULL || chip->irq_set_affinity == NULL)
continue;
raw_spin_lock_irqsave(&desc->lock, flags);
- status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq);
+ status = rtas_call(ibm_get_xive, 1, 3, xics_status, hwirq);
if (status) {
printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
- __func__, irq, status);
+ __func__, hwirq, status);
goto unlock;
}
@@ -934,8 +940,8 @@ void xics_migrate_irqs_away(void)
virq, cpu);
/* Reset affinity to all cpus */
- cpumask_setall(irq_to_desc(virq)->affinity);
- desc->chip->set_affinity(virq, cpu_all_mask);
+ cpumask_setall(desc->irq_data.affinity);
+ chip->irq_set_affinity(&desc->irq_data, cpu_all_mask, true);
unlock:
raw_spin_unlock_irqrestore(&desc->lock, flags);
}
OpenPOWER on IntegriCloud