77 files changed, 25297 insertions, 0 deletions
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
new file mode 100644
index 0000000..c14d7d8
--- /dev/null
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -0,0 +1,140 @@
+config PPC_CELL
+	bool
+	default n
+
+config PPC_CELL_NATIVE
+	bool
+	select PPC_CELL
+	select PPC_DCR_MMIO
+	select PPC_OF_PLATFORM_PCI
+	select PPC_INDIRECT_IO
+	select PPC_NATIVE
+	select MPIC
+	select IBM_NEW_EMAC_EMAC4
+	select IBM_NEW_EMAC_RGMII
+	select IBM_NEW_EMAC_ZMII #test only
+	select IBM_NEW_EMAC_TAH  #test only
+	default n
+
+config PPC_IBM_CELL_BLADE
+	bool "IBM Cell Blade"
+	depends on PPC_MULTIPLATFORM && PPC64
+	select PPC_CELL_NATIVE
+	select PPC_RTAS
+	select MMIO_NVRAM
+	select PPC_UDBG_16550
+	select UDBG_RTAS_CONSOLE
+
+config PPC_CELLEB
+	bool "Toshiba's Cell Reference Set 'Celleb' Architecture"
+	depends on PPC_MULTIPLATFORM && PPC64
+	select PPC_CELL
+	select PPC_CELL_NATIVE
+	select PPC_RTAS
+	select PPC_INDIRECT_IO
+	select PPC_OF_PLATFORM_PCI
+	select HAS_TXX9_SERIAL
+	select PPC_UDBG_BEAT
+	select USB_OHCI_BIG_ENDIAN_MMIO
+	select USB_EHCI_BIG_ENDIAN_MMIO
+
+menu "Cell Broadband Engine options"
+	depends on PPC_CELL
+
+config SPU_FS
+	tristate "SPU file system"
+	default m
+	depends on PPC_CELL
+	select SPU_BASE
+	select MEMORY_HOTPLUG
+	help
+	  The SPU file system is used to access Synergistic Processing
+	  Units on machines implementing the Broadband Processor
+	  Architecture.
+
+config SPU_FS_64K_LS
+	bool "Use 64K pages to map SPE local  store"
+	# we depend on PPC_MM_SLICES for now rather than selecting
+	# it because we depend on hugetlbfs hooks being present. We
+	# will fix that when the generic code has been improved to
+	# not require hijacking hugetlbfs hooks.
+	depends on SPU_FS && PPC_MM_SLICES && !PPC_64K_PAGES
+	default y
+	select PPC_HAS_HASH_64K
+	help
+	  This option causes SPE local stores to be mapped in process
+	  address spaces using 64K pages while the rest of the kernel
+	  uses 4K pages. This can improve performances of applications
+	  using multiple SPEs by lowering the TLB pressure on them.
+
+config SPU_TRACE
+	tristate "SPU event tracing support"
+	depends on SPU_FS && MARKERS
+	help
+	  This option allows reading a trace of spu-related events through
+	  the sputrace file in procfs.
+
+config SPU_BASE
+	bool
+	default n
+
+config CBE_RAS
+	bool "RAS features for bare metal Cell BE"
+	depends on PPC_CELL_NATIVE
+	default y
+
+config PPC_IBM_CELL_RESETBUTTON
+	bool "IBM Cell Blade Pinhole reset button"
+	depends on CBE_RAS && PPC_IBM_CELL_BLADE
+	default y
+	help
+	  Support Pinhole Resetbutton on IBM Cell blades.
+	  This adds a method to trigger system reset via front panel pinhole button.
+
+config PPC_IBM_CELL_POWERBUTTON
+	tristate "IBM Cell Blade power button"
+	depends on PPC_IBM_CELL_BLADE && PPC_PMI && INPUT_EVDEV
+	default y
+	help
+	  Support Powerbutton on IBM Cell blades.
+	  This will enable the powerbutton as an input device.
+
+config CBE_THERM
+	tristate "CBE thermal support"
+	default m
+	depends on CBE_RAS
+
+config CBE_CPUFREQ
+	tristate "CBE frequency scaling"
+	depends on CBE_RAS && CPU_FREQ
+	default m
+	help
+	  This adds the cpufreq driver for Cell BE processors.
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+	  If you don't have such processor, say N
+
+config CBE_CPUFREQ_PMI
+	tristate "CBE frequency scaling using PMI interface"
+	depends on CBE_CPUFREQ && PPC_PMI && EXPERIMENTAL
+	default n
+	help
+	  Select this, if you want to use the PMI interface
+	  to switch frequencies. Using PMI, the
+	  processor will not only be able to run at lower speed,
+	  but also at lower core voltage.
+
+config CBE_CPUFREQ_SPU_GOVERNOR
+	tristate "CBE frequency scaling based on SPU usage"
+	depends on SPU_FS && CPU_FREQ
+	default m
+	help
+	  This governor checks for spu usage to adjust the cpu frequency.
+	  If no spu is running on a given cpu, that cpu will be throttled to
+	  the minimal possible frequency.
+
+endmenu
+
+config OPROFILE_CELL
+	def_bool y
+	depends on PPC_CELL_NATIVE && (OPROFILE = m || OPROFILE = y)
+
diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile
new file mode 100644
index 0000000..7fd8308
--- /dev/null
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -0,0 +1,49 @@
+obj-$(CONFIG_PPC_CELL_NATIVE)		+= interrupt.o iommu.o setup.o \
+					   cbe_regs.o spider-pic.o \
+					   pervasive.o pmu.o io-workarounds.o \
+					   spider-pci.o
+obj-$(CONFIG_CBE_RAS)			+= ras.o
+
+obj-$(CONFIG_CBE_THERM)			+= cbe_thermal.o
+obj-$(CONFIG_CBE_CPUFREQ_PMI)		+= cbe_cpufreq_pmi.o
+obj-$(CONFIG_CBE_CPUFREQ)		+= cbe-cpufreq.o
+cbe-cpufreq-y				+= cbe_cpufreq_pervasive.o cbe_cpufreq.o
+obj-$(CONFIG_CBE_CPUFREQ_SPU_GOVERNOR)	+= cpufreq_spudemand.o
+
+obj-$(CONFIG_PPC_IBM_CELL_POWERBUTTON)	+= cbe_powerbutton.o
+
+ifeq ($(CONFIG_SMP),y)
+obj-$(CONFIG_PPC_CELL_NATIVE)		+= smp.o
+endif
+
+# needed only when building loadable spufs.ko
+spu-priv1-$(CONFIG_PPC_CELL_NATIVE)	+= spu_priv1_mmio.o
+
+spu-manage-$(CONFIG_PPC_CELLEB)		+= spu_manage.o
+spu-manage-$(CONFIG_PPC_CELL_NATIVE)	+= spu_manage.o
+
+obj-$(CONFIG_SPU_BASE)			+= spu_callbacks.o spu_base.o \
+					   spu_notify.o \
+					   spu_syscalls.o spu_fault.o \
+					   $(spu-priv1-y) \
+					   $(spu-manage-y) \
+					   spufs/
+
+obj-$(CONFIG_PCI_MSI)			+= axon_msi.o
+
+
+# celleb stuff
+ifeq ($(CONFIG_PPC_CELLEB),y)
+obj-y					+= celleb_setup.o \
+					   celleb_pci.o celleb_scc_epci.o \
+					   celleb_scc_pciex.o \
+					   celleb_scc_uhc.o \
+					   io-workarounds.o spider-pci.o \
+					   beat.o beat_htab.o beat_hvCall.o \
+					   beat_interrupt.o beat_iommu.o
+
+obj-$(CONFIG_SMP)			+= beat_smp.o
+obj-$(CONFIG_PPC_UDBG_BEAT)		+= beat_udbg.o
+obj-$(CONFIG_SERIAL_TXX9)		+= celleb_scc_sio.o
+obj-$(CONFIG_SPU_BASE)			+= beat_spu_priv1.o
+endif
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
new file mode 100644
index 0000000..0ce45c2
--- /dev/null
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -0,0 +1,505 @@
+/*
+ * Copyright 2007, Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/of_platform.h>
+#include <linux/debugfs.h>
+
+#include <asm/dcr.h>
+#include <asm/machdep.h>
+#include <asm/prom.h>
+
+
+/*
+ * MSIC registers, specified as offsets from dcr_base
+ */
+#define MSIC_CTRL_REG	0x0
+
+/* Base Address registers specify FIFO location in BE memory */
+#define MSIC_BASE_ADDR_HI_REG	0x3
+#define MSIC_BASE_ADDR_LO_REG	0x4
+
+/* Hold the read/write offsets into the FIFO */
+#define MSIC_READ_OFFSET_REG	0x5
+#define MSIC_WRITE_OFFSET_REG	0x6
+
+
+/* MSIC control register flags */
+#define MSIC_CTRL_ENABLE		0x0001
+#define MSIC_CTRL_FIFO_FULL_ENABLE	0x0002
+#define MSIC_CTRL_IRQ_ENABLE		0x0008
+#define MSIC_CTRL_FULL_STOP_ENABLE	0x0010
+
+/*
+ * The MSIC can be configured to use a FIFO of 32KB, 64KB, 128KB or 256KB.
+ * Currently we're using a 64KB FIFO size.
+ */
+#define MSIC_FIFO_SIZE_SHIFT	16
+#define MSIC_FIFO_SIZE_BYTES	(1 << MSIC_FIFO_SIZE_SHIFT)
+
+/*
+ * To configure the FIFO size as (1 << n) bytes, we write (n - 15) into bits
+ * 8-9 of the MSIC control reg.
+ */
+#define MSIC_CTRL_FIFO_SIZE	(((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300)
+
+/*
+ * We need to mask the read/write offsets to make sure they stay within
+ * the bounds of the FIFO. Also they should always be 16-byte aligned.
+ */
+#define MSIC_FIFO_SIZE_MASK	((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu)
+
+/* Each entry in the FIFO is 16 bytes, the first 4 bytes hold the irq # */
+#define MSIC_FIFO_ENTRY_SIZE	0x10
+
+
+struct axon_msic {
+	struct irq_host *irq_host;
+	__le32 *fifo_virt;
+	dma_addr_t fifo_phys;
+	dcr_host_t dcr_host;
+	u32 read_offset;
+#ifdef DEBUG
+	u32 __iomem *trigger;
+#endif
+};
+
+#ifdef DEBUG
+void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic);
+#else
+static inline void axon_msi_debug_setup(struct device_node *dn,
+					struct axon_msic *msic) { }
+#endif
+
+
+static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val)
+{
+	pr_debug("axon_msi: dcr_write(0x%x, 0x%x)\n", val, dcr_n);
+
+	dcr_write(msic->dcr_host, dcr_n, val);
+}
+
+static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
+{
+	struct axon_msic *msic = get_irq_data(irq);
+	u32 write_offset, msi;
+	int idx;
+	int retry = 0;
+
+	write_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG);
+	pr_debug("axon_msi: original write_offset 0x%x\n", write_offset);
+
+	/* write_offset doesn't wrap properly, so we have to mask it */
+	write_offset &= MSIC_FIFO_SIZE_MASK;
+
+	while (msic->read_offset != write_offset && retry < 100) {
+		idx  = msic->read_offset / sizeof(__le32);
+		msi  = le32_to_cpu(msic->fifo_virt[idx]);
+		msi &= 0xFFFF;
+
+		pr_debug("axon_msi: woff %x roff %x msi %x\n",
+			  write_offset, msic->read_offset, msi);
+
+		if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host) {
+			generic_handle_irq(msi);
+			msic->fifo_virt[idx] = cpu_to_le32(0xffffffff);
+		} else {
+			/*
+			 * Reading the MSIC_WRITE_OFFSET_REG does not
+			 * reliably flush the outstanding DMA to the
+			 * FIFO buffer. Here we were reading stale
+			 * data, so we need to retry.
+			 */
+			udelay(1);
+			retry++;
+			pr_debug("axon_msi: invalid irq 0x%x!\n", msi);
+			continue;
+		}
+
+		if (retry) {
+			pr_debug("axon_msi: late irq 0x%x, retry %d\n",
+				 msi, retry);
+			retry = 0;
+		}
+
+		msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
+		msic->read_offset &= MSIC_FIFO_SIZE_MASK;
+	}
+
+	if (retry) {
+		printk(KERN_WARNING "axon_msi: irq timed out\n");
+
+		msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
+		msic->read_offset &= MSIC_FIFO_SIZE_MASK;
+	}
+
+	desc->chip->eoi(irq);
+}
+
+static struct axon_msic *find_msi_translator(struct pci_dev *dev)
+{
+	struct irq_host *irq_host;
+	struct device_node *dn, *tmp;
+	const phandle *ph;
+	struct axon_msic *msic = NULL;
+
+	dn = of_node_get(pci_device_to_OF_node(dev));
+	if (!dn) {
+		dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n");
+		return NULL;
+	}
+
+	for (; dn; dn = of_get_next_parent(dn)) {
+		ph = of_get_property(dn, "msi-translator", NULL);
+		if (ph)
+			break;
+	}
+
+	if (!ph) {
+		dev_dbg(&dev->dev,
+			"axon_msi: no msi-translator property found\n");
+		goto out_error;
+	}
+
+	tmp = dn;
+	dn = of_find_node_by_phandle(*ph);
+	of_node_put(tmp);
+	if (!dn) {
+		dev_dbg(&dev->dev,
+			"axon_msi: msi-translator doesn't point to a node\n");
+		goto out_error;
+	}
+
+	irq_host = irq_find_host(dn);
+	if (!irq_host) {
+		dev_dbg(&dev->dev, "axon_msi: no irq_host found for node %s\n",
+			dn->full_name);
+		goto out_error;
+	}
+
+	msic = irq_host->host_data;
+
+out_error:
+	of_node_put(dn);
+
+	return msic;
+}
+
+static int axon_msi_check_device(struct pci_dev *dev, int nvec, int type)
+{
+	if (!find_msi_translator(dev))
+		return -ENODEV;
+
+	return 0;
+}
+
+static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg)
+{
+	struct device_node *dn;
+	struct msi_desc *entry;
+	int len;
+	const u32 *prop;
+
+	dn = of_node_get(pci_device_to_OF_node(dev));
+	if (!dn) {
+		dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n");
+		return -ENODEV;
+	}
+
+	entry = list_first_entry(&dev->msi_list, struct msi_desc, list);
+
+	for (; dn; dn = of_get_next_parent(dn)) {
+		if (entry->msi_attrib.is_64) {
+			prop = of_get_property(dn, "msi-address-64", &len);
+			if (prop)
+				break;
+		}
+
+		prop = of_get_property(dn, "msi-address-32", &len);
+		if (prop)
+			break;
+	}
+
+	if (!prop) {
+		dev_dbg(&dev->dev,
+			"axon_msi: no msi-address-(32|64) properties found\n");
+		return -ENOENT;
+	}
+
+	switch (len) {
+	case 8:
+		msg->address_hi = prop[0];
+		msg->address_lo = prop[1];
+		break;
+	case 4:
+		msg->address_hi = 0;
+		msg->address_lo = prop[0];
+		break;
+	default:
+		dev_dbg(&dev->dev,
+			"axon_msi: malformed msi-address-(32|64) property\n");
+		of_node_put(dn);
+		return -EINVAL;
+	}
+
+	of_node_put(dn);
+
+	return 0;
+}
+
+static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	unsigned int virq, rc;
+	struct msi_desc *entry;
+	struct msi_msg msg;
+	struct axon_msic *msic;
+
+	msic = find_msi_translator(dev);
+	if (!msic)
+		return -ENODEV;
+
+	rc = setup_msi_msg_address(dev, &msg);
+	if (rc)
+		return rc;
+
+	/* We rely on being able to stash a virq in a u16 */
+	BUILD_BUG_ON(NR_IRQS > 65536);
+
+	list_for_each_entry(entry, &dev->msi_list, list) {
+		virq = irq_create_direct_mapping(msic->irq_host);
+		if (virq == NO_IRQ) {
+			dev_warn(&dev->dev,
+				 "axon_msi: virq allocation failed!\n");
+			return -1;
+		}
+		dev_dbg(&dev->dev, "axon_msi: allocated virq 0x%x\n", virq);
+
+		set_irq_msi(virq, entry);
+		msg.data = virq;
+		write_msi_msg(virq, &msg);
+	}
+
+	return 0;
+}
+
+static void axon_msi_teardown_msi_irqs(struct pci_dev *dev)
+{
+	struct msi_desc *entry;
+
+	dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n");
+
+	list_for_each_entry(entry, &dev->msi_list, list) {
+		if (entry->irq == NO_IRQ)
+			continue;
+
+		set_irq_msi(entry->irq, NULL);
+		irq_dispose_mapping(entry->irq);
+	}
+}
+
+static struct irq_chip msic_irq_chip = {
+	.mask		= mask_msi_irq,
+	.unmask		= unmask_msi_irq,
+	.shutdown	= unmask_msi_irq,
+	.typename	= "AXON-MSI",
+};
+
+static int msic_host_map(struct irq_host *h, unsigned int virq,
+			 irq_hw_number_t hw)
+{
+	set_irq_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq);
+
+	return 0;
+}
+
+static struct irq_host_ops msic_host_ops = {
+	.map	= msic_host_map,
+};
+
+static int axon_msi_shutdown(struct of_device *device)
+{
+	struct axon_msic *msic = device->dev.platform_data;
+	u32 tmp;
+
+	pr_debug("axon_msi: disabling %s\n",
+		  msic->irq_host->of_node->full_name);
+	tmp  = dcr_read(msic->dcr_host, MSIC_CTRL_REG);
+	tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE;
+	msic_dcr_write(msic, MSIC_CTRL_REG, tmp);
+
+	return 0;
+}
+
+static int axon_msi_probe(struct of_device *device,
+			  const struct of_device_id *device_id)
+{
+	struct device_node *dn = device->node;
+	struct axon_msic *msic;
+	unsigned int virq;
+	int dcr_base, dcr_len;
+
+	pr_debug("axon_msi: setting up dn %s\n", dn->full_name);
+
+	msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL);
+	if (!msic) {
+		printk(KERN_ERR "axon_msi: couldn't allocate msic for %s\n",
+		       dn->full_name);
+		goto out;
+	}
+
+	dcr_base = dcr_resource_start(dn, 0);
+	dcr_len = dcr_resource_len(dn, 0);
+
+	if (dcr_base == 0 || dcr_len == 0) {
+		printk(KERN_ERR
+		       "axon_msi: couldn't parse dcr properties on %s\n",
+			dn->full_name);
+		goto out;
+	}
+
+	msic->dcr_host = dcr_map(dn, dcr_base, dcr_len);
+	if (!DCR_MAP_OK(msic->dcr_host)) {
+		printk(KERN_ERR "axon_msi: dcr_map failed for %s\n",
+		       dn->full_name);
+		goto out_free_msic;
+	}
+
+	msic->fifo_virt = dma_alloc_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES,
+					     &msic->fifo_phys, GFP_KERNEL);
+	if (!msic->fifo_virt) {
+		printk(KERN_ERR "axon_msi: couldn't allocate fifo for %s\n",
+		       dn->full_name);
+		goto out_free_msic;
+	}
+
+	virq = irq_of_parse_and_map(dn, 0);
+	if (virq == NO_IRQ) {
+		printk(KERN_ERR "axon_msi: irq parse and map failed for %s\n",
+		       dn->full_name);
+		goto out_free_fifo;
+	}
+	memset(msic->fifo_virt, 0xff, MSIC_FIFO_SIZE_BYTES);
+
+	msic->irq_host = irq_alloc_host(dn, IRQ_HOST_MAP_NOMAP,
+					NR_IRQS, &msic_host_ops, 0);
+	if (!msic->irq_host) {
+		printk(KERN_ERR "axon_msi: couldn't allocate irq_host for %s\n",
+		       dn->full_name);
+		goto out_free_fifo;
+	}
+
+	msic->irq_host->host_data = msic;
+
+	set_irq_data(virq, msic);
+	set_irq_chained_handler(virq, axon_msi_cascade);
+	pr_debug("axon_msi: irq 0x%x setup for axon_msi\n", virq);
+
+	/* Enable the MSIC hardware */
+	msic_dcr_write(msic, MSIC_BASE_ADDR_HI_REG, msic->fifo_phys >> 32);
+	msic_dcr_write(msic, MSIC_BASE_ADDR_LO_REG,
+				  msic->fifo_phys & 0xFFFFFFFF);
+	msic_dcr_write(msic, MSIC_CTRL_REG,
+			MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE |
+			MSIC_CTRL_FIFO_SIZE);
+
+	msic->read_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG)
+				& MSIC_FIFO_SIZE_MASK;
+
+	device->dev.platform_data = msic;
+
+	ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs;
+	ppc_md.teardown_msi_irqs = axon_msi_teardown_msi_irqs;
+	ppc_md.msi_check_device = axon_msi_check_device;
+
+	axon_msi_debug_setup(dn, msic);
+
+	printk(KERN_DEBUG "axon_msi: setup MSIC on %s\n", dn->full_name);
+
+	return 0;
+
+out_free_fifo:
+	dma_free_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES, msic->fifo_virt,
+			  msic->fifo_phys);
+out_free_msic:
+	kfree(msic);
+out:
+
+	return -1;
+}
+
+static const struct of_device_id axon_msi_device_id[] = {
+	{
+		.compatible	= "ibm,axon-msic"
+	},
+	{}
+};
+
+static struct of_platform_driver axon_msi_driver = {
+	.match_table	= axon_msi_device_id,
+	.probe		= axon_msi_probe,
+	.shutdown	= axon_msi_shutdown,
+	.driver		= {
+		.name	= "axon-msi"
+	},
+};
+
+static int __init axon_msi_init(void)
+{
+	return of_register_platform_driver(&axon_msi_driver);
+}
+subsys_initcall(axon_msi_init);
+
+
+#ifdef DEBUG
+static int msic_set(void *data, u64 val)
+{
+	struct axon_msic *msic = data;
+	out_le32(msic->trigger, val);
+	return 0;
+}
+
+static int msic_get(void *data, u64 *val)
+{
+	*val = 0;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_msic, msic_get, msic_set, "%llu\n");
+
+void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic)
+{
+	char name[8];
+	u64 addr;
+
+	addr = of_translate_address(dn, of_get_property(dn, "reg", NULL));
+	if (addr == OF_BAD_ADDR) {
+		pr_debug("axon_msi: couldn't translate reg property\n");
+		return;
+	}
+
+	msic->trigger = ioremap(addr, 0x4);
+	if (!msic->trigger) {
+		pr_debug("axon_msi: ioremap failed\n");
+		return;
+	}
+
+	snprintf(name, sizeof(name), "msic_%d", of_node_to_nid(dn));
+
+	if (!debugfs_create_file(name, 0600, powerpc_debugfs_root,
+				 msic, &fops_msic)) {
+		pr_debug("axon_msi: debugfs_create_file failed!\n");
+		return;
+	}
+}
+#endif /* DEBUG */
diff --git a/arch/powerpc/platforms/cell/beat.c b/arch/powerpc/platforms/cell/beat.c
new file mode 100644
index 0000000..48c690e
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat.c
@@ -0,0 +1,264 @@
+/*
+ * Simple routines for Celleb/Beat
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/rtc.h>
+#include <linux/interrupt.h>
+#include <linux/irqreturn.h>
+#include <linux/reboot.h>
+
+#include <asm/hvconsole.h>
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+
+#include "beat_wrapper.h"
+#include "beat.h"
+#include "beat_interrupt.h"
+
+static int beat_pm_poweroff_flag;
+
+void beat_restart(char *cmd)
+{
+	beat_shutdown_logical_partition(!beat_pm_poweroff_flag);
+}
+
+void beat_power_off(void)
+{
+	beat_shutdown_logical_partition(0);
+}
+
+u64 beat_halt_code = 0x1000000000000000UL;
+EXPORT_SYMBOL(beat_halt_code);
+
+void beat_halt(void)
+{
+	beat_shutdown_logical_partition(beat_halt_code);
+}
+
+int beat_set_rtc_time(struct rtc_time *rtc_time)
+{
+	u64 tim;
+	tim = mktime(rtc_time->tm_year+1900,
+		     rtc_time->tm_mon+1, rtc_time->tm_mday,
+		     rtc_time->tm_hour, rtc_time->tm_min, rtc_time->tm_sec);
+	if (beat_rtc_write(tim))
+		return -1;
+	return 0;
+}
+
+void beat_get_rtc_time(struct rtc_time *rtc_time)
+{
+	u64 tim;
+
+	if (beat_rtc_read(&tim))
+		tim = 0;
+	to_tm(tim, rtc_time);
+	rtc_time->tm_year -= 1900;
+	rtc_time->tm_mon -= 1;
+}
+
+#define	BEAT_NVRAM_SIZE	4096
+
+ssize_t beat_nvram_read(char *buf, size_t count, loff_t *index)
+{
+	unsigned int i;
+	unsigned long len;
+	char *p = buf;
+
+	if (*index >= BEAT_NVRAM_SIZE)
+		return -ENODEV;
+	i = *index;
+	if (i + count > BEAT_NVRAM_SIZE)
+		count = BEAT_NVRAM_SIZE - i;
+
+	for (; count != 0; count -= len) {
+		len = count;
+		if (len > BEAT_NVRW_CNT)
+			len = BEAT_NVRW_CNT;
+		if (beat_eeprom_read(i, len, p))
+			return -EIO;
+
+		p += len;
+		i += len;
+	}
+	*index = i;
+	return p - buf;
+}
+
+ssize_t beat_nvram_write(char *buf, size_t count, loff_t *index)
+{
+	unsigned int i;
+	unsigned long len;
+	char *p = buf;
+
+	if (*index >= BEAT_NVRAM_SIZE)
+		return -ENODEV;
+	i = *index;
+	if (i + count > BEAT_NVRAM_SIZE)
+		count = BEAT_NVRAM_SIZE - i;
+
+	for (; count != 0; count -= len) {
+		len = count;
+		if (len > BEAT_NVRW_CNT)
+			len = BEAT_NVRW_CNT;
+		if (beat_eeprom_write(i, len, p))
+			return -EIO;
+
+		p += len;
+		i += len;
+	}
+	*index = i;
+	return p - buf;
+}
+
+ssize_t beat_nvram_get_size(void)
+{
+	return BEAT_NVRAM_SIZE;
+}
+
+int beat_set_xdabr(unsigned long dabr)
+{
+	if (beat_set_dabr(dabr, DABRX_KERNEL | DABRX_USER))
+		return -1;
+	return 0;
+}
+
+int64_t beat_get_term_char(u64 vterm, u64 *len, u64 *t1, u64 *t2)
+{
+	u64 db[2];
+	s64 ret;
+
+	ret = beat_get_characters_from_console(vterm, len, (u8 *)db);
+	if (ret == 0) {
+		*t1 = db[0];
+		*t2 = db[1];
+	}
+	return ret;
+}
+EXPORT_SYMBOL(beat_get_term_char);
+
+int64_t beat_put_term_char(u64 vterm, u64 len, u64 t1, u64 t2)
+{
+	u64 db[2];
+
+	db[0] = t1;
+	db[1] = t2;
+	return beat_put_characters_to_console(vterm, len, (u8 *)db);
+}
+EXPORT_SYMBOL(beat_put_term_char);
+
+void beat_power_save(void)
+{
+	beat_pause(0);
+}
+
+#ifdef CONFIG_KEXEC
+void beat_kexec_cpu_down(int crash, int secondary)
+{
+	beatic_deinit_IRQ();
+}
+#endif
+
+static irqreturn_t beat_power_event(int virq, void *arg)
+{
+	printk(KERN_DEBUG "Beat: power button pressed\n");
+	beat_pm_poweroff_flag = 1;
+	ctrl_alt_del();
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t beat_reset_event(int virq, void *arg)
+{
+	printk(KERN_DEBUG "Beat: reset button pressed\n");
+	beat_pm_poweroff_flag = 0;
+	ctrl_alt_del();
+	return IRQ_HANDLED;
+}
+
+static struct beat_event_list {
+	const char *typecode;
+	irq_handler_t handler;
+	unsigned int virq;
+} beat_event_list[] = {
+	{ "power", beat_power_event, 0 },
+	{ "reset", beat_reset_event, 0 },
+};
+
+static int __init beat_register_event(void)
+{
+	u64 path[4], data[2];
+	int rc, i;
+	unsigned int virq;
+
+	for (i = 0; i < ARRAY_SIZE(beat_event_list); i++) {
+		struct beat_event_list *ev = &beat_event_list[i];
+
+		if (beat_construct_event_receive_port(data) != 0) {
+			printk(KERN_ERR "Beat: "
+			       "cannot construct event receive port for %s\n",
+			       ev->typecode);
+			return -EINVAL;
+		}
+
+		virq = irq_create_mapping(NULL, data[0]);
+		if (virq == NO_IRQ) {
+			printk(KERN_ERR "Beat: failed to get virtual IRQ"
+			       " for event receive port for %s\n",
+			       ev->typecode);
+			beat_destruct_event_receive_port(data[0]);
+			return -EIO;
+		}
+		ev->virq = virq;
+
+		rc = request_irq(virq, ev->handler, IRQF_DISABLED,
+				      ev->typecode, NULL);
+		if (rc != 0) {
+			printk(KERN_ERR "Beat: failed to request virtual IRQ"
+			       " for event receive port for %s\n",
+			       ev->typecode);
+			beat_destruct_event_receive_port(data[0]);
+			return rc;
+		}
+
+		path[0] = 0x1000000065780000ul;	/* 1,ex */
+		path[1] = 0x627574746f6e0000ul;	/* button */
+		path[2] = 0;
+		strncpy((char *)&path[2], ev->typecode, 8);
+		path[3] = 0;
+		data[1] = 0;
+
+		beat_create_repository_node(path, data);
+	}
+	return 0;
+}
+
+static int __init beat_event_init(void)
+{
+	if (!firmware_has_feature(FW_FEATURE_BEAT))
+		return -EINVAL;
+
+	beat_pm_poweroff_flag = 0;
+	return beat_register_event();
+}
+
+device_initcall(beat_event_init);
diff --git a/arch/powerpc/platforms/cell/beat.h b/arch/powerpc/platforms/cell/beat.h
new file mode 100644
index 0000000..32c8efc
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat.h
@@ -0,0 +1,39 @@
+/*
+ * Guest OS Interfaces.
+ *
+ * (C) Copyright 2006 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _CELLEB_BEAT_H
+#define _CELLEB_BEAT_H
+
+int64_t beat_get_term_char(uint64_t, uint64_t *, uint64_t *, uint64_t *);
+int64_t beat_put_term_char(uint64_t, uint64_t, uint64_t, uint64_t);
+int64_t beat_repository_encode(int, const char *, uint64_t[4]);
+void beat_restart(char *);
+void beat_power_off(void);
+void beat_halt(void);
+int beat_set_rtc_time(struct rtc_time *);
+void beat_get_rtc_time(struct rtc_time *);
+ssize_t beat_nvram_get_size(void);
+ssize_t beat_nvram_read(char *, size_t, loff_t *);
+ssize_t beat_nvram_write(char *, size_t, loff_t *);
+int beat_set_xdabr(unsigned long);
+void beat_power_save(void);
+void beat_kexec_cpu_down(int, int);
+
+#endif /* _CELLEB_BEAT_H */
diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
new file mode 100644
index 0000000..2e67bd8
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat_htab.c
@@ -0,0 +1,441 @@
+/*
+ * "Cell Reference Set" HTAB support.
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This code is based on arch/powerpc/platforms/pseries/lpar.c:
+ *  Copyright (C) 2001 Todd Inglett, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#undef DEBUG_LOW
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+
+#include "beat_wrapper.h"
+
+#ifdef DEBUG_LOW
+#define DBG_LOW(fmt...) do { udbg_printf(fmt); } while (0)
+#else
+#define DBG_LOW(fmt...) do { } while (0)
+#endif
+
+static DEFINE_SPINLOCK(beat_htab_lock);
+
+static inline unsigned int beat_read_mask(unsigned hpte_group)
+{
+	unsigned long hpte_v[5];
+	unsigned long rmask = 0;
+
+	beat_read_htab_entries(0, hpte_group + 0, hpte_v);
+	if (!(hpte_v[0] & HPTE_V_BOLTED))
+		rmask |= 0x8000;
+	if (!(hpte_v[1] & HPTE_V_BOLTED))
+		rmask |= 0x4000;
+	if (!(hpte_v[2] & HPTE_V_BOLTED))
+		rmask |= 0x2000;
+	if (!(hpte_v[3] & HPTE_V_BOLTED))
+		rmask |= 0x1000;
+	beat_read_htab_entries(0, hpte_group + 4, hpte_v);
+	if (!(hpte_v[0] & HPTE_V_BOLTED))
+		rmask |= 0x0800;
+	if (!(hpte_v[1] & HPTE_V_BOLTED))
+		rmask |= 0x0400;
+	if (!(hpte_v[2] & HPTE_V_BOLTED))
+		rmask |= 0x0200;
+	if (!(hpte_v[3] & HPTE_V_BOLTED))
+		rmask |= 0x0100;
+	hpte_group = ~hpte_group & (htab_hash_mask * HPTES_PER_GROUP);
+	beat_read_htab_entries(0, hpte_group + 0, hpte_v);
+	if (!(hpte_v[0] & HPTE_V_BOLTED))
+		rmask |= 0x80;
+	if (!(hpte_v[1] & HPTE_V_BOLTED))
+		rmask |= 0x40;
+	if (!(hpte_v[2] & HPTE_V_BOLTED))
+		rmask |= 0x20;
+	if (!(hpte_v[3] & HPTE_V_BOLTED))
+		rmask |= 0x10;
+	beat_read_htab_entries(0, hpte_group + 4, hpte_v);
+	if (!(hpte_v[0] & HPTE_V_BOLTED))
+		rmask |= 0x08;
+	if (!(hpte_v[1] & HPTE_V_BOLTED))
+		rmask |= 0x04;
+	if (!(hpte_v[2] & HPTE_V_BOLTED))
+		rmask |= 0x02;
+	if (!(hpte_v[3] & HPTE_V_BOLTED))
+		rmask |= 0x01;
+	return rmask;
+}
+
+static long beat_lpar_hpte_insert(unsigned long hpte_group,
+				  unsigned long va, unsigned long pa,
+				  unsigned long rflags, unsigned long vflags,
+				  int psize, int ssize)
+{
+	unsigned long lpar_rc;
+	unsigned long slot;
+	unsigned long hpte_v, hpte_r;
+
+	/* same as iseries */
+	if (vflags & HPTE_V_SECONDARY)
+		return -1;
+
+	if (!(vflags & HPTE_V_BOLTED))
+		DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
+			"rflags=%lx, vflags=%lx, psize=%d)\n",
+		hpte_group, va, pa, rflags, vflags, psize);
+
+	hpte_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M) |
+		vflags | HPTE_V_VALID;
+	hpte_r = hpte_encode_r(pa, psize) | rflags;
+
+	if (!(vflags & HPTE_V_BOLTED))
+		DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
+
+	if (rflags & _PAGE_NO_CACHE)
+		hpte_r &= ~_PAGE_COHERENT;
+
+	spin_lock(&beat_htab_lock);
+	lpar_rc = beat_read_mask(hpte_group);
+	if (lpar_rc == 0) {
+		if (!(vflags & HPTE_V_BOLTED))
+			DBG_LOW(" full\n");
+		spin_unlock(&beat_htab_lock);
+		return -1;
+	}
+
+	lpar_rc = beat_insert_htab_entry(0, hpte_group, lpar_rc << 48,
+		hpte_v, hpte_r, &slot);
+	spin_unlock(&beat_htab_lock);
+
+	/*
+	 * Since we try and ioremap PHBs we don't own, the pte insert
+	 * will fail. However we must catch the failure in hash_page
+	 * or we will loop forever, so return -2 in this case.
+	 */
+	if (unlikely(lpar_rc != 0)) {
+		if (!(vflags & HPTE_V_BOLTED))
+			DBG_LOW(" lpar err %lx\n", lpar_rc);
+		return -2;
+	}
+	if (!(vflags & HPTE_V_BOLTED))
+		DBG_LOW(" -> slot: %lx\n", slot);
+
+	/* We have to pass down the secondary bucket bit here as well */
+	return (slot ^ hpte_group) & 15;
+}
+
+static long beat_lpar_hpte_remove(unsigned long hpte_group)
+{
+	DBG_LOW("hpte_remove(group=%lx)\n", hpte_group);
+	return -1;
+}
+
+static unsigned long beat_lpar_hpte_getword0(unsigned long slot)
+{
+	unsigned long dword0, dword[5];
+	unsigned long lpar_rc;
+
+	lpar_rc = beat_read_htab_entries(0, slot & ~3UL, dword);
+
+	dword0 = dword[slot&3];
+
+	BUG_ON(lpar_rc != 0);
+
+	return dword0;
+}
+
+static void beat_lpar_hptab_clear(void)
+{
+	unsigned long size_bytes = 1UL << ppc64_pft_size;
+	unsigned long hpte_count = size_bytes >> 4;
+	int i;
+	unsigned long dummy0, dummy1;
+
+	/* TODO: Use bulk call */
+	for (i = 0; i < hpte_count; i++)
+		beat_write_htab_entry(0, i, 0, 0, -1UL, -1UL, &dummy0, &dummy1);
+}
+
+/*
+ * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
+ * the low 3 bits of flags happen to line up.  So no transform is needed.
+ * We can probably optimize here and assume the high bits of newpp are
+ * already zero.  For now I am paranoid.
+ */
+static long beat_lpar_hpte_updatepp(unsigned long slot,
+				    unsigned long newpp,
+				    unsigned long va,
+				    int psize, int ssize, int local)
+{
+	unsigned long lpar_rc;
+	unsigned long dummy0, dummy1, want_v;
+
+	want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+
+	DBG_LOW("    update: "
+		"avpnv=%016lx, slot=%016lx, psize: %d, newpp %016lx ... ",
+		want_v & HPTE_V_AVPN, slot, psize, newpp);
+
+	spin_lock(&beat_htab_lock);
+	dummy0 = beat_lpar_hpte_getword0(slot);
+	if ((dummy0 & ~0x7FUL) != (want_v & ~0x7FUL)) {
+		DBG_LOW("not found !\n");
+		spin_unlock(&beat_htab_lock);
+		return -1;
+	}
+
+	lpar_rc = beat_write_htab_entry(0, slot, 0, newpp, 0, 7, &dummy0,
+					&dummy1);
+	spin_unlock(&beat_htab_lock);
+	if (lpar_rc != 0 || dummy0 == 0) {
+		DBG_LOW("not found !\n");
+		return -1;
+	}
+
+	DBG_LOW("ok %lx %lx\n", dummy0, dummy1);
+
+	BUG_ON(lpar_rc != 0);
+
+	return 0;
+}
+
+static long beat_lpar_hpte_find(unsigned long va, int psize)
+{
+	unsigned long hash;
+	unsigned long i, j;
+	long slot;
+	unsigned long want_v, hpte_v;
+
+	hash = hpt_hash(va, mmu_psize_defs[psize].shift, MMU_SEGSIZE_256M);
+	want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+
+	for (j = 0; j < 2; j++) {
+		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+		for (i = 0; i < HPTES_PER_GROUP; i++) {
+			hpte_v = beat_lpar_hpte_getword0(slot);
+
+			if (HPTE_V_COMPARE(hpte_v, want_v)
+			    && (hpte_v & HPTE_V_VALID)
+			    && (!!(hpte_v & HPTE_V_SECONDARY) == j)) {
+				/* HPTE matches */
+				if (j)
+					slot = -slot;
+				return slot;
+			}
+			++slot;
+		}
+		hash = ~hash;
+	}
+
+	return -1;
+}
+
+static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
+					  unsigned long ea,
+					  int psize, int ssize)
+{
+	unsigned long lpar_rc, slot, vsid, va, dummy0, dummy1;
+
+	vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M);
+	va = (vsid << 28) | (ea & 0x0fffffff);
+
+	spin_lock(&beat_htab_lock);
+	slot = beat_lpar_hpte_find(va, psize);
+	BUG_ON(slot == -1);
+
+	lpar_rc = beat_write_htab_entry(0, slot, 0, newpp, 0, 7,
+		&dummy0, &dummy1);
+	spin_unlock(&beat_htab_lock);
+
+	BUG_ON(lpar_rc != 0);
+}
+
+static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
+					 int psize, int ssize, int local)
+{
+	unsigned long want_v;
+	unsigned long lpar_rc;
+	unsigned long dummy1, dummy2;
+	unsigned long flags;
+
+	DBG_LOW("    inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
+		slot, va, psize, local);
+	want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+
+	spin_lock_irqsave(&beat_htab_lock, flags);
+	dummy1 = beat_lpar_hpte_getword0(slot);
+
+	if ((dummy1 & ~0x7FUL) != (want_v & ~0x7FUL)) {
+		DBG_LOW("not found !\n");
+		spin_unlock_irqrestore(&beat_htab_lock, flags);
+		return;
+	}
+
+	lpar_rc = beat_write_htab_entry(0, slot, 0, 0, HPTE_V_VALID, 0,
+		&dummy1, &dummy2);
+	spin_unlock_irqrestore(&beat_htab_lock, flags);
+
+	BUG_ON(lpar_rc != 0);
+}
+
+void __init hpte_init_beat(void)
+{
+	ppc_md.hpte_invalidate	= beat_lpar_hpte_invalidate;
+	ppc_md.hpte_updatepp	= beat_lpar_hpte_updatepp;
+	ppc_md.hpte_updateboltedpp = beat_lpar_hpte_updateboltedpp;
+	ppc_md.hpte_insert	= beat_lpar_hpte_insert;
+	ppc_md.hpte_remove	= beat_lpar_hpte_remove;
+	ppc_md.hpte_clear_all	= beat_lpar_hptab_clear;
+}
+
+static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
+				  unsigned long va, unsigned long pa,
+				  unsigned long rflags, unsigned long vflags,
+				  int psize, int ssize)
+{
+	unsigned long lpar_rc;
+	unsigned long slot;
+	unsigned long hpte_v, hpte_r;
+
+	/* same as iseries */
+	if (vflags & HPTE_V_SECONDARY)
+		return -1;
+
+	if (!(vflags & HPTE_V_BOLTED))
+		DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
+			"rflags=%lx, vflags=%lx, psize=%d)\n",
+		hpte_group, va, pa, rflags, vflags, psize);
+
+	hpte_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M) |
+		vflags | HPTE_V_VALID;
+	hpte_r = hpte_encode_r(pa, psize) | rflags;
+
+	if (!(vflags & HPTE_V_BOLTED))
+		DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
+
+	if (rflags & _PAGE_NO_CACHE)
+		hpte_r &= ~_PAGE_COHERENT;
+
+	/* insert into not-volted entry */
+	lpar_rc = beat_insert_htab_entry3(0, hpte_group, hpte_v, hpte_r,
+		HPTE_V_BOLTED, 0, &slot);
+	/*
+	 * Since we try and ioremap PHBs we don't own, the pte insert
+	 * will fail. However we must catch the failure in hash_page
+	 * or we will loop forever, so return -2 in this case.
+	 */
+	if (unlikely(lpar_rc != 0)) {
+		if (!(vflags & HPTE_V_BOLTED))
+			DBG_LOW(" lpar err %lx\n", lpar_rc);
+		return -2;
+	}
+	if (!(vflags & HPTE_V_BOLTED))
+		DBG_LOW(" -> slot: %lx\n", slot);
+
+	/* We have to pass down the secondary bucket bit here as well */
+	return (slot ^ hpte_group) & 15;
+}
+
+/*
+ * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
+ * the low 3 bits of flags happen to line up.  So no transform is needed.
+ * We can probably optimize here and assume the high bits of newpp are
+ * already zero.  For now I am paranoid.
+ */
+static long beat_lpar_hpte_updatepp_v3(unsigned long slot,
+				    unsigned long newpp,
+				    unsigned long va,
+				    int psize, int ssize, int local)
+{
+	unsigned long lpar_rc;
+	unsigned long want_v;
+	unsigned long pss;
+
+	want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+	pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc;
+
+	DBG_LOW("    update: "
+		"avpnv=%016lx, slot=%016lx, psize: %d, newpp %016lx ... ",
+		want_v & HPTE_V_AVPN, slot, psize, newpp);
+
+	lpar_rc = beat_update_htab_permission3(0, slot, want_v, pss, 7, newpp);
+
+	if (lpar_rc == 0xfffffff7) {
+		DBG_LOW("not found !\n");
+		return -1;
+	}
+
+	DBG_LOW("ok\n");
+
+	BUG_ON(lpar_rc != 0);
+
+	return 0;
+}
+
+static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long va,
+					 int psize, int ssize, int local)
+{
+	unsigned long want_v;
+	unsigned long lpar_rc;
+	unsigned long pss;
+
+	DBG_LOW("    inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
+		slot, va, psize, local);
+	want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+	pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc;
+
+	lpar_rc = beat_invalidate_htab_entry3(0, slot, want_v, pss);
+
+	/* E_busy can be valid output: page may be already replaced */
+	BUG_ON(lpar_rc != 0 && lpar_rc != 0xfffffff7);
+}
+
+static int64_t _beat_lpar_hptab_clear_v3(void)
+{
+	return beat_clear_htab3(0);
+}
+
+static void beat_lpar_hptab_clear_v3(void)
+{
+	_beat_lpar_hptab_clear_v3();
+}
+
+void __init hpte_init_beat_v3(void)
+{
+	if (_beat_lpar_hptab_clear_v3() == 0) {
+		ppc_md.hpte_invalidate	= beat_lpar_hpte_invalidate_v3;
+		ppc_md.hpte_updatepp	= beat_lpar_hpte_updatepp_v3;
+		ppc_md.hpte_updateboltedpp = beat_lpar_hpte_updateboltedpp;
+		ppc_md.hpte_insert	= beat_lpar_hpte_insert_v3;
+		ppc_md.hpte_remove	= beat_lpar_hpte_remove;
+		ppc_md.hpte_clear_all	= beat_lpar_hptab_clear_v3;
+	} else {
+		ppc_md.hpte_invalidate	= beat_lpar_hpte_invalidate;
+		ppc_md.hpte_updatepp	= beat_lpar_hpte_updatepp;
+		ppc_md.hpte_updateboltedpp = beat_lpar_hpte_updateboltedpp;
+		ppc_md.hpte_insert	= beat_lpar_hpte_insert;
+		ppc_md.hpte_remove	= beat_lpar_hpte_remove;
+		ppc_md.hpte_clear_all	= beat_lpar_hptab_clear;
+	}
+}
diff --git a/arch/powerpc/platforms/cell/beat_hvCall.S b/arch/powerpc/platforms/cell/beat_hvCall.S
new file mode 100644
index 0000000..74c8174
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat_hvCall.S
@@ -0,0 +1,287 @@
+/*
+ * Beat hypervisor call I/F
+ *
+ * (C) Copyright 2007 TOSHIBA CORPORATION
+ *
+ * This code is based on arch/powerpc/platforms/pseries/hvCall.S.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <asm/ppc_asm.h>
+
+#define	STK_PARM(i)	(48 + ((i)-3)*8)
+
+/* Not implemented on Beat, now */
+#define	HCALL_INST_PRECALL
+#define	HCALL_INST_POSTCALL
+
+	.text
+
+#define	HVSC	.long	0x44000022
+
+/* Note: takes only 7 input parameters at maximum */
+_GLOBAL(beat_hcall_norets)
+	HMT_MEDIUM
+
+	mfcr	r0
+	stw	r0,8(r1)
+
+	HCALL_INST_PRECALL
+
+	mr	r11,r3
+	mr	r3,r4
+	mr	r4,r5
+	mr	r5,r6
+	mr	r6,r7
+	mr	r7,r8
+	mr	r8,r9
+
+	HVSC				/* invoke the hypervisor */
+
+	HCALL_INST_POSTCALL
+
+	lwz	r0,8(r1)
+	mtcrf	0xff,r0
+
+	blr				/* return r3 = status */
+
+/* Note: takes 8 input parameters at maximum */
+_GLOBAL(beat_hcall_norets8)
+	HMT_MEDIUM
+
+	mfcr	r0
+	stw	r0,8(r1)
+
+	HCALL_INST_PRECALL
+
+	mr	r11,r3
+	mr	r3,r4
+	mr	r4,r5
+	mr	r5,r6
+	mr	r6,r7
+	mr	r7,r8
+	mr	r8,r9
+	ld	r10,STK_PARM(r10)(r1)
+
+	HVSC				/* invoke the hypervisor */
+
+	HCALL_INST_POSTCALL
+
+	lwz	r0,8(r1)
+	mtcrf	0xff,r0
+
+	blr				/* return r3 = status */
+
+/* Note: takes only 6 input parameters, 1 output parameters at maximum */
+_GLOBAL(beat_hcall1)
+	HMT_MEDIUM
+
+	mfcr	r0
+	stw	r0,8(r1)
+
+	HCALL_INST_PRECALL
+
+	std	r4,STK_PARM(r4)(r1)	/* save ret buffer */
+
+	mr	r11,r3
+	mr	r3,r5
+	mr	r4,r6
+	mr	r5,r7
+	mr	r6,r8
+	mr	r7,r9
+	mr	r8,r10
+
+	HVSC				/* invoke the hypervisor */
+
+	HCALL_INST_POSTCALL
+
+	ld	r12,STK_PARM(r4)(r1)
+	std	r4,  0(r12)
+
+	lwz	r0,8(r1)
+	mtcrf	0xff,r0
+
+	blr				/* return r3 = status */
+
+/* Note: takes only 6 input parameters, 2 output parameters at maximum */
+_GLOBAL(beat_hcall2)
+	HMT_MEDIUM
+
+	mfcr	r0
+	stw	r0,8(r1)
+
+	HCALL_INST_PRECALL
+
+	std	r4,STK_PARM(r4)(r1)	/* save ret buffer */
+
+	mr	r11,r3
+	mr	r3,r5
+	mr	r4,r6
+	mr	r5,r7
+	mr	r6,r8
+	mr	r7,r9
+	mr	r8,r10
+
+	HVSC				/* invoke the hypervisor */
+
+	HCALL_INST_POSTCALL
+
+	ld	r12,STK_PARM(r4)(r1)
+	std	r4,  0(r12)
+	std	r5,  8(r12)
+
+	lwz	r0,8(r1)
+	mtcrf	0xff,r0
+
+	blr				/* return r3 = status */
+
+/* Note: takes only 6 input parameters, 3 output parameters at maximum */
+_GLOBAL(beat_hcall3)
+	HMT_MEDIUM
+
+	mfcr	r0
+	stw	r0,8(r1)
+
+	HCALL_INST_PRECALL
+
+	std	r4,STK_PARM(r4)(r1)	/* save ret buffer */
+
+	mr	r11,r3
+	mr	r3,r5
+	mr	r4,r6
+	mr	r5,r7
+	mr	r6,r8
+	mr	r7,r9
+	mr	r8,r10
+
+	HVSC				/* invoke the hypervisor */
+
+	HCALL_INST_POSTCALL
+
+	ld	r12,STK_PARM(r4)(r1)
+	std	r4,  0(r12)
+	std	r5,  8(r12)
+	std	r6, 16(r12)
+
+	lwz	r0,8(r1)
+	mtcrf	0xff,r0
+
+	blr				/* return r3 = status */
+
+/* Note: takes only 6 input parameters, 4 output parameters at maximum */
+_GLOBAL(beat_hcall4)
+	HMT_MEDIUM
+
+	mfcr	r0
+	stw	r0,8(r1)
+
+	HCALL_INST_PRECALL
+
+	std	r4,STK_PARM(r4)(r1)	/* save ret buffer */
+
+	mr	r11,r3
+	mr	r3,r5
+	mr	r4,r6
+	mr	r5,r7
+	mr	r6,r8
+	mr	r7,r9
+	mr	r8,r10
+
+	HVSC				/* invoke the hypervisor */
+
+	HCALL_INST_POSTCALL
+
+	ld	r12,STK_PARM(r4)(r1)
+	std	r4,  0(r12)
+	std	r5,  8(r12)
+	std	r6, 16(r12)
+	std	r7, 24(r12)
+
+	lwz	r0,8(r1)
+	mtcrf	0xff,r0
+
+	blr				/* return r3 = status */
+
+/* Note: takes only 6 input parameters, 5 output parameters at maximum */
+_GLOBAL(beat_hcall5)
+	HMT_MEDIUM
+
+	mfcr	r0
+	stw	r0,8(r1)
+
+	HCALL_INST_PRECALL
+
+	std	r4,STK_PARM(r4)(r1)	/* save ret buffer */
+
+	mr	r11,r3
+	mr	r3,r5
+	mr	r4,r6
+	mr	r5,r7
+	mr	r6,r8
+	mr	r7,r9
+	mr	r8,r10
+
+	HVSC				/* invoke the hypervisor */
+
+	HCALL_INST_POSTCALL
+
+	ld	r12,STK_PARM(r4)(r1)
+	std	r4,  0(r12)
+	std	r5,  8(r12)
+	std	r6, 16(r12)
+	std	r7, 24(r12)
+	std	r8, 32(r12)
+
+	lwz	r0,8(r1)
+	mtcrf	0xff,r0
+
+	blr				/* return r3 = status */
+
+/* Note: takes only 6 input parameters, 6 output parameters at maximum */
+_GLOBAL(beat_hcall6)
+	HMT_MEDIUM
+
+	mfcr	r0
+	stw	r0,8(r1)
+
+	HCALL_INST_PRECALL
+
+	std	r4,STK_PARM(r4)(r1)	/* save ret buffer */
+
+	mr	r11,r3
+	mr	r3,r5
+	mr	r4,r6
+	mr	r5,r7
+	mr	r6,r8
+	mr	r7,r9
+	mr	r8,r10
+
+	HVSC				/* invoke the hypervisor */
+
+	HCALL_INST_POSTCALL
+
+	ld	r12,STK_PARM(r4)(r1)
+	std	r4,  0(r12)
+	std	r5,  8(r12)
+	std	r6, 16(r12)
+	std	r7, 24(r12)
+	std	r8, 32(r12)
+	std	r9, 40(r12)
+
+	lwz	r0,8(r1)
+	mtcrf	0xff,r0
+
+	blr				/* return r3 = status */
diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c
new file mode 100644
index 0000000..192a935
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat_interrupt.c
@@ -0,0 +1,283 @@
+/*
+ * Celleb/Beat Interrupt controller
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+
+#include <asm/machdep.h>
+
+#include "beat_interrupt.h"
+#include "beat_wrapper.h"
+
+#define	MAX_IRQS	NR_IRQS
+static DEFINE_SPINLOCK(beatic_irq_mask_lock);
+static uint64_t	beatic_irq_mask_enable[(MAX_IRQS+255)/64];
+static uint64_t	beatic_irq_mask_ack[(MAX_IRQS+255)/64];
+
+static struct irq_host *beatic_host;
+
+/*
+ * In this implementation, "virq" == "IRQ plug number",
+ * "(irq_hw_number_t)hwirq" == "IRQ outlet number".
+ */
+
+/* assumption: locked */
+static inline void beatic_update_irq_mask(unsigned int irq_plug)
+{
+	int off;
+	unsigned long masks[4];
+
+	off = (irq_plug / 256) * 4;
+	masks[0] = beatic_irq_mask_enable[off + 0]
+		& beatic_irq_mask_ack[off + 0];
+	masks[1] = beatic_irq_mask_enable[off + 1]
+		& beatic_irq_mask_ack[off + 1];
+	masks[2] = beatic_irq_mask_enable[off + 2]
+		& beatic_irq_mask_ack[off + 2];
+	masks[3] = beatic_irq_mask_enable[off + 3]
+		& beatic_irq_mask_ack[off + 3];
+	if (beat_set_interrupt_mask(irq_plug&~255UL,
+		masks[0], masks[1], masks[2], masks[3]) != 0)
+		panic("Failed to set mask IRQ!");
+}
+
+static void beatic_mask_irq(unsigned int irq_plug)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&beatic_irq_mask_lock, flags);
+	beatic_irq_mask_enable[irq_plug/64] &= ~(1UL << (63 - (irq_plug%64)));
+	beatic_update_irq_mask(irq_plug);
+	spin_unlock_irqrestore(&beatic_irq_mask_lock, flags);
+}
+
+static void beatic_unmask_irq(unsigned int irq_plug)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&beatic_irq_mask_lock, flags);
+	beatic_irq_mask_enable[irq_plug/64] |= 1UL << (63 - (irq_plug%64));
+	beatic_update_irq_mask(irq_plug);
+	spin_unlock_irqrestore(&beatic_irq_mask_lock, flags);
+}
+
+static void beatic_ack_irq(unsigned int irq_plug)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&beatic_irq_mask_lock, flags);
+	beatic_irq_mask_ack[irq_plug/64] &= ~(1UL << (63 - (irq_plug%64)));
+	beatic_update_irq_mask(irq_plug);
+	spin_unlock_irqrestore(&beatic_irq_mask_lock, flags);
+}
+
+static void beatic_end_irq(unsigned int irq_plug)
+{
+	s64 err;
+	unsigned long flags;
+
+	err = beat_downcount_of_interrupt(irq_plug);
+	if (err != 0) {
+		if ((err & 0xFFFFFFFF) != 0xFFFFFFF5) /* -11: wrong state */
+			panic("Failed to downcount IRQ! Error = %16lx", err);
+
+		printk(KERN_ERR "IRQ over-downcounted, plug %d\n", irq_plug);
+	}
+	spin_lock_irqsave(&beatic_irq_mask_lock, flags);
+	beatic_irq_mask_ack[irq_plug/64] |= 1UL << (63 - (irq_plug%64));
+	beatic_update_irq_mask(irq_plug);
+	spin_unlock_irqrestore(&beatic_irq_mask_lock, flags);
+}
+
+static struct irq_chip beatic_pic = {
+	.typename = " CELL-BEAT ",
+	.unmask = beatic_unmask_irq,
+	.mask = beatic_mask_irq,
+	.eoi = beatic_end_irq,
+};
+
+/*
+ * Dispose binding hardware IRQ number (hw) and Virtuql IRQ number (virq),
+ * update flags.
+ *
+ * Note that the number (virq) is already assigned at upper layer.
+ */
+static void beatic_pic_host_unmap(struct irq_host *h, unsigned int virq)
+{
+	beat_destruct_irq_plug(virq);
+}
+
+/*
+ * Create or update binding hardware IRQ number (hw) and Virtuql
+ * IRQ number (virq). This is called only once for a given mapping.
+ *
+ * Note that the number (virq) is already assigned at upper layer.
+ */
+static int beatic_pic_host_map(struct irq_host *h, unsigned int virq,
+			       irq_hw_number_t hw)
+{
+	struct irq_desc *desc = get_irq_desc(virq);
+	int64_t	err;
+
+	err = beat_construct_and_connect_irq_plug(virq, hw);
+	if (err < 0)
+		return -EIO;
+
+	desc->status |= IRQ_LEVEL;
+	set_irq_chip_and_handler(virq, &beatic_pic, handle_fasteoi_irq);
+	return 0;
+}
+
+/*
+ * Update binding hardware IRQ number (hw) and Virtuql
+ * IRQ number (virq). This is called only once for a given mapping.
+ */
+static void beatic_pic_host_remap(struct irq_host *h, unsigned int virq,
+			       irq_hw_number_t hw)
+{
+	beat_construct_and_connect_irq_plug(virq, hw);
+}
+
+/*
+ * Translate device-tree interrupt spec to irq_hw_number_t style (ulong),
+ * to pass away to irq_create_mapping().
+ *
+ * Called from irq_create_of_mapping() only.
+ * Note: We have only 1 entry to translate.
+ */
+static int beatic_pic_host_xlate(struct irq_host *h, struct device_node *ct,
+				 u32 *intspec, unsigned int intsize,
+				 irq_hw_number_t *out_hwirq,
+				 unsigned int *out_flags)
+{
+	u64 *intspec2 = (u64 *)intspec;
+
+	*out_hwirq = *intspec2;
+	*out_flags |= IRQ_TYPE_LEVEL_LOW;
+	return 0;
+}
+
+static int beatic_pic_host_match(struct irq_host *h, struct device_node *np)
+{
+	/* Match all */
+	return 1;
+}
+
+static struct irq_host_ops beatic_pic_host_ops = {
+	.map = beatic_pic_host_map,
+	.remap = beatic_pic_host_remap,
+	.unmap = beatic_pic_host_unmap,
+	.xlate = beatic_pic_host_xlate,
+	.match = beatic_pic_host_match,
+};
+
+/*
+ * Get an IRQ number
+ * Note: returns VIRQ
+ */
+static inline unsigned int beatic_get_irq_plug(void)
+{
+	int i;
+	uint64_t	pending[4], ub;
+
+	for (i = 0; i < MAX_IRQS; i += 256) {
+		beat_detect_pending_interrupts(i, pending);
+		__asm__ ("cntlzd %0,%1":"=r"(ub):
+			"r"(pending[0] & beatic_irq_mask_enable[i/64+0]
+				       & beatic_irq_mask_ack[i/64+0]));
+		if (ub != 64)
+			return i + ub + 0;
+		__asm__ ("cntlzd %0,%1":"=r"(ub):
+			"r"(pending[1] & beatic_irq_mask_enable[i/64+1]
+				       & beatic_irq_mask_ack[i/64+1]));
+		if (ub != 64)
+			return i + ub + 64;
+		__asm__ ("cntlzd %0,%1":"=r"(ub):
+			"r"(pending[2] & beatic_irq_mask_enable[i/64+2]
+				       & beatic_irq_mask_ack[i/64+2]));
+		if (ub != 64)
+			return i + ub + 128;
+		__asm__ ("cntlzd %0,%1":"=r"(ub):
+			"r"(pending[3] & beatic_irq_mask_enable[i/64+3]
+				       & beatic_irq_mask_ack[i/64+3]));
+		if (ub != 64)
+			return i + ub + 192;
+	}
+
+	return NO_IRQ;
+}
+unsigned int beatic_get_irq(void)
+{
+	unsigned int ret;
+
+	ret = beatic_get_irq_plug();
+	if (ret != NO_IRQ)
+		beatic_ack_irq(ret);
+	return ret;
+}
+
+/*
+ */
+void __init beatic_init_IRQ(void)
+{
+	int	i;
+
+	memset(beatic_irq_mask_enable, 0, sizeof(beatic_irq_mask_enable));
+	memset(beatic_irq_mask_ack, 255, sizeof(beatic_irq_mask_ack));
+	for (i = 0; i < MAX_IRQS; i += 256)
+		beat_set_interrupt_mask(i, 0L, 0L, 0L, 0L);
+
+	/* Set out get_irq function */
+	ppc_md.get_irq = beatic_get_irq;
+
+	/* Allocate an irq host */
+	beatic_host = irq_alloc_host(NULL, IRQ_HOST_MAP_NOMAP, 0,
+				     &beatic_pic_host_ops,
+					 0);
+	BUG_ON(beatic_host == NULL);
+	irq_set_default_host(beatic_host);
+}
+
+#ifdef CONFIG_SMP
+
+/* Nullified to compile with SMP mode */
+void beatic_setup_cpu(int cpu)
+{
+}
+
+void beatic_cause_IPI(int cpu, int mesg)
+{
+}
+
+void beatic_request_IPIs(void)
+{
+}
+#endif /* CONFIG_SMP */
+
+void beatic_deinit_IRQ(void)
+{
+	int	i;
+
+	for (i = 1; i < NR_IRQS; i++)
+		beat_destruct_irq_plug(i);
+}
diff --git a/arch/powerpc/platforms/cell/beat_interrupt.h b/arch/powerpc/platforms/cell/beat_interrupt.h
new file mode 100644
index 0000000..b470fd0
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat_interrupt.h
@@ -0,0 +1,33 @@
+/*
+ * Celleb/Beat Interrupt controller
+ *
+ * (C) Copyright 2006 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef ASM_BEAT_PIC_H
+#define ASM_BEAT_PIC_H
+#ifdef __KERNEL__
+
+extern void beatic_init_IRQ(void);
+extern unsigned int beatic_get_irq(void);
+extern void beatic_cause_IPI(int cpu, int mesg);
+extern void beatic_request_IPIs(void);
+extern void beatic_setup_cpu(int);
+extern void beatic_deinit_IRQ(void);
+
+#endif
+#endif /* ASM_BEAT_PIC_H */
diff --git a/arch/powerpc/platforms/cell/beat_iommu.c b/arch/powerpc/platforms/cell/beat_iommu.c
new file mode 100644
index 0000000..93b0efd
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat_iommu.c
@@ -0,0 +1,116 @@
+/*
+ * Support for IOMMU on Celleb platform.
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/of_platform.h>
+
+#include <asm/machdep.h>
+
+#include "beat_wrapper.h"
+
+#define DMA_FLAGS 0xf800000000000000UL	/* r/w permitted, coherency required,
+					   strongest order */
+
+static int __init find_dma_window(u64 *io_space_id, u64 *ioid,
+				  u64 *base, u64 *size, u64 *io_page_size)
+{
+	struct device_node *dn;
+	const unsigned long *dma_window;
+
+	for_each_node_by_type(dn, "ioif") {
+		dma_window = of_get_property(dn, "toshiba,dma-window", NULL);
+		if (dma_window) {
+			*io_space_id = (dma_window[0] >> 32) & 0xffffffffUL;
+			*ioid = dma_window[0] & 0x7ffUL;
+			*base = dma_window[1];
+			*size = dma_window[2];
+			*io_page_size = 1 << dma_window[3];
+			of_node_put(dn);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static unsigned long celleb_dma_direct_offset;
+
+static void __init celleb_init_direct_mapping(void)
+{
+	u64 lpar_addr, io_addr;
+	u64 io_space_id, ioid, dma_base, dma_size, io_page_size;
+
+	if (!find_dma_window(&io_space_id, &ioid, &dma_base, &dma_size,
+			     &io_page_size)) {
+		pr_info("No dma window found !\n");
+		return;
+	}
+
+	for (lpar_addr = 0; lpar_addr < dma_size; lpar_addr += io_page_size) {
+		io_addr = lpar_addr + dma_base;
+		(void)beat_put_iopte(io_space_id, io_addr, lpar_addr,
+				     ioid, DMA_FLAGS);
+	}
+
+	celleb_dma_direct_offset = dma_base;
+}
+
+static void celleb_dma_dev_setup(struct device *dev)
+{
+	dev->archdata.dma_ops = get_pci_dma_ops();
+	dev->archdata.dma_data = (void *)celleb_dma_direct_offset;
+}
+
+static void celleb_pci_dma_dev_setup(struct pci_dev *pdev)
+{
+	celleb_dma_dev_setup(&pdev->dev);
+}
+
+static int celleb_of_bus_notify(struct notifier_block *nb,
+				unsigned long action, void *data)
+{
+	struct device *dev = data;
+
+	/* We are only intereted in device addition */
+	if (action != BUS_NOTIFY_ADD_DEVICE)
+		return 0;
+
+	celleb_dma_dev_setup(dev);
+
+	return 0;
+}
+
+static struct notifier_block celleb_of_bus_notifier = {
+	.notifier_call = celleb_of_bus_notify
+};
+
+static int __init celleb_init_iommu(void)
+{
+	celleb_init_direct_mapping();
+	set_pci_dma_ops(&dma_direct_ops);
+	ppc_md.pci_dma_dev_setup = celleb_pci_dma_dev_setup;
+	bus_register_notifier(&of_platform_bus_type, &celleb_of_bus_notifier);
+
+	return 0;
+}
+
+machine_arch_initcall(celleb_beat, celleb_init_iommu);
diff --git a/arch/powerpc/platforms/cell/beat_smp.c b/arch/powerpc/platforms/cell/beat_smp.c
new file mode 100644
index 0000000..26efc20
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat_smp.c
@@ -0,0 +1,124 @@
+/*
+ * SMP support for Celleb platform. (Incomplete)
+ *
+ * (C) Copyright 2006 TOSHIBA CORPORATION
+ *
+ * This code is based on arch/powerpc/platforms/cell/smp.c:
+ * Dave Engebretsen, Peter Bergner, and
+ * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
+ * Plus various changes from other IBM teams...
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/threads.h>
+#include <linux/cpu.h>
+
+#include <asm/irq.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+
+#include "beat_interrupt.h"
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/*
+ * The primary thread of each non-boot processor is recorded here before
+ * smp init.
+ */
+/* static cpumask_t of_spin_map; */
+
+/**
+ * smp_startup_cpu() - start the given cpu
+ *
+ * At boot time, there is nothing to do for primary threads which were
+ * started from Open Firmware.  For anything else, call RTAS with the
+ * appropriate start location.
+ *
+ * Returns:
+ *	0	- failure
+ *	1	- success
+ */
+static inline int __devinit smp_startup_cpu(unsigned int lcpu)
+{
+	return 0;
+}
+
+static void smp_beatic_message_pass(int target, int msg)
+{
+	unsigned int i;
+
+	if (target < NR_CPUS) {
+		beatic_cause_IPI(target, msg);
+	} else {
+		for_each_online_cpu(i) {
+			if (target == MSG_ALL_BUT_SELF
+			    && i == smp_processor_id())
+				continue;
+			beatic_cause_IPI(i, msg);
+		}
+	}
+}
+
+static int __init smp_beatic_probe(void)
+{
+	return cpus_weight(cpu_possible_map);
+}
+
+static void __devinit smp_beatic_setup_cpu(int cpu)
+{
+	beatic_setup_cpu(cpu);
+}
+
+static void __devinit smp_celleb_kick_cpu(int nr)
+{
+	BUG_ON(nr < 0 || nr >= NR_CPUS);
+
+	if (!smp_startup_cpu(nr))
+		return;
+}
+
+static int smp_celleb_cpu_bootable(unsigned int nr)
+{
+	return 1;
+}
+static struct smp_ops_t bpa_beatic_smp_ops = {
+	.message_pass	= smp_beatic_message_pass,
+	.probe		= smp_beatic_probe,
+	.kick_cpu	= smp_celleb_kick_cpu,
+	.setup_cpu	= smp_beatic_setup_cpu,
+	.cpu_bootable	= smp_celleb_cpu_bootable,
+};
+
+/* This is called very early */
+void __init smp_init_celleb(void)
+{
+	DBG(" -> smp_init_celleb()\n");
+
+	smp_ops = &bpa_beatic_smp_ops;
+
+	DBG(" <- smp_init_celleb()\n");
+}
diff --git a/arch/powerpc/platforms/cell/beat_spu_priv1.c b/arch/powerpc/platforms/cell/beat_spu_priv1.c
new file mode 100644
index 0000000..bcc17f7
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat_spu_priv1.c
@@ -0,0 +1,207 @@
+/*
+ * spu hypervisor abstraction for Beat
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/module.h>
+
+#include <asm/types.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+
+#include "beat_wrapper.h"
+
+static inline void _int_mask_set(struct spu *spu, int class, u64 mask)
+{
+	spu->shadow_int_mask_RW[class] = mask;
+	beat_set_irq_mask_for_spe(spu->spe_id, class, mask);
+}
+
+static inline u64 _int_mask_get(struct spu *spu, int class)
+{
+	return spu->shadow_int_mask_RW[class];
+}
+
+static void int_mask_set(struct spu *spu, int class, u64 mask)
+{
+	_int_mask_set(spu, class, mask);
+}
+
+static u64 int_mask_get(struct spu *spu, int class)
+{
+	return _int_mask_get(spu, class);
+}
+
+static void int_mask_and(struct spu *spu, int class, u64 mask)
+{
+	u64 old_mask;
+	old_mask = _int_mask_get(spu, class);
+	_int_mask_set(spu, class, old_mask & mask);
+}
+
+static void int_mask_or(struct spu *spu, int class, u64 mask)
+{
+	u64 old_mask;
+	old_mask = _int_mask_get(spu, class);
+	_int_mask_set(spu, class, old_mask | mask);
+}
+
+static void int_stat_clear(struct spu *spu, int class, u64 stat)
+{
+	beat_clear_interrupt_status_of_spe(spu->spe_id, class, stat);
+}
+
+static u64 int_stat_get(struct spu *spu, int class)
+{
+	u64 int_stat;
+	beat_get_interrupt_status_of_spe(spu->spe_id, class, &int_stat);
+	return int_stat;
+}
+
+static void cpu_affinity_set(struct spu *spu, int cpu)
+{
+	return;
+}
+
+static u64 mfc_dar_get(struct spu *spu)
+{
+	u64 dar;
+	beat_get_spe_privileged_state_1_registers(
+		spu->spe_id,
+		offsetof(struct spu_priv1, mfc_dar_RW), &dar);
+	return dar;
+}
+
+static u64 mfc_dsisr_get(struct spu *spu)
+{
+	u64 dsisr;
+	beat_get_spe_privileged_state_1_registers(
+		spu->spe_id,
+		offsetof(struct spu_priv1, mfc_dsisr_RW), &dsisr);
+	return dsisr;
+}
+
+static void mfc_dsisr_set(struct spu *spu, u64 dsisr)
+{
+	beat_set_spe_privileged_state_1_registers(
+		spu->spe_id,
+		offsetof(struct spu_priv1, mfc_dsisr_RW), dsisr);
+}
+
+static void mfc_sdr_setup(struct spu *spu)
+{
+	return;
+}
+
+static void mfc_sr1_set(struct spu *spu, u64 sr1)
+{
+	beat_set_spe_privileged_state_1_registers(
+		spu->spe_id,
+		offsetof(struct spu_priv1, mfc_sr1_RW), sr1);
+}
+
+static u64 mfc_sr1_get(struct spu *spu)
+{
+	u64 sr1;
+	beat_get_spe_privileged_state_1_registers(
+		spu->spe_id,
+		offsetof(struct spu_priv1, mfc_sr1_RW), &sr1);
+	return sr1;
+}
+
+static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id)
+{
+	beat_set_spe_privileged_state_1_registers(
+		spu->spe_id,
+		offsetof(struct spu_priv1, mfc_tclass_id_RW), tclass_id);
+}
+
+static u64 mfc_tclass_id_get(struct spu *spu)
+{
+	u64 tclass_id;
+	beat_get_spe_privileged_state_1_registers(
+		spu->spe_id,
+		offsetof(struct spu_priv1, mfc_tclass_id_RW), &tclass_id);
+	return tclass_id;
+}
+
+static void tlb_invalidate(struct spu *spu)
+{
+	beat_set_spe_privileged_state_1_registers(
+		spu->spe_id,
+		offsetof(struct spu_priv1, tlb_invalidate_entry_W), 0ul);
+}
+
+static void resource_allocation_groupID_set(struct spu *spu, u64 id)
+{
+	beat_set_spe_privileged_state_1_registers(
+		spu->spe_id,
+		offsetof(struct spu_priv1, resource_allocation_groupID_RW),
+		id);
+}
+
+static u64 resource_allocation_groupID_get(struct spu *spu)
+{
+	u64 id;
+	beat_get_spe_privileged_state_1_registers(
+		spu->spe_id,
+		offsetof(struct spu_priv1, resource_allocation_groupID_RW),
+		&id);
+	return id;
+}
+
+static void resource_allocation_enable_set(struct spu *spu, u64 enable)
+{
+	beat_set_spe_privileged_state_1_registers(
+		spu->spe_id,
+		offsetof(struct spu_priv1, resource_allocation_enable_RW),
+		enable);
+}
+
+static u64 resource_allocation_enable_get(struct spu *spu)
+{
+	u64 enable;
+	beat_get_spe_privileged_state_1_registers(
+		spu->spe_id,
+		offsetof(struct spu_priv1, resource_allocation_enable_RW),
+		&enable);
+	return enable;
+}
+
+const struct spu_priv1_ops spu_priv1_beat_ops = {
+	.int_mask_and = int_mask_and,
+	.int_mask_or = int_mask_or,
+	.int_mask_set = int_mask_set,
+	.int_mask_get = int_mask_get,
+	.int_stat_clear = int_stat_clear,
+	.int_stat_get = int_stat_get,
+	.cpu_affinity_set = cpu_affinity_set,
+	.mfc_dar_get = mfc_dar_get,
+	.mfc_dsisr_get = mfc_dsisr_get,
+	.mfc_dsisr_set = mfc_dsisr_set,
+	.mfc_sdr_setup = mfc_sdr_setup,
+	.mfc_sr1_set = mfc_sr1_set,
+	.mfc_sr1_get = mfc_sr1_get,
+	.mfc_tclass_id_set = mfc_tclass_id_set,
+	.mfc_tclass_id_get = mfc_tclass_id_get,
+	.tlb_invalidate = tlb_invalidate,
+	.resource_allocation_groupID_set = resource_allocation_groupID_set,
+	.resource_allocation_groupID_get = resource_allocation_groupID_get,
+	.resource_allocation_enable_set = resource_allocation_enable_set,
+	.resource_allocation_enable_get = resource_allocation_enable_get,
+};
diff --git a/arch/powerpc/platforms/cell/beat_syscall.h b/arch/powerpc/platforms/cell/beat_syscall.h
new file mode 100644
index 0000000..8580dc7
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat_syscall.h
@@ -0,0 +1,164 @@
+/*
+ * Beat hypervisor call numbers
+ *
+ * (C) Copyright 2004-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef BEAT_BEAT_syscall_H
+#define BEAT_BEAT_syscall_H
+
+#ifdef	__ASSEMBLY__
+#define	__BEAT_ADD_VENDOR_ID(__x, __v)	((__v)<<60|(__x))
+#else
+#define	__BEAT_ADD_VENDOR_ID(__x, __v)	((u64)(__v)<<60|(__x))
+#endif
+#define HV_allocate_memory __BEAT_ADD_VENDOR_ID(0, 0)
+#define HV_construct_virtual_address_space __BEAT_ADD_VENDOR_ID(2, 0)
+#define HV_destruct_virtual_address_space __BEAT_ADD_VENDOR_ID(10, 0)
+#define HV_get_virtual_address_space_id_of_ppe __BEAT_ADD_VENDOR_ID(4, 0)
+#define HV_query_logical_partition_address_region_info 			\
+						__BEAT_ADD_VENDOR_ID(6, 0)
+#define HV_release_memory __BEAT_ADD_VENDOR_ID(13, 0)
+#define HV_select_virtual_address_space __BEAT_ADD_VENDOR_ID(7, 0)
+#define HV_load_range_registers __BEAT_ADD_VENDOR_ID(68, 0)
+#define HV_set_ppe_l2cache_rmt_entry __BEAT_ADD_VENDOR_ID(70, 0)
+#define HV_set_ppe_tlb_rmt_entry __BEAT_ADD_VENDOR_ID(71, 0)
+#define HV_set_spe_tlb_rmt_entry __BEAT_ADD_VENDOR_ID(72, 0)
+#define HV_get_io_address_translation_fault_info __BEAT_ADD_VENDOR_ID(14, 0)
+#define HV_get_iopte __BEAT_ADD_VENDOR_ID(16, 0)
+#define HV_preload_iopt_cache __BEAT_ADD_VENDOR_ID(17, 0)
+#define HV_put_iopte __BEAT_ADD_VENDOR_ID(15, 0)
+#define HV_connect_event_ports __BEAT_ADD_VENDOR_ID(21, 0)
+#define HV_construct_event_receive_port __BEAT_ADD_VENDOR_ID(18, 0)
+#define HV_destruct_event_receive_port __BEAT_ADD_VENDOR_ID(19, 0)
+#define HV_destruct_event_send_port __BEAT_ADD_VENDOR_ID(22, 0)
+#define HV_get_state_of_event_send_port __BEAT_ADD_VENDOR_ID(25, 0)
+#define HV_request_to_connect_event_ports __BEAT_ADD_VENDOR_ID(20, 0)
+#define HV_send_event_externally __BEAT_ADD_VENDOR_ID(23, 0)
+#define HV_send_event_locally __BEAT_ADD_VENDOR_ID(24, 0)
+#define HV_construct_and_connect_irq_plug __BEAT_ADD_VENDOR_ID(28, 0)
+#define HV_destruct_irq_plug __BEAT_ADD_VENDOR_ID(29, 0)
+#define HV_detect_pending_interrupts __BEAT_ADD_VENDOR_ID(26, 0)
+#define HV_end_of_interrupt __BEAT_ADD_VENDOR_ID(27, 0)
+#define HV_assign_control_signal_notification_port __BEAT_ADD_VENDOR_ID(45, 0)
+#define HV_end_of_control_signal_processing __BEAT_ADD_VENDOR_ID(48, 0)
+#define HV_get_control_signal __BEAT_ADD_VENDOR_ID(46, 0)
+#define HV_set_irq_mask_for_spe __BEAT_ADD_VENDOR_ID(61, 0)
+#define HV_shutdown_logical_partition __BEAT_ADD_VENDOR_ID(44, 0)
+#define HV_connect_message_ports __BEAT_ADD_VENDOR_ID(35, 0)
+#define HV_destruct_message_port __BEAT_ADD_VENDOR_ID(36, 0)
+#define HV_receive_message __BEAT_ADD_VENDOR_ID(37, 0)
+#define HV_get_message_port_info __BEAT_ADD_VENDOR_ID(34, 0)
+#define HV_request_to_connect_message_ports __BEAT_ADD_VENDOR_ID(33, 0)
+#define HV_send_message __BEAT_ADD_VENDOR_ID(32, 0)
+#define HV_get_logical_ppe_id __BEAT_ADD_VENDOR_ID(69, 0)
+#define HV_pause __BEAT_ADD_VENDOR_ID(9, 0)
+#define HV_destruct_shared_memory_handle __BEAT_ADD_VENDOR_ID(51, 0)
+#define HV_get_shared_memory_info __BEAT_ADD_VENDOR_ID(52, 0)
+#define HV_permit_sharing_memory __BEAT_ADD_VENDOR_ID(50, 0)
+#define HV_request_to_attach_shared_memory __BEAT_ADD_VENDOR_ID(49, 0)
+#define HV_enable_logical_spe_execution __BEAT_ADD_VENDOR_ID(55, 0)
+#define HV_construct_logical_spe __BEAT_ADD_VENDOR_ID(53, 0)
+#define HV_disable_logical_spe_execution __BEAT_ADD_VENDOR_ID(56, 0)
+#define HV_destruct_logical_spe __BEAT_ADD_VENDOR_ID(54, 0)
+#define HV_sense_spe_execution_status __BEAT_ADD_VENDOR_ID(58, 0)
+#define HV_insert_htab_entry __BEAT_ADD_VENDOR_ID(101, 0)
+#define HV_read_htab_entries __BEAT_ADD_VENDOR_ID(95, 0)
+#define HV_write_htab_entry __BEAT_ADD_VENDOR_ID(94, 0)
+#define HV_assign_io_address_translation_fault_port 			\
+						__BEAT_ADD_VENDOR_ID(100, 0)
+#define HV_set_interrupt_mask __BEAT_ADD_VENDOR_ID(73, 0)
+#define HV_get_logical_partition_id __BEAT_ADD_VENDOR_ID(74, 0)
+#define HV_create_repository_node2 __BEAT_ADD_VENDOR_ID(90, 0)
+#define HV_create_repository_node __BEAT_ADD_VENDOR_ID(90, 0) /* alias */
+#define HV_get_repository_node_value2 __BEAT_ADD_VENDOR_ID(91, 0)
+#define HV_get_repository_node_value __BEAT_ADD_VENDOR_ID(91, 0) /* alias */
+#define HV_modify_repository_node_value2 __BEAT_ADD_VENDOR_ID(92, 0)
+#define HV_modify_repository_node_value __BEAT_ADD_VENDOR_ID(92, 0) /* alias */
+#define HV_remove_repository_node2 __BEAT_ADD_VENDOR_ID(93, 0)
+#define HV_remove_repository_node __BEAT_ADD_VENDOR_ID(93, 0) /* alias */
+#define HV_cancel_shared_memory __BEAT_ADD_VENDOR_ID(104, 0)
+#define HV_clear_interrupt_status_of_spe __BEAT_ADD_VENDOR_ID(206, 0)
+#define HV_construct_spe_irq_outlet __BEAT_ADD_VENDOR_ID(80, 0)
+#define HV_destruct_spe_irq_outlet __BEAT_ADD_VENDOR_ID(81, 0)
+#define HV_disconnect_ipspc_service __BEAT_ADD_VENDOR_ID(88, 0)
+#define HV_execute_ipspc_command __BEAT_ADD_VENDOR_ID(86, 0)
+#define HV_get_interrupt_status_of_spe __BEAT_ADD_VENDOR_ID(205, 0)
+#define HV_get_spe_privileged_state_1_registers __BEAT_ADD_VENDOR_ID(208, 0)
+#define HV_permit_use_of_ipspc_service __BEAT_ADD_VENDOR_ID(85, 0)
+#define HV_reinitialize_logical_spe __BEAT_ADD_VENDOR_ID(82, 0)
+#define HV_request_ipspc_service __BEAT_ADD_VENDOR_ID(84, 0)
+#define HV_stop_ipspc_command __BEAT_ADD_VENDOR_ID(87, 0)
+#define HV_set_spe_privileged_state_1_registers __BEAT_ADD_VENDOR_ID(204, 0)
+#define HV_get_status_of_ipspc_service __BEAT_ADD_VENDOR_ID(203, 0)
+#define HV_put_characters_to_console __BEAT_ADD_VENDOR_ID(0x101, 1)
+#define HV_get_characters_from_console __BEAT_ADD_VENDOR_ID(0x102, 1)
+#define HV_get_base_clock __BEAT_ADD_VENDOR_ID(0x111, 1)
+#define HV_set_base_clock __BEAT_ADD_VENDOR_ID(0x112, 1)
+#define HV_get_frame_cycle __BEAT_ADD_VENDOR_ID(0x114, 1)
+#define HV_disable_console __BEAT_ADD_VENDOR_ID(0x115, 1)
+#define HV_disable_all_console __BEAT_ADD_VENDOR_ID(0x116, 1)
+#define HV_oneshot_timer __BEAT_ADD_VENDOR_ID(0x117, 1)
+#define HV_set_dabr __BEAT_ADD_VENDOR_ID(0x118, 1)
+#define HV_get_dabr __BEAT_ADD_VENDOR_ID(0x119, 1)
+#define HV_start_hv_stats __BEAT_ADD_VENDOR_ID(0x21c, 1)
+#define HV_stop_hv_stats __BEAT_ADD_VENDOR_ID(0x21d, 1)
+#define HV_get_hv_stats __BEAT_ADD_VENDOR_ID(0x21e, 1)
+#define HV_get_hv_error_stats __BEAT_ADD_VENDOR_ID(0x221, 1)
+#define HV_get_stats __BEAT_ADD_VENDOR_ID(0x224, 1)
+#define HV_get_heap_stats __BEAT_ADD_VENDOR_ID(0x225, 1)
+#define HV_get_memory_stats __BEAT_ADD_VENDOR_ID(0x227, 1)
+#define HV_get_memory_detail __BEAT_ADD_VENDOR_ID(0x228, 1)
+#define HV_set_priority_of_irq_outlet __BEAT_ADD_VENDOR_ID(0x122, 1)
+#define HV_get_physical_spe_by_reservation_id __BEAT_ADD_VENDOR_ID(0x128, 1)
+#define HV_get_spe_context __BEAT_ADD_VENDOR_ID(0x129, 1)
+#define HV_set_spe_context __BEAT_ADD_VENDOR_ID(0x12a, 1)
+#define HV_downcount_of_interrupt __BEAT_ADD_VENDOR_ID(0x12e, 1)
+#define HV_peek_spe_context __BEAT_ADD_VENDOR_ID(0x12f, 1)
+#define HV_read_bpa_register __BEAT_ADD_VENDOR_ID(0x131, 1)
+#define HV_write_bpa_register __BEAT_ADD_VENDOR_ID(0x132, 1)
+#define HV_map_context_table_of_spe __BEAT_ADD_VENDOR_ID(0x137, 1)
+#define HV_get_slb_for_logical_spe __BEAT_ADD_VENDOR_ID(0x138, 1)
+#define HV_set_slb_for_logical_spe __BEAT_ADD_VENDOR_ID(0x139, 1)
+#define HV_init_pm __BEAT_ADD_VENDOR_ID(0x150, 1)
+#define HV_set_pm_signal __BEAT_ADD_VENDOR_ID(0x151, 1)
+#define HV_get_pm_signal __BEAT_ADD_VENDOR_ID(0x152, 1)
+#define HV_set_pm_config __BEAT_ADD_VENDOR_ID(0x153, 1)
+#define HV_get_pm_config __BEAT_ADD_VENDOR_ID(0x154, 1)
+#define HV_get_inner_trace_data __BEAT_ADD_VENDOR_ID(0x155, 1)
+#define HV_set_ext_trace_buffer __BEAT_ADD_VENDOR_ID(0x156, 1)
+#define HV_get_ext_trace_buffer __BEAT_ADD_VENDOR_ID(0x157, 1)
+#define HV_set_pm_interrupt __BEAT_ADD_VENDOR_ID(0x158, 1)
+#define HV_get_pm_interrupt __BEAT_ADD_VENDOR_ID(0x159, 1)
+#define HV_kick_pm __BEAT_ADD_VENDOR_ID(0x160, 1)
+#define HV_construct_pm_context __BEAT_ADD_VENDOR_ID(0x164, 1)
+#define HV_destruct_pm_context __BEAT_ADD_VENDOR_ID(0x165, 1)
+#define HV_be_slow __BEAT_ADD_VENDOR_ID(0x170, 1)
+#define HV_assign_ipspc_server_connection_status_notification_port 	\
+						__BEAT_ADD_VENDOR_ID(0x173, 1)
+#define HV_get_raid_of_physical_spe __BEAT_ADD_VENDOR_ID(0x174, 1)
+#define HV_set_physical_spe_to_rag __BEAT_ADD_VENDOR_ID(0x175, 1)
+#define HV_release_physical_spe_from_rag __BEAT_ADD_VENDOR_ID(0x176, 1)
+#define HV_rtc_read __BEAT_ADD_VENDOR_ID(0x190, 1)
+#define HV_rtc_write __BEAT_ADD_VENDOR_ID(0x191, 1)
+#define HV_eeprom_read __BEAT_ADD_VENDOR_ID(0x192, 1)
+#define HV_eeprom_write __BEAT_ADD_VENDOR_ID(0x193, 1)
+#define HV_insert_htab_entry3 __BEAT_ADD_VENDOR_ID(0x104, 1)
+#define HV_invalidate_htab_entry3 __BEAT_ADD_VENDOR_ID(0x105, 1)
+#define HV_update_htab_permission3 __BEAT_ADD_VENDOR_ID(0x106, 1)
+#define HV_clear_htab3 __BEAT_ADD_VENDOR_ID(0x107, 1)
+#endif
diff --git a/arch/powerpc/platforms/cell/beat_udbg.c b/arch/powerpc/platforms/cell/beat_udbg.c
new file mode 100644
index 0000000..6b418f6
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat_udbg.c
@@ -0,0 +1,98 @@
+/*
+ * udbg function for Beat
+ *
+ * (C) Copyright 2006 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/console.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+
+#include "beat.h"
+
+#define	celleb_vtermno	0
+
+static void udbg_putc_beat(char c)
+{
+	unsigned long rc;
+
+	if (c == '\n')
+		udbg_putc_beat('\r');
+
+	rc = beat_put_term_char(celleb_vtermno, 1, (uint64_t)c << 56, 0);
+}
+
+/* Buffered chars getc */
+static long inbuflen;
+static long inbuf[2];	/* must be 2 longs */
+
+static int udbg_getc_poll_beat(void)
+{
+	/* The interface is tricky because it may return up to 16 chars.
+	 * We save them statically for future calls to udbg_getc().
+	 */
+	char ch, *buf = (char *)inbuf;
+	int i;
+	long rc;
+	if (inbuflen == 0) {
+		/* get some more chars. */
+		inbuflen = 0;
+		rc = beat_get_term_char(celleb_vtermno, &inbuflen,
+					inbuf+0, inbuf+1);
+		if (rc != 0)
+			inbuflen = 0;	/* otherwise inbuflen is garbage */
+	}
+	if (inbuflen <= 0 || inbuflen > 16) {
+		/* Catch error case as well as other oddities (corruption) */
+		inbuflen = 0;
+		return -1;
+	}
+	ch = buf[0];
+	for (i = 1; i < inbuflen; i++)	/* shuffle them down. */
+		buf[i-1] = buf[i];
+	inbuflen--;
+	return ch;
+}
+
+static int udbg_getc_beat(void)
+{
+	int ch;
+	for (;;) {
+		ch = udbg_getc_poll_beat();
+		if (ch == -1) {
+			/* This shouldn't be needed...but... */
+			volatile unsigned long delay;
+			for (delay = 0; delay < 2000000; delay++)
+				;
+		} else {
+			return ch;
+		}
+	}
+}
+
+/* call this from early_init() for a working debug console on
+ * vterm capable LPAR machines
+ */
+void __init udbg_init_debug_beat(void)
+{
+	udbg_putc = udbg_putc_beat;
+	udbg_getc = udbg_getc_beat;
+	udbg_getc_poll = udbg_getc_poll_beat;
+}
diff --git a/arch/powerpc/platforms/cell/beat_wrapper.h b/arch/powerpc/platforms/cell/beat_wrapper.h
new file mode 100644
index 0000000..b47dfda
--- /dev/null
+++ b/arch/powerpc/platforms/cell/beat_wrapper.h
@@ -0,0 +1,289 @@
+/*
+ * Beat hypervisor call I/F
+ *
+ * (C) Copyright 2007 TOSHIBA CORPORATION
+ *
+ * This code is based on arch/powerpc/platforms/pseries/plpar_wrapper.h.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef BEAT_HCALL
+#include "beat_syscall.h"
+
+/* defined in hvCall.S */
+extern s64 beat_hcall_norets(u64 opcode, ...);
+extern s64 beat_hcall_norets8(u64 opcode, u64 arg1, u64 arg2, u64 arg3,
+	u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8);
+extern s64 beat_hcall1(u64 opcode, u64 retbuf[1], ...);
+extern s64 beat_hcall2(u64 opcode, u64 retbuf[2], ...);
+extern s64 beat_hcall3(u64 opcode, u64 retbuf[3], ...);
+extern s64 beat_hcall4(u64 opcode, u64 retbuf[4], ...);
+extern s64 beat_hcall5(u64 opcode, u64 retbuf[5], ...);
+extern s64 beat_hcall6(u64 opcode, u64 retbuf[6], ...);
+
+static inline s64 beat_downcount_of_interrupt(u64 plug_id)
+{
+	return beat_hcall_norets(HV_downcount_of_interrupt, plug_id);
+}
+
+static inline s64 beat_set_interrupt_mask(u64 index,
+	u64 val0, u64 val1, u64 val2, u64 val3)
+{
+	return beat_hcall_norets(HV_set_interrupt_mask, index,
+	       val0, val1, val2, val3);
+}
+
+static inline s64 beat_destruct_irq_plug(u64 plug_id)
+{
+	return beat_hcall_norets(HV_destruct_irq_plug, plug_id);
+}
+
+static inline s64 beat_construct_and_connect_irq_plug(u64 plug_id,
+	u64 outlet_id)
+{
+	return beat_hcall_norets(HV_construct_and_connect_irq_plug, plug_id,
+	       outlet_id);
+}
+
+static inline s64 beat_detect_pending_interrupts(u64 index, u64 *retbuf)
+{
+	return beat_hcall4(HV_detect_pending_interrupts, retbuf, index);
+}
+
+static inline s64 beat_pause(u64 style)
+{
+	return beat_hcall_norets(HV_pause, style);
+}
+
+static inline s64 beat_read_htab_entries(u64 htab_id, u64 index, u64 *retbuf)
+{
+	return beat_hcall5(HV_read_htab_entries, retbuf, htab_id, index);
+}
+
+static inline s64 beat_insert_htab_entry(u64 htab_id, u64 group,
+	u64 bitmask, u64 hpte_v, u64 hpte_r, u64 *slot)
+{
+	u64 dummy[3];
+	s64 ret;
+
+	ret = beat_hcall3(HV_insert_htab_entry, dummy, htab_id, group,
+		bitmask, hpte_v, hpte_r);
+	*slot = dummy[0];
+	return ret;
+}
+
+static inline s64 beat_write_htab_entry(u64 htab_id, u64 slot,
+	u64 hpte_v, u64 hpte_r, u64 mask_v, u64 mask_r,
+	u64 *ret_v, u64 *ret_r)
+{
+	u64 dummy[2];
+	s64 ret;
+
+	ret = beat_hcall2(HV_write_htab_entry, dummy, htab_id, slot,
+		hpte_v, hpte_r, mask_v, mask_r);
+	*ret_v = dummy[0];
+	*ret_r = dummy[1];
+	return ret;
+}
+
+static inline s64 beat_insert_htab_entry3(u64 htab_id, u64 group,
+	u64 hpte_v, u64 hpte_r, u64 mask_v, u64 value_v, u64 *slot)
+{
+	u64 dummy[1];
+	s64 ret;
+
+	ret = beat_hcall1(HV_insert_htab_entry3, dummy, htab_id, group,
+		hpte_v, hpte_r, mask_v, value_v);
+	*slot = dummy[0];
+	return ret;
+}
+
+static inline s64 beat_invalidate_htab_entry3(u64 htab_id, u64 group,
+	u64 va, u64 pss)
+{
+	return beat_hcall_norets(HV_invalidate_htab_entry3,
+		htab_id, group, va, pss);
+}
+
+static inline s64 beat_update_htab_permission3(u64 htab_id, u64 group,
+	u64 va, u64 pss, u64 ptel_mask, u64 ptel_value)
+{
+	return beat_hcall_norets(HV_update_htab_permission3,
+		htab_id, group, va, pss, ptel_mask, ptel_value);
+}
+
+static inline s64 beat_clear_htab3(u64 htab_id)
+{
+	return beat_hcall_norets(HV_clear_htab3, htab_id);
+}
+
+static inline void beat_shutdown_logical_partition(u64 code)
+{
+	(void)beat_hcall_norets(HV_shutdown_logical_partition, code);
+}
+
+static inline s64 beat_rtc_write(u64 time_from_epoch)
+{
+	return beat_hcall_norets(HV_rtc_write, time_from_epoch);
+}
+
+static inline s64 beat_rtc_read(u64 *time_from_epoch)
+{
+	u64 dummy[1];
+	s64 ret;
+
+	ret = beat_hcall1(HV_rtc_read, dummy);
+	*time_from_epoch = dummy[0];
+	return ret;
+}
+
+#define	BEAT_NVRW_CNT	(sizeof(u64) * 6)
+
+static inline s64 beat_eeprom_write(u64 index, u64 length, u8 *buffer)
+{
+	u64	b[6];
+
+	if (length > BEAT_NVRW_CNT)
+		return -1;
+	memcpy(b, buffer, sizeof(b));
+	return beat_hcall_norets8(HV_eeprom_write, index, length,
+		b[0], b[1], b[2], b[3], b[4], b[5]);
+}
+
+static inline s64 beat_eeprom_read(u64 index, u64 length, u8 *buffer)
+{
+	u64	b[6];
+	s64	ret;
+
+	if (length > BEAT_NVRW_CNT)
+		return -1;
+	ret = beat_hcall6(HV_eeprom_read, b, index, length);
+	memcpy(buffer, b, length);
+	return ret;
+}
+
+static inline s64 beat_set_dabr(u64 value, u64 style)
+{
+	return beat_hcall_norets(HV_set_dabr, value, style);
+}
+
+static inline s64 beat_get_characters_from_console(u64 termno, u64 *len,
+	u8 *buffer)
+{
+	u64 dummy[3];
+	s64 ret;
+
+	ret = beat_hcall3(HV_get_characters_from_console, dummy, termno, len);
+	*len = dummy[0];
+	memcpy(buffer, dummy + 1, *len);
+	return ret;
+}
+
+static inline s64 beat_put_characters_to_console(u64 termno, u64 len,
+	u8 *buffer)
+{
+	u64 b[2];
+
+	memcpy(b, buffer, len);
+	return beat_hcall_norets(HV_put_characters_to_console, termno, len,
+		b[0], b[1]);
+}
+
+static inline s64 beat_get_spe_privileged_state_1_registers(
+		u64 id, u64 offsetof, u64 *value)
+{
+	u64 dummy[1];
+	s64 ret;
+
+	ret = beat_hcall1(HV_get_spe_privileged_state_1_registers, dummy, id,
+		offsetof);
+	*value = dummy[0];
+	return ret;
+}
+
+static inline s64 beat_set_irq_mask_for_spe(u64 id, u64 class, u64 mask)
+{
+	return beat_hcall_norets(HV_set_irq_mask_for_spe, id, class, mask);
+}
+
+static inline s64 beat_clear_interrupt_status_of_spe(u64 id, u64 class,
+	u64 mask)
+{
+	return beat_hcall_norets(HV_clear_interrupt_status_of_spe,
+		id, class, mask);
+}
+
+static inline s64 beat_set_spe_privileged_state_1_registers(
+		u64 id, u64 offsetof, u64 value)
+{
+	return beat_hcall_norets(HV_set_spe_privileged_state_1_registers,
+		id, offsetof, value);
+}
+
+static inline s64 beat_get_interrupt_status_of_spe(u64 id, u64 class, u64 *val)
+{
+	u64 dummy[1];
+	s64 ret;
+
+	ret = beat_hcall1(HV_get_interrupt_status_of_spe, dummy, id, class);
+	*val = dummy[0];
+	return ret;
+}
+
+static inline s64 beat_put_iopte(u64 ioas_id, u64 io_addr, u64 real_addr,
+	u64 ioid, u64 flags)
+{
+	return beat_hcall_norets(HV_put_iopte, ioas_id, io_addr, real_addr,
+		ioid, flags);
+}
+
+static inline s64 beat_construct_event_receive_port(u64 *port)
+{
+	u64 dummy[1];
+	s64 ret;
+
+	ret = beat_hcall1(HV_construct_event_receive_port, dummy);
+	*port = dummy[0];
+	return ret;
+}
+
+static inline s64 beat_destruct_event_receive_port(u64 port)
+{
+	s64 ret;
+
+	ret = beat_hcall_norets(HV_destruct_event_receive_port, port);
+	return ret;
+}
+
+static inline s64 beat_create_repository_node(u64 path[4], u64 data[2])
+{
+	s64 ret;
+
+	ret = beat_hcall_norets(HV_create_repository_node2,
+		path[0], path[1], path[2], path[3], data[0], data[1]);
+	return ret;
+}
+
+static inline s64 beat_get_repository_node_value(u64 lpid, u64 path[4],
+	u64 data[2])
+{
+	s64 ret;
+
+	ret = beat_hcall2(HV_get_repository_node_value2, data,
+		lpid, path[0], path[1], path[2], path[3]);
+	return ret;
+}
+
+#endif
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.c b/arch/powerpc/platforms/cell/cbe_cpufreq.c
new file mode 100644
index 0000000..ec7c8f4
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_cpufreq.c
@@ -0,0 +1,208 @@
+/*
+ * cpufreq driver for the cell processor
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/of_platform.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/cell-regs.h>
+#include "cbe_cpufreq.h"
+
+static DEFINE_MUTEX(cbe_switch_mutex);
+
+
+/* the CBE supports an 8 step frequency scaling */
+static struct cpufreq_frequency_table cbe_freqs[] = {
+	{1,	0},
+	{2,	0},
+	{3,	0},
+	{4,	0},
+	{5,	0},
+	{6,	0},
+	{8,	0},
+	{10,	0},
+	{0,	CPUFREQ_TABLE_END},
+};
+
+/*
+ * hardware specific functions
+ */
+
+static int set_pmode(unsigned int cpu, unsigned int slow_mode)
+{
+	int rc;
+
+	if (cbe_cpufreq_has_pmi)
+		rc = cbe_cpufreq_set_pmode_pmi(cpu, slow_mode);
+	else
+		rc = cbe_cpufreq_set_pmode(cpu, slow_mode);
+
+	pr_debug("register contains slow mode %d\n", cbe_cpufreq_get_pmode(cpu));
+
+	return rc;
+}
+
+/*
+ * cpufreq functions
+ */
+
+static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+	const u32 *max_freqp;
+	u32 max_freq;
+	int i, cur_pmode;
+	struct device_node *cpu;
+
+	cpu = of_get_cpu_node(policy->cpu, NULL);
+
+	if (!cpu)
+		return -ENODEV;
+
+	pr_debug("init cpufreq on CPU %d\n", policy->cpu);
+
+	/*
+	 * Let's check we can actually get to the CELL regs
+	 */
+	if (!cbe_get_cpu_pmd_regs(policy->cpu) ||
+	    !cbe_get_cpu_mic_tm_regs(policy->cpu)) {
+		pr_info("invalid CBE regs pointers for cpufreq\n");
+		return -EINVAL;
+	}
+
+	max_freqp = of_get_property(cpu, "clock-frequency", NULL);
+
+	of_node_put(cpu);
+
+	if (!max_freqp)
+		return -EINVAL;
+
+	/* we need the freq in kHz */
+	max_freq = *max_freqp / 1000;
+
+	pr_debug("max clock-frequency is at %u kHz\n", max_freq);
+	pr_debug("initializing frequency table\n");
+
+	/* initialize frequency table */
+	for (i=0; cbe_freqs[i].frequency!=CPUFREQ_TABLE_END; i++) {
+		cbe_freqs[i].frequency = max_freq / cbe_freqs[i].index;
+		pr_debug("%d: %d\n", i, cbe_freqs[i].frequency);
+	}
+
+	/* if DEBUG is enabled set_pmode() measures the latency
+	 * of a transition */
+	policy->cpuinfo.transition_latency = 25000;
+
+	cur_pmode = cbe_cpufreq_get_pmode(policy->cpu);
+	pr_debug("current pmode is at %d\n",cur_pmode);
+
+	policy->cur = cbe_freqs[cur_pmode].frequency;
+
+#ifdef CONFIG_SMP
+	policy->cpus = per_cpu(cpu_sibling_map, policy->cpu);
+#endif
+
+	cpufreq_frequency_table_get_attr(cbe_freqs, policy->cpu);
+
+	/* this ensures that policy->cpuinfo_min
+	 * and policy->cpuinfo_max are set correctly */
+	return cpufreq_frequency_table_cpuinfo(policy, cbe_freqs);
+}
+
+static int cbe_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	return 0;
+}
+
+static int cbe_cpufreq_verify(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, cbe_freqs);
+}
+
+static int cbe_cpufreq_target(struct cpufreq_policy *policy,
+			      unsigned int target_freq,
+			      unsigned int relation)
+{
+	int rc;
+	struct cpufreq_freqs freqs;
+	unsigned int cbe_pmode_new;
+
+	cpufreq_frequency_table_target(policy,
+				       cbe_freqs,
+				       target_freq,
+				       relation,
+				       &cbe_pmode_new);
+
+	freqs.old = policy->cur;
+	freqs.new = cbe_freqs[cbe_pmode_new].frequency;
+	freqs.cpu = policy->cpu;
+
+	mutex_lock(&cbe_switch_mutex);
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	pr_debug("setting frequency for cpu %d to %d kHz, " \
+		 "1/%d of max frequency\n",
+		 policy->cpu,
+		 cbe_freqs[cbe_pmode_new].frequency,
+		 cbe_freqs[cbe_pmode_new].index);
+
+	rc = set_pmode(policy->cpu, cbe_pmode_new);
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+	mutex_unlock(&cbe_switch_mutex);
+
+	return rc;
+}
+
+static struct cpufreq_driver cbe_cpufreq_driver = {
+	.verify		= cbe_cpufreq_verify,
+	.target		= cbe_cpufreq_target,
+	.init		= cbe_cpufreq_cpu_init,
+	.exit		= cbe_cpufreq_cpu_exit,
+	.name		= "cbe-cpufreq",
+	.owner		= THIS_MODULE,
+	.flags		= CPUFREQ_CONST_LOOPS,
+};
+
+/*
+ * module init and destoy
+ */
+
+static int __init cbe_cpufreq_init(void)
+{
+	if (!machine_is(cell))
+		return -ENODEV;
+
+	return cpufreq_register_driver(&cbe_cpufreq_driver);
+}
+
+static void __exit cbe_cpufreq_exit(void)
+{
+	cpufreq_unregister_driver(&cbe_cpufreq_driver);
+}
+
+module_init(cbe_cpufreq_init);
+module_exit(cbe_cpufreq_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.h b/arch/powerpc/platforms/cell/cbe_cpufreq.h
new file mode 100644
index 0000000..c1d86bf
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_cpufreq.h
@@ -0,0 +1,24 @@
+/*
+ * cbe_cpufreq.h
+ *
+ * This file contains the definitions used by the cbe_cpufreq driver.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/types.h>
+
+int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode);
+int cbe_cpufreq_get_pmode(int cpu);
+
+int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode);
+
+#if defined(CONFIG_CBE_CPUFREQ_PMI) || defined(CONFIG_CBE_CPUFREQ_PMI_MODULE)
+extern bool cbe_cpufreq_has_pmi;
+#else
+#define cbe_cpufreq_has_pmi (0)
+#endif
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c b/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c
new file mode 100644
index 0000000..70fa7ae
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c
@@ -0,0 +1,115 @@
+/*
+ * pervasive backend for the cbe_cpufreq driver
+ *
+ * This driver makes use of the pervasive unit to
+ * engage the desired frequency.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <asm/machdep.h>
+#include <asm/hw_irq.h>
+#include <asm/cell-regs.h>
+
+#include "cbe_cpufreq.h"
+
+/* to write to MIC register */
+static u64 MIC_Slow_Fast_Timer_table[] = {
+	[0 ... 7] = 0x007fc00000000000ull,
+};
+
+/* more values for the MIC */
+static u64 MIC_Slow_Next_Timer_table[] = {
+	0x0000240000000000ull,
+	0x0000268000000000ull,
+	0x000029C000000000ull,
+	0x00002D0000000000ull,
+	0x0000300000000000ull,
+	0x0000334000000000ull,
+	0x000039C000000000ull,
+	0x00003FC000000000ull,
+};
+
+
+int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode)
+{
+	struct cbe_pmd_regs __iomem *pmd_regs;
+	struct cbe_mic_tm_regs __iomem *mic_tm_regs;
+	u64 flags;
+	u64 value;
+#ifdef DEBUG
+	long time;
+#endif
+
+	local_irq_save(flags);
+
+	mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu);
+	pmd_regs = cbe_get_cpu_pmd_regs(cpu);
+
+#ifdef DEBUG
+	time = jiffies;
+#endif
+
+	out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]);
+	out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]);
+
+	out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]);
+	out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]);
+
+	value = in_be64(&pmd_regs->pmcr);
+	/* set bits to zero */
+	value &= 0xFFFFFFFFFFFFFFF8ull;
+	/* set bits to next pmode */
+	value |= pmode;
+
+	out_be64(&pmd_regs->pmcr, value);
+
+#ifdef DEBUG
+	/* wait until new pmode appears in status register */
+	value = in_be64(&pmd_regs->pmsr) & 0x07;
+	while (value != pmode) {
+		cpu_relax();
+		value = in_be64(&pmd_regs->pmsr) & 0x07;
+	}
+
+	time = jiffies  - time;
+	time = jiffies_to_msecs(time);
+	pr_debug("had to wait %lu ms for a transition using " \
+		 "pervasive unit\n", time);
+#endif
+	local_irq_restore(flags);
+
+	return 0;
+}
+
+
+int cbe_cpufreq_get_pmode(int cpu)
+{
+	int ret;
+	struct cbe_pmd_regs __iomem *pmd_regs;
+
+	pmd_regs = cbe_get_cpu_pmd_regs(cpu);
+	ret = in_be64(&pmd_regs->pmsr) & 0x07;
+
+	return ret;
+}
+
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c
new file mode 100644
index 0000000..3233fe8
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c
@@ -0,0 +1,155 @@
+/*
+ * pmi backend for the cbe_cpufreq driver
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/of_platform.h>
+
+#include <asm/processor.h>
+#include <asm/prom.h>
+#include <asm/pmi.h>
+#include <asm/cell-regs.h>
+
+#ifdef DEBUG
+#include <asm/time.h>
+#endif
+
+#include "cbe_cpufreq.h"
+
+static u8 pmi_slow_mode_limit[MAX_CBE];
+
+bool cbe_cpufreq_has_pmi = false;
+EXPORT_SYMBOL_GPL(cbe_cpufreq_has_pmi);
+
+/*
+ * hardware specific functions
+ */
+
+int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode)
+{
+	int ret;
+	pmi_message_t pmi_msg;
+#ifdef DEBUG
+	long time;
+#endif
+	pmi_msg.type = PMI_TYPE_FREQ_CHANGE;
+	pmi_msg.data1 =	cbe_cpu_to_node(cpu);
+	pmi_msg.data2 = pmode;
+
+#ifdef DEBUG
+	time = jiffies;
+#endif
+	pmi_send_message(pmi_msg);
+
+#ifdef DEBUG
+	time = jiffies  - time;
+	time = jiffies_to_msecs(time);
+	pr_debug("had to wait %lu ms for a transition using " \
+		 "PMI\n", time);
+#endif
+	ret = pmi_msg.data2;
+	pr_debug("PMI returned slow mode %d\n", ret);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cbe_cpufreq_set_pmode_pmi);
+
+
+static void cbe_cpufreq_handle_pmi(pmi_message_t pmi_msg)
+{
+	u8 node, slow_mode;
+
+	BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE);
+
+	node = pmi_msg.data1;
+	slow_mode = pmi_msg.data2;
+
+	pmi_slow_mode_limit[node] = slow_mode;
+
+	pr_debug("cbe_handle_pmi: node: %d max_freq: %d\n", node, slow_mode);
+}
+
+static int pmi_notifier(struct notifier_block *nb,
+				       unsigned long event, void *data)
+{
+	struct cpufreq_policy *policy = data;
+	struct cpufreq_frequency_table *cbe_freqs;
+	u8 node;
+
+	/* Should this really be called for CPUFREQ_ADJUST, CPUFREQ_INCOMPATIBLE
+	 * and CPUFREQ_NOTIFY policy events?)
+	 */
+	if (event == CPUFREQ_START)
+		return 0;
+
+	cbe_freqs = cpufreq_frequency_get_table(policy->cpu);
+	node = cbe_cpu_to_node(policy->cpu);
+
+	pr_debug("got notified, event=%lu, node=%u\n", event, node);
+
+	if (pmi_slow_mode_limit[node] != 0) {
+		pr_debug("limiting node %d to slow mode %d\n",
+			 node, pmi_slow_mode_limit[node]);
+
+		cpufreq_verify_within_limits(policy, 0,
+
+			cbe_freqs[pmi_slow_mode_limit[node]].frequency);
+	}
+
+	return 0;
+}
+
+static struct notifier_block pmi_notifier_block = {
+	.notifier_call = pmi_notifier,
+};
+
+static struct pmi_handler cbe_pmi_handler = {
+	.type			= PMI_TYPE_FREQ_CHANGE,
+	.handle_pmi_message	= cbe_cpufreq_handle_pmi,
+};
+
+
+
+static int __init cbe_cpufreq_pmi_init(void)
+{
+	cbe_cpufreq_has_pmi = pmi_register_handler(&cbe_pmi_handler) == 0;
+
+	if (!cbe_cpufreq_has_pmi)
+		return -ENODEV;
+
+	cpufreq_register_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
+
+	return 0;
+}
+
+static void __exit cbe_cpufreq_pmi_exit(void)
+{
+	cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
+	pmi_unregister_handler(&cbe_pmi_handler);
+}
+
+module_init(cbe_cpufreq_pmi_init);
+module_exit(cbe_cpufreq_pmi_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
diff --git a/arch/powerpc/platforms/cell/cbe_powerbutton.c b/arch/powerpc/platforms/cell/cbe_powerbutton.c
new file mode 100644
index 0000000..dcddaa5
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_powerbutton.c
@@ -0,0 +1,117 @@
+/*
+ * driver for powerbutton on IBM cell blades
+ *
+ * (C) Copyright IBM Corp. 2005-2008
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/input.h>
+#include <linux/platform_device.h>
+#include <asm/pmi.h>
+#include <asm/prom.h>
+
+static struct input_dev *button_dev;
+static struct platform_device *button_pdev;
+
+static void cbe_powerbutton_handle_pmi(pmi_message_t pmi_msg)
+{
+	BUG_ON(pmi_msg.type != PMI_TYPE_POWER_BUTTON);
+
+	input_report_key(button_dev, KEY_POWER, 1);
+	input_sync(button_dev);
+	input_report_key(button_dev, KEY_POWER, 0);
+	input_sync(button_dev);
+}
+
+static struct pmi_handler cbe_pmi_handler = {
+	.type			= PMI_TYPE_POWER_BUTTON,
+	.handle_pmi_message	= cbe_powerbutton_handle_pmi,
+};
+
+static int __init cbe_powerbutton_init(void)
+{
+	int ret = 0;
+	struct input_dev *dev;
+
+	if (!machine_is_compatible("IBM,CBPLUS-1.0")) {
+		printk(KERN_ERR "%s: Not a cell blade.\n", __func__);
+		ret = -ENODEV;
+		goto out;
+	}
+
+	dev = input_allocate_device();
+	if (!dev) {
+		ret = -ENOMEM;
+		printk(KERN_ERR "%s: Not enough memory.\n", __func__);
+		goto out;
+	}
+
+	set_bit(EV_KEY, dev->evbit);
+	set_bit(KEY_POWER, dev->keybit);
+
+	dev->name = "Power Button";
+	dev->id.bustype = BUS_HOST;
+
+	/* this makes the button look like an acpi power button
+	 * no clue whether anyone relies on that though */
+	dev->id.product = 0x02;
+	dev->phys = "LNXPWRBN/button/input0";
+
+	button_pdev = platform_device_register_simple("power_button", 0, NULL, 0);
+	if (IS_ERR(button_pdev)) {
+		ret = PTR_ERR(button_pdev);
+		goto out_free_input;
+	}
+
+	dev->dev.parent = &button_pdev->dev;
+	ret = input_register_device(dev);
+	if (ret) {
+		printk(KERN_ERR "%s: Failed to register device\n", __func__);
+		goto out_free_pdev;
+	}
+
+	button_dev = dev;
+
+	ret = pmi_register_handler(&cbe_pmi_handler);
+	if (ret) {
+		printk(KERN_ERR "%s: Failed to register with pmi.\n", __func__);
+		goto out_free_pdev;
+	}
+
+	goto out;
+
+out_free_pdev:
+	platform_device_unregister(button_pdev);
+out_free_input:
+	input_free_device(dev);
+out:
+	return ret;
+}
+
+static void __exit cbe_powerbutton_exit(void)
+{
+	pmi_unregister_handler(&cbe_pmi_handler);
+	platform_device_unregister(button_pdev);
+	input_free_device(button_dev);
+}
+
+module_init(cbe_powerbutton_init);
+module_exit(cbe_powerbutton_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
new file mode 100644
index 0000000..dbc338f
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_regs.c
@@ -0,0 +1,280 @@
+/*
+ * cbe_regs.c
+ *
+ * Accessor routines for the various MMIO register blocks of the CBE
+ *
+ * (c) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp.
+ */
+
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/prom.h>
+#include <asm/ptrace.h>
+#include <asm/cell-regs.h>
+
+/*
+ * Current implementation uses "cpu" nodes. We build our own mapping
+ * array of cpu numbers to cpu nodes locally for now to allow interrupt
+ * time code to have a fast path rather than call of_get_cpu_node(). If
+ * we implement cpu hotplug, we'll have to install an appropriate norifier
+ * in order to release references to the cpu going away
+ */
+static struct cbe_regs_map
+{
+	struct device_node *cpu_node;
+	struct device_node *be_node;
+	struct cbe_pmd_regs __iomem *pmd_regs;
+	struct cbe_iic_regs __iomem *iic_regs;
+	struct cbe_mic_tm_regs __iomem *mic_tm_regs;
+	struct cbe_pmd_shadow_regs pmd_shadow_regs;
+} cbe_regs_maps[MAX_CBE];
+static int cbe_regs_map_count;
+
+static struct cbe_thread_map
+{
+	struct device_node *cpu_node;
+	struct device_node *be_node;
+	struct cbe_regs_map *regs;
+	unsigned int thread_id;
+	unsigned int cbe_id;
+} cbe_thread_map[NR_CPUS];
+
+static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = CPU_MASK_NONE };
+static cpumask_t cbe_first_online_cpu = CPU_MASK_NONE;
+
+static struct cbe_regs_map *cbe_find_map(struct device_node *np)
+{
+	int i;
+	struct device_node *tmp_np;
+
+	if (strcasecmp(np->type, "spe")) {
+		for (i = 0; i < cbe_regs_map_count; i++)
+			if (cbe_regs_maps[i].cpu_node == np ||
+			    cbe_regs_maps[i].be_node == np)
+				return &cbe_regs_maps[i];
+		return NULL;
+	}
+
+	if (np->data)
+		return np->data;
+
+	/* walk up path until cpu or be node was found */
+	tmp_np = np;
+	do {
+		tmp_np = tmp_np->parent;
+		/* on a correct devicetree we wont get up to root */
+		BUG_ON(!tmp_np);
+	} while (strcasecmp(tmp_np->type, "cpu") &&
+		 strcasecmp(tmp_np->type, "be"));
+
+	np->data = cbe_find_map(tmp_np);
+
+	return np->data;
+}
+
+struct cbe_pmd_regs __iomem *cbe_get_pmd_regs(struct device_node *np)
+{
+	struct cbe_regs_map *map = cbe_find_map(np);
+	if (map == NULL)
+		return NULL;
+	return map->pmd_regs;
+}
+EXPORT_SYMBOL_GPL(cbe_get_pmd_regs);
+
+struct cbe_pmd_regs __iomem *cbe_get_cpu_pmd_regs(int cpu)
+{
+	struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
+	if (map == NULL)
+		return NULL;
+	return map->pmd_regs;
+}
+EXPORT_SYMBOL_GPL(cbe_get_cpu_pmd_regs);
+
+struct cbe_pmd_shadow_regs *cbe_get_pmd_shadow_regs(struct device_node *np)
+{
+	struct cbe_regs_map *map = cbe_find_map(np);
+	if (map == NULL)
+		return NULL;
+	return &map->pmd_shadow_regs;
+}
+
+struct cbe_pmd_shadow_regs *cbe_get_cpu_pmd_shadow_regs(int cpu)
+{
+	struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
+	if (map == NULL)
+		return NULL;
+	return &map->pmd_shadow_regs;
+}
+
+struct cbe_iic_regs __iomem *cbe_get_iic_regs(struct device_node *np)
+{
+	struct cbe_regs_map *map = cbe_find_map(np);
+	if (map == NULL)
+		return NULL;
+	return map->iic_regs;
+}
+
+struct cbe_iic_regs __iomem *cbe_get_cpu_iic_regs(int cpu)
+{
+	struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
+	if (map == NULL)
+		return NULL;
+	return map->iic_regs;
+}
+
+struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np)
+{
+	struct cbe_regs_map *map = cbe_find_map(np);
+	if (map == NULL)
+		return NULL;
+	return map->mic_tm_regs;
+}
+
+struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu)
+{
+	struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
+	if (map == NULL)
+		return NULL;
+	return map->mic_tm_regs;
+}
+EXPORT_SYMBOL_GPL(cbe_get_cpu_mic_tm_regs);
+
+u32 cbe_get_hw_thread_id(int cpu)
+{
+	return cbe_thread_map[cpu].thread_id;
+}
+EXPORT_SYMBOL_GPL(cbe_get_hw_thread_id);
+
+u32 cbe_cpu_to_node(int cpu)
+{
+	return cbe_thread_map[cpu].cbe_id;
+}
+EXPORT_SYMBOL_GPL(cbe_cpu_to_node);
+
+u32 cbe_node_to_cpu(int node)
+{
+	return find_first_bit( (unsigned long *) &cbe_local_mask[node], sizeof(cpumask_t));
+}
+EXPORT_SYMBOL_GPL(cbe_node_to_cpu);
+
+static struct device_node *cbe_get_be_node(int cpu_id)
+{
+	struct device_node *np;
+
+	for_each_node_by_type (np, "be") {
+		int len,i;
+		const phandle *cpu_handle;
+
+		cpu_handle = of_get_property(np, "cpus", &len);
+
+		/*
+		 * the CAB SLOF tree is non compliant, so we just assume
+		 * there is only one node
+		 */
+		if (WARN_ON_ONCE(!cpu_handle))
+			return np;
+
+		for (i=0; i<len; i++)
+			if (of_find_node_by_phandle(cpu_handle[i]) == of_get_cpu_node(cpu_id, NULL))
+				return np;
+	}
+
+	return NULL;
+}
+
+void __init cbe_fill_regs_map(struct cbe_regs_map *map)
+{
+	if(map->be_node) {
+		struct device_node *be, *np;
+
+		be = map->be_node;
+
+		for_each_node_by_type(np, "pervasive")
+			if (of_get_parent(np) == be)
+				map->pmd_regs = of_iomap(np, 0);
+
+		for_each_node_by_type(np, "CBEA-Internal-Interrupt-Controller")
+			if (of_get_parent(np) == be)
+				map->iic_regs = of_iomap(np, 2);
+
+		for_each_node_by_type(np, "mic-tm")
+			if (of_get_parent(np) == be)
+				map->mic_tm_regs = of_iomap(np, 0);
+	} else {
+		struct device_node *cpu;
+		/* That hack must die die die ! */
+		const struct address_prop {
+			unsigned long address;
+			unsigned int len;
+		} __attribute__((packed)) *prop;
+
+		cpu = map->cpu_node;
+
+		prop = of_get_property(cpu, "pervasive", NULL);
+		if (prop != NULL)
+			map->pmd_regs = ioremap(prop->address, prop->len);
+
+		prop = of_get_property(cpu, "iic", NULL);
+		if (prop != NULL)
+			map->iic_regs = ioremap(prop->address, prop->len);
+
+		prop = of_get_property(cpu, "mic-tm", NULL);
+		if (prop != NULL)
+			map->mic_tm_regs = ioremap(prop->address, prop->len);
+	}
+}
+
+
+void __init cbe_regs_init(void)
+{
+	int i;
+	unsigned int thread_id;
+	struct device_node *cpu;
+
+	/* Build local fast map of CPUs */
+	for_each_possible_cpu(i) {
+		cbe_thread_map[i].cpu_node = of_get_cpu_node(i, &thread_id);
+		cbe_thread_map[i].be_node = cbe_get_be_node(i);
+		cbe_thread_map[i].thread_id = thread_id;
+	}
+
+	/* Find maps for each device tree CPU */
+	for_each_node_by_type(cpu, "cpu") {
+		struct cbe_regs_map *map;
+		unsigned int cbe_id;
+
+		cbe_id = cbe_regs_map_count++;
+		map = &cbe_regs_maps[cbe_id];
+
+		if (cbe_regs_map_count > MAX_CBE) {
+			printk(KERN_ERR "cbe_regs: More BE chips than supported"
+			       "!\n");
+			cbe_regs_map_count--;
+			of_node_put(cpu);
+			return;
+		}
+		map->cpu_node = cpu;
+
+		for_each_possible_cpu(i) {
+			struct cbe_thread_map *thread = &cbe_thread_map[i];
+
+			if (thread->cpu_node == cpu) {
+				thread->regs = map;
+				thread->cbe_id = cbe_id;
+				map->be_node = thread->be_node;
+				cpu_set(i, cbe_local_mask[cbe_id]);
+				if(thread->thread_id == 0)
+					cpu_set(i, cbe_first_online_cpu);
+			}
+		}
+
+		cbe_fill_regs_map(map);
+	}
+}
+
diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c
new file mode 100644
index 0000000..4d4c8c1
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_thermal.c
@@ -0,0 +1,399 @@
+/*
+ * thermal support for the cell processor
+ *
+ * This module adds some sysfs attributes to cpu and spu nodes.
+ * Base for measurements are the digital thermal sensors (DTS)
+ * located on the chip.
+ * The accuracy is 2 degrees, starting from 65 up to 125 degrees celsius
+ * The attributes can be found under
+ * /sys/devices/system/cpu/cpuX/thermal
+ * /sys/devices/system/spu/spuX/thermal
+ *
+ * The following attributes are added for each node:
+ * temperature:
+ *	contains the current temperature measured by the DTS
+ * throttle_begin:
+ *	throttling begins when temperature is greater or equal to
+ *	throttle_begin. Setting this value to 125 prevents throttling.
+ * throttle_end:
+ *	throttling is being ceased, if the temperature is lower than
+ *	throttle_end. Due to a delay between applying throttling and
+ *	a reduced temperature this value should be less than throttle_begin.
+ *	A value equal to throttle_begin provides only a very little hysteresis.
+ * throttle_full_stop:
+ *	If the temperatrue is greater or equal to throttle_full_stop,
+ *	full throttling is applied to the cpu or spu. This value should be
+ *	greater than throttle_begin and throttle_end. Setting this value to
+ *	65 prevents the unit from running code at all.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/sysdev.h>
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <asm/spu.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/cell-regs.h>
+
+#include "spu_priv1_mmio.h"
+
+#define TEMP_MIN 65
+#define TEMP_MAX 125
+
+#define SYSDEV_PREFIX_ATTR(_prefix,_name,_mode)			\
+struct sysdev_attribute attr_ ## _prefix ## _ ## _name = {	\
+	.attr = { .name = __stringify(_name), .mode = _mode },	\
+	.show	= _prefix ## _show_ ## _name,			\
+	.store	= _prefix ## _store_ ## _name,			\
+};
+
+static inline u8 reg_to_temp(u8 reg_value)
+{
+	return ((reg_value & 0x3f) << 1) + TEMP_MIN;
+}
+
+static inline u8 temp_to_reg(u8 temp)
+{
+	return ((temp - TEMP_MIN) >> 1) & 0x3f;
+}
+
+static struct cbe_pmd_regs __iomem *get_pmd_regs(struct sys_device *sysdev)
+{
+	struct spu *spu;
+
+	spu = container_of(sysdev, struct spu, sysdev);
+
+	return cbe_get_pmd_regs(spu_devnode(spu));
+}
+
+/* returns the value for a given spu in a given register */
+static u8 spu_read_register_value(struct sys_device *sysdev, union spe_reg __iomem *reg)
+{
+	union spe_reg value;
+	struct spu *spu;
+
+	spu = container_of(sysdev, struct spu, sysdev);
+	value.val = in_be64(&reg->val);
+
+	return value.spe[spu->spe_id];
+}
+
+static ssize_t spu_show_temp(struct sys_device *sysdev, struct sysdev_attribute *attr,
+			char *buf)
+{
+	u8 value;
+	struct cbe_pmd_regs __iomem *pmd_regs;
+
+	pmd_regs = get_pmd_regs(sysdev);
+
+	value = spu_read_register_value(sysdev, &pmd_regs->ts_ctsr1);
+
+	return sprintf(buf, "%d\n", reg_to_temp(value));
+}
+
+static ssize_t show_throttle(struct cbe_pmd_regs __iomem *pmd_regs, char *buf, int pos)
+{
+	u64 value;
+
+	value = in_be64(&pmd_regs->tm_tpr.val);
+	/* access the corresponding byte */
+	value >>= pos;
+	value &= 0x3F;
+
+	return sprintf(buf, "%d\n", reg_to_temp(value));
+}
+
+static ssize_t store_throttle(struct cbe_pmd_regs __iomem *pmd_regs, const char *buf, size_t size, int pos)
+{
+	u64 reg_value;
+	int temp;
+	u64 new_value;
+	int ret;
+
+	ret = sscanf(buf, "%u", &temp);
+
+	if (ret != 1 || temp < TEMP_MIN || temp > TEMP_MAX)
+		return -EINVAL;
+
+	new_value = temp_to_reg(temp);
+
+	reg_value = in_be64(&pmd_regs->tm_tpr.val);
+
+	/* zero out bits for new value */
+	reg_value &= ~(0xffull << pos);
+	/* set bits to new value */
+	reg_value |= new_value << pos;
+
+	out_be64(&pmd_regs->tm_tpr.val, reg_value);
+	return size;
+}
+
+static ssize_t spu_show_throttle_end(struct sys_device *sysdev,
+			struct sysdev_attribute *attr, char *buf)
+{
+	return show_throttle(get_pmd_regs(sysdev), buf, 0);
+}
+
+static ssize_t spu_show_throttle_begin(struct sys_device *sysdev,
+			struct sysdev_attribute *attr, char *buf)
+{
+	return show_throttle(get_pmd_regs(sysdev), buf, 8);
+}
+
+static ssize_t spu_show_throttle_full_stop(struct sys_device *sysdev,
+			struct sysdev_attribute *attr, char *buf)
+{
+	return show_throttle(get_pmd_regs(sysdev), buf, 16);
+}
+
+static ssize_t spu_store_throttle_end(struct sys_device *sysdev,
+			struct sysdev_attribute *attr, const char *buf, size_t size)
+{
+	return store_throttle(get_pmd_regs(sysdev), buf, size, 0);
+}
+
+static ssize_t spu_store_throttle_begin(struct sys_device *sysdev,
+			struct sysdev_attribute *attr, const char *buf, size_t size)
+{
+	return store_throttle(get_pmd_regs(sysdev), buf, size, 8);
+}
+
+static ssize_t spu_store_throttle_full_stop(struct sys_device *sysdev,
+			struct sysdev_attribute *attr, const char *buf, size_t size)
+{
+	return store_throttle(get_pmd_regs(sysdev), buf, size, 16);
+}
+
+static ssize_t ppe_show_temp(struct sys_device *sysdev, char *buf, int pos)
+{
+	struct cbe_pmd_regs __iomem *pmd_regs;
+	u64 value;
+
+	pmd_regs = cbe_get_cpu_pmd_regs(sysdev->id);
+	value = in_be64(&pmd_regs->ts_ctsr2);
+
+	value = (value >> pos) & 0x3f;
+
+	return sprintf(buf, "%d\n", reg_to_temp(value));
+}
+
+
+/* shows the temperature of the DTS on the PPE,
+ * located near the linear thermal sensor */
+static ssize_t ppe_show_temp0(struct sys_device *sysdev,
+			struct sysdev_attribute *attr, char *buf)
+{
+	return ppe_show_temp(sysdev, buf, 32);
+}
+
+/* shows the temperature of the second DTS on the PPE */
+static ssize_t ppe_show_temp1(struct sys_device *sysdev,
+			struct sysdev_attribute *attr, char *buf)
+{
+	return ppe_show_temp(sysdev, buf, 0);
+}
+
+static ssize_t ppe_show_throttle_end(struct sys_device *sysdev,
+			struct sysdev_attribute *attr, char *buf)
+{
+	return show_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, 32);
+}
+
+static ssize_t ppe_show_throttle_begin(struct sys_device *sysdev,
+			struct sysdev_attribute *attr, char *buf)
+{
+	return show_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, 40);
+}
+
+static ssize_t ppe_show_throttle_full_stop(struct sys_device *sysdev,
+			struct sysdev_attribute *attr, char *buf)
+{
+	return show_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, 48);
+}
+
+static ssize_t ppe_store_throttle_end(struct sys_device *sysdev,
+			struct sysdev_attribute *attr, const char *buf, size_t size)
+{
+	return store_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, size, 32);
+}
+
+static ssize_t ppe_store_throttle_begin(struct sys_device *sysdev,
+			struct sysdev_attribute *attr, const char *buf, size_t size)
+{
+	return store_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, size, 40);
+}
+
+static ssize_t ppe_store_throttle_full_stop(struct sys_device *sysdev,
+			struct sysdev_attribute *attr, const char *buf, size_t size)
+{
+	return store_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, size, 48);
+}
+
+
+static struct sysdev_attribute attr_spu_temperature = {
+	.attr = {.name = "temperature", .mode = 0400 },
+	.show = spu_show_temp,
+};
+
+static SYSDEV_PREFIX_ATTR(spu, throttle_end, 0600);
+static SYSDEV_PREFIX_ATTR(spu, throttle_begin, 0600);
+static SYSDEV_PREFIX_ATTR(spu, throttle_full_stop, 0600);
+
+
+static struct attribute *spu_attributes[] = {
+	&attr_spu_temperature.attr,
+	&attr_spu_throttle_end.attr,
+	&attr_spu_throttle_begin.attr,
+	&attr_spu_throttle_full_stop.attr,
+	NULL,
+};
+
+static struct attribute_group spu_attribute_group = {
+	.name	= "thermal",
+	.attrs	= spu_attributes,
+};
+
+static struct sysdev_attribute attr_ppe_temperature0 = {
+	.attr = {.name = "temperature0", .mode = 0400 },
+	.show = ppe_show_temp0,
+};
+
+static struct sysdev_attribute attr_ppe_temperature1 = {
+	.attr = {.name = "temperature1", .mode = 0400 },
+	.show = ppe_show_temp1,
+};
+
+static SYSDEV_PREFIX_ATTR(ppe, throttle_end, 0600);
+static SYSDEV_PREFIX_ATTR(ppe, throttle_begin, 0600);
+static SYSDEV_PREFIX_ATTR(ppe, throttle_full_stop, 0600);
+
+static struct attribute *ppe_attributes[] = {
+	&attr_ppe_temperature0.attr,
+	&attr_ppe_temperature1.attr,
+	&attr_ppe_throttle_end.attr,
+	&attr_ppe_throttle_begin.attr,
+	&attr_ppe_throttle_full_stop.attr,
+	NULL,
+};
+
+static struct attribute_group ppe_attribute_group = {
+	.name	= "thermal",
+	.attrs	= ppe_attributes,
+};
+
+/*
+ * initialize throttling with default values
+ */
+static int __init init_default_values(void)
+{
+	int cpu;
+	struct cbe_pmd_regs __iomem *pmd_regs;
+	struct sys_device *sysdev;
+	union ppe_spe_reg tpr;
+	union spe_reg str1;
+	u64 str2;
+	union spe_reg cr1;
+	u64 cr2;
+
+	/* TPR defaults */
+	/* ppe
+	 *	1F - no full stop
+	 *	08 - dynamic throttling starts if over 80 degrees
+	 *	03 - dynamic throttling ceases if below 70 degrees */
+	tpr.ppe = 0x1F0803;
+	/* spe
+	 *	10 - full stopped when over 96 degrees
+	 *	08 - dynamic throttling starts if over 80 degrees
+	 *	03 - dynamic throttling ceases if below 70 degrees
+	 */
+	tpr.spe = 0x100803;
+
+	/* STR defaults */
+	/* str1
+	 *	10 - stop 16 of 32 cycles
+	 */
+	str1.val = 0x1010101010101010ull;
+	/* str2
+	 *	10 - stop 16 of 32 cycles
+	 */
+	str2 = 0x10;
+
+	/* CR defaults */
+	/* cr1
+	 *	4 - normal operation
+	 */
+	cr1.val = 0x0404040404040404ull;
+	/* cr2
+	 *	4 - normal operation
+	 */
+	cr2 = 0x04;
+
+	for_each_possible_cpu (cpu) {
+		pr_debug("processing cpu %d\n", cpu);
+		sysdev = get_cpu_sysdev(cpu);
+
+		if (!sysdev) {
+			pr_info("invalid sysdev pointer for cbe_thermal\n");
+			return -EINVAL;
+		}
+
+		pmd_regs = cbe_get_cpu_pmd_regs(sysdev->id);
+
+		if (!pmd_regs) {
+			pr_info("invalid CBE regs pointer for cbe_thermal\n");
+			return -EINVAL;
+		}
+
+		out_be64(&pmd_regs->tm_str2, str2);
+		out_be64(&pmd_regs->tm_str1.val, str1.val);
+		out_be64(&pmd_regs->tm_tpr.val, tpr.val);
+		out_be64(&pmd_regs->tm_cr1.val, cr1.val);
+		out_be64(&pmd_regs->tm_cr2, cr2);
+	}
+
+	return 0;
+}
+
+
+static int __init thermal_init(void)
+{
+	int rc = init_default_values();
+
+	if (rc == 0) {
+		spu_add_sysdev_attr_group(&spu_attribute_group);
+		cpu_add_sysdev_attr_group(&ppe_attribute_group);
+	}
+
+	return rc;
+}
+module_init(thermal_init);
+
+static void __exit thermal_exit(void)
+{
+	spu_remove_sysdev_attr_group(&spu_attribute_group);
+	cpu_remove_sysdev_attr_group(&ppe_attribute_group);
+}
+module_exit(thermal_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
+
diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c
new file mode 100644
index 0000000..f39a3b2
--- /dev/null
+++ b/arch/powerpc/platforms/cell/celleb_pci.c
@@ -0,0 +1,514 @@
+/*
+ * Support for PCI on Celleb platform.
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This code is based on arch/powerpc/kernel/rtas_pci.c:
+ *  Copyright (C) 2001 Dave Engebretsen, IBM Corporation
+ *  Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/threads.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/pci_regs.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+#include "io-workarounds.h"
+#include "celleb_pci.h"
+
+#define MAX_PCI_DEVICES    32
+#define MAX_PCI_FUNCTIONS   8
+#define MAX_PCI_BASE_ADDRS  3 /* use 64 bit address */
+
+/* definition for fake pci configuration area for GbE, .... ,and etc. */
+
+struct celleb_pci_resource {
+	struct resource r[MAX_PCI_BASE_ADDRS];
+};
+
+struct celleb_pci_private {
+	unsigned char *fake_config[MAX_PCI_DEVICES][MAX_PCI_FUNCTIONS];
+	struct celleb_pci_resource *res[MAX_PCI_DEVICES][MAX_PCI_FUNCTIONS];
+};
+
+static inline u8 celleb_fake_config_readb(void *addr)
+{
+	u8 *p = addr;
+	return *p;
+}
+
+static inline u16 celleb_fake_config_readw(void *addr)
+{
+	__le16 *p = addr;
+	return le16_to_cpu(*p);
+}
+
+static inline u32 celleb_fake_config_readl(void *addr)
+{
+	__le32 *p = addr;
+	return le32_to_cpu(*p);
+}
+
+static inline void celleb_fake_config_writeb(u32 val, void *addr)
+{
+	u8 *p = addr;
+	*p = val;
+}
+
+static inline void celleb_fake_config_writew(u32 val, void *addr)
+{
+	__le16 val16;
+	__le16 *p = addr;
+	val16 = cpu_to_le16(val);
+	*p = val16;
+}
+
+static inline void celleb_fake_config_writel(u32 val, void *addr)
+{
+	__le32 val32;
+	__le32 *p = addr;
+	val32 = cpu_to_le32(val);
+	*p = val32;
+}
+
+static unsigned char *get_fake_config_start(struct pci_controller *hose,
+					    int devno, int fn)
+{
+	struct celleb_pci_private *private = hose->private_data;
+
+	if (private == NULL)
+		return NULL;
+
+	return private->fake_config[devno][fn];
+}
+
+static struct celleb_pci_resource *get_resource_start(
+				struct pci_controller *hose,
+				int devno, int fn)
+{
+	struct celleb_pci_private *private = hose->private_data;
+
+	if (private == NULL)
+		return NULL;
+
+	return private->res[devno][fn];
+}
+
+
+static void celleb_config_read_fake(unsigned char *config, int where,
+				    int size, u32 *val)
+{
+	char *p = config + where;
+
+	switch (size) {
+	case 1:
+		*val = celleb_fake_config_readb(p);
+		break;
+	case 2:
+		*val = celleb_fake_config_readw(p);
+		break;
+	case 4:
+		*val = celleb_fake_config_readl(p);
+		break;
+	}
+}
+
+static void celleb_config_write_fake(unsigned char *config, int where,
+				     int size, u32 val)
+{
+	char *p = config + where;
+
+	switch (size) {
+	case 1:
+		celleb_fake_config_writeb(val, p);
+		break;
+	case 2:
+		celleb_fake_config_writew(val, p);
+		break;
+	case 4:
+		celleb_fake_config_writel(val, p);
+		break;
+	}
+}
+
+static int celleb_fake_pci_read_config(struct pci_bus *bus,
+		unsigned int devfn, int where, int size, u32 *val)
+{
+	char *config;
+	struct device_node *node;
+	struct pci_controller *hose;
+	unsigned int devno = devfn >> 3;
+	unsigned int fn = devfn & 0x7;
+
+	/* allignment check */
+	BUG_ON(where % size);
+
+	pr_debug("    fake read: bus=0x%x, ", bus->number);
+	node = (struct device_node *)bus->sysdata;
+	hose = pci_find_hose_for_OF_device(node);
+	config = get_fake_config_start(hose, devno, fn);
+
+	pr_debug("devno=0x%x, where=0x%x, size=0x%x, ", devno, where, size);
+	if (!config) {
+		pr_debug("failed\n");
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+
+	celleb_config_read_fake(config, where, size, val);
+	pr_debug("val=0x%x\n", *val);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+
+static int celleb_fake_pci_write_config(struct pci_bus *bus,
+		unsigned int devfn, int where, int size, u32 val)
+{
+	char *config;
+	struct device_node *node;
+	struct pci_controller *hose;
+	struct celleb_pci_resource *res;
+	unsigned int devno = devfn >> 3;
+	unsigned int fn = devfn & 0x7;
+
+	/* allignment check */
+	BUG_ON(where % size);
+
+	node = (struct device_node *)bus->sysdata;
+	hose = pci_find_hose_for_OF_device(node);
+	config = get_fake_config_start(hose, devno, fn);
+
+	if (!config)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (val == ~0) {
+		int i = (where - PCI_BASE_ADDRESS_0) >> 3;
+
+		switch (where) {
+		case PCI_BASE_ADDRESS_0:
+		case PCI_BASE_ADDRESS_2:
+			if (size != 4)
+				return PCIBIOS_DEVICE_NOT_FOUND;
+			res = get_resource_start(hose, devno, fn);
+			if (!res)
+				return PCIBIOS_DEVICE_NOT_FOUND;
+			celleb_config_write_fake(config, where, size,
+					(res->r[i].end - res->r[i].start));
+			return PCIBIOS_SUCCESSFUL;
+		case PCI_BASE_ADDRESS_1:
+		case PCI_BASE_ADDRESS_3:
+		case PCI_BASE_ADDRESS_4:
+		case PCI_BASE_ADDRESS_5:
+			break;
+		default:
+			break;
+		}
+	}
+
+	celleb_config_write_fake(config, where, size, val);
+	pr_debug("    fake write: where=%x, size=%d, val=%x\n",
+		 where, size, val);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops celleb_fake_pci_ops = {
+	.read = celleb_fake_pci_read_config,
+	.write = celleb_fake_pci_write_config,
+};
+
+static inline void celleb_setup_pci_base_addrs(struct pci_controller *hose,
+					unsigned int devno, unsigned int fn,
+					unsigned int num_base_addr)
+{
+	u32 val;
+	unsigned char *config;
+	struct celleb_pci_resource *res;
+
+	config = get_fake_config_start(hose, devno, fn);
+	res = get_resource_start(hose, devno, fn);
+
+	if (!config || !res)
+		return;
+
+	switch (num_base_addr) {
+	case 3:
+		val = (res->r[2].start & 0xfffffff0)
+		    | PCI_BASE_ADDRESS_MEM_TYPE_64;
+		celleb_config_write_fake(config, PCI_BASE_ADDRESS_4, 4, val);
+		val = res->r[2].start >> 32;
+		celleb_config_write_fake(config, PCI_BASE_ADDRESS_5, 4, val);
+		/* FALLTHROUGH */
+	case 2:
+		val = (res->r[1].start & 0xfffffff0)
+		    | PCI_BASE_ADDRESS_MEM_TYPE_64;
+		celleb_config_write_fake(config, PCI_BASE_ADDRESS_2, 4, val);
+		val = res->r[1].start >> 32;
+		celleb_config_write_fake(config, PCI_BASE_ADDRESS_3, 4, val);
+		/* FALLTHROUGH */
+	case 1:
+		val = (res->r[0].start & 0xfffffff0)
+		    | PCI_BASE_ADDRESS_MEM_TYPE_64;
+		celleb_config_write_fake(config, PCI_BASE_ADDRESS_0, 4, val);
+		val = res->r[0].start >> 32;
+		celleb_config_write_fake(config, PCI_BASE_ADDRESS_1, 4, val);
+		break;
+	}
+
+	val = PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
+	celleb_config_write_fake(config, PCI_COMMAND, 2, val);
+}
+
+static int __init celleb_setup_fake_pci_device(struct device_node *node,
+					       struct pci_controller *hose)
+{
+	unsigned int rlen;
+	int num_base_addr = 0;
+	u32 val;
+	const u32 *wi0, *wi1, *wi2, *wi3, *wi4;
+	unsigned int devno, fn;
+	struct celleb_pci_private *private = hose->private_data;
+	unsigned char **config = NULL;
+	struct celleb_pci_resource **res = NULL;
+	const char *name;
+	const unsigned long *li;
+	int size, result;
+
+	if (private == NULL) {
+		printk(KERN_ERR "PCI: "
+		       "memory space for pci controller is not assigned\n");
+		goto error;
+	}
+
+	name = of_get_property(node, "model", &rlen);
+	if (!name) {
+		printk(KERN_ERR "PCI: model property not found.\n");
+		goto error;
+	}
+
+	wi4 = of_get_property(node, "reg", &rlen);
+	if (wi4 == NULL)
+		goto error;
+
+	devno = ((wi4[0] >> 8) & 0xff) >> 3;
+	fn = (wi4[0] >> 8) & 0x7;
+
+	pr_debug("PCI: celleb_setup_fake_pci() %s devno=%x fn=%x\n", name,
+		 devno, fn);
+
+	size = 256;
+	config = &private->fake_config[devno][fn];
+	*config = alloc_maybe_bootmem(size, GFP_KERNEL);
+	if (*config == NULL) {
+		printk(KERN_ERR "PCI: "
+		       "not enough memory for fake configuration space\n");
+		goto error;
+	}
+	pr_debug("PCI: fake config area assigned 0x%016lx\n",
+		 (unsigned long)*config);
+
+	size = sizeof(struct celleb_pci_resource);
+	res = &private->res[devno][fn];
+	*res = alloc_maybe_bootmem(size, GFP_KERNEL);
+	if (*res == NULL) {
+		printk(KERN_ERR
+		       "PCI: not enough memory for resource data space\n");
+		goto error;
+	}
+	pr_debug("PCI: res assigned 0x%016lx\n", (unsigned long)*res);
+
+	wi0 = of_get_property(node, "device-id", NULL);
+	wi1 = of_get_property(node, "vendor-id", NULL);
+	wi2 = of_get_property(node, "class-code", NULL);
+	wi3 = of_get_property(node, "revision-id", NULL);
+	if (!wi0 || !wi1 || !wi2 || !wi3) {
+		printk(KERN_ERR "PCI: Missing device tree properties.\n");
+		goto error;
+	}
+
+	celleb_config_write_fake(*config, PCI_DEVICE_ID, 2, wi0[0] & 0xffff);
+	celleb_config_write_fake(*config, PCI_VENDOR_ID, 2, wi1[0] & 0xffff);
+	pr_debug("class-code = 0x%08x\n", wi2[0]);
+
+	celleb_config_write_fake(*config, PCI_CLASS_PROG, 1, wi2[0] & 0xff);
+	celleb_config_write_fake(*config, PCI_CLASS_DEVICE, 2,
+				 (wi2[0] >> 8) & 0xffff);
+	celleb_config_write_fake(*config, PCI_REVISION_ID, 1, wi3[0]);
+
+	while (num_base_addr < MAX_PCI_BASE_ADDRS) {
+		result = of_address_to_resource(node,
+				num_base_addr, &(*res)->r[num_base_addr]);
+		if (result)
+			break;
+		num_base_addr++;
+	}
+
+	celleb_setup_pci_base_addrs(hose, devno, fn, num_base_addr);
+
+	li = of_get_property(node, "interrupts", &rlen);
+	if (!li) {
+		printk(KERN_ERR "PCI: interrupts not found.\n");
+		goto error;
+	}
+	val = li[0];
+	celleb_config_write_fake(*config, PCI_INTERRUPT_PIN, 1, 1);
+	celleb_config_write_fake(*config, PCI_INTERRUPT_LINE, 1, val);
+
+#ifdef DEBUG
+	pr_debug("PCI: %s irq=%ld\n", name, li[0]);
+	for (i = 0; i < 6; i++) {
+		celleb_config_read_fake(*config,
+					PCI_BASE_ADDRESS_0 + 0x4 * i, 4,
+					&val);
+		pr_debug("PCI: %s fn=%d base_address_%d=0x%x\n",
+			 name, fn, i, val);
+	}
+#endif
+
+	celleb_config_write_fake(*config, PCI_HEADER_TYPE, 1,
+				 PCI_HEADER_TYPE_NORMAL);
+
+	return 0;
+
+error:
+	if (mem_init_done) {
+		if (config && *config)
+			kfree(*config);
+		if (res && *res)
+			kfree(*res);
+
+	} else {
+		if (config && *config) {
+			size = 256;
+			free_bootmem((unsigned long)(*config), size);
+		}
+		if (res && *res) {
+			size = sizeof(struct celleb_pci_resource);
+			free_bootmem((unsigned long)(*res), size);
+		}
+	}
+
+	return 1;
+}
+
+static int __init phb_set_bus_ranges(struct device_node *dev,
+				     struct pci_controller *phb)
+{
+	const int *bus_range;
+	unsigned int len;
+
+	bus_range = of_get_property(dev, "bus-range", &len);
+	if (bus_range == NULL || len < 2 * sizeof(int))
+		return 1;
+
+	phb->first_busno = bus_range[0];
+	phb->last_busno = bus_range[1];
+
+	return 0;
+}
+
+static void __init celleb_alloc_private_mem(struct pci_controller *hose)
+{
+	hose->private_data =
+		alloc_maybe_bootmem(sizeof(struct celleb_pci_private),
+			GFP_KERNEL);
+}
+
+static int __init celleb_setup_fake_pci(struct device_node *dev,
+					struct pci_controller *phb)
+{
+	struct device_node *node;
+
+	phb->ops = &celleb_fake_pci_ops;
+	celleb_alloc_private_mem(phb);
+
+	for (node = of_get_next_child(dev, NULL);
+	     node != NULL; node = of_get_next_child(dev, node))
+		celleb_setup_fake_pci_device(node, phb);
+
+	return 0;
+}
+
+static struct celleb_phb_spec celleb_fake_pci_spec __initdata = {
+	.setup = celleb_setup_fake_pci,
+};
+
+static struct of_device_id celleb_phb_match[] __initdata = {
+	{
+		.name = "pci-pseudo",
+		.data = &celleb_fake_pci_spec,
+	}, {
+		.name = "epci",
+		.data = &celleb_epci_spec,
+	}, {
+		.name = "pcie",
+		.data = &celleb_pciex_spec,
+	}, {
+	},
+};
+
+static int __init celleb_io_workaround_init(struct pci_controller *phb,
+					    struct celleb_phb_spec *phb_spec)
+{
+	if (phb_spec->ops) {
+		iowa_register_bus(phb, phb_spec->ops, phb_spec->iowa_init,
+				  phb_spec->iowa_data);
+		io_workaround_init();
+	}
+
+	return 0;
+}
+
+int __init celleb_setup_phb(struct pci_controller *phb)
+{
+	struct device_node *dev = phb->dn;
+	const struct of_device_id *match;
+	struct celleb_phb_spec *phb_spec;
+	int rc;
+
+	match = of_match_node(celleb_phb_match, dev);
+	if (!match)
+		return 1;
+
+	phb_set_bus_ranges(dev, phb);
+	phb->buid = 1;
+
+	phb_spec = match->data;
+	rc = (*phb_spec->setup)(dev, phb);
+	if (rc)
+		return 1;
+
+	return celleb_io_workaround_init(phb, phb_spec);
+}
+
+int celleb_pci_probe_mode(struct pci_bus *bus)
+{
+	return PCI_PROBE_DEVTREE;
+}
diff --git a/arch/powerpc/platforms/cell/celleb_pci.h b/arch/powerpc/platforms/cell/celleb_pci.h
new file mode 100644
index 0000000..4cba152
--- /dev/null
+++ b/arch/powerpc/platforms/cell/celleb_pci.h
@@ -0,0 +1,45 @@
+/*
+ * pci prototypes for Celleb platform
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _CELLEB_PCI_H
+#define _CELLEB_PCI_H
+
+#include <linux/pci.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/prom.h>
+#include <asm/ppc-pci.h>
+
+#include "io-workarounds.h"
+
+struct celleb_phb_spec {
+	int (*setup)(struct device_node *, struct pci_controller *);
+	struct ppc_pci_io *ops;
+	int (*iowa_init)(struct iowa_bus *, void *);
+	void *iowa_data;
+};
+
+extern int celleb_setup_phb(struct pci_controller *);
+extern int celleb_pci_probe_mode(struct pci_bus *);
+
+extern struct celleb_phb_spec celleb_epci_spec;
+extern struct celleb_phb_spec celleb_pciex_spec;
+
+#endif /* _CELLEB_PCI_H */
diff --git a/arch/powerpc/platforms/cell/celleb_scc.h b/arch/powerpc/platforms/cell/celleb_scc.h
new file mode 100644
index 0000000..b596a71
--- /dev/null
+++ b/arch/powerpc/platforms/cell/celleb_scc.h
@@ -0,0 +1,232 @@
+/*
+ * SCC (Super Companion Chip) definitions
+ *
+ * (C) Copyright 2004-2006 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _CELLEB_SCC_H
+#define _CELLEB_SCC_H
+
+#define PCI_VENDOR_ID_TOSHIBA_2                 0x102f
+#define PCI_DEVICE_ID_TOSHIBA_SCC_PCIEXC_BRIDGE 0x01b0
+#define PCI_DEVICE_ID_TOSHIBA_SCC_EPCI_BRIDGE   0x01b1
+#define PCI_DEVICE_ID_TOSHIBA_SCC_BRIDGE        0x01b2
+#define PCI_DEVICE_ID_TOSHIBA_SCC_GBE           0x01b3
+#define PCI_DEVICE_ID_TOSHIBA_SCC_ATA           0x01b4
+#define PCI_DEVICE_ID_TOSHIBA_SCC_USB2          0x01b5
+#define PCI_DEVICE_ID_TOSHIBA_SCC_USB           0x01b6
+#define PCI_DEVICE_ID_TOSHIBA_SCC_ENCDEC        0x01b7
+
+#define SCC_EPCI_REG            0x0000d000
+
+/* EPCI registers */
+#define SCC_EPCI_CNF10_REG      0x010
+#define SCC_EPCI_CNF14_REG      0x014
+#define SCC_EPCI_CNF18_REG      0x018
+#define SCC_EPCI_PVBAT          0x100
+#define SCC_EPCI_VPMBAT         0x104
+#define SCC_EPCI_VPIBAT         0x108
+#define SCC_EPCI_VCSR           0x110
+#define SCC_EPCI_VIENAB         0x114
+#define SCC_EPCI_VISTAT         0x118
+#define SCC_EPCI_VRDCOUNT       0x124
+#define SCC_EPCI_BAM0           0x12c
+#define SCC_EPCI_BAM1           0x134
+#define SCC_EPCI_BAM2           0x13c
+#define SCC_EPCI_IADR           0x164
+#define SCC_EPCI_CLKRST         0x800
+#define SCC_EPCI_INTSET         0x804
+#define SCC_EPCI_STATUS         0x808
+#define SCC_EPCI_ABTSET         0x80c
+#define SCC_EPCI_WATRP          0x810
+#define SCC_EPCI_DUMYRADR       0x814
+#define SCC_EPCI_SWRESP         0x818
+#define SCC_EPCI_CNTOPT         0x81c
+#define SCC_EPCI_ECMODE         0xf00
+#define SCC_EPCI_IOM_AC_NUM     5
+#define SCC_EPCI_IOM_ACTE(n)    (0xf10 + (n) * 4)
+#define SCC_EPCI_IOT_AC_NUM     4
+#define SCC_EPCI_IOT_ACTE(n)    (0xf30 + (n) * 4)
+#define SCC_EPCI_MAEA           0xf50
+#define SCC_EPCI_MAEC           0xf54
+#define SCC_EPCI_CKCTRL         0xff0
+
+/* bits for SCC_EPCI_VCSR */
+#define SCC_EPCI_VCSR_FRE       0x00020000
+#define SCC_EPCI_VCSR_FWE       0x00010000
+#define SCC_EPCI_VCSR_DR        0x00000400
+#define SCC_EPCI_VCSR_SR        0x00000008
+#define SCC_EPCI_VCSR_AT        0x00000004
+
+/* bits for SCC_EPCI_VIENAB/SCC_EPCI_VISTAT */
+#define SCC_EPCI_VISTAT_PMPE    0x00000008
+#define SCC_EPCI_VISTAT_PMFE    0x00000004
+#define SCC_EPCI_VISTAT_PRA     0x00000002
+#define SCC_EPCI_VISTAT_PRD     0x00000001
+#define SCC_EPCI_VISTAT_ALL     0x0000000f
+
+#define SCC_EPCI_VIENAB_PMPEE   0x00000008
+#define SCC_EPCI_VIENAB_PMFEE   0x00000004
+#define SCC_EPCI_VIENAB_PRA     0x00000002
+#define SCC_EPCI_VIENAB_PRD     0x00000001
+#define SCC_EPCI_VIENAB_ALL     0x0000000f
+
+/* bits for SCC_EPCI_CLKRST */
+#define SCC_EPCI_CLKRST_CKS_MASK 0x00030000
+#define SCC_EPCI_CLKRST_CKS_2   0x00000000
+#define SCC_EPCI_CLKRST_CKS_4   0x00010000
+#define SCC_EPCI_CLKRST_CKS_8   0x00020000
+#define SCC_EPCI_CLKRST_PCICRST 0x00000400
+#define SCC_EPCI_CLKRST_BC      0x00000200
+#define SCC_EPCI_CLKRST_PCIRST  0x00000100
+#define SCC_EPCI_CLKRST_PCKEN   0x00000001
+
+/* bits for SCC_EPCI_INTSET/SCC_EPCI_STATUS */
+#define SCC_EPCI_INT_2M         0x01000000
+#define SCC_EPCI_INT_RERR       0x00200000
+#define SCC_EPCI_INT_SERR       0x00100000
+#define SCC_EPCI_INT_PRTER      0x00080000
+#define SCC_EPCI_INT_SER        0x00040000
+#define SCC_EPCI_INT_PER        0x00020000
+#define SCC_EPCI_INT_PAI        0x00010000
+#define SCC_EPCI_INT_1M         0x00000100
+#define SCC_EPCI_INT_PME        0x00000010
+#define SCC_EPCI_INT_INTD       0x00000008
+#define SCC_EPCI_INT_INTC       0x00000004
+#define SCC_EPCI_INT_INTB       0x00000002
+#define SCC_EPCI_INT_INTA       0x00000001
+#define SCC_EPCI_INT_DEVINT     0x0000000f
+#define SCC_EPCI_INT_ALL        0x003f001f
+#define SCC_EPCI_INT_ALLERR     0x003f0000
+
+/* bits for SCC_EPCI_CKCTRL */
+#define SCC_EPCI_CKCTRL_CRST0   0x00010000
+#define SCC_EPCI_CKCTRL_CRST1   0x00020000
+#define SCC_EPCI_CKCTRL_OCLKEN  0x00000100
+#define SCC_EPCI_CKCTRL_LCLKEN  0x00000001
+
+#define SCC_EPCI_IDSEL_AD_TO_SLOT(ad)       ((ad) - 10)
+#define SCC_EPCI_MAX_DEVNU      SCC_EPCI_IDSEL_AD_TO_SLOT(32)
+
+/* bits for SCC_EPCI_CNTOPT */
+#define SCC_EPCI_CNTOPT_O2PMB   0x00000002
+
+/* SCC PCIEXC SMMIO registers */
+#define PEXCADRS		0x000
+#define PEXCWDATA		0x004
+#define PEXCRDATA		0x008
+#define PEXDADRS		0x010
+#define PEXDCMND		0x014
+#define PEXDWDATA		0x018
+#define PEXDRDATA		0x01c
+#define PEXREQID		0x020
+#define PEXTIDMAP		0x024
+#define PEXINTMASK		0x028
+#define PEXINTSTS		0x02c
+#define PEXAERRMASK		0x030
+#define PEXAERRSTS		0x034
+#define PEXPRERRMASK		0x040
+#define PEXPRERRSTS		0x044
+#define PEXPRERRID01		0x048
+#define PEXPRERRID23		0x04c
+#define PEXVDMASK		0x050
+#define PEXVDSTS		0x054
+#define PEXRCVCPLIDA		0x060
+#define PEXLENERRIDA		0x068
+#define PEXPHYPLLST		0x070
+#define PEXDMRDEN0		0x100
+#define PEXDMRDADR0		0x104
+#define PEXDMRDENX		0x110
+#define PEXDMRDADRX		0x114
+#define PEXECMODE		0xf00
+#define PEXMAEA(n)		(0xf50 + (8 * n))
+#define PEXMAEC(n)		(0xf54 + (8 * n))
+#define PEXCCRCTRL		0xff0
+
+/* SCC PCIEXC bits and shifts for PEXCADRS */
+#define PEXCADRS_BYTE_EN_SHIFT		20
+#define PEXCADRS_CMD_SHIFT		16
+#define PEXCADRS_CMD_READ		(0xa << PEXCADRS_CMD_SHIFT)
+#define PEXCADRS_CMD_WRITE		(0xb << PEXCADRS_CMD_SHIFT)
+
+/* SCC PCIEXC shifts for PEXDADRS */
+#define PEXDADRS_BUSNO_SHIFT		20
+#define PEXDADRS_DEVNO_SHIFT		15
+#define PEXDADRS_FUNCNO_SHIFT		12
+
+/* SCC PCIEXC bits and shifts for PEXDCMND */
+#define PEXDCMND_BYTE_EN_SHIFT		4
+#define PEXDCMND_IO_READ		0x2
+#define PEXDCMND_IO_WRITE		0x3
+#define PEXDCMND_CONFIG_READ		0xa
+#define PEXDCMND_CONFIG_WRITE		0xb
+
+/* SCC PCIEXC bits for PEXPHYPLLST */
+#define PEXPHYPLLST_PEXPHYAPLLST	0x00000001
+
+/* SCC PCIEXC bits for PEXECMODE */
+#define PEXECMODE_ALL_THROUGH		0x00000000
+#define PEXECMODE_ALL_8BIT		0x00550155
+#define PEXECMODE_ALL_16BIT		0x00aa02aa
+
+/* SCC PCIEXC bits for PEXCCRCTRL */
+#define PEXCCRCTRL_PEXIPCOREEN		0x00040000
+#define PEXCCRCTRL_PEXIPCONTEN		0x00020000
+#define PEXCCRCTRL_PEXPHYPLLEN		0x00010000
+#define PEXCCRCTRL_PCIEXCAOCKEN		0x00000100
+
+/* SCC PCIEXC port configuration registers */
+#define PEXTCERRCHK		0x21c
+#define PEXTAMAPB0		0x220
+#define PEXTAMAPL0		0x224
+#define PEXTAMAPB(n)		(PEXTAMAPB0 + 8 * (n))
+#define PEXTAMAPL(n)		(PEXTAMAPL0 + 8 * (n))
+#define PEXCHVC0P		0x500
+#define PEXCHVC0NP		0x504
+#define PEXCHVC0C		0x508
+#define PEXCDVC0P		0x50c
+#define PEXCDVC0NP		0x510
+#define PEXCDVC0C		0x514
+#define PEXCHVCXP		0x518
+#define PEXCHVCXNP		0x51c
+#define PEXCHVCXC		0x520
+#define PEXCDVCXP		0x524
+#define PEXCDVCXNP		0x528
+#define PEXCDVCXC		0x52c
+#define PEXCTTRG		0x530
+#define PEXTSCTRL		0x700
+#define PEXTSSTS		0x704
+#define PEXSKPCTRL		0x708
+
+/* UHC registers */
+#define SCC_UHC_CKRCTRL         0xff0
+#define SCC_UHC_ECMODE          0xf00
+
+/* bits for SCC_UHC_CKRCTRL */
+#define SCC_UHC_F48MCKLEN       0x00000001
+#define SCC_UHC_P_SUSPEND       0x00000002
+#define SCC_UHC_PHY_SUSPEND_SEL 0x00000004
+#define SCC_UHC_HCLKEN          0x00000100
+#define SCC_UHC_USBEN           0x00010000
+#define SCC_UHC_USBCEN          0x00020000
+#define SCC_UHC_PHYEN           0x00040000
+
+/* bits for SCC_UHC_ECMODE */
+#define SCC_UHC_ECMODE_BY_BYTE  0x00000555
+#define SCC_UHC_ECMODE_BY_WORD  0x00000aaa
+
+#endif /* _CELLEB_SCC_H */
diff --git a/arch/powerpc/platforms/cell/celleb_scc_epci.c b/arch/powerpc/platforms/cell/celleb_scc_epci.c
new file mode 100644
index 0000000..08c285b
--- /dev/null
+++ b/arch/powerpc/platforms/cell/celleb_scc_epci.c
@@ -0,0 +1,438 @@
+/*
+ * Support for SCC external PCI
+ *
+ * (C) Copyright 2004-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/threads.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/pci_regs.h>
+#include <linux/bootmem.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+#include "celleb_scc.h"
+#include "celleb_pci.h"
+
+#define MAX_PCI_DEVICES   32
+#define MAX_PCI_FUNCTIONS  8
+
+#define iob()  __asm__ __volatile__("eieio; sync":::"memory")
+
+static inline PCI_IO_ADDR celleb_epci_get_epci_base(
+					struct pci_controller *hose)
+{
+	/*
+	 * Note:
+	 * Celleb epci uses cfg_addr as a base address for
+	 * epci control registers.
+	 */
+
+	return hose->cfg_addr;
+}
+
+static inline PCI_IO_ADDR celleb_epci_get_epci_cfg(
+					struct pci_controller *hose)
+{
+	/*
+	 * Note:
+	 * Celleb epci uses cfg_data as a base address for
+	 * configuration area for epci devices.
+	 */
+
+	return hose->cfg_data;
+}
+
+static inline void clear_and_disable_master_abort_interrupt(
+					struct pci_controller *hose)
+{
+	PCI_IO_ADDR epci_base;
+	PCI_IO_ADDR reg;
+	epci_base = celleb_epci_get_epci_base(hose);
+	reg = epci_base + PCI_COMMAND;
+	out_be32(reg, in_be32(reg) | (PCI_STATUS_REC_MASTER_ABORT << 16));
+}
+
+static int celleb_epci_check_abort(struct pci_controller *hose,
+				   PCI_IO_ADDR addr)
+{
+	PCI_IO_ADDR reg;
+	PCI_IO_ADDR epci_base;
+	u32 val;
+
+	iob();
+	epci_base = celleb_epci_get_epci_base(hose);
+
+	reg = epci_base + PCI_COMMAND;
+	val = in_be32(reg);
+
+	if (val & (PCI_STATUS_REC_MASTER_ABORT << 16)) {
+		out_be32(reg,
+			 (val & 0xffff) | (PCI_STATUS_REC_MASTER_ABORT << 16));
+
+		/* clear PCI Controller error, FRE, PMFE */
+		reg = epci_base + SCC_EPCI_STATUS;
+		out_be32(reg, SCC_EPCI_INT_PAI);
+
+		reg = epci_base + SCC_EPCI_VCSR;
+		val = in_be32(reg) & 0xffff;
+		val |= SCC_EPCI_VCSR_FRE;
+		out_be32(reg, val);
+
+		reg = epci_base + SCC_EPCI_VISTAT;
+		out_be32(reg, SCC_EPCI_VISTAT_PMFE);
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static PCI_IO_ADDR celleb_epci_make_config_addr(struct pci_bus *bus,
+		struct pci_controller *hose, unsigned int devfn, int where)
+{
+	PCI_IO_ADDR addr;
+
+	if (bus != hose->bus)
+		addr = celleb_epci_get_epci_cfg(hose) +
+		       (((bus->number & 0xff) << 16)
+			| ((devfn & 0xff) << 8)
+			| (where & 0xff)
+			| 0x01000000);
+	else
+		addr = celleb_epci_get_epci_cfg(hose) +
+		       (((devfn & 0xff) << 8) | (where & 0xff));
+
+	pr_debug("EPCI: config_addr = 0x%p\n", addr);
+
+	return addr;
+}
+
+static int celleb_epci_read_config(struct pci_bus *bus,
+			unsigned int devfn, int where, int size, u32 *val)
+{
+	PCI_IO_ADDR epci_base;
+	PCI_IO_ADDR addr;
+	struct device_node *node;
+	struct pci_controller *hose;
+
+	/* allignment check */
+	BUG_ON(where % size);
+
+	node = (struct device_node *)bus->sysdata;
+	hose = pci_find_hose_for_OF_device(node);
+
+	if (!celleb_epci_get_epci_cfg(hose))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (bus->number == hose->first_busno && devfn == 0) {
+		/* EPCI controller self */
+
+		epci_base = celleb_epci_get_epci_base(hose);
+		addr = epci_base + where;
+
+		switch (size) {
+		case 1:
+			*val = in_8(addr);
+			break;
+		case 2:
+			*val = in_be16(addr);
+			break;
+		case 4:
+			*val = in_be32(addr);
+			break;
+		default:
+			return PCIBIOS_DEVICE_NOT_FOUND;
+		}
+
+	} else {
+
+		clear_and_disable_master_abort_interrupt(hose);
+		addr = celleb_epci_make_config_addr(bus, hose, devfn, where);
+
+		switch (size) {
+		case 1:
+			*val = in_8(addr);
+			break;
+		case 2:
+			*val = in_le16(addr);
+			break;
+		case 4:
+			*val = in_le32(addr);
+			break;
+		default:
+			return PCIBIOS_DEVICE_NOT_FOUND;
+		}
+	}
+
+	pr_debug("EPCI: "
+		 "addr=0x%p, devfn=0x%x, where=0x%x, size=0x%x, val=0x%x\n",
+		 addr, devfn, where, size, *val);
+
+	return celleb_epci_check_abort(hose, NULL);
+}
+
+static int celleb_epci_write_config(struct pci_bus *bus,
+			unsigned int devfn, int where, int size, u32 val)
+{
+	PCI_IO_ADDR epci_base;
+	PCI_IO_ADDR addr;
+	struct device_node *node;
+	struct pci_controller *hose;
+
+	/* allignment check */
+	BUG_ON(where % size);
+
+	node = (struct device_node *)bus->sysdata;
+	hose = pci_find_hose_for_OF_device(node);
+
+
+	if (!celleb_epci_get_epci_cfg(hose))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (bus->number == hose->first_busno && devfn == 0) {
+		/* EPCI controller self */
+
+		epci_base = celleb_epci_get_epci_base(hose);
+		addr = epci_base + where;
+
+		switch (size) {
+		case 1:
+			out_8(addr, val);
+			break;
+		case 2:
+			out_be16(addr, val);
+			break;
+		case 4:
+			out_be32(addr, val);
+			break;
+		default:
+			return PCIBIOS_DEVICE_NOT_FOUND;
+		}
+
+	} else {
+
+		clear_and_disable_master_abort_interrupt(hose);
+		addr = celleb_epci_make_config_addr(bus, hose, devfn, where);
+
+		switch (size) {
+		case 1:
+			out_8(addr, val);
+			break;
+		case 2:
+			out_le16(addr, val);
+			break;
+		case 4:
+			out_le32(addr, val);
+			break;
+		default:
+			return PCIBIOS_DEVICE_NOT_FOUND;
+		}
+	}
+
+	return celleb_epci_check_abort(hose, addr);
+}
+
+struct pci_ops celleb_epci_ops = {
+	.read = celleb_epci_read_config,
+	.write = celleb_epci_write_config,
+};
+
+/* to be moved in FW */
+static int __init celleb_epci_init(struct pci_controller *hose)
+{
+	u32 val;
+	PCI_IO_ADDR reg;
+	PCI_IO_ADDR epci_base;
+	int hwres = 0;
+
+	epci_base = celleb_epci_get_epci_base(hose);
+
+	/* PCI core reset(Internal bus and PCI clock) */
+	reg = epci_base + SCC_EPCI_CKCTRL;
+	val = in_be32(reg);
+	if (val == 0x00030101)
+		hwres = 1;
+	else {
+		val &= ~(SCC_EPCI_CKCTRL_CRST0 | SCC_EPCI_CKCTRL_CRST1);
+		out_be32(reg, val);
+
+		/* set PCI core clock */
+		val = in_be32(reg);
+		val |= (SCC_EPCI_CKCTRL_OCLKEN | SCC_EPCI_CKCTRL_LCLKEN);
+		out_be32(reg, val);
+
+		/* release PCI core reset (internal bus) */
+		val = in_be32(reg);
+		val |= SCC_EPCI_CKCTRL_CRST0;
+		out_be32(reg, val);
+
+		/* set PCI clock select */
+		reg = epci_base + SCC_EPCI_CLKRST;
+		val = in_be32(reg);
+		val &= ~SCC_EPCI_CLKRST_CKS_MASK;
+		val |= SCC_EPCI_CLKRST_CKS_2;
+		out_be32(reg, val);
+
+		/* set arbiter */
+		reg = epci_base + SCC_EPCI_ABTSET;
+		out_be32(reg, 0x0f1f001f);	/* temporary value */
+
+		/* buffer on */
+		reg = epci_base + SCC_EPCI_CLKRST;
+		val = in_be32(reg);
+		val |= SCC_EPCI_CLKRST_BC;
+		out_be32(reg, val);
+
+		/* PCI clock enable */
+		val = in_be32(reg);
+		val |= SCC_EPCI_CLKRST_PCKEN;
+		out_be32(reg, val);
+
+		/* release PCI core reset (all) */
+		reg = epci_base + SCC_EPCI_CKCTRL;
+		val = in_be32(reg);
+		val |= (SCC_EPCI_CKCTRL_CRST0 | SCC_EPCI_CKCTRL_CRST1);
+		out_be32(reg, val);
+
+		/* set base translation registers. (already set by Beat) */
+
+		/* set base address masks. (already set by Beat) */
+	}
+
+	/* release interrupt masks and clear all interrupts */
+	reg = epci_base + SCC_EPCI_INTSET;
+	out_be32(reg, 0x013f011f);	/* all interrupts enable */
+	reg = epci_base + SCC_EPCI_VIENAB;
+	val = SCC_EPCI_VIENAB_PMPEE | SCC_EPCI_VIENAB_PMFEE;
+	out_be32(reg, val);
+	reg = epci_base + SCC_EPCI_STATUS;
+	out_be32(reg, 0xffffffff);
+	reg = epci_base + SCC_EPCI_VISTAT;
+	out_be32(reg, 0xffffffff);
+
+	/* disable PCI->IB address translation */
+	reg = epci_base + SCC_EPCI_VCSR;
+	val = in_be32(reg);
+	val &= ~(SCC_EPCI_VCSR_DR | SCC_EPCI_VCSR_AT);
+	out_be32(reg, val);
+
+	/* set base addresses. (no need to set?) */
+
+	/* memory space, bus master enable */
+	reg = epci_base + PCI_COMMAND;
+	val = PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
+	out_be32(reg, val);
+
+	/* endian mode setup */
+	reg = epci_base + SCC_EPCI_ECMODE;
+	val = 0x00550155;
+	out_be32(reg, val);
+
+	/* set control option */
+	reg = epci_base + SCC_EPCI_CNTOPT;
+	val = in_be32(reg);
+	val |= SCC_EPCI_CNTOPT_O2PMB;
+	out_be32(reg, val);
+
+	/* XXX: temporay: set registers for address conversion setup */
+	reg = epci_base + SCC_EPCI_CNF10_REG;
+	out_be32(reg, 0x80000008);
+	reg = epci_base + SCC_EPCI_CNF14_REG;
+	out_be32(reg, 0x40000008);
+
+	reg = epci_base + SCC_EPCI_BAM0;
+	out_be32(reg, 0x80000000);
+	reg = epci_base + SCC_EPCI_BAM1;
+	out_be32(reg, 0xe0000000);
+
+	reg = epci_base + SCC_EPCI_PVBAT;
+	out_be32(reg, 0x80000000);
+
+	if (!hwres) {
+		/* release external PCI reset */
+		reg = epci_base + SCC_EPCI_CLKRST;
+		val = in_be32(reg);
+		val |= SCC_EPCI_CLKRST_PCIRST;
+		out_be32(reg, val);
+	}
+
+	return 0;
+}
+
+static int __init celleb_setup_epci(struct device_node *node,
+				    struct pci_controller *hose)
+{
+	struct resource r;
+
+	pr_debug("PCI: celleb_setup_epci()\n");
+
+	/*
+	 * Note:
+	 * Celleb epci uses cfg_addr and cfg_data member of
+	 * pci_controller structure in irregular way.
+	 *
+	 * cfg_addr is used to map for control registers of
+	 * celleb epci.
+	 *
+	 * cfg_data is used for configuration area of devices
+	 * on Celleb epci buses.
+	 */
+
+	if (of_address_to_resource(node, 0, &r))
+		goto error;
+	hose->cfg_addr = ioremap(r.start, (r.end - r.start + 1));
+	if (!hose->cfg_addr)
+		goto error;
+	pr_debug("EPCI: cfg_addr map 0x%016lx->0x%016lx + 0x%016lx\n",
+		 r.start, (unsigned long)hose->cfg_addr, (r.end - r.start + 1));
+
+	if (of_address_to_resource(node, 2, &r))
+		goto error;
+	hose->cfg_data = ioremap(r.start, (r.end - r.start + 1));
+	if (!hose->cfg_data)
+		goto error;
+	pr_debug("EPCI: cfg_data map 0x%016lx->0x%016lx + 0x%016lx\n",
+		 r.start, (unsigned long)hose->cfg_data, (r.end - r.start + 1));
+
+	hose->ops = &celleb_epci_ops;
+	celleb_epci_init(hose);
+
+	return 0;
+
+error:
+	if (hose->cfg_addr)
+		iounmap(hose->cfg_addr);
+
+	if (hose->cfg_data)
+		iounmap(hose->cfg_data);
+	return 1;
+}
+
+struct celleb_phb_spec celleb_epci_spec __initdata = {
+	.setup = celleb_setup_epci,
+	.ops = &spiderpci_ops,
+	.iowa_init = &spiderpci_iowa_init,
+	.iowa_data = (void *)0,
+};
diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c
new file mode 100644
index 0000000..3e7e0f1
--- /dev/null
+++ b/arch/powerpc/platforms/cell/celleb_scc_pciex.c
@@ -0,0 +1,548 @@
+/*
+ * Support for Celleb PCI-Express.
+ *
+ * (C) Copyright 2007-2008 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/iommu.h>
+#include <asm/byteorder.h>
+
+#include "celleb_scc.h"
+#include "celleb_pci.h"
+
+#define PEX_IN(base, off)	in_be32((void __iomem *)(base) + (off))
+#define PEX_OUT(base, off, data) out_be32((void __iomem *)(base) + (off), (data))
+
+static void scc_pciex_io_flush(struct iowa_bus *bus)
+{
+	(void)PEX_IN(bus->phb->cfg_addr, PEXDMRDEN0);
+}
+
+/*
+ * Memory space access to device on PCIEX
+ */
+#define PCIEX_MMIO_READ(name, ret)					\
+static ret scc_pciex_##name(const PCI_IO_ADDR addr)			\
+{									\
+	ret val = __do_##name(addr);					\
+	scc_pciex_io_flush(iowa_mem_find_bus(addr));			\
+	return val;							\
+}
+
+#define PCIEX_MMIO_READ_STR(name)					\
+static void scc_pciex_##name(const PCI_IO_ADDR addr, void *buf,		\
+			     unsigned long count)			\
+{									\
+	__do_##name(addr, buf, count);					\
+	scc_pciex_io_flush(iowa_mem_find_bus(addr));			\
+}
+
+PCIEX_MMIO_READ(readb, u8)
+PCIEX_MMIO_READ(readw, u16)
+PCIEX_MMIO_READ(readl, u32)
+PCIEX_MMIO_READ(readq, u64)
+PCIEX_MMIO_READ(readw_be, u16)
+PCIEX_MMIO_READ(readl_be, u32)
+PCIEX_MMIO_READ(readq_be, u64)
+PCIEX_MMIO_READ_STR(readsb)
+PCIEX_MMIO_READ_STR(readsw)
+PCIEX_MMIO_READ_STR(readsl)
+
+static void scc_pciex_memcpy_fromio(void *dest, const PCI_IO_ADDR src,
+				    unsigned long n)
+{
+	__do_memcpy_fromio(dest, src, n);
+	scc_pciex_io_flush(iowa_mem_find_bus(src));
+}
+
+/*
+ * I/O port access to devices on PCIEX.
+ */
+
+static inline unsigned long get_bus_address(struct pci_controller *phb,
+					    unsigned long port)
+{
+	return port - ((unsigned long)(phb->io_base_virt) - _IO_BASE);
+}
+
+static u32 scc_pciex_read_port(struct pci_controller *phb,
+			       unsigned long port, int size)
+{
+	unsigned int byte_enable;
+	unsigned int cmd, shift;
+	unsigned long addr;
+	u32 data, ret;
+
+	BUG_ON(((port & 0x3ul) + size) > 4);
+
+	addr = get_bus_address(phb, port);
+	shift = addr & 0x3ul;
+	byte_enable = ((1 << size) - 1) << shift;
+	cmd = PEXDCMND_IO_READ | (byte_enable << PEXDCMND_BYTE_EN_SHIFT);
+	PEX_OUT(phb->cfg_addr, PEXDADRS, (addr & ~0x3ul));
+	PEX_OUT(phb->cfg_addr, PEXDCMND, cmd);
+	data = PEX_IN(phb->cfg_addr, PEXDRDATA);
+	ret = (data >> (shift * 8)) & (0xFFFFFFFF >> ((4 - size) * 8));
+
+	pr_debug("PCIEX:PIO READ:port=0x%lx, addr=0x%lx, size=%d, be=%x,"
+		 " cmd=%x, data=%x, ret=%x\n", port, addr, size, byte_enable,
+		 cmd, data, ret);
+
+	return ret;
+}
+
+static void scc_pciex_write_port(struct pci_controller *phb,
+				 unsigned long port, int size, u32 val)
+{
+	unsigned int byte_enable;
+	unsigned int cmd, shift;
+	unsigned long addr;
+	u32 data;
+
+	BUG_ON(((port & 0x3ul) + size) > 4);
+
+	addr = get_bus_address(phb, port);
+	shift = addr & 0x3ul;
+	byte_enable = ((1 << size) - 1) << shift;
+	cmd = PEXDCMND_IO_WRITE | (byte_enable << PEXDCMND_BYTE_EN_SHIFT);
+	data = (val & (0xFFFFFFFF >> (4 - size) * 8)) << (shift * 8);
+	PEX_OUT(phb->cfg_addr, PEXDADRS, (addr & ~0x3ul));
+	PEX_OUT(phb->cfg_addr, PEXDCMND, cmd);
+	PEX_OUT(phb->cfg_addr, PEXDWDATA, data);
+
+	pr_debug("PCIEX:PIO WRITE:port=0x%lx, addr=%lx, size=%d, val=%x,"
+		 " be=%x, cmd=%x, data=%x\n", port, addr, size, val,
+		 byte_enable, cmd, data);
+}
+
+static u8 __scc_pciex_inb(struct pci_controller *phb, unsigned long port)
+{
+	return (u8)scc_pciex_read_port(phb, port, 1);
+}
+
+static u16 __scc_pciex_inw(struct pci_controller *phb, unsigned long port)
+{
+	u32 data;
+	if ((port & 0x3ul) < 3)
+		data = scc_pciex_read_port(phb, port, 2);
+	else {
+		u32 d1 = scc_pciex_read_port(phb, port, 1);
+		u32 d2 = scc_pciex_read_port(phb, port + 1, 1);
+		data = d1 | (d2 << 8);
+	}
+	return (u16)data;
+}
+
+static u32 __scc_pciex_inl(struct pci_controller *phb, unsigned long port)
+{
+	unsigned int mod = port & 0x3ul;
+	u32 data;
+	if (mod == 0)
+		data = scc_pciex_read_port(phb, port, 4);
+	else {
+		u32 d1 = scc_pciex_read_port(phb, port, 4 - mod);
+		u32 d2 = scc_pciex_read_port(phb, port + 1, mod);
+		data = d1 | (d2 << (mod * 8));
+	}
+	return data;
+}
+
+static void __scc_pciex_outb(struct pci_controller *phb,
+			     u8 val, unsigned long port)
+{
+	scc_pciex_write_port(phb, port, 1, (u32)val);
+}
+
+static void __scc_pciex_outw(struct pci_controller *phb,
+			     u16 val, unsigned long port)
+{
+	if ((port & 0x3ul) < 3)
+		scc_pciex_write_port(phb, port, 2, (u32)val);
+	else {
+		u32 d1 = val & 0x000000FF;
+		u32 d2 = (val & 0x0000FF00) >> 8;
+		scc_pciex_write_port(phb, port, 1, d1);
+		scc_pciex_write_port(phb, port + 1, 1, d2);
+	}
+}
+
+static void __scc_pciex_outl(struct pci_controller *phb,
+			     u32 val, unsigned long port)
+{
+	unsigned int mod = port & 0x3ul;
+	if (mod == 0)
+		scc_pciex_write_port(phb, port, 4, val);
+	else {
+		u32 d1 = val & (0xFFFFFFFFul >> (mod * 8));
+		u32 d2 = val >> ((4 - mod) * 8);
+		scc_pciex_write_port(phb, port, 4 - mod, d1);
+		scc_pciex_write_port(phb, port + 1, mod, d2);
+	}
+}
+
+#define PCIEX_PIO_FUNC(size, name)					\
+static u##size scc_pciex_in##name(unsigned long port)			\
+{									\
+	struct iowa_bus *bus = iowa_pio_find_bus(port);			\
+	u##size data = __scc_pciex_in##name(bus->phb, port);		\
+	scc_pciex_io_flush(bus);					\
+	return data;							\
+}									\
+static void scc_pciex_ins##name(unsigned long p, void *b, unsigned long c) \
+{									\
+	struct iowa_bus *bus = iowa_pio_find_bus(p);			\
+	__le##size *dst = b;						\
+	for (; c != 0; c--, dst++)					\
+		*dst = cpu_to_le##size(__scc_pciex_in##name(bus->phb, p)); \
+	scc_pciex_io_flush(bus);					\
+}									\
+static void scc_pciex_out##name(u##size val, unsigned long port)	\
+{									\
+	struct iowa_bus *bus = iowa_pio_find_bus(port);			\
+	__scc_pciex_out##name(bus->phb, val, port);			\
+}									\
+static void scc_pciex_outs##name(unsigned long p, const void *b,	\
+				 unsigned long c)			\
+{									\
+	struct iowa_bus *bus = iowa_pio_find_bus(p);			\
+	const __le##size *src = b;					\
+	for (; c != 0; c--, src++)					\
+		__scc_pciex_out##name(bus->phb, le##size##_to_cpu(*src), p); \
+}
+#define __le8 u8
+#define cpu_to_le8(x) (x)
+#define le8_to_cpu(x) (x)
+PCIEX_PIO_FUNC(8, b)
+PCIEX_PIO_FUNC(16, w)
+PCIEX_PIO_FUNC(32, l)
+
+static struct ppc_pci_io scc_pciex_ops = {
+	.readb = scc_pciex_readb,
+	.readw = scc_pciex_readw,
+	.readl = scc_pciex_readl,
+	.readq = scc_pciex_readq,
+	.readw_be = scc_pciex_readw_be,
+	.readl_be = scc_pciex_readl_be,
+	.readq_be = scc_pciex_readq_be,
+	.readsb = scc_pciex_readsb,
+	.readsw = scc_pciex_readsw,
+	.readsl = scc_pciex_readsl,
+	.memcpy_fromio = scc_pciex_memcpy_fromio,
+	.inb = scc_pciex_inb,
+	.inw = scc_pciex_inw,
+	.inl = scc_pciex_inl,
+	.outb = scc_pciex_outb,
+	.outw = scc_pciex_outw,
+	.outl = scc_pciex_outl,
+	.insb = scc_pciex_insb,
+	.insw = scc_pciex_insw,
+	.insl = scc_pciex_insl,
+	.outsb = scc_pciex_outsb,
+	.outsw = scc_pciex_outsw,
+	.outsl = scc_pciex_outsl,
+};
+
+static int __init scc_pciex_iowa_init(struct iowa_bus *bus, void *data)
+{
+	dma_addr_t dummy_page_da;
+	void *dummy_page_va;
+
+	dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!dummy_page_va) {
+		pr_err("PCIEX:Alloc dummy_page_va failed\n");
+		return -1;
+	}
+
+	dummy_page_da = dma_map_single(bus->phb->parent, dummy_page_va,
+				       PAGE_SIZE, DMA_FROM_DEVICE);
+	if (dma_mapping_error(bus->phb->parent, dummy_page_da)) {
+		pr_err("PCIEX:Map dummy page failed.\n");
+		kfree(dummy_page_va);
+		return -1;
+	}
+
+	PEX_OUT(bus->phb->cfg_addr, PEXDMRDADR0, dummy_page_da);
+
+	return 0;
+}
+
+/*
+ * config space access
+ */
+#define MK_PEXDADRS(bus_no, dev_no, func_no, addr) \
+	((uint32_t)(((addr) & ~0x3UL) | \
+	((bus_no) << PEXDADRS_BUSNO_SHIFT) | \
+	((dev_no)  << PEXDADRS_DEVNO_SHIFT) | \
+	((func_no) << PEXDADRS_FUNCNO_SHIFT)))
+
+#define MK_PEXDCMND_BYTE_EN(addr, size) \
+	((((0x1 << (size))-1) << ((addr) & 0x3)) << PEXDCMND_BYTE_EN_SHIFT)
+#define MK_PEXDCMND(cmd, addr, size) ((cmd) | MK_PEXDCMND_BYTE_EN(addr, size))
+
+static uint32_t config_read_pciex_dev(unsigned int __iomem *base,
+		uint64_t bus_no, uint64_t dev_no, uint64_t func_no,
+		uint64_t off, uint64_t size)
+{
+	uint32_t ret;
+	uint32_t addr, cmd;
+
+	addr = MK_PEXDADRS(bus_no, dev_no, func_no, off);
+	cmd = MK_PEXDCMND(PEXDCMND_CONFIG_READ, off, size);
+	PEX_OUT(base, PEXDADRS, addr);
+	PEX_OUT(base, PEXDCMND, cmd);
+	ret = (PEX_IN(base, PEXDRDATA)
+		>> ((off & (4-size)) * 8)) & ((0x1 << (size * 8)) - 1);
+	return ret;
+}
+
+static void config_write_pciex_dev(unsigned int __iomem *base, uint64_t bus_no,
+	uint64_t dev_no, uint64_t func_no, uint64_t off, uint64_t size,
+	uint32_t data)
+{
+	uint32_t addr, cmd;
+
+	addr = MK_PEXDADRS(bus_no, dev_no, func_no, off);
+	cmd = MK_PEXDCMND(PEXDCMND_CONFIG_WRITE, off, size);
+	PEX_OUT(base, PEXDADRS, addr);
+	PEX_OUT(base, PEXDCMND, cmd);
+	PEX_OUT(base, PEXDWDATA,
+		(data & ((0x1 << (size * 8)) - 1)) << ((off & (4-size)) * 8));
+}
+
+#define MK_PEXCADRS_BYTE_EN(off, len) \
+	((((0x1 << (len)) - 1) << ((off) & 0x3)) << PEXCADRS_BYTE_EN_SHIFT)
+#define MK_PEXCADRS(cmd, addr, size) \
+	((cmd) | MK_PEXCADRS_BYTE_EN(addr, size) | ((addr) & ~0x3))
+static uint32_t config_read_pciex_rc(unsigned int __iomem *base,
+				     uint32_t where, uint32_t size)
+{
+	PEX_OUT(base, PEXCADRS, MK_PEXCADRS(PEXCADRS_CMD_READ, where, size));
+	return (PEX_IN(base, PEXCRDATA)
+		>> ((where & (4 - size)) * 8)) & ((0x1 << (size * 8)) - 1);
+}
+
+static void config_write_pciex_rc(unsigned int __iomem *base, uint32_t where,
+				  uint32_t size, uint32_t val)
+{
+	uint32_t data;
+
+	data = (val & ((0x1 << (size * 8)) - 1)) << ((where & (4 - size)) * 8);
+	PEX_OUT(base, PEXCADRS, MK_PEXCADRS(PEXCADRS_CMD_WRITE, where, size));
+	PEX_OUT(base, PEXCWDATA, data);
+}
+
+/* Interfaces */
+/* Note: Work-around
+ *  On SCC PCIEXC, one device is seen on all 32 dev_no.
+ *  As SCC PCIEXC can have only one device on the bus, we look only one dev_no.
+ * (dev_no = 1)
+ */
+static int scc_pciex_read_config(struct pci_bus *bus, unsigned int devfn,
+				 int where, int size, unsigned int *val)
+{
+	struct device_node *dn;
+	struct pci_controller *phb;
+
+	dn = bus->sysdata;
+	phb = pci_find_hose_for_OF_device(dn);
+
+	if (bus->number == phb->first_busno && PCI_SLOT(devfn) != 1) {
+		*val = ~0;
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+
+	if (bus->number == 0 && PCI_SLOT(devfn) == 0)
+		*val = config_read_pciex_rc(phb->cfg_addr, where, size);
+	else
+		*val = config_read_pciex_dev(phb->cfg_addr, bus->number,
+				PCI_SLOT(devfn), PCI_FUNC(devfn), where, size);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int scc_pciex_write_config(struct pci_bus *bus, unsigned int devfn,
+				  int where, int size, unsigned int val)
+{
+	struct device_node *dn;
+	struct pci_controller *phb;
+
+	dn = bus->sysdata;
+	phb = pci_find_hose_for_OF_device(dn);
+
+	if (bus->number == phb->first_busno && PCI_SLOT(devfn) != 1)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (bus->number == 0 && PCI_SLOT(devfn) == 0)
+		config_write_pciex_rc(phb->cfg_addr, where, size, val);
+	else
+		config_write_pciex_dev(phb->cfg_addr, bus->number,
+			PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val);
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops scc_pciex_pci_ops = {
+	scc_pciex_read_config,
+	scc_pciex_write_config,
+};
+
+static void pciex_clear_intr_all(unsigned int __iomem *base)
+{
+	PEX_OUT(base, PEXAERRSTS, 0xffffffff);
+	PEX_OUT(base, PEXPRERRSTS, 0xffffffff);
+	PEX_OUT(base, PEXINTSTS, 0xffffffff);
+}
+
+#if 0
+static void pciex_disable_intr_all(unsigned int *base)
+{
+	PEX_OUT(base, PEXINTMASK,   0x0);
+	PEX_OUT(base, PEXAERRMASK,  0x0);
+	PEX_OUT(base, PEXPRERRMASK, 0x0);
+	PEX_OUT(base, PEXVDMASK,    0x0);
+}
+#endif
+
+static void pciex_enable_intr_all(unsigned int __iomem *base)
+{
+	PEX_OUT(base, PEXINTMASK, 0x0000e7f1);
+	PEX_OUT(base, PEXAERRMASK, 0x03ff01ff);
+	PEX_OUT(base, PEXPRERRMASK, 0x0001010f);
+	PEX_OUT(base, PEXVDMASK, 0x00000001);
+}
+
+static void pciex_check_status(unsigned int __iomem *base)
+{
+	uint32_t err = 0;
+	uint32_t intsts, aerr, prerr, rcvcp, lenerr;
+	uint32_t maea, maec;
+
+	intsts = PEX_IN(base, PEXINTSTS);
+	aerr = PEX_IN(base, PEXAERRSTS);
+	prerr = PEX_IN(base, PEXPRERRSTS);
+	rcvcp = PEX_IN(base, PEXRCVCPLIDA);
+	lenerr = PEX_IN(base, PEXLENERRIDA);
+
+	if (intsts || aerr || prerr || rcvcp || lenerr)
+		err = 1;
+
+	pr_info("PCEXC interrupt!!\n");
+	pr_info("PEXINTSTS    :0x%08x\n", intsts);
+	pr_info("PEXAERRSTS   :0x%08x\n", aerr);
+	pr_info("PEXPRERRSTS  :0x%08x\n", prerr);
+	pr_info("PEXRCVCPLIDA :0x%08x\n", rcvcp);
+	pr_info("PEXLENERRIDA :0x%08x\n", lenerr);
+
+	/* print detail of Protection Error */
+	if (intsts & 0x00004000) {
+		uint32_t i, n;
+		for (i = 0; i < 4; i++) {
+			n = 1 << i;
+			if (prerr & n) {
+				maea = PEX_IN(base, PEXMAEA(i));
+				maec = PEX_IN(base, PEXMAEC(i));
+				pr_info("PEXMAEC%d     :0x%08x\n", i, maec);
+				pr_info("PEXMAEA%d     :0x%08x\n", i, maea);
+			}
+		}
+	}
+
+	if (err)
+		pciex_clear_intr_all(base);
+}
+
+static irqreturn_t pciex_handle_internal_irq(int irq, void *dev_id)
+{
+	struct pci_controller *phb = dev_id;
+
+	pr_debug("PCIEX:pciex_handle_internal_irq(irq=%d)\n", irq);
+
+	BUG_ON(phb->cfg_addr == NULL);
+
+	pciex_check_status(phb->cfg_addr);
+
+	return IRQ_HANDLED;
+}
+
+static __init int celleb_setup_pciex(struct device_node *node,
+				     struct pci_controller *phb)
+{
+	struct resource	r;
+	struct of_irq oirq;
+	int virq;
+
+	/* SMMIO registers; used inside this file */
+	if (of_address_to_resource(node, 0, &r)) {
+		pr_err("PCIEXC:Failed to get config resource.\n");
+		return 1;
+	}
+	phb->cfg_addr = ioremap(r.start, r.end - r.start + 1);
+	if (!phb->cfg_addr) {
+		pr_err("PCIEXC:Failed to remap SMMIO region.\n");
+		return 1;
+	}
+
+	/* Not use cfg_data,  cmd and data regs are near address reg */
+	phb->cfg_data = NULL;
+
+	/* set pci_ops */
+	phb->ops = &scc_pciex_pci_ops;
+
+	/* internal interrupt handler */
+	if (of_irq_map_one(node, 1, &oirq)) {
+		pr_err("PCIEXC:Failed to map irq\n");
+		goto error;
+	}
+	virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
+				     oirq.size);
+	if (request_irq(virq, pciex_handle_internal_irq,
+			IRQF_DISABLED, "pciex", (void *)phb)) {
+		pr_err("PCIEXC:Failed to request irq\n");
+		goto error;
+	}
+
+	/* enable all interrupts */
+	pciex_clear_intr_all(phb->cfg_addr);
+	pciex_enable_intr_all(phb->cfg_addr);
+	/* MSI: TBD */
+
+	return 0;
+
+error:
+	phb->cfg_data = NULL;
+	if (phb->cfg_addr)
+		iounmap(phb->cfg_addr);
+	phb->cfg_addr = NULL;
+	return 1;
+}
+
+struct celleb_phb_spec celleb_pciex_spec __initdata = {
+	.setup = celleb_setup_pciex,
+	.ops = &scc_pciex_ops,
+	.iowa_init = &scc_pciex_iowa_init,
+};
diff --git a/arch/powerpc/platforms/cell/celleb_scc_sio.c b/arch/powerpc/platforms/cell/celleb_scc_sio.c
new file mode 100644
index 0000000..3a16c5b
--- /dev/null
+++ b/arch/powerpc/platforms/cell/celleb_scc_sio.c
@@ -0,0 +1,101 @@
+/*
+ * setup serial port in SCC
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/tty.h>
+#include <linux/serial.h>
+#include <linux/serial_core.h>
+#include <linux/console.h>
+
+#include <asm/io.h>
+#include <asm/prom.h>
+
+/* sio irq0=0xb00010022 irq0=0xb00010023 irq2=0xb00010024
+    mmio=0xfff000-0x1000,0xff2000-0x1000 */
+static int txx9_serial_bitmap __initdata;
+
+static struct {
+	uint32_t offset;
+	uint32_t index;
+} txx9_scc_tab[3] __initdata = {
+	{ 0x300, 0 },	/* 0xFFF300 */
+	{ 0x400, 0 },	/* 0xFFF400 */
+	{ 0x800, 1 }	/* 0xFF2800 */
+};
+
+static int __init txx9_serial_init(void)
+{
+	extern int early_serial_txx9_setup(struct uart_port *port);
+	struct device_node *node = NULL;
+	int i;
+	struct uart_port req;
+	struct of_irq irq;
+	struct resource res;
+
+	while ((node = of_find_compatible_node(node,
+				"serial", "toshiba,sio-scc")) != NULL) {
+		for (i = 0; i < ARRAY_SIZE(txx9_scc_tab); i++) {
+			if (!(txx9_serial_bitmap & (1<<i)))
+				continue;
+
+			if (of_irq_map_one(node, i, &irq))
+				continue;
+			if (of_address_to_resource(node,
+				txx9_scc_tab[i].index, &res))
+				continue;
+
+			memset(&req, 0, sizeof(req));
+			req.line = i;
+			req.iotype = UPIO_MEM;
+			req.mapbase = res.start + txx9_scc_tab[i].offset;
+#ifdef CONFIG_SERIAL_TXX9_CONSOLE
+			req.membase = ioremap(req.mapbase, 0x24);
+#endif
+			req.irq = irq_create_of_mapping(irq.controller,
+				irq.specifier, irq.size);
+			req.flags |= UPF_IOREMAP | UPF_BUGGY_UART
+				/*HAVE_CTS_LINE*/;
+			req.uartclk = 83300000;
+			early_serial_txx9_setup(&req);
+		}
+	}
+
+	return 0;
+}
+
+static int __init txx9_serial_config(char *ptr)
+{
+	int	i;
+
+	for (;;) {
+		switch (get_option(&ptr, &i)) {
+		default:
+			return 0;
+		case 2:
+			txx9_serial_bitmap |= 1 << i;
+			break;
+		case 1:
+			txx9_serial_bitmap |= 1 << i;
+			return 0;
+		}
+	}
+}
+__setup("txx9_serial=", txx9_serial_config);
+
+console_initcall(txx9_serial_init);
diff --git a/arch/powerpc/platforms/cell/celleb_scc_uhc.c b/arch/powerpc/platforms/cell/celleb_scc_uhc.c
new file mode 100644
index 0000000..d63b720
--- /dev/null
+++ b/arch/powerpc/platforms/cell/celleb_scc_uhc.c
@@ -0,0 +1,95 @@
+/*
+ * SCC (Super Companion Chip) UHC setup
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+
+#include <asm/delay.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+
+#include "celleb_scc.h"
+
+#define UHC_RESET_WAIT_MAX 10000
+
+static inline int uhc_clkctrl_ready(u32 val)
+{
+	const u32 mask = SCC_UHC_USBCEN | SCC_UHC_USBCEN;
+	return((val & mask) == mask);
+}
+
+/*
+ * UHC(usb host controller) enable function.
+ * affect to both of OHCI and EHCI core module.
+ */
+static void enable_scc_uhc(struct pci_dev *dev)
+{
+	void __iomem *uhc_base;
+	u32 __iomem *uhc_clkctrl;
+	u32 __iomem *uhc_ecmode;
+	u32 val = 0;
+	int i;
+
+	if (!machine_is(celleb_beat) &&
+	    !machine_is(celleb_native))
+		return;
+
+	uhc_base = ioremap(pci_resource_start(dev, 0),
+			   pci_resource_len(dev, 0));
+	if (!uhc_base) {
+		printk(KERN_ERR "failed to map UHC register base.\n");
+		return;
+	}
+	uhc_clkctrl = uhc_base + SCC_UHC_CKRCTRL;
+	uhc_ecmode  = uhc_base + SCC_UHC_ECMODE;
+
+	/* setup for normal mode */
+	val |= SCC_UHC_F48MCKLEN;
+	out_be32(uhc_clkctrl, val);
+	val |= SCC_UHC_PHY_SUSPEND_SEL;
+	out_be32(uhc_clkctrl, val);
+	udelay(10);
+	val |= SCC_UHC_PHYEN;
+	out_be32(uhc_clkctrl, val);
+	udelay(50);
+
+	/* disable reset */
+	val |= SCC_UHC_HCLKEN;
+	out_be32(uhc_clkctrl, val);
+	val |= (SCC_UHC_USBCEN | SCC_UHC_USBEN);
+	out_be32(uhc_clkctrl, val);
+	i = 0;
+	while (!uhc_clkctrl_ready(in_be32(uhc_clkctrl))) {
+		udelay(10);
+		if (i++ > UHC_RESET_WAIT_MAX) {
+			printk(KERN_ERR "Failed to disable UHC reset %x\n",
+			       in_be32(uhc_clkctrl));
+			break;
+		}
+	}
+
+	/* Endian Conversion Mode for Master ALL area */
+	out_be32(uhc_ecmode, SCC_UHC_ECMODE_BY_BYTE);
+
+	iounmap(uhc_base);
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TOSHIBA_2,
+		 PCI_DEVICE_ID_TOSHIBA_SCC_USB, enable_scc_uhc);
diff --git a/arch/powerpc/platforms/cell/celleb_setup.c b/arch/powerpc/platforms/cell/celleb_setup.c
new file mode 100644
index 0000000..b11cb30
--- /dev/null
+++ b/arch/powerpc/platforms/cell/celleb_setup.c
@@ -0,0 +1,256 @@
+/*
+ * Celleb setup code
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This code is based on arch/powerpc/platforms/cell/setup.c:
+ *  Copyright (C) 1995  Linus Torvalds
+ *  Adapted from 'alpha' version by Gary Thomas
+ *  Modified by Cort Dougan (cort@cs.nmt.edu)
+ *  Modified by PPC64 Team, IBM Corp
+ *  Modified by Cell Team, IBM Deutschland Entwicklung GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#undef DEBUG
+
+#include <linux/cpu.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/console.h>
+#include <linux/of_platform.h>
+
+#include <asm/mmu.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/kexec.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/cputable.h>
+#include <asm/irq.h>
+#include <asm/time.h>
+#include <asm/spu_priv1.h>
+#include <asm/firmware.h>
+#include <asm/rtas.h>
+#include <asm/cell-regs.h>
+
+#include "beat_interrupt.h"
+#include "beat_wrapper.h"
+#include "beat.h"
+#include "celleb_pci.h"
+#include "interrupt.h"
+#include "pervasive.h"
+#include "ras.h"
+
+static char celleb_machine_type[128] = "Celleb";
+
+static void celleb_show_cpuinfo(struct seq_file *m)
+{
+	struct device_node *root;
+	const char *model = "";
+
+	root = of_find_node_by_path("/");
+	if (root)
+		model = of_get_property(root, "model", NULL);
+	/* using "CHRP" is to trick anaconda into installing FCx into Celleb */
+	seq_printf(m, "machine\t\t: %s %s\n", celleb_machine_type, model);
+	of_node_put(root);
+}
+
+static int __init celleb_machine_type_hack(char *ptr)
+{
+	strncpy(celleb_machine_type, ptr, sizeof(celleb_machine_type));
+	celleb_machine_type[sizeof(celleb_machine_type)-1] = 0;
+	return 0;
+}
+
+__setup("celleb_machine_type_hack=", celleb_machine_type_hack);
+
+static void celleb_progress(char *s, unsigned short hex)
+{
+	printk("*** %04x : %s\n", hex, s ? s : "");
+}
+
+static void __init celleb_setup_arch_common(void)
+{
+	/* init to some ~sane value until calibrate_delay() runs */
+	loops_per_jiffy = 50000000;
+
+#ifdef CONFIG_DUMMY_CONSOLE
+	conswitchp = &dummy_con;
+#endif
+}
+
+static struct of_device_id celleb_bus_ids[] __initdata = {
+	{ .type = "scc", },
+	{ .type = "ioif", },	/* old style */
+	{},
+};
+
+static int __init celleb_publish_devices(void)
+{
+	/* Publish OF platform devices for southbridge IOs */
+	of_platform_bus_probe(NULL, celleb_bus_ids, NULL);
+
+	return 0;
+}
+machine_device_initcall(celleb_beat, celleb_publish_devices);
+machine_device_initcall(celleb_native, celleb_publish_devices);
+
+
+/*
+ * functions for Celleb-Beat
+ */
+static void __init celleb_setup_arch_beat(void)
+{
+#ifdef CONFIG_SPU_BASE
+	spu_priv1_ops		= &spu_priv1_beat_ops;
+	spu_management_ops	= &spu_management_of_ops;
+#endif
+
+#ifdef CONFIG_SMP
+	smp_init_celleb();
+#endif
+
+	celleb_setup_arch_common();
+}
+
+static int __init celleb_probe_beat(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (!of_flat_dt_is_compatible(root, "Beat"))
+		return 0;
+
+	powerpc_firmware_features |= FW_FEATURE_CELLEB_ALWAYS
+		| FW_FEATURE_BEAT | FW_FEATURE_LPAR;
+	hpte_init_beat_v3();
+
+	return 1;
+}
+
+
+/*
+ * functions for Celleb-native
+ */
+static void __init celleb_init_IRQ_native(void)
+{
+	iic_init_IRQ();
+	spider_init_IRQ();
+}
+
+static void __init celleb_setup_arch_native(void)
+{
+#ifdef CONFIG_SPU_BASE
+	spu_priv1_ops		= &spu_priv1_mmio_ops;
+	spu_management_ops	= &spu_management_of_ops;
+#endif
+
+	cbe_regs_init();
+
+#ifdef CONFIG_CBE_RAS
+	cbe_ras_init();
+#endif
+
+#ifdef CONFIG_SMP
+	smp_init_cell();
+#endif
+
+	cbe_pervasive_init();
+
+	/* XXX: nvram initialization should be added */
+
+	celleb_setup_arch_common();
+}
+
+static int __init celleb_probe_native(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (of_flat_dt_is_compatible(root, "Beat") ||
+	    !of_flat_dt_is_compatible(root, "TOSHIBA,Celleb"))
+		return 0;
+
+	powerpc_firmware_features |= FW_FEATURE_CELLEB_ALWAYS;
+	hpte_init_native();
+
+	return 1;
+}
+
+
+/*
+ * machine definitions
+ */
+define_machine(celleb_beat) {
+	.name			= "Cell Reference Set (Beat)",
+	.probe			= celleb_probe_beat,
+	.setup_arch		= celleb_setup_arch_beat,
+	.show_cpuinfo		= celleb_show_cpuinfo,
+	.restart		= beat_restart,
+	.power_off		= beat_power_off,
+	.halt			= beat_halt,
+	.get_rtc_time		= beat_get_rtc_time,
+	.set_rtc_time		= beat_set_rtc_time,
+	.calibrate_decr		= generic_calibrate_decr,
+	.progress		= celleb_progress,
+	.power_save		= beat_power_save,
+	.nvram_size		= beat_nvram_get_size,
+	.nvram_read		= beat_nvram_read,
+	.nvram_write		= beat_nvram_write,
+	.set_dabr		= beat_set_xdabr,
+	.init_IRQ		= beatic_init_IRQ,
+	.get_irq		= beatic_get_irq,
+	.pci_probe_mode 	= celleb_pci_probe_mode,
+	.pci_setup_phb		= celleb_setup_phb,
+#ifdef CONFIG_KEXEC
+	.kexec_cpu_down		= beat_kexec_cpu_down,
+	.machine_kexec		= default_machine_kexec,
+	.machine_kexec_prepare	= default_machine_kexec_prepare,
+	.machine_crash_shutdown	= default_machine_crash_shutdown,
+#endif
+};
+
+define_machine(celleb_native) {
+	.name			= "Cell Reference Set (native)",
+	.probe			= celleb_probe_native,
+	.setup_arch		= celleb_setup_arch_native,
+	.show_cpuinfo		= celleb_show_cpuinfo,
+	.restart		= rtas_restart,
+	.power_off		= rtas_power_off,
+	.halt			= rtas_halt,
+	.get_boot_time		= rtas_get_boot_time,
+	.get_rtc_time		= rtas_get_rtc_time,
+	.set_rtc_time		= rtas_set_rtc_time,
+	.calibrate_decr		= generic_calibrate_decr,
+	.progress		= celleb_progress,
+	.pci_probe_mode 	= celleb_pci_probe_mode,
+	.pci_setup_phb		= celleb_setup_phb,
+	.init_IRQ		= celleb_init_IRQ_native,
+#ifdef CONFIG_KEXEC
+	.machine_kexec		= default_machine_kexec,
+	.machine_kexec_prepare	= default_machine_kexec_prepare,
+	.machine_crash_shutdown	= default_machine_crash_shutdown,
+#endif
+};
diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
new file mode 100644
index 0000000..a3c6c01
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
@@ -0,0 +1,184 @@
+/*
+ * spu aware cpufreq governor for the cell processor
+ *
+ * © Copyright IBM Corporation 2006-2008
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <asm/atomic.h>
+#include <asm/machdep.h>
+#include <asm/spu.h>
+
+#define POLL_TIME	100000		/* in µs */
+#define EXP		753		/* exp(-1) in fixed-point */
+
+struct spu_gov_info_struct {
+	unsigned long busy_spus;	/* fixed-point */
+	struct cpufreq_policy *policy;
+	struct delayed_work work;
+	unsigned int poll_int;		/* µs */
+};
+static DEFINE_PER_CPU(struct spu_gov_info_struct, spu_gov_info);
+
+static struct workqueue_struct *kspugov_wq;
+
+static int calc_freq(struct spu_gov_info_struct *info)
+{
+	int cpu;
+	int busy_spus;
+
+	cpu = info->policy->cpu;
+	busy_spus = atomic_read(&cbe_spu_info[cpu_to_node(cpu)].busy_spus);
+
+	CALC_LOAD(info->busy_spus, EXP, busy_spus * FIXED_1);
+	pr_debug("cpu %d: busy_spus=%d, info->busy_spus=%ld\n",
+			cpu, busy_spus, info->busy_spus);
+
+	return info->policy->max * info->busy_spus / FIXED_1;
+}
+
+static void spu_gov_work(struct work_struct *work)
+{
+	struct spu_gov_info_struct *info;
+	int delay;
+	unsigned long target_freq;
+
+	info = container_of(work, struct spu_gov_info_struct, work.work);
+
+	/* after cancel_delayed_work_sync we unset info->policy */
+	BUG_ON(info->policy == NULL);
+
+	target_freq = calc_freq(info);
+	__cpufreq_driver_target(info->policy, target_freq, CPUFREQ_RELATION_H);
+
+	delay = usecs_to_jiffies(info->poll_int);
+	queue_delayed_work_on(info->policy->cpu, kspugov_wq, &info->work, delay);
+}
+
+static void spu_gov_init_work(struct spu_gov_info_struct *info)
+{
+	int delay = usecs_to_jiffies(info->poll_int);
+	INIT_DELAYED_WORK_DEFERRABLE(&info->work, spu_gov_work);
+	queue_delayed_work_on(info->policy->cpu, kspugov_wq, &info->work, delay);
+}
+
+static void spu_gov_cancel_work(struct spu_gov_info_struct *info)
+{
+	cancel_delayed_work_sync(&info->work);
+}
+
+static int spu_gov_govern(struct cpufreq_policy *policy, unsigned int event)
+{
+	unsigned int cpu = policy->cpu;
+	struct spu_gov_info_struct *info, *affected_info;
+	int i;
+	int ret = 0;
+
+	info = &per_cpu(spu_gov_info, cpu);
+
+	switch (event) {
+	case CPUFREQ_GOV_START:
+		if (!cpu_online(cpu)) {
+			printk(KERN_ERR "cpu %d is not online\n", cpu);
+			ret = -EINVAL;
+			break;
+		}
+
+		if (!policy->cur) {
+			printk(KERN_ERR "no cpu specified in policy\n");
+			ret = -EINVAL;
+			break;
+		}
+
+		/* initialize spu_gov_info for all affected cpus */
+		for_each_cpu_mask(i, policy->cpus) {
+			affected_info = &per_cpu(spu_gov_info, i);
+			affected_info->policy = policy;
+		}
+
+		info->poll_int = POLL_TIME;
+
+		/* setup timer */
+		spu_gov_init_work(info);
+
+		break;
+
+	case CPUFREQ_GOV_STOP:
+		/* cancel timer */
+		spu_gov_cancel_work(info);
+
+		/* clean spu_gov_info for all affected cpus */
+		for_each_cpu_mask (i, policy->cpus) {
+			info = &per_cpu(spu_gov_info, i);
+			info->policy = NULL;
+		}
+
+		break;
+	}
+
+	return ret;
+}
+
+static struct cpufreq_governor spu_governor = {
+	.name = "spudemand",
+	.governor = spu_gov_govern,
+	.owner = THIS_MODULE,
+};
+
+/*
+ * module init and destoy
+ */
+
+static int __init spu_gov_init(void)
+{
+	int ret;
+
+	kspugov_wq = create_workqueue("kspugov");
+	if (!kspugov_wq) {
+		printk(KERN_ERR "creation of kspugov failed\n");
+		ret = -EFAULT;
+		goto out;
+	}
+
+	ret = cpufreq_register_governor(&spu_governor);
+	if (ret) {
+		printk(KERN_ERR "registration of governor failed\n");
+		destroy_workqueue(kspugov_wq);
+		goto out;
+	}
+out:
+	return ret;
+}
+
+static void __exit spu_gov_exit(void)
+{
+	cpufreq_unregister_governor(&spu_governor);
+	destroy_workqueue(kspugov_wq);
+}
+
+
+module_init(spu_gov_init);
+module_exit(spu_gov_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
+
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
new file mode 100644
index 0000000..2d5bb22
--- /dev/null
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -0,0 +1,464 @@
+/*
+ * Cell Internal Interrupt Controller
+ *
+ * Copyright (C) 2006 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *                    IBM, Corp.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * TODO:
+ * - Fix various assumptions related to HW CPU numbers vs. linux CPU numbers
+ *   vs node numbers in the setup code
+ * - Implement proper handling of maxcpus=1/2 (that is, routing of irqs from
+ *   a non-active node to the active node)
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+#include <linux/kernel_stat.h>
+
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/prom.h>
+#include <asm/ptrace.h>
+#include <asm/machdep.h>
+#include <asm/cell-regs.h>
+
+#include "interrupt.h"
+
+struct iic {
+	struct cbe_iic_thread_regs __iomem *regs;
+	u8 target_id;
+	u8 eoi_stack[16];
+	int eoi_ptr;
+	struct device_node *node;
+};
+
+static DEFINE_PER_CPU(struct iic, iic);
+#define IIC_NODE_COUNT	2
+static struct irq_host *iic_host;
+
+/* Convert between "pending" bits and hw irq number */
+static irq_hw_number_t iic_pending_to_hwnum(struct cbe_iic_pending_bits bits)
+{
+	unsigned char unit = bits.source & 0xf;
+	unsigned char node = bits.source >> 4;
+	unsigned char class = bits.class & 3;
+
+	/* Decode IPIs */
+	if (bits.flags & CBE_IIC_IRQ_IPI)
+		return IIC_IRQ_TYPE_IPI | (bits.prio >> 4);
+	else
+		return (node << IIC_IRQ_NODE_SHIFT) | (class << 4) | unit;
+}
+
+static void iic_mask(unsigned int irq)
+{
+}
+
+static void iic_unmask(unsigned int irq)
+{
+}
+
+static void iic_eoi(unsigned int irq)
+{
+	struct iic *iic = &__get_cpu_var(iic);
+	out_be64(&iic->regs->prio, iic->eoi_stack[--iic->eoi_ptr]);
+	BUG_ON(iic->eoi_ptr < 0);
+}
+
+static struct irq_chip iic_chip = {
+	.typename = " CELL-IIC ",
+	.mask = iic_mask,
+	.unmask = iic_unmask,
+	.eoi = iic_eoi,
+};
+
+
+static void iic_ioexc_eoi(unsigned int irq)
+{
+}
+
+static void iic_ioexc_cascade(unsigned int irq, struct irq_desc *desc)
+{
+	struct cbe_iic_regs __iomem *node_iic = (void __iomem *)desc->handler_data;
+	unsigned int base = (irq & 0xffffff00) | IIC_IRQ_TYPE_IOEXC;
+	unsigned long bits, ack;
+	int cascade;
+
+	for (;;) {
+		bits = in_be64(&node_iic->iic_is);
+		if (bits == 0)
+			break;
+		/* pre-ack edge interrupts */
+		ack = bits & IIC_ISR_EDGE_MASK;
+		if (ack)
+			out_be64(&node_iic->iic_is, ack);
+		/* handle them */
+		for (cascade = 63; cascade >= 0; cascade--)
+			if (bits & (0x8000000000000000UL >> cascade)) {
+				unsigned int cirq =
+					irq_linear_revmap(iic_host,
+							  base | cascade);
+				if (cirq != NO_IRQ)
+					generic_handle_irq(cirq);
+			}
+		/* post-ack level interrupts */
+		ack = bits & ~IIC_ISR_EDGE_MASK;
+		if (ack)
+			out_be64(&node_iic->iic_is, ack);
+	}
+	desc->chip->eoi(irq);
+}
+
+
+static struct irq_chip iic_ioexc_chip = {
+	.typename = " CELL-IOEX",
+	.mask = iic_mask,
+	.unmask = iic_unmask,
+	.eoi = iic_ioexc_eoi,
+};
+
+/* Get an IRQ number from the pending state register of the IIC */
+static unsigned int iic_get_irq(void)
+{
+	struct cbe_iic_pending_bits pending;
+	struct iic *iic;
+	unsigned int virq;
+
+	iic = &__get_cpu_var(iic);
+	*(unsigned long *) &pending =
+		in_be64((unsigned long __iomem *) &iic->regs->pending_destr);
+	if (!(pending.flags & CBE_IIC_IRQ_VALID))
+		return NO_IRQ;
+	virq = irq_linear_revmap(iic_host, iic_pending_to_hwnum(pending));
+	if (virq == NO_IRQ)
+		return NO_IRQ;
+	iic->eoi_stack[++iic->eoi_ptr] = pending.prio;
+	BUG_ON(iic->eoi_ptr > 15);
+	return virq;
+}
+
+void iic_setup_cpu(void)
+{
+	out_be64(&__get_cpu_var(iic).regs->prio, 0xff);
+}
+
+u8 iic_get_target_id(int cpu)
+{
+	return per_cpu(iic, cpu).target_id;
+}
+
+EXPORT_SYMBOL_GPL(iic_get_target_id);
+
+#ifdef CONFIG_SMP
+
+/* Use the highest interrupt priorities for IPI */
+static inline int iic_ipi_to_irq(int ipi)
+{
+	return IIC_IRQ_TYPE_IPI + 0xf - ipi;
+}
+
+void iic_cause_IPI(int cpu, int mesg)
+{
+	out_be64(&per_cpu(iic, cpu).regs->generate, (0xf - mesg) << 4);
+}
+
+struct irq_host *iic_get_irq_host(int node)
+{
+	return iic_host;
+}
+EXPORT_SYMBOL_GPL(iic_get_irq_host);
+
+static irqreturn_t iic_ipi_action(int irq, void *dev_id)
+{
+	int ipi = (int)(long)dev_id;
+
+	smp_message_recv(ipi);
+
+	return IRQ_HANDLED;
+}
+static void iic_request_ipi(int ipi, const char *name)
+{
+	int virq;
+
+	virq = irq_create_mapping(iic_host, iic_ipi_to_irq(ipi));
+	if (virq == NO_IRQ) {
+		printk(KERN_ERR
+		       "iic: failed to map IPI %s\n", name);
+		return;
+	}
+	if (request_irq(virq, iic_ipi_action, IRQF_DISABLED, name,
+			(void *)(long)ipi))
+		printk(KERN_ERR
+		       "iic: failed to request IPI %s\n", name);
+}
+
+void iic_request_IPIs(void)
+{
+	iic_request_ipi(PPC_MSG_CALL_FUNCTION, "IPI-call");
+	iic_request_ipi(PPC_MSG_RESCHEDULE, "IPI-resched");
+	iic_request_ipi(PPC_MSG_CALL_FUNC_SINGLE, "IPI-call-single");
+#ifdef CONFIG_DEBUGGER
+	iic_request_ipi(PPC_MSG_DEBUGGER_BREAK, "IPI-debug");
+#endif /* CONFIG_DEBUGGER */
+}
+
+#endif /* CONFIG_SMP */
+
+
+static int iic_host_match(struct irq_host *h, struct device_node *node)
+{
+	return of_device_is_compatible(node,
+				    "IBM,CBEA-Internal-Interrupt-Controller");
+}
+
+extern int noirqdebug;
+
+static void handle_iic_irq(unsigned int irq, struct irq_desc *desc)
+{
+	const unsigned int cpu = smp_processor_id();
+
+	spin_lock(&desc->lock);
+
+	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
+
+	/*
+	 * If we're currently running this IRQ, or its disabled,
+	 * we shouldn't process the IRQ. Mark it pending, handle
+	 * the necessary masking and go out
+	 */
+	if (unlikely((desc->status & (IRQ_INPROGRESS | IRQ_DISABLED)) ||
+		    !desc->action)) {
+		desc->status |= IRQ_PENDING;
+		goto out_eoi;
+	}
+
+	kstat_cpu(cpu).irqs[irq]++;
+
+	/* Mark the IRQ currently in progress.*/
+	desc->status |= IRQ_INPROGRESS;
+
+	do {
+		struct irqaction *action = desc->action;
+		irqreturn_t action_ret;
+
+		if (unlikely(!action))
+			goto out_eoi;
+
+		desc->status &= ~IRQ_PENDING;
+		spin_unlock(&desc->lock);
+		action_ret = handle_IRQ_event(irq, action);
+		if (!noirqdebug)
+			note_interrupt(irq, desc, action_ret);
+		spin_lock(&desc->lock);
+
+	} while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING);
+
+	desc->status &= ~IRQ_INPROGRESS;
+out_eoi:
+	desc->chip->eoi(irq);
+	spin_unlock(&desc->lock);
+}
+
+static int iic_host_map(struct irq_host *h, unsigned int virq,
+			irq_hw_number_t hw)
+{
+	switch (hw & IIC_IRQ_TYPE_MASK) {
+	case IIC_IRQ_TYPE_IPI:
+		set_irq_chip_and_handler(virq, &iic_chip, handle_percpu_irq);
+		break;
+	case IIC_IRQ_TYPE_IOEXC:
+		set_irq_chip_and_handler(virq, &iic_ioexc_chip,
+					 handle_iic_irq);
+		break;
+	default:
+		set_irq_chip_and_handler(virq, &iic_chip, handle_iic_irq);
+	}
+	return 0;
+}
+
+static int iic_host_xlate(struct irq_host *h, struct device_node *ct,
+			   u32 *intspec, unsigned int intsize,
+			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+	unsigned int node, ext, unit, class;
+	const u32 *val;
+
+	if (!of_device_is_compatible(ct,
+				     "IBM,CBEA-Internal-Interrupt-Controller"))
+		return -ENODEV;
+	if (intsize != 1)
+		return -ENODEV;
+	val = of_get_property(ct, "#interrupt-cells", NULL);
+	if (val == NULL || *val != 1)
+		return -ENODEV;
+
+	node = intspec[0] >> 24;
+	ext = (intspec[0] >> 16) & 0xff;
+	class = (intspec[0] >> 8) & 0xff;
+	unit = intspec[0] & 0xff;
+
+	/* Check if node is in supported range */
+	if (node > 1)
+		return -EINVAL;
+
+	/* Build up interrupt number, special case for IO exceptions */
+	*out_hwirq = (node << IIC_IRQ_NODE_SHIFT);
+	if (unit == IIC_UNIT_IIC && class == 1)
+		*out_hwirq |= IIC_IRQ_TYPE_IOEXC | ext;
+	else
+		*out_hwirq |= IIC_IRQ_TYPE_NORMAL |
+			(class << IIC_IRQ_CLASS_SHIFT) | unit;
+
+	/* Dummy flags, ignored by iic code */
+	*out_flags = IRQ_TYPE_EDGE_RISING;
+
+	return 0;
+}
+
+static struct irq_host_ops iic_host_ops = {
+	.match = iic_host_match,
+	.map = iic_host_map,
+	.xlate = iic_host_xlate,
+};
+
+static void __init init_one_iic(unsigned int hw_cpu, unsigned long addr,
+				struct device_node *node)
+{
+	/* XXX FIXME: should locate the linux CPU number from the HW cpu
+	 * number properly. We are lucky for now
+	 */
+	struct iic *iic = &per_cpu(iic, hw_cpu);
+
+	iic->regs = ioremap(addr, sizeof(struct cbe_iic_thread_regs));
+	BUG_ON(iic->regs == NULL);
+
+	iic->target_id = ((hw_cpu & 2) << 3) | ((hw_cpu & 1) ? 0xf : 0xe);
+	iic->eoi_stack[0] = 0xff;
+	iic->node = of_node_get(node);
+	out_be64(&iic->regs->prio, 0);
+
+	printk(KERN_INFO "IIC for CPU %d target id 0x%x : %s\n",
+	       hw_cpu, iic->target_id, node->full_name);
+}
+
+static int __init setup_iic(void)
+{
+	struct device_node *dn;
+	struct resource r0, r1;
+	unsigned int node, cascade, found = 0;
+	struct cbe_iic_regs __iomem *node_iic;
+	const u32 *np;
+
+	for (dn = NULL;
+	     (dn = of_find_node_by_name(dn,"interrupt-controller")) != NULL;) {
+		if (!of_device_is_compatible(dn,
+				     "IBM,CBEA-Internal-Interrupt-Controller"))
+			continue;
+		np = of_get_property(dn, "ibm,interrupt-server-ranges", NULL);
+		if (np == NULL) {
+			printk(KERN_WARNING "IIC: CPU association not found\n");
+			of_node_put(dn);
+			return -ENODEV;
+		}
+		if (of_address_to_resource(dn, 0, &r0) ||
+		    of_address_to_resource(dn, 1, &r1)) {
+			printk(KERN_WARNING "IIC: Can't resolve addresses\n");
+			of_node_put(dn);
+			return -ENODEV;
+		}
+		found++;
+		init_one_iic(np[0], r0.start, dn);
+		init_one_iic(np[1], r1.start, dn);
+
+		/* Setup cascade for IO exceptions. XXX cleanup tricks to get
+		 * node vs CPU etc...
+		 * Note that we configure the IIC_IRR here with a hard coded
+		 * priority of 1. We might want to improve that later.
+		 */
+		node = np[0] >> 1;
+		node_iic = cbe_get_cpu_iic_regs(np[0]);
+		cascade = node << IIC_IRQ_NODE_SHIFT;
+		cascade |= 1 << IIC_IRQ_CLASS_SHIFT;
+		cascade |= IIC_UNIT_IIC;
+		cascade = irq_create_mapping(iic_host, cascade);
+		if (cascade == NO_IRQ)
+			continue;
+		/*
+		 * irq_data is a generic pointer that gets passed back
+		 * to us later, so the forced cast is fine.
+		 */
+		set_irq_data(cascade, (void __force *)node_iic);
+		set_irq_chained_handler(cascade , iic_ioexc_cascade);
+		out_be64(&node_iic->iic_ir,
+			 (1 << 12)		/* priority */ |
+			 (node << 4)		/* dest node */ |
+			 IIC_UNIT_THREAD_0	/* route them to thread 0 */);
+		/* Flush pending (make sure it triggers if there is
+		 * anything pending
+		 */
+		out_be64(&node_iic->iic_is, 0xfffffffffffffffful);
+	}
+
+	if (found)
+		return 0;
+	else
+		return -ENODEV;
+}
+
+void __init iic_init_IRQ(void)
+{
+	/* Setup an irq host data structure */
+	iic_host = irq_alloc_host(NULL, IRQ_HOST_MAP_LINEAR, IIC_SOURCE_COUNT,
+				  &iic_host_ops, IIC_IRQ_INVALID);
+	BUG_ON(iic_host == NULL);
+	irq_set_default_host(iic_host);
+
+	/* Discover and initialize iics */
+	if (setup_iic() < 0)
+		panic("IIC: Failed to initialize !\n");
+
+	/* Set master interrupt handling function */
+	ppc_md.get_irq = iic_get_irq;
+
+	/* Enable on current CPU */
+	iic_setup_cpu();
+}
+
+void iic_set_interrupt_routing(int cpu, int thread, int priority)
+{
+	struct cbe_iic_regs __iomem *iic_regs = cbe_get_cpu_iic_regs(cpu);
+	u64 iic_ir = 0;
+	int node = cpu >> 1;
+
+	/* Set which node and thread will handle the next interrupt */
+	iic_ir |= CBE_IIC_IR_PRIO(priority) |
+		  CBE_IIC_IR_DEST_NODE(node);
+	if (thread == 0)
+		iic_ir |= CBE_IIC_IR_DEST_UNIT(CBE_IIC_IR_PT_0);
+	else
+		iic_ir |= CBE_IIC_IR_DEST_UNIT(CBE_IIC_IR_PT_1);
+	out_be64(&iic_regs->iic_ir, iic_ir);
+}
diff --git a/arch/powerpc/platforms/cell/interrupt.h b/arch/powerpc/platforms/cell/interrupt.h
new file mode 100644
index 0000000..942dc39
--- /dev/null
+++ b/arch/powerpc/platforms/cell/interrupt.h
@@ -0,0 +1,89 @@
+#ifndef ASM_CELL_PIC_H
+#define ASM_CELL_PIC_H
+#ifdef __KERNEL__
+/*
+ * Mapping of IIC pending bits into per-node interrupt numbers.
+ *
+ * Interrupt numbers are in the range 0...0x1ff where the top bit
+ * (0x100) represent the source node. Only 2 nodes are supported with
+ * the current code though it's trivial to extend that if necessary using
+ * higher level bits
+ *
+ * The bottom 8 bits are split into 2 type bits and 6 data bits that
+ * depend on the type:
+ *
+ * 00 (0x00 | data) : normal interrupt. data is (class << 4) | source
+ * 01 (0x40 | data) : IO exception. data is the exception number as
+ *                    defined by bit numbers in IIC_SR
+ * 10 (0x80 | data) : IPI. data is the IPI number (obtained from the priority)
+ *                    and node is always 0 (IPIs are per-cpu, their source is
+ *                    not relevant)
+ * 11 (0xc0 | data) : reserved
+ *
+ * In addition, interrupt number 0x80000000 is defined as always invalid
+ * (that is the node field is expected to never extend to move than 23 bits)
+ *
+ */
+
+enum {
+	IIC_IRQ_INVALID		= 0x80000000u,
+	IIC_IRQ_NODE_MASK	= 0x100,
+	IIC_IRQ_NODE_SHIFT	= 8,
+	IIC_IRQ_MAX		= 0x1ff,
+	IIC_IRQ_TYPE_MASK	= 0xc0,
+	IIC_IRQ_TYPE_NORMAL	= 0x00,
+	IIC_IRQ_TYPE_IOEXC	= 0x40,
+	IIC_IRQ_TYPE_IPI	= 0x80,
+	IIC_IRQ_CLASS_SHIFT	= 4,
+	IIC_IRQ_CLASS_0		= 0x00,
+	IIC_IRQ_CLASS_1		= 0x10,
+	IIC_IRQ_CLASS_2		= 0x20,
+	IIC_SOURCE_COUNT	= 0x200,
+
+	/* Here are defined the various source/dest units. Avoid using those
+	 * definitions if you can, they are mostly here for reference
+	 */
+	IIC_UNIT_SPU_0		= 0x4,
+	IIC_UNIT_SPU_1		= 0x7,
+	IIC_UNIT_SPU_2		= 0x3,
+	IIC_UNIT_SPU_3		= 0x8,
+	IIC_UNIT_SPU_4		= 0x2,
+	IIC_UNIT_SPU_5		= 0x9,
+	IIC_UNIT_SPU_6		= 0x1,
+	IIC_UNIT_SPU_7		= 0xa,
+	IIC_UNIT_IOC_0		= 0x0,
+	IIC_UNIT_IOC_1		= 0xb,
+	IIC_UNIT_THREAD_0	= 0xe, /* target only */
+	IIC_UNIT_THREAD_1	= 0xf, /* target only */
+	IIC_UNIT_IIC		= 0xe, /* source only (IO exceptions) */
+
+	/* Base numbers for the external interrupts */
+	IIC_IRQ_EXT_IOIF0	=
+		IIC_IRQ_TYPE_NORMAL | IIC_IRQ_CLASS_2 | IIC_UNIT_IOC_0,
+	IIC_IRQ_EXT_IOIF1	=
+		IIC_IRQ_TYPE_NORMAL | IIC_IRQ_CLASS_2 | IIC_UNIT_IOC_1,
+
+	/* Base numbers for the IIC_ISR interrupts */
+	IIC_IRQ_IOEX_TMI	= IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 63,
+	IIC_IRQ_IOEX_PMI	= IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 62,
+	IIC_IRQ_IOEX_ATI	= IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 61,
+	IIC_IRQ_IOEX_MATBFI	= IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 60,
+	IIC_IRQ_IOEX_ELDI	= IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 59,
+
+	/* Which bits in IIC_ISR are edge sensitive */
+	IIC_ISR_EDGE_MASK	= 0x4ul,
+};
+
+extern void iic_init_IRQ(void);
+extern void iic_cause_IPI(int cpu, int mesg);
+extern void iic_request_IPIs(void);
+extern void iic_setup_cpu(void);
+
+extern u8 iic_get_target_id(int cpu);
+
+extern void spider_init_IRQ(void);
+
+extern void iic_set_interrupt_routing(int cpu, int thread, int priority);
+
+#endif
+#endif /* ASM_CELL_PIC_H */
diff --git a/arch/powerpc/platforms/cell/io-workarounds.c b/arch/powerpc/platforms/cell/io-workarounds.c
new file mode 100644
index 0000000..b5f84e8
--- /dev/null
+++ b/arch/powerpc/platforms/cell/io-workarounds.c
@@ -0,0 +1,185 @@
+/*
+ * Support PCI IO workaround
+ *
+ *  Copyright (C) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ *		       IBM, Corp.
+ *  (C) Copyright 2007-2008 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#undef DEBUG
+
+#include <linux/kernel.h>
+
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/pgtable.h>
+#include <asm/ppc-pci.h>
+
+#include "io-workarounds.h"
+
+#define IOWA_MAX_BUS	8
+
+static struct iowa_bus iowa_busses[IOWA_MAX_BUS];
+static unsigned int iowa_bus_count;
+
+static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr)
+{
+	int i, j;
+	struct resource *res;
+	unsigned long vstart, vend;
+
+	for (i = 0; i < iowa_bus_count; i++) {
+		struct iowa_bus *bus = &iowa_busses[i];
+		struct pci_controller *phb = bus->phb;
+
+		if (vaddr) {
+			vstart = (unsigned long)phb->io_base_virt;
+			vend = vstart + phb->pci_io_size - 1;
+			if ((vaddr >= vstart) && (vaddr <= vend))
+				return bus;
+		}
+
+		if (paddr)
+			for (j = 0; j < 3; j++) {
+				res = &phb->mem_resources[j];
+				if (paddr >= res->start && paddr <= res->end)
+					return bus;
+			}
+	}
+
+	return NULL;
+}
+
+struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
+{
+	struct iowa_bus *bus;
+	int token;
+
+	token = PCI_GET_ADDR_TOKEN(addr);
+
+	if (token && token <= iowa_bus_count)
+		bus = &iowa_busses[token - 1];
+	else {
+		unsigned long vaddr, paddr;
+		pte_t *ptep;
+
+		vaddr = (unsigned long)PCI_FIX_ADDR(addr);
+		if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
+			return NULL;
+
+		ptep = find_linux_pte(init_mm.pgd, vaddr);
+		if (ptep == NULL)
+			paddr = 0;
+		else
+			paddr = pte_pfn(*ptep) << PAGE_SHIFT;
+		bus = iowa_pci_find(vaddr, paddr);
+
+		if (bus == NULL)
+			return NULL;
+	}
+
+	return bus;
+}
+
+struct iowa_bus *iowa_pio_find_bus(unsigned long port)
+{
+	unsigned long vaddr = (unsigned long)pci_io_base + port;
+	return iowa_pci_find(vaddr, 0);
+}
+
+
+#define DEF_PCI_AC_RET(name, ret, at, al, space, aa)		\
+static ret iowa_##name at					\
+{								\
+	struct iowa_bus *bus;					\
+	bus = iowa_##space##_find_bus(aa);			\
+	if (bus && bus->ops && bus->ops->name)			\
+		return bus->ops->name al;			\
+	return __do_##name al;					\
+}
+
+#define DEF_PCI_AC_NORET(name, at, al, space, aa)		\
+static void iowa_##name at					\
+{								\
+	struct iowa_bus *bus;					\
+	bus = iowa_##space##_find_bus(aa);			\
+	if (bus && bus->ops && bus->ops->name) {		\
+		bus->ops->name al;				\
+		return;						\
+	}							\
+	__do_##name al;						\
+}
+
+#include <asm/io-defs.h>
+
+#undef DEF_PCI_AC_RET
+#undef DEF_PCI_AC_NORET
+
+static const struct ppc_pci_io __devinitconst iowa_pci_io = {
+
+#define DEF_PCI_AC_RET(name, ret, at, al, space, aa)	.name = iowa_##name,
+#define DEF_PCI_AC_NORET(name, at, al, space, aa)	.name = iowa_##name,
+
+#include <asm/io-defs.h>
+
+#undef DEF_PCI_AC_RET
+#undef DEF_PCI_AC_NORET
+
+};
+
+static void __iomem *iowa_ioremap(unsigned long addr, unsigned long size,
+						unsigned long flags)
+{
+	struct iowa_bus *bus;
+	void __iomem *res = __ioremap(addr, size, flags);
+	int busno;
+
+	bus = iowa_pci_find(0, addr);
+	if (bus != NULL) {
+		busno = bus - iowa_busses;
+		PCI_SET_ADDR_TOKEN(res, busno + 1);
+	}
+	return res;
+}
+
+/* Regist new bus to support workaround */
+void __devinit iowa_register_bus(struct pci_controller *phb,
+			struct ppc_pci_io *ops,
+			int (*initfunc)(struct iowa_bus *, void *), void *data)
+{
+	struct iowa_bus *bus;
+	struct device_node *np = phb->dn;
+
+	if (iowa_bus_count >= IOWA_MAX_BUS) {
+		pr_err("IOWA:Too many pci bridges, "
+		       "workarounds disabled for %s\n", np->full_name);
+		return;
+	}
+
+	bus = &iowa_busses[iowa_bus_count];
+	bus->phb = phb;
+	bus->ops = ops;
+
+	if (initfunc)
+		if ((*initfunc)(bus, data))
+			return;
+
+	iowa_bus_count++;
+
+	pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name);
+}
+
+/* enable IO workaround */
+void __devinit io_workaround_init(void)
+{
+	static int io_workaround_inited;
+
+	if (io_workaround_inited)
+		return;
+	ppc_pci_io = iowa_pci_io;
+	ppc_md.ioremap = iowa_ioremap;
+	io_workaround_inited = 1;
+}
diff --git a/arch/powerpc/platforms/cell/io-workarounds.h b/arch/powerpc/platforms/cell/io-workarounds.h
new file mode 100644
index 0000000..6efc778
--- /dev/null
+++ b/arch/powerpc/platforms/cell/io-workarounds.h
@@ -0,0 +1,49 @@
+/*
+ * Support PCI IO workaround
+ *
+ * (C) Copyright 2007-2008 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _IO_WORKAROUNDS_H
+#define _IO_WORKAROUNDS_H
+
+#include <linux/io.h>
+#include <asm/pci-bridge.h>
+
+/* Bus info */
+struct iowa_bus {
+	struct pci_controller *phb;
+	struct ppc_pci_io *ops;
+	void   *private;
+};
+
+void __devinit io_workaround_init(void);
+void __devinit iowa_register_bus(struct pci_controller *, struct ppc_pci_io *,
+				 int (*)(struct iowa_bus *, void *), void *);
+struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR);
+struct iowa_bus *iowa_pio_find_bus(unsigned long);
+
+extern struct ppc_pci_io spiderpci_ops;
+extern int spiderpci_iowa_init(struct iowa_bus *, void *);
+
+#define SPIDER_PCI_REG_BASE		0xd000
+#define SPIDER_PCI_REG_SIZE		0x1000
+#define SPIDER_PCI_VCI_CNTL_STAT	0x0110
+#define SPIDER_PCI_DUMMY_READ		0x0810
+#define SPIDER_PCI_DUMMY_READ_BASE	0x0814
+
+#endif /* _IO_WORKAROUNDS_H */
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
new file mode 100644
index 0000000..3168272
--- /dev/null
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -0,0 +1,1225 @@
+/*
+ * IOMMU implementation for Cell Broadband Processor Architecture
+ *
+ * (C) Copyright IBM Corporation 2006-2008
+ *
+ * Author: Jeremy Kerr <jk@ozlabs.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/lmb.h>
+
+#include <asm/prom.h>
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/udbg.h>
+#include <asm/firmware.h>
+#include <asm/cell-regs.h>
+
+#include "interrupt.h"
+
+/* Define CELL_IOMMU_REAL_UNMAP to actually unmap non-used pages
+ * instead of leaving them mapped to some dummy page. This can be
+ * enabled once the appropriate workarounds for spider bugs have
+ * been enabled
+ */
+#define CELL_IOMMU_REAL_UNMAP
+
+/* Define CELL_IOMMU_STRICT_PROTECTION to enforce protection of
+ * IO PTEs based on the transfer direction. That can be enabled
+ * once spider-net has been fixed to pass the correct direction
+ * to the DMA mapping functions
+ */
+#define CELL_IOMMU_STRICT_PROTECTION
+
+
+#define NR_IOMMUS			2
+
+/* IOC mmap registers */
+#define IOC_Reg_Size			0x2000
+
+#define IOC_IOPT_CacheInvd		0x908
+#define IOC_IOPT_CacheInvd_NE_Mask	0xffe0000000000000ul
+#define IOC_IOPT_CacheInvd_IOPTE_Mask	0x000003fffffffff8ul
+#define IOC_IOPT_CacheInvd_Busy		0x0000000000000001ul
+
+#define IOC_IOST_Origin			0x918
+#define IOC_IOST_Origin_E		0x8000000000000000ul
+#define IOC_IOST_Origin_HW		0x0000000000000800ul
+#define IOC_IOST_Origin_HL		0x0000000000000400ul
+
+#define IOC_IO_ExcpStat			0x920
+#define IOC_IO_ExcpStat_V		0x8000000000000000ul
+#define IOC_IO_ExcpStat_SPF_Mask	0x6000000000000000ul
+#define IOC_IO_ExcpStat_SPF_S		0x6000000000000000ul
+#define IOC_IO_ExcpStat_SPF_P		0x4000000000000000ul
+#define IOC_IO_ExcpStat_ADDR_Mask	0x00000007fffff000ul
+#define IOC_IO_ExcpStat_RW_Mask		0x0000000000000800ul
+#define IOC_IO_ExcpStat_IOID_Mask	0x00000000000007fful
+
+#define IOC_IO_ExcpMask			0x928
+#define IOC_IO_ExcpMask_SFE		0x4000000000000000ul
+#define IOC_IO_ExcpMask_PFE		0x2000000000000000ul
+
+#define IOC_IOCmd_Offset		0x1000
+
+#define IOC_IOCmd_Cfg			0xc00
+#define IOC_IOCmd_Cfg_TE		0x0000800000000000ul
+
+
+/* Segment table entries */
+#define IOSTE_V			0x8000000000000000ul /* valid */
+#define IOSTE_H			0x4000000000000000ul /* cache hint */
+#define IOSTE_PT_Base_RPN_Mask  0x3ffffffffffff000ul /* base RPN of IOPT */
+#define IOSTE_NPPT_Mask		0x0000000000000fe0ul /* no. pages in IOPT */
+#define IOSTE_PS_Mask		0x0000000000000007ul /* page size */
+#define IOSTE_PS_4K		0x0000000000000001ul /*   - 4kB  */
+#define IOSTE_PS_64K		0x0000000000000003ul /*   - 64kB */
+#define IOSTE_PS_1M		0x0000000000000005ul /*   - 1MB  */
+#define IOSTE_PS_16M		0x0000000000000007ul /*   - 16MB */
+
+/* Page table entries */
+#define IOPTE_PP_W		0x8000000000000000ul /* protection: write */
+#define IOPTE_PP_R		0x4000000000000000ul /* protection: read */
+#define IOPTE_M			0x2000000000000000ul /* coherency required */
+#define IOPTE_SO_R		0x1000000000000000ul /* ordering: writes */
+#define IOPTE_SO_RW             0x1800000000000000ul /* ordering: r & w */
+#define IOPTE_RPN_Mask		0x07fffffffffff000ul /* RPN */
+#define IOPTE_H			0x0000000000000800ul /* cache hint */
+#define IOPTE_IOID_Mask		0x00000000000007fful /* ioid */
+
+
+/* IOMMU sizing */
+#define IO_SEGMENT_SHIFT	28
+#define IO_PAGENO_BITS(shift)	(IO_SEGMENT_SHIFT - (shift))
+
+/* The high bit needs to be set on every DMA address */
+#define SPIDER_DMA_OFFSET	0x80000000ul
+
+struct iommu_window {
+	struct list_head list;
+	struct cbe_iommu *iommu;
+	unsigned long offset;
+	unsigned long size;
+	unsigned int ioid;
+	struct iommu_table table;
+};
+
+#define NAMESIZE 8
+struct cbe_iommu {
+	int nid;
+	char name[NAMESIZE];
+	void __iomem *xlate_regs;
+	void __iomem *cmd_regs;
+	unsigned long *stab;
+	unsigned long *ptab;
+	void *pad_page;
+	struct list_head windows;
+};
+
+/* Static array of iommus, one per node
+ *   each contains a list of windows, keyed from dma_window property
+ *   - on bus setup, look for a matching window, or create one
+ *   - on dev setup, assign iommu_table ptr
+ */
+static struct cbe_iommu iommus[NR_IOMMUS];
+static int cbe_nr_iommus;
+
+static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte,
+		long n_ptes)
+{
+	unsigned long __iomem *reg;
+	unsigned long val;
+	long n;
+
+	reg = iommu->xlate_regs + IOC_IOPT_CacheInvd;
+
+	while (n_ptes > 0) {
+		/* we can invalidate up to 1 << 11 PTEs at once */
+		n = min(n_ptes, 1l << 11);
+		val = (((n /*- 1*/) << 53) & IOC_IOPT_CacheInvd_NE_Mask)
+			| (__pa(pte) & IOC_IOPT_CacheInvd_IOPTE_Mask)
+		        | IOC_IOPT_CacheInvd_Busy;
+
+		out_be64(reg, val);
+		while (in_be64(reg) & IOC_IOPT_CacheInvd_Busy)
+			;
+
+		n_ptes -= n;
+		pte += n;
+	}
+}
+
+static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
+		unsigned long uaddr, enum dma_data_direction direction,
+		struct dma_attrs *attrs)
+{
+	int i;
+	unsigned long *io_pte, base_pte;
+	struct iommu_window *window =
+		container_of(tbl, struct iommu_window, table);
+
+	/* implementing proper protection causes problems with the spidernet
+	 * driver - check mapping directions later, but allow read & write by
+	 * default for now.*/
+#ifdef CELL_IOMMU_STRICT_PROTECTION
+	/* to avoid referencing a global, we use a trick here to setup the
+	 * protection bit. "prot" is setup to be 3 fields of 4 bits apprended
+	 * together for each of the 3 supported direction values. It is then
+	 * shifted left so that the fields matching the desired direction
+	 * lands on the appropriate bits, and other bits are masked out.
+	 */
+	const unsigned long prot = 0xc48;
+	base_pte =
+		((prot << (52 + 4 * direction)) & (IOPTE_PP_W | IOPTE_PP_R))
+		| IOPTE_M | IOPTE_SO_RW | (window->ioid & IOPTE_IOID_Mask);
+#else
+	base_pte = IOPTE_PP_W | IOPTE_PP_R | IOPTE_M | IOPTE_SO_RW |
+		(window->ioid & IOPTE_IOID_Mask);
+#endif
+	if (unlikely(dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)))
+		base_pte &= ~IOPTE_SO_RW;
+
+	io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset);
+
+	for (i = 0; i < npages; i++, uaddr += IOMMU_PAGE_SIZE)
+		io_pte[i] = base_pte | (__pa(uaddr) & IOPTE_RPN_Mask);
+
+	mb();
+
+	invalidate_tce_cache(window->iommu, io_pte, npages);
+
+	pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n",
+		 index, npages, direction, base_pte);
+	return 0;
+}
+
+static void tce_free_cell(struct iommu_table *tbl, long index, long npages)
+{
+
+	int i;
+	unsigned long *io_pte, pte;
+	struct iommu_window *window =
+		container_of(tbl, struct iommu_window, table);
+
+	pr_debug("tce_free_cell(index=%lx,n=%lx)\n", index, npages);
+
+#ifdef CELL_IOMMU_REAL_UNMAP
+	pte = 0;
+#else
+	/* spider bridge does PCI reads after freeing - insert a mapping
+	 * to a scratch page instead of an invalid entry */
+	pte = IOPTE_PP_R | IOPTE_M | IOPTE_SO_RW | __pa(window->iommu->pad_page)
+		| (window->ioid & IOPTE_IOID_Mask);
+#endif
+
+	io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset);
+
+	for (i = 0; i < npages; i++)
+		io_pte[i] = pte;
+
+	mb();
+
+	invalidate_tce_cache(window->iommu, io_pte, npages);
+}
+
+static irqreturn_t ioc_interrupt(int irq, void *data)
+{
+	unsigned long stat;
+	struct cbe_iommu *iommu = data;
+
+	stat = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat);
+
+	/* Might want to rate limit it */
+	printk(KERN_ERR "iommu: DMA exception 0x%016lx\n", stat);
+	printk(KERN_ERR "  V=%d, SPF=[%c%c], RW=%s, IOID=0x%04x\n",
+	       !!(stat & IOC_IO_ExcpStat_V),
+	       (stat & IOC_IO_ExcpStat_SPF_S) ? 'S' : ' ',
+	       (stat & IOC_IO_ExcpStat_SPF_P) ? 'P' : ' ',
+	       (stat & IOC_IO_ExcpStat_RW_Mask) ? "Read" : "Write",
+	       (unsigned int)(stat & IOC_IO_ExcpStat_IOID_Mask));
+	printk(KERN_ERR "  page=0x%016lx\n",
+	       stat & IOC_IO_ExcpStat_ADDR_Mask);
+
+	/* clear interrupt */
+	stat &= ~IOC_IO_ExcpStat_V;
+	out_be64(iommu->xlate_regs + IOC_IO_ExcpStat, stat);
+
+	return IRQ_HANDLED;
+}
+
+static int cell_iommu_find_ioc(int nid, unsigned long *base)
+{
+	struct device_node *np;
+	struct resource r;
+
+	*base = 0;
+
+	/* First look for new style /be nodes */
+	for_each_node_by_name(np, "ioc") {
+		if (of_node_to_nid(np) != nid)
+			continue;
+		if (of_address_to_resource(np, 0, &r)) {
+			printk(KERN_ERR "iommu: can't get address for %s\n",
+			       np->full_name);
+			continue;
+		}
+		*base = r.start;
+		of_node_put(np);
+		return 0;
+	}
+
+	/* Ok, let's try the old way */
+	for_each_node_by_type(np, "cpu") {
+		const unsigned int *nidp;
+		const unsigned long *tmp;
+
+		nidp = of_get_property(np, "node-id", NULL);
+		if (nidp && *nidp == nid) {
+			tmp = of_get_property(np, "ioc-translation", NULL);
+			if (tmp) {
+				*base = *tmp;
+				of_node_put(np);
+				return 0;
+			}
+		}
+	}
+
+	return -ENODEV;
+}
+
+static void cell_iommu_setup_stab(struct cbe_iommu *iommu,
+				unsigned long dbase, unsigned long dsize,
+				unsigned long fbase, unsigned long fsize)
+{
+	struct page *page;
+	unsigned long segments, stab_size;
+
+	segments = max(dbase + dsize, fbase + fsize) >> IO_SEGMENT_SHIFT;
+
+	pr_debug("%s: iommu[%d]: segments: %lu\n",
+			__func__, iommu->nid, segments);
+
+	/* set up the segment table */
+	stab_size = segments * sizeof(unsigned long);
+	page = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(stab_size));
+	BUG_ON(!page);
+	iommu->stab = page_address(page);
+	memset(iommu->stab, 0, stab_size);
+}
+
+static unsigned long *cell_iommu_alloc_ptab(struct cbe_iommu *iommu,
+		unsigned long base, unsigned long size, unsigned long gap_base,
+		unsigned long gap_size, unsigned long page_shift)
+{
+	struct page *page;
+	int i;
+	unsigned long reg, segments, pages_per_segment, ptab_size,
+		      n_pte_pages, start_seg, *ptab;
+
+	start_seg = base >> IO_SEGMENT_SHIFT;
+	segments  = size >> IO_SEGMENT_SHIFT;
+	pages_per_segment = 1ull << IO_PAGENO_BITS(page_shift);
+	/* PTEs for each segment must start on a 4K bounday */
+	pages_per_segment = max(pages_per_segment,
+				(1 << 12) / sizeof(unsigned long));
+
+	ptab_size = segments * pages_per_segment * sizeof(unsigned long);
+	pr_debug("%s: iommu[%d]: ptab_size: %lu, order: %d\n", __func__,
+			iommu->nid, ptab_size, get_order(ptab_size));
+	page = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(ptab_size));
+	BUG_ON(!page);
+
+	ptab = page_address(page);
+	memset(ptab, 0, ptab_size);
+
+	/* number of 4K pages needed for a page table */
+	n_pte_pages = (pages_per_segment * sizeof(unsigned long)) >> 12;
+
+	pr_debug("%s: iommu[%d]: stab at %p, ptab at %p, n_pte_pages: %lu\n",
+			__func__, iommu->nid, iommu->stab, ptab,
+			n_pte_pages);
+
+	/* initialise the STEs */
+	reg = IOSTE_V | ((n_pte_pages - 1) << 5);
+
+	switch (page_shift) {
+	case 12: reg |= IOSTE_PS_4K;  break;
+	case 16: reg |= IOSTE_PS_64K; break;
+	case 20: reg |= IOSTE_PS_1M;  break;
+	case 24: reg |= IOSTE_PS_16M; break;
+	default: BUG();
+	}
+
+	gap_base = gap_base >> IO_SEGMENT_SHIFT;
+	gap_size = gap_size >> IO_SEGMENT_SHIFT;
+
+	pr_debug("Setting up IOMMU stab:\n");
+	for (i = start_seg; i < (start_seg + segments); i++) {
+		if (i >= gap_base && i < (gap_base + gap_size)) {
+			pr_debug("\toverlap at %d, skipping\n", i);
+			continue;
+		}
+		iommu->stab[i] = reg | (__pa(ptab) + (n_pte_pages << 12) *
+					(i - start_seg));
+		pr_debug("\t[%d] 0x%016lx\n", i, iommu->stab[i]);
+	}
+
+	return ptab;
+}
+
+static void cell_iommu_enable_hardware(struct cbe_iommu *iommu)
+{
+	int ret;
+	unsigned long reg, xlate_base;
+	unsigned int virq;
+
+	if (cell_iommu_find_ioc(iommu->nid, &xlate_base))
+		panic("%s: missing IOC register mappings for node %d\n",
+		      __func__, iommu->nid);
+
+	iommu->xlate_regs = ioremap(xlate_base, IOC_Reg_Size);
+	iommu->cmd_regs = iommu->xlate_regs + IOC_IOCmd_Offset;
+
+	/* ensure that the STEs have updated */
+	mb();
+
+	/* setup interrupts for the iommu. */
+	reg = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat);
+	out_be64(iommu->xlate_regs + IOC_IO_ExcpStat,
+			reg & ~IOC_IO_ExcpStat_V);
+	out_be64(iommu->xlate_regs + IOC_IO_ExcpMask,
+			IOC_IO_ExcpMask_PFE | IOC_IO_ExcpMask_SFE);
+
+	virq = irq_create_mapping(NULL,
+			IIC_IRQ_IOEX_ATI | (iommu->nid << IIC_IRQ_NODE_SHIFT));
+	BUG_ON(virq == NO_IRQ);
+
+	ret = request_irq(virq, ioc_interrupt, IRQF_DISABLED,
+			iommu->name, iommu);
+	BUG_ON(ret);
+
+	/* set the IOC segment table origin register (and turn on the iommu) */
+	reg = IOC_IOST_Origin_E | __pa(iommu->stab) | IOC_IOST_Origin_HW;
+	out_be64(iommu->xlate_regs + IOC_IOST_Origin, reg);
+	in_be64(iommu->xlate_regs + IOC_IOST_Origin);
+
+	/* turn on IO translation */
+	reg = in_be64(iommu->cmd_regs + IOC_IOCmd_Cfg) | IOC_IOCmd_Cfg_TE;
+	out_be64(iommu->cmd_regs + IOC_IOCmd_Cfg, reg);
+}
+
+static void cell_iommu_setup_hardware(struct cbe_iommu *iommu,
+	unsigned long base, unsigned long size)
+{
+	cell_iommu_setup_stab(iommu, base, size, 0, 0);
+	iommu->ptab = cell_iommu_alloc_ptab(iommu, base, size, 0, 0,
+					    IOMMU_PAGE_SHIFT);
+	cell_iommu_enable_hardware(iommu);
+}
+
+#if 0/* Unused for now */
+static struct iommu_window *find_window(struct cbe_iommu *iommu,
+		unsigned long offset, unsigned long size)
+{
+	struct iommu_window *window;
+
+	/* todo: check for overlapping (but not equal) windows) */
+
+	list_for_each_entry(window, &(iommu->windows), list) {
+		if (window->offset == offset && window->size == size)
+			return window;
+	}
+
+	return NULL;
+}
+#endif
+
+static inline u32 cell_iommu_get_ioid(struct device_node *np)
+{
+	const u32 *ioid;
+
+	ioid = of_get_property(np, "ioid", NULL);
+	if (ioid == NULL) {
+		printk(KERN_WARNING "iommu: missing ioid for %s using 0\n",
+		       np->full_name);
+		return 0;
+	}
+
+	return *ioid;
+}
+
+static struct iommu_window * __init
+cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
+			unsigned long offset, unsigned long size,
+			unsigned long pte_offset)
+{
+	struct iommu_window *window;
+	struct page *page;
+	u32 ioid;
+
+	ioid = cell_iommu_get_ioid(np);
+
+	window = kmalloc_node(sizeof(*window), GFP_KERNEL, iommu->nid);
+	BUG_ON(window == NULL);
+
+	window->offset = offset;
+	window->size = size;
+	window->ioid = ioid;
+	window->iommu = iommu;
+
+	window->table.it_blocksize = 16;
+	window->table.it_base = (unsigned long)iommu->ptab;
+	window->table.it_index = iommu->nid;
+	window->table.it_offset = (offset >> IOMMU_PAGE_SHIFT) + pte_offset;
+	window->table.it_size = size >> IOMMU_PAGE_SHIFT;
+
+	iommu_init_table(&window->table, iommu->nid);
+
+	pr_debug("\tioid      %d\n", window->ioid);
+	pr_debug("\tblocksize %ld\n", window->table.it_blocksize);
+	pr_debug("\tbase      0x%016lx\n", window->table.it_base);
+	pr_debug("\toffset    0x%lx\n", window->table.it_offset);
+	pr_debug("\tsize      %ld\n", window->table.it_size);
+
+	list_add(&window->list, &iommu->windows);
+
+	if (offset != 0)
+		return window;
+
+	/* We need to map and reserve the first IOMMU page since it's used
+	 * by the spider workaround. In theory, we only need to do that when
+	 * running on spider but it doesn't really matter.
+	 *
+	 * This code also assumes that we have a window that starts at 0,
+	 * which is the case on all spider based blades.
+	 */
+	page = alloc_pages_node(iommu->nid, GFP_KERNEL, 0);
+	BUG_ON(!page);
+	iommu->pad_page = page_address(page);
+	clear_page(iommu->pad_page);
+
+	__set_bit(0, window->table.it_map);
+	tce_build_cell(&window->table, window->table.it_offset, 1,
+		       (unsigned long)iommu->pad_page, DMA_TO_DEVICE, NULL);
+	window->table.it_hint = window->table.it_blocksize;
+
+	return window;
+}
+
+static struct cbe_iommu *cell_iommu_for_node(int nid)
+{
+	int i;
+
+	for (i = 0; i < cbe_nr_iommus; i++)
+		if (iommus[i].nid == nid)
+			return &iommus[i];
+	return NULL;
+}
+
+static unsigned long cell_dma_direct_offset;
+
+static unsigned long dma_iommu_fixed_base;
+
+/* iommu_fixed_is_weak is set if booted with iommu_fixed=weak */
+static int iommu_fixed_is_weak;
+
+static struct iommu_table *cell_get_iommu_table(struct device *dev)
+{
+	struct iommu_window *window;
+	struct cbe_iommu *iommu;
+	struct dev_archdata *archdata = &dev->archdata;
+
+	/* Current implementation uses the first window available in that
+	 * node's iommu. We -might- do something smarter later though it may
+	 * never be necessary
+	 */
+	iommu = cell_iommu_for_node(dev_to_node(dev));
+	if (iommu == NULL || list_empty(&iommu->windows)) {
+		printk(KERN_ERR "iommu: missing iommu for %s (node %d)\n",
+		       archdata->of_node ? archdata->of_node->full_name : "?",
+		       dev_to_node(dev));
+		return NULL;
+	}
+	window = list_entry(iommu->windows.next, struct iommu_window, list);
+
+	return &window->table;
+}
+
+/* A coherent allocation implies strong ordering */
+
+static void *dma_fixed_alloc_coherent(struct device *dev, size_t size,
+				      dma_addr_t *dma_handle, gfp_t flag)
+{
+	if (iommu_fixed_is_weak)
+		return iommu_alloc_coherent(dev, cell_get_iommu_table(dev),
+					    size, dma_handle,
+					    device_to_mask(dev), flag,
+					    dev_to_node(dev));
+	else
+		return dma_direct_ops.alloc_coherent(dev, size, dma_handle,
+						     flag);
+}
+
+static void dma_fixed_free_coherent(struct device *dev, size_t size,
+				    void *vaddr, dma_addr_t dma_handle)
+{
+	if (iommu_fixed_is_weak)
+		iommu_free_coherent(cell_get_iommu_table(dev), size, vaddr,
+				    dma_handle);
+	else
+		dma_direct_ops.free_coherent(dev, size, vaddr, dma_handle);
+}
+
+static dma_addr_t dma_fixed_map_page(struct device *dev, struct page *page,
+				     unsigned long offset, size_t size,
+				     enum dma_data_direction direction,
+				     struct dma_attrs *attrs)
+{
+	if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs))
+		return dma_direct_ops.map_page(dev, page, offset, size,
+					       direction, attrs);
+	else
+		return iommu_map_page(dev, cell_get_iommu_table(dev), page,
+				      offset, size, device_to_mask(dev),
+				      direction, attrs);
+}
+
+static void dma_fixed_unmap_page(struct device *dev, dma_addr_t dma_addr,
+				 size_t size, enum dma_data_direction direction,
+				 struct dma_attrs *attrs)
+{
+	if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs))
+		dma_direct_ops.unmap_page(dev, dma_addr, size, direction,
+					  attrs);
+	else
+		iommu_unmap_page(cell_get_iommu_table(dev), dma_addr, size,
+				 direction, attrs);
+}
+
+static int dma_fixed_map_sg(struct device *dev, struct scatterlist *sg,
+			   int nents, enum dma_data_direction direction,
+			   struct dma_attrs *attrs)
+{
+	if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs))
+		return dma_direct_ops.map_sg(dev, sg, nents, direction, attrs);
+	else
+		return iommu_map_sg(dev, cell_get_iommu_table(dev), sg, nents,
+				    device_to_mask(dev), direction, attrs);
+}
+
+static void dma_fixed_unmap_sg(struct device *dev, struct scatterlist *sg,
+			       int nents, enum dma_data_direction direction,
+			       struct dma_attrs *attrs)
+{
+	if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs))
+		dma_direct_ops.unmap_sg(dev, sg, nents, direction, attrs);
+	else
+		iommu_unmap_sg(cell_get_iommu_table(dev), sg, nents, direction,
+			       attrs);
+}
+
+static int dma_fixed_dma_supported(struct device *dev, u64 mask)
+{
+	return mask == DMA_64BIT_MASK;
+}
+
+static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask);
+
+struct dma_mapping_ops dma_iommu_fixed_ops = {
+	.alloc_coherent = dma_fixed_alloc_coherent,
+	.free_coherent  = dma_fixed_free_coherent,
+	.map_sg         = dma_fixed_map_sg,
+	.unmap_sg       = dma_fixed_unmap_sg,
+	.dma_supported  = dma_fixed_dma_supported,
+	.set_dma_mask   = dma_set_mask_and_switch,
+	.map_page       = dma_fixed_map_page,
+	.unmap_page     = dma_fixed_unmap_page,
+};
+
+static void cell_dma_dev_setup_fixed(struct device *dev);
+
+static void cell_dma_dev_setup(struct device *dev)
+{
+	struct dev_archdata *archdata = &dev->archdata;
+
+	/* Order is important here, these are not mutually exclusive */
+	if (get_dma_ops(dev) == &dma_iommu_fixed_ops)
+		cell_dma_dev_setup_fixed(dev);
+	else if (get_pci_dma_ops() == &dma_iommu_ops)
+		archdata->dma_data = cell_get_iommu_table(dev);
+	else if (get_pci_dma_ops() == &dma_direct_ops)
+		archdata->dma_data = (void *)cell_dma_direct_offset;
+	else
+		BUG();
+}
+
+static void cell_pci_dma_dev_setup(struct pci_dev *dev)
+{
+	cell_dma_dev_setup(&dev->dev);
+}
+
+static int cell_of_bus_notify(struct notifier_block *nb, unsigned long action,
+			      void *data)
+{
+	struct device *dev = data;
+
+	/* We are only intereted in device addition */
+	if (action != BUS_NOTIFY_ADD_DEVICE)
+		return 0;
+
+	/* We use the PCI DMA ops */
+	dev->archdata.dma_ops = get_pci_dma_ops();
+
+	cell_dma_dev_setup(dev);
+
+	return 0;
+}
+
+static struct notifier_block cell_of_bus_notifier = {
+	.notifier_call = cell_of_bus_notify
+};
+
+static int __init cell_iommu_get_window(struct device_node *np,
+					 unsigned long *base,
+					 unsigned long *size)
+{
+	const void *dma_window;
+	unsigned long index;
+
+	/* Use ibm,dma-window if available, else, hard code ! */
+	dma_window = of_get_property(np, "ibm,dma-window", NULL);
+	if (dma_window == NULL) {
+		*base = 0;
+		*size = 0x80000000u;
+		return -ENODEV;
+	}
+
+	of_parse_dma_window(np, dma_window, &index, base, size);
+	return 0;
+}
+
+static struct cbe_iommu * __init cell_iommu_alloc(struct device_node *np)
+{
+	struct cbe_iommu *iommu;
+	int nid, i;
+
+	/* Get node ID */
+	nid = of_node_to_nid(np);
+	if (nid < 0) {
+		printk(KERN_ERR "iommu: failed to get node for %s\n",
+		       np->full_name);
+		return NULL;
+	}
+	pr_debug("iommu: setting up iommu for node %d (%s)\n",
+		 nid, np->full_name);
+
+	/* XXX todo: If we can have multiple windows on the same IOMMU, which
+	 * isn't the case today, we probably want here to check wether the
+	 * iommu for that node is already setup.
+	 * However, there might be issue with getting the size right so let's
+	 * ignore that for now. We might want to completely get rid of the
+	 * multiple window support since the cell iommu supports per-page ioids
+	 */
+
+	if (cbe_nr_iommus >= NR_IOMMUS) {
+		printk(KERN_ERR "iommu: too many IOMMUs detected ! (%s)\n",
+		       np->full_name);
+		return NULL;
+	}
+
+	/* Init base fields */
+	i = cbe_nr_iommus++;
+	iommu = &iommus[i];
+	iommu->stab = NULL;
+	iommu->nid = nid;
+	snprintf(iommu->name, sizeof(iommu->name), "iommu%d", i);
+	INIT_LIST_HEAD(&iommu->windows);
+
+	return iommu;
+}
+
+static void __init cell_iommu_init_one(struct device_node *np,
+				       unsigned long offset)
+{
+	struct cbe_iommu *iommu;
+	unsigned long base, size;
+
+	iommu = cell_iommu_alloc(np);
+	if (!iommu)
+		return;
+
+	/* Obtain a window for it */
+	cell_iommu_get_window(np, &base, &size);
+
+	pr_debug("\ttranslating window 0x%lx...0x%lx\n",
+		 base, base + size - 1);
+
+	/* Initialize the hardware */
+	cell_iommu_setup_hardware(iommu, base, size);
+
+	/* Setup the iommu_table */
+	cell_iommu_setup_window(iommu, np, base, size,
+				offset >> IOMMU_PAGE_SHIFT);
+}
+
+static void __init cell_disable_iommus(void)
+{
+	int node;
+	unsigned long base, val;
+	void __iomem *xregs, *cregs;
+
+	/* Make sure IOC translation is disabled on all nodes */
+	for_each_online_node(node) {
+		if (cell_iommu_find_ioc(node, &base))
+			continue;
+		xregs = ioremap(base, IOC_Reg_Size);
+		if (xregs == NULL)
+			continue;
+		cregs = xregs + IOC_IOCmd_Offset;
+
+		pr_debug("iommu: cleaning up iommu on node %d\n", node);
+
+		out_be64(xregs + IOC_IOST_Origin, 0);
+		(void)in_be64(xregs + IOC_IOST_Origin);
+		val = in_be64(cregs + IOC_IOCmd_Cfg);
+		val &= ~IOC_IOCmd_Cfg_TE;
+		out_be64(cregs + IOC_IOCmd_Cfg, val);
+		(void)in_be64(cregs + IOC_IOCmd_Cfg);
+
+		iounmap(xregs);
+	}
+}
+
+static int __init cell_iommu_init_disabled(void)
+{
+	struct device_node *np = NULL;
+	unsigned long base = 0, size;
+
+	/* When no iommu is present, we use direct DMA ops */
+	set_pci_dma_ops(&dma_direct_ops);
+
+	/* First make sure all IOC translation is turned off */
+	cell_disable_iommus();
+
+	/* If we have no Axon, we set up the spider DMA magic offset */
+	if (of_find_node_by_name(NULL, "axon") == NULL)
+		cell_dma_direct_offset = SPIDER_DMA_OFFSET;
+
+	/* Now we need to check to see where the memory is mapped
+	 * in PCI space. We assume that all busses use the same dma
+	 * window which is always the case so far on Cell, thus we
+	 * pick up the first pci-internal node we can find and check
+	 * the DMA window from there.
+	 */
+	for_each_node_by_name(np, "axon") {
+		if (np->parent == NULL || np->parent->parent != NULL)
+			continue;
+		if (cell_iommu_get_window(np, &base, &size) == 0)
+			break;
+	}
+	if (np == NULL) {
+		for_each_node_by_name(np, "pci-internal") {
+			if (np->parent == NULL || np->parent->parent != NULL)
+				continue;
+			if (cell_iommu_get_window(np, &base, &size) == 0)
+				break;
+		}
+	}
+	of_node_put(np);
+
+	/* If we found a DMA window, we check if it's big enough to enclose
+	 * all of physical memory. If not, we force enable IOMMU
+	 */
+	if (np && size < lmb_end_of_DRAM()) {
+		printk(KERN_WARNING "iommu: force-enabled, dma window"
+		       " (%ldMB) smaller than total memory (%ldMB)\n",
+		       size >> 20, lmb_end_of_DRAM() >> 20);
+		return -ENODEV;
+	}
+
+	cell_dma_direct_offset += base;
+
+	if (cell_dma_direct_offset != 0)
+		ppc_md.pci_dma_dev_setup = cell_pci_dma_dev_setup;
+
+	printk("iommu: disabled, direct DMA offset is 0x%lx\n",
+	       cell_dma_direct_offset);
+
+	return 0;
+}
+
+/*
+ *  Fixed IOMMU mapping support
+ *
+ *  This code adds support for setting up a fixed IOMMU mapping on certain
+ *  cell machines. For 64-bit devices this avoids the performance overhead of
+ *  mapping and unmapping pages at runtime. 32-bit devices are unable to use
+ *  the fixed mapping.
+ *
+ *  The fixed mapping is established at boot, and maps all of physical memory
+ *  1:1 into device space at some offset. On machines with < 30 GB of memory
+ *  we setup the fixed mapping immediately above the normal IOMMU window.
+ *
+ *  For example a machine with 4GB of memory would end up with the normal
+ *  IOMMU window from 0-2GB and the fixed mapping window from 2GB to 6GB. In
+ *  this case a 64-bit device wishing to DMA to 1GB would be told to DMA to
+ *  3GB, plus any offset required by firmware. The firmware offset is encoded
+ *  in the "dma-ranges" property.
+ *
+ *  On machines with 30GB or more of memory, we are unable to place the fixed
+ *  mapping above the normal IOMMU window as we would run out of address space.
+ *  Instead we move the normal IOMMU window to coincide with the hash page
+ *  table, this region does not need to be part of the fixed mapping as no
+ *  device should ever be DMA'ing to it. We then setup the fixed mapping
+ *  from 0 to 32GB.
+ */
+
+static u64 cell_iommu_get_fixed_address(struct device *dev)
+{
+	u64 cpu_addr, size, best_size, dev_addr = OF_BAD_ADDR;
+	struct device_node *np;
+	const u32 *ranges = NULL;
+	int i, len, best, naddr, nsize, pna, range_size;
+
+	np = of_node_get(dev->archdata.of_node);
+	while (1) {
+		naddr = of_n_addr_cells(np);
+		nsize = of_n_size_cells(np);
+		np = of_get_next_parent(np);
+		if (!np)
+			break;
+
+		ranges = of_get_property(np, "dma-ranges", &len);
+
+		/* Ignore empty ranges, they imply no translation required */
+		if (ranges && len > 0)
+			break;
+	}
+
+	if (!ranges) {
+		dev_dbg(dev, "iommu: no dma-ranges found\n");
+		goto out;
+	}
+
+	len /= sizeof(u32);
+
+	pna = of_n_addr_cells(np);
+	range_size = naddr + nsize + pna;
+
+	/* dma-ranges format:
+	 * child addr	: naddr cells
+	 * parent addr	: pna cells
+	 * size		: nsize cells
+	 */
+	for (i = 0, best = -1, best_size = 0; i < len; i += range_size) {
+		cpu_addr = of_translate_dma_address(np, ranges + i + naddr);
+		size = of_read_number(ranges + i + naddr + pna, nsize);
+
+		if (cpu_addr == 0 && size > best_size) {
+			best = i;
+			best_size = size;
+		}
+	}
+
+	if (best >= 0) {
+		dev_addr = of_read_number(ranges + best, naddr);
+	} else
+		dev_dbg(dev, "iommu: no suitable range found!\n");
+
+out:
+	of_node_put(np);
+
+	return dev_addr;
+}
+
+static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask)
+{
+	if (!dev->dma_mask || !dma_supported(dev, dma_mask))
+		return -EIO;
+
+	if (dma_mask == DMA_BIT_MASK(64) &&
+		cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR)
+	{
+		dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
+		set_dma_ops(dev, &dma_iommu_fixed_ops);
+	} else {
+		dev_dbg(dev, "iommu: not 64-bit, using default ops\n");
+		set_dma_ops(dev, get_pci_dma_ops());
+	}
+
+	cell_dma_dev_setup(dev);
+
+	*dev->dma_mask = dma_mask;
+
+	return 0;
+}
+
+static void cell_dma_dev_setup_fixed(struct device *dev)
+{
+	struct dev_archdata *archdata = &dev->archdata;
+	u64 addr;
+
+	addr = cell_iommu_get_fixed_address(dev) + dma_iommu_fixed_base;
+	archdata->dma_data = (void *)addr;
+
+	dev_dbg(dev, "iommu: fixed addr = %lx\n", addr);
+}
+
+static void insert_16M_pte(unsigned long addr, unsigned long *ptab,
+			   unsigned long base_pte)
+{
+	unsigned long segment, offset;
+
+	segment = addr >> IO_SEGMENT_SHIFT;
+	offset = (addr >> 24) - (segment << IO_PAGENO_BITS(24));
+	ptab = ptab + (segment * (1 << 12) / sizeof(unsigned long));
+
+	pr_debug("iommu: addr %lx ptab %p segment %lx offset %lx\n",
+		  addr, ptab, segment, offset);
+
+	ptab[offset] = base_pte | (__pa(addr) & IOPTE_RPN_Mask);
+}
+
+static void cell_iommu_setup_fixed_ptab(struct cbe_iommu *iommu,
+	struct device_node *np, unsigned long dbase, unsigned long dsize,
+	unsigned long fbase, unsigned long fsize)
+{
+	unsigned long base_pte, uaddr, ioaddr, *ptab;
+
+	ptab = cell_iommu_alloc_ptab(iommu, fbase, fsize, dbase, dsize, 24);
+
+	dma_iommu_fixed_base = fbase;
+
+	pr_debug("iommu: mapping 0x%lx pages from 0x%lx\n", fsize, fbase);
+
+	base_pte = IOPTE_PP_W | IOPTE_PP_R | IOPTE_M
+		    | (cell_iommu_get_ioid(np) & IOPTE_IOID_Mask);
+
+	if (iommu_fixed_is_weak)
+		pr_info("IOMMU: Using weak ordering for fixed mapping\n");
+	else {
+		pr_info("IOMMU: Using strong ordering for fixed mapping\n");
+		base_pte |= IOPTE_SO_RW;
+	}
+
+	for (uaddr = 0; uaddr < fsize; uaddr += (1 << 24)) {
+		/* Don't touch the dynamic region */
+		ioaddr = uaddr + fbase;
+		if (ioaddr >= dbase && ioaddr < (dbase + dsize)) {
+			pr_debug("iommu: fixed/dynamic overlap, skipping\n");
+			continue;
+		}
+
+		insert_16M_pte(uaddr, ptab, base_pte);
+	}
+
+	mb();
+}
+
+static int __init cell_iommu_fixed_mapping_init(void)
+{
+	unsigned long dbase, dsize, fbase, fsize, hbase, hend;
+	struct cbe_iommu *iommu;
+	struct device_node *np;
+
+	/* The fixed mapping is only supported on axon machines */
+	np = of_find_node_by_name(NULL, "axon");
+	if (!np) {
+		pr_debug("iommu: fixed mapping disabled, no axons found\n");
+		return -1;
+	}
+
+	/* We must have dma-ranges properties for fixed mapping to work */
+	for (np = NULL; (np = of_find_all_nodes(np));) {
+		if (of_find_property(np, "dma-ranges", NULL))
+			break;
+	}
+	of_node_put(np);
+
+	if (!np) {
+		pr_debug("iommu: no dma-ranges found, no fixed mapping\n");
+		return -1;
+	}
+
+	/* The default setup is to have the fixed mapping sit after the
+	 * dynamic region, so find the top of the largest IOMMU window
+	 * on any axon, then add the size of RAM and that's our max value.
+	 * If that is > 32GB we have to do other shennanigans.
+	 */
+	fbase = 0;
+	for_each_node_by_name(np, "axon") {
+		cell_iommu_get_window(np, &dbase, &dsize);
+		fbase = max(fbase, dbase + dsize);
+	}
+
+	fbase = _ALIGN_UP(fbase, 1 << IO_SEGMENT_SHIFT);
+	fsize = lmb_phys_mem_size();
+
+	if ((fbase + fsize) <= 0x800000000)
+		hbase = 0; /* use the device tree window */
+	else {
+		/* If we're over 32 GB we need to cheat. We can't map all of
+		 * RAM with the fixed mapping, and also fit the dynamic
+		 * region. So try to place the dynamic region where the hash
+		 * table sits, drivers never need to DMA to it, we don't
+		 * need a fixed mapping for that area.
+		 */
+		if (!htab_address) {
+			pr_debug("iommu: htab is NULL, on LPAR? Huh?\n");
+			return -1;
+		}
+		hbase = __pa(htab_address);
+		hend  = hbase + htab_size_bytes;
+
+		/* The window must start and end on a segment boundary */
+		if ((hbase != _ALIGN_UP(hbase, 1 << IO_SEGMENT_SHIFT)) ||
+		    (hend != _ALIGN_UP(hend, 1 << IO_SEGMENT_SHIFT))) {
+			pr_debug("iommu: hash window not segment aligned\n");
+			return -1;
+		}
+
+		/* Check the hash window fits inside the real DMA window */
+		for_each_node_by_name(np, "axon") {
+			cell_iommu_get_window(np, &dbase, &dsize);
+
+			if (hbase < dbase || (hend > (dbase + dsize))) {
+				pr_debug("iommu: hash window doesn't fit in"
+					 "real DMA window\n");
+				return -1;
+			}
+		}
+
+		fbase = 0;
+	}
+
+	/* Setup the dynamic regions */
+	for_each_node_by_name(np, "axon") {
+		iommu = cell_iommu_alloc(np);
+		BUG_ON(!iommu);
+
+		if (hbase == 0)
+			cell_iommu_get_window(np, &dbase, &dsize);
+		else {
+			dbase = hbase;
+			dsize = htab_size_bytes;
+		}
+
+		printk(KERN_DEBUG "iommu: node %d, dynamic window 0x%lx-0x%lx "
+			"fixed window 0x%lx-0x%lx\n", iommu->nid, dbase,
+			 dbase + dsize, fbase, fbase + fsize);
+
+		cell_iommu_setup_stab(iommu, dbase, dsize, fbase, fsize);
+		iommu->ptab = cell_iommu_alloc_ptab(iommu, dbase, dsize, 0, 0,
+						    IOMMU_PAGE_SHIFT);
+		cell_iommu_setup_fixed_ptab(iommu, np, dbase, dsize,
+					     fbase, fsize);
+		cell_iommu_enable_hardware(iommu);
+		cell_iommu_setup_window(iommu, np, dbase, dsize, 0);
+	}
+
+	dma_iommu_ops.set_dma_mask = dma_set_mask_and_switch;
+	set_pci_dma_ops(&dma_iommu_ops);
+
+	return 0;
+}
+
+static int iommu_fixed_disabled;
+
+static int __init setup_iommu_fixed(char *str)
+{
+	struct device_node *pciep;
+
+	if (strcmp(str, "off") == 0)
+		iommu_fixed_disabled = 1;
+
+	/* If we can find a pcie-endpoint in the device tree assume that
+	 * we're on a triblade or a CAB so by default the fixed mapping
+	 * should be set to be weakly ordered; but only if the boot
+	 * option WASN'T set for strong ordering
+	 */
+	pciep = of_find_node_by_type(NULL, "pcie-endpoint");
+
+	if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0))
+		iommu_fixed_is_weak = 1;
+
+	of_node_put(pciep);
+
+	return 1;
+}
+__setup("iommu_fixed=", setup_iommu_fixed);
+
+static int __init cell_iommu_init(void)
+{
+	struct device_node *np;
+
+	/* If IOMMU is disabled or we have little enough RAM to not need
+	 * to enable it, we setup a direct mapping.
+	 *
+	 * Note: should we make sure we have the IOMMU actually disabled ?
+	 */
+	if (iommu_is_off ||
+	    (!iommu_force_on && lmb_end_of_DRAM() <= 0x80000000ull))
+		if (cell_iommu_init_disabled() == 0)
+			goto bail;
+
+	/* Setup various ppc_md. callbacks */
+	ppc_md.pci_dma_dev_setup = cell_pci_dma_dev_setup;
+	ppc_md.tce_build = tce_build_cell;
+	ppc_md.tce_free = tce_free_cell;
+
+	if (!iommu_fixed_disabled && cell_iommu_fixed_mapping_init() == 0)
+		goto bail;
+
+	/* Create an iommu for each /axon node.  */
+	for_each_node_by_name(np, "axon") {
+		if (np->parent == NULL || np->parent->parent != NULL)
+			continue;
+		cell_iommu_init_one(np, 0);
+	}
+
+	/* Create an iommu for each toplevel /pci-internal node for
+	 * old hardware/firmware
+	 */
+	for_each_node_by_name(np, "pci-internal") {
+		if (np->parent == NULL || np->parent->parent != NULL)
+			continue;
+		cell_iommu_init_one(np, SPIDER_DMA_OFFSET);
+	}
+
+	/* Setup default PCI iommu ops */
+	set_pci_dma_ops(&dma_iommu_ops);
+
+ bail:
+	/* Register callbacks on OF platform device addition/removal
+	 * to handle linking them to the right DMA operations
+	 */
+	bus_register_notifier(&of_platform_bus_type, &cell_of_bus_notifier);
+
+	return 0;
+}
+machine_arch_initcall(cell, cell_iommu_init);
+machine_arch_initcall(celleb_native, cell_iommu_init);
+
diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c
new file mode 100644
index 0000000..efdacc8
--- /dev/null
+++ b/arch/powerpc/platforms/cell/pervasive.c
@@ -0,0 +1,132 @@
+/*
+ * CBE Pervasive Monitor and Debug
+ *
+ * (C) Copyright IBM Corporation 2005
+ *
+ * Authors: Maximino Aguilar (maguilar@us.ibm.com)
+ *          Michael N. Day (mnday@us.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/kallsyms.h>
+
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/pgtable.h>
+#include <asm/reg.h>
+#include <asm/cell-regs.h>
+
+#include "pervasive.h"
+
+static void cbe_power_save(void)
+{
+	unsigned long ctrl, thread_switch_control;
+
+	/*
+	 * We need to hard disable interrupts, the local_irq_enable() done by
+	 * our caller upon return will hard re-enable.
+	 */
+	hard_irq_disable();
+
+	ctrl = mfspr(SPRN_CTRLF);
+
+	/* Enable DEC and EE interrupt request */
+	thread_switch_control  = mfspr(SPRN_TSC_CELL);
+	thread_switch_control |= TSC_CELL_EE_ENABLE | TSC_CELL_EE_BOOST;
+
+	switch (ctrl & CTRL_CT) {
+	case CTRL_CT0:
+		thread_switch_control |= TSC_CELL_DEC_ENABLE_0;
+		break;
+	case CTRL_CT1:
+		thread_switch_control |= TSC_CELL_DEC_ENABLE_1;
+		break;
+	default:
+		printk(KERN_WARNING "%s: unknown configuration\n",
+			__func__);
+		break;
+	}
+	mtspr(SPRN_TSC_CELL, thread_switch_control);
+
+	/*
+	 * go into low thread priority, medium priority will be
+	 * restored for us after wake-up.
+	 */
+	HMT_low();
+
+	/*
+	 * atomically disable thread execution and runlatch.
+	 * External and Decrementer exceptions are still handled when the
+	 * thread is disabled but now enter in cbe_system_reset_exception()
+	 */
+	ctrl &= ~(CTRL_RUNLATCH | CTRL_TE);
+	mtspr(SPRN_CTRLT, ctrl);
+}
+
+static int cbe_system_reset_exception(struct pt_regs *regs)
+{
+	switch (regs->msr & SRR1_WAKEMASK) {
+	case SRR1_WAKEEE:
+		do_IRQ(regs);
+		break;
+	case SRR1_WAKEDEC:
+		timer_interrupt(regs);
+		break;
+	case SRR1_WAKEMT:
+		return cbe_sysreset_hack();
+#ifdef CONFIG_CBE_RAS
+	case SRR1_WAKESYSERR:
+		cbe_system_error_exception(regs);
+		break;
+	case SRR1_WAKETHERM:
+		cbe_thermal_exception(regs);
+		break;
+#endif /* CONFIG_CBE_RAS */
+	default:
+		/* do system reset */
+		return 0;
+	}
+	/* everything handled */
+	return 1;
+}
+
+void __init cbe_pervasive_init(void)
+{
+	int cpu;
+
+	if (!cpu_has_feature(CPU_FTR_PAUSE_ZERO))
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(cpu);
+		if (!regs)
+			continue;
+
+		 /* Enable Pause(0) control bit */
+		out_be64(&regs->pmcr, in_be64(&regs->pmcr) |
+					    CBE_PMD_PAUSE_ZERO_CONTROL);
+	}
+
+	ppc_md.power_save = cbe_power_save;
+	ppc_md.system_reset_exception = cbe_system_reset_exception;
+}
diff --git a/arch/powerpc/platforms/cell/pervasive.h b/arch/powerpc/platforms/cell/pervasive.h
new file mode 100644
index 0000000..fd4d7b7
--- /dev/null
+++ b/arch/powerpc/platforms/cell/pervasive.h
@@ -0,0 +1,42 @@
+/*
+ * Cell Pervasive Monitor and Debug interface and HW structures
+ *
+ * (C) Copyright IBM Corporation 2005
+ *
+ * Authors: Maximino Aguilar (maguilar@us.ibm.com)
+ *          David J. Erb (djerb@us.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#ifndef PERVASIVE_H
+#define PERVASIVE_H
+
+extern void cbe_pervasive_init(void);
+extern void cbe_system_error_exception(struct pt_regs *regs);
+extern void cbe_maintenance_exception(struct pt_regs *regs);
+extern void cbe_thermal_exception(struct pt_regs *regs);
+
+#ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON
+extern int cbe_sysreset_hack(void);
+#else
+static inline int cbe_sysreset_hack(void)
+{
+	return 1;
+}
+#endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */
+
+#endif
diff --git a/arch/powerpc/platforms/cell/pmu.c b/arch/powerpc/platforms/cell/pmu.c
new file mode 100644
index 0000000..69ed0d7
--- /dev/null
+++ b/arch/powerpc/platforms/cell/pmu.c
@@ -0,0 +1,423 @@
+/*
+ * Cell Broadband Engine Performance Monitor
+ *
+ * (C) Copyright IBM Corporation 2001,2006
+ *
+ * Author:
+ *    David Erb (djerb@us.ibm.com)
+ *    Kevin Corry (kevcorry@us.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <asm/io.h>
+#include <asm/irq_regs.h>
+#include <asm/machdep.h>
+#include <asm/pmc.h>
+#include <asm/reg.h>
+#include <asm/spu.h>
+#include <asm/cell-regs.h>
+
+#include "interrupt.h"
+
+/*
+ * When writing to write-only mmio addresses, save a shadow copy. All of the
+ * registers are 32-bit, but stored in the upper-half of a 64-bit field in
+ * pmd_regs.
+ */
+
+#define WRITE_WO_MMIO(reg, x)					\
+	do {							\
+		u32 _x = (x);					\
+		struct cbe_pmd_regs __iomem *pmd_regs;		\
+		struct cbe_pmd_shadow_regs *shadow_regs;	\
+		pmd_regs = cbe_get_cpu_pmd_regs(cpu);		\
+		shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu);	\
+		out_be64(&(pmd_regs->reg), (((u64)_x) << 32));	\
+		shadow_regs->reg = _x;				\
+	} while (0)
+
+#define READ_SHADOW_REG(val, reg)				\
+	do {							\
+		struct cbe_pmd_shadow_regs *shadow_regs;	\
+		shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu);	\
+		(val) = shadow_regs->reg;			\
+	} while (0)
+
+#define READ_MMIO_UPPER32(val, reg)				\
+	do {							\
+		struct cbe_pmd_regs __iomem *pmd_regs;		\
+		pmd_regs = cbe_get_cpu_pmd_regs(cpu);		\
+		(val) = (u32)(in_be64(&pmd_regs->reg) >> 32);	\
+	} while (0)
+
+/*
+ * Physical counter registers.
+ * Each physical counter can act as one 32-bit counter or two 16-bit counters.
+ */
+
+u32 cbe_read_phys_ctr(u32 cpu, u32 phys_ctr)
+{
+	u32 val_in_latch, val = 0;
+
+	if (phys_ctr < NR_PHYS_CTRS) {
+		READ_SHADOW_REG(val_in_latch, counter_value_in_latch);
+
+		/* Read the latch or the actual counter, whichever is newer. */
+		if (val_in_latch & (1 << phys_ctr)) {
+			READ_SHADOW_REG(val, pm_ctr[phys_ctr]);
+		} else {
+			READ_MMIO_UPPER32(val, pm_ctr[phys_ctr]);
+		}
+	}
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(cbe_read_phys_ctr);
+
+void cbe_write_phys_ctr(u32 cpu, u32 phys_ctr, u32 val)
+{
+	struct cbe_pmd_shadow_regs *shadow_regs;
+	u32 pm_ctrl;
+
+	if (phys_ctr < NR_PHYS_CTRS) {
+		/* Writing to a counter only writes to a hardware latch.
+		 * The new value is not propagated to the actual counter
+		 * until the performance monitor is enabled.
+		 */
+		WRITE_WO_MMIO(pm_ctr[phys_ctr], val);
+
+		pm_ctrl = cbe_read_pm(cpu, pm_control);
+		if (pm_ctrl & CBE_PM_ENABLE_PERF_MON) {
+			/* The counters are already active, so we need to
+			 * rewrite the pm_control register to "re-enable"
+			 * the PMU.
+			 */
+			cbe_write_pm(cpu, pm_control, pm_ctrl);
+		} else {
+			shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu);
+			shadow_regs->counter_value_in_latch |= (1 << phys_ctr);
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(cbe_write_phys_ctr);
+
+/*
+ * "Logical" counter registers.
+ * These will read/write 16-bits or 32-bits depending on the
+ * current size of the counter. Counters 4 - 7 are always 16-bit.
+ */
+
+u32 cbe_read_ctr(u32 cpu, u32 ctr)
+{
+	u32 val;
+	u32 phys_ctr = ctr & (NR_PHYS_CTRS - 1);
+
+	val = cbe_read_phys_ctr(cpu, phys_ctr);
+
+	if (cbe_get_ctr_size(cpu, phys_ctr) == 16)
+		val = (ctr < NR_PHYS_CTRS) ? (val >> 16) : (val & 0xffff);
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(cbe_read_ctr);
+
+void cbe_write_ctr(u32 cpu, u32 ctr, u32 val)
+{
+	u32 phys_ctr;
+	u32 phys_val;
+
+	phys_ctr = ctr & (NR_PHYS_CTRS - 1);
+
+	if (cbe_get_ctr_size(cpu, phys_ctr) == 16) {
+		phys_val = cbe_read_phys_ctr(cpu, phys_ctr);
+
+		if (ctr < NR_PHYS_CTRS)
+			val = (val << 16) | (phys_val & 0xffff);
+		else
+			val = (val & 0xffff) | (phys_val & 0xffff0000);
+	}
+
+	cbe_write_phys_ctr(cpu, phys_ctr, val);
+}
+EXPORT_SYMBOL_GPL(cbe_write_ctr);
+
+/*
+ * Counter-control registers.
+ * Each "logical" counter has a corresponding control register.
+ */
+
+u32 cbe_read_pm07_control(u32 cpu, u32 ctr)
+{
+	u32 pm07_control = 0;
+
+	if (ctr < NR_CTRS)
+		READ_SHADOW_REG(pm07_control, pm07_control[ctr]);
+
+	return pm07_control;
+}
+EXPORT_SYMBOL_GPL(cbe_read_pm07_control);
+
+void cbe_write_pm07_control(u32 cpu, u32 ctr, u32 val)
+{
+	if (ctr < NR_CTRS)
+		WRITE_WO_MMIO(pm07_control[ctr], val);
+}
+EXPORT_SYMBOL_GPL(cbe_write_pm07_control);
+
+/*
+ * Other PMU control registers. Most of these are write-only.
+ */
+
+u32 cbe_read_pm(u32 cpu, enum pm_reg_name reg)
+{
+	u32 val = 0;
+
+	switch (reg) {
+	case group_control:
+		READ_SHADOW_REG(val, group_control);
+		break;
+
+	case debug_bus_control:
+		READ_SHADOW_REG(val, debug_bus_control);
+		break;
+
+	case trace_address:
+		READ_MMIO_UPPER32(val, trace_address);
+		break;
+
+	case ext_tr_timer:
+		READ_SHADOW_REG(val, ext_tr_timer);
+		break;
+
+	case pm_status:
+		READ_MMIO_UPPER32(val, pm_status);
+		break;
+
+	case pm_control:
+		READ_SHADOW_REG(val, pm_control);
+		break;
+
+	case pm_interval:
+		READ_MMIO_UPPER32(val, pm_interval);
+		break;
+
+	case pm_start_stop:
+		READ_SHADOW_REG(val, pm_start_stop);
+		break;
+	}
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(cbe_read_pm);
+
+void cbe_write_pm(u32 cpu, enum pm_reg_name reg, u32 val)
+{
+	switch (reg) {
+	case group_control:
+		WRITE_WO_MMIO(group_control, val);
+		break;
+
+	case debug_bus_control:
+		WRITE_WO_MMIO(debug_bus_control, val);
+		break;
+
+	case trace_address:
+		WRITE_WO_MMIO(trace_address, val);
+		break;
+
+	case ext_tr_timer:
+		WRITE_WO_MMIO(ext_tr_timer, val);
+		break;
+
+	case pm_status:
+		WRITE_WO_MMIO(pm_status, val);
+		break;
+
+	case pm_control:
+		WRITE_WO_MMIO(pm_control, val);
+		break;
+
+	case pm_interval:
+		WRITE_WO_MMIO(pm_interval, val);
+		break;
+
+	case pm_start_stop:
+		WRITE_WO_MMIO(pm_start_stop, val);
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(cbe_write_pm);
+
+/*
+ * Get/set the size of a physical counter to either 16 or 32 bits.
+ */
+
+u32 cbe_get_ctr_size(u32 cpu, u32 phys_ctr)
+{
+	u32 pm_ctrl, size = 0;
+
+	if (phys_ctr < NR_PHYS_CTRS) {
+		pm_ctrl = cbe_read_pm(cpu, pm_control);
+		size = (pm_ctrl & CBE_PM_16BIT_CTR(phys_ctr)) ? 16 : 32;
+	}
+
+	return size;
+}
+EXPORT_SYMBOL_GPL(cbe_get_ctr_size);
+
+void cbe_set_ctr_size(u32 cpu, u32 phys_ctr, u32 ctr_size)
+{
+	u32 pm_ctrl;
+
+	if (phys_ctr < NR_PHYS_CTRS) {
+		pm_ctrl = cbe_read_pm(cpu, pm_control);
+		switch (ctr_size) {
+		case 16:
+			pm_ctrl |= CBE_PM_16BIT_CTR(phys_ctr);
+			break;
+
+		case 32:
+			pm_ctrl &= ~CBE_PM_16BIT_CTR(phys_ctr);
+			break;
+		}
+		cbe_write_pm(cpu, pm_control, pm_ctrl);
+	}
+}
+EXPORT_SYMBOL_GPL(cbe_set_ctr_size);
+
+/*
+ * Enable/disable the entire performance monitoring unit.
+ * When we enable the PMU, all pending writes to counters get committed.
+ */
+
+void cbe_enable_pm(u32 cpu)
+{
+	struct cbe_pmd_shadow_regs *shadow_regs;
+	u32 pm_ctrl;
+
+	shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu);
+	shadow_regs->counter_value_in_latch = 0;
+
+	pm_ctrl = cbe_read_pm(cpu, pm_control) | CBE_PM_ENABLE_PERF_MON;
+	cbe_write_pm(cpu, pm_control, pm_ctrl);
+}
+EXPORT_SYMBOL_GPL(cbe_enable_pm);
+
+void cbe_disable_pm(u32 cpu)
+{
+	u32 pm_ctrl;
+	pm_ctrl = cbe_read_pm(cpu, pm_control) & ~CBE_PM_ENABLE_PERF_MON;
+	cbe_write_pm(cpu, pm_control, pm_ctrl);
+}
+EXPORT_SYMBOL_GPL(cbe_disable_pm);
+
+/*
+ * Reading from the trace_buffer.
+ * The trace buffer is two 64-bit registers. Reading from
+ * the second half automatically increments the trace_address.
+ */
+
+void cbe_read_trace_buffer(u32 cpu, u64 *buf)
+{
+	struct cbe_pmd_regs __iomem *pmd_regs = cbe_get_cpu_pmd_regs(cpu);
+
+	*buf++ = in_be64(&pmd_regs->trace_buffer_0_63);
+	*buf++ = in_be64(&pmd_regs->trace_buffer_64_127);
+}
+EXPORT_SYMBOL_GPL(cbe_read_trace_buffer);
+
+/*
+ * Enabling/disabling interrupts for the entire performance monitoring unit.
+ */
+
+u32 cbe_get_and_clear_pm_interrupts(u32 cpu)
+{
+	/* Reading pm_status clears the interrupt bits. */
+	return cbe_read_pm(cpu, pm_status);
+}
+EXPORT_SYMBOL_GPL(cbe_get_and_clear_pm_interrupts);
+
+void cbe_enable_pm_interrupts(u32 cpu, u32 thread, u32 mask)
+{
+	/* Set which node and thread will handle the next interrupt. */
+	iic_set_interrupt_routing(cpu, thread, 0);
+
+	/* Enable the interrupt bits in the pm_status register. */
+	if (mask)
+		cbe_write_pm(cpu, pm_status, mask);
+}
+EXPORT_SYMBOL_GPL(cbe_enable_pm_interrupts);
+
+void cbe_disable_pm_interrupts(u32 cpu)
+{
+	cbe_get_and_clear_pm_interrupts(cpu);
+	cbe_write_pm(cpu, pm_status, 0);
+}
+EXPORT_SYMBOL_GPL(cbe_disable_pm_interrupts);
+
+static irqreturn_t cbe_pm_irq(int irq, void *dev_id)
+{
+	perf_irq(get_irq_regs());
+	return IRQ_HANDLED;
+}
+
+static int __init cbe_init_pm_irq(void)
+{
+	unsigned int irq;
+	int rc, node;
+
+	for_each_node(node) {
+		irq = irq_create_mapping(NULL, IIC_IRQ_IOEX_PMI |
+					       (node << IIC_IRQ_NODE_SHIFT));
+		if (irq == NO_IRQ) {
+			printk("ERROR: Unable to allocate irq for node %d\n",
+			       node);
+			return -EINVAL;
+		}
+
+		rc = request_irq(irq, cbe_pm_irq,
+				 IRQF_DISABLED, "cbe-pmu-0", NULL);
+		if (rc) {
+			printk("ERROR: Request for irq on node %d failed\n",
+			       node);
+			return rc;
+		}
+	}
+
+	return 0;
+}
+machine_arch_initcall(cell, cbe_init_pm_irq);
+
+void cbe_sync_irq(int node)
+{
+	unsigned int irq;
+
+	irq = irq_find_mapping(NULL,
+			       IIC_IRQ_IOEX_PMI
+			       | (node << IIC_IRQ_NODE_SHIFT));
+
+	if (irq == NO_IRQ) {
+		printk(KERN_WARNING "ERROR, unable to get existing irq %d " \
+		"for node %d\n", irq, node);
+		return;
+	}
+
+	synchronize_irq(irq);
+}
+EXPORT_SYMBOL_GPL(cbe_sync_irq);
+
diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c
new file mode 100644
index 0000000..7b4cefa
--- /dev/null
+++ b/arch/powerpc/platforms/cell/ras.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright 2006-2008, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/reboot.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
+
+#include <asm/kexec.h>
+#include <asm/reg.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include <asm/cell-regs.h>
+
+#include "ras.h"
+
+
+static void dump_fir(int cpu)
+{
+	struct cbe_pmd_regs __iomem *pregs = cbe_get_cpu_pmd_regs(cpu);
+	struct cbe_iic_regs __iomem *iregs = cbe_get_cpu_iic_regs(cpu);
+
+	if (pregs == NULL)
+		return;
+
+	/* Todo: do some nicer parsing of bits and based on them go down
+	 * to other sub-units FIRs and not only IIC
+	 */
+	printk(KERN_ERR "Global Checkstop FIR    : 0x%016lx\n",
+	       in_be64(&pregs->checkstop_fir));
+	printk(KERN_ERR "Global Recoverable FIR  : 0x%016lx\n",
+	       in_be64(&pregs->checkstop_fir));
+	printk(KERN_ERR "Global MachineCheck FIR : 0x%016lx\n",
+	       in_be64(&pregs->spec_att_mchk_fir));
+
+	if (iregs == NULL)
+		return;
+	printk(KERN_ERR "IOC FIR                 : 0x%016lx\n",
+	       in_be64(&iregs->ioc_fir));
+
+}
+
+void cbe_system_error_exception(struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+
+	printk(KERN_ERR "System Error Interrupt on CPU %d !\n", cpu);
+	dump_fir(cpu);
+	dump_stack();
+}
+
+void cbe_maintenance_exception(struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+
+	/*
+	 * Nothing implemented for the maintenance interrupt at this point
+	 */
+
+	printk(KERN_ERR "Unhandled Maintenance interrupt on CPU %d !\n", cpu);
+	dump_stack();
+}
+
+void cbe_thermal_exception(struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+
+	/*
+	 * Nothing implemented for the thermal interrupt at this point
+	 */
+
+	printk(KERN_ERR "Unhandled Thermal interrupt on CPU %d !\n", cpu);
+	dump_stack();
+}
+
+static int cbe_machine_check_handler(struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+
+	printk(KERN_ERR "Machine Check Interrupt on CPU %d !\n", cpu);
+	dump_fir(cpu);
+
+	/* No recovery from this code now, lets continue */
+	return 0;
+}
+
+struct ptcal_area {
+	struct list_head list;
+	int nid;
+	int order;
+	struct page *pages;
+};
+
+static LIST_HEAD(ptcal_list);
+
+static int ptcal_start_tok, ptcal_stop_tok;
+
+static int __init cbe_ptcal_enable_on_node(int nid, int order)
+{
+	struct ptcal_area *area;
+	int ret = -ENOMEM;
+	unsigned long addr;
+
+	if (is_kdump_kernel())
+		rtas_call(ptcal_stop_tok, 1, 1, NULL, nid);
+
+	area = kmalloc(sizeof(*area), GFP_KERNEL);
+	if (!area)
+		goto out_err;
+
+	area->nid = nid;
+	area->order = order;
+	area->pages = alloc_pages_node(area->nid, GFP_KERNEL, area->order);
+
+	if (!area->pages)
+		goto out_free_area;
+
+	addr = __pa(page_address(area->pages));
+
+	ret = -EIO;
+	if (rtas_call(ptcal_start_tok, 3, 1, NULL, area->nid,
+				(unsigned int)(addr >> 32),
+				(unsigned int)(addr & 0xffffffff))) {
+		printk(KERN_ERR "%s: error enabling PTCAL on node %d!\n",
+				__func__, nid);
+		goto out_free_pages;
+	}
+
+	list_add(&area->list, &ptcal_list);
+
+	return 0;
+
+out_free_pages:
+	__free_pages(area->pages, area->order);
+out_free_area:
+	kfree(area);
+out_err:
+	return ret;
+}
+
+static int __init cbe_ptcal_enable(void)
+{
+	const u32 *size;
+	struct device_node *np;
+	int order, found_mic = 0;
+
+	np = of_find_node_by_path("/rtas");
+	if (!np)
+		return -ENODEV;
+
+	size = of_get_property(np, "ibm,cbe-ptcal-size", NULL);
+	if (!size)
+		return -ENODEV;
+
+	pr_debug("%s: enabling PTCAL, size = 0x%x\n", __func__, *size);
+	order = get_order(*size);
+	of_node_put(np);
+
+	/* support for malta device trees, with be@/mic@ nodes */
+	for_each_node_by_type(np, "mic-tm") {
+		cbe_ptcal_enable_on_node(of_node_to_nid(np), order);
+		found_mic = 1;
+	}
+
+	if (found_mic)
+		return 0;
+
+	/* support for older device tree - use cpu nodes */
+	for_each_node_by_type(np, "cpu") {
+		const u32 *nid = of_get_property(np, "node-id", NULL);
+		if (!nid) {
+			printk(KERN_ERR "%s: node %s is missing node-id?\n",
+					__func__, np->full_name);
+			continue;
+		}
+		cbe_ptcal_enable_on_node(*nid, order);
+		found_mic = 1;
+	}
+
+	return found_mic ? 0 : -ENODEV;
+}
+
+static int cbe_ptcal_disable(void)
+{
+	struct ptcal_area *area, *tmp;
+	int ret = 0;
+
+	pr_debug("%s: disabling PTCAL\n", __func__);
+
+	list_for_each_entry_safe(area, tmp, &ptcal_list, list) {
+		/* disable ptcal on this node */
+		if (rtas_call(ptcal_stop_tok, 1, 1, NULL, area->nid)) {
+			printk(KERN_ERR "%s: error disabling PTCAL "
+					"on node %d!\n", __func__,
+					area->nid);
+			ret = -EIO;
+			continue;
+		}
+
+		/* ensure we can access the PTCAL area */
+		memset(page_address(area->pages), 0,
+				1 << (area->order + PAGE_SHIFT));
+
+		/* clean up */
+		list_del(&area->list);
+		__free_pages(area->pages, area->order);
+		kfree(area);
+	}
+
+	return ret;
+}
+
+static int cbe_ptcal_notify_reboot(struct notifier_block *nb,
+		unsigned long code, void *data)
+{
+	return cbe_ptcal_disable();
+}
+
+static void cbe_ptcal_crash_shutdown(void)
+{
+	cbe_ptcal_disable();
+}
+
+static struct notifier_block cbe_ptcal_reboot_notifier = {
+	.notifier_call = cbe_ptcal_notify_reboot
+};
+
+#ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON
+static int sysreset_hack;
+
+static int __init cbe_sysreset_init(void)
+{
+	struct cbe_pmd_regs __iomem *regs;
+
+	sysreset_hack = machine_is_compatible("IBM,CBPLUS-1.0");
+	if (!sysreset_hack)
+		return 0;
+
+	regs = cbe_get_cpu_pmd_regs(0);
+	if (!regs)
+		return 0;
+
+	/* Enable JTAG system-reset hack */
+	out_be32(&regs->fir_mode_reg,
+		in_be32(&regs->fir_mode_reg) |
+		CBE_PMD_FIR_MODE_M8);
+
+	return 0;
+}
+device_initcall(cbe_sysreset_init);
+
+int cbe_sysreset_hack(void)
+{
+	struct cbe_pmd_regs __iomem *regs;
+
+	/*
+	 * The BMC can inject user triggered system reset exceptions,
+	 * but cannot set the system reset reason in srr1,
+	 * so check an extra register here.
+	 */
+	if (sysreset_hack && (smp_processor_id() == 0)) {
+		regs = cbe_get_cpu_pmd_regs(0);
+		if (!regs)
+			return 0;
+		if (in_be64(&regs->ras_esc_0) & 0x0000ffff) {
+			out_be64(&regs->ras_esc_0, 0);
+			return 0;
+		}
+	}
+	return 1;
+}
+#endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */
+
+int __init cbe_ptcal_init(void)
+{
+	int ret;
+	ptcal_start_tok = rtas_token("ibm,cbe-start-ptcal");
+	ptcal_stop_tok = rtas_token("ibm,cbe-stop-ptcal");
+
+	if (ptcal_start_tok == RTAS_UNKNOWN_SERVICE
+			|| ptcal_stop_tok == RTAS_UNKNOWN_SERVICE)
+		return -ENODEV;
+
+	ret = register_reboot_notifier(&cbe_ptcal_reboot_notifier);
+	if (ret)
+		goto out1;
+
+	ret = crash_shutdown_register(&cbe_ptcal_crash_shutdown);
+	if (ret)
+		goto out2;
+
+	return cbe_ptcal_enable();
+
+out2:
+	unregister_reboot_notifier(&cbe_ptcal_reboot_notifier);
+out1:
+	printk(KERN_ERR "Can't disable PTCAL, so not enabling\n");
+	return ret;
+}
+
+arch_initcall(cbe_ptcal_init);
+
+void __init cbe_ras_init(void)
+{
+	unsigned long hid0;
+
+	/*
+	 * Enable System Error & thermal interrupts and wakeup conditions
+	 */
+
+	hid0 = mfspr(SPRN_HID0);
+	hid0 |= HID0_CBE_THERM_INT_EN | HID0_CBE_THERM_WAKEUP |
+		HID0_CBE_SYSERR_INT_EN | HID0_CBE_SYSERR_WAKEUP;
+	mtspr(SPRN_HID0, hid0);
+	mb();
+
+	/*
+	 * Install machine check handler. Leave setting of precise mode to
+	 * what the firmware did for now
+	 */
+	ppc_md.machine_check_exception = cbe_machine_check_handler;
+	mb();
+
+	/*
+	 * For now, we assume that IOC_FIR is already set to forward some
+	 * error conditions to the System Error handler. If that is not true
+	 * then it will have to be fixed up here.
+	 */
+}
diff --git a/arch/powerpc/platforms/cell/ras.h b/arch/powerpc/platforms/cell/ras.h
new file mode 100644
index 0000000..eb7ee54
--- /dev/null
+++ b/arch/powerpc/platforms/cell/ras.h
@@ -0,0 +1,9 @@
+#ifndef RAS_H
+#define RAS_H
+
+extern void cbe_system_error_exception(struct pt_regs *regs);
+extern void cbe_maintenance_exception(struct pt_regs *regs);
+extern void cbe_thermal_exception(struct pt_regs *regs);
+extern void cbe_ras_init(void);
+
+#endif /* RAS_H */
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
new file mode 100644
index 0000000..ab721b5
--- /dev/null
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -0,0 +1,297 @@
+/*
+ *  linux/arch/powerpc/platforms/cell/cell_setup.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *  Adapted from 'alpha' version by Gary Thomas
+ *  Modified by Cort Dougan (cort@cs.nmt.edu)
+ *  Modified by PPC64 Team, IBM Corp
+ *  Modified by Cell Team, IBM Deutschland Entwicklung GmbH
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#undef DEBUG
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/console.h>
+#include <linux/mutex.h>
+#include <linux/memory_hotplug.h>
+#include <linux/of_platform.h>
+
+#include <asm/mmu.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/kexec.h>
+#include <asm/pgtable.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/pci-bridge.h>
+#include <asm/iommu.h>
+#include <asm/dma.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/nvram.h>
+#include <asm/cputable.h>
+#include <asm/ppc-pci.h>
+#include <asm/irq.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/cell-regs.h>
+
+#include "interrupt.h"
+#include "pervasive.h"
+#include "ras.h"
+#include "io-workarounds.h"
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+static void cell_show_cpuinfo(struct seq_file *m)
+{
+	struct device_node *root;
+	const char *model = "";
+
+	root = of_find_node_by_path("/");
+	if (root)
+		model = of_get_property(root, "model", NULL);
+	seq_printf(m, "machine\t\t: CHRP %s\n", model);
+	of_node_put(root);
+}
+
+static void cell_progress(char *s, unsigned short hex)
+{
+	printk("*** %04x : %s\n", hex, s ? s : "");
+}
+
+static void cell_fixup_pcie_rootcomplex(struct pci_dev *dev)
+{
+	struct pci_controller *hose;
+	const char *s;
+	int i;
+
+	if (!machine_is(cell))
+		return;
+
+	/* We're searching for a direct child of the PHB */
+	if (dev->bus->self != NULL || dev->devfn != 0)
+		return;
+
+	hose = pci_bus_to_host(dev->bus);
+	if (hose == NULL)
+		return;
+
+	/* Only on PCIE */
+	if (!of_device_is_compatible(hose->dn, "pciex"))
+		return;
+
+	/* And only on axon */
+	s = of_get_property(hose->dn, "model", NULL);
+	if (!s || strcmp(s, "Axon") != 0)
+		return;
+
+	for (i = 0; i < PCI_BRIDGE_RESOURCES; i++) {
+		dev->resource[i].start = dev->resource[i].end = 0;
+		dev->resource[i].flags = 0;
+	}
+
+	printk(KERN_DEBUG "PCI: Hiding resources on Axon PCIE RC %s\n",
+	       pci_name(dev));
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, cell_fixup_pcie_rootcomplex);
+
+static int __devinit cell_setup_phb(struct pci_controller *phb)
+{
+	const char *model;
+	struct device_node *np;
+
+	int rc = rtas_setup_phb(phb);
+	if (rc)
+		return rc;
+
+	np = phb->dn;
+	model = of_get_property(np, "model", NULL);
+	if (model == NULL || strcmp(np->name, "pci"))
+		return 0;
+
+	/* Setup workarounds for spider */
+	if (strcmp(model, "Spider"))
+		return 0;
+
+	iowa_register_bus(phb, &spiderpci_ops, &spiderpci_iowa_init,
+				  (void *)SPIDER_PCI_REG_BASE);
+	io_workaround_init();
+
+	return 0;
+}
+
+static int __init cell_publish_devices(void)
+{
+	struct device_node *root = of_find_node_by_path("/");
+	struct device_node *np;
+	int node;
+
+	/* Publish OF platform devices for southbridge IOs */
+	of_platform_bus_probe(NULL, NULL, NULL);
+
+	/* On spider based blades, we need to manually create the OF
+	 * platform devices for the PCI host bridges
+	 */
+	for_each_child_of_node(root, np) {
+		if (np->type == NULL || (strcmp(np->type, "pci") != 0 &&
+					 strcmp(np->type, "pciex") != 0))
+			continue;
+		of_platform_device_create(np, NULL, NULL);
+	}
+
+	/* There is no device for the MIC memory controller, thus we create
+	 * a platform device for it to attach the EDAC driver to.
+	 */
+	for_each_online_node(node) {
+		if (cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(node)) == NULL)
+			continue;
+		platform_device_register_simple("cbe-mic", node, NULL, 0);
+	}
+
+	return 0;
+}
+machine_subsys_initcall(cell, cell_publish_devices);
+
+static void cell_mpic_cascade(unsigned int irq, struct irq_desc *desc)
+{
+	struct mpic *mpic = desc->handler_data;
+	unsigned int virq;
+
+	virq = mpic_get_one_irq(mpic);
+	if (virq != NO_IRQ)
+		generic_handle_irq(virq);
+	desc->chip->eoi(irq);
+}
+
+static void __init mpic_init_IRQ(void)
+{
+	struct device_node *dn;
+	struct mpic *mpic;
+	unsigned int virq;
+
+	for (dn = NULL;
+	     (dn = of_find_node_by_name(dn, "interrupt-controller"));) {
+		if (!of_device_is_compatible(dn, "CBEA,platform-open-pic"))
+			continue;
+
+		/* The MPIC driver will get everything it needs from the
+		 * device-tree, just pass 0 to all arguments
+		 */
+		mpic = mpic_alloc(dn, 0, 0, 0, 0, " MPIC     ");
+		if (mpic == NULL)
+			continue;
+		mpic_init(mpic);
+
+		virq = irq_of_parse_and_map(dn, 0);
+		if (virq == NO_IRQ)
+			continue;
+
+		printk(KERN_INFO "%s : hooking up to IRQ %d\n",
+		       dn->full_name, virq);
+		set_irq_data(virq, mpic);
+		set_irq_chained_handler(virq, cell_mpic_cascade);
+	}
+}
+
+
+static void __init cell_init_irq(void)
+{
+	iic_init_IRQ();
+	spider_init_IRQ();
+	mpic_init_IRQ();
+}
+
+static void __init cell_set_dabrx(void)
+{
+	mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER);
+}
+
+static void __init cell_setup_arch(void)
+{
+#ifdef CONFIG_SPU_BASE
+	spu_priv1_ops = &spu_priv1_mmio_ops;
+	spu_management_ops = &spu_management_of_ops;
+#endif
+
+	cbe_regs_init();
+
+	cell_set_dabrx();
+
+#ifdef CONFIG_CBE_RAS
+	cbe_ras_init();
+#endif
+
+#ifdef CONFIG_SMP
+	smp_init_cell();
+#endif
+	/* init to some ~sane value until calibrate_delay() runs */
+	loops_per_jiffy = 50000000;
+
+	/* Find and initialize PCI host bridges */
+	init_pci_config_tokens();
+
+	cbe_pervasive_init();
+#ifdef CONFIG_DUMMY_CONSOLE
+	conswitchp = &dummy_con;
+#endif
+
+	mmio_nvram_init();
+}
+
+static int __init cell_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (!of_flat_dt_is_compatible(root, "IBM,CBEA") &&
+	    !of_flat_dt_is_compatible(root, "IBM,CPBW-1.0"))
+		return 0;
+
+	hpte_init_native();
+
+	return 1;
+}
+
+define_machine(cell) {
+	.name			= "Cell",
+	.probe			= cell_probe,
+	.setup_arch		= cell_setup_arch,
+	.show_cpuinfo		= cell_show_cpuinfo,
+	.restart		= rtas_restart,
+	.power_off		= rtas_power_off,
+	.halt			= rtas_halt,
+	.get_boot_time		= rtas_get_boot_time,
+	.get_rtc_time		= rtas_get_rtc_time,
+	.set_rtc_time		= rtas_set_rtc_time,
+	.calibrate_decr		= generic_calibrate_decr,
+	.progress		= cell_progress,
+	.init_IRQ       	= cell_init_irq,
+	.pci_setup_phb		= cell_setup_phb,
+#ifdef CONFIG_KEXEC
+	.machine_kexec		= default_machine_kexec,
+	.machine_kexec_prepare	= default_machine_kexec_prepare,
+	.machine_crash_shutdown	= default_machine_crash_shutdown,
+#endif
+};
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
new file mode 100644
index 0000000..9046803
--- /dev/null
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -0,0 +1,232 @@
+/*
+ * SMP support for BPA machines.
+ *
+ * Dave Engebretsen, Peter Bergner, and
+ * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
+ *
+ * Plus various changes from other IBM teams...
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <linux/err.h>
+#include <linux/sysdev.h>
+#include <linux/cpu.h>
+
+#include <asm/ptrace.h>
+#include <asm/atomic.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/paca.h>
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/cputable.h>
+#include <asm/firmware.h>
+#include <asm/system.h>
+#include <asm/rtas.h>
+#include <asm/cputhreads.h>
+
+#include "interrupt.h"
+#include <asm/udbg.h>
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/*
+ * The Primary thread of each non-boot processor was started from the OF client
+ * interface by prom_hold_cpus and is spinning on secondary_hold_spinloop.
+ */
+static cpumask_t of_spin_map;
+
+extern void generic_secondary_smp_init(unsigned long);
+
+/**
+ * smp_startup_cpu() - start the given cpu
+ *
+ * At boot time, there is nothing to do for primary threads which were
+ * started from Open Firmware.  For anything else, call RTAS with the
+ * appropriate start location.
+ *
+ * Returns:
+ *	0	- failure
+ *	1	- success
+ */
+static inline int __devinit smp_startup_cpu(unsigned int lcpu)
+{
+	int status;
+	unsigned long start_here = __pa((u32)*((unsigned long *)
+					       generic_secondary_smp_init));
+	unsigned int pcpu;
+	int start_cpu;
+
+	if (cpu_isset(lcpu, of_spin_map))
+		/* Already started by OF and sitting in spin loop */
+		return 1;
+
+	pcpu = get_hard_smp_processor_id(lcpu);
+
+	/* Fixup atomic count: it exited inside IRQ handler. */
+	task_thread_info(paca[lcpu].__current)->preempt_count	= 0;
+
+	/*
+	 * If the RTAS start-cpu token does not exist then presume the
+	 * cpu is already spinning.
+	 */
+	start_cpu = rtas_token("start-cpu");
+	if (start_cpu == RTAS_UNKNOWN_SERVICE)
+		return 1;
+
+	status = rtas_call(start_cpu, 3, 1, NULL, pcpu, start_here, lcpu);
+	if (status != 0) {
+		printk(KERN_ERR "start-cpu failed: %i\n", status);
+		return 0;
+	}
+
+	return 1;
+}
+
+static void smp_iic_message_pass(int target, int msg)
+{
+	unsigned int i;
+
+	if (target < NR_CPUS) {
+		iic_cause_IPI(target, msg);
+	} else {
+		for_each_online_cpu(i) {
+			if (target == MSG_ALL_BUT_SELF
+			    && i == smp_processor_id())
+				continue;
+			iic_cause_IPI(i, msg);
+		}
+	}
+}
+
+static int __init smp_iic_probe(void)
+{
+	iic_request_IPIs();
+
+	return cpus_weight(cpu_possible_map);
+}
+
+static void __devinit smp_cell_setup_cpu(int cpu)
+{
+	if (cpu != boot_cpuid)
+		iic_setup_cpu();
+
+	/*
+	 * change default DABRX to allow user watchpoints
+	 */
+	mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER);
+}
+
+static DEFINE_SPINLOCK(timebase_lock);
+static unsigned long timebase = 0;
+
+static void __devinit cell_give_timebase(void)
+{
+	spin_lock(&timebase_lock);
+	rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL);
+	timebase = get_tb();
+	spin_unlock(&timebase_lock);
+
+	while (timebase)
+		barrier();
+	rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL);
+}
+
+static void __devinit cell_take_timebase(void)
+{
+	while (!timebase)
+		barrier();
+	spin_lock(&timebase_lock);
+	set_tb(timebase >> 32, timebase & 0xffffffff);
+	timebase = 0;
+	spin_unlock(&timebase_lock);
+}
+
+static void __devinit smp_cell_kick_cpu(int nr)
+{
+	BUG_ON(nr < 0 || nr >= NR_CPUS);
+
+	if (!smp_startup_cpu(nr))
+		return;
+
+	/*
+	 * The processor is currently spinning, waiting for the
+	 * cpu_start field to become non-zero After we set cpu_start,
+	 * the processor will continue on to secondary_start
+	 */
+	paca[nr].cpu_start = 1;
+}
+
+static int smp_cell_cpu_bootable(unsigned int nr)
+{
+	/* Special case - we inhibit secondary thread startup
+	 * during boot if the user requests it.  Odd-numbered
+	 * cpus are assumed to be secondary threads.
+	 */
+	if (system_state < SYSTEM_RUNNING &&
+	    cpu_has_feature(CPU_FTR_SMT) &&
+	    !smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
+		return 0;
+
+	return 1;
+}
+static struct smp_ops_t bpa_iic_smp_ops = {
+	.message_pass	= smp_iic_message_pass,
+	.probe		= smp_iic_probe,
+	.kick_cpu	= smp_cell_kick_cpu,
+	.setup_cpu	= smp_cell_setup_cpu,
+	.cpu_bootable	= smp_cell_cpu_bootable,
+};
+
+/* This is called very early */
+void __init smp_init_cell(void)
+{
+	int i;
+
+	DBG(" -> smp_init_cell()\n");
+
+	smp_ops = &bpa_iic_smp_ops;
+
+	/* Mark threads which are still spinning in hold loops. */
+	if (cpu_has_feature(CPU_FTR_SMT)) {
+		for_each_present_cpu(i) {
+			if (cpu_thread_in_core(i) == 0)
+				cpu_set(i, of_spin_map);
+		}
+	} else {
+		of_spin_map = cpu_present_map;
+	}
+
+	cpu_clear(boot_cpuid, of_spin_map);
+
+	/* Non-lpar has additional take/give timebase */
+	if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
+		smp_ops->give_timebase = cell_give_timebase;
+		smp_ops->take_timebase = cell_take_timebase;
+	}
+
+	DBG(" <- smp_init_cell()\n");
+}
diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c
new file mode 100644
index 0000000..5122ec1
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spider-pci.c
@@ -0,0 +1,184 @@
+/*
+ * IO workarounds for PCI on Celleb/Cell platform
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/of_platform.h>
+#include <linux/io.h>
+
+#include <asm/ppc-pci.h>
+#include <asm/pci-bridge.h>
+
+#include "io-workarounds.h"
+
+#define SPIDER_PCI_DISABLE_PREFETCH
+
+struct spiderpci_iowa_private {
+	void __iomem *regs;
+};
+
+static void spiderpci_io_flush(struct iowa_bus *bus)
+{
+	struct spiderpci_iowa_private *priv;
+	u32 val;
+
+	priv = bus->private;
+	val = in_be32(priv->regs + SPIDER_PCI_DUMMY_READ);
+	iosync();
+}
+
+#define SPIDER_PCI_MMIO_READ(name, ret)					\
+static ret spiderpci_##name(const PCI_IO_ADDR addr)			\
+{									\
+	ret val = __do_##name(addr);					\
+	spiderpci_io_flush(iowa_mem_find_bus(addr));			\
+	return val;							\
+}
+
+#define SPIDER_PCI_MMIO_READ_STR(name)					\
+static void spiderpci_##name(const PCI_IO_ADDR addr, void *buf, 	\
+			     unsigned long count)			\
+{									\
+	__do_##name(addr, buf, count);					\
+	spiderpci_io_flush(iowa_mem_find_bus(addr));			\
+}
+
+SPIDER_PCI_MMIO_READ(readb, u8)
+SPIDER_PCI_MMIO_READ(readw, u16)
+SPIDER_PCI_MMIO_READ(readl, u32)
+SPIDER_PCI_MMIO_READ(readq, u64)
+SPIDER_PCI_MMIO_READ(readw_be, u16)
+SPIDER_PCI_MMIO_READ(readl_be, u32)
+SPIDER_PCI_MMIO_READ(readq_be, u64)
+SPIDER_PCI_MMIO_READ_STR(readsb)
+SPIDER_PCI_MMIO_READ_STR(readsw)
+SPIDER_PCI_MMIO_READ_STR(readsl)
+
+static void spiderpci_memcpy_fromio(void *dest, const PCI_IO_ADDR src,
+				    unsigned long n)
+{
+	__do_memcpy_fromio(dest, src, n);
+	spiderpci_io_flush(iowa_mem_find_bus(src));
+}
+
+static int __init spiderpci_pci_setup_chip(struct pci_controller *phb,
+					   void __iomem *regs)
+{
+	void *dummy_page_va;
+	dma_addr_t dummy_page_da;
+
+#ifdef SPIDER_PCI_DISABLE_PREFETCH
+	u32 val = in_be32(regs + SPIDER_PCI_VCI_CNTL_STAT);
+	pr_debug("SPIDER_IOWA:PVCI_Control_Status was 0x%08x\n", val);
+	out_be32(regs + SPIDER_PCI_VCI_CNTL_STAT, val | 0x8);
+#endif /* SPIDER_PCI_DISABLE_PREFETCH */
+
+	/* setup dummy read */
+	/*
+	 * On CellBlade, we can't know that which XDR memory is used by
+	 * kmalloc() to allocate dummy_page_va.
+	 * In order to imporve the performance, the XDR which is used to
+	 * allocate dummy_page_va is the nearest the spider-pci.
+	 * We have to select the CBE which is the nearest the spider-pci
+	 * to allocate memory from the best XDR, but I don't know that
+	 * how to do.
+	 *
+	 * Celleb does not have this problem, because it has only one XDR.
+	 */
+	dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!dummy_page_va) {
+		pr_err("SPIDERPCI-IOWA:Alloc dummy_page_va failed.\n");
+		return -1;
+	}
+
+	dummy_page_da = dma_map_single(phb->parent, dummy_page_va,
+				       PAGE_SIZE, DMA_FROM_DEVICE);
+	if (dma_mapping_error(phb->parent, dummy_page_da)) {
+		pr_err("SPIDER-IOWA:Map dummy page filed.\n");
+		kfree(dummy_page_va);
+		return -1;
+	}
+
+	out_be32(regs + SPIDER_PCI_DUMMY_READ_BASE, dummy_page_da);
+
+	return 0;
+}
+
+int __init spiderpci_iowa_init(struct iowa_bus *bus, void *data)
+{
+	void __iomem *regs = NULL;
+	struct spiderpci_iowa_private *priv;
+	struct device_node *np = bus->phb->dn;
+	struct resource r;
+	unsigned long offset = (unsigned long)data;
+
+	pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%s)\n",
+		 np->full_name);
+
+	priv = kzalloc(sizeof(struct spiderpci_iowa_private), GFP_KERNEL);
+	if (!priv) {
+		pr_err("SPIDERPCI-IOWA:"
+		       "Can't allocate struct spiderpci_iowa_private");
+		return -1;
+	}
+
+	if (of_address_to_resource(np, 0, &r)) {
+		pr_err("SPIDERPCI-IOWA:Can't get resource.\n");
+		goto error;
+	}
+
+	regs = ioremap(r.start + offset, SPIDER_PCI_REG_SIZE);
+	if (!regs) {
+		pr_err("SPIDERPCI-IOWA:ioremap failed.\n");
+		goto error;
+	}
+	priv->regs = regs;
+	bus->private = priv;
+
+	if (spiderpci_pci_setup_chip(bus->phb, regs))
+		goto error;
+
+	return 0;
+
+error:
+	kfree(priv);
+	bus->private = NULL;
+
+	if (regs)
+		iounmap(regs);
+
+	return -1;
+}
+
+struct ppc_pci_io spiderpci_ops = {
+	.readb = spiderpci_readb,
+	.readw = spiderpci_readw,
+	.readl = spiderpci_readl,
+	.readq = spiderpci_readq,
+	.readw_be = spiderpci_readw_be,
+	.readl_be = spiderpci_readl_be,
+	.readq_be = spiderpci_readq_be,
+	.readsb = spiderpci_readsb,
+	.readsw = spiderpci_readsw,
+	.readsl = spiderpci_readsl,
+	.memcpy_fromio = spiderpci_memcpy_fromio,
+};
+
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
new file mode 100644
index 0000000..4e56556
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -0,0 +1,365 @@
+/*
+ * External Interrupt Controller on Spider South Bridge
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/ioport.h>
+
+#include <asm/pgtable.h>
+#include <asm/prom.h>
+#include <asm/io.h>
+
+#include "interrupt.h"
+
+/* register layout taken from Spider spec, table 7.4-4 */
+enum {
+	TIR_DEN		= 0x004, /* Detection Enable Register */
+	TIR_MSK		= 0x084, /* Mask Level Register */
+	TIR_EDC		= 0x0c0, /* Edge Detection Clear Register */
+	TIR_PNDA	= 0x100, /* Pending Register A */
+	TIR_PNDB	= 0x104, /* Pending Register B */
+	TIR_CS		= 0x144, /* Current Status Register */
+	TIR_LCSA	= 0x150, /* Level Current Status Register A */
+	TIR_LCSB	= 0x154, /* Level Current Status Register B */
+	TIR_LCSC	= 0x158, /* Level Current Status Register C */
+	TIR_LCSD	= 0x15c, /* Level Current Status Register D */
+	TIR_CFGA	= 0x200, /* Setting Register A0 */
+	TIR_CFGB	= 0x204, /* Setting Register B0 */
+			/* 0x208 ... 0x3ff Setting Register An/Bn */
+	TIR_PPNDA	= 0x400, /* Packet Pending Register A */
+	TIR_PPNDB	= 0x404, /* Packet Pending Register B */
+	TIR_PIERA	= 0x408, /* Packet Output Error Register A */
+	TIR_PIERB	= 0x40c, /* Packet Output Error Register B */
+	TIR_PIEN	= 0x444, /* Packet Output Enable Register */
+	TIR_PIPND	= 0x454, /* Packet Output Pending Register */
+	TIRDID		= 0x484, /* Spider Device ID Register */
+	REISTIM		= 0x500, /* Reissue Command Timeout Time Setting */
+	REISTIMEN	= 0x504, /* Reissue Command Timeout Setting */
+	REISWAITEN	= 0x508, /* Reissue Wait Control*/
+};
+
+#define SPIDER_CHIP_COUNT	4
+#define SPIDER_SRC_COUNT	64
+#define SPIDER_IRQ_INVALID	63
+
+struct spider_pic {
+	struct irq_host		*host;
+	void __iomem		*regs;
+	unsigned int		node_id;
+};
+static struct spider_pic spider_pics[SPIDER_CHIP_COUNT];
+
+static struct spider_pic *spider_virq_to_pic(unsigned int virq)
+{
+	return irq_map[virq].host->host_data;
+}
+
+static void __iomem *spider_get_irq_config(struct spider_pic *pic,
+					   unsigned int src)
+{
+	return pic->regs + TIR_CFGA + 8 * src;
+}
+
+static void spider_unmask_irq(unsigned int virq)
+{
+	struct spider_pic *pic = spider_virq_to_pic(virq);
+	void __iomem *cfg = spider_get_irq_config(pic, irq_map[virq].hwirq);
+
+	out_be32(cfg, in_be32(cfg) | 0x30000000u);
+}
+
+static void spider_mask_irq(unsigned int virq)
+{
+	struct spider_pic *pic = spider_virq_to_pic(virq);
+	void __iomem *cfg = spider_get_irq_config(pic, irq_map[virq].hwirq);
+
+	out_be32(cfg, in_be32(cfg) & ~0x30000000u);
+}
+
+static void spider_ack_irq(unsigned int virq)
+{
+	struct spider_pic *pic = spider_virq_to_pic(virq);
+	unsigned int src = irq_map[virq].hwirq;
+
+	/* Reset edge detection logic if necessary
+	 */
+	if (get_irq_desc(virq)->status & IRQ_LEVEL)
+		return;
+
+	/* Only interrupts 47 to 50 can be set to edge */
+	if (src < 47 || src > 50)
+		return;
+
+	/* Perform the clear of the edge logic */
+	out_be32(pic->regs + TIR_EDC, 0x100 | (src & 0xf));
+}
+
+static int spider_set_irq_type(unsigned int virq, unsigned int type)
+{
+	unsigned int sense = type & IRQ_TYPE_SENSE_MASK;
+	struct spider_pic *pic = spider_virq_to_pic(virq);
+	unsigned int hw = irq_map[virq].hwirq;
+	void __iomem *cfg = spider_get_irq_config(pic, hw);
+	struct irq_desc *desc = get_irq_desc(virq);
+	u32 old_mask;
+	u32 ic;
+
+	/* Note that only level high is supported for most interrupts */
+	if (sense != IRQ_TYPE_NONE && sense != IRQ_TYPE_LEVEL_HIGH &&
+	    (hw < 47 || hw > 50))
+		return -EINVAL;
+
+	/* Decode sense type */
+	switch(sense) {
+	case IRQ_TYPE_EDGE_RISING:
+		ic = 0x3;
+		break;
+	case IRQ_TYPE_EDGE_FALLING:
+		ic = 0x2;
+		break;
+	case IRQ_TYPE_LEVEL_LOW:
+		ic = 0x0;
+		break;
+	case IRQ_TYPE_LEVEL_HIGH:
+	case IRQ_TYPE_NONE:
+		ic = 0x1;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Update irq_desc */
+	desc->status &= ~(IRQ_TYPE_SENSE_MASK | IRQ_LEVEL);
+	desc->status |= type & IRQ_TYPE_SENSE_MASK;
+	if (type & (IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_LEVEL_LOW))
+		desc->status |= IRQ_LEVEL;
+
+	/* Configure the source. One gross hack that was there before and
+	 * that I've kept around is the priority to the BE which I set to
+	 * be the same as the interrupt source number. I don't know wether
+	 * that's supposed to make any kind of sense however, we'll have to
+	 * decide that, but for now, I'm not changing the behaviour.
+	 */
+	old_mask = in_be32(cfg) & 0x30000000u;
+	out_be32(cfg, old_mask | (ic << 24) | (0x7 << 16) |
+		 (pic->node_id << 4) | 0xe);
+	out_be32(cfg + 4, (0x2 << 16) | (hw & 0xff));
+
+	return 0;
+}
+
+static struct irq_chip spider_pic = {
+	.typename = " SPIDER   ",
+	.unmask = spider_unmask_irq,
+	.mask = spider_mask_irq,
+	.ack = spider_ack_irq,
+	.set_type = spider_set_irq_type,
+};
+
+static int spider_host_map(struct irq_host *h, unsigned int virq,
+			irq_hw_number_t hw)
+{
+	set_irq_chip_and_handler(virq, &spider_pic, handle_level_irq);
+
+	/* Set default irq type */
+	set_irq_type(virq, IRQ_TYPE_NONE);
+
+	return 0;
+}
+
+static int spider_host_xlate(struct irq_host *h, struct device_node *ct,
+			   u32 *intspec, unsigned int intsize,
+			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+	/* Spider interrupts have 2 cells, first is the interrupt source,
+	 * second, well, I don't know for sure yet ... We mask the top bits
+	 * because old device-trees encode a node number in there
+	 */
+	*out_hwirq = intspec[0] & 0x3f;
+	*out_flags = IRQ_TYPE_LEVEL_HIGH;
+	return 0;
+}
+
+static struct irq_host_ops spider_host_ops = {
+	.map = spider_host_map,
+	.xlate = spider_host_xlate,
+};
+
+static void spider_irq_cascade(unsigned int irq, struct irq_desc *desc)
+{
+	struct spider_pic *pic = desc->handler_data;
+	unsigned int cs, virq;
+
+	cs = in_be32(pic->regs + TIR_CS) >> 24;
+	if (cs == SPIDER_IRQ_INVALID)
+		virq = NO_IRQ;
+	else
+		virq = irq_linear_revmap(pic->host, cs);
+	if (virq != NO_IRQ)
+		generic_handle_irq(virq);
+	desc->chip->eoi(irq);
+}
+
+/* For hooking up the cascace we have a problem. Our device-tree is
+ * crap and we don't know on which BE iic interrupt we are hooked on at
+ * least not the "standard" way. We can reconstitute it based on two
+ * informations though: which BE node we are connected to and wether
+ * we are connected to IOIF0 or IOIF1. Right now, we really only care
+ * about the IBM cell blade and we know that its firmware gives us an
+ * interrupt-map property which is pretty strange.
+ */
+static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
+{
+	unsigned int virq;
+	const u32 *imap, *tmp;
+	int imaplen, intsize, unit;
+	struct device_node *iic;
+
+	/* First, we check wether we have a real "interrupts" in the device
+	 * tree in case the device-tree is ever fixed
+	 */
+	struct of_irq oirq;
+	if (of_irq_map_one(pic->host->of_node, 0, &oirq) == 0) {
+		virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
+					     oirq.size);
+		return virq;
+	}
+
+	/* Now do the horrible hacks */
+	tmp = of_get_property(pic->host->of_node, "#interrupt-cells", NULL);
+	if (tmp == NULL)
+		return NO_IRQ;
+	intsize = *tmp;
+	imap = of_get_property(pic->host->of_node, "interrupt-map", &imaplen);
+	if (imap == NULL || imaplen < (intsize + 1))
+		return NO_IRQ;
+	iic = of_find_node_by_phandle(imap[intsize]);
+	if (iic == NULL)
+		return NO_IRQ;
+	imap += intsize + 1;
+	tmp = of_get_property(iic, "#interrupt-cells", NULL);
+	if (tmp == NULL)
+		return NO_IRQ;
+	intsize = *tmp;
+	/* Assume unit is last entry of interrupt specifier */
+	unit = imap[intsize - 1];
+	/* Ok, we have a unit, now let's try to get the node */
+	tmp = of_get_property(iic, "ibm,interrupt-server-ranges", NULL);
+	if (tmp == NULL) {
+		of_node_put(iic);
+		return NO_IRQ;
+	}
+	/* ugly as hell but works for now */
+	pic->node_id = (*tmp) >> 1;
+	of_node_put(iic);
+
+	/* Ok, now let's get cracking. You may ask me why I just didn't match
+	 * the iic host from the iic OF node, but that way I'm still compatible
+	 * with really really old old firmwares for which we don't have a node
+	 */
+	/* Manufacture an IIC interrupt number of class 2 */
+	virq = irq_create_mapping(NULL,
+				  (pic->node_id << IIC_IRQ_NODE_SHIFT) |
+				  (2 << IIC_IRQ_CLASS_SHIFT) |
+				  unit);
+	if (virq == NO_IRQ)
+		printk(KERN_ERR "spider_pic: failed to map cascade !");
+	return virq;
+}
+
+
+static void __init spider_init_one(struct device_node *of_node, int chip,
+				   unsigned long addr)
+{
+	struct spider_pic *pic = &spider_pics[chip];
+	int i, virq;
+
+	/* Map registers */
+	pic->regs = ioremap(addr, 0x1000);
+	if (pic->regs == NULL)
+		panic("spider_pic: can't map registers !");
+
+	/* Allocate a host */
+	pic->host = irq_alloc_host(of_node, IRQ_HOST_MAP_LINEAR,
+				   SPIDER_SRC_COUNT, &spider_host_ops,
+				   SPIDER_IRQ_INVALID);
+	if (pic->host == NULL)
+		panic("spider_pic: can't allocate irq host !");
+	pic->host->host_data = pic;
+
+	/* Go through all sources and disable them */
+	for (i = 0; i < SPIDER_SRC_COUNT; i++) {
+		void __iomem *cfg = pic->regs + TIR_CFGA + 8 * i;
+		out_be32(cfg, in_be32(cfg) & ~0x30000000u);
+	}
+
+	/* do not mask any interrupts because of level */
+	out_be32(pic->regs + TIR_MSK, 0x0);
+
+	/* enable interrupt packets to be output */
+	out_be32(pic->regs + TIR_PIEN, in_be32(pic->regs + TIR_PIEN) | 0x1);
+
+	/* Hook up the cascade interrupt to the iic and nodeid */
+	virq = spider_find_cascade_and_node(pic);
+	if (virq == NO_IRQ)
+		return;
+	set_irq_data(virq, pic);
+	set_irq_chained_handler(virq, spider_irq_cascade);
+
+	printk(KERN_INFO "spider_pic: node %d, addr: 0x%lx %s\n",
+	       pic->node_id, addr, of_node->full_name);
+
+	/* Enable the interrupt detection enable bit. Do this last! */
+	out_be32(pic->regs + TIR_DEN, in_be32(pic->regs + TIR_DEN) | 0x1);
+}
+
+void __init spider_init_IRQ(void)
+{
+	struct resource r;
+	struct device_node *dn;
+	int chip = 0;
+
+	/* XXX node numbers are totally bogus. We _hope_ we get the device
+	 * nodes in the right order here but that's definitely not guaranteed,
+	 * we need to get the node from the device tree instead.
+	 * There is currently no proper property for it (but our whole
+	 * device-tree is bogus anyway) so all we can do is pray or maybe test
+	 * the address and deduce the node-id
+	 */
+	for (dn = NULL;
+	     (dn = of_find_node_by_name(dn, "interrupt-controller"));) {
+		if (of_device_is_compatible(dn, "CBEA,platform-spider-pic")) {
+			if (of_address_to_resource(dn, 0, &r)) {
+				printk(KERN_WARNING "spider-pic: Failed\n");
+				continue;
+			}
+		} else if (of_device_is_compatible(dn, "sti,platform-spider-pic")
+			   && (chip < 2)) {
+			static long hard_coded_pics[] =
+				{ 0x24000008000ul, 0x34000008000ul};
+			r.start = hard_coded_pics[chip];
+		} else
+			continue;
+		spider_init_one(dn, chip++, r.start);
+	}
+}
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
new file mode 100644
index 0000000..a5bdb89
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -0,0 +1,785 @@
+/*
+ * Low-level SPU handling
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/linux_logo.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/spu_csa.h>
+#include <asm/xmon.h>
+#include <asm/prom.h>
+
+const struct spu_management_ops *spu_management_ops;
+EXPORT_SYMBOL_GPL(spu_management_ops);
+
+const struct spu_priv1_ops *spu_priv1_ops;
+EXPORT_SYMBOL_GPL(spu_priv1_ops);
+
+struct cbe_spu_info cbe_spu_info[MAX_NUMNODES];
+EXPORT_SYMBOL_GPL(cbe_spu_info);
+
+/*
+ * The spufs fault-handling code needs to call force_sig_info to raise signals
+ * on DMA errors. Export it here to avoid general kernel-wide access to this
+ * function
+ */
+EXPORT_SYMBOL_GPL(force_sig_info);
+
+/*
+ * Protects cbe_spu_info and spu->number.
+ */
+static DEFINE_SPINLOCK(spu_lock);
+
+/*
+ * List of all spus in the system.
+ *
+ * This list is iterated by callers from irq context and callers that
+ * want to sleep.  Thus modifications need to be done with both
+ * spu_full_list_lock and spu_full_list_mutex held, while iterating
+ * through it requires either of these locks.
+ *
+ * In addition spu_full_list_lock protects all assignmens to
+ * spu->mm.
+ */
+static LIST_HEAD(spu_full_list);
+static DEFINE_SPINLOCK(spu_full_list_lock);
+static DEFINE_MUTEX(spu_full_list_mutex);
+
+struct spu_slb {
+	u64 esid, vsid;
+};
+
+void spu_invalidate_slbs(struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	unsigned long flags;
+
+	spin_lock_irqsave(&spu->register_lock, flags);
+	if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK)
+		out_be64(&priv2->slb_invalidate_all_W, 0UL);
+	spin_unlock_irqrestore(&spu->register_lock, flags);
+}
+EXPORT_SYMBOL_GPL(spu_invalidate_slbs);
+
+/* This is called by the MM core when a segment size is changed, to
+ * request a flush of all the SPEs using a given mm
+ */
+void spu_flush_all_slbs(struct mm_struct *mm)
+{
+	struct spu *spu;
+	unsigned long flags;
+
+	spin_lock_irqsave(&spu_full_list_lock, flags);
+	list_for_each_entry(spu, &spu_full_list, full_list) {
+		if (spu->mm == mm)
+			spu_invalidate_slbs(spu);
+	}
+	spin_unlock_irqrestore(&spu_full_list_lock, flags);
+}
+
+/* The hack below stinks... try to do something better one of
+ * these days... Does it even work properly with NR_CPUS == 1 ?
+ */
+static inline void mm_needs_global_tlbie(struct mm_struct *mm)
+{
+	int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;
+
+	/* Global TLBIE broadcast required with SPEs. */
+	__cpus_setall(&mm->cpu_vm_mask, nr);
+}
+
+void spu_associate_mm(struct spu *spu, struct mm_struct *mm)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&spu_full_list_lock, flags);
+	spu->mm = mm;
+	spin_unlock_irqrestore(&spu_full_list_lock, flags);
+	if (mm)
+		mm_needs_global_tlbie(mm);
+}
+EXPORT_SYMBOL_GPL(spu_associate_mm);
+
+int spu_64k_pages_available(void)
+{
+	return mmu_psize_defs[MMU_PAGE_64K].shift != 0;
+}
+EXPORT_SYMBOL_GPL(spu_64k_pages_available);
+
+static void spu_restart_dma(struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags))
+		out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);
+	else {
+		set_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags);
+		mb();
+	}
+}
+
+static inline void spu_load_slb(struct spu *spu, int slbe, struct spu_slb *slb)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	pr_debug("%s: adding SLB[%d] 0x%016lx 0x%016lx\n",
+			__func__, slbe, slb->vsid, slb->esid);
+
+	out_be64(&priv2->slb_index_W, slbe);
+	/* set invalid before writing vsid */
+	out_be64(&priv2->slb_esid_RW, 0);
+	/* now it's safe to write the vsid */
+	out_be64(&priv2->slb_vsid_RW, slb->vsid);
+	/* setting the new esid makes the entry valid again */
+	out_be64(&priv2->slb_esid_RW, slb->esid);
+}
+
+static int __spu_trap_data_seg(struct spu *spu, unsigned long ea)
+{
+	struct mm_struct *mm = spu->mm;
+	struct spu_slb slb;
+	int psize;
+
+	pr_debug("%s\n", __func__);
+
+	slb.esid = (ea & ESID_MASK) | SLB_ESID_V;
+
+	switch(REGION_ID(ea)) {
+	case USER_REGION_ID:
+#ifdef CONFIG_PPC_MM_SLICES
+		psize = get_slice_psize(mm, ea);
+#else
+		psize = mm->context.user_psize;
+#endif
+		slb.vsid = (get_vsid(mm->context.id, ea, MMU_SEGSIZE_256M)
+				<< SLB_VSID_SHIFT) | SLB_VSID_USER;
+		break;
+	case VMALLOC_REGION_ID:
+		if (ea < VMALLOC_END)
+			psize = mmu_vmalloc_psize;
+		else
+			psize = mmu_io_psize;
+		slb.vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M)
+				<< SLB_VSID_SHIFT) | SLB_VSID_KERNEL;
+		break;
+	case KERNEL_REGION_ID:
+		psize = mmu_linear_psize;
+		slb.vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M)
+				<< SLB_VSID_SHIFT) | SLB_VSID_KERNEL;
+		break;
+	default:
+		/* Future: support kernel segments so that drivers
+		 * can use SPUs.
+		 */
+		pr_debug("invalid region access at %016lx\n", ea);
+		return 1;
+	}
+	slb.vsid |= mmu_psize_defs[psize].sllp;
+
+	spu_load_slb(spu, spu->slb_replace, &slb);
+
+	spu->slb_replace++;
+	if (spu->slb_replace >= 8)
+		spu->slb_replace = 0;
+
+	spu_restart_dma(spu);
+	spu->stats.slb_flt++;
+	return 0;
+}
+
+extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); //XXX
+static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
+{
+	int ret;
+
+	pr_debug("%s, %lx, %lx\n", __func__, dsisr, ea);
+
+	/*
+	 * Handle kernel space hash faults immediately. User hash
+	 * faults need to be deferred to process context.
+	 */
+	if ((dsisr & MFC_DSISR_PTE_NOT_FOUND) &&
+	    (REGION_ID(ea) != USER_REGION_ID)) {
+
+		spin_unlock(&spu->register_lock);
+		ret = hash_page(ea, _PAGE_PRESENT, 0x300);
+		spin_lock(&spu->register_lock);
+
+		if (!ret) {
+			spu_restart_dma(spu);
+			return 0;
+		}
+	}
+
+	spu->class_1_dar = ea;
+	spu->class_1_dsisr = dsisr;
+
+	spu->stop_callback(spu, 1);
+
+	spu->class_1_dar = 0;
+	spu->class_1_dsisr = 0;
+
+	return 0;
+}
+
+static void __spu_kernel_slb(void *addr, struct spu_slb *slb)
+{
+	unsigned long ea = (unsigned long)addr;
+	u64 llp;
+
+	if (REGION_ID(ea) == KERNEL_REGION_ID)
+		llp = mmu_psize_defs[mmu_linear_psize].sllp;
+	else
+		llp = mmu_psize_defs[mmu_virtual_psize].sllp;
+
+	slb->vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT) |
+		SLB_VSID_KERNEL | llp;
+	slb->esid = (ea & ESID_MASK) | SLB_ESID_V;
+}
+
+/**
+ * Given an array of @nr_slbs SLB entries, @slbs, return non-zero if the
+ * address @new_addr is present.
+ */
+static inline int __slb_present(struct spu_slb *slbs, int nr_slbs,
+		void *new_addr)
+{
+	unsigned long ea = (unsigned long)new_addr;
+	int i;
+
+	for (i = 0; i < nr_slbs; i++)
+		if (!((slbs[i].esid ^ ea) & ESID_MASK))
+			return 1;
+
+	return 0;
+}
+
+/**
+ * Setup the SPU kernel SLBs, in preparation for a context save/restore. We
+ * need to map both the context save area, and the save/restore code.
+ *
+ * Because the lscsa and code may cross segment boundaires, we check to see
+ * if mappings are required for the start and end of each range. We currently
+ * assume that the mappings are smaller that one segment - if not, something
+ * is seriously wrong.
+ */
+void spu_setup_kernel_slbs(struct spu *spu, struct spu_lscsa *lscsa,
+		void *code, int code_size)
+{
+	struct spu_slb slbs[4];
+	int i, nr_slbs = 0;
+	/* start and end addresses of both mappings */
+	void *addrs[] = {
+		lscsa, (void *)lscsa + sizeof(*lscsa) - 1,
+		code, code + code_size - 1
+	};
+
+	/* check the set of addresses, and create a new entry in the slbs array
+	 * if there isn't already a SLB for that address */
+	for (i = 0; i < ARRAY_SIZE(addrs); i++) {
+		if (__slb_present(slbs, nr_slbs, addrs[i]))
+			continue;
+
+		__spu_kernel_slb(addrs[i], &slbs[nr_slbs]);
+		nr_slbs++;
+	}
+
+	spin_lock_irq(&spu->register_lock);
+	/* Add the set of SLBs */
+	for (i = 0; i < nr_slbs; i++)
+		spu_load_slb(spu, i, &slbs[i]);
+	spin_unlock_irq(&spu->register_lock);
+}
+EXPORT_SYMBOL_GPL(spu_setup_kernel_slbs);
+
+static irqreturn_t
+spu_irq_class_0(int irq, void *data)
+{
+	struct spu *spu;
+	unsigned long stat, mask;
+
+	spu = data;
+
+	spin_lock(&spu->register_lock);
+	mask = spu_int_mask_get(spu, 0);
+	stat = spu_int_stat_get(spu, 0) & mask;
+
+	spu->class_0_pending |= stat;
+	spu->class_0_dar = spu_mfc_dar_get(spu);
+	spu->stop_callback(spu, 0);
+	spu->class_0_pending = 0;
+	spu->class_0_dar = 0;
+
+	spu_int_stat_clear(spu, 0, stat);
+	spin_unlock(&spu->register_lock);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t
+spu_irq_class_1(int irq, void *data)
+{
+	struct spu *spu;
+	unsigned long stat, mask, dar, dsisr;
+
+	spu = data;
+
+	/* atomically read & clear class1 status. */
+	spin_lock(&spu->register_lock);
+	mask  = spu_int_mask_get(spu, 1);
+	stat  = spu_int_stat_get(spu, 1) & mask;
+	dar   = spu_mfc_dar_get(spu);
+	dsisr = spu_mfc_dsisr_get(spu);
+	if (stat & CLASS1_STORAGE_FAULT_INTR)
+		spu_mfc_dsisr_set(spu, 0ul);
+	spu_int_stat_clear(spu, 1, stat);
+
+	pr_debug("%s: %lx %lx %lx %lx\n", __func__, mask, stat,
+			dar, dsisr);
+
+	if (stat & CLASS1_SEGMENT_FAULT_INTR)
+		__spu_trap_data_seg(spu, dar);
+
+	if (stat & CLASS1_STORAGE_FAULT_INTR)
+		__spu_trap_data_map(spu, dar, dsisr);
+
+	if (stat & CLASS1_LS_COMPARE_SUSPEND_ON_GET_INTR)
+		;
+
+	if (stat & CLASS1_LS_COMPARE_SUSPEND_ON_PUT_INTR)
+		;
+
+	spu->class_1_dsisr = 0;
+	spu->class_1_dar = 0;
+
+	spin_unlock(&spu->register_lock);
+
+	return stat ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static irqreturn_t
+spu_irq_class_2(int irq, void *data)
+{
+	struct spu *spu;
+	unsigned long stat;
+	unsigned long mask;
+	const int mailbox_intrs =
+		CLASS2_MAILBOX_THRESHOLD_INTR | CLASS2_MAILBOX_INTR;
+
+	spu = data;
+	spin_lock(&spu->register_lock);
+	stat = spu_int_stat_get(spu, 2);
+	mask = spu_int_mask_get(spu, 2);
+	/* ignore interrupts we're not waiting for */
+	stat &= mask;
+	/* mailbox interrupts are level triggered. mask them now before
+	 * acknowledging */
+	if (stat & mailbox_intrs)
+		spu_int_mask_and(spu, 2, ~(stat & mailbox_intrs));
+	/* acknowledge all interrupts before the callbacks */
+	spu_int_stat_clear(spu, 2, stat);
+
+	pr_debug("class 2 interrupt %d, %lx, %lx\n", irq, stat, mask);
+
+	if (stat & CLASS2_MAILBOX_INTR)
+		spu->ibox_callback(spu);
+
+	if (stat & CLASS2_SPU_STOP_INTR)
+		spu->stop_callback(spu, 2);
+
+	if (stat & CLASS2_SPU_HALT_INTR)
+		spu->stop_callback(spu, 2);
+
+	if (stat & CLASS2_SPU_DMA_TAG_GROUP_COMPLETE_INTR)
+		spu->mfc_callback(spu);
+
+	if (stat & CLASS2_MAILBOX_THRESHOLD_INTR)
+		spu->wbox_callback(spu);
+
+	spu->stats.class2_intr++;
+
+	spin_unlock(&spu->register_lock);
+
+	return stat ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static int spu_request_irqs(struct spu *spu)
+{
+	int ret = 0;
+
+	if (spu->irqs[0] != NO_IRQ) {
+		snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0",
+			 spu->number);
+		ret = request_irq(spu->irqs[0], spu_irq_class_0,
+				  IRQF_DISABLED,
+				  spu->irq_c0, spu);
+		if (ret)
+			goto bail0;
+	}
+	if (spu->irqs[1] != NO_IRQ) {
+		snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1",
+			 spu->number);
+		ret = request_irq(spu->irqs[1], spu_irq_class_1,
+				  IRQF_DISABLED,
+				  spu->irq_c1, spu);
+		if (ret)
+			goto bail1;
+	}
+	if (spu->irqs[2] != NO_IRQ) {
+		snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2",
+			 spu->number);
+		ret = request_irq(spu->irqs[2], spu_irq_class_2,
+				  IRQF_DISABLED,
+				  spu->irq_c2, spu);
+		if (ret)
+			goto bail2;
+	}
+	return 0;
+
+bail2:
+	if (spu->irqs[1] != NO_IRQ)
+		free_irq(spu->irqs[1], spu);
+bail1:
+	if (spu->irqs[0] != NO_IRQ)
+		free_irq(spu->irqs[0], spu);
+bail0:
+	return ret;
+}
+
+static void spu_free_irqs(struct spu *spu)
+{
+	if (spu->irqs[0] != NO_IRQ)
+		free_irq(spu->irqs[0], spu);
+	if (spu->irqs[1] != NO_IRQ)
+		free_irq(spu->irqs[1], spu);
+	if (spu->irqs[2] != NO_IRQ)
+		free_irq(spu->irqs[2], spu);
+}
+
+void spu_init_channels(struct spu *spu)
+{
+	static const struct {
+		 unsigned channel;
+		 unsigned count;
+	} zero_list[] = {
+		{ 0x00, 1, }, { 0x01, 1, }, { 0x03, 1, }, { 0x04, 1, },
+		{ 0x18, 1, }, { 0x19, 1, }, { 0x1b, 1, }, { 0x1d, 1, },
+	}, count_list[] = {
+		{ 0x00, 0, }, { 0x03, 0, }, { 0x04, 0, }, { 0x15, 16, },
+		{ 0x17, 1, }, { 0x18, 0, }, { 0x19, 0, }, { 0x1b, 0, },
+		{ 0x1c, 1, }, { 0x1d, 0, }, { 0x1e, 1, },
+	};
+	struct spu_priv2 __iomem *priv2;
+	int i;
+
+	priv2 = spu->priv2;
+
+	/* initialize all channel data to zero */
+	for (i = 0; i < ARRAY_SIZE(zero_list); i++) {
+		int count;
+
+		out_be64(&priv2->spu_chnlcntptr_RW, zero_list[i].channel);
+		for (count = 0; count < zero_list[i].count; count++)
+			out_be64(&priv2->spu_chnldata_RW, 0);
+	}
+
+	/* initialize channel counts to meaningful values */
+	for (i = 0; i < ARRAY_SIZE(count_list); i++) {
+		out_be64(&priv2->spu_chnlcntptr_RW, count_list[i].channel);
+		out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count);
+	}
+}
+EXPORT_SYMBOL_GPL(spu_init_channels);
+
+static int spu_shutdown(struct sys_device *sysdev)
+{
+	struct spu *spu = container_of(sysdev, struct spu, sysdev);
+
+	spu_free_irqs(spu);
+	spu_destroy_spu(spu);
+	return 0;
+}
+
+static struct sysdev_class spu_sysdev_class = {
+	.name = "spu",
+	.shutdown = spu_shutdown,
+};
+
+int spu_add_sysdev_attr(struct sysdev_attribute *attr)
+{
+	struct spu *spu;
+
+	mutex_lock(&spu_full_list_mutex);
+	list_for_each_entry(spu, &spu_full_list, full_list)
+		sysdev_create_file(&spu->sysdev, attr);
+	mutex_unlock(&spu_full_list_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(spu_add_sysdev_attr);
+
+int spu_add_sysdev_attr_group(struct attribute_group *attrs)
+{
+	struct spu *spu;
+	int rc = 0;
+
+	mutex_lock(&spu_full_list_mutex);
+	list_for_each_entry(spu, &spu_full_list, full_list) {
+		rc = sysfs_create_group(&spu->sysdev.kobj, attrs);
+
+		/* we're in trouble here, but try unwinding anyway */
+		if (rc) {
+			printk(KERN_ERR "%s: can't create sysfs group '%s'\n",
+					__func__, attrs->name);
+
+			list_for_each_entry_continue_reverse(spu,
+					&spu_full_list, full_list)
+				sysfs_remove_group(&spu->sysdev.kobj, attrs);
+			break;
+		}
+	}
+
+	mutex_unlock(&spu_full_list_mutex);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group);
+
+
+void spu_remove_sysdev_attr(struct sysdev_attribute *attr)
+{
+	struct spu *spu;
+
+	mutex_lock(&spu_full_list_mutex);
+	list_for_each_entry(spu, &spu_full_list, full_list)
+		sysdev_remove_file(&spu->sysdev, attr);
+	mutex_unlock(&spu_full_list_mutex);
+}
+EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr);
+
+void spu_remove_sysdev_attr_group(struct attribute_group *attrs)
+{
+	struct spu *spu;
+
+	mutex_lock(&spu_full_list_mutex);
+	list_for_each_entry(spu, &spu_full_list, full_list)
+		sysfs_remove_group(&spu->sysdev.kobj, attrs);
+	mutex_unlock(&spu_full_list_mutex);
+}
+EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr_group);
+
+static int spu_create_sysdev(struct spu *spu)
+{
+	int ret;
+
+	spu->sysdev.id = spu->number;
+	spu->sysdev.cls = &spu_sysdev_class;
+	ret = sysdev_register(&spu->sysdev);
+	if (ret) {
+		printk(KERN_ERR "Can't register SPU %d with sysfs\n",
+				spu->number);
+		return ret;
+	}
+
+	sysfs_add_device_to_node(&spu->sysdev, spu->node);
+
+	return 0;
+}
+
+static int __init create_spu(void *data)
+{
+	struct spu *spu;
+	int ret;
+	static int number;
+	unsigned long flags;
+	struct timespec ts;
+
+	ret = -ENOMEM;
+	spu = kzalloc(sizeof (*spu), GFP_KERNEL);
+	if (!spu)
+		goto out;
+
+	spu->alloc_state = SPU_FREE;
+
+	spin_lock_init(&spu->register_lock);
+	spin_lock(&spu_lock);
+	spu->number = number++;
+	spin_unlock(&spu_lock);
+
+	ret = spu_create_spu(spu, data);
+
+	if (ret)
+		goto out_free;
+
+	spu_mfc_sdr_setup(spu);
+	spu_mfc_sr1_set(spu, 0x33);
+	ret = spu_request_irqs(spu);
+	if (ret)
+		goto out_destroy;
+
+	ret = spu_create_sysdev(spu);
+	if (ret)
+		goto out_free_irqs;
+
+	mutex_lock(&cbe_spu_info[spu->node].list_mutex);
+	list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus);
+	cbe_spu_info[spu->node].n_spus++;
+	mutex_unlock(&cbe_spu_info[spu->node].list_mutex);
+
+	mutex_lock(&spu_full_list_mutex);
+	spin_lock_irqsave(&spu_full_list_lock, flags);
+	list_add(&spu->full_list, &spu_full_list);
+	spin_unlock_irqrestore(&spu_full_list_lock, flags);
+	mutex_unlock(&spu_full_list_mutex);
+
+	spu->stats.util_state = SPU_UTIL_IDLE_LOADED;
+	ktime_get_ts(&ts);
+	spu->stats.tstamp = timespec_to_ns(&ts);
+
+	INIT_LIST_HEAD(&spu->aff_list);
+
+	goto out;
+
+out_free_irqs:
+	spu_free_irqs(spu);
+out_destroy:
+	spu_destroy_spu(spu);
+out_free:
+	kfree(spu);
+out:
+	return ret;
+}
+
+static const char *spu_state_names[] = {
+	"user", "system", "iowait", "idle"
+};
+
+static unsigned long long spu_acct_time(struct spu *spu,
+		enum spu_utilization_state state)
+{
+	struct timespec ts;
+	unsigned long long time = spu->stats.times[state];
+
+	/*
+	 * If the spu is idle or the context is stopped, utilization
+	 * statistics are not updated.  Apply the time delta from the
+	 * last recorded state of the spu.
+	 */
+	if (spu->stats.util_state == state) {
+		ktime_get_ts(&ts);
+		time += timespec_to_ns(&ts) - spu->stats.tstamp;
+	}
+
+	return time / NSEC_PER_MSEC;
+}
+
+
+static ssize_t spu_stat_show(struct sys_device *sysdev,
+				struct sysdev_attribute *attr, char *buf)
+{
+	struct spu *spu = container_of(sysdev, struct spu, sysdev);
+
+	return sprintf(buf, "%s %llu %llu %llu %llu "
+		      "%llu %llu %llu %llu %llu %llu %llu %llu\n",
+		spu_state_names[spu->stats.util_state],
+		spu_acct_time(spu, SPU_UTIL_USER),
+		spu_acct_time(spu, SPU_UTIL_SYSTEM),
+		spu_acct_time(spu, SPU_UTIL_IOWAIT),
+		spu_acct_time(spu, SPU_UTIL_IDLE_LOADED),
+		spu->stats.vol_ctx_switch,
+		spu->stats.invol_ctx_switch,
+		spu->stats.slb_flt,
+		spu->stats.hash_flt,
+		spu->stats.min_flt,
+		spu->stats.maj_flt,
+		spu->stats.class2_intr,
+		spu->stats.libassist);
+}
+
+static SYSDEV_ATTR(stat, 0644, spu_stat_show, NULL);
+
+static int __init init_spu_base(void)
+{
+	int i, ret = 0;
+
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		mutex_init(&cbe_spu_info[i].list_mutex);
+		INIT_LIST_HEAD(&cbe_spu_info[i].spus);
+	}
+
+	if (!spu_management_ops)
+		goto out;
+
+	/* create sysdev class for spus */
+	ret = sysdev_class_register(&spu_sysdev_class);
+	if (ret)
+		goto out;
+
+	ret = spu_enumerate_spus(create_spu);
+
+	if (ret < 0) {
+		printk(KERN_WARNING "%s: Error initializing spus\n",
+			__func__);
+		goto out_unregister_sysdev_class;
+	}
+
+	if (ret > 0) {
+		/*
+		 * We cannot put the forward declaration in
+		 * <linux/linux_logo.h> because of conflicting session type
+		 * conflicts for const and __initdata with different compiler
+		 * versions
+		 */
+		extern const struct linux_logo logo_spe_clut224;
+
+		fb_append_extra_logo(&logo_spe_clut224, ret);
+	}
+
+	mutex_lock(&spu_full_list_mutex);
+	xmon_register_spus(&spu_full_list);
+	crash_register_spus(&spu_full_list);
+	mutex_unlock(&spu_full_list_mutex);
+	spu_add_sysdev_attr(&attr_stat);
+
+	spu_init_affinity();
+
+	return 0;
+
+ out_unregister_sysdev_class:
+	sysdev_class_unregister(&spu_sysdev_class);
+ out:
+	return ret;
+}
+module_init(init_spu_base);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>");
diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c
new file mode 100644
index 0000000..19f6bfd
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_callbacks.c
@@ -0,0 +1,74 @@
+/*
+ * System call callback functions for SPUs
+ */
+
+#undef DEBUG
+
+#include <linux/kallsyms.h>
+#include <linux/module.h>
+#include <linux/syscalls.h>
+
+#include <asm/spu.h>
+#include <asm/syscalls.h>
+#include <asm/unistd.h>
+
+/*
+ * This table defines the system calls that an SPU can call.
+ * It is currently a subset of the 64 bit powerpc system calls,
+ * with the exact semantics.
+ *
+ * The reasons for disabling some of the system calls are:
+ * 1. They interact with the way SPU syscalls are handled
+ *    and we can't let them execute ever:
+ *	restart_syscall, exit, for, execve, ptrace, ...
+ * 2. They are deprecated and replaced by other means:
+ *	uselib, pciconfig_*, sysfs, ...
+ * 3. They are somewhat interacting with the system in a way
+ *    we don't want an SPU to:
+ *	reboot, init_module, mount, kexec_load
+ * 4. They are optional and we can't rely on them being
+ *    linked into the kernel. Unfortunately, the cond_syscall
+ *    helper does not work here as it does not add the necessary
+ *    opd symbols:
+ *	mbind, mq_open, ipc, ...
+ */
+
+static void *spu_syscall_table[] = {
+#define SYSCALL(func)		sys_ni_syscall,
+#define COMPAT_SYS(func)	sys_ni_syscall,
+#define PPC_SYS(func)		sys_ni_syscall,
+#define OLDSYS(func)		sys_ni_syscall,
+#define SYS32ONLY(func)		sys_ni_syscall,
+#define SYSX(f, f3264, f32)	sys_ni_syscall,
+
+#define SYSCALL_SPU(func)	sys_##func,
+#define COMPAT_SYS_SPU(func)	sys_##func,
+#define PPC_SYS_SPU(func)	ppc_##func,
+#define SYSX_SPU(f, f3264, f32)	f,
+
+#include <asm/systbl.h>
+};
+
+long spu_sys_callback(struct spu_syscall_block *s)
+{
+	long (*syscall)(u64 a1, u64 a2, u64 a3, u64 a4, u64 a5, u64 a6);
+
+	if (s->nr_ret >= ARRAY_SIZE(spu_syscall_table)) {
+		pr_debug("%s: invalid syscall #%ld", __func__, s->nr_ret);
+		return -ENOSYS;
+	}
+
+	syscall = spu_syscall_table[s->nr_ret];
+
+#ifdef DEBUG
+	print_symbol(KERN_DEBUG "SPU-syscall %s:", (unsigned long)syscall);
+	printk("syscall%ld(%lx, %lx, %lx, %lx, %lx, %lx)\n",
+			s->nr_ret,
+			s->parm[0], s->parm[1], s->parm[2],
+			s->parm[3], s->parm[4], s->parm[5]);
+#endif
+
+	return syscall(s->parm[0], s->parm[1], s->parm[2],
+		       s->parm[3], s->parm[4], s->parm[5]);
+}
+EXPORT_SYMBOL_GPL(spu_sys_callback);
diff --git a/arch/powerpc/platforms/cell/spu_fault.c b/arch/powerpc/platforms/cell/spu_fault.c
new file mode 100644
index 0000000..c8b1cd4
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_fault.c
@@ -0,0 +1,98 @@
+/*
+ * SPU mm fault handler
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2007
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ * Author: Jeremy Kerr <jk@ozlabs.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+
+/*
+ * This ought to be kept in sync with the powerpc specific do_page_fault
+ * function. Currently, there are a few corner cases that we haven't had
+ * to handle fortunately.
+ */
+int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
+		unsigned long dsisr, unsigned *flt)
+{
+	struct vm_area_struct *vma;
+	unsigned long is_write;
+	int ret;
+
+#if 0
+	if (!IS_VALID_EA(ea)) {
+		return -EFAULT;
+	}
+#endif /* XXX */
+	if (mm == NULL) {
+		return -EFAULT;
+	}
+	if (mm->pgd == NULL) {
+		return -EFAULT;
+	}
+
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, ea);
+	if (!vma)
+		goto bad_area;
+	if (vma->vm_start <= ea)
+		goto good_area;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+	if (expand_stack(vma, ea))
+		goto bad_area;
+good_area:
+	is_write = dsisr & MFC_DSISR_ACCESS_PUT;
+	if (is_write) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+	} else {
+		if (dsisr & MFC_DSISR_ACCESS_DENIED)
+			goto bad_area;
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+			goto bad_area;
+	}
+	ret = 0;
+	*flt = handle_mm_fault(mm, vma, ea, is_write);
+	if (unlikely(*flt & VM_FAULT_ERROR)) {
+		if (*flt & VM_FAULT_OOM) {
+			ret = -ENOMEM;
+			goto bad_area;
+		} else if (*flt & VM_FAULT_SIGBUS) {
+			ret = -EFAULT;
+			goto bad_area;
+		}
+		BUG();
+	}
+	if (*flt & VM_FAULT_MAJOR)
+		current->maj_flt++;
+	else
+		current->min_flt++;
+	up_read(&mm->mmap_sem);
+	return ret;
+
+bad_area:
+	up_read(&mm->mmap_sem);
+	return -EFAULT;
+}
+EXPORT_SYMBOL_GPL(spu_handle_mm_fault);
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
new file mode 100644
index 0000000..4c506c1
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -0,0 +1,557 @@
+/*
+ * spu management operations for of based platforms
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ * Copyright 2006 Sony Corp.
+ * (C) Copyright 2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/firmware.h>
+#include <asm/prom.h>
+
+#include "spufs/spufs.h"
+#include "interrupt.h"
+
+struct device_node *spu_devnode(struct spu *spu)
+{
+	return spu->devnode;
+}
+
+EXPORT_SYMBOL_GPL(spu_devnode);
+
+static u64 __init find_spu_unit_number(struct device_node *spe)
+{
+	const unsigned int *prop;
+	int proplen;
+
+	/* new device trees should provide the physical-id attribute */
+	prop = of_get_property(spe, "physical-id", &proplen);
+	if (proplen == 4)
+		return (u64)*prop;
+
+	/* celleb device tree provides the unit-id */
+	prop = of_get_property(spe, "unit-id", &proplen);
+	if (proplen == 4)
+		return (u64)*prop;
+
+	/* legacy device trees provide the id in the reg attribute */
+	prop = of_get_property(spe, "reg", &proplen);
+	if (proplen == 4)
+		return (u64)*prop;
+
+	return 0;
+}
+
+static void spu_unmap(struct spu *spu)
+{
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		iounmap(spu->priv1);
+	iounmap(spu->priv2);
+	iounmap(spu->problem);
+	iounmap((__force u8 __iomem *)spu->local_store);
+}
+
+static int __init spu_map_interrupts_old(struct spu *spu,
+	struct device_node *np)
+{
+	unsigned int isrc;
+	const u32 *tmp;
+	int nid;
+
+	/* Get the interrupt source unit from the device-tree */
+	tmp = of_get_property(np, "isrc", NULL);
+	if (!tmp)
+		return -ENODEV;
+	isrc = tmp[0];
+
+	tmp = of_get_property(np->parent->parent, "node-id", NULL);
+	if (!tmp) {
+		printk(KERN_WARNING "%s: can't find node-id\n", __func__);
+		nid = spu->node;
+	} else
+		nid = tmp[0];
+
+	/* Add the node number */
+	isrc |= nid << IIC_IRQ_NODE_SHIFT;
+
+	/* Now map interrupts of all 3 classes */
+	spu->irqs[0] = irq_create_mapping(NULL, IIC_IRQ_CLASS_0 | isrc);
+	spu->irqs[1] = irq_create_mapping(NULL, IIC_IRQ_CLASS_1 | isrc);
+	spu->irqs[2] = irq_create_mapping(NULL, IIC_IRQ_CLASS_2 | isrc);
+
+	/* Right now, we only fail if class 2 failed */
+	return spu->irqs[2] == NO_IRQ ? -EINVAL : 0;
+}
+
+static void __iomem * __init spu_map_prop_old(struct spu *spu,
+					      struct device_node *n,
+					      const char *name)
+{
+	const struct address_prop {
+		unsigned long address;
+		unsigned int len;
+	} __attribute__((packed)) *prop;
+	int proplen;
+
+	prop = of_get_property(n, name, &proplen);
+	if (prop == NULL || proplen != sizeof (struct address_prop))
+		return NULL;
+
+	return ioremap(prop->address, prop->len);
+}
+
+static int __init spu_map_device_old(struct spu *spu)
+{
+	struct device_node *node = spu->devnode;
+	const char *prop;
+	int ret;
+
+	ret = -ENODEV;
+	spu->name = of_get_property(node, "name", NULL);
+	if (!spu->name)
+		goto out;
+
+	prop = of_get_property(node, "local-store", NULL);
+	if (!prop)
+		goto out;
+	spu->local_store_phys = *(unsigned long *)prop;
+
+	/* we use local store as ram, not io memory */
+	spu->local_store = (void __force *)
+		spu_map_prop_old(spu, node, "local-store");
+	if (!spu->local_store)
+		goto out;
+
+	prop = of_get_property(node, "problem", NULL);
+	if (!prop)
+		goto out_unmap;
+	spu->problem_phys = *(unsigned long *)prop;
+
+	spu->problem = spu_map_prop_old(spu, node, "problem");
+	if (!spu->problem)
+		goto out_unmap;
+
+	spu->priv2 = spu_map_prop_old(spu, node, "priv2");
+	if (!spu->priv2)
+		goto out_unmap;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+		spu->priv1 = spu_map_prop_old(spu, node, "priv1");
+		if (!spu->priv1)
+			goto out_unmap;
+	}
+
+	ret = 0;
+	goto out;
+
+out_unmap:
+	spu_unmap(spu);
+out:
+	return ret;
+}
+
+static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
+{
+	struct of_irq oirq;
+	int ret;
+	int i;
+
+	for (i=0; i < 3; i++) {
+		ret = of_irq_map_one(np, i, &oirq);
+		if (ret) {
+			pr_debug("spu_new: failed to get irq %d\n", i);
+			goto err;
+		}
+		ret = -EINVAL;
+		pr_debug("  irq %d no 0x%x on %s\n", i, oirq.specifier[0],
+			 oirq.controller->full_name);
+		spu->irqs[i] = irq_create_of_mapping(oirq.controller,
+					oirq.specifier, oirq.size);
+		if (spu->irqs[i] == NO_IRQ) {
+			pr_debug("spu_new: failed to map it !\n");
+			goto err;
+		}
+	}
+	return 0;
+
+err:
+	pr_debug("failed to map irq %x for spu %s\n", *oirq.specifier,
+		spu->name);
+	for (; i >= 0; i--) {
+		if (spu->irqs[i] != NO_IRQ)
+			irq_dispose_mapping(spu->irqs[i]);
+	}
+	return ret;
+}
+
+static int spu_map_resource(struct spu *spu, int nr,
+			    void __iomem** virt, unsigned long *phys)
+{
+	struct device_node *np = spu->devnode;
+	struct resource resource = { };
+	unsigned long len;
+	int ret;
+
+	ret = of_address_to_resource(np, nr, &resource);
+	if (ret)
+		return ret;
+	if (phys)
+		*phys = resource.start;
+	len = resource.end - resource.start + 1;
+	*virt = ioremap(resource.start, len);
+	if (!*virt)
+		return -EINVAL;
+	return 0;
+}
+
+static int __init spu_map_device(struct spu *spu)
+{
+	struct device_node *np = spu->devnode;
+	int ret = -ENODEV;
+
+	spu->name = of_get_property(np, "name", NULL);
+	if (!spu->name)
+		goto out;
+
+	ret = spu_map_resource(spu, 0, (void __iomem**)&spu->local_store,
+			       &spu->local_store_phys);
+	if (ret) {
+		pr_debug("spu_new: failed to map %s resource 0\n",
+			 np->full_name);
+		goto out;
+	}
+	ret = spu_map_resource(spu, 1, (void __iomem**)&spu->problem,
+			       &spu->problem_phys);
+	if (ret) {
+		pr_debug("spu_new: failed to map %s resource 1\n",
+			 np->full_name);
+		goto out_unmap;
+	}
+	ret = spu_map_resource(spu, 2, (void __iomem**)&spu->priv2, NULL);
+	if (ret) {
+		pr_debug("spu_new: failed to map %s resource 2\n",
+			 np->full_name);
+		goto out_unmap;
+	}
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		ret = spu_map_resource(spu, 3,
+			       (void __iomem**)&spu->priv1, NULL);
+	if (ret) {
+		pr_debug("spu_new: failed to map %s resource 3\n",
+			 np->full_name);
+		goto out_unmap;
+	}
+	pr_debug("spu_new: %s maps:\n", np->full_name);
+	pr_debug("  local store   : 0x%016lx -> 0x%p\n",
+		 spu->local_store_phys, spu->local_store);
+	pr_debug("  problem state : 0x%016lx -> 0x%p\n",
+		 spu->problem_phys, spu->problem);
+	pr_debug("  priv2         :                       0x%p\n", spu->priv2);
+	pr_debug("  priv1         :                       0x%p\n", spu->priv1);
+
+	return 0;
+
+out_unmap:
+	spu_unmap(spu);
+out:
+	pr_debug("failed to map spe %s: %d\n", spu->name, ret);
+	return ret;
+}
+
+static int __init of_enumerate_spus(int (*fn)(void *data))
+{
+	int ret;
+	struct device_node *node;
+	unsigned int n = 0;
+
+	ret = -ENODEV;
+	for (node = of_find_node_by_type(NULL, "spe");
+			node; node = of_find_node_by_type(node, "spe")) {
+		ret = fn(node);
+		if (ret) {
+			printk(KERN_WARNING "%s: Error initializing %s\n",
+				__func__, node->name);
+			break;
+		}
+		n++;
+	}
+	return ret ? ret : n;
+}
+
+static int __init of_create_spu(struct spu *spu, void *data)
+{
+	int ret;
+	struct device_node *spe = (struct device_node *)data;
+	static int legacy_map = 0, legacy_irq = 0;
+
+	spu->devnode = of_node_get(spe);
+	spu->spe_id = find_spu_unit_number(spe);
+
+	spu->node = of_node_to_nid(spe);
+	if (spu->node >= MAX_NUMNODES) {
+		printk(KERN_WARNING "SPE %s on node %d ignored,"
+		       " node number too big\n", spe->full_name, spu->node);
+		printk(KERN_WARNING "Check if CONFIG_NUMA is enabled.\n");
+		ret = -ENODEV;
+		goto out;
+	}
+
+	ret = spu_map_device(spu);
+	if (ret) {
+		if (!legacy_map) {
+			legacy_map = 1;
+			printk(KERN_WARNING "%s: Legacy device tree found, "
+				"trying to map old style\n", __func__);
+		}
+		ret = spu_map_device_old(spu);
+		if (ret) {
+			printk(KERN_ERR "Unable to map %s\n",
+				spu->name);
+			goto out;
+		}
+	}
+
+	ret = spu_map_interrupts(spu, spe);
+	if (ret) {
+		if (!legacy_irq) {
+			legacy_irq = 1;
+			printk(KERN_WARNING "%s: Legacy device tree found, "
+				"trying old style irq\n", __func__);
+		}
+		ret = spu_map_interrupts_old(spu, spe);
+		if (ret) {
+			printk(KERN_ERR "%s: could not map interrupts\n",
+				spu->name);
+			goto out_unmap;
+		}
+	}
+
+	pr_debug("Using SPE %s %p %p %p %p %d\n", spu->name,
+		spu->local_store, spu->problem, spu->priv1,
+		spu->priv2, spu->number);
+	goto out;
+
+out_unmap:
+	spu_unmap(spu);
+out:
+	return ret;
+}
+
+static int of_destroy_spu(struct spu *spu)
+{
+	spu_unmap(spu);
+	of_node_put(spu->devnode);
+	return 0;
+}
+
+static void enable_spu_by_master_run(struct spu_context *ctx)
+{
+	ctx->ops->master_start(ctx);
+}
+
+static void disable_spu_by_master_run(struct spu_context *ctx)
+{
+	ctx->ops->master_stop(ctx);
+}
+
+/* Hardcoded affinity idxs for qs20 */
+#define QS20_SPES_PER_BE 8
+static int qs20_reg_idxs[QS20_SPES_PER_BE] =   { 0, 2, 4, 6, 7, 5, 3, 1 };
+static int qs20_reg_memory[QS20_SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 };
+
+static struct spu *spu_lookup_reg(int node, u32 reg)
+{
+	struct spu *spu;
+	const u32 *spu_reg;
+
+	list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+		spu_reg = of_get_property(spu_devnode(spu), "reg", NULL);
+		if (*spu_reg == reg)
+			return spu;
+	}
+	return NULL;
+}
+
+static void init_affinity_qs20_harcoded(void)
+{
+	int node, i;
+	struct spu *last_spu, *spu;
+	u32 reg;
+
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		last_spu = NULL;
+		for (i = 0; i < QS20_SPES_PER_BE; i++) {
+			reg = qs20_reg_idxs[i];
+			spu = spu_lookup_reg(node, reg);
+			if (!spu)
+				continue;
+			spu->has_mem_affinity = qs20_reg_memory[reg];
+			if (last_spu)
+				list_add_tail(&spu->aff_list,
+						&last_spu->aff_list);
+			last_spu = spu;
+		}
+	}
+}
+
+static int of_has_vicinity(void)
+{
+	struct device_node *dn;
+
+	for_each_node_by_type(dn, "spe") {
+		if (of_find_property(dn, "vicinity", NULL))  {
+			of_node_put(dn);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static struct spu *devnode_spu(int cbe, struct device_node *dn)
+{
+	struct spu *spu;
+
+	list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list)
+		if (spu_devnode(spu) == dn)
+			return spu;
+	return NULL;
+}
+
+static struct spu *
+neighbour_spu(int cbe, struct device_node *target, struct device_node *avoid)
+{
+	struct spu *spu;
+	struct device_node *spu_dn;
+	const phandle *vic_handles;
+	int lenp, i;
+
+	list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) {
+		spu_dn = spu_devnode(spu);
+		if (spu_dn == avoid)
+			continue;
+		vic_handles = of_get_property(spu_dn, "vicinity", &lenp);
+		for (i=0; i < (lenp / sizeof(phandle)); i++) {
+			if (vic_handles[i] == target->linux_phandle)
+				return spu;
+		}
+	}
+	return NULL;
+}
+
+static void init_affinity_node(int cbe)
+{
+	struct spu *spu, *last_spu;
+	struct device_node *vic_dn, *last_spu_dn;
+	phandle avoid_ph;
+	const phandle *vic_handles;
+	const char *name;
+	int lenp, i, added;
+
+	last_spu = list_first_entry(&cbe_spu_info[cbe].spus, struct spu,
+								cbe_list);
+	avoid_ph = 0;
+	for (added = 1; added < cbe_spu_info[cbe].n_spus; added++) {
+		last_spu_dn = spu_devnode(last_spu);
+		vic_handles = of_get_property(last_spu_dn, "vicinity", &lenp);
+
+		/*
+		 * Walk through each phandle in vicinity property of the spu
+		 * (tipically two vicinity phandles per spe node)
+		 */
+		for (i = 0; i < (lenp / sizeof(phandle)); i++) {
+			if (vic_handles[i] == avoid_ph)
+				continue;
+
+			vic_dn = of_find_node_by_phandle(vic_handles[i]);
+			if (!vic_dn)
+				continue;
+
+			/* a neighbour might be spe, mic-tm, or bif0 */
+			name = of_get_property(vic_dn, "name", NULL);
+			if (!name)
+				continue;
+
+			if (strcmp(name, "spe") == 0) {
+				spu = devnode_spu(cbe, vic_dn);
+				avoid_ph = last_spu_dn->linux_phandle;
+			} else {
+				/*
+				 * "mic-tm" and "bif0" nodes do not have
+				 * vicinity property. So we need to find the
+				 * spe which has vic_dn as neighbour, but
+				 * skipping the one we came from (last_spu_dn)
+				 */
+				spu = neighbour_spu(cbe, vic_dn, last_spu_dn);
+				if (!spu)
+					continue;
+				if (!strcmp(name, "mic-tm")) {
+					last_spu->has_mem_affinity = 1;
+					spu->has_mem_affinity = 1;
+				}
+				avoid_ph = vic_dn->linux_phandle;
+			}
+
+			list_add_tail(&spu->aff_list, &last_spu->aff_list);
+			last_spu = spu;
+			break;
+		}
+	}
+}
+
+static void init_affinity_fw(void)
+{
+	int cbe;
+
+	for (cbe = 0; cbe < MAX_NUMNODES; cbe++)
+		init_affinity_node(cbe);
+}
+
+static int __init init_affinity(void)
+{
+	if (of_has_vicinity()) {
+		init_affinity_fw();
+	} else {
+		long root = of_get_flat_dt_root();
+		if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0"))
+			init_affinity_qs20_harcoded();
+		else
+			printk("No affinity configuration found\n");
+	}
+
+	return 0;
+}
+
+const struct spu_management_ops spu_management_of_ops = {
+	.enumerate_spus = of_enumerate_spus,
+	.create_spu = of_create_spu,
+	.destroy_spu = of_destroy_spu,
+	.enable_spu = enable_spu_by_master_run,
+	.disable_spu = disable_spu_by_master_run,
+	.init_affinity = init_affinity,
+};
diff --git a/arch/powerpc/platforms/cell/spu_notify.c b/arch/powerpc/platforms/cell/spu_notify.c
new file mode 100644
index 0000000..34d1569
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_notify.c
@@ -0,0 +1,67 @@
+/*
+ * Move OProfile dependencies from spufs module to the kernel so it
+ * can run on non-cell PPC.
+ *
+ * Copyright (C) IBM 2005
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/module.h>
+#include <asm/spu.h>
+#include "spufs/spufs.h"
+
+static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);
+
+void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
+{
+	blocking_notifier_call_chain(&spu_switch_notifier,
+				     ctx ? ctx->object_id : 0, spu);
+}
+EXPORT_SYMBOL_GPL(spu_switch_notify);
+
+int spu_switch_event_register(struct notifier_block *n)
+{
+	int ret;
+	ret = blocking_notifier_chain_register(&spu_switch_notifier, n);
+	if (!ret)
+		notify_spus_active();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(spu_switch_event_register);
+
+int spu_switch_event_unregister(struct notifier_block *n)
+{
+	return blocking_notifier_chain_unregister(&spu_switch_notifier, n);
+}
+EXPORT_SYMBOL_GPL(spu_switch_event_unregister);
+
+void spu_set_profile_private_kref(struct spu_context *ctx,
+				  struct kref *prof_info_kref,
+				  void (* prof_info_release) (struct kref *kref))
+{
+	ctx->prof_priv_kref = prof_info_kref;
+	ctx->prof_priv_release = prof_info_release;
+}
+EXPORT_SYMBOL_GPL(spu_set_profile_private_kref);
+
+void *spu_get_profile_private_kref(struct spu_context *ctx)
+{
+	return ctx->prof_priv_kref;
+}
+EXPORT_SYMBOL_GPL(spu_get_profile_private_kref);
+
diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.c b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
new file mode 100644
index 0000000..906a0a2
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
@@ -0,0 +1,182 @@
+/*
+ * spu hypervisor abstraction for direct hardware access.
+ *
+ *  (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *  Copyright 2006 Sony Corp.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+#include <linux/sched.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/firmware.h>
+#include <asm/prom.h>
+
+#include "interrupt.h"
+#include "spu_priv1_mmio.h"
+
+static void int_mask_and(struct spu *spu, int class, u64 mask)
+{
+	u64 old_mask;
+
+	old_mask = in_be64(&spu->priv1->int_mask_RW[class]);
+	out_be64(&spu->priv1->int_mask_RW[class], old_mask & mask);
+}
+
+static void int_mask_or(struct spu *spu, int class, u64 mask)
+{
+	u64 old_mask;
+
+	old_mask = in_be64(&spu->priv1->int_mask_RW[class]);
+	out_be64(&spu->priv1->int_mask_RW[class], old_mask | mask);
+}
+
+static void int_mask_set(struct spu *spu, int class, u64 mask)
+{
+	out_be64(&spu->priv1->int_mask_RW[class], mask);
+}
+
+static u64 int_mask_get(struct spu *spu, int class)
+{
+	return in_be64(&spu->priv1->int_mask_RW[class]);
+}
+
+static void int_stat_clear(struct spu *spu, int class, u64 stat)
+{
+	out_be64(&spu->priv1->int_stat_RW[class], stat);
+}
+
+static u64 int_stat_get(struct spu *spu, int class)
+{
+	return in_be64(&spu->priv1->int_stat_RW[class]);
+}
+
+static void cpu_affinity_set(struct spu *spu, int cpu)
+{
+	u64 target;
+	u64 route;
+
+	if (nr_cpus_node(spu->node)) {
+		cpumask_t spumask = node_to_cpumask(spu->node);
+		cpumask_t cpumask = node_to_cpumask(cpu_to_node(cpu));
+
+		if (!cpus_intersects(spumask, cpumask))
+			return;
+	}
+
+	target = iic_get_target_id(cpu);
+	route = target << 48 | target << 32 | target << 16;
+	out_be64(&spu->priv1->int_route_RW, route);
+}
+
+static u64 mfc_dar_get(struct spu *spu)
+{
+	return in_be64(&spu->priv1->mfc_dar_RW);
+}
+
+static u64 mfc_dsisr_get(struct spu *spu)
+{
+	return in_be64(&spu->priv1->mfc_dsisr_RW);
+}
+
+static void mfc_dsisr_set(struct spu *spu, u64 dsisr)
+{
+	out_be64(&spu->priv1->mfc_dsisr_RW, dsisr);
+}
+
+static void mfc_sdr_setup(struct spu *spu)
+{
+	out_be64(&spu->priv1->mfc_sdr_RW, mfspr(SPRN_SDR1));
+}
+
+static void mfc_sr1_set(struct spu *spu, u64 sr1)
+{
+	out_be64(&spu->priv1->mfc_sr1_RW, sr1);
+}
+
+static u64 mfc_sr1_get(struct spu *spu)
+{
+	return in_be64(&spu->priv1->mfc_sr1_RW);
+}
+
+static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id)
+{
+	out_be64(&spu->priv1->mfc_tclass_id_RW, tclass_id);
+}
+
+static u64 mfc_tclass_id_get(struct spu *spu)
+{
+	return in_be64(&spu->priv1->mfc_tclass_id_RW);
+}
+
+static void tlb_invalidate(struct spu *spu)
+{
+	out_be64(&spu->priv1->tlb_invalidate_entry_W, 0ul);
+}
+
+static void resource_allocation_groupID_set(struct spu *spu, u64 id)
+{
+	out_be64(&spu->priv1->resource_allocation_groupID_RW, id);
+}
+
+static u64 resource_allocation_groupID_get(struct spu *spu)
+{
+	return in_be64(&spu->priv1->resource_allocation_groupID_RW);
+}
+
+static void resource_allocation_enable_set(struct spu *spu, u64 enable)
+{
+	out_be64(&spu->priv1->resource_allocation_enable_RW, enable);
+}
+
+static u64 resource_allocation_enable_get(struct spu *spu)
+{
+	return in_be64(&spu->priv1->resource_allocation_enable_RW);
+}
+
+const struct spu_priv1_ops spu_priv1_mmio_ops =
+{
+	.int_mask_and = int_mask_and,
+	.int_mask_or = int_mask_or,
+	.int_mask_set = int_mask_set,
+	.int_mask_get = int_mask_get,
+	.int_stat_clear = int_stat_clear,
+	.int_stat_get = int_stat_get,
+	.cpu_affinity_set = cpu_affinity_set,
+	.mfc_dar_get = mfc_dar_get,
+	.mfc_dsisr_get = mfc_dsisr_get,
+	.mfc_dsisr_set = mfc_dsisr_set,
+	.mfc_sdr_setup = mfc_sdr_setup,
+	.mfc_sr1_set = mfc_sr1_set,
+	.mfc_sr1_get = mfc_sr1_get,
+	.mfc_tclass_id_set = mfc_tclass_id_set,
+	.mfc_tclass_id_get = mfc_tclass_id_get,
+	.tlb_invalidate = tlb_invalidate,
+	.resource_allocation_groupID_set = resource_allocation_groupID_set,
+	.resource_allocation_groupID_get = resource_allocation_groupID_get,
+	.resource_allocation_enable_set = resource_allocation_enable_set,
+	.resource_allocation_enable_get = resource_allocation_enable_get,
+};
diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.h b/arch/powerpc/platforms/cell/spu_priv1_mmio.h
new file mode 100644
index 0000000..7b62bd1
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.h
@@ -0,0 +1,26 @@
+/*
+ * spu hypervisor abstraction for direct hardware access.
+ *
+ *  Copyright (C) 2006 Sony Computer Entertainment Inc.
+ *  Copyright 2006 Sony Corp.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef SPU_PRIV1_MMIO_H
+#define SPU_PRIV1_MMIO_H
+
+struct device_node *spu_devnode(struct spu *spu);
+
+#endif /* SPU_PRIV1_MMIO_H */
diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c
new file mode 100644
index 0000000..75530d9
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_syscalls.c
@@ -0,0 +1,178 @@
+/*
+ * SPU file system -- system call stubs
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ * (C) Copyright 2006-2007, IBM Corporation
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/syscalls.h>
+#include <linux/rcupdate.h>
+
+#include <asm/spu.h>
+
+/* protected by rcu */
+static struct spufs_calls *spufs_calls;
+
+#ifdef CONFIG_SPU_FS_MODULE
+
+static inline struct spufs_calls *spufs_calls_get(void)
+{
+	struct spufs_calls *calls = NULL;
+
+	rcu_read_lock();
+	calls = rcu_dereference(spufs_calls);
+	if (calls && !try_module_get(calls->owner))
+		calls = NULL;
+	rcu_read_unlock();
+
+	return calls;
+}
+
+static inline void spufs_calls_put(struct spufs_calls *calls)
+{
+	BUG_ON(calls != spufs_calls);
+
+	/* we don't need to rcu this, as we hold a reference to the module */
+	module_put(spufs_calls->owner);
+}
+
+#else /* !defined CONFIG_SPU_FS_MODULE */
+
+static inline struct spufs_calls *spufs_calls_get(void)
+{
+	return spufs_calls;
+}
+
+static inline void spufs_calls_put(struct spufs_calls *calls) { }
+
+#endif /* CONFIG_SPU_FS_MODULE */
+
+asmlinkage long sys_spu_create(const char __user *name,
+		unsigned int flags, mode_t mode, int neighbor_fd)
+{
+	long ret;
+	struct file *neighbor;
+	int fput_needed;
+	struct spufs_calls *calls;
+
+	calls = spufs_calls_get();
+	if (!calls)
+		return -ENOSYS;
+
+	if (flags & SPU_CREATE_AFFINITY_SPU) {
+		ret = -EBADF;
+		neighbor = fget_light(neighbor_fd, &fput_needed);
+		if (neighbor) {
+			ret = calls->create_thread(name, flags, mode, neighbor);
+			fput_light(neighbor, fput_needed);
+		}
+	} else
+		ret = calls->create_thread(name, flags, mode, NULL);
+
+	spufs_calls_put(calls);
+	return ret;
+}
+
+asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus)
+{
+	long ret;
+	struct file *filp;
+	int fput_needed;
+	struct spufs_calls *calls;
+
+	calls = spufs_calls_get();
+	if (!calls)
+		return -ENOSYS;
+
+	ret = -EBADF;
+	filp = fget_light(fd, &fput_needed);
+	if (filp) {
+		ret = calls->spu_run(filp, unpc, ustatus);
+		fput_light(filp, fput_needed);
+	}
+
+	spufs_calls_put(calls);
+	return ret;
+}
+
+int elf_coredump_extra_notes_size(void)
+{
+	struct spufs_calls *calls;
+	int ret;
+
+	calls = spufs_calls_get();
+	if (!calls)
+		return 0;
+
+	ret = calls->coredump_extra_notes_size();
+
+	spufs_calls_put(calls);
+
+	return ret;
+}
+
+int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset)
+{
+	struct spufs_calls *calls;
+	int ret;
+
+	calls = spufs_calls_get();
+	if (!calls)
+		return 0;
+
+	ret = calls->coredump_extra_notes_write(file, foffset);
+
+	spufs_calls_put(calls);
+
+	return ret;
+}
+
+void notify_spus_active(void)
+{
+	struct spufs_calls *calls;
+
+	calls = spufs_calls_get();
+	if (!calls)
+		return;
+
+	calls->notify_spus_active();
+	spufs_calls_put(calls);
+
+	return;
+}
+
+int register_spu_syscalls(struct spufs_calls *calls)
+{
+	if (spufs_calls)
+		return -EBUSY;
+
+	rcu_assign_pointer(spufs_calls, calls);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(register_spu_syscalls);
+
+void unregister_spu_syscalls(struct spufs_calls *calls)
+{
+	BUG_ON(spufs_calls->owner != calls->owner);
+	rcu_assign_pointer(spufs_calls, NULL);
+	synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(unregister_spu_syscalls);
diff --git a/arch/powerpc/platforms/cell/spufs/.gitignore b/arch/powerpc/platforms/cell/spufs/.gitignore
new file mode 100644
index 0000000..a09ee8d
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/.gitignore
@@ -0,0 +1,2 @@
+spu_save_dump.h
+spu_restore_dump.h
diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile
new file mode 100644
index 0000000..99610a6
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/Makefile
@@ -0,0 +1,62 @@
+
+obj-$(CONFIG_SPU_FS) += spufs.o
+spufs-y += inode.o file.o context.o syscalls.o coredump.o
+spufs-y += sched.o backing_ops.o hw_ops.o run.o gang.o
+spufs-y += switch.o fault.o lscsa_alloc.o
+
+obj-$(CONFIG_SPU_TRACE)	+= sputrace.o
+
+# Rules to build switch.o with the help of SPU tool chain
+SPU_CROSS	:= spu-
+SPU_CC		:= $(SPU_CROSS)gcc
+SPU_AS		:= $(SPU_CROSS)gcc
+SPU_LD		:= $(SPU_CROSS)ld
+SPU_OBJCOPY	:= $(SPU_CROSS)objcopy
+SPU_CFLAGS	:= -O2 -Wall -I$(srctree)/include \
+		   -I$(objtree)/include2 -D__KERNEL__
+SPU_AFLAGS	:= -c -D__ASSEMBLY__ -I$(srctree)/include \
+		   -I$(objtree)/include2 -D__KERNEL__
+SPU_LDFLAGS	:= -N -Ttext=0x0
+
+$(obj)/switch.o: $(obj)/spu_save_dump.h $(obj)/spu_restore_dump.h
+clean-files := spu_save_dump.h spu_restore_dump.h
+
+# Compile SPU files
+      cmd_spu_cc = $(SPU_CC) $(SPU_CFLAGS) -c -o $@ $<
+quiet_cmd_spu_cc = SPU_CC  $@
+$(obj)/spu_%.o: $(src)/spu_%.c
+	$(call if_changed,spu_cc)
+
+# Assemble SPU files
+      cmd_spu_as = $(SPU_AS) $(SPU_AFLAGS) -o $@ $<
+quiet_cmd_spu_as = SPU_AS  $@
+$(obj)/spu_%.o: $(src)/spu_%.S
+	$(call if_changed,spu_as)
+
+# Link SPU Executables
+      cmd_spu_ld = $(SPU_LD) $(SPU_LDFLAGS) -o $@ $^
+quiet_cmd_spu_ld = SPU_LD  $@
+$(obj)/spu_%: $(obj)/spu_%_crt0.o $(obj)/spu_%.o
+	$(call if_changed,spu_ld)
+
+# Copy into binary format
+      cmd_spu_objcopy = $(SPU_OBJCOPY) -O binary $< $@
+quiet_cmd_spu_objcopy = OBJCOPY $@
+$(obj)/spu_%.bin: $(src)/spu_%
+	$(call if_changed,spu_objcopy)
+
+# create C code from ELF executable
+cmd_hexdump   = ( \
+		echo "/*" ; \
+		echo " * $*_dump.h: Copyright (C) 2005 IBM." ; \
+		echo " * Hex-dump auto generated from $*.c." ; \
+		echo " * Do not edit!" ; \
+		echo " */" ; \
+		echo "static unsigned int $*_code[] " \
+			"__attribute__((__aligned__(128))) = {" ; \
+		hexdump -v -e '"0x" 4/1 "%02x" "," "\n"' $< ; \
+		echo "};" ; \
+		) > $@
+quiet_cmd_hexdump = HEXDUMP $@
+$(obj)/%_dump.h: $(obj)/%.bin
+	$(call if_changed,hexdump)
diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c
new file mode 100644
index 0000000..64eb15b
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c
@@ -0,0 +1,414 @@
+/* backing_ops.c - query/set operations on saved SPU context.
+ *
+ * Copyright (C) IBM 2005
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * These register operations allow SPUFS to operate on saved
+ * SPU contexts rather than hardware.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/poll.h>
+
+#include <asm/io.h>
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/spu_info.h>
+#include <asm/mmu_context.h>
+#include "spufs.h"
+
+/*
+ * Reads/writes to various problem and priv2 registers require
+ * state changes, i.e.  generate SPU events, modify channel
+ * counts, etc.
+ */
+
+static void gen_spu_event(struct spu_context *ctx, u32 event)
+{
+	u64 ch0_cnt;
+	u64 ch0_data;
+	u64 ch1_data;
+
+	ch0_cnt = ctx->csa.spu_chnlcnt_RW[0];
+	ch0_data = ctx->csa.spu_chnldata_RW[0];
+	ch1_data = ctx->csa.spu_chnldata_RW[1];
+	ctx->csa.spu_chnldata_RW[0] |= event;
+	if ((ch0_cnt == 0) && !(ch0_data & event) && (ch1_data & event)) {
+		ctx->csa.spu_chnlcnt_RW[0] = 1;
+	}
+}
+
+static int spu_backing_mbox_read(struct spu_context *ctx, u32 * data)
+{
+	u32 mbox_stat;
+	int ret = 0;
+
+	spin_lock(&ctx->csa.register_lock);
+	mbox_stat = ctx->csa.prob.mb_stat_R;
+	if (mbox_stat & 0x0000ff) {
+		/* Read the first available word.
+		 * Implementation note: the depth
+		 * of pu_mb_R is currently 1.
+		 */
+		*data = ctx->csa.prob.pu_mb_R;
+		ctx->csa.prob.mb_stat_R &= ~(0x0000ff);
+		ctx->csa.spu_chnlcnt_RW[28] = 1;
+		gen_spu_event(ctx, MFC_PU_MAILBOX_AVAILABLE_EVENT);
+		ret = 4;
+	}
+	spin_unlock(&ctx->csa.register_lock);
+	return ret;
+}
+
+static u32 spu_backing_mbox_stat_read(struct spu_context *ctx)
+{
+	return ctx->csa.prob.mb_stat_R;
+}
+
+static unsigned int spu_backing_mbox_stat_poll(struct spu_context *ctx,
+					  unsigned int events)
+{
+	int ret;
+	u32 stat;
+
+	ret = 0;
+	spin_lock_irq(&ctx->csa.register_lock);
+	stat = ctx->csa.prob.mb_stat_R;
+
+	/* if the requested event is there, return the poll
+	   mask, otherwise enable the interrupt to get notified,
+	   but first mark any pending interrupts as done so
+	   we don't get woken up unnecessarily */
+
+	if (events & (POLLIN | POLLRDNORM)) {
+		if (stat & 0xff0000)
+			ret |= POLLIN | POLLRDNORM;
+		else {
+			ctx->csa.priv1.int_stat_class2_RW &=
+				~CLASS2_MAILBOX_INTR;
+			ctx->csa.priv1.int_mask_class2_RW |=
+				CLASS2_ENABLE_MAILBOX_INTR;
+		}
+	}
+	if (events & (POLLOUT | POLLWRNORM)) {
+		if (stat & 0x00ff00)
+			ret = POLLOUT | POLLWRNORM;
+		else {
+			ctx->csa.priv1.int_stat_class2_RW &=
+				~CLASS2_MAILBOX_THRESHOLD_INTR;
+			ctx->csa.priv1.int_mask_class2_RW |=
+				CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR;
+		}
+	}
+	spin_unlock_irq(&ctx->csa.register_lock);
+	return ret;
+}
+
+static int spu_backing_ibox_read(struct spu_context *ctx, u32 * data)
+{
+	int ret;
+
+	spin_lock(&ctx->csa.register_lock);
+	if (ctx->csa.prob.mb_stat_R & 0xff0000) {
+		/* Read the first available word.
+		 * Implementation note: the depth
+		 * of puint_mb_R is currently 1.
+		 */
+		*data = ctx->csa.priv2.puint_mb_R;
+		ctx->csa.prob.mb_stat_R &= ~(0xff0000);
+		ctx->csa.spu_chnlcnt_RW[30] = 1;
+		gen_spu_event(ctx, MFC_PU_INT_MAILBOX_AVAILABLE_EVENT);
+		ret = 4;
+	} else {
+		/* make sure we get woken up by the interrupt */
+		ctx->csa.priv1.int_mask_class2_RW |= CLASS2_ENABLE_MAILBOX_INTR;
+		ret = 0;
+	}
+	spin_unlock(&ctx->csa.register_lock);
+	return ret;
+}
+
+static int spu_backing_wbox_write(struct spu_context *ctx, u32 data)
+{
+	int ret;
+
+	spin_lock(&ctx->csa.register_lock);
+	if ((ctx->csa.prob.mb_stat_R) & 0x00ff00) {
+		int slot = ctx->csa.spu_chnlcnt_RW[29];
+		int avail = (ctx->csa.prob.mb_stat_R & 0x00ff00) >> 8;
+
+		/* We have space to write wbox_data.
+		 * Implementation note: the depth
+		 * of spu_mb_W is currently 4.
+		 */
+		BUG_ON(avail != (4 - slot));
+		ctx->csa.spu_mailbox_data[slot] = data;
+		ctx->csa.spu_chnlcnt_RW[29] = ++slot;
+		ctx->csa.prob.mb_stat_R &= ~(0x00ff00);
+		ctx->csa.prob.mb_stat_R |= (((4 - slot) & 0xff) << 8);
+		gen_spu_event(ctx, MFC_SPU_MAILBOX_WRITTEN_EVENT);
+		ret = 4;
+	} else {
+		/* make sure we get woken up by the interrupt when space
+		   becomes available */
+		ctx->csa.priv1.int_mask_class2_RW |=
+			CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR;
+		ret = 0;
+	}
+	spin_unlock(&ctx->csa.register_lock);
+	return ret;
+}
+
+static u32 spu_backing_signal1_read(struct spu_context *ctx)
+{
+	return ctx->csa.spu_chnldata_RW[3];
+}
+
+static void spu_backing_signal1_write(struct spu_context *ctx, u32 data)
+{
+	spin_lock(&ctx->csa.register_lock);
+	if (ctx->csa.priv2.spu_cfg_RW & 0x1)
+		ctx->csa.spu_chnldata_RW[3] |= data;
+	else
+		ctx->csa.spu_chnldata_RW[3] = data;
+	ctx->csa.spu_chnlcnt_RW[3] = 1;
+	gen_spu_event(ctx, MFC_SIGNAL_1_EVENT);
+	spin_unlock(&ctx->csa.register_lock);
+}
+
+static u32 spu_backing_signal2_read(struct spu_context *ctx)
+{
+	return ctx->csa.spu_chnldata_RW[4];
+}
+
+static void spu_backing_signal2_write(struct spu_context *ctx, u32 data)
+{
+	spin_lock(&ctx->csa.register_lock);
+	if (ctx->csa.priv2.spu_cfg_RW & 0x2)
+		ctx->csa.spu_chnldata_RW[4] |= data;
+	else
+		ctx->csa.spu_chnldata_RW[4] = data;
+	ctx->csa.spu_chnlcnt_RW[4] = 1;
+	gen_spu_event(ctx, MFC_SIGNAL_2_EVENT);
+	spin_unlock(&ctx->csa.register_lock);
+}
+
+static void spu_backing_signal1_type_set(struct spu_context *ctx, u64 val)
+{
+	u64 tmp;
+
+	spin_lock(&ctx->csa.register_lock);
+	tmp = ctx->csa.priv2.spu_cfg_RW;
+	if (val)
+		tmp |= 1;
+	else
+		tmp &= ~1;
+	ctx->csa.priv2.spu_cfg_RW = tmp;
+	spin_unlock(&ctx->csa.register_lock);
+}
+
+static u64 spu_backing_signal1_type_get(struct spu_context *ctx)
+{
+	return ((ctx->csa.priv2.spu_cfg_RW & 1) != 0);
+}
+
+static void spu_backing_signal2_type_set(struct spu_context *ctx, u64 val)
+{
+	u64 tmp;
+
+	spin_lock(&ctx->csa.register_lock);
+	tmp = ctx->csa.priv2.spu_cfg_RW;
+	if (val)
+		tmp |= 2;
+	else
+		tmp &= ~2;
+	ctx->csa.priv2.spu_cfg_RW = tmp;
+	spin_unlock(&ctx->csa.register_lock);
+}
+
+static u64 spu_backing_signal2_type_get(struct spu_context *ctx)
+{
+	return ((ctx->csa.priv2.spu_cfg_RW & 2) != 0);
+}
+
+static u32 spu_backing_npc_read(struct spu_context *ctx)
+{
+	return ctx->csa.prob.spu_npc_RW;
+}
+
+static void spu_backing_npc_write(struct spu_context *ctx, u32 val)
+{
+	ctx->csa.prob.spu_npc_RW = val;
+}
+
+static u32 spu_backing_status_read(struct spu_context *ctx)
+{
+	return ctx->csa.prob.spu_status_R;
+}
+
+static char *spu_backing_get_ls(struct spu_context *ctx)
+{
+	return ctx->csa.lscsa->ls;
+}
+
+static void spu_backing_privcntl_write(struct spu_context *ctx, u64 val)
+{
+	ctx->csa.priv2.spu_privcntl_RW = val;
+}
+
+static u32 spu_backing_runcntl_read(struct spu_context *ctx)
+{
+	return ctx->csa.prob.spu_runcntl_RW;
+}
+
+static void spu_backing_runcntl_write(struct spu_context *ctx, u32 val)
+{
+	spin_lock(&ctx->csa.register_lock);
+	ctx->csa.prob.spu_runcntl_RW = val;
+	if (val & SPU_RUNCNTL_RUNNABLE) {
+		ctx->csa.prob.spu_status_R &=
+			~SPU_STATUS_STOPPED_BY_STOP &
+			~SPU_STATUS_STOPPED_BY_HALT &
+			~SPU_STATUS_SINGLE_STEP &
+			~SPU_STATUS_INVALID_INSTR &
+			~SPU_STATUS_INVALID_CH;
+		ctx->csa.prob.spu_status_R |= SPU_STATUS_RUNNING;
+	} else {
+		ctx->csa.prob.spu_status_R &= ~SPU_STATUS_RUNNING;
+	}
+	spin_unlock(&ctx->csa.register_lock);
+}
+
+static void spu_backing_runcntl_stop(struct spu_context *ctx)
+{
+	spu_backing_runcntl_write(ctx, SPU_RUNCNTL_STOP);
+}
+
+static void spu_backing_master_start(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+	u64 sr1;
+
+	spin_lock(&csa->register_lock);
+	sr1 = csa->priv1.mfc_sr1_RW | MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+	csa->priv1.mfc_sr1_RW = sr1;
+	spin_unlock(&csa->register_lock);
+}
+
+static void spu_backing_master_stop(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+	u64 sr1;
+
+	spin_lock(&csa->register_lock);
+	sr1 = csa->priv1.mfc_sr1_RW & ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+	csa->priv1.mfc_sr1_RW = sr1;
+	spin_unlock(&csa->register_lock);
+}
+
+static int spu_backing_set_mfc_query(struct spu_context * ctx, u32 mask,
+					u32 mode)
+{
+	struct spu_problem_collapsed *prob = &ctx->csa.prob;
+	int ret;
+
+	spin_lock(&ctx->csa.register_lock);
+	ret = -EAGAIN;
+	if (prob->dma_querytype_RW)
+		goto out;
+	ret = 0;
+	/* FIXME: what are the side-effects of this? */
+	prob->dma_querymask_RW = mask;
+	prob->dma_querytype_RW = mode;
+	/* In the current implementation, the SPU context is always
+	 * acquired in runnable state when new bits are added to the
+	 * mask (tagwait), so it's sufficient just to mask
+	 * dma_tagstatus_R with the 'mask' parameter here.
+	 */
+	ctx->csa.prob.dma_tagstatus_R &= mask;
+out:
+	spin_unlock(&ctx->csa.register_lock);
+
+	return ret;
+}
+
+static u32 spu_backing_read_mfc_tagstatus(struct spu_context * ctx)
+{
+	return ctx->csa.prob.dma_tagstatus_R;
+}
+
+static u32 spu_backing_get_mfc_free_elements(struct spu_context *ctx)
+{
+	return ctx->csa.prob.dma_qstatus_R;
+}
+
+static int spu_backing_send_mfc_command(struct spu_context *ctx,
+					struct mfc_dma_command *cmd)
+{
+	int ret;
+
+	spin_lock(&ctx->csa.register_lock);
+	ret = -EAGAIN;
+	/* FIXME: set up priv2->puq */
+	spin_unlock(&ctx->csa.register_lock);
+
+	return ret;
+}
+
+static void spu_backing_restart_dma(struct spu_context *ctx)
+{
+	ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_RESTART_DMA_COMMAND;
+}
+
+struct spu_context_ops spu_backing_ops = {
+	.mbox_read = spu_backing_mbox_read,
+	.mbox_stat_read = spu_backing_mbox_stat_read,
+	.mbox_stat_poll = spu_backing_mbox_stat_poll,
+	.ibox_read = spu_backing_ibox_read,
+	.wbox_write = spu_backing_wbox_write,
+	.signal1_read = spu_backing_signal1_read,
+	.signal1_write = spu_backing_signal1_write,
+	.signal2_read = spu_backing_signal2_read,
+	.signal2_write = spu_backing_signal2_write,
+	.signal1_type_set = spu_backing_signal1_type_set,
+	.signal1_type_get = spu_backing_signal1_type_get,
+	.signal2_type_set = spu_backing_signal2_type_set,
+	.signal2_type_get = spu_backing_signal2_type_get,
+	.npc_read = spu_backing_npc_read,
+	.npc_write = spu_backing_npc_write,
+	.status_read = spu_backing_status_read,
+	.get_ls = spu_backing_get_ls,
+	.privcntl_write = spu_backing_privcntl_write,
+	.runcntl_read = spu_backing_runcntl_read,
+	.runcntl_write = spu_backing_runcntl_write,
+	.runcntl_stop = spu_backing_runcntl_stop,
+	.master_start = spu_backing_master_start,
+	.master_stop = spu_backing_master_stop,
+	.set_mfc_query = spu_backing_set_mfc_query,
+	.read_mfc_tagstatus = spu_backing_read_mfc_tagstatus,
+	.get_mfc_free_elements = spu_backing_get_mfc_free_elements,
+	.send_mfc_command = spu_backing_send_mfc_command,
+	.restart_dma = spu_backing_restart_dma,
+};
diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c
new file mode 100644
index 0000000..6653ddb
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/context.c
@@ -0,0 +1,183 @@
+/*
+ * SPU file system -- SPU context management
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <asm/atomic.h>
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include "spufs.h"
+
+
+atomic_t nr_spu_contexts = ATOMIC_INIT(0);
+
+struct spu_context *alloc_spu_context(struct spu_gang *gang)
+{
+	struct spu_context *ctx;
+	ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
+	if (!ctx)
+		goto out;
+	/* Binding to physical processor deferred
+	 * until spu_activate().
+	 */
+	if (spu_init_csa(&ctx->csa))
+		goto out_free;
+	spin_lock_init(&ctx->mmio_lock);
+	mutex_init(&ctx->mapping_lock);
+	kref_init(&ctx->kref);
+	mutex_init(&ctx->state_mutex);
+	mutex_init(&ctx->run_mutex);
+	init_waitqueue_head(&ctx->ibox_wq);
+	init_waitqueue_head(&ctx->wbox_wq);
+	init_waitqueue_head(&ctx->stop_wq);
+	init_waitqueue_head(&ctx->mfc_wq);
+	init_waitqueue_head(&ctx->run_wq);
+	ctx->state = SPU_STATE_SAVED;
+	ctx->ops = &spu_backing_ops;
+	ctx->owner = get_task_mm(current);
+	INIT_LIST_HEAD(&ctx->rq);
+	INIT_LIST_HEAD(&ctx->aff_list);
+	if (gang)
+		spu_gang_add_ctx(gang, ctx);
+
+	__spu_update_sched_info(ctx);
+	spu_set_timeslice(ctx);
+	ctx->stats.util_state = SPU_UTIL_IDLE_LOADED;
+
+	atomic_inc(&nr_spu_contexts);
+	goto out;
+out_free:
+	kfree(ctx);
+	ctx = NULL;
+out:
+	return ctx;
+}
+
+void destroy_spu_context(struct kref *kref)
+{
+	struct spu_context *ctx;
+	ctx = container_of(kref, struct spu_context, kref);
+	spu_context_nospu_trace(destroy_spu_context__enter, ctx);
+	mutex_lock(&ctx->state_mutex);
+	spu_deactivate(ctx);
+	mutex_unlock(&ctx->state_mutex);
+	spu_fini_csa(&ctx->csa);
+	if (ctx->gang)
+		spu_gang_remove_ctx(ctx->gang, ctx);
+	if (ctx->prof_priv_kref)
+		kref_put(ctx->prof_priv_kref, ctx->prof_priv_release);
+	BUG_ON(!list_empty(&ctx->rq));
+	atomic_dec(&nr_spu_contexts);
+	kfree(ctx->switch_log);
+	kfree(ctx);
+}
+
+struct spu_context * get_spu_context(struct spu_context *ctx)
+{
+	kref_get(&ctx->kref);
+	return ctx;
+}
+
+int put_spu_context(struct spu_context *ctx)
+{
+	return kref_put(&ctx->kref, &destroy_spu_context);
+}
+
+/* give up the mm reference when the context is about to be destroyed */
+void spu_forget(struct spu_context *ctx)
+{
+	struct mm_struct *mm;
+
+	/*
+	 * This is basically an open-coded spu_acquire_saved, except that
+	 * we don't acquire the state mutex interruptible, and we don't
+	 * want this context to be rescheduled on release.
+	 */
+	mutex_lock(&ctx->state_mutex);
+	if (ctx->state != SPU_STATE_SAVED)
+		spu_deactivate(ctx);
+
+	mm = ctx->owner;
+	ctx->owner = NULL;
+	mmput(mm);
+	spu_release(ctx);
+}
+
+void spu_unmap_mappings(struct spu_context *ctx)
+{
+	mutex_lock(&ctx->mapping_lock);
+	if (ctx->local_store)
+		unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1);
+	if (ctx->mfc)
+		unmap_mapping_range(ctx->mfc, 0, SPUFS_MFC_MAP_SIZE, 1);
+	if (ctx->cntl)
+		unmap_mapping_range(ctx->cntl, 0, SPUFS_CNTL_MAP_SIZE, 1);
+	if (ctx->signal1)
+		unmap_mapping_range(ctx->signal1, 0, SPUFS_SIGNAL_MAP_SIZE, 1);
+	if (ctx->signal2)
+		unmap_mapping_range(ctx->signal2, 0, SPUFS_SIGNAL_MAP_SIZE, 1);
+	if (ctx->mss)
+		unmap_mapping_range(ctx->mss, 0, SPUFS_MSS_MAP_SIZE, 1);
+	if (ctx->psmap)
+		unmap_mapping_range(ctx->psmap, 0, SPUFS_PS_MAP_SIZE, 1);
+	mutex_unlock(&ctx->mapping_lock);
+}
+
+/**
+ * spu_acquire_saved - lock spu contex and make sure it is in saved state
+ * @ctx:	spu contex to lock
+ */
+int spu_acquire_saved(struct spu_context *ctx)
+{
+	int ret;
+
+	spu_context_nospu_trace(spu_acquire_saved__enter, ctx);
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+
+	if (ctx->state != SPU_STATE_SAVED) {
+		set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags);
+		spu_deactivate(ctx);
+	}
+
+	return 0;
+}
+
+/**
+ * spu_release_saved - unlock spu context and return it to the runqueue
+ * @ctx:	context to unlock
+ */
+void spu_release_saved(struct spu_context *ctx)
+{
+	BUG_ON(ctx->state != SPU_STATE_SAVED);
+
+	if (test_and_clear_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags) &&
+			test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
+		spu_activate(ctx, 0);
+
+	spu_release(ctx);
+}
+
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
new file mode 100644
index 0000000..af116aa
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -0,0 +1,250 @@
+/*
+ * SPU core dump code
+ *
+ * (C) Copyright 2006 IBM Corp.
+ *
+ * Author: Dwayne Grant McConnell <decimal@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/elf.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/syscalls.h>
+
+#include <asm/uaccess.h>
+
+#include "spufs.h"
+
+static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer,
+				size_t size, loff_t *off)
+{
+	u64 data;
+	int ret;
+
+	if (spufs_coredump_read[num].read)
+		return spufs_coredump_read[num].read(ctx, buffer, size, off);
+
+	data = spufs_coredump_read[num].get(ctx);
+	ret = snprintf(buffer, size, "0x%.16lx", data);
+	if (ret >= size)
+		return size;
+	return ++ret; /* count trailing NULL */
+}
+
+/*
+ * These are the only things you should do on a core-file: use only these
+ * functions to write out all the necessary info.
+ */
+static int spufs_dump_write(struct file *file, const void *addr, int nr, loff_t *foffset)
+{
+	unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
+	ssize_t written;
+
+	if (*foffset + nr > limit)
+		return -EIO;
+
+	written = file->f_op->write(file, addr, nr, &file->f_pos);
+	*foffset += written;
+
+	if (written != nr)
+		return -EIO;
+
+	return 0;
+}
+
+static int spufs_dump_align(struct file *file, char *buf, loff_t new_off,
+			    loff_t *foffset)
+{
+	int rc, size;
+
+	size = min((loff_t)PAGE_SIZE, new_off - *foffset);
+	memset(buf, 0, size);
+
+	rc = 0;
+	while (rc == 0 && new_off > *foffset) {
+		size = min((loff_t)PAGE_SIZE, new_off - *foffset);
+		rc = spufs_dump_write(file, buf, size, foffset);
+	}
+
+	return rc;
+}
+
+static int spufs_ctx_note_size(struct spu_context *ctx, int dfd)
+{
+	int i, sz, total = 0;
+	char *name;
+	char fullname[80];
+
+	for (i = 0; spufs_coredump_read[i].name != NULL; i++) {
+		name = spufs_coredump_read[i].name;
+		sz = spufs_coredump_read[i].size;
+
+		sprintf(fullname, "SPU/%d/%s", dfd, name);
+
+		total += sizeof(struct elf_note);
+		total += roundup(strlen(fullname) + 1, 4);
+		total += roundup(sz, 4);
+	}
+
+	return total;
+}
+
+/*
+ * The additional architecture-specific notes for Cell are various
+ * context files in the spu context.
+ *
+ * This function iterates over all open file descriptors and sees
+ * if they are a directory in spufs.  In that case we use spufs
+ * internal functionality to dump them without needing to actually
+ * open the files.
+ */
+static struct spu_context *coredump_next_context(int *fd)
+{
+	struct fdtable *fdt = files_fdtable(current->files);
+	struct file *file;
+	struct spu_context *ctx = NULL;
+
+	for (; *fd < fdt->max_fds; (*fd)++) {
+		if (!FD_ISSET(*fd, fdt->open_fds))
+			continue;
+
+		file = fcheck(*fd);
+
+		if (!file || file->f_op != &spufs_context_fops)
+			continue;
+
+		ctx = SPUFS_I(file->f_dentry->d_inode)->i_ctx;
+		if (ctx->flags & SPU_CREATE_NOSCHED)
+			continue;
+
+		break;
+	}
+
+	return ctx;
+}
+
+int spufs_coredump_extra_notes_size(void)
+{
+	struct spu_context *ctx;
+	int size = 0, rc, fd;
+
+	fd = 0;
+	while ((ctx = coredump_next_context(&fd)) != NULL) {
+		rc = spu_acquire_saved(ctx);
+		if (rc)
+			break;
+		rc = spufs_ctx_note_size(ctx, fd);
+		spu_release_saved(ctx);
+		if (rc < 0)
+			break;
+
+		size += rc;
+
+		/* start searching the next fd next time */
+		fd++;
+	}
+
+	return size;
+}
+
+static int spufs_arch_write_note(struct spu_context *ctx, int i,
+				  struct file *file, int dfd, loff_t *foffset)
+{
+	loff_t pos = 0;
+	int sz, rc, nread, total = 0;
+	const int bufsz = PAGE_SIZE;
+	char *name;
+	char fullname[80], *buf;
+	struct elf_note en;
+
+	buf = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	name = spufs_coredump_read[i].name;
+	sz = spufs_coredump_read[i].size;
+
+	sprintf(fullname, "SPU/%d/%s", dfd, name);
+	en.n_namesz = strlen(fullname) + 1;
+	en.n_descsz = sz;
+	en.n_type = NT_SPU;
+
+	rc = spufs_dump_write(file, &en, sizeof(en), foffset);
+	if (rc)
+		goto out;
+
+	rc = spufs_dump_write(file, fullname, en.n_namesz, foffset);
+	if (rc)
+		goto out;
+
+	rc = spufs_dump_align(file, buf, roundup(*foffset, 4), foffset);
+	if (rc)
+		goto out;
+
+	do {
+		nread = do_coredump_read(i, ctx, buf, bufsz, &pos);
+		if (nread > 0) {
+			rc = spufs_dump_write(file, buf, nread, foffset);
+			if (rc)
+				goto out;
+			total += nread;
+		}
+	} while (nread == bufsz && total < sz);
+
+	if (nread < 0) {
+		rc = nread;
+		goto out;
+	}
+
+	rc = spufs_dump_align(file, buf, roundup(*foffset - total + sz, 4),
+			      foffset);
+
+out:
+	free_page((unsigned long)buf);
+	return rc;
+}
+
+int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset)
+{
+	struct spu_context *ctx;
+	int fd, j, rc;
+
+	fd = 0;
+	while ((ctx = coredump_next_context(&fd)) != NULL) {
+		rc = spu_acquire_saved(ctx);
+		if (rc)
+			return rc;
+
+		for (j = 0; spufs_coredump_read[j].name != NULL; j++) {
+			rc = spufs_arch_write_note(ctx, j, file, fd, foffset);
+			if (rc) {
+				spu_release_saved(ctx);
+				return rc;
+			}
+		}
+
+		spu_release_saved(ctx);
+
+		/* start searching the next fd next time */
+		fd++;
+	}
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
new file mode 100644
index 0000000..f093a58
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -0,0 +1,192 @@
+/*
+ * Low-level SPU handling
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+
+#include "spufs.h"
+
+/**
+ * Handle an SPE event, depending on context SPU_CREATE_EVENTS_ENABLED flag.
+ *
+ * If the context was created with events, we just set the return event.
+ * Otherwise, send an appropriate signal to the process.
+ */
+static void spufs_handle_event(struct spu_context *ctx,
+				unsigned long ea, int type)
+{
+	siginfo_t info;
+
+	if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
+		ctx->event_return |= type;
+		wake_up_all(&ctx->stop_wq);
+		return;
+	}
+
+	memset(&info, 0, sizeof(info));
+
+	switch (type) {
+	case SPE_EVENT_INVALID_DMA:
+		info.si_signo = SIGBUS;
+		info.si_code = BUS_OBJERR;
+		break;
+	case SPE_EVENT_SPE_DATA_STORAGE:
+		info.si_signo = SIGSEGV;
+		info.si_addr = (void __user *)ea;
+		info.si_code = SEGV_ACCERR;
+		ctx->ops->restart_dma(ctx);
+		break;
+	case SPE_EVENT_DMA_ALIGNMENT:
+		info.si_signo = SIGBUS;
+		/* DAR isn't set for an alignment fault :( */
+		info.si_code = BUS_ADRALN;
+		break;
+	case SPE_EVENT_SPE_ERROR:
+		info.si_signo = SIGILL;
+		info.si_addr = (void __user *)(unsigned long)
+			ctx->ops->npc_read(ctx) - 4;
+		info.si_code = ILL_ILLOPC;
+		break;
+	}
+
+	if (info.si_signo)
+		force_sig_info(info.si_signo, &info, current);
+}
+
+int spufs_handle_class0(struct spu_context *ctx)
+{
+	unsigned long stat = ctx->csa.class_0_pending & CLASS0_INTR_MASK;
+
+	if (likely(!stat))
+		return 0;
+
+	if (stat & CLASS0_DMA_ALIGNMENT_INTR)
+		spufs_handle_event(ctx, ctx->csa.class_0_dar,
+			SPE_EVENT_DMA_ALIGNMENT);
+
+	if (stat & CLASS0_INVALID_DMA_COMMAND_INTR)
+		spufs_handle_event(ctx, ctx->csa.class_0_dar,
+			SPE_EVENT_INVALID_DMA);
+
+	if (stat & CLASS0_SPU_ERROR_INTR)
+		spufs_handle_event(ctx, ctx->csa.class_0_dar,
+			SPE_EVENT_SPE_ERROR);
+
+	ctx->csa.class_0_pending = 0;
+
+	return -EIO;
+}
+
+/*
+ * bottom half handler for page faults, we can't do this from
+ * interrupt context, since we might need to sleep.
+ * we also need to give up the mutex so we can get scheduled
+ * out while waiting for the backing store.
+ *
+ * TODO: try calling hash_page from the interrupt handler first
+ *       in order to speed up the easy case.
+ */
+int spufs_handle_class1(struct spu_context *ctx)
+{
+	u64 ea, dsisr, access;
+	unsigned long flags;
+	unsigned flt = 0;
+	int ret;
+
+	/*
+	 * dar and dsisr get passed from the registers
+	 * to the spu_context, to this function, but not
+	 * back to the spu if it gets scheduled again.
+	 *
+	 * if we don't handle the fault for a saved context
+	 * in time, we can still expect to get the same fault
+	 * the immediately after the context restore.
+	 */
+	ea = ctx->csa.class_1_dar;
+	dsisr = ctx->csa.class_1_dsisr;
+
+	if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)))
+		return 0;
+
+	spuctx_switch_state(ctx, SPU_UTIL_IOWAIT);
+
+	pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea,
+		dsisr, ctx->state);
+
+	ctx->stats.hash_flt++;
+	if (ctx->state == SPU_STATE_RUNNABLE)
+		ctx->spu->stats.hash_flt++;
+
+	/* we must not hold the lock when entering spu_handle_mm_fault */
+	spu_release(ctx);
+
+	access = (_PAGE_PRESENT | _PAGE_USER);
+	access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
+	local_irq_save(flags);
+	ret = hash_page(ea, access, 0x300);
+	local_irq_restore(flags);
+
+	/* hashing failed, so try the actual fault handler */
+	if (ret)
+		ret = spu_handle_mm_fault(current->mm, ea, dsisr, &flt);
+
+	/*
+	 * This is nasty: we need the state_mutex for all the bookkeeping even
+	 * if the syscall was interrupted by a signal. ewww.
+	 */
+	mutex_lock(&ctx->state_mutex);
+
+	/*
+	 * Clear dsisr under ctxt lock after handling the fault, so that
+	 * time slicing will not preempt the context while the page fault
+	 * handler is running. Context switch code removes mappings.
+	 */
+	ctx->csa.class_1_dar = ctx->csa.class_1_dsisr = 0;
+
+	/*
+	 * If we handled the fault successfully and are in runnable
+	 * state, restart the DMA.
+	 * In case of unhandled error report the problem to user space.
+	 */
+	if (!ret) {
+		if (flt & VM_FAULT_MAJOR)
+			ctx->stats.maj_flt++;
+		else
+			ctx->stats.min_flt++;
+		if (ctx->state == SPU_STATE_RUNNABLE) {
+			if (flt & VM_FAULT_MAJOR)
+				ctx->spu->stats.maj_flt++;
+			else
+				ctx->spu->stats.min_flt++;
+		}
+
+		if (ctx->spu)
+			ctx->ops->restart_dma(ctx);
+	} else
+		spufs_handle_event(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE);
+
+	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+	return ret;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
new file mode 100644
index 0000000..1b26071
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -0,0 +1,2770 @@
+/*
+ * SPU file system -- file contents
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/fs.h>
+#include <linux/ioctl.h>
+#include <linux/module.h>
+#include <linux/pagemap.h>
+#include <linux/poll.h>
+#include <linux/ptrace.h>
+#include <linux/seq_file.h>
+#include <linux/marker.h>
+
+#include <asm/io.h>
+#include <asm/time.h>
+#include <asm/spu.h>
+#include <asm/spu_info.h>
+#include <asm/uaccess.h>
+
+#include "spufs.h"
+
+#define SPUFS_MMAP_4K (PAGE_SIZE == 0x1000)
+
+/* Simple attribute files */
+struct spufs_attr {
+	int (*get)(void *, u64 *);
+	int (*set)(void *, u64);
+	char get_buf[24];       /* enough to store a u64 and "\n\0" */
+	char set_buf[24];
+	void *data;
+	const char *fmt;        /* format for read operation */
+	struct mutex mutex;     /* protects access to these buffers */
+};
+
+static int spufs_attr_open(struct inode *inode, struct file *file,
+		int (*get)(void *, u64 *), int (*set)(void *, u64),
+		const char *fmt)
+{
+	struct spufs_attr *attr;
+
+	attr = kmalloc(sizeof(*attr), GFP_KERNEL);
+	if (!attr)
+		return -ENOMEM;
+
+	attr->get = get;
+	attr->set = set;
+	attr->data = inode->i_private;
+	attr->fmt = fmt;
+	mutex_init(&attr->mutex);
+	file->private_data = attr;
+
+	return nonseekable_open(inode, file);
+}
+
+static int spufs_attr_release(struct inode *inode, struct file *file)
+{
+       kfree(file->private_data);
+	return 0;
+}
+
+static ssize_t spufs_attr_read(struct file *file, char __user *buf,
+		size_t len, loff_t *ppos)
+{
+	struct spufs_attr *attr;
+	size_t size;
+	ssize_t ret;
+
+	attr = file->private_data;
+	if (!attr->get)
+		return -EACCES;
+
+	ret = mutex_lock_interruptible(&attr->mutex);
+	if (ret)
+		return ret;
+
+	if (*ppos) {		/* continued read */
+		size = strlen(attr->get_buf);
+	} else {		/* first read */
+		u64 val;
+		ret = attr->get(attr->data, &val);
+		if (ret)
+			goto out;
+
+		size = scnprintf(attr->get_buf, sizeof(attr->get_buf),
+				 attr->fmt, (unsigned long long)val);
+	}
+
+	ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size);
+out:
+	mutex_unlock(&attr->mutex);
+	return ret;
+}
+
+static ssize_t spufs_attr_write(struct file *file, const char __user *buf,
+		size_t len, loff_t *ppos)
+{
+	struct spufs_attr *attr;
+	u64 val;
+	size_t size;
+	ssize_t ret;
+
+	attr = file->private_data;
+	if (!attr->set)
+		return -EACCES;
+
+	ret = mutex_lock_interruptible(&attr->mutex);
+	if (ret)
+		return ret;
+
+	ret = -EFAULT;
+	size = min(sizeof(attr->set_buf) - 1, len);
+	if (copy_from_user(attr->set_buf, buf, size))
+		goto out;
+
+	ret = len; /* claim we got the whole input */
+	attr->set_buf[size] = '\0';
+	val = simple_strtol(attr->set_buf, NULL, 0);
+	attr->set(attr->data, val);
+out:
+	mutex_unlock(&attr->mutex);
+	return ret;
+}
+
+#define DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt)	\
+static int __fops ## _open(struct inode *inode, struct file *file)	\
+{									\
+	__simple_attr_check_format(__fmt, 0ull);			\
+	return spufs_attr_open(inode, file, __get, __set, __fmt);	\
+}									\
+static struct file_operations __fops = {				\
+	.owner	 = THIS_MODULE,						\
+	.open	 = __fops ## _open,					\
+	.release = spufs_attr_release,					\
+	.read	 = spufs_attr_read,					\
+	.write	 = spufs_attr_write,					\
+};
+
+
+static int
+spufs_mem_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (!i->i_openers++)
+		ctx->local_store = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
+static int
+spufs_mem_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->local_store = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
+static ssize_t
+__spufs_mem_read(struct spu_context *ctx, char __user *buffer,
+			size_t size, loff_t *pos)
+{
+	char *local_store = ctx->ops->get_ls(ctx);
+	return simple_read_from_buffer(buffer, size, pos, local_store,
+					LS_SIZE);
+}
+
+static ssize_t
+spufs_mem_read(struct file *file, char __user *buffer,
+				size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	ssize_t ret;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	ret = __spufs_mem_read(ctx, buffer, size, pos);
+	spu_release(ctx);
+
+	return ret;
+}
+
+static ssize_t
+spufs_mem_write(struct file *file, const char __user *buffer,
+					size_t size, loff_t *ppos)
+{
+	struct spu_context *ctx = file->private_data;
+	char *local_store;
+	loff_t pos = *ppos;
+	int ret;
+
+	if (pos < 0)
+		return -EINVAL;
+	if (pos > LS_SIZE)
+		return -EFBIG;
+	if (size > LS_SIZE - pos)
+		size = LS_SIZE - pos;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+
+	local_store = ctx->ops->get_ls(ctx);
+	ret = copy_from_user(local_store + pos, buffer, size);
+	spu_release(ctx);
+
+	if (ret)
+		return -EFAULT;
+	*ppos = pos + size;
+	return size;
+}
+
+static int
+spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct spu_context *ctx	= vma->vm_file->private_data;
+	unsigned long address = (unsigned long)vmf->virtual_address;
+	unsigned long pfn, offset;
+
+#ifdef CONFIG_SPU_FS_64K_LS
+	struct spu_state *csa = &ctx->csa;
+	int psize;
+
+	/* Check what page size we are using */
+	psize = get_slice_psize(vma->vm_mm, address);
+
+	/* Some sanity checking */
+	BUG_ON(csa->use_big_pages != (psize == MMU_PAGE_64K));
+
+	/* Wow, 64K, cool, we need to align the address though */
+	if (csa->use_big_pages) {
+		BUG_ON(vma->vm_start & 0xffff);
+		address &= ~0xfffful;
+	}
+#endif /* CONFIG_SPU_FS_64K_LS */
+
+	offset = vmf->pgoff << PAGE_SHIFT;
+	if (offset >= LS_SIZE)
+		return VM_FAULT_SIGBUS;
+
+	pr_debug("spufs_mem_mmap_fault address=0x%lx, offset=0x%lx\n",
+			address, offset);
+
+	if (spu_acquire(ctx))
+		return VM_FAULT_NOPAGE;
+
+	if (ctx->state == SPU_STATE_SAVED) {
+		vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot)
+							& ~_PAGE_NO_CACHE);
+		pfn = vmalloc_to_pfn(ctx->csa.lscsa->ls + offset);
+	} else {
+		vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot)
+					     | _PAGE_NO_CACHE);
+		pfn = (ctx->spu->local_store_phys + offset) >> PAGE_SHIFT;
+	}
+	vm_insert_pfn(vma, address, pfn);
+
+	spu_release(ctx);
+
+	return VM_FAULT_NOPAGE;
+}
+
+static int spufs_mem_mmap_access(struct vm_area_struct *vma,
+				unsigned long address,
+				void *buf, int len, int write)
+{
+	struct spu_context *ctx = vma->vm_file->private_data;
+	unsigned long offset = address - vma->vm_start;
+	char *local_store;
+
+	if (write && !(vma->vm_flags & VM_WRITE))
+		return -EACCES;
+	if (spu_acquire(ctx))
+		return -EINTR;
+	if ((offset + len) > vma->vm_end)
+		len = vma->vm_end - offset;
+	local_store = ctx->ops->get_ls(ctx);
+	if (write)
+		memcpy_toio(local_store + offset, buf, len);
+	else
+		memcpy_fromio(buf, local_store + offset, len);
+	spu_release(ctx);
+	return len;
+}
+
+static struct vm_operations_struct spufs_mem_mmap_vmops = {
+	.fault = spufs_mem_mmap_fault,
+	.access = spufs_mem_mmap_access,
+};
+
+static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+#ifdef CONFIG_SPU_FS_64K_LS
+	struct spu_context	*ctx = file->private_data;
+	struct spu_state	*csa = &ctx->csa;
+
+	/* Sanity check VMA alignment */
+	if (csa->use_big_pages) {
+		pr_debug("spufs_mem_mmap 64K, start=0x%lx, end=0x%lx,"
+			 " pgoff=0x%lx\n", vma->vm_start, vma->vm_end,
+			 vma->vm_pgoff);
+		if (vma->vm_start & 0xffff)
+			return -EINVAL;
+		if (vma->vm_pgoff & 0xf)
+			return -EINVAL;
+	}
+#endif /* CONFIG_SPU_FS_64K_LS */
+
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot)
+				     | _PAGE_NO_CACHE);
+
+	vma->vm_ops = &spufs_mem_mmap_vmops;
+	return 0;
+}
+
+#ifdef CONFIG_SPU_FS_64K_LS
+static unsigned long spufs_get_unmapped_area(struct file *file,
+		unsigned long addr, unsigned long len, unsigned long pgoff,
+		unsigned long flags)
+{
+	struct spu_context	*ctx = file->private_data;
+	struct spu_state	*csa = &ctx->csa;
+
+	/* If not using big pages, fallback to normal MM g_u_a */
+	if (!csa->use_big_pages)
+		return current->mm->get_unmapped_area(file, addr, len,
+						      pgoff, flags);
+
+	/* Else, try to obtain a 64K pages slice */
+	return slice_get_unmapped_area(addr, len, flags,
+				       MMU_PAGE_64K, 1, 0);
+}
+#endif /* CONFIG_SPU_FS_64K_LS */
+
+static const struct file_operations spufs_mem_fops = {
+	.open			= spufs_mem_open,
+	.release		= spufs_mem_release,
+	.read			= spufs_mem_read,
+	.write			= spufs_mem_write,
+	.llseek			= generic_file_llseek,
+	.mmap			= spufs_mem_mmap,
+#ifdef CONFIG_SPU_FS_64K_LS
+	.get_unmapped_area	= spufs_get_unmapped_area,
+#endif
+};
+
+static int spufs_ps_fault(struct vm_area_struct *vma,
+				    struct vm_fault *vmf,
+				    unsigned long ps_offs,
+				    unsigned long ps_size)
+{
+	struct spu_context *ctx = vma->vm_file->private_data;
+	unsigned long area, offset = vmf->pgoff << PAGE_SHIFT;
+	int ret = 0;
+
+	spu_context_nospu_trace(spufs_ps_fault__enter, ctx);
+
+	if (offset >= ps_size)
+		return VM_FAULT_SIGBUS;
+
+	if (fatal_signal_pending(current))
+		return VM_FAULT_SIGBUS;
+
+	/*
+	 * Because we release the mmap_sem, the context may be destroyed while
+	 * we're in spu_wait. Grab an extra reference so it isn't destroyed
+	 * in the meantime.
+	 */
+	get_spu_context(ctx);
+
+	/*
+	 * We have to wait for context to be loaded before we have
+	 * pages to hand out to the user, but we don't want to wait
+	 * with the mmap_sem held.
+	 * It is possible to drop the mmap_sem here, but then we need
+	 * to return VM_FAULT_NOPAGE because the mappings may have
+	 * hanged.
+	 */
+	if (spu_acquire(ctx))
+		goto refault;
+
+	if (ctx->state == SPU_STATE_SAVED) {
+		up_read(&current->mm->mmap_sem);
+		spu_context_nospu_trace(spufs_ps_fault__sleep, ctx);
+		ret = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
+		spu_context_trace(spufs_ps_fault__wake, ctx, ctx->spu);
+		down_read(&current->mm->mmap_sem);
+	} else {
+		area = ctx->spu->problem_phys + ps_offs;
+		vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
+					(area + offset) >> PAGE_SHIFT);
+		spu_context_trace(spufs_ps_fault__insert, ctx, ctx->spu);
+	}
+
+	if (!ret)
+		spu_release(ctx);
+
+refault:
+	put_spu_context(ctx);
+	return VM_FAULT_NOPAGE;
+}
+
+#if SPUFS_MMAP_4K
+static int spufs_cntl_mmap_fault(struct vm_area_struct *vma,
+					   struct vm_fault *vmf)
+{
+	return spufs_ps_fault(vma, vmf, 0x4000, SPUFS_CNTL_MAP_SIZE);
+}
+
+static struct vm_operations_struct spufs_cntl_mmap_vmops = {
+	.fault = spufs_cntl_mmap_fault,
+};
+
+/*
+ * mmap support for problem state control area [0x4000 - 0x4fff].
+ */
+static int spufs_cntl_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot)
+				     | _PAGE_NO_CACHE | _PAGE_GUARDED);
+
+	vma->vm_ops = &spufs_cntl_mmap_vmops;
+	return 0;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_cntl_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+static int spufs_cntl_get(void *data, u64 *val)
+{
+	struct spu_context *ctx = data;
+	int ret;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	*val = ctx->ops->status_read(ctx);
+	spu_release(ctx);
+
+	return 0;
+}
+
+static int spufs_cntl_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int ret;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	ctx->ops->runcntl_write(ctx, val);
+	spu_release(ctx);
+
+	return 0;
+}
+
+static int spufs_cntl_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (!i->i_openers++)
+		ctx->cntl = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+	return simple_attr_open(inode, file, spufs_cntl_get,
+					spufs_cntl_set, "0x%08lx");
+}
+
+static int
+spufs_cntl_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	simple_attr_release(inode, file);
+
+	mutex_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->cntl = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
+static const struct file_operations spufs_cntl_fops = {
+	.open = spufs_cntl_open,
+	.release = spufs_cntl_release,
+	.read = simple_attr_read,
+	.write = simple_attr_write,
+	.mmap = spufs_cntl_mmap,
+};
+
+static int
+spufs_regs_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	file->private_data = i->i_ctx;
+	return 0;
+}
+
+static ssize_t
+__spufs_regs_read(struct spu_context *ctx, char __user *buffer,
+			size_t size, loff_t *pos)
+{
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	return simple_read_from_buffer(buffer, size, pos,
+				      lscsa->gprs, sizeof lscsa->gprs);
+}
+
+static ssize_t
+spufs_regs_read(struct file *file, char __user *buffer,
+		size_t size, loff_t *pos)
+{
+	int ret;
+	struct spu_context *ctx = file->private_data;
+
+	/* pre-check for file position: if we'd return EOF, there's no point
+	 * causing a deschedule */
+	if (*pos >= sizeof(ctx->csa.lscsa->gprs))
+		return 0;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	ret = __spufs_regs_read(ctx, buffer, size, pos);
+	spu_release_saved(ctx);
+	return ret;
+}
+
+static ssize_t
+spufs_regs_write(struct file *file, const char __user *buffer,
+		 size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	int ret;
+
+	size = min_t(ssize_t, sizeof lscsa->gprs - *pos, size);
+	if (size <= 0)
+		return -EFBIG;
+	*pos += size;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+
+	ret = copy_from_user(lscsa->gprs + *pos - size,
+			     buffer, size) ? -EFAULT : size;
+
+	spu_release_saved(ctx);
+	return ret;
+}
+
+static const struct file_operations spufs_regs_fops = {
+	.open	 = spufs_regs_open,
+	.read    = spufs_regs_read,
+	.write   = spufs_regs_write,
+	.llseek  = generic_file_llseek,
+};
+
+static ssize_t
+__spufs_fpcr_read(struct spu_context *ctx, char __user * buffer,
+			size_t size, loff_t * pos)
+{
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	return simple_read_from_buffer(buffer, size, pos,
+				      &lscsa->fpcr, sizeof(lscsa->fpcr));
+}
+
+static ssize_t
+spufs_fpcr_read(struct file *file, char __user * buffer,
+		size_t size, loff_t * pos)
+{
+	int ret;
+	struct spu_context *ctx = file->private_data;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	ret = __spufs_fpcr_read(ctx, buffer, size, pos);
+	spu_release_saved(ctx);
+	return ret;
+}
+
+static ssize_t
+spufs_fpcr_write(struct file *file, const char __user * buffer,
+		 size_t size, loff_t * pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	int ret;
+
+	size = min_t(ssize_t, sizeof(lscsa->fpcr) - *pos, size);
+	if (size <= 0)
+		return -EFBIG;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+
+	*pos += size;
+	ret = copy_from_user((char *)&lscsa->fpcr + *pos - size,
+			     buffer, size) ? -EFAULT : size;
+
+	spu_release_saved(ctx);
+	return ret;
+}
+
+static const struct file_operations spufs_fpcr_fops = {
+	.open = spufs_regs_open,
+	.read = spufs_fpcr_read,
+	.write = spufs_fpcr_write,
+	.llseek = generic_file_llseek,
+};
+
+/* generic open function for all pipe-like files */
+static int spufs_pipe_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	file->private_data = i->i_ctx;
+
+	return nonseekable_open(inode, file);
+}
+
+/*
+ * Read as many bytes from the mailbox as possible, until
+ * one of the conditions becomes true:
+ *
+ * - no more data available in the mailbox
+ * - end of the user provided buffer
+ * - end of the mapped area
+ */
+static ssize_t spufs_mbox_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 mbox_data, __user *udata;
+	ssize_t count;
+
+	if (len < 4)
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	udata = (void __user *)buf;
+
+	count = spu_acquire(ctx);
+	if (count)
+		return count;
+
+	for (count = 0; (count + 4) <= len; count += 4, udata++) {
+		int ret;
+		ret = ctx->ops->mbox_read(ctx, &mbox_data);
+		if (ret == 0)
+			break;
+
+		/*
+		 * at the end of the mapped area, we can fault
+		 * but still need to return the data we have
+		 * read successfully so far.
+		 */
+		ret = __put_user(mbox_data, udata);
+		if (ret) {
+			if (!count)
+				count = -EFAULT;
+			break;
+		}
+	}
+	spu_release(ctx);
+
+	if (!count)
+		count = -EAGAIN;
+
+	return count;
+}
+
+static const struct file_operations spufs_mbox_fops = {
+	.open	= spufs_pipe_open,
+	.read	= spufs_mbox_read,
+};
+
+static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	ssize_t ret;
+	u32 mbox_stat;
+
+	if (len < 4)
+		return -EINVAL;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+
+	mbox_stat = ctx->ops->mbox_stat_read(ctx) & 0xff;
+
+	spu_release(ctx);
+
+	if (copy_to_user(buf, &mbox_stat, sizeof mbox_stat))
+		return -EFAULT;
+
+	return 4;
+}
+
+static const struct file_operations spufs_mbox_stat_fops = {
+	.open	= spufs_pipe_open,
+	.read	= spufs_mbox_stat_read,
+};
+
+/* low-level ibox access function */
+size_t spu_ibox_read(struct spu_context *ctx, u32 *data)
+{
+	return ctx->ops->ibox_read(ctx, data);
+}
+
+static int spufs_ibox_fasync(int fd, struct file *file, int on)
+{
+	struct spu_context *ctx = file->private_data;
+
+	return fasync_helper(fd, file, on, &ctx->ibox_fasync);
+}
+
+/* interrupt-level ibox callback function. */
+void spufs_ibox_callback(struct spu *spu)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	if (!ctx)
+		return;
+
+	wake_up_all(&ctx->ibox_wq);
+	kill_fasync(&ctx->ibox_fasync, SIGIO, POLLIN);
+}
+
+/*
+ * Read as many bytes from the interrupt mailbox as possible, until
+ * one of the conditions becomes true:
+ *
+ * - no more data available in the mailbox
+ * - end of the user provided buffer
+ * - end of the mapped area
+ *
+ * If the file is opened without O_NONBLOCK, we wait here until
+ * any data is available, but return when we have been able to
+ * read something.
+ */
+static ssize_t spufs_ibox_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 ibox_data, __user *udata;
+	ssize_t count;
+
+	if (len < 4)
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	udata = (void __user *)buf;
+
+	count = spu_acquire(ctx);
+	if (count)
+		goto out;
+
+	/* wait only for the first element */
+	count = 0;
+	if (file->f_flags & O_NONBLOCK) {
+		if (!spu_ibox_read(ctx, &ibox_data)) {
+			count = -EAGAIN;
+			goto out_unlock;
+		}
+	} else {
+		count = spufs_wait(ctx->ibox_wq, spu_ibox_read(ctx, &ibox_data));
+		if (count)
+			goto out;
+	}
+
+	/* if we can't write at all, return -EFAULT */
+	count = __put_user(ibox_data, udata);
+	if (count)
+		goto out_unlock;
+
+	for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
+		int ret;
+		ret = ctx->ops->ibox_read(ctx, &ibox_data);
+		if (ret == 0)
+			break;
+		/*
+		 * at the end of the mapped area, we can fault
+		 * but still need to return the data we have
+		 * read successfully so far.
+		 */
+		ret = __put_user(ibox_data, udata);
+		if (ret)
+			break;
+	}
+
+out_unlock:
+	spu_release(ctx);
+out:
+	return count;
+}
+
+static unsigned int spufs_ibox_poll(struct file *file, poll_table *wait)
+{
+	struct spu_context *ctx = file->private_data;
+	unsigned int mask;
+
+	poll_wait(file, &ctx->ibox_wq, wait);
+
+	/*
+	 * For now keep this uninterruptible and also ignore the rule
+	 * that poll should not sleep.  Will be fixed later.
+	 */
+	mutex_lock(&ctx->state_mutex);
+	mask = ctx->ops->mbox_stat_poll(ctx, POLLIN | POLLRDNORM);
+	spu_release(ctx);
+
+	return mask;
+}
+
+static const struct file_operations spufs_ibox_fops = {
+	.open	= spufs_pipe_open,
+	.read	= spufs_ibox_read,
+	.poll	= spufs_ibox_poll,
+	.fasync	= spufs_ibox_fasync,
+};
+
+static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	ssize_t ret;
+	u32 ibox_stat;
+
+	if (len < 4)
+		return -EINVAL;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	ibox_stat = (ctx->ops->mbox_stat_read(ctx) >> 16) & 0xff;
+	spu_release(ctx);
+
+	if (copy_to_user(buf, &ibox_stat, sizeof ibox_stat))
+		return -EFAULT;
+
+	return 4;
+}
+
+static const struct file_operations spufs_ibox_stat_fops = {
+	.open	= spufs_pipe_open,
+	.read	= spufs_ibox_stat_read,
+};
+
+/* low-level mailbox write */
+size_t spu_wbox_write(struct spu_context *ctx, u32 data)
+{
+	return ctx->ops->wbox_write(ctx, data);
+}
+
+static int spufs_wbox_fasync(int fd, struct file *file, int on)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret;
+
+	ret = fasync_helper(fd, file, on, &ctx->wbox_fasync);
+
+	return ret;
+}
+
+/* interrupt-level wbox callback function. */
+void spufs_wbox_callback(struct spu *spu)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	if (!ctx)
+		return;
+
+	wake_up_all(&ctx->wbox_wq);
+	kill_fasync(&ctx->wbox_fasync, SIGIO, POLLOUT);
+}
+
+/*
+ * Write as many bytes to the interrupt mailbox as possible, until
+ * one of the conditions becomes true:
+ *
+ * - the mailbox is full
+ * - end of the user provided buffer
+ * - end of the mapped area
+ *
+ * If the file is opened without O_NONBLOCK, we wait here until
+ * space is availabyl, but return when we have been able to
+ * write something.
+ */
+static ssize_t spufs_wbox_write(struct file *file, const char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 wbox_data, __user *udata;
+	ssize_t count;
+
+	if (len < 4)
+		return -EINVAL;
+
+	udata = (void __user *)buf;
+	if (!access_ok(VERIFY_READ, buf, len))
+		return -EFAULT;
+
+	if (__get_user(wbox_data, udata))
+		return -EFAULT;
+
+	count = spu_acquire(ctx);
+	if (count)
+		goto out;
+
+	/*
+	 * make sure we can at least write one element, by waiting
+	 * in case of !O_NONBLOCK
+	 */
+	count = 0;
+	if (file->f_flags & O_NONBLOCK) {
+		if (!spu_wbox_write(ctx, wbox_data)) {
+			count = -EAGAIN;
+			goto out_unlock;
+		}
+	} else {
+		count = spufs_wait(ctx->wbox_wq, spu_wbox_write(ctx, wbox_data));
+		if (count)
+			goto out;
+	}
+
+
+	/* write as much as possible */
+	for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
+		int ret;
+		ret = __get_user(wbox_data, udata);
+		if (ret)
+			break;
+
+		ret = spu_wbox_write(ctx, wbox_data);
+		if (ret == 0)
+			break;
+	}
+
+out_unlock:
+	spu_release(ctx);
+out:
+	return count;
+}
+
+static unsigned int spufs_wbox_poll(struct file *file, poll_table *wait)
+{
+	struct spu_context *ctx = file->private_data;
+	unsigned int mask;
+
+	poll_wait(file, &ctx->wbox_wq, wait);
+
+	/*
+	 * For now keep this uninterruptible and also ignore the rule
+	 * that poll should not sleep.  Will be fixed later.
+	 */
+	mutex_lock(&ctx->state_mutex);
+	mask = ctx->ops->mbox_stat_poll(ctx, POLLOUT | POLLWRNORM);
+	spu_release(ctx);
+
+	return mask;
+}
+
+static const struct file_operations spufs_wbox_fops = {
+	.open	= spufs_pipe_open,
+	.write	= spufs_wbox_write,
+	.poll	= spufs_wbox_poll,
+	.fasync	= spufs_wbox_fasync,
+};
+
+static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	ssize_t ret;
+	u32 wbox_stat;
+
+	if (len < 4)
+		return -EINVAL;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	wbox_stat = (ctx->ops->mbox_stat_read(ctx) >> 8) & 0xff;
+	spu_release(ctx);
+
+	if (copy_to_user(buf, &wbox_stat, sizeof wbox_stat))
+		return -EFAULT;
+
+	return 4;
+}
+
+static const struct file_operations spufs_wbox_stat_fops = {
+	.open	= spufs_pipe_open,
+	.read	= spufs_wbox_stat_read,
+};
+
+static int spufs_signal1_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (!i->i_openers++)
+		ctx->signal1 = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+	return nonseekable_open(inode, file);
+}
+
+static int
+spufs_signal1_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->signal1 = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
+static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	int ret = 0;
+	u32 data;
+
+	if (len < 4)
+		return -EINVAL;
+
+	if (ctx->csa.spu_chnlcnt_RW[3]) {
+		data = ctx->csa.spu_chnldata_RW[3];
+		ret = 4;
+	}
+
+	if (!ret)
+		goto out;
+
+	if (copy_to_user(buf, &data, 4))
+		return -EFAULT;
+
+out:
+	return ret;
+}
+
+static ssize_t spufs_signal1_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	int ret;
+	struct spu_context *ctx = file->private_data;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	ret = __spufs_signal1_read(ctx, buf, len, pos);
+	spu_release_saved(ctx);
+
+	return ret;
+}
+
+static ssize_t spufs_signal1_write(struct file *file, const char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx;
+	ssize_t ret;
+	u32 data;
+
+	ctx = file->private_data;
+
+	if (len < 4)
+		return -EINVAL;
+
+	if (copy_from_user(&data, buf, 4))
+		return -EFAULT;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	ctx->ops->signal1_write(ctx, data);
+	spu_release(ctx);
+
+	return 4;
+}
+
+static int
+spufs_signal1_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+#if SPUFS_SIGNAL_MAP_SIZE == 0x1000
+	return spufs_ps_fault(vma, vmf, 0x14000, SPUFS_SIGNAL_MAP_SIZE);
+#elif SPUFS_SIGNAL_MAP_SIZE == 0x10000
+	/* For 64k pages, both signal1 and signal2 can be used to mmap the whole
+	 * signal 1 and 2 area
+	 */
+	return spufs_ps_fault(vma, vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE);
+#else
+#error unsupported page size
+#endif
+}
+
+static struct vm_operations_struct spufs_signal1_mmap_vmops = {
+	.fault = spufs_signal1_mmap_fault,
+};
+
+static int spufs_signal1_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot)
+				     | _PAGE_NO_CACHE | _PAGE_GUARDED);
+
+	vma->vm_ops = &spufs_signal1_mmap_vmops;
+	return 0;
+}
+
+static const struct file_operations spufs_signal1_fops = {
+	.open = spufs_signal1_open,
+	.release = spufs_signal1_release,
+	.read = spufs_signal1_read,
+	.write = spufs_signal1_write,
+	.mmap = spufs_signal1_mmap,
+};
+
+static const struct file_operations spufs_signal1_nosched_fops = {
+	.open = spufs_signal1_open,
+	.release = spufs_signal1_release,
+	.write = spufs_signal1_write,
+	.mmap = spufs_signal1_mmap,
+};
+
+static int spufs_signal2_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (!i->i_openers++)
+		ctx->signal2 = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+	return nonseekable_open(inode, file);
+}
+
+static int
+spufs_signal2_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->signal2 = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
+static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	int ret = 0;
+	u32 data;
+
+	if (len < 4)
+		return -EINVAL;
+
+	if (ctx->csa.spu_chnlcnt_RW[4]) {
+		data =  ctx->csa.spu_chnldata_RW[4];
+		ret = 4;
+	}
+
+	if (!ret)
+		goto out;
+
+	if (copy_to_user(buf, &data, 4))
+		return -EFAULT;
+
+out:
+	return ret;
+}
+
+static ssize_t spufs_signal2_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	ret = __spufs_signal2_read(ctx, buf, len, pos);
+	spu_release_saved(ctx);
+
+	return ret;
+}
+
+static ssize_t spufs_signal2_write(struct file *file, const char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx;
+	ssize_t ret;
+	u32 data;
+
+	ctx = file->private_data;
+
+	if (len < 4)
+		return -EINVAL;
+
+	if (copy_from_user(&data, buf, 4))
+		return -EFAULT;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	ctx->ops->signal2_write(ctx, data);
+	spu_release(ctx);
+
+	return 4;
+}
+
+#if SPUFS_MMAP_4K
+static int
+spufs_signal2_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+#if SPUFS_SIGNAL_MAP_SIZE == 0x1000
+	return spufs_ps_fault(vma, vmf, 0x1c000, SPUFS_SIGNAL_MAP_SIZE);
+#elif SPUFS_SIGNAL_MAP_SIZE == 0x10000
+	/* For 64k pages, both signal1 and signal2 can be used to mmap the whole
+	 * signal 1 and 2 area
+	 */
+	return spufs_ps_fault(vma, vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE);
+#else
+#error unsupported page size
+#endif
+}
+
+static struct vm_operations_struct spufs_signal2_mmap_vmops = {
+	.fault = spufs_signal2_mmap_fault,
+};
+
+static int spufs_signal2_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot)
+				     | _PAGE_NO_CACHE | _PAGE_GUARDED);
+
+	vma->vm_ops = &spufs_signal2_mmap_vmops;
+	return 0;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_signal2_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+static const struct file_operations spufs_signal2_fops = {
+	.open = spufs_signal2_open,
+	.release = spufs_signal2_release,
+	.read = spufs_signal2_read,
+	.write = spufs_signal2_write,
+	.mmap = spufs_signal2_mmap,
+};
+
+static const struct file_operations spufs_signal2_nosched_fops = {
+	.open = spufs_signal2_open,
+	.release = spufs_signal2_release,
+	.write = spufs_signal2_write,
+	.mmap = spufs_signal2_mmap,
+};
+
+/*
+ * This is a wrapper around DEFINE_SIMPLE_ATTRIBUTE which does the
+ * work of acquiring (or not) the SPU context before calling through
+ * to the actual get routine. The set routine is called directly.
+ */
+#define SPU_ATTR_NOACQUIRE	0
+#define SPU_ATTR_ACQUIRE	1
+#define SPU_ATTR_ACQUIRE_SAVED	2
+
+#define DEFINE_SPUFS_ATTRIBUTE(__name, __get, __set, __fmt, __acquire)	\
+static int __##__get(void *data, u64 *val)				\
+{									\
+	struct spu_context *ctx = data;					\
+	int ret = 0;							\
+									\
+	if (__acquire == SPU_ATTR_ACQUIRE) {				\
+		ret = spu_acquire(ctx);					\
+		if (ret)						\
+			return ret;					\
+		*val = __get(ctx);					\
+		spu_release(ctx);					\
+	} else if (__acquire == SPU_ATTR_ACQUIRE_SAVED)	{		\
+		ret = spu_acquire_saved(ctx);				\
+		if (ret)						\
+			return ret;					\
+		*val = __get(ctx);					\
+		spu_release_saved(ctx);					\
+	} else								\
+		*val = __get(ctx);					\
+									\
+	return 0;							\
+}									\
+DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__name, __##__get, __set, __fmt);
+
+static int spufs_signal1_type_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int ret;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	ctx->ops->signal1_type_set(ctx, val);
+	spu_release(ctx);
+
+	return 0;
+}
+
+static u64 spufs_signal1_type_get(struct spu_context *ctx)
+{
+	return ctx->ops->signal1_type_get(ctx);
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_signal1_type, spufs_signal1_type_get,
+		       spufs_signal1_type_set, "%llu\n", SPU_ATTR_ACQUIRE);
+
+
+static int spufs_signal2_type_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int ret;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	ctx->ops->signal2_type_set(ctx, val);
+	spu_release(ctx);
+
+	return 0;
+}
+
+static u64 spufs_signal2_type_get(struct spu_context *ctx)
+{
+	return ctx->ops->signal2_type_get(ctx);
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get,
+		       spufs_signal2_type_set, "%llu\n", SPU_ATTR_ACQUIRE);
+
+#if SPUFS_MMAP_4K
+static int
+spufs_mss_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	return spufs_ps_fault(vma, vmf, 0x0000, SPUFS_MSS_MAP_SIZE);
+}
+
+static struct vm_operations_struct spufs_mss_mmap_vmops = {
+	.fault = spufs_mss_mmap_fault,
+};
+
+/*
+ * mmap support for problem state MFC DMA area [0x0000 - 0x0fff].
+ */
+static int spufs_mss_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot)
+				     | _PAGE_NO_CACHE | _PAGE_GUARDED);
+
+	vma->vm_ops = &spufs_mss_mmap_vmops;
+	return 0;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_mss_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+static int spufs_mss_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	file->private_data = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (!i->i_openers++)
+		ctx->mss = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+	return nonseekable_open(inode, file);
+}
+
+static int
+spufs_mss_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->mss = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
+static const struct file_operations spufs_mss_fops = {
+	.open	 = spufs_mss_open,
+	.release = spufs_mss_release,
+	.mmap	 = spufs_mss_mmap,
+};
+
+static int
+spufs_psmap_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	return spufs_ps_fault(vma, vmf, 0x0000, SPUFS_PS_MAP_SIZE);
+}
+
+static struct vm_operations_struct spufs_psmap_mmap_vmops = {
+	.fault = spufs_psmap_mmap_fault,
+};
+
+/*
+ * mmap support for full problem state area [0x00000 - 0x1ffff].
+ */
+static int spufs_psmap_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot)
+				     | _PAGE_NO_CACHE | _PAGE_GUARDED);
+
+	vma->vm_ops = &spufs_psmap_mmap_vmops;
+	return 0;
+}
+
+static int spufs_psmap_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = i->i_ctx;
+	if (!i->i_openers++)
+		ctx->psmap = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+	return nonseekable_open(inode, file);
+}
+
+static int
+spufs_psmap_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->psmap = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
+static const struct file_operations spufs_psmap_fops = {
+	.open	 = spufs_psmap_open,
+	.release = spufs_psmap_release,
+	.mmap	 = spufs_psmap_mmap,
+};
+
+
+#if SPUFS_MMAP_4K
+static int
+spufs_mfc_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	return spufs_ps_fault(vma, vmf, 0x3000, SPUFS_MFC_MAP_SIZE);
+}
+
+static struct vm_operations_struct spufs_mfc_mmap_vmops = {
+	.fault = spufs_mfc_mmap_fault,
+};
+
+/*
+ * mmap support for problem state MFC DMA area [0x0000 - 0x0fff].
+ */
+static int spufs_mfc_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot)
+				     | _PAGE_NO_CACHE | _PAGE_GUARDED);
+
+	vma->vm_ops = &spufs_mfc_mmap_vmops;
+	return 0;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_mfc_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+static int spufs_mfc_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	/* we don't want to deal with DMA into other processes */
+	if (ctx->owner != current->mm)
+		return -EINVAL;
+
+	if (atomic_read(&inode->i_count) != 1)
+		return -EBUSY;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (!i->i_openers++)
+		ctx->mfc = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+	return nonseekable_open(inode, file);
+}
+
+static int
+spufs_mfc_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->mfc = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
+/* interrupt-level mfc callback function. */
+void spufs_mfc_callback(struct spu *spu)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	if (!ctx)
+		return;
+
+	wake_up_all(&ctx->mfc_wq);
+
+	pr_debug("%s %s\n", __func__, spu->name);
+	if (ctx->mfc_fasync) {
+		u32 free_elements, tagstatus;
+		unsigned int mask;
+
+		/* no need for spu_acquire in interrupt context */
+		free_elements = ctx->ops->get_mfc_free_elements(ctx);
+		tagstatus = ctx->ops->read_mfc_tagstatus(ctx);
+
+		mask = 0;
+		if (free_elements & 0xffff)
+			mask |= POLLOUT;
+		if (tagstatus & ctx->tagwait)
+			mask |= POLLIN;
+
+		kill_fasync(&ctx->mfc_fasync, SIGIO, mask);
+	}
+}
+
+static int spufs_read_mfc_tagstatus(struct spu_context *ctx, u32 *status)
+{
+	/* See if there is one tag group is complete */
+	/* FIXME we need locking around tagwait */
+	*status = ctx->ops->read_mfc_tagstatus(ctx) & ctx->tagwait;
+	ctx->tagwait &= ~*status;
+	if (*status)
+		return 1;
+
+	/* enable interrupt waiting for any tag group,
+	   may silently fail if interrupts are already enabled */
+	ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1);
+	return 0;
+}
+
+static ssize_t spufs_mfc_read(struct file *file, char __user *buffer,
+			size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret = -EINVAL;
+	u32 status;
+
+	if (size != 4)
+		goto out;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+
+	ret = -EINVAL;
+	if (file->f_flags & O_NONBLOCK) {
+		status = ctx->ops->read_mfc_tagstatus(ctx);
+		if (!(status & ctx->tagwait))
+			ret = -EAGAIN;
+		else
+			/* XXX(hch): shouldn't we clear ret here? */
+			ctx->tagwait &= ~status;
+	} else {
+		ret = spufs_wait(ctx->mfc_wq,
+			   spufs_read_mfc_tagstatus(ctx, &status));
+		if (ret)
+			goto out;
+	}
+	spu_release(ctx);
+
+	ret = 4;
+	if (copy_to_user(buffer, &status, 4))
+		ret = -EFAULT;
+
+out:
+	return ret;
+}
+
+static int spufs_check_valid_dma(struct mfc_dma_command *cmd)
+{
+	pr_debug("queueing DMA %x %lx %x %x %x\n", cmd->lsa,
+		 cmd->ea, cmd->size, cmd->tag, cmd->cmd);
+
+	switch (cmd->cmd) {
+	case MFC_PUT_CMD:
+	case MFC_PUTF_CMD:
+	case MFC_PUTB_CMD:
+	case MFC_GET_CMD:
+	case MFC_GETF_CMD:
+	case MFC_GETB_CMD:
+		break;
+	default:
+		pr_debug("invalid DMA opcode %x\n", cmd->cmd);
+		return -EIO;
+	}
+
+	if ((cmd->lsa & 0xf) != (cmd->ea &0xf)) {
+		pr_debug("invalid DMA alignment, ea %lx lsa %x\n",
+				cmd->ea, cmd->lsa);
+		return -EIO;
+	}
+
+	switch (cmd->size & 0xf) {
+	case 1:
+		break;
+	case 2:
+		if (cmd->lsa & 1)
+			goto error;
+		break;
+	case 4:
+		if (cmd->lsa & 3)
+			goto error;
+		break;
+	case 8:
+		if (cmd->lsa & 7)
+			goto error;
+		break;
+	case 0:
+		if (cmd->lsa & 15)
+			goto error;
+		break;
+	error:
+	default:
+		pr_debug("invalid DMA alignment %x for size %x\n",
+			cmd->lsa & 0xf, cmd->size);
+		return -EIO;
+	}
+
+	if (cmd->size > 16 * 1024) {
+		pr_debug("invalid DMA size %x\n", cmd->size);
+		return -EIO;
+	}
+
+	if (cmd->tag & 0xfff0) {
+		/* we reserve the higher tag numbers for kernel use */
+		pr_debug("invalid DMA tag\n");
+		return -EIO;
+	}
+
+	if (cmd->class) {
+		/* not supported in this version */
+		pr_debug("invalid DMA class\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int spu_send_mfc_command(struct spu_context *ctx,
+				struct mfc_dma_command cmd,
+				int *error)
+{
+	*error = ctx->ops->send_mfc_command(ctx, &cmd);
+	if (*error == -EAGAIN) {
+		/* wait for any tag group to complete
+		   so we have space for the new command */
+		ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1);
+		/* try again, because the queue might be
+		   empty again */
+		*error = ctx->ops->send_mfc_command(ctx, &cmd);
+		if (*error == -EAGAIN)
+			return 0;
+	}
+	return 1;
+}
+
+static ssize_t spufs_mfc_write(struct file *file, const char __user *buffer,
+			size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct mfc_dma_command cmd;
+	int ret = -EINVAL;
+
+	if (size != sizeof cmd)
+		goto out;
+
+	ret = -EFAULT;
+	if (copy_from_user(&cmd, buffer, sizeof cmd))
+		goto out;
+
+	ret = spufs_check_valid_dma(&cmd);
+	if (ret)
+		goto out;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		goto out;
+
+	ret = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
+	if (ret)
+		goto out;
+
+	if (file->f_flags & O_NONBLOCK) {
+		ret = ctx->ops->send_mfc_command(ctx, &cmd);
+	} else {
+		int status;
+		ret = spufs_wait(ctx->mfc_wq,
+				 spu_send_mfc_command(ctx, cmd, &status));
+		if (ret)
+			goto out;
+		if (status)
+			ret = status;
+	}
+
+	if (ret)
+		goto out_unlock;
+
+	ctx->tagwait |= 1 << cmd.tag;
+	ret = size;
+
+out_unlock:
+	spu_release(ctx);
+out:
+	return ret;
+}
+
+static unsigned int spufs_mfc_poll(struct file *file,poll_table *wait)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 free_elements, tagstatus;
+	unsigned int mask;
+
+	poll_wait(file, &ctx->mfc_wq, wait);
+
+	/*
+	 * For now keep this uninterruptible and also ignore the rule
+	 * that poll should not sleep.  Will be fixed later.
+	 */
+	mutex_lock(&ctx->state_mutex);
+	ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2);
+	free_elements = ctx->ops->get_mfc_free_elements(ctx);
+	tagstatus = ctx->ops->read_mfc_tagstatus(ctx);
+	spu_release(ctx);
+
+	mask = 0;
+	if (free_elements & 0xffff)
+		mask |= POLLOUT | POLLWRNORM;
+	if (tagstatus & ctx->tagwait)
+		mask |= POLLIN | POLLRDNORM;
+
+	pr_debug("%s: free %d tagstatus %d tagwait %d\n", __func__,
+		free_elements, tagstatus, ctx->tagwait);
+
+	return mask;
+}
+
+static int spufs_mfc_flush(struct file *file, fl_owner_t id)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		goto out;
+#if 0
+/* this currently hangs */
+	ret = spufs_wait(ctx->mfc_wq,
+			 ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2));
+	if (ret)
+		goto out;
+	ret = spufs_wait(ctx->mfc_wq,
+			 ctx->ops->read_mfc_tagstatus(ctx) == ctx->tagwait);
+	if (ret)
+		goto out;
+#else
+	ret = 0;
+#endif
+	spu_release(ctx);
+out:
+	return ret;
+}
+
+static int spufs_mfc_fsync(struct file *file, struct dentry *dentry,
+			   int datasync)
+{
+	return spufs_mfc_flush(file, NULL);
+}
+
+static int spufs_mfc_fasync(int fd, struct file *file, int on)
+{
+	struct spu_context *ctx = file->private_data;
+
+	return fasync_helper(fd, file, on, &ctx->mfc_fasync);
+}
+
+static const struct file_operations spufs_mfc_fops = {
+	.open	 = spufs_mfc_open,
+	.release = spufs_mfc_release,
+	.read	 = spufs_mfc_read,
+	.write	 = spufs_mfc_write,
+	.poll	 = spufs_mfc_poll,
+	.flush	 = spufs_mfc_flush,
+	.fsync	 = spufs_mfc_fsync,
+	.fasync	 = spufs_mfc_fasync,
+	.mmap	 = spufs_mfc_mmap,
+};
+
+static int spufs_npc_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int ret;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	ctx->ops->npc_write(ctx, val);
+	spu_release(ctx);
+
+	return 0;
+}
+
+static u64 spufs_npc_get(struct spu_context *ctx)
+{
+	return ctx->ops->npc_read(ctx);
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set,
+		       "0x%llx\n", SPU_ATTR_ACQUIRE);
+
+static int spufs_decr_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	int ret;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	lscsa->decr.slot[0] = (u32) val;
+	spu_release_saved(ctx);
+
+	return 0;
+}
+
+static u64 spufs_decr_get(struct spu_context *ctx)
+{
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	return lscsa->decr.slot[0];
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set,
+		       "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED);
+
+static int spufs_decr_status_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int ret;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	if (val)
+		ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING;
+	else
+		ctx->csa.priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING;
+	spu_release_saved(ctx);
+
+	return 0;
+}
+
+static u64 spufs_decr_status_get(struct spu_context *ctx)
+{
+	if (ctx->csa.priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING)
+		return SPU_DECR_STATUS_RUNNING;
+	else
+		return 0;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get,
+		       spufs_decr_status_set, "0x%llx\n",
+		       SPU_ATTR_ACQUIRE_SAVED);
+
+static int spufs_event_mask_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	int ret;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	lscsa->event_mask.slot[0] = (u32) val;
+	spu_release_saved(ctx);
+
+	return 0;
+}
+
+static u64 spufs_event_mask_get(struct spu_context *ctx)
+{
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	return lscsa->event_mask.slot[0];
+}
+
+DEFINE_SPUFS_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get,
+		       spufs_event_mask_set, "0x%llx\n",
+		       SPU_ATTR_ACQUIRE_SAVED);
+
+static u64 spufs_event_status_get(struct spu_context *ctx)
+{
+	struct spu_state *state = &ctx->csa;
+	u64 stat;
+	stat = state->spu_chnlcnt_RW[0];
+	if (stat)
+		return state->spu_chnldata_RW[0];
+	return 0;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get,
+		       NULL, "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED)
+
+static int spufs_srr0_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	int ret;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	lscsa->srr0.slot[0] = (u32) val;
+	spu_release_saved(ctx);
+
+	return 0;
+}
+
+static u64 spufs_srr0_get(struct spu_context *ctx)
+{
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	return lscsa->srr0.slot[0];
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set,
+		       "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED)
+
+static u64 spufs_id_get(struct spu_context *ctx)
+{
+	u64 num;
+
+	if (ctx->state == SPU_STATE_RUNNABLE)
+		num = ctx->spu->number;
+	else
+		num = (unsigned int)-1;
+
+	return num;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_id_ops, spufs_id_get, NULL, "0x%llx\n",
+		       SPU_ATTR_ACQUIRE)
+
+static u64 spufs_object_id_get(struct spu_context *ctx)
+{
+	/* FIXME: Should there really be no locking here? */
+	return ctx->object_id;
+}
+
+static int spufs_object_id_set(void *data, u64 id)
+{
+	struct spu_context *ctx = data;
+	ctx->object_id = id;
+
+	return 0;
+}
+
+DEFINE_SPUFS_ATTRIBUTE(spufs_object_id_ops, spufs_object_id_get,
+		       spufs_object_id_set, "0x%llx\n", SPU_ATTR_NOACQUIRE);
+
+static u64 spufs_lslr_get(struct spu_context *ctx)
+{
+	return ctx->csa.priv2.spu_lslr_RW;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_lslr_ops, spufs_lslr_get, NULL, "0x%llx\n",
+		       SPU_ATTR_ACQUIRE_SAVED);
+
+static int spufs_info_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+	file->private_data = ctx;
+	return 0;
+}
+
+static int spufs_caps_show(struct seq_file *s, void *private)
+{
+	struct spu_context *ctx = s->private;
+
+	if (!(ctx->flags & SPU_CREATE_NOSCHED))
+		seq_puts(s, "sched\n");
+	if (!(ctx->flags & SPU_CREATE_ISOLATE))
+		seq_puts(s, "step\n");
+	return 0;
+}
+
+static int spufs_caps_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, spufs_caps_show, SPUFS_I(inode)->i_ctx);
+}
+
+static const struct file_operations spufs_caps_fops = {
+	.open		= spufs_caps_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static ssize_t __spufs_mbox_info_read(struct spu_context *ctx,
+			char __user *buf, size_t len, loff_t *pos)
+{
+	u32 data;
+
+	/* EOF if there's no entry in the mbox */
+	if (!(ctx->csa.prob.mb_stat_R & 0x0000ff))
+		return 0;
+
+	data = ctx->csa.prob.pu_mb_R;
+
+	return simple_read_from_buffer(buf, len, pos, &data, sizeof data);
+}
+
+static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	int ret;
+	struct spu_context *ctx = file->private_data;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	spin_lock(&ctx->csa.register_lock);
+	ret = __spufs_mbox_info_read(ctx, buf, len, pos);
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	return ret;
+}
+
+static const struct file_operations spufs_mbox_info_fops = {
+	.open = spufs_info_open,
+	.read = spufs_mbox_info_read,
+	.llseek  = generic_file_llseek,
+};
+
+static ssize_t __spufs_ibox_info_read(struct spu_context *ctx,
+				char __user *buf, size_t len, loff_t *pos)
+{
+	u32 data;
+
+	/* EOF if there's no entry in the ibox */
+	if (!(ctx->csa.prob.mb_stat_R & 0xff0000))
+		return 0;
+
+	data = ctx->csa.priv2.puint_mb_R;
+
+	return simple_read_from_buffer(buf, len, pos, &data, sizeof data);
+}
+
+static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	spin_lock(&ctx->csa.register_lock);
+	ret = __spufs_ibox_info_read(ctx, buf, len, pos);
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	return ret;
+}
+
+static const struct file_operations spufs_ibox_info_fops = {
+	.open = spufs_info_open,
+	.read = spufs_ibox_info_read,
+	.llseek  = generic_file_llseek,
+};
+
+static ssize_t __spufs_wbox_info_read(struct spu_context *ctx,
+			char __user *buf, size_t len, loff_t *pos)
+{
+	int i, cnt;
+	u32 data[4];
+	u32 wbox_stat;
+
+	wbox_stat = ctx->csa.prob.mb_stat_R;
+	cnt = 4 - ((wbox_stat & 0x00ff00) >> 8);
+	for (i = 0; i < cnt; i++) {
+		data[i] = ctx->csa.spu_mailbox_data[i];
+	}
+
+	return simple_read_from_buffer(buf, len, pos, &data,
+				cnt * sizeof(u32));
+}
+
+static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	spin_lock(&ctx->csa.register_lock);
+	ret = __spufs_wbox_info_read(ctx, buf, len, pos);
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	return ret;
+}
+
+static const struct file_operations spufs_wbox_info_fops = {
+	.open = spufs_info_open,
+	.read = spufs_wbox_info_read,
+	.llseek  = generic_file_llseek,
+};
+
+static ssize_t __spufs_dma_info_read(struct spu_context *ctx,
+			char __user *buf, size_t len, loff_t *pos)
+{
+	struct spu_dma_info info;
+	struct mfc_cq_sr *qp, *spuqp;
+	int i;
+
+	info.dma_info_type = ctx->csa.priv2.spu_tag_status_query_RW;
+	info.dma_info_mask = ctx->csa.lscsa->tag_mask.slot[0];
+	info.dma_info_status = ctx->csa.spu_chnldata_RW[24];
+	info.dma_info_stall_and_notify = ctx->csa.spu_chnldata_RW[25];
+	info.dma_info_atomic_command_status = ctx->csa.spu_chnldata_RW[27];
+	for (i = 0; i < 16; i++) {
+		qp = &info.dma_info_command_data[i];
+		spuqp = &ctx->csa.priv2.spuq[i];
+
+		qp->mfc_cq_data0_RW = spuqp->mfc_cq_data0_RW;
+		qp->mfc_cq_data1_RW = spuqp->mfc_cq_data1_RW;
+		qp->mfc_cq_data2_RW = spuqp->mfc_cq_data2_RW;
+		qp->mfc_cq_data3_RW = spuqp->mfc_cq_data3_RW;
+	}
+
+	return simple_read_from_buffer(buf, len, pos, &info,
+				sizeof info);
+}
+
+static ssize_t spufs_dma_info_read(struct file *file, char __user *buf,
+			      size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	spin_lock(&ctx->csa.register_lock);
+	ret = __spufs_dma_info_read(ctx, buf, len, pos);
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	return ret;
+}
+
+static const struct file_operations spufs_dma_info_fops = {
+	.open = spufs_info_open,
+	.read = spufs_dma_info_read,
+};
+
+static ssize_t __spufs_proxydma_info_read(struct spu_context *ctx,
+			char __user *buf, size_t len, loff_t *pos)
+{
+	struct spu_proxydma_info info;
+	struct mfc_cq_sr *qp, *puqp;
+	int ret = sizeof info;
+	int i;
+
+	if (len < ret)
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	info.proxydma_info_type = ctx->csa.prob.dma_querytype_RW;
+	info.proxydma_info_mask = ctx->csa.prob.dma_querymask_RW;
+	info.proxydma_info_status = ctx->csa.prob.dma_tagstatus_R;
+	for (i = 0; i < 8; i++) {
+		qp = &info.proxydma_info_command_data[i];
+		puqp = &ctx->csa.priv2.puq[i];
+
+		qp->mfc_cq_data0_RW = puqp->mfc_cq_data0_RW;
+		qp->mfc_cq_data1_RW = puqp->mfc_cq_data1_RW;
+		qp->mfc_cq_data2_RW = puqp->mfc_cq_data2_RW;
+		qp->mfc_cq_data3_RW = puqp->mfc_cq_data3_RW;
+	}
+
+	return simple_read_from_buffer(buf, len, pos, &info,
+				sizeof info);
+}
+
+static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	spin_lock(&ctx->csa.register_lock);
+	ret = __spufs_proxydma_info_read(ctx, buf, len, pos);
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	return ret;
+}
+
+static const struct file_operations spufs_proxydma_info_fops = {
+	.open = spufs_info_open,
+	.read = spufs_proxydma_info_read,
+};
+
+static int spufs_show_tid(struct seq_file *s, void *private)
+{
+	struct spu_context *ctx = s->private;
+
+	seq_printf(s, "%d\n", ctx->tid);
+	return 0;
+}
+
+static int spufs_tid_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, spufs_show_tid, SPUFS_I(inode)->i_ctx);
+}
+
+static const struct file_operations spufs_tid_fops = {
+	.open		= spufs_tid_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const char *ctx_state_names[] = {
+	"user", "system", "iowait", "loaded"
+};
+
+static unsigned long long spufs_acct_time(struct spu_context *ctx,
+		enum spu_utilization_state state)
+{
+	struct timespec ts;
+	unsigned long long time = ctx->stats.times[state];
+
+	/*
+	 * In general, utilization statistics are updated by the controlling
+	 * thread as the spu context moves through various well defined
+	 * state transitions, but if the context is lazily loaded its
+	 * utilization statistics are not updated as the controlling thread
+	 * is not tightly coupled with the execution of the spu context.  We
+	 * calculate and apply the time delta from the last recorded state
+	 * of the spu context.
+	 */
+	if (ctx->spu && ctx->stats.util_state == state) {
+		ktime_get_ts(&ts);
+		time += timespec_to_ns(&ts) - ctx->stats.tstamp;
+	}
+
+	return time / NSEC_PER_MSEC;
+}
+
+static unsigned long long spufs_slb_flts(struct spu_context *ctx)
+{
+	unsigned long long slb_flts = ctx->stats.slb_flt;
+
+	if (ctx->state == SPU_STATE_RUNNABLE) {
+		slb_flts += (ctx->spu->stats.slb_flt -
+			     ctx->stats.slb_flt_base);
+	}
+
+	return slb_flts;
+}
+
+static unsigned long long spufs_class2_intrs(struct spu_context *ctx)
+{
+	unsigned long long class2_intrs = ctx->stats.class2_intr;
+
+	if (ctx->state == SPU_STATE_RUNNABLE) {
+		class2_intrs += (ctx->spu->stats.class2_intr -
+				 ctx->stats.class2_intr_base);
+	}
+
+	return class2_intrs;
+}
+
+
+static int spufs_show_stat(struct seq_file *s, void *private)
+{
+	struct spu_context *ctx = s->private;
+	int ret;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+
+	seq_printf(s, "%s %llu %llu %llu %llu "
+		      "%llu %llu %llu %llu %llu %llu %llu %llu\n",
+		ctx_state_names[ctx->stats.util_state],
+		spufs_acct_time(ctx, SPU_UTIL_USER),
+		spufs_acct_time(ctx, SPU_UTIL_SYSTEM),
+		spufs_acct_time(ctx, SPU_UTIL_IOWAIT),
+		spufs_acct_time(ctx, SPU_UTIL_IDLE_LOADED),
+		ctx->stats.vol_ctx_switch,
+		ctx->stats.invol_ctx_switch,
+		spufs_slb_flts(ctx),
+		ctx->stats.hash_flt,
+		ctx->stats.min_flt,
+		ctx->stats.maj_flt,
+		spufs_class2_intrs(ctx),
+		ctx->stats.libassist);
+	spu_release(ctx);
+	return 0;
+}
+
+static int spufs_stat_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, spufs_show_stat, SPUFS_I(inode)->i_ctx);
+}
+
+static const struct file_operations spufs_stat_fops = {
+	.open		= spufs_stat_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static inline int spufs_switch_log_used(struct spu_context *ctx)
+{
+	return (ctx->switch_log->head - ctx->switch_log->tail) %
+		SWITCH_LOG_BUFSIZE;
+}
+
+static inline int spufs_switch_log_avail(struct spu_context *ctx)
+{
+	return SWITCH_LOG_BUFSIZE - spufs_switch_log_used(ctx);
+}
+
+static int spufs_switch_log_open(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+	int rc;
+
+	rc = spu_acquire(ctx);
+	if (rc)
+		return rc;
+
+	if (ctx->switch_log) {
+		rc = -EBUSY;
+		goto out;
+	}
+
+	ctx->switch_log = kmalloc(sizeof(struct switch_log) +
+		SWITCH_LOG_BUFSIZE * sizeof(struct switch_log_entry),
+		GFP_KERNEL);
+
+	if (!ctx->switch_log) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	ctx->switch_log->head = ctx->switch_log->tail = 0;
+	init_waitqueue_head(&ctx->switch_log->wait);
+	rc = 0;
+
+out:
+	spu_release(ctx);
+	return rc;
+}
+
+static int spufs_switch_log_release(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+	int rc;
+
+	rc = spu_acquire(ctx);
+	if (rc)
+		return rc;
+
+	kfree(ctx->switch_log);
+	ctx->switch_log = NULL;
+	spu_release(ctx);
+
+	return 0;
+}
+
+static int switch_log_sprint(struct spu_context *ctx, char *tbuf, int n)
+{
+	struct switch_log_entry *p;
+
+	p = ctx->switch_log->log + ctx->switch_log->tail % SWITCH_LOG_BUFSIZE;
+
+	return snprintf(tbuf, n, "%u.%09u %d %u %u %llu\n",
+			(unsigned int) p->tstamp.tv_sec,
+			(unsigned int) p->tstamp.tv_nsec,
+			p->spu_id,
+			(unsigned int) p->type,
+			(unsigned int) p->val,
+			(unsigned long long) p->timebase);
+}
+
+static ssize_t spufs_switch_log_read(struct file *file, char __user *buf,
+			     size_t len, loff_t *ppos)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+	int error = 0, cnt = 0;
+
+	if (!buf || len < 0)
+		return -EINVAL;
+
+	error = spu_acquire(ctx);
+	if (error)
+		return error;
+
+	while (cnt < len) {
+		char tbuf[128];
+		int width;
+
+		if (spufs_switch_log_used(ctx) == 0) {
+			if (cnt > 0) {
+				/* If there's data ready to go, we can
+				 * just return straight away */
+				break;
+
+			} else if (file->f_flags & O_NONBLOCK) {
+				error = -EAGAIN;
+				break;
+
+			} else {
+				/* spufs_wait will drop the mutex and
+				 * re-acquire, but since we're in read(), the
+				 * file cannot be _released (and so
+				 * ctx->switch_log is stable).
+				 */
+				error = spufs_wait(ctx->switch_log->wait,
+						spufs_switch_log_used(ctx) > 0);
+
+				/* On error, spufs_wait returns without the
+				 * state mutex held */
+				if (error)
+					return error;
+
+				/* We may have had entries read from underneath
+				 * us while we dropped the mutex in spufs_wait,
+				 * so re-check */
+				if (spufs_switch_log_used(ctx) == 0)
+					continue;
+			}
+		}
+
+		width = switch_log_sprint(ctx, tbuf, sizeof(tbuf));
+		if (width < len)
+			ctx->switch_log->tail =
+				(ctx->switch_log->tail + 1) %
+				 SWITCH_LOG_BUFSIZE;
+		else
+			/* If the record is greater than space available return
+			 * partial buffer (so far) */
+			break;
+
+		error = copy_to_user(buf + cnt, tbuf, width);
+		if (error)
+			break;
+		cnt += width;
+	}
+
+	spu_release(ctx);
+
+	return cnt == 0 ? error : cnt;
+}
+
+static unsigned int spufs_switch_log_poll(struct file *file, poll_table *wait)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+	unsigned int mask = 0;
+	int rc;
+
+	poll_wait(file, &ctx->switch_log->wait, wait);
+
+	rc = spu_acquire(ctx);
+	if (rc)
+		return rc;
+
+	if (spufs_switch_log_used(ctx) > 0)
+		mask |= POLLIN;
+
+	spu_release(ctx);
+
+	return mask;
+}
+
+static const struct file_operations spufs_switch_log_fops = {
+	.owner		= THIS_MODULE,
+	.open		= spufs_switch_log_open,
+	.read		= spufs_switch_log_read,
+	.poll		= spufs_switch_log_poll,
+	.release	= spufs_switch_log_release,
+};
+
+/**
+ * Log a context switch event to a switch log reader.
+ *
+ * Must be called with ctx->state_mutex held.
+ */
+void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx,
+		u32 type, u32 val)
+{
+	if (!ctx->switch_log)
+		return;
+
+	if (spufs_switch_log_avail(ctx) > 1) {
+		struct switch_log_entry *p;
+
+		p = ctx->switch_log->log + ctx->switch_log->head;
+		ktime_get_ts(&p->tstamp);
+		p->timebase = get_tb();
+		p->spu_id = spu ? spu->number : -1;
+		p->type = type;
+		p->val = val;
+
+		ctx->switch_log->head =
+			(ctx->switch_log->head + 1) % SWITCH_LOG_BUFSIZE;
+	}
+
+	wake_up(&ctx->switch_log->wait);
+}
+
+static int spufs_show_ctx(struct seq_file *s, void *private)
+{
+	struct spu_context *ctx = s->private;
+	u64 mfc_control_RW;
+
+	mutex_lock(&ctx->state_mutex);
+	if (ctx->spu) {
+		struct spu *spu = ctx->spu;
+		struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+		spin_lock_irq(&spu->register_lock);
+		mfc_control_RW = in_be64(&priv2->mfc_control_RW);
+		spin_unlock_irq(&spu->register_lock);
+	} else {
+		struct spu_state *csa = &ctx->csa;
+
+		mfc_control_RW = csa->priv2.mfc_control_RW;
+	}
+
+	seq_printf(s, "%c flgs(%lx) sflgs(%lx) pri(%d) ts(%d) spu(%02d)"
+		" %c %lx %lx %lx %lx %x %x\n",
+		ctx->state == SPU_STATE_SAVED ? 'S' : 'R',
+		ctx->flags,
+		ctx->sched_flags,
+		ctx->prio,
+		ctx->time_slice,
+		ctx->spu ? ctx->spu->number : -1,
+		!list_empty(&ctx->rq) ? 'q' : ' ',
+		ctx->csa.class_0_pending,
+		ctx->csa.class_0_dar,
+		ctx->csa.class_1_dsisr,
+		mfc_control_RW,
+		ctx->ops->runcntl_read(ctx),
+		ctx->ops->status_read(ctx));
+
+	mutex_unlock(&ctx->state_mutex);
+
+	return 0;
+}
+
+static int spufs_ctx_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, spufs_show_ctx, SPUFS_I(inode)->i_ctx);
+}
+
+static const struct file_operations spufs_ctx_fops = {
+	.open           = spufs_ctx_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = single_release,
+};
+
+struct spufs_tree_descr spufs_dir_contents[] = {
+	{ "capabilities", &spufs_caps_fops, 0444, },
+	{ "mem",  &spufs_mem_fops,  0666, LS_SIZE, },
+	{ "regs", &spufs_regs_fops,  0666, sizeof(struct spu_reg128[128]), },
+	{ "mbox", &spufs_mbox_fops, 0444, },
+	{ "ibox", &spufs_ibox_fops, 0444, },
+	{ "wbox", &spufs_wbox_fops, 0222, },
+	{ "mbox_stat", &spufs_mbox_stat_fops, 0444, sizeof(u32), },
+	{ "ibox_stat", &spufs_ibox_stat_fops, 0444, sizeof(u32), },
+	{ "wbox_stat", &spufs_wbox_stat_fops, 0444, sizeof(u32), },
+	{ "signal1", &spufs_signal1_fops, 0666, },
+	{ "signal2", &spufs_signal2_fops, 0666, },
+	{ "signal1_type", &spufs_signal1_type, 0666, },
+	{ "signal2_type", &spufs_signal2_type, 0666, },
+	{ "cntl", &spufs_cntl_fops,  0666, },
+	{ "fpcr", &spufs_fpcr_fops, 0666, sizeof(struct spu_reg128), },
+	{ "lslr", &spufs_lslr_ops, 0444, },
+	{ "mfc", &spufs_mfc_fops, 0666, },
+	{ "mss", &spufs_mss_fops, 0666, },
+	{ "npc", &spufs_npc_ops, 0666, },
+	{ "srr0", &spufs_srr0_ops, 0666, },
+	{ "decr", &spufs_decr_ops, 0666, },
+	{ "decr_status", &spufs_decr_status_ops, 0666, },
+	{ "event_mask", &spufs_event_mask_ops, 0666, },
+	{ "event_status", &spufs_event_status_ops, 0444, },
+	{ "psmap", &spufs_psmap_fops, 0666, SPUFS_PS_MAP_SIZE, },
+	{ "phys-id", &spufs_id_ops, 0666, },
+	{ "object-id", &spufs_object_id_ops, 0666, },
+	{ "mbox_info", &spufs_mbox_info_fops, 0444, sizeof(u32), },
+	{ "ibox_info", &spufs_ibox_info_fops, 0444, sizeof(u32), },
+	{ "wbox_info", &spufs_wbox_info_fops, 0444, sizeof(u32), },
+	{ "dma_info", &spufs_dma_info_fops, 0444,
+		sizeof(struct spu_dma_info), },
+	{ "proxydma_info", &spufs_proxydma_info_fops, 0444,
+		sizeof(struct spu_proxydma_info)},
+	{ "tid", &spufs_tid_fops, 0444, },
+	{ "stat", &spufs_stat_fops, 0444, },
+	{ "switch_log", &spufs_switch_log_fops, 0444 },
+	{},
+};
+
+struct spufs_tree_descr spufs_dir_nosched_contents[] = {
+	{ "capabilities", &spufs_caps_fops, 0444, },
+	{ "mem",  &spufs_mem_fops,  0666, LS_SIZE, },
+	{ "mbox", &spufs_mbox_fops, 0444, },
+	{ "ibox", &spufs_ibox_fops, 0444, },
+	{ "wbox", &spufs_wbox_fops, 0222, },
+	{ "mbox_stat", &spufs_mbox_stat_fops, 0444, sizeof(u32), },
+	{ "ibox_stat", &spufs_ibox_stat_fops, 0444, sizeof(u32), },
+	{ "wbox_stat", &spufs_wbox_stat_fops, 0444, sizeof(u32), },
+	{ "signal1", &spufs_signal1_nosched_fops, 0222, },
+	{ "signal2", &spufs_signal2_nosched_fops, 0222, },
+	{ "signal1_type", &spufs_signal1_type, 0666, },
+	{ "signal2_type", &spufs_signal2_type, 0666, },
+	{ "mss", &spufs_mss_fops, 0666, },
+	{ "mfc", &spufs_mfc_fops, 0666, },
+	{ "cntl", &spufs_cntl_fops,  0666, },
+	{ "npc", &spufs_npc_ops, 0666, },
+	{ "psmap", &spufs_psmap_fops, 0666, SPUFS_PS_MAP_SIZE, },
+	{ "phys-id", &spufs_id_ops, 0666, },
+	{ "object-id", &spufs_object_id_ops, 0666, },
+	{ "tid", &spufs_tid_fops, 0444, },
+	{ "stat", &spufs_stat_fops, 0444, },
+	{},
+};
+
+struct spufs_tree_descr spufs_dir_debug_contents[] = {
+	{ ".ctx", &spufs_ctx_fops, 0444, },
+	{},
+};
+
+struct spufs_coredump_reader spufs_coredump_read[] = {
+	{ "regs", __spufs_regs_read, NULL, sizeof(struct spu_reg128[128])},
+	{ "fpcr", __spufs_fpcr_read, NULL, sizeof(struct spu_reg128) },
+	{ "lslr", NULL, spufs_lslr_get, 19 },
+	{ "decr", NULL, spufs_decr_get, 19 },
+	{ "decr_status", NULL, spufs_decr_status_get, 19 },
+	{ "mem", __spufs_mem_read, NULL, LS_SIZE, },
+	{ "signal1", __spufs_signal1_read, NULL, sizeof(u32) },
+	{ "signal1_type", NULL, spufs_signal1_type_get, 19 },
+	{ "signal2", __spufs_signal2_read, NULL, sizeof(u32) },
+	{ "signal2_type", NULL, spufs_signal2_type_get, 19 },
+	{ "event_mask", NULL, spufs_event_mask_get, 19 },
+	{ "event_status", NULL, spufs_event_status_get, 19 },
+	{ "mbox_info", __spufs_mbox_info_read, NULL, sizeof(u32) },
+	{ "ibox_info", __spufs_ibox_info_read, NULL, sizeof(u32) },
+	{ "wbox_info", __spufs_wbox_info_read, NULL, 4 * sizeof(u32)},
+	{ "dma_info", __spufs_dma_info_read, NULL, sizeof(struct spu_dma_info)},
+	{ "proxydma_info", __spufs_proxydma_info_read,
+			   NULL, sizeof(struct spu_proxydma_info)},
+	{ "object-id", NULL, spufs_object_id_get, 19 },
+	{ "npc", NULL, spufs_npc_get, 19 },
+	{ NULL },
+};
diff --git a/arch/powerpc/platforms/cell/spufs/gang.c b/arch/powerpc/platforms/cell/spufs/gang.c
new file mode 100644
index 0000000..71a4432
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/gang.c
@@ -0,0 +1,87 @@
+/*
+ * SPU file system
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+
+#include "spufs.h"
+
+struct spu_gang *alloc_spu_gang(void)
+{
+	struct spu_gang *gang;
+
+	gang = kzalloc(sizeof *gang, GFP_KERNEL);
+	if (!gang)
+		goto out;
+
+	kref_init(&gang->kref);
+	mutex_init(&gang->mutex);
+	mutex_init(&gang->aff_mutex);
+	INIT_LIST_HEAD(&gang->list);
+	INIT_LIST_HEAD(&gang->aff_list_head);
+
+out:
+	return gang;
+}
+
+static void destroy_spu_gang(struct kref *kref)
+{
+	struct spu_gang *gang;
+	gang = container_of(kref, struct spu_gang, kref);
+	WARN_ON(gang->contexts || !list_empty(&gang->list));
+	kfree(gang);
+}
+
+struct spu_gang *get_spu_gang(struct spu_gang *gang)
+{
+	kref_get(&gang->kref);
+	return gang;
+}
+
+int put_spu_gang(struct spu_gang *gang)
+{
+	return kref_put(&gang->kref, &destroy_spu_gang);
+}
+
+void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx)
+{
+	mutex_lock(&gang->mutex);
+	ctx->gang = get_spu_gang(gang);
+	list_add(&ctx->gang_list, &gang->list);
+	gang->contexts++;
+	mutex_unlock(&gang->mutex);
+}
+
+void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx)
+{
+	mutex_lock(&gang->mutex);
+	WARN_ON(ctx->gang != gang);
+	if (!list_empty(&ctx->aff_list)) {
+		list_del_init(&ctx->aff_list);
+		gang->aff_flags &= ~AFF_OFFSETS_SET;
+	}
+	list_del_init(&ctx->gang_list);
+	gang->contexts--;
+	mutex_unlock(&gang->mutex);
+
+	put_spu_gang(gang);
+}
diff --git a/arch/powerpc/platforms/cell/spufs/hw_ops.c b/arch/powerpc/platforms/cell/spufs/hw_ops.c
new file mode 100644
index 0000000..64f8540
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c
@@ -0,0 +1,350 @@
+/* hw_ops.c - query/set operations on active SPU context.
+ *
+ * Copyright (C) IBM 2005
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/poll.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+
+#include <asm/io.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/spu_csa.h>
+#include <asm/mmu_context.h>
+#include "spufs.h"
+
+static int spu_hw_mbox_read(struct spu_context *ctx, u32 * data)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_problem __iomem *prob = spu->problem;
+	u32 mbox_stat;
+	int ret = 0;
+
+	spin_lock_irq(&spu->register_lock);
+	mbox_stat = in_be32(&prob->mb_stat_R);
+	if (mbox_stat & 0x0000ff) {
+		*data = in_be32(&prob->pu_mb_R);
+		ret = 4;
+	}
+	spin_unlock_irq(&spu->register_lock);
+	return ret;
+}
+
+static u32 spu_hw_mbox_stat_read(struct spu_context *ctx)
+{
+	return in_be32(&ctx->spu->problem->mb_stat_R);
+}
+
+static unsigned int spu_hw_mbox_stat_poll(struct spu_context *ctx,
+					  unsigned int events)
+{
+	struct spu *spu = ctx->spu;
+	int ret = 0;
+	u32 stat;
+
+	spin_lock_irq(&spu->register_lock);
+	stat = in_be32(&spu->problem->mb_stat_R);
+
+	/* if the requested event is there, return the poll
+	   mask, otherwise enable the interrupt to get notified,
+	   but first mark any pending interrupts as done so
+	   we don't get woken up unnecessarily */
+
+	if (events & (POLLIN | POLLRDNORM)) {
+		if (stat & 0xff0000)
+			ret |= POLLIN | POLLRDNORM;
+		else {
+			spu_int_stat_clear(spu, 2, CLASS2_MAILBOX_INTR);
+			spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
+		}
+	}
+	if (events & (POLLOUT | POLLWRNORM)) {
+		if (stat & 0x00ff00)
+			ret = POLLOUT | POLLWRNORM;
+		else {
+			spu_int_stat_clear(spu, 2,
+					CLASS2_MAILBOX_THRESHOLD_INTR);
+			spu_int_mask_or(spu, 2,
+					CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR);
+		}
+	}
+	spin_unlock_irq(&spu->register_lock);
+	return ret;
+}
+
+static int spu_hw_ibox_read(struct spu_context *ctx, u32 * data)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_problem __iomem *prob = spu->problem;
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	int ret;
+
+	spin_lock_irq(&spu->register_lock);
+	if (in_be32(&prob->mb_stat_R) & 0xff0000) {
+		/* read the first available word */
+		*data = in_be64(&priv2->puint_mb_R);
+		ret = 4;
+	} else {
+		/* make sure we get woken up by the interrupt */
+		spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
+		ret = 0;
+	}
+	spin_unlock_irq(&spu->register_lock);
+	return ret;
+}
+
+static int spu_hw_wbox_write(struct spu_context *ctx, u32 data)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_problem __iomem *prob = spu->problem;
+	int ret;
+
+	spin_lock_irq(&spu->register_lock);
+	if (in_be32(&prob->mb_stat_R) & 0x00ff00) {
+		/* we have space to write wbox_data to */
+		out_be32(&prob->spu_mb_W, data);
+		ret = 4;
+	} else {
+		/* make sure we get woken up by the interrupt when space
+		   becomes available */
+		spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR);
+		ret = 0;
+	}
+	spin_unlock_irq(&spu->register_lock);
+	return ret;
+}
+
+static void spu_hw_signal1_write(struct spu_context *ctx, u32 data)
+{
+	out_be32(&ctx->spu->problem->signal_notify1, data);
+}
+
+static void spu_hw_signal2_write(struct spu_context *ctx, u32 data)
+{
+	out_be32(&ctx->spu->problem->signal_notify2, data);
+}
+
+static void spu_hw_signal1_type_set(struct spu_context *ctx, u64 val)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 tmp;
+
+	spin_lock_irq(&spu->register_lock);
+	tmp = in_be64(&priv2->spu_cfg_RW);
+	if (val)
+		tmp |= 1;
+	else
+		tmp &= ~1;
+	out_be64(&priv2->spu_cfg_RW, tmp);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+static u64 spu_hw_signal1_type_get(struct spu_context *ctx)
+{
+	return ((in_be64(&ctx->spu->priv2->spu_cfg_RW) & 1) != 0);
+}
+
+static void spu_hw_signal2_type_set(struct spu_context *ctx, u64 val)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 tmp;
+
+	spin_lock_irq(&spu->register_lock);
+	tmp = in_be64(&priv2->spu_cfg_RW);
+	if (val)
+		tmp |= 2;
+	else
+		tmp &= ~2;
+	out_be64(&priv2->spu_cfg_RW, tmp);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+static u64 spu_hw_signal2_type_get(struct spu_context *ctx)
+{
+	return ((in_be64(&ctx->spu->priv2->spu_cfg_RW) & 2) != 0);
+}
+
+static u32 spu_hw_npc_read(struct spu_context *ctx)
+{
+	return in_be32(&ctx->spu->problem->spu_npc_RW);
+}
+
+static void spu_hw_npc_write(struct spu_context *ctx, u32 val)
+{
+	out_be32(&ctx->spu->problem->spu_npc_RW, val);
+}
+
+static u32 spu_hw_status_read(struct spu_context *ctx)
+{
+	return in_be32(&ctx->spu->problem->spu_status_R);
+}
+
+static char *spu_hw_get_ls(struct spu_context *ctx)
+{
+	return ctx->spu->local_store;
+}
+
+static void spu_hw_privcntl_write(struct spu_context *ctx, u64 val)
+{
+	out_be64(&ctx->spu->priv2->spu_privcntl_RW, val);
+}
+
+static u32 spu_hw_runcntl_read(struct spu_context *ctx)
+{
+	return in_be32(&ctx->spu->problem->spu_runcntl_RW);
+}
+
+static void spu_hw_runcntl_write(struct spu_context *ctx, u32 val)
+{
+	spin_lock_irq(&ctx->spu->register_lock);
+	if (val & SPU_RUNCNTL_ISOLATE)
+		spu_hw_privcntl_write(ctx,
+			SPU_PRIVCNT_LOAD_REQUEST_ENABLE_MASK);
+	out_be32(&ctx->spu->problem->spu_runcntl_RW, val);
+	spin_unlock_irq(&ctx->spu->register_lock);
+}
+
+static void spu_hw_runcntl_stop(struct spu_context *ctx)
+{
+	spin_lock_irq(&ctx->spu->register_lock);
+	out_be32(&ctx->spu->problem->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+	while (in_be32(&ctx->spu->problem->spu_status_R) & SPU_STATUS_RUNNING)
+		cpu_relax();
+	spin_unlock_irq(&ctx->spu->register_lock);
+}
+
+static void spu_hw_master_start(struct spu_context *ctx)
+{
+	struct spu *spu = ctx->spu;
+	u64 sr1;
+
+	spin_lock_irq(&spu->register_lock);
+	sr1 = spu_mfc_sr1_get(spu) | MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+	spu_mfc_sr1_set(spu, sr1);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+static void spu_hw_master_stop(struct spu_context *ctx)
+{
+	struct spu *spu = ctx->spu;
+	u64 sr1;
+
+	spin_lock_irq(&spu->register_lock);
+	sr1 = spu_mfc_sr1_get(spu) & ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+	spu_mfc_sr1_set(spu, sr1);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+static int spu_hw_set_mfc_query(struct spu_context * ctx, u32 mask, u32 mode)
+{
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+	int ret;
+
+	spin_lock_irq(&ctx->spu->register_lock);
+	ret = -EAGAIN;
+	if (in_be32(&prob->dma_querytype_RW))
+		goto out;
+	ret = 0;
+	out_be32(&prob->dma_querymask_RW, mask);
+	out_be32(&prob->dma_querytype_RW, mode);
+out:
+	spin_unlock_irq(&ctx->spu->register_lock);
+	return ret;
+}
+
+static u32 spu_hw_read_mfc_tagstatus(struct spu_context * ctx)
+{
+	return in_be32(&ctx->spu->problem->dma_tagstatus_R);
+}
+
+static u32 spu_hw_get_mfc_free_elements(struct spu_context *ctx)
+{
+	return in_be32(&ctx->spu->problem->dma_qstatus_R);
+}
+
+static int spu_hw_send_mfc_command(struct spu_context *ctx,
+					struct mfc_dma_command *cmd)
+{
+	u32 status;
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+
+	spin_lock_irq(&ctx->spu->register_lock);
+	out_be32(&prob->mfc_lsa_W, cmd->lsa);
+	out_be64(&prob->mfc_ea_W, cmd->ea);
+	out_be32(&prob->mfc_union_W.by32.mfc_size_tag32,
+				cmd->size << 16 | cmd->tag);
+	out_be32(&prob->mfc_union_W.by32.mfc_class_cmd32,
+				cmd->class << 16 | cmd->cmd);
+	status = in_be32(&prob->mfc_union_W.by32.mfc_class_cmd32);
+	spin_unlock_irq(&ctx->spu->register_lock);
+
+	switch (status & 0xffff) {
+	case 0:
+		return 0;
+	case 2:
+		return -EAGAIN;
+	default:
+		return -EINVAL;
+	}
+}
+
+static void spu_hw_restart_dma(struct spu_context *ctx)
+{
+	struct spu_priv2 __iomem *priv2 = ctx->spu->priv2;
+
+	if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &ctx->spu->flags))
+		out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);
+}
+
+struct spu_context_ops spu_hw_ops = {
+	.mbox_read = spu_hw_mbox_read,
+	.mbox_stat_read = spu_hw_mbox_stat_read,
+	.mbox_stat_poll = spu_hw_mbox_stat_poll,
+	.ibox_read = spu_hw_ibox_read,
+	.wbox_write = spu_hw_wbox_write,
+	.signal1_write = spu_hw_signal1_write,
+	.signal2_write = spu_hw_signal2_write,
+	.signal1_type_set = spu_hw_signal1_type_set,
+	.signal1_type_get = spu_hw_signal1_type_get,
+	.signal2_type_set = spu_hw_signal2_type_set,
+	.signal2_type_get = spu_hw_signal2_type_get,
+	.npc_read = spu_hw_npc_read,
+	.npc_write = spu_hw_npc_write,
+	.status_read = spu_hw_status_read,
+	.get_ls = spu_hw_get_ls,
+	.privcntl_write = spu_hw_privcntl_write,
+	.runcntl_read = spu_hw_runcntl_read,
+	.runcntl_write = spu_hw_runcntl_write,
+	.runcntl_stop = spu_hw_runcntl_stop,
+	.master_start = spu_hw_master_start,
+	.master_stop = spu_hw_master_stop,
+	.set_mfc_query = spu_hw_set_mfc_query,
+	.read_mfc_tagstatus = spu_hw_read_mfc_tagstatus,
+	.get_mfc_free_elements = spu_hw_get_mfc_free_elements,
+	.send_mfc_command = spu_hw_send_mfc_command,
+	.restart_dma = spu_hw_restart_dma,
+};
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
new file mode 100644
index 0000000..cb85d23
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -0,0 +1,873 @@
+
+/*
+ * SPU file system
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/fsnotify.h>
+#include <linux/backing-dev.h>
+#include <linux/init.h>
+#include <linux/ioctl.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/parser.h>
+
+#include <asm/prom.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/uaccess.h>
+
+#include "spufs.h"
+
+struct spufs_sb_info {
+	int debug;
+};
+
+static struct kmem_cache *spufs_inode_cache;
+char *isolated_loader;
+static int isolated_loader_size;
+
+static struct spufs_sb_info *spufs_get_sb_info(struct super_block *sb)
+{
+	return sb->s_fs_info;
+}
+
+static struct inode *
+spufs_alloc_inode(struct super_block *sb)
+{
+	struct spufs_inode_info *ei;
+
+	ei = kmem_cache_alloc(spufs_inode_cache, GFP_KERNEL);
+	if (!ei)
+		return NULL;
+
+	ei->i_gang = NULL;
+	ei->i_ctx = NULL;
+	ei->i_openers = 0;
+
+	return &ei->vfs_inode;
+}
+
+static void
+spufs_destroy_inode(struct inode *inode)
+{
+	kmem_cache_free(spufs_inode_cache, SPUFS_I(inode));
+}
+
+static void
+spufs_init_once(void *p)
+{
+	struct spufs_inode_info *ei = p;
+
+	inode_init_once(&ei->vfs_inode);
+}
+
+static struct inode *
+spufs_new_inode(struct super_block *sb, int mode)
+{
+	struct inode *inode;
+
+	inode = new_inode(sb);
+	if (!inode)
+		goto out;
+
+	inode->i_mode = mode;
+	inode->i_uid = current->fsuid;
+	inode->i_gid = current->fsgid;
+	inode->i_blocks = 0;
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+out:
+	return inode;
+}
+
+static int
+spufs_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    (attr->ia_size != inode->i_size))
+		return -EINVAL;
+	return inode_setattr(inode, attr);
+}
+
+
+static int
+spufs_new_file(struct super_block *sb, struct dentry *dentry,
+		const struct file_operations *fops, int mode,
+		size_t size, struct spu_context *ctx)
+{
+	static struct inode_operations spufs_file_iops = {
+		.setattr = spufs_setattr,
+	};
+	struct inode *inode;
+	int ret;
+
+	ret = -ENOSPC;
+	inode = spufs_new_inode(sb, S_IFREG | mode);
+	if (!inode)
+		goto out;
+
+	ret = 0;
+	inode->i_op = &spufs_file_iops;
+	inode->i_fop = fops;
+	inode->i_size = size;
+	inode->i_private = SPUFS_I(inode)->i_ctx = get_spu_context(ctx);
+	d_add(dentry, inode);
+out:
+	return ret;
+}
+
+static void
+spufs_delete_inode(struct inode *inode)
+{
+	struct spufs_inode_info *ei = SPUFS_I(inode);
+
+	if (ei->i_ctx)
+		put_spu_context(ei->i_ctx);
+	if (ei->i_gang)
+		put_spu_gang(ei->i_gang);
+	clear_inode(inode);
+}
+
+static void spufs_prune_dir(struct dentry *dir)
+{
+	struct dentry *dentry, *tmp;
+
+	mutex_lock(&dir->d_inode->i_mutex);
+	list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) {
+		spin_lock(&dcache_lock);
+		spin_lock(&dentry->d_lock);
+		if (!(d_unhashed(dentry)) && dentry->d_inode) {
+			dget_locked(dentry);
+			__d_drop(dentry);
+			spin_unlock(&dentry->d_lock);
+			simple_unlink(dir->d_inode, dentry);
+			spin_unlock(&dcache_lock);
+			dput(dentry);
+		} else {
+			spin_unlock(&dentry->d_lock);
+			spin_unlock(&dcache_lock);
+		}
+	}
+	shrink_dcache_parent(dir);
+	mutex_unlock(&dir->d_inode->i_mutex);
+}
+
+/* Caller must hold parent->i_mutex */
+static int spufs_rmdir(struct inode *parent, struct dentry *dir)
+{
+	/* remove all entries */
+	spufs_prune_dir(dir);
+	d_drop(dir);
+
+	return simple_rmdir(parent, dir);
+}
+
+static int spufs_fill_dir(struct dentry *dir, struct spufs_tree_descr *files,
+			  int mode, struct spu_context *ctx)
+{
+	struct dentry *dentry, *tmp;
+	int ret;
+
+	while (files->name && files->name[0]) {
+		ret = -ENOMEM;
+		dentry = d_alloc_name(dir, files->name);
+		if (!dentry)
+			goto out;
+		ret = spufs_new_file(dir->d_sb, dentry, files->ops,
+					files->mode & mode, files->size, ctx);
+		if (ret)
+			goto out;
+		files++;
+	}
+	return 0;
+out:
+	/*
+	 * remove all children from dir. dir->inode is not set so don't
+	 * just simply use spufs_prune_dir() and panic afterwards :)
+	 * dput() looks like it will do the right thing:
+	 * - dec parent's ref counter
+	 * - remove child from parent's child list
+	 * - free child's inode if possible
+	 * - free child
+	 */
+	list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) {
+		dput(dentry);
+	}
+
+	shrink_dcache_parent(dir);
+	return ret;
+}
+
+static int spufs_dir_close(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx;
+	struct inode *parent;
+	struct dentry *dir;
+	int ret;
+
+	dir = file->f_path.dentry;
+	parent = dir->d_parent->d_inode;
+	ctx = SPUFS_I(dir->d_inode)->i_ctx;
+
+	mutex_lock_nested(&parent->i_mutex, I_MUTEX_PARENT);
+	ret = spufs_rmdir(parent, dir);
+	mutex_unlock(&parent->i_mutex);
+	WARN_ON(ret);
+
+	/* We have to give up the mm_struct */
+	spu_forget(ctx);
+
+	return dcache_dir_close(inode, file);
+}
+
+const struct file_operations spufs_context_fops = {
+	.open		= dcache_dir_open,
+	.release	= spufs_dir_close,
+	.llseek		= dcache_dir_lseek,
+	.read		= generic_read_dir,
+	.readdir	= dcache_readdir,
+	.fsync		= simple_sync_file,
+};
+EXPORT_SYMBOL_GPL(spufs_context_fops);
+
+static int
+spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags,
+		int mode)
+{
+	int ret;
+	struct inode *inode;
+	struct spu_context *ctx;
+
+	ret = -ENOSPC;
+	inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR);
+	if (!inode)
+		goto out;
+
+	if (dir->i_mode & S_ISGID) {
+		inode->i_gid = dir->i_gid;
+		inode->i_mode &= S_ISGID;
+	}
+	ctx = alloc_spu_context(SPUFS_I(dir)->i_gang); /* XXX gang */
+	SPUFS_I(inode)->i_ctx = ctx;
+	if (!ctx)
+		goto out_iput;
+
+	ctx->flags = flags;
+	inode->i_op = &simple_dir_inode_operations;
+	inode->i_fop = &simple_dir_operations;
+	if (flags & SPU_CREATE_NOSCHED)
+		ret = spufs_fill_dir(dentry, spufs_dir_nosched_contents,
+					 mode, ctx);
+	else
+		ret = spufs_fill_dir(dentry, spufs_dir_contents, mode, ctx);
+
+	if (ret)
+		goto out_free_ctx;
+
+	if (spufs_get_sb_info(dir->i_sb)->debug)
+		ret = spufs_fill_dir(dentry, spufs_dir_debug_contents,
+				mode, ctx);
+
+	if (ret)
+		goto out_free_ctx;
+
+	d_instantiate(dentry, inode);
+	dget(dentry);
+	inc_nlink(dir);
+	inc_nlink(dentry->d_inode);
+	goto out;
+
+out_free_ctx:
+	spu_forget(ctx);
+	put_spu_context(ctx);
+out_iput:
+	iput(inode);
+out:
+	return ret;
+}
+
+static int spufs_context_open(struct dentry *dentry, struct vfsmount *mnt)
+{
+	int ret;
+	struct file *filp;
+
+	ret = get_unused_fd();
+	if (ret < 0) {
+		dput(dentry);
+		mntput(mnt);
+		goto out;
+	}
+
+	filp = dentry_open(dentry, mnt, O_RDONLY);
+	if (IS_ERR(filp)) {
+		put_unused_fd(ret);
+		ret = PTR_ERR(filp);
+		goto out;
+	}
+
+	filp->f_op = &spufs_context_fops;
+	fd_install(ret, filp);
+out:
+	return ret;
+}
+
+static struct spu_context *
+spufs_assert_affinity(unsigned int flags, struct spu_gang *gang,
+						struct file *filp)
+{
+	struct spu_context *tmp, *neighbor, *err;
+	int count, node;
+	int aff_supp;
+
+	aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next,
+					struct spu, cbe_list))->aff_list);
+
+	if (!aff_supp)
+		return ERR_PTR(-EINVAL);
+
+	if (flags & SPU_CREATE_GANG)
+		return ERR_PTR(-EINVAL);
+
+	if (flags & SPU_CREATE_AFFINITY_MEM &&
+	    gang->aff_ref_ctx &&
+	    gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM)
+		return ERR_PTR(-EEXIST);
+
+	if (gang->aff_flags & AFF_MERGED)
+		return ERR_PTR(-EBUSY);
+
+	neighbor = NULL;
+	if (flags & SPU_CREATE_AFFINITY_SPU) {
+		if (!filp || filp->f_op != &spufs_context_fops)
+			return ERR_PTR(-EINVAL);
+
+		neighbor = get_spu_context(
+				SPUFS_I(filp->f_dentry->d_inode)->i_ctx);
+
+		if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) &&
+		    !list_is_last(&neighbor->aff_list, &gang->aff_list_head) &&
+		    !list_entry(neighbor->aff_list.next, struct spu_context,
+		    aff_list)->aff_head) {
+			err = ERR_PTR(-EEXIST);
+			goto out_put_neighbor;
+		}
+
+		if (gang != neighbor->gang) {
+			err = ERR_PTR(-EINVAL);
+			goto out_put_neighbor;
+		}
+
+		count = 1;
+		list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
+			count++;
+		if (list_empty(&neighbor->aff_list))
+			count++;
+
+		for (node = 0; node < MAX_NUMNODES; node++) {
+			if ((cbe_spu_info[node].n_spus - atomic_read(
+				&cbe_spu_info[node].reserved_spus)) >= count)
+				break;
+		}
+
+		if (node == MAX_NUMNODES) {
+			err = ERR_PTR(-EEXIST);
+			goto out_put_neighbor;
+		}
+	}
+
+	return neighbor;
+
+out_put_neighbor:
+	put_spu_context(neighbor);
+	return err;
+}
+
+static void
+spufs_set_affinity(unsigned int flags, struct spu_context *ctx,
+					struct spu_context *neighbor)
+{
+	if (flags & SPU_CREATE_AFFINITY_MEM)
+		ctx->gang->aff_ref_ctx = ctx;
+
+	if (flags & SPU_CREATE_AFFINITY_SPU) {
+		if (list_empty(&neighbor->aff_list)) {
+			list_add_tail(&neighbor->aff_list,
+				&ctx->gang->aff_list_head);
+			neighbor->aff_head = 1;
+		}
+
+		if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head)
+		    || list_entry(neighbor->aff_list.next, struct spu_context,
+							aff_list)->aff_head) {
+			list_add(&ctx->aff_list, &neighbor->aff_list);
+		} else  {
+			list_add_tail(&ctx->aff_list, &neighbor->aff_list);
+			if (neighbor->aff_head) {
+				neighbor->aff_head = 0;
+				ctx->aff_head = 1;
+			}
+		}
+
+		if (!ctx->gang->aff_ref_ctx)
+			ctx->gang->aff_ref_ctx = ctx;
+	}
+}
+
+static int
+spufs_create_context(struct inode *inode, struct dentry *dentry,
+			struct vfsmount *mnt, int flags, int mode,
+			struct file *aff_filp)
+{
+	int ret;
+	int affinity;
+	struct spu_gang *gang;
+	struct spu_context *neighbor;
+
+	ret = -EPERM;
+	if ((flags & SPU_CREATE_NOSCHED) &&
+	    !capable(CAP_SYS_NICE))
+		goto out_unlock;
+
+	ret = -EINVAL;
+	if ((flags & (SPU_CREATE_NOSCHED | SPU_CREATE_ISOLATE))
+	    == SPU_CREATE_ISOLATE)
+		goto out_unlock;
+
+	ret = -ENODEV;
+	if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader)
+		goto out_unlock;
+
+	gang = NULL;
+	neighbor = NULL;
+	affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU);
+	if (affinity) {
+		gang = SPUFS_I(inode)->i_gang;
+		ret = -EINVAL;
+		if (!gang)
+			goto out_unlock;
+		mutex_lock(&gang->aff_mutex);
+		neighbor = spufs_assert_affinity(flags, gang, aff_filp);
+		if (IS_ERR(neighbor)) {
+			ret = PTR_ERR(neighbor);
+			goto out_aff_unlock;
+		}
+	}
+
+	ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO);
+	if (ret)
+		goto out_aff_unlock;
+
+	if (affinity) {
+		spufs_set_affinity(flags, SPUFS_I(dentry->d_inode)->i_ctx,
+								neighbor);
+		if (neighbor)
+			put_spu_context(neighbor);
+	}
+
+	/*
+	 * get references for dget and mntget, will be released
+	 * in error path of *_open().
+	 */
+	ret = spufs_context_open(dget(dentry), mntget(mnt));
+	if (ret < 0) {
+		WARN_ON(spufs_rmdir(inode, dentry));
+		if (affinity)
+			mutex_unlock(&gang->aff_mutex);
+		mutex_unlock(&inode->i_mutex);
+		spu_forget(SPUFS_I(dentry->d_inode)->i_ctx);
+		goto out;
+	}
+
+out_aff_unlock:
+	if (affinity)
+		mutex_unlock(&gang->aff_mutex);
+out_unlock:
+	mutex_unlock(&inode->i_mutex);
+out:
+	dput(dentry);
+	return ret;
+}
+
+static int
+spufs_mkgang(struct inode *dir, struct dentry *dentry, int mode)
+{
+	int ret;
+	struct inode *inode;
+	struct spu_gang *gang;
+
+	ret = -ENOSPC;
+	inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR);
+	if (!inode)
+		goto out;
+
+	ret = 0;
+	if (dir->i_mode & S_ISGID) {
+		inode->i_gid = dir->i_gid;
+		inode->i_mode &= S_ISGID;
+	}
+	gang = alloc_spu_gang();
+	SPUFS_I(inode)->i_ctx = NULL;
+	SPUFS_I(inode)->i_gang = gang;
+	if (!gang)
+		goto out_iput;
+
+	inode->i_op = &simple_dir_inode_operations;
+	inode->i_fop = &simple_dir_operations;
+
+	d_instantiate(dentry, inode);
+	inc_nlink(dir);
+	inc_nlink(dentry->d_inode);
+	return ret;
+
+out_iput:
+	iput(inode);
+out:
+	return ret;
+}
+
+static int spufs_gang_open(struct dentry *dentry, struct vfsmount *mnt)
+{
+	int ret;
+	struct file *filp;
+
+	ret = get_unused_fd();
+	if (ret < 0) {
+		dput(dentry);
+		mntput(mnt);
+		goto out;
+	}
+
+	filp = dentry_open(dentry, mnt, O_RDONLY);
+	if (IS_ERR(filp)) {
+		put_unused_fd(ret);
+		ret = PTR_ERR(filp);
+		goto out;
+	}
+
+	filp->f_op = &simple_dir_operations;
+	fd_install(ret, filp);
+out:
+	return ret;
+}
+
+static int spufs_create_gang(struct inode *inode,
+			struct dentry *dentry,
+			struct vfsmount *mnt, int mode)
+{
+	int ret;
+
+	ret = spufs_mkgang(inode, dentry, mode & S_IRWXUGO);
+	if (ret)
+		goto out;
+
+	/*
+	 * get references for dget and mntget, will be released
+	 * in error path of *_open().
+	 */
+	ret = spufs_gang_open(dget(dentry), mntget(mnt));
+	if (ret < 0) {
+		int err = simple_rmdir(inode, dentry);
+		WARN_ON(err);
+	}
+
+out:
+	mutex_unlock(&inode->i_mutex);
+	dput(dentry);
+	return ret;
+}
+
+
+static struct file_system_type spufs_type;
+
+long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode,
+							struct file *filp)
+{
+	struct dentry *dentry;
+	int ret;
+
+	ret = -EINVAL;
+	/* check if we are on spufs */
+	if (nd->path.dentry->d_sb->s_type != &spufs_type)
+		goto out;
+
+	/* don't accept undefined flags */
+	if (flags & (~SPU_CREATE_FLAG_ALL))
+		goto out;
+
+	/* only threads can be underneath a gang */
+	if (nd->path.dentry != nd->path.dentry->d_sb->s_root) {
+		if ((flags & SPU_CREATE_GANG) ||
+		    !SPUFS_I(nd->path.dentry->d_inode)->i_gang)
+			goto out;
+	}
+
+	dentry = lookup_create(nd, 1);
+	ret = PTR_ERR(dentry);
+	if (IS_ERR(dentry))
+		goto out_dir;
+
+	ret = -EEXIST;
+	if (dentry->d_inode)
+		goto out_dput;
+
+	mode &= ~current->fs->umask;
+
+	if (flags & SPU_CREATE_GANG)
+		ret = spufs_create_gang(nd->path.dentry->d_inode,
+					 dentry, nd->path.mnt, mode);
+	else
+		ret = spufs_create_context(nd->path.dentry->d_inode,
+					    dentry, nd->path.mnt, flags, mode,
+					    filp);
+	if (ret >= 0)
+		fsnotify_mkdir(nd->path.dentry->d_inode, dentry);
+	return ret;
+
+out_dput:
+	dput(dentry);
+out_dir:
+	mutex_unlock(&nd->path.dentry->d_inode->i_mutex);
+out:
+	return ret;
+}
+
+/* File system initialization */
+enum {
+	Opt_uid, Opt_gid, Opt_mode, Opt_debug, Opt_err,
+};
+
+static const match_table_t spufs_tokens = {
+	{ Opt_uid,   "uid=%d" },
+	{ Opt_gid,   "gid=%d" },
+	{ Opt_mode,  "mode=%o" },
+	{ Opt_debug, "debug" },
+	{ Opt_err,    NULL  },
+};
+
+static int
+spufs_parse_options(struct super_block *sb, char *options, struct inode *root)
+{
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+
+	while ((p = strsep(&options, ",")) != NULL) {
+		int token, option;
+
+		if (!*p)
+			continue;
+
+		token = match_token(p, spufs_tokens, args);
+		switch (token) {
+		case Opt_uid:
+			if (match_int(&args[0], &option))
+				return 0;
+			root->i_uid = option;
+			break;
+		case Opt_gid:
+			if (match_int(&args[0], &option))
+				return 0;
+			root->i_gid = option;
+			break;
+		case Opt_mode:
+			if (match_octal(&args[0], &option))
+				return 0;
+			root->i_mode = option | S_IFDIR;
+			break;
+		case Opt_debug:
+			spufs_get_sb_info(sb)->debug = 1;
+			break;
+		default:
+			return 0;
+		}
+	}
+	return 1;
+}
+
+static void spufs_exit_isolated_loader(void)
+{
+	free_pages((unsigned long) isolated_loader,
+			get_order(isolated_loader_size));
+}
+
+static void
+spufs_init_isolated_loader(void)
+{
+	struct device_node *dn;
+	const char *loader;
+	int size;
+
+	dn = of_find_node_by_path("/spu-isolation");
+	if (!dn)
+		return;
+
+	loader = of_get_property(dn, "loader", &size);
+	if (!loader)
+		return;
+
+	/* the loader must be align on a 16 byte boundary */
+	isolated_loader = (char *)__get_free_pages(GFP_KERNEL, get_order(size));
+	if (!isolated_loader)
+		return;
+
+	isolated_loader_size = size;
+	memcpy(isolated_loader, loader, size);
+	printk(KERN_INFO "spufs: SPU isolation mode enabled\n");
+}
+
+static int
+spufs_create_root(struct super_block *sb, void *data)
+{
+	struct inode *inode;
+	int ret;
+
+	ret = -ENODEV;
+	if (!spu_management_ops)
+		goto out;
+
+	ret = -ENOMEM;
+	inode = spufs_new_inode(sb, S_IFDIR | 0775);
+	if (!inode)
+		goto out;
+
+	inode->i_op = &simple_dir_inode_operations;
+	inode->i_fop = &simple_dir_operations;
+	SPUFS_I(inode)->i_ctx = NULL;
+	inc_nlink(inode);
+
+	ret = -EINVAL;
+	if (!spufs_parse_options(sb, data, inode))
+		goto out_iput;
+
+	ret = -ENOMEM;
+	sb->s_root = d_alloc_root(inode);
+	if (!sb->s_root)
+		goto out_iput;
+
+	return 0;
+out_iput:
+	iput(inode);
+out:
+	return ret;
+}
+
+static int
+spufs_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct spufs_sb_info *info;
+	static struct super_operations s_ops = {
+		.alloc_inode = spufs_alloc_inode,
+		.destroy_inode = spufs_destroy_inode,
+		.statfs = simple_statfs,
+		.delete_inode = spufs_delete_inode,
+		.drop_inode = generic_delete_inode,
+		.show_options = generic_show_options,
+	};
+
+	save_mount_options(sb, data);
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_blocksize = PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_magic = SPUFS_MAGIC;
+	sb->s_op = &s_ops;
+	sb->s_fs_info = info;
+
+	return spufs_create_root(sb, data);
+}
+
+static int
+spufs_get_sb(struct file_system_type *fstype, int flags,
+		const char *name, void *data, struct vfsmount *mnt)
+{
+	return get_sb_single(fstype, flags, data, spufs_fill_super, mnt);
+}
+
+static struct file_system_type spufs_type = {
+	.owner = THIS_MODULE,
+	.name = "spufs",
+	.get_sb = spufs_get_sb,
+	.kill_sb = kill_litter_super,
+};
+
+static int __init spufs_init(void)
+{
+	int ret;
+
+	ret = -ENODEV;
+	if (!spu_management_ops)
+		goto out;
+
+	ret = -ENOMEM;
+	spufs_inode_cache = kmem_cache_create("spufs_inode_cache",
+			sizeof(struct spufs_inode_info), 0,
+			SLAB_HWCACHE_ALIGN, spufs_init_once);
+
+	if (!spufs_inode_cache)
+		goto out;
+	ret = spu_sched_init();
+	if (ret)
+		goto out_cache;
+	ret = register_filesystem(&spufs_type);
+	if (ret)
+		goto out_sched;
+	ret = register_spu_syscalls(&spufs_calls);
+	if (ret)
+		goto out_fs;
+
+	spufs_init_isolated_loader();
+
+	return 0;
+
+out_fs:
+	unregister_filesystem(&spufs_type);
+out_sched:
+	spu_sched_exit();
+out_cache:
+	kmem_cache_destroy(spufs_inode_cache);
+out:
+	return ret;
+}
+module_init(spufs_init);
+
+static void __exit spufs_exit(void)
+{
+	spu_sched_exit();
+	spufs_exit_isolated_loader();
+	unregister_spu_syscalls(&spufs_calls);
+	unregister_filesystem(&spufs_type);
+	kmem_cache_destroy(spufs_inode_cache);
+}
+module_exit(spufs_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>");
+
diff --git a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
new file mode 100644
index 0000000..0e9f325
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
@@ -0,0 +1,183 @@
+/*
+ * SPU local store allocation routines
+ *
+ * Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/mmu.h>
+
+#include "spufs.h"
+
+static int spu_alloc_lscsa_std(struct spu_state *csa)
+{
+	struct spu_lscsa *lscsa;
+	unsigned char *p;
+
+	lscsa = vmalloc(sizeof(struct spu_lscsa));
+	if (!lscsa)
+		return -ENOMEM;
+	memset(lscsa, 0, sizeof(struct spu_lscsa));
+	csa->lscsa = lscsa;
+
+	/* Set LS pages reserved to allow for user-space mapping. */
+	for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+		SetPageReserved(vmalloc_to_page(p));
+
+	return 0;
+}
+
+static void spu_free_lscsa_std(struct spu_state *csa)
+{
+	/* Clear reserved bit before vfree. */
+	unsigned char *p;
+
+	if (csa->lscsa == NULL)
+		return;
+
+	for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+		ClearPageReserved(vmalloc_to_page(p));
+
+	vfree(csa->lscsa);
+}
+
+#ifdef CONFIG_SPU_FS_64K_LS
+
+#define SPU_64K_PAGE_SHIFT	16
+#define SPU_64K_PAGE_ORDER	(SPU_64K_PAGE_SHIFT - PAGE_SHIFT)
+#define SPU_64K_PAGE_COUNT	(1ul << SPU_64K_PAGE_ORDER)
+
+int spu_alloc_lscsa(struct spu_state *csa)
+{
+	struct page	**pgarray;
+	unsigned char	*p;
+	int		i, j, n_4k;
+
+	/* Check availability of 64K pages */
+	if (!spu_64k_pages_available())
+		goto fail;
+
+	csa->use_big_pages = 1;
+
+	pr_debug("spu_alloc_lscsa(csa=0x%p), trying to allocate 64K pages\n",
+		 csa);
+
+	/* First try to allocate our 64K pages. We need 5 of them
+	 * with the current implementation. In the future, we should try
+	 * to separate the lscsa with the actual local store image, thus
+	 * allowing us to require only 4 64K pages per context
+	 */
+	for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) {
+		/* XXX This is likely to fail, we should use a special pool
+		 *     similiar to what hugetlbfs does.
+		 */
+		csa->lscsa_pages[i] = alloc_pages(GFP_KERNEL,
+						  SPU_64K_PAGE_ORDER);
+		if (csa->lscsa_pages[i] == NULL)
+			goto fail;
+	}
+
+	pr_debug(" success ! creating vmap...\n");
+
+	/* Now we need to create a vmalloc mapping of these for the kernel
+	 * and SPU context switch code to use. Currently, we stick to a
+	 * normal kernel vmalloc mapping, which in our case will be 4K
+	 */
+	n_4k = SPU_64K_PAGE_COUNT * SPU_LSCSA_NUM_BIG_PAGES;
+	pgarray = kmalloc(sizeof(struct page *) * n_4k, GFP_KERNEL);
+	if (pgarray == NULL)
+		goto fail;
+	for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++)
+		for (j = 0; j < SPU_64K_PAGE_COUNT; j++)
+			/* We assume all the struct page's are contiguous
+			 * which should be hopefully the case for an order 4
+			 * allocation..
+			 */
+			pgarray[i * SPU_64K_PAGE_COUNT + j] =
+				csa->lscsa_pages[i] + j;
+	csa->lscsa = vmap(pgarray, n_4k, VM_USERMAP, PAGE_KERNEL);
+	kfree(pgarray);
+	if (csa->lscsa == NULL)
+		goto fail;
+
+	memset(csa->lscsa, 0, sizeof(struct spu_lscsa));
+
+	/* Set LS pages reserved to allow for user-space mapping.
+	 *
+	 * XXX isn't that a bit obsolete ? I think we should just
+	 * make sure the page count is high enough. Anyway, won't harm
+	 * for now
+	 */
+	for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+		SetPageReserved(vmalloc_to_page(p));
+
+	pr_debug(" all good !\n");
+
+	return 0;
+fail:
+	pr_debug("spufs: failed to allocate lscsa 64K pages, falling back\n");
+	spu_free_lscsa(csa);
+	return spu_alloc_lscsa_std(csa);
+}
+
+void spu_free_lscsa(struct spu_state *csa)
+{
+	unsigned char *p;
+	int i;
+
+	if (!csa->use_big_pages) {
+		spu_free_lscsa_std(csa);
+		return;
+	}
+	csa->use_big_pages = 0;
+
+	if (csa->lscsa == NULL)
+		goto free_pages;
+
+	for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+		ClearPageReserved(vmalloc_to_page(p));
+
+	vunmap(csa->lscsa);
+	csa->lscsa = NULL;
+
+ free_pages:
+
+	for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++)
+		if (csa->lscsa_pages[i])
+			__free_pages(csa->lscsa_pages[i], SPU_64K_PAGE_ORDER);
+}
+
+#else /* CONFIG_SPU_FS_64K_LS */
+
+int spu_alloc_lscsa(struct spu_state *csa)
+{
+	return spu_alloc_lscsa_std(csa);
+}
+
+void spu_free_lscsa(struct spu_state *csa)
+{
+	spu_free_lscsa_std(csa);
+}
+
+#endif /* !defined(CONFIG_SPU_FS_64K_LS) */
diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c
new file mode 100644
index 0000000..c58bd36
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/run.c
@@ -0,0 +1,451 @@
+#define DEBUG
+
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/io.h>
+#include <asm/unistd.h>
+
+#include "spufs.h"
+
+/* interrupt-level stop callback function. */
+void spufs_stop_callback(struct spu *spu, int irq)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	/*
+	 * It should be impossible to preempt a context while an exception
+	 * is being processed, since the context switch code is specially
+	 * coded to deal with interrupts ... But, just in case, sanity check
+	 * the context pointer.  It is OK to return doing nothing since
+	 * the exception will be regenerated when the context is resumed.
+	 */
+	if (ctx) {
+		/* Copy exception arguments into module specific structure */
+		switch(irq) {
+		case 0 :
+			ctx->csa.class_0_pending = spu->class_0_pending;
+			ctx->csa.class_0_dar = spu->class_0_dar;
+			break;
+		case 1 :
+			ctx->csa.class_1_dsisr = spu->class_1_dsisr;
+			ctx->csa.class_1_dar = spu->class_1_dar;
+			break;
+		case 2 :
+			break;
+		}
+
+		/* ensure that the exception status has hit memory before a
+		 * thread waiting on the context's stop queue is woken */
+		smp_wmb();
+
+		wake_up_all(&ctx->stop_wq);
+	}
+}
+
+int spu_stopped(struct spu_context *ctx, u32 *stat)
+{
+	u64 dsisr;
+	u32 stopped;
+
+	stopped = SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP |
+		SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP;
+
+top:
+	*stat = ctx->ops->status_read(ctx);
+	if (*stat & stopped) {
+		/*
+		 * If the spu hasn't finished stopping, we need to
+		 * re-read the register to get the stopped value.
+		 */
+		if (*stat & SPU_STATUS_RUNNING)
+			goto top;
+		return 1;
+	}
+
+	if (test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags))
+		return 1;
+
+	dsisr = ctx->csa.class_1_dsisr;
+	if (dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))
+		return 1;
+
+	if (ctx->csa.class_0_pending)
+		return 1;
+
+	return 0;
+}
+
+static int spu_setup_isolated(struct spu_context *ctx)
+{
+	int ret;
+	u64 __iomem *mfc_cntl;
+	u64 sr1;
+	u32 status;
+	unsigned long timeout;
+	const u32 status_loading = SPU_STATUS_RUNNING
+		| SPU_STATUS_ISOLATED_STATE | SPU_STATUS_ISOLATED_LOAD_STATUS;
+
+	ret = -ENODEV;
+	if (!isolated_loader)
+		goto out;
+
+	/*
+	 * We need to exclude userspace access to the context.
+	 *
+	 * To protect against memory access we invalidate all ptes
+	 * and make sure the pagefault handlers block on the mutex.
+	 */
+	spu_unmap_mappings(ctx);
+
+	mfc_cntl = &ctx->spu->priv2->mfc_control_RW;
+
+	/* purge the MFC DMA queue to ensure no spurious accesses before we
+	 * enter kernel mode */
+	timeout = jiffies + HZ;
+	out_be64(mfc_cntl, MFC_CNTL_PURGE_DMA_REQUEST);
+	while ((in_be64(mfc_cntl) & MFC_CNTL_PURGE_DMA_STATUS_MASK)
+			!= MFC_CNTL_PURGE_DMA_COMPLETE) {
+		if (time_after(jiffies, timeout)) {
+			printk(KERN_ERR "%s: timeout flushing MFC DMA queue\n",
+					__func__);
+			ret = -EIO;
+			goto out;
+		}
+		cond_resched();
+	}
+
+	/* put the SPE in kernel mode to allow access to the loader */
+	sr1 = spu_mfc_sr1_get(ctx->spu);
+	sr1 &= ~MFC_STATE1_PROBLEM_STATE_MASK;
+	spu_mfc_sr1_set(ctx->spu, sr1);
+
+	/* start the loader */
+	ctx->ops->signal1_write(ctx, (unsigned long)isolated_loader >> 32);
+	ctx->ops->signal2_write(ctx,
+			(unsigned long)isolated_loader & 0xffffffff);
+
+	ctx->ops->runcntl_write(ctx,
+			SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE);
+
+	ret = 0;
+	timeout = jiffies + HZ;
+	while (((status = ctx->ops->status_read(ctx)) & status_loading) ==
+				status_loading) {
+		if (time_after(jiffies, timeout)) {
+			printk(KERN_ERR "%s: timeout waiting for loader\n",
+					__func__);
+			ret = -EIO;
+			goto out_drop_priv;
+		}
+		cond_resched();
+	}
+
+	if (!(status & SPU_STATUS_RUNNING)) {
+		/* If isolated LOAD has failed: run SPU, we will get a stop-and
+		 * signal later. */
+		pr_debug("%s: isolated LOAD failed\n", __func__);
+		ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
+		ret = -EACCES;
+		goto out_drop_priv;
+	}
+
+	if (!(status & SPU_STATUS_ISOLATED_STATE)) {
+		/* This isn't allowed by the CBEA, but check anyway */
+		pr_debug("%s: SPU fell out of isolated mode?\n", __func__);
+		ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_STOP);
+		ret = -EINVAL;
+		goto out_drop_priv;
+	}
+
+out_drop_priv:
+	/* Finished accessing the loader. Drop kernel mode */
+	sr1 |= MFC_STATE1_PROBLEM_STATE_MASK;
+	spu_mfc_sr1_set(ctx->spu, sr1);
+
+out:
+	return ret;
+}
+
+static int spu_run_init(struct spu_context *ctx, u32 *npc)
+{
+	unsigned long runcntl = SPU_RUNCNTL_RUNNABLE;
+	int ret;
+
+	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+	/*
+	 * NOSCHED is synchronous scheduling with respect to the caller.
+	 * The caller waits for the context to be loaded.
+	 */
+	if (ctx->flags & SPU_CREATE_NOSCHED) {
+		if (ctx->state == SPU_STATE_SAVED) {
+			ret = spu_activate(ctx, 0);
+			if (ret)
+				return ret;
+		}
+	}
+
+	/*
+	 * Apply special setup as required.
+	 */
+	if (ctx->flags & SPU_CREATE_ISOLATE) {
+		if (!(ctx->ops->status_read(ctx) & SPU_STATUS_ISOLATED_STATE)) {
+			ret = spu_setup_isolated(ctx);
+			if (ret)
+				return ret;
+		}
+
+		/*
+		 * If userspace has set the runcntrl register (eg, to
+		 * issue an isolated exit), we need to re-set it here
+		 */
+		runcntl = ctx->ops->runcntl_read(ctx) &
+			(SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE);
+		if (runcntl == 0)
+			runcntl = SPU_RUNCNTL_RUNNABLE;
+	} else {
+		unsigned long privcntl;
+
+		if (test_thread_flag(TIF_SINGLESTEP))
+			privcntl = SPU_PRIVCNTL_MODE_SINGLE_STEP;
+		else
+			privcntl = SPU_PRIVCNTL_MODE_NORMAL;
+
+		ctx->ops->privcntl_write(ctx, privcntl);
+		ctx->ops->npc_write(ctx, *npc);
+	}
+
+	ctx->ops->runcntl_write(ctx, runcntl);
+
+	if (ctx->flags & SPU_CREATE_NOSCHED) {
+		spuctx_switch_state(ctx, SPU_UTIL_USER);
+	} else {
+
+		if (ctx->state == SPU_STATE_SAVED) {
+			ret = spu_activate(ctx, 0);
+			if (ret)
+				return ret;
+		} else {
+			spuctx_switch_state(ctx, SPU_UTIL_USER);
+		}
+	}
+
+	set_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags);
+	return 0;
+}
+
+static int spu_run_fini(struct spu_context *ctx, u32 *npc,
+			       u32 *status)
+{
+	int ret = 0;
+
+	spu_del_from_rq(ctx);
+
+	*status = ctx->ops->status_read(ctx);
+	*npc = ctx->ops->npc_read(ctx);
+
+	spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
+	clear_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags);
+	spu_switch_log_notify(NULL, ctx, SWITCH_LOG_EXIT, *status);
+	spu_release(ctx);
+
+	if (signal_pending(current))
+		ret = -ERESTARTSYS;
+
+	return ret;
+}
+
+/*
+ * SPU syscall restarting is tricky because we violate the basic
+ * assumption that the signal handler is running on the interrupted
+ * thread. Here instead, the handler runs on PowerPC user space code,
+ * while the syscall was called from the SPU.
+ * This means we can only do a very rough approximation of POSIX
+ * signal semantics.
+ */
+static int spu_handle_restartsys(struct spu_context *ctx, long *spu_ret,
+			  unsigned int *npc)
+{
+	int ret;
+
+	switch (*spu_ret) {
+	case -ERESTARTSYS:
+	case -ERESTARTNOINTR:
+		/*
+		 * Enter the regular syscall restarting for
+		 * sys_spu_run, then restart the SPU syscall
+		 * callback.
+		 */
+		*npc -= 8;
+		ret = -ERESTARTSYS;
+		break;
+	case -ERESTARTNOHAND:
+	case -ERESTART_RESTARTBLOCK:
+		/*
+		 * Restart block is too hard for now, just return -EINTR
+		 * to the SPU.
+		 * ERESTARTNOHAND comes from sys_pause, we also return
+		 * -EINTR from there.
+		 * Assume that we need to be restarted ourselves though.
+		 */
+		*spu_ret = -EINTR;
+		ret = -ERESTARTSYS;
+		break;
+	default:
+		printk(KERN_WARNING "%s: unexpected return code %ld\n",
+			__func__, *spu_ret);
+		ret = 0;
+	}
+	return ret;
+}
+
+static int spu_process_callback(struct spu_context *ctx)
+{
+	struct spu_syscall_block s;
+	u32 ls_pointer, npc;
+	void __iomem *ls;
+	long spu_ret;
+	int ret;
+
+	/* get syscall block from local store */
+	npc = ctx->ops->npc_read(ctx) & ~3;
+	ls = (void __iomem *)ctx->ops->get_ls(ctx);
+	ls_pointer = in_be32(ls + npc);
+	if (ls_pointer > (LS_SIZE - sizeof(s)))
+		return -EFAULT;
+	memcpy_fromio(&s, ls + ls_pointer, sizeof(s));
+
+	/* do actual syscall without pinning the spu */
+	ret = 0;
+	spu_ret = -ENOSYS;
+	npc += 4;
+
+	if (s.nr_ret < __NR_syscalls) {
+		spu_release(ctx);
+		/* do actual system call from here */
+		spu_ret = spu_sys_callback(&s);
+		if (spu_ret <= -ERESTARTSYS) {
+			ret = spu_handle_restartsys(ctx, &spu_ret, &npc);
+		}
+		mutex_lock(&ctx->state_mutex);
+		if (ret == -ERESTARTSYS)
+			return ret;
+	}
+
+	/* need to re-get the ls, as it may have changed when we released the
+	 * spu */
+	ls = (void __iomem *)ctx->ops->get_ls(ctx);
+
+	/* write result, jump over indirect pointer */
+	memcpy_toio(ls + ls_pointer, &spu_ret, sizeof(spu_ret));
+	ctx->ops->npc_write(ctx, npc);
+	ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
+	return ret;
+}
+
+long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event)
+{
+	int ret;
+	struct spu *spu;
+	u32 status;
+
+	if (mutex_lock_interruptible(&ctx->run_mutex))
+		return -ERESTARTSYS;
+
+	ctx->event_return = 0;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		goto out_unlock;
+
+	spu_enable_spu(ctx);
+
+	spu_update_sched_info(ctx);
+
+	ret = spu_run_init(ctx, npc);
+	if (ret) {
+		spu_release(ctx);
+		goto out;
+	}
+
+	do {
+		ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status));
+		if (unlikely(ret)) {
+			/*
+			 * This is nasty: we need the state_mutex for all the
+			 * bookkeeping even if the syscall was interrupted by
+			 * a signal. ewww.
+			 */
+			mutex_lock(&ctx->state_mutex);
+			break;
+		}
+		spu = ctx->spu;
+		if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE,
+						&ctx->sched_flags))) {
+			if (!(status & SPU_STATUS_STOPPED_BY_STOP)) {
+				spu_switch_notify(spu, ctx);
+				continue;
+			}
+		}
+
+		spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+		if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+		    (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) {
+			ret = spu_process_callback(ctx);
+			if (ret)
+				break;
+			status &= ~SPU_STATUS_STOPPED_BY_STOP;
+		}
+		ret = spufs_handle_class1(ctx);
+		if (ret)
+			break;
+
+		ret = spufs_handle_class0(ctx);
+		if (ret)
+			break;
+
+		if (signal_pending(current))
+			ret = -ERESTARTSYS;
+	} while (!ret && !(status & (SPU_STATUS_STOPPED_BY_STOP |
+				      SPU_STATUS_STOPPED_BY_HALT |
+				       SPU_STATUS_SINGLE_STEP)));
+
+	spu_disable_spu(ctx);
+	ret = spu_run_fini(ctx, npc, &status);
+	spu_yield(ctx);
+
+	if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+	    (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100))
+		ctx->stats.libassist++;
+
+	if ((ret == 0) ||
+	    ((ret == -ERESTARTSYS) &&
+	     ((status & SPU_STATUS_STOPPED_BY_HALT) ||
+	      (status & SPU_STATUS_SINGLE_STEP) ||
+	      ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+	       (status >> SPU_STOP_STATUS_SHIFT != 0x2104)))))
+		ret = status;
+
+	/* Note: we don't need to force_sig SIGTRAP on single-step
+	 * since we have TIF_SINGLESTEP set, thus the kernel will do
+	 * it upon return from the syscall anyawy
+	 */
+	if (unlikely(status & SPU_STATUS_SINGLE_STEP))
+		ret = -ERESTARTSYS;
+
+	else if (unlikely((status & SPU_STATUS_STOPPED_BY_STOP)
+	    && (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff)) {
+		force_sig(SIGTRAP, current);
+		ret = -ERESTARTSYS;
+	}
+
+out:
+	*event = ctx->event_return;
+out_unlock:
+	mutex_unlock(&ctx->run_mutex);
+	return ret;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
new file mode 100644
index 0000000..2ad914c
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -0,0 +1,1173 @@
+/* sched.c - SPU scheduler.
+ *
+ * Copyright (C) IBM 2005
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * 2006-03-31	NUMA domains added.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/completion.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/numa.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+#include <linux/pid_namespace.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/marker.h>
+
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/spu_priv1.h>
+#include "spufs.h"
+
+struct spu_prio_array {
+	DECLARE_BITMAP(bitmap, MAX_PRIO);
+	struct list_head runq[MAX_PRIO];
+	spinlock_t runq_lock;
+	int nr_waiting;
+};
+
+static unsigned long spu_avenrun[3];
+static struct spu_prio_array *spu_prio;
+static struct task_struct *spusched_task;
+static struct timer_list spusched_timer;
+static struct timer_list spuloadavg_timer;
+
+/*
+ * Priority of a normal, non-rt, non-niced'd process (aka nice level 0).
+ */
+#define NORMAL_PRIO		120
+
+/*
+ * Frequency of the spu scheduler tick.  By default we do one SPU scheduler
+ * tick for every 10 CPU scheduler ticks.
+ */
+#define SPUSCHED_TICK		(10)
+
+/*
+ * These are the 'tuning knobs' of the scheduler:
+ *
+ * Minimum timeslice is 5 msecs (or 1 spu scheduler tick, whichever is
+ * larger), default timeslice is 100 msecs, maximum timeslice is 800 msecs.
+ */
+#define MIN_SPU_TIMESLICE	max(5 * HZ / (1000 * SPUSCHED_TICK), 1)
+#define DEF_SPU_TIMESLICE	(100 * HZ / (1000 * SPUSCHED_TICK))
+
+#define MAX_USER_PRIO		(MAX_PRIO - MAX_RT_PRIO)
+#define SCALE_PRIO(x, prio) \
+	max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_SPU_TIMESLICE)
+
+/*
+ * scale user-nice values [ -20 ... 0 ... 19 ] to time slice values:
+ * [800ms ... 100ms ... 5ms]
+ *
+ * The higher a thread's priority, the bigger timeslices
+ * it gets during one round of execution. But even the lowest
+ * priority thread gets MIN_TIMESLICE worth of execution time.
+ */
+void spu_set_timeslice(struct spu_context *ctx)
+{
+	if (ctx->prio < NORMAL_PRIO)
+		ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE * 4, ctx->prio);
+	else
+		ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE, ctx->prio);
+}
+
+/*
+ * Update scheduling information from the owning thread.
+ */
+void __spu_update_sched_info(struct spu_context *ctx)
+{
+	/*
+	 * assert that the context is not on the runqueue, so it is safe
+	 * to change its scheduling parameters.
+	 */
+	BUG_ON(!list_empty(&ctx->rq));
+
+	/*
+	 * 32-Bit assignments are atomic on powerpc, and we don't care about
+	 * memory ordering here because retrieving the controlling thread is
+	 * per definition racy.
+	 */
+	ctx->tid = current->pid;
+
+	/*
+	 * We do our own priority calculations, so we normally want
+	 * ->static_prio to start with. Unfortunately this field
+	 * contains junk for threads with a realtime scheduling
+	 * policy so we have to look at ->prio in this case.
+	 */
+	if (rt_prio(current->prio))
+		ctx->prio = current->prio;
+	else
+		ctx->prio = current->static_prio;
+	ctx->policy = current->policy;
+
+	/*
+	 * TO DO: the context may be loaded, so we may need to activate
+	 * it again on a different node. But it shouldn't hurt anything
+	 * to update its parameters, because we know that the scheduler
+	 * is not actively looking at this field, since it is not on the
+	 * runqueue. The context will be rescheduled on the proper node
+	 * if it is timesliced or preempted.
+	 */
+	ctx->cpus_allowed = current->cpus_allowed;
+
+	/* Save the current cpu id for spu interrupt routing. */
+	ctx->last_ran = raw_smp_processor_id();
+}
+
+void spu_update_sched_info(struct spu_context *ctx)
+{
+	int node;
+
+	if (ctx->state == SPU_STATE_RUNNABLE) {
+		node = ctx->spu->node;
+
+		/*
+		 * Take list_mutex to sync with find_victim().
+		 */
+		mutex_lock(&cbe_spu_info[node].list_mutex);
+		__spu_update_sched_info(ctx);
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
+	} else {
+		__spu_update_sched_info(ctx);
+	}
+}
+
+static int __node_allowed(struct spu_context *ctx, int node)
+{
+	if (nr_cpus_node(node)) {
+		cpumask_t mask = node_to_cpumask(node);
+
+		if (cpus_intersects(mask, ctx->cpus_allowed))
+			return 1;
+	}
+
+	return 0;
+}
+
+static int node_allowed(struct spu_context *ctx, int node)
+{
+	int rval;
+
+	spin_lock(&spu_prio->runq_lock);
+	rval = __node_allowed(ctx, node);
+	spin_unlock(&spu_prio->runq_lock);
+
+	return rval;
+}
+
+void do_notify_spus_active(void)
+{
+	int node;
+
+	/*
+	 * Wake up the active spu_contexts.
+	 *
+	 * When the awakened processes see their "notify_active" flag is set,
+	 * they will call spu_switch_notify().
+	 */
+	for_each_online_node(node) {
+		struct spu *spu;
+
+		mutex_lock(&cbe_spu_info[node].list_mutex);
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+			if (spu->alloc_state != SPU_FREE) {
+				struct spu_context *ctx = spu->ctx;
+				set_bit(SPU_SCHED_NOTIFY_ACTIVE,
+					&ctx->sched_flags);
+				mb();
+				wake_up_all(&ctx->stop_wq);
+			}
+		}
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
+	}
+}
+
+/**
+ * spu_bind_context - bind spu context to physical spu
+ * @spu:	physical spu to bind to
+ * @ctx:	context to bind
+ */
+static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
+{
+	spu_context_trace(spu_bind_context__enter, ctx, spu);
+
+	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+	if (ctx->flags & SPU_CREATE_NOSCHED)
+		atomic_inc(&cbe_spu_info[spu->node].reserved_spus);
+
+	ctx->stats.slb_flt_base = spu->stats.slb_flt;
+	ctx->stats.class2_intr_base = spu->stats.class2_intr;
+
+	spu_associate_mm(spu, ctx->owner);
+
+	spin_lock_irq(&spu->register_lock);
+	spu->ctx = ctx;
+	spu->flags = 0;
+	ctx->spu = spu;
+	ctx->ops = &spu_hw_ops;
+	spu->pid = current->pid;
+	spu->tgid = current->tgid;
+	spu->ibox_callback = spufs_ibox_callback;
+	spu->wbox_callback = spufs_wbox_callback;
+	spu->stop_callback = spufs_stop_callback;
+	spu->mfc_callback = spufs_mfc_callback;
+	spin_unlock_irq(&spu->register_lock);
+
+	spu_unmap_mappings(ctx);
+
+	spu_switch_log_notify(spu, ctx, SWITCH_LOG_START, 0);
+	spu_restore(&ctx->csa, spu);
+	spu->timestamp = jiffies;
+	spu_switch_notify(spu, ctx);
+	ctx->state = SPU_STATE_RUNNABLE;
+
+	spuctx_switch_state(ctx, SPU_UTIL_USER);
+}
+
+/*
+ * Must be used with the list_mutex held.
+ */
+static inline int sched_spu(struct spu *spu)
+{
+	BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex));
+
+	return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED));
+}
+
+static void aff_merge_remaining_ctxs(struct spu_gang *gang)
+{
+	struct spu_context *ctx;
+
+	list_for_each_entry(ctx, &gang->aff_list_head, aff_list) {
+		if (list_empty(&ctx->aff_list))
+			list_add(&ctx->aff_list, &gang->aff_list_head);
+	}
+	gang->aff_flags |= AFF_MERGED;
+}
+
+static void aff_set_offsets(struct spu_gang *gang)
+{
+	struct spu_context *ctx;
+	int offset;
+
+	offset = -1;
+	list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
+								aff_list) {
+		if (&ctx->aff_list == &gang->aff_list_head)
+			break;
+		ctx->aff_offset = offset--;
+	}
+
+	offset = 0;
+	list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) {
+		if (&ctx->aff_list == &gang->aff_list_head)
+			break;
+		ctx->aff_offset = offset++;
+	}
+
+	gang->aff_flags |= AFF_OFFSETS_SET;
+}
+
+static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff,
+		 int group_size, int lowest_offset)
+{
+	struct spu *spu;
+	int node, n;
+
+	/*
+	 * TODO: A better algorithm could be used to find a good spu to be
+	 *       used as reference location for the ctxs chain.
+	 */
+	node = cpu_to_node(raw_smp_processor_id());
+	for (n = 0; n < MAX_NUMNODES; n++, node++) {
+		/*
+		 * "available_spus" counts how many spus are not potentially
+		 * going to be used by other affinity gangs whose reference
+		 * context is already in place. Although this code seeks to
+		 * avoid having affinity gangs with a summed amount of
+		 * contexts bigger than the amount of spus in the node,
+		 * this may happen sporadically. In this case, available_spus
+		 * becomes negative, which is harmless.
+		 */
+		int available_spus;
+
+		node = (node < MAX_NUMNODES) ? node : 0;
+		if (!node_allowed(ctx, node))
+			continue;
+
+		available_spus = 0;
+		mutex_lock(&cbe_spu_info[node].list_mutex);
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+			if (spu->ctx && spu->ctx->gang && !spu->ctx->aff_offset
+					&& spu->ctx->gang->aff_ref_spu)
+				available_spus -= spu->ctx->gang->contexts;
+			available_spus++;
+		}
+		if (available_spus < ctx->gang->contexts) {
+			mutex_unlock(&cbe_spu_info[node].list_mutex);
+			continue;
+		}
+
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+			if ((!mem_aff || spu->has_mem_affinity) &&
+							sched_spu(spu)) {
+				mutex_unlock(&cbe_spu_info[node].list_mutex);
+				return spu;
+			}
+		}
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
+	}
+	return NULL;
+}
+
+static void aff_set_ref_point_location(struct spu_gang *gang)
+{
+	int mem_aff, gs, lowest_offset;
+	struct spu_context *ctx;
+	struct spu *tmp;
+
+	mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM;
+	lowest_offset = 0;
+	gs = 0;
+
+	list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
+		gs++;
+
+	list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
+								aff_list) {
+		if (&ctx->aff_list == &gang->aff_list_head)
+			break;
+		lowest_offset = ctx->aff_offset;
+	}
+
+	gang->aff_ref_spu = aff_ref_location(gang->aff_ref_ctx, mem_aff, gs,
+							lowest_offset);
+}
+
+static struct spu *ctx_location(struct spu *ref, int offset, int node)
+{
+	struct spu *spu;
+
+	spu = NULL;
+	if (offset >= 0) {
+		list_for_each_entry(spu, ref->aff_list.prev, aff_list) {
+			BUG_ON(spu->node != node);
+			if (offset == 0)
+				break;
+			if (sched_spu(spu))
+				offset--;
+		}
+	} else {
+		list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) {
+			BUG_ON(spu->node != node);
+			if (offset == 0)
+				break;
+			if (sched_spu(spu))
+				offset++;
+		}
+	}
+
+	return spu;
+}
+
+/*
+ * affinity_check is called each time a context is going to be scheduled.
+ * It returns the spu ptr on which the context must run.
+ */
+static int has_affinity(struct spu_context *ctx)
+{
+	struct spu_gang *gang = ctx->gang;
+
+	if (list_empty(&ctx->aff_list))
+		return 0;
+
+	if (atomic_read(&ctx->gang->aff_sched_count) == 0)
+		ctx->gang->aff_ref_spu = NULL;
+
+	if (!gang->aff_ref_spu) {
+		if (!(gang->aff_flags & AFF_MERGED))
+			aff_merge_remaining_ctxs(gang);
+		if (!(gang->aff_flags & AFF_OFFSETS_SET))
+			aff_set_offsets(gang);
+		aff_set_ref_point_location(gang);
+	}
+
+	return gang->aff_ref_spu != NULL;
+}
+
+/**
+ * spu_unbind_context - unbind spu context from physical spu
+ * @spu:	physical spu to unbind from
+ * @ctx:	context to unbind
+ */
+static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
+{
+	u32 status;
+
+	spu_context_trace(spu_unbind_context__enter, ctx, spu);
+
+	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+ 	if (spu->ctx->flags & SPU_CREATE_NOSCHED)
+		atomic_dec(&cbe_spu_info[spu->node].reserved_spus);
+
+	if (ctx->gang)
+		/*
+		 * If ctx->gang->aff_sched_count is positive, SPU affinity is
+		 * being considered in this gang. Using atomic_dec_if_positive
+		 * allow us to skip an explicit check for affinity in this gang
+		 */
+		atomic_dec_if_positive(&ctx->gang->aff_sched_count);
+
+	spu_switch_notify(spu, NULL);
+	spu_unmap_mappings(ctx);
+	spu_save(&ctx->csa, spu);
+	spu_switch_log_notify(spu, ctx, SWITCH_LOG_STOP, 0);
+
+	spin_lock_irq(&spu->register_lock);
+	spu->timestamp = jiffies;
+	ctx->state = SPU_STATE_SAVED;
+	spu->ibox_callback = NULL;
+	spu->wbox_callback = NULL;
+	spu->stop_callback = NULL;
+	spu->mfc_callback = NULL;
+	spu->pid = 0;
+	spu->tgid = 0;
+	ctx->ops = &spu_backing_ops;
+	spu->flags = 0;
+	spu->ctx = NULL;
+	spin_unlock_irq(&spu->register_lock);
+
+	spu_associate_mm(spu, NULL);
+
+	ctx->stats.slb_flt +=
+		(spu->stats.slb_flt - ctx->stats.slb_flt_base);
+	ctx->stats.class2_intr +=
+		(spu->stats.class2_intr - ctx->stats.class2_intr_base);
+
+	/* This maps the underlying spu state to idle */
+	spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
+	ctx->spu = NULL;
+
+	if (spu_stopped(ctx, &status))
+		wake_up_all(&ctx->stop_wq);
+}
+
+/**
+ * spu_add_to_rq - add a context to the runqueue
+ * @ctx:       context to add
+ */
+static void __spu_add_to_rq(struct spu_context *ctx)
+{
+	/*
+	 * Unfortunately this code path can be called from multiple threads
+	 * on behalf of a single context due to the way the problem state
+	 * mmap support works.
+	 *
+	 * Fortunately we need to wake up all these threads at the same time
+	 * and can simply skip the runqueue addition for every but the first
+	 * thread getting into this codepath.
+	 *
+	 * It's still quite hacky, and long-term we should proxy all other
+	 * threads through the owner thread so that spu_run is in control
+	 * of all the scheduling activity for a given context.
+	 */
+	if (list_empty(&ctx->rq)) {
+		list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
+		set_bit(ctx->prio, spu_prio->bitmap);
+		if (!spu_prio->nr_waiting++)
+			__mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
+	}
+}
+
+static void spu_add_to_rq(struct spu_context *ctx)
+{
+	spin_lock(&spu_prio->runq_lock);
+	__spu_add_to_rq(ctx);
+	spin_unlock(&spu_prio->runq_lock);
+}
+
+static void __spu_del_from_rq(struct spu_context *ctx)
+{
+	int prio = ctx->prio;
+
+	if (!list_empty(&ctx->rq)) {
+		if (!--spu_prio->nr_waiting)
+			del_timer(&spusched_timer);
+		list_del_init(&ctx->rq);
+
+		if (list_empty(&spu_prio->runq[prio]))
+			clear_bit(prio, spu_prio->bitmap);
+	}
+}
+
+void spu_del_from_rq(struct spu_context *ctx)
+{
+	spin_lock(&spu_prio->runq_lock);
+	__spu_del_from_rq(ctx);
+	spin_unlock(&spu_prio->runq_lock);
+}
+
+static void spu_prio_wait(struct spu_context *ctx)
+{
+	DEFINE_WAIT(wait);
+
+	/*
+	 * The caller must explicitly wait for a context to be loaded
+	 * if the nosched flag is set.  If NOSCHED is not set, the caller
+	 * queues the context and waits for an spu event or error.
+	 */
+	BUG_ON(!(ctx->flags & SPU_CREATE_NOSCHED));
+
+	spin_lock(&spu_prio->runq_lock);
+	prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE);
+	if (!signal_pending(current)) {
+		__spu_add_to_rq(ctx);
+		spin_unlock(&spu_prio->runq_lock);
+		mutex_unlock(&ctx->state_mutex);
+		schedule();
+		mutex_lock(&ctx->state_mutex);
+		spin_lock(&spu_prio->runq_lock);
+		__spu_del_from_rq(ctx);
+	}
+	spin_unlock(&spu_prio->runq_lock);
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(&ctx->stop_wq, &wait);
+}
+
+static struct spu *spu_get_idle(struct spu_context *ctx)
+{
+	struct spu *spu, *aff_ref_spu;
+	int node, n;
+
+	spu_context_nospu_trace(spu_get_idle__enter, ctx);
+
+	if (ctx->gang) {
+		mutex_lock(&ctx->gang->aff_mutex);
+		if (has_affinity(ctx)) {
+			aff_ref_spu = ctx->gang->aff_ref_spu;
+			atomic_inc(&ctx->gang->aff_sched_count);
+			mutex_unlock(&ctx->gang->aff_mutex);
+			node = aff_ref_spu->node;
+
+			mutex_lock(&cbe_spu_info[node].list_mutex);
+			spu = ctx_location(aff_ref_spu, ctx->aff_offset, node);
+			if (spu && spu->alloc_state == SPU_FREE)
+				goto found;
+			mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+			atomic_dec(&ctx->gang->aff_sched_count);
+			goto not_found;
+		}
+		mutex_unlock(&ctx->gang->aff_mutex);
+	}
+	node = cpu_to_node(raw_smp_processor_id());
+	for (n = 0; n < MAX_NUMNODES; n++, node++) {
+		node = (node < MAX_NUMNODES) ? node : 0;
+		if (!node_allowed(ctx, node))
+			continue;
+
+		mutex_lock(&cbe_spu_info[node].list_mutex);
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+			if (spu->alloc_state == SPU_FREE)
+				goto found;
+		}
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
+	}
+
+ not_found:
+	spu_context_nospu_trace(spu_get_idle__not_found, ctx);
+	return NULL;
+
+ found:
+	spu->alloc_state = SPU_USED;
+	mutex_unlock(&cbe_spu_info[node].list_mutex);
+	spu_context_trace(spu_get_idle__found, ctx, spu);
+	spu_init_channels(spu);
+	return spu;
+}
+
+/**
+ * find_victim - find a lower priority context to preempt
+ * @ctx:	canidate context for running
+ *
+ * Returns the freed physical spu to run the new context on.
+ */
+static struct spu *find_victim(struct spu_context *ctx)
+{
+	struct spu_context *victim = NULL;
+	struct spu *spu;
+	int node, n;
+
+	spu_context_nospu_trace(spu_find_victim__enter, ctx);
+
+	/*
+	 * Look for a possible preemption candidate on the local node first.
+	 * If there is no candidate look at the other nodes.  This isn't
+	 * exactly fair, but so far the whole spu scheduler tries to keep
+	 * a strong node affinity.  We might want to fine-tune this in
+	 * the future.
+	 */
+ restart:
+	node = cpu_to_node(raw_smp_processor_id());
+	for (n = 0; n < MAX_NUMNODES; n++, node++) {
+		node = (node < MAX_NUMNODES) ? node : 0;
+		if (!node_allowed(ctx, node))
+			continue;
+
+		mutex_lock(&cbe_spu_info[node].list_mutex);
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+			struct spu_context *tmp = spu->ctx;
+
+			if (tmp && tmp->prio > ctx->prio &&
+			    !(tmp->flags & SPU_CREATE_NOSCHED) &&
+			    (!victim || tmp->prio > victim->prio)) {
+				victim = spu->ctx;
+			}
+		}
+		if (victim)
+			get_spu_context(victim);
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+		if (victim) {
+			/*
+			 * This nests ctx->state_mutex, but we always lock
+			 * higher priority contexts before lower priority
+			 * ones, so this is safe until we introduce
+			 * priority inheritance schemes.
+			 *
+			 * XXX if the highest priority context is locked,
+			 * this can loop a long time.  Might be better to
+			 * look at another context or give up after X retries.
+			 */
+			if (!mutex_trylock(&victim->state_mutex)) {
+				put_spu_context(victim);
+				victim = NULL;
+				goto restart;
+			}
+
+			spu = victim->spu;
+			if (!spu || victim->prio <= ctx->prio) {
+				/*
+				 * This race can happen because we've dropped
+				 * the active list mutex.  Not a problem, just
+				 * restart the search.
+				 */
+				mutex_unlock(&victim->state_mutex);
+				put_spu_context(victim);
+				victim = NULL;
+				goto restart;
+			}
+
+			spu_context_trace(__spu_deactivate__unload, ctx, spu);
+
+			mutex_lock(&cbe_spu_info[node].list_mutex);
+			cbe_spu_info[node].nr_active--;
+			spu_unbind_context(spu, victim);
+			mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+			victim->stats.invol_ctx_switch++;
+			spu->stats.invol_ctx_switch++;
+			if (test_bit(SPU_SCHED_SPU_RUN, &victim->sched_flags))
+				spu_add_to_rq(victim);
+
+			mutex_unlock(&victim->state_mutex);
+			put_spu_context(victim);
+
+			return spu;
+		}
+	}
+
+	return NULL;
+}
+
+static void __spu_schedule(struct spu *spu, struct spu_context *ctx)
+{
+	int node = spu->node;
+	int success = 0;
+
+	spu_set_timeslice(ctx);
+
+	mutex_lock(&cbe_spu_info[node].list_mutex);
+	if (spu->ctx == NULL) {
+		spu_bind_context(spu, ctx);
+		cbe_spu_info[node].nr_active++;
+		spu->alloc_state = SPU_USED;
+		success = 1;
+	}
+	mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+	if (success)
+		wake_up_all(&ctx->run_wq);
+	else
+		spu_add_to_rq(ctx);
+}
+
+static void spu_schedule(struct spu *spu, struct spu_context *ctx)
+{
+	/* not a candidate for interruptible because it's called either
+	   from the scheduler thread or from spu_deactivate */
+	mutex_lock(&ctx->state_mutex);
+	if (ctx->state == SPU_STATE_SAVED)
+		__spu_schedule(spu, ctx);
+	spu_release(ctx);
+}
+
+/**
+ * spu_unschedule - remove a context from a spu, and possibly release it.
+ * @spu:	The SPU to unschedule from
+ * @ctx:	The context currently scheduled on the SPU
+ * @free_spu	Whether to free the SPU for other contexts
+ *
+ * Unbinds the context @ctx from the SPU @spu. If @free_spu is non-zero, the
+ * SPU is made available for other contexts (ie, may be returned by
+ * spu_get_idle). If this is zero, the caller is expected to schedule another
+ * context to this spu.
+ *
+ * Should be called with ctx->state_mutex held.
+ */
+static void spu_unschedule(struct spu *spu, struct spu_context *ctx,
+		int free_spu)
+{
+	int node = spu->node;
+
+	mutex_lock(&cbe_spu_info[node].list_mutex);
+	cbe_spu_info[node].nr_active--;
+	if (free_spu)
+		spu->alloc_state = SPU_FREE;
+	spu_unbind_context(spu, ctx);
+	ctx->stats.invol_ctx_switch++;
+	spu->stats.invol_ctx_switch++;
+	mutex_unlock(&cbe_spu_info[node].list_mutex);
+}
+
+/**
+ * spu_activate - find a free spu for a context and execute it
+ * @ctx:	spu context to schedule
+ * @flags:	flags (currently ignored)
+ *
+ * Tries to find a free spu to run @ctx.  If no free spu is available
+ * add the context to the runqueue so it gets woken up once an spu
+ * is available.
+ */
+int spu_activate(struct spu_context *ctx, unsigned long flags)
+{
+	struct spu *spu;
+
+	/*
+	 * If there are multiple threads waiting for a single context
+	 * only one actually binds the context while the others will
+	 * only be able to acquire the state_mutex once the context
+	 * already is in runnable state.
+	 */
+	if (ctx->spu)
+		return 0;
+
+spu_activate_top:
+	if (signal_pending(current))
+		return -ERESTARTSYS;
+
+	spu = spu_get_idle(ctx);
+	/*
+	 * If this is a realtime thread we try to get it running by
+	 * preempting a lower priority thread.
+	 */
+	if (!spu && rt_prio(ctx->prio))
+		spu = find_victim(ctx);
+	if (spu) {
+		unsigned long runcntl;
+
+		runcntl = ctx->ops->runcntl_read(ctx);
+		__spu_schedule(spu, ctx);
+		if (runcntl & SPU_RUNCNTL_RUNNABLE)
+			spuctx_switch_state(ctx, SPU_UTIL_USER);
+
+		return 0;
+	}
+
+	if (ctx->flags & SPU_CREATE_NOSCHED) {
+		spu_prio_wait(ctx);
+		goto spu_activate_top;
+	}
+
+	spu_add_to_rq(ctx);
+
+	return 0;
+}
+
+/**
+ * grab_runnable_context - try to find a runnable context
+ *
+ * Remove the highest priority context on the runqueue and return it
+ * to the caller.  Returns %NULL if no runnable context was found.
+ */
+static struct spu_context *grab_runnable_context(int prio, int node)
+{
+	struct spu_context *ctx;
+	int best;
+
+	spin_lock(&spu_prio->runq_lock);
+	best = find_first_bit(spu_prio->bitmap, prio);
+	while (best < prio) {
+		struct list_head *rq = &spu_prio->runq[best];
+
+		list_for_each_entry(ctx, rq, rq) {
+			/* XXX(hch): check for affinity here aswell */
+			if (__node_allowed(ctx, node)) {
+				__spu_del_from_rq(ctx);
+				goto found;
+			}
+		}
+		best++;
+	}
+	ctx = NULL;
+ found:
+	spin_unlock(&spu_prio->runq_lock);
+	return ctx;
+}
+
+static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_context *new = NULL;
+
+	if (spu) {
+		new = grab_runnable_context(max_prio, spu->node);
+		if (new || force) {
+			spu_unschedule(spu, ctx, new == NULL);
+			if (new) {
+				if (new->flags & SPU_CREATE_NOSCHED)
+					wake_up(&new->stop_wq);
+				else {
+					spu_release(ctx);
+					spu_schedule(spu, new);
+					/* this one can't easily be made
+					   interruptible */
+					mutex_lock(&ctx->state_mutex);
+				}
+			}
+		}
+	}
+
+	return new != NULL;
+}
+
+/**
+ * spu_deactivate - unbind a context from it's physical spu
+ * @ctx:	spu context to unbind
+ *
+ * Unbind @ctx from the physical spu it is running on and schedule
+ * the highest priority context to run on the freed physical spu.
+ */
+void spu_deactivate(struct spu_context *ctx)
+{
+	spu_context_nospu_trace(spu_deactivate__enter, ctx);
+	__spu_deactivate(ctx, 1, MAX_PRIO);
+}
+
+/**
+ * spu_yield -	yield a physical spu if others are waiting
+ * @ctx:	spu context to yield
+ *
+ * Check if there is a higher priority context waiting and if yes
+ * unbind @ctx from the physical spu and schedule the highest
+ * priority context to run on the freed physical spu instead.
+ */
+void spu_yield(struct spu_context *ctx)
+{
+	spu_context_nospu_trace(spu_yield__enter, ctx);
+	if (!(ctx->flags & SPU_CREATE_NOSCHED)) {
+		mutex_lock(&ctx->state_mutex);
+		__spu_deactivate(ctx, 0, MAX_PRIO);
+		mutex_unlock(&ctx->state_mutex);
+	}
+}
+
+static noinline void spusched_tick(struct spu_context *ctx)
+{
+	struct spu_context *new = NULL;
+	struct spu *spu = NULL;
+
+	if (spu_acquire(ctx))
+		BUG();	/* a kernel thread never has signals pending */
+
+	if (ctx->state != SPU_STATE_RUNNABLE)
+		goto out;
+	if (ctx->flags & SPU_CREATE_NOSCHED)
+		goto out;
+	if (ctx->policy == SCHED_FIFO)
+		goto out;
+
+	if (--ctx->time_slice && test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
+		goto out;
+
+	spu = ctx->spu;
+
+	spu_context_trace(spusched_tick__preempt, ctx, spu);
+
+	new = grab_runnable_context(ctx->prio + 1, spu->node);
+	if (new) {
+		spu_unschedule(spu, ctx, 0);
+		if (test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
+			spu_add_to_rq(ctx);
+	} else {
+		spu_context_nospu_trace(spusched_tick__newslice, ctx);
+		if (!ctx->time_slice)
+			ctx->time_slice++;
+	}
+out:
+	spu_release(ctx);
+
+	if (new)
+		spu_schedule(spu, new);
+}
+
+/**
+ * count_active_contexts - count nr of active tasks
+ *
+ * Return the number of tasks currently running or waiting to run.
+ *
+ * Note that we don't take runq_lock / list_mutex here.  Reading
+ * a single 32bit value is atomic on powerpc, and we don't care
+ * about memory ordering issues here.
+ */
+static unsigned long count_active_contexts(void)
+{
+	int nr_active = 0, node;
+
+	for (node = 0; node < MAX_NUMNODES; node++)
+		nr_active += cbe_spu_info[node].nr_active;
+	nr_active += spu_prio->nr_waiting;
+
+	return nr_active;
+}
+
+/**
+ * spu_calc_load - update the avenrun load estimates.
+ *
+ * No locking against reading these values from userspace, as for
+ * the CPU loadavg code.
+ */
+static void spu_calc_load(void)
+{
+	unsigned long active_tasks; /* fixed-point */
+
+	active_tasks = count_active_contexts() * FIXED_1;
+	CALC_LOAD(spu_avenrun[0], EXP_1, active_tasks);
+	CALC_LOAD(spu_avenrun[1], EXP_5, active_tasks);
+	CALC_LOAD(spu_avenrun[2], EXP_15, active_tasks);
+}
+
+static void spusched_wake(unsigned long data)
+{
+	mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
+	wake_up_process(spusched_task);
+}
+
+static void spuloadavg_wake(unsigned long data)
+{
+	mod_timer(&spuloadavg_timer, jiffies + LOAD_FREQ);
+	spu_calc_load();
+}
+
+static int spusched_thread(void *unused)
+{
+	struct spu *spu;
+	int node;
+
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule();
+		for (node = 0; node < MAX_NUMNODES; node++) {
+			struct mutex *mtx = &cbe_spu_info[node].list_mutex;
+
+			mutex_lock(mtx);
+			list_for_each_entry(spu, &cbe_spu_info[node].spus,
+					cbe_list) {
+				struct spu_context *ctx = spu->ctx;
+
+				if (ctx) {
+					get_spu_context(ctx);
+					mutex_unlock(mtx);
+					spusched_tick(ctx);
+					mutex_lock(mtx);
+					put_spu_context(ctx);
+				}
+			}
+			mutex_unlock(mtx);
+		}
+	}
+
+	return 0;
+}
+
+void spuctx_switch_state(struct spu_context *ctx,
+		enum spu_utilization_state new_state)
+{
+	unsigned long long curtime;
+	signed long long delta;
+	struct timespec ts;
+	struct spu *spu;
+	enum spu_utilization_state old_state;
+	int node;
+
+	ktime_get_ts(&ts);
+	curtime = timespec_to_ns(&ts);
+	delta = curtime - ctx->stats.tstamp;
+
+	WARN_ON(!mutex_is_locked(&ctx->state_mutex));
+	WARN_ON(delta < 0);
+
+	spu = ctx->spu;
+	old_state = ctx->stats.util_state;
+	ctx->stats.util_state = new_state;
+	ctx->stats.tstamp = curtime;
+
+	/*
+	 * Update the physical SPU utilization statistics.
+	 */
+	if (spu) {
+		ctx->stats.times[old_state] += delta;
+		spu->stats.times[old_state] += delta;
+		spu->stats.util_state = new_state;
+		spu->stats.tstamp = curtime;
+		node = spu->node;
+		if (old_state == SPU_UTIL_USER)
+			atomic_dec(&cbe_spu_info[node].busy_spus);
+		if (new_state == SPU_UTIL_USER)
+			atomic_inc(&cbe_spu_info[node].busy_spus);
+	}
+}
+
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+
+static int show_spu_loadavg(struct seq_file *s, void *private)
+{
+	int a, b, c;
+
+	a = spu_avenrun[0] + (FIXED_1/200);
+	b = spu_avenrun[1] + (FIXED_1/200);
+	c = spu_avenrun[2] + (FIXED_1/200);
+
+	/*
+	 * Note that last_pid doesn't really make much sense for the
+	 * SPU loadavg (it even seems very odd on the CPU side...),
+	 * but we include it here to have a 100% compatible interface.
+	 */
+	seq_printf(s, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
+		LOAD_INT(a), LOAD_FRAC(a),
+		LOAD_INT(b), LOAD_FRAC(b),
+		LOAD_INT(c), LOAD_FRAC(c),
+		count_active_contexts(),
+		atomic_read(&nr_spu_contexts),
+		current->nsproxy->pid_ns->last_pid);
+	return 0;
+}
+
+static int spu_loadavg_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, show_spu_loadavg, NULL);
+}
+
+static const struct file_operations spu_loadavg_fops = {
+	.open		= spu_loadavg_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+int __init spu_sched_init(void)
+{
+	struct proc_dir_entry *entry;
+	int err = -ENOMEM, i;
+
+	spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL);
+	if (!spu_prio)
+		goto out;
+
+	for (i = 0; i < MAX_PRIO; i++) {
+		INIT_LIST_HEAD(&spu_prio->runq[i]);
+		__clear_bit(i, spu_prio->bitmap);
+	}
+	spin_lock_init(&spu_prio->runq_lock);
+
+	setup_timer(&spusched_timer, spusched_wake, 0);
+	setup_timer(&spuloadavg_timer, spuloadavg_wake, 0);
+
+	spusched_task = kthread_run(spusched_thread, NULL, "spusched");
+	if (IS_ERR(spusched_task)) {
+		err = PTR_ERR(spusched_task);
+		goto out_free_spu_prio;
+	}
+
+	mod_timer(&spuloadavg_timer, 0);
+
+	entry = proc_create("spu_loadavg", 0, NULL, &spu_loadavg_fops);
+	if (!entry)
+		goto out_stop_kthread;
+
+	pr_debug("spusched: tick: %d, min ticks: %d, default ticks: %d\n",
+			SPUSCHED_TICK, MIN_SPU_TIMESLICE, DEF_SPU_TIMESLICE);
+	return 0;
+
+ out_stop_kthread:
+	kthread_stop(spusched_task);
+ out_free_spu_prio:
+	kfree(spu_prio);
+ out:
+	return err;
+}
+
+void spu_sched_exit(void)
+{
+	struct spu *spu;
+	int node;
+
+	remove_proc_entry("spu_loadavg", NULL);
+
+	del_timer_sync(&spusched_timer);
+	del_timer_sync(&spuloadavg_timer);
+	kthread_stop(spusched_task);
+
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		mutex_lock(&cbe_spu_info[node].list_mutex);
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
+			if (spu->alloc_state != SPU_FREE)
+				spu->alloc_state = SPU_FREE;
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
+	}
+	kfree(spu_prio);
+}
diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore.c b/arch/powerpc/platforms/cell/spufs/spu_restore.c
new file mode 100644
index 0000000..21a9c95
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_restore.c
@@ -0,0 +1,336 @@
+/*
+ * spu_restore.c
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * SPU-side context restore sequence outlined in
+ * Synergistic Processor Element Book IV
+ *
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+
+#ifndef LS_SIZE
+#define LS_SIZE                 0x40000	/* 256K (in bytes) */
+#endif
+
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+#include <spu_intrinsics.h>
+#include <asm/spu_csa.h>
+#include "spu_utils.h"
+
+#define BR_INSTR		0x327fff80	/* br -4         */
+#define NOP_INSTR		0x40200000	/* nop           */
+#define HEQ_INSTR		0x7b000000	/* heq $0, $0    */
+#define STOP_INSTR		0x00000000	/* stop 0x0      */
+#define ILLEGAL_INSTR		0x00800000	/* illegal instr */
+#define RESTORE_COMPLETE	0x00003ffc	/* stop 0x3ffc   */
+
+static inline void fetch_regs_from_mem(addr64 lscsa_ea)
+{
+	unsigned int ls = (unsigned int)&regs_spill[0];
+	unsigned int size = sizeof(regs_spill);
+	unsigned int tag_id = 0;
+	unsigned int cmd = 0x40;	/* GET */
+
+	spu_writech(MFC_LSA, ls);
+	spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+	spu_writech(MFC_EAL, lscsa_ea.ui[1]);
+	spu_writech(MFC_Size, size);
+	spu_writech(MFC_TagID, tag_id);
+	spu_writech(MFC_Cmd, cmd);
+}
+
+static inline void restore_upper_240kb(addr64 lscsa_ea)
+{
+	unsigned int ls = 16384;
+	unsigned int list = (unsigned int)&dma_list[0];
+	unsigned int size = sizeof(dma_list);
+	unsigned int tag_id = 0;
+	unsigned int cmd = 0x44;	/* GETL */
+
+	/* Restore, Step 4:
+	 *    Enqueue the GETL command (tag 0) to the MFC SPU command
+	 *    queue to transfer the upper 240 kb of LS from CSA.
+	 */
+	spu_writech(MFC_LSA, ls);
+	spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+	spu_writech(MFC_EAL, list);
+	spu_writech(MFC_Size, size);
+	spu_writech(MFC_TagID, tag_id);
+	spu_writech(MFC_Cmd, cmd);
+}
+
+static inline void restore_decr(void)
+{
+	unsigned int offset;
+	unsigned int decr_running;
+	unsigned int decr;
+
+	/* Restore, Step 6(moved):
+	 *    If the LSCSA "decrementer running" flag is set
+	 *    then write the SPU_WrDec channel with the
+	 *    decrementer value from LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(decr_status);
+	decr_running = regs_spill[offset].slot[0] & SPU_DECR_STATUS_RUNNING;
+	if (decr_running) {
+		offset = LSCSA_QW_OFFSET(decr);
+		decr = regs_spill[offset].slot[0];
+		spu_writech(SPU_WrDec, decr);
+	}
+}
+
+static inline void write_ppu_mb(void)
+{
+	unsigned int offset;
+	unsigned int data;
+
+	/* Restore, Step 11:
+	 *    Write the MFC_WrOut_MB channel with the PPU_MB
+	 *    data from LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(ppu_mb);
+	data = regs_spill[offset].slot[0];
+	spu_writech(SPU_WrOutMbox, data);
+}
+
+static inline void write_ppuint_mb(void)
+{
+	unsigned int offset;
+	unsigned int data;
+
+	/* Restore, Step 12:
+	 *    Write the MFC_WrInt_MB channel with the PPUINT_MB
+	 *    data from LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(ppuint_mb);
+	data = regs_spill[offset].slot[0];
+	spu_writech(SPU_WrOutIntrMbox, data);
+}
+
+static inline void restore_fpcr(void)
+{
+	unsigned int offset;
+	vector unsigned int fpcr;
+
+	/* Restore, Step 13:
+	 *    Restore the floating-point status and control
+	 *    register from the LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(fpcr);
+	fpcr = regs_spill[offset].v;
+	spu_mtfpscr(fpcr);
+}
+
+static inline void restore_srr0(void)
+{
+	unsigned int offset;
+	unsigned int srr0;
+
+	/* Restore, Step 14:
+	 *    Restore the SPU SRR0 data from the LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(srr0);
+	srr0 = regs_spill[offset].slot[0];
+	spu_writech(SPU_WrSRR0, srr0);
+}
+
+static inline void restore_event_mask(void)
+{
+	unsigned int offset;
+	unsigned int event_mask;
+
+	/* Restore, Step 15:
+	 *    Restore the SPU_RdEventMsk data from the LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(event_mask);
+	event_mask = regs_spill[offset].slot[0];
+	spu_writech(SPU_WrEventMask, event_mask);
+}
+
+static inline void restore_tag_mask(void)
+{
+	unsigned int offset;
+	unsigned int tag_mask;
+
+	/* Restore, Step 16:
+	 *    Restore the SPU_RdTagMsk data from the LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(tag_mask);
+	tag_mask = regs_spill[offset].slot[0];
+	spu_writech(MFC_WrTagMask, tag_mask);
+}
+
+static inline void restore_complete(void)
+{
+	extern void exit_fini(void);
+	unsigned int *exit_instrs = (unsigned int *)exit_fini;
+	unsigned int offset;
+	unsigned int stopped_status;
+	unsigned int stopped_code;
+
+	/* Restore, Step 18:
+	 *    Issue a stop-and-signal instruction with
+	 *    "good context restore" signal value.
+	 *
+	 * Restore, Step 19:
+	 *    There may be additional instructions placed
+	 *    here by the PPE Sequence for SPU Context
+	 *    Restore in order to restore the correct
+	 *    "stopped state".
+	 *
+	 *    This step is handled here by analyzing the
+	 *    LSCSA.stopped_status and then modifying the
+	 *    exit() function to behave appropriately.
+	 */
+
+	offset = LSCSA_QW_OFFSET(stopped_status);
+	stopped_status = regs_spill[offset].slot[0];
+	stopped_code = regs_spill[offset].slot[1];
+
+	switch (stopped_status) {
+	case SPU_STOPPED_STATUS_P_I:
+		/* SPU_Status[P,I]=1.  Add illegal instruction
+		 * followed by stop-and-signal instruction after
+		 * end of restore code.
+		 */
+		exit_instrs[0] = RESTORE_COMPLETE;
+		exit_instrs[1] = ILLEGAL_INSTR;
+		exit_instrs[2] = STOP_INSTR | stopped_code;
+		break;
+	case SPU_STOPPED_STATUS_P_H:
+		/* SPU_Status[P,H]=1.  Add 'heq $0, $0' followed
+		 * by stop-and-signal instruction after end of
+		 * restore code.
+		 */
+		exit_instrs[0] = RESTORE_COMPLETE;
+		exit_instrs[1] = HEQ_INSTR;
+		exit_instrs[2] = STOP_INSTR | stopped_code;
+		break;
+	case SPU_STOPPED_STATUS_S_P:
+		/* SPU_Status[S,P]=1.  Add nop instruction
+		 * followed by 'br -4' after end of restore
+		 * code.
+		 */
+		exit_instrs[0] = RESTORE_COMPLETE;
+		exit_instrs[1] = STOP_INSTR | stopped_code;
+		exit_instrs[2] = NOP_INSTR;
+		exit_instrs[3] = BR_INSTR;
+		break;
+	case SPU_STOPPED_STATUS_S_I:
+		/* SPU_Status[S,I]=1.  Add  illegal instruction
+		 * followed by 'br -4' after end of restore code.
+		 */
+		exit_instrs[0] = RESTORE_COMPLETE;
+		exit_instrs[1] = ILLEGAL_INSTR;
+		exit_instrs[2] = NOP_INSTR;
+		exit_instrs[3] = BR_INSTR;
+		break;
+	case SPU_STOPPED_STATUS_I:
+		/* SPU_Status[I]=1. Add illegal instruction followed
+		 * by infinite loop after end of restore sequence.
+		 */
+		exit_instrs[0] = RESTORE_COMPLETE;
+		exit_instrs[1] = ILLEGAL_INSTR;
+		exit_instrs[2] = NOP_INSTR;
+		exit_instrs[3] = BR_INSTR;
+		break;
+	case SPU_STOPPED_STATUS_S:
+		/* SPU_Status[S]=1. Add two 'nop' instructions. */
+		exit_instrs[0] = RESTORE_COMPLETE;
+		exit_instrs[1] = NOP_INSTR;
+		exit_instrs[2] = NOP_INSTR;
+		exit_instrs[3] = BR_INSTR;
+		break;
+	case SPU_STOPPED_STATUS_H:
+		/* SPU_Status[H]=1. Add 'heq $0, $0' instruction
+		 * after end of restore code.
+		 */
+		exit_instrs[0] = RESTORE_COMPLETE;
+		exit_instrs[1] = HEQ_INSTR;
+		exit_instrs[2] = NOP_INSTR;
+		exit_instrs[3] = BR_INSTR;
+		break;
+	case SPU_STOPPED_STATUS_P:
+		/* SPU_Status[P]=1. Add stop-and-signal instruction
+		 * after end of restore code.
+		 */
+		exit_instrs[0] = RESTORE_COMPLETE;
+		exit_instrs[1] = STOP_INSTR | stopped_code;
+		break;
+	case SPU_STOPPED_STATUS_R:
+		/* SPU_Status[I,S,H,P,R]=0. Add infinite loop. */
+		exit_instrs[0] = RESTORE_COMPLETE;
+		exit_instrs[1] = NOP_INSTR;
+		exit_instrs[2] = NOP_INSTR;
+		exit_instrs[3] = BR_INSTR;
+		break;
+	default:
+		/* SPU_Status[R]=1. No additonal instructions. */
+		break;
+	}
+	spu_sync();
+}
+
+/**
+ * main - entry point for SPU-side context restore.
+ *
+ * This code deviates from the documented sequence in the
+ * following aspects:
+ *
+ *	1. The EA for LSCSA is passed from PPE in the
+ *	   signal notification channels.
+ *	2. The register spill area is pulled by SPU
+ *	   into LS, rather than pushed by PPE.
+ *	3. All 128 registers are restored by exit().
+ *	4. The exit() function is modified at run
+ *	   time in order to properly restore the
+ *	   SPU_Status register.
+ */
+int main()
+{
+	addr64 lscsa_ea;
+
+	lscsa_ea.ui[0] = spu_readch(SPU_RdSigNotify1);
+	lscsa_ea.ui[1] = spu_readch(SPU_RdSigNotify2);
+	fetch_regs_from_mem(lscsa_ea);
+
+	set_event_mask();		/* Step 1.  */
+	set_tag_mask();			/* Step 2.  */
+	build_dma_list(lscsa_ea);	/* Step 3.  */
+	restore_upper_240kb(lscsa_ea);	/* Step 4.  */
+					/* Step 5: done by 'exit'. */
+	enqueue_putllc(lscsa_ea);	/* Step 7. */
+	set_tag_update();		/* Step 8. */
+	read_tag_status();		/* Step 9. */
+	restore_decr();			/* moved Step 6. */
+	read_llar_status();		/* Step 10. */
+	write_ppu_mb();			/* Step 11. */
+	write_ppuint_mb();		/* Step 12. */
+	restore_fpcr();			/* Step 13. */
+	restore_srr0();			/* Step 14. */
+	restore_event_mask();		/* Step 15. */
+	restore_tag_mask();		/* Step 16. */
+					/* Step 17. done by 'exit'. */
+	restore_complete();		/* Step 18. */
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S b/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S
new file mode 100644
index 0000000..2905949
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S
@@ -0,0 +1,116 @@
+/*
+ * crt0_r.S: Entry function for SPU-side context restore.
+ *
+ * Copyright (C) 2005 IBM
+ *
+ * Entry and exit function for SPU-side of the context restore
+ * sequence.  Sets up an initial stack frame, then branches to
+ * 'main'.  On return, restores all 128 registers from the LSCSA
+ * and exits.
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <asm/spu_csa.h>
+
+.data
+.align 7
+.globl regs_spill
+regs_spill:
+.space SIZEOF_SPU_SPILL_REGS, 0x0
+
+.text
+.global _start
+_start:
+	/* Initialize the stack pointer to point to 16368
+	 * (16kb-16). The back chain pointer is initialized
+	 * to NULL.
+	 */
+	il      $0, 0
+	il      $SP, 16368
+	stqd    $0, 0($SP)
+
+	/* Allocate a minimum stack frame for the called main.
+	 * This is needed so that main has a place to save the
+	 * link register when it calls another function.
+	 */
+	stqd    $SP, -160($SP)
+	ai      $SP, $SP, -160
+
+	/* Call the program's main function. */
+	brsl    $0, main
+
+.global exit
+.global	_exit
+exit:
+_exit:
+	/* SPU Context Restore, Step 5: Restore the remaining 112 GPRs. */
+	ila     $3, regs_spill + 256
+restore_regs:
+	lqr     $4, restore_reg_insts
+restore_reg_loop:
+	ai      $4, $4, 4
+	.balignl 16, 0x40200000
+restore_reg_insts:       /* must be quad-word aligned. */
+	lqd     $16, 0($3)
+	lqd     $17, 16($3)
+	lqd     $18, 32($3)
+	lqd     $19, 48($3)
+	andi    $5, $4, 0x7F
+	stqr    $4, restore_reg_insts
+	ai      $3, $3, 64
+	brnz    $5, restore_reg_loop
+
+	/* SPU Context Restore Step 17: Restore the first 16 GPRs. */
+	lqa $0, regs_spill + 0
+	lqa $1, regs_spill + 16
+	lqa $2, regs_spill + 32
+	lqa $3, regs_spill + 48
+	lqa $4, regs_spill + 64
+	lqa $5, regs_spill + 80
+	lqa $6, regs_spill + 96
+	lqa $7, regs_spill + 112
+	lqa $8, regs_spill + 128
+	lqa $9, regs_spill + 144
+	lqa $10, regs_spill + 160
+	lqa $11, regs_spill + 176
+	lqa $12, regs_spill + 192
+	lqa $13, regs_spill + 208
+	lqa $14, regs_spill + 224
+	lqa $15, regs_spill + 240
+
+	/* Under normal circumstances, the 'exit' function
+	 * terminates with 'stop SPU_RESTORE_COMPLETE',
+	 * indicating that the SPU-side restore code has
+	 * completed.
+	 *
+	 * However it is possible that instructions immediately
+	 * following the 'stop 0x3ffc' have been modified at run
+	 * time so as to recreate the exact SPU_Status settings
+	 * from the application, e.g. illegal instruciton, halt,
+	 * etc.
+	 */
+.global exit_fini
+.global	_exit_fini
+exit_fini:
+_exit_fini:
+	stop	SPU_RESTORE_COMPLETE
+	stop	0
+	stop	0
+	stop	0
+
+	/* Pad the size of this crt0.o to be multiple of 16 bytes. */
+.balignl 16, 0x0
diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped
new file mode 100644
index 0000000..f383b02
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped
@@ -0,0 +1,935 @@
+/*
+ * spu_restore_dump.h: Copyright (C) 2005 IBM.
+ * Hex-dump auto generated from spu_restore.c.
+ * Do not edit!
+ */
+static unsigned int spu_restore_code[]  __attribute__((__aligned__(128))) = {
+0x40800000,
+0x409ff801,
+0x24000080,
+0x24fd8081,
+0x1cd80081,
+0x33001180,
+0x42034003,
+0x33800284,
+0x1c010204,
+0x40200000,
+0x40200000,
+0x40200000,
+0x34000190,
+0x34004191,
+0x34008192,
+0x3400c193,
+0x141fc205,
+0x23fffd84,
+0x1c100183,
+0x217ffa85,
+0x3080b000,
+0x3080b201,
+0x3080b402,
+0x3080b603,
+0x3080b804,
+0x3080ba05,
+0x3080bc06,
+0x3080be07,
+0x3080c008,
+0x3080c209,
+0x3080c40a,
+0x3080c60b,
+0x3080c80c,
+0x3080ca0d,
+0x3080cc0e,
+0x3080ce0f,
+0x00003ffc,
+0x00000000,
+0x00000000,
+0x00000000,
+0x01a00182,
+0x3ec00083,
+0xb0a14103,
+0x01a00204,
+0x3ec10083,
+0x4202c002,
+0xb0a14203,
+0x21a00802,
+0x3fbf028a,
+0x3f20050a,
+0x3fbe0502,
+0x3fe30102,
+0x21a00882,
+0x3f82028b,
+0x3fe3058b,
+0x3fbf0584,
+0x3f200204,
+0x3fbe0204,
+0x3fe30204,
+0x04000203,
+0x21a00903,
+0x40848002,
+0x21a00982,
+0x40800003,
+0x21a00a03,
+0x40802002,
+0x21a00a82,
+0x21a00083,
+0x40800082,
+0x21a00b02,
+0x10002612,
+0x42a00003,
+0x42074006,
+0x1800c204,
+0x40a00008,
+0x40800789,
+0x1c010305,
+0x34000302,
+0x1cffc489,
+0x3ec00303,
+0x3ec00287,
+0xb0408403,
+0x24000302,
+0x34000282,
+0x1c020306,
+0xb0408207,
+0x18020204,
+0x24000282,
+0x217ffa09,
+0x04000402,
+0x21a00802,
+0x3fbe0504,
+0x3fe30204,
+0x21a00884,
+0x42074002,
+0x21a00902,
+0x40803c03,
+0x21a00983,
+0x04000485,
+0x21a00a05,
+0x40802202,
+0x21a00a82,
+0x21a00805,
+0x21a00884,
+0x3fbf0582,
+0x3f200102,
+0x3fbe0102,
+0x3fe30102,
+0x21a00902,
+0x40804003,
+0x21a00983,
+0x21a00a05,
+0x40805a02,
+0x21a00a82,
+0x40800083,
+0x21a00b83,
+0x01a00c02,
+0x30809c03,
+0x34000182,
+0x14004102,
+0x21002082,
+0x01a00d82,
+0x3080a003,
+0x34000182,
+0x21a00e02,
+0x3080a203,
+0x34000182,
+0x21a00f02,
+0x3080a403,
+0x34000182,
+0x77400100,
+0x3080a603,
+0x34000182,
+0x21a00702,
+0x3080a803,
+0x34000182,
+0x21a00082,
+0x3080aa03,
+0x34000182,
+0x21a00b02,
+0x4020007f,
+0x3080ae02,
+0x42004805,
+0x3080ac04,
+0x34000103,
+0x34000202,
+0x1cffc183,
+0x3b810106,
+0x0f608184,
+0x42013802,
+0x5c020183,
+0x38810102,
+0x3b810102,
+0x21000e83,
+0x4020007f,
+0x35000100,
+0x00000470,
+0x000002f8,
+0x00000430,
+0x00000360,
+0x000002f8,
+0x000003c8,
+0x000004a8,
+0x00000298,
+0x00000360,
+0x00200000,
+0x409ffe02,
+0x30801203,
+0x40800208,
+0x3ec40084,
+0x40800407,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
+0x20801203,
+0x38820282,
+0x41004003,
+0xb0408189,
+0x28820282,
+0x3881c282,
+0xb0408304,
+0x2881c282,
+0x00400000,
+0x40800003,
+0x35000000,
+0x30809e03,
+0x34000182,
+0x21a00382,
+0x4020007f,
+0x327fde00,
+0x409ffe02,
+0x30801203,
+0x40800206,
+0x3ec40084,
+0x40800407,
+0x40800608,
+0x3ac1828a,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
+0x20801203,
+0x38818282,
+0x41004003,
+0xb040818a,
+0x10005b0b,
+0x41201003,
+0x28818282,
+0x3881c282,
+0xb0408184,
+0x41193f83,
+0x60ffc003,
+0x2881c282,
+0x38820282,
+0xb0408189,
+0x28820282,
+0x327fef80,
+0x409ffe02,
+0x30801203,
+0x40800207,
+0x3ec40086,
+0x4120100b,
+0x10005b14,
+0x40800404,
+0x3ac1c289,
+0x40800608,
+0xb060c106,
+0x3ac10286,
+0x3ac2028a,
+0x20801203,
+0x3881c282,
+0x41193f83,
+0x60ffc003,
+0xb0408589,
+0x2881c282,
+0x38810282,
+0xb0408586,
+0x28810282,
+0x38820282,
+0xb040818a,
+0x28820282,
+0x4020007f,
+0x327fe280,
+0x409ffe02,
+0x30801203,
+0x40800207,
+0x3ec40084,
+0x40800408,
+0x10005b14,
+0x40800609,
+0x3ac1c28a,
+0x3ac2028b,
+0xb060c104,
+0x3ac24284,
+0x20801203,
+0x41201003,
+0x3881c282,
+0xb040830a,
+0x2881c282,
+0x38820282,
+0xb040818b,
+0x41193f83,
+0x60ffc003,
+0x28820282,
+0x38824282,
+0xb0408184,
+0x28824282,
+0x4020007f,
+0x327fd580,
+0x409ffe02,
+0x1000658e,
+0x40800206,
+0x30801203,
+0x40800407,
+0x3ec40084,
+0x40800608,
+0x3ac1828a,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
+0x20801203,
+0x413d8003,
+0x38818282,
+0x4020007f,
+0x327fd800,
+0x409ffe03,
+0x30801202,
+0x40800207,
+0x3ec40084,
+0x10005b09,
+0x3ac1c288,
+0xb0408184,
+0x4020007f,
+0x4020007f,
+0x20801202,
+0x3881c282,
+0xb0408308,
+0x2881c282,
+0x327fc680,
+0x409ffe02,
+0x1000588b,
+0x40800208,
+0x30801203,
+0x40800407,
+0x3ec40084,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
+0x20801203,
+0x413d8003,
+0x38820282,
+0x327fbd80,
+0x00200000,
+0x00000da0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000d90,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000db0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000dc0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000d80,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000df0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000de0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000dd0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000e04,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000e00,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+};
diff --git a/arch/powerpc/platforms/cell/spufs/spu_save.c b/arch/powerpc/platforms/cell/spufs/spu_save.c
new file mode 100644
index 0000000..ae95cc1
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_save.c
@@ -0,0 +1,195 @@
+/*
+ * spu_save.c
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * SPU-side context save sequence outlined in
+ * Synergistic Processor Element Book IV
+ *
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+
+#ifndef LS_SIZE
+#define LS_SIZE                 0x40000	/* 256K (in bytes) */
+#endif
+
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+#include <spu_intrinsics.h>
+#include <asm/spu_csa.h>
+#include "spu_utils.h"
+
+static inline void save_event_mask(void)
+{
+	unsigned int offset;
+
+	/* Save, Step 2:
+	 *    Read the SPU_RdEventMsk channel and save to the LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(event_mask);
+	regs_spill[offset].slot[0] = spu_readch(SPU_RdEventMask);
+}
+
+static inline void save_tag_mask(void)
+{
+	unsigned int offset;
+
+	/* Save, Step 3:
+	 *    Read the SPU_RdTagMsk channel and save to the LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(tag_mask);
+	regs_spill[offset].slot[0] = spu_readch(MFC_RdTagMask);
+}
+
+static inline void save_upper_240kb(addr64 lscsa_ea)
+{
+	unsigned int ls = 16384;
+	unsigned int list = (unsigned int)&dma_list[0];
+	unsigned int size = sizeof(dma_list);
+	unsigned int tag_id = 0;
+	unsigned int cmd = 0x24;	/* PUTL */
+
+	/* Save, Step 7:
+	 *    Enqueue the PUTL command (tag 0) to the MFC SPU command
+	 *    queue to transfer the remaining 240 kb of LS to CSA.
+	 */
+	spu_writech(MFC_LSA, ls);
+	spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+	spu_writech(MFC_EAL, list);
+	spu_writech(MFC_Size, size);
+	spu_writech(MFC_TagID, tag_id);
+	spu_writech(MFC_Cmd, cmd);
+}
+
+static inline void save_fpcr(void)
+{
+	// vector unsigned int fpcr;
+	unsigned int offset;
+
+	/* Save, Step 9:
+	 *    Issue the floating-point status and control register
+	 *    read instruction, and save to the LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(fpcr);
+	regs_spill[offset].v = spu_mffpscr();
+}
+
+static inline void save_decr(void)
+{
+	unsigned int offset;
+
+	/* Save, Step 10:
+	 *    Read and save the SPU_RdDec channel data to
+	 *    the LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(decr);
+	regs_spill[offset].slot[0] = spu_readch(SPU_RdDec);
+}
+
+static inline void save_srr0(void)
+{
+	unsigned int offset;
+
+	/* Save, Step 11:
+	 *    Read and save the SPU_WSRR0 channel data to
+	 *    the LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(srr0);
+	regs_spill[offset].slot[0] = spu_readch(SPU_RdSRR0);
+}
+
+static inline void spill_regs_to_mem(addr64 lscsa_ea)
+{
+	unsigned int ls = (unsigned int)&regs_spill[0];
+	unsigned int size = sizeof(regs_spill);
+	unsigned int tag_id = 0;
+	unsigned int cmd = 0x20;	/* PUT */
+
+	/* Save, Step 13:
+	 *    Enqueue a PUT command (tag 0) to send the LSCSA
+	 *    to the CSA.
+	 */
+	spu_writech(MFC_LSA, ls);
+	spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+	spu_writech(MFC_EAL, lscsa_ea.ui[1]);
+	spu_writech(MFC_Size, size);
+	spu_writech(MFC_TagID, tag_id);
+	spu_writech(MFC_Cmd, cmd);
+}
+
+static inline void enqueue_sync(addr64 lscsa_ea)
+{
+	unsigned int tag_id = 0;
+	unsigned int cmd = 0xCC;
+
+	/* Save, Step 14:
+	 *    Enqueue an MFC_SYNC command (tag 0).
+	 */
+	spu_writech(MFC_TagID, tag_id);
+	spu_writech(MFC_Cmd, cmd);
+}
+
+static inline void save_complete(void)
+{
+	/* Save, Step 18:
+	 *    Issue a stop-and-signal instruction indicating
+	 *    "save complete".  Note: This function will not
+	 *    return!!
+	 */
+	spu_stop(SPU_SAVE_COMPLETE);
+}
+
+/**
+ * main - entry point for SPU-side context save.
+ *
+ * This code deviates from the documented sequence as follows:
+ *
+ *      1. The EA for LSCSA is passed from PPE in the
+ *         signal notification channels.
+ *      2. All 128 registers are saved by crt0.o.
+ */
+int main()
+{
+	addr64 lscsa_ea;
+
+	lscsa_ea.ui[0] = spu_readch(SPU_RdSigNotify1);
+	lscsa_ea.ui[1] = spu_readch(SPU_RdSigNotify2);
+
+	/* Step 1: done by exit(). */
+	save_event_mask();	/* Step 2.  */
+	save_tag_mask();	/* Step 3.  */
+	set_event_mask();	/* Step 4.  */
+	set_tag_mask();		/* Step 5.  */
+	build_dma_list(lscsa_ea);	/* Step 6.  */
+	save_upper_240kb(lscsa_ea);	/* Step 7.  */
+	/* Step 8: done by exit(). */
+	save_fpcr();		/* Step 9.  */
+	save_decr();		/* Step 10. */
+	save_srr0();		/* Step 11. */
+	enqueue_putllc(lscsa_ea);	/* Step 12. */
+	spill_regs_to_mem(lscsa_ea);	/* Step 13. */
+	enqueue_sync(lscsa_ea);	/* Step 14. */
+	set_tag_update();	/* Step 15. */
+	read_tag_status();	/* Step 16. */
+	read_llar_status();	/* Step 17. */
+	save_complete();	/* Step 18. */
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S b/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S
new file mode 100644
index 0000000..6659d6a
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S
@@ -0,0 +1,102 @@
+/*
+ * crt0_s.S: Entry function for SPU-side context save.
+ *
+ * Copyright (C) 2005 IBM
+ *
+ * Entry function for SPU-side of the context save sequence.
+ * Saves all 128 GPRs, sets up an initial stack frame, then
+ * branches to 'main'.
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <asm/spu_csa.h>
+
+.data
+.align 7
+.globl regs_spill
+regs_spill:
+.space SIZEOF_SPU_SPILL_REGS, 0x0
+
+.text
+.global _start
+_start:
+	/* SPU Context Save Step 1: Save the first 16 GPRs. */
+	stqa $0, regs_spill + 0
+	stqa $1, regs_spill + 16
+	stqa $2, regs_spill + 32
+	stqa $3, regs_spill + 48
+	stqa $4, regs_spill + 64
+	stqa $5, regs_spill + 80
+	stqa $6, regs_spill + 96
+	stqa $7, regs_spill + 112
+	stqa $8, regs_spill + 128
+	stqa $9, regs_spill + 144
+	stqa $10, regs_spill + 160
+	stqa $11, regs_spill + 176
+	stqa $12, regs_spill + 192
+	stqa $13, regs_spill + 208
+	stqa $14, regs_spill + 224
+	stqa $15, regs_spill + 240
+
+	/* SPU Context Save, Step 8: Save the remaining 112 GPRs. */
+	ila     $3, regs_spill + 256
+save_regs:
+	lqr     $4, save_reg_insts
+save_reg_loop:
+	ai      $4, $4, 4
+	.balignl 16, 0x40200000
+save_reg_insts:       /* must be quad-word aligned. */
+	stqd    $16, 0($3)
+	stqd    $17, 16($3)
+	stqd    $18, 32($3)
+	stqd    $19, 48($3)
+	andi    $5, $4, 0x7F
+	stqr    $4, save_reg_insts
+	ai      $3, $3, 64
+	brnz    $5, save_reg_loop
+
+	/* Initialize the stack pointer to point to 16368
+	 * (16kb-16). The back chain pointer is initialized
+	 * to NULL.
+	 */
+	il	$0, 0
+	il	$SP, 16368
+	stqd	$0, 0($SP)
+
+	/* Allocate a minimum stack frame for the called main.
+	 * This is needed so that main has a place to save the
+	 * link register when it calls another function.
+	 */
+	stqd	$SP, -160($SP)
+	ai	$SP, $SP, -160
+
+	/* Call the program's main function. */
+	brsl	$0, main
+
+	/* In this case main should not return; if it does
+	 * there has been an error in the sequence.  Execute
+	 * stop-and-signal with code=0.
+	 */
+.global exit
+.global	_exit
+exit:
+_exit:
+	stop	0x0
+
+	/* Pad the size of this crt0.o to be multiple of 16 bytes. */
+.balignl 16, 0x0
+
diff --git a/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped b/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped
new file mode 100644
index 0000000..b9f81ac
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped
@@ -0,0 +1,743 @@
+/*
+ * spu_save_dump.h: Copyright (C) 2005 IBM.
+ * Hex-dump auto generated from spu_save.c.
+ * Do not edit!
+ */
+static unsigned int spu_save_code[]  __attribute__((__aligned__(128))) = {
+0x20805000,
+0x20805201,
+0x20805402,
+0x20805603,
+0x20805804,
+0x20805a05,
+0x20805c06,
+0x20805e07,
+0x20806008,
+0x20806209,
+0x2080640a,
+0x2080660b,
+0x2080680c,
+0x20806a0d,
+0x20806c0e,
+0x20806e0f,
+0x4201c003,
+0x33800184,
+0x1c010204,
+0x40200000,
+0x24000190,
+0x24004191,
+0x24008192,
+0x2400c193,
+0x141fc205,
+0x23fffd84,
+0x1c100183,
+0x217ffb85,
+0x40800000,
+0x409ff801,
+0x24000080,
+0x24fd8081,
+0x1cd80081,
+0x33000180,
+0x00000000,
+0x00000000,
+0x01a00182,
+0x3ec00083,
+0xb1c38103,
+0x01a00204,
+0x3ec10082,
+0x4201400d,
+0xb1c38202,
+0x01a00583,
+0x34218682,
+0x3ed80684,
+0xb0408184,
+0x24218682,
+0x01a00603,
+0x00200000,
+0x34214682,
+0x3ed40684,
+0xb0408184,
+0x40800003,
+0x24214682,
+0x21a00083,
+0x40800082,
+0x21a00b02,
+0x4020007f,
+0x1000251e,
+0x42a00002,
+0x32800008,
+0x4205c00c,
+0x00200000,
+0x40a0000b,
+0x3f82070f,
+0x4080020a,
+0x40800709,
+0x3fe3078f,
+0x3fbf0783,
+0x3f200183,
+0x3fbe0183,
+0x3fe30187,
+0x18008387,
+0x4205c002,
+0x3ac30404,
+0x1cffc489,
+0x00200000,
+0x18008403,
+0x38830402,
+0x4cffc486,
+0x3ac28185,
+0xb0408584,
+0x28830402,
+0x1c020408,
+0x38828182,
+0xb0408385,
+0x1802c387,
+0x28828182,
+0x217ff886,
+0x04000582,
+0x32800007,
+0x21a00802,
+0x3fbf0705,
+0x3f200285,
+0x3fbe0285,
+0x3fe30285,
+0x21a00885,
+0x04000603,
+0x21a00903,
+0x40803c02,
+0x21a00982,
+0x04000386,
+0x21a00a06,
+0x40801202,
+0x21a00a82,
+0x73000003,
+0x24200683,
+0x01a00404,
+0x00200000,
+0x34204682,
+0x3ec40683,
+0xb0408203,
+0x24204682,
+0x01a00783,
+0x00200000,
+0x3421c682,
+0x3edc0684,
+0xb0408184,
+0x2421c682,
+0x21a00806,
+0x21a00885,
+0x3fbf0784,
+0x3f200204,
+0x3fbe0204,
+0x3fe30204,
+0x21a00904,
+0x40804002,
+0x21a00982,
+0x21a00a06,
+0x40805a02,
+0x21a00a82,
+0x04000683,
+0x21a00803,
+0x21a00885,
+0x21a00904,
+0x40848002,
+0x21a00982,
+0x21a00a06,
+0x40801002,
+0x21a00a82,
+0x21a00a06,
+0x40806602,
+0x00200000,
+0x35800009,
+0x21a00a82,
+0x40800083,
+0x21a00b83,
+0x01a00c02,
+0x01a00d83,
+0x00003ffb,
+0x40800003,
+0x4020007f,
+0x35000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+};
diff --git a/arch/powerpc/platforms/cell/spufs/spu_utils.h b/arch/powerpc/platforms/cell/spufs/spu_utils.h
new file mode 100644
index 0000000..58359fe
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_utils.h
@@ -0,0 +1,160 @@
+/*
+ * utils.h: Utilities for SPU-side of the context switch operation.
+ *
+ * (C) Copyright IBM 2005
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _SPU_CONTEXT_UTILS_H_
+#define _SPU_CONTEXT_UTILS_H_
+
+/*
+ * 64-bit safe EA.
+ */
+typedef union {
+	unsigned long long ull;
+	unsigned int ui[2];
+} addr64;
+
+/*
+ * 128-bit register template.
+ */
+typedef union {
+	unsigned int slot[4];
+	vector unsigned int v;
+} spu_reg128v;
+
+/*
+ * DMA list structure.
+ */
+struct dma_list_elem {
+	unsigned int size;
+	unsigned int ea_low;
+};
+
+/*
+ * Declare storage for 8-byte aligned DMA list.
+ */
+struct dma_list_elem dma_list[15] __attribute__ ((aligned(8)));
+
+/*
+ * External definition for storage
+ * declared in crt0.
+ */
+extern spu_reg128v regs_spill[NR_SPU_SPILL_REGS];
+
+/*
+ * Compute LSCSA byte offset for a given field.
+ */
+static struct spu_lscsa *dummy = (struct spu_lscsa *)0;
+#define LSCSA_BYTE_OFFSET(_field)  \
+	((char *)(&(dummy->_field)) - (char *)(&(dummy->gprs[0].slot[0])))
+#define LSCSA_QW_OFFSET(_field)  (LSCSA_BYTE_OFFSET(_field) >> 4)
+
+static inline void set_event_mask(void)
+{
+	unsigned int event_mask = 0;
+
+	/* Save, Step 4:
+	 * Restore, Step 1:
+	 *    Set the SPU_RdEventMsk channel to zero to mask
+	 *    all events.
+	 */
+	spu_writech(SPU_WrEventMask, event_mask);
+}
+
+static inline void set_tag_mask(void)
+{
+	unsigned int tag_mask = 1;
+
+	/* Save, Step 5:
+	 * Restore, Step 2:
+	 *    Set the SPU_WrTagMsk channel to '01' to unmask
+	 *    only tag group 0.
+	 */
+	spu_writech(MFC_WrTagMask, tag_mask);
+}
+
+static inline void build_dma_list(addr64 lscsa_ea)
+{
+	unsigned int ea_low;
+	int i;
+
+	/* Save, Step 6:
+	 * Restore, Step 3:
+	 *    Update the effective address for the CSA in the
+	 *    pre-canned DMA-list in local storage.
+	 */
+	ea_low = lscsa_ea.ui[1];
+	ea_low += LSCSA_BYTE_OFFSET(ls[16384]);
+
+	for (i = 0; i < 15; i++, ea_low += 16384) {
+		dma_list[i].size = 16384;
+		dma_list[i].ea_low = ea_low;
+	}
+}
+
+static inline void enqueue_putllc(addr64 lscsa_ea)
+{
+	unsigned int ls = 0;
+	unsigned int size = 128;
+	unsigned int tag_id = 0;
+	unsigned int cmd = 0xB4;	/* PUTLLC */
+
+	/* Save, Step 12:
+	 * Restore, Step 7:
+	 *    Send a PUTLLC (tag 0) command to the MFC using
+	 *    an effective address in the CSA in order to
+	 *    remove any possible lock-line reservation.
+	 */
+	spu_writech(MFC_LSA, ls);
+	spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+	spu_writech(MFC_EAL, lscsa_ea.ui[1]);
+	spu_writech(MFC_Size, size);
+	spu_writech(MFC_TagID, tag_id);
+	spu_writech(MFC_Cmd, cmd);
+}
+
+static inline void set_tag_update(void)
+{
+	unsigned int update_any = 1;
+
+	/* Save, Step 15:
+	 * Restore, Step 8:
+	 *    Write the MFC_TagUpdate channel with '01'.
+	 */
+	spu_writech(MFC_WrTagUpdate, update_any);
+}
+
+static inline void read_tag_status(void)
+{
+	/* Save, Step 16:
+	 * Restore, Step 9:
+	 *    Read the MFC_TagStat channel data.
+	 */
+	spu_readch(MFC_RdTagStat);
+}
+
+static inline void read_llar_status(void)
+{
+	/* Save, Step 17:
+	 * Restore, Step 10:
+	 *    Read the MFC_AtomicStat channel data.
+	 */
+	spu_readch(MFC_RdAtomicStat);
+}
+
+#endif				/* _SPU_CONTEXT_UTILS_H_ */
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
new file mode 100644
index 0000000..15c62d3
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -0,0 +1,381 @@
+/*
+ * SPU file system
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef SPUFS_H
+#define SPUFS_H
+
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/fs.h>
+#include <linux/cpumask.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/spu_info.h>
+
+#define SPUFS_PS_MAP_SIZE	0x20000
+#define SPUFS_MFC_MAP_SIZE	0x1000
+#define SPUFS_CNTL_MAP_SIZE	0x1000
+#define SPUFS_CNTL_MAP_SIZE	0x1000
+#define SPUFS_SIGNAL_MAP_SIZE	PAGE_SIZE
+#define SPUFS_MSS_MAP_SIZE	0x1000
+
+/* The magic number for our file system */
+enum {
+	SPUFS_MAGIC = 0x23c9b64e,
+};
+
+struct spu_context_ops;
+struct spu_gang;
+
+/* ctx->sched_flags */
+enum {
+	SPU_SCHED_NOTIFY_ACTIVE,
+	SPU_SCHED_WAS_ACTIVE,	/* was active upon spu_acquire_saved()  */
+	SPU_SCHED_SPU_RUN,	/* context is within spu_run */
+};
+
+enum {
+	SWITCH_LOG_BUFSIZE = 4096,
+};
+
+enum {
+	SWITCH_LOG_START,
+	SWITCH_LOG_STOP,
+	SWITCH_LOG_EXIT,
+};
+
+struct switch_log {
+	wait_queue_head_t	wait;
+	unsigned long		head;
+	unsigned long		tail;
+	struct switch_log_entry {
+		struct timespec	tstamp;
+		s32		spu_id;
+		u32		type;
+		u32		val;
+		u64		timebase;
+	} log[];
+};
+
+struct spu_context {
+	struct spu *spu;		  /* pointer to a physical SPU */
+	struct spu_state csa;		  /* SPU context save area. */
+	spinlock_t mmio_lock;		  /* protects mmio access */
+	struct address_space *local_store; /* local store mapping.  */
+	struct address_space *mfc;	   /* 'mfc' area mappings. */
+	struct address_space *cntl;	   /* 'control' area mappings. */
+	struct address_space *signal1;	   /* 'signal1' area mappings. */
+	struct address_space *signal2;	   /* 'signal2' area mappings. */
+	struct address_space *mss;	   /* 'mss' area mappings. */
+	struct address_space *psmap;	   /* 'psmap' area mappings. */
+	struct mutex mapping_lock;
+	u64 object_id;		   /* user space pointer for oprofile */
+
+	enum { SPU_STATE_RUNNABLE, SPU_STATE_SAVED } state;
+	struct mutex state_mutex;
+	struct mutex run_mutex;
+
+	struct mm_struct *owner;
+
+	struct kref kref;
+	wait_queue_head_t ibox_wq;
+	wait_queue_head_t wbox_wq;
+	wait_queue_head_t stop_wq;
+	wait_queue_head_t mfc_wq;
+	wait_queue_head_t run_wq;
+	struct fasync_struct *ibox_fasync;
+	struct fasync_struct *wbox_fasync;
+	struct fasync_struct *mfc_fasync;
+	u32 tagwait;
+	struct spu_context_ops *ops;
+	struct work_struct reap_work;
+	unsigned long flags;
+	unsigned long event_return;
+
+	struct list_head gang_list;
+	struct spu_gang *gang;
+	struct kref *prof_priv_kref;
+	void ( * prof_priv_release) (struct kref *kref);
+
+	/* owner thread */
+	pid_t tid;
+
+	/* scheduler fields */
+	struct list_head rq;
+	unsigned int time_slice;
+	unsigned long sched_flags;
+	cpumask_t cpus_allowed;
+	int policy;
+	int prio;
+	int last_ran;
+
+	/* statistics */
+	struct {
+		/* updates protected by ctx->state_mutex */
+		enum spu_utilization_state util_state;
+		unsigned long long tstamp;	/* time of last state switch */
+		unsigned long long times[SPU_UTIL_MAX];
+		unsigned long long vol_ctx_switch;
+		unsigned long long invol_ctx_switch;
+		unsigned long long min_flt;
+		unsigned long long maj_flt;
+		unsigned long long hash_flt;
+		unsigned long long slb_flt;
+		unsigned long long slb_flt_base; /* # at last ctx switch */
+		unsigned long long class2_intr;
+		unsigned long long class2_intr_base; /* # at last ctx switch */
+		unsigned long long libassist;
+	} stats;
+
+	/* context switch log */
+	struct switch_log *switch_log;
+
+	struct list_head aff_list;
+	int aff_head;
+	int aff_offset;
+};
+
+struct spu_gang {
+	struct list_head list;
+	struct mutex mutex;
+	struct kref kref;
+	int contexts;
+
+	struct spu_context *aff_ref_ctx;
+	struct list_head aff_list_head;
+	struct mutex aff_mutex;
+	int aff_flags;
+	struct spu *aff_ref_spu;
+	atomic_t aff_sched_count;
+};
+
+/* Flag bits for spu_gang aff_flags */
+#define AFF_OFFSETS_SET		1
+#define AFF_MERGED		2
+
+struct mfc_dma_command {
+	int32_t pad;	/* reserved */
+	uint32_t lsa;	/* local storage address */
+	uint64_t ea;	/* effective address */
+	uint16_t size;	/* transfer size */
+	uint16_t tag;	/* command tag */
+	uint16_t class;	/* class ID */
+	uint16_t cmd;	/* command opcode */
+};
+
+
+/* SPU context query/set operations. */
+struct spu_context_ops {
+	int (*mbox_read) (struct spu_context * ctx, u32 * data);
+	 u32(*mbox_stat_read) (struct spu_context * ctx);
+	unsigned int (*mbox_stat_poll)(struct spu_context *ctx,
+					unsigned int events);
+	int (*ibox_read) (struct spu_context * ctx, u32 * data);
+	int (*wbox_write) (struct spu_context * ctx, u32 data);
+	 u32(*signal1_read) (struct spu_context * ctx);
+	void (*signal1_write) (struct spu_context * ctx, u32 data);
+	 u32(*signal2_read) (struct spu_context * ctx);
+	void (*signal2_write) (struct spu_context * ctx, u32 data);
+	void (*signal1_type_set) (struct spu_context * ctx, u64 val);
+	 u64(*signal1_type_get) (struct spu_context * ctx);
+	void (*signal2_type_set) (struct spu_context * ctx, u64 val);
+	 u64(*signal2_type_get) (struct spu_context * ctx);
+	 u32(*npc_read) (struct spu_context * ctx);
+	void (*npc_write) (struct spu_context * ctx, u32 data);
+	 u32(*status_read) (struct spu_context * ctx);
+	char*(*get_ls) (struct spu_context * ctx);
+	void (*privcntl_write) (struct spu_context *ctx, u64 data);
+	 u32 (*runcntl_read) (struct spu_context * ctx);
+	void (*runcntl_write) (struct spu_context * ctx, u32 data);
+	void (*runcntl_stop) (struct spu_context * ctx);
+	void (*master_start) (struct spu_context * ctx);
+	void (*master_stop) (struct spu_context * ctx);
+	int (*set_mfc_query)(struct spu_context * ctx, u32 mask, u32 mode);
+	u32 (*read_mfc_tagstatus)(struct spu_context * ctx);
+	u32 (*get_mfc_free_elements)(struct spu_context *ctx);
+	int (*send_mfc_command)(struct spu_context * ctx,
+				struct mfc_dma_command * cmd);
+	void (*dma_info_read) (struct spu_context * ctx,
+			       struct spu_dma_info * info);
+	void (*proxydma_info_read) (struct spu_context * ctx,
+				    struct spu_proxydma_info * info);
+	void (*restart_dma)(struct spu_context *ctx);
+};
+
+extern struct spu_context_ops spu_hw_ops;
+extern struct spu_context_ops spu_backing_ops;
+
+struct spufs_inode_info {
+	struct spu_context *i_ctx;
+	struct spu_gang *i_gang;
+	struct inode vfs_inode;
+	int i_openers;
+};
+#define SPUFS_I(inode) \
+	container_of(inode, struct spufs_inode_info, vfs_inode)
+
+struct spufs_tree_descr {
+	const char *name;
+	const struct file_operations *ops;
+	int mode;
+	size_t size;
+};
+
+extern struct spufs_tree_descr spufs_dir_contents[];
+extern struct spufs_tree_descr spufs_dir_nosched_contents[];
+extern struct spufs_tree_descr spufs_dir_debug_contents[];
+
+/* system call implementation */
+extern struct spufs_calls spufs_calls;
+long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status);
+long spufs_create(struct nameidata *nd, unsigned int flags,
+			mode_t mode, struct file *filp);
+/* ELF coredump callbacks for writing SPU ELF notes */
+extern int spufs_coredump_extra_notes_size(void);
+extern int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset);
+
+extern const struct file_operations spufs_context_fops;
+
+/* gang management */
+struct spu_gang *alloc_spu_gang(void);
+struct spu_gang *get_spu_gang(struct spu_gang *gang);
+int put_spu_gang(struct spu_gang *gang);
+void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx);
+void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx);
+
+/* fault handling */
+int spufs_handle_class1(struct spu_context *ctx);
+int spufs_handle_class0(struct spu_context *ctx);
+
+/* affinity */
+struct spu *affinity_check(struct spu_context *ctx);
+
+/* context management */
+extern atomic_t nr_spu_contexts;
+static inline int __must_check spu_acquire(struct spu_context *ctx)
+{
+	return mutex_lock_interruptible(&ctx->state_mutex);
+}
+
+static inline void spu_release(struct spu_context *ctx)
+{
+	mutex_unlock(&ctx->state_mutex);
+}
+
+struct spu_context * alloc_spu_context(struct spu_gang *gang);
+void destroy_spu_context(struct kref *kref);
+struct spu_context * get_spu_context(struct spu_context *ctx);
+int put_spu_context(struct spu_context *ctx);
+void spu_unmap_mappings(struct spu_context *ctx);
+
+void spu_forget(struct spu_context *ctx);
+int __must_check spu_acquire_saved(struct spu_context *ctx);
+void spu_release_saved(struct spu_context *ctx);
+
+int spu_stopped(struct spu_context *ctx, u32 * stat);
+void spu_del_from_rq(struct spu_context *ctx);
+int spu_activate(struct spu_context *ctx, unsigned long flags);
+void spu_deactivate(struct spu_context *ctx);
+void spu_yield(struct spu_context *ctx);
+void spu_switch_notify(struct spu *spu, struct spu_context *ctx);
+void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx,
+		u32 type, u32 val);
+void spu_set_timeslice(struct spu_context *ctx);
+void spu_update_sched_info(struct spu_context *ctx);
+void __spu_update_sched_info(struct spu_context *ctx);
+int __init spu_sched_init(void);
+void spu_sched_exit(void);
+
+extern char *isolated_loader;
+
+/*
+ * spufs_wait
+ *	Same as wait_event_interruptible(), except that here
+ *	we need to call spu_release(ctx) before sleeping, and
+ *	then spu_acquire(ctx) when awoken.
+ *
+ * 	Returns with state_mutex re-acquired when successfull or
+ * 	with -ERESTARTSYS and the state_mutex dropped when interrupted.
+ */
+
+#define spufs_wait(wq, condition)					\
+({									\
+	int __ret = 0;							\
+	DEFINE_WAIT(__wait);						\
+	for (;;) {							\
+		prepare_to_wait(&(wq), &__wait, TASK_INTERRUPTIBLE);	\
+		if (condition)						\
+			break;						\
+		spu_release(ctx);					\
+		if (signal_pending(current)) {				\
+			__ret = -ERESTARTSYS;				\
+			break;						\
+		}							\
+		schedule();						\
+		__ret = spu_acquire(ctx);				\
+		if (__ret)						\
+			break;						\
+	}								\
+	finish_wait(&(wq), &__wait);					\
+	__ret;								\
+})
+
+size_t spu_wbox_write(struct spu_context *ctx, u32 data);
+size_t spu_ibox_read(struct spu_context *ctx, u32 *data);
+
+/* irq callback funcs. */
+void spufs_ibox_callback(struct spu *spu);
+void spufs_wbox_callback(struct spu *spu);
+void spufs_stop_callback(struct spu *spu, int irq);
+void spufs_mfc_callback(struct spu *spu);
+void spufs_dma_callback(struct spu *spu, int type);
+
+extern struct spu_coredump_calls spufs_coredump_calls;
+struct spufs_coredump_reader {
+	char *name;
+	ssize_t (*read)(struct spu_context *ctx,
+			char __user *buffer, size_t size, loff_t *pos);
+	u64 (*get)(struct spu_context *ctx);
+	size_t size;
+};
+extern struct spufs_coredump_reader spufs_coredump_read[];
+extern int spufs_coredump_num_notes;
+
+extern int spu_init_csa(struct spu_state *csa);
+extern void spu_fini_csa(struct spu_state *csa);
+extern int spu_save(struct spu_state *prev, struct spu *spu);
+extern int spu_restore(struct spu_state *new, struct spu *spu);
+extern int spu_switch(struct spu_state *prev, struct spu_state *new,
+		      struct spu *spu);
+extern int spu_alloc_lscsa(struct spu_state *csa);
+extern void spu_free_lscsa(struct spu_state *csa);
+
+extern void spuctx_switch_state(struct spu_context *ctx,
+		enum spu_utilization_state new_state);
+
+#define spu_context_trace(name, ctx, spu) \
+	trace_mark(name, "ctx %p spu %p", ctx, spu);
+#define spu_context_nospu_trace(name, ctx) \
+	trace_mark(name, "ctx %p", ctx);
+
+#endif
diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.c b/arch/powerpc/platforms/cell/spufs/sputrace.c
new file mode 100644
index 0000000..d0b1f3f
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/sputrace.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright (C) 2007 IBM Deutschland Entwicklung GmbH
+ *	Released under GPL v2.
+ *
+ * Partially based on net/ipv4/tcp_probe.c.
+ *
+ * Simple tracing facility for spu contexts.
+ */
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/marker.h>
+#include <linux/proc_fs.h>
+#include <linux/wait.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+#include "spufs.h"
+
+struct spu_probe {
+	const char *name;
+	const char *format;
+	marker_probe_func *probe_func;
+};
+
+struct sputrace {
+	ktime_t tstamp;
+	int owner_tid; /* owner */
+	int curr_tid;
+	const char *name;
+	int number;
+};
+
+static int bufsize __read_mostly = 16384;
+MODULE_PARM_DESC(bufsize, "Log buffer size (number of records)");
+module_param(bufsize, int, 0);
+
+
+static DEFINE_SPINLOCK(sputrace_lock);
+static DECLARE_WAIT_QUEUE_HEAD(sputrace_wait);
+static ktime_t sputrace_start;
+static unsigned long sputrace_head, sputrace_tail;
+static struct sputrace *sputrace_log;
+static int sputrace_logging;
+
+static int sputrace_used(void)
+{
+	return (sputrace_head - sputrace_tail) % bufsize;
+}
+
+static inline int sputrace_avail(void)
+{
+	return bufsize - sputrace_used();
+}
+
+static int sputrace_sprint(char *tbuf, int n)
+{
+	const struct sputrace *t = sputrace_log + sputrace_tail % bufsize;
+	struct timespec tv =
+		ktime_to_timespec(ktime_sub(t->tstamp, sputrace_start));
+
+	return snprintf(tbuf, n,
+		"[%lu.%09lu] %d: %s (ctxthread = %d, spu = %d)\n",
+		(unsigned long) tv.tv_sec,
+		(unsigned long) tv.tv_nsec,
+		t->curr_tid,
+		t->name,
+		t->owner_tid,
+		t->number);
+}
+
+static ssize_t sputrace_read(struct file *file, char __user *buf,
+		size_t len, loff_t *ppos)
+{
+	int error = 0, cnt = 0;
+
+	if (!buf || len < 0)
+		return -EINVAL;
+
+	while (cnt < len) {
+		char tbuf[128];
+		int width;
+
+		/* If we have data ready to return, don't block waiting
+		 * for more */
+		if (cnt > 0 && sputrace_used() == 0)
+			break;
+
+		error = wait_event_interruptible(sputrace_wait,
+						 sputrace_used() > 0);
+		if (error)
+			break;
+
+		spin_lock(&sputrace_lock);
+		if (sputrace_head == sputrace_tail) {
+			spin_unlock(&sputrace_lock);
+			continue;
+		}
+
+		width = sputrace_sprint(tbuf, sizeof(tbuf));
+		if (width < len)
+			sputrace_tail = (sputrace_tail + 1) % bufsize;
+		spin_unlock(&sputrace_lock);
+
+		if (width >= len)
+			break;
+
+		error = copy_to_user(buf + cnt, tbuf, width);
+		if (error)
+			break;
+		cnt += width;
+	}
+
+	return cnt == 0 ? error : cnt;
+}
+
+static int sputrace_open(struct inode *inode, struct file *file)
+{
+	int rc;
+
+	spin_lock(&sputrace_lock);
+	if (sputrace_logging) {
+		rc = -EBUSY;
+		goto out;
+	}
+
+	sputrace_logging = 1;
+	sputrace_head = sputrace_tail = 0;
+	sputrace_start = ktime_get();
+	rc = 0;
+
+out:
+	spin_unlock(&sputrace_lock);
+	return rc;
+}
+
+static int sputrace_release(struct inode *inode, struct file *file)
+{
+	spin_lock(&sputrace_lock);
+	sputrace_logging = 0;
+	spin_unlock(&sputrace_lock);
+	return 0;
+}
+
+static const struct file_operations sputrace_fops = {
+	.owner   = THIS_MODULE,
+	.open    = sputrace_open,
+	.read    = sputrace_read,
+	.release = sputrace_release,
+};
+
+static void sputrace_log_item(const char *name, struct spu_context *ctx,
+		struct spu *spu)
+{
+	spin_lock(&sputrace_lock);
+
+	if (!sputrace_logging) {
+		spin_unlock(&sputrace_lock);
+		return;
+	}
+
+	if (sputrace_avail() > 1) {
+		struct sputrace *t = sputrace_log + sputrace_head;
+
+		t->tstamp = ktime_get();
+		t->owner_tid = ctx->tid;
+		t->name = name;
+		t->curr_tid = current->pid;
+		t->number = spu ? spu->number : -1;
+
+		sputrace_head = (sputrace_head + 1) % bufsize;
+	} else {
+		printk(KERN_WARNING
+		       "sputrace: lost samples due to full buffer.\n");
+	}
+	spin_unlock(&sputrace_lock);
+
+	wake_up(&sputrace_wait);
+}
+
+static void spu_context_event(void *probe_private, void *call_data,
+		const char *format, va_list *args)
+{
+	struct spu_probe *p = probe_private;
+	struct spu_context *ctx;
+	struct spu *spu;
+
+	ctx = va_arg(*args, struct spu_context *);
+	spu = va_arg(*args, struct spu *);
+
+	sputrace_log_item(p->name, ctx, spu);
+}
+
+static void spu_context_nospu_event(void *probe_private, void *call_data,
+		const char *format, va_list *args)
+{
+	struct spu_probe *p = probe_private;
+	struct spu_context *ctx;
+
+	ctx = va_arg(*args, struct spu_context *);
+
+	sputrace_log_item(p->name, ctx, NULL);
+}
+
+struct spu_probe spu_probes[] = {
+	{ "spu_bind_context__enter", "ctx %p spu %p", spu_context_event },
+	{ "spu_unbind_context__enter", "ctx %p spu %p", spu_context_event },
+	{ "spu_get_idle__enter", "ctx %p", spu_context_nospu_event },
+	{ "spu_get_idle__found", "ctx %p spu %p", spu_context_event },
+	{ "spu_get_idle__not_found", "ctx %p", spu_context_nospu_event },
+	{ "spu_find_victim__enter", "ctx %p", spu_context_nospu_event },
+	{ "spusched_tick__preempt", "ctx %p spu %p", spu_context_event },
+	{ "spusched_tick__newslice", "ctx %p", spu_context_nospu_event },
+	{ "spu_yield__enter", "ctx %p", spu_context_nospu_event },
+	{ "spu_deactivate__enter", "ctx %p", spu_context_nospu_event },
+	{ "__spu_deactivate__unload", "ctx %p spu %p", spu_context_event },
+	{ "spufs_ps_fault__enter", "ctx %p", spu_context_nospu_event },
+	{ "spufs_ps_fault__sleep", "ctx %p", spu_context_nospu_event },
+	{ "spufs_ps_fault__wake", "ctx %p spu %p", spu_context_event },
+	{ "spufs_ps_fault__insert", "ctx %p spu %p", spu_context_event },
+	{ "spu_acquire_saved__enter", "ctx %p", spu_context_nospu_event },
+	{ "destroy_spu_context__enter", "ctx %p", spu_context_nospu_event },
+	{ "spufs_stop_callback__enter", "ctx %p spu %p", spu_context_event },
+};
+
+static int __init sputrace_init(void)
+{
+	struct proc_dir_entry *entry;
+	int i, error = -ENOMEM;
+
+	sputrace_log = kcalloc(bufsize, sizeof(struct sputrace), GFP_KERNEL);
+	if (!sputrace_log)
+		goto out;
+
+	entry = proc_create("sputrace", S_IRUSR, NULL, &sputrace_fops);
+	if (!entry)
+		goto out_free_log;
+
+	for (i = 0; i < ARRAY_SIZE(spu_probes); i++) {
+		struct spu_probe *p = &spu_probes[i];
+
+		error = marker_probe_register(p->name, p->format,
+					      p->probe_func, p);
+		if (error)
+			printk(KERN_INFO "Unable to register probe %s\n",
+					p->name);
+	}
+
+	return 0;
+
+out_free_log:
+	kfree(sputrace_log);
+out:
+	return -ENOMEM;
+}
+
+static void __exit sputrace_exit(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(spu_probes); i++)
+		marker_probe_unregister(spu_probes[i].name,
+			spu_probes[i].probe_func, &spu_probes[i]);
+
+	remove_proc_entry("sputrace", NULL);
+	kfree(sputrace_log);
+	marker_synchronize_unregister();
+}
+
+module_init(sputrace_init);
+module_exit(sputrace_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c
new file mode 100644
index 0000000..3df9a36
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/switch.c
@@ -0,0 +1,2222 @@
+/*
+ * spu_switch.c
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * Host-side part of SPU context switch sequence outlined in
+ * Synergistic Processor Element, Book IV.
+ *
+ * A fully premptive switch of an SPE is very expensive in terms
+ * of time and system resources.  SPE Book IV indicates that SPE
+ * allocation should follow a "serially reusable device" model,
+ * in which the SPE is assigned a task until it completes.  When
+ * this is not possible, this sequence may be used to premptively
+ * save, and then later (optionally) restore the context of a
+ * program executing on an SPE.
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/hardirq.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+
+#include <asm/io.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/spu_csa.h>
+#include <asm/mmu_context.h>
+
+#include "spufs.h"
+
+#include "spu_save_dump.h"
+#include "spu_restore_dump.h"
+
+#if 0
+#define POLL_WHILE_TRUE(_c) {				\
+    do {						\
+    } while (_c);					\
+  }
+#else
+#define RELAX_SPIN_COUNT				1000
+#define POLL_WHILE_TRUE(_c) {				\
+    do {						\
+	int _i;						\
+	for (_i=0; _i<RELAX_SPIN_COUNT && (_c); _i++) { \
+	    cpu_relax();				\
+	}						\
+	if (unlikely(_c)) yield();			\
+	else break;					\
+    } while (_c);					\
+  }
+#endif				/* debug */
+
+#define POLL_WHILE_FALSE(_c)	POLL_WHILE_TRUE(!(_c))
+
+static inline void acquire_spu_lock(struct spu *spu)
+{
+	/* Save, Step 1:
+	 * Restore, Step 1:
+	 *    Acquire SPU-specific mutual exclusion lock.
+	 *    TBD.
+	 */
+}
+
+static inline void release_spu_lock(struct spu *spu)
+{
+	/* Restore, Step 76:
+	 *    Release SPU-specific mutual exclusion lock.
+	 *    TBD.
+	 */
+}
+
+static inline int check_spu_isolate(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u32 isolate_state;
+
+	/* Save, Step 2:
+	 * Save, Step 6:
+	 *     If SPU_Status[E,L,IS] any field is '1', this
+	 *     SPU is in isolate state and cannot be context
+	 *     saved at this time.
+	 */
+	isolate_state = SPU_STATUS_ISOLATED_STATE |
+	    SPU_STATUS_ISOLATED_LOAD_STATUS | SPU_STATUS_ISOLATED_EXIT_STATUS;
+	return (in_be32(&prob->spu_status_R) & isolate_state) ? 1 : 0;
+}
+
+static inline void disable_interrupts(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 3:
+	 * Restore, Step 2:
+	 *     Save INT_Mask_class0 in CSA.
+	 *     Write INT_MASK_class0 with value of 0.
+	 *     Save INT_Mask_class1 in CSA.
+	 *     Write INT_MASK_class1 with value of 0.
+	 *     Save INT_Mask_class2 in CSA.
+	 *     Write INT_MASK_class2 with value of 0.
+	 *     Synchronize all three interrupts to be sure
+	 *     we no longer execute a handler on another CPU.
+	 */
+	spin_lock_irq(&spu->register_lock);
+	if (csa) {
+		csa->priv1.int_mask_class0_RW = spu_int_mask_get(spu, 0);
+		csa->priv1.int_mask_class1_RW = spu_int_mask_get(spu, 1);
+		csa->priv1.int_mask_class2_RW = spu_int_mask_get(spu, 2);
+	}
+	spu_int_mask_set(spu, 0, 0ul);
+	spu_int_mask_set(spu, 1, 0ul);
+	spu_int_mask_set(spu, 2, 0ul);
+	eieio();
+	spin_unlock_irq(&spu->register_lock);
+
+	/*
+	 * This flag needs to be set before calling synchronize_irq so
+	 * that the update will be visible to the relevant handlers
+	 * via a simple load.
+	 */
+	set_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags);
+	clear_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags);
+	synchronize_irq(spu->irqs[0]);
+	synchronize_irq(spu->irqs[1]);
+	synchronize_irq(spu->irqs[2]);
+}
+
+static inline void set_watchdog_timer(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 4:
+	 * Restore, Step 25.
+	 *    Set a software watchdog timer, which specifies the
+	 *    maximum allowable time for a context save sequence.
+	 *
+	 *    For present, this implementation will not set a global
+	 *    watchdog timer, as virtualization & variable system load
+	 *    may cause unpredictable execution times.
+	 */
+}
+
+static inline void inhibit_user_access(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 5:
+	 * Restore, Step 3:
+	 *     Inhibit user-space access (if provided) to this
+	 *     SPU by unmapping the virtual pages assigned to
+	 *     the SPU memory-mapped I/O (MMIO) for problem
+	 *     state. TBD.
+	 */
+}
+
+static inline void set_switch_pending(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 7:
+	 * Restore, Step 5:
+	 *     Set a software context switch pending flag.
+	 *     Done above in Step 3 - disable_interrupts().
+	 */
+}
+
+static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 8:
+	 *     Suspend DMA and save MFC_CNTL.
+	 */
+	switch (in_be64(&priv2->mfc_control_RW) &
+	       MFC_CNTL_SUSPEND_DMA_STATUS_MASK) {
+	case MFC_CNTL_SUSPEND_IN_PROGRESS:
+		POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) &
+				  MFC_CNTL_SUSPEND_DMA_STATUS_MASK) ==
+				 MFC_CNTL_SUSPEND_COMPLETE);
+		/* fall through */
+	case MFC_CNTL_SUSPEND_COMPLETE:
+		if (csa)
+			csa->priv2.mfc_control_RW =
+				in_be64(&priv2->mfc_control_RW) |
+				MFC_CNTL_SUSPEND_DMA_QUEUE;
+		break;
+	case MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION:
+		out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE);
+		POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) &
+				  MFC_CNTL_SUSPEND_DMA_STATUS_MASK) ==
+				 MFC_CNTL_SUSPEND_COMPLETE);
+		if (csa)
+			csa->priv2.mfc_control_RW =
+				in_be64(&priv2->mfc_control_RW) &
+				~MFC_CNTL_SUSPEND_DMA_QUEUE &
+				~MFC_CNTL_SUSPEND_MASK;
+		break;
+	}
+}
+
+static inline void save_spu_runcntl(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save, Step 9:
+	 *     Save SPU_Runcntl in the CSA.  This value contains
+	 *     the "Application Desired State".
+	 */
+	csa->prob.spu_runcntl_RW = in_be32(&prob->spu_runcntl_RW);
+}
+
+static inline void save_mfc_sr1(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 10:
+	 *     Save MFC_SR1 in the CSA.
+	 */
+	csa->priv1.mfc_sr1_RW = spu_mfc_sr1_get(spu);
+}
+
+static inline void save_spu_status(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save, Step 11:
+	 *     Read SPU_Status[R], and save to CSA.
+	 */
+	if ((in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) == 0) {
+		csa->prob.spu_status_R = in_be32(&prob->spu_status_R);
+	} else {
+		u32 stopped;
+
+		out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+		eieio();
+		POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+				SPU_STATUS_RUNNING);
+		stopped =
+		    SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP |
+		    SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP;
+		if ((in_be32(&prob->spu_status_R) & stopped) == 0)
+			csa->prob.spu_status_R = SPU_STATUS_RUNNING;
+		else
+			csa->prob.spu_status_R = in_be32(&prob->spu_status_R);
+	}
+}
+
+static inline void save_mfc_stopped_status(struct spu_state *csa,
+		struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	const u64 mask = MFC_CNTL_DECREMENTER_RUNNING |
+			MFC_CNTL_DMA_QUEUES_EMPTY;
+
+	/* Save, Step 12:
+	 *     Read MFC_CNTL[Ds].  Update saved copy of
+	 *     CSA.MFC_CNTL[Ds].
+	 *
+	 * update: do the same with MFC_CNTL[Q].
+	 */
+	csa->priv2.mfc_control_RW &= ~mask;
+	csa->priv2.mfc_control_RW |= in_be64(&priv2->mfc_control_RW) & mask;
+}
+
+static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 13:
+	 *     Write MFC_CNTL[Dh] set to a '1' to halt
+	 *     the decrementer.
+	 */
+	out_be64(&priv2->mfc_control_RW,
+		 MFC_CNTL_DECREMENTER_HALTED | MFC_CNTL_SUSPEND_MASK);
+	eieio();
+}
+
+static inline void save_timebase(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 14:
+	 *    Read PPE Timebase High and Timebase low registers
+	 *    and save in CSA.  TBD.
+	 */
+	csa->suspend_time = get_cycles();
+}
+
+static inline void remove_other_spu_access(struct spu_state *csa,
+					   struct spu *spu)
+{
+	/* Save, Step 15:
+	 *     Remove other SPU access to this SPU by unmapping
+	 *     this SPU's pages from their address space.  TBD.
+	 */
+}
+
+static inline void do_mfc_mssync(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save, Step 16:
+	 * Restore, Step 11.
+	 *     Write SPU_MSSync register. Poll SPU_MSSync[P]
+	 *     for a value of 0.
+	 */
+	out_be64(&prob->spc_mssync_RW, 1UL);
+	POLL_WHILE_TRUE(in_be64(&prob->spc_mssync_RW) & MS_SYNC_PENDING);
+}
+
+static inline void issue_mfc_tlbie(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 17:
+	 * Restore, Step 12.
+	 * Restore, Step 48.
+	 *     Write TLB_Invalidate_Entry[IS,VPN,L,Lp]=0 register.
+	 *     Then issue a PPE sync instruction.
+	 */
+	spu_tlb_invalidate(spu);
+	mb();
+}
+
+static inline void handle_pending_interrupts(struct spu_state *csa,
+					     struct spu *spu)
+{
+	/* Save, Step 18:
+	 *     Handle any pending interrupts from this SPU
+	 *     here.  This is OS or hypervisor specific.  One
+	 *     option is to re-enable interrupts to handle any
+	 *     pending interrupts, with the interrupt handlers
+	 *     recognizing the software Context Switch Pending
+	 *     flag, to ensure the SPU execution or MFC command
+	 *     queue is not restarted.  TBD.
+	 */
+}
+
+static inline void save_mfc_queues(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	int i;
+
+	/* Save, Step 19:
+	 *     If MFC_Cntl[Se]=0 then save
+	 *     MFC command queues.
+	 */
+	if ((in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DMA_QUEUES_EMPTY) == 0) {
+		for (i = 0; i < 8; i++) {
+			csa->priv2.puq[i].mfc_cq_data0_RW =
+			    in_be64(&priv2->puq[i].mfc_cq_data0_RW);
+			csa->priv2.puq[i].mfc_cq_data1_RW =
+			    in_be64(&priv2->puq[i].mfc_cq_data1_RW);
+			csa->priv2.puq[i].mfc_cq_data2_RW =
+			    in_be64(&priv2->puq[i].mfc_cq_data2_RW);
+			csa->priv2.puq[i].mfc_cq_data3_RW =
+			    in_be64(&priv2->puq[i].mfc_cq_data3_RW);
+		}
+		for (i = 0; i < 16; i++) {
+			csa->priv2.spuq[i].mfc_cq_data0_RW =
+			    in_be64(&priv2->spuq[i].mfc_cq_data0_RW);
+			csa->priv2.spuq[i].mfc_cq_data1_RW =
+			    in_be64(&priv2->spuq[i].mfc_cq_data1_RW);
+			csa->priv2.spuq[i].mfc_cq_data2_RW =
+			    in_be64(&priv2->spuq[i].mfc_cq_data2_RW);
+			csa->priv2.spuq[i].mfc_cq_data3_RW =
+			    in_be64(&priv2->spuq[i].mfc_cq_data3_RW);
+		}
+	}
+}
+
+static inline void save_ppu_querymask(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save, Step 20:
+	 *     Save the PPU_QueryMask register
+	 *     in the CSA.
+	 */
+	csa->prob.dma_querymask_RW = in_be32(&prob->dma_querymask_RW);
+}
+
+static inline void save_ppu_querytype(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save, Step 21:
+	 *     Save the PPU_QueryType register
+	 *     in the CSA.
+	 */
+	csa->prob.dma_querytype_RW = in_be32(&prob->dma_querytype_RW);
+}
+
+static inline void save_ppu_tagstatus(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save the Prxy_TagStatus register in the CSA.
+	 *
+	 * It is unnecessary to restore dma_tagstatus_R, however,
+	 * dma_tagstatus_R in the CSA is accessed via backing_ops, so
+	 * we must save it.
+	 */
+	csa->prob.dma_tagstatus_R = in_be32(&prob->dma_tagstatus_R);
+}
+
+static inline void save_mfc_csr_tsq(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 22:
+	 *     Save the MFC_CSR_TSQ register
+	 *     in the LSCSA.
+	 */
+	csa->priv2.spu_tag_status_query_RW =
+	    in_be64(&priv2->spu_tag_status_query_RW);
+}
+
+static inline void save_mfc_csr_cmd(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 23:
+	 *     Save the MFC_CSR_CMD1 and MFC_CSR_CMD2
+	 *     registers in the CSA.
+	 */
+	csa->priv2.spu_cmd_buf1_RW = in_be64(&priv2->spu_cmd_buf1_RW);
+	csa->priv2.spu_cmd_buf2_RW = in_be64(&priv2->spu_cmd_buf2_RW);
+}
+
+static inline void save_mfc_csr_ato(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 24:
+	 *     Save the MFC_CSR_ATO register in
+	 *     the CSA.
+	 */
+	csa->priv2.spu_atomic_status_RW = in_be64(&priv2->spu_atomic_status_RW);
+}
+
+static inline void save_mfc_tclass_id(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 25:
+	 *     Save the MFC_TCLASS_ID register in
+	 *     the CSA.
+	 */
+	csa->priv1.mfc_tclass_id_RW = spu_mfc_tclass_id_get(spu);
+}
+
+static inline void set_mfc_tclass_id(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 26:
+	 * Restore, Step 23.
+	 *     Write the MFC_TCLASS_ID register with
+	 *     the value 0x10000000.
+	 */
+	spu_mfc_tclass_id_set(spu, 0x10000000);
+	eieio();
+}
+
+static inline void purge_mfc_queue(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 27:
+	 * Restore, Step 14.
+	 *     Write MFC_CNTL[Pc]=1 (purge queue).
+	 */
+	out_be64(&priv2->mfc_control_RW,
+			MFC_CNTL_PURGE_DMA_REQUEST |
+			MFC_CNTL_SUSPEND_MASK);
+	eieio();
+}
+
+static inline void wait_purge_complete(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 28:
+	 *     Poll MFC_CNTL[Ps] until value '11' is read
+	 *     (purge complete).
+	 */
+	POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) &
+			 MFC_CNTL_PURGE_DMA_STATUS_MASK) ==
+			 MFC_CNTL_PURGE_DMA_COMPLETE);
+}
+
+static inline void setup_mfc_sr1(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 30:
+	 * Restore, Step 18:
+	 *     Write MFC_SR1 with MFC_SR1[D=0,S=1] and
+	 *     MFC_SR1[TL,R,Pr,T] set correctly for the
+	 *     OS specific environment.
+	 *
+	 *     Implementation note: The SPU-side code
+	 *     for save/restore is privileged, so the
+	 *     MFC_SR1[Pr] bit is not set.
+	 *
+	 */
+	spu_mfc_sr1_set(spu, (MFC_STATE1_MASTER_RUN_CONTROL_MASK |
+			      MFC_STATE1_RELOCATE_MASK |
+			      MFC_STATE1_BUS_TLBIE_MASK));
+}
+
+static inline void save_spu_npc(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save, Step 31:
+	 *     Save SPU_NPC in the CSA.
+	 */
+	csa->prob.spu_npc_RW = in_be32(&prob->spu_npc_RW);
+}
+
+static inline void save_spu_privcntl(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 32:
+	 *     Save SPU_PrivCntl in the CSA.
+	 */
+	csa->priv2.spu_privcntl_RW = in_be64(&priv2->spu_privcntl_RW);
+}
+
+static inline void reset_spu_privcntl(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 33:
+	 * Restore, Step 16:
+	 *     Write SPU_PrivCntl[S,Le,A] fields reset to 0.
+	 */
+	out_be64(&priv2->spu_privcntl_RW, 0UL);
+	eieio();
+}
+
+static inline void save_spu_lslr(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 34:
+	 *     Save SPU_LSLR in the CSA.
+	 */
+	csa->priv2.spu_lslr_RW = in_be64(&priv2->spu_lslr_RW);
+}
+
+static inline void reset_spu_lslr(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 35:
+	 * Restore, Step 17.
+	 *     Reset SPU_LSLR.
+	 */
+	out_be64(&priv2->spu_lslr_RW, LS_ADDR_MASK);
+	eieio();
+}
+
+static inline void save_spu_cfg(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 36:
+	 *     Save SPU_Cfg in the CSA.
+	 */
+	csa->priv2.spu_cfg_RW = in_be64(&priv2->spu_cfg_RW);
+}
+
+static inline void save_pm_trace(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 37:
+	 *     Save PM_Trace_Tag_Wait_Mask in the CSA.
+	 *     Not performed by this implementation.
+	 */
+}
+
+static inline void save_mfc_rag(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 38:
+	 *     Save RA_GROUP_ID register and the
+	 *     RA_ENABLE reigster in the CSA.
+	 */
+	csa->priv1.resource_allocation_groupID_RW =
+		spu_resource_allocation_groupID_get(spu);
+	csa->priv1.resource_allocation_enable_RW =
+		spu_resource_allocation_enable_get(spu);
+}
+
+static inline void save_ppu_mb_stat(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save, Step 39:
+	 *     Save MB_Stat register in the CSA.
+	 */
+	csa->prob.mb_stat_R = in_be32(&prob->mb_stat_R);
+}
+
+static inline void save_ppu_mb(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save, Step 40:
+	 *     Save the PPU_MB register in the CSA.
+	 */
+	csa->prob.pu_mb_R = in_be32(&prob->pu_mb_R);
+}
+
+static inline void save_ppuint_mb(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 41:
+	 *     Save the PPUINT_MB register in the CSA.
+	 */
+	csa->priv2.puint_mb_R = in_be64(&priv2->puint_mb_R);
+}
+
+static inline void save_ch_part1(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
+	int i;
+
+	/* Save, Step 42:
+	 */
+
+	/* Save CH 1, without channel count */
+	out_be64(&priv2->spu_chnlcntptr_RW, 1);
+	csa->spu_chnldata_RW[1] = in_be64(&priv2->spu_chnldata_RW);
+
+	/* Save the following CH: [0,3,4,24,25,27] */
+	for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
+		idx = ch_indices[i];
+		out_be64(&priv2->spu_chnlcntptr_RW, idx);
+		eieio();
+		csa->spu_chnldata_RW[idx] = in_be64(&priv2->spu_chnldata_RW);
+		csa->spu_chnlcnt_RW[idx] = in_be64(&priv2->spu_chnlcnt_RW);
+		out_be64(&priv2->spu_chnldata_RW, 0UL);
+		out_be64(&priv2->spu_chnlcnt_RW, 0UL);
+		eieio();
+	}
+}
+
+static inline void save_spu_mb(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	int i;
+
+	/* Save, Step 43:
+	 *     Save SPU Read Mailbox Channel.
+	 */
+	out_be64(&priv2->spu_chnlcntptr_RW, 29UL);
+	eieio();
+	csa->spu_chnlcnt_RW[29] = in_be64(&priv2->spu_chnlcnt_RW);
+	for (i = 0; i < 4; i++) {
+		csa->spu_mailbox_data[i] = in_be64(&priv2->spu_chnldata_RW);
+	}
+	out_be64(&priv2->spu_chnlcnt_RW, 0UL);
+	eieio();
+}
+
+static inline void save_mfc_cmd(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 44:
+	 *     Save MFC_CMD Channel.
+	 */
+	out_be64(&priv2->spu_chnlcntptr_RW, 21UL);
+	eieio();
+	csa->spu_chnlcnt_RW[21] = in_be64(&priv2->spu_chnlcnt_RW);
+	eieio();
+}
+
+static inline void reset_ch(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 ch_indices[4] = { 21UL, 23UL, 28UL, 30UL };
+	u64 ch_counts[4] = { 16UL, 1UL, 1UL, 1UL };
+	u64 idx;
+	int i;
+
+	/* Save, Step 45:
+	 *     Reset the following CH: [21, 23, 28, 30]
+	 */
+	for (i = 0; i < 4; i++) {
+		idx = ch_indices[i];
+		out_be64(&priv2->spu_chnlcntptr_RW, idx);
+		eieio();
+		out_be64(&priv2->spu_chnlcnt_RW, ch_counts[i]);
+		eieio();
+	}
+}
+
+static inline void resume_mfc_queue(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 46:
+	 * Restore, Step 25.
+	 *     Write MFC_CNTL[Sc]=0 (resume queue processing).
+	 */
+	out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESUME_DMA_QUEUE);
+}
+
+static inline void setup_mfc_slbs(struct spu_state *csa, struct spu *spu,
+		unsigned int *code, int code_size)
+{
+	/* Save, Step 47:
+	 * Restore, Step 30.
+	 *     If MFC_SR1[R]=1, write 0 to SLB_Invalidate_All
+	 *     register, then initialize SLB_VSID and SLB_ESID
+	 *     to provide access to SPU context save code and
+	 *     LSCSA.
+	 *
+	 *     This implementation places both the context
+	 *     switch code and LSCSA in kernel address space.
+	 *
+	 *     Further this implementation assumes that the
+	 *     MFC_SR1[R]=1 (in other words, assume that
+	 *     translation is desired by OS environment).
+	 */
+	spu_invalidate_slbs(spu);
+	spu_setup_kernel_slbs(spu, csa->lscsa, code, code_size);
+}
+
+static inline void set_switch_active(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 48:
+	 * Restore, Step 23.
+	 *     Change the software context switch pending flag
+	 *     to context switch active.  This implementation does
+	 *     not uses a switch active flag.
+	 *
+	 * Now that we have saved the mfc in the csa, we can add in the
+	 * restart command if an exception occurred.
+	 */
+	if (test_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags))
+		csa->priv2.mfc_control_RW |= MFC_CNTL_RESTART_DMA_COMMAND;
+	clear_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags);
+	mb();
+}
+
+static inline void enable_interrupts(struct spu_state *csa, struct spu *spu)
+{
+	unsigned long class1_mask = CLASS1_ENABLE_SEGMENT_FAULT_INTR |
+	    CLASS1_ENABLE_STORAGE_FAULT_INTR;
+
+	/* Save, Step 49:
+	 * Restore, Step 22:
+	 *     Reset and then enable interrupts, as
+	 *     needed by OS.
+	 *
+	 *     This implementation enables only class1
+	 *     (translation) interrupts.
+	 */
+	spin_lock_irq(&spu->register_lock);
+	spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
+	spu_int_stat_clear(spu, 1, CLASS1_INTR_MASK);
+	spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
+	spu_int_mask_set(spu, 0, 0ul);
+	spu_int_mask_set(spu, 1, class1_mask);
+	spu_int_mask_set(spu, 2, 0ul);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+static inline int send_mfc_dma(struct spu *spu, unsigned long ea,
+			       unsigned int ls_offset, unsigned int size,
+			       unsigned int tag, unsigned int rclass,
+			       unsigned int cmd)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	union mfc_tag_size_class_cmd command;
+	unsigned int transfer_size;
+	volatile unsigned int status = 0x0;
+
+	while (size > 0) {
+		transfer_size =
+		    (size > MFC_MAX_DMA_SIZE) ? MFC_MAX_DMA_SIZE : size;
+		command.u.mfc_size = transfer_size;
+		command.u.mfc_tag = tag;
+		command.u.mfc_rclassid = rclass;
+		command.u.mfc_cmd = cmd;
+		do {
+			out_be32(&prob->mfc_lsa_W, ls_offset);
+			out_be64(&prob->mfc_ea_W, ea);
+			out_be64(&prob->mfc_union_W.all64, command.all64);
+			status =
+			    in_be32(&prob->mfc_union_W.by32.mfc_class_cmd32);
+			if (unlikely(status & 0x2)) {
+				cpu_relax();
+			}
+		} while (status & 0x3);
+		size -= transfer_size;
+		ea += transfer_size;
+		ls_offset += transfer_size;
+	}
+	return 0;
+}
+
+static inline void save_ls_16kb(struct spu_state *csa, struct spu *spu)
+{
+	unsigned long addr = (unsigned long)&csa->lscsa->ls[0];
+	unsigned int ls_offset = 0x0;
+	unsigned int size = 16384;
+	unsigned int tag = 0;
+	unsigned int rclass = 0;
+	unsigned int cmd = MFC_PUT_CMD;
+
+	/* Save, Step 50:
+	 *     Issue a DMA command to copy the first 16K bytes
+	 *     of local storage to the CSA.
+	 */
+	send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
+}
+
+static inline void set_spu_npc(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save, Step 51:
+	 * Restore, Step 31.
+	 *     Write SPU_NPC[IE]=0 and SPU_NPC[LSA] to entry
+	 *     point address of context save code in local
+	 *     storage.
+	 *
+	 *     This implementation uses SPU-side save/restore
+	 *     programs with entry points at LSA of 0.
+	 */
+	out_be32(&prob->spu_npc_RW, 0);
+	eieio();
+}
+
+static inline void set_signot1(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	union {
+		u64 ull;
+		u32 ui[2];
+	} addr64;
+
+	/* Save, Step 52:
+	 * Restore, Step 32:
+	 *    Write SPU_Sig_Notify_1 register with upper 32-bits
+	 *    of the CSA.LSCSA effective address.
+	 */
+	addr64.ull = (u64) csa->lscsa;
+	out_be32(&prob->signal_notify1, addr64.ui[0]);
+	eieio();
+}
+
+static inline void set_signot2(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	union {
+		u64 ull;
+		u32 ui[2];
+	} addr64;
+
+	/* Save, Step 53:
+	 * Restore, Step 33:
+	 *    Write SPU_Sig_Notify_2 register with lower 32-bits
+	 *    of the CSA.LSCSA effective address.
+	 */
+	addr64.ull = (u64) csa->lscsa;
+	out_be32(&prob->signal_notify2, addr64.ui[1]);
+	eieio();
+}
+
+static inline void send_save_code(struct spu_state *csa, struct spu *spu)
+{
+	unsigned long addr = (unsigned long)&spu_save_code[0];
+	unsigned int ls_offset = 0x0;
+	unsigned int size = sizeof(spu_save_code);
+	unsigned int tag = 0;
+	unsigned int rclass = 0;
+	unsigned int cmd = MFC_GETFS_CMD;
+
+	/* Save, Step 54:
+	 *     Issue a DMA command to copy context save code
+	 *     to local storage and start SPU.
+	 */
+	send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
+}
+
+static inline void set_ppu_querymask(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save, Step 55:
+	 * Restore, Step 38.
+	 *     Write PPU_QueryMask=1 (enable Tag Group 0)
+	 *     and issue eieio instruction.
+	 */
+	out_be32(&prob->dma_querymask_RW, MFC_TAGID_TO_TAGMASK(0));
+	eieio();
+}
+
+static inline void wait_tag_complete(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u32 mask = MFC_TAGID_TO_TAGMASK(0);
+	unsigned long flags;
+
+	/* Save, Step 56:
+	 * Restore, Step 39.
+	 * Restore, Step 39.
+	 * Restore, Step 46.
+	 *     Poll PPU_TagStatus[gn] until 01 (Tag group 0 complete)
+	 *     or write PPU_QueryType[TS]=01 and wait for Tag Group
+	 *     Complete Interrupt.  Write INT_Stat_Class0 or
+	 *     INT_Stat_Class2 with value of 'handled'.
+	 */
+	POLL_WHILE_FALSE(in_be32(&prob->dma_tagstatus_R) & mask);
+
+	local_irq_save(flags);
+	spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
+	spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
+	local_irq_restore(flags);
+}
+
+static inline void wait_spu_stopped(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	unsigned long flags;
+
+	/* Save, Step 57:
+	 * Restore, Step 40.
+	 *     Poll until SPU_Status[R]=0 or wait for SPU Class 0
+	 *     or SPU Class 2 interrupt.  Write INT_Stat_class0
+	 *     or INT_Stat_class2 with value of handled.
+	 */
+	POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING);
+
+	local_irq_save(flags);
+	spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
+	spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
+	local_irq_restore(flags);
+}
+
+static inline int check_save_status(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u32 complete;
+
+	/* Save, Step 54:
+	 *     If SPU_Status[P]=1 and SPU_Status[SC] = "success",
+	 *     context save succeeded, otherwise context save
+	 *     failed.
+	 */
+	complete = ((SPU_SAVE_COMPLETE << SPU_STOP_STATUS_SHIFT) |
+		    SPU_STATUS_STOPPED_BY_STOP);
+	return (in_be32(&prob->spu_status_R) != complete) ? 1 : 0;
+}
+
+static inline void terminate_spu_app(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 4:
+	 *    If required, notify the "using application" that
+	 *    the SPU task has been terminated.  TBD.
+	 */
+}
+
+static inline void suspend_mfc_and_halt_decr(struct spu_state *csa,
+		struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 7:
+	 *     Write MFC_Cntl[Dh,Sc,Sm]='1','1','0' to suspend
+	 *     the queue and halt the decrementer.
+	 */
+	out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE |
+		 MFC_CNTL_DECREMENTER_HALTED);
+	eieio();
+}
+
+static inline void wait_suspend_mfc_complete(struct spu_state *csa,
+					     struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 8:
+	 * Restore, Step 47.
+	 *     Poll MFC_CNTL[Ss] until 11 is returned.
+	 */
+	POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) &
+			 MFC_CNTL_SUSPEND_DMA_STATUS_MASK) ==
+			 MFC_CNTL_SUSPEND_COMPLETE);
+}
+
+static inline int suspend_spe(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Restore, Step 9:
+	 *    If SPU_Status[R]=1, stop SPU execution
+	 *    and wait for stop to complete.
+	 *
+	 *    Returns       1 if SPU_Status[R]=1 on entry.
+	 *                  0 otherwise
+	 */
+	if (in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) {
+		if (in_be32(&prob->spu_status_R) &
+		    SPU_STATUS_ISOLATED_EXIT_STATUS) {
+			POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+					SPU_STATUS_RUNNING);
+		}
+		if ((in_be32(&prob->spu_status_R) &
+		     SPU_STATUS_ISOLATED_LOAD_STATUS)
+		    || (in_be32(&prob->spu_status_R) &
+			SPU_STATUS_ISOLATED_STATE)) {
+			out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+			eieio();
+			POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+					SPU_STATUS_RUNNING);
+			out_be32(&prob->spu_runcntl_RW, 0x2);
+			eieio();
+			POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+					SPU_STATUS_RUNNING);
+		}
+		if (in_be32(&prob->spu_status_R) &
+		    SPU_STATUS_WAITING_FOR_CHANNEL) {
+			out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+			eieio();
+			POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+					SPU_STATUS_RUNNING);
+		}
+		return 1;
+	}
+	return 0;
+}
+
+static inline void clear_spu_status(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Restore, Step 10:
+	 *    If SPU_Status[R]=0 and SPU_Status[E,L,IS]=1,
+	 *    release SPU from isolate state.
+	 */
+	if (!(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING)) {
+		if (in_be32(&prob->spu_status_R) &
+		    SPU_STATUS_ISOLATED_EXIT_STATUS) {
+			spu_mfc_sr1_set(spu,
+					MFC_STATE1_MASTER_RUN_CONTROL_MASK);
+			eieio();
+			out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);
+			eieio();
+			POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+					SPU_STATUS_RUNNING);
+		}
+		if ((in_be32(&prob->spu_status_R) &
+		     SPU_STATUS_ISOLATED_LOAD_STATUS)
+		    || (in_be32(&prob->spu_status_R) &
+			SPU_STATUS_ISOLATED_STATE)) {
+			spu_mfc_sr1_set(spu,
+					MFC_STATE1_MASTER_RUN_CONTROL_MASK);
+			eieio();
+			out_be32(&prob->spu_runcntl_RW, 0x2);
+			eieio();
+			POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+					SPU_STATUS_RUNNING);
+		}
+	}
+}
+
+static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
+	u64 idx;
+	int i;
+
+	/* Restore, Step 20:
+	 */
+
+	/* Reset CH 1 */
+	out_be64(&priv2->spu_chnlcntptr_RW, 1);
+	out_be64(&priv2->spu_chnldata_RW, 0UL);
+
+	/* Reset the following CH: [0,3,4,24,25,27] */
+	for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
+		idx = ch_indices[i];
+		out_be64(&priv2->spu_chnlcntptr_RW, idx);
+		eieio();
+		out_be64(&priv2->spu_chnldata_RW, 0UL);
+		out_be64(&priv2->spu_chnlcnt_RW, 0UL);
+		eieio();
+	}
+}
+
+static inline void reset_ch_part2(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 ch_indices[5] = { 21UL, 23UL, 28UL, 29UL, 30UL };
+	u64 ch_counts[5] = { 16UL, 1UL, 1UL, 0UL, 1UL };
+	u64 idx;
+	int i;
+
+	/* Restore, Step 21:
+	 *     Reset the following CH: [21, 23, 28, 29, 30]
+	 */
+	for (i = 0; i < 5; i++) {
+		idx = ch_indices[i];
+		out_be64(&priv2->spu_chnlcntptr_RW, idx);
+		eieio();
+		out_be64(&priv2->spu_chnlcnt_RW, ch_counts[i]);
+		eieio();
+	}
+}
+
+static inline void setup_spu_status_part1(struct spu_state *csa,
+					  struct spu *spu)
+{
+	u32 status_P = SPU_STATUS_STOPPED_BY_STOP;
+	u32 status_I = SPU_STATUS_INVALID_INSTR;
+	u32 status_H = SPU_STATUS_STOPPED_BY_HALT;
+	u32 status_S = SPU_STATUS_SINGLE_STEP;
+	u32 status_S_I = SPU_STATUS_SINGLE_STEP | SPU_STATUS_INVALID_INSTR;
+	u32 status_S_P = SPU_STATUS_SINGLE_STEP | SPU_STATUS_STOPPED_BY_STOP;
+	u32 status_P_H = SPU_STATUS_STOPPED_BY_HALT |SPU_STATUS_STOPPED_BY_STOP;
+	u32 status_P_I = SPU_STATUS_STOPPED_BY_STOP |SPU_STATUS_INVALID_INSTR;
+	u32 status_code;
+
+	/* Restore, Step 27:
+	 *     If the CSA.SPU_Status[I,S,H,P]=1 then add the correct
+	 *     instruction sequence to the end of the SPU based restore
+	 *     code (after the "context restored" stop and signal) to
+	 *     restore the correct SPU status.
+	 *
+	 *     NOTE: Rather than modifying the SPU executable, we
+	 *     instead add a new 'stopped_status' field to the
+	 *     LSCSA.  The SPU-side restore reads this field and
+	 *     takes the appropriate action when exiting.
+	 */
+
+	status_code =
+	    (csa->prob.spu_status_R >> SPU_STOP_STATUS_SHIFT) & 0xFFFF;
+	if ((csa->prob.spu_status_R & status_P_I) == status_P_I) {
+
+		/* SPU_Status[P,I]=1 - Illegal Instruction followed
+		 * by Stop and Signal instruction, followed by 'br -4'.
+		 *
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P_I;
+		csa->lscsa->stopped_status.slot[1] = status_code;
+
+	} else if ((csa->prob.spu_status_R & status_P_H) == status_P_H) {
+
+		/* SPU_Status[P,H]=1 - Halt Conditional, followed
+		 * by Stop and Signal instruction, followed by
+		 * 'br -4'.
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P_H;
+		csa->lscsa->stopped_status.slot[1] = status_code;
+
+	} else if ((csa->prob.spu_status_R & status_S_P) == status_S_P) {
+
+		/* SPU_Status[S,P]=1 - Stop and Signal instruction
+		 * followed by 'br -4'.
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S_P;
+		csa->lscsa->stopped_status.slot[1] = status_code;
+
+	} else if ((csa->prob.spu_status_R & status_S_I) == status_S_I) {
+
+		/* SPU_Status[S,I]=1 - Illegal instruction followed
+		 * by 'br -4'.
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S_I;
+		csa->lscsa->stopped_status.slot[1] = status_code;
+
+	} else if ((csa->prob.spu_status_R & status_P) == status_P) {
+
+		/* SPU_Status[P]=1 - Stop and Signal instruction
+		 * followed by 'br -4'.
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P;
+		csa->lscsa->stopped_status.slot[1] = status_code;
+
+	} else if ((csa->prob.spu_status_R & status_H) == status_H) {
+
+		/* SPU_Status[H]=1 - Halt Conditional, followed
+		 * by 'br -4'.
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_H;
+
+	} else if ((csa->prob.spu_status_R & status_S) == status_S) {
+
+		/* SPU_Status[S]=1 - Two nop instructions.
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S;
+
+	} else if ((csa->prob.spu_status_R & status_I) == status_I) {
+
+		/* SPU_Status[I]=1 - Illegal instruction followed
+		 * by 'br -4'.
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_I;
+
+	}
+}
+
+static inline void setup_spu_status_part2(struct spu_state *csa,
+					  struct spu *spu)
+{
+	u32 mask;
+
+	/* Restore, Step 28:
+	 *     If the CSA.SPU_Status[I,S,H,P,R]=0 then
+	 *     add a 'br *' instruction to the end of
+	 *     the SPU based restore code.
+	 *
+	 *     NOTE: Rather than modifying the SPU executable, we
+	 *     instead add a new 'stopped_status' field to the
+	 *     LSCSA.  The SPU-side restore reads this field and
+	 *     takes the appropriate action when exiting.
+	 */
+	mask = SPU_STATUS_INVALID_INSTR |
+	    SPU_STATUS_SINGLE_STEP |
+	    SPU_STATUS_STOPPED_BY_HALT |
+	    SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_RUNNING;
+	if (!(csa->prob.spu_status_R & mask)) {
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_R;
+	}
+}
+
+static inline void restore_mfc_rag(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 29:
+	 *     Restore RA_GROUP_ID register and the
+	 *     RA_ENABLE reigster from the CSA.
+	 */
+	spu_resource_allocation_groupID_set(spu,
+			csa->priv1.resource_allocation_groupID_RW);
+	spu_resource_allocation_enable_set(spu,
+			csa->priv1.resource_allocation_enable_RW);
+}
+
+static inline void send_restore_code(struct spu_state *csa, struct spu *spu)
+{
+	unsigned long addr = (unsigned long)&spu_restore_code[0];
+	unsigned int ls_offset = 0x0;
+	unsigned int size = sizeof(spu_restore_code);
+	unsigned int tag = 0;
+	unsigned int rclass = 0;
+	unsigned int cmd = MFC_GETFS_CMD;
+
+	/* Restore, Step 37:
+	 *     Issue MFC DMA command to copy context
+	 *     restore code to local storage.
+	 */
+	send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
+}
+
+static inline void setup_decr(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 34:
+	 *     If CSA.MFC_CNTL[Ds]=1 (decrementer was
+	 *     running) then adjust decrementer, set
+	 *     decrementer running status in LSCSA,
+	 *     and set decrementer "wrapped" status
+	 *     in LSCSA.
+	 */
+	if (csa->priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING) {
+		cycles_t resume_time = get_cycles();
+		cycles_t delta_time = resume_time - csa->suspend_time;
+
+		csa->lscsa->decr_status.slot[0] = SPU_DECR_STATUS_RUNNING;
+		if (csa->lscsa->decr.slot[0] < delta_time) {
+			csa->lscsa->decr_status.slot[0] |=
+				 SPU_DECR_STATUS_WRAPPED;
+		}
+
+		csa->lscsa->decr.slot[0] -= delta_time;
+	} else {
+		csa->lscsa->decr_status.slot[0] = 0;
+	}
+}
+
+static inline void setup_ppu_mb(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 35:
+	 *     Copy the CSA.PU_MB data into the LSCSA.
+	 */
+	csa->lscsa->ppu_mb.slot[0] = csa->prob.pu_mb_R;
+}
+
+static inline void setup_ppuint_mb(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 36:
+	 *     Copy the CSA.PUINT_MB data into the LSCSA.
+	 */
+	csa->lscsa->ppuint_mb.slot[0] = csa->priv2.puint_mb_R;
+}
+
+static inline int check_restore_status(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u32 complete;
+
+	/* Restore, Step 40:
+	 *     If SPU_Status[P]=1 and SPU_Status[SC] = "success",
+	 *     context restore succeeded, otherwise context restore
+	 *     failed.
+	 */
+	complete = ((SPU_RESTORE_COMPLETE << SPU_STOP_STATUS_SHIFT) |
+		    SPU_STATUS_STOPPED_BY_STOP);
+	return (in_be32(&prob->spu_status_R) != complete) ? 1 : 0;
+}
+
+static inline void restore_spu_privcntl(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 41:
+	 *     Restore SPU_PrivCntl from the CSA.
+	 */
+	out_be64(&priv2->spu_privcntl_RW, csa->priv2.spu_privcntl_RW);
+	eieio();
+}
+
+static inline void restore_status_part1(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u32 mask;
+
+	/* Restore, Step 42:
+	 *     If any CSA.SPU_Status[I,S,H,P]=1, then
+	 *     restore the error or single step state.
+	 */
+	mask = SPU_STATUS_INVALID_INSTR |
+	    SPU_STATUS_SINGLE_STEP |
+	    SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP;
+	if (csa->prob.spu_status_R & mask) {
+		out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);
+		eieio();
+		POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+				SPU_STATUS_RUNNING);
+	}
+}
+
+static inline void restore_status_part2(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u32 mask;
+
+	/* Restore, Step 43:
+	 *     If all CSA.SPU_Status[I,S,H,P,R]=0 then write
+	 *     SPU_RunCntl[R0R1]='01', wait for SPU_Status[R]=1,
+	 *     then write '00' to SPU_RunCntl[R0R1] and wait
+	 *     for SPU_Status[R]=0.
+	 */
+	mask = SPU_STATUS_INVALID_INSTR |
+	    SPU_STATUS_SINGLE_STEP |
+	    SPU_STATUS_STOPPED_BY_HALT |
+	    SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_RUNNING;
+	if (!(csa->prob.spu_status_R & mask)) {
+		out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);
+		eieio();
+		POLL_WHILE_FALSE(in_be32(&prob->spu_status_R) &
+				 SPU_STATUS_RUNNING);
+		out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+		eieio();
+		POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+				SPU_STATUS_RUNNING);
+	}
+}
+
+static inline void restore_ls_16kb(struct spu_state *csa, struct spu *spu)
+{
+	unsigned long addr = (unsigned long)&csa->lscsa->ls[0];
+	unsigned int ls_offset = 0x0;
+	unsigned int size = 16384;
+	unsigned int tag = 0;
+	unsigned int rclass = 0;
+	unsigned int cmd = MFC_GET_CMD;
+
+	/* Restore, Step 44:
+	 *     Issue a DMA command to restore the first
+	 *     16kb of local storage from CSA.
+	 */
+	send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
+}
+
+static inline void suspend_mfc(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 47.
+	 *     Write MFC_Cntl[Sc,Sm]='1','0' to suspend
+	 *     the queue.
+	 */
+	out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE);
+	eieio();
+}
+
+static inline void clear_interrupts(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 49:
+	 *     Write INT_MASK_class0 with value of 0.
+	 *     Write INT_MASK_class1 with value of 0.
+	 *     Write INT_MASK_class2 with value of 0.
+	 *     Write INT_STAT_class0 with value of -1.
+	 *     Write INT_STAT_class1 with value of -1.
+	 *     Write INT_STAT_class2 with value of -1.
+	 */
+	spin_lock_irq(&spu->register_lock);
+	spu_int_mask_set(spu, 0, 0ul);
+	spu_int_mask_set(spu, 1, 0ul);
+	spu_int_mask_set(spu, 2, 0ul);
+	spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
+	spu_int_stat_clear(spu, 1, CLASS1_INTR_MASK);
+	spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+static inline void restore_mfc_queues(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	int i;
+
+	/* Restore, Step 50:
+	 *     If MFC_Cntl[Se]!=0 then restore
+	 *     MFC command queues.
+	 */
+	if ((csa->priv2.mfc_control_RW & MFC_CNTL_DMA_QUEUES_EMPTY_MASK) == 0) {
+		for (i = 0; i < 8; i++) {
+			out_be64(&priv2->puq[i].mfc_cq_data0_RW,
+				 csa->priv2.puq[i].mfc_cq_data0_RW);
+			out_be64(&priv2->puq[i].mfc_cq_data1_RW,
+				 csa->priv2.puq[i].mfc_cq_data1_RW);
+			out_be64(&priv2->puq[i].mfc_cq_data2_RW,
+				 csa->priv2.puq[i].mfc_cq_data2_RW);
+			out_be64(&priv2->puq[i].mfc_cq_data3_RW,
+				 csa->priv2.puq[i].mfc_cq_data3_RW);
+		}
+		for (i = 0; i < 16; i++) {
+			out_be64(&priv2->spuq[i].mfc_cq_data0_RW,
+				 csa->priv2.spuq[i].mfc_cq_data0_RW);
+			out_be64(&priv2->spuq[i].mfc_cq_data1_RW,
+				 csa->priv2.spuq[i].mfc_cq_data1_RW);
+			out_be64(&priv2->spuq[i].mfc_cq_data2_RW,
+				 csa->priv2.spuq[i].mfc_cq_data2_RW);
+			out_be64(&priv2->spuq[i].mfc_cq_data3_RW,
+				 csa->priv2.spuq[i].mfc_cq_data3_RW);
+		}
+	}
+	eieio();
+}
+
+static inline void restore_ppu_querymask(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Restore, Step 51:
+	 *     Restore the PPU_QueryMask register from CSA.
+	 */
+	out_be32(&prob->dma_querymask_RW, csa->prob.dma_querymask_RW);
+	eieio();
+}
+
+static inline void restore_ppu_querytype(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Restore, Step 52:
+	 *     Restore the PPU_QueryType register from CSA.
+	 */
+	out_be32(&prob->dma_querytype_RW, csa->prob.dma_querytype_RW);
+	eieio();
+}
+
+static inline void restore_mfc_csr_tsq(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 53:
+	 *     Restore the MFC_CSR_TSQ register from CSA.
+	 */
+	out_be64(&priv2->spu_tag_status_query_RW,
+		 csa->priv2.spu_tag_status_query_RW);
+	eieio();
+}
+
+static inline void restore_mfc_csr_cmd(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 54:
+	 *     Restore the MFC_CSR_CMD1 and MFC_CSR_CMD2
+	 *     registers from CSA.
+	 */
+	out_be64(&priv2->spu_cmd_buf1_RW, csa->priv2.spu_cmd_buf1_RW);
+	out_be64(&priv2->spu_cmd_buf2_RW, csa->priv2.spu_cmd_buf2_RW);
+	eieio();
+}
+
+static inline void restore_mfc_csr_ato(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 55:
+	 *     Restore the MFC_CSR_ATO register from CSA.
+	 */
+	out_be64(&priv2->spu_atomic_status_RW, csa->priv2.spu_atomic_status_RW);
+}
+
+static inline void restore_mfc_tclass_id(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 56:
+	 *     Restore the MFC_TCLASS_ID register from CSA.
+	 */
+	spu_mfc_tclass_id_set(spu, csa->priv1.mfc_tclass_id_RW);
+	eieio();
+}
+
+static inline void set_llr_event(struct spu_state *csa, struct spu *spu)
+{
+	u64 ch0_cnt, ch0_data;
+	u64 ch1_data;
+
+	/* Restore, Step 57:
+	 *    Set the Lock Line Reservation Lost Event by:
+	 *      1. OR CSA.SPU_Event_Status with bit 21 (Lr) set to 1.
+	 *      2. If CSA.SPU_Channel_0_Count=0 and
+	 *         CSA.SPU_Wr_Event_Mask[Lr]=1 and
+	 *         CSA.SPU_Event_Status[Lr]=0 then set
+	 *         CSA.SPU_Event_Status_Count=1.
+	 */
+	ch0_cnt = csa->spu_chnlcnt_RW[0];
+	ch0_data = csa->spu_chnldata_RW[0];
+	ch1_data = csa->spu_chnldata_RW[1];
+	csa->spu_chnldata_RW[0] |= MFC_LLR_LOST_EVENT;
+	if ((ch0_cnt == 0) && !(ch0_data & MFC_LLR_LOST_EVENT) &&
+	    (ch1_data & MFC_LLR_LOST_EVENT)) {
+		csa->spu_chnlcnt_RW[0] = 1;
+	}
+}
+
+static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 58:
+	 *     If the status of the CSA software decrementer
+	 *     "wrapped" flag is set, OR in a '1' to
+	 *     CSA.SPU_Event_Status[Tm].
+	 */
+	if (!(csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED))
+		return;
+
+	if ((csa->spu_chnlcnt_RW[0] == 0) &&
+	    (csa->spu_chnldata_RW[1] & 0x20) &&
+	    !(csa->spu_chnldata_RW[0] & 0x20))
+		csa->spu_chnlcnt_RW[0] = 1;
+
+	csa->spu_chnldata_RW[0] |= 0x20;
+}
+
+static inline void restore_ch_part1(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
+	int i;
+
+	/* Restore, Step 59:
+	 *	Restore the following CH: [0,3,4,24,25,27]
+	 */
+	for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
+		idx = ch_indices[i];
+		out_be64(&priv2->spu_chnlcntptr_RW, idx);
+		eieio();
+		out_be64(&priv2->spu_chnldata_RW, csa->spu_chnldata_RW[idx]);
+		out_be64(&priv2->spu_chnlcnt_RW, csa->spu_chnlcnt_RW[idx]);
+		eieio();
+	}
+}
+
+static inline void restore_ch_part2(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 ch_indices[3] = { 9UL, 21UL, 23UL };
+	u64 ch_counts[3] = { 1UL, 16UL, 1UL };
+	u64 idx;
+	int i;
+
+	/* Restore, Step 60:
+	 *     Restore the following CH: [9,21,23].
+	 */
+	ch_counts[0] = 1UL;
+	ch_counts[1] = csa->spu_chnlcnt_RW[21];
+	ch_counts[2] = 1UL;
+	for (i = 0; i < 3; i++) {
+		idx = ch_indices[i];
+		out_be64(&priv2->spu_chnlcntptr_RW, idx);
+		eieio();
+		out_be64(&priv2->spu_chnlcnt_RW, ch_counts[i]);
+		eieio();
+	}
+}
+
+static inline void restore_spu_lslr(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 61:
+	 *     Restore the SPU_LSLR register from CSA.
+	 */
+	out_be64(&priv2->spu_lslr_RW, csa->priv2.spu_lslr_RW);
+	eieio();
+}
+
+static inline void restore_spu_cfg(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 62:
+	 *     Restore the SPU_Cfg register from CSA.
+	 */
+	out_be64(&priv2->spu_cfg_RW, csa->priv2.spu_cfg_RW);
+	eieio();
+}
+
+static inline void restore_pm_trace(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 63:
+	 *     Restore PM_Trace_Tag_Wait_Mask from CSA.
+	 *     Not performed by this implementation.
+	 */
+}
+
+static inline void restore_spu_npc(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Restore, Step 64:
+	 *     Restore SPU_NPC from CSA.
+	 */
+	out_be32(&prob->spu_npc_RW, csa->prob.spu_npc_RW);
+	eieio();
+}
+
+static inline void restore_spu_mb(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	int i;
+
+	/* Restore, Step 65:
+	 *     Restore MFC_RdSPU_MB from CSA.
+	 */
+	out_be64(&priv2->spu_chnlcntptr_RW, 29UL);
+	eieio();
+	out_be64(&priv2->spu_chnlcnt_RW, csa->spu_chnlcnt_RW[29]);
+	for (i = 0; i < 4; i++) {
+		out_be64(&priv2->spu_chnldata_RW, csa->spu_mailbox_data[i]);
+	}
+	eieio();
+}
+
+static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u32 dummy = 0;
+
+	/* Restore, Step 66:
+	 *     If CSA.MB_Stat[P]=0 (mailbox empty) then
+	 *     read from the PPU_MB register.
+	 */
+	if ((csa->prob.mb_stat_R & 0xFF) == 0) {
+		dummy = in_be32(&prob->pu_mb_R);
+		eieio();
+	}
+}
+
+static inline void check_ppuint_mb_stat(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 dummy = 0UL;
+
+	/* Restore, Step 66:
+	 *     If CSA.MB_Stat[I]=0 (mailbox empty) then
+	 *     read from the PPUINT_MB register.
+	 */
+	if ((csa->prob.mb_stat_R & 0xFF0000) == 0) {
+		dummy = in_be64(&priv2->puint_mb_R);
+		eieio();
+		spu_int_stat_clear(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
+		eieio();
+	}
+}
+
+static inline void restore_mfc_sr1(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 69:
+	 *     Restore the MFC_SR1 register from CSA.
+	 */
+	spu_mfc_sr1_set(spu, csa->priv1.mfc_sr1_RW);
+	eieio();
+}
+
+static inline void set_int_route(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	spu_cpu_affinity_set(spu, ctx->last_ran);
+}
+
+static inline void restore_other_spu_access(struct spu_state *csa,
+					    struct spu *spu)
+{
+	/* Restore, Step 70:
+	 *     Restore other SPU mappings to this SPU. TBD.
+	 */
+}
+
+static inline void restore_spu_runcntl(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Restore, Step 71:
+	 *     If CSA.SPU_Status[R]=1 then write
+	 *     SPU_RunCntl[R0R1]='01'.
+	 */
+	if (csa->prob.spu_status_R & SPU_STATUS_RUNNING) {
+		out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);
+		eieio();
+	}
+}
+
+static inline void restore_mfc_cntl(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 72:
+	 *    Restore the MFC_CNTL register for the CSA.
+	 */
+	out_be64(&priv2->mfc_control_RW, csa->priv2.mfc_control_RW);
+	eieio();
+
+	/*
+	 * The queue is put back into the same state that was evident prior to
+	 * the context switch. The suspend flag is added to the saved state in
+	 * the csa, if the operational state was suspending or suspended. In
+	 * this case, the code that suspended the mfc is responsible for
+	 * continuing it. Note that SPE faults do not change the operational
+	 * state of the spu.
+	 */
+}
+
+static inline void enable_user_access(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 73:
+	 *     Enable user-space access (if provided) to this
+	 *     SPU by mapping the virtual pages assigned to
+	 *     the SPU memory-mapped I/O (MMIO) for problem
+	 *     state. TBD.
+	 */
+}
+
+static inline void reset_switch_active(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 74:
+	 *     Reset the "context switch active" flag.
+	 *     Not performed by this implementation.
+	 */
+}
+
+static inline void reenable_interrupts(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 75:
+	 *     Re-enable SPU interrupts.
+	 */
+	spin_lock_irq(&spu->register_lock);
+	spu_int_mask_set(spu, 0, csa->priv1.int_mask_class0_RW);
+	spu_int_mask_set(spu, 1, csa->priv1.int_mask_class1_RW);
+	spu_int_mask_set(spu, 2, csa->priv1.int_mask_class2_RW);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+static int quiece_spu(struct spu_state *prev, struct spu *spu)
+{
+	/*
+	 * Combined steps 2-18 of SPU context save sequence, which
+	 * quiesce the SPU state (disable SPU execution, MFC command
+	 * queues, decrementer, SPU interrupts, etc.).
+	 *
+	 * Returns      0 on success.
+	 *              2 if failed step 2.
+	 *              6 if failed step 6.
+	 */
+
+	if (check_spu_isolate(prev, spu)) {	/* Step 2. */
+		return 2;
+	}
+	disable_interrupts(prev, spu);	        /* Step 3. */
+	set_watchdog_timer(prev, spu);	        /* Step 4. */
+	inhibit_user_access(prev, spu);	        /* Step 5. */
+	if (check_spu_isolate(prev, spu)) {	/* Step 6. */
+		return 6;
+	}
+	set_switch_pending(prev, spu);	        /* Step 7. */
+	save_mfc_cntl(prev, spu);		/* Step 8. */
+	save_spu_runcntl(prev, spu);	        /* Step 9. */
+	save_mfc_sr1(prev, spu);	        /* Step 10. */
+	save_spu_status(prev, spu);	        /* Step 11. */
+	save_mfc_stopped_status(prev, spu);     /* Step 12. */
+	halt_mfc_decr(prev, spu);	        /* Step 13. */
+	save_timebase(prev, spu);		/* Step 14. */
+	remove_other_spu_access(prev, spu);	/* Step 15. */
+	do_mfc_mssync(prev, spu);	        /* Step 16. */
+	issue_mfc_tlbie(prev, spu);	        /* Step 17. */
+	handle_pending_interrupts(prev, spu);	/* Step 18. */
+
+	return 0;
+}
+
+static void save_csa(struct spu_state *prev, struct spu *spu)
+{
+	/*
+	 * Combine steps 19-44 of SPU context save sequence, which
+	 * save regions of the privileged & problem state areas.
+	 */
+
+	save_mfc_queues(prev, spu);	/* Step 19. */
+	save_ppu_querymask(prev, spu);	/* Step 20. */
+	save_ppu_querytype(prev, spu);	/* Step 21. */
+	save_ppu_tagstatus(prev, spu);  /* NEW.     */
+	save_mfc_csr_tsq(prev, spu);	/* Step 22. */
+	save_mfc_csr_cmd(prev, spu);	/* Step 23. */
+	save_mfc_csr_ato(prev, spu);	/* Step 24. */
+	save_mfc_tclass_id(prev, spu);	/* Step 25. */
+	set_mfc_tclass_id(prev, spu);	/* Step 26. */
+	save_mfc_cmd(prev, spu);	/* Step 26a - moved from 44. */
+	purge_mfc_queue(prev, spu);	/* Step 27. */
+	wait_purge_complete(prev, spu);	/* Step 28. */
+	setup_mfc_sr1(prev, spu);	/* Step 30. */
+	save_spu_npc(prev, spu);	/* Step 31. */
+	save_spu_privcntl(prev, spu);	/* Step 32. */
+	reset_spu_privcntl(prev, spu);	/* Step 33. */
+	save_spu_lslr(prev, spu);	/* Step 34. */
+	reset_spu_lslr(prev, spu);	/* Step 35. */
+	save_spu_cfg(prev, spu);	/* Step 36. */
+	save_pm_trace(prev, spu);	/* Step 37. */
+	save_mfc_rag(prev, spu);	/* Step 38. */
+	save_ppu_mb_stat(prev, spu);	/* Step 39. */
+	save_ppu_mb(prev, spu);	        /* Step 40. */
+	save_ppuint_mb(prev, spu);	/* Step 41. */
+	save_ch_part1(prev, spu);	/* Step 42. */
+	save_spu_mb(prev, spu);	        /* Step 43. */
+	reset_ch(prev, spu);	        /* Step 45. */
+}
+
+static void save_lscsa(struct spu_state *prev, struct spu *spu)
+{
+	/*
+	 * Perform steps 46-57 of SPU context save sequence,
+	 * which save regions of the local store and register
+	 * file.
+	 */
+
+	resume_mfc_queue(prev, spu);	/* Step 46. */
+	/* Step 47. */
+	setup_mfc_slbs(prev, spu, spu_save_code, sizeof(spu_save_code));
+	set_switch_active(prev, spu);	/* Step 48. */
+	enable_interrupts(prev, spu);	/* Step 49. */
+	save_ls_16kb(prev, spu);	/* Step 50. */
+	set_spu_npc(prev, spu);	        /* Step 51. */
+	set_signot1(prev, spu);		/* Step 52. */
+	set_signot2(prev, spu);		/* Step 53. */
+	send_save_code(prev, spu);	/* Step 54. */
+	set_ppu_querymask(prev, spu);	/* Step 55. */
+	wait_tag_complete(prev, spu);	/* Step 56. */
+	wait_spu_stopped(prev, spu);	/* Step 57. */
+}
+
+static void force_spu_isolate_exit(struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Stop SPE execution and wait for completion. */
+	out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+	iobarrier_rw();
+	POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING);
+
+	/* Restart SPE master runcntl. */
+	spu_mfc_sr1_set(spu, MFC_STATE1_MASTER_RUN_CONTROL_MASK);
+	iobarrier_w();
+
+	/* Initiate isolate exit request and wait for completion. */
+	out_be64(&priv2->spu_privcntl_RW, 4LL);
+	iobarrier_w();
+	out_be32(&prob->spu_runcntl_RW, 2);
+	iobarrier_rw();
+	POLL_WHILE_FALSE((in_be32(&prob->spu_status_R)
+				& SPU_STATUS_STOPPED_BY_STOP));
+
+	/* Reset load request to normal. */
+	out_be64(&priv2->spu_privcntl_RW, SPU_PRIVCNT_LOAD_REQUEST_NORMAL);
+	iobarrier_w();
+}
+
+/**
+ * stop_spu_isolate
+ *	Check SPU run-control state and force isolated
+ *	exit function as necessary.
+ */
+static void stop_spu_isolate(struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	if (in_be32(&prob->spu_status_R) & SPU_STATUS_ISOLATED_STATE) {
+		/* The SPU is in isolated state; the only way
+		 * to get it out is to perform an isolated
+		 * exit (clean) operation.
+		 */
+		force_spu_isolate_exit(spu);
+	}
+}
+
+static void harvest(struct spu_state *prev, struct spu *spu)
+{
+	/*
+	 * Perform steps 2-25 of SPU context restore sequence,
+	 * which resets an SPU either after a failed save, or
+	 * when using SPU for first time.
+	 */
+
+	disable_interrupts(prev, spu);	        /* Step 2.  */
+	inhibit_user_access(prev, spu);	        /* Step 3.  */
+	terminate_spu_app(prev, spu);	        /* Step 4.  */
+	set_switch_pending(prev, spu);	        /* Step 5.  */
+	stop_spu_isolate(spu);			/* NEW.     */
+	remove_other_spu_access(prev, spu);	/* Step 6.  */
+	suspend_mfc_and_halt_decr(prev, spu);	/* Step 7.  */
+	wait_suspend_mfc_complete(prev, spu);	/* Step 8.  */
+	if (!suspend_spe(prev, spu))	        /* Step 9.  */
+		clear_spu_status(prev, spu);	/* Step 10. */
+	do_mfc_mssync(prev, spu);	        /* Step 11. */
+	issue_mfc_tlbie(prev, spu);	        /* Step 12. */
+	handle_pending_interrupts(prev, spu);	/* Step 13. */
+	purge_mfc_queue(prev, spu);	        /* Step 14. */
+	wait_purge_complete(prev, spu);	        /* Step 15. */
+	reset_spu_privcntl(prev, spu);	        /* Step 16. */
+	reset_spu_lslr(prev, spu);              /* Step 17. */
+	setup_mfc_sr1(prev, spu);	        /* Step 18. */
+	spu_invalidate_slbs(spu);		/* Step 19. */
+	reset_ch_part1(prev, spu);	        /* Step 20. */
+	reset_ch_part2(prev, spu);	        /* Step 21. */
+	enable_interrupts(prev, spu);	        /* Step 22. */
+	set_switch_active(prev, spu);	        /* Step 23. */
+	set_mfc_tclass_id(prev, spu);	        /* Step 24. */
+	resume_mfc_queue(prev, spu);	        /* Step 25. */
+}
+
+static void restore_lscsa(struct spu_state *next, struct spu *spu)
+{
+	/*
+	 * Perform steps 26-40 of SPU context restore sequence,
+	 * which restores regions of the local store and register
+	 * file.
+	 */
+
+	set_watchdog_timer(next, spu);	        /* Step 26. */
+	setup_spu_status_part1(next, spu);	/* Step 27. */
+	setup_spu_status_part2(next, spu);	/* Step 28. */
+	restore_mfc_rag(next, spu);	        /* Step 29. */
+	/* Step 30. */
+	setup_mfc_slbs(next, spu, spu_restore_code, sizeof(spu_restore_code));
+	set_spu_npc(next, spu);	                /* Step 31. */
+	set_signot1(next, spu);	                /* Step 32. */
+	set_signot2(next, spu);	                /* Step 33. */
+	setup_decr(next, spu);	                /* Step 34. */
+	setup_ppu_mb(next, spu);	        /* Step 35. */
+	setup_ppuint_mb(next, spu);	        /* Step 36. */
+	send_restore_code(next, spu);	        /* Step 37. */
+	set_ppu_querymask(next, spu);	        /* Step 38. */
+	wait_tag_complete(next, spu);	        /* Step 39. */
+	wait_spu_stopped(next, spu);	        /* Step 40. */
+}
+
+static void restore_csa(struct spu_state *next, struct spu *spu)
+{
+	/*
+	 * Combine steps 41-76 of SPU context restore sequence, which
+	 * restore regions of the privileged & problem state areas.
+	 */
+
+	restore_spu_privcntl(next, spu);	/* Step 41. */
+	restore_status_part1(next, spu);	/* Step 42. */
+	restore_status_part2(next, spu);	/* Step 43. */
+	restore_ls_16kb(next, spu);	        /* Step 44. */
+	wait_tag_complete(next, spu);	        /* Step 45. */
+	suspend_mfc(next, spu);	                /* Step 46. */
+	wait_suspend_mfc_complete(next, spu);	/* Step 47. */
+	issue_mfc_tlbie(next, spu);	        /* Step 48. */
+	clear_interrupts(next, spu);	        /* Step 49. */
+	restore_mfc_queues(next, spu);	        /* Step 50. */
+	restore_ppu_querymask(next, spu);	/* Step 51. */
+	restore_ppu_querytype(next, spu);	/* Step 52. */
+	restore_mfc_csr_tsq(next, spu);	        /* Step 53. */
+	restore_mfc_csr_cmd(next, spu);	        /* Step 54. */
+	restore_mfc_csr_ato(next, spu);	        /* Step 55. */
+	restore_mfc_tclass_id(next, spu);	/* Step 56. */
+	set_llr_event(next, spu);	        /* Step 57. */
+	restore_decr_wrapped(next, spu);	/* Step 58. */
+	restore_ch_part1(next, spu);	        /* Step 59. */
+	restore_ch_part2(next, spu);	        /* Step 60. */
+	restore_spu_lslr(next, spu);	        /* Step 61. */
+	restore_spu_cfg(next, spu);	        /* Step 62. */
+	restore_pm_trace(next, spu);	        /* Step 63. */
+	restore_spu_npc(next, spu);	        /* Step 64. */
+	restore_spu_mb(next, spu);	        /* Step 65. */
+	check_ppu_mb_stat(next, spu);	        /* Step 66. */
+	check_ppuint_mb_stat(next, spu);	/* Step 67. */
+	spu_invalidate_slbs(spu);		/* Modified Step 68. */
+	restore_mfc_sr1(next, spu);	        /* Step 69. */
+	set_int_route(next, spu);		/* NEW      */
+	restore_other_spu_access(next, spu);	/* Step 70. */
+	restore_spu_runcntl(next, spu);	        /* Step 71. */
+	restore_mfc_cntl(next, spu);	        /* Step 72. */
+	enable_user_access(next, spu);	        /* Step 73. */
+	reset_switch_active(next, spu);	        /* Step 74. */
+	reenable_interrupts(next, spu);	        /* Step 75. */
+}
+
+static int __do_spu_save(struct spu_state *prev, struct spu *spu)
+{
+	int rc;
+
+	/*
+	 * SPU context save can be broken into three phases:
+	 *
+	 *     (a) quiesce [steps 2-16].
+	 *     (b) save of CSA, performed by PPE [steps 17-42]
+	 *     (c) save of LSCSA, mostly performed by SPU [steps 43-52].
+	 *
+	 * Returns      0 on success.
+	 *              2,6 if failed to quiece SPU
+	 *              53 if SPU-side of save failed.
+	 */
+
+	rc = quiece_spu(prev, spu);	        /* Steps 2-16. */
+	switch (rc) {
+	default:
+	case 2:
+	case 6:
+		harvest(prev, spu);
+		return rc;
+		break;
+	case 0:
+		break;
+	}
+	save_csa(prev, spu);	                /* Steps 17-43. */
+	save_lscsa(prev, spu);	                /* Steps 44-53. */
+	return check_save_status(prev, spu);	/* Step 54.     */
+}
+
+static int __do_spu_restore(struct spu_state *next, struct spu *spu)
+{
+	int rc;
+
+	/*
+	 * SPU context restore can be broken into three phases:
+	 *
+	 *    (a) harvest (or reset) SPU [steps 2-24].
+	 *    (b) restore LSCSA [steps 25-40], mostly performed by SPU.
+	 *    (c) restore CSA [steps 41-76], performed by PPE.
+	 *
+	 * The 'harvest' step is not performed here, but rather
+	 * as needed below.
+	 */
+
+	restore_lscsa(next, spu);	        /* Steps 24-39. */
+	rc = check_restore_status(next, spu);	/* Step 40.     */
+	switch (rc) {
+	default:
+		/* Failed. Return now. */
+		return rc;
+		break;
+	case 0:
+		/* Fall through to next step. */
+		break;
+	}
+	restore_csa(next, spu);
+
+	return 0;
+}
+
+/**
+ * spu_save - SPU context save, with locking.
+ * @prev: pointer to SPU context save area, to be saved.
+ * @spu: pointer to SPU iomem structure.
+ *
+ * Acquire locks, perform the save operation then return.
+ */
+int spu_save(struct spu_state *prev, struct spu *spu)
+{
+	int rc;
+
+	acquire_spu_lock(spu);	        /* Step 1.     */
+	rc = __do_spu_save(prev, spu);	/* Steps 2-53. */
+	release_spu_lock(spu);
+	if (rc != 0 && rc != 2 && rc != 6) {
+		panic("%s failed on SPU[%d], rc=%d.\n",
+		      __func__, spu->number, rc);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(spu_save);
+
+/**
+ * spu_restore - SPU context restore, with harvest and locking.
+ * @new: pointer to SPU context save area, to be restored.
+ * @spu: pointer to SPU iomem structure.
+ *
+ * Perform harvest + restore, as we may not be coming
+ * from a previous successful save operation, and the
+ * hardware state is unknown.
+ */
+int spu_restore(struct spu_state *new, struct spu *spu)
+{
+	int rc;
+
+	acquire_spu_lock(spu);
+	harvest(NULL, spu);
+	spu->slb_replace = 0;
+	rc = __do_spu_restore(new, spu);
+	release_spu_lock(spu);
+	if (rc) {
+		panic("%s failed on SPU[%d] rc=%d.\n",
+		       __func__, spu->number, rc);
+	}
+	return rc;
+}
+EXPORT_SYMBOL_GPL(spu_restore);
+
+static void init_prob(struct spu_state *csa)
+{
+	csa->spu_chnlcnt_RW[9] = 1;
+	csa->spu_chnlcnt_RW[21] = 16;
+	csa->spu_chnlcnt_RW[23] = 1;
+	csa->spu_chnlcnt_RW[28] = 1;
+	csa->spu_chnlcnt_RW[30] = 1;
+	csa->prob.spu_runcntl_RW = SPU_RUNCNTL_STOP;
+	csa->prob.mb_stat_R = 0x000400;
+}
+
+static void init_priv1(struct spu_state *csa)
+{
+	/* Enable decode, relocate, tlbie response, master runcntl. */
+	csa->priv1.mfc_sr1_RW = MFC_STATE1_LOCAL_STORAGE_DECODE_MASK |
+	    MFC_STATE1_MASTER_RUN_CONTROL_MASK |
+	    MFC_STATE1_PROBLEM_STATE_MASK |
+	    MFC_STATE1_RELOCATE_MASK | MFC_STATE1_BUS_TLBIE_MASK;
+
+	/* Enable OS-specific set of interrupts. */
+	csa->priv1.int_mask_class0_RW = CLASS0_ENABLE_DMA_ALIGNMENT_INTR |
+	    CLASS0_ENABLE_INVALID_DMA_COMMAND_INTR |
+	    CLASS0_ENABLE_SPU_ERROR_INTR;
+	csa->priv1.int_mask_class1_RW = CLASS1_ENABLE_SEGMENT_FAULT_INTR |
+	    CLASS1_ENABLE_STORAGE_FAULT_INTR;
+	csa->priv1.int_mask_class2_RW = CLASS2_ENABLE_SPU_STOP_INTR |
+	    CLASS2_ENABLE_SPU_HALT_INTR |
+	    CLASS2_ENABLE_SPU_DMA_TAG_GROUP_COMPLETE_INTR;
+}
+
+static void init_priv2(struct spu_state *csa)
+{
+	csa->priv2.spu_lslr_RW = LS_ADDR_MASK;
+	csa->priv2.mfc_control_RW = MFC_CNTL_RESUME_DMA_QUEUE |
+	    MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION |
+	    MFC_CNTL_DMA_QUEUES_EMPTY_MASK;
+}
+
+/**
+ * spu_alloc_csa - allocate and initialize an SPU context save area.
+ *
+ * Allocate and initialize the contents of an SPU context save area.
+ * This includes enabling address translation, interrupt masks, etc.,
+ * as appropriate for the given OS environment.
+ *
+ * Note that storage for the 'lscsa' is allocated separately,
+ * as it is by far the largest of the context save regions,
+ * and may need to be pinned or otherwise specially aligned.
+ */
+int spu_init_csa(struct spu_state *csa)
+{
+	int rc;
+
+	if (!csa)
+		return -EINVAL;
+	memset(csa, 0, sizeof(struct spu_state));
+
+	rc = spu_alloc_lscsa(csa);
+	if (rc)
+		return rc;
+
+	spin_lock_init(&csa->register_lock);
+
+	init_prob(csa);
+	init_priv1(csa);
+	init_priv2(csa);
+
+	return 0;
+}
+
+void spu_fini_csa(struct spu_state *csa)
+{
+	spu_free_lscsa(csa);
+}
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
new file mode 100644
index 0000000..c23617c
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -0,0 +1,91 @@
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+
+#include <asm/uaccess.h>
+
+#include "spufs.h"
+
+/**
+ * sys_spu_run - run code loaded into an SPU
+ *
+ * @unpc:    next program counter for the SPU
+ * @ustatus: status of the SPU
+ *
+ * This system call transfers the control of execution of a
+ * user space thread to an SPU. It will return when the
+ * SPU has finished executing or when it hits an error
+ * condition and it will be interrupted if a signal needs
+ * to be delivered to a handler in user space.
+ *
+ * The next program counter is set to the passed value
+ * before the SPU starts fetching code and the user space
+ * pointer gets updated with the new value when returning
+ * from kernel space.
+ *
+ * The status value returned from spu_run reflects the
+ * value of the spu_status register after the SPU has stopped.
+ *
+ */
+static long do_spu_run(struct file *filp,
+			__u32 __user *unpc,
+			__u32 __user *ustatus)
+{
+	long ret;
+	struct spufs_inode_info *i;
+	u32 npc, status;
+
+	ret = -EFAULT;
+	if (get_user(npc, unpc))
+		goto out;
+
+	/* check if this file was created by spu_create */
+	ret = -EINVAL;
+	if (filp->f_op != &spufs_context_fops)
+		goto out;
+
+	i = SPUFS_I(filp->f_path.dentry->d_inode);
+	ret = spufs_run_spu(i->i_ctx, &npc, &status);
+
+	if (put_user(npc, unpc))
+		ret = -EFAULT;
+
+	if (ustatus && put_user(status, ustatus))
+		ret = -EFAULT;
+out:
+	return ret;
+}
+
+static long do_spu_create(const char __user *pathname, unsigned int flags,
+		mode_t mode, struct file *neighbor)
+{
+	char *tmp;
+	int ret;
+
+	tmp = getname(pathname);
+	ret = PTR_ERR(tmp);
+	if (!IS_ERR(tmp)) {
+		struct nameidata nd;
+
+		ret = path_lookup(tmp, LOOKUP_PARENT, &nd);
+		if (!ret) {
+			nd.flags |= LOOKUP_OPEN | LOOKUP_CREATE;
+			ret = spufs_create(&nd, flags, mode, neighbor);
+			path_put(&nd.path);
+		}
+		putname(tmp);
+	}
+
+	return ret;
+}
+
+struct spufs_calls spufs_calls = {
+	.create_thread = do_spu_create,
+	.spu_run = do_spu_run,
+	.coredump_extra_notes_size = spufs_coredump_extra_notes_size,
+	.coredump_extra_notes_write = spufs_coredump_extra_notes_write,
+	.notify_spus_active = do_notify_spus_active,
+	.owner = THIS_MODULE,
+};