Merge git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc-merge

author: Linus Torvalds <torvalds@g5.osdl.org> 2006-01-10 08:28:32 -0800
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-01-10 08:28:32 -0800
commit: a62e68488dd5ddb07776555fd7e0435c6d021ac1 (patch)
tree: d6cec15baa1ddfee108ef77b315dfdea5e3fa71c /arch/powerpc
parent: ab396e91bfe953db26fa1083d9c3e7a4fbe0334a (diff)
parent: 3b212db9217d02e623eaa12f41c9b5f8c6a99535 (diff)
download: op-kernel-dev-a62e68488dd5ddb07776555fd7e0435c6d021ac1.zip
op-kernel-dev-a62e68488dd5ddb07776555fd7e0435c6d021ac1.tar.gz
16 files changed, 854 insertions, 425 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 28004f0..935d965 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -275,6 +275,7 @@ config PPC_PSERIES
 	select PPC_I8259
 	select PPC_RTAS
 	select RTAS_ERROR_LOGGING
+	select PPC_UDBG_16550
 	default y
 
 config PPC_CHRP
@@ -284,6 +285,7 @@ config PPC_CHRP
 	select PPC_INDIRECT_PCI
 	select PPC_RTAS
 	select PPC_MPC106
+	select PPC_UDBG_16550
 	default y
 
 config PPC_PMAC
@@ -306,6 +308,7 @@ config PPC_PREP
 	depends on PPC_MULTIPLATFORM && PPC32 && BROKEN
 	select PPC_I8259
 	select PPC_INDIRECT_PCI
+	select PPC_UDBG_16550
 	default y
 
 config PPC_MAPLE
@@ -314,6 +317,7 @@ config PPC_MAPLE
 	select U3_DART
 	select MPIC_BROKEN_U3
 	select GENERIC_TBSYNC
+	select PPC_UDBG_16550
 	default n
 	help
           This option enables support for the Maple 970FX Evaluation Board.
@@ -324,6 +328,7 @@ config PPC_CELL
 	depends on PPC_MULTIPLATFORM && PPC64
 	select PPC_RTAS
 	select MMIO_NVRAM
+	select PPC_UDBG_16550
 
 config PPC_OF
 	def_bool y
@@ -370,6 +375,10 @@ config MPIC_BROKEN_U3
 	depends on PPC_MAPLE
 	default y
 
+config PPC_UDBG_16550
+	bool
+	default n
+
 config CELL_IIC
 	depends on PPC_CELL
 	bool
@@ -403,7 +412,7 @@ config PPC_MPC106
 
 config GENERIC_TBSYNC
 	bool
-	default y if CONFIG_PPC32 && CONFIG_SMP
+	default y if PPC32 && SMP
 	default n
 
 source "drivers/cpufreq/Kconfig"
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 17ed501..144e284 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -54,7 +54,7 @@ obj-$(CONFIG_BOOTX_TEXT)	+= btext.o
 obj-$(CONFIG_6xx)		+= idle_6xx.o
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
-obj-$(CONFIG_SERIAL_8250)	+= legacy_serial.o udbg_16550.o
+obj-$(CONFIG_PPC_UDBG_16550)	+= legacy_serial.o udbg_16550.o
 module-$(CONFIG_PPC64)		+= module_64.o
 obj-$(CONFIG_MODULES)		+= $(module-y)
 
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index fc60a77..ba21a6c 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -381,7 +381,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
 	dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0);
 	dev->subsystem_device = get_int_prop(node, "subsystem-id", 0);
 
-	dev->cfg_size = 256; /*pci_cfg_space_size(dev);*/
+	dev->cfg_size = pci_cfg_space_size(dev);
 
 	sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(bus),
 		dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn));
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index b275814..16d9a90 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -244,7 +244,6 @@ EXPORT_SYMBOL(set_context);
 extern long mol_trampoline;
 EXPORT_SYMBOL(mol_trampoline); /* For MOL */
 EXPORT_SYMBOL(flush_hash_pages); /* For MOL */
-EXPORT_SYMBOL_GPL(__handle_mm_fault); /* For MOL */
 #ifdef CONFIG_SMP
 extern int mmu_hash_lock;
 EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index 45b8109..5579f65 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -72,7 +72,7 @@ static int of_device_available(struct device_node * dn)
         return 0;
 }
 
-static int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
+int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
 {
 	int returnval = -1;
 	unsigned long buid, addr;
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index e5d285a..db72a92 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -299,9 +299,7 @@ void __init setup_arch(char **cmdline_p)
 	if (ppc_md.init_early)
 		ppc_md.init_early();
 
-#ifdef CONFIG_SERIAL_8250
 	find_legacy_serial_ports();
-#endif
 	finish_device_tree();
 
 	smp_setup_cpu_maps();
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 81567e9..c4b7696 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -472,9 +472,7 @@ void __init setup_system(void)
 	 * hash table management for us, thus ioremap works. We do that early
 	 * so that further code can be debugged
 	 */
-#ifdef CONFIG_SERIAL_8250
 	find_legacy_serial_ports();
-#endif
 
 	/*
 	 * "Finish" the device-tree, that is do the actual parsing of
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index 0ee44be..475249d 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -552,30 +552,6 @@ asmlinkage long compat_sys_sched_rr_get_interval(u32 pid, struct compat_timespec
 	return ret;
 }
 
-asmlinkage int compat_sys_pciconfig_read(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf)
-{
-	return sys_pciconfig_read((unsigned long) bus,
-				  (unsigned long) dfn,
-				  (unsigned long) off,
-				  (unsigned long) len,
-				  compat_ptr(ubuf));
-}
-
-asmlinkage int compat_sys_pciconfig_write(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf)
-{
-	return sys_pciconfig_write((unsigned long) bus,
-				   (unsigned long) dfn,
-				   (unsigned long) off,
-				   (unsigned long) len,
-				   compat_ptr(ubuf));
-}
-
-asmlinkage int compat_sys_pciconfig_iobase(u32 which, u32 in_bus, u32 in_devfn)
-{
-	return sys_pciconfig_iobase(which, in_bus, in_devfn);
-}
-
-
 /* Note: it is necessary to treat mode as an unsigned int,
  * with the corresponding cast to a signed int to insure that the 
  * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index dd2ab85..6801317 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -239,9 +239,9 @@ SYS32ONLY(ftruncate64)
 SYSX(sys_ni_syscall,sys_stat64,sys_stat64)
 SYSX(sys_ni_syscall,sys_lstat64,sys_lstat64)
 SYSX(sys_ni_syscall,sys_fstat64,sys_fstat64)
-COMPAT_SYS(pciconfig_read)
-COMPAT_SYS(pciconfig_write)
-COMPAT_SYS(pciconfig_iobase)
+SYSCALL(pciconfig_read)
+SYSCALL(pciconfig_write)
+SYSCALL(pciconfig_iobase)
 SYSCALL(ni_syscall)
 SYSCALL(getdents64)
 SYSCALL(pivot_root)
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 2ffca63..7b278d83 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -174,7 +174,7 @@ void __iomem * __ioremap(unsigned long addr, unsigned long size,
 	pa = addr & PAGE_MASK;
 	size = PAGE_ALIGN(addr + size) - pa;
 
-	if (size == 0)
+	if ((size == 0) || (pa == 0))
 		return NULL;
 
 	if (mem_init_done) {
diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
index dd73e38..a1cb4d2 100644
--- a/arch/powerpc/platforms/maple/setup.c
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -71,9 +71,6 @@
 #define DBG(fmt...)
 #endif
 
-extern void generic_find_legacy_serial_ports(u64 *physport,
-		unsigned int *default_speed);
-
 static void maple_restart(char *cmd)
 {
 	unsigned int maple_nvram_base;
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 6accdd1..61616d1 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -4,7 +4,7 @@ obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_IBMVIO)	+= vio.o
 obj-$(CONFIG_XICS)	+= xics.o
 obj-$(CONFIG_SCANLOG)	+= scanlog.o
-obj-$(CONFIG_EEH)	+= eeh.o eeh_event.o
+obj-$(CONFIG_EEH)	+= eeh.o eeh_cache.o eeh_driver.o eeh_event.o
 
 obj-$(CONFIG_HVC_CONSOLE)	+= hvconsole.o
 obj-$(CONFIG_HVCS)		+= hvcserver.o
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 7fbfd16..17cea7f 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -76,15 +76,14 @@
  */
 #define EEH_MAX_FAILS	100000
 
-/* Misc forward declaraions */
-static void eeh_save_bars(struct pci_dev * pdev, struct pci_dn *pdn);
-
 /* RTAS tokens */
 static int ibm_set_eeh_option;
 static int ibm_set_slot_reset;
 static int ibm_read_slot_reset_state;
 static int ibm_read_slot_reset_state2;
 static int ibm_slot_error_detail;
+static int ibm_get_config_addr_info;
+static int ibm_configure_bridge;
 
 int eeh_subsystem_enabled;
 EXPORT_SYMBOL(eeh_subsystem_enabled);
@@ -98,308 +97,23 @@ static DEFINE_SPINLOCK(slot_errbuf_lock);
 static int eeh_error_buf_size;
 
 /* System monitoring statistics */
-static DEFINE_PER_CPU(unsigned long, no_device);
-static DEFINE_PER_CPU(unsigned long, no_dn);
-static DEFINE_PER_CPU(unsigned long, no_cfg_addr);
-static DEFINE_PER_CPU(unsigned long, ignored_check);
-static DEFINE_PER_CPU(unsigned long, total_mmio_ffs);
-static DEFINE_PER_CPU(unsigned long, false_positives);
-static DEFINE_PER_CPU(unsigned long, ignored_failures);
-static DEFINE_PER_CPU(unsigned long, slot_resets);
-
-/**
- * The pci address cache subsystem.  This subsystem places
- * PCI device address resources into a red-black tree, sorted
- * according to the address range, so that given only an i/o
- * address, the corresponding PCI device can be **quickly**
- * found. It is safe to perform an address lookup in an interrupt
- * context; this ability is an important feature.
- *
- * Currently, the only customer of this code is the EEH subsystem;
- * thus, this code has been somewhat tailored to suit EEH better.
- * In particular, the cache does *not* hold the addresses of devices
- * for which EEH is not enabled.
- *
- * (Implementation Note: The RB tree seems to be better/faster
- * than any hash algo I could think of for this problem, even
- * with the penalty of slow pointer chases for d-cache misses).
- */
-struct pci_io_addr_range
-{
-	struct rb_node rb_node;
-	unsigned long addr_lo;
-	unsigned long addr_hi;
-	struct pci_dev *pcidev;
-	unsigned int flags;
-};
-
-static struct pci_io_addr_cache
-{
-	struct rb_root rb_root;
-	spinlock_t piar_lock;
-} pci_io_addr_cache_root;
-
-static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr)
-{
-	struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
-
-	while (n) {
-		struct pci_io_addr_range *piar;
-		piar = rb_entry(n, struct pci_io_addr_range, rb_node);
-
-		if (addr < piar->addr_lo) {
-			n = n->rb_left;
-		} else {
-			if (addr > piar->addr_hi) {
-				n = n->rb_right;
-			} else {
-				pci_dev_get(piar->pcidev);
-				return piar->pcidev;
-			}
-		}
-	}
-
-	return NULL;
-}
-
-/**
- * pci_get_device_by_addr - Get device, given only address
- * @addr: mmio (PIO) phys address or i/o port number
- *
- * Given an mmio phys address, or a port number, find a pci device
- * that implements this address.  Be sure to pci_dev_put the device
- * when finished.  I/O port numbers are assumed to be offset
- * from zero (that is, they do *not* have pci_io_addr added in).
- * It is safe to call this function within an interrupt.
- */
-static struct pci_dev *pci_get_device_by_addr(unsigned long addr)
-{
-	struct pci_dev *dev;
-	unsigned long flags;
-
-	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
-	dev = __pci_get_device_by_addr(addr);
-	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
-	return dev;
-}
-
-#ifdef DEBUG
-/*
- * Handy-dandy debug print routine, does nothing more
- * than print out the contents of our addr cache.
- */
-static void pci_addr_cache_print(struct pci_io_addr_cache *cache)
-{
-	struct rb_node *n;
-	int cnt = 0;
-
-	n = rb_first(&cache->rb_root);
-	while (n) {
-		struct pci_io_addr_range *piar;
-		piar = rb_entry(n, struct pci_io_addr_range, rb_node);
-		printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n",
-		       (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,
-		       piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev));
-		cnt++;
-		n = rb_next(n);
-	}
-}
-#endif
-
-/* Insert address range into the rb tree. */
-static struct pci_io_addr_range *
-pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
-		      unsigned long ahi, unsigned int flags)
-{
-	struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
-	struct rb_node *parent = NULL;
-	struct pci_io_addr_range *piar;
-
-	/* Walk tree, find a place to insert into tree */
-	while (*p) {
-		parent = *p;
-		piar = rb_entry(parent, struct pci_io_addr_range, rb_node);
-		if (ahi < piar->addr_lo) {
-			p = &parent->rb_left;
-		} else if (alo > piar->addr_hi) {
-			p = &parent->rb_right;
-		} else {
-			if (dev != piar->pcidev ||
-			    alo != piar->addr_lo || ahi != piar->addr_hi) {
-				printk(KERN_WARNING "PIAR: overlapping address range\n");
-			}
-			return piar;
-		}
-	}
-	piar = (struct pci_io_addr_range *)kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
-	if (!piar)
-		return NULL;
+static unsigned long no_device;
+static unsigned long no_dn;
+static unsigned long no_cfg_addr;
+static unsigned long ignored_check;
+static unsigned long total_mmio_ffs;
+static unsigned long false_positives;
+static unsigned long ignored_failures;
+static unsigned long slot_resets;
 
-	piar->addr_lo = alo;
-	piar->addr_hi = ahi;
-	piar->pcidev = dev;
-	piar->flags = flags;
-
-#ifdef DEBUG
-	printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n",
-	                  alo, ahi, pci_name (dev));
-#endif
-
-	rb_link_node(&piar->rb_node, parent, p);
-	rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
-
-	return piar;
-}
-
-static void __pci_addr_cache_insert_device(struct pci_dev *dev)
-{
-	struct device_node *dn;
-	struct pci_dn *pdn;
-	int i;
-	int inserted = 0;
-
-	dn = pci_device_to_OF_node(dev);
-	if (!dn) {
-		printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", pci_name(dev));
-		return;
-	}
-
-	/* Skip any devices for which EEH is not enabled. */
-	pdn = PCI_DN(dn);
-	if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
-	    pdn->eeh_mode & EEH_MODE_NOCHECK) {
-#ifdef DEBUG
-		printk(KERN_INFO "PCI: skip building address cache for=%s - %s\n",
-		       pci_name(dev), pdn->node->full_name);
-#endif
-		return;
-	}
-
-	/* The cache holds a reference to the device... */
-	pci_dev_get(dev);
-
-	/* Walk resources on this device, poke them into the tree */
-	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
-		unsigned long start = pci_resource_start(dev,i);
-		unsigned long end = pci_resource_end(dev,i);
-		unsigned int flags = pci_resource_flags(dev,i);
-
-		/* We are interested only bus addresses, not dma or other stuff */
-		if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM)))
-			continue;
-		if (start == 0 || ~start == 0 || end == 0 || ~end == 0)
-			 continue;
-		pci_addr_cache_insert(dev, start, end, flags);
-		inserted = 1;
-	}
-
-	/* If there was nothing to add, the cache has no reference... */
-	if (!inserted)
-		pci_dev_put(dev);
-}
-
-/**
- * pci_addr_cache_insert_device - Add a device to the address cache
- * @dev: PCI device whose I/O addresses we are interested in.
- *
- * In order to support the fast lookup of devices based on addresses,
- * we maintain a cache of devices that can be quickly searched.
- * This routine adds a device to that cache.
- */
-static void pci_addr_cache_insert_device(struct pci_dev *dev)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
-	__pci_addr_cache_insert_device(dev);
-	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
-}
-
-static inline void __pci_addr_cache_remove_device(struct pci_dev *dev)
-{
-	struct rb_node *n;
-	int removed = 0;
-
-restart:
-	n = rb_first(&pci_io_addr_cache_root.rb_root);
-	while (n) {
-		struct pci_io_addr_range *piar;
-		piar = rb_entry(n, struct pci_io_addr_range, rb_node);
-
-		if (piar->pcidev == dev) {
-			rb_erase(n, &pci_io_addr_cache_root.rb_root);
-			removed = 1;
-			kfree(piar);
-			goto restart;
-		}
-		n = rb_next(n);
-	}
-
-	/* The cache no longer holds its reference to this device... */
-	if (removed)
-		pci_dev_put(dev);
-}
-
-/**
- * pci_addr_cache_remove_device - remove pci device from addr cache
- * @dev: device to remove
- *
- * Remove a device from the addr-cache tree.
- * This is potentially expensive, since it will walk
- * the tree multiple times (once per resource).
- * But so what; device removal doesn't need to be that fast.
- */
-static void pci_addr_cache_remove_device(struct pci_dev *dev)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
-	__pci_addr_cache_remove_device(dev);
-	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
-}
-
-/**
- * pci_addr_cache_build - Build a cache of I/O addresses
- *
- * Build a cache of pci i/o addresses.  This cache will be used to
- * find the pci device that corresponds to a given address.
- * This routine scans all pci busses to build the cache.
- * Must be run late in boot process, after the pci controllers
- * have been scaned for devices (after all device resources are known).
- */
-void __init pci_addr_cache_build(void)
-{
-	struct device_node *dn;
-	struct pci_dev *dev = NULL;
-
-	if (!eeh_subsystem_enabled)
-		return;
-
-	spin_lock_init(&pci_io_addr_cache_root.piar_lock);
-
-	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
-		/* Ignore PCI bridges ( XXX why ??) */
-		if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) {
-			continue;
-		}
-		pci_addr_cache_insert_device(dev);
-
-		/* Save the BAR's; firmware doesn't restore these after EEH reset */
-		dn = pci_device_to_OF_node(dev);
-		eeh_save_bars(dev, PCI_DN(dn));
-	}
-
-#ifdef DEBUG
-	/* Verify tree built up above, echo back the list of addrs. */
-	pci_addr_cache_print(&pci_io_addr_cache_root);
-#endif
-}
+#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
 
 /* --------------------------------------------------------------- */
-/* Above lies the PCI Address Cache. Below lies the EEH event infrastructure */
+/* Below lies the EEH event infrastructure */
 
 void eeh_slot_error_detail (struct pci_dn *pdn, int severity)
 {
+	int config_addr;
 	unsigned long flags;
 	int rc;
 
@@ -407,8 +121,13 @@ void eeh_slot_error_detail (struct pci_dn *pdn, int severity)
 	spin_lock_irqsave(&slot_errbuf_lock, flags);
 	memset(slot_errbuf, 0, eeh_error_buf_size);
 
+	/* Use PE configuration address, if present */
+	config_addr = pdn->eeh_config_addr;
+	if (pdn->eeh_pe_config_addr)
+		config_addr = pdn->eeh_pe_config_addr;
+
 	rc = rtas_call(ibm_slot_error_detail,
-	               8, 1, NULL, pdn->eeh_config_addr,
+	               8, 1, NULL, config_addr,
 	               BUID_HI(pdn->phb->buid),
 	               BUID_LO(pdn->phb->buid), NULL, 0,
 	               virt_to_phys(slot_errbuf),
@@ -428,6 +147,7 @@ void eeh_slot_error_detail (struct pci_dn *pdn, int severity)
 static int read_slot_reset_state(struct pci_dn *pdn, int rets[])
 {
 	int token, outputs;
+	int config_addr;
 
 	if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
 		token = ibm_read_slot_reset_state2;
@@ -438,7 +158,12 @@ static int read_slot_reset_state(struct pci_dn *pdn, int rets[])
 		outputs = 3;
 	}
 
-	return rtas_call(token, 3, outputs, rets, pdn->eeh_config_addr,
+	/* Use PE configuration address, if present */
+	config_addr = pdn->eeh_config_addr;
+	if (pdn->eeh_pe_config_addr)
+		config_addr = pdn->eeh_pe_config_addr;
+
+	return rtas_call(token, 3, outputs, rets, config_addr,
 			 BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid));
 }
 
@@ -462,7 +187,7 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
 /** 
  * Return the "partitionable endpoint" (pe) under which this device lies
  */
-static struct device_node * find_device_pe(struct device_node *dn)
+struct device_node * find_device_pe(struct device_node *dn)
 {
 	while ((dn->parent) && PCI_DN(dn->parent) &&
 	      (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
@@ -485,6 +210,11 @@ static void __eeh_mark_slot (struct device_node *dn, int mode_flag)
 		if (PCI_DN(dn)) {
 			PCI_DN(dn)->eeh_mode |= mode_flag;
 
+			/* Mark the pci device driver too */
+			struct pci_dev *dev = PCI_DN(dn)->pcidev;
+			if (dev && dev->driver)
+				dev->error_state = pci_channel_io_frozen;
+
 			if (dn->child)
 				__eeh_mark_slot (dn->child, mode_flag);
 		}
@@ -495,6 +225,11 @@ static void __eeh_mark_slot (struct device_node *dn, int mode_flag)
 void eeh_mark_slot (struct device_node *dn, int mode_flag)
 {
 	dn = find_device_pe (dn);
+
+	/* Back up one, since config addrs might be shared */
+	if (PCI_DN(dn) && PCI_DN(dn)->eeh_pe_config_addr)
+		dn = dn->parent;
+
 	PCI_DN(dn)->eeh_mode |= mode_flag;
 	__eeh_mark_slot (dn->child, mode_flag);
 }
@@ -516,7 +251,13 @@ void eeh_clear_slot (struct device_node *dn, int mode_flag)
 {
 	unsigned long flags;
 	spin_lock_irqsave(&confirm_error_lock, flags);
+	
 	dn = find_device_pe (dn);
+	
+	/* Back up one, since config addrs might be shared */
+	if (PCI_DN(dn) && PCI_DN(dn)->eeh_pe_config_addr)
+		dn = dn->parent;
+
 	PCI_DN(dn)->eeh_mode &= ~mode_flag;
 	PCI_DN(dn)->eeh_check_count = 0;
 	__eeh_clear_slot (dn->child, mode_flag);
@@ -544,15 +285,16 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 	int rets[3];
 	unsigned long flags;
 	struct pci_dn *pdn;
+	enum pci_channel_state state;
 	int rc = 0;
 
-	__get_cpu_var(total_mmio_ffs)++;
+	total_mmio_ffs++;
 
 	if (!eeh_subsystem_enabled)
 		return 0;
 
 	if (!dn) {
-		__get_cpu_var(no_dn)++;
+		no_dn++;
 		return 0;
 	}
 	pdn = PCI_DN(dn);
@@ -560,7 +302,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 	/* Access to IO BARs might get this far and still not want checking. */
 	if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
 	    pdn->eeh_mode & EEH_MODE_NOCHECK) {
-		__get_cpu_var(ignored_check)++;
+		ignored_check++;
 #ifdef DEBUG
 		printk ("EEH:ignored check (%x) for %s %s\n", 
 		        pdn->eeh_mode, pci_name (dev), dn->full_name);
@@ -568,8 +310,8 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 		return 0;
 	}
 
-	if (!pdn->eeh_config_addr) {
-		__get_cpu_var(no_cfg_addr)++;
+	if (!pdn->eeh_config_addr && !pdn->eeh_pe_config_addr) {
+		no_cfg_addr++;
 		return 0;
 	}
 
@@ -611,7 +353,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 	if (ret != 0) {
 		printk(KERN_WARNING "EEH: read_slot_reset_state() failed; rc=%d dn=%s\n",
 		       ret, dn->full_name);
-		__get_cpu_var(false_positives)++;
+		false_positives++;
 		rc = 0;
 		goto dn_unlock;
 	}
@@ -620,14 +362,14 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 	if (rets[1] != 1) {
 		printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n",
 		       ret, dn->full_name);
-		__get_cpu_var(false_positives)++;
+		false_positives++;
 		rc = 0;
 		goto dn_unlock;
 	}
 
 	/* If not the kind of error we know about, punt. */
 	if (rets[0] != 2 && rets[0] != 4 && rets[0] != 5) {
-		__get_cpu_var(false_positives)++;
+		false_positives++;
 		rc = 0;
 		goto dn_unlock;
 	}
@@ -635,12 +377,12 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 	/* Note that config-io to empty slots may fail;
 	 * we recognize empty because they don't have children. */
 	if ((rets[0] == 5) && (dn->child == NULL)) {
-		__get_cpu_var(false_positives)++;
+		false_positives++;
 		rc = 0;
 		goto dn_unlock;
 	}
 
-	__get_cpu_var(slot_resets)++;
+	slot_resets++;
  
 	/* Avoid repeated reports of this failure, including problems
 	 * with other functions on this device, and functions under
@@ -648,8 +390,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 	eeh_mark_slot (dn, EEH_MODE_ISOLATED);
 	spin_unlock_irqrestore(&confirm_error_lock, flags);
 
-	eeh_send_failure_event (dn, dev, rets[0], rets[2]);
-	
+	state = pci_channel_io_normal;
+	if ((rets[0] == 2) || (rets[0] == 4))
+		state = pci_channel_io_frozen;
+	if (rets[0] == 5)
+		state = pci_channel_io_perm_failure;
+	eeh_send_failure_event (dn, dev, state, rets[2]);
+
 	/* Most EEH events are due to device driver bugs.  Having
 	 * a stack trace will help the device-driver authors figure
 	 * out what happened.  So print that out. */
@@ -685,7 +432,7 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon
 	addr = eeh_token_to_phys((unsigned long __force) token);
 	dev = pci_get_device_by_addr(addr);
 	if (!dev) {
-		__get_cpu_var(no_device)++;
+		no_device++;
 		return val;
 	}
 
@@ -716,11 +463,16 @@ eeh_slot_availability(struct pci_dn *pdn)
 	if (rc) return rc;
 
 	if (rets[1] == 0) return -1;  /* EEH is not supported */
-	if (rets[0] == 0)  return 0;  /* Oll Korrect */
+	if (rets[0] == 0) return 0;   /* Oll Korrect */
 	if (rets[0] == 5) {
 		if (rets[2] == 0) return -1; /* permanently unavailable */
 		return rets[2]; /* number of millisecs to wait */
 	}
+	if (rets[0] == 1)
+		return 250;
+
+	printk (KERN_ERR "EEH: Slot unavailable: rc=%d, rets=%d %d %d\n",
+		rc, rets[0], rets[1], rets[2]);
 	return -1;
 }
 
@@ -737,6 +489,7 @@ eeh_slot_availability(struct pci_dn *pdn)
 static void
 rtas_pci_slot_reset(struct pci_dn *pdn, int state)
 {
+	int config_addr;
 	int rc;
 
 	BUG_ON (pdn==NULL); 
@@ -747,8 +500,13 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state)
 		return;
 	}
 
+	/* Use PE configuration address, if present */
+	config_addr = pdn->eeh_config_addr;
+	if (pdn->eeh_pe_config_addr)
+		config_addr = pdn->eeh_pe_config_addr;
+
 	rc = rtas_call(ibm_set_slot_reset,4,1, NULL,
-	               pdn->eeh_config_addr,
+	               config_addr,
 	               BUID_HI(pdn->phb->buid),
 	               BUID_LO(pdn->phb->buid),
 	               state);
@@ -761,9 +519,11 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state)
 
 /** rtas_set_slot_reset -- assert the pci #RST line for 1/4 second
  *  dn -- device node to be reset.
+ *
+ *  Return 0 if success, else a non-zero value.
  */
 
-void
+int
 rtas_set_slot_reset(struct pci_dn *pdn)
 {
 	int i, rc;
@@ -793,10 +553,21 @@ rtas_set_slot_reset(struct pci_dn *pdn)
 	 * ready to be used; if not, wait for recovery. */
 	for (i=0; i<10; i++) {
 		rc = eeh_slot_availability (pdn);
-		if (rc <= 0) break;
+		if (rc < 0)
+			printk (KERN_ERR "EEH: failed (%d) to reset slot %s\n", rc, pdn->node->full_name);
+		if (rc == 0)
+			return 0;
+		if (rc < 0)
+			return -1;
 
 		msleep (rc+100);
 	}
+
+	rc = eeh_slot_availability (pdn);
+	if (rc)
+		printk (KERN_ERR "EEH: timeout resetting slot %s\n", pdn->node->full_name);
+
+	return rc;
 }
 
 /* ------------------------------------------------------- */
@@ -851,7 +622,7 @@ void eeh_restore_bars(struct pci_dn *pdn)
 	if (!pdn) 
 		return;
 	
-	if (! pdn->eeh_is_bridge)
+	if ((pdn->eeh_mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(pdn->class_code))
 		__restore_bars (pdn);
 
 	dn = pdn->node->child;
@@ -869,30 +640,30 @@ void eeh_restore_bars(struct pci_dn *pdn)
  * PCI devices are added individuallly; but, for the restore,
  * an entire slot is reset at a time.
  */
-static void eeh_save_bars(struct pci_dev * pdev, struct pci_dn *pdn)
+static void eeh_save_bars(struct pci_dn *pdn)
 {
 	int i;
 
-	if (!pdev || !pdn )
+	if (!pdn )
 		return;
 	
 	for (i = 0; i < 16; i++)
-		pci_read_config_dword(pdev, i * 4, &pdn->config_space[i]);
-
-	if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
-		pdn->eeh_is_bridge = 1;
+		rtas_read_config(pdn, i * 4, 4, &pdn->config_space[i]);
 }
 
 void
 rtas_configure_bridge(struct pci_dn *pdn)
 {
-	int token = rtas_token ("ibm,configure-bridge");
+	int config_addr;
 	int rc;
 
-	if (token == RTAS_UNKNOWN_SERVICE)
-		return;
-	rc = rtas_call(token,3,1, NULL,
-	               pdn->eeh_config_addr,
+	/* Use PE configuration address, if present */
+	config_addr = pdn->eeh_config_addr;
+	if (pdn->eeh_pe_config_addr)
+		config_addr = pdn->eeh_pe_config_addr;
+
+	rc = rtas_call(ibm_configure_bridge,3,1, NULL,
+	               config_addr,
 	               BUID_HI(pdn->phb->buid),
 	               BUID_LO(pdn->phb->buid));
 	if (rc) {
@@ -927,6 +698,7 @@ static void *early_enable_eeh(struct device_node *dn, void *data)
 	int enable;
 	struct pci_dn *pdn = PCI_DN(dn);
 
+	pdn->class_code = 0;
 	pdn->eeh_mode = 0;
 	pdn->eeh_check_count = 0;
 	pdn->eeh_freeze_count = 0;
@@ -943,6 +715,7 @@ static void *early_enable_eeh(struct device_node *dn, void *data)
 		pdn->eeh_mode |= EEH_MODE_NOCHECK;
 		return NULL;
 	}
+	pdn->class_code = *class_code;
 
 	/*
 	 * Now decide if we are going to "Disable" EEH checking
@@ -953,8 +726,10 @@ static void *early_enable_eeh(struct device_node *dn, void *data)
 	 * But there are a few cases like display devices that make sense.
 	 */
 	enable = 1;	/* i.e. we will do checking */
+#if 0
 	if ((*class_code >> 16) == PCI_BASE_CLASS_DISPLAY)
 		enable = 0;
+#endif
 
 	if (!enable)
 		pdn->eeh_mode |= EEH_MODE_NOCHECK;
@@ -973,8 +748,22 @@ static void *early_enable_eeh(struct device_node *dn, void *data)
 			eeh_subsystem_enabled = 1;
 			pdn->eeh_mode |= EEH_MODE_SUPPORTED;
 			pdn->eeh_config_addr = regs[0];
+
+			/* If the newer, better, ibm,get-config-addr-info is supported, 
+			 * then use that instead. */
+			pdn->eeh_pe_config_addr = 0;
+			if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
+				unsigned int rets[2];
+				ret = rtas_call (ibm_get_config_addr_info, 4, 2, rets, 
+					pdn->eeh_config_addr, 
+					info->buid_hi, info->buid_lo,
+					0);
+				if (ret == 0)
+					pdn->eeh_pe_config_addr = rets[0];
+			}
 #ifdef DEBUG
-			printk(KERN_DEBUG "EEH: %s: eeh enabled\n", dn->full_name);
+			printk(KERN_DEBUG "EEH: %s: eeh enabled, config=%x pe_config=%x\n",
+			       dn->full_name, pdn->eeh_config_addr, pdn->eeh_pe_config_addr);
 #endif
 		} else {
 
@@ -993,6 +782,7 @@ static void *early_enable_eeh(struct device_node *dn, void *data)
 		       dn->full_name);
 	}
 
+	eeh_save_bars(pdn);
 	return NULL;
 }
 
@@ -1026,6 +816,8 @@ void __init eeh_init(void)
 	ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
 	ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
 	ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
+	ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
+	ibm_configure_bridge = rtas_token ("ibm,configure-bridge");
 
 	if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
 		return;
@@ -1080,12 +872,10 @@ void eeh_add_device_early(struct device_node *dn)
 	if (!dn || !PCI_DN(dn))
 		return;
 	phb = PCI_DN(dn)->phb;
-	if (NULL == phb || 0 == phb->buid) {
-		printk(KERN_WARNING "EEH: Expected buid but found none for %s\n",
-		       dn->full_name);
-		dump_stack();
+
+	/* USB Bus children of PCI devices will not have BUID's */
+	if (NULL == phb || 0 == phb->buid)
 		return;
-	}
 
 	info.buid_hi = BUID_HI(phb->buid);
 	info.buid_lo = BUID_LO(phb->buid);
@@ -1127,7 +917,6 @@ void eeh_add_device_late(struct pci_dev *dev)
 	pdn->pcidev = dev;
 
 	pci_addr_cache_insert_device (dev);
-	eeh_save_bars(dev, pdn);
 }
 EXPORT_SYMBOL_GPL(eeh_add_device_late);
 
@@ -1175,25 +964,9 @@ EXPORT_SYMBOL_GPL(eeh_remove_bus_device);
 
 static int proc_eeh_show(struct seq_file *m, void *v)
 {
-	unsigned int cpu;
-	unsigned long ffs = 0, positives = 0, failures = 0;
-	unsigned long resets = 0;
-	unsigned long no_dev = 0, no_dn = 0, no_cfg = 0, no_check = 0;
-
-	for_each_cpu(cpu) {
-		ffs += per_cpu(total_mmio_ffs, cpu);
-		positives += per_cpu(false_positives, cpu);
-		failures += per_cpu(ignored_failures, cpu);
-		resets += per_cpu(slot_resets, cpu);
-		no_dev += per_cpu(no_device, cpu);
-		no_dn += per_cpu(no_dn, cpu);
-		no_cfg += per_cpu(no_cfg_addr, cpu);
-		no_check += per_cpu(ignored_check, cpu);
-	}
-
 	if (0 == eeh_subsystem_enabled) {
 		seq_printf(m, "EEH Subsystem is globally disabled\n");
-		seq_printf(m, "eeh_total_mmio_ffs=%ld\n", ffs);
+		seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs);
 	} else {
 		seq_printf(m, "EEH Subsystem is enabled\n");
 		seq_printf(m,
@@ -1205,8 +978,10 @@ static int proc_eeh_show(struct seq_file *m, void *v)
 				"eeh_false_positives=%ld\n"
 				"eeh_ignored_failures=%ld\n"
 				"eeh_slot_resets=%ld\n",
-				no_dev, no_dn, no_cfg, no_check,
-				ffs, positives, failures, resets);
+				no_device, no_dn, no_cfg_addr, 
+				ignored_check, total_mmio_ffs, 
+				false_positives, ignored_failures, 
+				slot_resets);
 	}
 
 	return 0;
diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c
new file mode 100644
index 0000000..d4a402c
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/eeh_cache.c
@@ -0,0 +1,316 @@
+/*
+ * eeh_cache.c
+ * PCI address cache; allows the lookup of PCI devices based on I/O address
+ *
+ * Copyright (C) 2004 Linas Vepstas <linas@austin.ibm.com> IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+#undef DEBUG
+
+/**
+ * The pci address cache subsystem.  This subsystem places
+ * PCI device address resources into a red-black tree, sorted
+ * according to the address range, so that given only an i/o
+ * address, the corresponding PCI device can be **quickly**
+ * found. It is safe to perform an address lookup in an interrupt
+ * context; this ability is an important feature.
+ *
+ * Currently, the only customer of this code is the EEH subsystem;
+ * thus, this code has been somewhat tailored to suit EEH better.
+ * In particular, the cache does *not* hold the addresses of devices
+ * for which EEH is not enabled.
+ *
+ * (Implementation Note: The RB tree seems to be better/faster
+ * than any hash algo I could think of for this problem, even
+ * with the penalty of slow pointer chases for d-cache misses).
+ */
+struct pci_io_addr_range
+{
+	struct rb_node rb_node;
+	unsigned long addr_lo;
+	unsigned long addr_hi;
+	struct pci_dev *pcidev;
+	unsigned int flags;
+};
+
+static struct pci_io_addr_cache
+{
+	struct rb_root rb_root;
+	spinlock_t piar_lock;
+} pci_io_addr_cache_root;
+
+static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr)
+{
+	struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
+
+	while (n) {
+		struct pci_io_addr_range *piar;
+		piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+
+		if (addr < piar->addr_lo) {
+			n = n->rb_left;
+		} else {
+			if (addr > piar->addr_hi) {
+				n = n->rb_right;
+			} else {
+				pci_dev_get(piar->pcidev);
+				return piar->pcidev;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * pci_get_device_by_addr - Get device, given only address
+ * @addr: mmio (PIO) phys address or i/o port number
+ *
+ * Given an mmio phys address, or a port number, find a pci device
+ * that implements this address.  Be sure to pci_dev_put the device
+ * when finished.  I/O port numbers are assumed to be offset
+ * from zero (that is, they do *not* have pci_io_addr added in).
+ * It is safe to call this function within an interrupt.
+ */
+struct pci_dev *pci_get_device_by_addr(unsigned long addr)
+{
+	struct pci_dev *dev;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+	dev = __pci_get_device_by_addr(addr);
+	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+	return dev;
+}
+
+#ifdef DEBUG
+/*
+ * Handy-dandy debug print routine, does nothing more
+ * than print out the contents of our addr cache.
+ */
+static void pci_addr_cache_print(struct pci_io_addr_cache *cache)
+{
+	struct rb_node *n;
+	int cnt = 0;
+
+	n = rb_first(&cache->rb_root);
+	while (n) {
+		struct pci_io_addr_range *piar;
+		piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+		printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n",
+		       (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,
+		       piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev));
+		cnt++;
+		n = rb_next(n);
+	}
+}
+#endif
+
+/* Insert address range into the rb tree. */
+static struct pci_io_addr_range *
+pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
+		      unsigned long ahi, unsigned int flags)
+{
+	struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct pci_io_addr_range *piar;
+
+	/* Walk tree, find a place to insert into tree */
+	while (*p) {
+		parent = *p;
+		piar = rb_entry(parent, struct pci_io_addr_range, rb_node);
+		if (ahi < piar->addr_lo) {
+			p = &parent->rb_left;
+		} else if (alo > piar->addr_hi) {
+			p = &parent->rb_right;
+		} else {
+			if (dev != piar->pcidev ||
+			    alo != piar->addr_lo || ahi != piar->addr_hi) {
+				printk(KERN_WARNING "PIAR: overlapping address range\n");
+			}
+			return piar;
+		}
+	}
+	piar = (struct pci_io_addr_range *)kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
+	if (!piar)
+		return NULL;
+
+	piar->addr_lo = alo;
+	piar->addr_hi = ahi;
+	piar->pcidev = dev;
+	piar->flags = flags;
+
+#ifdef DEBUG
+	printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n",
+	                  alo, ahi, pci_name (dev));
+#endif
+
+	rb_link_node(&piar->rb_node, parent, p);
+	rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
+
+	return piar;
+}
+
+static void __pci_addr_cache_insert_device(struct pci_dev *dev)
+{
+	struct device_node *dn;
+	struct pci_dn *pdn;
+	int i;
+	int inserted = 0;
+
+	dn = pci_device_to_OF_node(dev);
+	if (!dn) {
+		printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", pci_name(dev));
+		return;
+	}
+
+	/* Skip any devices for which EEH is not enabled. */
+	pdn = PCI_DN(dn);
+	if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
+	    pdn->eeh_mode & EEH_MODE_NOCHECK) {
+#ifdef DEBUG
+		printk(KERN_INFO "PCI: skip building address cache for=%s - %s\n",
+		       pci_name(dev), pdn->node->full_name);
+#endif
+		return;
+	}
+
+	/* The cache holds a reference to the device... */
+	pci_dev_get(dev);
+
+	/* Walk resources on this device, poke them into the tree */
+	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+		unsigned long start = pci_resource_start(dev,i);
+		unsigned long end = pci_resource_end(dev,i);
+		unsigned int flags = pci_resource_flags(dev,i);
+
+		/* We are interested only bus addresses, not dma or other stuff */
+		if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM)))
+			continue;
+		if (start == 0 || ~start == 0 || end == 0 || ~end == 0)
+			 continue;
+		pci_addr_cache_insert(dev, start, end, flags);
+		inserted = 1;
+	}
+
+	/* If there was nothing to add, the cache has no reference... */
+	if (!inserted)
+		pci_dev_put(dev);
+}
+
+/**
+ * pci_addr_cache_insert_device - Add a device to the address cache
+ * @dev: PCI device whose I/O addresses we are interested in.
+ *
+ * In order to support the fast lookup of devices based on addresses,
+ * we maintain a cache of devices that can be quickly searched.
+ * This routine adds a device to that cache.
+ */
+void pci_addr_cache_insert_device(struct pci_dev *dev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+	__pci_addr_cache_insert_device(dev);
+	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+}
+
+static inline void __pci_addr_cache_remove_device(struct pci_dev *dev)
+{
+	struct rb_node *n;
+	int removed = 0;
+
+restart:
+	n = rb_first(&pci_io_addr_cache_root.rb_root);
+	while (n) {
+		struct pci_io_addr_range *piar;
+		piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+
+		if (piar->pcidev == dev) {
+			rb_erase(n, &pci_io_addr_cache_root.rb_root);
+			removed = 1;
+			kfree(piar);
+			goto restart;
+		}
+		n = rb_next(n);
+	}
+
+	/* The cache no longer holds its reference to this device... */
+	if (removed)
+		pci_dev_put(dev);
+}
+
+/**
+ * pci_addr_cache_remove_device - remove pci device from addr cache
+ * @dev: device to remove
+ *
+ * Remove a device from the addr-cache tree.
+ * This is potentially expensive, since it will walk
+ * the tree multiple times (once per resource).
+ * But so what; device removal doesn't need to be that fast.
+ */
+void pci_addr_cache_remove_device(struct pci_dev *dev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+	__pci_addr_cache_remove_device(dev);
+	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+}
+
+/**
+ * pci_addr_cache_build - Build a cache of I/O addresses
+ *
+ * Build a cache of pci i/o addresses.  This cache will be used to
+ * find the pci device that corresponds to a given address.
+ * This routine scans all pci busses to build the cache.
+ * Must be run late in boot process, after the pci controllers
+ * have been scaned for devices (after all device resources are known).
+ */
+void __init pci_addr_cache_build(void)
+{
+	struct device_node *dn;
+	struct pci_dev *dev = NULL;
+
+	spin_lock_init(&pci_io_addr_cache_root.piar_lock);
+
+	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+		/* Ignore PCI bridges */
+		if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE)
+			continue;
+
+		pci_addr_cache_insert_device(dev);
+
+		dn = pci_device_to_OF_node(dev);
+		pci_dev_get (dev);  /* matching put is in eeh_remove_device() */
+		PCI_DN(dn)->pcidev = dev;
+	}
+
+#ifdef DEBUG
+	/* Verify tree built up above, echo back the list of addrs. */
+	pci_addr_cache_print(&pci_io_addr_cache_root);
+#endif
+}
+
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
new file mode 100644
index 0000000..6373372
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -0,0 +1,376 @@
+/*
+ * PCI Error Recovery Driver for RPA-compliant PPC64 platform.
+ * Copyright (C) 2004, 2005 Linas Vepstas <linas@linas.org>
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to <linas@us.ibm.com>
+ *
+ */
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/pci.h>
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/ppc-pci.h>
+#include <asm/pci-bridge.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+
+
+static inline const char * pcid_name (struct pci_dev *pdev)
+{
+	if (pdev->dev.driver)
+		return pdev->dev.driver->name;
+	return "";
+}
+
+#ifdef DEBUG
+static void print_device_node_tree (struct pci_dn *pdn, int dent)
+{
+	int i;
+	if (!pdn) return;
+	for (i=0;i<dent; i++)
+		printk(" ");
+	printk("dn=%s mode=%x \tcfg_addr=%x pe_addr=%x \tfull=%s\n",
+		pdn->node->name, pdn->eeh_mode, pdn->eeh_config_addr,
+		pdn->eeh_pe_config_addr, pdn->node->full_name);
+	dent += 3;
+	struct device_node *pc = pdn->node->child;
+	while (pc) {
+		print_device_node_tree(PCI_DN(pc), dent);
+		pc = pc->sibling;
+	}
+}
+#endif
+
+/** 
+ * irq_in_use - return true if this irq is being used 
+ */
+static int irq_in_use(unsigned int irq)
+{
+	int rc = 0;
+	unsigned long flags;
+   struct irq_desc *desc = irq_desc + irq;
+
+	spin_lock_irqsave(&desc->lock, flags);
+	if (desc->action)
+		rc = 1;
+	spin_unlock_irqrestore(&desc->lock, flags);
+	return rc;
+}
+
+/* ------------------------------------------------------- */
+/** eeh_report_error - report an EEH error to each device,
+ *  collect up and merge the device responses.
+ */
+
+static void eeh_report_error(struct pci_dev *dev, void *userdata)
+{
+	enum pci_ers_result rc, *res = userdata;
+	struct pci_driver *driver = dev->driver;
+
+	dev->error_state = pci_channel_io_frozen;
+
+	if (!driver)
+		return;
+
+	if (irq_in_use (dev->irq)) {
+		struct device_node *dn = pci_device_to_OF_node(dev);
+		PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED;
+		disable_irq_nosync(dev->irq);
+	}
+	if (!driver->err_handler)
+		return;
+	if (!driver->err_handler->error_detected)
+		return;
+
+	rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen);
+	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+	if (*res == PCI_ERS_RESULT_NEED_RESET) return;
+	if (*res == PCI_ERS_RESULT_DISCONNECT &&
+	     rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
+}
+
+/** eeh_report_reset -- tell this device that the pci slot
+ *  has been reset.
+ */
+
+static void eeh_report_reset(struct pci_dev *dev, void *userdata)
+{
+	struct pci_driver *driver = dev->driver;
+	struct device_node *dn = pci_device_to_OF_node(dev);
+
+	if (!driver)
+		return;
+
+	if ((PCI_DN(dn)->eeh_mode) & EEH_MODE_IRQ_DISABLED) {
+		PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED;
+		enable_irq(dev->irq);
+	}
+	if (!driver->err_handler)
+		return;
+	if (!driver->err_handler->slot_reset)
+		return;
+
+	driver->err_handler->slot_reset(dev);
+}
+
+static void eeh_report_resume(struct pci_dev *dev, void *userdata)
+{
+	struct pci_driver *driver = dev->driver;
+
+	dev->error_state = pci_channel_io_normal;
+
+	if (!driver)
+		return;
+	if (!driver->err_handler)
+		return;
+	if (!driver->err_handler->resume)
+		return;
+
+	driver->err_handler->resume(dev);
+}
+
+static void eeh_report_failure(struct pci_dev *dev, void *userdata)
+{
+	struct pci_driver *driver = dev->driver;
+
+	dev->error_state = pci_channel_io_perm_failure;
+
+	if (!driver)
+		return;
+
+	if (irq_in_use (dev->irq)) {
+		struct device_node *dn = pci_device_to_OF_node(dev);
+		PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED;
+		disable_irq_nosync(dev->irq);
+	}
+	if (!driver->err_handler)
+		return;
+	if (!driver->err_handler->error_detected)
+		return;
+	driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
+}
+
+/* ------------------------------------------------------- */
+/**
+ * handle_eeh_events -- reset a PCI device after hard lockup.
+ *
+ * pSeries systems will isolate a PCI slot if the PCI-Host
+ * bridge detects address or data parity errors, DMA's
+ * occuring to wild addresses (which usually happen due to
+ * bugs in device drivers or in PCI adapter firmware).
+ * Slot isolations also occur if #SERR, #PERR or other misc
+ * PCI-related errors are detected.
+ *
+ * Recovery process consists of unplugging the device driver
+ * (which generated hotplug events to userspace), then issuing
+ * a PCI #RST to the device, then reconfiguring the PCI config
+ * space for all bridges & devices under this slot, and then
+ * finally restarting the device drivers (which cause a second
+ * set of hotplug events to go out to userspace).
+ */
+
+/**
+ * eeh_reset_device() -- perform actual reset of a pci slot
+ * Args: bus: pointer to the pci bus structure corresponding
+ *            to the isolated slot. A non-null value will
+ *            cause all devices under the bus to be removed
+ *            and then re-added.
+ *     pe_dn: pointer to a "Partionable Endpoint" device node.
+ *            This is the top-level structure on which pci
+ *            bus resets can be performed.
+ */
+
+static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
+{
+	int rc;
+	if (bus)
+		pcibios_remove_pci_devices(bus);
+
+	/* Reset the pci controller. (Asserts RST#; resets config space).
+	 * Reconfigure bridges and devices. Don't try to bring the system
+	 * up if the reset failed for some reason. */
+	rc = rtas_set_slot_reset(pe_dn);
+	if (rc)
+		return rc;
+
+ 	/* New-style config addrs might be shared across multiple devices,
+ 	 * Walk over all functions on this device */
+ 	if (pe_dn->eeh_pe_config_addr) {
+ 		struct device_node *pe = pe_dn->node;
+ 		pe = pe->parent->child;
+ 		while (pe) {
+ 			struct pci_dn *ppe = PCI_DN(pe);
+ 			if (pe_dn->eeh_pe_config_addr == ppe->eeh_pe_config_addr) {
+ 				rtas_configure_bridge(ppe);
+ 				eeh_restore_bars(ppe);
+ 			}
+ 			pe = pe->sibling;
+ 		}
+ 	} else {
+ 		rtas_configure_bridge(pe_dn);
+ 		eeh_restore_bars(pe_dn);
+ 	}
+
+	/* Give the system 5 seconds to finish running the user-space
+	 * hotplug shutdown scripts, e.g. ifdown for ethernet.  Yes, 
+	 * this is a hack, but if we don't do this, and try to bring 
+	 * the device up before the scripts have taken it down, 
+	 * potentially weird things happen.
+	 */
+	if (bus) {
+		ssleep (5);
+		pcibios_add_pci_devices(bus);
+	}
+
+	return 0;
+}
+
+/* The longest amount of time to wait for a pci device
+ * to come back on line, in seconds.
+ */
+#define MAX_WAIT_FOR_RECOVERY 15
+
+void handle_eeh_events (struct eeh_event *event)
+{
+	struct device_node *frozen_dn;
+	struct pci_dn *frozen_pdn;
+	struct pci_bus *frozen_bus;
+	int rc = 0;
+	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
+
+	frozen_dn = find_device_pe(event->dn);
+	frozen_bus = pcibios_find_pci_bus(frozen_dn);
+
+	if (!frozen_dn) {
+		printk(KERN_ERR "EEH: Error: Cannot find partition endpoint for %s\n",
+		        pci_name(event->dev));
+		return;
+	}
+
+	/* There are two different styles for coming up with the PE.
+	 * In the old style, it was the highest EEH-capable device
+	 * which was always an EADS pci bridge.  In the new style,
+	 * there might not be any EADS bridges, and even when there are,
+	 * the firmware marks them as "EEH incapable". So another
+	 * two-step is needed to find the pci bus.. */
+	if (!frozen_bus)
+		frozen_bus = pcibios_find_pci_bus (frozen_dn->parent);
+
+	if (!frozen_bus) {
+		printk(KERN_ERR "EEH: Cannot find PCI bus for %s\n",
+		        frozen_dn->full_name);
+		return;
+	}
+
+#if 0
+	/* We may get "permanent failure" messages on empty slots.
+	 * These are false alarms. Empty slots have no child dn. */
+	if ((event->state == pci_channel_io_perm_failure) && (frozen_device == NULL))
+		return;
+#endif
+
+	frozen_pdn = PCI_DN(frozen_dn);
+	frozen_pdn->eeh_freeze_count++;
+	
+	if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES)
+		goto hard_fail;
+
+	/* If the reset state is a '5' and the time to reset is 0 (infinity)
+	 * or is more then 15 seconds, then mark this as a permanent failure.
+	 */
+	if ((event->state == pci_channel_io_perm_failure) &&
+	    ((event->time_unavail <= 0) ||
+	     (event->time_unavail > MAX_WAIT_FOR_RECOVERY*1000)))
+		goto hard_fail;
+
+	eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */);
+	printk(KERN_WARNING
+	   "EEH: This PCI device has failed %d times since last reboot: %s - %s\n",
+		frozen_pdn->eeh_freeze_count,
+		pci_name (frozen_pdn->pcidev), 
+		pcid_name(frozen_pdn->pcidev));
+
+	/* Walk the various device drivers attached to this slot through
+	 * a reset sequence, giving each an opportunity to do what it needs
+	 * to accomplish the reset.  Each child gets a report of the
+	 * status ... if any child can't handle the reset, then the entire
+	 * slot is dlpar removed and added.
+	 */
+	pci_walk_bus(frozen_bus, eeh_report_error, &result);
+
+	/* If all device drivers were EEH-unaware, then shut
+	 * down all of the device drivers, and hope they
+	 * go down willingly, without panicing the system.
+	 */
+	if (result == PCI_ERS_RESULT_NONE) {
+		rc = eeh_reset_device(frozen_pdn, frozen_bus);
+		if (rc)
+			goto hard_fail;
+	}
+
+	/* If any device called out for a reset, then reset the slot */
+	if (result == PCI_ERS_RESULT_NEED_RESET) {
+		rc = eeh_reset_device(frozen_pdn, NULL);
+		if (rc)
+			goto hard_fail;
+		pci_walk_bus(frozen_bus, eeh_report_reset, 0);
+	}
+
+	/* If all devices reported they can proceed, the re-enable PIO */
+	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
+		/* XXX Not supported; we brute-force reset the device */
+		rc = eeh_reset_device(frozen_pdn, NULL);
+		if (rc)
+			goto hard_fail;
+		pci_walk_bus(frozen_bus, eeh_report_reset, 0);
+	}
+
+	/* Tell all device drivers that they can resume operations */
+	pci_walk_bus(frozen_bus, eeh_report_resume, 0);
+
+	return;
+	
+hard_fail:
+	/*
+	 * About 90% of all real-life EEH failures in the field
+	 * are due to poorly seated PCI cards. Only 10% or so are
+	 * due to actual, failed cards.
+	 */
+	printk(KERN_ERR
+	   "EEH: PCI device %s - %s has failed %d times \n"
+	   "and has been permanently disabled.  Please try reseating\n"
+	   "this device or replacing it.\n",
+		pci_name (frozen_pdn->pcidev), 
+		pcid_name(frozen_pdn->pcidev), 
+		frozen_pdn->eeh_freeze_count);
+
+	eeh_slot_error_detail(frozen_pdn, 2 /* Permanent Error */);
+
+	/* Notify all devices that they're about to go down. */
+	pci_walk_bus(frozen_bus, eeh_report_failure, 0);
+
+	/* Shut down the device drivers for good. */
+	pcibios_remove_pci_devices(frozen_bus);
+}
+
+/* ---------- end of file ---------- */
diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c
index 9249733..9a9961f 100644
--- a/arch/powerpc/platforms/pseries/eeh_event.c
+++ b/arch/powerpc/platforms/pseries/eeh_event.c
@@ -21,6 +21,7 @@
 #include <linux/list.h>
 #include <linux/pci.h>
 #include <asm/eeh_event.h>
+#include <asm/ppc-pci.h>
 
 /** Overview:
  *  EEH error states may be detected within exception handlers;
@@ -37,31 +38,6 @@ static void eeh_thread_launcher(void *);
 DECLARE_WORK(eeh_event_wq, eeh_thread_launcher, NULL);
 
 /**
- * eeh_panic - call panic() for an eeh event that cannot be handled.
- * The philosophy of this routine is that it is better to panic and
- * halt the OS than it is to risk possible data corruption by
- * oblivious device drivers that don't know better.
- *
- * @dev pci device that had an eeh event
- * @reset_state current reset state of the device slot
- */
-static void eeh_panic(struct pci_dev *dev, int reset_state)
-{
-	/*
-	 * Since the panic_on_oops sysctl is used to halt the system
-	 * in light of potential corruption, we can use it here.
-	 */
-	if (panic_on_oops) {
-		panic("EEH: MMIO failure (%d) on device:%s\n", reset_state,
-		      pci_name(dev));
-	}
-	else {
-		printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s\n",
-		       reset_state, pci_name(dev));
-	}
-}
-
-/**
  * eeh_event_handler - dispatch EEH events.  The detection of a frozen
  * slot can occur inside an interrupt, where it can be hard to do
  * anything about it.  The goal of this routine is to pull these
@@ -82,10 +58,16 @@ static int eeh_event_handler(void * dummy)
 
 		spin_lock_irqsave(&eeh_eventlist_lock, flags);
 		event = NULL;
+
+		/* Unqueue the event, get ready to process. */
 		if (!list_empty(&eeh_eventlist)) {
 			event = list_entry(eeh_eventlist.next, struct eeh_event, list);
 			list_del(&event->list);
 		}
+		
+		if (event)
+			eeh_mark_slot(event->dn, EEH_MODE_RECOVERING);
+
 		spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
 		if (event == NULL)
 			break;
@@ -93,8 +75,11 @@ static int eeh_event_handler(void * dummy)
 		printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n",
 		       pci_name(event->dev));
 
-		eeh_panic (event->dev, event->state);
+		handle_eeh_events(event);
+
+		eeh_clear_slot(event->dn, EEH_MODE_RECOVERING);
 
+		pci_dev_put(event->dev);
 		kfree(event);
 	}
 
@@ -122,7 +107,7 @@ static void eeh_thread_launcher(void *dummy)
  */
 int eeh_send_failure_event (struct device_node *dn,
                             struct pci_dev *dev,
-                            int state,
+                            enum pci_channel_state state,
                             int time_unavail)
 {
 	unsigned long flags;
author	Linus Torvalds <torvalds@g5.osdl.org>	2006-01-10 08:28:32 -0800
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-01-10 08:28:32 -0800
commit	a62e68488dd5ddb07776555fd7e0435c6d021ac1 (patch)
tree	d6cec15baa1ddfee108ef77b315dfdea5e3fa71c /arch/powerpc
parent	ab396e91bfe953db26fa1083d9c3e7a4fbe0334a (diff)
parent	3b212db9217d02e623eaa12f41c9b5f8c6a99535 (diff)
download	op-kernel-dev-a62e68488dd5ddb07776555fd7e0435c6d021ac1.zip op-kernel-dev-a62e68488dd5ddb07776555fd7e0435c6d021ac1.tar.gz