summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGavin Shan <gwshan@linux.vnet.ibm.com>2014-06-10 11:41:55 +1000
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2014-08-05 15:28:47 +1000
commit05ec424e38fbba43829820b8f3634154f812e67e (patch)
tree3adea52ae78ec67819c5cde299436fa4c4fd37c7
parent9287b95ec9ded0a4458094ebd967502263d80112 (diff)
downloadop-kernel-dev-05ec424e38fbba43829820b8f3634154f812e67e.zip
op-kernel-dev-05ec424e38fbba43829820b8f3634154f812e67e.tar.gz
powerpc/eeh: Avoid event on passed PE
We must not handle EEH error on devices which are passed to somebody else. Instead, we expect that the frozen device owner detects an EEH error and recovers from it. This avoids EEH error handling on passed through devices so the device owner gets a chance to handle them. Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com> Acked-by: Alexander Graf <agraf@suse.de> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r--arch/powerpc/include/asm/eeh.h7
-rw-r--r--arch/powerpc/kernel/eeh.c8
-rw-r--r--arch/powerpc/platforms/powernv/eeh-ioda.c3
3 files changed, 17 insertions, 1 deletions
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index fab7743..9537d83 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -25,6 +25,7 @@
#include <linux/list.h>
#include <linux/string.h>
#include <linux/time.h>
+#include <linux/atomic.h>
struct pci_dev;
struct pci_bus;
@@ -84,6 +85,7 @@ struct eeh_pe {
int freeze_count; /* Times of froze up */
struct timeval tstamp; /* Time on first-time freeze */
int false_positives; /* Times of reported #ff's */
+ atomic_t pass_dev_cnt; /* Count of passed through devs */
struct eeh_pe *parent; /* Parent PE */
struct list_head child_list; /* Link PE to the child list */
struct list_head edevs; /* Link list of EEH devices */
@@ -93,6 +95,11 @@ struct eeh_pe {
#define eeh_pe_for_each_dev(pe, edev, tmp) \
list_for_each_entry_safe(edev, tmp, &pe->edevs, list)
+static inline bool eeh_pe_passed(struct eeh_pe *pe)
+{
+ return pe ? !!atomic_read(&pe->pass_dev_cnt) : false;
+}
+
/*
* The struct is used to trace EEH state for the associated
* PCI device node or PCI device. In future, it might
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 86e2570..c8f1a9d 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -400,6 +400,14 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
if (ret > 0)
return ret;
+ /*
+ * If the PE isn't owned by us, we shouldn't check the
+ * state. Instead, let the owner handle it if the PE has
+ * been frozen.
+ */
+ if (eeh_pe_passed(pe))
+ return 0;
+
/* If we already have a pending isolation event for this
* slot, we know it's bad already, we don't need to check.
* Do this checking under a lock; as multiple PCI devices
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index 8ad0c5b..f6abdb1 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -812,7 +812,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no,
OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
ret = EEH_NEXT_ERR_NONE;
- } else if ((*pe)->state & EEH_PE_ISOLATED) {
+ } else if ((*pe)->state & EEH_PE_ISOLATED ||
+ eeh_pe_passed(*pe)) {
ret = EEH_NEXT_ERR_NONE;
} else {
pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
OpenPOWER on IntegriCloud