From 8a90523639f49dc4b4fa7ae47bb9c8ed73ea8577 Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Sat, 11 Jul 2009 16:48:03 -0400
Subject: drm/i915: refactor error detection & collection

This patch refactors the existing error detection and collection code,
placing most of it in i915_handle_error(). Additionally, we introduce a
work queue for scheduling post-crash tasks such as generating a uevent.
Using the uevent facility, userspace should be able to capture a
post-mortem dump for diagnostics.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Ben Gamari <bgamari.foss@gmail.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/i915_drv.h         |   1 +
 drivers/gpu/drm/i915/i915_gem_debugfs.c |   2 +
 drivers/gpu/drm/i915/i915_irq.c         | 232 ++++++++++++++++++++++----------
 3 files changed, 161 insertions(+), 74 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d087528..b05b44d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -229,6 +229,7 @@ typedef struct drm_i915_private {
 
 	spinlock_t error_lock;
 	struct drm_i915_error_state *first_error;
+	struct work_struct error_work;
 
 	/* Register state */
 	u8 saveLBB;
diff --git a/drivers/gpu/drm/i915/i915_gem_debugfs.c b/drivers/gpu/drm/i915/i915_gem_debugfs.c
index 9a44bfc..cb3b974 100644
--- a/drivers/gpu/drm/i915/i915_gem_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_gem_debugfs.c
@@ -343,6 +343,8 @@ static int i915_error_state(struct seq_file *m, void *unused)
 
 	error = dev_priv->first_error;
 
+	seq_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec,
+		   error->time.tv_usec);
 	seq_printf(m, "EIR: 0x%08x\n", error->eir);
 	seq_printf(m, "  PGTBL_ER: 0x%08x\n", error->pgtbl_er);
 	seq_printf(m, "  INSTPM: 0x%08x\n", error->instpm);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 7ba23a6..f340b3f 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -290,6 +290,35 @@ irqreturn_t igdng_irq_handler(struct drm_device *dev)
 	return ret;
 }
 
+/**
+ * i915_error_work_func - do process context error handling work
+ * @work: work struct
+ *
+ * Fire an error uevent so userspace can see that a hang or error
+ * was detected.
+ */
+static void i915_error_work_func(struct work_struct *work)
+{
+	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
+						    error_work);
+	struct drm_device *dev = dev_priv->dev;
+	char *event_string = "ERROR=1";
+	char *envp[] = { event_string, NULL };
+
+	DRM_DEBUG("generating error event\n");
+
+	kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, envp);
+}
+
+/**
+ * i915_capture_error_state - capture an error record for later analysis
+ * @dev: drm device
+ *
+ * Should be called when an error is detected (either a hang or an error
+ * interrupt) to capture error state from the time of the error.  Fills
+ * out a structure which becomes available in debugfs for user level tools
+ * to pick up.
+ */
 static void i915_capture_error_state(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -325,12 +354,137 @@ static void i915_capture_error_state(struct drm_device *dev)
 		error->acthd = I915_READ(ACTHD_I965);
 	}
 
+	do_gettimeofday(&error->time);
+
 	dev_priv->first_error = error;
 
 out:
 	spin_unlock_irqrestore(&dev_priv->error_lock, flags);
 }
 
+/**
+ * i915_handle_error - handle an error interrupt
+ * @dev: drm device
+ *
+ * Do some basic checking of regsiter state at error interrupt time and
+ * dump it to the syslog.  Also call i915_capture_error_state() to make
+ * sure we get a record and make it available in debugfs.  Fire a uevent
+ * so userspace knows something bad happened (should trigger collection
+ * of a ring dump etc.).
+ */
+static void i915_handle_error(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 eir = I915_READ(EIR);
+	u32 pipea_stats = I915_READ(PIPEASTAT);
+	u32 pipeb_stats = I915_READ(PIPEBSTAT);
+
+	i915_capture_error_state(dev);
+
+	printk(KERN_ERR "render error detected, EIR: 0x%08x\n",
+	       eir);
+
+	if (IS_G4X(dev)) {
+		if (eir & (GM45_ERROR_MEM_PRIV | GM45_ERROR_CP_PRIV)) {
+			u32 ipeir = I915_READ(IPEIR_I965);
+
+			printk(KERN_ERR "  IPEIR: 0x%08x\n",
+			       I915_READ(IPEIR_I965));
+			printk(KERN_ERR "  IPEHR: 0x%08x\n",
+			       I915_READ(IPEHR_I965));
+			printk(KERN_ERR "  INSTDONE: 0x%08x\n",
+			       I915_READ(INSTDONE_I965));
+			printk(KERN_ERR "  INSTPS: 0x%08x\n",
+			       I915_READ(INSTPS));
+			printk(KERN_ERR "  INSTDONE1: 0x%08x\n",
+			       I915_READ(INSTDONE1));
+			printk(KERN_ERR "  ACTHD: 0x%08x\n",
+			       I915_READ(ACTHD_I965));
+			I915_WRITE(IPEIR_I965, ipeir);
+			(void)I915_READ(IPEIR_I965);
+		}
+		if (eir & GM45_ERROR_PAGE_TABLE) {
+			u32 pgtbl_err = I915_READ(PGTBL_ER);
+			printk(KERN_ERR "page table error\n");
+			printk(KERN_ERR "  PGTBL_ER: 0x%08x\n",
+			       pgtbl_err);
+			I915_WRITE(PGTBL_ER, pgtbl_err);
+			(void)I915_READ(PGTBL_ER);
+		}
+	}
+
+	if (IS_I9XX(dev)) {
+		if (eir & I915_ERROR_PAGE_TABLE) {
+			u32 pgtbl_err = I915_READ(PGTBL_ER);
+			printk(KERN_ERR "page table error\n");
+			printk(KERN_ERR "  PGTBL_ER: 0x%08x\n",
+			       pgtbl_err);
+			I915_WRITE(PGTBL_ER, pgtbl_err);
+			(void)I915_READ(PGTBL_ER);
+		}
+	}
+
+	if (eir & I915_ERROR_MEMORY_REFRESH) {
+		printk(KERN_ERR "memory refresh error\n");
+		printk(KERN_ERR "PIPEASTAT: 0x%08x\n",
+		       pipea_stats);
+		printk(KERN_ERR "PIPEBSTAT: 0x%08x\n",
+		       pipeb_stats);
+		/* pipestat has already been acked */
+	}
+	if (eir & I915_ERROR_INSTRUCTION) {
+		printk(KERN_ERR "instruction error\n");
+		printk(KERN_ERR "  INSTPM: 0x%08x\n",
+		       I915_READ(INSTPM));
+		if (!IS_I965G(dev)) {
+			u32 ipeir = I915_READ(IPEIR);
+
+			printk(KERN_ERR "  IPEIR: 0x%08x\n",
+			       I915_READ(IPEIR));
+			printk(KERN_ERR "  IPEHR: 0x%08x\n",
+			       I915_READ(IPEHR));
+			printk(KERN_ERR "  INSTDONE: 0x%08x\n",
+			       I915_READ(INSTDONE));
+			printk(KERN_ERR "  ACTHD: 0x%08x\n",
+			       I915_READ(ACTHD));
+			I915_WRITE(IPEIR, ipeir);
+			(void)I915_READ(IPEIR);
+		} else {
+			u32 ipeir = I915_READ(IPEIR_I965);
+
+			printk(KERN_ERR "  IPEIR: 0x%08x\n",
+			       I915_READ(IPEIR_I965));
+			printk(KERN_ERR "  IPEHR: 0x%08x\n",
+			       I915_READ(IPEHR_I965));
+			printk(KERN_ERR "  INSTDONE: 0x%08x\n",
+			       I915_READ(INSTDONE_I965));
+			printk(KERN_ERR "  INSTPS: 0x%08x\n",
+			       I915_READ(INSTPS));
+			printk(KERN_ERR "  INSTDONE1: 0x%08x\n",
+			       I915_READ(INSTDONE1));
+			printk(KERN_ERR "  ACTHD: 0x%08x\n",
+			       I915_READ(ACTHD_I965));
+			I915_WRITE(IPEIR_I965, ipeir);
+			(void)I915_READ(IPEIR_I965);
+		}
+	}
+
+	I915_WRITE(EIR, eir);
+	(void)I915_READ(EIR);
+	eir = I915_READ(EIR);
+	if (eir) {
+		/*
+		 * some errors might have become stuck,
+		 * mask them.
+		 */
+		DRM_ERROR("EIR stuck: 0x%08x, masking\n", eir);
+		I915_WRITE(EMR, I915_READ(EMR) | eir);
+		I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
+	}
+
+	schedule_work(&dev_priv->error_work);
+}
+
 irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
 {
 	struct drm_device *dev = (struct drm_device *) arg;
@@ -372,6 +526,9 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
 		pipea_stats = I915_READ(PIPEASTAT);
 		pipeb_stats = I915_READ(PIPEBSTAT);
 
+		if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
+			i915_handle_error(dev);
+
 		/*
 		 * Clear the PIPE(A|B)STAT regs before the IIR
 		 */
@@ -409,80 +566,6 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
 			I915_READ(PORT_HOTPLUG_STAT);
 		}
 
-		if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) {
-			u32 eir = I915_READ(EIR);
-
-			i915_capture_error_state(dev);
-
-			printk(KERN_ERR "render error detected, EIR: 0x%08x\n",
-			       eir);
-			if (eir & I915_ERROR_PAGE_TABLE) {
-				u32 pgtbl_err = I915_READ(PGTBL_ER);
-				printk(KERN_ERR "page table error\n");
-				printk(KERN_ERR "  PGTBL_ER: 0x%08x\n",
-				       pgtbl_err);
-				I915_WRITE(PGTBL_ER, pgtbl_err);
-				(void)I915_READ(PGTBL_ER);
-			}
-			if (eir & I915_ERROR_MEMORY_REFRESH) {
-				printk(KERN_ERR "memory refresh error\n");
-				printk(KERN_ERR "PIPEASTAT: 0x%08x\n",
-				       pipea_stats);
-				printk(KERN_ERR "PIPEBSTAT: 0x%08x\n",
-				       pipeb_stats);
-				/* pipestat has already been acked */
-			}
-			if (eir & I915_ERROR_INSTRUCTION) {
-				printk(KERN_ERR "instruction error\n");
-				printk(KERN_ERR "  INSTPM: 0x%08x\n",
-				       I915_READ(INSTPM));
-				if (!IS_I965G(dev)) {
-					u32 ipeir = I915_READ(IPEIR);
-
-					printk(KERN_ERR "  IPEIR: 0x%08x\n",
-					       I915_READ(IPEIR));
-					printk(KERN_ERR "  IPEHR: 0x%08x\n",
-						   I915_READ(IPEHR));
-					printk(KERN_ERR "  INSTDONE: 0x%08x\n",
-						   I915_READ(INSTDONE));
-					printk(KERN_ERR "  ACTHD: 0x%08x\n",
-						   I915_READ(ACTHD));
-					I915_WRITE(IPEIR, ipeir);
-					(void)I915_READ(IPEIR);
-				} else {
-					u32 ipeir = I915_READ(IPEIR_I965);
-
-					printk(KERN_ERR "  IPEIR: 0x%08x\n",
-					       I915_READ(IPEIR_I965));
-					printk(KERN_ERR "  IPEHR: 0x%08x\n",
-					       I915_READ(IPEHR_I965));
-					printk(KERN_ERR "  INSTDONE: 0x%08x\n",
-					       I915_READ(INSTDONE_I965));
-					printk(KERN_ERR "  INSTPS: 0x%08x\n",
-					       I915_READ(INSTPS));
-					printk(KERN_ERR "  INSTDONE1: 0x%08x\n",
-					       I915_READ(INSTDONE1));
-					printk(KERN_ERR "  ACTHD: 0x%08x\n",
-					       I915_READ(ACTHD_I965));
-					I915_WRITE(IPEIR_I965, ipeir);
-					(void)I915_READ(IPEIR_I965);
-				}
-			}
-
-			I915_WRITE(EIR, eir);
-			(void)I915_READ(EIR);
-			eir = I915_READ(EIR);
-			if (eir) {
-				/*
-				 * some errors might have become stuck,
-				 * mask them.
-				 */
-				DRM_ERROR("EIR stuck: 0x%08x, masking\n", eir);
-				I915_WRITE(EMR, I915_READ(EMR) | eir);
-				I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
-			}
-		}
-
 		I915_WRITE(IIR, iir);
 		new_iir = I915_READ(IIR); /* Flush posted writes */
 
@@ -830,6 +913,7 @@ void i915_driver_irq_preinstall(struct drm_device * dev)
 	atomic_set(&dev_priv->irq_received, 0);
 
 	INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func);
+	INIT_WORK(&dev_priv->error_work, i915_error_work_func);
 
 	if (IS_IGDNG(dev)) {
 		igdng_irq_preinstall(dev);
-- 
cgit v1.1