summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_drv.c
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2017-12-05 17:27:57 +0000
committerChris Wilson <chris@chris-wilson.co.uk>2017-12-05 21:51:00 +0000
commit107783d00a067cd3e2ebf4564b1b8c03915186e9 (patch)
tree3cad89839d5f79e31ee3bc06d8c22bbb7f7a711e /drivers/gpu/drm/i915/i915_drv.c
parent5888fc9eac3c2ff96e76aeeb865fdb46ab2d711e (diff)
downloadop-kernel-dev-107783d00a067cd3e2ebf4564b1b8c03915186e9.zip
op-kernel-dev-107783d00a067cd3e2ebf4564b1b8c03915186e9.tar.gz
drm/i915: Taint (TAINT_WARN) the kernel if the GPU reset fails
History tells us that if we cannot reset the GPU now, we never will. This then impacts everything that is run subsequently. On failing the reset, we mark the driver as wedged, trying to prevent further execution on the GPU, forcing userspace to fallback to using the CPU to update its framebuffers and let the user know what happened. We also want to go one step further and add a taint to the kernel so that any subsequent faults can be traced back to this failure. This is useful for CI, where if the GPU/driver fails we want to reboot and restart testing rather than continue on into oblivion. For everyone else, the warning taint is a testament to the system unreliability. TAINT_WARN is used anytime a WARN() is emitted, which is suitable for our purposes here as well; the driver/system may behave unexpectedly after the failure. v2: Also taint if the recovery fails (again history shows us that is typically fatal). v3: Use TAINT_WARN References: https://bugs.freedesktop.org/show_bug.cgi?id=103514 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: MichaƂ Winiarski <michal.winiarski@intel.com> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Acked-by: Tomi Sarvela <tomi.p.sarvela@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20171205172757.32609-1-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/i915_drv.c')
-rw-r--r--drivers/gpu/drm/i915/i915_drv.c20
1 files changed, 17 insertions, 3 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 962d7c7..b2e210b 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1897,9 +1897,9 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags)
disable_irq(i915->drm.irq);
ret = i915_gem_reset_prepare(i915);
if (ret) {
- DRM_ERROR("GPU recovery failed\n");
+ dev_err(i915->drm.dev, "GPU recovery failed\n");
intel_gpu_reset(i915, ALL_ENGINES);
- goto error;
+ goto taint;
}
if (!intel_has_gpu_reset(i915)) {
@@ -1916,7 +1916,7 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags)
}
if (ret) {
dev_err(i915->drm.dev, "Failed to reset chip\n");
- goto error;
+ goto taint;
}
i915_gem_reset(i915);
@@ -1959,6 +1959,20 @@ wakeup:
wake_up_bit(&error->flags, I915_RESET_HANDOFF);
return;
+taint:
+ /*
+ * History tells us that if we cannot reset the GPU now, we
+ * never will. This then impacts everything that is run
+ * subsequently. On failing the reset, we mark the driver
+ * as wedged, preventing further execution on the GPU.
+ * We also want to go one step further and add a taint to the
+ * kernel so that any subsequent faults can be traced back to
+ * this failure. This is important for CI, where if the
+ * GPU/driver fails we would like to reboot and restart testing
+ * rather than continue on into oblivion. For everyone else,
+ * the system should still plod along, but they have been warned!
+ */
+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
error:
i915_gem_set_wedged(i915);
i915_gem_retire_requests(i915);
OpenPOWER on IntegriCloud