summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_debugfs.c
diff options
context:
space:
mode:
authorMika Kuoppala <mika.kuoppala@linux.intel.com>2016-11-18 15:09:04 +0200
committerMika Kuoppala <mika.kuoppala@intel.com>2016-11-21 14:36:40 +0200
commit3fe3b030bd2d7a51c12aa6fe0e5178b9f1a726ec (patch)
tree225cdffc34ad23568ea900ecaa8045ed0273cff1 /drivers/gpu/drm/i915/i915_debugfs.c
parent6e16d028e441b0b2c141aaecb39f4838cd2964b5 (diff)
downloadop-kernel-dev-3fe3b030bd2d7a51c12aa6fe0e5178b9f1a726ec.zip
op-kernel-dev-3fe3b030bd2d7a51c12aa6fe0e5178b9f1a726ec.tar.gz
drm/i915: Decouple hang detection from hangcheck period
Hangcheck state accumulation has gained more steps along the years, like head movement and more recently the subunit inactivity check. As the subunit sampling is only done if the previous state check showed inactivity, we have added more stages (and time) to reach a hang verdict. Asymmetric engine states led to different actual weight of 'one hangcheck unit' and it was demonstrated in some hangs that due to difference in stages, simpler engines were accused falsely of a hang as their scoring was much more quicker to accumulate above the hang treshold. To completely decouple the hangcheck guilty score from the hangcheck period, convert hangcheck score to a rough period of inactivity measurement. As these are tracked as jiffies, they are meaningful also across reset boundaries. This makes finding a guilty engine more accurate across multi engine activity scenarios, especially across asymmetric engines. We lose the ability to detect cross batch malicious attempts to hinder the progress. Plan is to move this functionality to be part of context banning which is more natural fit, later in the series. v2: use time_before macros (Chris) reinstate the pardoning of moving engine after hc (Chris) v3: avoid global state for per engine stall detection (Chris) v4: take timeline last retirement into account (Chris) v5: do debug print on pardoning, split out retirement timestamp (Chris) Cc: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
Diffstat (limited to 'drivers/gpu/drm/i915/i915_debugfs.c')
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c17
1 files changed, 11 insertions, 6 deletions
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index fb47efd..437212a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1351,10 +1351,12 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
seq_printf(m, "\tseqno = %x [current %x, last %x]\n",
engine->hangcheck.seqno, seqno[id],
intel_engine_last_submit(engine));
- seq_printf(m, "\twaiters? %s, fake irq active? %s\n",
+ seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s\n",
yesno(intel_engine_has_waiter(engine)),
yesno(test_bit(engine->id,
- &dev_priv->gpu_error.missed_irq_rings)));
+ &dev_priv->gpu_error.missed_irq_rings)),
+ yesno(engine->hangcheck.stalled));
+
spin_lock_irq(&b->lock);
for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
struct intel_wait *w = container_of(rb, typeof(*w), node);
@@ -1367,8 +1369,11 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
(long long)engine->hangcheck.acthd,
(long long)acthd[id]);
- seq_printf(m, "\tscore = %d\n", engine->hangcheck.score);
- seq_printf(m, "\taction = %d\n", engine->hangcheck.action);
+ seq_printf(m, "\taction = %s(%d) %d ms ago\n",
+ hangcheck_action_to_str(engine->hangcheck.action),
+ engine->hangcheck.action,
+ jiffies_to_msecs(jiffies -
+ engine->hangcheck.action_timestamp));
if (engine->id == RCS) {
seq_puts(m, "\tinstdone read =\n");
@@ -3162,11 +3167,11 @@ static int i915_engine_info(struct seq_file *m, void *unused)
u64 addr;
seq_printf(m, "%s\n", engine->name);
- seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [score %d]\n",
+ seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n",
intel_engine_get_seqno(engine),
intel_engine_last_submit(engine),
engine->hangcheck.seqno,
- engine->hangcheck.score);
+ jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
rcu_read_lock();
OpenPOWER on IntegriCloud