summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_gpu_error.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gpu_error.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c237
1 files changed, 119 insertions, 118 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 9440593..f89ac7a8 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -34,16 +34,25 @@
#include "i915_drv.h"
-static const char *engine_str(int engine)
-{
- switch (engine) {
- case RCS: return "render";
- case VCS: return "bsd";
- case BCS: return "blt";
- case VECS: return "vebox";
- case VCS2: return "bsd2";
- default: return "";
- }
+static inline const struct intel_engine_cs *
+engine_lookup(const struct drm_i915_private *i915, unsigned int id)
+{
+ if (id >= I915_NUM_ENGINES)
+ return NULL;
+
+ return i915->engine[id];
+}
+
+static inline const char *
+__engine_name(const struct intel_engine_cs *engine)
+{
+ return engine ? engine->name : "";
+}
+
+static const char *
+engine_name(const struct drm_i915_private *i915, unsigned int id)
+{
+ return __engine_name(engine_lookup(i915, id));
}
static const char *tiling_flag(int tiling)
@@ -345,7 +354,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
err_puts(m, purgeable_flag(err->purgeable));
err_puts(m, err->userptr ? " userptr" : "");
err_puts(m, err->engine != -1 ? " " : "");
- err_puts(m, engine_str(err->engine));
+ err_puts(m, engine_name(m->i915, err->engine));
err_puts(m, i915_cache_level_str(m->i915, err->cache_level));
if (err->name)
@@ -387,6 +396,11 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m,
ee->instdone.row[slice][subslice]);
}
+static const char *bannable(const struct drm_i915_error_context *ctx)
+{
+ return ctx->bannable ? "" : " (unbannable)";
+}
+
static void error_print_request(struct drm_i915_error_state_buf *m,
const char *prefix,
const struct drm_i915_error_request *erq)
@@ -405,9 +419,10 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
const char *header,
const struct drm_i915_error_context *ctx)
{
- err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d guilty %d active %d\n",
+ err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d%s guilty %d active %d\n",
header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id,
- ctx->priority, ctx->ban_score, ctx->guilty, ctx->active);
+ ctx->priority, ctx->ban_score, bannable(ctx),
+ ctx->guilty, ctx->active);
}
static void error_print_engine(struct drm_i915_error_state_buf *m,
@@ -415,7 +430,8 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
{
int n;
- err_printf(m, "%s command stream:\n", engine_str(ee->engine_id));
+ err_printf(m, "%s command stream:\n",
+ engine_name(m->i915, ee->engine_id));
err_printf(m, " IDLE?: %s\n", yesno(ee->idle));
err_printf(m, " START: 0x%08x\n", ee->start);
err_printf(m, " HEAD: 0x%08x [0x%08x]\n", ee->head, ee->rq_head);
@@ -563,11 +579,14 @@ static void print_error_obj(struct drm_i915_error_state_buf *m,
}
static void err_print_capabilities(struct drm_i915_error_state_buf *m,
- const struct intel_device_info *info)
+ const struct intel_device_info *info,
+ const struct intel_driver_caps *caps)
{
struct drm_printer p = i915_error_printer(m);
intel_device_info_dump_flags(info, &p);
+ intel_driver_caps_print(caps, &p);
+ intel_device_info_dump_topology(&info->sseu, &p);
}
static void err_print_params(struct drm_i915_error_state_buf *m,
@@ -610,6 +629,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
{
struct drm_i915_private *dev_priv = m->i915;
struct drm_i915_error_object *obj;
+ struct timespec64 ts;
int i, j;
if (!error) {
@@ -620,21 +640,25 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
if (*error->error_msg)
err_printf(m, "%s\n", error->error_msg);
err_printf(m, "Kernel: " UTS_RELEASE "\n");
- err_printf(m, "Time: %ld s %ld us\n",
- error->time.tv_sec, error->time.tv_usec);
- err_printf(m, "Boottime: %ld s %ld us\n",
- error->boottime.tv_sec, error->boottime.tv_usec);
- err_printf(m, "Uptime: %ld s %ld us\n",
- error->uptime.tv_sec, error->uptime.tv_usec);
+ ts = ktime_to_timespec64(error->time);
+ err_printf(m, "Time: %lld s %ld us\n",
+ (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
+ ts = ktime_to_timespec64(error->boottime);
+ err_printf(m, "Boottime: %lld s %ld us\n",
+ (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
+ ts = ktime_to_timespec64(error->uptime);
+ err_printf(m, "Uptime: %lld s %ld us\n",
+ (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
if (error->engine[i].hangcheck_stalled &&
error->engine[i].context.pid) {
- err_printf(m, "Active process (on ring %s): %s [%d], score %d\n",
- engine_str(i),
+ err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n",
+ engine_name(m->i915, i),
error->engine[i].context.comm,
error->engine[i].context.pid,
- error->engine[i].context.ban_score);
+ error->engine[i].context.ban_score,
+ bannable(&error->engine[i].context));
}
}
err_printf(m, "Reset count: %u\n", error->reset_count);
@@ -722,12 +746,13 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
if (obj) {
err_puts(m, dev_priv->engine[i]->name);
if (ee->context.pid)
- err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d)",
+ err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d%s)",
ee->context.comm,
ee->context.pid,
ee->context.handle,
ee->context.hw_id,
- ee->context.ban_score);
+ ee->context.ban_score,
+ bannable(&ee->context));
err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
upper_32_bits(obj->gtt_offset),
lower_32_bits(obj->gtt_offset));
@@ -786,7 +811,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
if (error->display)
intel_display_print_error_state(m, error->display);
- err_print_capabilities(m, &error->device_info);
+ err_print_capabilities(m, &error->device_info, &error->driver_caps);
err_print_params(m, &error->params);
err_print_uc(m, &error->uc);
@@ -967,7 +992,7 @@ out:
static inline uint32_t
__active_get_seqno(struct i915_gem_active *active)
{
- struct drm_i915_gem_request *request;
+ struct i915_request *request;
request = __i915_gem_active_peek(active);
return request ? request->global_seqno : 0;
@@ -976,7 +1001,7 @@ __active_get_seqno(struct i915_gem_active *active)
static inline int
__active_get_engine_id(struct i915_gem_active *active)
{
- struct drm_i915_gem_request *request;
+ struct i915_request *request;
request = __i915_gem_active_peek(active);
return request ? request->engine->id : -1;
@@ -997,8 +1022,8 @@ static void capture_bo(struct drm_i915_error_buffer *err,
err->engine = __active_get_engine_id(&obj->frontbuffer_write);
err->gtt_offset = vma->node.start;
- err->read_domains = obj->base.read_domains;
- err->write_domain = obj->base.write_domain;
+ err->read_domains = obj->read_domains;
+ err->write_domain = obj->write_domain;
err->fence_reg = vma->fence ? vma->fence->id : -1;
err->tiling = i915_gem_object_get_tiling(obj);
err->dirty = obj->mm.dirty;
@@ -1060,9 +1085,9 @@ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv,
return error_code;
}
-static void i915_gem_record_fences(struct drm_i915_private *dev_priv,
- struct i915_gpu_state *error)
+static void gem_record_fences(struct i915_gpu_state *error)
{
+ struct drm_i915_private *dev_priv = error->i915;
int i;
if (INTEL_GEN(dev_priv) >= 6) {
@@ -1078,27 +1103,6 @@ static void i915_gem_record_fences(struct drm_i915_private *dev_priv,
error->nfence = i;
}
-static inline u32
-gen8_engine_sync_index(struct intel_engine_cs *engine,
- struct intel_engine_cs *other)
-{
- int idx;
-
- /*
- * rcs -> 0 = vcs, 1 = bcs, 2 = vecs, 3 = vcs2;
- * vcs -> 0 = bcs, 1 = vecs, 2 = vcs2, 3 = rcs;
- * bcs -> 0 = vecs, 1 = vcs2. 2 = rcs, 3 = vcs;
- * vecs -> 0 = vcs2, 1 = rcs, 2 = vcs, 3 = bcs;
- * vcs2 -> 0 = rcs, 1 = vcs, 2 = bcs, 3 = vecs;
- */
-
- idx = (other - engine) - 1;
- if (idx < 0)
- idx += I915_NUM_ENGINES;
-
- return idx;
-}
-
static void gen6_record_semaphore_state(struct intel_engine_cs *engine,
struct drm_i915_error_engine *ee)
{
@@ -1269,7 +1273,7 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
}
}
-static void record_request(struct drm_i915_gem_request *request,
+static void record_request(struct i915_request *request,
struct drm_i915_error_request *erq)
{
erq->context = request->ctx->hw_id;
@@ -1286,10 +1290,10 @@ static void record_request(struct drm_i915_gem_request *request,
}
static void engine_record_requests(struct intel_engine_cs *engine,
- struct drm_i915_gem_request *first,
+ struct i915_request *first,
struct drm_i915_error_engine *ee)
{
- struct drm_i915_gem_request *request;
+ struct i915_request *request;
int count;
count = 0;
@@ -1339,7 +1343,7 @@ static void error_record_engine_execlists(struct intel_engine_cs *engine,
unsigned int n;
for (n = 0; n < execlists_num_ports(execlists); n++) {
- struct drm_i915_gem_request *rq = port_request(&execlists->port[n]);
+ struct i915_request *rq = port_request(&execlists->port[n]);
if (!rq)
break;
@@ -1369,14 +1373,15 @@ static void record_context(struct drm_i915_error_context *e,
e->hw_id = ctx->hw_id;
e->priority = ctx->priority;
e->ban_score = atomic_read(&ctx->ban_score);
+ e->bannable = i915_gem_context_is_bannable(ctx);
e->guilty = atomic_read(&ctx->guilty_count);
e->active = atomic_read(&ctx->active_count);
}
-static void request_record_user_bo(struct drm_i915_gem_request *request,
+static void request_record_user_bo(struct i915_request *request,
struct drm_i915_error_engine *ee)
{
- struct i915_gem_capture_list *c;
+ struct i915_capture_list *c;
struct drm_i915_error_object **bo;
long count;
@@ -1420,16 +1425,16 @@ capture_object(struct drm_i915_private *dev_priv,
}
}
-static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
- struct i915_gpu_state *error)
+static void gem_record_rings(struct i915_gpu_state *error)
{
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
+ struct drm_i915_private *i915 = error->i915;
+ struct i915_ggtt *ggtt = &i915->ggtt;
int i;
for (i = 0; i < I915_NUM_ENGINES; i++) {
- struct intel_engine_cs *engine = dev_priv->engine[i];
+ struct intel_engine_cs *engine = i915->engine[i];
struct drm_i915_error_engine *ee = &error->engine[i];
- struct drm_i915_gem_request *request;
+ struct i915_request *request;
ee->engine_id = -1;
@@ -1456,17 +1461,16 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
* by userspace.
*/
ee->batchbuffer =
- i915_error_object_create(dev_priv,
- request->batch);
+ i915_error_object_create(i915, request->batch);
- if (HAS_BROKEN_CS_TLB(dev_priv))
+ if (HAS_BROKEN_CS_TLB(i915))
ee->wa_batchbuffer =
- i915_error_object_create(dev_priv,
+ i915_error_object_create(i915,
engine->scratch);
request_record_user_bo(request, ee);
ee->ctx =
- i915_error_object_create(dev_priv,
+ i915_error_object_create(i915,
request->ctx->engine[i].state);
error->simulated |=
@@ -1480,27 +1484,24 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
ee->cpu_ring_head = ring->head;
ee->cpu_ring_tail = ring->tail;
ee->ringbuffer =
- i915_error_object_create(dev_priv, ring->vma);
+ i915_error_object_create(i915, ring->vma);
engine_record_requests(engine, request, ee);
}
ee->hws_page =
- i915_error_object_create(dev_priv,
+ i915_error_object_create(i915,
engine->status_page.vma);
- ee->wa_ctx =
- i915_error_object_create(dev_priv, engine->wa_ctx.vma);
+ ee->wa_ctx = i915_error_object_create(i915, engine->wa_ctx.vma);
- ee->default_state =
- capture_object(dev_priv, engine->default_state);
+ ee->default_state = capture_object(i915, engine->default_state);
}
}
-static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
- struct i915_gpu_state *error,
- struct i915_address_space *vm,
- int idx)
+static void gem_capture_vm(struct i915_gpu_state *error,
+ struct i915_address_space *vm,
+ int idx)
{
struct drm_i915_error_buffer *active_bo;
struct i915_vma *vma;
@@ -1523,8 +1524,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
error->active_bo_count[idx] = count;
}
-static void i915_capture_active_buffers(struct drm_i915_private *dev_priv,
- struct i915_gpu_state *error)
+static void capture_active_buffers(struct i915_gpu_state *error)
{
int cnt = 0, i, j;
@@ -1544,14 +1544,13 @@ static void i915_capture_active_buffers(struct drm_i915_private *dev_priv,
for (j = 0; j < i && !found; j++)
found = error->engine[j].vm == ee->vm;
if (!found)
- i915_gem_capture_vm(dev_priv, error, ee->vm, cnt++);
+ gem_capture_vm(error, ee->vm, cnt++);
}
}
-static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv,
- struct i915_gpu_state *error)
+static void capture_pinned_buffers(struct i915_gpu_state *error)
{
- struct i915_address_space *vm = &dev_priv->ggtt.base;
+ struct i915_address_space *vm = &error->i915->ggtt.base;
struct drm_i915_error_buffer *bo;
struct i915_vma *vma;
int count_inactive, count_active;
@@ -1601,9 +1600,9 @@ static void capture_uc_state(struct i915_gpu_state *error)
}
/* Capture all registers which don't fit into another category. */
-static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
- struct i915_gpu_state *error)
+static void capture_reg_state(struct i915_gpu_state *error)
{
+ struct drm_i915_private *dev_priv = error->i915;
int i;
/* General organization
@@ -1700,23 +1699,25 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv,
engine_mask ? "reset" : "continue");
}
-static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
- struct i915_gpu_state *error)
+static void capture_gen_state(struct i915_gpu_state *error)
{
- error->awake = dev_priv->gt.awake;
- error->wakelock = atomic_read(&dev_priv->runtime_pm.wakeref_count);
- error->suspended = dev_priv->runtime_pm.suspended;
+ struct drm_i915_private *i915 = error->i915;
+
+ error->awake = i915->gt.awake;
+ error->wakelock = atomic_read(&i915->runtime_pm.wakeref_count);
+ error->suspended = i915->runtime_pm.suspended;
error->iommu = -1;
#ifdef CONFIG_INTEL_IOMMU
error->iommu = intel_iommu_gfx_mapped;
#endif
- error->reset_count = i915_reset_count(&dev_priv->gpu_error);
- error->suspend_count = dev_priv->suspend_count;
+ error->reset_count = i915_reset_count(&i915->gpu_error);
+ error->suspend_count = i915->suspend_count;
memcpy(&error->device_info,
- INTEL_INFO(dev_priv),
+ INTEL_INFO(i915),
sizeof(error->device_info));
+ error->driver_caps = i915->caps;
}
static __always_inline void dup_param(const char *type, void *x)
@@ -1737,21 +1738,19 @@ static int capture(void *data)
{
struct i915_gpu_state *error = data;
- do_gettimeofday(&error->time);
- error->boottime = ktime_to_timeval(ktime_get_boottime());
- error->uptime =
- ktime_to_timeval(ktime_sub(ktime_get(),
- error->i915->gt.last_init_time));
+ error->time = ktime_get_real();
+ error->boottime = ktime_get_boottime();
+ error->uptime = ktime_sub(ktime_get(),
+ error->i915->gt.last_init_time);
capture_params(error);
+ capture_gen_state(error);
capture_uc_state(error);
-
- i915_capture_gen_state(error->i915, error);
- i915_capture_reg_state(error->i915, error);
- i915_gem_record_fences(error->i915, error);
- i915_gem_record_rings(error->i915, error);
- i915_capture_active_buffers(error->i915, error);
- i915_capture_pinned_buffers(error->i915, error);
+ capture_reg_state(error);
+ gem_record_fences(error);
+ gem_record_rings(error);
+ capture_active_buffers(error);
+ capture_pinned_buffers(error);
error->overlay = intel_overlay_capture_error_state(error->i915);
error->display = intel_display_capture_error_state(error->i915);
@@ -1780,14 +1779,16 @@ i915_capture_gpu_state(struct drm_i915_private *i915)
/**
* i915_capture_error_state - capture an error record for later analysis
- * @dev: drm device
+ * @i915: i915 device
+ * @engine_mask: the mask of engines triggering the hang
+ * @error_msg: a message to insert into the error capture header
*
* Should be called when an error is detected (either a hang or an error
* interrupt) to capture error state from the time of the error. Fills
* out a structure which becomes available in debugfs for user level tools
* to pick up.
*/
-void i915_capture_error_state(struct drm_i915_private *dev_priv,
+void i915_capture_error_state(struct drm_i915_private *i915,
u32 engine_mask,
const char *error_msg)
{
@@ -1798,25 +1799,25 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
if (!i915_modparams.error_capture)
return;
- if (READ_ONCE(dev_priv->gpu_error.first_error))
+ if (READ_ONCE(i915->gpu_error.first_error))
return;
- error = i915_capture_gpu_state(dev_priv);
+ error = i915_capture_gpu_state(i915);
if (!error) {
DRM_DEBUG_DRIVER("out of memory, not capturing error state\n");
return;
}
- i915_error_capture_msg(dev_priv, error, engine_mask, error_msg);
+ i915_error_capture_msg(i915, error, engine_mask, error_msg);
DRM_INFO("%s\n", error->error_msg);
if (!error->simulated) {
- spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
- if (!dev_priv->gpu_error.first_error) {
- dev_priv->gpu_error.first_error = error;
+ spin_lock_irqsave(&i915->gpu_error.lock, flags);
+ if (!i915->gpu_error.first_error) {
+ i915->gpu_error.first_error = error;
error = NULL;
}
- spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
+ spin_unlock_irqrestore(&i915->gpu_error.lock, flags);
}
if (error) {
@@ -1831,7 +1832,7 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
DRM_INFO("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n");
DRM_INFO("The gpu crash dump is required to analyze gpu hangs, so please always attach it.\n");
DRM_INFO("GPU crash dump saved to /sys/class/drm/card%d/error\n",
- dev_priv->drm.primary->index);
+ i915->drm.primary->index);
warned = true;
}
}
OpenPOWER on IntegriCloud