summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2018-02-05 09:41:39 +0000
committerChris Wilson <chris@chris-wilson.co.uk>2018-02-05 10:59:22 +0000
commit302e55d7be959502058878e9edb1d369a73598d4 (patch)
tree133028c3e09de8641c07748322b3371ae4df0a77 /drivers/gpu/drm/i915
parent55ef72f24fae5d41febe6b6ebf7304f9a2cb9471 (diff)
drm/i915: Report if an unbannable context is involved in a GPU hang
Since unbannable contexts are special and supposed not to be causing GPU hangs in the first place, make it clear when they are implicated in said hang. In practice, most unbannable contexts are those created by igt for the express purpose of throwing untold thousands of hangs at the GPU and wish to keep doing so to finish the test. Normally they are cleaned up, but it's when they or the other unbannable kernel contexts stay stuck in an erroneous state that we need to worry and so need highlighting. Suggested-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180205094139.10671-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Diffstat (limited to 'drivers/gpu/drm/i915')
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h1
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c21
2 files changed, 16 insertions, 6 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5ed220e28402..a241620f22ad 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -555,6 +555,7 @@ struct i915_gpu_state {
int ban_score;
int active;
int guilty;
+ bool bannable;
} context;
struct drm_i915_error_object {
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a81351d9e3a6..67c902412193 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -396,6 +396,11 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m,
ee->instdone.row[slice][subslice]);
}
+static const char *bannable(const struct drm_i915_error_context *ctx)
+{
+ return ctx->bannable ? "" : " (unbannable)";
+}
+
static void error_print_request(struct drm_i915_error_state_buf *m,
const char *prefix,
const struct drm_i915_error_request *erq)
@@ -414,9 +419,10 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
const char *header,
const struct drm_i915_error_context *ctx)
{
- err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d guilty %d active %d\n",
+ err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d%s guilty %d active %d\n",
header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id,
- ctx->priority, ctx->ban_score, ctx->guilty, ctx->active);
+ ctx->priority, ctx->ban_score, bannable(ctx),
+ ctx->guilty, ctx->active);
}
static void error_print_engine(struct drm_i915_error_state_buf *m,
@@ -644,11 +650,12 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
if (error->engine[i].hangcheck_stalled &&
error->engine[i].context.pid) {
- err_printf(m, "Active process (on ring %s): %s [%d], score %d\n",
+ err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n",
engine_name(m->i915, i),
error->engine[i].context.comm,
error->engine[i].context.pid,
- error->engine[i].context.ban_score);
+ error->engine[i].context.ban_score,
+ bannable(&error->engine[i].context));
}
}
err_printf(m, "Reset count: %u\n", error->reset_count);
@@ -736,12 +743,13 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
if (obj) {
err_puts(m, dev_priv->engine[i]->name);
if (ee->context.pid)
- err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d)",
+ err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d%s)",
ee->context.comm,
ee->context.pid,
ee->context.handle,
ee->context.hw_id,
- ee->context.ban_score);
+ ee->context.ban_score,
+ bannable(&ee->context));
err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
upper_32_bits(obj->gtt_offset),
lower_32_bits(obj->gtt_offset));
@@ -1383,6 +1391,7 @@ static void record_context(struct drm_i915_error_context *e,
e->hw_id = ctx->hw_id;
e->priority = ctx->priority;
e->ban_score = atomic_read(&ctx->ban_score);
+ e->bannable = i915_gem_context_is_bannable(ctx);
e->guilty = atomic_read(&ctx->guilty_count);
e->active = atomic_read(&ctx->active_count);
}