diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_reset.c')
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_reset.c | 105 |
1 files changed, 73 insertions, 32 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index c97423a76642..1c51296646e0 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -21,6 +21,7 @@ #include "intel_reset.h" #include "uc/intel_guc.h" +#include "uc/intel_guc_submission.h" #define RESET_MAX_RETRIES 3 @@ -40,27 +41,29 @@ static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) static void engine_skip_context(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; - struct i915_gem_context *hung_ctx = rq->gem_context; + struct intel_context *hung_ctx = rq->context; if (!i915_request_is_active(rq)) return; lockdep_assert_held(&engine->active.lock); list_for_each_entry_continue(rq, &engine->active.requests, sched.link) - if (rq->gem_context == hung_ctx) + if (rq->context == hung_ctx) i915_request_skip(rq, -EIO); } -static void client_mark_guilty(struct drm_i915_file_private *file_priv, - const struct i915_gem_context *ctx) +static void client_mark_guilty(struct i915_gem_context *ctx, bool banned) { - unsigned int score; + struct drm_i915_file_private *file_priv = ctx->file_priv; unsigned long prev_hang; + unsigned int score; + + if (IS_ERR_OR_NULL(file_priv)) + return; - if (i915_gem_context_is_banned(ctx)) + score = 0; + if (banned) score = I915_CLIENT_SCORE_CONTEXT_BAN; - else - score = 0; prev_hang = xchg(&file_priv->hang_timestamp, jiffies); if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES)) @@ -75,17 +78,38 @@ static void client_mark_guilty(struct drm_i915_file_private *file_priv, } } -static bool context_mark_guilty(struct i915_gem_context *ctx) +static bool mark_guilty(struct i915_request *rq) { + struct i915_gem_context *ctx; unsigned long prev_hang; bool banned; int i; + rcu_read_lock(); + ctx = rcu_dereference(rq->context->gem_context); + if (ctx && !kref_get_unless_zero(&ctx->ref)) + ctx = NULL; + rcu_read_unlock(); + if (!ctx) + return false; + + if 
(i915_gem_context_is_closed(ctx)) { + intel_context_set_banned(rq->context); + banned = true; + goto out; + } + atomic_inc(&ctx->guilty_count); /* Cool contexts are too cool to be banned! (Used for reset testing.) */ - if (!i915_gem_context_is_bannable(ctx)) - return false; + if (!i915_gem_context_is_bannable(ctx)) { + banned = false; + goto out; + } + + dev_notice(ctx->i915->drm.dev, + "%s context reset due to GPU hang\n", + ctx->name); /* Record the timestamp for the last N hangs */ prev_hang = ctx->hang_timestamp[0]; @@ -100,18 +124,25 @@ static bool context_mark_guilty(struct i915_gem_context *ctx) if (banned) { DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n", ctx->name, atomic_read(&ctx->guilty_count)); - i915_gem_context_set_banned(ctx); + intel_context_set_banned(rq->context); } - if (!IS_ERR_OR_NULL(ctx->file_priv)) - client_mark_guilty(ctx->file_priv, ctx); + client_mark_guilty(ctx, banned); +out: + i915_gem_context_put(ctx); return banned; } -static void context_mark_innocent(struct i915_gem_context *ctx) +static void mark_innocent(struct i915_request *rq) { - atomic_inc(&ctx->active_count); + struct i915_gem_context *ctx; + + rcu_read_lock(); + ctx = rcu_dereference(rq->context->gem_context); + if (ctx) + atomic_inc(&ctx->active_count); + rcu_read_unlock(); } void __i915_request_reset(struct i915_request *rq, bool guilty) @@ -124,14 +155,16 @@ void __i915_request_reset(struct i915_request *rq, bool guilty) GEM_BUG_ON(i915_request_completed(rq)); + rcu_read_lock(); /* protect the GEM context */ if (guilty) { i915_request_skip(rq, -EIO); - if (context_mark_guilty(rq->gem_context)) + if (mark_guilty(rq)) engine_skip_context(rq); } else { dma_fence_set_error(&rq->fence, -EAGAIN); - context_mark_innocent(rq->gem_context); + mark_innocent(rq); } + rcu_read_unlock(); } static bool i915_in_reset(struct pci_dev *pdev) @@ -647,7 +680,8 @@ static void reset_prepare_engine(struct intel_engine_cs *engine) * GPU state upon resume, i.e. 
fail to restart after a reset. */ intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); - engine->reset.prepare(engine); + if (engine->reset.prepare) + engine->reset.prepare(engine); } static void revoke_mmaps(struct intel_gt *gt) @@ -667,8 +701,13 @@ static void revoke_mmaps(struct intel_gt *gt) continue; GEM_BUG_ON(vma->fence != &gt->ggtt->fence_regs[i]); - node = &vma->obj->base.vma_node; + + if (!vma->mmo) + continue; + + node = &vma->mmo->vma_node; vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT; + unmap_mapping_range(gt->i915->drm.anon_inode->i_mapping, drm_vma_node_offset_addr(node) + vma_offset, vma->size, @@ -722,10 +761,11 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) static void reset_finish_engine(struct intel_engine_cs *engine) { - engine->reset.finish(engine); + if (engine->reset.finish) + engine->reset.finish(engine); intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); - intel_engine_breadcrumbs_irq(engine); + intel_engine_signal_breadcrumbs(engine); } static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake) @@ -754,7 +794,7 @@ static void nop_submit_request(struct i915_request *request) i915_request_mark_complete(request); spin_unlock_irqrestore(&engine->active.lock, flags); - intel_engine_queue_breadcrumbs(engine); + intel_engine_signal_breadcrumbs(engine); } static void __intel_gt_set_wedged(struct intel_gt *gt) @@ -799,7 +839,8 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) /* Mark all executing requests as skipped */ for_each_engine(engine, gt, id) - engine->cancel_requests(engine); + if (engine->reset.cancel) + engine->reset.cancel(engine); reset_finish(gt, awake); @@ -820,7 +861,6 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) { struct intel_gt_timelines *timelines = &gt->timelines; struct intel_timeline *tl; - unsigned long flags; bool ok; if (!test_bit(I915_WEDGED, &gt->reset.flags)) @@ -842,7 +882,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt
*gt) * * No more can be submitted until we reset the wedged bit. */ - spin_lock_irqsave(&timelines->lock, flags); + spin_lock(&timelines->lock); list_for_each_entry(tl, &timelines->active_list, link) { struct dma_fence *fence; @@ -850,7 +890,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) if (!fence) continue; - spin_unlock_irqrestore(&timelines->lock, flags); + spin_unlock(&timelines->lock); /* * All internal dependencies (i915_requests) will have @@ -863,10 +903,10 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) dma_fence_put(fence); /* Restart iteration after droping lock */ - spin_lock_irqsave(&timelines->lock, flags); + spin_lock(&timelines->lock); tl = list_entry(&timelines->active_list, typeof(*tl), link); } - spin_unlock_irqrestore(&timelines->lock, flags); + spin_unlock(&timelines->lock); /* We must reset pending GPU events before restoring our submission */ ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */ @@ -1070,9 +1110,10 @@ static inline int intel_gt_reset_engine(struct intel_engine_cs *engine) int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) { struct intel_gt *gt = engine->gt; + bool uses_guc = intel_engine_in_guc_submission_mode(engine); int ret; - GEM_TRACE("%s flags=%lx\n", engine->name, gt->reset.flags); + ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags); GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags)); if (!intel_engine_pm_get_if_awake(engine)) @@ -1085,14 +1126,14 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) "Resetting %s for %s\n", engine->name, msg); atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]); - if (!engine->gt->uc.guc.execbuf_client) + if (!uses_guc) ret = intel_gt_reset_engine(engine); else ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine); if (ret) { /* If we fail here, we expect to fallback to a global reset */ DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n", -
engine->gt->uc.guc.execbuf_client ? "GuC " : "", + uses_guc ? "GuC " : "", engine->name, ret); goto out; } |