Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_lrc.c')
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc.c  203
1 file changed, 134 insertions, 69 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 87e6c5bdd2dc..24322ef08aa4 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -446,6 +446,9 @@ static int queue_prio(const struct intel_engine_execlists *execlists)
 	 * we have to flip the index value to become priority.
 	 */
 	p = to_priolist(rb);
+	if (!I915_USER_PRIORITY_SHIFT)
+		return p->priority;
+
 	return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
 }
@@ -1103,7 +1106,7 @@ static struct i915_request *
 __unwind_incomplete_requests(struct intel_engine_cs *engine)
 {
 	struct i915_request *rq, *rn, *active = NULL;
-	struct list_head *uninitialized_var(pl);
+	struct list_head *pl;
 	int prio = I915_PRIORITY_INVALID;
 
 	lockdep_assert_held(&engine->active.lock);
@@ -1134,6 +1137,13 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 			list_move(&rq->sched.link, pl);
 			set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
 
+			/* Check in case we rollback so far we wrap [size/2] */
+			if (intel_ring_direction(rq->ring,
+						 intel_ring_wrap(rq->ring,
+								 rq->tail),
+						 rq->ring->tail) > 0)
+				rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
+
 			active = rq;
 		} else {
 			struct intel_engine_cs *owner = rq->context->engine;
@@ -1370,6 +1380,8 @@ __execlists_schedule_in(struct i915_request *rq)
 	ce->lrc.ccid |= engine->execlists.ccid;
 
 	__intel_gt_pm_get(engine->gt);
+	if (engine->fw_domain && !atomic_fetch_inc(&engine->fw_active))
+		intel_uncore_forcewake_get(engine->uncore, engine->fw_domain);
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
 	intel_engine_context_in(engine);
@@ -1402,8 +1414,8 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
 	struct i915_request *next = READ_ONCE(ve->request);
 
-	if (next && next->execution_mask & ~rq->execution_mask)
-		tasklet_schedule(&ve->base.execlists.tasklet);
+	if (next == rq || (next && next->execution_mask & ~rq->execution_mask))
+		tasklet_hi_schedule(&ve->base.execlists.tasklet);
 }
 
 static inline void
@@ -1438,6 +1450,8 @@ __execlists_schedule_out(struct i915_request *rq,
 	intel_context_update_runtime(ce);
 	intel_engine_context_out(engine);
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+	if (engine->fw_domain && !atomic_dec_return(&engine->fw_active))
+		intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);
 	intel_gt_pm_put_async(engine->gt);
 
 	/*
@@ -1498,8 +1512,9 @@ static u64 execlists_update_context(struct i915_request *rq)
 	 * HW has a tendency to ignore us rewinding the TAIL to the end of
 	 * an earlier request.
 	 */
+	GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail);
+	prev = rq->ring->tail;
 	tail = intel_ring_set_tail(rq->ring, rq->tail);
-	prev = ce->lrc_reg_state[CTX_RING_TAIL];
 	if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
 		desc |= CTX_DESC_FORCE_RESTORE;
 	ce->lrc_reg_state[CTX_RING_TAIL] = tail;
@@ -1628,9 +1643,9 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
 		ccid = ce->lrc.ccid;
 
 		/*
-		 * Sentinels are supposed to be lonely so they flush the
-		 * current exection off the HW. Check that they are the
-		 * only request in the pending submission.
+		 * Sentinels are supposed to be the last request so they flush
+		 * the current execution off the HW. Check that they are the only
+		 * request in the pending submission.
 		 */
 		if (sentinel) {
 			GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n",
@@ -1639,15 +1654,7 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
 				      port - execlists->pending);
 			return false;
 		}
-		sentinel = i915_request_has_sentinel(rq);
-		if (sentinel && port != execlists->pending) {
-			GEM_TRACE_ERR("%s: sentinel context:%llx not in prime position[%zd]\n",
-				      engine->name,
-				      ce->timeline->fence_context,
-				      port - execlists->pending);
-			return false;
-		}
 
 		/* Hold tightly onto the lock to prevent concurrent retires! */
 		if (!spin_trylock_irqsave(&rq->lock, flags))
@@ -1895,7 +1902,8 @@ static void defer_active(struct intel_engine_cs *engine)
 
 static bool
 need_timeslice(const struct intel_engine_cs *engine,
-	       const struct i915_request *rq)
+	       const struct i915_request *rq,
+	       const struct rb_node *rb)
 {
 	int hint;
 
@@ -1903,9 +1911,28 @@ need_timeslice(const struct intel_engine_cs *engine,
 		return false;
 
 	hint = engine->execlists.queue_priority_hint;
+
+	if (rb) {
+		const struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		const struct intel_engine_cs *inflight =
+			intel_context_inflight(&ve->context);
+
+		if (!inflight || inflight == engine) {
+			struct i915_request *next;
+
+			rcu_read_lock();
+			next = READ_ONCE(ve->request);
+			if (next)
+				hint = max(hint, rq_prio(next));
+			rcu_read_unlock();
+		}
+	}
+
 	if (!list_is_last(&rq->sched.link, &engine->active.requests))
 		hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
 
+	GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE);
 	return hint >= effective_prio(rq);
 }
@@ -1939,7 +1966,7 @@ static int
 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
 {
 	if (list_is_last(&rq->sched.link, &engine->active.requests))
-		return INT_MIN;
+		return engine->execlists.queue_priority_hint;
 
 	return rq_prio(list_next_entry(rq, sched.link));
 }
@@ -1977,10 +2004,9 @@ static void set_timeslice(struct intel_engine_cs *engine)
 	set_timer_ms(&engine->execlists.timer, duration);
 }
 
-static void start_timeslice(struct intel_engine_cs *engine)
+static void start_timeslice(struct intel_engine_cs *engine, int prio)
 {
 	struct intel_engine_execlists *execlists = &engine->execlists;
-	const int prio = queue_prio(execlists);
 	unsigned long duration;
 
 	if (!intel_engine_has_timeslices(engine))
@@ -2140,7 +2166,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			__unwind_incomplete_requests(engine);
 
 			last = NULL;
-		} else if (need_timeslice(engine, last) &&
+		} else if (need_timeslice(engine, last, rb) &&
 			   timeslice_expired(execlists, last)) {
 			if (i915_request_completed(last)) {
 				tasklet_hi_schedule(&execlists->tasklet);
@@ -2188,7 +2214,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				 * Even if ELSP[1] is occupied and not worthy
 				 * of timeslices, our queue might be.
 				 */
-				start_timeslice(engine);
+				start_timeslice(engine, queue_prio(execlists));
 				return;
 			}
 		}
@@ -2223,7 +2249,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 			if (last && !can_merge_rq(last, rq)) {
 				spin_unlock(&ve->base.active.lock);
-				start_timeslice(engine);
+				start_timeslice(engine, rq_prio(rq));
 				return; /* leave this for another sibling */
 			}
 
@@ -2418,6 +2444,7 @@ done:
 		set_preempt_timeout(engine, *active);
 		execlists_submit_ports(engine);
 	} else {
+		start_timeslice(engine, execlists->queue_priority_hint);
skip_submit:
 		ring_set_paused(engine, 0);
 	}
@@ -2542,6 +2569,25 @@ static void process_csb(struct intel_engine_cs *engine)
 		return;
 
 	/*
+	 * We will consume all events from HW, or at least pretend to.
+	 *
+	 * The sequence of events from the HW is deterministic, and derived
+	 * from our writes to the ELSP, with a smidgen of variability for
+	 * the arrival of the asynchronous requests wrt to the inflight
+	 * execution. If the HW sends an event that does not correspond with
+	 * the one we are expecting, we have to abandon all hope as we lose
+	 * all tracking of what the engine is actually executing. We will
+	 * only detect we are out of sequence with the HW when we get an
+	 * 'impossible' event because we have already drained our own
+	 * preemption/promotion queue. If this occurs, we know that we likely
+	 * lost track of execution earlier and must unwind and restart, the
+	 * simplest way is by stop processing the event queue and force the
+	 * engine to reset.
+	 */
+	execlists->csb_head = tail;
+	ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
+
+	/*
 	 * Hopefully paired with a wmb() in HW!
 	 *
 	 * We must complete the read of the write pointer before any reads
@@ -2550,8 +2596,6 @@ static void process_csb(struct intel_engine_cs *engine)
 	 * we perform the READ_ONCE(*csb_write).
 	 */
 	rmb();
-
-	ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
 
 	do {
 		bool promote;
@@ -2586,6 +2630,11 @@ static void process_csb(struct intel_engine_cs *engine)
 		if (promote) {
 			struct i915_request * const *old = execlists->active;
 
+			if (GEM_WARN_ON(!*execlists->pending)) {
+				execlists->error_interrupt |= ERROR_CSB;
+				break;
+			}
+
 			ring_set_paused(engine, 0);
 
 			/* Point active to the new ELSP; prevent overwriting */
@@ -2608,7 +2657,10 @@ static void process_csb(struct intel_engine_cs *engine)
 
 			WRITE_ONCE(execlists->pending[0], NULL);
 		} else {
-			GEM_BUG_ON(!*execlists->active);
+			if (GEM_WARN_ON(!*execlists->active)) {
+				execlists->error_interrupt |= ERROR_CSB;
+				break;
+			}
 
 			/* port0 completed, advanced to port1 */
 			trace_ports(execlists, "completed", execlists->active);
@@ -2659,7 +2711,6 @@ static void process_csb(struct intel_engine_cs *engine)
 		}
 	} while (head != tail);
 
-	execlists->csb_head = head;
 	set_timeslice(engine);
 
 	/*
@@ -2978,12 +3029,12 @@ static u32 active_ccid(struct intel_engine_cs *engine)
 	return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI);
 }
 
-static bool execlists_capture(struct intel_engine_cs *engine)
+static void execlists_capture(struct intel_engine_cs *engine)
 {
 	struct execlists_capture *cap;
 
 	if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
-		return true;
+		return;
 
 	/*
 	 * We need to _quickly_ capture the engine state before we reset.
@@ -2992,7 +3043,7 @@ static bool execlists_capture(struct intel_engine_cs *engine)
 	 */
 	cap = capture_regs(engine);
 	if (!cap)
-		return true;
+		return;
 
 	spin_lock_irq(&engine->active.lock);
 	cap->rq = active_context(engine, active_ccid(engine));
@@ -3029,14 +3080,13 @@ static bool execlists_capture(struct intel_engine_cs *engine)
 
 	INIT_WORK(&cap->work, execlists_capture_work);
 	schedule_work(&cap->work);
-	return true;
+	return;
 
 err_rq:
 	i915_request_put(cap->rq);
 err_free:
 	i915_gpu_coredump_put(cap->error);
 	kfree(cap);
-	return false;
 }
@@ -3056,10 +3106,8 @@ static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
 	tasklet_disable_nosync(&engine->execlists.tasklet);
 
 	ring_set_paused(engine, 1); /* Freeze the current request in place */
-	if (execlists_capture(engine))
-		intel_engine_reset(engine, msg);
-	else
-		ring_set_paused(engine, 0);
+	execlists_capture(engine);
+	intel_engine_reset(engine, msg);
 
 	tasklet_enable(&engine->execlists.tasklet);
 	clear_and_wake_up_bit(bit, lock);
@@ -3090,9 +3138,18 @@ static void execlists_submission_tasklet(unsigned long data)
 	process_csb(engine);
 
 	if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {
+		const char *msg;
+
+		/* Generate the error message in priority wrt to the user! */
+		if (engine->execlists.error_interrupt & GENMASK(15, 0))
+			msg = "CS error"; /* thrown by a user payload */
+		else if (engine->execlists.error_interrupt & ERROR_CSB)
+			msg = "invalid CSB event";
+		else
+			msg = "internal error";
+
 		engine->execlists.error_interrupt = 0;
-		if (ENGINE_READ(engine, RING_ESR)) /* confirm the error */
-			execlists_reset(engine, "CS error");
+		execlists_reset(engine, msg);
 	}
 
 	if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
@@ -3143,13 +3200,6 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
 	if (reset_in_progress(execlists))
 		return; /* defer until we restart the engine following reset */
 
-	/* Hopefully we clear execlists->pending[] to let us through */
-	if (READ_ONCE(execlists->pending[0]) &&
-	    tasklet_trylock(&execlists->tasklet)) {
-		process_csb(engine);
-		tasklet_unlock(&execlists->tasklet);
-	}
-
 	__execlists_submission_tasklet(engine);
 }
@@ -3172,11 +3222,25 @@ static bool ancestor_on_hold(const struct intel_engine_cs *engine,
 	return !list_empty(&engine->active.hold) && hold_request(rq);
 }
 
+static void flush_csb(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists *el = &engine->execlists;
+
+	if (READ_ONCE(el->pending[0]) && tasklet_trylock(&el->tasklet)) {
+		if (!reset_in_progress(el))
+			process_csb(engine);
+		tasklet_unlock(&el->tasklet);
+	}
+}
+
 static void execlists_submit_request(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
 	unsigned long flags;
 
+	/* Hopefully we clear execlists->pending[] to let us through */
+	flush_csb(engine);
+
 	/* Will be called from irq-context when using foreign fences. */
 	spin_lock_irqsave(&engine->active.lock, flags);
@@ -3388,7 +3452,7 @@ __execlists_update_reg_state(const struct intel_context *ce,
 	/* RPCS */
 	if (engine->class == RENDER_CLASS) {
 		regs[CTX_R_PWR_CLK_STATE] =
-			intel_sseu_make_rpcs(engine->i915, &ce->sseu);
+			intel_sseu_make_rpcs(engine->gt, &ce->sseu);
 
 		i915_oa_init_reg_state(ce, engine);
 	}
@@ -3509,7 +3573,7 @@ static int emit_pdps(struct i915_request *rq)
 	int err, i;
 	u32 *cs;
 
-	GEM_BUG_ON(intel_vgpu_active(rq->i915));
+	GEM_BUG_ON(intel_vgpu_active(rq->engine->i915));
 
 	/*
 	 * Beware ye of the dragons, this sequence is magic!
@@ -3846,7 +3910,6 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
 	struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
 					     &wa_ctx->per_ctx };
 	wa_bb_func_t wa_bb_fn[2];
-	struct page *page;
 	void *batch, *batch_ptr;
 	unsigned int i;
 	int ret;
@@ -3882,14 +3945,14 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
 		return ret;
 	}
 
-	page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
-	batch = batch_ptr = kmap_atomic(page);
+	batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
 
 	/*
 	 * Emit the two workaround batch buffers, recording the offset from the
 	 * start of the workaround batch buffer object for each and their
 	 * respective sizes.
	 */
+	batch_ptr = batch;
 	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
 		wa_bb[i]->offset = batch_ptr - batch;
 		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
@@ -3901,10 +3964,10 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
 			batch_ptr = wa_bb_fn[i](engine, batch_ptr);
 		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
 	}
+	GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
 
-	BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
-
-	kunmap_atomic(batch);
+	__i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
+	__i915_gem_object_release_map(wa_ctx->vma->obj);
 
 	if (ret)
 		lrc_destroy_wa_ctx(engine);
@@ -4488,11 +4551,11 @@ static int gen8_emit_flush_render(struct i915_request *request,
 		 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
 		 * pipe control.
 		 */
-		if (IS_GEN(request->i915, 9))
+		if (IS_GEN(request->engine->i915, 9))
 			vf_flush_wa = true;
 
 		/* WaForGAMHang:kbl */
-		if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
+		if (IS_KBL_REVID(request->engine->i915, 0, KBL_REVID_B0))
 			dc_flush_wa = true;
 	}
@@ -4739,6 +4802,14 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode)
 	return 0;
 }
 
+static void assert_request_valid(struct i915_request *rq)
+{
+	struct intel_ring *ring __maybe_unused = rq->ring;
+
+	/* Can we unwind this request without appearing to go forwards? */
+	GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0);
+}
+
 /*
  * Reserve space for 2 NOOPs at the end of each request to be
 * used as a workaround for not being allowed to do lite
@@ -4751,6 +4822,9 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
 	*cs++ = MI_NOOP;
 	request->wa_tail = intel_ring_offset(request, cs);
 
+	/* Check that entire request is less than half the ring */
+	assert_request_valid(request);
+
 	return cs;
 }
@@ -5358,13 +5432,8 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve)
 	 * typically be the first we inspect for submission.
 	 */
 	swp = prandom_u32_max(ve->num_siblings);
-	if (!swp)
-		return;
-
-	swap(ve->siblings[swp], ve->siblings[0]);
-	if (!intel_engine_has_relative_mmio(ve->siblings[0]))
-		virtual_update_register_offsets(ve->context.lrc_reg_state,
-						ve->siblings[0]);
+	if (swp)
+		swap(ve->siblings[swp], ve->siblings[0]);
 }
@@ -5377,15 +5446,9 @@ static int virtual_context_alloc(struct intel_context *ce)
 static int virtual_context_pin(struct intel_context *ce)
 {
 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
-	int err;
 
 	/* Note: we must use a real engine class for setting up reg state */
-	err = __execlists_context_pin(ce, ve->siblings[0]);
-	if (err)
-		return err;
-
-	virtual_engine_initial_hint(ve);
-	return 0;
+	return __execlists_context_pin(ce, ve->siblings[0]);
 }
 
 static void virtual_context_enter(struct intel_context *ce)
@@ -5560,7 +5623,7 @@ static void virtual_submit_request(struct i915_request *rq)
 		GEM_BUG_ON(!list_empty(virtual_queue(ve)));
 		list_move_tail(&rq->sched.link, virtual_queue(ve));
 
-		tasklet_schedule(&ve->base.execlists.tasklet);
+		tasklet_hi_schedule(&ve->base.execlists.tasklet);
 	}
 
 	spin_unlock_irqrestore(&ve->base.active.lock, flags);
@@ -5650,6 +5713,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
 	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
 	intel_engine_init_breadcrumbs(&ve->base);
 	intel_engine_init_execlists(&ve->base);
+	ve->base.breadcrumbs.irq_armed = true; /* fake HW, used for irq_work */
 
 	ve->base.cops = &virtual_context_ops;
 	ve->base.request_alloc = execlists_request_alloc;
@@ -5731,6 +5795,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
 
 	ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
 
+	virtual_engine_initial_hint(ve);
 	return &ve->context;
 
 err_put:
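Note: several hunks above lean on intel_ring_direction() to decide whether a tail offset has moved "forwards" or "backwards" around the circular ring: the unwind path sets CTX_DESC_FORCE_RESTORE when a rollback wraps more than half the ring, and the new assert_request_valid() insists a request can be unwound without appearing to go forwards. The standalone sketch below illustrates only the underlying half-ring heuristic; ring_direction() and RING_SIZE are invented names for this example, not the i915 helpers.

/*
 * Illustrative sketch (assumed semantics, not the i915 implementation):
 * on a power-of-two ring, a step of less than half the ring is treated
 * as forward progress; anything further is assumed to have wrapped and
 * therefore lies "behind".
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 4096u	/* power of two, like a GPU ring buffer */

/* > 0: next is ahead of prev, < 0: behind, 0: equal */
static int ring_direction(uint32_t next, uint32_t prev)
{
	uint32_t delta = (next - prev) & (RING_SIZE - 1);

	if (delta == 0)
		return 0;
	return delta < RING_SIZE / 2 ? 1 : -1;
}

int main(void)
{
	assert(ring_direction(0x100, 0x080) > 0);	/* small forward step */
	assert(ring_direction(0x080, 0x100) < 0);	/* rewound */
	assert(ring_direction(0x010, 0xff0) > 0);	/* forward across the wrap */
	assert(ring_direction(0xff0, 0x010) < 0);	/* backwards across the wrap */
	printf("ring direction checks passed\n");
	return 0;
}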
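Note: the fw_domain/fw_active hunks in __execlists_schedule_in() and __execlists_schedule_out() follow a plain reference-counting idiom: the first context scheduled in (count 0 -> 1) grabs the forcewake, and the last one scheduled out (count back to 0) releases it. Below is a minimal user-space sketch of that idiom using C11 atomics and invented helper names, not the kernel's atomic_t or intel_uncore API.

#include <stdatomic.h>
#include <stdio.h>

static atomic_int fw_active;	/* stands in for engine->fw_active */

static void forcewake_get(void)	/* stands in for intel_uncore_forcewake_get() */
{
	printf("forcewake: acquired\n");
}

static void forcewake_put(void)	/* stands in for intel_uncore_forcewake_put() */
{
	printf("forcewake: released\n");
}

static void schedule_in(void)
{
	/* fetch_add returns the old value: 0 means we are the first user */
	if (!atomic_fetch_add(&fw_active, 1))
		forcewake_get();
}

static void schedule_out(void)
{
	/* fetch_sub returns the old value: 1 means we were the last user */
	if (atomic_fetch_sub(&fw_active, 1) == 1)
		forcewake_put();
}

int main(void)
{
	schedule_in();	/* acquires */
	schedule_in();	/* already held, no-op */
	schedule_out();	/* still one user, no-op */
	schedule_out();	/* releases */
	return 0;
}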