From e61e0f51ba7974bb575cdc23220b573e5cd4ff2a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 21 Feb 2018 09:56:36 +0000 Subject: drm/i915: Rename drm_i915_gem_request to i915_request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to de-emphasize the link between the request (dependency, execution and fence tracking) from GEM and so rename the struct from drm_i915_gem_request to i915_request. That is we may implement the GEM user interface on top of requests, but they are an abstraction for tracking execution rather than an implementation detail of GEM. (Since they are not tied to HW, we keep the i915 prefix as opposed to intel.) In short, the spatch: @@ @@ - struct drm_i915_gem_request + struct i915_request A corollary to contracting the type name, we also harmonise on using 'rq' shorthand for local variables where space if of the essence and repetition makes 'request' unwieldy. For globals and struct members, 'request' is still much preferred for its clarity. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Tvrtko Ursulin Cc: Michał Winiarski Cc: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20180221095636.6649-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala Reviewed-by: Michał Winiarski Acked-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_engine_cs.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index f3c5100d629e..c31544406974 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1426,20 +1426,20 @@ int init_workarounds_ring(struct intel_engine_cs *engine) return 0; } -int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) +int intel_ring_workarounds_emit(struct i915_request *rq) { - struct i915_workarounds *w = &req->i915->workarounds; + struct i915_workarounds *w = &rq->i915->workarounds; u32 *cs; int ret, i; if (w->count == 0) return 0; - ret = req->engine->emit_flush(req, EMIT_BARRIER); + ret = rq->engine->emit_flush(rq, EMIT_BARRIER); if (ret) return ret; - cs = intel_ring_begin(req, (w->count * 2 + 2)); + cs = intel_ring_begin(rq, w->count * 2 + 2); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1450,9 +1450,9 @@ int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) } *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); - ret = req->engine->emit_flush(req, EMIT_BARRIER); + ret = rq->engine->emit_flush(rq, EMIT_BARRIER); if (ret) return ret; @@ -1552,7 +1552,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) { const struct i915_gem_context * const kernel_context = engine->i915->kernel_context; - struct drm_i915_gem_request *rq; + struct i915_request *rq; lockdep_assert_held(&engine->i915->drm.struct_mutex); @@ -1664,12 +1664,12 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915) } static void print_request(struct drm_printer *m, - struct drm_i915_gem_request *rq, + struct i915_request *rq, const char *prefix) { drm_printf(m, "%s%x%s [%x:%x] prio=%d @ %dms: %s\n", prefix, rq->global_seqno, - i915_gem_request_completed(rq) ? "!" : "", + i915_request_completed(rq) ? "!" : "", rq->ctx->hw_id, rq->fence.seqno, rq->priotree.priority, jiffies_to_msecs(jiffies - rq->emitted_jiffies), @@ -1803,7 +1803,7 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, rcu_read_lock(); for (idx = 0; idx < execlists_num_ports(execlists); idx++) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; unsigned int count; rq = port_unpack(&execlists->port[idx], &count); @@ -1837,7 +1837,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct intel_breadcrumbs * const b = &engine->breadcrumbs; const struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_gpu_error * const error = &engine->i915->gpu_error; - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct rb_node *rb; if (header) { @@ -1866,12 +1866,12 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tRequests:\n"); rq = list_first_entry(&engine->timeline->requests, - struct drm_i915_gem_request, link); + struct i915_request, link); if (&rq->link != &engine->timeline->requests) print_request(m, rq, "\t\tfirst "); rq = list_last_entry(&engine->timeline->requests, - struct drm_i915_gem_request, link); + struct i915_request, link); if (&rq->link != &engine->timeline->requests) print_request(m, rq, "\t\tlast "); -- cgit v1.2.3 From f6322eddaff7662e81178a28730e420bf934a512 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 22 Feb 2018 14:22:29 +0000 Subject: drm/i915/preemption: Allow preemption between submission ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sometimes we need to boost the priority of an in-flight request, which may lead to the situation where the second submission port then contains a higher priority context than the first and so we need to inject a preemption event. To do so we must always check inside execlists_dequeue() whether there is a priority inversion between the ports themselves as well as the head of the priority sorted queue, and we cannot just skip dequeuing if the queue is empty. As Michał noted, this doesn't simply extend to handling more than 2-port submission, as we may need to reorder within the array of executing requests which themselves are lower priority than the first. A task for later! Signed-off-by: Chris Wilson Cc: Michał Winiarski Cc: Michel Thierry Cc: Mika Kuoppala Cc: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180222142229.14517-1-chris@chris-wilson.co.uk Reviewed-by: Michał Winiarski Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_engine_cs.c | 2 + drivers/gpu/drm/i915/intel_guc_submission.c | 17 +-- drivers/gpu/drm/i915/intel_lrc.c | 161 ++++++++++++++++------------ drivers/gpu/drm/i915/intel_ringbuffer.h | 10 ++ 4 files changed, 112 insertions(+), 78 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index c31544406974..ce7fcf55ba18 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -423,6 +423,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine) BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists)); GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); + execlists->queue_priority = INT_MIN; execlists->queue = RB_ROOT; execlists->first = NULL; } @@ -1903,6 +1904,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, spin_lock_irq(&engine->timeline->lock); list_for_each_entry(rq, &engine->timeline->requests, link) print_request(m, rq, "\t\tE "); + drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority); for (rb = execlists->first; rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 649113c7a3c2..586dde579903 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -75,6 +75,11 @@ * */ +static inline struct i915_priolist *to_priolist(struct rb_node *rb) +{ + return rb_entry(rb, struct i915_priolist, node); +} + static inline bool is_high_priority(struct intel_guc_client *client) { return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH || @@ -682,15 +687,12 @@ static void guc_dequeue(struct intel_engine_cs *engine) rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); - if (!rb) - goto unlock; - if (port_isset(port)) { if (engine->i915->preempt_context) { struct guc_preempt_work *preempt_work = &engine->i915->guc.preempt_work[engine->id]; - if (rb_entry(rb, struct i915_priolist, node)->priority > + if (execlists->queue_priority > max(port_request(port)->priotree.priority, 0)) { execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT); @@ -706,8 +708,8 @@ static void guc_dequeue(struct intel_engine_cs *engine) } GEM_BUG_ON(port_isset(port)); - do { - struct i915_priolist *p = rb_entry(rb, typeof(*p), node); + while (rb) { + struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { @@ -736,8 +738,9 @@ static void guc_dequeue(struct intel_engine_cs *engine) INIT_LIST_HEAD(&p->requests); if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); - } while (rb); + } done: + execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN; execlists->first = rb; if (submit) { port_assign(port, last); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 964885b5d7cb..14288743909f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -169,6 +169,23 @@ static void execlists_init_reg_state(u32 *reg_state, struct intel_engine_cs *engine, struct intel_ring *ring); +static inline struct i915_priolist *to_priolist(struct rb_node *rb) +{ + return rb_entry(rb, struct i915_priolist, node); +} + +static inline int rq_prio(const struct i915_request *rq) +{ + return rq->priotree.priority; +} + +static inline bool need_preempt(const struct intel_engine_cs *engine, + const struct i915_request *last, + int prio) +{ + return engine->i915->preempt_context && prio > max(rq_prio(last), 0); +} + /** * intel_lr_context_descriptor_update() - calculate & cache the descriptor * descriptor for a pinned context @@ -224,7 +241,7 @@ find_priolist: parent = &execlists->queue.rb_node; while (*parent) { rb = *parent; - p = rb_entry(rb, typeof(*p), node); + p = to_priolist(rb); if (prio > p->priority) { parent = &rb->rb_left; } else if (prio < p->priority) { @@ -264,7 +281,7 @@ find_priolist: if (first) execlists->first = &p->node; - return ptr_pack_bits(p, first, 1); + return p; } static void unwind_wa_tail(struct i915_request *rq) @@ -290,14 +307,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) __i915_request_unsubmit(rq); unwind_wa_tail(rq); - GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID); - if (rq->priotree.priority != last_prio) { - p = lookup_priolist(engine, - &rq->priotree, - rq->priotree.priority); - p = ptr_mask_bits(p, 1); - - last_prio = rq->priotree.priority; + GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); + if (rq_prio(rq) != last_prio) { + last_prio = rq_prio(rq); + p = lookup_priolist(engine, &rq->priotree, last_prio); } list_add(&rq->priotree.link, &p->requests); @@ -397,10 +410,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x\n", + GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x, prio=%d\n", engine->name, n, port[n].context_id, count, - rq->global_seqno); + rq->global_seqno, + rq_prio(rq)); } else { GEM_BUG_ON(!n); desc = 0; @@ -453,12 +467,17 @@ static void inject_preempt_context(struct intel_engine_cs *engine) _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)); + /* + * Switch to our empty preempt context so + * the state of the GPU is known (idle). + */ GEM_TRACE("%s\n", engine->name); for (n = execlists_num_ports(&engine->execlists); --n; ) elsp_write(0, engine->execlists.elsp); elsp_write(ce->lrc_desc, engine->execlists.elsp); execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK); + execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT); } static void execlists_dequeue(struct intel_engine_cs *engine) @@ -495,8 +514,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) spin_lock_irq(&engine->timeline->lock); rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); - if (!rb) - goto unlock; if (last) { /* @@ -519,54 +536,48 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) goto unlock; - if (engine->i915->preempt_context && - rb_entry(rb, struct i915_priolist, node)->priority > - max(last->priotree.priority, 0)) { - /* - * Switch to our empty preempt context so - * the state of the GPU is known (idle). - */ + if (need_preempt(engine, last, execlists->queue_priority)) { inject_preempt_context(engine); - execlists_set_active(execlists, - EXECLISTS_ACTIVE_PREEMPT); goto unlock; - } else { - /* - * In theory, we could coalesce more requests onto - * the second port (the first port is active, with - * no preemptions pending). However, that means we - * then have to deal with the possible lite-restore - * of the second port (as we submit the ELSP, there - * may be a context-switch) but also we may complete - * the resubmission before the context-switch. Ergo, - * coalescing onto the second port will cause a - * preemption event, but we cannot predict whether - * that will affect port[0] or port[1]. - * - * If the second port is already active, we can wait - * until the next context-switch before contemplating - * new requests. The GPU will be busy and we should be - * able to resubmit the new ELSP before it idles, - * avoiding pipeline bubbles (momentary pauses where - * the driver is unable to keep up the supply of new - * work). - */ - if (port_count(&port[1])) - goto unlock; - - /* WaIdleLiteRestore:bdw,skl - * Apply the wa NOOPs to prevent - * ring:HEAD == rq:TAIL as we resubmit the - * request. See gen8_emit_breadcrumb() for - * where we prepare the padding after the - * end of the request. - */ - last->tail = last->wa_tail; } + + /* + * In theory, we could coalesce more requests onto + * the second port (the first port is active, with + * no preemptions pending). However, that means we + * then have to deal with the possible lite-restore + * of the second port (as we submit the ELSP, there + * may be a context-switch) but also we may complete + * the resubmission before the context-switch. Ergo, + * coalescing onto the second port will cause a + * preemption event, but we cannot predict whether + * that will affect port[0] or port[1]. + * + * If the second port is already active, we can wait + * until the next context-switch before contemplating + * new requests. The GPU will be busy and we should be + * able to resubmit the new ELSP before it idles, + * avoiding pipeline bubbles (momentary pauses where + * the driver is unable to keep up the supply of new + * work). However, we have to double check that the + * priorities of the ports haven't been switch. + */ + if (port_count(&port[1])) + goto unlock; + + /* + * WaIdleLiteRestore:bdw,skl + * Apply the wa NOOPs to prevent + * ring:HEAD == rq:TAIL as we resubmit the + * request. See gen8_emit_breadcrumb() for + * where we prepare the padding after the + * end of the request. + */ + last->tail = last->wa_tail; } - do { - struct i915_priolist *p = rb_entry(rb, typeof(*p), node); + while (rb) { + struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { @@ -628,8 +639,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine) INIT_LIST_HEAD(&p->requests); if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); - } while (rb); + } done: + execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN; execlists->first = rb; if (submit) port_assign(port, last); @@ -690,7 +702,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) /* Flush the queued requests to the timeline list (for retiring). */ rb = execlists->first; while (rb) { - struct i915_priolist *p = rb_entry(rb, typeof(*p), node); + struct i915_priolist *p = to_priolist(rb); list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { INIT_LIST_HEAD(&rq->priotree.link); @@ -708,7 +720,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) /* Remaining _unready_ requests will be nop'ed when submitted */ - + execlists->queue_priority = INT_MIN; execlists->queue = RB_ROOT; execlists->first = NULL; GEM_BUG_ON(port_isset(execlists->port)); @@ -864,10 +876,11 @@ static void execlists_submission_tasklet(unsigned long data) EXECLISTS_ACTIVE_USER)); rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n", + GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n", engine->name, port->context_id, count, - rq ? rq->global_seqno : 0); + rq ? rq->global_seqno : 0, + rq ? rq_prio(rq) : 0); /* Check the context/desc id for this event matches */ GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); @@ -912,15 +925,19 @@ static void execlists_submission_tasklet(unsigned long data) intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); } -static void insert_request(struct intel_engine_cs *engine, - struct i915_priotree *pt, - int prio) +static void queue_request(struct intel_engine_cs *engine, + struct i915_priotree *pt, + int prio) { - struct i915_priolist *p = lookup_priolist(engine, pt, prio); + list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests); +} - list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests); - if (ptr_unmask_bits(p, 1)) +static void submit_queue(struct intel_engine_cs *engine, int prio) +{ + if (prio > engine->execlists.queue_priority) { + engine->execlists.queue_priority = prio; tasklet_hi_schedule(&engine->execlists.tasklet); + } } static void execlists_submit_request(struct i915_request *request) @@ -931,7 +948,8 @@ static void execlists_submit_request(struct i915_request *request) /* Will be called from irq-context when using foreign fences. */ spin_lock_irqsave(&engine->timeline->lock, flags); - insert_request(engine, &request->priotree, request->priotree.priority); + queue_request(engine, &request->priotree, rq_prio(request)); + submit_queue(engine, rq_prio(request)); GEM_BUG_ON(!engine->execlists.first); GEM_BUG_ON(list_empty(&request->priotree.link)); @@ -987,7 +1005,7 @@ static void execlists_schedule(struct i915_request *request, int prio) * static void update_priorities(struct i915_priotree *pt, prio) { * list_for_each_entry(dep, &pt->signalers_list, signal_link) * update_priorities(dep->signal, prio) - * insert_request(pt); + * queue_request(pt); * } * but that may have unlimited recursion depth and so runs a very * real risk of overunning the kernel stack. Instead, we build @@ -1050,8 +1068,9 @@ static void execlists_schedule(struct i915_request *request, int prio) pt->priority = prio; if (!list_empty(&pt->link)) { __list_del_entry(&pt->link); - insert_request(engine, pt, prio); + queue_request(engine, pt, prio); } + submit_queue(engine, prio); } spin_unlock_irq(&engine->timeline->lock); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a9b83bf7e837..bbacf4d0f4cb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -257,6 +257,16 @@ struct intel_engine_execlists { */ unsigned int port_mask; + /** + * @queue_priority: Highest pending priority. + * + * When we add requests into the queue, or adjust the priority of + * executing requests, we compute the maximum priority of those + * pending requests. We can then use this value to determine if + * we need to preempt the executing requests to service the queue. + */ + int queue_priority; + /** * @queue: queue of requests, in priority lists */ -- cgit v1.2.3 From 367a35a6c6c7ea84fe3f47825668ca017cb566ed Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 28 Feb 2018 09:47:32 +0000 Subject: drm/i915: Don't deref request->ctx inside unlocked print_request() Although we protect the request itself, we don't lock inside intel_engine_dump() and so the request maybe retired as we peek into it. One consequence is that the request->ctx may be freed before we dereference it, leading to a use-after-free. Replace the hw_id we are peeking from inside request->ctx with the request->fence.context, with which we can still track from which context the request originated (although to tie to HW reports requires a little more legwork, but is good enough to follow the GEM traces). [52640.729670] general protection fault: 0000 [#2] SMP [52640.729694] Dumping ftrace buffer: [52640.729701] (ftrace buffer empty) [52640.729705] Modules linked in: vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic x86_pkg_\ temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul snd_hda_intel snd_hda_codec snd_hwdep gha\ sh_clmulni_intel snd_hda_core snd_pcm mei_me mei i915 r8169 mii prime_numbers i2c_hid [52640.729748] CPU: 2 PID: 4335 Comm: gem_exec_schedu Tainted: G UD W 4.16.0-rc3+ #7 [52640.729759] Hardware name: Acer Aspire E5-575G/Ironman_SK , BIOS V1.12 08/02/2016 [52640.729803] RIP: 0010:print_request+0x2b/0xb0 [i915] [52640.729811] RSP: 0018:ffffc90001453c18 EFLAGS: 00010206 [52640.729820] RAX: 6b6b6b6b6b6b6b6b RBX: ffff8801e0292d40 RCX: 0000000000000006 [52640.729829] RDX: ffffc90001453c60 RSI: ffff8801e0292d40 RDI: 0000000000000003 [52640.729838] RBP: ffffc90001453d80 R08: 0000000000000000 R09: 0000000000000001 [52640.729847] R10: ffffc90001453bd0 R11: ffffc90001453c73 R12: ffffc90001453c60 [52640.729856] R13: ffffc90001453d80 R14: ffff8801d5a683c8 R15: ffff8801e0292d40 [52640.729866] FS: 00007f1ee50548c0(0000) GS:ffff8801e8200000(0000) knlGS:0000000000000000 [52640.729876] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [52640.729884] CR2: 00007f1ee5077000 CR3: 00000001d9411004 CR4: 00000000003606e0 [52640.729893] Call Trace: [52640.729922] intel_engine_print_registers+0x623/0x890 [i915] [52640.729948] intel_engine_dump+0x4a3/0x590 [i915] [52640.729957] ? seq_printf+0x3a/0x50 [52640.729977] i915_engine_info+0xb8/0xe0 [i915] [52640.729984] ? drm_mode_gamma_get_ioctl+0xf0/0xf0 [52640.729990] seq_read+0xd5/0x410 [52640.729997] full_proxy_read+0x4b/0x70 [52640.730004] __vfs_read+0x1e/0x120 [52640.730009] ? do_sys_open+0x134/0x220 [52640.730015] ? kmem_cache_free+0x174/0x2b0 [52640.730021] vfs_read+0xa1/0x150 [52640.730026] SyS_read+0x40/0xa0 [52640.730032] do_syscall_64+0x65/0x1a0 [52640.730038] entry_SYSCALL_64_after_hwframe+0x42/0xb7 Reported-by: Mika Kuoppala Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180228094732.28462-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ce7fcf55ba18..3e1107ecb6ee 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1668,10 +1668,10 @@ static void print_request(struct drm_printer *m, struct i915_request *rq, const char *prefix) { - drm_printf(m, "%s%x%s [%x:%x] prio=%d @ %dms: %s\n", prefix, + drm_printf(m, "%s%x%s [%llx:%x] prio=%d @ %dms: %s\n", prefix, rq->global_seqno, i915_request_completed(rq) ? "!" : "", - rq->ctx->hw_id, rq->fence.seqno, + rq->fence.context, rq->fence.seqno, rq->priotree.priority, jiffies_to_msecs(jiffies - rq->emitted_jiffies), rq->timeline->common->name); -- cgit v1.2.3 From 5f79e7c6754249dd71f3124c9c7604aab2880c20 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Fri, 2 Mar 2018 18:14:57 +0200 Subject: drm/i915/icl: Correctly initialize the Gen11 engines Gen11 has up to 4 VCS and up to 2 VECS engines, this patch adds mmio base definitions for all of them. Bspec: 20944 Bspec: 7021 v2: Set the correct mmio_base in intel_engines_init_mmio; updating the base mmio values any later would cause incorrect reads in i915_gem_sanitize (Michel). Cc: Tvrtko Ursulin Cc: Ceraolo Spurio, Daniele Signed-off-by: Oscar Mateo Signed-off-by: Michel Thierry Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20180302161501.28594-2-mika.kuoppala@linux.intel.com Signed-off-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_reg.h | 6 +++++ drivers/gpu/drm/i915/intel_engine_cs.c | 44 +++++++++++++++++++++++++++++++++- 2 files changed, 49 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 95a2e51ecbb0..d7023f15f0ed 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2345,7 +2345,13 @@ enum i915_power_well_id { #define BSD_RING_BASE 0x04000 #define GEN6_BSD_RING_BASE 0x12000 #define GEN8_BSD2_RING_BASE 0x1c000 +#define GEN11_BSD_RING_BASE 0x1c0000 +#define GEN11_BSD2_RING_BASE 0x1c4000 +#define GEN11_BSD3_RING_BASE 0x1d0000 +#define GEN11_BSD4_RING_BASE 0x1d4000 #define VEBOX_RING_BASE 0x1a000 +#define GEN11_VEBOX_RING_BASE 0x1c8000 +#define GEN11_VEBOX2_RING_BASE 0x1d8000 #define BLT_RING_BASE 0x22000 #define RING_TAIL(base) _MMIO((base)+0x30) #define RING_HEAD(base) _MMIO((base)+0x34) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 3e1107ecb6ee..911fc08658c5 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -123,6 +123,22 @@ static const struct engine_info intel_engines[] = { .mmio_base = GEN8_BSD2_RING_BASE, .irq_shift = GEN8_VCS2_IRQ_SHIFT, }, + [VCS3] = { + .hw_id = VCS3_HW, + .uabi_id = I915_EXEC_BSD, + .class = VIDEO_DECODE_CLASS, + .instance = 2, + .mmio_base = GEN11_BSD3_RING_BASE, + .irq_shift = 0, /* not used */ + }, + [VCS4] = { + .hw_id = VCS4_HW, + .uabi_id = I915_EXEC_BSD, + .class = VIDEO_DECODE_CLASS, + .instance = 3, + .mmio_base = GEN11_BSD4_RING_BASE, + .irq_shift = 0, /* not used */ + }, [VECS] = { .hw_id = VECS_HW, .uabi_id = I915_EXEC_VEBOX, @@ -131,6 +147,14 @@ static const struct engine_info intel_engines[] = { .mmio_base = VEBOX_RING_BASE, .irq_shift = GEN8_VECS_IRQ_SHIFT, }, + [VECS2] = { + .hw_id = VECS2_HW, + .uabi_id = I915_EXEC_VEBOX, + .class = VIDEO_ENHANCEMENT_CLASS, + .instance = 1, + .mmio_base = GEN11_VEBOX2_RING_BASE, + .irq_shift = 0, /* not used */ + }, }; /** @@ -230,7 +254,25 @@ intel_engine_setup(struct drm_i915_private *dev_priv, class_info->name, info->instance) >= sizeof(engine->name)); engine->hw_id = engine->guc_id = info->hw_id; - engine->mmio_base = info->mmio_base; + if (INTEL_GEN(dev_priv) >= 11) { + switch (engine->id) { + case VCS: + engine->mmio_base = GEN11_BSD_RING_BASE; + break; + case VCS2: + engine->mmio_base = GEN11_BSD2_RING_BASE; + break; + case VECS: + engine->mmio_base = GEN11_VEBOX_RING_BASE; + break; + default: + /* take the original value for all other engines */ + engine->mmio_base = info->mmio_base; + break; + } + } else { + engine->mmio_base = info->mmio_base; + } engine->irq_shift = info->irq_shift; engine->class = info->class; engine->instance = info->instance; -- cgit v1.2.3 From ac52da6af826d05f02c03fcde4a0651d070783b2 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 2 Mar 2018 18:14:58 +0200 Subject: drm/i915/icl: new context descriptor support Starting from Gen11 the context descriptor format has been updated in the HW. The hw_id field has been considerably reduced in size and engine class and instance fields have been added. There is a slight name clashing issue because the field that we call hw_id is actually called SW Context ID in the specs for Gen11+. With the current size of the hw_id field we can have a maximum of 2k contexts at any time, but we could use the sw_counter field (which is sw defined) to increase that because the HW requirement is that engine_id + sw id + sw_counter is a unique number. GuC uses a similar method to support more contexts but does its tracking at lrc level. To avoid doing an implementation that will need to be reworked once GuC support lands, defer it for now and mark it as TODO. v2: rebased, add documentation, fix GEN11_ENGINE_INSTANCE_SHIFT v3: rebased, bring back lost code from i915_gem_context.c v4: make TODO comment more generic v5: be consistent with bit ordering, add extra checks (Chris) Cc: Oscar Mateo Cc: Chris Wilson Cc: Mika Kuoppala Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Oscar Mateo Link: https://patchwork.freedesktop.org/patch/msgid/20180302161501.28594-3-mika.kuoppala@linux.intel.com Signed-off-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_context.c | 11 ++++++++-- drivers/gpu/drm/i915/i915_reg.h | 6 ++++++ drivers/gpu/drm/i915/intel_engine_cs.c | 3 +++ drivers/gpu/drm/i915/intel_lrc.c | 36 +++++++++++++++++++++++++++++++-- 5 files changed, 53 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7eec99d7fad4..78dd318df18e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2103,6 +2103,7 @@ struct drm_i915_private { */ struct ida hw_ida; #define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */ +#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */ } contexts; u32 fdi_rx_config; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index a73340ae9419..f2cbea7cf940 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -211,9 +211,15 @@ static void context_close(struct i915_gem_context *ctx) static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) { int ret; + unsigned int max; + + if (INTEL_GEN(dev_priv) >= 11) + max = GEN11_MAX_CONTEXT_HW_ID; + else + max = MAX_CONTEXT_HW_ID; ret = ida_simple_get(&dev_priv->contexts.hw_ida, - 0, MAX_CONTEXT_HW_ID, GFP_KERNEL); + 0, max, GFP_KERNEL); if (ret < 0) { /* Contexts are only released when no longer active. * Flush any pending retires to hopefully release some @@ -221,7 +227,7 @@ static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) */ i915_retire_requests(dev_priv); ret = ida_simple_get(&dev_priv->contexts.hw_ida, - 0, MAX_CONTEXT_HW_ID, GFP_KERNEL); + 0, max, GFP_KERNEL); if (ret < 0) return ret; } @@ -463,6 +469,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) /* Using the simple ida interface, the max is limited by sizeof(int) */ BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); + BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX); ida_init(&dev_priv->contexts.hw_ida); /* lowest priority; idle task */ diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d7023f15f0ed..a778b93f60d2 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3912,6 +3912,12 @@ enum { #define GEN8_CTX_ID_SHIFT 32 #define GEN8_CTX_ID_WIDTH 21 +#define GEN11_SW_CTX_ID_SHIFT 37 +#define GEN11_SW_CTX_ID_WIDTH 11 +#define GEN11_ENGINE_CLASS_SHIFT 61 +#define GEN11_ENGINE_CLASS_WIDTH 3 +#define GEN11_ENGINE_INSTANCE_SHIFT 48 +#define GEN11_ENGINE_INSTANCE_WIDTH 6 #define CHV_CLK_CTL1 _MMIO(0x101100) #define VLV_CLK_CTL2 _MMIO(0x101104) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 911fc08658c5..4ba139c27fba 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -234,6 +234,9 @@ intel_engine_setup(struct drm_i915_private *dev_priv, GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes)); class_info = &intel_engine_classes[info->class]; + BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); + BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH)); + if (GEM_WARN_ON(info->class > MAX_ENGINE_CLASS)) return -EINVAL; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 75d2daa4f6c1..69838f668862 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -204,6 +204,18 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, * bits 32-52: ctx ID, a globally unique tag * bits 53-54: mbz, reserved for use by hardware * bits 55-63: group ID, currently unused and set to 0 + * + * Starting from Gen11, the upper dword of the descriptor has a new format: + * + * bits 32-36: reserved + * bits 37-47: SW context ID + * bits 48:53: engine instance + * bit 54: mbz, reserved for use by hardware + * bits 55-60: SW counter + * bits 61-63: engine class + * + * engine info, SW context ID and SW counter need to form a unique number + * (Context ID) per lrc. */ static void intel_lr_context_descriptor_update(struct i915_gem_context *ctx, @@ -212,12 +224,32 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, struct intel_context *ce = &ctx->engine[engine->id]; u64 desc; - BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (1< (BIT(GEN8_CTX_ID_WIDTH))); + BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH))); desc = ctx->desc_template; /* bits 0-11 */ + GEM_BUG_ON(desc & GENMASK_ULL(63, 12)); + desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE; /* bits 12-31 */ - desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ + GEM_BUG_ON(desc & GENMASK_ULL(63, 32)); + + if (INTEL_GEN(ctx->i915) >= 11) { + GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH)); + desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT; + /* bits 37-47 */ + + desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT; + /* bits 48-53 */ + + /* TODO: decide what to do with SW counter (bits 55-60) */ + + desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT; + /* bits 61-63 */ + } else { + GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH)); + desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ + } ce->lrc_desc = desc; } -- cgit v1.2.3